/[escript]/branches/symbolic_from_3470/paso/src/FCT_Solver.c
ViewVC logotype

Diff of /branches/symbolic_from_3470/paso/src/FCT_Solver.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 3867 by caltinay, Thu Feb 9 00:27:46 2012 UTC revision 3868 by caltinay, Thu Mar 15 06:07:08 2012 UTC
# Line 97  void Paso_FCT_Solver_free(Paso_FCT_Solve Line 97  void Paso_FCT_Solver_free(Paso_FCT_Solve
97  double Paso_FCT_Solver_getSafeTimeStepSize(Paso_TransportProblem* fctp)  double Paso_FCT_Solver_getSafeTimeStepSize(Paso_TransportProblem* fctp)
98  {  {
99     dim_t i, n;     dim_t i, n;
100       double dt_max_loc=LARGE_POSITIVE_FLOAT;
101     double dt_max=LARGE_POSITIVE_FLOAT;     double dt_max=LARGE_POSITIVE_FLOAT;
    index_t fail=0;  
102     n=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix);     n=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix);
103     /* set low order transport operator */     /* set low order transport operator */
104     Paso_FCT_setLowOrderOperator(fctp);     Paso_FCT_setLowOrderOperator(fctp);
# Line 108  double Paso_FCT_Solver_getSafeTimeStepSi Line 108  double Paso_FCT_Solver_getSafeTimeStepSi
108           *  calculate time step size:                                                     *  calculate time step size:                                          
109          */          */
110          dt_max=LARGE_POSITIVE_FLOAT;          dt_max=LARGE_POSITIVE_FLOAT;
111      fail=0;          #pragma omp parallel private(i, dt_max_loc)
         #pragma omp parallel private(i)  
112          {          {
113                 double dt_max_loc=LARGE_POSITIVE_FLOAT;                 dt_max_loc=LARGE_POSITIVE_FLOAT;
            index_t fail_loc=0;  
114                 #pragma omp for schedule(static)                 #pragma omp for schedule(static)
115                 for (i=0;i<n;++i) {                 for (i=0;i<n;++i) {
116                    const double l_ii=fctp->main_diagonal_low_order_transport_matrix[i];                    const double l_ii=fctp->main_diagonal_low_order_transport_matrix[i];
117                    const double m_i=fctp->lumped_mass_matrix[i];                    const double m_i=fctp->lumped_mass_matrix[i];
118            if ( (m_i > 0) ) {            if ( m_i > 0 ) {
119                if (l_ii<0) dt_max_loc=MIN(dt_max_loc,m_i/(-l_ii));                if (l_ii<0) dt_max_loc=MIN(dt_max_loc,m_i/(-l_ii));
120            } else {                    }
121                fail_loc=-1;             }
           }  
                }  
122                 #pragma omp critical                 #pragma omp critical
123                 {                 {
124                    dt_max=MIN(dt_max,dt_max_loc);                    dt_max=MIN(dt_max,dt_max_loc);
           fail=MIN(fail, fail_loc);  
125                 }                 }
126          }          }
127          #ifdef ESYS_MPI          #ifdef ESYS_MPI
128          {          {
129             double rtmp_loc[2], rtmp[2];                 dt_max_loc=dt_max;
130                 rtmp_loc[0]=dt_max;                 MPI_Allreduce(&dt_max_loc, &dt_max, 1, MPI_DOUBLE, MPI_MIN, fctp->mpi_info->comm);
            rtmp_loc[1]= (double) fail;  
                MPI_Allreduce(rtmp_loc, rtmp, 2, MPI_DOUBLE, MPI_MIN, fctp->mpi_info->comm);  
            dt_max=rtmp[0];  
            fail = rtmp[1] < 0 ? -1 : 0;  
131      }      }
132          #endif          #endif
133          if (fail < 0 ) {      if (dt_max<LARGE_POSITIVE_FLOAT) dt_max*=2.;
        Esys_setError(VALUE_ERROR, "Paso_FCTSolver_getSafeTimeStepSize: negative mass matrix entries detected.");  
        return -1;  
     } else {  
         if (dt_max<LARGE_POSITIVE_FLOAT) dt_max*=2.;  
     }  
134     }     }
135     return dt_max;     return dt_max;
136  }  }
# Line 171  void Paso_FCT_Solver_initialize(const do Line 157  void Paso_FCT_Solver_initialize(const do
157      fct_solver->dt = dt;      fct_solver->dt = dt;
158      #pragma omp parallel for private(i)      #pragma omp parallel for private(i)
159      for (i = 0; i < n; ++i) {      for (i = 0; i < n; ++i) {
160             const double m=fctp->lumped_mass_matrix[i];             const double m_i=fctp->lumped_mass_matrix[i];
161         const double l_ii = fctp->main_diagonal_low_order_transport_matrix[i];         const double l_ii = fctp->main_diagonal_low_order_transport_matrix[i];
162             fctp->iteration_matrix->mainBlock->val[main_iptr[i]] = m * omega - l_ii;         if ( m_i > 0 ) {
163                   fctp->iteration_matrix->mainBlock->val[main_iptr[i]] = m_i * omega - l_ii;
164           } else {
165               fctp->iteration_matrix->mainBlock->val[main_iptr[i]] = ABS(m_i * omega - l_ii)/(EPSILON*EPSILON);
166           }
167      }      }
168            
169      /* allocate preconditioner/solver */      /* allocate preconditioner/solver */
# Line 219  err_t Paso_FCT_Solver_update(Paso_FCT_So Line 209  err_t Paso_FCT_Solver_update(Paso_FCT_So
209  err_t Paso_FCT_Solver_update_LCN(Paso_FCT_Solver *fct_solver, double * u, double *u_old, Paso_Options* options, Paso_Performance *pp)  err_t Paso_FCT_Solver_update_LCN(Paso_FCT_Solver *fct_solver, double * u, double *u_old, Paso_Options* options, Paso_Performance *pp)
210  {  {
211      double const dt = fct_solver->dt;      double const dt = fct_solver->dt;
212      dim_t sweep_max;      dim_t sweep_max, i;
213      double *b = fct_solver->b;      double *b = fct_solver->b;
214      double const RTOL = options->tolerance;      double const RTOL = options->tolerance;
215      const dim_t n=Paso_TransportProblem_getTotalNumRows(fct_solver->transportproblem);      const dim_t n=Paso_TransportProblem_getTotalNumRows(fct_solver->transportproblem);
216      Paso_SystemMatrix * iteration_matrix = fct_solver->transportproblem->iteration_matrix;      Paso_SystemMatrix * iteration_matrix = fct_solver->transportproblem->iteration_matrix;
217        const index_t*  main_iptr=Paso_TransportProblem_borrowMainDiagonalPointer(fct_solver->transportproblem);
218      err_t errorCode = SOLVER_NO_ERROR;      err_t errorCode = SOLVER_NO_ERROR;
219        double norm_u_tilde;
220        
221      Paso_Coupler_startCollect(fct_solver->u_old_coupler,u_old);      Paso_Coupler_startCollect(fct_solver->u_old_coupler,u_old);
222      Paso_Coupler_finishCollect(fct_solver->u_old_coupler);      Paso_Coupler_finishCollect(fct_solver->u_old_coupler);
223            
224      /* b[i]=m*u_tilde[i] = m u_old[i] + dt/2 sum_{j <> i} l_{ij}*(u_old[j]-u_old[i])      /* b[i]=m*u_tilde[i] = m u_old[i] + dt/2 sum_{j <> i} l_{ij}*(u_old[j]-u_old[i])  
225               = u_tilde[i]   = u_old[i] where constraint m<0.
226          * note that iteration_matrix stores the negative values of the          * note that iteration_matrix stores the negative values of the
227          * low order transport matrix l. Therefore a=-dt*0.5 is used. */          * low order transport matrix l. Therefore a=-dt*0.5 is used. */
228                
# Line 250  err_t Paso_FCT_Solver_update_LCN(Paso_FC Line 243  err_t Paso_FCT_Solver_update_LCN(Paso_FC
243     Paso_FCT_FluxLimiter_addLimitedFluxes_Start(fct_solver->flux_limiter);     Paso_FCT_FluxLimiter_addLimitedFluxes_Start(fct_solver->flux_limiter);
244     Paso_FCT_FluxLimiter_addLimitedFluxes_Complete(fct_solver->flux_limiter, b);     Paso_FCT_FluxLimiter_addLimitedFluxes_Complete(fct_solver->flux_limiter, b);
245        
246         Paso_Scale(n, b,fct_solver->omega );    
247     /* solve (m-dt/2*L) u = b in the form (omega*m-L) u = b * omega with omega*dt/2=1 */     /* solve (m-dt/2*L) u = b in the form (omega*m-L) u = b * omega with omega*dt/2=1 */
248         #pragma omp for private(i) schedule(static)
249       for (i = 0; i < n; ++i) {
250           if (! (fct_solver->transportproblem->lumped_mass_matrix[i] > 0 ) ) {
251         b[i] = fct_solver->flux_limiter->u_tilde[i]
252             * fct_solver->transportproblem->iteration_matrix->mainBlock->val[main_iptr[i]];
253           }
254       }
255     /* initial guess is u<- -u + 2*u_tilde */     /* initial guess is u<- -u + 2*u_tilde */
256     Paso_Update(n, -1., u, 2., fct_solver->flux_limiter->u_tilde);     Paso_Update(n, -1., u, 2., fct_solver->flux_limiter->u_tilde);
    Paso_Scale(n, b,fct_solver->omega );  
    sweep_max = MAX((int) (- 2 * log(RTOL)/log(2.)-0.5),1);  
257    
258       sweep_max = MAX((int) (- 2 * log(RTOL)/log(2.)-0.5),1);
259       norm_u_tilde=Paso_lsup(n, fct_solver->flux_limiter->u_tilde, fct_solver->flux_limiter->mpi_info);
260     if (options->verbose) {     if (options->verbose) {
261         const double norm_u_tilde=Paso_lsup(n, fct_solver->flux_limiter->u_tilde, fct_solver->flux_limiter->mpi_info);         printf("Paso_FCT_Solver_update_LCN: u_tilda lsup = %e (rtol = %e, max. sweeps = %d)\n",norm_u_tilde,RTOL * norm_u_tilde ,sweep_max);
        printf("Paso_FCT_Solver_update_LCN: u_tilda lsup = %e (rtol = %e, max. sweeps = %d)\n",norm_u_tilde,RTOL,sweep_max);  
262     }     }
263     errorCode = Paso_Preconditioner_Smoother_solve_byTolerance( iteration_matrix,  ((Paso_Preconditioner*) (iteration_matrix->solver_p))->gs,     errorCode = Paso_Preconditioner_Smoother_solve_byTolerance( iteration_matrix,  ((Paso_Preconditioner*) (iteration_matrix->solver_p))->gs,
264                             u, b, RTOL, &sweep_max, TRUE);                             u, b, RTOL, &sweep_max, TRUE);
# Line 306  err_t Paso_FCT_Solver_updateNL(Paso_FCT_ Line 304  err_t Paso_FCT_Solver_updateNL(Paso_FCT_
304            /* b[i]=m_i* u_old[i] */            /* b[i]=m_i* u_old[i] */
305            #pragma omp for private(i) schedule(static)            #pragma omp for private(i) schedule(static)
306            for (i = 0; i < n; ++i) {            for (i = 0; i < n; ++i) {
307                 b[i]=u_old[i]* fctp->lumped_mass_matrix[i];             if (fctp->lumped_mass_matrix[i] > 0 ) {
308               b[i]=u_old[i]* fctp->lumped_mass_matrix[i];
309               } else {
310               b[i]=u_old[i];
311               }
312            }            }
313      } else {      } else {
314         /* b[i]=m_i* u_old[i] + dt/2 sum_{j <> i} l_{ij}*(u_old[j]-u_old[i]) = m_i * u_tilde_i         /* b[i]=m_i* u_old[i] + dt/2 sum_{j <> i} l_{ij}*(u_old[j]-u_old[i]) = m_i * u_tilde_i where m_i>0
315        *     = u_old[i]  otherwise
316          * note that iteration_matrix stores the negative values of the          * note that iteration_matrix stores the negative values of the
317          * low order transport matrix l. Therefore a=-dt*0.5 is used. */          * low order transport matrix l. Therefore a=-dt*0.5 is used. */
318         Paso_FCT_Solver_setMuPaLu(b,fctp->lumped_mass_matrix,fct_solver->u_old_coupler,-dt*0.5,fctp->iteration_matrix);         Paso_FCT_Solver_setMuPaLu(b,fctp->lumped_mass_matrix,fct_solver->u_old_coupler,-dt*0.5,fctp->iteration_matrix);
319      }              }        
320      Paso_FCT_FluxLimiter_setU_tilda(flux_limiter, b); /* u_tilda = m^{-1} b */      Paso_FCT_FluxLimiter_setU_tilda(flux_limiter, b); /* u_tilda = m^{-1} b */
321      /* u_tilda_connector is completed */      /* u_tilda_connector is completed */
     
322      /**********************************************************************************************************************/        /**********************************************************************************************************************/  
323      /* calculate stopping criterium */      /* calculate stopping criterium */
324      norm_u_tilde=Paso_lsup(n, flux_limiter->u_tilde, flux_limiter->mpi_info);      norm_u_tilde=Paso_lsup(n, flux_limiter->u_tilde, flux_limiter->mpi_info);
# Line 341  err_t Paso_FCT_Solver_updateNL(Paso_FCT_ Line 343  err_t Paso_FCT_Solver_updateNL(Paso_FCT_
343           Paso_FCT_FluxLimiter_addLimitedFluxes_Start(flux_limiter); /* uses u_tilde */             Paso_FCT_FluxLimiter_addLimitedFluxes_Start(flux_limiter); /* uses u_tilde */  
344    
345           /*           /*
346        * z_m[i]=b[i] - (m_i*u[i] - omega*sum_{j<>i} l_{ij} (u[j]-u[i]) ) omega = dt/2 or dt .        * z_m[i]=b[i] - (m_i*u[i] - omega*sum_{j<>i} l_{ij} (u[j]-u[i]) ) where m_i>0
347          *       ==b[i] - u[i] = u_tilda[i]-u[i] =0 otherwise
348          *
349          * omega = dt/2 or dt .
350        *        *
351        * note that iteration_matrix stores the negative values of the        * note that iteration_matrix stores the negative values of the
352        * low order transport matrix l. Therefore a=dt*theta is used.        * low order transport matrix l. Therefore a=dt*theta is used.
# Line 355  err_t Paso_FCT_Solver_updateNL(Paso_FCT_ Line 360  err_t Paso_FCT_Solver_updateNL(Paso_FCT_
360    
361        Paso_Update(n,-1.,z,1.,b);  /* z=b-z */        Paso_Update(n,-1.,z,1.,b);  /* z=b-z */
362    
   
363        /* z_i += sum_{j} limitation factor_{ij} * antidiffusive_flux_{ij} */        /* z_i += sum_{j} limitation factor_{ij} * antidiffusive_flux_{ij} */
364        Paso_FCT_FluxLimiter_addLimitedFluxes_Complete(flux_limiter, z);        Paso_FCT_FluxLimiter_addLimitedFluxes_Complete(flux_limiter, z);
365        
# Line 382  err_t Paso_FCT_Solver_updateNL(Paso_FCT_ Line 386  err_t Paso_FCT_Solver_updateNL(Paso_FCT_
386    
387            Paso_Preconditioner_Smoother_solve(fctp->iteration_matrix, ((Paso_Preconditioner*) (fctp->iteration_matrix->solver_p))->jacobi,            Paso_Preconditioner_Smoother_solve(fctp->iteration_matrix, ((Paso_Preconditioner*) (fctp->iteration_matrix->solver_p))->jacobi,
388                        du, z, 1, FALSE);                        du, z, 1, FALSE);
389                
390            options->num_iter++;            options->num_iter++;
391         }                   }          
392            
393    
394         Paso_Update(n,1.,u,fct_solver->omega,du);         Paso_Update(n,1.,u,fct_solver->omega,du);
395         norm_du_old=norm_du;         norm_du_old=norm_du;
# Line 674  printf("a[%d,%d]=%e\n",j,i,rtmp2); Line 679  printf("a[%d,%d]=%e\n",j,i,rtmp2);
679  }  }
680    
681  /*  /*
682   * out_i=m_i u_i + a * \sum_{j <> i} l_{ij} (u_j-u_i)   * out_i=m_i u_i + a * \sum_{j <> i} l_{ij} (u_j-u_i) where m_i>0
683     *       = u_i                                        where m_i<=0
684   *   *
685   */   */
686  void Paso_FCT_Solver_setMuPaLu(double* out,  void Paso_FCT_Solver_setMuPaLu(double* out,
# Line 692  void Paso_FCT_Solver_setMuPaLu(double* o Line 698  void Paso_FCT_Solver_setMuPaLu(double* o
698    
699    #pragma omp parallel for private(i) schedule(static)    #pragma omp parallel for private(i) schedule(static)
700    for (i = 0; i < n; ++i) {    for (i = 0; i < n; ++i) {
701        out[i]=M[i]*u[i];        if ( M[i] > 0.) {
702              out[i]=M[i]*u[i];
703          } else {
704          out[i]=u[i];
705          }
706    }    }
707    if (ABS(a)>0) {    if (ABS(a)>0) {
708        #pragma omp parallel for schedule(static) private(i, iptr_ij)        #pragma omp parallel for schedule(static) private(i, iptr_ij)
709        for (i = 0; i < n; ++i) {        for (i = 0; i < n; ++i) {
710        if ( M[i] > 0.) {
711            double sum=0;            double sum=0;
712            const double u_i=u[i];            const double u_i=u[i];
713            #pragma ivdep            #pragma ivdep
# Line 713  void Paso_FCT_Solver_setMuPaLu(double* o Line 724  void Paso_FCT_Solver_setMuPaLu(double* o
724                 sum+=l_ij*(remote_u[j]-u_i);                 sum+=l_ij*(remote_u[j]-u_i);
725            }            }
726            out[i]+=a*sum;            out[i]+=a*sum;
727        }
728        }        }
729    }    }
730  }  }

Legend:
Removed from v.3867  
changed lines
  Added in v.3868

  ViewVC Help
Powered by ViewVC 1.1.26