/[escript]/trunk/paso/src/FCT_Solver.cpp
ViewVC logotype

Diff of /trunk/paso/src/FCT_Solver.cpp

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 4836 by caltinay, Mon Apr 7 05:51:55 2014 UTC revision 4843 by caltinay, Tue Apr 8 05:32:07 2014 UTC
# Line 15  Line 15 
15  *****************************************************************************/  *****************************************************************************/
16    
17    
18  /************************************************************************************/  /****************************************************************************/
19    
20  /* Paso: Transport solver with flux correction (L is row sum zero)  /* Paso: Transport solver with flux correction (L is row sum zero)
21   *   *
22   *   - Mv_t=Lv   v(0)=u   *   - Mv_t=Lv   v(0)=u
23   *   *
24   *  to return v(dt)   *  to return v(dt)
25   *   *
26  */  */
27  /************************************************************************************/  /****************************************************************************/
28    
29  /* Author: l.gross@uq.edu.au */  /* Author: l.gross@uq.edu.au */
30    
31  /************************************************************************************/  /****************************************************************************/
32    
33  #include "FCT_Solver.h"  #include "FCT_Solver.h"
34  #include "Preconditioner.h"  #include "Preconditioner.h"
# Line 40  Paso_FCT_Solver* Paso_FCT_Solver_alloc(P Line 40  Paso_FCT_Solver* Paso_FCT_Solver_alloc(P
40      Paso_FCT_Solver* out=NULL;      Paso_FCT_Solver* out=NULL;
41      const dim_t blockSize=Paso_TransportProblem_getBlockSize(fctp);      const dim_t blockSize=Paso_TransportProblem_getBlockSize(fctp);
42      const dim_t n =  Paso_TransportProblem_getTotalNumRows(fctp);      const dim_t n =  Paso_TransportProblem_getTotalNumRows(fctp);
43      
44      out=new Paso_FCT_Solver;      out=new Paso_FCT_Solver;
45          out->transportproblem  = Paso_TransportProblem_getReference(fctp);      out->transportproblem  = Paso_TransportProblem_getReference(fctp);
46          out->mpi_info          = Esys_MPIInfo_getReference(fctp->mpi_info);      out->mpi_info          = Esys_MPIInfo_getReference(fctp->mpi_info);
47      out->flux_limiter      = Paso_FCT_FluxLimiter_alloc(fctp);      out->flux_limiter      = Paso_FCT_FluxLimiter_alloc(fctp);
48      out->b                 = new double[n];      out->b                 = new double[n];
49      if ( (options->ode_solver == PASO_CRANK_NICOLSON) || (options->ode_solver == PASO_BACKWARD_EULER) ) {      if ( (options->ode_solver == PASO_CRANK_NICOLSON) || (options->ode_solver == PASO_BACKWARD_EULER) ) {
50          out->du = new double[n];          out->du = new double[n];
51          out->z = new double[n];          out->z = new double[n];
52          } else {      } else {
53           out->du = NULL;          out->du = NULL;
54           out->z=NULL;          out->z=NULL;
55          }      }
56      out->u_coupler.reset(new paso::Coupler(Paso_TransportProblem_borrowConnector(fctp), blockSize));      out->u_coupler.reset(new paso::Coupler(Paso_TransportProblem_borrowConnector(fctp), blockSize));
57      out->u_old_coupler.reset(new paso::Coupler(Paso_TransportProblem_borrowConnector(fctp), blockSize));      out->u_old_coupler.reset(new paso::Coupler(Paso_TransportProblem_borrowConnector(fctp), blockSize));
58      out->omega=0;      out->omega=0;
59        
60      if ( options->ode_solver == PASO_LINEAR_CRANK_NICOLSON ) {      if ( options->ode_solver == PASO_LINEAR_CRANK_NICOLSON ) {
61           out->method   = PASO_LINEAR_CRANK_NICOLSON;          out->method   = PASO_LINEAR_CRANK_NICOLSON;
62      } else if ( options->ode_solver == PASO_CRANK_NICOLSON ) {      } else if ( options->ode_solver == PASO_CRANK_NICOLSON ) {
63           out->method = PASO_CRANK_NICOLSON;          out->method = PASO_CRANK_NICOLSON;
64      } else if ( options->ode_solver == PASO_BACKWARD_EULER ) {      } else if ( options->ode_solver == PASO_BACKWARD_EULER ) {
65           out->method = PASO_BACKWARD_EULER;          out->method = PASO_BACKWARD_EULER;
66      } else {      } else {
67          Esys_setError(VALUE_ERROR, "Paso_FCT_Solver_alloc: unknown integration scheme.");          Esys_setError(VALUE_ERROR, "Paso_FCT_Solver_alloc: unknown integration scheme.");
68          out->method = UNKNOWN;          out->method = UNKNOWN;
69      }      }
70        
71      if (Esys_noError()) {      if (Esys_noError()) {
72          return out;          return out;
73      } else {      } else {
74          Paso_FCT_Solver_free(out);          Paso_FCT_Solver_free(out);
75          return NULL;          return NULL;
76      }          }
     
77  }  }
78    
79  void Paso_FCT_Solver_free(Paso_FCT_Solver *in)  void Paso_FCT_Solver_free(Paso_FCT_Solver *in)
# Line 98  double Paso_FCT_Solver_getSafeTimeStepSi Line 97  double Paso_FCT_Solver_getSafeTimeStepSi
97     const dim_t n = fctp->transport_matrix->getTotalNumRows();     const dim_t n = fctp->transport_matrix->getTotalNumRows();
98     /* set low order transport operator */     /* set low order transport operator */
99     Paso_FCT_setLowOrderOperator(fctp);     Paso_FCT_setLowOrderOperator(fctp);
100              
101     if (Esys_noError()) {     if (Esys_noError()) {
102          /*          /*
103           *  calculate time step size:                                                     *  calculate time step size:
104          */          */
105          dt_max=LARGE_POSITIVE_FLOAT;          dt_max=LARGE_POSITIVE_FLOAT;
106          #pragma omp parallel private(i, dt_max_loc)          #pragma omp parallel private(i, dt_max_loc)
107          {          {
108                 dt_max_loc=LARGE_POSITIVE_FLOAT;                 dt_max_loc=LARGE_POSITIVE_FLOAT;
109                 #pragma omp for schedule(static)                 #pragma omp for schedule(static)
110                 for (i=0;i<n;++i) {                 for (i=0;i<n;++i) {
111                    const double l_ii=fctp->main_diagonal_low_order_transport_matrix[i];                    const double l_ii=fctp->main_diagonal_low_order_transport_matrix[i];
112                    const double m_i=fctp->lumped_mass_matrix[i];                    const double m_i=fctp->lumped_mass_matrix[i];
113            if ( m_i > 0 ) {                    if ( m_i > 0 ) {
114                if (l_ii<0) dt_max_loc=MIN(dt_max_loc,m_i/(-l_ii));                        if (l_ii<0) dt_max_loc=MIN(dt_max_loc,m_i/(-l_ii));
115                    }                    }
116             }                 }
117                 #pragma omp critical                 #pragma omp critical
118                 {                 {
119                    dt_max=MIN(dt_max,dt_max_loc);                    dt_max=MIN(dt_max,dt_max_loc);
120                 }                 }
# Line 124  double Paso_FCT_Solver_getSafeTimeStepSi Line 123  double Paso_FCT_Solver_getSafeTimeStepSi
123          {          {
124                 dt_max_loc=dt_max;                 dt_max_loc=dt_max;
125                 MPI_Allreduce(&dt_max_loc, &dt_max, 1, MPI_DOUBLE, MPI_MIN, fctp->mpi_info->comm);                 MPI_Allreduce(&dt_max_loc, &dt_max, 1, MPI_DOUBLE, MPI_MIN, fctp->mpi_info->comm);
126      }          }
127          #endif          #endif
128      if (dt_max<LARGE_POSITIVE_FLOAT) dt_max*=2.;          if (dt_max<LARGE_POSITIVE_FLOAT) dt_max*=2.;
129     }     }
130     return dt_max;     return dt_max;
131  }  }
132    
133  /* modifies the main diagonal of the iteration matrix to introduce new dt */  /* modifies the main diagonal of the iteration matrix to introduce new dt */
134  void Paso_FCT_Solver_initialize(const double dt, Paso_FCT_Solver *fct_solver, Paso_Options* options, Paso_Performance* pp)  void Paso_FCT_Solver_initialize(const double dt, Paso_FCT_Solver *fct_solver, Paso_Options* options, Paso_Performance* pp)
135  {  {
136     Paso_TransportProblem* fctp = fct_solver->transportproblem;     Paso_TransportProblem* fctp = fct_solver->transportproblem;
137     const index_t* main_iptr=Paso_TransportProblem_borrowMainDiagonalPointer(fctp);     const index_t* main_iptr=Paso_TransportProblem_borrowMainDiagonalPointer(fctp);
# Line 141  void Paso_FCT_Solver_initialize(const do Line 140  void Paso_FCT_Solver_initialize(const do
140     const double omega=1./(dt* theta);     const double omega=1./(dt* theta);
141     dim_t i;     dim_t i;
142     Paso_Options options2;     Paso_Options options2;
143      
144    
145    
146     Paso_solve_free(fctp->iteration_matrix.get());     Paso_solve_free(fctp->iteration_matrix.get());
# Line 149  void Paso_FCT_Solver_initialize(const do Line 148  void Paso_FCT_Solver_initialize(const do
148      *   fctp->iteration_matrix[i,i]=m[i]/(dt theta) -l[i,i]      *   fctp->iteration_matrix[i,i]=m[i]/(dt theta) -l[i,i]
149      *      *
150     */     */
151      fct_solver->omega=omega;      fct_solver->omega=omega;
152      fct_solver->dt = dt;      fct_solver->dt = dt;
153      #pragma omp parallel for private(i)      #pragma omp parallel for private(i)
154      for (i = 0; i < n; ++i) {      for (i = 0; i < n; ++i) {
155             const double m_i=fctp->lumped_mass_matrix[i];             const double m_i=fctp->lumped_mass_matrix[i];
156         const double l_ii = fctp->main_diagonal_low_order_transport_matrix[i];             const double l_ii = fctp->main_diagonal_low_order_transport_matrix[i];
157         if ( m_i > 0 ) {             if ( m_i > 0 ) {
158                 fctp->iteration_matrix->mainBlock->val[main_iptr[i]] = m_i * omega - l_ii;                 fctp->iteration_matrix->mainBlock->val[main_iptr[i]] = m_i * omega - l_ii;
159         } else {             } else {
160             fctp->iteration_matrix->mainBlock->val[main_iptr[i]] = ABS(m_i * omega - l_ii)/(EPSILON*EPSILON);                 fctp->iteration_matrix->mainBlock->val[main_iptr[i]] = ABS(m_i * omega - l_ii)/(EPSILON*EPSILON);
161         }             }
162      }      }
163        
164      /* allocate preconditioner/solver */      /* allocate preconditioner/solver */
165      Paso_Options_setDefaults(&options2);      Paso_Options_setDefaults(&options2);
166      options2.verbose = options->verbose;      options2.verbose = options->verbose;
167      if (fct_solver->method == PASO_LINEAR_CRANK_NICOLSON  ) {      if (fct_solver->method == PASO_LINEAR_CRANK_NICOLSON  ) {
168          options2.preconditioner = PASO_GS;          options2.preconditioner = PASO_GS;
169      } else  {      } else  {
170      options2.preconditioner = PASO_JACOBI;          options2.preconditioner = PASO_JACOBI;
171  /* options2.preconditioner = PASO_GS; */  /* options2.preconditioner = PASO_GS; */
172      }      }
173      options2.use_local_preconditioner = FALSE;      options2.use_local_preconditioner = FALSE;
174      options2.sweeps=-1;      options2.sweeps=-1;
175        
176      Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER_INIT);      Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER_INIT);
177      fctp->iteration_matrix->setPreconditioner(&options2);      fctp->iteration_matrix->setPreconditioner(&options2);
178      Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER_INIT);      Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER_INIT);
179  }  }
180    
181  /* entry point for update procedures */  /* entry point for update procedures */
182  err_t Paso_FCT_Solver_update(Paso_FCT_Solver *fct_solver, double* u, double *u_old,  Paso_Options* options, Paso_Performance *pp)  err_t Paso_FCT_Solver_update(Paso_FCT_Solver *fct_solver, double* u, double *u_old,  Paso_Options* options, Paso_Performance *pp)
183  {      {
184      const index_t method=fct_solver->method;      const index_t method=fct_solver->method;
185      err_t err_out = SOLVER_NO_ERROR;      err_t err_out = SOLVER_NO_ERROR;
186        
     
187      if (method == PASO_LINEAR_CRANK_NICOLSON) {      if (method == PASO_LINEAR_CRANK_NICOLSON) {
188          err_out=Paso_FCT_Solver_update_LCN(fct_solver, u, u_old, options, pp);          err_out=Paso_FCT_Solver_update_LCN(fct_solver, u, u_old, options, pp);
         
189      } else if (method == PASO_CRANK_NICOLSON) {      } else if (method == PASO_CRANK_NICOLSON) {
190          err_out=Paso_FCT_Solver_updateNL(fct_solver, u, u_old, options, pp);          err_out=Paso_FCT_Solver_updateNL(fct_solver, u, u_old, options, pp);
         
191      } else if (method == PASO_BACKWARD_EULER) {      } else if (method == PASO_BACKWARD_EULER) {
192          err_out=Paso_FCT_Solver_updateNL(fct_solver, u, u_old, options, pp);          err_out=Paso_FCT_Solver_updateNL(fct_solver, u, u_old, options, pp);
193      } else {      } else {
# Line 201  err_t Paso_FCT_Solver_update(Paso_FCT_So Line 197  err_t Paso_FCT_Solver_update(Paso_FCT_So
197  }  }
198    
199  /* linear crank-nicolson update */  /* linear crank-nicolson update */
200  err_t Paso_FCT_Solver_update_LCN(Paso_FCT_Solver *fct_solver, double * u, double *u_old, Paso_Options* options, Paso_Performance *pp)  err_t Paso_FCT_Solver_update_LCN(Paso_FCT_Solver *fct_solver, double * u, double *u_old, Paso_Options* options, Paso_Performance *pp)
201  {  {
202      double const dt = fct_solver->dt;      double const dt = fct_solver->dt;
203      dim_t sweep_max, i;      dim_t sweep_max, i;
204      double *b = fct_solver->b;      double *b = fct_solver->b;
205      double const RTOL = options->tolerance;      double const RTOL = options->tolerance;
206      const dim_t n=Paso_TransportProblem_getTotalNumRows(fct_solver->transportproblem);      const dim_t n=Paso_TransportProblem_getTotalNumRows(fct_solver->transportproblem);
207      paso::SystemMatrix_ptr iteration_matrix(fct_solver->transportproblem->iteration_matrix);      paso::SystemMatrix_ptr iteration_matrix(fct_solver->transportproblem->iteration_matrix);
208      const index_t*  main_iptr=Paso_TransportProblem_borrowMainDiagonalPointer(fct_solver->transportproblem);      const index_t*  main_iptr=Paso_TransportProblem_borrowMainDiagonalPointer(fct_solver->transportproblem);
209      err_t errorCode = SOLVER_NO_ERROR;      err_t errorCode = SOLVER_NO_ERROR;
210      double norm_u_tilde;      double norm_u_tilde;
211      
212      fct_solver->u_old_coupler->startCollect(u_old);      fct_solver->u_old_coupler->startCollect(u_old);
213      fct_solver->u_old_coupler->finishCollect();      fct_solver->u_old_coupler->finishCollect();
214        
215      /* b[i]=m*u_tilde[i] = m u_old[i] + dt/2 sum_{j <> i} l_{ij}*(u_old[j]-u_old[i])        /* b[i]=m*u_tilde[i] = m u_old[i] + dt/2 sum_{j <> i} l_{ij}*(u_old[j]-u_old[i])
216             = u_tilde[i]   = u_old[i] where constraint m<0.             = u_tilde[i]   = u_old[i] where constraint m<0.
217          * note that iteration_matrix stores the negative values of the          * note that iteration_matrix stores the negative values of the
218          * low order transport matrix l. Therefore a=-dt*0.5 is used. */          * low order transport matrix l. Therefore a=-dt*0.5 is used. */
219          
220      Paso_FCT_Solver_setMuPaLu(b, fct_solver->transportproblem->lumped_mass_matrix,      Paso_FCT_Solver_setMuPaLu(b, fct_solver->transportproblem->lumped_mass_matrix,
221                   fct_solver->u_old_coupler, -dt*0.5, iteration_matrix);                               fct_solver->u_old_coupler, -dt*0.5, iteration_matrix);
222      /* solve for u_tilde : u_tilda = m^{-1} * b   */      /* solve for u_tilde : u_tilda = m^{-1} * b   */
223      Paso_FCT_FluxLimiter_setU_tilda(fct_solver->flux_limiter, b);      Paso_FCT_FluxLimiter_setU_tilda(fct_solver->flux_limiter, b);
224      /* u_tilda_connector is completed */      /* u_tilda_connector is completed */
225        
226      /* calculate anti-diffusive fluxes for u_tilda */      /* calculate anti-diffusive fluxes for u_tilda */
227      Paso_FCT_setAntiDiffusionFlux_linearCN(fct_solver->flux_limiter->antidiffusive_fluxes,      Paso_FCT_setAntiDiffusionFlux_linearCN(fct_solver->flux_limiter->antidiffusive_fluxes,
228                         fct_solver->transportproblem, dt,                                             fct_solver->transportproblem, dt,
229                         fct_solver->flux_limiter->u_tilde_coupler,                                             fct_solver->flux_limiter->u_tilde_coupler,
230                         fct_solver->u_old_coupler);                                             fct_solver->u_old_coupler);
231    
232    
233     /* b_i += sum_{j} limitation factor_{ij} * antidiffusive_flux_{ij} */     /* b_i += sum_{j} limitation factor_{ij} * antidiffusive_flux_{ij} */
234     Paso_FCT_FluxLimiter_addLimitedFluxes_Start(fct_solver->flux_limiter);     Paso_FCT_FluxLimiter_addLimitedFluxes_Start(fct_solver->flux_limiter);
235     Paso_FCT_FluxLimiter_addLimitedFluxes_Complete(fct_solver->flux_limiter, b);     Paso_FCT_FluxLimiter_addLimitedFluxes_Complete(fct_solver->flux_limiter, b);
236      
237     Paso_Scale(n, b,fct_solver->omega );         Paso_Scale(n, b,fct_solver->omega );
238     /* solve (m-dt/2*L) u = b in the form (omega*m-L) u = b * omega with omega*dt/2=1 */     /* solve (m-dt/2*L) u = b in the form (omega*m-L) u = b * omega with omega*dt/2=1 */
239     #pragma omp for private(i) schedule(static)     #pragma omp for private(i) schedule(static)
240     for (i = 0; i < n; ++i) {     for (i = 0; i < n; ++i) {
241         if (! (fct_solver->transportproblem->lumped_mass_matrix[i] > 0 ) ) {         if (! (fct_solver->transportproblem->lumped_mass_matrix[i] > 0 ) ) {
242       b[i] = fct_solver->flux_limiter->u_tilde[i]           b[i] = fct_solver->flux_limiter->u_tilde[i]
243           * fct_solver->transportproblem->iteration_matrix->mainBlock->val[main_iptr[i]];               * fct_solver->transportproblem->iteration_matrix->mainBlock->val[main_iptr[i]];
244         }         }
245     }     }
246     /* initial guess is u<- -u + 2*u_tilde */     /* initial guess is u<- -u + 2*u_tilde */
247     Paso_Update(n, -1., u, 2., fct_solver->flux_limiter->u_tilde);     Paso_Update(n, -1., u, 2., fct_solver->flux_limiter->u_tilde);
# Line 253  err_t Paso_FCT_Solver_update_LCN(Paso_FC Line 249  err_t Paso_FCT_Solver_update_LCN(Paso_FC
249     sweep_max = MAX((int) (- 2 * log(RTOL)/log(2.)-0.5),1);     sweep_max = MAX((int) (- 2 * log(RTOL)/log(2.)-0.5),1);
250     norm_u_tilde=Paso_lsup(n, fct_solver->flux_limiter->u_tilde, fct_solver->flux_limiter->mpi_info);     norm_u_tilde=Paso_lsup(n, fct_solver->flux_limiter->u_tilde, fct_solver->flux_limiter->mpi_info);
251     if (options->verbose) {     if (options->verbose) {
252         printf("Paso_FCT_Solver_update_LCN: u_tilda lsup = %e (rtol = %e, max. sweeps = %d)\n",norm_u_tilde,RTOL * norm_u_tilde ,sweep_max);         printf("Paso_FCT_Solver_update_LCN: u_tilda lsup = %e (rtol = %e, max. sweeps = %d)\n",norm_u_tilde,RTOL * norm_u_tilde ,sweep_max);
253     }     }
254     errorCode = Paso_Preconditioner_Smoother_solve_byTolerance( iteration_matrix,  ((Paso_Preconditioner*) (iteration_matrix->solver_p))->gs,     errorCode = paso::Preconditioner_Smoother_solve_byTolerance( iteration_matrix,  ((paso::Preconditioner*) (iteration_matrix->solver_p))->gs,
255                             u, b, RTOL, &sweep_max, TRUE);                                                     u, b, RTOL, &sweep_max, TRUE);
256     if (errorCode == PRECONDITIONER_NO_ERROR) {     if (errorCode == PRECONDITIONER_NO_ERROR) {
257        if (options->verbose) printf("Paso_FCT_Solver_update_LCN: convergence after %d Gauss-Seidel steps.\n",sweep_max);        if (options->verbose) printf("Paso_FCT_Solver_update_LCN: convergence after %d Gauss-Seidel steps.\n",sweep_max);
258        errorCode=SOLVER_NO_ERROR;        errorCode=SOLVER_NO_ERROR;
# Line 265  err_t Paso_FCT_Solver_update_LCN(Paso_FC Line 261  err_t Paso_FCT_Solver_update_LCN(Paso_FC
261        errorCode= SOLVER_MAXITER_REACHED;        errorCode= SOLVER_MAXITER_REACHED;
262     }     }
263     return errorCode;     return errorCode;
     
 }    
264    
265  err_t Paso_FCT_Solver_updateNL(Paso_FCT_Solver *fct_solver, double* u, double *u_old, Paso_Options* options, Paso_Performance *pp)  }
266    
267    err_t Paso_FCT_Solver_updateNL(Paso_FCT_Solver *fct_solver, double* u, double *u_old, Paso_Options* options, Paso_Performance *pp)
268  {  {
269     const dim_t num_critical_rates_max=3; /* number of rates >=critical_rate accepted before divergence is triggered */     const dim_t num_critical_rates_max=3; /* number of rates >=critical_rate accepted before divergence is triggered */
270     const double critical_rate=0.95;   /* expected value of convergence rate */     const double critical_rate=0.95;   /* expected value of convergence rate */
# Line 276  err_t Paso_FCT_Solver_updateNL(Paso_FCT_ Line 272  err_t Paso_FCT_Solver_updateNL(Paso_FCT_
272     double *b = fct_solver->b;     double *b = fct_solver->b;
273     double *z = fct_solver->z;     double *z = fct_solver->z;
274     double *du = fct_solver->du;     double *du = fct_solver->du;
275     double const dt = fct_solver->dt;     double const dt = fct_solver->dt;
276     Paso_TransportProblem* fctp = fct_solver->transportproblem;     Paso_TransportProblem* fctp = fct_solver->transportproblem;
277     Paso_FCT_FluxLimiter* flux_limiter = fct_solver->flux_limiter;     Paso_FCT_FluxLimiter* flux_limiter = fct_solver->flux_limiter;
278     dim_t i;     dim_t i;
279     double norm_u_tilde, ATOL, norm_du=LARGE_POSITIVE_FLOAT, norm_du_old, rate=1.;     double norm_u_tilde, ATOL, norm_du=LARGE_POSITIVE_FLOAT, norm_du_old, rate=1.;
280     err_t errorCode=SOLVER_NO_ERROR;     err_t errorCode=SOLVER_NO_ERROR;
281     const dim_t n = fctp->transport_matrix->getTotalNumRows();     const dim_t n = fctp->transport_matrix->getTotalNumRows();
282     const double atol=options->absolute_tolerance;       const double atol=options->absolute_tolerance;
283     const double rtol=options->tolerance;     const double rtol=options->tolerance;
284     const dim_t max_m=options->iter_max;     const dim_t max_m=options->iter_max;
285     dim_t m=0, num_critical_rates=0 ;     dim_t m=0, num_critical_rates=0 ;
286    /* ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// */    /* ////////////////////////////////////////////////////////////////////// */
287      
288     bool converged=FALSE, max_m_reached=FALSE,diverged=FALSE;       bool converged=FALSE, max_m_reached=FALSE,diverged=FALSE;
289     options->num_iter=0;     options->num_iter=0;
290      
291     fct_solver->u_old_coupler->startCollect(u_old);     fct_solver->u_old_coupler->startCollect(u_old);
292     fct_solver->u_old_coupler->finishCollect();     fct_solver->u_old_coupler->finishCollect();
293      /* prepare u_tilda and flux limiter */      /* prepare u_tilda and flux limiter */
294      if ( fct_solver->method == PASO_BACKWARD_EULER ) {      if ( fct_solver->method == PASO_BACKWARD_EULER ) {
295            /* b[i]=m_i* u_old[i] */            /* b[i]=m_i* u_old[i] */
296            #pragma omp for private(i) schedule(static)            #pragma omp for private(i) schedule(static)
297            for (i = 0; i < n; ++i) {            for (i = 0; i < n; ++i) {
298             if (fctp->lumped_mass_matrix[i] > 0 ) {                 if (fctp->lumped_mass_matrix[i] > 0 ) {
299             b[i]=u_old[i]* fctp->lumped_mass_matrix[i];                     b[i]=u_old[i]* fctp->lumped_mass_matrix[i];
300             } else {                 } else {
301             b[i]=u_old[i];                     b[i]=u_old[i];
302             }                 }
303            }            }
304      } else {      } else {
305         /* b[i]=m_i* u_old[i] + dt/2 sum_{j <> i} l_{ij}*(u_old[j]-u_old[i]) = m_i * u_tilde_i where m_i>0         /* b[i]=m_i* u_old[i] + dt/2 sum_{j <> i} l_{ij}*(u_old[j]-u_old[i]) = m_i * u_tilde_i where m_i>0
306      *     = u_old[i]  otherwise          *     = u_old[i]  otherwise
307          * note that iteration_matrix stores the negative values of the          * note that iteration_matrix stores the negative values of the
308          * low order transport matrix l. Therefore a=-dt*0.5 is used. */          * low order transport matrix l. Therefore a=-dt*0.5 is used. */
309         Paso_FCT_Solver_setMuPaLu(b,fctp->lumped_mass_matrix,fct_solver->u_old_coupler,-dt*0.5,fctp->iteration_matrix);         Paso_FCT_Solver_setMuPaLu(b,fctp->lumped_mass_matrix,fct_solver->u_old_coupler,-dt*0.5,fctp->iteration_matrix);
310      }              }
311      Paso_FCT_FluxLimiter_setU_tilda(flux_limiter, b); /* u_tilda = m^{-1} b */      Paso_FCT_FluxLimiter_setU_tilda(flux_limiter, b); /* u_tilda = m^{-1} b */
312      /* u_tilda_connector is completed */      /* u_tilda_connector is completed */
313      /********************************************************************************************************************************************/        /************************************************************************/
314      /* calculate stopping criterion */      /* calculate stopping criterion */
315      norm_u_tilde=Paso_lsup(n, flux_limiter->u_tilde, flux_limiter->mpi_info);      norm_u_tilde=Paso_lsup(n, flux_limiter->u_tilde, flux_limiter->mpi_info);
316      ATOL= rtol * norm_u_tilde + atol ;      ATOL= rtol * norm_u_tilde + atol ;
# Line 323  err_t Paso_FCT_Solver_updateNL(Paso_FCT_ Line 319  err_t Paso_FCT_Solver_updateNL(Paso_FCT_
319    
320      /* u_old is an initial guess for u*/      /* u_old is an initial guess for u*/
321      Paso_Copy(n,u,u_old);      Paso_Copy(n,u,u_old);
322        
323      while ( (!converged) && (!diverged) && (! max_m_reached) && Esys_noError()) {      while ( (!converged) && (!diverged) && (! max_m_reached) && Esys_noError()) {
324          fct_solver->u_coupler->startCollect(u);          fct_solver->u_coupler->startCollect(u);
325          fct_solver->u_coupler->finishCollect();          fct_solver->u_coupler->finishCollect();
326    
327           /*  set antidiffusive_flux_{ij} for u */           /*  set antidiffusive_flux_{ij} for u */
328           if (fct_solver->method == PASO_BACKWARD_EULER) {           if (fct_solver->method == PASO_BACKWARD_EULER) {
329               Paso_FCT_setAntiDiffusionFlux_BE(fct_solver->flux_limiter->antidiffusive_fluxes, fctp, dt, fct_solver->u_coupler, fct_solver->u_old_coupler);                 Paso_FCT_setAntiDiffusionFlux_BE(fct_solver->flux_limiter->antidiffusive_fluxes, fctp, dt, fct_solver->u_coupler, fct_solver->u_old_coupler);
330           } else {                   } else {
331           Paso_FCT_setAntiDiffusionFlux_CN(fct_solver->flux_limiter->antidiffusive_fluxes, fctp, dt, fct_solver->u_coupler, fct_solver->u_old_coupler);               Paso_FCT_setAntiDiffusionFlux_CN(fct_solver->flux_limiter->antidiffusive_fluxes, fctp, dt, fct_solver->u_coupler, fct_solver->u_old_coupler);
332           }           }
333           /* start the calculation of the limitation factors_{fct_solver->ij} */           /* start the calculation of the limitation factors_{fct_solver->ij} */
334           Paso_FCT_FluxLimiter_addLimitedFluxes_Start(flux_limiter); /* uses u_tilde */             Paso_FCT_FluxLimiter_addLimitedFluxes_Start(flux_limiter); /* uses u_tilde */
335    
336           /*           /*
337        * z_m[i]=b[i] - (m_i*u[i] - omega*sum_{j<>i} l_{ij} (u[j]-u[i]) ) where m_i>0            * z_m[i]=b[i] - (m_i*u[i] - omega*sum_{j<>i} l_{ij} (u[j]-u[i]) ) where m_i>0
338        *       ==b[i] - u[i] = u_tilda[i]-u[i] =0 otherwise            *       ==b[i] - u[i] = u_tilda[i]-u[i] =0 otherwise
339        *            *
340        * omega = dt/2 or dt .            * omega = dt/2 or dt .
341        *            *
342        * note that iteration_matrix stores the negative values of the            * note that iteration_matrix stores the negative values of the
343        * low order transport matrix l. Therefore a=dt*theta is used.            * low order transport matrix l. Therefore a=dt*theta is used.
344        */            */
345        if (fct_solver-> method == PASO_BACKWARD_EULER) {            if (fct_solver-> method == PASO_BACKWARD_EULER) {
346            Paso_FCT_Solver_setMuPaLu(z, fctp->lumped_mass_matrix, fct_solver->u_coupler, dt, fctp->iteration_matrix);                Paso_FCT_Solver_setMuPaLu(z, fctp->lumped_mass_matrix, fct_solver->u_coupler, dt, fctp->iteration_matrix);
347        } else {            } else {
348            Paso_FCT_Solver_setMuPaLu(z, fctp->lumped_mass_matrix, fct_solver->u_coupler, dt/2, fctp->iteration_matrix);                Paso_FCT_Solver_setMuPaLu(z, fctp->lumped_mass_matrix, fct_solver->u_coupler, dt/2, fctp->iteration_matrix);
349        }            }
350      
351              Paso_Update(n,-1.,z,1.,b);  /* z=b-z */
352        Paso_Update(n,-1.,z,1.,b);  /* z=b-z */  
353              /* z_i += sum_{j} limitation factor_{ij} * antidiffusive_flux_{ij} */
354        /* z_i += sum_{j} limitation factor_{ij} * antidiffusive_flux_{ij} */            Paso_FCT_FluxLimiter_addLimitedFluxes_Complete(flux_limiter, z);
355        Paso_FCT_FluxLimiter_addLimitedFluxes_Complete(flux_limiter, z);  
356                /* we solve (m/omega - L ) * du = z */
357        /* we solve (m/omega - L ) * du = z */            if (fct_solver->method == PASO_BACKWARD_EULER) {
358        if (fct_solver->method == PASO_BACKWARD_EULER) {                dim_t cntIter = options->iter_max;
359            dim_t cntIter = options->iter_max;                double tol= Paso_l2(n, z, fctp->mpi_info) ;
360            double tol= Paso_l2(n, z, fctp->mpi_info) ;  
361                  if ( m ==0) {
362            if ( m ==0) {                    tol *=0.5;
363            tol *=0.5;                } else {
364            } else {                    tol *= MIN(MAX(rate*rate, 1e-2), 0.5);
365            tol *= MIN(MAX(rate*rate, 1e-2), 0.5);                }
366            }                /* use BiCGSTab with jacobi preconditioner ( m - omega * L ) */
367            /* use BiCGSTab with jacobi preconditioner ( m - omega * L ) */                Paso_zeroes(n,du);
368                Paso_zeroes(n,du);                errorCode = Paso_Solver_BiCGStab(fctp->iteration_matrix, z, du, &cntIter, &tol, pp);
369            errorCode = Paso_Solver_BiCGStab(fctp->iteration_matrix, z, du, &cntIter, &tol, pp);  
370                  /* errorCode =  Paso_Solver_GMRES(fctp->iteration_matrix, z, du, &cntIter, &tol, 10, 2000, pp); */
371            /* errorCode =  Paso_Solver_GMRES(fctp->iteration_matrix, z, du, &cntIter, &tol, 10, 2000, pp); */                if (options->verbose) printf("Paso_FCT_Solver_updateNL: BiCGStab is completed after %d steps (residual =%e).\n",cntIter, tol);
372            if (options->verbose) printf("Paso_FCT_Solver_updateNL: BiCGStab is completed after %d steps (residual =%e).\n",cntIter, tol);                options->num_iter+=cntIter;
373            options->num_iter+=cntIter;                if ( errorCode !=  SOLVER_NO_ERROR) break;
374            if ( errorCode !=  SOLVER_NO_ERROR) break;             } else {
375         } else {                /* just use the main diagonal of (m/omega - L ) */
376            /* just use the main diagonal of (m/omega - L ) */  
377                   paso::Preconditioner_Smoother_solve(fctp->iteration_matrix, ((paso::Preconditioner*) (fctp->iteration_matrix->solver_p))->jacobi,
378            Paso_Preconditioner_Smoother_solve(fctp->iteration_matrix, ((Paso_Preconditioner*) (fctp->iteration_matrix->solver_p))->jacobi,                                            du, z, 1, FALSE);
379                        du, z, 1, FALSE);  
380                              options->num_iter++;
381            options->num_iter++;             }
382         }            
383                 Paso_Update(n,1.,u,fct_solver->omega,du);
384               norm_du_old=norm_du;
        Paso_Update(n,1.,u,fct_solver->omega,du);  
        norm_du_old=norm_du;  
385             norm_du=Paso_lsup(n,du, fctp->mpi_info);             norm_du=Paso_lsup(n,du, fctp->mpi_info);
386         if (m ==0) {             if (m ==0) {
387                  if (options->verbose) printf("Paso_FCT_Solver_updateNL: step %d: increment= %e\n",m+1, norm_du * fct_solver->omega);                  if (options->verbose) printf("Paso_FCT_Solver_updateNL: step %d: increment= %e\n",m+1, norm_du * fct_solver->omega);
388         } else {             } else {
389              if (norm_du_old > 0.) {                  if (norm_du_old > 0.) {
390                  rate=norm_du/norm_du_old;                      rate=norm_du/norm_du_old;
391              } else if (norm_du <= 0.) {                  } else if (norm_du <= 0.) {
392               rate=0.;                       rate=0.;
393          } else {                  } else {
394               rate=LARGE_POSITIVE_FLOAT;                       rate=LARGE_POSITIVE_FLOAT;
395          }                  }
396                  if (options->verbose) printf("Paso_FCT_Solver_updateNL: step %d: increment= %e (rate = %e)\n",m+1, norm_du * fct_solver->omega, rate);                  if (options->verbose) printf("Paso_FCT_Solver_updateNL: step %d: increment= %e (rate = %e)\n",m+1, norm_du * fct_solver->omega, rate);
397          num_critical_rates+=( rate<critical_rate ? 0 : 1);                  num_critical_rates+=( rate<critical_rate ? 0 : 1);
398                  max_m_reached=(m>max_m);                  max_m_reached=(m>max_m);
399          diverged = (num_critical_rates >= num_critical_rates_max);                  diverged = (num_critical_rates >= num_critical_rates_max);
400                  converged=(norm_du * fct_solver->omega  <= ATOL) ;                  converged=(norm_du * fct_solver->omega  <= ATOL);
401             }             }
402             m++;             m++;
403        } /* end of while loop */        } /* end of while loop */
404        if (errorCode == SOLVER_NO_ERROR) {        if (errorCode == SOLVER_NO_ERROR) {
405              if (converged) {                  if (converged) {
406                  if (options->verbose) printf("Paso_FCT_Solver_updateNL: iteration is completed.\n");                      if (options->verbose) printf("Paso_FCT_Solver_updateNL: iteration is completed.\n");
407                      errorCode=SOLVER_NO_ERROR;                      errorCode=SOLVER_NO_ERROR;
408                  } else if (diverged) {                  } else if (diverged) {
409              if (options->verbose) printf("Paso_FCT_Solver_updateNL: divergence.\n");                      if (options->verbose) printf("Paso_FCT_Solver_updateNL: divergence.\n");
410              errorCode=SOLVER_DIVERGENCE;                      errorCode=SOLVER_DIVERGENCE;
411          } else if (max_m_reached) {                  } else if (max_m_reached) {
412               if (options->verbose) printf("Paso_FCT_Solver_updateNL: maximum number of iteration steps reached.\n");                       if (options->verbose) printf("Paso_FCT_Solver_updateNL: maximum number of iteration steps reached.\n");
413              errorCode=SOLVER_MAXITER_REACHED;                      errorCode=SOLVER_MAXITER_REACHED;
414          }                  }
           
415      }      }
416      return errorCode;      return errorCode;
417  }  }
418    
419    
420  /*    /*
421   *  AntiDiffusionFlux:   *  AntiDiffusionFlux:
422   *   *
423   *        f_{ij} = (m_{ij} - dt (1-theta) d_{ij}) (u_old[j]-u_old[i]) - (m_{ij} + dt theta d_{ij}) (u[j]-u[i])   *        f_{ij} = (m_{ij} - dt (1-theta) d_{ij}) (u_old[j]-u_old[i]) - (m_{ij} + dt theta d_{ij}) (u[j]-u[i])
424   *           *
425   *     m=fc->mass matrix   *     m=fc->mass matrix
426   *     d=artificial diffusion matrix = L - K = - fc->iteration matrix - fc->transport matrix (away from main diagonal)   *     d=artificial diffusion matrix = L - K = - fc->iteration matrix - fc->transport matrix (away from main diagonal)
427   *   *
# Line 437  err_t Paso_FCT_Solver_updateNL(Paso_FCT_ Line 430  err_t Paso_FCT_Solver_updateNL(Paso_FCT_
430   */   */
431    
432  void Paso_FCT_setAntiDiffusionFlux_CN(paso::SystemMatrix_ptr flux_matrix,  void Paso_FCT_setAntiDiffusionFlux_CN(paso::SystemMatrix_ptr flux_matrix,
433                                        const Paso_TransportProblem* fct,                                        const Paso_TransportProblem* fct,
434                                        const double dt,                                        const double dt,
435                                        paso::const_Coupler_ptr u_coupler,                                          paso::const_Coupler_ptr u_coupler,
436                                        paso::const_Coupler_ptr u_old_coupler)                                        paso::const_Coupler_ptr u_old_coupler)
437  {  {
438    dim_t i;    dim_t i;
439    index_t iptr_ij;    index_t iptr_ij;
440      
441    const double *u    = u_coupler->borrowLocalData();    const double *u    = u_coupler->borrowLocalData();
442    const double *u_old= u_old_coupler->borrowLocalData();    const double *u_old= u_old_coupler->borrowLocalData();
443    const double *remote_u=u_coupler->borrowRemoteData();    const double *remote_u=u_coupler->borrowRemoteData();
# Line 460  void Paso_FCT_setAntiDiffusionFlux_CN(pa Line 453  void Paso_FCT_setAntiDiffusionFlux_CN(pa
453    
454        #pragma ivdep        #pragma ivdep
455        for (iptr_ij=(pattern->mainPattern->ptr[i]);iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {        for (iptr_ij=(pattern->mainPattern->ptr[i]);iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
456      const index_t j      = pattern->mainPattern->index[iptr_ij];          const index_t j      = pattern->mainPattern->index[iptr_ij];
457      const double m_ij    = fct->mass_matrix->mainBlock->val[iptr_ij];          const double m_ij    = fct->mass_matrix->mainBlock->val[iptr_ij];
458      const double d_ij    = fct->transport_matrix->mainBlock->val[iptr_ij]+fct->iteration_matrix->mainBlock->val[iptr_ij]; /* this is in fact -d_ij */          const double d_ij    = fct->transport_matrix->mainBlock->val[iptr_ij]+fct->iteration_matrix->mainBlock->val[iptr_ij]; /* this is in fact -d_ij */
459      const double u_old_j = u_old[j];          const double u_old_j = u_old[j];
460      const double u_j     = u[j];          const double u_j     = u[j];
461        
462      /* (m_{ij} - dt (1-theta) d_{ij}) (u_old[j]-u_old[i]) - (m_{ij} + dt theta d_{ij}) (u[j]-u[i]) */          /* (m_{ij} - dt (1-theta) d_{ij}) (u_old[j]-u_old[i]) - (m_{ij} + dt theta d_{ij}) (u[j]-u[i]) */
463      flux_matrix->mainBlock->val[iptr_ij]=(m_ij+dt_half*d_ij)*(u_old_j-u_old_i) - (m_ij-dt_half*d_ij)*(u_j-u_i);          flux_matrix->mainBlock->val[iptr_ij]=(m_ij+dt_half*d_ij)*(u_old_j-u_old_i) - (m_ij-dt_half*d_ij)*(u_j-u_i);
464    
465        }        }
466        #pragma ivdep        #pragma ivdep
467        for (iptr_ij=(pattern->col_couplePattern->ptr[i]);iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {        for (iptr_ij=(pattern->col_couplePattern->ptr[i]);iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
468      const index_t j      = pattern->col_couplePattern->index[iptr_ij];          const index_t j      = pattern->col_couplePattern->index[iptr_ij];
469      const double m_ij    = fct->mass_matrix->col_coupleBlock->val[iptr_ij];          const double m_ij    = fct->mass_matrix->col_coupleBlock->val[iptr_ij];
470      const double d_ij    = fct->transport_matrix->col_coupleBlock->val[iptr_ij]+fct->iteration_matrix->col_coupleBlock->val[iptr_ij]; /* this is in fact -d_ij */          const double d_ij    = fct->transport_matrix->col_coupleBlock->val[iptr_ij]+fct->iteration_matrix->col_coupleBlock->val[iptr_ij]; /* this is in fact -d_ij */
471          const double u_old_j = remote_u_old[j];          const double u_old_j = remote_u_old[j];
472      const double u_j     = remote_u[j];          const double u_j     = remote_u[j];
473      flux_matrix->col_coupleBlock->val[iptr_ij]=(m_ij+dt_half*d_ij)*(u_old_j-u_old_i)- (m_ij-dt_half*d_ij)*(u_j-u_i);          flux_matrix->col_coupleBlock->val[iptr_ij]=(m_ij+dt_half*d_ij)*(u_old_j-u_old_i)- (m_ij-dt_half*d_ij)*(u_j-u_i);
474        }        }
475    }    }
476  }  }
477    
478  void Paso_FCT_setAntiDiffusionFlux_BE(paso::SystemMatrix_ptr flux_matrix,  void Paso_FCT_setAntiDiffusionFlux_BE(paso::SystemMatrix_ptr flux_matrix,
479                                        const Paso_TransportProblem* fct,                                        const Paso_TransportProblem* fct,
480                                        const double dt,                                        const double dt,
481                                        paso::const_Coupler_ptr u_coupler,                                          paso::const_Coupler_ptr u_coupler,
482                                        paso::const_Coupler_ptr u_old_coupler)                                        paso::const_Coupler_ptr u_old_coupler)
483  {  {
484    dim_t i;    dim_t i;
485    index_t iptr_ij;    index_t iptr_ij;
486      
487    const double *u = u_coupler->borrowLocalData();    const double *u = u_coupler->borrowLocalData();
488    const double *u_old = u_old_coupler->borrowLocalData();    const double *u_old = u_old_coupler->borrowLocalData();
489    const double *remote_u = u_coupler->borrowRemoteData();    const double *remote_u = u_coupler->borrowRemoteData();
# Line 505  void Paso_FCT_setAntiDiffusionFlux_BE(pa Line 498  void Paso_FCT_setAntiDiffusionFlux_BE(pa
498        #pragma ivdep        #pragma ivdep
499        for (iptr_ij=pattern->mainPattern->ptr[i]; iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {        for (iptr_ij=pattern->mainPattern->ptr[i]; iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
500    
501      const index_t j      = pattern->mainPattern->index[iptr_ij];          const index_t j      = pattern->mainPattern->index[iptr_ij];
502      const double m_ij    = fct->mass_matrix->mainBlock->val[iptr_ij];          const double m_ij    = fct->mass_matrix->mainBlock->val[iptr_ij];
503      const double d_ij    = fct->transport_matrix->mainBlock->val[iptr_ij]+fct->iteration_matrix->mainBlock->val[iptr_ij]; /* this is in fact -d_ij */          const double d_ij    = fct->transport_matrix->mainBlock->val[iptr_ij]+fct->iteration_matrix->mainBlock->val[iptr_ij]; /* this is in fact -d_ij */
504      const double u_old_j = u_old[j];          const double u_old_j = u_old[j];
505      const double u_j     = u[j];          const double u_j     = u[j];
506        
507      flux_matrix->mainBlock->val[iptr_ij]=m_ij*(u_old_j-u_old_i)- (m_ij-dt*d_ij)*(u_j-u_i);          flux_matrix->mainBlock->val[iptr_ij]=m_ij*(u_old_j-u_old_i)- (m_ij-dt*d_ij)*(u_j-u_i);
508        }        }
509        #pragma ivdep        #pragma ivdep
510        for (iptr_ij=pattern->col_couplePattern->ptr[i]; iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {        for (iptr_ij=pattern->col_couplePattern->ptr[i]; iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
511      const index_t j      = pattern->col_couplePattern->index[iptr_ij];          const index_t j      = pattern->col_couplePattern->index[iptr_ij];
512      const double m_ij    = fct->mass_matrix->col_coupleBlock->val[iptr_ij]; /* this is in fact -d_ij */          const double m_ij    = fct->mass_matrix->col_coupleBlock->val[iptr_ij]; /* this is in fact -d_ij */
513      const double d_ij    = fct->transport_matrix->col_coupleBlock->val[iptr_ij]+fct->iteration_matrix->col_coupleBlock->val[iptr_ij];          const double d_ij    = fct->transport_matrix->col_coupleBlock->val[iptr_ij]+fct->iteration_matrix->col_coupleBlock->val[iptr_ij];
514          const double u_old_j = remote_u_old[j];          const double u_old_j = remote_u_old[j];
515      const double u_j     = remote_u[j];          const double u_j     = remote_u[j];
516        
517      flux_matrix->col_coupleBlock->val[iptr_ij]=m_ij*(u_old_j-u_old_i)- (m_ij-dt*d_ij)*(u_j-u_i);          flux_matrix->col_coupleBlock->val[iptr_ij]=m_ij*(u_old_j-u_old_i)- (m_ij-dt*d_ij)*(u_j-u_i);
518        }        }
519    }    }
520  }  }
521    
522  /* special version of the ant-diffusive fluxes for the linear Crank-Nicolson scheme  /* special version of the ant-diffusive fluxes for the linear Crank-Nicolson scheme
523   * in fact this is evaluated for u = 2*u_tilde - u_old which is the predictor   * in fact this is evaluated for u = 2*u_tilde - u_old which is the predictor
524   * of the solution of the the stabilised problem at time dt using the forward Euler scheme   * of the solution of the the stabilised problem at time dt using the forward Euler scheme
525   *   *
526   *   f_{ij} = (m_{ij} - dt/2 d_{ij}) (u_old[j]-u_old[i]) - (m_{ij} + dt/2 d_{ij}) (u[j]-u[i])   *   f_{ij} = (m_{ij} - dt/2 d_{ij}) (u_old[j]-u_old[i]) - (m_{ij} + dt/2 d_{ij}) (u[j]-u[i])
527   *    =  (m_{ij} - dt/2 d_{ij}) * (u_old[j]-u_old[i]) - (m_{ij} + dt/2 d_{ij}) * ( 2*(u_tilde[j]-u_tilde[i]) - (u_old[j] -u_old [i]) )   *    =  (m_{ij} - dt/2 d_{ij}) * (u_old[j]-u_old[i]) - (m_{ij} + dt/2 d_{ij}) * ( 2*(u_tilde[j]-u_tilde[i]) - (u_old[j] -u_old [i]) )
528   *    =  2*  m_{ij} * ( (u_old[j]-u_tilde[j] - (u_old[i]) - u_tilde[i]) ) - dt d_{ij} * (u_tilde[j]-u_tilde[i])   *    =  2*  m_{ij} * ( (u_old[j]-u_tilde[j] - (u_old[i]) - u_tilde[i]) ) - dt d_{ij} * (u_tilde[j]-u_tilde[i])
529   *   *
530   */   */
531      
532  void Paso_FCT_setAntiDiffusionFlux_linearCN(paso::SystemMatrix_ptr flux_matrix,  void Paso_FCT_setAntiDiffusionFlux_linearCN(paso::SystemMatrix_ptr flux_matrix,
533                          const Paso_TransportProblem* fct, const double dt,                          const Paso_TransportProblem* fct, const double dt,
534                          paso::const_Coupler_ptr u_tilde_coupler,                          paso::const_Coupler_ptr u_tilde_coupler,
# Line 543  void Paso_FCT_setAntiDiffusionFlux_linea Line 536  void Paso_FCT_setAntiDiffusionFlux_linea
536  {  {
537    dim_t i;    dim_t i;
538    index_t iptr_ij;    index_t iptr_ij;
539      
540    const double *u_tilde = u_tilde_coupler->borrowLocalData();    const double *u_tilde = u_tilde_coupler->borrowLocalData();
541    const double *u_old = u_old_coupler->borrowLocalData();    const double *u_old = u_old_coupler->borrowLocalData();
542    const double *remote_u_tilde = u_tilde_coupler->borrowRemoteData();    const double *remote_u_tilde = u_tilde_coupler->borrowRemoteData();
# Line 558  void Paso_FCT_setAntiDiffusionFlux_linea Line 551  void Paso_FCT_setAntiDiffusionFlux_linea
551        const double du_i      = u_tilde_i - u_old_i;        const double du_i      = u_tilde_i - u_old_i;
552        #pragma ivdep        #pragma ivdep
553        for (iptr_ij=(pattern->mainPattern->ptr[i]);iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {        for (iptr_ij=(pattern->mainPattern->ptr[i]);iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
554        
555        const index_t j      = pattern->mainPattern->index[iptr_ij];            const index_t j      = pattern->mainPattern->index[iptr_ij];
556        const double m_ij    = fct->mass_matrix->mainBlock->val[iptr_ij];            const double m_ij    = fct->mass_matrix->mainBlock->val[iptr_ij];
557        const double d_ij    = fct->transport_matrix->mainBlock->val[iptr_ij]+fct->iteration_matrix->mainBlock->val[iptr_ij]; /* this is in fact -d_ij */            const double d_ij    = fct->transport_matrix->mainBlock->val[iptr_ij]+fct->iteration_matrix->mainBlock->val[iptr_ij]; /* this is in fact -d_ij */
558            const double u_tilde_j = u_tilde[j];            const double u_tilde_j = u_tilde[j];
559        const double u_old_j = u_old[j];            const double u_old_j = u_old[j];
560        const double du_j    = u_tilde_j - u_old_j;            const double du_j    = u_tilde_j - u_old_j;
561          
562        flux_matrix->mainBlock->val[iptr_ij]= 2 * m_ij * ( du_i - du_j ) - dt * d_ij * ( u_tilde_i - u_tilde_j);            flux_matrix->mainBlock->val[iptr_ij]= 2 * m_ij * ( du_i - du_j ) - dt * d_ij * ( u_tilde_i - u_tilde_j);
563        }        }
564        #pragma ivdep        #pragma ivdep
565        for (iptr_ij=(pattern->col_couplePattern->ptr[i]);iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {        for (iptr_ij=(pattern->col_couplePattern->ptr[i]);iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
566        
567      const index_t j      = pattern->col_couplePattern->index[iptr_ij];          const index_t j      = pattern->col_couplePattern->index[iptr_ij];
568      const double m_ij    = fct->mass_matrix->col_coupleBlock->val[iptr_ij];          const double m_ij    = fct->mass_matrix->col_coupleBlock->val[iptr_ij];
569      const double d_ij    = fct->transport_matrix->col_coupleBlock->val[iptr_ij]+fct->iteration_matrix->col_coupleBlock->val[iptr_ij];/* this is in fact -d_ij */          const double d_ij    = fct->transport_matrix->col_coupleBlock->val[iptr_ij]+fct->iteration_matrix->col_coupleBlock->val[iptr_ij];/* this is in fact -d_ij */
570          const double u_tilde_j = remote_u_tilde[j];          const double u_tilde_j = remote_u_tilde[j];
571      const double u_old_j = remote_u_old[j];          const double u_old_j = remote_u_old[j];
572      const double du_j    = u_tilde_j - u_old_j;          const double du_j    = u_tilde_j - u_old_j;
573    
574            flux_matrix->col_coupleBlock->val[iptr_ij]= 2 * m_ij * ( du_i - du_j ) - dt * d_ij *  ( u_tilde_i - u_tilde_j);
575    
     flux_matrix->col_coupleBlock->val[iptr_ij]= 2 * m_ij * ( du_i - du_j ) - dt * d_ij *  ( u_tilde_i - u_tilde_j);  
       
576        }        }
577    }    }
578    
579  }  }
580    
581  /************************************************************************************/  /****************************************************************************/
582    
583  /* Creates the low order transport matrix and stores its negative values  /* Creates the low order transport matrix and stores its negative values
584   * into the iteration_matrix except for the main diagonal which is stored   * into the iteration_matrix except for the main diagonal which is stored
585   * separately.   * separately.
586   * If fc->iteration_matrix==NULL, fc->iteration_matrix is allocated   * If fc->iteration_matrix==NULL, fc->iteration_matrix is allocated
587   *   *
588   * a=transport_matrix   * a=transport_matrix
589   * b= low_order_transport_matrix = - iteration_matrix   * b= low_order_transport_matrix = - iteration_matrix
590   * c=main diagonal low_order_transport_matrix   * c=main diagonal low_order_transport_matrix
591   * initialise c[i] mit a[i,i]   * initialise c[i] mit a[i,i]
592   *   *
593   *    d_ij=max(0,-a[i,j],-a[j,i])     *    d_ij=max(0,-a[i,j],-a[j,i])
594   *    b[i,j]=-(a[i,j]+d_ij)           *    b[i,j]=-(a[i,j]+d_ij)
595   *    c[i]-=d_ij                     *    c[i]-=d_ij
596   */   */
597    
598  void Paso_FCT_setLowOrderOperator(Paso_TransportProblem * fc) {  void Paso_FCT_setLowOrderOperator(Paso_TransportProblem * fc)
599      {
600    dim_t i;    dim_t i;
601    index_t iptr_ij, iptr_ji;    index_t iptr_ij, iptr_ji;
602    const index_t*  main_iptr=Paso_TransportProblem_borrowMainDiagonalPointer(fc);    const index_t*  main_iptr=Paso_TransportProblem_borrowMainDiagonalPointer(fc);
# Line 621  void Paso_FCT_setLowOrderOperator(Paso_T Line 614  void Paso_FCT_setLowOrderOperator(Paso_T
614        #pragma omp parallel for private(i, iptr_ij, iptr_ji)  schedule(static)        #pragma omp parallel for private(i, iptr_ij, iptr_ji)  schedule(static)
615        for (i = 0; i < n; ++i) {        for (i = 0; i < n; ++i) {
616            double sum=fc->transport_matrix->mainBlock->val[main_iptr[i]];            double sum=fc->transport_matrix->mainBlock->val[main_iptr[i]];
617          
618  /* printf("sum[%d] = %e -> ", i, sum); */  /* printf("sum[%d] = %e -> ", i, sum); */
619            /* look at a[i,j] */            /* look at a[i,j] */
620            for (iptr_ij=pattern->mainPattern->ptr[i];iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {            for (iptr_ij=pattern->mainPattern->ptr[i];iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
621                const index_t j    = pattern->mainPattern->index[iptr_ij];                const index_t j    = pattern->mainPattern->index[iptr_ij];
622                const double rtmp1 = fc->transport_matrix->mainBlock->val[iptr_ij];                const double rtmp1 = fc->transport_matrix->mainBlock->val[iptr_ij];
623            if (j!=i) {                if (j!=i) {
624                   /* find entry a[j,i] */                   /* find entry a[j,i] */
625                   #pragma ivdep                   #pragma ivdep
626                   for (iptr_ji=pattern->mainPattern->ptr[j]; iptr_ji<pattern->mainPattern->ptr[j+1]; ++iptr_ji) {                   for (iptr_ji=pattern->mainPattern->ptr[j]; iptr_ji<pattern->mainPattern->ptr[j+1]; ++iptr_ji) {
627              
628                      if ( pattern->mainPattern->index[iptr_ji] == i) {                      if ( pattern->mainPattern->index[iptr_ji] == i) {
629                  const double rtmp2=fc->transport_matrix->mainBlock->val[iptr_ji];                          const double rtmp2=fc->transport_matrix->mainBlock->val[iptr_ji];
630  /*  /*
631  printf("a[%d,%d]=%e\n",i,j,rtmp1);  printf("a[%d,%d]=%e\n",i,j,rtmp1);
632  printf("a[%d,%d]=%e\n",j,i,rtmp2);  printf("a[%d,%d]=%e\n",j,i,rtmp2);
# Line 651  printf("a[%d,%d]=%e\n",j,i,rtmp2); Line 644  printf("a[%d,%d]=%e\n",j,i,rtmp2);
644            for (iptr_ij=pattern->col_couplePattern->ptr[i];iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {            for (iptr_ij=pattern->col_couplePattern->ptr[i];iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
645                const index_t    j = pattern->col_couplePattern->index[iptr_ij];                const index_t    j = pattern->col_couplePattern->index[iptr_ij];
646                const double  rtmp1 = fc->transport_matrix->col_coupleBlock->val[iptr_ij];                const double  rtmp1 = fc->transport_matrix->col_coupleBlock->val[iptr_ij];
647            /* find entry a[j,i] */                /* find entry a[j,i] */
648                #pragma ivdep                #pragma ivdep
649                for (iptr_ji=pattern->row_couplePattern->ptr[j]; iptr_ji<pattern->row_couplePattern->ptr[j+1]; ++iptr_ji) {                for (iptr_ji=pattern->row_couplePattern->ptr[j]; iptr_ji<pattern->row_couplePattern->ptr[j+1]; ++iptr_ji) {
650                      if (pattern->row_couplePattern->index[iptr_ji]==i) {                      if (pattern->row_couplePattern->index[iptr_ji]==i) {
# Line 666  printf("a[%d,%d]=%e\n",j,i,rtmp2); Line 659  printf("a[%d,%d]=%e\n",j,i,rtmp2);
659           }           }
660           /* set main diagonal entry */           /* set main diagonal entry */
661           fc->main_diagonal_low_order_transport_matrix[i]=sum;           fc->main_diagonal_low_order_transport_matrix[i]=sum;
662  /*   printf("%e \n", sum); */  /*       printf("%e \n", sum); */
663        }        }
664    
665    }    }
# Line 678  printf("a[%d,%d]=%e\n",j,i,rtmp2); Line 671  printf("a[%d,%d]=%e\n",j,i,rtmp2);
671   *   *
672   */   */
673  void Paso_FCT_Solver_setMuPaLu(double* out,  void Paso_FCT_Solver_setMuPaLu(double* out,
674                                 const double* M,                                 const double* M,
675                                 paso::const_Coupler_ptr u_coupler,                                 paso::const_Coupler_ptr u_coupler,
676                                 const double a,                                 const double a,
677                                 paso::const_SystemMatrix_ptr L)                                 paso::const_SystemMatrix_ptr L)
# Line 695  void Paso_FCT_Solver_setMuPaLu(double* o Line 688  void Paso_FCT_Solver_setMuPaLu(double* o
688        if ( M[i] > 0.) {        if ( M[i] > 0.) {
689            out[i]=M[i]*u[i];            out[i]=M[i]*u[i];
690        } else {        } else {
691        out[i]=u[i];            out[i]=u[i];
692        }        }
693    }    }
694    if (ABS(a)>0) {    if (ABS(a)>0) {
695        #pragma omp parallel for schedule(static) private(i, iptr_ij)        #pragma omp parallel for schedule(static) private(i, iptr_ij)
696        for (i = 0; i < n; ++i) {        for (i = 0; i < n; ++i) {
697      if ( M[i] > 0.) {          if ( M[i] > 0.) {
698            double sum=0;            double sum=0;
699            const double u_i=u[i];            const double u_i=u[i];
700            #pragma ivdep            #pragma ivdep
701        for (iptr_ij=(pattern->mainPattern->ptr[i]);iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {            for (iptr_ij=(pattern->mainPattern->ptr[i]);iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
702                 const index_t j=pattern->mainPattern->index[iptr_ij];                 const index_t j=pattern->mainPattern->index[iptr_ij];
703                 const double l_ij=L->mainBlock->val[iptr_ij];                 const double l_ij=L->mainBlock->val[iptr_ij];
704                 sum+=l_ij*(u[j]-u_i);                 sum+=l_ij*(u[j]-u_i);
             
705            }            }
706            #pragma ivdep            #pragma ivdep
707        for (iptr_ij=(pattern->col_couplePattern->ptr[i]);iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {            for (iptr_ij=(pattern->col_couplePattern->ptr[i]);iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
708                 const index_t j=pattern->col_couplePattern->index[iptr_ij];                 const index_t j=pattern->col_couplePattern->index[iptr_ij];
709                 const double l_ij=L->col_coupleBlock->val[iptr_ij];                 const double l_ij=L->col_coupleBlock->val[iptr_ij];
710                 sum+=l_ij*(remote_u[j]-u_i);                 sum+=l_ij*(remote_u[j]-u_i);
711            }            }
712            out[i]+=a*sum;            out[i]+=a*sum;
713      }          }
714        }        }
715    }    }
716  }  }
717    
 /* ************************************************************************************************************************************************* */  
   
   
   
   
   
   
   

Legend:
Removed from v.4836  
changed lines
  Added in v.4843

  ViewVC Help
Powered by ViewVC 1.1.26