/[escript]/trunk/paso/src/FCT_Solver.cpp
ViewVC logotype

Diff of /trunk/paso/src/FCT_Solver.cpp

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 4856 by caltinay, Wed Apr 9 06:46:55 2014 UTC revision 4867 by caltinay, Fri Apr 11 12:29:49 2014 UTC
# Line 34  Line 34 
34  #include "Preconditioner.h"  #include "Preconditioner.h"
35  #include "PasoUtil.h"  #include "PasoUtil.h"
36    
37    namespace paso {
38    
39  Paso_FCT_Solver* Paso_FCT_Solver_alloc(Paso_TransportProblem *fctp, paso::Options* options)  FCT_Solver::FCT_Solver(const_TransportProblem_ptr tp, Options* options) :
40  {      transportproblem(tp),
41      Paso_FCT_Solver* out=NULL;      omega(0),
42      const dim_t blockSize=Paso_TransportProblem_getBlockSize(fctp);      z(NULL),
43      const dim_t n =  Paso_TransportProblem_getTotalNumRows(fctp);      du(NULL)
44    {
45      out=new Paso_FCT_Solver;      const dim_t blockSize = tp->getBlockSize();
46      out->transportproblem  = Paso_TransportProblem_getReference(fctp);      const dim_t n = tp->getTotalNumRows();
47      out->mpi_info          = Esys_MPIInfo_getReference(fctp->mpi_info);      mpi_info = Esys_MPIInfo_getReference(tp->mpi_info);
48      out->flux_limiter      = Paso_FCT_FluxLimiter_alloc(fctp);      flux_limiter = new FCT_FluxLimiter(tp);
49      out->b                 = new double[n];      b = new double[n];
50      if ( (options->ode_solver == PASO_CRANK_NICOLSON) || (options->ode_solver == PASO_BACKWARD_EULER) ) {      if (options->ode_solver == PASO_CRANK_NICOLSON || options->ode_solver == PASO_BACKWARD_EULER) {
51          out->du = new double[n];          du = new double[n];
52          out->z = new double[n];          z = new double[n];
     } else {  
         out->du = NULL;  
         out->z=NULL;  
     }  
     out->u_coupler.reset(new paso::Coupler(Paso_TransportProblem_borrowConnector(fctp), blockSize));  
     out->u_old_coupler.reset(new paso::Coupler(Paso_TransportProblem_borrowConnector(fctp), blockSize));  
     out->omega=0;  
   
     if ( options->ode_solver == PASO_LINEAR_CRANK_NICOLSON ) {  
         out->method   = PASO_LINEAR_CRANK_NICOLSON;  
     } else if ( options->ode_solver == PASO_CRANK_NICOLSON ) {  
         out->method = PASO_CRANK_NICOLSON;  
     } else if ( options->ode_solver == PASO_BACKWARD_EULER ) {  
         out->method = PASO_BACKWARD_EULER;  
     } else {  
         Esys_setError(VALUE_ERROR, "Paso_FCT_Solver_alloc: unknown integration scheme.");  
         out->method = UNKNOWN;  
53      }      }
54        u_coupler.reset(new Coupler(tp->borrowConnector(), blockSize));
55        u_old_coupler.reset(new Coupler(tp->borrowConnector(), blockSize));
56    
57      if (Esys_noError()) {      if (options->ode_solver == PASO_LINEAR_CRANK_NICOLSON) {
58          return out;          method = PASO_LINEAR_CRANK_NICOLSON;
59        } else if (options->ode_solver == PASO_CRANK_NICOLSON) {
60            method = PASO_CRANK_NICOLSON;
61        } else if (options->ode_solver == PASO_BACKWARD_EULER) {
62            method = PASO_BACKWARD_EULER;
63      } else {      } else {
64          Paso_FCT_Solver_free(out);          Esys_setError(VALUE_ERROR, "FCT_Solver: unknown integration scheme.");
65          return NULL;          method = UNKNOWN;
     }  
 }  
   
 void Paso_FCT_Solver_free(Paso_FCT_Solver *in)  
 {  
     if (in != NULL) {  
         Paso_TransportProblem_free(in->transportproblem);  
         Paso_FCT_FluxLimiter_free(in->flux_limiter);  
         Esys_MPIInfo_free(in->mpi_info);  
         delete[] in->b;  
         delete[] in->z;  
         delete[] in->du;  
         delete in;  
66      }      }
67  }  }
68    
69  double Paso_FCT_Solver_getSafeTimeStepSize(Paso_TransportProblem* fctp)  FCT_Solver::~FCT_Solver()
70  {  {
71     dim_t i;      delete flux_limiter;
72     double dt_max_loc=LARGE_POSITIVE_FLOAT;      Esys_MPIInfo_free(mpi_info);
73     double dt_max=LARGE_POSITIVE_FLOAT;      delete[] b;
74     const dim_t n = fctp->transport_matrix->getTotalNumRows();      delete[] z;
75     /* set low order transport operator */      delete[] du;
76     Paso_FCT_setLowOrderOperator(fctp);  }
77    
78     if (Esys_noError()) {  // modifies the main diagonal of the iteration matrix to introduce new dt
79          /*  void FCT_Solver::initialize(double _dt, Options* options, Paso_Performance* pp)
80           *  calculate time step size:  {
81          */      const_TransportProblem_ptr fctp(transportproblem);
82          dt_max=LARGE_POSITIVE_FLOAT;      const index_t* main_iptr = fctp->borrowMainDiagonalPointer();
83          #pragma omp parallel private(i, dt_max_loc)      const dim_t n = fctp->transport_matrix->getTotalNumRows();
84          {      const double theta = getTheta();
85                 dt_max_loc=LARGE_POSITIVE_FLOAT;      omega = 1. / (_dt * theta);
86                 #pragma omp for schedule(static)      dim_t i;
87                 for (i=0;i<n;++i) {      Options options2;
88                    const double l_ii=fctp->main_diagonal_low_order_transport_matrix[i];  
89                    const double m_i=fctp->lumped_mass_matrix[i];      Paso_solve_free(fctp->iteration_matrix.get());
90                    if ( m_i > 0 ) {      //   fctp->iteration_matrix[i,i]=m[i]/(dt theta) -l[i,i]
91                        if (l_ii<0) dt_max_loc=MIN(dt_max_loc,m_i/(-l_ii));      dt = _dt;
                   }  
                }  
                #pragma omp critical  
                {  
                   dt_max=MIN(dt_max,dt_max_loc);  
                }  
         }  
         #ifdef ESYS_MPI  
         {  
                dt_max_loc=dt_max;  
                MPI_Allreduce(&dt_max_loc, &dt_max, 1, MPI_DOUBLE, MPI_MIN, fctp->mpi_info->comm);  
         }  
         #endif  
         if (dt_max<LARGE_POSITIVE_FLOAT) dt_max*=2.;  
    }  
    return dt_max;  
 }  
   
 /* modifies the main diagonal of the iteration matrix to introduce new dt */  
 void Paso_FCT_Solver_initialize(const double dt, Paso_FCT_Solver *fct_solver, paso::Options* options, Paso_Performance* pp)  
 {  
    Paso_TransportProblem* fctp = fct_solver->transportproblem;  
    const index_t* main_iptr=Paso_TransportProblem_borrowMainDiagonalPointer(fctp);  
    const dim_t n = fctp->transport_matrix->getTotalNumRows();  
    const double theta = Paso_FCT_Solver_getTheta(fct_solver);  
    const double omega=1./(dt* theta);  
    dim_t i;  
    paso::Options options2;  
   
   
   
    Paso_solve_free(fctp->iteration_matrix.get());  
    /*  
     *   fctp->iteration_matrix[i,i]=m[i]/(dt theta) -l[i,i]  
     *  
    */  
     fct_solver->omega=omega;  
     fct_solver->dt = dt;  
92      #pragma omp parallel for private(i)      #pragma omp parallel for private(i)
93      for (i = 0; i < n; ++i) {      for (i = 0; i < n; ++i) {
94             const double m_i=fctp->lumped_mass_matrix[i];          const double m_i = fctp->lumped_mass_matrix[i];
95             const double l_ii = fctp->main_diagonal_low_order_transport_matrix[i];          const double l_ii = fctp->main_diagonal_low_order_transport_matrix[i];
96             if ( m_i > 0 ) {          if ( m_i > 0 ) {
97                 fctp->iteration_matrix->mainBlock->val[main_iptr[i]] = m_i * omega - l_ii;              fctp->iteration_matrix->mainBlock->val[main_iptr[i]] = m_i * omega - l_ii;
98             } else {          } else {
99                 fctp->iteration_matrix->mainBlock->val[main_iptr[i]] = ABS(m_i * omega - l_ii)/(EPSILON*EPSILON);              fctp->iteration_matrix->mainBlock->val[main_iptr[i]] = ABS(m_i * omega - l_ii)/(EPSILON*EPSILON);
100             }          }
101      }      }
102    
103      /* allocate preconditioner/solver */      // allocate preconditioner/solver
104      options2.verbose = options->verbose;      options2.verbose = options->verbose;
105      if (fct_solver->method == PASO_LINEAR_CRANK_NICOLSON) {      if (method == PASO_LINEAR_CRANK_NICOLSON) {
106          options2.preconditioner = PASO_GS;          options2.preconditioner = PASO_GS;
107      } else  {      } else {
108          options2.preconditioner = PASO_JACOBI;          options2.preconditioner = PASO_JACOBI;
109  /* options2.preconditioner = PASO_GS; */          //options2.preconditioner = PASO_GS;
110      }      }
111      options2.use_local_preconditioner = FALSE;      options2.use_local_preconditioner = false;
112      options2.sweeps=-1;      options2.sweeps = -1;
113    
114      Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER_INIT);      Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER_INIT);
115      fctp->iteration_matrix->setPreconditioner(&options2);      fctp->iteration_matrix->setPreconditioner(&options2);
116      Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER_INIT);      Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER_INIT);
117  }  }
118    
119  /* entry point for update procedures */  // entry point for update procedures
120  err_t Paso_FCT_Solver_update(Paso_FCT_Solver *fct_solver, double* u, double *u_old,  paso::Options* options, Paso_Performance *pp)  err_t FCT_Solver::update(double* u, double* u_old, Options* options, Paso_Performance* pp)
121  {  {
     const index_t method=fct_solver->method;  
122      err_t err_out = SOLVER_NO_ERROR;      err_t err_out = SOLVER_NO_ERROR;
123    
124      if (method == PASO_LINEAR_CRANK_NICOLSON) {      if (method == PASO_LINEAR_CRANK_NICOLSON) {
125          err_out=Paso_FCT_Solver_update_LCN(fct_solver, u, u_old, options, pp);          err_out = updateLCN(u, u_old, options, pp);
126      } else if (method == PASO_CRANK_NICOLSON) {      } else if (method == PASO_CRANK_NICOLSON) {
127          err_out=Paso_FCT_Solver_updateNL(fct_solver, u, u_old, options, pp);          err_out = updateNL(u, u_old, options, pp);
128      } else if (method == PASO_BACKWARD_EULER) {      } else if (method == PASO_BACKWARD_EULER) {
129          err_out=Paso_FCT_Solver_updateNL(fct_solver, u, u_old, options, pp);          err_out = updateNL(u, u_old, options, pp);
130      } else {      } else {
131          err_out = SOLVER_INPUT_ERROR;          err_out = SOLVER_INPUT_ERROR;
132      }      }
133      return err_out;      return err_out;
134  }  }
135    
136  /* linear crank-nicolson update */  /// linear crank-nicolson update
137  err_t Paso_FCT_Solver_update_LCN(Paso_FCT_Solver *fct_solver, double * u, double *u_old, paso::Options* options, Paso_Performance *pp)  err_t FCT_Solver::updateLCN(double* u, double* u_old, Options* options, Paso_Performance *pp)
138  {  {
     double const dt = fct_solver->dt;  
139      dim_t sweep_max, i;      dim_t sweep_max, i;
     double *b = fct_solver->b;  
140      double const RTOL = options->tolerance;      double const RTOL = options->tolerance;
141      const dim_t n=Paso_TransportProblem_getTotalNumRows(fct_solver->transportproblem);      const dim_t n = transportproblem->getTotalNumRows();
142      paso::SystemMatrix_ptr iteration_matrix(fct_solver->transportproblem->iteration_matrix);      SystemMatrix_ptr iteration_matrix(transportproblem->iteration_matrix);
143      const index_t*  main_iptr=Paso_TransportProblem_borrowMainDiagonalPointer(fct_solver->transportproblem);      const index_t* main_iptr = transportproblem->borrowMainDiagonalPointer();
144      err_t errorCode = SOLVER_NO_ERROR;      err_t errorCode = SOLVER_NO_ERROR;
145      double norm_u_tilde;      double norm_u_tilde;
146    
147      fct_solver->u_old_coupler->startCollect(u_old);      u_old_coupler->startCollect(u_old);
148      fct_solver->u_old_coupler->finishCollect();      u_old_coupler->finishCollect();
149    
150      /* b[i]=m*u_tilde[i] = m u_old[i] + dt/2 sum_{j <> i} l_{ij}*(u_old[j]-u_old[i])      // b[i]=m*u_tilde[i] = m u_old[i] + dt/2 sum_{j <> i} l_{ij}*(u_old[j]-u_old[i])
151             = u_tilde[i]   = u_old[i] where constraint m<0.      //     = u_tilde[i]  = u_old[i] where constraint m<0.
152          * note that iteration_matrix stores the negative values of the      //  note that iteration_matrix stores the negative values of the
153          * low order transport matrix l. Therefore a=-dt*0.5 is used. */      //  low order transport matrix l. Therefore a=-dt*0.5 is used.
154    
155      Paso_FCT_Solver_setMuPaLu(b, fct_solver->transportproblem->lumped_mass_matrix,      setMuPaLu(b, u_old_coupler, -dt*0.5);
                              fct_solver->u_old_coupler, -dt*0.5, iteration_matrix);  
156      /* solve for u_tilde : u_tilda = m^{-1} * b   */      /* solve for u_tilde : u_tilda = m^{-1} * b   */
157      Paso_FCT_FluxLimiter_setU_tilda(fct_solver->flux_limiter, b);      flux_limiter->setU_tilde(b);
158      /* u_tilda_connector is completed */      // u_tilde_connector is completed
159    
160        // calculate anti-diffusive fluxes for u_tilde
161        setAntiDiffusionFlux_linearCN(flux_limiter->antidiffusive_fluxes);
162    
163      /* calculate anti-diffusive fluxes for u_tilda */      /* b_i += sum_{j} limitation factor_{ij} * antidiffusive_flux_{ij} */
164      Paso_FCT_setAntiDiffusionFlux_linearCN(fct_solver->flux_limiter->antidiffusive_fluxes,      flux_limiter->addLimitedFluxes_Start();
165                                             fct_solver->transportproblem, dt,      flux_limiter->addLimitedFluxes_Complete(b);
                                            fct_solver->flux_limiter->u_tilde_coupler,  
                                            fct_solver->u_old_coupler);  
   
   
    /* b_i += sum_{j} limitation factor_{ij} * antidiffusive_flux_{ij} */  
    Paso_FCT_FluxLimiter_addLimitedFluxes_Start(fct_solver->flux_limiter);  
    Paso_FCT_FluxLimiter_addLimitedFluxes_Complete(fct_solver->flux_limiter, b);  
166    
167     paso::util::scale(n, b,fct_solver->omega );      util::scale(n, b, omega);
168     /* solve (m-dt/2*L) u = b in the form (omega*m-L) u = b * omega with omega*dt/2=1 */      // solve (m-dt/2*L) u = b in the form (omega*m-L) u = b * omega with omega*dt/2=1
169     #pragma omp for private(i) schedule(static)     #pragma omp for private(i) schedule(static)
170     for (i = 0; i < n; ++i) {      for (i = 0; i < n; ++i) {
171         if (! (fct_solver->transportproblem->lumped_mass_matrix[i] > 0 ) ) {         if (!(transportproblem->lumped_mass_matrix[i] > 0)) {
172           b[i] = fct_solver->flux_limiter->u_tilde[i]           b[i] = flux_limiter->u_tilde[i]
173               * fct_solver->transportproblem->iteration_matrix->mainBlock->val[main_iptr[i]];               * transportproblem->iteration_matrix->mainBlock->val[main_iptr[i]];
174         }         }
175     }      }
176     /* initial guess is u<- -u + 2*u_tilde */      // initial guess is u<- -u + 2*u_tilde
177     paso::util::update(n, -1., u, 2., fct_solver->flux_limiter->u_tilde);      util::update(n, -1., u, 2., flux_limiter->u_tilde);
   
    sweep_max = MAX((int) (- 2 * log(RTOL)/log(2.)-0.5),1);  
    norm_u_tilde=paso::util::lsup(n, fct_solver->flux_limiter->u_tilde, fct_solver->flux_limiter->mpi_info);  
    if (options->verbose) {  
        printf("Paso_FCT_Solver_update_LCN: u_tilda lsup = %e (rtol = %e, max. sweeps = %d)\n",norm_u_tilde,RTOL * norm_u_tilde ,sweep_max);  
    }  
    errorCode = paso::Preconditioner_Smoother_solve_byTolerance( iteration_matrix,  ((paso::Preconditioner*) (iteration_matrix->solver_p))->gs,  
                                                    u, b, RTOL, &sweep_max, TRUE);  
    if (errorCode == PRECONDITIONER_NO_ERROR) {  
       if (options->verbose) printf("Paso_FCT_Solver_update_LCN: convergence after %d Gauss-Seidel steps.\n",sweep_max);  
       errorCode=SOLVER_NO_ERROR;  
    } else {  
       if (options->verbose) printf("Paso_FCT_Solver_update_LCN: Gauss-Seidel failed within %d stesp (rel. tolerance %e).\n",sweep_max,RTOL);  
       errorCode= SOLVER_MAXITER_REACHED;  
    }  
    return errorCode;  
178    
179        sweep_max = MAX((int) (- 2 * log(RTOL)/log(2.)-0.5),1);
180        norm_u_tilde = util::lsup(n, flux_limiter->u_tilde, flux_limiter->mpi_info);
181        if (options->verbose) {
182            printf("FCT_Solver::updateLCN: u_tilde lsup = %e (rtol = %e, max. sweeps = %d)\n", norm_u_tilde, RTOL*norm_u_tilde, sweep_max);
183        }
184        errorCode = Preconditioner_Smoother_solve_byTolerance(iteration_matrix,
185                ((Preconditioner*)(iteration_matrix->solver_p))->gs, u, b, RTOL,
186                &sweep_max, true);
187        if (errorCode == PRECONDITIONER_NO_ERROR) {
188            if (options->verbose) printf("FCT_Solver::updateLCN: convergence after %d Gauss-Seidel steps.\n",sweep_max);
189            errorCode = SOLVER_NO_ERROR;
190        } else {
191            if (options->verbose) printf("FCT_Solver::updateLCN: Gauss-Seidel failed within %d stesp (rel. tolerance %e).\n",sweep_max,RTOL);
192            errorCode = SOLVER_MAXITER_REACHED;
193        }
194        return errorCode;
195  }  }
196    
197  err_t Paso_FCT_Solver_updateNL(Paso_FCT_Solver *fct_solver, double* u, double *u_old, paso::Options* options, Paso_Performance *pp)  err_t FCT_Solver::updateNL(double* u, double *u_old, Options* options,
198  {                             Paso_Performance *pp)
199     const dim_t num_critical_rates_max=3; /* number of rates >=critical_rate accepted before divergence is triggered */  {
200     const double critical_rate=0.95;   /* expected value of convergence rate */      // number of rates >=critical_rate accepted before divergence is triggered
201        const dim_t num_critical_rates_max = 3;
202     double *b = fct_solver->b;      // expected value of convergence rate
203     double *z = fct_solver->z;      const double critical_rate = 0.95;
204     double *du = fct_solver->du;  
205     double const dt = fct_solver->dt;      const_TransportProblem_ptr fctp(transportproblem);
206     Paso_TransportProblem* fctp = fct_solver->transportproblem;      dim_t i;
207     Paso_FCT_FluxLimiter* flux_limiter = fct_solver->flux_limiter;      double norm_u_tilde, norm_du=LARGE_POSITIVE_FLOAT, norm_du_old, rate=1.;
208     dim_t i;      const dim_t n = fctp->transport_matrix->getTotalNumRows();
209     double norm_u_tilde, ATOL, norm_du=LARGE_POSITIVE_FLOAT, norm_du_old, rate=1.;      const double atol = options->absolute_tolerance;
210     err_t errorCode=SOLVER_NO_ERROR;      const double rtol = options->tolerance;
211     const dim_t n = fctp->transport_matrix->getTotalNumRows();      const dim_t max_m = options->iter_max;
212     const double atol=options->absolute_tolerance;      dim_t m = 0, num_critical_rates = 0;
213     const double rtol=options->tolerance;      err_t errorCode = SOLVER_NO_ERROR;
214     const dim_t max_m=options->iter_max;      bool converged=false, max_m_reached=false, diverged=false;
215     dim_t m=0, num_critical_rates=0 ;      /* //////////////////////////////////////////////////////////////////// */
216    /* ////////////////////////////////////////////////////////////////////// */  
217        options->num_iter=0;
218     bool converged=FALSE, max_m_reached=FALSE,diverged=FALSE;      u_old_coupler->startCollect(u_old);
219     options->num_iter=0;      u_old_coupler->finishCollect();
220        // prepare u_tilde and flux limiter
221     fct_solver->u_old_coupler->startCollect(u_old);      if (method == PASO_BACKWARD_EULER) {
222     fct_solver->u_old_coupler->finishCollect();          // b[i]=m_i* u_old[i]
223      /* prepare u_tilda and flux limiter */          #pragma omp for private(i) schedule(static)
224      if ( fct_solver->method == PASO_BACKWARD_EULER ) {          for (i = 0; i < n; ++i) {
225            /* b[i]=m_i* u_old[i] */              if (fctp->lumped_mass_matrix[i] > 0 ) {
226            #pragma omp for private(i) schedule(static)                  b[i]=u_old[i]* fctp->lumped_mass_matrix[i];
227            for (i = 0; i < n; ++i) {              } else {
228                 if (fctp->lumped_mass_matrix[i] > 0 ) {                  b[i]=u_old[i];
229                     b[i]=u_old[i]* fctp->lumped_mass_matrix[i];              }
230                 } else {          }
                    b[i]=u_old[i];  
                }  
           }  
231      } else {      } else {
232         /* b[i]=m_i* u_old[i] + dt/2 sum_{j <> i} l_{ij}*(u_old[j]-u_old[i]) = m_i * u_tilde_i where m_i>0         /* b[i]=m_i* u_old[i] + dt/2 sum_{j <> i} l_{ij}*(u_old[j]-u_old[i]) = m_i * u_tilde_i where m_i>0
233          *     = u_old[i]  otherwise          *     = u_old[i]  otherwise
234          * note that iteration_matrix stores the negative values of the          * note that iteration_matrix stores the negative values of the
235          * low order transport matrix l. Therefore a=-dt*0.5 is used. */          * low order transport matrix l. Therefore a=-dt*0.5 is used. */
236         Paso_FCT_Solver_setMuPaLu(b,fctp->lumped_mass_matrix,fct_solver->u_old_coupler,-dt*0.5,fctp->iteration_matrix);          setMuPaLu(b, u_old_coupler, -dt*0.5);
237      }      }
238      Paso_FCT_FluxLimiter_setU_tilda(flux_limiter, b); /* u_tilda = m^{-1} b */      flux_limiter->setU_tilde(b); // u_tilde = m^{-1} b */
239      /* u_tilda_connector is completed */      // u_tilde_connector is completed
240      /************************************************************************/      /************************************************************************/
241      /* calculate stopping criterion */      // calculate stopping criterion
242      norm_u_tilde=paso::util::lsup(n, flux_limiter->u_tilde, flux_limiter->mpi_info);      norm_u_tilde=util::lsup(n, flux_limiter->u_tilde, flux_limiter->mpi_info);
243      ATOL= rtol * norm_u_tilde + atol ;      const double ATOL = rtol * norm_u_tilde + atol;
244      if (options->verbose) printf("Paso_FCT_Solver_updateNL: iteration starts u_tilda lsup = %e (abs. tol = %e)\n",norm_u_tilde,ATOL);      if (options->verbose)
245      /* ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// */          printf("FCT_Solver::updateNL: iteration starts u_tilde lsup = %e "
246                   "(abs. tol = %e)\n", norm_u_tilde, ATOL);
247      /* u_old is an initial guess for u*/  
248      paso::util::copy(n,u,u_old);      // u_old is an initial guess for u
249        util::copy(n, u, u_old);
250      while ( (!converged) && (!diverged) && (! max_m_reached) && Esys_noError()) {  
251          fct_solver->u_coupler->startCollect(u);      while (!converged && !diverged && !max_m_reached && Esys_noError()) {
252          fct_solver->u_coupler->finishCollect();          u_coupler->startCollect(u);
253            u_coupler->finishCollect();
254           /*  set antidiffusive_flux_{ij} for u */  
255           if (fct_solver->method == PASO_BACKWARD_EULER) {          // set antidiffusive_flux_{ij} for u
256               Paso_FCT_setAntiDiffusionFlux_BE(fct_solver->flux_limiter->antidiffusive_fluxes, fctp, dt, fct_solver->u_coupler, fct_solver->u_old_coupler);          if (method == PASO_BACKWARD_EULER) {
257                 setAntiDiffusionFlux_BE(flux_limiter->antidiffusive_fluxes);
258           } else {           } else {
259               Paso_FCT_setAntiDiffusionFlux_CN(fct_solver->flux_limiter->antidiffusive_fluxes, fctp, dt, fct_solver->u_coupler, fct_solver->u_old_coupler);               setAntiDiffusionFlux_CN(flux_limiter->antidiffusive_fluxes);
260           }           }
261           /* start the calculation of the limitation factors_{fct_solver->ij} */           // start the calculation of the limitation factors_{fct_solver->ij}
262           Paso_FCT_FluxLimiter_addLimitedFluxes_Start(flux_limiter); /* uses u_tilde */           flux_limiter->addLimitedFluxes_Start(); // uses u_tilde
263    
264           /*           /*
265            * z_m[i]=b[i] - (m_i*u[i] - omega*sum_{j<>i} l_{ij} (u[j]-u[i]) ) where m_i>0            * z_m[i]=b[i] - (m_i*u[i] - omega*sum_{j<>i} l_{ij} (u[j]-u[i]) ) where m_i>0
# Line 341  err_t Paso_FCT_Solver_updateNL(Paso_FCT_ Line 270  err_t Paso_FCT_Solver_updateNL(Paso_FCT_
270            * note that iteration_matrix stores the negative values of the            * note that iteration_matrix stores the negative values of the
271            * low order transport matrix l. Therefore a=dt*theta is used.            * low order transport matrix l. Therefore a=dt*theta is used.
272            */            */
273            if (fct_solver-> method == PASO_BACKWARD_EULER) {            if (method == PASO_BACKWARD_EULER) {
274                Paso_FCT_Solver_setMuPaLu(z, fctp->lumped_mass_matrix, fct_solver->u_coupler, dt, fctp->iteration_matrix);                setMuPaLu(z, u_coupler, dt);
275            } else {            } else {
276                Paso_FCT_Solver_setMuPaLu(z, fctp->lumped_mass_matrix, fct_solver->u_coupler, dt/2, fctp->iteration_matrix);                setMuPaLu(z, u_coupler, dt/2);
277            }            }
278    
279            paso::util::update(n,-1.,z,1.,b);  /* z=b-z */          util::update(n, -1., z, 1., b);  // z=b-z
280    
281            /* z_i += sum_{j} limitation factor_{ij} * antidiffusive_flux_{ij} */          // z_i += sum_{j} limitation factor_{ij} * antidiffusive_flux_{ij}
282            Paso_FCT_FluxLimiter_addLimitedFluxes_Complete(flux_limiter, z);          flux_limiter->addLimitedFluxes_Complete(z);
283    
284            /* we solve (m/omega - L ) * du = z */          // we solve (m/omega - L ) * du = z
285            if (fct_solver->method == PASO_BACKWARD_EULER) {          if (method == PASO_BACKWARD_EULER) {
286                dim_t cntIter = options->iter_max;              dim_t cntIter = options->iter_max;
287                double tol= paso::util::l2(n, z, fctp->mpi_info) ;              double tol= util::l2(n, z, fctp->mpi_info) ;
288    
289                if ( m ==0) {              if (m==0) {
290                    tol *=0.5;                  tol *= 0.5;
291                } else {              } else {
292                    tol *= MIN(MAX(rate*rate, 1e-2), 0.5);                  tol *= MIN(MAX(rate*rate, 1e-2), 0.5);
293                }              }
294                /* use BiCGSTab with jacobi preconditioner ( m - omega * L ) */              // use BiCGStab with Jacobi preconditioner ( m - omega * L )
295                paso::util::zeroes(n,du);              util::zeroes(n,du);
296                errorCode = paso::Solver_BiCGStab(fctp->iteration_matrix, z, du, &cntIter, &tol, pp);              errorCode = Solver_BiCGStab(fctp->iteration_matrix, z, du, &cntIter, &tol, pp);
297    
298                /* errorCode =  Paso_Solver_GMRES(fctp->iteration_matrix, z, du, &cntIter, &tol, 10, 2000, pp); */              // errorCode = Solver_GMRES(fctp->iteration_matrix, z, du, &cntIter, &tol, 10, 2000, pp);
299                if (options->verbose) printf("Paso_FCT_Solver_updateNL: BiCGStab is completed after %d steps (residual =%e).\n",cntIter, tol);              if (options->verbose)
300                options->num_iter+=cntIter;                  printf("FCT_Solver::updateNL: BiCGStab completed after %d steps (residual =%e).\n",cntIter, tol);
301                if ( errorCode !=  SOLVER_NO_ERROR) break;              options->num_iter += cntIter;
302             } else {              if (errorCode != SOLVER_NO_ERROR) break;
303                /* just use the main diagonal of (m/omega - L ) */          } else {
304                // just use the main diagonal of (m/omega - L )
305                 paso::Preconditioner_Smoother_solve(fctp->iteration_matrix, ((paso::Preconditioner*) (fctp->iteration_matrix->solver_p))->jacobi,  
306                                            du, z, 1, FALSE);              Preconditioner_Smoother_solve(fctp->iteration_matrix,
307                    ((Preconditioner*) (fctp->iteration_matrix->solver_p))->jacobi,
308                options->num_iter++;                  du, z, 1, false);
309             }  
310                options->num_iter++;
311            paso::util::update(n,1.,u,fct_solver->omega,du);          }
312             norm_du_old=norm_du;  
313             norm_du=paso::util::lsup(n,du, fctp->mpi_info);          util::update(n, 1., u, omega, du);
314             if (m ==0) {          norm_du_old = norm_du;
315                  if (options->verbose) printf("Paso_FCT_Solver_updateNL: step %d: increment= %e\n",m+1, norm_du * fct_solver->omega);          norm_du = util::lsup(n, du, fctp->mpi_info);
316             } else {          if (m == 0) {
317                  if (norm_du_old > 0.) {              if (options->verbose)
318                      rate=norm_du/norm_du_old;                  printf("FCT_Solver::updateNL: step %d: increment= %e\n", m+1,
319                  } else if (norm_du <= 0.) {                          norm_du * omega);
320                       rate=0.;          } else {
321                  } else {              if (norm_du_old > 0.) {
322                       rate=LARGE_POSITIVE_FLOAT;                  rate = norm_du/norm_du_old;
323                  }              } else if (norm_du <= 0.) {
324                  if (options->verbose) printf("Paso_FCT_Solver_updateNL: step %d: increment= %e (rate = %e)\n",m+1, norm_du * fct_solver->omega, rate);                  rate = 0.;
325                  num_critical_rates+=( rate<critical_rate ? 0 : 1);              } else {
326                  max_m_reached=(m>max_m);                  rate = LARGE_POSITIVE_FLOAT;
327                  diverged = (num_critical_rates >= num_critical_rates_max);              }
328                  converged=(norm_du * fct_solver->omega  <= ATOL);              if (options->verbose)
329             }                  printf("FCT_Solver::updateNL: step %d: increment= %e (rate = %e)\n",m+1, norm_du * omega, rate);
330             m++;              num_critical_rates += (rate<critical_rate ? 0 : 1);
331        } /* end of while loop */              max_m_reached = (m>max_m);
332        if (errorCode == SOLVER_NO_ERROR) {              diverged = (num_critical_rates >= num_critical_rates_max);
333                  if (converged) {              converged = (norm_du * omega <= ATOL);
334                      if (options->verbose) printf("Paso_FCT_Solver_updateNL: iteration is completed.\n");          }
335                      errorCode=SOLVER_NO_ERROR;          m++;
336                  } else if (diverged) {      } // end of while loop
337                      if (options->verbose) printf("Paso_FCT_Solver_updateNL: divergence.\n");      if (errorCode == SOLVER_NO_ERROR) {
338                      errorCode=SOLVER_DIVERGENCE;          if (converged) {
339                  } else if (max_m_reached) {              if (options->verbose)
340                       if (options->verbose) printf("Paso_FCT_Solver_updateNL: maximum number of iteration steps reached.\n");                  printf("FCT_Solver::updateNL: iteration is completed.\n");
341                      errorCode=SOLVER_MAXITER_REACHED;              errorCode = SOLVER_NO_ERROR;
342                  }          } else if (diverged) {
343                if (options->verbose)
344                    printf("FCT_Solver::updateNL: divergence.\n");
345                errorCode = SOLVER_DIVERGENCE;
346            } else if (max_m_reached) {
347                if (options->verbose)
348                    printf("FCT_Solver::updateNL: maximum number of iteration steps reached.\n");
349                errorCode = SOLVER_MAXITER_REACHED;
350            }
351      }      }
352      return errorCode;      return errorCode;
353  }  }
# Line 424  err_t Paso_FCT_Solver_updateNL(Paso_FCT_ Line 361  err_t Paso_FCT_Solver_updateNL(Paso_FCT_
361   *     m=fc->mass matrix   *     m=fc->mass matrix
362   *     d=artificial diffusion matrix = L - K = - fc->iteration matrix - fc->transport matrix (away from main diagonal)   *     d=artificial diffusion matrix = L - K = - fc->iteration matrix - fc->transport matrix (away from main diagonal)
363   *   *
364   *   for CN : theta =0.5   *   for CN : theta = 0.5
365   *   for BE : theta = 1.   *   for BE : theta = 1.
366   */   */
367    
368  void Paso_FCT_setAntiDiffusionFlux_CN(paso::SystemMatrix_ptr flux_matrix,  void FCT_Solver::setAntiDiffusionFlux_CN(SystemMatrix_ptr flux_matrix)
369                                        const Paso_TransportProblem* fct,  {
370                                        const double dt,      const double* u = u_coupler->borrowLocalData();
371                                        paso::const_Coupler_ptr u_coupler,      const double* u_old = u_old_coupler->borrowLocalData();
372                                        paso::const_Coupler_ptr u_old_coupler)      const double* remote_u = u_coupler->borrowRemoteData();
373  {      const double* remote_u_old = u_old_coupler->borrowRemoteData();
374    dim_t i;      const double dt_half = dt/2;
375    index_t iptr_ij;      const_TransportProblem_ptr fct(transportproblem);
376        const_SystemMatrixPattern_ptr pattern(fct->iteration_matrix->pattern);
377    const double *u    = u_coupler->borrowLocalData();      const dim_t n = fct->iteration_matrix->getTotalNumRows();
378    const double *u_old= u_old_coupler->borrowLocalData();  
379    const double *remote_u=u_coupler->borrowRemoteData();  #pragma omp parallel for
380    const double *remote_u_old=u_old_coupler->borrowRemoteData();      for (dim_t i = 0; i < n; ++i) {
381    const double dt_half= dt/2;          const double u_i = u[i];
382    paso::const_SystemMatrixPattern_ptr pattern(fct->iteration_matrix->pattern);          const double u_old_i = u_old[i];
383    const dim_t n = fct->iteration_matrix->getTotalNumRows();  
384            #pragma ivdep
385    #pragma omp parallel for schedule(static) private(i, iptr_ij)          for (index_t iptr_ij = pattern->mainPattern->ptr[i];
386    for (i = 0; i < n; ++i) {                       iptr_ij < pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
387        const double u_i     = u[i];              const index_t j   = pattern->mainPattern->index[iptr_ij];
388        const double u_old_i = u_old[i];              const double m_ij = fct->mass_matrix->mainBlock->val[iptr_ij];
389                // this is in fact -d_ij
390        #pragma ivdep              const double d_ij = fct->transport_matrix->mainBlock->val[iptr_ij]+
391        for (iptr_ij=(pattern->mainPattern->ptr[i]);iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {                  fct->iteration_matrix->mainBlock->val[iptr_ij];
392          const index_t j      = pattern->mainPattern->index[iptr_ij];              const double u_old_j = u_old[j];
393          const double m_ij    = fct->mass_matrix->mainBlock->val[iptr_ij];              const double u_j = u[j];
394          const double d_ij    = fct->transport_matrix->mainBlock->val[iptr_ij]+fct->iteration_matrix->mainBlock->val[iptr_ij]; /* this is in fact -d_ij */  
395          const double u_old_j = u_old[j];              // (m_{ij} - dt (1-theta) d_{ij}) (u_old[j]-u_old[i]) - (m_{ij} + dt theta d_{ij}) (u[j]-u[i])
396          const double u_j     = u[j];              flux_matrix->mainBlock->val[iptr_ij] =
397                    (m_ij+dt_half*d_ij)*(u_old_j-u_old_i) -
398          /* (m_{ij} - dt (1-theta) d_{ij}) (u_old[j]-u_old[i]) - (m_{ij} + dt theta d_{ij}) (u[j]-u[i]) */                          (m_ij-dt_half*d_ij)*(u_j-u_i);
399          flux_matrix->mainBlock->val[iptr_ij]=(m_ij+dt_half*d_ij)*(u_old_j-u_old_i) - (m_ij-dt_half*d_ij)*(u_j-u_i);  
400            }
401        }          #pragma ivdep
402        #pragma ivdep          for (index_t iptr_ij = pattern->col_couplePattern->ptr[i];
403        for (iptr_ij=(pattern->col_couplePattern->ptr[i]);iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {                     iptr_ij < pattern->col_couplePattern->ptr[i+1]; iptr_ij++) {
404          const index_t j      = pattern->col_couplePattern->index[iptr_ij];              const index_t j = pattern->col_couplePattern->index[iptr_ij];
405          const double m_ij    = fct->mass_matrix->col_coupleBlock->val[iptr_ij];              const double m_ij = fct->mass_matrix->col_coupleBlock->val[iptr_ij];
406          const double d_ij    = fct->transport_matrix->col_coupleBlock->val[iptr_ij]+fct->iteration_matrix->col_coupleBlock->val[iptr_ij]; /* this is in fact -d_ij */              // this is in fact -d_ij
407          const double u_old_j = remote_u_old[j];              const double d_ij =
408          const double u_j     = remote_u[j];                  fct->transport_matrix->col_coupleBlock->val[iptr_ij] +
409          flux_matrix->col_coupleBlock->val[iptr_ij]=(m_ij+dt_half*d_ij)*(u_old_j-u_old_i)- (m_ij-dt_half*d_ij)*(u_j-u_i);                  fct->iteration_matrix->col_coupleBlock->val[iptr_ij];
410        }              const double u_old_j = remote_u_old[j];
411    }              const double u_j = remote_u[j];
412  }              flux_matrix->col_coupleBlock->val[iptr_ij] =
413                    (m_ij+dt_half*d_ij)*(u_old_j-u_old_i) -
414  void Paso_FCT_setAntiDiffusionFlux_BE(paso::SystemMatrix_ptr flux_matrix,                          (m_ij-dt_half*d_ij)*(u_j-u_i);
415                                        const Paso_TransportProblem* fct,          }
416                                        const double dt,      }
417                                        paso::const_Coupler_ptr u_coupler,  }
418                                        paso::const_Coupler_ptr u_old_coupler)  
419  {  void FCT_Solver::setAntiDiffusionFlux_BE(SystemMatrix_ptr flux_matrix)
420    dim_t i;  {
421    index_t iptr_ij;      const double* u = u_coupler->borrowLocalData();
422        const double* u_old = u_old_coupler->borrowLocalData();
423    const double *u = u_coupler->borrowLocalData();      const double* remote_u = u_coupler->borrowRemoteData();
424    const double *u_old = u_old_coupler->borrowLocalData();      const double* remote_u_old = u_old_coupler->borrowRemoteData();
425    const double *remote_u = u_coupler->borrowRemoteData();      const_TransportProblem_ptr fct(transportproblem);
426    const double *remote_u_old = u_old_coupler->borrowRemoteData();      const_SystemMatrixPattern_ptr pattern(fct->iteration_matrix->pattern);
427    paso::const_SystemMatrixPattern_ptr pattern(fct->iteration_matrix->pattern);      const dim_t n = fct->iteration_matrix->getTotalNumRows();
428    const dim_t n = fct->iteration_matrix->getTotalNumRows();  
429    #pragma omp parallel for
430    #pragma omp parallel for schedule(static) private(i, iptr_ij)      for (dim_t i = 0; i < n; ++i) {
431    for (i = 0; i < n; ++i) {          const double u_i = u[i];
432        const double u_i     = u[i];          const double u_old_i = u_old[i];
433        const double u_old_i = u_old[i];          #pragma ivdep
434        #pragma ivdep          for (index_t iptr_ij = pattern->mainPattern->ptr[i];
435        for (iptr_ij=pattern->mainPattern->ptr[i]; iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {                       iptr_ij < pattern->mainPattern->ptr[i+1]; iptr_ij++) {
436                const index_t j = pattern->mainPattern->index[iptr_ij];
437          const index_t j      = pattern->mainPattern->index[iptr_ij];              const double m_ij = fct->mass_matrix->mainBlock->val[iptr_ij];
438          const double m_ij    = fct->mass_matrix->mainBlock->val[iptr_ij];              // this is in fact -d_ij
439          const double d_ij    = fct->transport_matrix->mainBlock->val[iptr_ij]+fct->iteration_matrix->mainBlock->val[iptr_ij]; /* this is in fact -d_ij */              const double d_ij = fct->transport_matrix->mainBlock->val[iptr_ij]+
440          const double u_old_j = u_old[j];                  fct->iteration_matrix->mainBlock->val[iptr_ij];
441          const double u_j     = u[j];              const double u_old_j = u_old[j];
442                const double u_j = u[j];
443          flux_matrix->mainBlock->val[iptr_ij]=m_ij*(u_old_j-u_old_i)- (m_ij-dt*d_ij)*(u_j-u_i);  
444        }              flux_matrix->mainBlock->val[iptr_ij] =
445        #pragma ivdep                  m_ij*(u_old_j-u_old_i) - (m_ij-dt*d_ij)*(u_j-u_i);
446        for (iptr_ij=pattern->col_couplePattern->ptr[i]; iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {          }
447          const index_t j      = pattern->col_couplePattern->index[iptr_ij];          #pragma ivdep
448          const double m_ij    = fct->mass_matrix->col_coupleBlock->val[iptr_ij]; /* this is in fact -d_ij */          for (index_t iptr_ij = pattern->col_couplePattern->ptr[i];
449          const double d_ij    = fct->transport_matrix->col_coupleBlock->val[iptr_ij]+fct->iteration_matrix->col_coupleBlock->val[iptr_ij];                     iptr_ij < pattern->col_couplePattern->ptr[i+1]; iptr_ij++) {
450          const double u_old_j = remote_u_old[j];              const index_t j = pattern->col_couplePattern->index[iptr_ij];
451          const double u_j     = remote_u[j];              const double m_ij = fct->mass_matrix->col_coupleBlock->val[iptr_ij];
452                // this is in fact -d_ij
453          flux_matrix->col_coupleBlock->val[iptr_ij]=m_ij*(u_old_j-u_old_i)- (m_ij-dt*d_ij)*(u_j-u_i);              const double d_ij =
454        }                  fct->transport_matrix->col_coupleBlock->val[iptr_ij] +
455    }                  fct->iteration_matrix->col_coupleBlock->val[iptr_ij];
456  }              const double u_old_j = remote_u_old[j];
457                const double u_j = remote_u[j];
458  /* special version of the ant-diffusive fluxes for the linear Crank-Nicolson scheme  
459   * in fact this is evaluated for u = 2*u_tilde - u_old which is the predictor              flux_matrix->col_coupleBlock->val[iptr_ij] =
460   * of the solution of the the stabilised problem at time dt using the forward Euler scheme                  m_ij*(u_old_j-u_old_i) - (m_ij-dt*d_ij)*(u_j-u_i);
461            }
462        }
463    }
464    
465    /* special version of the ant-diffusive fluxes for the linear Crank-Nicolson
466     * scheme. In fact this is evaluated for u = 2*u_tilde - u_old which is the
467     * predictor of the solution of the the stabilised problem at time dt using
468     * the forward Euler scheme
469   *   *
470   *   f_{ij} = (m_{ij} - dt/2 d_{ij}) (u_old[j]-u_old[i]) - (m_{ij} + dt/2 d_{ij}) (u[j]-u[i])   * f_{ij} = (m_{ij} - dt/2 d_{ij}) (u_old[j]-u_old[i]) - (m_{ij} + dt/2 d_{ij}) (u[j]-u[i])
471   *    =  (m_{ij} - dt/2 d_{ij}) * (u_old[j]-u_old[i]) - (m_{ij} + dt/2 d_{ij}) * ( 2*(u_tilde[j]-u_tilde[i]) - (u_old[j] -u_old [i]) )   *    =  (m_{ij} - dt/2 d_{ij}) * (u_old[j]-u_old[i]) - (m_{ij} + dt/2 d_{ij}) * ( 2*(u_tilde[j]-u_tilde[i]) - (u_old[j] -u_old [i]) )
472   *    =  2*  m_{ij} * ( (u_old[j]-u_tilde[j] - (u_old[i]) - u_tilde[i]) ) - dt d_{ij} * (u_tilde[j]-u_tilde[i])   *    =  2*  m_{ij} * ( (u_old[j]-u_tilde[j] - (u_old[i]) - u_tilde[i]) ) - dt d_{ij} * (u_tilde[j]-u_tilde[i])
473   *   *
474   */   */
475    void FCT_Solver::setAntiDiffusionFlux_linearCN(SystemMatrix_ptr flux_matrix)
476    {
477        const_Coupler_ptr u_tilde_coupler(flux_limiter->u_tilde_coupler);
478        const double* u_tilde = u_tilde_coupler->borrowLocalData();
479        const double* u_old = u_old_coupler->borrowLocalData();
480        const double* remote_u_tilde = u_tilde_coupler->borrowRemoteData();
481        const double* remote_u_old = u_old_coupler->borrowRemoteData();
482        const_TransportProblem_ptr fct(transportproblem);
483        const_SystemMatrixPattern_ptr pattern(fct->iteration_matrix->pattern);
484        const dim_t n = fct->iteration_matrix->getTotalNumRows();
485    
486    #pragma omp parallel for
487        for (dim_t i = 0; i < n; ++i) {
488            const double u_tilde_i = u_tilde[i];
489            const double u_old_i = u_old[i];
490            const double du_i = u_tilde_i - u_old_i;
491            #pragma ivdep
492            for (index_t iptr_ij = pattern->mainPattern->ptr[i];
493                         iptr_ij < pattern->mainPattern->ptr[i+1]; iptr_ij++) {
494                const index_t j = pattern->mainPattern->index[iptr_ij];
495                const double m_ij = fct->mass_matrix->mainBlock->val[iptr_ij];
496                // this is in fact -d_ij
497                const double d_ij = fct->transport_matrix->mainBlock->val[iptr_ij]+
498                    fct->iteration_matrix->mainBlock->val[iptr_ij];
499                const double u_tilde_j = u_tilde[j];
500                const double u_old_j = u_old[j];
501                const double du_j = u_tilde_j - u_old_j;
502    
503  void Paso_FCT_setAntiDiffusionFlux_linearCN(paso::SystemMatrix_ptr flux_matrix,              flux_matrix->mainBlock->val[iptr_ij] = 2 * m_ij * ( du_i - du_j ) -
504                          const Paso_TransportProblem* fct, const double dt,                                            dt * d_ij * ( u_tilde_i - u_tilde_j);
505                          paso::const_Coupler_ptr u_tilde_coupler,          }
506                          paso::const_Coupler_ptr u_old_coupler)          #pragma ivdep
507  {          for (index_t iptr_ij=pattern->col_couplePattern->ptr[i];
508    dim_t i;                       iptr_ij<pattern->col_couplePattern->ptr[i+1]; iptr_ij++) {
509    index_t iptr_ij;  
510                const index_t j = pattern->col_couplePattern->index[iptr_ij];
511    const double *u_tilde = u_tilde_coupler->borrowLocalData();              const double m_ij = fct->mass_matrix->col_coupleBlock->val[iptr_ij];
512    const double *u_old = u_old_coupler->borrowLocalData();              // this is in fact -d_ij
513    const double *remote_u_tilde = u_tilde_coupler->borrowRemoteData();              const double d_ij =
514    const double *remote_u_old = u_old_coupler->borrowRemoteData();                  fct->transport_matrix->col_coupleBlock->val[iptr_ij] +
515    paso::const_SystemMatrixPattern_ptr pattern(fct->iteration_matrix->pattern);                  fct->iteration_matrix->col_coupleBlock->val[iptr_ij];
516    const dim_t n = fct->iteration_matrix->getTotalNumRows();              const double u_tilde_j = remote_u_tilde[j];
517                const double u_old_j = remote_u_old[j];
518    #pragma omp parallel for schedule(static) private(i, iptr_ij)              const double du_j = u_tilde_j - u_old_j;
   for (i = 0; i < n; ++i) {  
       const double u_tilde_i = u_tilde[i];  
       const double u_old_i   = u_old[i];  
       const double du_i      = u_tilde_i - u_old_i;  
       #pragma ivdep  
       for (iptr_ij=(pattern->mainPattern->ptr[i]);iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {  
   
           const index_t j      = pattern->mainPattern->index[iptr_ij];  
           const double m_ij    = fct->mass_matrix->mainBlock->val[iptr_ij];  
           const double d_ij    = fct->transport_matrix->mainBlock->val[iptr_ij]+fct->iteration_matrix->mainBlock->val[iptr_ij]; /* this is in fact -d_ij */  
           const double u_tilde_j = u_tilde[j];  
           const double u_old_j = u_old[j];  
           const double du_j    = u_tilde_j - u_old_j;  
   
           flux_matrix->mainBlock->val[iptr_ij]= 2 * m_ij * ( du_i - du_j ) - dt * d_ij * ( u_tilde_i - u_tilde_j);  
       }  
       #pragma ivdep  
       for (iptr_ij=(pattern->col_couplePattern->ptr[i]);iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {  
   
         const index_t j      = pattern->col_couplePattern->index[iptr_ij];  
         const double m_ij    = fct->mass_matrix->col_coupleBlock->val[iptr_ij];  
         const double d_ij    = fct->transport_matrix->col_coupleBlock->val[iptr_ij]+fct->iteration_matrix->col_coupleBlock->val[iptr_ij];/* this is in fact -d_ij */  
         const double u_tilde_j = remote_u_tilde[j];  
         const double u_old_j = remote_u_old[j];  
         const double du_j    = u_tilde_j - u_old_j;  
   
         flux_matrix->col_coupleBlock->val[iptr_ij]= 2 * m_ij * ( du_i - du_j ) - dt * d_ij *  ( u_tilde_i - u_tilde_j);  
   
       }  
   }  
519    
520                flux_matrix->col_coupleBlock->val[iptr_ij] =
521                    2*m_ij * ( du_i - du_j ) - dt * d_ij * (u_tilde_i - u_tilde_j);
522            }
523        }
524  }  }
525    
526  /****************************************************************************/  /****************************************************************************/
527    
528    double FCT_Solver::getSafeTimeStepSize(TransportProblem_ptr fctp)
529    {
530        double dt_max = LARGE_POSITIVE_FLOAT;
531        const dim_t n = fctp->transport_matrix->getTotalNumRows();
532    
533        // set low order transport operator
534        setLowOrderOperator(fctp);
535    
536        if (Esys_noError()) {
537            // calculate time step size
538            dt_max = LARGE_POSITIVE_FLOAT;
539    #pragma omp parallel
540            {
541                double dt_max_loc = LARGE_POSITIVE_FLOAT;
542    #pragma omp for schedule(static)
543                for (dim_t i=0; i<n; ++i) {
544                    const double l_ii = fctp->main_diagonal_low_order_transport_matrix[i];
545                    const double m_i = fctp->lumped_mass_matrix[i];
546                    if (m_i > 0) {
547                        if (l_ii<0)
548                            dt_max_loc = MIN(dt_max_loc,m_i/(-l_ii));
549                    }
550                }
551                #pragma omp critical
552                {
553                    dt_max = MIN(dt_max,dt_max_loc);
554                }
555            }
556    #ifdef ESYS_MPI
557            double dt_max_loc = dt_max;
558            MPI_Allreduce(&dt_max_loc, &dt_max, 1, MPI_DOUBLE, MPI_MIN, fctp->mpi_info->comm);
559    #endif
560            if (dt_max < LARGE_POSITIVE_FLOAT)
561                dt_max *= 2.;
562        }
563        return dt_max;
564    }
565    
566  /* Creates the low order transport matrix and stores its negative values  /* Creates the low order transport matrix and stores its negative values
567   * into the iteration_matrix except for the main diagonal which is stored   * into the iteration_matrix except for the main diagonal which is stored
568   * separately.   * separately.
# Line 594  void Paso_FCT_setAntiDiffusionFlux_linea Line 578  void Paso_FCT_setAntiDiffusionFlux_linea
578   *    c[i]-=d_ij   *    c[i]-=d_ij
579   */   */
580    
581  void Paso_FCT_setLowOrderOperator(Paso_TransportProblem * fc)  void FCT_Solver::setLowOrderOperator(TransportProblem_ptr fc)
582  {  {
583    dim_t i;      const index_t* main_iptr = fc->borrowMainDiagonalPointer();
   index_t iptr_ij, iptr_ji;  
   const index_t*  main_iptr=Paso_TransportProblem_borrowMainDiagonalPointer(fc);  
584    
585    if (!fc->iteration_matrix.get()) {      if (!fc->iteration_matrix.get()) {
586        fc->iteration_matrix.reset(new paso::SystemMatrix(          fc->iteration_matrix.reset(new SystemMatrix(
587                    fc->transport_matrix->type, fc->transport_matrix->pattern,                    fc->transport_matrix->type, fc->transport_matrix->pattern,
588                    fc->transport_matrix->row_block_size,                    fc->transport_matrix->row_block_size,
589                    fc->transport_matrix->col_block_size, true));                    fc->transport_matrix->col_block_size, true));
590    }      }
591    
592    if (Esys_noError()) {      const_SystemMatrixPattern_ptr pattern(fc->iteration_matrix->pattern);
593        paso::const_SystemMatrixPattern_ptr pattern(fc->iteration_matrix->pattern);      const dim_t n = fc->iteration_matrix->getTotalNumRows();
594        const dim_t n = fc->iteration_matrix->getTotalNumRows();  #pragma omp parallel for
595        #pragma omp parallel for private(i, iptr_ij, iptr_ji)  schedule(static)      for (dim_t i = 0; i < n; ++i) {
596        for (i = 0; i < n; ++i) {          double sum = fc->transport_matrix->mainBlock->val[main_iptr[i]];
597            double sum=fc->transport_matrix->mainBlock->val[main_iptr[i]];          //printf("sum[%d] = %e -> ", i, sum);
598    
599  /* printf("sum[%d] = %e -> ", i, sum); */          // look at a[i,j]
600            /* look at a[i,j] */          for (index_t iptr_ij=pattern->mainPattern->ptr[i];iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
601            for (iptr_ij=pattern->mainPattern->ptr[i];iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {              const index_t j = pattern->mainPattern->index[iptr_ij];
602                const index_t j    = pattern->mainPattern->index[iptr_ij];              const double rtmp1 = fc->transport_matrix->mainBlock->val[iptr_ij];
603                const double rtmp1 = fc->transport_matrix->mainBlock->val[iptr_ij];              if (j != i) {
604                if (j!=i) {                  // find entry a[j,i]
605                   /* find entry a[j,i] */                  #pragma ivdep
606                   #pragma ivdep                  for (index_t iptr_ji=pattern->mainPattern->ptr[j]; iptr_ji<pattern->mainPattern->ptr[j+1]; ++iptr_ji) {
                  for (iptr_ji=pattern->mainPattern->ptr[j]; iptr_ji<pattern->mainPattern->ptr[j+1]; ++iptr_ji) {  
607    
608                      if ( pattern->mainPattern->index[iptr_ji] == i) {                      if (pattern->mainPattern->index[iptr_ji] == i) {
609                          const double rtmp2=fc->transport_matrix->mainBlock->val[iptr_ji];                          const double rtmp2=fc->transport_matrix->mainBlock->val[iptr_ji];
610  /*                          //printf("a[%d,%d]=%e\n",i,j,rtmp1);
611  printf("a[%d,%d]=%e\n",i,j,rtmp1);                          //printf("a[%d,%d]=%e\n",j,i,rtmp2);
 printf("a[%d,%d]=%e\n",j,i,rtmp2);  
 */  
   
612                          const double d_ij=-MIN3(0.,rtmp1,rtmp2);                          const double d_ij=-MIN3(0.,rtmp1,rtmp2);
613                          fc->iteration_matrix->mainBlock->val[iptr_ij]=-(rtmp1+d_ij);                          fc->iteration_matrix->mainBlock->val[iptr_ij]=-(rtmp1+d_ij);
614  /* printf("l[%d,%d]=%e\n",i,j,fc->iteration_matrix->mainBlock->val[iptr_ij]); */  //printf("l[%d,%d]=%e\n",i,j,fc->iteration_matrix->mainBlock->val[iptr_ij]);
                         sum-=d_ij;  
                         break;  
                     }  
                  }  
              }  
           }  
           for (iptr_ij=pattern->col_couplePattern->ptr[i];iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {  
               const index_t    j = pattern->col_couplePattern->index[iptr_ij];  
               const double  rtmp1 = fc->transport_matrix->col_coupleBlock->val[iptr_ij];  
               /* find entry a[j,i] */  
               #pragma ivdep  
               for (iptr_ji=pattern->row_couplePattern->ptr[j]; iptr_ji<pattern->row_couplePattern->ptr[j+1]; ++iptr_ji) {  
                     if (pattern->row_couplePattern->index[iptr_ji]==i) {  
                         const double rtmp2=fc->transport_matrix->row_coupleBlock->val[iptr_ji];  
                         const double d_ij=-MIN3(0.,rtmp1,rtmp2);  
                         fc->iteration_matrix->col_coupleBlock->val[iptr_ij]=-(rtmp1+d_ij);  
                         fc->iteration_matrix->row_coupleBlock->val[iptr_ji]=-(rtmp2+d_ij);  
615                          sum-=d_ij;                          sum-=d_ij;
616                          break;                          break;
617                      }                      }
618                }                  }
619           }              }
620           /* set main diagonal entry */          }
621           fc->main_diagonal_low_order_transport_matrix[i]=sum;          for (index_t iptr_ij=pattern->col_couplePattern->ptr[i];iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
622  /*       printf("%e \n", sum); */              const index_t j = pattern->col_couplePattern->index[iptr_ij];
623        }              const double rtmp1 = fc->transport_matrix->col_coupleBlock->val[iptr_ij];
624                // find entry a[j,i]
625    }              #pragma ivdep
626                for (index_t iptr_ji=pattern->row_couplePattern->ptr[j]; iptr_ji<pattern->row_couplePattern->ptr[j+1]; ++iptr_ji) {
627                    if (pattern->row_couplePattern->index[iptr_ji]==i) {
628                        const double rtmp2=fc->transport_matrix->row_coupleBlock->val[iptr_ji];
629                        const double d_ij=-MIN3(0.,rtmp1,rtmp2);
630                        fc->iteration_matrix->col_coupleBlock->val[iptr_ij]=-(rtmp1+d_ij);
631                        fc->iteration_matrix->row_coupleBlock->val[iptr_ji]=-(rtmp2+d_ij);
632                        sum-=d_ij;
633                        break;
634                    }
635                }
636            }
637            // set main diagonal entry
638            fc->main_diagonal_low_order_transport_matrix[i] = sum;
639            //printf("%e\n", sum);
640        }
641  }  }
642    
643  /*  /*
# Line 669  printf("a[%d,%d]=%e\n",j,i,rtmp2); Line 645  printf("a[%d,%d]=%e\n",j,i,rtmp2);
645   *       = u_i                                        where m_i<=0   *       = u_i                                        where m_i<=0
646   *   *
647   */   */
648  void Paso_FCT_Solver_setMuPaLu(double* out,  void FCT_Solver::setMuPaLu(double* out, const_Coupler_ptr coupler, double a)
649                                 const double* M,  {
650                                 paso::const_Coupler_ptr u_coupler,      const_SystemMatrix_ptr L(transportproblem->iteration_matrix);
651                                 const double a,      const double* M = transportproblem->lumped_mass_matrix;
652                                 paso::const_SystemMatrix_ptr L)      const_SystemMatrixPattern_ptr pattern(L->pattern);
653  {      const double* u = coupler->borrowLocalData();
654    dim_t i;      const double* remote_u = coupler->borrowRemoteData();
655    paso::const_SystemMatrixPattern_ptr pattern(L->pattern);      const dim_t n = L->getTotalNumRows();
656    const double *u = u_coupler->borrowLocalData();  
657    const double *remote_u = u_coupler->borrowRemoteData();  #pragma omp parallel for
658    index_t iptr_ij;      for (dim_t i = 0; i < n; ++i) {
659    const dim_t n = L->getTotalNumRows();          if (M[i] > 0.) {
660                out[i] = M[i]*u[i];
661    #pragma omp parallel for private(i) schedule(static)          } else {
662    for (i = 0; i < n; ++i) {              out[i] = u[i];
       if ( M[i] > 0.) {  
           out[i]=M[i]*u[i];  
       } else {  
           out[i]=u[i];  
       }  
   }  
   if (ABS(a)>0) {  
       #pragma omp parallel for schedule(static) private(i, iptr_ij)  
       for (i = 0; i < n; ++i) {  
         if ( M[i] > 0.) {  
           double sum=0;  
           const double u_i=u[i];  
           #pragma ivdep  
           for (iptr_ij=(pattern->mainPattern->ptr[i]);iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {  
                const index_t j=pattern->mainPattern->index[iptr_ij];  
                const double l_ij=L->mainBlock->val[iptr_ij];  
                sum+=l_ij*(u[j]-u_i);  
           }  
           #pragma ivdep  
           for (iptr_ij=(pattern->col_couplePattern->ptr[i]);iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {  
                const index_t j=pattern->col_couplePattern->index[iptr_ij];  
                const double l_ij=L->col_coupleBlock->val[iptr_ij];  
                sum+=l_ij*(remote_u[j]-u_i);  
           }  
           out[i]+=a*sum;  
663          }          }
664        }      }
665    }      if (ABS(a) > 0) {
666    #pragma omp parallel for
667            for (dim_t i = 0; i < n; ++i) {
668                if (M[i] > 0.) {
669                    double sum = 0;
670                    const double u_i = u[i];
671                    #pragma ivdep
672                    for (index_t iptr_ij = pattern->mainPattern->ptr[i];
673                                 iptr_ij < pattern->mainPattern->ptr[i+1];
674                                 iptr_ij++) {
675                        const index_t j = pattern->mainPattern->index[iptr_ij];
676                        const double l_ij = L->mainBlock->val[iptr_ij];
677                        sum += l_ij*(u[j]-u_i);
678                    }
679                    #pragma ivdep
680                    for (index_t iptr_ij = pattern->col_couplePattern->ptr[i];
681                                 iptr_ij < pattern->col_couplePattern->ptr[i+1];
682                                 iptr_ij++) {
683                        const index_t j=pattern->col_couplePattern->index[iptr_ij];
684                        const double l_ij = L->col_coupleBlock->val[iptr_ij];
685                        sum += l_ij*(remote_u[j]-u_i);
686                    }
687                    out[i] += a*sum;
688                }
689            }
690        }
691  }  }
692    
693    } // namespace paso
694    

Legend:
Removed from v.4856  
changed lines
  Added in v.4867

  ViewVC Help
Powered by ViewVC 1.1.26