/[escript]/trunk/paso/src/FCT_Solver.cpp
ViewVC logotype

Annotation of /trunk/paso/src/FCT_Solver.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4846 - (hide annotations)
Tue Apr 8 23:55:38 2014 UTC (6 years, 1 month ago) by caltinay
File size: 31067 byte(s)
paso::Options.

1 gross 3793
2 jfenwick 3981 /*****************************************************************************
3 gross 3793 *
4 jfenwick 4657 * Copyright (c) 2003-2014 by University of Queensland
5 jfenwick 3981 * http://www.uq.edu.au
6 gross 3793 *
7     * Primary Business: Queensland, Australia
8     * Licensed under the Open Software License version 3.0
9     * http://www.opensource.org/licenses/osl-3.0.php
10     *
11 jfenwick 3981 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 jfenwick 4657 * Development 2012-2013 by School of Earth Sciences
13     * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 jfenwick 3981 *
15     *****************************************************************************/
16 gross 3793
17    
18 caltinay 4843 /****************************************************************************/
19 gross 3793
20 caltinay 4843 /* Paso: Transport solver with flux correction (L is row sum zero)
21 gross 3793 *
22     * - Mv_t=Lv v(0)=u
23     *
24     * to return v(dt)
25     *
26     */
27 caltinay 4843 /****************************************************************************/
28 gross 3793
29     /* Author: l.gross@uq.edu.au */
30    
31 caltinay 4843 /****************************************************************************/
32 gross 3793
33     #include "FCT_Solver.h"
34     #include "Preconditioner.h"
35     #include "PasoUtil.h"
36    
37    
38 caltinay 4846 Paso_FCT_Solver* Paso_FCT_Solver_alloc(Paso_TransportProblem *fctp, paso::Options* options)
39 gross 3793 {
40     Paso_FCT_Solver* out=NULL;
41     const dim_t blockSize=Paso_TransportProblem_getBlockSize(fctp);
42     const dim_t n = Paso_TransportProblem_getTotalNumRows(fctp);
43 caltinay 4843
44 jfenwick 4325 out=new Paso_FCT_Solver;
45 caltinay 4843 out->transportproblem = Paso_TransportProblem_getReference(fctp);
46     out->mpi_info = Esys_MPIInfo_getReference(fctp->mpi_info);
47     out->flux_limiter = Paso_FCT_FluxLimiter_alloc(fctp);
48     out->b = new double[n];
49     if ( (options->ode_solver == PASO_CRANK_NICOLSON) || (options->ode_solver == PASO_BACKWARD_EULER) ) {
50     out->du = new double[n];
51     out->z = new double[n];
52     } else {
53     out->du = NULL;
54     out->z=NULL;
55     }
56 caltinay 4817 out->u_coupler.reset(new paso::Coupler(Paso_TransportProblem_borrowConnector(fctp), blockSize));
57     out->u_old_coupler.reset(new paso::Coupler(Paso_TransportProblem_borrowConnector(fctp), blockSize));
58     out->omega=0;
59 caltinay 4843
60     if ( options->ode_solver == PASO_LINEAR_CRANK_NICOLSON ) {
61     out->method = PASO_LINEAR_CRANK_NICOLSON;
62     } else if ( options->ode_solver == PASO_CRANK_NICOLSON ) {
63     out->method = PASO_CRANK_NICOLSON;
64     } else if ( options->ode_solver == PASO_BACKWARD_EULER ) {
65     out->method = PASO_BACKWARD_EULER;
66     } else {
67     Esys_setError(VALUE_ERROR, "Paso_FCT_Solver_alloc: unknown integration scheme.");
68     out->method = UNKNOWN;
69     }
70    
71 gross 3793 if (Esys_noError()) {
72     return out;
73     } else {
74     Paso_FCT_Solver_free(out);
75     return NULL;
76 caltinay 4843 }
77 gross 3793 }
78    
79     void Paso_FCT_Solver_free(Paso_FCT_Solver *in)
80     {
81     if (in != NULL) {
82 caltinay 4817 Paso_TransportProblem_free(in->transportproblem);
83     Paso_FCT_FluxLimiter_free(in->flux_limiter);
84     Esys_MPIInfo_free(in->mpi_info);
85     delete[] in->b;
86     delete[] in->z;
87     delete[] in->du;
88     delete in;
89 gross 3793 }
90     }
91    
92     double Paso_FCT_Solver_getSafeTimeStepSize(Paso_TransportProblem* fctp)
93     {
94 caltinay 4836 dim_t i;
95 gross 3823 double dt_max_loc=LARGE_POSITIVE_FLOAT;
96 gross 3793 double dt_max=LARGE_POSITIVE_FLOAT;
97 caltinay 4836 const dim_t n = fctp->transport_matrix->getTotalNumRows();
98 gross 3793 /* set low order transport operator */
99     Paso_FCT_setLowOrderOperator(fctp);
100 caltinay 4843
101 gross 3793 if (Esys_noError()) {
102     /*
103 caltinay 4843 * calculate time step size:
104 gross 3793 */
105     dt_max=LARGE_POSITIVE_FLOAT;
106 gross 3823 #pragma omp parallel private(i, dt_max_loc)
107 gross 3793 {
108 caltinay 4843 dt_max_loc=LARGE_POSITIVE_FLOAT;
109 gross 3793 #pragma omp for schedule(static)
110     for (i=0;i<n;++i) {
111 caltinay 3795 const double l_ii=fctp->main_diagonal_low_order_transport_matrix[i];
112     const double m_i=fctp->lumped_mass_matrix[i];
113 caltinay 4843 if ( m_i > 0 ) {
114     if (l_ii<0) dt_max_loc=MIN(dt_max_loc,m_i/(-l_ii));
115 gross 3822 }
116 caltinay 4843 }
117     #pragma omp critical
118 gross 3793 {
119     dt_max=MIN(dt_max,dt_max_loc);
120     }
121     }
122     #ifdef ESYS_MPI
123     {
124 gross 3822 dt_max_loc=dt_max;
125     MPI_Allreduce(&dt_max_loc, &dt_max, 1, MPI_DOUBLE, MPI_MIN, fctp->mpi_info->comm);
126 caltinay 4843 }
127 gross 3793 #endif
128 caltinay 4843 if (dt_max<LARGE_POSITIVE_FLOAT) dt_max*=2.;
129 gross 3793 }
130     return dt_max;
131     }
132    
133     /* modifies the main diagonal of the iteration matrix to introduce new dt */
134 caltinay 4846 void Paso_FCT_Solver_initialize(const double dt, Paso_FCT_Solver *fct_solver, paso::Options* options, Paso_Performance* pp)
135 gross 3793 {
136     Paso_TransportProblem* fctp = fct_solver->transportproblem;
137     const index_t* main_iptr=Paso_TransportProblem_borrowMainDiagonalPointer(fctp);
138 caltinay 4836 const dim_t n = fctp->transport_matrix->getTotalNumRows();
139 gross 3793 const double theta = Paso_FCT_Solver_getTheta(fct_solver);
140     const double omega=1./(dt* theta);
141     dim_t i;
142 caltinay 4846 paso::Options options2;
143 gross 3793
144    
145 caltinay 4843
146 caltinay 4836 Paso_solve_free(fctp->iteration_matrix.get());
147 gross 3793 /*
148     * fctp->iteration_matrix[i,i]=m[i]/(dt theta) -l[i,i]
149     *
150     */
151 caltinay 4843 fct_solver->omega=omega;
152 gross 3793 fct_solver->dt = dt;
153     #pragma omp parallel for private(i)
154     for (i = 0; i < n; ++i) {
155 gross 3822 const double m_i=fctp->lumped_mass_matrix[i];
156 caltinay 4843 const double l_ii = fctp->main_diagonal_low_order_transport_matrix[i];
157     if ( m_i > 0 ) {
158 gross 3822 fctp->iteration_matrix->mainBlock->val[main_iptr[i]] = m_i * omega - l_ii;
159 caltinay 4843 } else {
160     fctp->iteration_matrix->mainBlock->val[main_iptr[i]] = ABS(m_i * omega - l_ii)/(EPSILON*EPSILON);
161     }
162 gross 3793 }
163 caltinay 4843
164 gross 3793 /* allocate preconditioner/solver */
165     options2.verbose = options->verbose;
166 caltinay 4846 if (fct_solver->method == PASO_LINEAR_CRANK_NICOLSON) {
167 gross 3793 options2.preconditioner = PASO_GS;
168 caltinay 4843 } else {
169     options2.preconditioner = PASO_JACOBI;
170 gross 3793 /* options2.preconditioner = PASO_GS; */
171     }
172     options2.use_local_preconditioner = FALSE;
173 gross 3807 options2.sweeps=-1;
174 caltinay 4843
175 gross 3793 Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER_INIT);
176 caltinay 4836 fctp->iteration_matrix->setPreconditioner(&options2);
177 gross 3793 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER_INIT);
178     }
179    
180 jfenwick 4345 /* entry point for update procedures */
181 caltinay 4846 err_t Paso_FCT_Solver_update(Paso_FCT_Solver *fct_solver, double* u, double *u_old, paso::Options* options, Paso_Performance *pp)
182 caltinay 4843 {
183 gross 3793 const index_t method=fct_solver->method;
184     err_t err_out = SOLVER_NO_ERROR;
185 caltinay 4843
186 gross 3793 if (method == PASO_LINEAR_CRANK_NICOLSON) {
187     err_out=Paso_FCT_Solver_update_LCN(fct_solver, u, u_old, options, pp);
188     } else if (method == PASO_CRANK_NICOLSON) {
189     err_out=Paso_FCT_Solver_updateNL(fct_solver, u, u_old, options, pp);
190     } else if (method == PASO_BACKWARD_EULER) {
191     err_out=Paso_FCT_Solver_updateNL(fct_solver, u, u_old, options, pp);
192     } else {
193     err_out = SOLVER_INPUT_ERROR;
194     }
195     return err_out;
196     }
197    
198     /* linear crank-nicolson update */
199 caltinay 4846 err_t Paso_FCT_Solver_update_LCN(Paso_FCT_Solver *fct_solver, double * u, double *u_old, paso::Options* options, Paso_Performance *pp)
200 gross 3793 {
201     double const dt = fct_solver->dt;
202 gross 3823 dim_t sweep_max, i;
203 gross 3793 double *b = fct_solver->b;
204     double const RTOL = options->tolerance;
205 caltinay 4843 const dim_t n=Paso_TransportProblem_getTotalNumRows(fct_solver->transportproblem);
206 caltinay 4836 paso::SystemMatrix_ptr iteration_matrix(fct_solver->transportproblem->iteration_matrix);
207 gross 3823 const index_t* main_iptr=Paso_TransportProblem_borrowMainDiagonalPointer(fct_solver->transportproblem);
208 gross 3793 err_t errorCode = SOLVER_NO_ERROR;
209 gross 3823 double norm_u_tilde;
210 caltinay 4843
211 caltinay 4817 fct_solver->u_old_coupler->startCollect(u_old);
212     fct_solver->u_old_coupler->finishCollect();
213 caltinay 4843
214     /* b[i]=m*u_tilde[i] = m u_old[i] + dt/2 sum_{j <> i} l_{ij}*(u_old[j]-u_old[i])
215 gross 3822 = u_tilde[i] = u_old[i] where constraint m<0.
216 caltinay 4843 * note that iteration_matrix stores the negative values of the
217 gross 3793 * low order transport matrix l. Therefore a=-dt*0.5 is used. */
218 caltinay 4843
219     Paso_FCT_Solver_setMuPaLu(b, fct_solver->transportproblem->lumped_mass_matrix,
220     fct_solver->u_old_coupler, -dt*0.5, iteration_matrix);
221     /* solve for u_tilde : u_tilda = m^{-1} * b */
222 gross 3793 Paso_FCT_FluxLimiter_setU_tilda(fct_solver->flux_limiter, b);
223     /* u_tilda_connector is completed */
224 caltinay 4843
225 gross 3793 /* calculate anti-diffusive fluxes for u_tilda */
226 caltinay 4843 Paso_FCT_setAntiDiffusionFlux_linearCN(fct_solver->flux_limiter->antidiffusive_fluxes,
227     fct_solver->transportproblem, dt,
228     fct_solver->flux_limiter->u_tilde_coupler,
229     fct_solver->u_old_coupler);
230 gross 3807
231    
232 gross 3793 /* b_i += sum_{j} limitation factor_{ij} * antidiffusive_flux_{ij} */
233     Paso_FCT_FluxLimiter_addLimitedFluxes_Start(fct_solver->flux_limiter);
234 caltinay 4843 Paso_FCT_FluxLimiter_addLimitedFluxes_Complete(fct_solver->flux_limiter, b);
235    
236     Paso_Scale(n, b,fct_solver->omega );
237 gross 3807 /* solve (m-dt/2*L) u = b in the form (omega*m-L) u = b * omega with omega*dt/2=1 */
238 caltinay 4843 #pragma omp for private(i) schedule(static)
239 gross 3823 for (i = 0; i < n; ++i) {
240     if (! (fct_solver->transportproblem->lumped_mass_matrix[i] > 0 ) ) {
241 caltinay 4843 b[i] = fct_solver->flux_limiter->u_tilde[i]
242     * fct_solver->transportproblem->iteration_matrix->mainBlock->val[main_iptr[i]];
243     }
244 gross 3823 }
245 gross 3793 /* initial guess is u<- -u + 2*u_tilde */
246     Paso_Update(n, -1., u, 2., fct_solver->flux_limiter->u_tilde);
247 gross 3823
248 gross 3793 sweep_max = MAX((int) (- 2 * log(RTOL)/log(2.)-0.5),1);
249 gross 3823 norm_u_tilde=Paso_lsup(n, fct_solver->flux_limiter->u_tilde, fct_solver->flux_limiter->mpi_info);
250 gross 3807 if (options->verbose) {
251 caltinay 4843 printf("Paso_FCT_Solver_update_LCN: u_tilda lsup = %e (rtol = %e, max. sweeps = %d)\n",norm_u_tilde,RTOL * norm_u_tilde ,sweep_max);
252 gross 3807 }
253 caltinay 4843 errorCode = paso::Preconditioner_Smoother_solve_byTolerance( iteration_matrix, ((paso::Preconditioner*) (iteration_matrix->solver_p))->gs,
254     u, b, RTOL, &sweep_max, TRUE);
255 gross 3793 if (errorCode == PRECONDITIONER_NO_ERROR) {
256     if (options->verbose) printf("Paso_FCT_Solver_update_LCN: convergence after %d Gauss-Seidel steps.\n",sweep_max);
257     errorCode=SOLVER_NO_ERROR;
258     } else {
259     if (options->verbose) printf("Paso_FCT_Solver_update_LCN: Gauss-Seidel failed within %d stesp (rel. tolerance %e).\n",sweep_max,RTOL);
260     errorCode= SOLVER_MAXITER_REACHED;
261     }
262     return errorCode;
263    
264 caltinay 4843 }
265    
266 caltinay 4846 err_t Paso_FCT_Solver_updateNL(Paso_FCT_Solver *fct_solver, double* u, double *u_old, paso::Options* options, Paso_Performance *pp)
267 gross 3793 {
268 gross 3794 const dim_t num_critical_rates_max=3; /* number of rates >=critical_rate accepted before divergence is triggered */
269     const double critical_rate=0.95; /* expected value of convergence rate */
270    
271 gross 3793 double *b = fct_solver->b;
272     double *z = fct_solver->z;
273     double *du = fct_solver->du;
274 caltinay 4843 double const dt = fct_solver->dt;
275 gross 3793 Paso_TransportProblem* fctp = fct_solver->transportproblem;
276     Paso_FCT_FluxLimiter* flux_limiter = fct_solver->flux_limiter;
277     dim_t i;
278 gross 3794 double norm_u_tilde, ATOL, norm_du=LARGE_POSITIVE_FLOAT, norm_du_old, rate=1.;
279 gross 3793 err_t errorCode=SOLVER_NO_ERROR;
280 caltinay 4836 const dim_t n = fctp->transport_matrix->getTotalNumRows();
281 caltinay 4843 const double atol=options->absolute_tolerance;
282 gross 3793 const double rtol=options->tolerance;
283     const dim_t max_m=options->iter_max;
284     dim_t m=0, num_critical_rates=0 ;
285 caltinay 4843 /* ////////////////////////////////////////////////////////////////////// */
286    
287     bool converged=FALSE, max_m_reached=FALSE,diverged=FALSE;
288 gross 3793 options->num_iter=0;
289 caltinay 4843
290 caltinay 4817 fct_solver->u_old_coupler->startCollect(u_old);
291     fct_solver->u_old_coupler->finishCollect();
292 gross 3793 /* prepare u_tilda and flux limiter */
293     if ( fct_solver->method == PASO_BACKWARD_EULER ) {
294     /* b[i]=m_i* u_old[i] */
295 caltinay 4843 #pragma omp for private(i) schedule(static)
296 gross 3793 for (i = 0; i < n; ++i) {
297 caltinay 4843 if (fctp->lumped_mass_matrix[i] > 0 ) {
298     b[i]=u_old[i]* fctp->lumped_mass_matrix[i];
299     } else {
300     b[i]=u_old[i];
301     }
302     }
303 gross 3793 } else {
304 gross 3822 /* b[i]=m_i* u_old[i] + dt/2 sum_{j <> i} l_{ij}*(u_old[j]-u_old[i]) = m_i * u_tilde_i where m_i>0
305 caltinay 4843 * = u_old[i] otherwise
306     * note that iteration_matrix stores the negative values of the
307 gross 3793 * low order transport matrix l. Therefore a=-dt*0.5 is used. */
308     Paso_FCT_Solver_setMuPaLu(b,fctp->lumped_mass_matrix,fct_solver->u_old_coupler,-dt*0.5,fctp->iteration_matrix);
309 caltinay 4843 }
310 gross 3793 Paso_FCT_FluxLimiter_setU_tilda(flux_limiter, b); /* u_tilda = m^{-1} b */
311     /* u_tilda_connector is completed */
312 caltinay 4843 /************************************************************************/
313 jfenwick 4345 /* calculate stopping criterion */
314 gross 3793 norm_u_tilde=Paso_lsup(n, flux_limiter->u_tilde, flux_limiter->mpi_info);
315     ATOL= rtol * norm_u_tilde + atol ;
316     if (options->verbose) printf("Paso_FCT_Solver_updateNL: iteration starts u_tilda lsup = %e (abs. tol = %e)\n",norm_u_tilde,ATOL);
317     /* ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// */
318    
319     /* u_old is an initial guess for u*/
320     Paso_Copy(n,u,u_old);
321 caltinay 4843
322 gross 3793 while ( (!converged) && (!diverged) && (! max_m_reached) && Esys_noError()) {
323 caltinay 4817 fct_solver->u_coupler->startCollect(u);
324 caltinay 4843 fct_solver->u_coupler->finishCollect();
325 gross 3793
326     /* set antidiffusive_flux_{ij} for u */
327     if (fct_solver->method == PASO_BACKWARD_EULER) {
328 caltinay 4843 Paso_FCT_setAntiDiffusionFlux_BE(fct_solver->flux_limiter->antidiffusive_fluxes, fctp, dt, fct_solver->u_coupler, fct_solver->u_old_coupler);
329     } else {
330     Paso_FCT_setAntiDiffusionFlux_CN(fct_solver->flux_limiter->antidiffusive_fluxes, fctp, dt, fct_solver->u_coupler, fct_solver->u_old_coupler);
331 gross 3793 }
332 caltinay 4843 /* start the calculation of the limitation factors_{fct_solver->ij} */
333     Paso_FCT_FluxLimiter_addLimitedFluxes_Start(flux_limiter); /* uses u_tilde */
334 gross 3793
335     /*
336 caltinay 4843 * z_m[i]=b[i] - (m_i*u[i] - omega*sum_{j<>i} l_{ij} (u[j]-u[i]) ) where m_i>0
337     * ==b[i] - u[i] = u_tilda[i]-u[i] =0 otherwise
338     *
339     * omega = dt/2 or dt .
340     *
341     * note that iteration_matrix stores the negative values of the
342     * low order transport matrix l. Therefore a=dt*theta is used.
343     */
344     if (fct_solver-> method == PASO_BACKWARD_EULER) {
345     Paso_FCT_Solver_setMuPaLu(z, fctp->lumped_mass_matrix, fct_solver->u_coupler, dt, fctp->iteration_matrix);
346     } else {
347     Paso_FCT_Solver_setMuPaLu(z, fctp->lumped_mass_matrix, fct_solver->u_coupler, dt/2, fctp->iteration_matrix);
348     }
349 gross 3793
350 caltinay 4843 Paso_Update(n,-1.,z,1.,b); /* z=b-z */
351 gross 3793
352 caltinay 4843 /* z_i += sum_{j} limitation factor_{ij} * antidiffusive_flux_{ij} */
353     Paso_FCT_FluxLimiter_addLimitedFluxes_Complete(flux_limiter, z);
354 gross 3793
355 caltinay 4843 /* we solve (m/omega - L ) * du = z */
356     if (fct_solver->method == PASO_BACKWARD_EULER) {
357     dim_t cntIter = options->iter_max;
358     double tol= Paso_l2(n, z, fctp->mpi_info) ;
359 gross 3793
360 caltinay 4843 if ( m ==0) {
361     tol *=0.5;
362     } else {
363     tol *= MIN(MAX(rate*rate, 1e-2), 0.5);
364     }
365     /* use BiCGSTab with jacobi preconditioner ( m - omega * L ) */
366     Paso_zeroes(n,du);
367     errorCode = Paso_Solver_BiCGStab(fctp->iteration_matrix, z, du, &cntIter, &tol, pp);
368    
369     /* errorCode = Paso_Solver_GMRES(fctp->iteration_matrix, z, du, &cntIter, &tol, 10, 2000, pp); */
370     if (options->verbose) printf("Paso_FCT_Solver_updateNL: BiCGStab is completed after %d steps (residual =%e).\n",cntIter, tol);
371     options->num_iter+=cntIter;
372     if ( errorCode != SOLVER_NO_ERROR) break;
373     } else {
374     /* just use the main diagonal of (m/omega - L ) */
375    
376     paso::Preconditioner_Smoother_solve(fctp->iteration_matrix, ((paso::Preconditioner*) (fctp->iteration_matrix->solver_p))->jacobi,
377     du, z, 1, FALSE);
378    
379     options->num_iter++;
380     }
381    
382     Paso_Update(n,1.,u,fct_solver->omega,du);
383     norm_du_old=norm_du;
384 gross 3793 norm_du=Paso_lsup(n,du, fctp->mpi_info);
385 caltinay 4843 if (m ==0) {
386 gross 3793 if (options->verbose) printf("Paso_FCT_Solver_updateNL: step %d: increment= %e\n",m+1, norm_du * fct_solver->omega);
387 caltinay 4843 } else {
388     if (norm_du_old > 0.) {
389     rate=norm_du/norm_du_old;
390     } else if (norm_du <= 0.) {
391     rate=0.;
392     } else {
393     rate=LARGE_POSITIVE_FLOAT;
394     }
395 gross 3793 if (options->verbose) printf("Paso_FCT_Solver_updateNL: step %d: increment= %e (rate = %e)\n",m+1, norm_du * fct_solver->omega, rate);
396 caltinay 4843 num_critical_rates+=( rate<critical_rate ? 0 : 1);
397 gross 3793 max_m_reached=(m>max_m);
398 caltinay 4843 diverged = (num_critical_rates >= num_critical_rates_max);
399     converged=(norm_du * fct_solver->omega <= ATOL);
400 gross 3793 }
401     m++;
402     } /* end of while loop */
403     if (errorCode == SOLVER_NO_ERROR) {
404 caltinay 4843 if (converged) {
405     if (options->verbose) printf("Paso_FCT_Solver_updateNL: iteration is completed.\n");
406 gross 3793 errorCode=SOLVER_NO_ERROR;
407     } else if (diverged) {
408 caltinay 4843 if (options->verbose) printf("Paso_FCT_Solver_updateNL: divergence.\n");
409     errorCode=SOLVER_DIVERGENCE;
410     } else if (max_m_reached) {
411     if (options->verbose) printf("Paso_FCT_Solver_updateNL: maximum number of iteration steps reached.\n");
412     errorCode=SOLVER_MAXITER_REACHED;
413     }
414 gross 3793 }
415     return errorCode;
416     }
417    
418    
419 caltinay 4843 /*
420     * AntiDiffusionFlux:
421 gross 3793 *
422     * f_{ij} = (m_{ij} - dt (1-theta) d_{ij}) (u_old[j]-u_old[i]) - (m_{ij} + dt theta d_{ij}) (u[j]-u[i])
423 caltinay 4843 *
424 gross 3793 * m=fc->mass matrix
425 jfenwick 4345 * d=artificial diffusion matrix = L - K = - fc->iteration matrix - fc->transport matrix (away from main diagonal)
426 gross 3793 *
427     * for CN : theta =0.5
428     * for BE : theta = 1.
429     */
430    
431 caltinay 4836 void Paso_FCT_setAntiDiffusionFlux_CN(paso::SystemMatrix_ptr flux_matrix,
432 caltinay 4843 const Paso_TransportProblem* fct,
433 caltinay 4817 const double dt,
434 caltinay 4843 paso::const_Coupler_ptr u_coupler,
435 caltinay 4817 paso::const_Coupler_ptr u_old_coupler)
436 gross 3793 {
437     dim_t i;
438     index_t iptr_ij;
439 caltinay 4843
440 caltinay 4817 const double *u = u_coupler->borrowLocalData();
441     const double *u_old= u_old_coupler->borrowLocalData();
442     const double *remote_u=u_coupler->borrowRemoteData();
443     const double *remote_u_old=u_old_coupler->borrowRemoteData();
444 gross 3793 const double dt_half= dt/2;
445 caltinay 4818 paso::const_SystemMatrixPattern_ptr pattern(fct->iteration_matrix->pattern);
446 caltinay 4836 const dim_t n = fct->iteration_matrix->getTotalNumRows();
447 gross 3793
448     #pragma omp parallel for schedule(static) private(i, iptr_ij)
449     for (i = 0; i < n; ++i) {
450 caltinay 3795 const double u_i = u[i];
451     const double u_old_i = u_old[i];
452 gross 3793
453     #pragma ivdep
454     for (iptr_ij=(pattern->mainPattern->ptr[i]);iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
455 caltinay 4843 const index_t j = pattern->mainPattern->index[iptr_ij];
456     const double m_ij = fct->mass_matrix->mainBlock->val[iptr_ij];
457     const double d_ij = fct->transport_matrix->mainBlock->val[iptr_ij]+fct->iteration_matrix->mainBlock->val[iptr_ij]; /* this is in fact -d_ij */
458     const double u_old_j = u_old[j];
459     const double u_j = u[j];
460 gross 3793
461 caltinay 4843 /* (m_{ij} - dt (1-theta) d_{ij}) (u_old[j]-u_old[i]) - (m_{ij} + dt theta d_{ij}) (u[j]-u[i]) */
462     flux_matrix->mainBlock->val[iptr_ij]=(m_ij+dt_half*d_ij)*(u_old_j-u_old_i) - (m_ij-dt_half*d_ij)*(u_j-u_i);
463    
464 gross 3793 }
465     #pragma ivdep
466     for (iptr_ij=(pattern->col_couplePattern->ptr[i]);iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
467 caltinay 4843 const index_t j = pattern->col_couplePattern->index[iptr_ij];
468     const double m_ij = fct->mass_matrix->col_coupleBlock->val[iptr_ij];
469     const double d_ij = fct->transport_matrix->col_coupleBlock->val[iptr_ij]+fct->iteration_matrix->col_coupleBlock->val[iptr_ij]; /* this is in fact -d_ij */
470 caltinay 3795 const double u_old_j = remote_u_old[j];
471 caltinay 4843 const double u_j = remote_u[j];
472     flux_matrix->col_coupleBlock->val[iptr_ij]=(m_ij+dt_half*d_ij)*(u_old_j-u_old_i)- (m_ij-dt_half*d_ij)*(u_j-u_i);
473 gross 3793 }
474     }
475     }
476    
477 caltinay 4836 void Paso_FCT_setAntiDiffusionFlux_BE(paso::SystemMatrix_ptr flux_matrix,
478 caltinay 4843 const Paso_TransportProblem* fct,
479 caltinay 4817 const double dt,
480 caltinay 4843 paso::const_Coupler_ptr u_coupler,
481 caltinay 4817 paso::const_Coupler_ptr u_old_coupler)
482 gross 3793 {
483     dim_t i;
484     index_t iptr_ij;
485 caltinay 4843
486 caltinay 4817 const double *u = u_coupler->borrowLocalData();
487     const double *u_old = u_old_coupler->borrowLocalData();
488     const double *remote_u = u_coupler->borrowRemoteData();
489     const double *remote_u_old = u_old_coupler->borrowRemoteData();
490 caltinay 4818 paso::const_SystemMatrixPattern_ptr pattern(fct->iteration_matrix->pattern);
491 caltinay 4836 const dim_t n = fct->iteration_matrix->getTotalNumRows();
492 gross 3793
493     #pragma omp parallel for schedule(static) private(i, iptr_ij)
494     for (i = 0; i < n; ++i) {
495 caltinay 3795 const double u_i = u[i];
496     const double u_old_i = u_old[i];
497 gross 3793 #pragma ivdep
498     for (iptr_ij=pattern->mainPattern->ptr[i]; iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
499    
500 caltinay 4843 const index_t j = pattern->mainPattern->index[iptr_ij];
501     const double m_ij = fct->mass_matrix->mainBlock->val[iptr_ij];
502     const double d_ij = fct->transport_matrix->mainBlock->val[iptr_ij]+fct->iteration_matrix->mainBlock->val[iptr_ij]; /* this is in fact -d_ij */
503     const double u_old_j = u_old[j];
504     const double u_j = u[j];
505    
506     flux_matrix->mainBlock->val[iptr_ij]=m_ij*(u_old_j-u_old_i)- (m_ij-dt*d_ij)*(u_j-u_i);
507 gross 3793 }
508     #pragma ivdep
509     for (iptr_ij=pattern->col_couplePattern->ptr[i]; iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
510 caltinay 4843 const index_t j = pattern->col_couplePattern->index[iptr_ij];
511     const double m_ij = fct->mass_matrix->col_coupleBlock->val[iptr_ij]; /* this is in fact -d_ij */
512     const double d_ij = fct->transport_matrix->col_coupleBlock->val[iptr_ij]+fct->iteration_matrix->col_coupleBlock->val[iptr_ij];
513 caltinay 3795 const double u_old_j = remote_u_old[j];
514 caltinay 4843 const double u_j = remote_u[j];
515    
516     flux_matrix->col_coupleBlock->val[iptr_ij]=m_ij*(u_old_j-u_old_i)- (m_ij-dt*d_ij)*(u_j-u_i);
517 gross 3793 }
518     }
519     }
520    
521 caltinay 4843 /* special version of the ant-diffusive fluxes for the linear Crank-Nicolson scheme
522 gross 3793 * in fact this is evaluated for u = 2*u_tilde - u_old which is the predictor
523 caltinay 4843 * of the solution of the the stabilised problem at time dt using the forward Euler scheme
524     *
525 gross 3793 * f_{ij} = (m_{ij} - dt/2 d_{ij}) (u_old[j]-u_old[i]) - (m_{ij} + dt/2 d_{ij}) (u[j]-u[i])
526     * = (m_{ij} - dt/2 d_{ij}) * (u_old[j]-u_old[i]) - (m_{ij} + dt/2 d_{ij}) * ( 2*(u_tilde[j]-u_tilde[i]) - (u_old[j] -u_old [i]) )
527 caltinay 4843 * = 2* m_{ij} * ( (u_old[j]-u_tilde[j] - (u_old[i]) - u_tilde[i]) ) - dt d_{ij} * (u_tilde[j]-u_tilde[i])
528     *
529 gross 3793 */
530 caltinay 4843
531 caltinay 4836 void Paso_FCT_setAntiDiffusionFlux_linearCN(paso::SystemMatrix_ptr flux_matrix,
532 caltinay 4817 const Paso_TransportProblem* fct, const double dt,
533     paso::const_Coupler_ptr u_tilde_coupler,
534     paso::const_Coupler_ptr u_old_coupler)
535 gross 3793 {
536     dim_t i;
537     index_t iptr_ij;
538 caltinay 4843
539 caltinay 4817 const double *u_tilde = u_tilde_coupler->borrowLocalData();
540     const double *u_old = u_old_coupler->borrowLocalData();
541     const double *remote_u_tilde = u_tilde_coupler->borrowRemoteData();
542     const double *remote_u_old = u_old_coupler->borrowRemoteData();
543 caltinay 4818 paso::const_SystemMatrixPattern_ptr pattern(fct->iteration_matrix->pattern);
544 caltinay 4836 const dim_t n = fct->iteration_matrix->getTotalNumRows();
545 gross 3793
546     #pragma omp parallel for schedule(static) private(i, iptr_ij)
547     for (i = 0; i < n; ++i) {
548 caltinay 3795 const double u_tilde_i = u_tilde[i];
549     const double u_old_i = u_old[i];
550 gross 3807 const double du_i = u_tilde_i - u_old_i;
551 gross 3793 #pragma ivdep
552     for (iptr_ij=(pattern->mainPattern->ptr[i]);iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
553 caltinay 4843
554     const index_t j = pattern->mainPattern->index[iptr_ij];
555     const double m_ij = fct->mass_matrix->mainBlock->val[iptr_ij];
556     const double d_ij = fct->transport_matrix->mainBlock->val[iptr_ij]+fct->iteration_matrix->mainBlock->val[iptr_ij]; /* this is in fact -d_ij */
557 caltinay 3795 const double u_tilde_j = u_tilde[j];
558 caltinay 4843 const double u_old_j = u_old[j];
559     const double du_j = u_tilde_j - u_old_j;
560    
561     flux_matrix->mainBlock->val[iptr_ij]= 2 * m_ij * ( du_i - du_j ) - dt * d_ij * ( u_tilde_i - u_tilde_j);
562 gross 3793 }
563     #pragma ivdep
564     for (iptr_ij=(pattern->col_couplePattern->ptr[i]);iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
565 caltinay 4843
566     const index_t j = pattern->col_couplePattern->index[iptr_ij];
567     const double m_ij = fct->mass_matrix->col_coupleBlock->val[iptr_ij];
568     const double d_ij = fct->transport_matrix->col_coupleBlock->val[iptr_ij]+fct->iteration_matrix->col_coupleBlock->val[iptr_ij];/* this is in fact -d_ij */
569 caltinay 3795 const double u_tilde_j = remote_u_tilde[j];
570 caltinay 4843 const double u_old_j = remote_u_old[j];
571     const double du_j = u_tilde_j - u_old_j;
572 gross 3793
573 caltinay 4843 flux_matrix->col_coupleBlock->val[iptr_ij]= 2 * m_ij * ( du_i - du_j ) - dt * d_ij * ( u_tilde_i - u_tilde_j);
574    
575 gross 3793 }
576     }
577    
578     }
579    
580 caltinay 4843 /****************************************************************************/
581 gross 3793
582 caltinay 4843 /* Creates the low order transport matrix and stores its negative values
583 gross 3793 * into the iteration_matrix except for the main diagonal which is stored
584     * separately.
585 caltinay 4843 * If fc->iteration_matrix==NULL, fc->iteration_matrix is allocated
586 gross 3793 *
587 caltinay 4843 * a=transport_matrix
588 gross 3793 * b= low_order_transport_matrix = - iteration_matrix
589 caltinay 4843 * c=main diagonal low_order_transport_matrix
590     * initialise c[i] mit a[i,i]
591 gross 3793 *
592 caltinay 4843 * d_ij=max(0,-a[i,j],-a[j,i])
593     * b[i,j]=-(a[i,j]+d_ij)
594     * c[i]-=d_ij
595 gross 3793 */
596    
597 caltinay 4843 void Paso_FCT_setLowOrderOperator(Paso_TransportProblem * fc)
598     {
599 gross 3793 dim_t i;
600     index_t iptr_ij, iptr_ji;
601     const index_t* main_iptr=Paso_TransportProblem_borrowMainDiagonalPointer(fc);
602    
603 caltinay 4836 if (!fc->iteration_matrix.get()) {
604     fc->iteration_matrix.reset(new paso::SystemMatrix(
605     fc->transport_matrix->type, fc->transport_matrix->pattern,
606     fc->transport_matrix->row_block_size,
607     fc->transport_matrix->col_block_size, true));
608 gross 3793 }
609    
610     if (Esys_noError()) {
611 caltinay 4818 paso::const_SystemMatrixPattern_ptr pattern(fc->iteration_matrix->pattern);
612 caltinay 4836 const dim_t n = fc->iteration_matrix->getTotalNumRows();
613 gross 3793 #pragma omp parallel for private(i, iptr_ij, iptr_ji) schedule(static)
614     for (i = 0; i < n; ++i) {
615     double sum=fc->transport_matrix->mainBlock->val[main_iptr[i]];
616 caltinay 4843
617 gross 3793 /* printf("sum[%d] = %e -> ", i, sum); */
618     /* look at a[i,j] */
619     for (iptr_ij=pattern->mainPattern->ptr[i];iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
620 caltinay 3795 const index_t j = pattern->mainPattern->index[iptr_ij];
621     const double rtmp1 = fc->transport_matrix->mainBlock->val[iptr_ij];
622 caltinay 4843 if (j!=i) {
623 gross 3793 /* find entry a[j,i] */
624     #pragma ivdep
625     for (iptr_ji=pattern->mainPattern->ptr[j]; iptr_ji<pattern->mainPattern->ptr[j+1]; ++iptr_ji) {
626 caltinay 4843
627 gross 3793 if ( pattern->mainPattern->index[iptr_ji] == i) {
628 caltinay 4843 const double rtmp2=fc->transport_matrix->mainBlock->val[iptr_ji];
629 gross 3793 /*
630     printf("a[%d,%d]=%e\n",i,j,rtmp1);
631     printf("a[%d,%d]=%e\n",j,i,rtmp2);
632     */
633    
634 caltinay 3795 const double d_ij=-MIN3(0.,rtmp1,rtmp2);
635 gross 3793 fc->iteration_matrix->mainBlock->val[iptr_ij]=-(rtmp1+d_ij);
636     /* printf("l[%d,%d]=%e\n",i,j,fc->iteration_matrix->mainBlock->val[iptr_ij]); */
637     sum-=d_ij;
638     break;
639     }
640     }
641     }
642     }
643     for (iptr_ij=pattern->col_couplePattern->ptr[i];iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
644 caltinay 3795 const index_t j = pattern->col_couplePattern->index[iptr_ij];
645     const double rtmp1 = fc->transport_matrix->col_coupleBlock->val[iptr_ij];
646 caltinay 4843 /* find entry a[j,i] */
647 gross 3793 #pragma ivdep
648     for (iptr_ji=pattern->row_couplePattern->ptr[j]; iptr_ji<pattern->row_couplePattern->ptr[j+1]; ++iptr_ji) {
649     if (pattern->row_couplePattern->index[iptr_ji]==i) {
650 caltinay 3795 const double rtmp2=fc->transport_matrix->row_coupleBlock->val[iptr_ji];
651     const double d_ij=-MIN3(0.,rtmp1,rtmp2);
652 gross 3793 fc->iteration_matrix->col_coupleBlock->val[iptr_ij]=-(rtmp1+d_ij);
653     fc->iteration_matrix->row_coupleBlock->val[iptr_ji]=-(rtmp2+d_ij);
654     sum-=d_ij;
655     break;
656     }
657     }
658     }
659     /* set main diagonal entry */
660     fc->main_diagonal_low_order_transport_matrix[i]=sum;
661 caltinay 4843 /* printf("%e \n", sum); */
662 gross 3793 }
663    
664     }
665     }
666    
667     /*
668 gross 3822 * out_i=m_i u_i + a * \sum_{j <> i} l_{ij} (u_j-u_i) where m_i>0
669     * = u_i where m_i<=0
670 gross 3793 *
671     */
672     void Paso_FCT_Solver_setMuPaLu(double* out,
673 caltinay 4843 const double* M,
674 caltinay 4817 paso::const_Coupler_ptr u_coupler,
675     const double a,
676 caltinay 4836 paso::const_SystemMatrix_ptr L)
677 gross 3793 {
678     dim_t i;
679 caltinay 4818 paso::const_SystemMatrixPattern_ptr pattern(L->pattern);
680 caltinay 4817 const double *u = u_coupler->borrowLocalData();
681     const double *remote_u = u_coupler->borrowRemoteData();
682 caltinay 3795 index_t iptr_ij;
683 caltinay 4836 const dim_t n = L->getTotalNumRows();
684 gross 3793
685     #pragma omp parallel for private(i) schedule(static)
686     for (i = 0; i < n; ++i) {
687 gross 3822 if ( M[i] > 0.) {
688     out[i]=M[i]*u[i];
689     } else {
690 caltinay 4843 out[i]=u[i];
691 gross 3822 }
692 caltinay 4843 }
693 gross 3793 if (ABS(a)>0) {
694 caltinay 4843 #pragma omp parallel for schedule(static) private(i, iptr_ij)
695 gross 3793 for (i = 0; i < n; ++i) {
696 caltinay 4843 if ( M[i] > 0.) {
697 caltinay 3795 double sum=0;
698     const double u_i=u[i];
699 gross 3793 #pragma ivdep
700 caltinay 4843 for (iptr_ij=(pattern->mainPattern->ptr[i]);iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
701 gross 3793 const index_t j=pattern->mainPattern->index[iptr_ij];
702 caltinay 3795 const double l_ij=L->mainBlock->val[iptr_ij];
703 gross 3793 sum+=l_ij*(u[j]-u_i);
704     }
705     #pragma ivdep
706 caltinay 4843 for (iptr_ij=(pattern->col_couplePattern->ptr[i]);iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
707 gross 3793 const index_t j=pattern->col_couplePattern->index[iptr_ij];
708 caltinay 3795 const double l_ij=L->col_coupleBlock->val[iptr_ij];
709 gross 3793 sum+=l_ij*(remote_u[j]-u_i);
710     }
711     out[i]+=a*sum;
712 caltinay 4843 }
713 gross 3793 }
714     }
715     }
716    

Properties

Name Value
svn:mergeinfo /branches/amg_from_3530/paso/src/FCT_Solver.cpp:3531-3826 /branches/lapack2681/paso/src/FCT_Solver.cpp:2682-2741 /branches/pasowrap/paso/src/FCT_Solver.cpp:3661-3674 /branches/py3_attempt2/paso/src/FCT_Solver.cpp:3871-3891 /branches/restext/paso/src/FCT_Solver.cpp:2610-2624 /branches/ripleygmg_from_3668/paso/src/FCT_Solver.cpp:3669-3791 /branches/stage3.0/paso/src/FCT_Solver.cpp:2569-2590 /branches/symbolic_from_3470/paso/src/FCT_Solver.cpp:3471-3974 /branches/symbolic_from_3470/ripley/test/python/paso/src/FCT_Solver.cpp:3517-3974 /release/3.0/paso/src/FCT_Solver.cpp:2591-2601 /trunk/paso/src/FCT_Solver.cpp:4257-4344 /trunk/ripley/test/python/paso/src/FCT_Solver.cpp:3480-3515

  ViewVC Help
Powered by ViewVC 1.1.26