/[escript]/trunk/paso/src/FCT_Solver.cpp
ViewVC logotype

Annotation of /trunk/paso/src/FCT_Solver.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4818 - (hide annotations)
Mon Mar 31 00:16:20 2014 UTC (6 years, 2 months ago) by caltinay
File size: 30755 byte(s)
SystemMatrixPattern shptr

1 gross 3793
2 jfenwick 3981 /*****************************************************************************
3 gross 3793 *
4 jfenwick 4657 * Copyright (c) 2003-2014 by University of Queensland
5 jfenwick 3981 * http://www.uq.edu.au
6 gross 3793 *
7     * Primary Business: Queensland, Australia
8     * Licensed under the Open Software License version 3.0
9     * http://www.opensource.org/licenses/osl-3.0.php
10     *
11 jfenwick 3981 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 jfenwick 4657 * Development 2012-2013 by School of Earth Sciences
13     * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 jfenwick 3981 *
15     *****************************************************************************/
16 gross 3793
17    
18 jfenwick 3981 /************************************************************************************/
19 gross 3793
20     /* Paso: Transport solver with flux correction (L is row sum zero)
21     *
22     * - Mv_t=Lv v(0)=u
23     *
24     * to return v(dt)
25     *
26     */
27 jfenwick 3981 /************************************************************************************/
28 gross 3793
29     /* Author: l.gross@uq.edu.au */
30    
31 jfenwick 3981 /************************************************************************************/
32 gross 3793
33     #include "FCT_Solver.h"
34     #include "Preconditioner.h"
35     #include "PasoUtil.h"
36    
37    
38     Paso_FCT_Solver* Paso_FCT_Solver_alloc(Paso_TransportProblem *fctp, Paso_Options* options)
39     {
40     Paso_FCT_Solver* out=NULL;
41     const dim_t blockSize=Paso_TransportProblem_getBlockSize(fctp);
42     const dim_t n = Paso_TransportProblem_getTotalNumRows(fctp);
43    
44 jfenwick 4325 out=new Paso_FCT_Solver;
45 gross 3793 out->transportproblem = Paso_TransportProblem_getReference(fctp);
46     out->mpi_info = Esys_MPIInfo_getReference(fctp->mpi_info);
47     out->flux_limiter = Paso_FCT_FluxLimiter_alloc(fctp);
48 jfenwick 4325 out->b = new double[n];
49 gross 3793 if ( (options->ode_solver == PASO_CRANK_NICOLSON) || (options->ode_solver == PASO_BACKWARD_EULER) ) {
50 jfenwick 4325 out->du = new double[n];
51     out->z = new double[n];
52 gross 3793 } else {
53     out->du = NULL;
54     out->z=NULL;
55     }
56 caltinay 4817 out->u_coupler.reset(new paso::Coupler(Paso_TransportProblem_borrowConnector(fctp), blockSize));
57     out->u_old_coupler.reset(new paso::Coupler(Paso_TransportProblem_borrowConnector(fctp), blockSize));
58     out->omega=0;
59 gross 3793
60     if ( options->ode_solver == PASO_LINEAR_CRANK_NICOLSON ) {
61     out->method = PASO_LINEAR_CRANK_NICOLSON;
62     } else if ( options->ode_solver == PASO_CRANK_NICOLSON ) {
63     out->method = PASO_CRANK_NICOLSON;
64     } else if ( options->ode_solver == PASO_BACKWARD_EULER ) {
65     out->method = PASO_BACKWARD_EULER;
66     } else {
67     Esys_setError(VALUE_ERROR, "Paso_FCT_Solver_alloc: unknown integration scheme.");
68     out->method = UNKNOWN;
69     }
70    
71     if (Esys_noError()) {
72     return out;
73     } else {
74     Paso_FCT_Solver_free(out);
75     return NULL;
76     }
77    
78     }
79    
80     void Paso_FCT_Solver_free(Paso_FCT_Solver *in)
81     {
82     if (in != NULL) {
83 caltinay 4817 Paso_TransportProblem_free(in->transportproblem);
84     Paso_FCT_FluxLimiter_free(in->flux_limiter);
85     Esys_MPIInfo_free(in->mpi_info);
86     delete[] in->b;
87     delete[] in->z;
88     delete[] in->du;
89     delete in;
90 gross 3793 }
91     }
92    
93     double Paso_FCT_Solver_getSafeTimeStepSize(Paso_TransportProblem* fctp)
94     {
95     dim_t i, n;
96 gross 3823 double dt_max_loc=LARGE_POSITIVE_FLOAT;
97 gross 3793 double dt_max=LARGE_POSITIVE_FLOAT;
98     n=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix);
99     /* set low order transport operator */
100     Paso_FCT_setLowOrderOperator(fctp);
101    
102     if (Esys_noError()) {
103     /*
104     * calculate time step size:
105     */
106     dt_max=LARGE_POSITIVE_FLOAT;
107 gross 3823 #pragma omp parallel private(i, dt_max_loc)
108 gross 3793 {
109 gross 3824 dt_max_loc=LARGE_POSITIVE_FLOAT;
110 gross 3793 #pragma omp for schedule(static)
111     for (i=0;i<n;++i) {
112 caltinay 3795 const double l_ii=fctp->main_diagonal_low_order_transport_matrix[i];
113     const double m_i=fctp->lumped_mass_matrix[i];
114 gross 3822 if ( m_i > 0 ) {
115 gross 3793 if (l_ii<0) dt_max_loc=MIN(dt_max_loc,m_i/(-l_ii));
116 gross 3822 }
117     }
118 gross 3793 #pragma omp critical
119     {
120     dt_max=MIN(dt_max,dt_max_loc);
121     }
122     }
123     #ifdef ESYS_MPI
124     {
125 gross 3822 dt_max_loc=dt_max;
126     MPI_Allreduce(&dt_max_loc, &dt_max, 1, MPI_DOUBLE, MPI_MIN, fctp->mpi_info->comm);
127 gross 3793 }
128     #endif
129 gross 3822 if (dt_max<LARGE_POSITIVE_FLOAT) dt_max*=2.;
130 gross 3793 }
131     return dt_max;
132     }
133    
134     /* modifies the main diagonal of the iteration matrix to introduce new dt */
135     void Paso_FCT_Solver_initialize(const double dt, Paso_FCT_Solver *fct_solver, Paso_Options* options, Paso_Performance* pp)
136     {
137     Paso_TransportProblem* fctp = fct_solver->transportproblem;
138     const index_t* main_iptr=Paso_TransportProblem_borrowMainDiagonalPointer(fctp);
139     const dim_t n=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix);
140     const double theta = Paso_FCT_Solver_getTheta(fct_solver);
141     const double omega=1./(dt* theta);
142     dim_t i;
143     Paso_Options options2;
144    
145    
146    
147     Paso_solve_free(fctp->iteration_matrix);
148     /*
149     * fctp->iteration_matrix[i,i]=m[i]/(dt theta) -l[i,i]
150     *
151     */
152     fct_solver->omega=omega;
153     fct_solver->dt = dt;
154     #pragma omp parallel for private(i)
155     for (i = 0; i < n; ++i) {
156 gross 3822 const double m_i=fctp->lumped_mass_matrix[i];
157 caltinay 3795 const double l_ii = fctp->main_diagonal_low_order_transport_matrix[i];
158 gross 3822 if ( m_i > 0 ) {
159     fctp->iteration_matrix->mainBlock->val[main_iptr[i]] = m_i * omega - l_ii;
160     } else {
161     fctp->iteration_matrix->mainBlock->val[main_iptr[i]] = ABS(m_i * omega - l_ii)/(EPSILON*EPSILON);
162     }
163 gross 3793 }
164    
165     /* allocate preconditioner/solver */
166     Paso_Options_setDefaults(&options2);
167     options2.verbose = options->verbose;
168     if (fct_solver->method == PASO_LINEAR_CRANK_NICOLSON ) {
169     options2.preconditioner = PASO_GS;
170     } else {
171     options2.preconditioner = PASO_JACOBI;
172     /* options2.preconditioner = PASO_GS; */
173     }
174     options2.use_local_preconditioner = FALSE;
175 gross 3807 options2.sweeps=-1;
176 gross 3793
177     Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER_INIT);
178     Paso_SystemMatrix_setPreconditioner(fctp->iteration_matrix, &options2);
179     Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER_INIT);
180     }
181    
182 jfenwick 4345 /* entry point for update procedures */
183 gross 3793 err_t Paso_FCT_Solver_update(Paso_FCT_Solver *fct_solver, double* u, double *u_old, Paso_Options* options, Paso_Performance *pp)
184     {
185     const index_t method=fct_solver->method;
186     err_t err_out = SOLVER_NO_ERROR;
187    
188    
189     if (method == PASO_LINEAR_CRANK_NICOLSON) {
190     err_out=Paso_FCT_Solver_update_LCN(fct_solver, u, u_old, options, pp);
191    
192     } else if (method == PASO_CRANK_NICOLSON) {
193     err_out=Paso_FCT_Solver_updateNL(fct_solver, u, u_old, options, pp);
194    
195     } else if (method == PASO_BACKWARD_EULER) {
196     err_out=Paso_FCT_Solver_updateNL(fct_solver, u, u_old, options, pp);
197     } else {
198     err_out = SOLVER_INPUT_ERROR;
199     }
200     return err_out;
201    
202     }
203    
204     /* linear crank-nicolson update */
205     err_t Paso_FCT_Solver_update_LCN(Paso_FCT_Solver *fct_solver, double * u, double *u_old, Paso_Options* options, Paso_Performance *pp)
206     {
207     double const dt = fct_solver->dt;
208 gross 3823 dim_t sweep_max, i;
209 gross 3793 double *b = fct_solver->b;
210     double const RTOL = options->tolerance;
211     const dim_t n=Paso_TransportProblem_getTotalNumRows(fct_solver->transportproblem);
212     Paso_SystemMatrix * iteration_matrix = fct_solver->transportproblem->iteration_matrix;
213 gross 3823 const index_t* main_iptr=Paso_TransportProblem_borrowMainDiagonalPointer(fct_solver->transportproblem);
214 gross 3793 err_t errorCode = SOLVER_NO_ERROR;
215 gross 3823 double norm_u_tilde;
216 gross 3793
217 caltinay 4817 fct_solver->u_old_coupler->startCollect(u_old);
218     fct_solver->u_old_coupler->finishCollect();
219 gross 3793
220 gross 3822 /* b[i]=m*u_tilde[i] = m u_old[i] + dt/2 sum_{j <> i} l_{ij}*(u_old[j]-u_old[i])
221     = u_tilde[i] = u_old[i] where constraint m<0.
222 gross 3793 * note that iteration_matrix stores the negative values of the
223     * low order transport matrix l. Therefore a=-dt*0.5 is used. */
224    
225     Paso_FCT_Solver_setMuPaLu(b, fct_solver->transportproblem->lumped_mass_matrix,
226 gross 3807 fct_solver->u_old_coupler, -dt*0.5, iteration_matrix);
227 gross 3793 /* solve for u_tilde : u_tilda = m^{-1} * b */
228     Paso_FCT_FluxLimiter_setU_tilda(fct_solver->flux_limiter, b);
229     /* u_tilda_connector is completed */
230 gross 3807
231 gross 3793 /* calculate anti-diffusive fluxes for u_tilda */
232     Paso_FCT_setAntiDiffusionFlux_linearCN(fct_solver->flux_limiter->antidiffusive_fluxes,
233     fct_solver->transportproblem, dt,
234     fct_solver->flux_limiter->u_tilde_coupler,
235     fct_solver->u_old_coupler);
236 gross 3807
237    
238 gross 3793 /* b_i += sum_{j} limitation factor_{ij} * antidiffusive_flux_{ij} */
239     Paso_FCT_FluxLimiter_addLimitedFluxes_Start(fct_solver->flux_limiter);
240     Paso_FCT_FluxLimiter_addLimitedFluxes_Complete(fct_solver->flux_limiter, b);
241    
242 gross 3823 Paso_Scale(n, b,fct_solver->omega );
243 gross 3807 /* solve (m-dt/2*L) u = b in the form (omega*m-L) u = b * omega with omega*dt/2=1 */
244 gross 3823 #pragma omp for private(i) schedule(static)
245     for (i = 0; i < n; ++i) {
246     if (! (fct_solver->transportproblem->lumped_mass_matrix[i] > 0 ) ) {
247     b[i] = fct_solver->flux_limiter->u_tilde[i]
248     * fct_solver->transportproblem->iteration_matrix->mainBlock->val[main_iptr[i]];
249     }
250     }
251 gross 3793 /* initial guess is u<- -u + 2*u_tilde */
252     Paso_Update(n, -1., u, 2., fct_solver->flux_limiter->u_tilde);
253 gross 3823
254 gross 3793 sweep_max = MAX((int) (- 2 * log(RTOL)/log(2.)-0.5),1);
255 gross 3823 norm_u_tilde=Paso_lsup(n, fct_solver->flux_limiter->u_tilde, fct_solver->flux_limiter->mpi_info);
256 gross 3807 if (options->verbose) {
257 gross 3823 printf("Paso_FCT_Solver_update_LCN: u_tilda lsup = %e (rtol = %e, max. sweeps = %d)\n",norm_u_tilde,RTOL * norm_u_tilde ,sweep_max);
258 gross 3807 }
259 gross 3793 errorCode = Paso_Preconditioner_Smoother_solve_byTolerance( iteration_matrix, ((Paso_Preconditioner*) (iteration_matrix->solver_p))->gs,
260     u, b, RTOL, &sweep_max, TRUE);
261     if (errorCode == PRECONDITIONER_NO_ERROR) {
262     if (options->verbose) printf("Paso_FCT_Solver_update_LCN: convergence after %d Gauss-Seidel steps.\n",sweep_max);
263     errorCode=SOLVER_NO_ERROR;
264     } else {
265     if (options->verbose) printf("Paso_FCT_Solver_update_LCN: Gauss-Seidel failed within %d stesp (rel. tolerance %e).\n",sweep_max,RTOL);
266     errorCode= SOLVER_MAXITER_REACHED;
267     }
268     return errorCode;
269    
270     }
271    
272     err_t Paso_FCT_Solver_updateNL(Paso_FCT_Solver *fct_solver, double* u, double *u_old, Paso_Options* options, Paso_Performance *pp)
273     {
274 gross 3794 const dim_t num_critical_rates_max=3; /* number of rates >=critical_rate accepted before divergence is triggered */
275     const double critical_rate=0.95; /* expected value of convergence rate */
276    
277 gross 3793 double *b = fct_solver->b;
278     double *z = fct_solver->z;
279     double *du = fct_solver->du;
280     double const dt = fct_solver->dt;
281     Paso_TransportProblem* fctp = fct_solver->transportproblem;
282     Paso_FCT_FluxLimiter* flux_limiter = fct_solver->flux_limiter;
283     dim_t i;
284 gross 3794 double norm_u_tilde, ATOL, norm_du=LARGE_POSITIVE_FLOAT, norm_du_old, rate=1.;
285 gross 3793 err_t errorCode=SOLVER_NO_ERROR;
286     const dim_t n=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix);
287     const double atol=options->absolute_tolerance;
288     const double rtol=options->tolerance;
289     const dim_t max_m=options->iter_max;
290     dim_t m=0, num_critical_rates=0 ;
291     /* ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// */
292    
293 jfenwick 4521 bool converged=FALSE, max_m_reached=FALSE,diverged=FALSE;
294 gross 3793 options->num_iter=0;
295    
296 caltinay 4817 fct_solver->u_old_coupler->startCollect(u_old);
297     fct_solver->u_old_coupler->finishCollect();
298 gross 3793 /* prepare u_tilda and flux limiter */
299     if ( fct_solver->method == PASO_BACKWARD_EULER ) {
300     /* b[i]=m_i* u_old[i] */
301     #pragma omp for private(i) schedule(static)
302     for (i = 0; i < n; ++i) {
303 gross 3822 if (fctp->lumped_mass_matrix[i] > 0 ) {
304     b[i]=u_old[i]* fctp->lumped_mass_matrix[i];
305     } else {
306     b[i]=u_old[i];
307     }
308 gross 3793 }
309     } else {
310 gross 3822 /* b[i]=m_i* u_old[i] + dt/2 sum_{j <> i} l_{ij}*(u_old[j]-u_old[i]) = m_i * u_tilde_i where m_i>0
311     * = u_old[i] otherwise
312 gross 3793 * note that iteration_matrix stores the negative values of the
313     * low order transport matrix l. Therefore a=-dt*0.5 is used. */
314     Paso_FCT_Solver_setMuPaLu(b,fctp->lumped_mass_matrix,fct_solver->u_old_coupler,-dt*0.5,fctp->iteration_matrix);
315     }
316     Paso_FCT_FluxLimiter_setU_tilda(flux_limiter, b); /* u_tilda = m^{-1} b */
317     /* u_tilda_connector is completed */
318 jfenwick 3981 /********************************************************************************************************************************************/
319 jfenwick 4345 /* calculate stopping criterion */
320 gross 3793 norm_u_tilde=Paso_lsup(n, flux_limiter->u_tilde, flux_limiter->mpi_info);
321     ATOL= rtol * norm_u_tilde + atol ;
322     if (options->verbose) printf("Paso_FCT_Solver_updateNL: iteration starts u_tilda lsup = %e (abs. tol = %e)\n",norm_u_tilde,ATOL);
323     /* ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// */
324    
325     /* u_old is an initial guess for u*/
326     Paso_Copy(n,u,u_old);
327    
328     while ( (!converged) && (!diverged) && (! max_m_reached) && Esys_noError()) {
329 caltinay 4817 fct_solver->u_coupler->startCollect(u);
330     fct_solver->u_coupler->finishCollect();
331 gross 3793
332     /* set antidiffusive_flux_{ij} for u */
333     if (fct_solver->method == PASO_BACKWARD_EULER) {
334     Paso_FCT_setAntiDiffusionFlux_BE(fct_solver->flux_limiter->antidiffusive_fluxes, fctp, dt, fct_solver->u_coupler, fct_solver->u_old_coupler);
335     } else {
336     Paso_FCT_setAntiDiffusionFlux_CN(fct_solver->flux_limiter->antidiffusive_fluxes, fctp, dt, fct_solver->u_coupler, fct_solver->u_old_coupler);
337     }
338     /* start the calculation of the limitation factors_{fct_solver->ij} */
339     Paso_FCT_FluxLimiter_addLimitedFluxes_Start(flux_limiter); /* uses u_tilde */
340    
341     /*
342 gross 3822 * z_m[i]=b[i] - (m_i*u[i] - omega*sum_{j<>i} l_{ij} (u[j]-u[i]) ) where m_i>0
343     * ==b[i] - u[i] = u_tilda[i]-u[i] =0 otherwise
344     *
345     * omega = dt/2 or dt .
346 gross 3793 *
347     * note that iteration_matrix stores the negative values of the
348     * low order transport matrix l. Therefore a=dt*theta is used.
349     */
350     if (fct_solver-> method == PASO_BACKWARD_EULER) {
351     Paso_FCT_Solver_setMuPaLu(z, fctp->lumped_mass_matrix, fct_solver->u_coupler, dt, fctp->iteration_matrix);
352     } else {
353     Paso_FCT_Solver_setMuPaLu(z, fctp->lumped_mass_matrix, fct_solver->u_coupler, dt/2, fctp->iteration_matrix);
354     }
355    
356    
357     Paso_Update(n,-1.,z,1.,b); /* z=b-z */
358    
359     /* z_i += sum_{j} limitation factor_{ij} * antidiffusive_flux_{ij} */
360     Paso_FCT_FluxLimiter_addLimitedFluxes_Complete(flux_limiter, z);
361    
362     /* we solve (m/omega - L ) * du = z */
363     if (fct_solver->method == PASO_BACKWARD_EULER) {
364     dim_t cntIter = options->iter_max;
365     double tol= Paso_l2(n, z, fctp->mpi_info) ;
366    
367     if ( m ==0) {
368     tol *=0.5;
369     } else {
370     tol *= MIN(MAX(rate*rate, 1e-2), 0.5);
371     }
372     /* use BiCGSTab with jacobi preconditioner ( m - omega * L ) */
373     Paso_zeroes(n,du);
374     errorCode = Paso_Solver_BiCGStab(fctp->iteration_matrix, z, du, &cntIter, &tol, pp);
375    
376     /* errorCode = Paso_Solver_GMRES(fctp->iteration_matrix, z, du, &cntIter, &tol, 10, 2000, pp); */
377     if (options->verbose) printf("Paso_FCT_Solver_updateNL: BiCGStab is completed after %d steps (residual =%e).\n",cntIter, tol);
378     options->num_iter+=cntIter;
379     if ( errorCode != SOLVER_NO_ERROR) break;
380     } else {
381     /* just use the main diagonal of (m/omega - L ) */
382    
383     Paso_Preconditioner_Smoother_solve(fctp->iteration_matrix, ((Paso_Preconditioner*) (fctp->iteration_matrix->solver_p))->jacobi,
384     du, z, 1, FALSE);
385 gross 3825
386 gross 3793 options->num_iter++;
387     }
388 gross 3823
389 gross 3793
390     Paso_Update(n,1.,u,fct_solver->omega,du);
391     norm_du_old=norm_du;
392     norm_du=Paso_lsup(n,du, fctp->mpi_info);
393     if (m ==0) {
394     if (options->verbose) printf("Paso_FCT_Solver_updateNL: step %d: increment= %e\n",m+1, norm_du * fct_solver->omega);
395     } else {
396     if (norm_du_old > 0.) {
397     rate=norm_du/norm_du_old;
398     } else if (norm_du <= 0.) {
399     rate=0.;
400     } else {
401     rate=LARGE_POSITIVE_FLOAT;
402     }
403     if (options->verbose) printf("Paso_FCT_Solver_updateNL: step %d: increment= %e (rate = %e)\n",m+1, norm_du * fct_solver->omega, rate);
404     num_critical_rates+=( rate<critical_rate ? 0 : 1);
405     max_m_reached=(m>max_m);
406     diverged = (num_critical_rates >= num_critical_rates_max);
407     converged=(norm_du * fct_solver->omega <= ATOL) ;
408     }
409     m++;
410     } /* end of while loop */
411     if (errorCode == SOLVER_NO_ERROR) {
412     if (converged) {
413     if (options->verbose) printf("Paso_FCT_Solver_updateNL: iteration is completed.\n");
414     errorCode=SOLVER_NO_ERROR;
415     } else if (diverged) {
416     if (options->verbose) printf("Paso_FCT_Solver_updateNL: divergence.\n");
417     errorCode=SOLVER_DIVERGENCE;
418     } else if (max_m_reached) {
419     if (options->verbose) printf("Paso_FCT_Solver_updateNL: maximum number of iteration steps reached.\n");
420     errorCode=SOLVER_MAXITER_REACHED;
421     }
422    
423     }
424     return errorCode;
425     }
426    
427    
428     /*
429     * AntiDiffusionFlux:
430     *
431     * f_{ij} = (m_{ij} - dt (1-theta) d_{ij}) (u_old[j]-u_old[i]) - (m_{ij} + dt theta d_{ij}) (u[j]-u[i])
432     *
433     * m=fc->mass matrix
434 jfenwick 4345 * d=artificial diffusion matrix = L - K = - fc->iteration matrix - fc->transport matrix (away from main diagonal)
435 gross 3793 *
436     * for CN : theta =0.5
437     * for BE : theta = 1.
438     */
439    
440     void Paso_FCT_setAntiDiffusionFlux_CN(Paso_SystemMatrix *flux_matrix,
441     const Paso_TransportProblem* fct,
442 caltinay 4817 const double dt,
443     paso::const_Coupler_ptr u_coupler,
444     paso::const_Coupler_ptr u_old_coupler)
445 gross 3793 {
446     dim_t i;
447     index_t iptr_ij;
448    
449 caltinay 4817 const double *u = u_coupler->borrowLocalData();
450     const double *u_old= u_old_coupler->borrowLocalData();
451     const double *remote_u=u_coupler->borrowRemoteData();
452     const double *remote_u_old=u_old_coupler->borrowRemoteData();
453 gross 3793 const double dt_half= dt/2;
454 caltinay 4818 paso::const_SystemMatrixPattern_ptr pattern(fct->iteration_matrix->pattern);
455 gross 3793 const dim_t n=Paso_SystemMatrix_getTotalNumRows(fct->iteration_matrix);
456    
457     #pragma omp parallel for schedule(static) private(i, iptr_ij)
458     for (i = 0; i < n; ++i) {
459 caltinay 3795 const double u_i = u[i];
460     const double u_old_i = u_old[i];
461 gross 3793
462     #pragma ivdep
463     for (iptr_ij=(pattern->mainPattern->ptr[i]);iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
464 caltinay 3795 const index_t j = pattern->mainPattern->index[iptr_ij];
465     const double m_ij = fct->mass_matrix->mainBlock->val[iptr_ij];
466     const double d_ij = fct->transport_matrix->mainBlock->val[iptr_ij]+fct->iteration_matrix->mainBlock->val[iptr_ij]; /* this is in fact -d_ij */
467     const double u_old_j = u_old[j];
468     const double u_j = u[j];
469 gross 3793
470     /* (m_{ij} - dt (1-theta) d_{ij}) (u_old[j]-u_old[i]) - (m_{ij} + dt theta d_{ij}) (u[j]-u[i]) */
471     flux_matrix->mainBlock->val[iptr_ij]=(m_ij+dt_half*d_ij)*(u_old_j-u_old_i) - (m_ij-dt_half*d_ij)*(u_j-u_i);
472    
473     }
474     #pragma ivdep
475     for (iptr_ij=(pattern->col_couplePattern->ptr[i]);iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
476 caltinay 3795 const index_t j = pattern->col_couplePattern->index[iptr_ij];
477     const double m_ij = fct->mass_matrix->col_coupleBlock->val[iptr_ij];
478     const double d_ij = fct->transport_matrix->col_coupleBlock->val[iptr_ij]+fct->iteration_matrix->col_coupleBlock->val[iptr_ij]; /* this is in fact -d_ij */
479     const double u_old_j = remote_u_old[j];
480     const double u_j = remote_u[j];
481 gross 3793 flux_matrix->col_coupleBlock->val[iptr_ij]=(m_ij+dt_half*d_ij)*(u_old_j-u_old_i)- (m_ij-dt_half*d_ij)*(u_j-u_i);
482     }
483     }
484     }
485    
486     void Paso_FCT_setAntiDiffusionFlux_BE(Paso_SystemMatrix *flux_matrix,
487     const Paso_TransportProblem* fct,
488 caltinay 4817 const double dt,
489     paso::const_Coupler_ptr u_coupler,
490     paso::const_Coupler_ptr u_old_coupler)
491 gross 3793 {
492     dim_t i;
493     index_t iptr_ij;
494    
495 caltinay 4817 const double *u = u_coupler->borrowLocalData();
496     const double *u_old = u_old_coupler->borrowLocalData();
497     const double *remote_u = u_coupler->borrowRemoteData();
498     const double *remote_u_old = u_old_coupler->borrowRemoteData();
499 caltinay 4818 paso::const_SystemMatrixPattern_ptr pattern(fct->iteration_matrix->pattern);
500 gross 3793 const dim_t n=Paso_SystemMatrix_getTotalNumRows(fct->iteration_matrix);
501    
502     #pragma omp parallel for schedule(static) private(i, iptr_ij)
503     for (i = 0; i < n; ++i) {
504 caltinay 3795 const double u_i = u[i];
505     const double u_old_i = u_old[i];
506 gross 3793 #pragma ivdep
507     for (iptr_ij=pattern->mainPattern->ptr[i]; iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
508    
509 caltinay 3795 const index_t j = pattern->mainPattern->index[iptr_ij];
510     const double m_ij = fct->mass_matrix->mainBlock->val[iptr_ij];
511     const double d_ij = fct->transport_matrix->mainBlock->val[iptr_ij]+fct->iteration_matrix->mainBlock->val[iptr_ij]; /* this is in fact -d_ij */
512     const double u_old_j = u_old[j];
513     const double u_j = u[j];
514 gross 3793
515     flux_matrix->mainBlock->val[iptr_ij]=m_ij*(u_old_j-u_old_i)- (m_ij-dt*d_ij)*(u_j-u_i);
516     }
517     #pragma ivdep
518     for (iptr_ij=pattern->col_couplePattern->ptr[i]; iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
519 caltinay 3795 const index_t j = pattern->col_couplePattern->index[iptr_ij];
520     const double m_ij = fct->mass_matrix->col_coupleBlock->val[iptr_ij]; /* this is in fact -d_ij */
521     const double d_ij = fct->transport_matrix->col_coupleBlock->val[iptr_ij]+fct->iteration_matrix->col_coupleBlock->val[iptr_ij];
522     const double u_old_j = remote_u_old[j];
523     const double u_j = remote_u[j];
524 gross 3793
525     flux_matrix->col_coupleBlock->val[iptr_ij]=m_ij*(u_old_j-u_old_i)- (m_ij-dt*d_ij)*(u_j-u_i);
526     }
527     }
528     }
529    
530     /* special version of the ant-diffusive fluxes for the linear Crank-Nicolson scheme
531     * in fact this is evaluated for u = 2*u_tilde - u_old which is the predictor
532 jfenwick 4345 * of the solution of the the stabilised problem at time dt using the forward Euler scheme
533 gross 3793 *
534     * f_{ij} = (m_{ij} - dt/2 d_{ij}) (u_old[j]-u_old[i]) - (m_{ij} + dt/2 d_{ij}) (u[j]-u[i])
535     * = (m_{ij} - dt/2 d_{ij}) * (u_old[j]-u_old[i]) - (m_{ij} + dt/2 d_{ij}) * ( 2*(u_tilde[j]-u_tilde[i]) - (u_old[j] -u_old [i]) )
536     * = 2* m_{ij} * ( (u_old[j]-u_tilde[j] - (u_old[i]) - u_tilde[i]) ) - dt d_{ij} * (u_tilde[j]-u_tilde[i])
537     *
538     */
539    
540     void Paso_FCT_setAntiDiffusionFlux_linearCN(Paso_SystemMatrix *flux_matrix,
541 caltinay 4817 const Paso_TransportProblem* fct, const double dt,
542     paso::const_Coupler_ptr u_tilde_coupler,
543     paso::const_Coupler_ptr u_old_coupler)
544 gross 3793 {
545     dim_t i;
546     index_t iptr_ij;
547    
548 caltinay 4817 const double *u_tilde = u_tilde_coupler->borrowLocalData();
549     const double *u_old = u_old_coupler->borrowLocalData();
550     const double *remote_u_tilde = u_tilde_coupler->borrowRemoteData();
551     const double *remote_u_old = u_old_coupler->borrowRemoteData();
552 caltinay 4818 paso::const_SystemMatrixPattern_ptr pattern(fct->iteration_matrix->pattern);
553 gross 3793 const dim_t n=Paso_SystemMatrix_getTotalNumRows(fct->iteration_matrix);
554    
555     #pragma omp parallel for schedule(static) private(i, iptr_ij)
556     for (i = 0; i < n; ++i) {
557 caltinay 3795 const double u_tilde_i = u_tilde[i];
558     const double u_old_i = u_old[i];
559 gross 3807 const double du_i = u_tilde_i - u_old_i;
560 gross 3793 #pragma ivdep
561     for (iptr_ij=(pattern->mainPattern->ptr[i]);iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
562    
563 caltinay 3795 const index_t j = pattern->mainPattern->index[iptr_ij];
564     const double m_ij = fct->mass_matrix->mainBlock->val[iptr_ij];
565     const double d_ij = fct->transport_matrix->mainBlock->val[iptr_ij]+fct->iteration_matrix->mainBlock->val[iptr_ij]; /* this is in fact -d_ij */
566     const double u_tilde_j = u_tilde[j];
567     const double u_old_j = u_old[j];
568 gross 3807 const double du_j = u_tilde_j - u_old_j;
569 gross 3793
570 gross 3807 flux_matrix->mainBlock->val[iptr_ij]= 2 * m_ij * ( du_i - du_j ) - dt * d_ij * ( u_tilde_i - u_tilde_j);
571 gross 3793 }
572     #pragma ivdep
573     for (iptr_ij=(pattern->col_couplePattern->ptr[i]);iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
574    
575 caltinay 3795 const index_t j = pattern->col_couplePattern->index[iptr_ij];
576     const double m_ij = fct->mass_matrix->col_coupleBlock->val[iptr_ij];
577     const double d_ij = fct->transport_matrix->col_coupleBlock->val[iptr_ij]+fct->iteration_matrix->col_coupleBlock->val[iptr_ij];/* this is in fact -d_ij */
578     const double u_tilde_j = remote_u_tilde[j];
579     const double u_old_j = remote_u_old[j];
580 gross 3807 const double du_j = u_tilde_j - u_old_j;
581 gross 3793
582 gross 3807 flux_matrix->col_coupleBlock->val[iptr_ij]= 2 * m_ij * ( du_i - du_j ) - dt * d_ij * ( u_tilde_i - u_tilde_j);
583 gross 3793
584     }
585     }
586    
587     }
588    
589 jfenwick 3981 /************************************************************************************/
590 gross 3793
591     /* Creates the low order transport matrix and stores its negative values
592     * into the iteration_matrix except for the main diagonal which is stored
593     * separately.
594     * If fc->iteration_matrix==NULL, fc->iteration_matrix is allocated
595     *
596     * a=transport_matrix
597     * b= low_order_transport_matrix = - iteration_matrix
598     * c=main diagonal low_order_transport_matrix
599 jfenwick 4345 * initialise c[i] mit a[i,i]
600 gross 3793 *
601     * d_ij=max(0,-a[i,j],-a[j,i])
602     * b[i,j]=-(a[i,j]+d_ij)
603     * c[i]-=d_ij
604     */
605    
606     void Paso_FCT_setLowOrderOperator(Paso_TransportProblem * fc) {
607    
608     dim_t i;
609     index_t iptr_ij, iptr_ji;
610     const index_t* main_iptr=Paso_TransportProblem_borrowMainDiagonalPointer(fc);
611    
612     if (fc->iteration_matrix==NULL) {
613     fc->iteration_matrix=Paso_SystemMatrix_alloc(fc->transport_matrix->type,
614     fc->transport_matrix->pattern,
615     fc->transport_matrix->row_block_size,
616     fc->transport_matrix->col_block_size, TRUE);
617     }
618    
619     if (Esys_noError()) {
620 caltinay 4818 paso::const_SystemMatrixPattern_ptr pattern(fc->iteration_matrix->pattern);
621 gross 3793 const dim_t n=Paso_SystemMatrix_getTotalNumRows(fc->iteration_matrix);
622     #pragma omp parallel for private(i, iptr_ij, iptr_ji) schedule(static)
623     for (i = 0; i < n; ++i) {
624     double sum=fc->transport_matrix->mainBlock->val[main_iptr[i]];
625    
626     /* printf("sum[%d] = %e -> ", i, sum); */
627     /* look at a[i,j] */
628     for (iptr_ij=pattern->mainPattern->ptr[i];iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
629 caltinay 3795 const index_t j = pattern->mainPattern->index[iptr_ij];
630     const double rtmp1 = fc->transport_matrix->mainBlock->val[iptr_ij];
631 gross 3793 if (j!=i) {
632     /* find entry a[j,i] */
633     #pragma ivdep
634     for (iptr_ji=pattern->mainPattern->ptr[j]; iptr_ji<pattern->mainPattern->ptr[j+1]; ++iptr_ji) {
635    
636     if ( pattern->mainPattern->index[iptr_ji] == i) {
637 caltinay 3795 const double rtmp2=fc->transport_matrix->mainBlock->val[iptr_ji];
638 gross 3793 /*
639     printf("a[%d,%d]=%e\n",i,j,rtmp1);
640     printf("a[%d,%d]=%e\n",j,i,rtmp2);
641     */
642    
643 caltinay 3795 const double d_ij=-MIN3(0.,rtmp1,rtmp2);
644 gross 3793 fc->iteration_matrix->mainBlock->val[iptr_ij]=-(rtmp1+d_ij);
645     /* printf("l[%d,%d]=%e\n",i,j,fc->iteration_matrix->mainBlock->val[iptr_ij]); */
646     sum-=d_ij;
647     break;
648     }
649     }
650     }
651     }
652     for (iptr_ij=pattern->col_couplePattern->ptr[i];iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
653 caltinay 3795 const index_t j = pattern->col_couplePattern->index[iptr_ij];
654     const double rtmp1 = fc->transport_matrix->col_coupleBlock->val[iptr_ij];
655 gross 3793 /* find entry a[j,i] */
656     #pragma ivdep
657     for (iptr_ji=pattern->row_couplePattern->ptr[j]; iptr_ji<pattern->row_couplePattern->ptr[j+1]; ++iptr_ji) {
658     if (pattern->row_couplePattern->index[iptr_ji]==i) {
659 caltinay 3795 const double rtmp2=fc->transport_matrix->row_coupleBlock->val[iptr_ji];
660     const double d_ij=-MIN3(0.,rtmp1,rtmp2);
661 gross 3793 fc->iteration_matrix->col_coupleBlock->val[iptr_ij]=-(rtmp1+d_ij);
662     fc->iteration_matrix->row_coupleBlock->val[iptr_ji]=-(rtmp2+d_ij);
663     sum-=d_ij;
664     break;
665     }
666     }
667     }
668     /* set main diagonal entry */
669     fc->main_diagonal_low_order_transport_matrix[i]=sum;
670     /* printf("%e \n", sum); */
671     }
672    
673     }
674     }
675    
676     /*
677 gross 3822 * out_i=m_i u_i + a * \sum_{j <> i} l_{ij} (u_j-u_i) where m_i>0
678     * = u_i where m_i<=0
679 gross 3793 *
680     */
681     void Paso_FCT_Solver_setMuPaLu(double* out,
682 caltinay 4817 const double* M,
683     paso::const_Coupler_ptr u_coupler,
684     const double a,
685     const Paso_SystemMatrix *L)
686 gross 3793 {
687     dim_t i;
688 caltinay 4818 paso::const_SystemMatrixPattern_ptr pattern(L->pattern);
689 caltinay 4817 const double *u = u_coupler->borrowLocalData();
690     const double *remote_u = u_coupler->borrowRemoteData();
691 caltinay 3795 index_t iptr_ij;
692 gross 3793 const dim_t n=Paso_SystemMatrix_getTotalNumRows(L);
693    
694     #pragma omp parallel for private(i) schedule(static)
695     for (i = 0; i < n; ++i) {
696 gross 3822 if ( M[i] > 0.) {
697     out[i]=M[i]*u[i];
698     } else {
699     out[i]=u[i];
700     }
701 gross 3793 }
702     if (ABS(a)>0) {
703     #pragma omp parallel for schedule(static) private(i, iptr_ij)
704     for (i = 0; i < n; ++i) {
705 gross 3823 if ( M[i] > 0.) {
706 caltinay 3795 double sum=0;
707     const double u_i=u[i];
708 gross 3793 #pragma ivdep
709     for (iptr_ij=(pattern->mainPattern->ptr[i]);iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
710     const index_t j=pattern->mainPattern->index[iptr_ij];
711 caltinay 3795 const double l_ij=L->mainBlock->val[iptr_ij];
712 gross 3793 sum+=l_ij*(u[j]-u_i);
713    
714     }
715     #pragma ivdep
716     for (iptr_ij=(pattern->col_couplePattern->ptr[i]);iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
717     const index_t j=pattern->col_couplePattern->index[iptr_ij];
718 caltinay 3795 const double l_ij=L->col_coupleBlock->val[iptr_ij];
719 gross 3793 sum+=l_ij*(remote_u[j]-u_i);
720     }
721     out[i]+=a*sum;
722 gross 3823 }
723 gross 3793 }
724     }
725     }
726    
727 jfenwick 3981 /* ************************************************************************************************************************************************* */
728 gross 3793
729    
730    
731    
732    
733    
734    

Properties

Name Value
svn:mergeinfo /branches/amg_from_3530/paso/src/FCT_Solver.cpp:3531-3826 /branches/lapack2681/paso/src/FCT_Solver.cpp:2682-2741 /branches/pasowrap/paso/src/FCT_Solver.cpp:3661-3674 /branches/py3_attempt2/paso/src/FCT_Solver.cpp:3871-3891 /branches/restext/paso/src/FCT_Solver.cpp:2610-2624 /branches/ripleygmg_from_3668/paso/src/FCT_Solver.cpp:3669-3791 /branches/stage3.0/paso/src/FCT_Solver.cpp:2569-2590 /branches/symbolic_from_3470/paso/src/FCT_Solver.cpp:3471-3974 /branches/symbolic_from_3470/ripley/test/python/paso/src/FCT_Solver.cpp:3517-3974 /release/3.0/paso/src/FCT_Solver.cpp:2591-2601 /trunk/paso/src/FCT_Solver.cpp:4257-4344 /trunk/ripley/test/python/paso/src/FCT_Solver.cpp:3480-3515

  ViewVC Help
Powered by ViewVC 1.1.26