/[escript]/trunk/paso/src/FCT_Solver.cpp
ViewVC logotype

Annotation of /trunk/paso/src/FCT_Solver.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4801 - (hide annotations)
Wed Mar 26 03:26:58 2014 UTC (6 years, 2 months ago) by caltinay
File size: 30935 byte(s)
paso::Coupler and paso::Connector.

1 gross 3793
2 jfenwick 3981 /*****************************************************************************
3 gross 3793 *
4 jfenwick 4657 * Copyright (c) 2003-2014 by University of Queensland
5 jfenwick 3981 * http://www.uq.edu.au
6 gross 3793 *
7     * Primary Business: Queensland, Australia
8     * Licensed under the Open Software License version 3.0
9     * http://www.opensource.org/licenses/osl-3.0.php
10     *
11 jfenwick 3981 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 jfenwick 4657 * Development 2012-2013 by School of Earth Sciences
13     * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 jfenwick 3981 *
15     *****************************************************************************/
16 gross 3793
17    
18 jfenwick 3981 /************************************************************************************/
19 gross 3793
20     /* Paso: Transport solver with flux correction (L is row sum zero)
21     *
22     * - Mv_t=Lv v(0)=u
23     *
24     * to return v(dt)
25     *
26     */
27 jfenwick 3981 /************************************************************************************/
28 gross 3793
29     /* Author: l.gross@uq.edu.au */
30    
31 jfenwick 3981 /************************************************************************************/
32 gross 3793
33     #include "FCT_Solver.h"
34     #include "Preconditioner.h"
35     #include "PasoUtil.h"
36    
37    
38     Paso_FCT_Solver* Paso_FCT_Solver_alloc(Paso_TransportProblem *fctp, Paso_Options* options)
39     {
40     Paso_FCT_Solver* out=NULL;
41     const dim_t blockSize=Paso_TransportProblem_getBlockSize(fctp);
42     const dim_t n = Paso_TransportProblem_getTotalNumRows(fctp);
43    
44 jfenwick 4325 out=new Paso_FCT_Solver;
45 gross 3793 if (! Esys_checkPtr(out)) {
46     out->transportproblem = Paso_TransportProblem_getReference(fctp);
47     out->mpi_info = Esys_MPIInfo_getReference(fctp->mpi_info);
48     out->flux_limiter = Paso_FCT_FluxLimiter_alloc(fctp);
49 jfenwick 4325 out->b = new double[n];
50 gross 3793 if ( (options->ode_solver == PASO_CRANK_NICOLSON) || (options->ode_solver == PASO_BACKWARD_EULER) ) {
51 jfenwick 4325 out->du = new double[n];
52     out->z = new double[n];
53 gross 3793 } else {
54     out->du = NULL;
55     out->z=NULL;
56     }
57 caltinay 4801 out->u_coupler = paso::Coupler_alloc(Paso_TransportProblem_borrowConnector(fctp), blockSize);
58     out->u_old_coupler = paso::Coupler_alloc(Paso_TransportProblem_borrowConnector(fctp), blockSize);
59 gross 3793 out->omega=0;
60    
61     if ( options->ode_solver == PASO_LINEAR_CRANK_NICOLSON ) {
62     out->method = PASO_LINEAR_CRANK_NICOLSON;
63     } else if ( options->ode_solver == PASO_CRANK_NICOLSON ) {
64     out->method = PASO_CRANK_NICOLSON;
65     } else if ( options->ode_solver == PASO_BACKWARD_EULER ) {
66     out->method = PASO_BACKWARD_EULER;
67     } else {
68     Esys_setError(VALUE_ERROR, "Paso_FCT_Solver_alloc: unknown integration scheme.");
69     out->method = UNKNOWN;
70     }
71    
72     }
73    
74     if (Esys_noError()) {
75     return out;
76     } else {
77     Paso_FCT_Solver_free(out);
78     return NULL;
79     }
80    
81     }
82    
83     void Paso_FCT_Solver_free(Paso_FCT_Solver *in)
84     {
85     if (in != NULL) {
86     Paso_TransportProblem_free(in->transportproblem);
87     Paso_FCT_FluxLimiter_free(in->flux_limiter);
88     Esys_MPIInfo_free(in->mpi_info);
89 caltinay 4801 paso::Coupler_free(in->u_old_coupler);
90     paso::Coupler_free(in->u_coupler);
91 gross 3793
92 jfenwick 4325 delete[] in->b;
93     delete[] in->z;
94     delete[] in->du;
95     delete in;
96 gross 3793
97     }
98     }
99    
100     double Paso_FCT_Solver_getSafeTimeStepSize(Paso_TransportProblem* fctp)
101     {
102     dim_t i, n;
103 gross 3823 double dt_max_loc=LARGE_POSITIVE_FLOAT;
104 gross 3793 double dt_max=LARGE_POSITIVE_FLOAT;
105     n=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix);
106     /* set low order transport operator */
107     Paso_FCT_setLowOrderOperator(fctp);
108    
109     if (Esys_noError()) {
110     /*
111     * calculate time step size:
112     */
113     dt_max=LARGE_POSITIVE_FLOAT;
114 gross 3823 #pragma omp parallel private(i, dt_max_loc)
115 gross 3793 {
116 gross 3824 dt_max_loc=LARGE_POSITIVE_FLOAT;
117 gross 3793 #pragma omp for schedule(static)
118     for (i=0;i<n;++i) {
119 caltinay 3795 const double l_ii=fctp->main_diagonal_low_order_transport_matrix[i];
120     const double m_i=fctp->lumped_mass_matrix[i];
121 gross 3822 if ( m_i > 0 ) {
122 gross 3793 if (l_ii<0) dt_max_loc=MIN(dt_max_loc,m_i/(-l_ii));
123 gross 3822 }
124     }
125 gross 3793 #pragma omp critical
126     {
127     dt_max=MIN(dt_max,dt_max_loc);
128     }
129     }
130     #ifdef ESYS_MPI
131     {
132 gross 3822 dt_max_loc=dt_max;
133     MPI_Allreduce(&dt_max_loc, &dt_max, 1, MPI_DOUBLE, MPI_MIN, fctp->mpi_info->comm);
134 gross 3793 }
135     #endif
136 gross 3822 if (dt_max<LARGE_POSITIVE_FLOAT) dt_max*=2.;
137 gross 3793 }
138     return dt_max;
139     }
140    
141     /* modifies the main diagonal of the iteration matrix to introduce new dt */
142     void Paso_FCT_Solver_initialize(const double dt, Paso_FCT_Solver *fct_solver, Paso_Options* options, Paso_Performance* pp)
143     {
144     Paso_TransportProblem* fctp = fct_solver->transportproblem;
145     const index_t* main_iptr=Paso_TransportProblem_borrowMainDiagonalPointer(fctp);
146     const dim_t n=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix);
147     const double theta = Paso_FCT_Solver_getTheta(fct_solver);
148     const double omega=1./(dt* theta);
149     dim_t i;
150     Paso_Options options2;
151    
152    
153    
154     Paso_solve_free(fctp->iteration_matrix);
155     /*
156     * fctp->iteration_matrix[i,i]=m[i]/(dt theta) -l[i,i]
157     *
158     */
159     fct_solver->omega=omega;
160     fct_solver->dt = dt;
161     #pragma omp parallel for private(i)
162     for (i = 0; i < n; ++i) {
163 gross 3822 const double m_i=fctp->lumped_mass_matrix[i];
164 caltinay 3795 const double l_ii = fctp->main_diagonal_low_order_transport_matrix[i];
165 gross 3822 if ( m_i > 0 ) {
166     fctp->iteration_matrix->mainBlock->val[main_iptr[i]] = m_i * omega - l_ii;
167     } else {
168     fctp->iteration_matrix->mainBlock->val[main_iptr[i]] = ABS(m_i * omega - l_ii)/(EPSILON*EPSILON);
169     }
170 gross 3793 }
171    
172     /* allocate preconditioner/solver */
173     Paso_Options_setDefaults(&options2);
174     options2.verbose = options->verbose;
175     if (fct_solver->method == PASO_LINEAR_CRANK_NICOLSON ) {
176     options2.preconditioner = PASO_GS;
177     } else {
178     options2.preconditioner = PASO_JACOBI;
179     /* options2.preconditioner = PASO_GS; */
180     }
181     options2.use_local_preconditioner = FALSE;
182 gross 3807 options2.sweeps=-1;
183 gross 3793
184     Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER_INIT);
185     Paso_SystemMatrix_setPreconditioner(fctp->iteration_matrix, &options2);
186     Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER_INIT);
187     }
188    
189 jfenwick 4345 /* entry point for update procedures */
190 gross 3793 err_t Paso_FCT_Solver_update(Paso_FCT_Solver *fct_solver, double* u, double *u_old, Paso_Options* options, Paso_Performance *pp)
191     {
192     const index_t method=fct_solver->method;
193     err_t err_out = SOLVER_NO_ERROR;
194    
195    
196     if (method == PASO_LINEAR_CRANK_NICOLSON) {
197     err_out=Paso_FCT_Solver_update_LCN(fct_solver, u, u_old, options, pp);
198    
199     } else if (method == PASO_CRANK_NICOLSON) {
200     err_out=Paso_FCT_Solver_updateNL(fct_solver, u, u_old, options, pp);
201    
202     } else if (method == PASO_BACKWARD_EULER) {
203     err_out=Paso_FCT_Solver_updateNL(fct_solver, u, u_old, options, pp);
204     } else {
205     err_out = SOLVER_INPUT_ERROR;
206     }
207     return err_out;
208    
209     }
210    
211     /* linear crank-nicolson update */
212     err_t Paso_FCT_Solver_update_LCN(Paso_FCT_Solver *fct_solver, double * u, double *u_old, Paso_Options* options, Paso_Performance *pp)
213     {
214     double const dt = fct_solver->dt;
215 gross 3823 dim_t sweep_max, i;
216 gross 3793 double *b = fct_solver->b;
217     double const RTOL = options->tolerance;
218     const dim_t n=Paso_TransportProblem_getTotalNumRows(fct_solver->transportproblem);
219     Paso_SystemMatrix * iteration_matrix = fct_solver->transportproblem->iteration_matrix;
220 gross 3823 const index_t* main_iptr=Paso_TransportProblem_borrowMainDiagonalPointer(fct_solver->transportproblem);
221 gross 3793 err_t errorCode = SOLVER_NO_ERROR;
222 gross 3823 double norm_u_tilde;
223 gross 3793
224 caltinay 4801 paso::Coupler_startCollect(fct_solver->u_old_coupler,u_old);
225     paso::Coupler_finishCollect(fct_solver->u_old_coupler);
226 gross 3793
227 gross 3822 /* b[i]=m*u_tilde[i] = m u_old[i] + dt/2 sum_{j <> i} l_{ij}*(u_old[j]-u_old[i])
228     = u_tilde[i] = u_old[i] where constraint m<0.
229 gross 3793 * note that iteration_matrix stores the negative values of the
230     * low order transport matrix l. Therefore a=-dt*0.5 is used. */
231    
232     Paso_FCT_Solver_setMuPaLu(b, fct_solver->transportproblem->lumped_mass_matrix,
233 gross 3807 fct_solver->u_old_coupler, -dt*0.5, iteration_matrix);
234 gross 3793 /* solve for u_tilde : u_tilda = m^{-1} * b */
235     Paso_FCT_FluxLimiter_setU_tilda(fct_solver->flux_limiter, b);
236     /* u_tilda_connector is completed */
237 gross 3807
238 gross 3793 /* calculate anti-diffusive fluxes for u_tilda */
239     Paso_FCT_setAntiDiffusionFlux_linearCN(fct_solver->flux_limiter->antidiffusive_fluxes,
240     fct_solver->transportproblem, dt,
241     fct_solver->flux_limiter->u_tilde_coupler,
242     fct_solver->u_old_coupler);
243 gross 3807
244    
245 gross 3793 /* b_i += sum_{j} limitation factor_{ij} * antidiffusive_flux_{ij} */
246     Paso_FCT_FluxLimiter_addLimitedFluxes_Start(fct_solver->flux_limiter);
247     Paso_FCT_FluxLimiter_addLimitedFluxes_Complete(fct_solver->flux_limiter, b);
248    
249 gross 3823 Paso_Scale(n, b,fct_solver->omega );
250 gross 3807 /* solve (m-dt/2*L) u = b in the form (omega*m-L) u = b * omega with omega*dt/2=1 */
251 gross 3823 #pragma omp for private(i) schedule(static)
252     for (i = 0; i < n; ++i) {
253     if (! (fct_solver->transportproblem->lumped_mass_matrix[i] > 0 ) ) {
254     b[i] = fct_solver->flux_limiter->u_tilde[i]
255     * fct_solver->transportproblem->iteration_matrix->mainBlock->val[main_iptr[i]];
256     }
257     }
258 gross 3793 /* initial guess is u<- -u + 2*u_tilde */
259     Paso_Update(n, -1., u, 2., fct_solver->flux_limiter->u_tilde);
260 gross 3823
261 gross 3793 sweep_max = MAX((int) (- 2 * log(RTOL)/log(2.)-0.5),1);
262 gross 3823 norm_u_tilde=Paso_lsup(n, fct_solver->flux_limiter->u_tilde, fct_solver->flux_limiter->mpi_info);
263 gross 3807 if (options->verbose) {
264 gross 3823 printf("Paso_FCT_Solver_update_LCN: u_tilda lsup = %e (rtol = %e, max. sweeps = %d)\n",norm_u_tilde,RTOL * norm_u_tilde ,sweep_max);
265 gross 3807 }
266 gross 3793 errorCode = Paso_Preconditioner_Smoother_solve_byTolerance( iteration_matrix, ((Paso_Preconditioner*) (iteration_matrix->solver_p))->gs,
267     u, b, RTOL, &sweep_max, TRUE);
268     if (errorCode == PRECONDITIONER_NO_ERROR) {
269     if (options->verbose) printf("Paso_FCT_Solver_update_LCN: convergence after %d Gauss-Seidel steps.\n",sweep_max);
270     errorCode=SOLVER_NO_ERROR;
271     } else {
272     if (options->verbose) printf("Paso_FCT_Solver_update_LCN: Gauss-Seidel failed within %d stesp (rel. tolerance %e).\n",sweep_max,RTOL);
273     errorCode= SOLVER_MAXITER_REACHED;
274     }
275     return errorCode;
276    
277     }
278    
279     err_t Paso_FCT_Solver_updateNL(Paso_FCT_Solver *fct_solver, double* u, double *u_old, Paso_Options* options, Paso_Performance *pp)
280     {
281 gross 3794 const dim_t num_critical_rates_max=3; /* number of rates >=critical_rate accepted before divergence is triggered */
282     const double critical_rate=0.95; /* expected value of convergence rate */
283    
284 gross 3793 double *b = fct_solver->b;
285     double *z = fct_solver->z;
286     double *du = fct_solver->du;
287     double const dt = fct_solver->dt;
288     Paso_TransportProblem* fctp = fct_solver->transportproblem;
289     Paso_FCT_FluxLimiter* flux_limiter = fct_solver->flux_limiter;
290     dim_t i;
291 gross 3794 double norm_u_tilde, ATOL, norm_du=LARGE_POSITIVE_FLOAT, norm_du_old, rate=1.;
292 gross 3793 err_t errorCode=SOLVER_NO_ERROR;
293     const dim_t n=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix);
294     const double atol=options->absolute_tolerance;
295     const double rtol=options->tolerance;
296     const dim_t max_m=options->iter_max;
297     dim_t m=0, num_critical_rates=0 ;
298     /* ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// */
299    
300 jfenwick 4521 bool converged=FALSE, max_m_reached=FALSE,diverged=FALSE;
301 gross 3793 options->num_iter=0;
302    
303 caltinay 4801 paso::Coupler_startCollect(fct_solver->u_old_coupler,u_old);
304     paso::Coupler_finishCollect(fct_solver->u_old_coupler);
305 gross 3793 /* prepare u_tilda and flux limiter */
306     if ( fct_solver->method == PASO_BACKWARD_EULER ) {
307     /* b[i]=m_i* u_old[i] */
308     #pragma omp for private(i) schedule(static)
309     for (i = 0; i < n; ++i) {
310 gross 3822 if (fctp->lumped_mass_matrix[i] > 0 ) {
311     b[i]=u_old[i]* fctp->lumped_mass_matrix[i];
312     } else {
313     b[i]=u_old[i];
314     }
315 gross 3793 }
316     } else {
317 gross 3822 /* b[i]=m_i* u_old[i] + dt/2 sum_{j <> i} l_{ij}*(u_old[j]-u_old[i]) = m_i * u_tilde_i where m_i>0
318     * = u_old[i] otherwise
319 gross 3793 * note that iteration_matrix stores the negative values of the
320     * low order transport matrix l. Therefore a=-dt*0.5 is used. */
321     Paso_FCT_Solver_setMuPaLu(b,fctp->lumped_mass_matrix,fct_solver->u_old_coupler,-dt*0.5,fctp->iteration_matrix);
322     }
323     Paso_FCT_FluxLimiter_setU_tilda(flux_limiter, b); /* u_tilda = m^{-1} b */
324     /* u_tilda_connector is completed */
325 jfenwick 3981 /********************************************************************************************************************************************/
326 jfenwick 4345 /* calculate stopping criterion */
327 gross 3793 norm_u_tilde=Paso_lsup(n, flux_limiter->u_tilde, flux_limiter->mpi_info);
328     ATOL= rtol * norm_u_tilde + atol ;
329     if (options->verbose) printf("Paso_FCT_Solver_updateNL: iteration starts u_tilda lsup = %e (abs. tol = %e)\n",norm_u_tilde,ATOL);
330     /* ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// */
331    
332     /* u_old is an initial guess for u*/
333     Paso_Copy(n,u,u_old);
334    
335     while ( (!converged) && (!diverged) && (! max_m_reached) && Esys_noError()) {
336 caltinay 4801 paso::Coupler_startCollect(fct_solver->u_coupler,u);
337     paso::Coupler_finishCollect(fct_solver->u_coupler);
338 gross 3793
339     /* set antidiffusive_flux_{ij} for u */
340     if (fct_solver->method == PASO_BACKWARD_EULER) {
341     Paso_FCT_setAntiDiffusionFlux_BE(fct_solver->flux_limiter->antidiffusive_fluxes, fctp, dt, fct_solver->u_coupler, fct_solver->u_old_coupler);
342     } else {
343     Paso_FCT_setAntiDiffusionFlux_CN(fct_solver->flux_limiter->antidiffusive_fluxes, fctp, dt, fct_solver->u_coupler, fct_solver->u_old_coupler);
344     }
345     /* start the calculation of the limitation factors_{fct_solver->ij} */
346     Paso_FCT_FluxLimiter_addLimitedFluxes_Start(flux_limiter); /* uses u_tilde */
347    
348     /*
349 gross 3822 * z_m[i]=b[i] - (m_i*u[i] - omega*sum_{j<>i} l_{ij} (u[j]-u[i]) ) where m_i>0
350     * ==b[i] - u[i] = u_tilda[i]-u[i] =0 otherwise
351     *
352     * omega = dt/2 or dt .
353 gross 3793 *
354     * note that iteration_matrix stores the negative values of the
355     * low order transport matrix l. Therefore a=dt*theta is used.
356     */
357     if (fct_solver-> method == PASO_BACKWARD_EULER) {
358     Paso_FCT_Solver_setMuPaLu(z, fctp->lumped_mass_matrix, fct_solver->u_coupler, dt, fctp->iteration_matrix);
359     } else {
360     Paso_FCT_Solver_setMuPaLu(z, fctp->lumped_mass_matrix, fct_solver->u_coupler, dt/2, fctp->iteration_matrix);
361     }
362    
363    
364     Paso_Update(n,-1.,z,1.,b); /* z=b-z */
365    
366     /* z_i += sum_{j} limitation factor_{ij} * antidiffusive_flux_{ij} */
367     Paso_FCT_FluxLimiter_addLimitedFluxes_Complete(flux_limiter, z);
368    
369     /* we solve (m/omega - L ) * du = z */
370     if (fct_solver->method == PASO_BACKWARD_EULER) {
371     dim_t cntIter = options->iter_max;
372     double tol= Paso_l2(n, z, fctp->mpi_info) ;
373    
374     if ( m ==0) {
375     tol *=0.5;
376     } else {
377     tol *= MIN(MAX(rate*rate, 1e-2), 0.5);
378     }
379     /* use BiCGSTab with jacobi preconditioner ( m - omega * L ) */
380     Paso_zeroes(n,du);
381     errorCode = Paso_Solver_BiCGStab(fctp->iteration_matrix, z, du, &cntIter, &tol, pp);
382    
383     /* errorCode = Paso_Solver_GMRES(fctp->iteration_matrix, z, du, &cntIter, &tol, 10, 2000, pp); */
384     if (options->verbose) printf("Paso_FCT_Solver_updateNL: BiCGStab is completed after %d steps (residual =%e).\n",cntIter, tol);
385     options->num_iter+=cntIter;
386     if ( errorCode != SOLVER_NO_ERROR) break;
387     } else {
388     /* just use the main diagonal of (m/omega - L ) */
389    
390     Paso_Preconditioner_Smoother_solve(fctp->iteration_matrix, ((Paso_Preconditioner*) (fctp->iteration_matrix->solver_p))->jacobi,
391     du, z, 1, FALSE);
392 gross 3825
393 gross 3793 options->num_iter++;
394     }
395 gross 3823
396 gross 3793
397     Paso_Update(n,1.,u,fct_solver->omega,du);
398     norm_du_old=norm_du;
399     norm_du=Paso_lsup(n,du, fctp->mpi_info);
400     if (m ==0) {
401     if (options->verbose) printf("Paso_FCT_Solver_updateNL: step %d: increment= %e\n",m+1, norm_du * fct_solver->omega);
402     } else {
403     if (norm_du_old > 0.) {
404     rate=norm_du/norm_du_old;
405     } else if (norm_du <= 0.) {
406     rate=0.;
407     } else {
408     rate=LARGE_POSITIVE_FLOAT;
409     }
410     if (options->verbose) printf("Paso_FCT_Solver_updateNL: step %d: increment= %e (rate = %e)\n",m+1, norm_du * fct_solver->omega, rate);
411     num_critical_rates+=( rate<critical_rate ? 0 : 1);
412     max_m_reached=(m>max_m);
413     diverged = (num_critical_rates >= num_critical_rates_max);
414     converged=(norm_du * fct_solver->omega <= ATOL) ;
415     }
416     m++;
417     } /* end of while loop */
418     if (errorCode == SOLVER_NO_ERROR) {
419     if (converged) {
420     if (options->verbose) printf("Paso_FCT_Solver_updateNL: iteration is completed.\n");
421     errorCode=SOLVER_NO_ERROR;
422     } else if (diverged) {
423     if (options->verbose) printf("Paso_FCT_Solver_updateNL: divergence.\n");
424     errorCode=SOLVER_DIVERGENCE;
425     } else if (max_m_reached) {
426     if (options->verbose) printf("Paso_FCT_Solver_updateNL: maximum number of iteration steps reached.\n");
427     errorCode=SOLVER_MAXITER_REACHED;
428     }
429    
430     }
431     return errorCode;
432     }
433    
434    
435     /*
436     * AntiDiffusionFlux:
437     *
438     * f_{ij} = (m_{ij} - dt (1-theta) d_{ij}) (u_old[j]-u_old[i]) - (m_{ij} + dt theta d_{ij}) (u[j]-u[i])
439     *
440     * m=fc->mass matrix
441 jfenwick 4345 * d=artificial diffusion matrix = L - K = - fc->iteration matrix - fc->transport matrix (away from main diagonal)
442 gross 3793 *
443     * for CN : theta =0.5
444     * for BE : theta = 1.
445     */
446    
447     void Paso_FCT_setAntiDiffusionFlux_CN(Paso_SystemMatrix *flux_matrix,
448     const Paso_TransportProblem* fct,
449     const double dt,
450 caltinay 4801 const paso::Coupler* u_coupler,
451     const paso::Coupler* u_old_coupler)
452 gross 3793 {
453     dim_t i;
454     index_t iptr_ij;
455    
456 caltinay 4801 const double *u = paso::Coupler_borrowLocalData(u_coupler);
457     const double *u_old= paso::Coupler_borrowLocalData(u_old_coupler);
458     const double *remote_u=paso::Coupler_borrowRemoteData(u_coupler);
459     const double *remote_u_old=paso::Coupler_borrowRemoteData(u_old_coupler);
460 gross 3793 const double dt_half= dt/2;
461 caltinay 4800 const paso::SystemMatrixPattern *pattern=fct->iteration_matrix->pattern;
462 gross 3793 const dim_t n=Paso_SystemMatrix_getTotalNumRows(fct->iteration_matrix);
463    
464     #pragma omp parallel for schedule(static) private(i, iptr_ij)
465     for (i = 0; i < n; ++i) {
466 caltinay 3795 const double u_i = u[i];
467     const double u_old_i = u_old[i];
468 gross 3793
469     #pragma ivdep
470     for (iptr_ij=(pattern->mainPattern->ptr[i]);iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
471 caltinay 3795 const index_t j = pattern->mainPattern->index[iptr_ij];
472     const double m_ij = fct->mass_matrix->mainBlock->val[iptr_ij];
473     const double d_ij = fct->transport_matrix->mainBlock->val[iptr_ij]+fct->iteration_matrix->mainBlock->val[iptr_ij]; /* this is in fact -d_ij */
474     const double u_old_j = u_old[j];
475     const double u_j = u[j];
476 gross 3793
477     /* (m_{ij} - dt (1-theta) d_{ij}) (u_old[j]-u_old[i]) - (m_{ij} + dt theta d_{ij}) (u[j]-u[i]) */
478     flux_matrix->mainBlock->val[iptr_ij]=(m_ij+dt_half*d_ij)*(u_old_j-u_old_i) - (m_ij-dt_half*d_ij)*(u_j-u_i);
479    
480     }
481     #pragma ivdep
482     for (iptr_ij=(pattern->col_couplePattern->ptr[i]);iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
483 caltinay 3795 const index_t j = pattern->col_couplePattern->index[iptr_ij];
484     const double m_ij = fct->mass_matrix->col_coupleBlock->val[iptr_ij];
485     const double d_ij = fct->transport_matrix->col_coupleBlock->val[iptr_ij]+fct->iteration_matrix->col_coupleBlock->val[iptr_ij]; /* this is in fact -d_ij */
486     const double u_old_j = remote_u_old[j];
487     const double u_j = remote_u[j];
488 gross 3793 flux_matrix->col_coupleBlock->val[iptr_ij]=(m_ij+dt_half*d_ij)*(u_old_j-u_old_i)- (m_ij-dt_half*d_ij)*(u_j-u_i);
489     }
490     }
491     }
492    
493     void Paso_FCT_setAntiDiffusionFlux_BE(Paso_SystemMatrix *flux_matrix,
494     const Paso_TransportProblem* fct,
495     const double dt,
496 caltinay 4801 const paso::Coupler* u_coupler,
497     const paso::Coupler* u_old_coupler)
498 gross 3793 {
499     dim_t i;
500     index_t iptr_ij;
501    
502 caltinay 4801 const double *u=paso::Coupler_borrowLocalData(u_coupler);
503     const double *u_old= paso::Coupler_borrowLocalData(u_old_coupler);
504     const double *remote_u=paso::Coupler_borrowRemoteData(u_coupler);
505     const double *remote_u_old=paso::Coupler_borrowRemoteData(u_old_coupler);
506 caltinay 4800 const paso::SystemMatrixPattern *pattern=fct->iteration_matrix->pattern;
507 gross 3793 const dim_t n=Paso_SystemMatrix_getTotalNumRows(fct->iteration_matrix);
508    
509     #pragma omp parallel for schedule(static) private(i, iptr_ij)
510     for (i = 0; i < n; ++i) {
511 caltinay 3795 const double u_i = u[i];
512     const double u_old_i = u_old[i];
513 gross 3793 #pragma ivdep
514     for (iptr_ij=pattern->mainPattern->ptr[i]; iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
515    
516 caltinay 3795 const index_t j = pattern->mainPattern->index[iptr_ij];
517     const double m_ij = fct->mass_matrix->mainBlock->val[iptr_ij];
518     const double d_ij = fct->transport_matrix->mainBlock->val[iptr_ij]+fct->iteration_matrix->mainBlock->val[iptr_ij]; /* this is in fact -d_ij */
519     const double u_old_j = u_old[j];
520     const double u_j = u[j];
521 gross 3793
522     flux_matrix->mainBlock->val[iptr_ij]=m_ij*(u_old_j-u_old_i)- (m_ij-dt*d_ij)*(u_j-u_i);
523     }
524     #pragma ivdep
525     for (iptr_ij=pattern->col_couplePattern->ptr[i]; iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
526 caltinay 3795 const index_t j = pattern->col_couplePattern->index[iptr_ij];
527     const double m_ij = fct->mass_matrix->col_coupleBlock->val[iptr_ij]; /* this is in fact -d_ij */
528     const double d_ij = fct->transport_matrix->col_coupleBlock->val[iptr_ij]+fct->iteration_matrix->col_coupleBlock->val[iptr_ij];
529     const double u_old_j = remote_u_old[j];
530     const double u_j = remote_u[j];
531 gross 3793
532     flux_matrix->col_coupleBlock->val[iptr_ij]=m_ij*(u_old_j-u_old_i)- (m_ij-dt*d_ij)*(u_j-u_i);
533     }
534     }
535     }
536    
537     /* special version of the ant-diffusive fluxes for the linear Crank-Nicolson scheme
538     * in fact this is evaluated for u = 2*u_tilde - u_old which is the predictor
539 jfenwick 4345 * of the solution of the the stabilised problem at time dt using the forward Euler scheme
540 gross 3793 *
541     * f_{ij} = (m_{ij} - dt/2 d_{ij}) (u_old[j]-u_old[i]) - (m_{ij} + dt/2 d_{ij}) (u[j]-u[i])
542     * = (m_{ij} - dt/2 d_{ij}) * (u_old[j]-u_old[i]) - (m_{ij} + dt/2 d_{ij}) * ( 2*(u_tilde[j]-u_tilde[i]) - (u_old[j] -u_old [i]) )
543     * = 2* m_{ij} * ( (u_old[j]-u_tilde[j] - (u_old[i]) - u_tilde[i]) ) - dt d_{ij} * (u_tilde[j]-u_tilde[i])
544     *
545     */
546    
547     void Paso_FCT_setAntiDiffusionFlux_linearCN(Paso_SystemMatrix *flux_matrix,
548     const Paso_TransportProblem* fct,
549     const double dt,
550 caltinay 4801 const paso::Coupler* u_tilde_coupler,
551     const paso::Coupler* u_old_coupler)
552 gross 3793 {
553     dim_t i;
554     index_t iptr_ij;
555    
556 caltinay 4801 const double *u_tilde=paso::Coupler_borrowLocalData(u_tilde_coupler);
557     const double *u_old= paso::Coupler_borrowLocalData(u_old_coupler);
558     const double *remote_u_tilde=paso::Coupler_borrowRemoteData(u_tilde_coupler);
559     const double *remote_u_old=paso::Coupler_borrowRemoteData(u_old_coupler);
560 caltinay 4800 const paso::SystemMatrixPattern *pattern=fct->iteration_matrix->pattern;
561 gross 3793 const dim_t n=Paso_SystemMatrix_getTotalNumRows(fct->iteration_matrix);
562    
563     #pragma omp parallel for schedule(static) private(i, iptr_ij)
564     for (i = 0; i < n; ++i) {
565 caltinay 3795 const double u_tilde_i = u_tilde[i];
566     const double u_old_i = u_old[i];
567 gross 3807 const double du_i = u_tilde_i - u_old_i;
568 gross 3793 #pragma ivdep
569     for (iptr_ij=(pattern->mainPattern->ptr[i]);iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
570    
571 caltinay 3795 const index_t j = pattern->mainPattern->index[iptr_ij];
572     const double m_ij = fct->mass_matrix->mainBlock->val[iptr_ij];
573     const double d_ij = fct->transport_matrix->mainBlock->val[iptr_ij]+fct->iteration_matrix->mainBlock->val[iptr_ij]; /* this is in fact -d_ij */
574     const double u_tilde_j = u_tilde[j];
575     const double u_old_j = u_old[j];
576 gross 3807 const double du_j = u_tilde_j - u_old_j;
577 gross 3793
578 gross 3807 flux_matrix->mainBlock->val[iptr_ij]= 2 * m_ij * ( du_i - du_j ) - dt * d_ij * ( u_tilde_i - u_tilde_j);
579 gross 3793 }
580     #pragma ivdep
581     for (iptr_ij=(pattern->col_couplePattern->ptr[i]);iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
582    
583 caltinay 3795 const index_t j = pattern->col_couplePattern->index[iptr_ij];
584     const double m_ij = fct->mass_matrix->col_coupleBlock->val[iptr_ij];
585     const double d_ij = fct->transport_matrix->col_coupleBlock->val[iptr_ij]+fct->iteration_matrix->col_coupleBlock->val[iptr_ij];/* this is in fact -d_ij */
586     const double u_tilde_j = remote_u_tilde[j];
587     const double u_old_j = remote_u_old[j];
588 gross 3807 const double du_j = u_tilde_j - u_old_j;
589 gross 3793
590 gross 3807 flux_matrix->col_coupleBlock->val[iptr_ij]= 2 * m_ij * ( du_i - du_j ) - dt * d_ij * ( u_tilde_i - u_tilde_j);
591 gross 3793
592     }
593     }
594    
595     }
596    
597 jfenwick 3981 /************************************************************************************/
598 gross 3793
599     /* Creates the low order transport matrix and stores its negative values
600     * into the iteration_matrix except for the main diagonal which is stored
601     * separately.
602     * If fc->iteration_matrix==NULL, fc->iteration_matrix is allocated
603     *
604     * a=transport_matrix
605     * b= low_order_transport_matrix = - iteration_matrix
606     * c=main diagonal low_order_transport_matrix
607 jfenwick 4345 * initialise c[i] mit a[i,i]
608 gross 3793 *
609     * d_ij=max(0,-a[i,j],-a[j,i])
610     * b[i,j]=-(a[i,j]+d_ij)
611     * c[i]-=d_ij
612     */
613    
614     void Paso_FCT_setLowOrderOperator(Paso_TransportProblem * fc) {
615    
616     dim_t i;
617     index_t iptr_ij, iptr_ji;
618     const index_t* main_iptr=Paso_TransportProblem_borrowMainDiagonalPointer(fc);
619    
620     if (fc->iteration_matrix==NULL) {
621     fc->iteration_matrix=Paso_SystemMatrix_alloc(fc->transport_matrix->type,
622     fc->transport_matrix->pattern,
623     fc->transport_matrix->row_block_size,
624     fc->transport_matrix->col_block_size, TRUE);
625     }
626    
627     if (Esys_noError()) {
628 caltinay 4800 const paso::SystemMatrixPattern *pattern=fc->iteration_matrix->pattern;
629 gross 3793 const dim_t n=Paso_SystemMatrix_getTotalNumRows(fc->iteration_matrix);
630     #pragma omp parallel for private(i, iptr_ij, iptr_ji) schedule(static)
631     for (i = 0; i < n; ++i) {
632     double sum=fc->transport_matrix->mainBlock->val[main_iptr[i]];
633    
634     /* printf("sum[%d] = %e -> ", i, sum); */
635     /* look at a[i,j] */
636     for (iptr_ij=pattern->mainPattern->ptr[i];iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
637 caltinay 3795 const index_t j = pattern->mainPattern->index[iptr_ij];
638     const double rtmp1 = fc->transport_matrix->mainBlock->val[iptr_ij];
639 gross 3793 if (j!=i) {
640     /* find entry a[j,i] */
641     #pragma ivdep
642     for (iptr_ji=pattern->mainPattern->ptr[j]; iptr_ji<pattern->mainPattern->ptr[j+1]; ++iptr_ji) {
643    
644     if ( pattern->mainPattern->index[iptr_ji] == i) {
645 caltinay 3795 const double rtmp2=fc->transport_matrix->mainBlock->val[iptr_ji];
646 gross 3793 /*
647     printf("a[%d,%d]=%e\n",i,j,rtmp1);
648     printf("a[%d,%d]=%e\n",j,i,rtmp2);
649     */
650    
651 caltinay 3795 const double d_ij=-MIN3(0.,rtmp1,rtmp2);
652 gross 3793 fc->iteration_matrix->mainBlock->val[iptr_ij]=-(rtmp1+d_ij);
653     /* printf("l[%d,%d]=%e\n",i,j,fc->iteration_matrix->mainBlock->val[iptr_ij]); */
654     sum-=d_ij;
655     break;
656     }
657     }
658     }
659     }
660     for (iptr_ij=pattern->col_couplePattern->ptr[i];iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
661 caltinay 3795 const index_t j = pattern->col_couplePattern->index[iptr_ij];
662     const double rtmp1 = fc->transport_matrix->col_coupleBlock->val[iptr_ij];
663 gross 3793 /* find entry a[j,i] */
664     #pragma ivdep
665     for (iptr_ji=pattern->row_couplePattern->ptr[j]; iptr_ji<pattern->row_couplePattern->ptr[j+1]; ++iptr_ji) {
666     if (pattern->row_couplePattern->index[iptr_ji]==i) {
667 caltinay 3795 const double rtmp2=fc->transport_matrix->row_coupleBlock->val[iptr_ji];
668     const double d_ij=-MIN3(0.,rtmp1,rtmp2);
669 gross 3793 fc->iteration_matrix->col_coupleBlock->val[iptr_ij]=-(rtmp1+d_ij);
670     fc->iteration_matrix->row_coupleBlock->val[iptr_ji]=-(rtmp2+d_ij);
671     sum-=d_ij;
672     break;
673     }
674     }
675     }
676     /* set main diagonal entry */
677     fc->main_diagonal_low_order_transport_matrix[i]=sum;
678     /* printf("%e \n", sum); */
679     }
680    
681     }
682     }
683    
684     /*
685 gross 3822 * out_i=m_i u_i + a * \sum_{j <> i} l_{ij} (u_j-u_i) where m_i>0
686     * = u_i where m_i<=0
687 gross 3793 *
688     */
689     void Paso_FCT_Solver_setMuPaLu(double* out,
690     const double* M,
691 caltinay 4801 const paso::Coupler* u_coupler,
692 gross 3793 const double a,
693     const Paso_SystemMatrix *L)
694     {
695     dim_t i;
696 caltinay 4800 const paso::SystemMatrixPattern *pattern = L->pattern;
697 caltinay 4801 const double *u=paso::Coupler_borrowLocalData(u_coupler);
698     const double *remote_u=paso::Coupler_borrowRemoteData(u_coupler);
699 caltinay 3795 index_t iptr_ij;
700 gross 3793 const dim_t n=Paso_SystemMatrix_getTotalNumRows(L);
701    
702     #pragma omp parallel for private(i) schedule(static)
703     for (i = 0; i < n; ++i) {
704 gross 3822 if ( M[i] > 0.) {
705     out[i]=M[i]*u[i];
706     } else {
707     out[i]=u[i];
708     }
709 gross 3793 }
710     if (ABS(a)>0) {
711     #pragma omp parallel for schedule(static) private(i, iptr_ij)
712     for (i = 0; i < n; ++i) {
713 gross 3823 if ( M[i] > 0.) {
714 caltinay 3795 double sum=0;
715     const double u_i=u[i];
716 gross 3793 #pragma ivdep
717     for (iptr_ij=(pattern->mainPattern->ptr[i]);iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
718     const index_t j=pattern->mainPattern->index[iptr_ij];
719 caltinay 3795 const double l_ij=L->mainBlock->val[iptr_ij];
720 gross 3793 sum+=l_ij*(u[j]-u_i);
721    
722     }
723     #pragma ivdep
724     for (iptr_ij=(pattern->col_couplePattern->ptr[i]);iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
725     const index_t j=pattern->col_couplePattern->index[iptr_ij];
726 caltinay 3795 const double l_ij=L->col_coupleBlock->val[iptr_ij];
727 gross 3793 sum+=l_ij*(remote_u[j]-u_i);
728     }
729     out[i]+=a*sum;
730 gross 3823 }
731 gross 3793 }
732     }
733     }
734    
735 jfenwick 3981 /* ************************************************************************************************************************************************* */
736 gross 3793
737    
738    
739    
740    
741    
742    

Properties

Name Value
svn:mergeinfo /branches/amg_from_3530/paso/src/FCT_Solver.cpp:3531-3826 /branches/lapack2681/paso/src/FCT_Solver.cpp:2682-2741 /branches/pasowrap/paso/src/FCT_Solver.cpp:3661-3674 /branches/py3_attempt2/paso/src/FCT_Solver.cpp:3871-3891 /branches/restext/paso/src/FCT_Solver.cpp:2610-2624 /branches/ripleygmg_from_3668/paso/src/FCT_Solver.cpp:3669-3791 /branches/stage3.0/paso/src/FCT_Solver.cpp:2569-2590 /branches/symbolic_from_3470/paso/src/FCT_Solver.cpp:3471-3974 /branches/symbolic_from_3470/ripley/test/python/paso/src/FCT_Solver.cpp:3517-3974 /release/3.0/paso/src/FCT_Solver.cpp:2591-2601 /trunk/paso/src/FCT_Solver.cpp:4257-4344 /trunk/ripley/test/python/paso/src/FCT_Solver.cpp:3480-3515

  ViewVC Help
Powered by ViewVC 1.1.26