/[escript]/trunk/paso/src/FCT_Solver.c
ViewVC logotype

Annotation of /trunk/paso/src/FCT_Solver.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4154 - (hide annotations)
Tue Jan 22 09:30:23 2013 UTC (6 years, 8 months ago) by jfenwick
File MIME type: text/plain
File size: 30849 byte(s)
Round 1 of copyright fixes
1 gross 3793
2 jfenwick 3981 /*****************************************************************************
3 gross 3793 *
4 jfenwick 4154 * Copyright (c) 2003-2013 by University of Queensland
5 jfenwick 3981 * http://www.uq.edu.au
6 gross 3793 *
7     * Primary Business: Queensland, Australia
8     * Licensed under the Open Software License version 3.0
9     * http://www.opensource.org/licenses/osl-3.0.php
10     *
11 jfenwick 3981 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12     * Development since 2012 by School of Earth Sciences
13     *
14     *****************************************************************************/
15 gross 3793
16    
17 jfenwick 3981 /************************************************************************************/
18 gross 3793
19     /* Paso: Transport solver with flux correction (L is row sum zero)
20     *
21     * - Mv_t=Lv v(0)=u
22     *
23     * to return v(dt)
24     *
25     */
26 jfenwick 3981 /************************************************************************************/
27 gross 3793
28     /* Author: l.gross@uq.edu.au */
29    
30 jfenwick 3981 /************************************************************************************/
31 gross 3793
32     #include "FCT_Solver.h"
33     #include "Preconditioner.h"
34     #include "PasoUtil.h"
35    
36    
37     Paso_FCT_Solver* Paso_FCT_Solver_alloc(Paso_TransportProblem *fctp, Paso_Options* options)
38     {
39     Paso_FCT_Solver* out=NULL;
40     const dim_t blockSize=Paso_TransportProblem_getBlockSize(fctp);
41     const dim_t n = Paso_TransportProblem_getTotalNumRows(fctp);
42    
43     out=MEMALLOC(1,Paso_FCT_Solver);
44     if (! Esys_checkPtr(out)) {
45     out->transportproblem = Paso_TransportProblem_getReference(fctp);
46     out->mpi_info = Esys_MPIInfo_getReference(fctp->mpi_info);
47     out->flux_limiter = Paso_FCT_FluxLimiter_alloc(fctp);
48     out->b = MEMALLOC(n, double);
49     if ( (options->ode_solver == PASO_CRANK_NICOLSON) || (options->ode_solver == PASO_BACKWARD_EULER) ) {
50     out->du = MEMALLOC(n, double);
51     out->z = MEMALLOC(n, double);
52     } else {
53     out->du = NULL;
54     out->z=NULL;
55     }
56     out->u_coupler = Paso_Coupler_alloc(Paso_TransportProblem_borrowConnector(fctp), blockSize);
57     out->u_old_coupler = Paso_Coupler_alloc(Paso_TransportProblem_borrowConnector(fctp), blockSize);
58     out->omega=0;
59    
60     if ( options->ode_solver == PASO_LINEAR_CRANK_NICOLSON ) {
61     out->method = PASO_LINEAR_CRANK_NICOLSON;
62     } else if ( options->ode_solver == PASO_CRANK_NICOLSON ) {
63     out->method = PASO_CRANK_NICOLSON;
64     } else if ( options->ode_solver == PASO_BACKWARD_EULER ) {
65     out->method = PASO_BACKWARD_EULER;
66     } else {
67     Esys_setError(VALUE_ERROR, "Paso_FCT_Solver_alloc: unknown integration scheme.");
68     out->method = UNKNOWN;
69     }
70    
71     }
72    
73     if (Esys_noError()) {
74     return out;
75     } else {
76     Paso_FCT_Solver_free(out);
77     return NULL;
78     }
79    
80     }
81    
82     void Paso_FCT_Solver_free(Paso_FCT_Solver *in)
83     {
84     if (in != NULL) {
85     Paso_TransportProblem_free(in->transportproblem);
86     Paso_FCT_FluxLimiter_free(in->flux_limiter);
87     Esys_MPIInfo_free(in->mpi_info);
88     Paso_Coupler_free(in->u_old_coupler);
89     Paso_Coupler_free(in->u_coupler);
90    
91     MEMFREE(in->b);
92     MEMFREE(in->z);
93     MEMFREE(in->du);
94     MEMFREE(in);
95    
96     }
97     }
98    
99     double Paso_FCT_Solver_getSafeTimeStepSize(Paso_TransportProblem* fctp)
100     {
101     dim_t i, n;
102 gross 3823 double dt_max_loc=LARGE_POSITIVE_FLOAT;
103 gross 3793 double dt_max=LARGE_POSITIVE_FLOAT;
104     n=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix);
105     /* set low order transport operator */
106     Paso_FCT_setLowOrderOperator(fctp);
107    
108     if (Esys_noError()) {
109     /*
110     * calculate time step size:
111     */
112     dt_max=LARGE_POSITIVE_FLOAT;
113 gross 3823 #pragma omp parallel private(i, dt_max_loc)
114 gross 3793 {
115 gross 3824 dt_max_loc=LARGE_POSITIVE_FLOAT;
116 gross 3793 #pragma omp for schedule(static)
117     for (i=0;i<n;++i) {
118 caltinay 3795 const double l_ii=fctp->main_diagonal_low_order_transport_matrix[i];
119     const double m_i=fctp->lumped_mass_matrix[i];
120 gross 3822 if ( m_i > 0 ) {
121 gross 3793 if (l_ii<0) dt_max_loc=MIN(dt_max_loc,m_i/(-l_ii));
122 gross 3822 }
123     }
124 gross 3793 #pragma omp critical
125     {
126     dt_max=MIN(dt_max,dt_max_loc);
127     }
128     }
129     #ifdef ESYS_MPI
130     {
131 gross 3822 dt_max_loc=dt_max;
132     MPI_Allreduce(&dt_max_loc, &dt_max, 1, MPI_DOUBLE, MPI_MIN, fctp->mpi_info->comm);
133 gross 3793 }
134     #endif
135 gross 3822 if (dt_max<LARGE_POSITIVE_FLOAT) dt_max*=2.;
136 gross 3793 }
137     return dt_max;
138     }
139    
140     /* modifies the main diagonal of the iteration matrix to introduce new dt */
141     void Paso_FCT_Solver_initialize(const double dt, Paso_FCT_Solver *fct_solver, Paso_Options* options, Paso_Performance* pp)
142     {
143     Paso_TransportProblem* fctp = fct_solver->transportproblem;
144     const index_t* main_iptr=Paso_TransportProblem_borrowMainDiagonalPointer(fctp);
145     const dim_t n=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix);
146     const double theta = Paso_FCT_Solver_getTheta(fct_solver);
147     const double omega=1./(dt* theta);
148     dim_t i;
149     Paso_Options options2;
150    
151    
152    
153     Paso_solve_free(fctp->iteration_matrix);
154     /*
155     * fctp->iteration_matrix[i,i]=m[i]/(dt theta) -l[i,i]
156     *
157     */
158     fct_solver->omega=omega;
159     fct_solver->dt = dt;
160     #pragma omp parallel for private(i)
161     for (i = 0; i < n; ++i) {
162 gross 3822 const double m_i=fctp->lumped_mass_matrix[i];
163 caltinay 3795 const double l_ii = fctp->main_diagonal_low_order_transport_matrix[i];
164 gross 3822 if ( m_i > 0 ) {
165     fctp->iteration_matrix->mainBlock->val[main_iptr[i]] = m_i * omega - l_ii;
166     } else {
167     fctp->iteration_matrix->mainBlock->val[main_iptr[i]] = ABS(m_i * omega - l_ii)/(EPSILON*EPSILON);
168     }
169 gross 3793 }
170    
171     /* allocate preconditioner/solver */
172     Paso_Options_setDefaults(&options2);
173     options2.verbose = options->verbose;
174     if (fct_solver->method == PASO_LINEAR_CRANK_NICOLSON ) {
175     options2.preconditioner = PASO_GS;
176     } else {
177     options2.preconditioner = PASO_JACOBI;
178     /* options2.preconditioner = PASO_GS; */
179     }
180     options2.use_local_preconditioner = FALSE;
181 gross 3807 options2.sweeps=-1;
182 gross 3793
183     Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER_INIT);
184     Paso_SystemMatrix_setPreconditioner(fctp->iteration_matrix, &options2);
185     Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER_INIT);
186     }
187    
188     /* entry point for update proceedures */
189     err_t Paso_FCT_Solver_update(Paso_FCT_Solver *fct_solver, double* u, double *u_old, Paso_Options* options, Paso_Performance *pp)
190     {
191     const index_t method=fct_solver->method;
192     err_t err_out = SOLVER_NO_ERROR;
193    
194    
195     if (method == PASO_LINEAR_CRANK_NICOLSON) {
196     err_out=Paso_FCT_Solver_update_LCN(fct_solver, u, u_old, options, pp);
197    
198     } else if (method == PASO_CRANK_NICOLSON) {
199     err_out=Paso_FCT_Solver_updateNL(fct_solver, u, u_old, options, pp);
200    
201     } else if (method == PASO_BACKWARD_EULER) {
202     err_out=Paso_FCT_Solver_updateNL(fct_solver, u, u_old, options, pp);
203     } else {
204     err_out = SOLVER_INPUT_ERROR;
205     }
206     return err_out;
207    
208     }
209    
210     /* linear crank-nicolson update */
211     err_t Paso_FCT_Solver_update_LCN(Paso_FCT_Solver *fct_solver, double * u, double *u_old, Paso_Options* options, Paso_Performance *pp)
212     {
213     double const dt = fct_solver->dt;
214 gross 3823 dim_t sweep_max, i;
215 gross 3793 double *b = fct_solver->b;
216     double const RTOL = options->tolerance;
217     const dim_t n=Paso_TransportProblem_getTotalNumRows(fct_solver->transportproblem);
218     Paso_SystemMatrix * iteration_matrix = fct_solver->transportproblem->iteration_matrix;
219 gross 3823 const index_t* main_iptr=Paso_TransportProblem_borrowMainDiagonalPointer(fct_solver->transportproblem);
220 gross 3793 err_t errorCode = SOLVER_NO_ERROR;
221 gross 3823 double norm_u_tilde;
222 gross 3793
223     Paso_Coupler_startCollect(fct_solver->u_old_coupler,u_old);
224     Paso_Coupler_finishCollect(fct_solver->u_old_coupler);
225    
226 gross 3822 /* b[i]=m*u_tilde[i] = m u_old[i] + dt/2 sum_{j <> i} l_{ij}*(u_old[j]-u_old[i])
227     = u_tilde[i] = u_old[i] where constraint m<0.
228 gross 3793 * note that iteration_matrix stores the negative values of the
229     * low order transport matrix l. Therefore a=-dt*0.5 is used. */
230    
231     Paso_FCT_Solver_setMuPaLu(b, fct_solver->transportproblem->lumped_mass_matrix,
232 gross 3807 fct_solver->u_old_coupler, -dt*0.5, iteration_matrix);
233 gross 3793 /* solve for u_tilde : u_tilda = m^{-1} * b */
234     Paso_FCT_FluxLimiter_setU_tilda(fct_solver->flux_limiter, b);
235     /* u_tilda_connector is completed */
236 gross 3807
237 gross 3793 /* calculate anti-diffusive fluxes for u_tilda */
238     Paso_FCT_setAntiDiffusionFlux_linearCN(fct_solver->flux_limiter->antidiffusive_fluxes,
239     fct_solver->transportproblem, dt,
240     fct_solver->flux_limiter->u_tilde_coupler,
241     fct_solver->u_old_coupler);
242 gross 3807
243    
244 gross 3793 /* b_i += sum_{j} limitation factor_{ij} * antidiffusive_flux_{ij} */
245     Paso_FCT_FluxLimiter_addLimitedFluxes_Start(fct_solver->flux_limiter);
246     Paso_FCT_FluxLimiter_addLimitedFluxes_Complete(fct_solver->flux_limiter, b);
247    
248 gross 3823 Paso_Scale(n, b,fct_solver->omega );
249 gross 3807 /* solve (m-dt/2*L) u = b in the form (omega*m-L) u = b * omega with omega*dt/2=1 */
250 gross 3823 #pragma omp for private(i) schedule(static)
251     for (i = 0; i < n; ++i) {
252     if (! (fct_solver->transportproblem->lumped_mass_matrix[i] > 0 ) ) {
253     b[i] = fct_solver->flux_limiter->u_tilde[i]
254     * fct_solver->transportproblem->iteration_matrix->mainBlock->val[main_iptr[i]];
255     }
256     }
257 gross 3793 /* initial guess is u<- -u + 2*u_tilde */
258     Paso_Update(n, -1., u, 2., fct_solver->flux_limiter->u_tilde);
259 gross 3823
260 gross 3793 sweep_max = MAX((int) (- 2 * log(RTOL)/log(2.)-0.5),1);
261 gross 3823 norm_u_tilde=Paso_lsup(n, fct_solver->flux_limiter->u_tilde, fct_solver->flux_limiter->mpi_info);
262 gross 3807 if (options->verbose) {
263 gross 3823 printf("Paso_FCT_Solver_update_LCN: u_tilda lsup = %e (rtol = %e, max. sweeps = %d)\n",norm_u_tilde,RTOL * norm_u_tilde ,sweep_max);
264 gross 3807 }
265 gross 3793 errorCode = Paso_Preconditioner_Smoother_solve_byTolerance( iteration_matrix, ((Paso_Preconditioner*) (iteration_matrix->solver_p))->gs,
266     u, b, RTOL, &sweep_max, TRUE);
267     if (errorCode == PRECONDITIONER_NO_ERROR) {
268     if (options->verbose) printf("Paso_FCT_Solver_update_LCN: convergence after %d Gauss-Seidel steps.\n",sweep_max);
269     errorCode=SOLVER_NO_ERROR;
270     } else {
271     if (options->verbose) printf("Paso_FCT_Solver_update_LCN: Gauss-Seidel failed within %d stesp (rel. tolerance %e).\n",sweep_max,RTOL);
272     errorCode= SOLVER_MAXITER_REACHED;
273     }
274     return errorCode;
275    
276     }
277    
278     err_t Paso_FCT_Solver_updateNL(Paso_FCT_Solver *fct_solver, double* u, double *u_old, Paso_Options* options, Paso_Performance *pp)
279     {
280 gross 3794 const dim_t num_critical_rates_max=3; /* number of rates >=critical_rate accepted before divergence is triggered */
281     const double critical_rate=0.95; /* expected value of convergence rate */
282    
283 gross 3793 double *b = fct_solver->b;
284     double *z = fct_solver->z;
285     double *du = fct_solver->du;
286     double const dt = fct_solver->dt;
287     Paso_TransportProblem* fctp = fct_solver->transportproblem;
288     Paso_FCT_FluxLimiter* flux_limiter = fct_solver->flux_limiter;
289     dim_t i;
290 gross 3794 double norm_u_tilde, ATOL, norm_du=LARGE_POSITIVE_FLOAT, norm_du_old, rate=1.;
291 gross 3793 err_t errorCode=SOLVER_NO_ERROR;
292     const dim_t n=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix);
293     const double atol=options->absolute_tolerance;
294     const double rtol=options->tolerance;
295     const dim_t max_m=options->iter_max;
296     dim_t m=0, num_critical_rates=0 ;
297     /* ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// */
298    
299     bool_t converged=FALSE, max_m_reached=FALSE,diverged=FALSE;
300     options->num_iter=0;
301    
302     Paso_Coupler_startCollect(fct_solver->u_old_coupler,u_old);
303     Paso_Coupler_finishCollect(fct_solver->u_old_coupler);
304     /* prepare u_tilda and flux limiter */
305     if ( fct_solver->method == PASO_BACKWARD_EULER ) {
306     /* b[i]=m_i* u_old[i] */
307     #pragma omp for private(i) schedule(static)
308     for (i = 0; i < n; ++i) {
309 gross 3822 if (fctp->lumped_mass_matrix[i] > 0 ) {
310     b[i]=u_old[i]* fctp->lumped_mass_matrix[i];
311     } else {
312     b[i]=u_old[i];
313     }
314 gross 3793 }
315     } else {
316 gross 3822 /* b[i]=m_i* u_old[i] + dt/2 sum_{j <> i} l_{ij}*(u_old[j]-u_old[i]) = m_i * u_tilde_i where m_i>0
317     * = u_old[i] otherwise
318 gross 3793 * note that iteration_matrix stores the negative values of the
319     * low order transport matrix l. Therefore a=-dt*0.5 is used. */
320     Paso_FCT_Solver_setMuPaLu(b,fctp->lumped_mass_matrix,fct_solver->u_old_coupler,-dt*0.5,fctp->iteration_matrix);
321     }
322     Paso_FCT_FluxLimiter_setU_tilda(flux_limiter, b); /* u_tilda = m^{-1} b */
323     /* u_tilda_connector is completed */
324 jfenwick 3981 /********************************************************************************************************************************************/
325 gross 3793 /* calculate stopping criterium */
326     norm_u_tilde=Paso_lsup(n, flux_limiter->u_tilde, flux_limiter->mpi_info);
327     ATOL= rtol * norm_u_tilde + atol ;
328     if (options->verbose) printf("Paso_FCT_Solver_updateNL: iteration starts u_tilda lsup = %e (abs. tol = %e)\n",norm_u_tilde,ATOL);
329     /* ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// */
330    
331     /* u_old is an initial guess for u*/
332     Paso_Copy(n,u,u_old);
333    
334     while ( (!converged) && (!diverged) && (! max_m_reached) && Esys_noError()) {
335     Paso_Coupler_startCollect(fct_solver->u_coupler,u);
336     Paso_Coupler_finishCollect(fct_solver->u_coupler);
337    
338     /* set antidiffusive_flux_{ij} for u */
339     if (fct_solver->method == PASO_BACKWARD_EULER) {
340     Paso_FCT_setAntiDiffusionFlux_BE(fct_solver->flux_limiter->antidiffusive_fluxes, fctp, dt, fct_solver->u_coupler, fct_solver->u_old_coupler);
341     } else {
342     Paso_FCT_setAntiDiffusionFlux_CN(fct_solver->flux_limiter->antidiffusive_fluxes, fctp, dt, fct_solver->u_coupler, fct_solver->u_old_coupler);
343     }
344     /* start the calculation of the limitation factors_{fct_solver->ij} */
345     Paso_FCT_FluxLimiter_addLimitedFluxes_Start(flux_limiter); /* uses u_tilde */
346    
347     /*
348 gross 3822 * z_m[i]=b[i] - (m_i*u[i] - omega*sum_{j<>i} l_{ij} (u[j]-u[i]) ) where m_i>0
349     * ==b[i] - u[i] = u_tilda[i]-u[i] =0 otherwise
350     *
351     * omega = dt/2 or dt .
352 gross 3793 *
353     * note that iteration_matrix stores the negative values of the
354     * low order transport matrix l. Therefore a=dt*theta is used.
355     */
356     if (fct_solver-> method == PASO_BACKWARD_EULER) {
357     Paso_FCT_Solver_setMuPaLu(z, fctp->lumped_mass_matrix, fct_solver->u_coupler, dt, fctp->iteration_matrix);
358     } else {
359     Paso_FCT_Solver_setMuPaLu(z, fctp->lumped_mass_matrix, fct_solver->u_coupler, dt/2, fctp->iteration_matrix);
360     }
361    
362    
363     Paso_Update(n,-1.,z,1.,b); /* z=b-z */
364    
365     /* z_i += sum_{j} limitation factor_{ij} * antidiffusive_flux_{ij} */
366     Paso_FCT_FluxLimiter_addLimitedFluxes_Complete(flux_limiter, z);
367    
368     /* we solve (m/omega - L ) * du = z */
369     if (fct_solver->method == PASO_BACKWARD_EULER) {
370     dim_t cntIter = options->iter_max;
371     double tol= Paso_l2(n, z, fctp->mpi_info) ;
372    
373     if ( m ==0) {
374     tol *=0.5;
375     } else {
376     tol *= MIN(MAX(rate*rate, 1e-2), 0.5);
377     }
378     /* use BiCGSTab with jacobi preconditioner ( m - omega * L ) */
379     Paso_zeroes(n,du);
380     errorCode = Paso_Solver_BiCGStab(fctp->iteration_matrix, z, du, &cntIter, &tol, pp);
381    
382     /* errorCode = Paso_Solver_GMRES(fctp->iteration_matrix, z, du, &cntIter, &tol, 10, 2000, pp); */
383     if (options->verbose) printf("Paso_FCT_Solver_updateNL: BiCGStab is completed after %d steps (residual =%e).\n",cntIter, tol);
384     options->num_iter+=cntIter;
385     if ( errorCode != SOLVER_NO_ERROR) break;
386     } else {
387     /* just use the main diagonal of (m/omega - L ) */
388    
389     Paso_Preconditioner_Smoother_solve(fctp->iteration_matrix, ((Paso_Preconditioner*) (fctp->iteration_matrix->solver_p))->jacobi,
390     du, z, 1, FALSE);
391 gross 3825
392 gross 3793 options->num_iter++;
393     }
394 gross 3823
395 gross 3793
396     Paso_Update(n,1.,u,fct_solver->omega,du);
397     norm_du_old=norm_du;
398     norm_du=Paso_lsup(n,du, fctp->mpi_info);
399     if (m ==0) {
400     if (options->verbose) printf("Paso_FCT_Solver_updateNL: step %d: increment= %e\n",m+1, norm_du * fct_solver->omega);
401     } else {
402     if (norm_du_old > 0.) {
403     rate=norm_du/norm_du_old;
404     } else if (norm_du <= 0.) {
405     rate=0.;
406     } else {
407     rate=LARGE_POSITIVE_FLOAT;
408     }
409     if (options->verbose) printf("Paso_FCT_Solver_updateNL: step %d: increment= %e (rate = %e)\n",m+1, norm_du * fct_solver->omega, rate);
410     num_critical_rates+=( rate<critical_rate ? 0 : 1);
411     max_m_reached=(m>max_m);
412     diverged = (num_critical_rates >= num_critical_rates_max);
413     converged=(norm_du * fct_solver->omega <= ATOL) ;
414     }
415     m++;
416     } /* end of while loop */
417     if (errorCode == SOLVER_NO_ERROR) {
418     if (converged) {
419     if (options->verbose) printf("Paso_FCT_Solver_updateNL: iteration is completed.\n");
420     errorCode=SOLVER_NO_ERROR;
421     } else if (diverged) {
422     if (options->verbose) printf("Paso_FCT_Solver_updateNL: divergence.\n");
423     errorCode=SOLVER_DIVERGENCE;
424     } else if (max_m_reached) {
425     if (options->verbose) printf("Paso_FCT_Solver_updateNL: maximum number of iteration steps reached.\n");
426     errorCode=SOLVER_MAXITER_REACHED;
427     }
428    
429     }
430     return errorCode;
431     }
432    
433    
434     /*
435     * AntiDiffusionFlux:
436     *
437     * f_{ij} = (m_{ij} - dt (1-theta) d_{ij}) (u_old[j]-u_old[i]) - (m_{ij} + dt theta d_{ij}) (u[j]-u[i])
438     *
439     * m=fc->mass matrix
440     * d=artifical diffusion matrix = L - K = - fc->iteration matrix - fc->transport matrix (away from main diagonal)
441     *
442     * for CN : theta =0.5
443     * for BE : theta = 1.
444     */
445    
446     void Paso_FCT_setAntiDiffusionFlux_CN(Paso_SystemMatrix *flux_matrix,
447     const Paso_TransportProblem* fct,
448     const double dt,
449     const Paso_Coupler* u_coupler,
450     const Paso_Coupler* u_old_coupler)
451     {
452     dim_t i;
453     index_t iptr_ij;
454    
455     const double *u = Paso_Coupler_borrowLocalData(u_coupler);
456     const double *u_old= Paso_Coupler_borrowLocalData(u_old_coupler);
457     const double *remote_u=Paso_Coupler_borrowRemoteData(u_coupler);
458     const double *remote_u_old=Paso_Coupler_borrowRemoteData(u_old_coupler);
459     const double dt_half= dt/2;
460     const Paso_SystemMatrixPattern *pattern=fct->iteration_matrix->pattern;
461     const dim_t n=Paso_SystemMatrix_getTotalNumRows(fct->iteration_matrix);
462    
463     #pragma omp parallel for schedule(static) private(i, iptr_ij)
464     for (i = 0; i < n; ++i) {
465 caltinay 3795 const double u_i = u[i];
466     const double u_old_i = u_old[i];
467 gross 3793
468     #pragma ivdep
469     for (iptr_ij=(pattern->mainPattern->ptr[i]);iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
470 caltinay 3795 const index_t j = pattern->mainPattern->index[iptr_ij];
471     const double m_ij = fct->mass_matrix->mainBlock->val[iptr_ij];
472     const double d_ij = fct->transport_matrix->mainBlock->val[iptr_ij]+fct->iteration_matrix->mainBlock->val[iptr_ij]; /* this is in fact -d_ij */
473     const double u_old_j = u_old[j];
474     const double u_j = u[j];
475 gross 3793
476     /* (m_{ij} - dt (1-theta) d_{ij}) (u_old[j]-u_old[i]) - (m_{ij} + dt theta d_{ij}) (u[j]-u[i]) */
477     flux_matrix->mainBlock->val[iptr_ij]=(m_ij+dt_half*d_ij)*(u_old_j-u_old_i) - (m_ij-dt_half*d_ij)*(u_j-u_i);
478    
479     }
480     #pragma ivdep
481     for (iptr_ij=(pattern->col_couplePattern->ptr[i]);iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
482 caltinay 3795 const index_t j = pattern->col_couplePattern->index[iptr_ij];
483     const double m_ij = fct->mass_matrix->col_coupleBlock->val[iptr_ij];
484     const double d_ij = fct->transport_matrix->col_coupleBlock->val[iptr_ij]+fct->iteration_matrix->col_coupleBlock->val[iptr_ij]; /* this is in fact -d_ij */
485     const double u_old_j = remote_u_old[j];
486     const double u_j = remote_u[j];
487 gross 3793 flux_matrix->col_coupleBlock->val[iptr_ij]=(m_ij+dt_half*d_ij)*(u_old_j-u_old_i)- (m_ij-dt_half*d_ij)*(u_j-u_i);
488     }
489     }
490     }
491    
492     void Paso_FCT_setAntiDiffusionFlux_BE(Paso_SystemMatrix *flux_matrix,
493     const Paso_TransportProblem* fct,
494     const double dt,
495     const Paso_Coupler* u_coupler,
496     const Paso_Coupler* u_old_coupler)
497     {
498     dim_t i;
499     index_t iptr_ij;
500    
501     const double *u=Paso_Coupler_borrowLocalData(u_coupler);
502     const double *u_old= Paso_Coupler_borrowLocalData(u_old_coupler);
503     const double *remote_u=Paso_Coupler_borrowRemoteData(u_coupler);
504     const double *remote_u_old=Paso_Coupler_borrowRemoteData(u_old_coupler);
505     const Paso_SystemMatrixPattern *pattern=fct->iteration_matrix->pattern;
506     const dim_t n=Paso_SystemMatrix_getTotalNumRows(fct->iteration_matrix);
507    
508     #pragma omp parallel for schedule(static) private(i, iptr_ij)
509     for (i = 0; i < n; ++i) {
510 caltinay 3795 const double u_i = u[i];
511     const double u_old_i = u_old[i];
512 gross 3793 #pragma ivdep
513     for (iptr_ij=pattern->mainPattern->ptr[i]; iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
514    
515 caltinay 3795 const index_t j = pattern->mainPattern->index[iptr_ij];
516     const double m_ij = fct->mass_matrix->mainBlock->val[iptr_ij];
517     const double d_ij = fct->transport_matrix->mainBlock->val[iptr_ij]+fct->iteration_matrix->mainBlock->val[iptr_ij]; /* this is in fact -d_ij */
518     const double u_old_j = u_old[j];
519     const double u_j = u[j];
520 gross 3793
521     flux_matrix->mainBlock->val[iptr_ij]=m_ij*(u_old_j-u_old_i)- (m_ij-dt*d_ij)*(u_j-u_i);
522     }
523     #pragma ivdep
524     for (iptr_ij=pattern->col_couplePattern->ptr[i]; iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
525 caltinay 3795 const index_t j = pattern->col_couplePattern->index[iptr_ij];
526     const double m_ij = fct->mass_matrix->col_coupleBlock->val[iptr_ij]; /* this is in fact -d_ij */
527     const double d_ij = fct->transport_matrix->col_coupleBlock->val[iptr_ij]+fct->iteration_matrix->col_coupleBlock->val[iptr_ij];
528     const double u_old_j = remote_u_old[j];
529     const double u_j = remote_u[j];
530 gross 3793
531     flux_matrix->col_coupleBlock->val[iptr_ij]=m_ij*(u_old_j-u_old_i)- (m_ij-dt*d_ij)*(u_j-u_i);
532     }
533     }
534     }
535    
536     /* special version of the ant-diffusive fluxes for the linear Crank-Nicolson scheme
537     * in fact this is evaluated for u = 2*u_tilde - u_old which is the predictor
538     * of the solution of the the stabilized problem at time dt using the forward Euler scheme
539     *
540     * f_{ij} = (m_{ij} - dt/2 d_{ij}) (u_old[j]-u_old[i]) - (m_{ij} + dt/2 d_{ij}) (u[j]-u[i])
541     * = (m_{ij} - dt/2 d_{ij}) * (u_old[j]-u_old[i]) - (m_{ij} + dt/2 d_{ij}) * ( 2*(u_tilde[j]-u_tilde[i]) - (u_old[j] -u_old [i]) )
542     * = 2* m_{ij} * ( (u_old[j]-u_tilde[j] - (u_old[i]) - u_tilde[i]) ) - dt d_{ij} * (u_tilde[j]-u_tilde[i])
543     *
544     */
545    
546     void Paso_FCT_setAntiDiffusionFlux_linearCN(Paso_SystemMatrix *flux_matrix,
547     const Paso_TransportProblem* fct,
548     const double dt,
549     const Paso_Coupler* u_tilde_coupler,
550     const Paso_Coupler* u_old_coupler)
551     {
552     dim_t i;
553     index_t iptr_ij;
554    
555     const double *u_tilde=Paso_Coupler_borrowLocalData(u_tilde_coupler);
556     const double *u_old= Paso_Coupler_borrowLocalData(u_old_coupler);
557     const double *remote_u_tilde=Paso_Coupler_borrowRemoteData(u_tilde_coupler);
558     const double *remote_u_old=Paso_Coupler_borrowRemoteData(u_old_coupler);
559     const Paso_SystemMatrixPattern *pattern=fct->iteration_matrix->pattern;
560     const dim_t n=Paso_SystemMatrix_getTotalNumRows(fct->iteration_matrix);
561    
562     #pragma omp parallel for schedule(static) private(i, iptr_ij)
563     for (i = 0; i < n; ++i) {
564 caltinay 3795 const double u_tilde_i = u_tilde[i];
565     const double u_old_i = u_old[i];
566 gross 3807 const double du_i = u_tilde_i - u_old_i;
567 gross 3793 #pragma ivdep
568     for (iptr_ij=(pattern->mainPattern->ptr[i]);iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
569    
570 caltinay 3795 const index_t j = pattern->mainPattern->index[iptr_ij];
571     const double m_ij = fct->mass_matrix->mainBlock->val[iptr_ij];
572     const double d_ij = fct->transport_matrix->mainBlock->val[iptr_ij]+fct->iteration_matrix->mainBlock->val[iptr_ij]; /* this is in fact -d_ij */
573     const double u_tilde_j = u_tilde[j];
574     const double u_old_j = u_old[j];
575 gross 3807 const double du_j = u_tilde_j - u_old_j;
576 gross 3793
577 gross 3807 flux_matrix->mainBlock->val[iptr_ij]= 2 * m_ij * ( du_i - du_j ) - dt * d_ij * ( u_tilde_i - u_tilde_j);
578 gross 3793 }
579     #pragma ivdep
580     for (iptr_ij=(pattern->col_couplePattern->ptr[i]);iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
581    
582 caltinay 3795 const index_t j = pattern->col_couplePattern->index[iptr_ij];
583     const double m_ij = fct->mass_matrix->col_coupleBlock->val[iptr_ij];
584     const double d_ij = fct->transport_matrix->col_coupleBlock->val[iptr_ij]+fct->iteration_matrix->col_coupleBlock->val[iptr_ij];/* this is in fact -d_ij */
585     const double u_tilde_j = remote_u_tilde[j];
586     const double u_old_j = remote_u_old[j];
587 gross 3807 const double du_j = u_tilde_j - u_old_j;
588 gross 3793
589 gross 3807 flux_matrix->col_coupleBlock->val[iptr_ij]= 2 * m_ij * ( du_i - du_j ) - dt * d_ij * ( u_tilde_i - u_tilde_j);
590 gross 3793
591     }
592     }
593    
594     }
595    
596 jfenwick 3981 /************************************************************************************/
597 gross 3793
598     /* Creates the low order transport matrix and stores its negative values
599     * into the iteration_matrix except for the main diagonal which is stored
600     * separately.
601     * If fc->iteration_matrix==NULL, fc->iteration_matrix is allocated
602     *
603     * a=transport_matrix
604     * b= low_order_transport_matrix = - iteration_matrix
605     * c=main diagonal low_order_transport_matrix
606     * initialize c[i] mit a[i,i]
607     *
608     * d_ij=max(0,-a[i,j],-a[j,i])
609     * b[i,j]=-(a[i,j]+d_ij)
610     * c[i]-=d_ij
611     */
612    
613     void Paso_FCT_setLowOrderOperator(Paso_TransportProblem * fc) {
614    
615     dim_t i;
616     index_t iptr_ij, iptr_ji;
617     const index_t* main_iptr=Paso_TransportProblem_borrowMainDiagonalPointer(fc);
618    
619     if (fc->iteration_matrix==NULL) {
620     fc->iteration_matrix=Paso_SystemMatrix_alloc(fc->transport_matrix->type,
621     fc->transport_matrix->pattern,
622     fc->transport_matrix->row_block_size,
623     fc->transport_matrix->col_block_size, TRUE);
624     }
625    
626     if (Esys_noError()) {
627     const Paso_SystemMatrixPattern *pattern=fc->iteration_matrix->pattern;
628     const dim_t n=Paso_SystemMatrix_getTotalNumRows(fc->iteration_matrix);
629     #pragma omp parallel for private(i, iptr_ij, iptr_ji) schedule(static)
630     for (i = 0; i < n; ++i) {
631     double sum=fc->transport_matrix->mainBlock->val[main_iptr[i]];
632    
633     /* printf("sum[%d] = %e -> ", i, sum); */
634     /* look at a[i,j] */
635     for (iptr_ij=pattern->mainPattern->ptr[i];iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
636 caltinay 3795 const index_t j = pattern->mainPattern->index[iptr_ij];
637     const double rtmp1 = fc->transport_matrix->mainBlock->val[iptr_ij];
638 gross 3793 if (j!=i) {
639     /* find entry a[j,i] */
640     #pragma ivdep
641     for (iptr_ji=pattern->mainPattern->ptr[j]; iptr_ji<pattern->mainPattern->ptr[j+1]; ++iptr_ji) {
642    
643     if ( pattern->mainPattern->index[iptr_ji] == i) {
644 caltinay 3795 const double rtmp2=fc->transport_matrix->mainBlock->val[iptr_ji];
645 gross 3793 /*
646     printf("a[%d,%d]=%e\n",i,j,rtmp1);
647     printf("a[%d,%d]=%e\n",j,i,rtmp2);
648     */
649    
650 caltinay 3795 const double d_ij=-MIN3(0.,rtmp1,rtmp2);
651 gross 3793 fc->iteration_matrix->mainBlock->val[iptr_ij]=-(rtmp1+d_ij);
652     /* printf("l[%d,%d]=%e\n",i,j,fc->iteration_matrix->mainBlock->val[iptr_ij]); */
653     sum-=d_ij;
654     break;
655     }
656     }
657     }
658     }
659     for (iptr_ij=pattern->col_couplePattern->ptr[i];iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
660 caltinay 3795 const index_t j = pattern->col_couplePattern->index[iptr_ij];
661     const double rtmp1 = fc->transport_matrix->col_coupleBlock->val[iptr_ij];
662 gross 3793 /* find entry a[j,i] */
663     #pragma ivdep
664     for (iptr_ji=pattern->row_couplePattern->ptr[j]; iptr_ji<pattern->row_couplePattern->ptr[j+1]; ++iptr_ji) {
665     if (pattern->row_couplePattern->index[iptr_ji]==i) {
666 caltinay 3795 const double rtmp2=fc->transport_matrix->row_coupleBlock->val[iptr_ji];
667     const double d_ij=-MIN3(0.,rtmp1,rtmp2);
668 gross 3793 fc->iteration_matrix->col_coupleBlock->val[iptr_ij]=-(rtmp1+d_ij);
669     fc->iteration_matrix->row_coupleBlock->val[iptr_ji]=-(rtmp2+d_ij);
670     sum-=d_ij;
671     break;
672     }
673     }
674     }
675     /* set main diagonal entry */
676     fc->main_diagonal_low_order_transport_matrix[i]=sum;
677     /* printf("%e \n", sum); */
678     }
679    
680     }
681     }
682    
683     /*
684 gross 3822 * out_i=m_i u_i + a * \sum_{j <> i} l_{ij} (u_j-u_i) where m_i>0
685     * = u_i where m_i<=0
686 gross 3793 *
687     */
688     void Paso_FCT_Solver_setMuPaLu(double* out,
689     const double* M,
690     const Paso_Coupler* u_coupler,
691     const double a,
692     const Paso_SystemMatrix *L)
693     {
694     dim_t i;
695     const Paso_SystemMatrixPattern *pattern = L->pattern;
696     const double *u=Paso_Coupler_borrowLocalData(u_coupler);
697     const double *remote_u=Paso_Coupler_borrowRemoteData(u_coupler);
698 caltinay 3795 index_t iptr_ij;
699 gross 3793 const dim_t n=Paso_SystemMatrix_getTotalNumRows(L);
700    
701     #pragma omp parallel for private(i) schedule(static)
702     for (i = 0; i < n; ++i) {
703 gross 3822 if ( M[i] > 0.) {
704     out[i]=M[i]*u[i];
705     } else {
706     out[i]=u[i];
707     }
708 gross 3793 }
709     if (ABS(a)>0) {
710     #pragma omp parallel for schedule(static) private(i, iptr_ij)
711     for (i = 0; i < n; ++i) {
712 gross 3823 if ( M[i] > 0.) {
713 caltinay 3795 double sum=0;
714     const double u_i=u[i];
715 gross 3793 #pragma ivdep
716     for (iptr_ij=(pattern->mainPattern->ptr[i]);iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
717     const index_t j=pattern->mainPattern->index[iptr_ij];
718 caltinay 3795 const double l_ij=L->mainBlock->val[iptr_ij];
719 gross 3793 sum+=l_ij*(u[j]-u_i);
720    
721     }
722     #pragma ivdep
723     for (iptr_ij=(pattern->col_couplePattern->ptr[i]);iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
724     const index_t j=pattern->col_couplePattern->index[iptr_ij];
725 caltinay 3795 const double l_ij=L->col_coupleBlock->val[iptr_ij];
726 gross 3793 sum+=l_ij*(remote_u[j]-u_i);
727     }
728     out[i]+=a*sum;
729 gross 3823 }
730 gross 3793 }
731     }
732     }
733    
734 jfenwick 3981 /* ************************************************************************************************************************************************* */
735 gross 3793
736    
737    
738    
739    
740    
741    

  ViewVC Help
Powered by ViewVC 1.1.26