/[escript]/trunk/paso/src/FCT_Solver.c
ViewVC logotype

Annotation of /trunk/paso/src/FCT_Solver.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3795 - (hide annotations)
Thu Feb 2 05:54:34 2012 UTC (7 years, 7 months ago) by caltinay
File MIME type: text/plain
File size: 29570 byte(s)
Had to remove 'nowait' directive from PDE boundary conditions since all
4 (2D) / 8 (3D) values are updated the way it is implemented. This caused
race conditions when updating RHS.

1 gross 3793
2     /*******************************************************
3     *
4     * Copyright (c) 2003-2010 by University of Queensland
5     * Earth Systems Science Computational Center (ESSCC)
6     * http://www.uq.edu.au/esscc
7     *
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13    
14    
15     /**************************************************************/
16    
17     /* Paso: Transport solver with flux correction (L is row sum zero)
18     *
19     * - Mv_t=Lv v(0)=u
20     *
21     * to return v(dt)
22     *
23     */
24     /**************************************************************/
25    
26     /* Author: l.gross@uq.edu.au */
27    
28     /**************************************************************/
29    
30     #include "FCT_Solver.h"
31     #include "Preconditioner.h"
32     #include "PasoUtil.h"
33    
34    
35     Paso_FCT_Solver* Paso_FCT_Solver_alloc(Paso_TransportProblem *fctp, Paso_Options* options)
36     {
37     Paso_FCT_Solver* out=NULL;
38     const dim_t blockSize=Paso_TransportProblem_getBlockSize(fctp);
39     const dim_t n = Paso_TransportProblem_getTotalNumRows(fctp);
40    
41     out=MEMALLOC(1,Paso_FCT_Solver);
42     if (! Esys_checkPtr(out)) {
43     out->transportproblem = Paso_TransportProblem_getReference(fctp);
44     out->mpi_info = Esys_MPIInfo_getReference(fctp->mpi_info);
45     out->flux_limiter = Paso_FCT_FluxLimiter_alloc(fctp);
46     out->b = MEMALLOC(n, double);
47     if ( (options->ode_solver == PASO_CRANK_NICOLSON) || (options->ode_solver == PASO_BACKWARD_EULER) ) {
48     out->du = MEMALLOC(n, double);
49     out->z = MEMALLOC(n, double);
50     } else {
51     out->du = NULL;
52     out->z=NULL;
53     }
54     out->u_coupler = Paso_Coupler_alloc(Paso_TransportProblem_borrowConnector(fctp), blockSize);
55     out->u_old_coupler = Paso_Coupler_alloc(Paso_TransportProblem_borrowConnector(fctp), blockSize);
56     out->omega=0;
57    
58     if ( options->ode_solver == PASO_LINEAR_CRANK_NICOLSON ) {
59     out->method = PASO_LINEAR_CRANK_NICOLSON;
60     } else if ( options->ode_solver == PASO_CRANK_NICOLSON ) {
61     out->method = PASO_CRANK_NICOLSON;
62     } else if ( options->ode_solver == PASO_BACKWARD_EULER ) {
63     out->method = PASO_BACKWARD_EULER;
64     } else {
65     Esys_setError(VALUE_ERROR, "Paso_FCT_Solver_alloc: unknown integration scheme.");
66     out->method = UNKNOWN;
67     }
68    
69     }
70    
71     if (Esys_noError()) {
72     return out;
73     } else {
74     Paso_FCT_Solver_free(out);
75     return NULL;
76     }
77    
78     }
79    
80     void Paso_FCT_Solver_free(Paso_FCT_Solver *in)
81     {
82     if (in != NULL) {
83     Paso_TransportProblem_free(in->transportproblem);
84     Paso_FCT_FluxLimiter_free(in->flux_limiter);
85     Esys_MPIInfo_free(in->mpi_info);
86     Paso_Coupler_free(in->u_old_coupler);
87     Paso_Coupler_free(in->u_coupler);
88    
89     MEMFREE(in->b);
90     MEMFREE(in->z);
91     MEMFREE(in->du);
92     MEMFREE(in);
93    
94     }
95     }
96    
97     double Paso_FCT_Solver_getSafeTimeStepSize(Paso_TransportProblem* fctp)
98     {
99     dim_t i, n;
100     double dt_max=LARGE_POSITIVE_FLOAT;
101     index_t fail=0;
102     n=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix);
103     /* set low order transport operator */
104     Paso_FCT_setLowOrderOperator(fctp);
105    
106     if (Esys_noError()) {
107     /*
108     * calculate time step size:
109     */
110     dt_max=LARGE_POSITIVE_FLOAT;
111     fail=0;
112     #pragma omp parallel private(i)
113     {
114     double dt_max_loc=LARGE_POSITIVE_FLOAT;
115     index_t fail_loc=0;
116     #pragma omp for schedule(static)
117     for (i=0;i<n;++i) {
118 caltinay 3795 const double l_ii=fctp->main_diagonal_low_order_transport_matrix[i];
119     const double m_i=fctp->lumped_mass_matrix[i];
120 gross 3793 if ( (m_i > 0) ) {
121     if (l_ii<0) dt_max_loc=MIN(dt_max_loc,m_i/(-l_ii));
122     } else {
123     fail_loc=-1;
124     }
125     }
126     #pragma omp critical
127     {
128     dt_max=MIN(dt_max,dt_max_loc);
129     fail=MIN(fail, fail_loc);
130     }
131     }
132     #ifdef ESYS_MPI
133     {
134     double rtmp_loc[2], rtmp[2];
135     rtmp_loc[0]=dt_max;
136     rtmp_loc[1]= (double) fail;
137     MPI_Allreduce(rtmp_loc, rtmp, 2, MPI_DOUBLE, MPI_MIN, fctp->mpi_info->comm);
138     dt_max=rtmp[0];
139     fail = rtmp[1] < 0 ? -1 : 0;
140     }
141     #endif
142     if (fail < 0 ) {
143     Esys_setError(VALUE_ERROR, "Paso_FCTSolver_getSafeTimeStepSize: negative mass matrix entries detected.");
144     return -1;
145     } else {
146     if (dt_max<LARGE_POSITIVE_FLOAT) dt_max*=2.;
147     }
148     }
149     return dt_max;
150     }
151    
152     /* modifies the main diagonal of the iteration matrix to introduce new dt */
153     void Paso_FCT_Solver_initialize(const double dt, Paso_FCT_Solver *fct_solver, Paso_Options* options, Paso_Performance* pp)
154     {
155     Paso_TransportProblem* fctp = fct_solver->transportproblem;
156     const index_t* main_iptr=Paso_TransportProblem_borrowMainDiagonalPointer(fctp);
157     const dim_t n=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix);
158     const double theta = Paso_FCT_Solver_getTheta(fct_solver);
159     const double omega=1./(dt* theta);
160     dim_t i;
161     Paso_Options options2;
162    
163    
164    
165     Paso_solve_free(fctp->iteration_matrix);
166     /*
167     * fctp->iteration_matrix[i,i]=m[i]/(dt theta) -l[i,i]
168     *
169     */
170     fct_solver->omega=omega;
171     fct_solver->dt = dt;
172     #pragma omp parallel for private(i)
173     for (i = 0; i < n; ++i) {
174 caltinay 3795 const double m=fctp->lumped_mass_matrix[i];
175     const double l_ii = fctp->main_diagonal_low_order_transport_matrix[i];
176 gross 3793 fctp->iteration_matrix->mainBlock->val[main_iptr[i]] = m * omega - l_ii;
177     }
178    
179     /* allocate preconditioner/solver */
180     Paso_Options_setDefaults(&options2);
181     options2.verbose = options->verbose;
182     if (fct_solver->method == PASO_LINEAR_CRANK_NICOLSON ) {
183     options2.preconditioner = PASO_GS;
184     } else {
185     options2.preconditioner = PASO_JACOBI;
186     /* options2.preconditioner = PASO_GS; */
187     }
188     options2.use_local_preconditioner = FALSE;
189     options2.sweeps=1;
190    
191     Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER_INIT);
192     Paso_SystemMatrix_setPreconditioner(fctp->iteration_matrix, &options2);
193     Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER_INIT);
194     }
195    
196     /* entry point for update proceedures */
197     err_t Paso_FCT_Solver_update(Paso_FCT_Solver *fct_solver, double* u, double *u_old, Paso_Options* options, Paso_Performance *pp)
198     {
199     const index_t method=fct_solver->method;
200     err_t err_out = SOLVER_NO_ERROR;
201    
202    
203     if (method == PASO_LINEAR_CRANK_NICOLSON) {
204     err_out=Paso_FCT_Solver_update_LCN(fct_solver, u, u_old, options, pp);
205    
206     } else if (method == PASO_CRANK_NICOLSON) {
207     err_out=Paso_FCT_Solver_updateNL(fct_solver, u, u_old, options, pp);
208    
209     } else if (method == PASO_BACKWARD_EULER) {
210     err_out=Paso_FCT_Solver_updateNL(fct_solver, u, u_old, options, pp);
211     } else {
212     err_out = SOLVER_INPUT_ERROR;
213     }
214     return err_out;
215    
216     }
217    
218     /* linear crank-nicolson update */
219     err_t Paso_FCT_Solver_update_LCN(Paso_FCT_Solver *fct_solver, double * u, double *u_old, Paso_Options* options, Paso_Performance *pp)
220     {
221     double const dt = fct_solver->dt;
222     dim_t sweep_max;
223     double *b = fct_solver->b;
224     double const RTOL = options->tolerance;
225     const dim_t n=Paso_TransportProblem_getTotalNumRows(fct_solver->transportproblem);
226     Paso_SystemMatrix * iteration_matrix = fct_solver->transportproblem->iteration_matrix;
227     err_t errorCode = SOLVER_NO_ERROR;
228    
229    
230     Paso_Coupler_startCollect(fct_solver->u_old_coupler,u_old);
231     Paso_Coupler_finishCollect(fct_solver->u_old_coupler);
232    
233     /* b[i]=m*u_tilde[i] = m u_old[i] + dt/2 sum_{j <> i} l_{ij}*(u_old[j]-u_old[i])
234     * note that iteration_matrix stores the negative values of the
235     * low order transport matrix l. Therefore a=-dt*0.5 is used. */
236    
237     Paso_FCT_Solver_setMuPaLu(b, fct_solver->transportproblem->lumped_mass_matrix,
238     fct_solver->u_old_coupler, -dt*0.5, iteration_matrix);
239    
240     /* solve for u_tilde : u_tilda = m^{-1} * b */
241     Paso_FCT_FluxLimiter_setU_tilda(fct_solver->flux_limiter, b);
242     /* u_tilda_connector is completed */
243    
244     /* calculate anti-diffusive fluxes for u_tilda */
245     Paso_FCT_setAntiDiffusionFlux_linearCN(fct_solver->flux_limiter->antidiffusive_fluxes,
246     fct_solver->transportproblem, dt,
247     fct_solver->flux_limiter->u_tilde_coupler,
248     fct_solver->u_old_coupler);
249    
250     /* b_i += sum_{j} limitation factor_{ij} * antidiffusive_flux_{ij} */
251     Paso_FCT_FluxLimiter_addLimitedFluxes_Start(fct_solver->flux_limiter);
252     Paso_FCT_FluxLimiter_addLimitedFluxes_Complete(fct_solver->flux_limiter, b);
253    
254    
255     /* solve (m-dt/2*L) u = b */
256     /* initial guess is u<- -u + 2*u_tilde */
257     Paso_Update(n, -1., u, 2., fct_solver->flux_limiter->u_tilde);
258     sweep_max = MAX((int) (- 2 * log(RTOL)/log(2.)-0.5),1);
259    
260     errorCode = Paso_Preconditioner_Smoother_solve_byTolerance( iteration_matrix, ((Paso_Preconditioner*) (iteration_matrix->solver_p))->gs,
261     u, b, RTOL, &sweep_max, TRUE);
262     if (errorCode == PRECONDITIONER_NO_ERROR) {
263     if (options->verbose) printf("Paso_FCT_Solver_update_LCN: convergence after %d Gauss-Seidel steps.\n",sweep_max);
264     errorCode=SOLVER_NO_ERROR;
265     } else {
266     if (options->verbose) printf("Paso_FCT_Solver_update_LCN: Gauss-Seidel failed within %d stesp (rel. tolerance %e).\n",sweep_max,RTOL);
267     errorCode= SOLVER_MAXITER_REACHED;
268     }
269     return errorCode;
270    
271     }
272    
273     err_t Paso_FCT_Solver_updateNL(Paso_FCT_Solver *fct_solver, double* u, double *u_old, Paso_Options* options, Paso_Performance *pp)
274     {
275 gross 3794 const dim_t num_critical_rates_max=3; /* number of rates >=critical_rate accepted before divergence is triggered */
276     const double critical_rate=0.95; /* expected value of convergence rate */
277    
278 gross 3793 double *b = fct_solver->b;
279     double *z = fct_solver->z;
280     double *du = fct_solver->du;
281     double const dt = fct_solver->dt;
282     Paso_TransportProblem* fctp = fct_solver->transportproblem;
283     Paso_FCT_FluxLimiter* flux_limiter = fct_solver->flux_limiter;
284     dim_t i;
285 gross 3794 double norm_u_tilde, ATOL, norm_du=LARGE_POSITIVE_FLOAT, norm_du_old, rate=1.;
286 gross 3793 err_t errorCode=SOLVER_NO_ERROR;
287     const dim_t n=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix);
288     const double atol=options->absolute_tolerance;
289     const double rtol=options->tolerance;
290     const dim_t max_m=options->iter_max;
291     dim_t m=0, num_critical_rates=0 ;
292     /* ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// */
293    
294     bool_t converged=FALSE, max_m_reached=FALSE,diverged=FALSE;
295     options->num_iter=0;
296    
297     Paso_Coupler_startCollect(fct_solver->u_old_coupler,u_old);
298     Paso_Coupler_finishCollect(fct_solver->u_old_coupler);
299     /* prepare u_tilda and flux limiter */
300     if ( fct_solver->method == PASO_BACKWARD_EULER ) {
301     /* b[i]=m_i* u_old[i] */
302     #pragma omp for private(i) schedule(static)
303     for (i = 0; i < n; ++i) {
304     b[i]=u_old[i]* fctp->lumped_mass_matrix[i];
305     }
306     } else {
307     /* b[i]=m_i* u_old[i] + dt/2 sum_{j <> i} l_{ij}*(u_old[j]-u_old[i]) = m_i * u_tilde_i
308     * note that iteration_matrix stores the negative values of the
309     * low order transport matrix l. Therefore a=-dt*0.5 is used. */
310     Paso_FCT_Solver_setMuPaLu(b,fctp->lumped_mass_matrix,fct_solver->u_old_coupler,-dt*0.5,fctp->iteration_matrix);
311     }
312     Paso_FCT_FluxLimiter_setU_tilda(flux_limiter, b); /* u_tilda = m^{-1} b */
313     /* u_tilda_connector is completed */
314    
315     /**********************************************************************************************************************/
316     /* calculate stopping criterium */
317     norm_u_tilde=Paso_lsup(n, flux_limiter->u_tilde, flux_limiter->mpi_info);
318     ATOL= rtol * norm_u_tilde + atol ;
319     if (options->verbose) printf("Paso_FCT_Solver_updateNL: iteration starts u_tilda lsup = %e (abs. tol = %e)\n",norm_u_tilde,ATOL);
320     /* ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// */
321    
322     /* u_old is an initial guess for u*/
323     Paso_Copy(n,u,u_old);
324    
325     while ( (!converged) && (!diverged) && (! max_m_reached) && Esys_noError()) {
326     Paso_Coupler_startCollect(fct_solver->u_coupler,u);
327     Paso_Coupler_finishCollect(fct_solver->u_coupler);
328    
329     /* set antidiffusive_flux_{ij} for u */
330     if (fct_solver->method == PASO_BACKWARD_EULER) {
331     Paso_FCT_setAntiDiffusionFlux_BE(fct_solver->flux_limiter->antidiffusive_fluxes, fctp, dt, fct_solver->u_coupler, fct_solver->u_old_coupler);
332     } else {
333     Paso_FCT_setAntiDiffusionFlux_CN(fct_solver->flux_limiter->antidiffusive_fluxes, fctp, dt, fct_solver->u_coupler, fct_solver->u_old_coupler);
334     }
335     /* start the calculation of the limitation factors_{fct_solver->ij} */
336     Paso_FCT_FluxLimiter_addLimitedFluxes_Start(flux_limiter); /* uses u_tilde */
337    
338     /*
339     * z_m[i]=b[i] - (m_i*u[i] - omega*sum_{j<>i} l_{ij} (u[j]-u[i]) ) omega = dt/2 or dt .
340     *
341     * note that iteration_matrix stores the negative values of the
342     * low order transport matrix l. Therefore a=dt*theta is used.
343     */
344     if (fct_solver-> method == PASO_BACKWARD_EULER) {
345     Paso_FCT_Solver_setMuPaLu(z, fctp->lumped_mass_matrix, fct_solver->u_coupler, dt, fctp->iteration_matrix);
346     } else {
347     Paso_FCT_Solver_setMuPaLu(z, fctp->lumped_mass_matrix, fct_solver->u_coupler, dt/2, fctp->iteration_matrix);
348     }
349    
350    
351     Paso_Update(n,-1.,z,1.,b); /* z=b-z */
352    
353    
354     /* z_i += sum_{j} limitation factor_{ij} * antidiffusive_flux_{ij} */
355     Paso_FCT_FluxLimiter_addLimitedFluxes_Complete(flux_limiter, z);
356    
357     /* we solve (m/omega - L ) * du = z */
358     if (fct_solver->method == PASO_BACKWARD_EULER) {
359     dim_t cntIter = options->iter_max;
360     double tol= Paso_l2(n, z, fctp->mpi_info) ;
361    
362     if ( m ==0) {
363     tol *=0.5;
364     } else {
365     tol *= MIN(MAX(rate*rate, 1e-2), 0.5);
366     }
367     /* use BiCGSTab with jacobi preconditioner ( m - omega * L ) */
368     Paso_zeroes(n,du);
369     errorCode = Paso_Solver_BiCGStab(fctp->iteration_matrix, z, du, &cntIter, &tol, pp);
370    
371     /* errorCode = Paso_Solver_GMRES(fctp->iteration_matrix, z, du, &cntIter, &tol, 10, 2000, pp); */
372     if (options->verbose) printf("Paso_FCT_Solver_updateNL: BiCGStab is completed after %d steps (residual =%e).\n",cntIter, tol);
373     options->num_iter+=cntIter;
374     if ( errorCode != SOLVER_NO_ERROR) break;
375     } else {
376     /* just use the main diagonal of (m/omega - L ) */
377    
378     Paso_Preconditioner_Smoother_solve(fctp->iteration_matrix, ((Paso_Preconditioner*) (fctp->iteration_matrix->solver_p))->jacobi,
379     du, z, 1, FALSE);
380     options->num_iter++;
381     }
382    
383    
384     Paso_Update(n,1.,u,fct_solver->omega,du);
385     norm_du_old=norm_du;
386     norm_du=Paso_lsup(n,du, fctp->mpi_info);
387     if (m ==0) {
388     if (options->verbose) printf("Paso_FCT_Solver_updateNL: step %d: increment= %e\n",m+1, norm_du * fct_solver->omega);
389     } else {
390     if (norm_du_old > 0.) {
391     rate=norm_du/norm_du_old;
392     } else if (norm_du <= 0.) {
393     rate=0.;
394     } else {
395     rate=LARGE_POSITIVE_FLOAT;
396     }
397     if (options->verbose) printf("Paso_FCT_Solver_updateNL: step %d: increment= %e (rate = %e)\n",m+1, norm_du * fct_solver->omega, rate);
398     num_critical_rates+=( rate<critical_rate ? 0 : 1);
399     max_m_reached=(m>max_m);
400     diverged = (num_critical_rates >= num_critical_rates_max);
401     converged=(norm_du * fct_solver->omega <= ATOL) ;
402     }
403     m++;
404     } /* end of while loop */
405     if (errorCode == SOLVER_NO_ERROR) {
406     if (converged) {
407     if (options->verbose) printf("Paso_FCT_Solver_updateNL: iteration is completed.\n");
408     errorCode=SOLVER_NO_ERROR;
409     } else if (diverged) {
410     if (options->verbose) printf("Paso_FCT_Solver_updateNL: divergence.\n");
411     errorCode=SOLVER_DIVERGENCE;
412     } else if (max_m_reached) {
413     if (options->verbose) printf("Paso_FCT_Solver_updateNL: maximum number of iteration steps reached.\n");
414     errorCode=SOLVER_MAXITER_REACHED;
415     }
416    
417     }
418     return errorCode;
419     }
420    
421    
422     /*
423     * AntiDiffusionFlux:
424     *
425     * f_{ij} = (m_{ij} - dt (1-theta) d_{ij}) (u_old[j]-u_old[i]) - (m_{ij} + dt theta d_{ij}) (u[j]-u[i])
426     *
427     * m=fc->mass matrix
428     * d=artifical diffusion matrix = L - K = - fc->iteration matrix - fc->transport matrix (away from main diagonal)
429     *
430     * for CN : theta =0.5
431     * for BE : theta = 1.
432     */
433    
434     void Paso_FCT_setAntiDiffusionFlux_CN(Paso_SystemMatrix *flux_matrix,
435     const Paso_TransportProblem* fct,
436     const double dt,
437     const Paso_Coupler* u_coupler,
438     const Paso_Coupler* u_old_coupler)
439     {
440     dim_t i;
441     index_t iptr_ij;
442    
443     const double *u = Paso_Coupler_borrowLocalData(u_coupler);
444     const double *u_old= Paso_Coupler_borrowLocalData(u_old_coupler);
445     const double *remote_u=Paso_Coupler_borrowRemoteData(u_coupler);
446     const double *remote_u_old=Paso_Coupler_borrowRemoteData(u_old_coupler);
447     const double dt_half= dt/2;
448     const Paso_SystemMatrixPattern *pattern=fct->iteration_matrix->pattern;
449     const dim_t n=Paso_SystemMatrix_getTotalNumRows(fct->iteration_matrix);
450    
451     #pragma omp parallel for schedule(static) private(i, iptr_ij)
452     for (i = 0; i < n; ++i) {
453 caltinay 3795 const double u_i = u[i];
454     const double u_old_i = u_old[i];
455 gross 3793
456     #pragma ivdep
457     for (iptr_ij=(pattern->mainPattern->ptr[i]);iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
458 caltinay 3795 const index_t j = pattern->mainPattern->index[iptr_ij];
459     const double m_ij = fct->mass_matrix->mainBlock->val[iptr_ij];
460     const double d_ij = fct->transport_matrix->mainBlock->val[iptr_ij]+fct->iteration_matrix->mainBlock->val[iptr_ij]; /* this is in fact -d_ij */
461     const double u_old_j = u_old[j];
462     const double u_j = u[j];
463 gross 3793
464     /* (m_{ij} - dt (1-theta) d_{ij}) (u_old[j]-u_old[i]) - (m_{ij} + dt theta d_{ij}) (u[j]-u[i]) */
465     flux_matrix->mainBlock->val[iptr_ij]=(m_ij+dt_half*d_ij)*(u_old_j-u_old_i) - (m_ij-dt_half*d_ij)*(u_j-u_i);
466    
467     }
468     #pragma ivdep
469     for (iptr_ij=(pattern->col_couplePattern->ptr[i]);iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
470 caltinay 3795 const index_t j = pattern->col_couplePattern->index[iptr_ij];
471     const double m_ij = fct->mass_matrix->col_coupleBlock->val[iptr_ij];
472     const double d_ij = fct->transport_matrix->col_coupleBlock->val[iptr_ij]+fct->iteration_matrix->col_coupleBlock->val[iptr_ij]; /* this is in fact -d_ij */
473     const double u_old_j = remote_u_old[j];
474     const double u_j = remote_u[j];
475 gross 3793 flux_matrix->col_coupleBlock->val[iptr_ij]=(m_ij+dt_half*d_ij)*(u_old_j-u_old_i)- (m_ij-dt_half*d_ij)*(u_j-u_i);
476     }
477     }
478     }
479    
480     void Paso_FCT_setAntiDiffusionFlux_BE(Paso_SystemMatrix *flux_matrix,
481     const Paso_TransportProblem* fct,
482     const double dt,
483     const Paso_Coupler* u_coupler,
484     const Paso_Coupler* u_old_coupler)
485     {
486     dim_t i;
487     index_t iptr_ij;
488    
489     const double *u=Paso_Coupler_borrowLocalData(u_coupler);
490     const double *u_old= Paso_Coupler_borrowLocalData(u_old_coupler);
491     const double *remote_u=Paso_Coupler_borrowRemoteData(u_coupler);
492     const double *remote_u_old=Paso_Coupler_borrowRemoteData(u_old_coupler);
493     const Paso_SystemMatrixPattern *pattern=fct->iteration_matrix->pattern;
494     const dim_t n=Paso_SystemMatrix_getTotalNumRows(fct->iteration_matrix);
495    
496     #pragma omp parallel for schedule(static) private(i, iptr_ij)
497     for (i = 0; i < n; ++i) {
498 caltinay 3795 const double u_i = u[i];
499     const double u_old_i = u_old[i];
500 gross 3793 #pragma ivdep
501     for (iptr_ij=pattern->mainPattern->ptr[i]; iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
502    
503 caltinay 3795 const index_t j = pattern->mainPattern->index[iptr_ij];
504     const double m_ij = fct->mass_matrix->mainBlock->val[iptr_ij];
505     const double d_ij = fct->transport_matrix->mainBlock->val[iptr_ij]+fct->iteration_matrix->mainBlock->val[iptr_ij]; /* this is in fact -d_ij */
506     const double u_old_j = u_old[j];
507     const double u_j = u[j];
508 gross 3793
509     flux_matrix->mainBlock->val[iptr_ij]=m_ij*(u_old_j-u_old_i)- (m_ij-dt*d_ij)*(u_j-u_i);
510     }
511     #pragma ivdep
512     for (iptr_ij=pattern->col_couplePattern->ptr[i]; iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
513 caltinay 3795 const index_t j = pattern->col_couplePattern->index[iptr_ij];
514     const double m_ij = fct->mass_matrix->col_coupleBlock->val[iptr_ij]; /* this is in fact -d_ij */
515     const double d_ij = fct->transport_matrix->col_coupleBlock->val[iptr_ij]+fct->iteration_matrix->col_coupleBlock->val[iptr_ij];
516     const double u_old_j = remote_u_old[j];
517     const double u_j = remote_u[j];
518 gross 3793
519     flux_matrix->col_coupleBlock->val[iptr_ij]=m_ij*(u_old_j-u_old_i)- (m_ij-dt*d_ij)*(u_j-u_i);
520     }
521     }
522     }
523    
524     /* special version of the ant-diffusive fluxes for the linear Crank-Nicolson scheme
525     * in fact this is evaluated for u = 2*u_tilde - u_old which is the predictor
526     * of the solution of the the stabilized problem at time dt using the forward Euler scheme
527     *
528     * f_{ij} = (m_{ij} - dt/2 d_{ij}) (u_old[j]-u_old[i]) - (m_{ij} + dt/2 d_{ij}) (u[j]-u[i])
529     * = (m_{ij} - dt/2 d_{ij}) * (u_old[j]-u_old[i]) - (m_{ij} + dt/2 d_{ij}) * ( 2*(u_tilde[j]-u_tilde[i]) - (u_old[j] -u_old [i]) )
530     * = 2* m_{ij} * ( (u_old[j]-u_tilde[j] - (u_old[i]) - u_tilde[i]) ) - dt d_{ij} * (u_tilde[j]-u_tilde[i])
531     *
532     */
533    
534     void Paso_FCT_setAntiDiffusionFlux_linearCN(Paso_SystemMatrix *flux_matrix,
535     const Paso_TransportProblem* fct,
536     const double dt,
537     const Paso_Coupler* u_tilde_coupler,
538     const Paso_Coupler* u_old_coupler)
539     {
540     dim_t i;
541     index_t iptr_ij;
542    
543     const double *u_tilde=Paso_Coupler_borrowLocalData(u_tilde_coupler);
544     const double *u_old= Paso_Coupler_borrowLocalData(u_old_coupler);
545     const double *remote_u_tilde=Paso_Coupler_borrowRemoteData(u_tilde_coupler);
546     const double *remote_u_old=Paso_Coupler_borrowRemoteData(u_old_coupler);
547     const Paso_SystemMatrixPattern *pattern=fct->iteration_matrix->pattern;
548     const dim_t n=Paso_SystemMatrix_getTotalNumRows(fct->iteration_matrix);
549    
550     #pragma omp parallel for schedule(static) private(i, iptr_ij)
551     for (i = 0; i < n; ++i) {
552 caltinay 3795 const double u_tilde_i = u_tilde[i];
553     const double u_old_i = u_old[i];
554     const double du_i = u_old_i - u_tilde_i;
555 gross 3793 #pragma ivdep
556     for (iptr_ij=(pattern->mainPattern->ptr[i]);iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
557    
558 caltinay 3795 const index_t j = pattern->mainPattern->index[iptr_ij];
559     const double m_ij = fct->mass_matrix->mainBlock->val[iptr_ij];
560     const double d_ij = fct->transport_matrix->mainBlock->val[iptr_ij]+fct->iteration_matrix->mainBlock->val[iptr_ij]; /* this is in fact -d_ij */
561     const double u_tilde_j = u_tilde[j];
562     const double u_old_j = u_old[j];
563     const double du_j = u_old_j - u_tilde_j;
564 gross 3793
565     flux_matrix->mainBlock->val[iptr_ij]=2 * m_ij * ( du_i - du_j ) + dt * d_ij * ( u_tilde_j - u_tilde_i);
566    
567     }
568     #pragma ivdep
569     for (iptr_ij=(pattern->col_couplePattern->ptr[i]);iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
570    
571 caltinay 3795 const index_t j = pattern->col_couplePattern->index[iptr_ij];
572     const double m_ij = fct->mass_matrix->col_coupleBlock->val[iptr_ij];
573     const double d_ij = fct->transport_matrix->col_coupleBlock->val[iptr_ij]+fct->iteration_matrix->col_coupleBlock->val[iptr_ij];/* this is in fact -d_ij */
574     const double u_tilde_j = remote_u_tilde[j];
575     const double u_old_j = remote_u_old[j];
576     const double du_j = u_old_j - u_tilde_j;
577 gross 3793
578     flux_matrix->col_coupleBlock->val[iptr_ij]= 2 * m_ij * ( du_i - du_j ) + dt * d_ij * ( u_tilde_j - u_tilde_i);
579    
580     }
581     }
582    
583     }
584    
585     /**************************************************************/
586    
587     /* Creates the low order transport matrix and stores its negative values
588     * into the iteration_matrix except for the main diagonal which is stored
589     * separately.
590     * If fc->iteration_matrix==NULL, fc->iteration_matrix is allocated
591     *
592     * a=transport_matrix
593     * b= low_order_transport_matrix = - iteration_matrix
594     * c=main diagonal low_order_transport_matrix
595     * initialize c[i] mit a[i,i]
596     *
597     * d_ij=max(0,-a[i,j],-a[j,i])
598     * b[i,j]=-(a[i,j]+d_ij)
599     * c[i]-=d_ij
600     */
601    
602     void Paso_FCT_setLowOrderOperator(Paso_TransportProblem * fc) {
603    
604     dim_t i;
605     index_t iptr_ij, iptr_ji;
606     const index_t* main_iptr=Paso_TransportProblem_borrowMainDiagonalPointer(fc);
607    
608     if (fc->iteration_matrix==NULL) {
609     fc->iteration_matrix=Paso_SystemMatrix_alloc(fc->transport_matrix->type,
610     fc->transport_matrix->pattern,
611     fc->transport_matrix->row_block_size,
612     fc->transport_matrix->col_block_size, TRUE);
613     }
614    
615     if (Esys_noError()) {
616     const Paso_SystemMatrixPattern *pattern=fc->iteration_matrix->pattern;
617     const dim_t n=Paso_SystemMatrix_getTotalNumRows(fc->iteration_matrix);
618     #pragma omp parallel for private(i, iptr_ij, iptr_ji) schedule(static)
619     for (i = 0; i < n; ++i) {
620     double sum=fc->transport_matrix->mainBlock->val[main_iptr[i]];
621    
622     /* printf("sum[%d] = %e -> ", i, sum); */
623     /* look at a[i,j] */
624     for (iptr_ij=pattern->mainPattern->ptr[i];iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
625 caltinay 3795 const index_t j = pattern->mainPattern->index[iptr_ij];
626     const double rtmp1 = fc->transport_matrix->mainBlock->val[iptr_ij];
627 gross 3793 if (j!=i) {
628     /* find entry a[j,i] */
629     #pragma ivdep
630     for (iptr_ji=pattern->mainPattern->ptr[j]; iptr_ji<pattern->mainPattern->ptr[j+1]; ++iptr_ji) {
631    
632     if ( pattern->mainPattern->index[iptr_ji] == i) {
633 caltinay 3795 const double rtmp2=fc->transport_matrix->mainBlock->val[iptr_ji];
634 gross 3793 /*
635     printf("a[%d,%d]=%e\n",i,j,rtmp1);
636     printf("a[%d,%d]=%e\n",j,i,rtmp2);
637     */
638    
639 caltinay 3795 const double d_ij=-MIN3(0.,rtmp1,rtmp2);
640 gross 3793 fc->iteration_matrix->mainBlock->val[iptr_ij]=-(rtmp1+d_ij);
641     /* printf("l[%d,%d]=%e\n",i,j,fc->iteration_matrix->mainBlock->val[iptr_ij]); */
642     sum-=d_ij;
643     break;
644     }
645     }
646     }
647     }
648     for (iptr_ij=pattern->col_couplePattern->ptr[i];iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
649 caltinay 3795 const index_t j = pattern->col_couplePattern->index[iptr_ij];
650     const double rtmp1 = fc->transport_matrix->col_coupleBlock->val[iptr_ij];
651 gross 3793 /* find entry a[j,i] */
652     #pragma ivdep
653     for (iptr_ji=pattern->row_couplePattern->ptr[j]; iptr_ji<pattern->row_couplePattern->ptr[j+1]; ++iptr_ji) {
654     if (pattern->row_couplePattern->index[iptr_ji]==i) {
655 caltinay 3795 const double rtmp2=fc->transport_matrix->row_coupleBlock->val[iptr_ji];
656     const double d_ij=-MIN3(0.,rtmp1,rtmp2);
657 gross 3793 fc->iteration_matrix->col_coupleBlock->val[iptr_ij]=-(rtmp1+d_ij);
658     fc->iteration_matrix->row_coupleBlock->val[iptr_ji]=-(rtmp2+d_ij);
659     sum-=d_ij;
660     break;
661     }
662     }
663     }
664     /* set main diagonal entry */
665     fc->main_diagonal_low_order_transport_matrix[i]=sum;
666     /* printf("%e \n", sum); */
667     }
668    
669     }
670     }
671    
672     /*
673     * out_i=m_i u_i + a * \sum_{j <> i} l_{ij} (u_j-u_i)
674     *
675     */
676     void Paso_FCT_Solver_setMuPaLu(double* out,
677     const double* M,
678     const Paso_Coupler* u_coupler,
679     const double a,
680     const Paso_SystemMatrix *L)
681     {
682     dim_t i;
683     const Paso_SystemMatrixPattern *pattern = L->pattern;
684     const double *u=Paso_Coupler_borrowLocalData(u_coupler);
685     const double *remote_u=Paso_Coupler_borrowRemoteData(u_coupler);
686 caltinay 3795 index_t iptr_ij;
687 gross 3793 const dim_t n=Paso_SystemMatrix_getTotalNumRows(L);
688    
689     #pragma omp parallel for private(i) schedule(static)
690     for (i = 0; i < n; ++i) {
691     out[i]=M[i]*u[i];
692     }
693     if (ABS(a)>0) {
694     #pragma omp parallel for schedule(static) private(i, iptr_ij)
695     for (i = 0; i < n; ++i) {
696 caltinay 3795 double sum=0;
697     const double u_i=u[i];
698 gross 3793 #pragma ivdep
699     for (iptr_ij=(pattern->mainPattern->ptr[i]);iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
700     const index_t j=pattern->mainPattern->index[iptr_ij];
701 caltinay 3795 const double l_ij=L->mainBlock->val[iptr_ij];
702 gross 3793 sum+=l_ij*(u[j]-u_i);
703    
704     }
705     #pragma ivdep
706     for (iptr_ij=(pattern->col_couplePattern->ptr[i]);iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
707     const index_t j=pattern->col_couplePattern->index[iptr_ij];
708 caltinay 3795 const double l_ij=L->col_coupleBlock->val[iptr_ij];
709 gross 3793 sum+=l_ij*(remote_u[j]-u_i);
710     }
711     out[i]+=a*sum;
712     }
713     }
714     }
715    
716     /* *************************************************************************************************************************** */
717    
718    
719    
720    
721    
722    
723    

  ViewVC Help
Powered by ViewVC 1.1.26