/[escript]/trunk/paso/src/FCT_Solver.c
ViewVC logotype

Annotation of /trunk/paso/src/FCT_Solver.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3824 - (hide annotations)
Tue Feb 14 01:22:16 2012 UTC (7 years, 7 months ago) by gross
File MIME type: text/plain
File size: 30585 byte(s)
fix for openmp
1 gross 3793
2     /*******************************************************
3     *
4     * Copyright (c) 2003-2010 by University of Queensland
5     * Earth Systems Science Computational Center (ESSCC)
6     * http://www.uq.edu.au/esscc
7     *
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13    
14    
15     /**************************************************************/
16    
17     /* Paso: Transport solver with flux correction (L is row sum zero)
18     *
19     * - Mv_t=Lv v(0)=u
20     *
21     * to return v(dt)
22     *
23     */
24     /**************************************************************/
25    
26     /* Author: l.gross@uq.edu.au */
27    
28     /**************************************************************/
29    
30     #include "FCT_Solver.h"
31     #include "Preconditioner.h"
32     #include "PasoUtil.h"
33    
34    
35     Paso_FCT_Solver* Paso_FCT_Solver_alloc(Paso_TransportProblem *fctp, Paso_Options* options)
36     {
37     Paso_FCT_Solver* out=NULL;
38     const dim_t blockSize=Paso_TransportProblem_getBlockSize(fctp);
39     const dim_t n = Paso_TransportProblem_getTotalNumRows(fctp);
40    
41     out=MEMALLOC(1,Paso_FCT_Solver);
42     if (! Esys_checkPtr(out)) {
43     out->transportproblem = Paso_TransportProblem_getReference(fctp);
44     out->mpi_info = Esys_MPIInfo_getReference(fctp->mpi_info);
45     out->flux_limiter = Paso_FCT_FluxLimiter_alloc(fctp);
46     out->b = MEMALLOC(n, double);
47     if ( (options->ode_solver == PASO_CRANK_NICOLSON) || (options->ode_solver == PASO_BACKWARD_EULER) ) {
48     out->du = MEMALLOC(n, double);
49     out->z = MEMALLOC(n, double);
50     } else {
51     out->du = NULL;
52     out->z=NULL;
53     }
54     out->u_coupler = Paso_Coupler_alloc(Paso_TransportProblem_borrowConnector(fctp), blockSize);
55     out->u_old_coupler = Paso_Coupler_alloc(Paso_TransportProblem_borrowConnector(fctp), blockSize);
56     out->omega=0;
57    
58     if ( options->ode_solver == PASO_LINEAR_CRANK_NICOLSON ) {
59     out->method = PASO_LINEAR_CRANK_NICOLSON;
60     } else if ( options->ode_solver == PASO_CRANK_NICOLSON ) {
61     out->method = PASO_CRANK_NICOLSON;
62     } else if ( options->ode_solver == PASO_BACKWARD_EULER ) {
63     out->method = PASO_BACKWARD_EULER;
64     } else {
65     Esys_setError(VALUE_ERROR, "Paso_FCT_Solver_alloc: unknown integration scheme.");
66     out->method = UNKNOWN;
67     }
68    
69     }
70    
71     if (Esys_noError()) {
72     return out;
73     } else {
74     Paso_FCT_Solver_free(out);
75     return NULL;
76     }
77    
78     }
79    
80     void Paso_FCT_Solver_free(Paso_FCT_Solver *in)
81     {
82     if (in != NULL) {
83     Paso_TransportProblem_free(in->transportproblem);
84     Paso_FCT_FluxLimiter_free(in->flux_limiter);
85     Esys_MPIInfo_free(in->mpi_info);
86     Paso_Coupler_free(in->u_old_coupler);
87     Paso_Coupler_free(in->u_coupler);
88    
89     MEMFREE(in->b);
90     MEMFREE(in->z);
91     MEMFREE(in->du);
92     MEMFREE(in);
93    
94     }
95     }
96    
97     double Paso_FCT_Solver_getSafeTimeStepSize(Paso_TransportProblem* fctp)
98     {
99     dim_t i, n;
100 gross 3823 double dt_max_loc=LARGE_POSITIVE_FLOAT;
101 gross 3793 double dt_max=LARGE_POSITIVE_FLOAT;
102     n=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix);
103     /* set low order transport operator */
104     Paso_FCT_setLowOrderOperator(fctp);
105    
106     if (Esys_noError()) {
107     /*
108     * calculate time step size:
109     */
110     dt_max=LARGE_POSITIVE_FLOAT;
111 gross 3823 #pragma omp parallel private(i, dt_max_loc)
112 gross 3793 {
113 gross 3824 dt_max_loc=LARGE_POSITIVE_FLOAT;
114 gross 3793 #pragma omp for schedule(static)
115     for (i=0;i<n;++i) {
116 caltinay 3795 const double l_ii=fctp->main_diagonal_low_order_transport_matrix[i];
117     const double m_i=fctp->lumped_mass_matrix[i];
118 gross 3822 if ( m_i > 0 ) {
119 gross 3793 if (l_ii<0) dt_max_loc=MIN(dt_max_loc,m_i/(-l_ii));
120 gross 3822 }
121     }
122 gross 3793 #pragma omp critical
123     {
124     dt_max=MIN(dt_max,dt_max_loc);
125     }
126     }
127     #ifdef ESYS_MPI
128     {
129 gross 3822 dt_max_loc=dt_max;
130     MPI_Allreduce(&dt_max_loc, &dt_max, 1, MPI_DOUBLE, MPI_MIN, fctp->mpi_info->comm);
131 gross 3793 }
132     #endif
133 gross 3822 if (dt_max<LARGE_POSITIVE_FLOAT) dt_max*=2.;
134 gross 3793 }
135     return dt_max;
136     }
137    
138     /* modifies the main diagonal of the iteration matrix to introduce new dt */
139     void Paso_FCT_Solver_initialize(const double dt, Paso_FCT_Solver *fct_solver, Paso_Options* options, Paso_Performance* pp)
140     {
141     Paso_TransportProblem* fctp = fct_solver->transportproblem;
142     const index_t* main_iptr=Paso_TransportProblem_borrowMainDiagonalPointer(fctp);
143     const dim_t n=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix);
144     const double theta = Paso_FCT_Solver_getTheta(fct_solver);
145     const double omega=1./(dt* theta);
146     dim_t i;
147     Paso_Options options2;
148    
149    
150    
151     Paso_solve_free(fctp->iteration_matrix);
152     /*
153     * fctp->iteration_matrix[i,i]=m[i]/(dt theta) -l[i,i]
154     *
155     */
156     fct_solver->omega=omega;
157     fct_solver->dt = dt;
158     #pragma omp parallel for private(i)
159     for (i = 0; i < n; ++i) {
160 gross 3822 const double m_i=fctp->lumped_mass_matrix[i];
161 caltinay 3795 const double l_ii = fctp->main_diagonal_low_order_transport_matrix[i];
162 gross 3822 if ( m_i > 0 ) {
163     fctp->iteration_matrix->mainBlock->val[main_iptr[i]] = m_i * omega - l_ii;
164     } else {
165     fctp->iteration_matrix->mainBlock->val[main_iptr[i]] = ABS(m_i * omega - l_ii)/(EPSILON*EPSILON);
166     }
167 gross 3793 }
168    
169     /* allocate preconditioner/solver */
170     Paso_Options_setDefaults(&options2);
171     options2.verbose = options->verbose;
172     if (fct_solver->method == PASO_LINEAR_CRANK_NICOLSON ) {
173     options2.preconditioner = PASO_GS;
174     } else {
175     options2.preconditioner = PASO_JACOBI;
176     /* options2.preconditioner = PASO_GS; */
177     }
178     options2.use_local_preconditioner = FALSE;
179 gross 3807 options2.sweeps=-1;
180 gross 3793
181     Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER_INIT);
182     Paso_SystemMatrix_setPreconditioner(fctp->iteration_matrix, &options2);
183     Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER_INIT);
184     }
185    
186     /* entry point for update proceedures */
187     err_t Paso_FCT_Solver_update(Paso_FCT_Solver *fct_solver, double* u, double *u_old, Paso_Options* options, Paso_Performance *pp)
188     {
189     const index_t method=fct_solver->method;
190     err_t err_out = SOLVER_NO_ERROR;
191    
192    
193     if (method == PASO_LINEAR_CRANK_NICOLSON) {
194     err_out=Paso_FCT_Solver_update_LCN(fct_solver, u, u_old, options, pp);
195    
196     } else if (method == PASO_CRANK_NICOLSON) {
197     err_out=Paso_FCT_Solver_updateNL(fct_solver, u, u_old, options, pp);
198    
199     } else if (method == PASO_BACKWARD_EULER) {
200     err_out=Paso_FCT_Solver_updateNL(fct_solver, u, u_old, options, pp);
201     } else {
202     err_out = SOLVER_INPUT_ERROR;
203     }
204     return err_out;
205    
206     }
207    
208     /* linear crank-nicolson update */
209     err_t Paso_FCT_Solver_update_LCN(Paso_FCT_Solver *fct_solver, double * u, double *u_old, Paso_Options* options, Paso_Performance *pp)
210     {
211     double const dt = fct_solver->dt;
212 gross 3823 dim_t sweep_max, i;
213 gross 3793 double *b = fct_solver->b;
214     double const RTOL = options->tolerance;
215     const dim_t n=Paso_TransportProblem_getTotalNumRows(fct_solver->transportproblem);
216     Paso_SystemMatrix * iteration_matrix = fct_solver->transportproblem->iteration_matrix;
217 gross 3823 const index_t* main_iptr=Paso_TransportProblem_borrowMainDiagonalPointer(fct_solver->transportproblem);
218 gross 3793 err_t errorCode = SOLVER_NO_ERROR;
219 gross 3823 double norm_u_tilde;
220 gross 3793
221     Paso_Coupler_startCollect(fct_solver->u_old_coupler,u_old);
222     Paso_Coupler_finishCollect(fct_solver->u_old_coupler);
223    
224 gross 3822 /* b[i]=m*u_tilde[i] = m u_old[i] + dt/2 sum_{j <> i} l_{ij}*(u_old[j]-u_old[i])
225     = u_tilde[i] = u_old[i] where constraint m<0.
226 gross 3793 * note that iteration_matrix stores the negative values of the
227     * low order transport matrix l. Therefore a=-dt*0.5 is used. */
228    
229     Paso_FCT_Solver_setMuPaLu(b, fct_solver->transportproblem->lumped_mass_matrix,
230 gross 3807 fct_solver->u_old_coupler, -dt*0.5, iteration_matrix);
231 gross 3793 /* solve for u_tilde : u_tilda = m^{-1} * b */
232     Paso_FCT_FluxLimiter_setU_tilda(fct_solver->flux_limiter, b);
233     /* u_tilda_connector is completed */
234 gross 3807
235 gross 3793 /* calculate anti-diffusive fluxes for u_tilda */
236     Paso_FCT_setAntiDiffusionFlux_linearCN(fct_solver->flux_limiter->antidiffusive_fluxes,
237     fct_solver->transportproblem, dt,
238     fct_solver->flux_limiter->u_tilde_coupler,
239     fct_solver->u_old_coupler);
240 gross 3807
241    
242 gross 3793 /* b_i += sum_{j} limitation factor_{ij} * antidiffusive_flux_{ij} */
243     Paso_FCT_FluxLimiter_addLimitedFluxes_Start(fct_solver->flux_limiter);
244     Paso_FCT_FluxLimiter_addLimitedFluxes_Complete(fct_solver->flux_limiter, b);
245    
246 gross 3823 Paso_Scale(n, b,fct_solver->omega );
247 gross 3807 /* solve (m-dt/2*L) u = b in the form (omega*m-L) u = b * omega with omega*dt/2=1 */
248 gross 3823 #pragma omp for private(i) schedule(static)
249     for (i = 0; i < n; ++i) {
250     if (! (fct_solver->transportproblem->lumped_mass_matrix[i] > 0 ) ) {
251     b[i] = fct_solver->flux_limiter->u_tilde[i]
252     * fct_solver->transportproblem->iteration_matrix->mainBlock->val[main_iptr[i]];
253     }
254     }
255 gross 3793 /* initial guess is u<- -u + 2*u_tilde */
256     Paso_Update(n, -1., u, 2., fct_solver->flux_limiter->u_tilde);
257 gross 3823
258 gross 3793 sweep_max = MAX((int) (- 2 * log(RTOL)/log(2.)-0.5),1);
259 gross 3823 norm_u_tilde=Paso_lsup(n, fct_solver->flux_limiter->u_tilde, fct_solver->flux_limiter->mpi_info);
260 gross 3807 if (options->verbose) {
261 gross 3823 printf("Paso_FCT_Solver_update_LCN: u_tilda lsup = %e (rtol = %e, max. sweeps = %d)\n",norm_u_tilde,RTOL * norm_u_tilde ,sweep_max);
262 gross 3807 }
263 gross 3793 errorCode = Paso_Preconditioner_Smoother_solve_byTolerance( iteration_matrix, ((Paso_Preconditioner*) (iteration_matrix->solver_p))->gs,
264     u, b, RTOL, &sweep_max, TRUE);
265     if (errorCode == PRECONDITIONER_NO_ERROR) {
266     if (options->verbose) printf("Paso_FCT_Solver_update_LCN: convergence after %d Gauss-Seidel steps.\n",sweep_max);
267     errorCode=SOLVER_NO_ERROR;
268     } else {
269     if (options->verbose) printf("Paso_FCT_Solver_update_LCN: Gauss-Seidel failed within %d stesp (rel. tolerance %e).\n",sweep_max,RTOL);
270     errorCode= SOLVER_MAXITER_REACHED;
271     }
272     return errorCode;
273    
274     }
275    
276     err_t Paso_FCT_Solver_updateNL(Paso_FCT_Solver *fct_solver, double* u, double *u_old, Paso_Options* options, Paso_Performance *pp)
277     {
278 gross 3794 const dim_t num_critical_rates_max=3; /* number of rates >=critical_rate accepted before divergence is triggered */
279     const double critical_rate=0.95; /* expected value of convergence rate */
280    
281 gross 3793 double *b = fct_solver->b;
282     double *z = fct_solver->z;
283     double *du = fct_solver->du;
284     double const dt = fct_solver->dt;
285     Paso_TransportProblem* fctp = fct_solver->transportproblem;
286     Paso_FCT_FluxLimiter* flux_limiter = fct_solver->flux_limiter;
287     dim_t i;
288 gross 3794 double norm_u_tilde, ATOL, norm_du=LARGE_POSITIVE_FLOAT, norm_du_old, rate=1.;
289 gross 3793 err_t errorCode=SOLVER_NO_ERROR;
290     const dim_t n=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix);
291     const double atol=options->absolute_tolerance;
292     const double rtol=options->tolerance;
293     const dim_t max_m=options->iter_max;
294     dim_t m=0, num_critical_rates=0 ;
295     /* ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// */
296    
297     bool_t converged=FALSE, max_m_reached=FALSE,diverged=FALSE;
298     options->num_iter=0;
299    
300     Paso_Coupler_startCollect(fct_solver->u_old_coupler,u_old);
301     Paso_Coupler_finishCollect(fct_solver->u_old_coupler);
302     /* prepare u_tilda and flux limiter */
303     if ( fct_solver->method == PASO_BACKWARD_EULER ) {
304     /* b[i]=m_i* u_old[i] */
305     #pragma omp for private(i) schedule(static)
306     for (i = 0; i < n; ++i) {
307 gross 3822 if (fctp->lumped_mass_matrix[i] > 0 ) {
308     b[i]=u_old[i]* fctp->lumped_mass_matrix[i];
309     } else {
310     b[i]=u_old[i];
311     }
312 gross 3793 }
313     } else {
314 gross 3822 /* b[i]=m_i* u_old[i] + dt/2 sum_{j <> i} l_{ij}*(u_old[j]-u_old[i]) = m_i * u_tilde_i where m_i>0
315     * = u_old[i] otherwise
316 gross 3793 * note that iteration_matrix stores the negative values of the
317     * low order transport matrix l. Therefore a=-dt*0.5 is used. */
318     Paso_FCT_Solver_setMuPaLu(b,fctp->lumped_mass_matrix,fct_solver->u_old_coupler,-dt*0.5,fctp->iteration_matrix);
319     }
320     Paso_FCT_FluxLimiter_setU_tilda(flux_limiter, b); /* u_tilda = m^{-1} b */
321     /* u_tilda_connector is completed */
322     /**********************************************************************************************************************/
323     /* calculate stopping criterium */
324     norm_u_tilde=Paso_lsup(n, flux_limiter->u_tilde, flux_limiter->mpi_info);
325     ATOL= rtol * norm_u_tilde + atol ;
326     if (options->verbose) printf("Paso_FCT_Solver_updateNL: iteration starts u_tilda lsup = %e (abs. tol = %e)\n",norm_u_tilde,ATOL);
327     /* ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// */
328    
329     /* u_old is an initial guess for u*/
330     Paso_Copy(n,u,u_old);
331    
332     while ( (!converged) && (!diverged) && (! max_m_reached) && Esys_noError()) {
333     Paso_Coupler_startCollect(fct_solver->u_coupler,u);
334     Paso_Coupler_finishCollect(fct_solver->u_coupler);
335    
336     /* set antidiffusive_flux_{ij} for u */
337     if (fct_solver->method == PASO_BACKWARD_EULER) {
338     Paso_FCT_setAntiDiffusionFlux_BE(fct_solver->flux_limiter->antidiffusive_fluxes, fctp, dt, fct_solver->u_coupler, fct_solver->u_old_coupler);
339     } else {
340     Paso_FCT_setAntiDiffusionFlux_CN(fct_solver->flux_limiter->antidiffusive_fluxes, fctp, dt, fct_solver->u_coupler, fct_solver->u_old_coupler);
341     }
342     /* start the calculation of the limitation factors_{fct_solver->ij} */
343     Paso_FCT_FluxLimiter_addLimitedFluxes_Start(flux_limiter); /* uses u_tilde */
344    
345     /*
346 gross 3822 * z_m[i]=b[i] - (m_i*u[i] - omega*sum_{j<>i} l_{ij} (u[j]-u[i]) ) where m_i>0
347     * ==b[i] - u[i] = u_tilda[i]-u[i] =0 otherwise
348     *
349     * omega = dt/2 or dt .
350 gross 3793 *
351     * note that iteration_matrix stores the negative values of the
352     * low order transport matrix l. Therefore a=dt*theta is used.
353     */
354     if (fct_solver-> method == PASO_BACKWARD_EULER) {
355     Paso_FCT_Solver_setMuPaLu(z, fctp->lumped_mass_matrix, fct_solver->u_coupler, dt, fctp->iteration_matrix);
356     } else {
357     Paso_FCT_Solver_setMuPaLu(z, fctp->lumped_mass_matrix, fct_solver->u_coupler, dt/2, fctp->iteration_matrix);
358     }
359    
360    
361     Paso_Update(n,-1.,z,1.,b); /* z=b-z */
362    
363     /* z_i += sum_{j} limitation factor_{ij} * antidiffusive_flux_{ij} */
364     Paso_FCT_FluxLimiter_addLimitedFluxes_Complete(flux_limiter, z);
365    
366     /* we solve (m/omega - L ) * du = z */
367     if (fct_solver->method == PASO_BACKWARD_EULER) {
368     dim_t cntIter = options->iter_max;
369     double tol= Paso_l2(n, z, fctp->mpi_info) ;
370    
371     if ( m ==0) {
372     tol *=0.5;
373     } else {
374     tol *= MIN(MAX(rate*rate, 1e-2), 0.5);
375     }
376     /* use BiCGSTab with jacobi preconditioner ( m - omega * L ) */
377     Paso_zeroes(n,du);
378     errorCode = Paso_Solver_BiCGStab(fctp->iteration_matrix, z, du, &cntIter, &tol, pp);
379    
380     /* errorCode = Paso_Solver_GMRES(fctp->iteration_matrix, z, du, &cntIter, &tol, 10, 2000, pp); */
381     if (options->verbose) printf("Paso_FCT_Solver_updateNL: BiCGStab is completed after %d steps (residual =%e).\n",cntIter, tol);
382     options->num_iter+=cntIter;
383     if ( errorCode != SOLVER_NO_ERROR) break;
384     } else {
385     /* just use the main diagonal of (m/omega - L ) */
386    
387     Paso_Preconditioner_Smoother_solve(fctp->iteration_matrix, ((Paso_Preconditioner*) (fctp->iteration_matrix->solver_p))->jacobi,
388     du, z, 1, FALSE);
389     options->num_iter++;
390     }
391 gross 3823
392 gross 3793
393     Paso_Update(n,1.,u,fct_solver->omega,du);
394     norm_du_old=norm_du;
395     norm_du=Paso_lsup(n,du, fctp->mpi_info);
396     if (m ==0) {
397     if (options->verbose) printf("Paso_FCT_Solver_updateNL: step %d: increment= %e\n",m+1, norm_du * fct_solver->omega);
398     } else {
399     if (norm_du_old > 0.) {
400     rate=norm_du/norm_du_old;
401     } else if (norm_du <= 0.) {
402     rate=0.;
403     } else {
404     rate=LARGE_POSITIVE_FLOAT;
405     }
406     if (options->verbose) printf("Paso_FCT_Solver_updateNL: step %d: increment= %e (rate = %e)\n",m+1, norm_du * fct_solver->omega, rate);
407     num_critical_rates+=( rate<critical_rate ? 0 : 1);
408     max_m_reached=(m>max_m);
409     diverged = (num_critical_rates >= num_critical_rates_max);
410     converged=(norm_du * fct_solver->omega <= ATOL) ;
411     }
412     m++;
413     } /* end of while loop */
414     if (errorCode == SOLVER_NO_ERROR) {
415     if (converged) {
416     if (options->verbose) printf("Paso_FCT_Solver_updateNL: iteration is completed.\n");
417     errorCode=SOLVER_NO_ERROR;
418     } else if (diverged) {
419     if (options->verbose) printf("Paso_FCT_Solver_updateNL: divergence.\n");
420     errorCode=SOLVER_DIVERGENCE;
421     } else if (max_m_reached) {
422     if (options->verbose) printf("Paso_FCT_Solver_updateNL: maximum number of iteration steps reached.\n");
423     errorCode=SOLVER_MAXITER_REACHED;
424     }
425    
426     }
427     return errorCode;
428     }
429    
430    
431     /*
432     * AntiDiffusionFlux:
433     *
434     * f_{ij} = (m_{ij} - dt (1-theta) d_{ij}) (u_old[j]-u_old[i]) - (m_{ij} + dt theta d_{ij}) (u[j]-u[i])
435     *
436     * m=fc->mass matrix
437     * d=artifical diffusion matrix = L - K = - fc->iteration matrix - fc->transport matrix (away from main diagonal)
438     *
439     * for CN : theta =0.5
440     * for BE : theta = 1.
441     */
442    
443     void Paso_FCT_setAntiDiffusionFlux_CN(Paso_SystemMatrix *flux_matrix,
444     const Paso_TransportProblem* fct,
445     const double dt,
446     const Paso_Coupler* u_coupler,
447     const Paso_Coupler* u_old_coupler)
448     {
449     dim_t i;
450     index_t iptr_ij;
451    
452     const double *u = Paso_Coupler_borrowLocalData(u_coupler);
453     const double *u_old= Paso_Coupler_borrowLocalData(u_old_coupler);
454     const double *remote_u=Paso_Coupler_borrowRemoteData(u_coupler);
455     const double *remote_u_old=Paso_Coupler_borrowRemoteData(u_old_coupler);
456     const double dt_half= dt/2;
457     const Paso_SystemMatrixPattern *pattern=fct->iteration_matrix->pattern;
458     const dim_t n=Paso_SystemMatrix_getTotalNumRows(fct->iteration_matrix);
459    
460     #pragma omp parallel for schedule(static) private(i, iptr_ij)
461     for (i = 0; i < n; ++i) {
462 caltinay 3795 const double u_i = u[i];
463     const double u_old_i = u_old[i];
464 gross 3793
465     #pragma ivdep
466     for (iptr_ij=(pattern->mainPattern->ptr[i]);iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
467 caltinay 3795 const index_t j = pattern->mainPattern->index[iptr_ij];
468     const double m_ij = fct->mass_matrix->mainBlock->val[iptr_ij];
469     const double d_ij = fct->transport_matrix->mainBlock->val[iptr_ij]+fct->iteration_matrix->mainBlock->val[iptr_ij]; /* this is in fact -d_ij */
470     const double u_old_j = u_old[j];
471     const double u_j = u[j];
472 gross 3793
473     /* (m_{ij} - dt (1-theta) d_{ij}) (u_old[j]-u_old[i]) - (m_{ij} + dt theta d_{ij}) (u[j]-u[i]) */
474     flux_matrix->mainBlock->val[iptr_ij]=(m_ij+dt_half*d_ij)*(u_old_j-u_old_i) - (m_ij-dt_half*d_ij)*(u_j-u_i);
475    
476     }
477     #pragma ivdep
478     for (iptr_ij=(pattern->col_couplePattern->ptr[i]);iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
479 caltinay 3795 const index_t j = pattern->col_couplePattern->index[iptr_ij];
480     const double m_ij = fct->mass_matrix->col_coupleBlock->val[iptr_ij];
481     const double d_ij = fct->transport_matrix->col_coupleBlock->val[iptr_ij]+fct->iteration_matrix->col_coupleBlock->val[iptr_ij]; /* this is in fact -d_ij */
482     const double u_old_j = remote_u_old[j];
483     const double u_j = remote_u[j];
484 gross 3793 flux_matrix->col_coupleBlock->val[iptr_ij]=(m_ij+dt_half*d_ij)*(u_old_j-u_old_i)- (m_ij-dt_half*d_ij)*(u_j-u_i);
485     }
486     }
487     }
488    
489     void Paso_FCT_setAntiDiffusionFlux_BE(Paso_SystemMatrix *flux_matrix,
490     const Paso_TransportProblem* fct,
491     const double dt,
492     const Paso_Coupler* u_coupler,
493     const Paso_Coupler* u_old_coupler)
494     {
495     dim_t i;
496     index_t iptr_ij;
497    
498     const double *u=Paso_Coupler_borrowLocalData(u_coupler);
499     const double *u_old= Paso_Coupler_borrowLocalData(u_old_coupler);
500     const double *remote_u=Paso_Coupler_borrowRemoteData(u_coupler);
501     const double *remote_u_old=Paso_Coupler_borrowRemoteData(u_old_coupler);
502     const Paso_SystemMatrixPattern *pattern=fct->iteration_matrix->pattern;
503     const dim_t n=Paso_SystemMatrix_getTotalNumRows(fct->iteration_matrix);
504    
505     #pragma omp parallel for schedule(static) private(i, iptr_ij)
506     for (i = 0; i < n; ++i) {
507 caltinay 3795 const double u_i = u[i];
508     const double u_old_i = u_old[i];
509 gross 3793 #pragma ivdep
510     for (iptr_ij=pattern->mainPattern->ptr[i]; iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
511    
512 caltinay 3795 const index_t j = pattern->mainPattern->index[iptr_ij];
513     const double m_ij = fct->mass_matrix->mainBlock->val[iptr_ij];
514     const double d_ij = fct->transport_matrix->mainBlock->val[iptr_ij]+fct->iteration_matrix->mainBlock->val[iptr_ij]; /* this is in fact -d_ij */
515     const double u_old_j = u_old[j];
516     const double u_j = u[j];
517 gross 3793
518     flux_matrix->mainBlock->val[iptr_ij]=m_ij*(u_old_j-u_old_i)- (m_ij-dt*d_ij)*(u_j-u_i);
519     }
520     #pragma ivdep
521     for (iptr_ij=pattern->col_couplePattern->ptr[i]; iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
522 caltinay 3795 const index_t j = pattern->col_couplePattern->index[iptr_ij];
523     const double m_ij = fct->mass_matrix->col_coupleBlock->val[iptr_ij]; /* this is in fact -d_ij */
524     const double d_ij = fct->transport_matrix->col_coupleBlock->val[iptr_ij]+fct->iteration_matrix->col_coupleBlock->val[iptr_ij];
525     const double u_old_j = remote_u_old[j];
526     const double u_j = remote_u[j];
527 gross 3793
528     flux_matrix->col_coupleBlock->val[iptr_ij]=m_ij*(u_old_j-u_old_i)- (m_ij-dt*d_ij)*(u_j-u_i);
529     }
530     }
531     }
532    
533     /* special version of the ant-diffusive fluxes for the linear Crank-Nicolson scheme
534     * in fact this is evaluated for u = 2*u_tilde - u_old which is the predictor
535     * of the solution of the the stabilized problem at time dt using the forward Euler scheme
536     *
537     * f_{ij} = (m_{ij} - dt/2 d_{ij}) (u_old[j]-u_old[i]) - (m_{ij} + dt/2 d_{ij}) (u[j]-u[i])
538     * = (m_{ij} - dt/2 d_{ij}) * (u_old[j]-u_old[i]) - (m_{ij} + dt/2 d_{ij}) * ( 2*(u_tilde[j]-u_tilde[i]) - (u_old[j] -u_old [i]) )
539     * = 2* m_{ij} * ( (u_old[j]-u_tilde[j] - (u_old[i]) - u_tilde[i]) ) - dt d_{ij} * (u_tilde[j]-u_tilde[i])
540     *
541     */
542    
543     void Paso_FCT_setAntiDiffusionFlux_linearCN(Paso_SystemMatrix *flux_matrix,
544     const Paso_TransportProblem* fct,
545     const double dt,
546     const Paso_Coupler* u_tilde_coupler,
547     const Paso_Coupler* u_old_coupler)
548     {
549     dim_t i;
550     index_t iptr_ij;
551    
552     const double *u_tilde=Paso_Coupler_borrowLocalData(u_tilde_coupler);
553     const double *u_old= Paso_Coupler_borrowLocalData(u_old_coupler);
554     const double *remote_u_tilde=Paso_Coupler_borrowRemoteData(u_tilde_coupler);
555     const double *remote_u_old=Paso_Coupler_borrowRemoteData(u_old_coupler);
556     const Paso_SystemMatrixPattern *pattern=fct->iteration_matrix->pattern;
557     const dim_t n=Paso_SystemMatrix_getTotalNumRows(fct->iteration_matrix);
558    
559     #pragma omp parallel for schedule(static) private(i, iptr_ij)
560     for (i = 0; i < n; ++i) {
561 caltinay 3795 const double u_tilde_i = u_tilde[i];
562     const double u_old_i = u_old[i];
563 gross 3807 const double du_i = u_tilde_i - u_old_i;
564 gross 3793 #pragma ivdep
565     for (iptr_ij=(pattern->mainPattern->ptr[i]);iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
566    
567 caltinay 3795 const index_t j = pattern->mainPattern->index[iptr_ij];
568     const double m_ij = fct->mass_matrix->mainBlock->val[iptr_ij];
569     const double d_ij = fct->transport_matrix->mainBlock->val[iptr_ij]+fct->iteration_matrix->mainBlock->val[iptr_ij]; /* this is in fact -d_ij */
570     const double u_tilde_j = u_tilde[j];
571     const double u_old_j = u_old[j];
572 gross 3807 const double du_j = u_tilde_j - u_old_j;
573 gross 3793
574 gross 3807 flux_matrix->mainBlock->val[iptr_ij]= 2 * m_ij * ( du_i - du_j ) - dt * d_ij * ( u_tilde_i - u_tilde_j);
575 gross 3793 }
576     #pragma ivdep
577     for (iptr_ij=(pattern->col_couplePattern->ptr[i]);iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
578    
579 caltinay 3795 const index_t j = pattern->col_couplePattern->index[iptr_ij];
580     const double m_ij = fct->mass_matrix->col_coupleBlock->val[iptr_ij];
581     const double d_ij = fct->transport_matrix->col_coupleBlock->val[iptr_ij]+fct->iteration_matrix->col_coupleBlock->val[iptr_ij];/* this is in fact -d_ij */
582     const double u_tilde_j = remote_u_tilde[j];
583     const double u_old_j = remote_u_old[j];
584 gross 3807 const double du_j = u_tilde_j - u_old_j;
585 gross 3793
586 gross 3807 flux_matrix->col_coupleBlock->val[iptr_ij]= 2 * m_ij * ( du_i - du_j ) - dt * d_ij * ( u_tilde_i - u_tilde_j);
587 gross 3793
588     }
589     }
590    
591     }
592    
593     /**************************************************************/
594    
595     /* Creates the low order transport matrix and stores its negative values
596     * into the iteration_matrix except for the main diagonal which is stored
597     * separately.
598     * If fc->iteration_matrix==NULL, fc->iteration_matrix is allocated
599     *
600     * a=transport_matrix
601     * b= low_order_transport_matrix = - iteration_matrix
602     * c=main diagonal low_order_transport_matrix
603     * initialize c[i] mit a[i,i]
604     *
605     * d_ij=max(0,-a[i,j],-a[j,i])
606     * b[i,j]=-(a[i,j]+d_ij)
607     * c[i]-=d_ij
608     */
609    
610     void Paso_FCT_setLowOrderOperator(Paso_TransportProblem * fc) {
611    
612     dim_t i;
613     index_t iptr_ij, iptr_ji;
614     const index_t* main_iptr=Paso_TransportProblem_borrowMainDiagonalPointer(fc);
615    
616     if (fc->iteration_matrix==NULL) {
617     fc->iteration_matrix=Paso_SystemMatrix_alloc(fc->transport_matrix->type,
618     fc->transport_matrix->pattern,
619     fc->transport_matrix->row_block_size,
620     fc->transport_matrix->col_block_size, TRUE);
621     }
622    
623     if (Esys_noError()) {
624     const Paso_SystemMatrixPattern *pattern=fc->iteration_matrix->pattern;
625     const dim_t n=Paso_SystemMatrix_getTotalNumRows(fc->iteration_matrix);
626     #pragma omp parallel for private(i, iptr_ij, iptr_ji) schedule(static)
627     for (i = 0; i < n; ++i) {
628     double sum=fc->transport_matrix->mainBlock->val[main_iptr[i]];
629    
630     /* printf("sum[%d] = %e -> ", i, sum); */
631     /* look at a[i,j] */
632     for (iptr_ij=pattern->mainPattern->ptr[i];iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
633 caltinay 3795 const index_t j = pattern->mainPattern->index[iptr_ij];
634     const double rtmp1 = fc->transport_matrix->mainBlock->val[iptr_ij];
635 gross 3793 if (j!=i) {
636     /* find entry a[j,i] */
637     #pragma ivdep
638     for (iptr_ji=pattern->mainPattern->ptr[j]; iptr_ji<pattern->mainPattern->ptr[j+1]; ++iptr_ji) {
639    
640     if ( pattern->mainPattern->index[iptr_ji] == i) {
641 caltinay 3795 const double rtmp2=fc->transport_matrix->mainBlock->val[iptr_ji];
642 gross 3793 /*
643     printf("a[%d,%d]=%e\n",i,j,rtmp1);
644     printf("a[%d,%d]=%e\n",j,i,rtmp2);
645     */
646    
647 caltinay 3795 const double d_ij=-MIN3(0.,rtmp1,rtmp2);
648 gross 3793 fc->iteration_matrix->mainBlock->val[iptr_ij]=-(rtmp1+d_ij);
649     /* printf("l[%d,%d]=%e\n",i,j,fc->iteration_matrix->mainBlock->val[iptr_ij]); */
650     sum-=d_ij;
651     break;
652     }
653     }
654     }
655     }
656     for (iptr_ij=pattern->col_couplePattern->ptr[i];iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
657 caltinay 3795 const index_t j = pattern->col_couplePattern->index[iptr_ij];
658     const double rtmp1 = fc->transport_matrix->col_coupleBlock->val[iptr_ij];
659 gross 3793 /* find entry a[j,i] */
660     #pragma ivdep
661     for (iptr_ji=pattern->row_couplePattern->ptr[j]; iptr_ji<pattern->row_couplePattern->ptr[j+1]; ++iptr_ji) {
662     if (pattern->row_couplePattern->index[iptr_ji]==i) {
663 caltinay 3795 const double rtmp2=fc->transport_matrix->row_coupleBlock->val[iptr_ji];
664     const double d_ij=-MIN3(0.,rtmp1,rtmp2);
665 gross 3793 fc->iteration_matrix->col_coupleBlock->val[iptr_ij]=-(rtmp1+d_ij);
666     fc->iteration_matrix->row_coupleBlock->val[iptr_ji]=-(rtmp2+d_ij);
667     sum-=d_ij;
668     break;
669     }
670     }
671     }
672     /* set main diagonal entry */
673     fc->main_diagonal_low_order_transport_matrix[i]=sum;
674     /* printf("%e \n", sum); */
675     }
676    
677     }
678     }
679    
680     /*
681 gross 3822 * out_i=m_i u_i + a * \sum_{j <> i} l_{ij} (u_j-u_i) where m_i>0
682     * = u_i where m_i<=0
683 gross 3793 *
684     */
685     void Paso_FCT_Solver_setMuPaLu(double* out,
686     const double* M,
687     const Paso_Coupler* u_coupler,
688     const double a,
689     const Paso_SystemMatrix *L)
690     {
691     dim_t i;
692     const Paso_SystemMatrixPattern *pattern = L->pattern;
693     const double *u=Paso_Coupler_borrowLocalData(u_coupler);
694     const double *remote_u=Paso_Coupler_borrowRemoteData(u_coupler);
695 caltinay 3795 index_t iptr_ij;
696 gross 3793 const dim_t n=Paso_SystemMatrix_getTotalNumRows(L);
697    
698     #pragma omp parallel for private(i) schedule(static)
699     for (i = 0; i < n; ++i) {
700 gross 3822 if ( M[i] > 0.) {
701     out[i]=M[i]*u[i];
702     } else {
703     out[i]=u[i];
704     }
705 gross 3793 }
706     if (ABS(a)>0) {
707     #pragma omp parallel for schedule(static) private(i, iptr_ij)
708     for (i = 0; i < n; ++i) {
709 gross 3823 if ( M[i] > 0.) {
710 caltinay 3795 double sum=0;
711     const double u_i=u[i];
712 gross 3793 #pragma ivdep
713     for (iptr_ij=(pattern->mainPattern->ptr[i]);iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
714     const index_t j=pattern->mainPattern->index[iptr_ij];
715 caltinay 3795 const double l_ij=L->mainBlock->val[iptr_ij];
716 gross 3793 sum+=l_ij*(u[j]-u_i);
717    
718     }
719     #pragma ivdep
720     for (iptr_ij=(pattern->col_couplePattern->ptr[i]);iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
721     const index_t j=pattern->col_couplePattern->index[iptr_ij];
722 caltinay 3795 const double l_ij=L->col_coupleBlock->val[iptr_ij];
723 gross 3793 sum+=l_ij*(remote_u[j]-u_i);
724     }
725     out[i]+=a*sum;
726 gross 3823 }
727 gross 3793 }
728     }
729     }
730    
731     /* *************************************************************************************************************************** */
732    
733    
734    
735    
736    
737    
738    

  ViewVC Help
Powered by ViewVC 1.1.26