/[escript]/trunk/paso/src/FCTSolver.c
ViewVC logotype

Annotation of /trunk/paso/src/FCTSolver.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3120 - (hide annotations)
Mon Aug 30 10:48:00 2010 UTC (9 years, 1 month ago) by gross
File MIME type: text/plain
File size: 15274 byte(s)
first iteration on Paso code clean up
1 gross 2987
2     /*******************************************************
3     *
4     * Copyright (c) 2003-2010 by University of Queensland
5     * Earth Systems Science Computational Center (ESSCC)
6     * http://www.uq.edu.au/esscc
7     *
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13    
14    
15     /**************************************************************/
16    
17     /* Paso: Transport solver with flux corretion (L is row sum zero)
18     *
19     * - Mv_t=Lv v(0)=u
20     *
21     * to return v(dt)
22     *
23     */
24     /**************************************************************/
25    
26     /* Author: l.gross@uq.edu.au */
27    
28     /**************************************************************/
29    
30     #include "FCTSolver.h"
31     #include "Solver.h"
32     #include "PasoUtil.h"
33    
34     Paso_Function* Paso_FCTSolver_Function_alloc(Paso_TransportProblem *fctp, Paso_Options* options)
35     {
36     const dim_t n=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix);
37     const dim_t blockSize=Paso_TransportProblem_getBlockSize(fctp);
38     Paso_Function * out=NULL;
39     Paso_FCTSolver *more=NULL;
40     out=MEMALLOC(1,Paso_Function);
41     if (! Paso_checkPtr(out)) {
42     out->kind=FCT;
43     out->mpi_info=Paso_MPIInfo_getReference(fctp->mpi_info);
44     out->n=n;
45     out->b=TMPMEMALLOC(n,double); /*b_m */
46     Paso_checkPtr(out->b);
47     out->tmp=TMPMEMALLOC(n,double); /* z_m */
48     Paso_checkPtr(out->tmp);
49    
50     more=MEMALLOC(1,Paso_FCTSolver);
51     out->more=(void*) more;
52     if (! Paso_checkPtr(more)) {
53     more->transportproblem=Paso_TransportProblem_getReference(fctp);
54     more->uTilde_n=TMPMEMALLOC(n,double);
55     Paso_checkPtr(more->uTilde_n);
56     more->QN_n=TMPMEMALLOC(n,double);
57     Paso_checkPtr(more->QN_n);
58     more->QP_n=TMPMEMALLOC(n,double);
59     Paso_checkPtr(more->QP_n);
60     more->RN_m=TMPMEMALLOC(n,double);
61     Paso_checkPtr(more->RN_m);
62     more->RP_m=TMPMEMALLOC(n,double);
63     Paso_checkPtr(more->RP_m);
64     more->QN_n_coupler=Paso_Coupler_alloc(Paso_TransportProblem_borrowConnector(fctp),blockSize);
65     more->QP_n_coupler=Paso_Coupler_alloc(Paso_TransportProblem_borrowConnector(fctp),blockSize);
66     more->RN_m_coupler=Paso_Coupler_alloc(Paso_TransportProblem_borrowConnector(fctp),blockSize);
67     more->RP_m_coupler=Paso_Coupler_alloc(Paso_TransportProblem_borrowConnector(fctp),blockSize);
68     more->uTilde_n_coupler=Paso_Coupler_alloc(Paso_TransportProblem_borrowConnector(fctp),blockSize);
69     more->u_m_coupler=Paso_Coupler_alloc(Paso_TransportProblem_borrowConnector(fctp),blockSize);
70     more->flux_matrix_m=Paso_SystemMatrix_alloc(fctp->transport_matrix->type,
71     fctp->transport_matrix->pattern,
72     fctp->transport_matrix->row_block_size,
73     fctp->transport_matrix->col_block_size, TRUE);
74    
75     }
76     }
77     if (Paso_noError()) {
78     return out;
79     } else {
80     Paso_FCTSolver_Function_free(out);
81     return NULL;
82     }
83     }
84    
85     void Paso_FCTSolver_Function_free(Paso_Function * in)
86     {
87     Paso_FCTSolver *more=NULL;
88     if (in!=NULL) {
89     Paso_MPIInfo_free(in->mpi_info);
90     MEMFREE(in->tmp);
91     MEMFREE(in->b);
92     more=(Paso_FCTSolver *) (in->more);
93     if (NULL !=more) {
94     Paso_TransportProblem_free(more->transportproblem);
95     Paso_SystemMatrix_free(more->flux_matrix_m);
96     TMPMEMFREE(more->uTilde_n);
97     TMPMEMFREE(more->QN_n);
98     TMPMEMFREE(more->QP_n);
99     TMPMEMFREE(more->RN_m);
100     TMPMEMFREE(more->RP_m);
101     Paso_Coupler_free(more->QN_n_coupler);
102     Paso_Coupler_free(more->QP_n_coupler);
103     Paso_Coupler_free(more->RN_m_coupler);
104     Paso_Coupler_free(more->RP_m_coupler);
105     Paso_Coupler_free(more->uTilde_n_coupler);
106     Paso_Coupler_free(more->u_m_coupler);
107     }
108     MEMFREE(in);
109     }
110     }
111     double Paso_FCTSolver_getSafeTimeStepSize(Paso_TransportProblem* fctp)
112     {
113     dim_t i, n;
114     double dt_max=LARGE_POSITIVE_FLOAT, dt_max_loc;
115 gross 3033 register double l_ii,m_i,al_ii;
116 gross 3020 index_t fail=0, fail_loc;
117 gross 2987 n=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix);
118 gross 3014 /* set low order transport operator */
119     Paso_FCTSolver_setLowOrderOperator(fctp);
120 gross 2987
121     if (Paso_noError()) {
122     /*
123     * calculate time step size:
124     */
125     dt_max=LARGE_POSITIVE_FLOAT;
126 gross 3020 fail=0;
127     #pragma omp parallel private(dt_max_loc, fail_loc)
128 gross 2987 {
129     dt_max_loc=LARGE_POSITIVE_FLOAT;
130 gross 3020 fail_loc=0;
131 gross 3033 #pragma omp for schedule(static) private(i,l_ii,m_i, al_ii)
132 gross 2987 for (i=0;i<n;++i) {
133     l_ii=fctp->main_diagonal_low_order_transport_matrix[i];
134 gross 3020 m_i=fctp->lumped_mass_matrix[i];
135 gross 3033 al_ii=ABS(l_ii); /* al_ii should be negative, abs is taken to avoid problems with almost zero but positive values for l_ii */
136     if ( (m_i > 0) ) {
137     if (al_ii>0) dt_max_loc=MIN(dt_max_loc,m_i/al_ii);
138 gross 3020 } else {
139     fail_loc=-1;
140     }
141 gross 2987 }
142     #pragma omp critical
143     {
144     dt_max=MIN(dt_max,dt_max_loc);
145 gross 3020 fail=MIN(fail, fail_loc);
146 gross 2987 }
147     }
148     #ifdef PASO_MPI
149 gross 3020 {
150     double rtmp_loc[2], rtmp[2];
151     rtmp_loc[0]=dt_max;
152     rtmp_loc[1]= (double) fail;
153     MPI_Allreduce(rtmp_loc, rtmp, 2, MPI_DOUBLE, MPI_MIN, fctp->mpi_info->comm);
154     dt_max=rtmp[0];
155     fail = rtmp[1] < 0 ? -1 : 0;
156     }
157 gross 2987 #endif
158 gross 3020 if (fail < 0 ) {
159 gross 3033 Paso_setError(VALUE_ERROR, "Paso_FCTSolver_getSafeTimeStepSize: negative mass matrix entries detected.");
160 gross 3020 return -1;
161     } else {
162     if (dt_max<LARGE_POSITIVE_FLOAT) dt_max*=fctp->dt_factor;
163     }
164 gross 2987 }
165     return dt_max;
166     }
167     /*
168     * evaluates F as
169     * (m_i*omega - l_ij)*(F_j/omega)
170     * =(m u^n[i] + dt*(1-theta) sum_{j <> i} l_{ij}*(u^n[j]-u^n[i]))-(m_i*u[i] - dt*theta*sum_{j<>i} l_{ij} (u[j]-u[i]) )- antidiffusion
171     *
172     */
173     err_t Paso_FCTSolver_Function_call(Paso_Function * F,double* value, const double* arg, Paso_Performance *pp)
174     {
175 gross 3005 Paso_Options options;
176 gross 2987 Paso_FCTSolver *more=(Paso_FCTSolver *) (F->more);
177 gross 3005 Paso_Options_setDefaults(&options);
178     options.verbose=TRUE;
179 gross 2987
180     /* set z_m in tmp */
181     Paso_FCTSolver_setUpRightHandSide(more->transportproblem,more->dt,arg,more->u_m_coupler,F->tmp,more->flux_matrix_m,
182     more->uTilde_n_coupler,F->b,
183     more->QN_n_coupler,more->QP_n_coupler,more->RN_m,
184     more->RN_m_coupler,more->RP_m,more->RP_m_coupler,pp);
185     /* apply preconditoner do get du = */
186 gross 3005 /* Paso_Solver(more->transportproblem->iteration_matrix,value,F->tmp,&options,pp); */
187    
188    
189 gross 3120 Paso_SystemMatrix_solvePreconditioner(more->transportproblem->iteration_matrix,value,F->tmp);
190 gross 2987 return NO_ERROR;
191     }
192    
193     err_t Paso_FCTSolver_solve(Paso_Function* F, double* u, double dt, Paso_Options* options, Paso_Performance *pp)
194     {
195 gross 3014 const dim_t num_critical_rates_max=3; /* number of rates >=critical_rate accepted before divergence is triggered */
196 gross 3094 const double critical_rate=0.8; /* expected value of convergence rate */
197 gross 3014
198     double norm_u_tilde, ATOL, norm_u, norm_du=LARGE_POSITIVE_FLOAT, norm_du_old, *du=NULL, rate;
199 gross 2987 err_t errorCode=SOLVER_NO_ERROR;
200     Paso_FCTSolver *more=(Paso_FCTSolver *) (F->more);
201 gross 3014 const double omega=1./(dt*Paso_Transport_getTheta(more->transportproblem));
202 gross 2987 Paso_TransportProblem* fctp=more->transportproblem;
203     const dim_t n=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix);
204     const double atol=options->absolute_tolerance;
205     const double rtol=options->tolerance;
206     const dim_t max_m=options->iter_max;
207 gross 3014 dim_t m=0, num_critical_rates=0 ;
208     bool_t converged=FALSE, max_m_reached=FALSE,diverged=FALSE;
209 gross 2987
210     /* tolerance? */
211     more->dt=dt;
212     Paso_FCTSolver_setUp(fctp,dt,u, F->b,more->uTilde_n,more->uTilde_n_coupler,more->QN_n,more->QN_n_coupler,more->QP_n,more->QP_n_coupler,
213     options,pp);
214    
215    
216     norm_u_tilde=Paso_lsup(n,more->uTilde_n,F->mpi_info);
217     ATOL= rtol * norm_u_tilde + atol ;
218     if (options->verbose) printf("Paso_FCTSolver_solve: iteration starts u_tilda lsup = %e (abs. tol = %e)\n",norm_u_tilde,ATOL);
219 gross 3005 if (more->transportproblem->useBackwardEuler && FALSE ) {
220 gross 2987 options->absolute_tolerance = ATOL/omega;
221     options->tolerance=0;
222     errorCode=Paso_Solver_NewtonGMRES(F,u,options,pp);
223     options->absolute_tolerance=atol;
224     options->tolerance=rtol;
225     } else {
226     m=0;
227     du=MEMALLOC(n,double);
228     if (Paso_checkPtr(du)) {
229     errorCode=SOLVER_MEMORY_ERROR;
230     } else {
231     /* tolerance? */
232    
233 gross 3014 while ( (!converged) && (!diverged) && (! max_m_reached) && Paso_noError()) {
234 gross 2987 errorCode=Paso_FCTSolver_Function_call(F,du, u, pp);
235 gross 3120 options->num_iter++;
236 gross 2987 Paso_Update(n,1.,u,omega,du);
237     norm_u=Paso_lsup(n,u, fctp->mpi_info);
238 gross 3014 norm_du_old=norm_du;
239 gross 2987 norm_du=Paso_lsup(n,du, fctp->mpi_info);
240 gross 3014 if (m ==0) {
241     if (options->verbose) printf("Paso_FCTSolver_solve: step %d: increment= %e\n",m+1, norm_du*omega);
242    
243     } else {
244     if (norm_du_old > 0.) {
245     rate=norm_du/norm_du_old;
246     } else if (norm_du <= 0.) {
247     rate=0.;
248     } else {
249     rate=LARGE_POSITIVE_FLOAT;
250     }
251     if (options->verbose) printf("Paso_FCTSolver_solve: step %d: increment= %e (rate = %e)\n",m+1, norm_du*omega, rate);
252     num_critical_rates+=( rate<critical_rate ? 0 : 1);
253     }
254    
255 gross 2987 max_m_reached=(m>max_m);
256 gross 3014 diverged = (num_critical_rates >= num_critical_rates_max);
257 gross 2987 converged=(norm_du*omega <= ATOL) ;
258     m++;
259     }
260     }
261     MEMFREE(du);
262     if (errorCode == SOLVER_NO_ERROR) {
263     if (converged) {
264     if (options->verbose) printf("Paso_FCTSolver_solve: iteration is completed.\n");
265     errorCode=SOLVER_NO_ERROR;
266 gross 3014 } else if (diverged) {
267     if (options->verbose) printf("Paso_FCTSolver_solve: divergence.\n");
268     errorCode=SOLVER_DIVERGENCE;
269     } else if (max_m_reached) {
270     if (options->verbose) printf("Paso_FCTSolver_solve: maximum number of iteration steps reached.\n");
271 gross 2987 errorCode=SOLVER_MAXITER_REACHED;
272     }
273     }
274     }
275     return errorCode;
276     }
277    
278    
279    
280     err_t Paso_FCTSolver_setUpRightHandSide(Paso_TransportProblem* fctp, const double dt, const double *u_m,
281     Paso_Coupler* u_m_coupler, double * z_m,
282     Paso_SystemMatrix* flux_matrix, Paso_Coupler* uTilde_coupler, const double *b,
283     Paso_Coupler* QN_coupler, Paso_Coupler* QP_coupler,
284     double *RN_m, Paso_Coupler* RN_m_coupler, double* RP_m, Paso_Coupler* RP_m_coupler,
285     Paso_Performance* pp)
286     {
287 gross 3014 const double omega=dt* Paso_Transport_getTheta(fctp);
288 gross 2987 const dim_t n=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix);
289     /* distribute u */
290     Paso_Coupler_startCollect(u_m_coupler,u_m);
291     Paso_Coupler_finishCollect(u_m_coupler);
292     /*
293     * set the ant diffusion fluxes:
294     *
295     */
296     Paso_FCTSolver_setAntiDiffusionFlux(dt,fctp,flux_matrix,u_m_coupler);
297     /*
298     * apply pre flux-correction: f_{ij}:=0 if f_{ij}*(\tilde{u}[i]- \tilde{u}[j])<=0
299     *
300     */
301     Paso_FCTSolver_applyPreAntiDiffusionCorrection(flux_matrix,uTilde_coupler);
302     /*
303     * set flux limiters RN_m,RP_m
304     *
305     */
306     Paso_FCTSolver_setRs(flux_matrix,fctp->lumped_mass_matrix,QN_coupler,QP_coupler,RN_m,RP_m);
307     Paso_Coupler_startCollect(RN_m_coupler,RN_m);
308     Paso_Coupler_startCollect(RP_m_coupler,RP_m);
309     /*
310     * z_m[i]=b[i] - (m_i*u_m[i] - dt*theta*sum_{j<>i} l_{ij} (u_m[j]-u_m[i]) )
311     *
312     * note that iteration_matrix stores the negative values of the
313     * low order transport matrix l therefore a=dt*theta is used.
314     */
315 gross 3005 Paso_FCTSolver_setMuPaLu(z_m, fctp->lumped_mass_matrix, u_m_coupler, omega, fctp->iteration_matrix);
316 gross 2987 /*{
317     int kk;
318     for (kk=0;kk<n;kk++) printf("z_m %d : %e %e -> %e\n",kk,z_m[kk], b[kk],z_m[kk]-b[kk]);
319     }
320     */
321     /* z_m=b-z_m */
322     Paso_Update(n,-1.,z_m,1.,b);
323    
324     Paso_Coupler_finishCollect(RN_m_coupler);
325     Paso_Coupler_finishCollect(RP_m_coupler);
326     /* add corrected fluxes into z_m */
327 gross 3005 Paso_FCTSolver_addCorrectedFluxes(z_m,flux_matrix,RN_m_coupler,RP_m_coupler);
328 gross 2987 return SOLVER_NO_ERROR;
329     }
330    
331 gross 3014 void Paso_FCTSolver_Function_initialize(const double dt, Paso_TransportProblem* fctp, Paso_Options* options, Paso_Performance* pp)
332     {
333     dim_t i;
334     const index_t* main_iptr=Paso_TransportProblem_borrowMainDiagonalPointer(fctp);
335     const dim_t n=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix);
336     const double theta = Paso_Transport_getTheta(fctp);
337     const double omega=1./(dt* theta);
338     register double m, rtmp4;
339    
340     /*
341     * fctp->iteration_matrix[i,i]=m[i]/(dt theta) -l[i,i]
342     *
343     */
344     Paso_solve_free(fctp->iteration_matrix);
345     #pragma omp parallel for private(i,m,rtmp4)
346     for (i = 0; i < n; ++i) {
347     m=fctp->lumped_mass_matrix[i];
348     rtmp4=m*omega-(fctp->main_diagonal_low_order_transport_matrix[i]);
349     fctp->iteration_matrix->mainBlock->val[main_iptr[i]]=rtmp4;
350     }
351     Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER_INIT);
352 gross 3120 Paso_SystemMatrix_setPreconditioner(fctp->iteration_matrix,options);
353 gross 3014 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER_INIT);
354     }
355    
356 gross 2987 void Paso_FCTSolver_setUp(Paso_TransportProblem* fctp, const double dt, const double *u, double* b, double* uTilde,
357     Paso_Coupler* uTilde_coupler, double *QN, Paso_Coupler* QN_coupler, double *QP, Paso_Coupler* QP_coupler,
358     Paso_Options* options, Paso_Performance* pp)
359     {
360     dim_t i;
361     const dim_t n=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix);
362 gross 3014 register double m;
363 gross 2987 /* distribute u */
364     Paso_Coupler_startCollect(fctp->u_coupler,u);
365     Paso_Coupler_finishCollect(fctp->u_coupler);
366     /*
367     * b^n[i]=m u^n[i] + dt*(1-theta) sum_{j <> i} l_{ij}*(u^n[j]-u^n[i])
368     *
369     * note that iteration_matrix stores the negative values of the
370     * low order transport matrix l therefore a=-dt*(1-fctp->theta) is used.
371     *
372     */
373     if (fctp->useBackwardEuler) {
374     Paso_FCTSolver_setMuPaLu(b,fctp->lumped_mass_matrix,fctp->u_coupler,0.,fctp->iteration_matrix);
375     } else {
376     Paso_FCTSolver_setMuPaLu(b,fctp->lumped_mass_matrix,fctp->u_coupler,-dt*0.5,fctp->iteration_matrix);
377     }
378     /*
379     * uTilde[i]=b[i]/m[i]
380     *
381 gross 3014 */
382     #pragma omp parallel for private(i,m)
383 gross 2987 for (i = 0; i < n; ++i) {
384     m=fctp->lumped_mass_matrix[i];
385 gross 3014 uTilde[i]=b[i]/m;
386     }
387 gross 2987 /* distribute uTilde: */
388     Paso_Coupler_startCollect(uTilde_coupler,uTilde);
389     Paso_Coupler_finishCollect(uTilde_coupler);
390     /*
391     * calculate QP[i] max_{j} (\tilde{u}[j]- \tilde{u}[i] )
392     * QN[i] min_{j} (\tilde{u}[j]- \tilde{u}[i] )
393     *
394     */
395     Paso_FCTSolver_setQs(uTilde_coupler,QN,QP,fctp->iteration_matrix);
396     Paso_Coupler_startCollect(QN_coupler,QN);
397     Paso_Coupler_startCollect(QP_coupler,QP);
398     Paso_Coupler_finishCollect(QN_coupler);
399     Paso_Coupler_finishCollect(QP_coupler);
400 gross 3026 }

  ViewVC Help
Powered by ViewVC 1.1.26