/[escript]/trunk/paso/src/SolverFCT_solve.c
ViewVC logotype

Annotation of /trunk/paso/src/SolverFCT_solve.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1981 - (hide annotations)
Thu Nov 6 05:27:33 2008 UTC (11 years, 3 months ago) by jfenwick
File MIME type: text/plain
File size: 15462 byte(s)
More warning removal.

1 gross 1363
2     /*******************************************************
3 ksteube 1811 *
4     * Copyright (c) 2003-2008 by University of Queensland
5     * Earth Systems Science Computational Center (ESSCC)
6     * http://www.uq.edu.au/esscc
7     *
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13 gross 1363
14 ksteube 1811
15 gross 1363 /**************************************************************/
16    
17     /* Paso: Flux correction transport solver
18     *
19 gross 1407 * solves Mu_t=Ku+q
20 gross 1570 * u(0) >=0
21 gross 1363 *
22 gross 1407 * Warning: the program assums sum_{j} k_{ij}=0!!!
23 gross 1363 *
24     */
25     /**************************************************************/
26    
27 gross 1804 /* Author: l.gross@uq.edu.au */
28 gross 1363
29     /**************************************************************/
30    
31     #include "Paso.h"
32     #include "Solver.h"
33     #include "SolverFCT.h"
34 gross 1804 #include "PasoUtil.h"
35 gross 1363 #include "escript/blocktimer.h"
36 gross 1370 #ifdef _OPENMP
37     #include <omp.h>
38     #endif
39     #ifdef PASO_MPI
40     #include <mpi.h>
41     #endif
42 gross 1363
43 ksteube 1812 /*
44     * inserts the source term into the problem
45     */
46     void Paso_FCT_setSource(const dim_t n,const double *source, double* sourceN, double* sourceP)
47     {
48     dim_t i;
49     register double rtmp;
50     /*
51     * seperate source into positive and negative part:
52     */
53     #pragma omp parallel for private(i,rtmp)
54     for (i = 0; i < n; ++i) {
55     rtmp=source[i];
56     if (rtmp <0) {
57     sourceN[i]=-rtmp;
58     sourceP[i]=0;
59     } else {
60     sourceN[i]= 0;
61     sourceP[i]= rtmp;
62     }
63     }
64     }
65    
66     err_t Paso_FCT_setUpRightHandSide(Paso_FCTransportProblem* fctp, const double dt, const double *u_m, Paso_Coupler* u_m_coupler, double * z_m,
67     Paso_SystemMatrix* flux_matrix, Paso_Coupler* uTilde_coupler, const double *b,
68     Paso_Coupler* QN_coupler, Paso_Coupler* QP_coupler,
69     double *RN_m, Paso_Coupler* RN_m_coupler, double* RP_m, Paso_Coupler* RP_m_coupler, const double *sourceN,
70     Paso_Performance* pp)
71     {
72     const dim_t n=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix);
73     /* distribute u */
74     Paso_Coupler_startCollect(u_m_coupler,u_m);
75     Paso_Coupler_finishCollect(u_m_coupler);
76     /*
77     * set the ant diffusion fluxes:
78     *
79     */
80     Paso_FCTransportProblem_setAntiDiffusionFlux(dt,fctp,flux_matrix,u_m_coupler);
81     /*
82     * apply pre flux-correction: f_{ij}:=0 if f_{ij}*(\tilde{u}[i]- \tilde{u}[j])<=0
83     *
84     */
85     Paso_FCTransportProblem_applyPreAntiDiffusionCorrection(flux_matrix,uTilde_coupler);
86     /*
87     * set flux limiters RN_m,RP_m
88     *
89     */
90     Paso_FCTransportProblem_setRs(flux_matrix,fctp->lumped_mass_matrix,QN_coupler,QP_coupler,RN_m,RP_m);
91     Paso_Coupler_startCollect(RN_m_coupler,RN_m);
92     Paso_Coupler_startCollect(RP_m_coupler,RP_m);
93     /*
94     * z_m[i]=b[i] - (m_i*u_m[i] - dt*theta*sum_{j<>i} l_{ij} (u_m[j]-u_m[i]) + dt q^-[i])
95     *
96     * note that iteration_matrix stores the negative values of the
97     * low order transport matrix l therefore a=dt*fctp->theta is used.
98     */
99     Paso_SolverFCT_setMuPaLuPbQ(z_m,fctp->lumped_mass_matrix, u_m_coupler,dt*fctp->theta,fctp->iteration_matrix,dt,sourceN);
100     /* z_m=b-z_m */
101     Paso_Update(n,-1.,z_m,1.,b);
102    
103     Paso_Coupler_finishCollect(RN_m_coupler);
104     Paso_Coupler_finishCollect(RP_m_coupler);
105     /* add corrected fluxes into z_m */
106     Paso_FCTransportProblem_addCorrectedFluxes(z_m,flux_matrix,RN_m_coupler,RP_m_coupler);
107     return NO_ERROR;
108     }
109    
110 gross 1804 void Paso_SolverFCT_solve(Paso_FCTransportProblem* fctp, double* u, double dt, double* source, Paso_Options* options) {
111     const dim_t FAILURES_MAX=5;
112 jfenwick 1981 dim_t m, i_substeps, Failed=0, i;
113 gross 1804 double *z_m=NULL, *b_n=NULL, *sourceP=NULL, *sourceN=NULL, *uTilde_n=NULL, *QN_n=NULL, *QP_n=NULL, *RN_m=NULL, *RP_m=NULL, *du_m=NULL;
114     Paso_Coupler *QN_n_coupler=NULL, *QP_n_coupler=NULL, *RN_m_coupler=NULL, *RP_m_coupler=NULL, *uTilde_n_coupler=NULL, *u_m_coupler=NULL;
115     Paso_SystemMatrix *flux_matrix_m=NULL;
116 phornby 1917 double dt_max, dt2,t, norm_u_m, omega, norm_du_m;
117 gross 1804 register double mass, rtmp;
118     const dim_t n=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix);
119     dim_t blockSize=Paso_FCTransportProblem_getBlockSize(fctp);
120     const double atol=options->absolute_tolerance;
121     const double rtol=options->tolerance;
122     const dim_t max_m=options->iter_max;
123     Paso_Performance pp;
124     bool_t converged=FALSE, max_m_reached=FALSE;
125     if (dt<=0.) {
126     Paso_setError(TYPE_ERROR,"Paso_SolverFCT_solve: dt must be positive.");
127     }
128     dt_max=Paso_FCTransportProblem_getSafeTimeStepSize(fctp);
129     /*
130     * allocate memory
131     *
132     */
133     z_m=TMPMEMALLOC(n,double);
134     Paso_checkPtr(z_m);
135     du_m=TMPMEMALLOC(n,double);
136     Paso_checkPtr(du_m);
137     b_n=TMPMEMALLOC(n,double);
138     Paso_checkPtr(b_n);
139     sourceP=TMPMEMALLOC(n,double);
140     Paso_checkPtr(sourceP);
141     sourceN=TMPMEMALLOC(n,double);
142     Paso_checkPtr(sourceN);
143     uTilde_n=TMPMEMALLOC(n,double);
144     Paso_checkPtr(uTilde_n);
145     QN_n=TMPMEMALLOC(n,double);
146     Paso_checkPtr(QN_n);
147     QP_n=TMPMEMALLOC(n,double);
148     Paso_checkPtr(QP_n);
149     RN_m=TMPMEMALLOC(n,double);
150     Paso_checkPtr(RN_m);
151     RP_m=TMPMEMALLOC(n,double);
152     Paso_checkPtr(RP_m);
153     QN_n_coupler=Paso_Coupler_alloc(Paso_FCTransportProblem_borrowConnector(fctp),blockSize);
154     QP_n_coupler=Paso_Coupler_alloc(Paso_FCTransportProblem_borrowConnector(fctp),blockSize);
155     RN_m_coupler=Paso_Coupler_alloc(Paso_FCTransportProblem_borrowConnector(fctp),blockSize);
156     RP_m_coupler=Paso_Coupler_alloc(Paso_FCTransportProblem_borrowConnector(fctp),blockSize);
157     uTilde_n_coupler=Paso_Coupler_alloc(Paso_FCTransportProblem_borrowConnector(fctp),blockSize);
158     u_m_coupler=Paso_Coupler_alloc(Paso_FCTransportProblem_borrowConnector(fctp),blockSize);
159     flux_matrix_m=Paso_SystemMatrix_alloc(fctp->transport_matrix->type,
160     fctp->transport_matrix->pattern,
161     fctp->transport_matrix->row_block_size,
162     fctp->transport_matrix->col_block_size);
163    
164     if (Paso_noError()) {
165     /*
166     * Preparation:
167     *
168     */
169     Paso_FCT_setSource(n, source, sourceN, sourceP);
170     /*
171     * let the show begin!!!!
172     *
173     */
174     t=0;
175     i_substeps=0;
176     Paso_Copy(n,u,fctp->u);
177     norm_u_m=Paso_lsup(n,u, fctp->mpi_info);
178     /* while(i_substeps<n_substeps && Paso_noError()) { */
179     if (fctp->dt_max < LARGE_POSITIVE_FLOAT) {
180     dt2=MIN(dt_max,dt);
181     } else {
182     dt2=dt;
183     }
184     while(t<dt && Paso_noError()) {
185 ksteube 1812 printf("substep step %d at t=%e (step size= %e)\n",i_substeps+1,t+dt2,dt2);
186 gross 1804 Paso_FCT_setUp(fctp,dt2,sourceN,sourceP,b_n,uTilde_n,uTilde_n_coupler,QN_n,QN_n_coupler,QP_n,QP_n_coupler,
187     options,&pp);
188     /* now the iteration starts */
189     m=0;
190     converged=FALSE;
191     max_m_reached=FALSE;
192     /* tolerance? */
193     while ( (!converged) && (! max_m_reached) && Paso_noError()) {
194     /* set z_m */
195     Paso_FCT_setUpRightHandSide(fctp,dt2,u,u_m_coupler,z_m,flux_matrix_m,uTilde_n_coupler,b_n,
196     QN_n_coupler,QP_n_coupler,RN_m,RN_m_coupler,RP_m,RP_m_coupler,sourceN,&pp);
197     /*
198     * now we solve the linear system to get the correction dt:
199     *
200     */
201     if (fctp->theta > 0) {
202     omega=1./(dt2*fctp->theta);
203     Paso_Solver_solvePreconditioner(fctp->iteration_matrix,du_m,z_m);
204     Paso_Update(n,1.,u,omega,du_m);
205     } else {
206     omega=1;
207     #pragma omp parallel for private(i,mass,rtmp)
208     for (i = 0; i < n; ++i) {
209     mass=fctp->lumped_mass_matrix[i];
210     if (ABS(mass)>0.) {
211     rtmp=z_m[i]/mass;
212     } else {
213     rtmp=0;
214     }
215     du_m[i]=rtmp;
216     u[i]+=rtmp;
217     }
218     }
219     norm_u_m=Paso_lsup(n,u, fctp->mpi_info);
220     norm_du_m=Paso_lsup(n,du_m, fctp->mpi_info)*omega;
221 ksteube 1812 printf("iteration step %d completed: norm increment= %e (tolerance = %e)\n",m+1, norm_du_m, rtol * norm_u_m + atol);
222 gross 1804
223     max_m_reached=(m>max_m);
224     converged=(norm_du_m <= rtol * norm_u_m + atol);
225     m++;
226     }
227     if (converged) {
228     Failed=0;
229 ksteube 1812 /* #pragma omp parallel for schedule(static) private(i) */
230 gross 1804 Paso_Copy(n,fctp->u,u);
231     i_substeps++;
232     t+=dt2;
233     if (fctp->dt_max < LARGE_POSITIVE_FLOAT) {
234     dt2=MIN3(dt_max,dt2*1.5,dt-t);
235     } else {
236     dt2=MIN(dt2*1.5,dt-t);
237     }
238     } else if (max_m_reached) {
239     /* if FAILURES_MAX failures in a row: give up */
240     if (Failed > FAILURES_MAX) {
241     Paso_setError(VALUE_ERROR,"Paso_SolverFCT_solve: no convergence after time step reduction.");
242     } else {
243     printf("no convergence in Paso_Solver_NewtonGMRES: Trying smaller time step size.");
244     dt2=dt*0.5;
245     Failed++;
246     }
247     }
248    
249     }
250     /*
251     * clean-up:
252     *
253     */
254     MEMFREE(z_m);
255     MEMFREE(du_m);
256     TMPMEMFREE(b_n);
257     Paso_SystemMatrix_free(flux_matrix_m);
258     TMPMEMFREE(sourceP);
259     TMPMEMFREE(sourceN);
260     TMPMEMFREE(uTilde_n);
261     TMPMEMFREE(QN_n);
262     TMPMEMFREE(QP_n);
263     TMPMEMFREE(RN_m);
264     TMPMEMFREE(RP_m);
265     Paso_Coupler_free(QN_n_coupler);
266     Paso_Coupler_free(QP_n_coupler);
267     Paso_Coupler_free(RN_m_coupler);
268     Paso_Coupler_free(RP_m_coupler);
269     Paso_Coupler_free(uTilde_n_coupler);
270     Paso_Coupler_free(u_m_coupler);
271     options->absolute_tolerance=atol;
272     options->tolerance=rtol;
273     }
274     }
275 gross 1407 double Paso_FCTransportProblem_getSafeTimeStepSize(Paso_FCTransportProblem* fctp)
276     {
277 gross 1804 dim_t i, n;
278 gross 1407 double dt_max, dt_max_loc;
279 gross 1804 register double l_ii,m;
280     n=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix);
281 gross 1407 if (! fctp->valid_matrices) {
282     fctp->dt_max=LARGE_POSITIVE_FLOAT;
283 gross 1363 /* extract the row sum of the advective part */
284 gross 1407 Paso_SystemMatrix_rowSum(fctp->mass_matrix,fctp->lumped_mass_matrix);
285 gross 1363
286 gross 1407 /* set low order transport operator */
287     Paso_FCTransportProblem_setLowOrderOperator(fctp);
288     /*
289 gross 1370 * calculate time step size:
290 gross 1407 */
291     dt_max=LARGE_POSITIVE_FLOAT;
292 gross 1370 if (fctp->theta < 1.) {
293 gross 1407 #pragma omp parallel private(dt_max_loc)
294 gross 1370 {
295 gross 1407 dt_max_loc=LARGE_POSITIVE_FLOAT;
296 gross 1804 #pragma omp for schedule(static) private(i,l_ii,m)
297     for (i=0;i<n;++i) {
298 phornby 1917 l_ii=fctp->main_diagonal_low_order_transport_matrix[i];
299     m=fctp->lumped_mass_matrix[i];
300     if ( (l_ii<0 && m>0.) || (l_ii>0 && m<0) ) {
301     dt_max_loc=MIN(dt_max_loc,-m/l_ii);
302     }
303     }
304 gross 1372 #pragma omp critical
305 phornby 1917 {
306     dt_max=MIN(dt_max,dt_max_loc);
307     }
308 gross 1370 }
309     #ifdef PASO_MPI
310 gross 1407 dt_max_loc = dt_max;
311 phornby 1917 MPI_Allreduce(&dt_max_loc, &dt_max, 1, MPI_DOUBLE, MPI_MIN, fctp->mpi_info->comm);
312     #endif
313     if (dt_max<LARGE_POSITIVE_FLOAT) dt_max*=1./(1.-fctp->theta);
314 gross 1407 }
315     if (dt_max <= 0.) {
316 gross 1400 Paso_setError(TYPE_ERROR,"Paso_SolverFCT_solve: dt must be positive.");
317 gross 1407 } else {
318 phornby 1917 if (dt_max<LARGE_POSITIVE_FLOAT)
319     printf("maximum time step size is %e (theta = %e).\n",dt_max,fctp->theta);
320 gross 1370 }
321 gross 1407 fctp->dt_max=dt_max;
322 gross 1370 fctp->valid_matrices=Paso_noError();
323 gross 1363 }
324 gross 1407 return fctp->dt_max;
325     }
326 ksteube 1812 void Paso_FCT_setUp(Paso_FCTransportProblem* fctp, const double dt, const double *sourceN, const double *sourceP, double* b, double* uTilde,
327 gross 1804 Paso_Coupler* uTilde_coupler, double *QN, Paso_Coupler* QN_coupler, double *QP, Paso_Coupler* QP_coupler,
328     Paso_Options* options, Paso_Performance* pp)
329     {
330     dim_t i;
331     const dim_t n=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix);
332     double omega, factor;
333     register double m, u_tilde_i, rtmp4;
334     /* distribute u */
335     Paso_Coupler_startCollect(fctp->u_coupler,fctp->u);
336     Paso_Coupler_finishCollect(fctp->u_coupler);
337     /*
338     * b^n[i]=m u^n[i] + dt*(1-theta) sum_{j <> i} l_{ij}*(u^n[j]-u^n[i]) + dt*sourceP[i]
339     *
340     * note that iteration_matrix stores the negative values of the
341     * low order transport matrix l therefore a=-dt*(1-fctp->theta) is used.
342     *
343     */
344     Paso_SolverFCT_setMuPaLuPbQ(b,fctp->lumped_mass_matrix,fctp->u_coupler,
345     -dt*(1-fctp->theta),fctp->iteration_matrix,dt,sourceP);
346     /*
347     * uTilde[i]=b[i]/m[i]
348     *
349     * fctp->iteration_matrix[i,i]=m[i]/(dt theta) + \frac{1}{\theta} \frac{q^-[i]}-l[i,i]
350     *
351     */
352     if (fctp->theta > 0) {
353     Paso_solve_free(fctp->iteration_matrix);
354     omega=1./(dt*fctp->theta);
355     factor=dt*omega;
356     #pragma omp parallel for private(i,m,u_tilde_i,rtmp4)
357     for (i = 0; i < n; ++i) {
358     m=fctp->lumped_mass_matrix[i];
359     if (ABS(m)>0.) {
360     u_tilde_i=b[i]/m;
361     } else {
362     u_tilde_i=fctp->u[i];
363     }
364     rtmp4=m*omega-fctp->main_diagonal_low_order_transport_matrix[i];
365     if (ABS(u_tilde_i)>0) rtmp4+=sourceN[i]*factor/u_tilde_i;
366     fctp->iteration_matrix->mainBlock->val[fctp->main_iptr[i]]=rtmp4;
367     uTilde[i]=u_tilde_i;
368     }
369     Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER_INIT);
370     Paso_Solver_setPreconditioner(fctp->iteration_matrix,options);
371     Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER_INIT);
372     } else {
373     #pragma omp parallel for private(i,m,u_tilde_i)
374     for (i = 0; i < n; ++i) {
375     m=fctp->lumped_mass_matrix[i];
376     if (ABS(m)>0.) {
377     u_tilde_i=b[i]/m;
378     } else {
379     u_tilde_i=fctp->u[i];
380     }
381     uTilde[i]=u_tilde_i;
382 gross 1407 }
383 gross 1804 /* no update of iteration_matrix required! */
384     } /* end (fctp->theta > 0) */
385 gross 1562
386 gross 1804 /* distribute uTilde: */
387     Paso_Coupler_startCollect(uTilde_coupler,uTilde);
388     Paso_Coupler_finishCollect(uTilde_coupler);
389     /*
390     * calculate QP[i] max_{j} (\tilde{u}[j]- \tilde{u}[i] )
391     * QN[i] min_{j} (\tilde{u}[j]- \tilde{u}[i] )
392     *
393     */
394     Paso_SolverFCT_setQs(uTilde_coupler,QN,QP,fctp->iteration_matrix);
395     Paso_Coupler_startCollect(QN_coupler,QN);
396     Paso_Coupler_startCollect(QP_coupler,QP);
397     Paso_Coupler_finishCollect(QN_coupler);
398     Paso_Coupler_finishCollect(QP_coupler);
399     }

  ViewVC Help
Powered by ViewVC 1.1.26