/[escript]/trunk/paso/src/SolverFCT_solve.c
ViewVC logotype

Annotation of /trunk/paso/src/SolverFCT_solve.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1804 - (hide annotations)
Wed Sep 24 07:52:19 2008 UTC (11 years, 1 month ago) by gross
File MIME type: text/plain
File size: 15546 byte(s)
a robister version of the upwinding scheme
1 gross 1363 /* $Id:$ */
2    
3     /*******************************************************
4     *
5 gross 1407 * Copyright 2007,2008 by University of Queensland
6 gross 1363 *
7 gross 1804 * http://esscc.uq.edu.au
8 gross 1363 * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13    
14     /**************************************************************/
15    
16     /* Paso: Flux correction transport solver
17     *
18 gross 1407 * solves Mu_t=Ku+q
19 gross 1570 * u(0) >=0
20 gross 1363 *
21 gross 1407 * Warning: the program assums sum_{j} k_{ij}=0!!!
22 gross 1363 *
23     */
24     /**************************************************************/
25    
26 gross 1804 /* Author: l.gross@uq.edu.au */
27 gross 1363
28     /**************************************************************/
29    
30     #include "Paso.h"
31     #include "Solver.h"
32     #include "SolverFCT.h"
33 gross 1804 #include "PasoUtil.h"
34 gross 1363 #include "escript/blocktimer.h"
35 gross 1370 #ifdef _OPENMP
36     #include <omp.h>
37     #endif
38     #ifdef PASO_MPI
39     #include <mpi.h>
40     #endif
41 gross 1363
42 gross 1804 void Paso_SolverFCT_solve(Paso_FCTransportProblem* fctp, double* u, double dt, double* source, Paso_Options* options) {
43     const dim_t FAILURES_MAX=5;
44     err_t error_code;
45     dim_t m,n_substeps, i_substeps, Failed, i, iter;
46     err_t errorCode;
47     double *z_m=NULL, *b_n=NULL, *sourceP=NULL, *sourceN=NULL, *uTilde_n=NULL, *QN_n=NULL, *QP_n=NULL, *RN_m=NULL, *RP_m=NULL, *du_m=NULL;
48     Paso_Coupler *QN_n_coupler=NULL, *QP_n_coupler=NULL, *RN_m_coupler=NULL, *RP_m_coupler=NULL, *uTilde_n_coupler=NULL, *u_m_coupler=NULL;
49     Paso_SystemMatrix *flux_matrix_m=NULL;
50     double dt_max, dt2,t, norm_u_m, omega, norm_du_m, tol;
51     register double mass, rtmp;
52     const dim_t n=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix);
53     dim_t blockSize=Paso_FCTransportProblem_getBlockSize(fctp);
54     const double atol=options->absolute_tolerance;
55     const double rtol=options->tolerance;
56     const dim_t max_m=options->iter_max;
57     Paso_Performance pp;
58     bool_t converged=FALSE, max_m_reached=FALSE;
59     if (dt<=0.) {
60     Paso_setError(TYPE_ERROR,"Paso_SolverFCT_solve: dt must be positive.");
61     }
62     dt_max=Paso_FCTransportProblem_getSafeTimeStepSize(fctp);
63     /*
64     * allocate memory
65     *
66     */
67     z_m=TMPMEMALLOC(n,double);
68     Paso_checkPtr(z_m);
69     du_m=TMPMEMALLOC(n,double);
70     Paso_checkPtr(du_m);
71     b_n=TMPMEMALLOC(n,double);
72     Paso_checkPtr(b_n);
73     sourceP=TMPMEMALLOC(n,double);
74     Paso_checkPtr(sourceP);
75     sourceN=TMPMEMALLOC(n,double);
76     Paso_checkPtr(sourceN);
77     uTilde_n=TMPMEMALLOC(n,double);
78     Paso_checkPtr(uTilde_n);
79     QN_n=TMPMEMALLOC(n,double);
80     Paso_checkPtr(QN_n);
81     QP_n=TMPMEMALLOC(n,double);
82     Paso_checkPtr(QP_n);
83     RN_m=TMPMEMALLOC(n,double);
84     Paso_checkPtr(RN_m);
85     RP_m=TMPMEMALLOC(n,double);
86     Paso_checkPtr(RP_m);
87     QN_n_coupler=Paso_Coupler_alloc(Paso_FCTransportProblem_borrowConnector(fctp),blockSize);
88     QP_n_coupler=Paso_Coupler_alloc(Paso_FCTransportProblem_borrowConnector(fctp),blockSize);
89     RN_m_coupler=Paso_Coupler_alloc(Paso_FCTransportProblem_borrowConnector(fctp),blockSize);
90     RP_m_coupler=Paso_Coupler_alloc(Paso_FCTransportProblem_borrowConnector(fctp),blockSize);
91     uTilde_n_coupler=Paso_Coupler_alloc(Paso_FCTransportProblem_borrowConnector(fctp),blockSize);
92     u_m_coupler=Paso_Coupler_alloc(Paso_FCTransportProblem_borrowConnector(fctp),blockSize);
93     flux_matrix_m=Paso_SystemMatrix_alloc(fctp->transport_matrix->type,
94     fctp->transport_matrix->pattern,
95     fctp->transport_matrix->row_block_size,
96     fctp->transport_matrix->col_block_size);
97    
98     if (Paso_noError()) {
99     /*
100     * Preparation:
101     *
102     */
103     Paso_FCT_setSource(n, source, sourceN, sourceP);
104     /*
105     * let the show begin!!!!
106     *
107     */
108     t=0;
109     i_substeps=0;
110     Paso_Copy(n,u,fctp->u);
111     norm_u_m=Paso_lsup(n,u, fctp->mpi_info);
112     /* while(i_substeps<n_substeps && Paso_noError()) { */
113     if (fctp->dt_max < LARGE_POSITIVE_FLOAT) {
114     dt2=MIN(dt_max,dt);
115     } else {
116     dt2=dt;
117     }
118     while(t<dt && Paso_noError()) {
119     printf("substep step %ld at t=%e (step size= %e)\n",i_substeps+1,t+dt2,dt2);
120     Paso_FCT_setUp(fctp,dt2,sourceN,sourceP,b_n,uTilde_n,uTilde_n_coupler,QN_n,QN_n_coupler,QP_n,QP_n_coupler,
121     options,&pp);
122     /* now the iteration starts */
123     m=0;
124     converged=FALSE;
125     max_m_reached=FALSE;
126     /* tolerance? */
127     while ( (!converged) && (! max_m_reached) && Paso_noError()) {
128     /* set z_m */
129     Paso_FCT_setUpRightHandSide(fctp,dt2,u,u_m_coupler,z_m,flux_matrix_m,uTilde_n_coupler,b_n,
130     QN_n_coupler,QP_n_coupler,RN_m,RN_m_coupler,RP_m,RP_m_coupler,sourceN,&pp);
131     /*
132     * now we solve the linear system to get the correction dt:
133     *
134     */
135     if (fctp->theta > 0) {
136     omega=1./(dt2*fctp->theta);
137     Paso_Solver_solvePreconditioner(fctp->iteration_matrix,du_m,z_m);
138     Paso_Update(n,1.,u,omega,du_m);
139     } else {
140     omega=1;
141     #pragma omp parallel for private(i,mass,rtmp)
142     for (i = 0; i < n; ++i) {
143     mass=fctp->lumped_mass_matrix[i];
144     if (ABS(mass)>0.) {
145     rtmp=z_m[i]/mass;
146     } else {
147     rtmp=0;
148     }
149     du_m[i]=rtmp;
150     u[i]+=rtmp;
151     }
152     }
153     norm_u_m=Paso_lsup(n,u, fctp->mpi_info);
154     norm_du_m=Paso_lsup(n,du_m, fctp->mpi_info)*omega;
155     printf("iteration step %ld completed: norm increment= %e (tolerance = %e)\n",m+1, norm_du_m, rtol * norm_u_m + atol);
156    
157     max_m_reached=(m>max_m);
158     converged=(norm_du_m <= rtol * norm_u_m + atol);
159     m++;
160     }
161     if (converged) {
162     Failed=0;
163     #pragma omp parallel for schedule(static) private(i)
164     Paso_Copy(n,fctp->u,u);
165     i_substeps++;
166     t+=dt2;
167     if (fctp->dt_max < LARGE_POSITIVE_FLOAT) {
168     dt2=MIN3(dt_max,dt2*1.5,dt-t);
169     } else {
170     dt2=MIN(dt2*1.5,dt-t);
171     }
172     } else if (max_m_reached) {
173     /* if FAILURES_MAX failures in a row: give up */
174     if (Failed > FAILURES_MAX) {
175     Paso_setError(VALUE_ERROR,"Paso_SolverFCT_solve: no convergence after time step reduction.");
176     } else {
177     printf("no convergence in Paso_Solver_NewtonGMRES: Trying smaller time step size.");
178     dt2=dt*0.5;
179     Failed++;
180     }
181     }
182    
183     }
184     /*
185     * clean-up:
186     *
187     */
188     MEMFREE(z_m);
189     MEMFREE(du_m);
190     TMPMEMFREE(b_n);
191     Paso_SystemMatrix_free(flux_matrix_m);
192     TMPMEMFREE(sourceP);
193     TMPMEMFREE(sourceN);
194     TMPMEMFREE(uTilde_n);
195     TMPMEMFREE(QN_n);
196     TMPMEMFREE(QP_n);
197     TMPMEMFREE(RN_m);
198     TMPMEMFREE(RP_m);
199     Paso_Coupler_free(QN_n_coupler);
200     Paso_Coupler_free(QP_n_coupler);
201     Paso_Coupler_free(RN_m_coupler);
202     Paso_Coupler_free(RP_m_coupler);
203     Paso_Coupler_free(uTilde_n_coupler);
204     Paso_Coupler_free(u_m_coupler);
205     options->absolute_tolerance=atol;
206     options->tolerance=rtol;
207     }
208     }
209 gross 1407 double Paso_FCTransportProblem_getSafeTimeStepSize(Paso_FCTransportProblem* fctp)
210     {
211 gross 1804 dim_t i, n;
212 gross 1407 double dt_max, dt_max_loc;
213 gross 1804 register double l_ii,m;
214     n=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix);
215 gross 1407 if (! fctp->valid_matrices) {
216     fctp->dt_max=LARGE_POSITIVE_FLOAT;
217 gross 1363 /* extract the row sum of the advective part */
218 gross 1407 Paso_SystemMatrix_rowSum(fctp->mass_matrix,fctp->lumped_mass_matrix);
219 gross 1363
220 gross 1407 /* set low order transport operator */
221     Paso_FCTransportProblem_setLowOrderOperator(fctp);
222     /*
223 gross 1370 * calculate time step size:
224 gross 1407 */
225     dt_max=LARGE_POSITIVE_FLOAT;
226 gross 1370 if (fctp->theta < 1.) {
227 gross 1407 #pragma omp parallel private(dt_max_loc)
228 gross 1370 {
229 gross 1407 dt_max_loc=LARGE_POSITIVE_FLOAT;
230 gross 1804 #pragma omp for schedule(static) private(i,l_ii,m)
231     for (i=0;i<n;++i) {
232     l_ii=fctp->main_diagonal_low_order_transport_matrix[i];
233     m=fctp->lumped_mass_matrix[i];
234     if ( (l_ii<0 && m>0.) || (l_ii>0 && m<0) ) {
235     dt_max_loc=MIN(dt_max_loc,-m/l_ii);
236 gross 1370 }
237     }
238 gross 1372 #pragma omp critical
239 gross 1370 {
240 gross 1407 dt_max=MIN(dt_max,dt_max_loc);
241 gross 1370 }
242     }
243     #ifdef PASO_MPI
244 gross 1407 dt_max_loc = dt_max;
245     MPI_Allreduce(&dt_max_loc, &dt_max, 1, MPI_DOUBLE, MPI_MIN, fctp->mpi_info->comm);
246     #endif
247     if (dt_max<LARGE_POSITIVE_FLOAT) dt_max*=1./(1.-fctp->theta);
248     }
249     if (dt_max <= 0.) {
250 gross 1400 Paso_setError(TYPE_ERROR,"Paso_SolverFCT_solve: dt must be positive.");
251 gross 1407 } else {
252     if (dt_max<LARGE_POSITIVE_FLOAT) printf("maximum time step size is %e (theta = %e).\n",dt_max,fctp->theta);
253 gross 1370 }
254 gross 1407 fctp->dt_max=dt_max;
255 gross 1370 fctp->valid_matrices=Paso_noError();
256 gross 1363 }
257 gross 1407 return fctp->dt_max;
258     }
259 gross 1804 /*
260     * inserts the source term into the problem
261     */
262     err_t Paso_FCT_setSource(const dim_t n,const double *source, double* sourceN, double* sourceP)
263     {
264     dim_t i;
265     register double rtmp;
266 gross 1400 /*
267 gross 1804 * seperate source into positive and negative part:
268 gross 1400 */
269 gross 1804 #pragma omp parallel for private(i,rtmp)
270     for (i = 0; i < n; ++i) {
271     rtmp=source[i];
272     if (rtmp <0) {
273     sourceN[i]=-rtmp;
274     sourceP[i]=0;
275 gross 1400 } else {
276 gross 1804 sourceN[i]= 0;
277     sourceP[i]= rtmp;
278 gross 1400 }
279 gross 1804 }
280     }
281     err_t Paso_FCT_setUp(Paso_FCTransportProblem* fctp, const double dt, const double *sourceN, const double *sourceP, double* b, double* uTilde,
282     Paso_Coupler* uTilde_coupler, double *QN, Paso_Coupler* QN_coupler, double *QP, Paso_Coupler* QP_coupler,
283     Paso_Options* options, Paso_Performance* pp)
284     {
285     dim_t i;
286     const dim_t n=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix);
287     double omega, factor;
288     register double m, u_tilde_i, rtmp4;
289     /* distribute u */
290     Paso_Coupler_startCollect(fctp->u_coupler,fctp->u);
291     Paso_Coupler_finishCollect(fctp->u_coupler);
292     /*
293     * b^n[i]=m u^n[i] + dt*(1-theta) sum_{j <> i} l_{ij}*(u^n[j]-u^n[i]) + dt*sourceP[i]
294     *
295     * note that iteration_matrix stores the negative values of the
296     * low order transport matrix l therefore a=-dt*(1-fctp->theta) is used.
297     *
298     */
299     Paso_SolverFCT_setMuPaLuPbQ(b,fctp->lumped_mass_matrix,fctp->u_coupler,
300     -dt*(1-fctp->theta),fctp->iteration_matrix,dt,sourceP);
301     /*
302     * uTilde[i]=b[i]/m[i]
303     *
304     * fctp->iteration_matrix[i,i]=m[i]/(dt theta) + \frac{1}{\theta} \frac{q^-[i]}-l[i,i]
305     *
306     */
307     if (fctp->theta > 0) {
308     Paso_solve_free(fctp->iteration_matrix);
309     omega=1./(dt*fctp->theta);
310     factor=dt*omega;
311     #pragma omp parallel for private(i,m,u_tilde_i,rtmp4)
312     for (i = 0; i < n; ++i) {
313     m=fctp->lumped_mass_matrix[i];
314     if (ABS(m)>0.) {
315     u_tilde_i=b[i]/m;
316     } else {
317     u_tilde_i=fctp->u[i];
318     }
319     rtmp4=m*omega-fctp->main_diagonal_low_order_transport_matrix[i];
320     if (ABS(u_tilde_i)>0) rtmp4+=sourceN[i]*factor/u_tilde_i;
321     fctp->iteration_matrix->mainBlock->val[fctp->main_iptr[i]]=rtmp4;
322     uTilde[i]=u_tilde_i;
323     }
324     Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER_INIT);
325     Paso_Solver_setPreconditioner(fctp->iteration_matrix,options);
326     Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER_INIT);
327     } else {
328     #pragma omp parallel for private(i,m,u_tilde_i)
329     for (i = 0; i < n; ++i) {
330     m=fctp->lumped_mass_matrix[i];
331     if (ABS(m)>0.) {
332     u_tilde_i=b[i]/m;
333     } else {
334     u_tilde_i=fctp->u[i];
335     }
336     uTilde[i]=u_tilde_i;
337 gross 1407 }
338 gross 1804 /* no update of iteration_matrix required! */
339     } /* end (fctp->theta > 0) */
340 gross 1562
341 gross 1804 /* distribute uTilde: */
342     Paso_Coupler_startCollect(uTilde_coupler,uTilde);
343     Paso_Coupler_finishCollect(uTilde_coupler);
344     /*
345     * calculate QP[i] max_{j} (\tilde{u}[j]- \tilde{u}[i] )
346     * QN[i] min_{j} (\tilde{u}[j]- \tilde{u}[i] )
347     *
348     */
349     Paso_SolverFCT_setQs(uTilde_coupler,QN,QP,fctp->iteration_matrix);
350     Paso_Coupler_startCollect(QN_coupler,QN);
351     Paso_Coupler_startCollect(QP_coupler,QP);
352     Paso_Coupler_finishCollect(QN_coupler);
353     Paso_Coupler_finishCollect(QP_coupler);
354     }
355     err_t Paso_FCT_setUpRightHandSide(Paso_FCTransportProblem* fctp, const double dt, const double *u_m, Paso_Coupler* u_m_coupler, double * z_m,
356     Paso_SystemMatrix* flux_matrix, Paso_Coupler* uTilde_coupler, const double *b,
357     Paso_Coupler* QN_coupler, Paso_Coupler* QP_coupler,
358     double *RN_m, Paso_Coupler* RN_m_coupler, double* RP_m, Paso_Coupler* RP_m_coupler, const double *sourceN,
359     Paso_Performance* pp)
360     {
361     dim_t i;
362     const dim_t n=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix);
363     register double m, rtmp;
364     /* distribute u */
365     Paso_Coupler_startCollect(u_m_coupler,u_m);
366     Paso_Coupler_finishCollect(u_m_coupler);
367     /*
368     * set the ant diffusion fluxes:
369     *
370     */
371     Paso_FCTransportProblem_setAntiDiffusionFlux(dt,fctp,flux_matrix,u_m_coupler);
372     /*
373     * apply pre flux-correction: f_{ij}:=0 if f_{ij}*(\tilde{u}[i]- \tilde{u}[j])<=0
374     *
375     */
376     Paso_FCTransportProblem_applyPreAntiDiffusionCorrection(flux_matrix,uTilde_coupler);
377     /*
378     * set flux limiters RN_m,RP_m
379     *
380     */
381     Paso_FCTransportProblem_setRs(flux_matrix,fctp->lumped_mass_matrix,QN_coupler,QP_coupler,RN_m,RP_m);
382     Paso_Coupler_startCollect(RN_m_coupler,RN_m);
383     Paso_Coupler_startCollect(RP_m_coupler,RP_m);
384     /*
385     * z_m[i]=b[i] - (m_i*u_m[i] - dt*theta*sum_{j<>i} l_{ij} (u_m[j]-u_m[i]) + dt q^-[i])
386     *
387     * note that iteration_matrix stores the negative values of the
388     * low order transport matrix l therefore a=dt*fctp->theta is used.
389     */
390     Paso_SolverFCT_setMuPaLuPbQ(z_m,fctp->lumped_mass_matrix, u_m_coupler,dt*fctp->theta,fctp->iteration_matrix,dt,sourceN);
391     /* z_m=b-z_m */
392     Paso_Update(n,-1.,z_m,1.,b);
393 gross 1562
394 gross 1804 Paso_Coupler_finishCollect(RN_m_coupler);
395     Paso_Coupler_finishCollect(RP_m_coupler);
396     /* add corrected fluxes into z_m */
397     Paso_FCTransportProblem_addCorrectedFluxes(z_m,flux_matrix,RN_m_coupler,RP_m_coupler);
398     return NO_ERROR;
399 gross 1375 }

  ViewVC Help
Powered by ViewVC 1.1.26