/[escript]/trunk/paso/src/SolverFCT_solve.c
ViewVC logotype

Annotation of /trunk/paso/src/SolverFCT_solve.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1552 - (hide annotations)
Thu May 8 08:52:41 2008 UTC (11 years, 9 months ago) by gross
File MIME type: text/plain
File size: 13707 byte(s)
some changes to make the implementatiopn of a upwind MPI version easier
1 gross 1363 /* $Id:$ */
2    
3     /*******************************************************
4     *
5 gross 1407 * Copyright 2007,2008 by University of Queensland
6 gross 1363 *
7 gross 1407 * http://esscc.uq.edu_m.au
8 gross 1363 * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13    
14     /**************************************************************/
15    
16     /* Paso: Flux correction transport solver
17     *
18 gross 1407 * solves Mu_t=Ku+q
19     * u(0) >= u_min
20 gross 1363 *
21 gross 1407 * Warning: the program assums sum_{j} k_{ij}=0!!!
22 gross 1363 *
23     */
24     /**************************************************************/
25    
26 gross 1407 /* Author: l.gross@uq.edu_m.au */
27 gross 1363
28     /**************************************************************/
29    
30     #include "Paso.h"
31     #include "Solver.h"
32     #include "SolverFCT.h"
33     #include "escript/blocktimer.h"
34 gross 1370 #ifdef _OPENMP
35     #include <omp.h>
36     #endif
37     #ifdef PASO_MPI
38     #include <mpi.h>
39     #endif
40 gross 1363
41 gross 1407 double Paso_FCTransportProblem_getSafeTimeStepSize(Paso_FCTransportProblem* fctp)
42     {
43     dim_t i, n_rows;
44     double dt_max, dt_max_loc;
45     register double rtmp1,rtmp2;
46     n_rows=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix);
47     if (! fctp->valid_matrices) {
48     fctp->dt_max=LARGE_POSITIVE_FLOAT;
49 gross 1363 /* extract the row sum of the advective part */
50 gross 1407 Paso_SystemMatrix_rowSum(fctp->mass_matrix,fctp->lumped_mass_matrix);
51 gross 1363
52 gross 1407 /* set low order transport operator */
53     Paso_FCTransportProblem_setLowOrderOperator(fctp);
54     /*
55 gross 1370 * calculate time step size:
56 gross 1407 */
57     dt_max=LARGE_POSITIVE_FLOAT;
58 gross 1370 if (fctp->theta < 1.) {
59 gross 1407 #pragma omp parallel private(dt_max_loc)
60 gross 1370 {
61 gross 1407 dt_max_loc=LARGE_POSITIVE_FLOAT;
62     #pragma omp for schedule(static) private(i,rtmp1,rtmp2)
63 gross 1372 for (i=0;i<n_rows;++i) {
64 gross 1407 rtmp1=fctp->main_diagonal_low_order_transport_matrix[i];
65 gross 1370 rtmp2=fctp->lumped_mass_matrix[i];
66 gross 1513 if ( (rtmp1<0 && rtmp2>0.) || (rtmp1>0 && rtmp2<0.) ) {
67 gross 1407 dt_max_loc=MIN(dt_max_loc,-rtmp2/rtmp1);
68 gross 1370 }
69     }
70 gross 1372 #pragma omp critical
71 gross 1370 {
72 gross 1407 dt_max=MIN(dt_max,dt_max_loc);
73 gross 1370 }
74     }
75     #ifdef PASO_MPI
76 gross 1407 dt_max_loc = dt_max;
77     MPI_Allreduce(&dt_max_loc, &dt_max, 1, MPI_DOUBLE, MPI_MIN, fctp->mpi_info->comm);
78     #endif
79     if (dt_max<LARGE_POSITIVE_FLOAT) dt_max*=1./(1.-fctp->theta);
80     }
81     if (dt_max <= 0.) {
82 gross 1400 Paso_setError(TYPE_ERROR,"Paso_SolverFCT_solve: dt must be positive.");
83 gross 1407 } else {
84     if (dt_max<LARGE_POSITIVE_FLOAT) printf("maximum time step size is %e (theta = %e).\n",dt_max,fctp->theta);
85 gross 1370 }
86 gross 1407 fctp->dt_max=dt_max;
87 gross 1370 fctp->valid_matrices=Paso_noError();
88 gross 1363 }
89 gross 1407 return fctp->dt_max;
90     }
91 gross 1363
92 gross 1407
93    
94    
95     void Paso_SolverFCT_solve(Paso_FCTransportProblem* fctp, double* u, double dt, double* source, Paso_Options* options) {
96    
97     index_t i, j;
98     int n_substeps,n, m;
99     double dt_max, omega, dt2,t;
100     double local_norm[2],norm[2],local_norm_u,local_norm_du,norm_u,norm_du, tolerance;
101     register double rtmp1,rtmp2,rtmp3,rtmp4, rtmp;
102     double *b_n=NULL, *sourceP=NULL, *sourceN=NULL, *uTilde_n=NULL, *QN_n=NULL, *QP_n=NULL, *RN_m=NULL, *RP_m=NULL, *z_m=NULL, *du_m=NULL;
103     Paso_SystemMatrix *flux_matrix=NULL;
104     dim_t n_rows=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix);
105     bool_t converged;
106     if (dt<=0.) {
107     Paso_setError(TYPE_ERROR,"Paso_SolverFCT_solve: dt must be positive.");
108     }
109     dt_max=Paso_FCTransportProblem_getSafeTimeStepSize(fctp);
110 gross 1400 /*
111 gross 1407 * allocate memory
112 gross 1400 *
113     */
114 gross 1407 b_n=TMPMEMALLOC(n_rows,double);
115     Paso_checkPtr(b_n);
116     sourceP=TMPMEMALLOC(n_rows,double);
117     Paso_checkPtr(sourceP);
118     sourceN=TMPMEMALLOC(n_rows,double);
119     Paso_checkPtr(sourceN);
120     uTilde_n=TMPMEMALLOC(n_rows,double);
121     Paso_checkPtr(uTilde_n);
122     QN_n=TMPMEMALLOC(n_rows,double);
123     Paso_checkPtr(QN_n);
124     QP_n=TMPMEMALLOC(n_rows,double);
125     Paso_checkPtr(QP_n);
126     RN_m=TMPMEMALLOC(n_rows,double);
127     Paso_checkPtr(RN_m);
128     RP_m=TMPMEMALLOC(n_rows,double);
129     Paso_checkPtr(RP_m);
130     z_m=TMPMEMALLOC(n_rows,double);
131     Paso_checkPtr(z_m);
132     du_m=TMPMEMALLOC(n_rows,double);
133     Paso_checkPtr(du_m);
134     flux_matrix=Paso_SystemMatrix_alloc(fctp->transport_matrix->type,
135     fctp->transport_matrix->pattern,
136     fctp->transport_matrix->row_block_size,
137     fctp->transport_matrix->col_block_size);
138 gross 1400 if (Paso_noError()) {
139     /*
140     * Preparation:
141     *
142     */
143    
144     /* decide on substepping */
145 gross 1407 if (fctp->dt_max < LARGE_POSITIVE_FLOAT) {
146     n_substeps=ceil(dt/dt_max);
147 gross 1400 } else {
148 gross 1407 n_substeps=1;
149 gross 1400 }
150     dt2=dt/n_substeps;
151 gross 1407 printf("%d time steps of size is %e (theta = %e, dt_max=%e).\n",n_substeps, dt2,fctp->theta, dt_max);
152     /*
153     * seperate source into positive and negative part:
154     */
155     #pragma omp parallel for private(i,rtmp)
156     for (i = 0; i < n_rows; ++i) {
157     rtmp=source[i];
158     if (rtmp <0) {
159 gross 1410 sourceN[i]=-rtmp;
160     sourceP[i]=0;
161 gross 1407 } else {
162 gross 1410 sourceN[i]= 0;
163     sourceP[i]= rtmp;
164 gross 1400 }
165 gross 1407 }
166 gross 1410 /* for (i = 0; i < n_rows; ++i) printf("%d : %e \n",i,source[i],sourceP[i],sourceN[i]) */
167 gross 1407 /*
168 gross 1410 * let the show begin!!!!
169 gross 1407 *
170     */
171 gross 1400 t=dt2;
172     n=0;
173     tolerance=options->tolerance;
174     while(n<n_substeps && Paso_noError()) {
175     printf("substep step %d at t=%e\n",n+1,t);
176 gross 1410 #pragma omp parallel for private(i)
177     for (i = 0; i < n_rows; ++i) {
178     u[i]=fctp->u[i];
179     }
180 gross 1407 /*
181 gross 1410 * b^n[i]=m u^n[i] + dt2*(1-theta) sum_{j <> i} l_{ij}*(u^n[j]-u^n[i]) + dt2*sourceP[i]
182 gross 1407 *
183     * note that iteration_matrix stores the negative values of the
184 gross 1410 * low order transport matrix l therefore a=-dt2*(1-fctp->theta) is used.
185 gross 1407 *
186     */
187     Paso_SolverFCT_setMuPaLuPbQ(b_n,fctp->lumped_mass_matrix,u,
188 gross 1410 -dt2*(1-fctp->theta),fctp->iteration_matrix,dt2,sourceP);
189 gross 1407 /*
190     * uTilde_n[i]=b[i]/m[i]
191     *
192 gross 1410 * a[i,i]=m[i]/(dt2 theta) + \frac{1}{\theta} \frac{q^-[i]}-l[i,i]
193 gross 1407 *
194     */
195     if (fctp->theta > 0) {
196     Paso_solve_free(fctp->iteration_matrix);
197 gross 1410 omega=1./(dt2*fctp->theta);
198     rtmp2=dt2*omega;
199 gross 1407 #pragma omp parallel for private(i,rtmp,rtmp3,rtmp4)
200     for (i = 0; i < n_rows; ++i) {
201     rtmp=fctp->lumped_mass_matrix[i];
202     if (ABS(rtmp)>0.) {
203     rtmp3=b_n[i]/rtmp;
204     } else {
205     rtmp3=u[i];
206     }
207 gross 1410 rtmp4=rtmp*omega-fctp->main_diagonal_low_order_transport_matrix[i];
208 gross 1407 if (ABS(rtmp3)>0) rtmp4+=sourceN[i]*rtmp2/rtmp3;
209     fctp->iteration_matrix->mainBlock->val[fctp->main_iptr[i]]=rtmp4;
210     uTilde_n[i]=rtmp3;
211 gross 1400 }
212     } else {
213 gross 1407 #pragma omp parallel for private(i,rtmp,rtmp3)
214     for (i = 0; i < n_rows; ++i) {
215     rtmp=fctp->lumped_mass_matrix[i];
216     if (ABS(rtmp)>0.) {
217     rtmp3=b_n[i]/rtmp;
218     } else {
219     rtmp3=u[i];
220     }
221     uTilde_n[i]=rtmp3;
222     }
223 gross 1410 omega=1.;
224 gross 1407 /* no update of iteration_matrix retquired! */
225     } /* end (fctp->theta > 0) */
226     /*
227     * calculate QP_n[i] max_{j} (\tilde{u}[j]- \tilde{u}[i] )
228     * QN_n[i] min_{j} (\tilde{u}[j]- \tilde{u}[i] )
229     *
230     */
231     Paso_SolverFCT_setQs(uTilde_n,QN_n,QP_n,fctp->iteration_matrix);
232     /*
233     * now we enter the mation on a time-step:
234     *
235     */
236     m=0;
237     converged=FALSE;
238 gross 1410 while ( (!converged) && (m<500) && Paso_noError()) {
239 gross 1407 printf("iteration step %d\n",m+1);
240     /*
241     * set the ant diffusion fluxes:
242     *
243     */
244 gross 1410 Paso_FCTransportProblem_setAntiDiffusionFlux(dt2,fctp,flux_matrix,u,fctp->u);
245 gross 1407 /*
246     * apply pre flux-correction: f_{ij}:=0 if f_{ij}*(\tilde{u}[i]- \tilde{u}[j])<=0
247     *
248     * this is not entirely correct!!!!!
249     *
250     */
251 gross 1410 Paso_FCTransportProblem_applyPreAntiDiffusionCorrection(flux_matrix,uTilde_n);
252 gross 1407 /*
253     * set flux limms RN_m,RP_m
254     *
255     */
256     Paso_FCTransportProblem_setRs(flux_matrix,fctp->lumped_mass_matrix,QN_n,QP_n,RN_m,RP_m);
257     /*
258 gross 1410 * z_m[i]=b_n[i] - (m_i*u[i] - dt2*theta*sum_{j<>i} l_{ij} (u[j]-u[i]) + dt2 q^-[i])
259 gross 1407 *
260     * note that iteration_matrix stores the negative values of the
261 gross 1410 * low order transport matrix l therefore a=dt2*fctp->theta is used.
262 gross 1407 */
263 gross 1410
264 gross 1407 Paso_SolverFCT_setMuPaLuPbQ(z_m,fctp->lumped_mass_matrix,u,
265 gross 1410 dt2*fctp->theta,fctp->iteration_matrix,dt2,sourceN);
266     #pragma omp parallel for private(i)
267     for (i = 0; i < n_rows; ++i) z_m[i]=b_n[i]-z_m[i];
268 gross 1407
269     /* add corrected fluxes into z_m */
270 gross 1410 Paso_FCTransportProblem_addCorrectedFluxes(z_m,flux_matrix,RN_m,RP_m);
271 gross 1407 /*
272 gross 1410 * now we solve the linear system to get the correction dt2:
273 gross 1407 *
274     */
275     if (fctp->theta > 0) {
276     /* set the right hand side of the linear system: */
277 gross 1410 options->tolerance=1.e-2;
278 gross 1407 Paso_solve(fctp->iteration_matrix,du_m,z_m,options);
279     /* TODO: check errors ! */
280     } else {
281     #pragma omp parallel for private(i,rtmp,rtmp1)
282     for (i = 0; i < n_rows; ++i) {
283     rtmp=fctp->lumped_mass_matrix[i];
284     if (ABS(rtmp)>0.) {
285 gross 1410 rtmp1=z_m[i]/rtmp;
286 gross 1407 } else {
287     rtmp1=0;
288     }
289     du_m[i]=rtmp1;
290     }
291     }
292     /*
293     * update u and calculate norm of du_m and the new u:
294     *
295     */
296     norm_u=0.;
297     norm_du=0.;
298     #pragma omp parallel private(local_norm_u,local_norm_du)
299     {
300     local_norm_u=0.;
301     local_norm_du=0.;
302     #pragma omp for schedule(static) private(i)
303     for (i=0;i<n_rows;++i) {
304     u[i]+=omega*du_m[i];
305     local_norm_u=MAX(local_norm_u,ABS(u[i]));
306     local_norm_du=MAX(local_norm_du,ABS(du_m[i]));
307     }
308     #pragma omp critical
309     {
310     norm_u=MAX(norm_u,local_norm_u);
311     norm_du=MAX(norm_du,local_norm_du);
312     }
313     }
314     #ifdef PASO_MPI
315     local_norm[0]=norm_u;
316     local_norm[1]=norm_du;
317 ksteube 1415 MPI_Allreduce(local_norm,norm, 2, MPI_DOUBLE, MPI_MAX, fctp->mpi_info->comm);
318 gross 1407 norm_u=norm[0];
319     norm_du=norm[1];
320     #endif
321     norm_du*=omega;
322     converged=(norm_du <= tolerance * norm_u);
323     m++;
324     printf("iteration step %d: norm u and du_m : %e %e\n",m,norm_u,norm_du);
325     /* TODO: check if du_m has been redu_mced */
326 gross 1410 } /* end of inner iteration */
327     #pragma omp parallel for schedule(static) private(i)
328     for (i=0;i<n_rows;++i) fctp->u[i]=u[i];
329     n++;
330 gross 1407 } /* end of time integration */
331     #pragma omp parallel for schedule(static) private(i)
332 gross 1410 for (i=0;i<n_rows;++i) u[i]=fctp->u[i]+fctp->u_min;
333     /* TODO: update u_min ? */
334 gross 1407
335 gross 1370 }
336 gross 1407 /*
337     * clean-up:
338 gross 1400 *
339     */
340 gross 1407 TMPMEMFREE(b_n);
341     Paso_SystemMatrix_free(flux_matrix);
342     TMPMEMFREE(sourceP);
343     TMPMEMFREE(sourceN);
344     TMPMEMFREE(uTilde_n);
345     TMPMEMFREE(QN_n);
346     TMPMEMFREE(QP_n);
347     TMPMEMFREE(RN_m);
348     TMPMEMFREE(RP_m);
349     TMPMEMFREE(z_m);
350     TMPMEMFREE(du_m);
351 gross 1375 }

  ViewVC Help
Powered by ViewVC 1.1.26