/[escript]/trunk/paso/src/SolverFCT_solve.c
ViewVC logotype

Annotation of /trunk/paso/src/SolverFCT_solve.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1401 - (hide annotations)
Fri Jan 25 04:31:18 2008 UTC (11 years, 10 months ago) by gross
File MIME type: text/plain
File size: 10956 byte(s)
rewrite antidiffusion calculation to avoid coloring for OPENMP parallelization
1 gross 1363 /* $Id:$ */
2    
3     /*******************************************************
4     *
5     * Copyright 2007 by University of Queensland
6     *
7     * http://esscc.uq.edu.au
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13    
14     /**************************************************************/
15    
16     /* Paso: Flux correction transport solver
17     *
18     * solves Mu_t=Du+Ku+q
19     *
20     * where is D is diffusive (not checked)
21     * - D is symmetric
22     * - row sums are equal to zero.
23     * and K is the advective part.
24     *
25     * u(0) >= 0
26     *
27     * intially fctp->transport_matrix defines the diffusive part
28     * but the matrix is updated by the adevctive part + artificial diffusion
29     *
30     */
31     /**************************************************************/
32    
33     /* Author: l.gross@uq.edu.au */
34    
35     /**************************************************************/
36    
37     #include "Paso.h"
38     #include "Solver.h"
39     #include "SolverFCT.h"
40     #include "escript/blocktimer.h"
41 gross 1370 #ifdef _OPENMP
42     #include <omp.h>
43     #endif
44     #ifdef PASO_MPI
45     #include <mpi.h>
46     #endif
47 gross 1363
48 gross 1370 /***********************************************************************************
49 gross 1363
50 gross 1370 solve (until convergence)
51 gross 1363
52 gross 1370 (*) [M-theta*dt*L] du = M*u_last + (1-theta)*dt*F(u_last) - M*u + theta*dt F(u)
53     u <- u+du
54    
55     with F(u) = L u + f_a(u) (f_a anti-diffusion)
56     and L = D + K + D_K stored in transport_matrix
57     D = Diffusive part (on input stored in transport_matrix)
58     K = flux_matrix
59     D_K = artificial diffusion introduced by K
60     f_a = anti-diffusion introduced by K and u
61     u_last=u of last time step
62    
63     For the case theta==0 (explicit case) no iteration is required. One sets
64    
65 gross 1400 M*u= M*u_last + dt*b
66 gross 1370
67 gross 1400 with b=F(u) = L u + f_a(u).
68 gross 1370
69    
70 gross 1400 For the case theta>0 we solve (*) is the form
71 gross 1370
72 gross 1400 [L-M/theta*dt] du2 =M/(theta*dt)*u_last + ((1-theta)/theta)*F(u_last) - M/(theta*dt)*u + F(u)
73    
74     for du2=-du which is solved as
75    
76     A du2 = r= b + f(u)
77    
78     with
79    
80     du=-du2
81 gross 1370 A=L-M/(theta*dt) (stored into transport_matrix)
82 gross 1400 f(u) =-M/(theta*dt)*u + F(u) = - M/(theta*dt)*u + Lu + f_a(u) = f_a(u) + Au
83     F(u)=f(u)+M/(theta*dt)*u
84     b= M/(theta*dt)*u_last+(1-theta)/theta*F(u_last)=
85     = M/(theta*dt)*u_last+(1-theta)/theta*(f(u)+M/(theta*dt)*u_last)
86     = M/(theta*dt)*u_last+(1-theta)/theta*(f(u_last)+M/(theta*dt)*u_last)
87     = M*1./(theta*dt)*(1+(1-theta)/theta)*u_last+(1-theta)/theta*f(u_last)
88 gross 1370
89     */
90    
91 gross 1364 void Paso_SolverFCT_solve(Paso_FCTransportProblem* fctp, double* u, double dt, double* source, Paso_Options* options) {
92 gross 1363
93 gross 1375 index_t i, j;
94 gross 1400 int n_substeps,n, iter;
95     double fac, fac2, *b=NULL, *f=NULL, *du=NULL;
96 gross 1370 double dt2=fctp->dt_max, dt2_loc, rtmp,rtmp2,t;
97 gross 1400 double local_norm[2],norm[2],local_norm_u,local_norm_du,norm_u,norm_du, tolerance;
98 gross 1372 dim_t n_rows=Paso_SystemMatrix_getTotalNumRows(fctp->flux_matrix);
99 gross 1400 bool_t converged;
100 gross 1363 if (dt<=0.) {
101     Paso_setError(TYPE_ERROR,"Paso_SolverFCT_solve: dt must be positive.");
102     }
103     if (! fctp->valid_matrices) {
104    
105     /* extract the row sum of the advective part */
106     Paso_SystemMatrix_rowSum(fctp->flux_matrix,fctp->row_sum_flux_matrix);
107 gross 1370 /* add the advective part + artificial diffusion to the diffusive part = transport-matrix */
108 gross 1363 Paso_FCTransportProblem_addAdvectivePart(fctp,1.);
109 gross 1370 /* create a copy of the main diagonal entires of the transport-matrix */
110     #pragma omp parallel for schedule(static) private(i)
111 gross 1372 for (i=0;i<n_rows;++i) {
112 gross 1370 fctp->transport_matrix_diagonal[i]=
113     fctp->transport_matrix->mainBlock->val[fctp->main_iptr[i]];
114     }
115 gross 1363
116 gross 1400 /* Paso_SystemMatrix_saveMM(fctp->flux_matrix,"flux.mm");
117     Paso_SystemMatrix_saveMM(fctp->transport_matrix,"trans.mm"); */
118 gross 1370 /*=================================================================== *
119     *
120     * calculate time step size:
121     */
122 gross 1400 dt2=LARGE_POSITIVE_FLOAT;
123 gross 1370 if (fctp->theta < 1.) {
124     #pragma omp parallel private(dt2_loc)
125     {
126     dt2_loc=LARGE_POSITIVE_FLOAT;
127     #pragma omp for schedule(static) private(i,rtmp,rtmp2)
128 gross 1372 for (i=0;i<n_rows;++i) {
129 gross 1370 rtmp=fctp->transport_matrix_diagonal[i];
130     rtmp2=fctp->lumped_mass_matrix[i];
131     if ( (rtmp<0 && rtmp2>=0.) || (rtmp>0 && rtmp2<=0.) ) {
132     dt2_loc=MIN(dt2_loc,-rtmp2/rtmp);
133     }
134     }
135 gross 1372 #pragma omp critical
136 gross 1370 {
137     dt2=MIN(dt2,dt2_loc);
138     }
139     }
140     #ifdef PASO_MPI
141     dt2_loc = dt2;
142     MPI_Allreduce(&dt2_loc, &dt2, 1, MPI_DOUBLE, MPI_MIN, fctp->mpi_info->comm);
143     #endif
144 gross 1400 if (dt2<LARGE_POSITIVE_FLOAT) dt2*=1./(1.-fctp->theta);
145 gross 1370 }
146 gross 1400 if (dt2 <= 0.) {
147     Paso_setError(TYPE_ERROR,"Paso_SolverFCT_solve: dt must be positive.");
148 gross 1370 } else {
149 gross 1400 if (dt2<LARGE_POSITIVE_FLOAT) printf("minimum time step size is %e (theta = %e).\n",dt2,fctp->theta);
150 gross 1370 }
151 gross 1400 fctp->dt=dt2;
152     fctp->dt_max=dt2; /* FIXME: remove*/
153 gross 1370 fctp->valid_matrices=Paso_noError();
154 gross 1363 }
155    
156 gross 1370 /* */
157     Paso_SystemMatrix_allocBuffer(fctp->transport_matrix);
158 gross 1400 /*
159     * allocate memory:
160     *
161     */
162     b=MEMALLOC(n_rows,double);
163     Paso_checkPtr(b);
164 gross 1370 if (fctp->theta>0) {
165 gross 1400 b=MEMALLOC(n_rows,double);
166     du=MEMALLOC(n_rows,double);
167     f=MEMALLOC(n_rows,double);
168     Paso_checkPtr(du);
169     Paso_checkPtr(f);
170     }
171     if (Paso_noError()) {
172     /*
173     * Preparation:
174     *
175     */
176    
177     /* decide on substepping */
178     if (fctp->dt < LARGE_POSITIVE_FLOAT) {
179     n_substeps=ceil(dt/fctp->dt);
180     } else {
181     n_substeps=1.;
182     }
183     dt2=dt/n_substeps;
184     printf("%d time steps of size is %e (theta = %e, dt_max=%e).\n",n_substeps, dt2,fctp->theta,fctp->dt);
185    
186     /*
187     * implicit case:
188     *
189     * A=L-M/(theta*dt) (stored into transport_matrix)
190     *
191     * b= M/(theta*dt)*u_last+(1-theta)/theta)*F(u_last)
192     *
193     */
194     if (fctp->theta>0) {
195     Paso_solve_free(fctp->transport_matrix);
196     fac=1./(fctp->theta*dt2);
197     fac2=(1.-fctp->theta)/fctp->theta;
198     #pragma omp parallel for schedule(static) private(i)
199     for (i=0;i<n_rows;++i) {
200     fctp->transport_matrix->mainBlock->val[fctp->main_iptr[i]]=
201     fctp->transport_matrix_diagonal[i]-fctp->lumped_mass_matrix[i]*fac;
202     }
203     }
204     #pragma omp parallel for schedule(static) private(i)
205     for (i=0;i<n_rows;++i) {
206     u[i]=fctp->u[i];
207     /* printf("A %d : %e %e\n",i,u[i],fctp->u[i]); */
208     }
209     /*
210     * now the show can begin:
211     *
212     */
213     t=dt2;
214     n=0;
215     tolerance=options->tolerance;
216     while(n<n_substeps && Paso_noError()) {
217     printf("substep step %d at t=%e\n",n+1,t);
218     if (fctp->theta>0.) {
219     /*
220     * implicit scheme:
221     *
222     */
223     if (fctp->theta<1.) {
224     Paso_FCTransportProblem_setFlux(fctp,u,b); /* b=f(u_last) */
225     #pragma omp parallel for schedule(static) private(i)
226     for (i=0;i<n_rows;++i) {
227     b[i]=fctp->lumped_mass_matrix[i]*fac*(1.+fac2)*u[i]+fac2*b[i];
228     }
229     } else {
230     #pragma omp parallel for schedule(static) private(i)
231     for (i=0;i<n_rows;++i) {
232     b[i]=fctp->lumped_mass_matrix[i]*fac*u[i];
233     }
234     }
235     /*
236     * Enter iteration on a time step :
237     *
238     */
239     iter=0;
240     converged=FALSE;
241     while ( (!converged) && (iter<50) && Paso_noError()) {
242     printf("iteration step %d\n",iter+1);
243     Paso_FCTransportProblem_setFlux(fctp,u,f);
244     #pragma omp parallel for schedule(static) private(i)
245     for (i=0;i<n_rows;++i) {
246     f[i]+=b[i];
247     }
248     options->tolerance=1.e-3;
249     Paso_solve(fctp->transport_matrix,du,f,options);
250     /* update u and calculate norms */
251     norm_u=0.;
252     norm_du=0.;
253     #pragma omp parallel private(local_norm_u,local_norm_du)
254     {
255     local_norm_u=0.;
256     local_norm_du=0.;
257     #pragma omp for schedule(static) private(i)
258     for (i=0;i<n_rows;++i) {
259     u[i]-=du[i];
260     local_norm_u=MAX(local_norm_u,ABS(u[i]));
261     local_norm_du=MAX(local_norm_du,ABS(du[i]));
262     }
263 gross 1401 #pragma omp critical
264 gross 1400 {
265     norm_u=MAX(norm_u,local_norm_u);
266     norm_du=MAX(norm_du,local_norm_du);
267     }
268     }
269     #ifdef PASO_MPI
270     local_norm[0]=norm_u;
271     local_norm[1]=norm_du;
272     MPI_Allreduce(local_norm,norm, 2, MPI_DOUBLE, MPI_MAX, fctp->mpi_info->comm);
273     norm_u=norm[0];
274     norm_du=norm[1];
275     #endif
276     converged=(norm_du <= tolerance * norm_u);
277     iter++;
278     printf("iteration step %d: norm u and du : %e %e\n",iter,norm_u,norm_du);
279     }
280     } else {
281     /*
282     * explicit scheme:
283     *
284     */
285     #pragma omp parallel for schedule(static) private(i)
286     for (i=0;i<n_rows;++i) {
287     u[i]+=dt2*b[i]/fctp->lumped_mass_matrix[i];
288     }
289     }
290     /* and the next time step */
291     t+=dt2;
292     n++;
293 gross 1370 }
294 gross 1400 /*
295     * save last u
296     *
297     */
298     if (Paso_noError()) {
299     #pragma omp parallel for schedule(static) private(i)
300     for (i=0;i<n_rows;++i) {
301     fctp->u[i]=u[i];
302     }
303 gross 1370 }
304 gross 1400 }
305     /*
306     *
307     * clean-up
308     *
309     */
310     if (fctp->theta>0) {
311     MEMFREE(b);
312     MEMFREE(du);
313     MEMFREE(f);
314     }
315 gross 1375 }

  ViewVC Help
Powered by ViewVC 1.1.26