/[escript]/trunk/paso/src/SolverFCT_solve.c
ViewVC logotype

Contents of /trunk/paso/src/SolverFCT_solve.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1401 - (show annotations)
Fri Jan 25 04:31:18 2008 UTC (11 years, 9 months ago) by gross
File MIME type: text/plain
File size: 10956 byte(s)
rewrite antidiffusion calculation to avoid coloring for OPENMP parallelization
1 /* $Id:$ */
2
3 /*******************************************************
4 *
5 * Copyright 2007 by University of Queensland
6 *
7 * http://esscc.uq.edu.au
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14 /**************************************************************/
15
16 /* Paso: Flux correction transport solver
17 *
18 * solves Mu_t=Du+Ku+q
19 *
20 * where is D is diffusive (not checked)
21 * - D is symmetric
22 * - row sums are equal to zero.
23 * and K is the advective part.
24 *
25 * u(0) >= 0
26 *
27 * intially fctp->transport_matrix defines the diffusive part
28 * but the matrix is updated by the adevctive part + artificial diffusion
29 *
30 */
31 /**************************************************************/
32
33 /* Author: l.gross@uq.edu.au */
34
35 /**************************************************************/
36
37 #include "Paso.h"
38 #include "Solver.h"
39 #include "SolverFCT.h"
40 #include "escript/blocktimer.h"
41 #ifdef _OPENMP
42 #include <omp.h>
43 #endif
44 #ifdef PASO_MPI
45 #include <mpi.h>
46 #endif
47
48 /***********************************************************************************
49
50 solve (until convergence)
51
52 (*) [M-theta*dt*L] du = M*u_last + (1-theta)*dt*F(u_last) - M*u + theta*dt F(u)
53 u <- u+du
54
55 with F(u) = L u + f_a(u) (f_a anti-diffusion)
56 and L = D + K + D_K stored in transport_matrix
57 D = Diffusive part (on input stored in transport_matrix)
58 K = flux_matrix
59 D_K = artificial diffusion introduced by K
60 f_a = anti-diffusion introduced by K and u
61 u_last=u of last time step
62
63 For the case theta==0 (explicit case) no iteration is required. One sets
64
65 M*u= M*u_last + dt*b
66
67 with b=F(u) = L u + f_a(u).
68
69
70 For the case theta>0 we solve (*) is the form
71
72 [L-M/theta*dt] du2 =M/(theta*dt)*u_last + ((1-theta)/theta)*F(u_last) - M/(theta*dt)*u + F(u)
73
74 for du2=-du which is solved as
75
76 A du2 = r= b + f(u)
77
78 with
79
80 du=-du2
81 A=L-M/(theta*dt) (stored into transport_matrix)
82 f(u) =-M/(theta*dt)*u + F(u) = - M/(theta*dt)*u + Lu + f_a(u) = f_a(u) + Au
83 F(u)=f(u)+M/(theta*dt)*u
84 b= M/(theta*dt)*u_last+(1-theta)/theta*F(u_last)=
85 = M/(theta*dt)*u_last+(1-theta)/theta*(f(u)+M/(theta*dt)*u_last)
86 = M/(theta*dt)*u_last+(1-theta)/theta*(f(u_last)+M/(theta*dt)*u_last)
87 = M*1./(theta*dt)*(1+(1-theta)/theta)*u_last+(1-theta)/theta*f(u_last)
88
89 */
90
91 void Paso_SolverFCT_solve(Paso_FCTransportProblem* fctp, double* u, double dt, double* source, Paso_Options* options) {
92
93 index_t i, j;
94 int n_substeps,n, iter;
95 double fac, fac2, *b=NULL, *f=NULL, *du=NULL;
96 double dt2=fctp->dt_max, dt2_loc, rtmp,rtmp2,t;
97 double local_norm[2],norm[2],local_norm_u,local_norm_du,norm_u,norm_du, tolerance;
98 dim_t n_rows=Paso_SystemMatrix_getTotalNumRows(fctp->flux_matrix);
99 bool_t converged;
100 if (dt<=0.) {
101 Paso_setError(TYPE_ERROR,"Paso_SolverFCT_solve: dt must be positive.");
102 }
103 if (! fctp->valid_matrices) {
104
105 /* extract the row sum of the advective part */
106 Paso_SystemMatrix_rowSum(fctp->flux_matrix,fctp->row_sum_flux_matrix);
107 /* add the advective part + artificial diffusion to the diffusive part = transport-matrix */
108 Paso_FCTransportProblem_addAdvectivePart(fctp,1.);
109 /* create a copy of the main diagonal entires of the transport-matrix */
110 #pragma omp parallel for schedule(static) private(i)
111 for (i=0;i<n_rows;++i) {
112 fctp->transport_matrix_diagonal[i]=
113 fctp->transport_matrix->mainBlock->val[fctp->main_iptr[i]];
114 }
115
116 /* Paso_SystemMatrix_saveMM(fctp->flux_matrix,"flux.mm");
117 Paso_SystemMatrix_saveMM(fctp->transport_matrix,"trans.mm"); */
118 /*=================================================================== *
119 *
120 * calculate time step size:
121 */
122 dt2=LARGE_POSITIVE_FLOAT;
123 if (fctp->theta < 1.) {
124 #pragma omp parallel private(dt2_loc)
125 {
126 dt2_loc=LARGE_POSITIVE_FLOAT;
127 #pragma omp for schedule(static) private(i,rtmp,rtmp2)
128 for (i=0;i<n_rows;++i) {
129 rtmp=fctp->transport_matrix_diagonal[i];
130 rtmp2=fctp->lumped_mass_matrix[i];
131 if ( (rtmp<0 && rtmp2>=0.) || (rtmp>0 && rtmp2<=0.) ) {
132 dt2_loc=MIN(dt2_loc,-rtmp2/rtmp);
133 }
134 }
135 #pragma omp critical
136 {
137 dt2=MIN(dt2,dt2_loc);
138 }
139 }
140 #ifdef PASO_MPI
141 dt2_loc = dt2;
142 MPI_Allreduce(&dt2_loc, &dt2, 1, MPI_DOUBLE, MPI_MIN, fctp->mpi_info->comm);
143 #endif
144 if (dt2<LARGE_POSITIVE_FLOAT) dt2*=1./(1.-fctp->theta);
145 }
146 if (dt2 <= 0.) {
147 Paso_setError(TYPE_ERROR,"Paso_SolverFCT_solve: dt must be positive.");
148 } else {
149 if (dt2<LARGE_POSITIVE_FLOAT) printf("minimum time step size is %e (theta = %e).\n",dt2,fctp->theta);
150 }
151 fctp->dt=dt2;
152 fctp->dt_max=dt2; /* FIXME: remove*/
153 fctp->valid_matrices=Paso_noError();
154 }
155
156 /* */
157 Paso_SystemMatrix_allocBuffer(fctp->transport_matrix);
158 /*
159 * allocate memory:
160 *
161 */
162 b=MEMALLOC(n_rows,double);
163 Paso_checkPtr(b);
164 if (fctp->theta>0) {
165 b=MEMALLOC(n_rows,double);
166 du=MEMALLOC(n_rows,double);
167 f=MEMALLOC(n_rows,double);
168 Paso_checkPtr(du);
169 Paso_checkPtr(f);
170 }
171 if (Paso_noError()) {
172 /*
173 * Preparation:
174 *
175 */
176
177 /* decide on substepping */
178 if (fctp->dt < LARGE_POSITIVE_FLOAT) {
179 n_substeps=ceil(dt/fctp->dt);
180 } else {
181 n_substeps=1.;
182 }
183 dt2=dt/n_substeps;
184 printf("%d time steps of size is %e (theta = %e, dt_max=%e).\n",n_substeps, dt2,fctp->theta,fctp->dt);
185
186 /*
187 * implicit case:
188 *
189 * A=L-M/(theta*dt) (stored into transport_matrix)
190 *
191 * b= M/(theta*dt)*u_last+(1-theta)/theta)*F(u_last)
192 *
193 */
194 if (fctp->theta>0) {
195 Paso_solve_free(fctp->transport_matrix);
196 fac=1./(fctp->theta*dt2);
197 fac2=(1.-fctp->theta)/fctp->theta;
198 #pragma omp parallel for schedule(static) private(i)
199 for (i=0;i<n_rows;++i) {
200 fctp->transport_matrix->mainBlock->val[fctp->main_iptr[i]]=
201 fctp->transport_matrix_diagonal[i]-fctp->lumped_mass_matrix[i]*fac;
202 }
203 }
204 #pragma omp parallel for schedule(static) private(i)
205 for (i=0;i<n_rows;++i) {
206 u[i]=fctp->u[i];
207 /* printf("A %d : %e %e\n",i,u[i],fctp->u[i]); */
208 }
209 /*
210 * now the show can begin:
211 *
212 */
213 t=dt2;
214 n=0;
215 tolerance=options->tolerance;
216 while(n<n_substeps && Paso_noError()) {
217 printf("substep step %d at t=%e\n",n+1,t);
218 if (fctp->theta>0.) {
219 /*
220 * implicit scheme:
221 *
222 */
223 if (fctp->theta<1.) {
224 Paso_FCTransportProblem_setFlux(fctp,u,b); /* b=f(u_last) */
225 #pragma omp parallel for schedule(static) private(i)
226 for (i=0;i<n_rows;++i) {
227 b[i]=fctp->lumped_mass_matrix[i]*fac*(1.+fac2)*u[i]+fac2*b[i];
228 }
229 } else {
230 #pragma omp parallel for schedule(static) private(i)
231 for (i=0;i<n_rows;++i) {
232 b[i]=fctp->lumped_mass_matrix[i]*fac*u[i];
233 }
234 }
235 /*
236 * Enter iteration on a time step :
237 *
238 */
239 iter=0;
240 converged=FALSE;
241 while ( (!converged) && (iter<50) && Paso_noError()) {
242 printf("iteration step %d\n",iter+1);
243 Paso_FCTransportProblem_setFlux(fctp,u,f);
244 #pragma omp parallel for schedule(static) private(i)
245 for (i=0;i<n_rows;++i) {
246 f[i]+=b[i];
247 }
248 options->tolerance=1.e-3;
249 Paso_solve(fctp->transport_matrix,du,f,options);
250 /* update u and calculate norms */
251 norm_u=0.;
252 norm_du=0.;
253 #pragma omp parallel private(local_norm_u,local_norm_du)
254 {
255 local_norm_u=0.;
256 local_norm_du=0.;
257 #pragma omp for schedule(static) private(i)
258 for (i=0;i<n_rows;++i) {
259 u[i]-=du[i];
260 local_norm_u=MAX(local_norm_u,ABS(u[i]));
261 local_norm_du=MAX(local_norm_du,ABS(du[i]));
262 }
263 #pragma omp critical
264 {
265 norm_u=MAX(norm_u,local_norm_u);
266 norm_du=MAX(norm_du,local_norm_du);
267 }
268 }
269 #ifdef PASO_MPI
270 local_norm[0]=norm_u;
271 local_norm[1]=norm_du;
272 MPI_Allreduce(local_norm,norm, 2, MPI_DOUBLE, MPI_MAX, fctp->mpi_info->comm);
273 norm_u=norm[0];
274 norm_du=norm[1];
275 #endif
276 converged=(norm_du <= tolerance * norm_u);
277 iter++;
278 printf("iteration step %d: norm u and du : %e %e\n",iter,norm_u,norm_du);
279 }
280 } else {
281 /*
282 * explicit scheme:
283 *
284 */
285 #pragma omp parallel for schedule(static) private(i)
286 for (i=0;i<n_rows;++i) {
287 u[i]+=dt2*b[i]/fctp->lumped_mass_matrix[i];
288 }
289 }
290 /* and the next time step */
291 t+=dt2;
292 n++;
293 }
294 /*
295 * save last u
296 *
297 */
298 if (Paso_noError()) {
299 #pragma omp parallel for schedule(static) private(i)
300 for (i=0;i<n_rows;++i) {
301 fctp->u[i]=u[i];
302 }
303 }
304 }
305 /*
306 *
307 * clean-up
308 *
309 */
310 if (fctp->theta>0) {
311 MEMFREE(b);
312 MEMFREE(du);
313 MEMFREE(f);
314 }
315 }

  ViewVC Help
Powered by ViewVC 1.1.26