/[escript]/trunk/paso/src/SolverFCT_solve.c
ViewVC logotype

Contents of /trunk/paso/src/SolverFCT_solve.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1407 - (show annotations)
Mon Feb 4 06:45:48 2008 UTC (11 years, 5 months ago) by gross
File MIME type: text/plain
File size: 14676 byte(s)
new upwinding algorithm (still fails)
1 /* $Id:$ */
2
3 /*******************************************************
4 *
5 * Copyright 2007,2008 by University of Queensland
6 *
7 * http://esscc.uq.edu_m.au
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14 /**************************************************************/
15
16 /* Paso: Flux correction transport solver
17 *
18 * solves Mu_t=Ku+q
19 * u(0) >= u_min
20 *
21 * Warning: the program assums sum_{j} k_{ij}=0!!!
22 *
23 */
24 /**************************************************************/
25
26 /* Author: l.gross@uq.edu_m.au */
27
28 /**************************************************************/
29
30 #include "Paso.h"
31 #include "Solver.h"
32 #include "SolverFCT.h"
33 #include "escript/blocktimer.h"
34 #ifdef _OPENMP
35 #include <omp.h>
36 #endif
37 #ifdef PASO_MPI
38 #include <mpi.h>
39 #endif
40
41 double Paso_FCTransportProblem_getSafeTimeStepSize(Paso_FCTransportProblem* fctp)
42 {
43 dim_t i, n_rows;
44 double dt_max, dt_max_loc;
45 register double rtmp1,rtmp2;
46 n_rows=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix);
47 if (! fctp->valid_matrices) {
48 fctp->dt_max=LARGE_POSITIVE_FLOAT;
49
50
51 /* extract the row sum of the advective part */
52 Paso_SystemMatrix_rowSum(fctp->mass_matrix,fctp->lumped_mass_matrix);
53
54 /* set low order transport operator */
55 Paso_FCTransportProblem_setLowOrderOperator(fctp);
56 /*
57 * calculate time step size:
58 */
59 dt_max=LARGE_POSITIVE_FLOAT;
60 if (fctp->theta < 1.) {
61 #pragma omp parallel private(dt_max_loc)
62 {
63 dt_max_loc=LARGE_POSITIVE_FLOAT;
64 #pragma omp for schedule(static) private(i,rtmp1,rtmp2)
65 for (i=0;i<n_rows;++i) {
66 rtmp1=fctp->main_diagonal_low_order_transport_matrix[i];
67 rtmp2=fctp->lumped_mass_matrix[i];
68 if ( (rtmp1<0 && rtmp2>=0.) || (rtmp1>0 && rtmp2<=0.) ) {
69 dt_max_loc=MIN(dt_max_loc,-rtmp2/rtmp1);
70 }
71 }
72 #pragma omp critical
73 {
74 dt_max=MIN(dt_max,dt_max_loc);
75 }
76 }
77 #ifdef PASO_MPI
78 dt_max_loc = dt_max;
79 MPI_Allreduce(&dt_max_loc, &dt_max, 1, MPI_DOUBLE, MPI_MIN, fctp->mpi_info->comm);
80 #endif
81 if (dt_max<LARGE_POSITIVE_FLOAT) dt_max*=1./(1.-fctp->theta);
82 }
83 if (dt_max <= 0.) {
84 Paso_setError(TYPE_ERROR,"Paso_SolverFCT_solve: dt must be positive.");
85 } else {
86 if (dt_max<LARGE_POSITIVE_FLOAT) printf("maximum time step size is %e (theta = %e).\n",dt_max,fctp->theta);
87 }
88 fctp->dt_max=dt_max;
89 fctp->valid_matrices=Paso_noError();
90 }
91 return fctp->dt_max;
92 }
93
94
95
96
97 void Paso_SolverFCT_solve(Paso_FCTransportProblem* fctp, double* u, double dt, double* source, Paso_Options* options) {
98
99 index_t i, j;
100 int n_substeps,n, m;
101 double dt_max, omega, dt2,t;
102 double local_norm[2],norm[2],local_norm_u,local_norm_du,norm_u,norm_du, tolerance;
103 register double rtmp1,rtmp2,rtmp3,rtmp4, rtmp;
104 double *b_n=NULL, *sourceP=NULL, *sourceN=NULL, *uTilde_n=NULL, *QN_n=NULL, *QP_n=NULL, *RN_m=NULL, *RP_m=NULL, *z_m=NULL, *du_m=NULL;
105 Paso_SystemMatrix *flux_matrix=NULL;
106 dim_t n_rows=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix);
107 bool_t converged;
108 if (dt<=0.) {
109 Paso_setError(TYPE_ERROR,"Paso_SolverFCT_solve: dt must be positive.");
110 }
111 dt_max=Paso_FCTransportProblem_getSafeTimeStepSize(fctp);
112 /*
113 * allocate memory
114 *
115 */
116 Paso_SystemMatrix_allocBuffer(fctp->iteration_matrix);
117 b_n=TMPMEMALLOC(n_rows,double);
118 Paso_checkPtr(b_n);
119 sourceP=TMPMEMALLOC(n_rows,double);
120 Paso_checkPtr(sourceP);
121 sourceN=TMPMEMALLOC(n_rows,double);
122 Paso_checkPtr(sourceN);
123 uTilde_n=TMPMEMALLOC(n_rows,double);
124 Paso_checkPtr(uTilde_n);
125 QN_n=TMPMEMALLOC(n_rows,double);
126 Paso_checkPtr(QN_n);
127 QP_n=TMPMEMALLOC(n_rows,double);
128 Paso_checkPtr(QP_n);
129 RN_m=TMPMEMALLOC(n_rows,double);
130 Paso_checkPtr(RN_m);
131 RP_m=TMPMEMALLOC(n_rows,double);
132 Paso_checkPtr(RP_m);
133 z_m=TMPMEMALLOC(n_rows,double);
134 Paso_checkPtr(z_m);
135 du_m=TMPMEMALLOC(n_rows,double);
136 Paso_checkPtr(du_m);
137 flux_matrix=Paso_SystemMatrix_alloc(fctp->transport_matrix->type,
138 fctp->transport_matrix->pattern,
139 fctp->transport_matrix->row_block_size,
140 fctp->transport_matrix->col_block_size);
141 if (Paso_noError()) {
142 Paso_SystemMatrix_allocBuffer(flux_matrix);
143 Paso_SystemMatrix_allocBuffer(fctp->iteration_matrix);
144 /*
145 * Preparation:
146 *
147 */
148
149 /* decide on substepping */
150 if (fctp->dt_max < LARGE_POSITIVE_FLOAT) {
151 n_substeps=ceil(dt/dt_max);
152 } else {
153 n_substeps=1;
154 }
155 dt2=dt/n_substeps;
156 printf("%d time steps of size is %e (theta = %e, dt_max=%e).\n",n_substeps, dt2,fctp->theta, dt_max);
157
158 /*
159 * seperate source into positive and negative part:
160 */
161 printf("FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF\n");
162 #pragma omp parallel for private(i,rtmp)
163 for (i = 0; i < n_rows; ++i) {
164 rtmp=source[i];
165 if (rtmp <0) {
166 sourceP[i]=-rtmp;
167 } else {
168 sourceN[i]= rtmp;
169 }
170 }
171 printf("FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF\n");
172 /*
173 * now the show can begin:
174 *
175 */
176 #pragma omp parallel for schedule(static) private(i)
177 for (i=0;i<n_rows;++i) u[i]=fctp->u[i]-fctp->u_min;
178 printf("FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF\n");
179
180 t=dt2;
181 n=0;
182 tolerance=options->tolerance;
183 while(n<n_substeps && Paso_noError()) {
184 printf("FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF\n");
185 printf("substep step %d at t=%e\n",n+1,t);
186 /*
187 * b^n[i]=m u^n[i] + dt*(1-theta) sum_{j <> i} l_{ij}*(u^n[j]-u^n[i]) + dt*sourceP[i]
188 *
189 * note that iteration_matrix stores the negative values of the
190 * low order transport matrix l therefore a=-dt*(1-fctp->theta) is used.
191 *
192 */
193 Paso_SolverFCT_setMuPaLuPbQ(b_n,fctp->lumped_mass_matrix,u,
194 -dt*(1-fctp->theta),fctp->iteration_matrix,dt,sourceP);
195 /*
196 * uTilde_n[i]=b[i]/m[i]
197 *
198 * a[i,i]=m[i]/(dt theta) + \frac{1}{\theta} \frac{q^-[i]}-l[i,i]
199 *
200 */
201 printf("FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF\n");
202 if (fctp->theta > 0) {
203 Paso_solve_free(fctp->iteration_matrix);
204 rtmp1=1./(dt*fctp->theta);
205 rtmp2=1./fctp->theta;
206 #pragma omp parallel for private(i,rtmp,rtmp3,rtmp4)
207 for (i = 0; i < n_rows; ++i) {
208 rtmp=fctp->lumped_mass_matrix[i];
209 if (ABS(rtmp)>0.) {
210 rtmp3=b_n[i]/rtmp;
211 } else {
212 rtmp3=u[i];
213 }
214 rtmp4=rtmp*rtmp1-fctp->main_diagonal_low_order_transport_matrix[i];
215 if (ABS(rtmp3)>0) rtmp4+=sourceN[i]*rtmp2/rtmp3;
216 fctp->iteration_matrix->mainBlock->val[fctp->main_iptr[i]]=rtmp4;
217 uTilde_n[i]=rtmp3;
218 }
219 } else {
220 #pragma omp parallel for private(i,rtmp,rtmp3)
221 for (i = 0; i < n_rows; ++i) {
222 rtmp=fctp->lumped_mass_matrix[i];
223 if (ABS(rtmp)>0.) {
224 rtmp3=b_n[i]/rtmp;
225 } else {
226 rtmp3=u[i];
227 }
228 uTilde_n[i]=rtmp3;
229 }
230 /* no update of iteration_matrix retquired! */
231 } /* end (fctp->theta > 0) */
232 /*
233 * calculate QP_n[i] max_{j} (\tilde{u}[j]- \tilde{u}[i] )
234 * QN_n[i] min_{j} (\tilde{u}[j]- \tilde{u}[i] )
235 *
236 */
237 Paso_SolverFCT_setQs(uTilde_n,QN_n,QP_n,fctp->iteration_matrix);
238 /*
239 * now we enter the mation on a time-step:
240 *
241 */
242 m=0;
243 converged=FALSE;
244 while ( (!converged) && (m<50) && Paso_noError()) {
245 printf("iteration step %d\n",m+1);
246 /*
247 * set the ant diffusion fluxes:
248 *
249 * initially we set f_{ij} = - dt d_{ij} (u[j]-u[i])
250 * and then f_{ij} += omega (m_{ij} - dt (1-theta) d_{ij}) (du[j]-du[i])
251 */
252 if (m==0) {
253 Paso_SystemMatrix_setValues(flux_matrix,0.);
254 Paso_FCTransportProblem_updateAntiDiffusionFlux(fctp,flux_matrix,0.,-dt,u);
255 } else {
256 Paso_FCTransportProblem_updateAntiDiffusionFlux(fctp,flux_matrix,
257 omega,-omega*dt*(1.-fctp->theta),du_m);
258 }
259 /*
260 * apply pre flux-correction: f_{ij}:=0 if f_{ij}*(\tilde{u}[i]- \tilde{u}[j])<=0
261 *
262 * this is not entirely correct!!!!!
263 *
264 */
265 Paso_FCTransportProblem_applyPreAntiDiffusionCorrection(flux_matrix,uTilde_n);
266 /*
267 * set flux limms RN_m,RP_m
268 *
269 */
270 Paso_FCTransportProblem_setRs(flux_matrix,fctp->lumped_mass_matrix,QN_n,QP_n,RN_m,RP_m);
271 /*
272 * z_m[i]=m_i*u[i] - dt*theta*sum_{j<>i} l_{ij} (u[j]-u[i]) + dt q^-[i]
273 *
274 * note that iteration_matrix stores the negative values of the
275 * low order transport matrix l therefore a=dt*fctp->theta is used.
276 */
277 Paso_SolverFCT_setMuPaLuPbQ(z_m,fctp->lumped_mass_matrix,u,
278 dt*fctp->theta,fctp->iteration_matrix,dt,sourceN);
279
280 /* add corrected fluxes into z_m */
281 Paso_FCTransportProblem_addCorrectedFluxes(z_m,flux_matrix,RN_m,RP_m);
282 /*
283 * now we solve the linear system to get the correction dt:
284 *
285 */
286 if (fctp->theta > 0) {
287 /* set the right hand side of the linear system: */
288 #pragma omp parallel for private(i)
289 for (i = 0; i < n_rows; ++i) {
290 z_m[i]=b_n[i]-z_m[i];
291 }
292 options->tolerance=1.e-3;
293 Paso_solve(fctp->iteration_matrix,du_m,z_m,options);
294 /* TODO: check errors ! */
295 omega=dt*fctp->theta;
296 } else {
297 #pragma omp parallel for private(i,rtmp,rtmp1)
298 for (i = 0; i < n_rows; ++i) {
299 rtmp=fctp->lumped_mass_matrix[i];
300 if (ABS(rtmp)>0.) {
301 rtmp1=(b_n[i]-z_m[i])/rtmp;
302 } else {
303 rtmp1=0;
304 }
305 du_m[i]=rtmp1;
306 }
307 omega=1.;
308 }
309 /*
310 * update u and calculate norm of du_m and the new u:
311 *
312 */
313 norm_u=0.;
314 norm_du=0.;
315 #pragma omp parallel private(local_norm_u,local_norm_du)
316 {
317 local_norm_u=0.;
318 local_norm_du=0.;
319 #pragma omp for schedule(static) private(i)
320 for (i=0;i<n_rows;++i) {
321 u[i]+=omega*du_m[i];
322 local_norm_u=MAX(local_norm_u,ABS(u[i]));
323 local_norm_du=MAX(local_norm_du,ABS(du_m[i]));
324 }
325 #pragma omp critical
326 {
327 norm_u=MAX(norm_u,local_norm_u);
328 norm_du=MAX(norm_du,local_norm_du);
329 }
330 }
331 #ifdef PASO_MPI
332 local_norm[0]=norm_u;
333 local_norm[1]=norm_du;
334 MPI_Allredu_mce(local_norm,norm, 2, MPI_DOUBLE, MPI_MAX, fctp->mpi_info->comm);
335 norm_u=norm[0];
336 norm_du=norm[1];
337 #endif
338 norm_du*=omega;
339 converged=(norm_du <= tolerance * norm_u);
340 m++;
341 printf("iteration step %d: norm u and du_m : %e %e\n",m,norm_u,norm_du);
342 /* TODO: check if du_m has been redu_mced */
343 } /* end of inner mation */
344 } /* end of time integration */
345 #pragma omp parallel for schedule(static) private(i)
346 for (i=0;i<n_rows;++i) fctp->u[i]=u[i]+fctp->u_min;
347 /* TODO: update u_min */
348
349 }
350 /*
351 * clean-up:
352 *
353 */
354 TMPMEMFREE(b_n);
355 Paso_SystemMatrix_free(flux_matrix);
356 TMPMEMFREE(sourceP);
357 TMPMEMFREE(sourceN);
358 TMPMEMFREE(uTilde_n);
359 TMPMEMFREE(QN_n);
360 TMPMEMFREE(QP_n);
361 TMPMEMFREE(RN_m);
362 TMPMEMFREE(RP_m);
363 TMPMEMFREE(z_m);
364 TMPMEMFREE(du_m);
365 }

  ViewVC Help
Powered by ViewVC 1.1.26