/[escript]/trunk/paso/src/SolverFCT_solve.c
ViewVC logotype

Contents of /trunk/paso/src/SolverFCT_solve.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1670 - (show annotations)
Thu Jul 24 04:30:43 2008 UTC (11 years, 3 months ago) by ksteube
File MIME type: text/plain
File size: 15832 byte(s)
Fix variable declaration to compile under MPI

1 /* $Id:$ */
2
3 /*******************************************************
4 *
5 * Copyright 2007,2008 by University of Queensland
6 *
7 * http://esscc.uq.edu_m.au
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14 /**************************************************************/
15
16 /* Paso: Flux correction transport solver
17 *
18 * solves Mu_t=Ku+q
19 * u(0) >=0
20 *
21 * Warning: the program assums sum_{j} k_{ij}=0!!!
22 *
23 */
24 /**************************************************************/
25
26 /* Author: l.gross@uq.edu_m.au */
27
28 /**************************************************************/
29
30 #include "Paso.h"
31 #include "Solver.h"
32 #include "SolverFCT.h"
33 #include "escript/blocktimer.h"
34 #ifdef _OPENMP
35 #include <omp.h>
36 #endif
37 #ifdef PASO_MPI
38 #include <mpi.h>
39 #endif
40
41 double Paso_FCTransportProblem_getSafeTimeStepSize(Paso_FCTransportProblem* fctp)
42 {
43 dim_t i, n_rows;
44 double dt_max, dt_max_loc;
45 register double rtmp1,rtmp2;
46 n_rows=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix);
47 if (! fctp->valid_matrices) {
48 fctp->dt_max=LARGE_POSITIVE_FLOAT;
49 /* extract the row sum of the advective part */
50 Paso_SystemMatrix_rowSum(fctp->mass_matrix,fctp->lumped_mass_matrix);
51
52 /* set low order transport operator */
53 Paso_FCTransportProblem_setLowOrderOperator(fctp);
54 /*
55 * calculate time step size:
56 */
57 dt_max=LARGE_POSITIVE_FLOAT;
58 if (fctp->theta < 1.) {
59 #pragma omp parallel private(dt_max_loc)
60 {
61 dt_max_loc=LARGE_POSITIVE_FLOAT;
62 #pragma omp for schedule(static) private(i,rtmp1,rtmp2)
63 for (i=0;i<n_rows;++i) {
64 rtmp1=fctp->main_diagonal_low_order_transport_matrix[i];
65 rtmp2=fctp->lumped_mass_matrix[i];
66 if ( (rtmp1<0 && rtmp2>0.) || (rtmp1>0 && rtmp2<0) ) {
67 dt_max_loc=MIN(dt_max_loc,-rtmp2/rtmp1);
68 }
69 }
70 #pragma omp critical
71 {
72 dt_max=MIN(dt_max,dt_max_loc);
73 }
74 }
75 #ifdef PASO_MPI
76 dt_max_loc = dt_max;
77 MPI_Allreduce(&dt_max_loc, &dt_max, 1, MPI_DOUBLE, MPI_MIN, fctp->mpi_info->comm);
78 #endif
79 if (dt_max<LARGE_POSITIVE_FLOAT) dt_max*=1./(1.-fctp->theta);
80 }
81 if (dt_max <= 0.) {
82 Paso_setError(TYPE_ERROR,"Paso_SolverFCT_solve: dt must be positive.");
83 } else {
84 if (dt_max<LARGE_POSITIVE_FLOAT) printf("maximum time step size is %e (theta = %e).\n",dt_max,fctp->theta);
85 }
86 fctp->dt_max=dt_max;
87 fctp->valid_matrices=Paso_noError();
88 }
89 return fctp->dt_max;
90 }
91
92
93
94
95 void Paso_SolverFCT_solve(Paso_FCTransportProblem* fctp, double* u, double dt, double* source, Paso_Options* options) {
96 index_t i;
97 long n_substeps,n, m;
98 double dt_max, omega, dt2,t;
99 double local_norm_u,local_norm_du,norm_u,norm_du, tolerance;
100 register double rtmp1,rtmp2,rtmp3,rtmp4, rtmp;
101 double *b_n=NULL, *sourceP=NULL, *sourceN=NULL, *uTilde_n=NULL, *QN_n=NULL, *QP_n=NULL, *RN_m=NULL, *RP_m=NULL, *z_m=NULL, *du_m=NULL,*u_m=NULL;
102 Paso_Coupler* QN_n_coupler=NULL, *QP_n_coupler=NULL, *uTilde_n_coupler=NULL, *RN_m_coupler=NULL, *RP_m_coupler=NULL, *u_m_coupler=NULL;
103 Paso_SystemMatrix *flux_matrix=NULL;
104 dim_t n_rows=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix);
105 bool_t converged;
106 dim_t max_m=500;
107 double inner_tol=1.e-2;
108 dim_t blockSize=Paso_FCTransportProblem_getBlockSize(fctp);
109 if (dt<=0.) {
110 Paso_setError(TYPE_ERROR,"Paso_SolverFCT_solve: dt must be positive.");
111 }
112 dt_max=Paso_FCTransportProblem_getSafeTimeStepSize(fctp);
113 /*
114 * allocate memory
115 *
116 */
117 u_m=TMPMEMALLOC(n_rows,double);
118 Paso_checkPtr(u_m);
119 b_n=TMPMEMALLOC(n_rows,double);
120 Paso_checkPtr(b_n);
121 sourceP=TMPMEMALLOC(n_rows,double);
122 Paso_checkPtr(sourceP);
123 sourceN=TMPMEMALLOC(n_rows,double);
124 Paso_checkPtr(sourceN);
125 uTilde_n=TMPMEMALLOC(n_rows,double);
126 Paso_checkPtr(uTilde_n);
127 QN_n=TMPMEMALLOC(n_rows,double);
128 Paso_checkPtr(QN_n);
129 QP_n=TMPMEMALLOC(n_rows,double);
130 Paso_checkPtr(QP_n);
131 RN_m=TMPMEMALLOC(n_rows,double);
132 Paso_checkPtr(RN_m);
133 RP_m=TMPMEMALLOC(n_rows,double);
134 Paso_checkPtr(RP_m);
135 z_m=TMPMEMALLOC(n_rows,double);
136 Paso_checkPtr(z_m);
137 du_m=TMPMEMALLOC(n_rows,double);
138 Paso_checkPtr(du_m);
139 QN_n_coupler=Paso_Coupler_alloc(Paso_FCTransportProblem_borrowConnector(fctp),blockSize);
140 QP_n_coupler=Paso_Coupler_alloc(Paso_FCTransportProblem_borrowConnector(fctp),blockSize);
141 RN_m_coupler=Paso_Coupler_alloc(Paso_FCTransportProblem_borrowConnector(fctp),blockSize);
142 RP_m_coupler=Paso_Coupler_alloc(Paso_FCTransportProblem_borrowConnector(fctp),blockSize);
143 uTilde_n_coupler=Paso_Coupler_alloc(Paso_FCTransportProblem_borrowConnector(fctp),blockSize);
144 u_m_coupler=Paso_Coupler_alloc(Paso_FCTransportProblem_borrowConnector(fctp),blockSize);
145 flux_matrix=Paso_SystemMatrix_alloc(fctp->transport_matrix->type,
146 fctp->transport_matrix->pattern,
147 fctp->transport_matrix->row_block_size,
148 fctp->transport_matrix->col_block_size);
149 if (Paso_noError()) {
150 #ifdef PASO_MPI
151 double local_norm[2], norm[2];
152 #endif
153 /*
154 * Preparation:
155 *
156 */
157
158 /* decide on substepping */
159 if (fctp->dt_max < LARGE_POSITIVE_FLOAT) {
160 n_substeps=(long)ceil(dt/dt_max);
161 } else {
162 n_substeps=1;
163 }
164 dt2=dt/n_substeps;
165 printf("%ld time steps of size is %e (theta = %e, dt_max=%e).\n",n_substeps, dt2,fctp->theta, dt_max);
166 /*
167 * seperate source into positive and negative part:
168 */
169 #pragma omp parallel for private(i,rtmp)
170 for (i = 0; i < n_rows; ++i) {
171 rtmp=source[i];
172 if (rtmp <0) {
173 sourceN[i]=-rtmp;
174 sourceP[i]=0;
175 } else {
176 sourceN[i]= 0;
177 sourceP[i]= rtmp;
178 }
179 }
180 u_m_coupler->data=u_m;
181 /* for (i = 0; i < n_rows; ++i) printf("%d : %e \n",i,source[i],sourceP[i],sourceN[i]) */
182 /*
183 * let the show begin!!!!
184 *
185 */
186 t=dt2;
187 n=0;
188 tolerance=options->tolerance;
189 while(n<n_substeps && Paso_noError()) {
190
191 printf("substep step %ld at t=%e\n",n+1,t);
192 /*
193 * b^n[i]=m u^n[i] + dt2*(1-theta) sum_{j <> i} l_{ij}*(u^n[j]-u^n[i]) + dt2*sourceP[i]
194 *
195 * note that iteration_matrix stores the negative values of the
196 * low order transport matrix l therefore a=-dt2*(1-fctp->theta) is used.
197 *
198 */
199 Paso_SolverFCT_setMuPaLuPbQ(b_n,fctp->lumped_mass_matrix,fctp->u_coupler,
200 -dt2*(1-fctp->theta),fctp->iteration_matrix,dt2,sourceP);
201 /*
202 * uTilde_n[i]=b[i]/m[i]
203 *
204 * fctp->iteration_matrix[i,i]=m[i]/(dt2 theta) + \frac{1}{\theta} \frac{q^-[i]}-l[i,i]
205 *
206 */
207 if (fctp->theta > 0) {
208 Paso_solve_free(fctp->iteration_matrix);
209 omega=1./(dt2*fctp->theta);
210 rtmp2=dt2*omega;
211 #pragma omp parallel for private(i,rtmp,rtmp3,rtmp4)
212 for (i = 0; i < n_rows; ++i) {
213 rtmp=fctp->lumped_mass_matrix[i];
214 if (ABS(rtmp)>0.) {
215 rtmp3=b_n[i]/rtmp;
216 } else {
217 rtmp3=fctp->u[i];
218 }
219 rtmp4=rtmp*omega-fctp->main_diagonal_low_order_transport_matrix[i];
220 if (ABS(rtmp3)>0) rtmp4+=sourceN[i]*rtmp2/rtmp3;
221 fctp->iteration_matrix->mainBlock->val[fctp->main_iptr[i]]=rtmp4;
222 uTilde_n[i]=rtmp3;
223 }
224 } else {
225 #pragma omp parallel for private(i,rtmp,rtmp3)
226 for (i = 0; i < n_rows; ++i) {
227 rtmp=fctp->lumped_mass_matrix[i];
228 if (ABS(rtmp)>0.) {
229 rtmp3=b_n[i]/rtmp;
230 } else {
231 rtmp3=fctp->u[i];
232 }
233 uTilde_n[i]=rtmp3;
234 }
235 omega=1.;
236 /* no update of iteration_matrix required! */
237 } /* end (fctp->theta > 0) */
238
239 /* distribute uTilde_n: */
240 Paso_Coupler_startCollect(uTilde_n_coupler,uTilde_n);
241 Paso_Coupler_finishCollect(uTilde_n_coupler);
242 /*
243 * calculate QP_n[i] max_{j} (\tilde{u}[j]- \tilde{u}[i] )
244 * QN_n[i] min_{j} (\tilde{u}[j]- \tilde{u}[i] )
245 *
246 */
247 Paso_SolverFCT_setQs(uTilde_n_coupler,QN_n,QP_n,fctp->iteration_matrix);
248 Paso_Coupler_startCollect(QN_n_coupler,QN_n);
249 Paso_Coupler_startCollect(QP_n_coupler,QP_n);
250 Paso_Coupler_finishCollect(QN_n_coupler);
251 Paso_Coupler_finishCollect(QP_n_coupler);
252 /*
253 * now we enter the iteration on a time-step:
254 *
255 */
256 Paso_Coupler_copyAll(fctp->u_coupler, u_m_coupler);
257 /* REPLACE BY JACOBEAN FREE NEWTON */
258 m=0;
259 converged=FALSE;
260 while ( (!converged) && (m<max_m) && Paso_noError()) {
261 printf("iteration step %ld\n",m+1);
262 /*
263 * set the ant diffusion fluxes:
264 *
265 */
266 Paso_FCTransportProblem_setAntiDiffusionFlux(dt2,fctp,flux_matrix,u_m_coupler);
267 /*
268 * apply pre flux-correction: f_{ij}:=0 if f_{ij}*(\tilde{u}[i]- \tilde{u}[j])<=0
269 *
270 */
271 Paso_FCTransportProblem_applyPreAntiDiffusionCorrection(flux_matrix,uTilde_n_coupler);
272 /*
273 * set flux limiters RN_m,RP_m
274 *
275 */
276 Paso_FCTransportProblem_setRs(flux_matrix,fctp->lumped_mass_matrix,QN_n_coupler,QP_n_coupler,RN_m,RP_m);
277 Paso_Coupler_startCollect(RN_m_coupler,RN_m);
278 Paso_Coupler_startCollect(RP_m_coupler,RP_m);
279 /*
280 * z_m[i]=b_n[i] - (m_i*u[i] - dt2*theta*sum_{j<>i} l_{ij} (u[j]-u[i]) + dt2 q^-[i])
281 *
282 * note that iteration_matrix stores the negative values of the
283 * low order transport matrix l therefore a=dt2*fctp->theta is used.
284 */
285
286 Paso_SolverFCT_setMuPaLuPbQ(z_m,fctp->lumped_mass_matrix, u_m_coupler,
287 dt2*fctp->theta,fctp->iteration_matrix,dt2,sourceN);
288 /* for (i = 0; i < n_rows; ++i) {
289 * printf("%d: %e %e\n",i,z_m[i], b_n[i]);
290 * }
291 */
292 #pragma omp parallel for private(i)
293 for (i = 0; i < n_rows; ++i) z_m[i]=b_n[i]-z_m[i];
294
295 Paso_Coupler_finishCollect(RN_m_coupler);
296 Paso_Coupler_finishCollect(RP_m_coupler);
297 /* add corrected fluxes into z_m */
298 Paso_FCTransportProblem_addCorrectedFluxes(z_m,flux_matrix,RN_m_coupler,RP_m_coupler);
299 /*
300 * now we solve the linear system to get the correction dt2:
301 *
302 */
303 if (fctp->theta > 0) {
304 /* set the right hand side of the linear system: */
305 options->tolerance=inner_tol;
306 options->verbose=TRUE;
307 Paso_solve(fctp->iteration_matrix,du_m,z_m,options);
308 /* TODO: check errors ! */
309 } else {
310 #pragma omp parallel for private(i,rtmp,rtmp1)
311 for (i = 0; i < n_rows; ++i) {
312 rtmp=fctp->lumped_mass_matrix[i];
313 if (ABS(rtmp)>0.) {
314 rtmp1=z_m[i]/rtmp;
315 } else {
316 rtmp1=0;
317 }
318 du_m[i]=rtmp1;
319 }
320 }
321 /*
322 * update u and calculate norm of du_m and the new u:
323 *
324 */
325 norm_u=0.;
326 norm_du=0.;
327 #pragma omp parallel private(local_norm_u,local_norm_du)
328 {
329 local_norm_u=0.;
330 local_norm_du=0.;
331 #pragma omp for schedule(static) private(i)
332 for (i=0;i<n_rows;++i) {
333 u_m[i]+=omega*du_m[i];
334 local_norm_u=MAX(local_norm_u,ABS(u_m[i]));
335 local_norm_du=MAX(local_norm_du,ABS(du_m[i]));
336 }
337 #pragma omp critical
338 {
339 norm_u=MAX(norm_u,local_norm_u);
340 norm_du=MAX(norm_du,local_norm_du);
341 }
342 }
343 Paso_Coupler_startCollect(u_m_coupler,u_m);
344 #ifdef PASO_MPI
345 local_norm[0]=norm_u;
346 local_norm[1]=norm_du;
347 MPI_Allreduce(local_norm,norm, 2, MPI_DOUBLE, MPI_MAX, fctp->mpi_info->comm);
348 norm_u=norm[0];
349 norm_du=norm[1];
350 #endif
351 norm_du*=omega;
352 converged=(norm_du <= tolerance * norm_u);
353 m++;
354 printf("iteration step %ld: norm u and du_m : %e %e\n",m,norm_u,norm_du);
355 /* TODO: check if du_m has been redu_mced */
356 Paso_Coupler_finishCollect(u_m_coupler);
357 } /* end of inner iteration */
358 SWAP(fctp->u,u_m,double*);
359 SWAP(fctp->u_coupler,u_m_coupler,Paso_Coupler*);
360 n++;
361 } /* end of time integration */
362 options->tolerance=tolerance;
363 #pragma omp parallel for schedule(static) private(i)
364 for (i=0;i<n_rows;++i) u[i]=fctp->u[i];
365 }
366 /*
367 * clean-up:
368 *
369 */
370 TMPMEMFREE(b_n);
371 Paso_SystemMatrix_free(flux_matrix);
372 TMPMEMFREE(sourceP);
373 TMPMEMFREE(sourceN);
374 TMPMEMFREE(uTilde_n);
375 TMPMEMFREE(QN_n);
376 TMPMEMFREE(QP_n);
377 TMPMEMFREE(RN_m);
378 TMPMEMFREE(RP_m);
379 TMPMEMFREE(z_m);
380 TMPMEMFREE(du_m);
381 TMPMEMFREE(u_m);
382 Paso_Coupler_free(QN_n_coupler);
383 Paso_Coupler_free(QP_n_coupler);
384 Paso_Coupler_free(RN_m_coupler);
385 Paso_Coupler_free(RP_m_coupler);
386 Paso_Coupler_free(uTilde_n_coupler);
387 Paso_Coupler_free(u_m_coupler);
388 }

  ViewVC Help
Powered by ViewVC 1.1.26