/[escript]/branches/stage3.1/paso/src/SolverFCT_solve.c
ViewVC logotype

Contents of /branches/stage3.1/paso/src/SolverFCT_solve.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 2945 - (show annotations)
Wed Feb 24 00:17:46 2010 UTC (8 years, 11 months ago) by jfenwick
File MIME type: text/plain
File size: 15647 byte(s)
Bringing release stage up to trunk version 2944

1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2010 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14
15 /**************************************************************/
16
17 /* Paso: Flux correction transport solver
18 *
19 * solves Mu_t=Ku+q
20 * u(0) >=0
21 *
22 * Warning: the program assums sum_{j} k_{ij}=0!!!
23 *
24 */
25 /**************************************************************/
26
27 /* Author: l.gross@uq.edu.au */
28
29 /**************************************************************/
30
31 #include "Paso.h"
32 #include "Solver.h"
33 #include "SolverFCT.h"
34 #include "PasoUtil.h"
35 #include "esysUtils/blocktimer.h"
36 #ifdef _OPENMP
37 #include <omp.h>
38 #endif
39 #ifdef PASO_MPI
40 #include <mpi.h>
41 #endif
42
43 /*
44 * inserts the source term into the problem
45 */
46 void Paso_FCT_setSource(const dim_t n,const double *source, double* sourceN, double* sourceP)
47 {
48 dim_t i;
49 register double rtmp;
50 /*
51 * seperate source into positive and negative part:
52 */
53 #pragma omp parallel for private(i,rtmp)
54 for (i = 0; i < n; ++i) {
55 rtmp=source[i];
56 if (rtmp <0) {
57 sourceN[i]=-rtmp;
58 sourceP[i]=0;
59 } else {
60 sourceN[i]= 0;
61 sourceP[i]= rtmp;
62 }
63 }
64 }
65
66 err_t Paso_FCT_setUpRightHandSide(Paso_FCTransportProblem* fctp, const double dt, const double *u_m, Paso_Coupler* u_m_coupler, double * z_m,
67 Paso_SystemMatrix* flux_matrix, Paso_Coupler* uTilde_coupler, const double *b,
68 Paso_Coupler* QN_coupler, Paso_Coupler* QP_coupler,
69 double *RN_m, Paso_Coupler* RN_m_coupler, double* RP_m, Paso_Coupler* RP_m_coupler, const double *sourceN,
70 Paso_Performance* pp)
71 {
72 const dim_t n=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix);
73 /* distribute u */
74 Paso_Coupler_startCollect(u_m_coupler,u_m);
75 Paso_Coupler_finishCollect(u_m_coupler);
76 /*
77 * set the ant diffusion fluxes:
78 *
79 */
80 Paso_FCTransportProblem_setAntiDiffusionFlux(dt,fctp,flux_matrix,u_m_coupler);
81 /*
82 * apply pre flux-correction: f_{ij}:=0 if f_{ij}*(\tilde{u}[i]- \tilde{u}[j])<=0
83 *
84 */
85 Paso_FCTransportProblem_applyPreAntiDiffusionCorrection(flux_matrix,uTilde_coupler);
86 /*
87 * set flux limiters RN_m,RP_m
88 *
89 */
90 Paso_FCTransportProblem_setRs(flux_matrix,fctp->lumped_mass_matrix,QN_coupler,QP_coupler,RN_m,RP_m);
91 Paso_Coupler_startCollect(RN_m_coupler,RN_m);
92 Paso_Coupler_startCollect(RP_m_coupler,RP_m);
93 /*
94 * z_m[i]=b[i] - (m_i*u_m[i] - dt*theta*sum_{j<>i} l_{ij} (u_m[j]-u_m[i]) + dt q^-[i])
95 *
96 * note that iteration_matrix stores the negative values of the
97 * low order transport matrix l therefore a=dt*fctp->theta is used.
98 */
99 Paso_SolverFCT_setMuPaLuPbQ(z_m,fctp->lumped_mass_matrix, u_m_coupler,dt*fctp->theta,fctp->iteration_matrix,dt,sourceN);
100 /* z_m=b-z_m */
101 /*{
102 int kk;
103 for (kk=0;kk<n;kk++) printf("z_m %d : %e %e -> %e\n",kk,z_m[kk], b[kk],z_m[kk]-b[kk]);
104 }
105 */
106 Paso_Update(n,-1.,z_m,1.,b);
107
108 Paso_Coupler_finishCollect(RN_m_coupler);
109 Paso_Coupler_finishCollect(RP_m_coupler);
110 /* add corrected fluxes into z_m */
111 Paso_FCTransportProblem_addCorrectedFluxes(z_m,flux_matrix,RN_m_coupler,RP_m_coupler);
112 return NO_ERROR;
113 }
114
115 void Paso_SolverFCT_solve(Paso_FCTransportProblem* fctp, double* u, double dt, double* source, Paso_Options* options) {
116 const dim_t FAILURES_MAX=100;
117 dim_t m, i_substeps, Failed=0, i;
118 double *z_m=NULL, *b_n=NULL, *sourceP=NULL, *sourceN=NULL, *uTilde_n=NULL, *QN_n=NULL, *QP_n=NULL, *RN_m=NULL, *RP_m=NULL, *du_m=NULL;
119 Paso_Coupler *QN_n_coupler=NULL, *QP_n_coupler=NULL, *RN_m_coupler=NULL, *RP_m_coupler=NULL, *uTilde_n_coupler=NULL, *u_m_coupler=NULL;
120 Paso_SystemMatrix *flux_matrix_m=NULL;
121 double dt_max, dt2,t, norm_u_m, omega, norm_du_m;
122 register double mass, rtmp;
123 const dim_t n=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix);
124 dim_t blockSize=Paso_FCTransportProblem_getBlockSize(fctp);
125 const double atol=options->absolute_tolerance;
126 const double rtol=options->tolerance;
127 const dim_t max_m=options->iter_max;
128 Paso_Performance pp;
129 bool_t converged=FALSE, max_m_reached=FALSE;
130 if (dt<=0.) {
131 Paso_setError(TYPE_ERROR,"Paso_SolverFCT_solve: dt must be positive.");
132 }
133 dt_max=Paso_FCTransportProblem_getSafeTimeStepSize(fctp);
134 /*
135 * allocate memory
136 *
137 */
138 z_m=TMPMEMALLOC(n,double);
139 Paso_checkPtr(z_m);
140 du_m=TMPMEMALLOC(n,double);
141 Paso_checkPtr(du_m);
142 b_n=TMPMEMALLOC(n,double);
143 Paso_checkPtr(b_n);
144 sourceP=TMPMEMALLOC(n,double);
145 Paso_checkPtr(sourceP);
146 sourceN=TMPMEMALLOC(n,double);
147 Paso_checkPtr(sourceN);
148 uTilde_n=TMPMEMALLOC(n,double);
149 Paso_checkPtr(uTilde_n);
150 QN_n=TMPMEMALLOC(n,double);
151 Paso_checkPtr(QN_n);
152 QP_n=TMPMEMALLOC(n,double);
153 Paso_checkPtr(QP_n);
154 RN_m=TMPMEMALLOC(n,double);
155 Paso_checkPtr(RN_m);
156 RP_m=TMPMEMALLOC(n,double);
157 Paso_checkPtr(RP_m);
158 QN_n_coupler=Paso_Coupler_alloc(Paso_FCTransportProblem_borrowConnector(fctp),blockSize);
159 QP_n_coupler=Paso_Coupler_alloc(Paso_FCTransportProblem_borrowConnector(fctp),blockSize);
160 RN_m_coupler=Paso_Coupler_alloc(Paso_FCTransportProblem_borrowConnector(fctp),blockSize);
161 RP_m_coupler=Paso_Coupler_alloc(Paso_FCTransportProblem_borrowConnector(fctp),blockSize);
162 uTilde_n_coupler=Paso_Coupler_alloc(Paso_FCTransportProblem_borrowConnector(fctp),blockSize);
163 u_m_coupler=Paso_Coupler_alloc(Paso_FCTransportProblem_borrowConnector(fctp),blockSize);
164 flux_matrix_m=Paso_SystemMatrix_alloc(fctp->transport_matrix->type,
165 fctp->transport_matrix->pattern,
166 fctp->transport_matrix->row_block_size,
167 fctp->transport_matrix->col_block_size, TRUE);
168
169 if (Paso_noError()) {
170 /*
171 * Preparation:
172 *
173 */
174 Paso_FCT_setSource(n, source, sourceN, sourceP);
175 /*
176 * let the show begin!!!!
177 *
178 */
179 t=0;
180 i_substeps=0;
181 norm_u_m=Paso_lsup(n,fctp->u, fctp->mpi_info);
182 /* while(i_substeps<n_substeps && Paso_noError()) */
183 if (fctp->dt_max < LARGE_POSITIVE_FLOAT) {
184 dt2=MIN(dt_max,dt);
185 } else {
186 dt2=dt;
187 }
188 while( (t<dt*(1.-sqrt(EPSILON))) && Paso_noError()) {
189 if (options->verbose) printf("substep step %d at t=%e (step size= %e)\n",i_substeps+1,t+dt2,dt2);
190 Paso_FCT_setUp(fctp,dt2,sourceN,sourceP,b_n,uTilde_n,uTilde_n_coupler,QN_n,QN_n_coupler,QP_n,QP_n_coupler,
191 options,&pp);
192 Paso_Copy(n,u,fctp->u);
193 /* now the iteration starts */
194 m=0;
195 converged=FALSE;
196 max_m_reached=FALSE;
197 /* tolerance? */
198 while ( (!converged) && (! max_m_reached) && Paso_noError()) {
199 /* set z_m */
200 Paso_FCT_setUpRightHandSide(fctp,dt2,u,u_m_coupler,z_m,flux_matrix_m,uTilde_n_coupler,b_n,
201 QN_n_coupler,QP_n_coupler,RN_m,RN_m_coupler,RP_m,RP_m_coupler,sourceN,&pp);
202 /*
203 * now we solve the linear system to get the correction dt:
204 *
205 */
206 if (fctp->theta > 0) {
207 omega=1./(dt2*fctp->theta);
208 Paso_Solver_solvePreconditioner(fctp->iteration_matrix,du_m,z_m);
209 Paso_Update(n,1.,u,omega,du_m);
210 /* for (i = 0; i < n; ++i) printf("u %d = %e %e <= %e \n",i,u[i], du_m[i], z_m[i]); */
211 } else {
212 omega=1;
213 #pragma omp parallel for private(i,mass,rtmp)
214 for (i = 0; i < n; ++i) {
215 mass=fctp->lumped_mass_matrix[i];
216 if (ABS(mass)>0.) {
217 rtmp=z_m[i]/mass;
218 } else {
219 rtmp=0;
220 }
221 du_m[i]=rtmp;
222 u[i]+=rtmp;
223 }
224 }
225 norm_u_m=Paso_lsup(n,u, fctp->mpi_info);
226 norm_du_m=Paso_lsup(n,du_m, fctp->mpi_info)*omega;
227 if (options->verbose) printf("iteration step %d completed: norm increment= %e (tolerance = %e)\n",m+1, norm_du_m, rtol * norm_u_m + atol);
228
229 max_m_reached=(m>max_m);
230 converged=(norm_du_m <= rtol * norm_u_m + atol);
231 m++;
232 }
233 if (converged) {
234 Failed=0;
235 /* #pragma omp parallel for schedule(static) private(i) */
236 Paso_Copy(n,fctp->u,u);
237 i_substeps++;
238 t+=dt2;
239 if (fctp->dt_max < LARGE_POSITIVE_FLOAT) {
240 dt2=MIN3(dt_max,dt2*1.1,dt-t);
241 } else {
242 dt2=MIN(dt2*1.1,dt-t);
243 }
244 } else if (max_m_reached) {
245 /* if FAILURES_MAX failures in a row: give up */
246 if (Failed > FAILURES_MAX) {
247 Paso_setError(VALUE_ERROR,"Paso_SolverFCT_solve: no convergence after time step reduction.");
248 } else {
249 if (options->verbose) printf("no convergence in Paso_Solver_NewtonGMRES: Trying smaller time step size.");
250 dt2*=0.5;
251 Failed++;
252 }
253 }
254 }
255 /*
256 * clean-up:
257 *
258 */
259 MEMFREE(z_m);
260 MEMFREE(du_m);
261 TMPMEMFREE(b_n);
262 Paso_SystemMatrix_free(flux_matrix_m);
263 TMPMEMFREE(sourceP);
264 TMPMEMFREE(sourceN);
265 TMPMEMFREE(uTilde_n);
266 TMPMEMFREE(QN_n);
267 TMPMEMFREE(QP_n);
268 TMPMEMFREE(RN_m);
269 TMPMEMFREE(RP_m);
270 Paso_Coupler_free(QN_n_coupler);
271 Paso_Coupler_free(QP_n_coupler);
272 Paso_Coupler_free(RN_m_coupler);
273 Paso_Coupler_free(RP_m_coupler);
274 Paso_Coupler_free(uTilde_n_coupler);
275 Paso_Coupler_free(u_m_coupler);
276 options->absolute_tolerance=atol;
277 options->tolerance=rtol;
278 }
279 }
280 double Paso_FCTransportProblem_getSafeTimeStepSize(Paso_FCTransportProblem* fctp)
281 {
282 dim_t i, n;
283 double dt_max, dt_max_loc;
284 register double l_ii,m;
285 n=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix);
286 if ( ! fctp->valid_matrices) {
287 /* extract the row sum of the advective part */
288 Paso_SystemMatrix_rowSum(fctp->mass_matrix,fctp->lumped_mass_matrix);
289
290 /* set low order transport operator */
291 Paso_FCTransportProblem_setLowOrderOperator(fctp);
292
293 if (Paso_noError()) {
294 /*
295 * calculate time step size:
296 */
297 dt_max=LARGE_POSITIVE_FLOAT;
298 #pragma omp parallel private(dt_max_loc)
299 {
300 dt_max_loc=LARGE_POSITIVE_FLOAT;
301 #pragma omp for schedule(static) private(i,l_ii,m)
302 for (i=0;i<n;++i) {
303 l_ii=fctp->main_diagonal_low_order_transport_matrix[i];
304 m=fctp->lumped_mass_matrix[i];
305 if ( (l_ii<0 && m>0.) || (l_ii>0 && m<0) ) {
306 dt_max_loc=MIN(dt_max_loc,-m/l_ii);
307 }
308 }
309 #pragma omp critical
310 {
311 dt_max=MIN(dt_max,dt_max_loc);
312 }
313 }
314 #ifdef PASO_MPI
315 dt_max_loc = dt_max;
316 MPI_Allreduce(&dt_max_loc, &dt_max, 1, MPI_DOUBLE, MPI_MIN, fctp->mpi_info->comm);
317 #endif
318 if (dt_max<LARGE_POSITIVE_FLOAT) dt_max*=fctp->dt_factor;
319 if (dt_max <= 0.) {
320 Paso_setError(TYPE_ERROR,"Paso_SolverFCT_solve: dt must be positive.");
321 }
322 fctp->dt_max=dt_max;
323 fctp->valid_matrices=Paso_noError();
324 }
325 }
326 return fctp->dt_max;
327 }
328 void Paso_FCT_setUp(Paso_FCTransportProblem* fctp, const double dt, const double *sourceN, const double *sourceP, double* b, double* uTilde,
329 Paso_Coupler* uTilde_coupler, double *QN, Paso_Coupler* QN_coupler, double *QP, Paso_Coupler* QP_coupler,
330 Paso_Options* options, Paso_Performance* pp)
331 {
332 dim_t i;
333 const dim_t n=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix);
334 double omega, factor;
335 register double m, u_tilde_i, rtmp4;
336 /* distribute u */
337 Paso_Coupler_startCollect(fctp->u_coupler,fctp->u);
338 Paso_Coupler_finishCollect(fctp->u_coupler);
339 /*
340 * b^n[i]=m u^n[i] + dt*(1-theta) sum_{j <> i} l_{ij}*(u^n[j]-u^n[i]) + dt*sourceP[i]
341 *
342 * note that iteration_matrix stores the negative values of the
343 * low order transport matrix l therefore a=-dt*(1-fctp->theta) is used.
344 *
345 */
346 Paso_SolverFCT_setMuPaLuPbQ(b,fctp->lumped_mass_matrix,fctp->u_coupler,
347 -dt*(1-fctp->theta),fctp->iteration_matrix,dt,sourceP);
348 /*
349 * uTilde[i]=b[i]/m[i]
350 *
351 * fctp->iteration_matrix[i,i]=m[i]/(dt theta) + \frac{1}{\theta} \frac{q^-[i]}-l[i,i]
352 *
353 */
354 if (fctp->theta > 0) {
355 Paso_solve_free(fctp->iteration_matrix);
356 omega=1./(dt*fctp->theta);
357 factor=dt*omega;
358 #pragma omp parallel for private(i,m,u_tilde_i,rtmp4)
359 for (i = 0; i < n; ++i) {
360 m=fctp->lumped_mass_matrix[i];
361 if (ABS(m)>0.) {
362 u_tilde_i=b[i]/m;
363 } else {
364 u_tilde_i=fctp->u[i];
365 }
366 rtmp4=m*omega-fctp->main_diagonal_low_order_transport_matrix[i];
367 if (ABS(u_tilde_i)>0) rtmp4+=sourceN[i]*factor/u_tilde_i;
368 fctp->iteration_matrix->mainBlock->val[fctp->main_iptr[i]]=rtmp4;
369 uTilde[i]=u_tilde_i;
370 /* printf("uTilde %d %e and %e : %e : %e %e\n",i,u_tilde_i, b[i], rtmp4, m, fctp->main_diagonal_low_order_transport_matrix[i]); */
371 }
372 Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER_INIT);
373 Paso_Solver_setPreconditioner(fctp->iteration_matrix,options);
374 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER_INIT);
375 } else {
376 #pragma omp parallel for private(i,m,u_tilde_i)
377 for (i = 0; i < n; ++i) {
378 m=fctp->lumped_mass_matrix[i];
379 if (ABS(m)>0.) {
380 u_tilde_i=b[i]/m;
381 } else {
382 u_tilde_i=fctp->u[i];
383 }
384 uTilde[i]=u_tilde_i;
385 }
386 /* no update of iteration_matrix required! */
387 } /* end (fctp->theta > 0) */
388
389 /* distribute uTilde: */
390 Paso_Coupler_startCollect(uTilde_coupler,uTilde);
391 Paso_Coupler_finishCollect(uTilde_coupler);
392 /*
393 * calculate QP[i] max_{j} (\tilde{u}[j]- \tilde{u}[i] )
394 * QN[i] min_{j} (\tilde{u}[j]- \tilde{u}[i] )
395 *
396 */
397 Paso_SolverFCT_setQs(uTilde_coupler,QN,QP,fctp->iteration_matrix);
398 Paso_Coupler_startCollect(QN_coupler,QN);
399 Paso_Coupler_startCollect(QP_coupler,QP);
400 Paso_Coupler_finishCollect(QN_coupler);
401 Paso_Coupler_finishCollect(QP_coupler);
402 }

  ViewVC Help
Powered by ViewVC 1.1.26