/[escript]/trunk/paso/src/SolverFCT_solve.c
ViewVC logotype

Contents of /trunk/paso/src/SolverFCT_solve.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1811 - (show annotations)
Thu Sep 25 23:11:13 2008 UTC (11 years, 4 months ago) by ksteube
File MIME type: text/plain
File size: 15554 byte(s)
Copyright updated in all files

1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2008 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14
15 /**************************************************************/
16
17 /* Paso: Flux correction transport solver
18 *
19 * solves Mu_t=Ku+q
20 * u(0) >=0
21 *
22 * Warning: the program assums sum_{j} k_{ij}=0!!!
23 *
24 */
25 /**************************************************************/
26
27 /* Author: l.gross@uq.edu.au */
28
29 /**************************************************************/
30
31 #include "Paso.h"
32 #include "Solver.h"
33 #include "SolverFCT.h"
34 #include "PasoUtil.h"
35 #include "escript/blocktimer.h"
36 #ifdef _OPENMP
37 #include <omp.h>
38 #endif
39 #ifdef PASO_MPI
40 #include <mpi.h>
41 #endif
42
43 void Paso_SolverFCT_solve(Paso_FCTransportProblem* fctp, double* u, double dt, double* source, Paso_Options* options) {
44 const dim_t FAILURES_MAX=5;
45 err_t error_code;
46 dim_t m,n_substeps, i_substeps, Failed, i, iter;
47 err_t errorCode;
48 double *z_m=NULL, *b_n=NULL, *sourceP=NULL, *sourceN=NULL, *uTilde_n=NULL, *QN_n=NULL, *QP_n=NULL, *RN_m=NULL, *RP_m=NULL, *du_m=NULL;
49 Paso_Coupler *QN_n_coupler=NULL, *QP_n_coupler=NULL, *RN_m_coupler=NULL, *RP_m_coupler=NULL, *uTilde_n_coupler=NULL, *u_m_coupler=NULL;
50 Paso_SystemMatrix *flux_matrix_m=NULL;
51 double dt_max, dt2,t, norm_u_m, omega, norm_du_m, tol;
52 register double mass, rtmp;
53 const dim_t n=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix);
54 dim_t blockSize=Paso_FCTransportProblem_getBlockSize(fctp);
55 const double atol=options->absolute_tolerance;
56 const double rtol=options->tolerance;
57 const dim_t max_m=options->iter_max;
58 Paso_Performance pp;
59 bool_t converged=FALSE, max_m_reached=FALSE;
60 if (dt<=0.) {
61 Paso_setError(TYPE_ERROR,"Paso_SolverFCT_solve: dt must be positive.");
62 }
63 dt_max=Paso_FCTransportProblem_getSafeTimeStepSize(fctp);
64 /*
65 * allocate memory
66 *
67 */
68 z_m=TMPMEMALLOC(n,double);
69 Paso_checkPtr(z_m);
70 du_m=TMPMEMALLOC(n,double);
71 Paso_checkPtr(du_m);
72 b_n=TMPMEMALLOC(n,double);
73 Paso_checkPtr(b_n);
74 sourceP=TMPMEMALLOC(n,double);
75 Paso_checkPtr(sourceP);
76 sourceN=TMPMEMALLOC(n,double);
77 Paso_checkPtr(sourceN);
78 uTilde_n=TMPMEMALLOC(n,double);
79 Paso_checkPtr(uTilde_n);
80 QN_n=TMPMEMALLOC(n,double);
81 Paso_checkPtr(QN_n);
82 QP_n=TMPMEMALLOC(n,double);
83 Paso_checkPtr(QP_n);
84 RN_m=TMPMEMALLOC(n,double);
85 Paso_checkPtr(RN_m);
86 RP_m=TMPMEMALLOC(n,double);
87 Paso_checkPtr(RP_m);
88 QN_n_coupler=Paso_Coupler_alloc(Paso_FCTransportProblem_borrowConnector(fctp),blockSize);
89 QP_n_coupler=Paso_Coupler_alloc(Paso_FCTransportProblem_borrowConnector(fctp),blockSize);
90 RN_m_coupler=Paso_Coupler_alloc(Paso_FCTransportProblem_borrowConnector(fctp),blockSize);
91 RP_m_coupler=Paso_Coupler_alloc(Paso_FCTransportProblem_borrowConnector(fctp),blockSize);
92 uTilde_n_coupler=Paso_Coupler_alloc(Paso_FCTransportProblem_borrowConnector(fctp),blockSize);
93 u_m_coupler=Paso_Coupler_alloc(Paso_FCTransportProblem_borrowConnector(fctp),blockSize);
94 flux_matrix_m=Paso_SystemMatrix_alloc(fctp->transport_matrix->type,
95 fctp->transport_matrix->pattern,
96 fctp->transport_matrix->row_block_size,
97 fctp->transport_matrix->col_block_size);
98
99 if (Paso_noError()) {
100 /*
101 * Preparation:
102 *
103 */
104 Paso_FCT_setSource(n, source, sourceN, sourceP);
105 /*
106 * let the show begin!!!!
107 *
108 */
109 t=0;
110 i_substeps=0;
111 Paso_Copy(n,u,fctp->u);
112 norm_u_m=Paso_lsup(n,u, fctp->mpi_info);
113 /* while(i_substeps<n_substeps && Paso_noError()) { */
114 if (fctp->dt_max < LARGE_POSITIVE_FLOAT) {
115 dt2=MIN(dt_max,dt);
116 } else {
117 dt2=dt;
118 }
119 while(t<dt && Paso_noError()) {
120 printf("substep step %ld at t=%e (step size= %e)\n",i_substeps+1,t+dt2,dt2);
121 Paso_FCT_setUp(fctp,dt2,sourceN,sourceP,b_n,uTilde_n,uTilde_n_coupler,QN_n,QN_n_coupler,QP_n,QP_n_coupler,
122 options,&pp);
123 /* now the iteration starts */
124 m=0;
125 converged=FALSE;
126 max_m_reached=FALSE;
127 /* tolerance? */
128 while ( (!converged) && (! max_m_reached) && Paso_noError()) {
129 /* set z_m */
130 Paso_FCT_setUpRightHandSide(fctp,dt2,u,u_m_coupler,z_m,flux_matrix_m,uTilde_n_coupler,b_n,
131 QN_n_coupler,QP_n_coupler,RN_m,RN_m_coupler,RP_m,RP_m_coupler,sourceN,&pp);
132 /*
133 * now we solve the linear system to get the correction dt:
134 *
135 */
136 if (fctp->theta > 0) {
137 omega=1./(dt2*fctp->theta);
138 Paso_Solver_solvePreconditioner(fctp->iteration_matrix,du_m,z_m);
139 Paso_Update(n,1.,u,omega,du_m);
140 } else {
141 omega=1;
142 #pragma omp parallel for private(i,mass,rtmp)
143 for (i = 0; i < n; ++i) {
144 mass=fctp->lumped_mass_matrix[i];
145 if (ABS(mass)>0.) {
146 rtmp=z_m[i]/mass;
147 } else {
148 rtmp=0;
149 }
150 du_m[i]=rtmp;
151 u[i]+=rtmp;
152 }
153 }
154 norm_u_m=Paso_lsup(n,u, fctp->mpi_info);
155 norm_du_m=Paso_lsup(n,du_m, fctp->mpi_info)*omega;
156 printf("iteration step %ld completed: norm increment= %e (tolerance = %e)\n",m+1, norm_du_m, rtol * norm_u_m + atol);
157
158 max_m_reached=(m>max_m);
159 converged=(norm_du_m <= rtol * norm_u_m + atol);
160 m++;
161 }
162 if (converged) {
163 Failed=0;
164 #pragma omp parallel for schedule(static) private(i)
165 Paso_Copy(n,fctp->u,u);
166 i_substeps++;
167 t+=dt2;
168 if (fctp->dt_max < LARGE_POSITIVE_FLOAT) {
169 dt2=MIN3(dt_max,dt2*1.5,dt-t);
170 } else {
171 dt2=MIN(dt2*1.5,dt-t);
172 }
173 } else if (max_m_reached) {
174 /* if FAILURES_MAX failures in a row: give up */
175 if (Failed > FAILURES_MAX) {
176 Paso_setError(VALUE_ERROR,"Paso_SolverFCT_solve: no convergence after time step reduction.");
177 } else {
178 printf("no convergence in Paso_Solver_NewtonGMRES: Trying smaller time step size.");
179 dt2=dt*0.5;
180 Failed++;
181 }
182 }
183
184 }
185 /*
186 * clean-up:
187 *
188 */
189 MEMFREE(z_m);
190 MEMFREE(du_m);
191 TMPMEMFREE(b_n);
192 Paso_SystemMatrix_free(flux_matrix_m);
193 TMPMEMFREE(sourceP);
194 TMPMEMFREE(sourceN);
195 TMPMEMFREE(uTilde_n);
196 TMPMEMFREE(QN_n);
197 TMPMEMFREE(QP_n);
198 TMPMEMFREE(RN_m);
199 TMPMEMFREE(RP_m);
200 Paso_Coupler_free(QN_n_coupler);
201 Paso_Coupler_free(QP_n_coupler);
202 Paso_Coupler_free(RN_m_coupler);
203 Paso_Coupler_free(RP_m_coupler);
204 Paso_Coupler_free(uTilde_n_coupler);
205 Paso_Coupler_free(u_m_coupler);
206 options->absolute_tolerance=atol;
207 options->tolerance=rtol;
208 }
209 }
210 double Paso_FCTransportProblem_getSafeTimeStepSize(Paso_FCTransportProblem* fctp)
211 {
212 dim_t i, n;
213 double dt_max, dt_max_loc;
214 register double l_ii,m;
215 n=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix);
216 if (! fctp->valid_matrices) {
217 fctp->dt_max=LARGE_POSITIVE_FLOAT;
218 /* extract the row sum of the advective part */
219 Paso_SystemMatrix_rowSum(fctp->mass_matrix,fctp->lumped_mass_matrix);
220
221 /* set low order transport operator */
222 Paso_FCTransportProblem_setLowOrderOperator(fctp);
223 /*
224 * calculate time step size:
225 */
226 dt_max=LARGE_POSITIVE_FLOAT;
227 if (fctp->theta < 1.) {
228 #pragma omp parallel private(dt_max_loc)
229 {
230 dt_max_loc=LARGE_POSITIVE_FLOAT;
231 #pragma omp for schedule(static) private(i,l_ii,m)
232 for (i=0;i<n;++i) {
233 l_ii=fctp->main_diagonal_low_order_transport_matrix[i];
234 m=fctp->lumped_mass_matrix[i];
235 if ( (l_ii<0 && m>0.) || (l_ii>0 && m<0) ) {
236 dt_max_loc=MIN(dt_max_loc,-m/l_ii);
237 }
238 }
239 #pragma omp critical
240 {
241 dt_max=MIN(dt_max,dt_max_loc);
242 }
243 }
244 #ifdef PASO_MPI
245 dt_max_loc = dt_max;
246 MPI_Allreduce(&dt_max_loc, &dt_max, 1, MPI_DOUBLE, MPI_MIN, fctp->mpi_info->comm);
247 #endif
248 if (dt_max<LARGE_POSITIVE_FLOAT) dt_max*=1./(1.-fctp->theta);
249 }
250 if (dt_max <= 0.) {
251 Paso_setError(TYPE_ERROR,"Paso_SolverFCT_solve: dt must be positive.");
252 } else {
253 if (dt_max<LARGE_POSITIVE_FLOAT) printf("maximum time step size is %e (theta = %e).\n",dt_max,fctp->theta);
254 }
255 fctp->dt_max=dt_max;
256 fctp->valid_matrices=Paso_noError();
257 }
258 return fctp->dt_max;
259 }
260 /*
261 * inserts the source term into the problem
262 */
263 err_t Paso_FCT_setSource(const dim_t n,const double *source, double* sourceN, double* sourceP)
264 {
265 dim_t i;
266 register double rtmp;
267 /*
268 * seperate source into positive and negative part:
269 */
270 #pragma omp parallel for private(i,rtmp)
271 for (i = 0; i < n; ++i) {
272 rtmp=source[i];
273 if (rtmp <0) {
274 sourceN[i]=-rtmp;
275 sourceP[i]=0;
276 } else {
277 sourceN[i]= 0;
278 sourceP[i]= rtmp;
279 }
280 }
281 }
282 err_t Paso_FCT_setUp(Paso_FCTransportProblem* fctp, const double dt, const double *sourceN, const double *sourceP, double* b, double* uTilde,
283 Paso_Coupler* uTilde_coupler, double *QN, Paso_Coupler* QN_coupler, double *QP, Paso_Coupler* QP_coupler,
284 Paso_Options* options, Paso_Performance* pp)
285 {
286 dim_t i;
287 const dim_t n=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix);
288 double omega, factor;
289 register double m, u_tilde_i, rtmp4;
290 /* distribute u */
291 Paso_Coupler_startCollect(fctp->u_coupler,fctp->u);
292 Paso_Coupler_finishCollect(fctp->u_coupler);
293 /*
294 * b^n[i]=m u^n[i] + dt*(1-theta) sum_{j <> i} l_{ij}*(u^n[j]-u^n[i]) + dt*sourceP[i]
295 *
296 * note that iteration_matrix stores the negative values of the
297 * low order transport matrix l therefore a=-dt*(1-fctp->theta) is used.
298 *
299 */
300 Paso_SolverFCT_setMuPaLuPbQ(b,fctp->lumped_mass_matrix,fctp->u_coupler,
301 -dt*(1-fctp->theta),fctp->iteration_matrix,dt,sourceP);
302 /*
303 * uTilde[i]=b[i]/m[i]
304 *
305 * fctp->iteration_matrix[i,i]=m[i]/(dt theta) + \frac{1}{\theta} \frac{q^-[i]}-l[i,i]
306 *
307 */
308 if (fctp->theta > 0) {
309 Paso_solve_free(fctp->iteration_matrix);
310 omega=1./(dt*fctp->theta);
311 factor=dt*omega;
312 #pragma omp parallel for private(i,m,u_tilde_i,rtmp4)
313 for (i = 0; i < n; ++i) {
314 m=fctp->lumped_mass_matrix[i];
315 if (ABS(m)>0.) {
316 u_tilde_i=b[i]/m;
317 } else {
318 u_tilde_i=fctp->u[i];
319 }
320 rtmp4=m*omega-fctp->main_diagonal_low_order_transport_matrix[i];
321 if (ABS(u_tilde_i)>0) rtmp4+=sourceN[i]*factor/u_tilde_i;
322 fctp->iteration_matrix->mainBlock->val[fctp->main_iptr[i]]=rtmp4;
323 uTilde[i]=u_tilde_i;
324 }
325 Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER_INIT);
326 Paso_Solver_setPreconditioner(fctp->iteration_matrix,options);
327 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER_INIT);
328 } else {
329 #pragma omp parallel for private(i,m,u_tilde_i)
330 for (i = 0; i < n; ++i) {
331 m=fctp->lumped_mass_matrix[i];
332 if (ABS(m)>0.) {
333 u_tilde_i=b[i]/m;
334 } else {
335 u_tilde_i=fctp->u[i];
336 }
337 uTilde[i]=u_tilde_i;
338 }
339 /* no update of iteration_matrix required! */
340 } /* end (fctp->theta > 0) */
341
342 /* distribute uTilde: */
343 Paso_Coupler_startCollect(uTilde_coupler,uTilde);
344 Paso_Coupler_finishCollect(uTilde_coupler);
345 /*
346 * calculate QP[i] max_{j} (\tilde{u}[j]- \tilde{u}[i] )
347 * QN[i] min_{j} (\tilde{u}[j]- \tilde{u}[i] )
348 *
349 */
350 Paso_SolverFCT_setQs(uTilde_coupler,QN,QP,fctp->iteration_matrix);
351 Paso_Coupler_startCollect(QN_coupler,QN);
352 Paso_Coupler_startCollect(QP_coupler,QP);
353 Paso_Coupler_finishCollect(QN_coupler);
354 Paso_Coupler_finishCollect(QP_coupler);
355 }
356 err_t Paso_FCT_setUpRightHandSide(Paso_FCTransportProblem* fctp, const double dt, const double *u_m, Paso_Coupler* u_m_coupler, double * z_m,
357 Paso_SystemMatrix* flux_matrix, Paso_Coupler* uTilde_coupler, const double *b,
358 Paso_Coupler* QN_coupler, Paso_Coupler* QP_coupler,
359 double *RN_m, Paso_Coupler* RN_m_coupler, double* RP_m, Paso_Coupler* RP_m_coupler, const double *sourceN,
360 Paso_Performance* pp)
361 {
362 dim_t i;
363 const dim_t n=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix);
364 register double m, rtmp;
365 /* distribute u */
366 Paso_Coupler_startCollect(u_m_coupler,u_m);
367 Paso_Coupler_finishCollect(u_m_coupler);
368 /*
369 * set the ant diffusion fluxes:
370 *
371 */
372 Paso_FCTransportProblem_setAntiDiffusionFlux(dt,fctp,flux_matrix,u_m_coupler);
373 /*
374 * apply pre flux-correction: f_{ij}:=0 if f_{ij}*(\tilde{u}[i]- \tilde{u}[j])<=0
375 *
376 */
377 Paso_FCTransportProblem_applyPreAntiDiffusionCorrection(flux_matrix,uTilde_coupler);
378 /*
379 * set flux limiters RN_m,RP_m
380 *
381 */
382 Paso_FCTransportProblem_setRs(flux_matrix,fctp->lumped_mass_matrix,QN_coupler,QP_coupler,RN_m,RP_m);
383 Paso_Coupler_startCollect(RN_m_coupler,RN_m);
384 Paso_Coupler_startCollect(RP_m_coupler,RP_m);
385 /*
386 * z_m[i]=b[i] - (m_i*u_m[i] - dt*theta*sum_{j<>i} l_{ij} (u_m[j]-u_m[i]) + dt q^-[i])
387 *
388 * note that iteration_matrix stores the negative values of the
389 * low order transport matrix l therefore a=dt*fctp->theta is used.
390 */
391 Paso_SolverFCT_setMuPaLuPbQ(z_m,fctp->lumped_mass_matrix, u_m_coupler,dt*fctp->theta,fctp->iteration_matrix,dt,sourceN);
392 /* z_m=b-z_m */
393 Paso_Update(n,-1.,z_m,1.,b);
394
395 Paso_Coupler_finishCollect(RN_m_coupler);
396 Paso_Coupler_finishCollect(RP_m_coupler);
397 /* add corrected fluxes into z_m */
398 Paso_FCTransportProblem_addCorrectedFluxes(z_m,flux_matrix,RN_m_coupler,RP_m_coupler);
399 return NO_ERROR;
400 }

  ViewVC Help
Powered by ViewVC 1.1.26