1 |
|
2 |
/******************************************************* |
3 |
* |
4 |
* Copyright (c) 2003-2008 by University of Queensland |
5 |
* Earth Systems Science Computational Center (ESSCC) |
6 |
* http://www.uq.edu.au/esscc |
7 |
* |
8 |
* Primary Business: Queensland, Australia |
9 |
* Licensed under the Open Software License version 3.0 |
10 |
* http://www.opensource.org/licenses/osl-3.0.php |
11 |
* |
12 |
*******************************************************/ |
13 |
|
14 |
|
15 |
/**************************************************************/ |
16 |
|
17 |
/* Paso: Flux correction transport solver |
18 |
* |
19 |
* solves Mu_t=Ku+q |
20 |
* u(0) >=0 |
21 |
* |
22 |
* Warning: the program assums sum_{j} k_{ij}=0!!! |
23 |
* |
24 |
*/ |
25 |
/**************************************************************/ |
26 |
|
27 |
/* Author: l.gross@uq.edu.au */ |
28 |
|
29 |
/**************************************************************/ |
30 |
|
31 |
#include "Paso.h" |
32 |
#include "Solver.h" |
33 |
#include "SolverFCT.h" |
34 |
#include "PasoUtil.h" |
35 |
#include "escript/blocktimer.h" |
36 |
#ifdef _OPENMP |
37 |
#include <omp.h> |
38 |
#endif |
39 |
#ifdef PASO_MPI |
40 |
#include <mpi.h> |
41 |
#endif |
42 |
|
43 |
/* |
44 |
* inserts the source term into the problem |
45 |
*/ |
46 |
void Paso_FCT_setSource(const dim_t n,const double *source, double* sourceN, double* sourceP) |
47 |
{ |
48 |
dim_t i; |
49 |
register double rtmp; |
50 |
/* |
51 |
* seperate source into positive and negative part: |
52 |
*/ |
53 |
#pragma omp parallel for private(i,rtmp) |
54 |
for (i = 0; i < n; ++i) { |
55 |
rtmp=source[i]; |
56 |
if (rtmp <0) { |
57 |
sourceN[i]=-rtmp; |
58 |
sourceP[i]=0; |
59 |
} else { |
60 |
sourceN[i]= 0; |
61 |
sourceP[i]= rtmp; |
62 |
} |
63 |
} |
64 |
} |
65 |
|
66 |
err_t Paso_FCT_setUpRightHandSide(Paso_FCTransportProblem* fctp, const double dt, const double *u_m, Paso_Coupler* u_m_coupler, double * z_m, |
67 |
Paso_SystemMatrix* flux_matrix, Paso_Coupler* uTilde_coupler, const double *b, |
68 |
Paso_Coupler* QN_coupler, Paso_Coupler* QP_coupler, |
69 |
double *RN_m, Paso_Coupler* RN_m_coupler, double* RP_m, Paso_Coupler* RP_m_coupler, const double *sourceN, |
70 |
Paso_Performance* pp) |
71 |
{ |
72 |
const dim_t n=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix); |
73 |
/* distribute u */ |
74 |
Paso_Coupler_startCollect(u_m_coupler,u_m); |
75 |
Paso_Coupler_finishCollect(u_m_coupler); |
76 |
/* |
77 |
* set the ant diffusion fluxes: |
78 |
* |
79 |
*/ |
80 |
Paso_FCTransportProblem_setAntiDiffusionFlux(dt,fctp,flux_matrix,u_m_coupler); |
81 |
/* |
82 |
* apply pre flux-correction: f_{ij}:=0 if f_{ij}*(\tilde{u}[i]- \tilde{u}[j])<=0 |
83 |
* |
84 |
*/ |
85 |
Paso_FCTransportProblem_applyPreAntiDiffusionCorrection(flux_matrix,uTilde_coupler); |
86 |
/* |
87 |
* set flux limiters RN_m,RP_m |
88 |
* |
89 |
*/ |
90 |
Paso_FCTransportProblem_setRs(flux_matrix,fctp->lumped_mass_matrix,QN_coupler,QP_coupler,RN_m,RP_m); |
91 |
Paso_Coupler_startCollect(RN_m_coupler,RN_m); |
92 |
Paso_Coupler_startCollect(RP_m_coupler,RP_m); |
93 |
/* |
94 |
* z_m[i]=b[i] - (m_i*u_m[i] - dt*theta*sum_{j<>i} l_{ij} (u_m[j]-u_m[i]) + dt q^-[i]) |
95 |
* |
96 |
* note that iteration_matrix stores the negative values of the |
97 |
* low order transport matrix l therefore a=dt*fctp->theta is used. |
98 |
*/ |
99 |
Paso_SolverFCT_setMuPaLuPbQ(z_m,fctp->lumped_mass_matrix, u_m_coupler,dt*fctp->theta,fctp->iteration_matrix,dt,sourceN); |
100 |
/* z_m=b-z_m */ |
101 |
Paso_Update(n,-1.,z_m,1.,b); |
102 |
|
103 |
Paso_Coupler_finishCollect(RN_m_coupler); |
104 |
Paso_Coupler_finishCollect(RP_m_coupler); |
105 |
/* add corrected fluxes into z_m */ |
106 |
Paso_FCTransportProblem_addCorrectedFluxes(z_m,flux_matrix,RN_m_coupler,RP_m_coupler); |
107 |
return NO_ERROR; |
108 |
} |
109 |
|
110 |
void Paso_SolverFCT_solve(Paso_FCTransportProblem* fctp, double* u, double dt, double* source, Paso_Options* options) { |
111 |
const dim_t FAILURES_MAX=5; |
112 |
dim_t m, i_substeps, Failed, i; |
113 |
double *z_m=NULL, *b_n=NULL, *sourceP=NULL, *sourceN=NULL, *uTilde_n=NULL, *QN_n=NULL, *QP_n=NULL, *RN_m=NULL, *RP_m=NULL, *du_m=NULL; |
114 |
Paso_Coupler *QN_n_coupler=NULL, *QP_n_coupler=NULL, *RN_m_coupler=NULL, *RP_m_coupler=NULL, *uTilde_n_coupler=NULL, *u_m_coupler=NULL; |
115 |
Paso_SystemMatrix *flux_matrix_m=NULL; |
116 |
double dt_max, dt2,t, norm_u_m, omega, norm_du_m; |
117 |
register double mass, rtmp; |
118 |
const dim_t n=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix); |
119 |
dim_t blockSize=Paso_FCTransportProblem_getBlockSize(fctp); |
120 |
const double atol=options->absolute_tolerance; |
121 |
const double rtol=options->tolerance; |
122 |
const dim_t max_m=options->iter_max; |
123 |
Paso_Performance pp; |
124 |
bool_t converged=FALSE, max_m_reached=FALSE; |
125 |
if (dt<=0.) { |
126 |
Paso_setError(TYPE_ERROR,"Paso_SolverFCT_solve: dt must be positive."); |
127 |
} |
128 |
dt_max=Paso_FCTransportProblem_getSafeTimeStepSize(fctp); |
129 |
/* |
130 |
* allocate memory |
131 |
* |
132 |
*/ |
133 |
z_m=TMPMEMALLOC(n,double); |
134 |
Paso_checkPtr(z_m); |
135 |
du_m=TMPMEMALLOC(n,double); |
136 |
Paso_checkPtr(du_m); |
137 |
b_n=TMPMEMALLOC(n,double); |
138 |
Paso_checkPtr(b_n); |
139 |
sourceP=TMPMEMALLOC(n,double); |
140 |
Paso_checkPtr(sourceP); |
141 |
sourceN=TMPMEMALLOC(n,double); |
142 |
Paso_checkPtr(sourceN); |
143 |
uTilde_n=TMPMEMALLOC(n,double); |
144 |
Paso_checkPtr(uTilde_n); |
145 |
QN_n=TMPMEMALLOC(n,double); |
146 |
Paso_checkPtr(QN_n); |
147 |
QP_n=TMPMEMALLOC(n,double); |
148 |
Paso_checkPtr(QP_n); |
149 |
RN_m=TMPMEMALLOC(n,double); |
150 |
Paso_checkPtr(RN_m); |
151 |
RP_m=TMPMEMALLOC(n,double); |
152 |
Paso_checkPtr(RP_m); |
153 |
QN_n_coupler=Paso_Coupler_alloc(Paso_FCTransportProblem_borrowConnector(fctp),blockSize); |
154 |
QP_n_coupler=Paso_Coupler_alloc(Paso_FCTransportProblem_borrowConnector(fctp),blockSize); |
155 |
RN_m_coupler=Paso_Coupler_alloc(Paso_FCTransportProblem_borrowConnector(fctp),blockSize); |
156 |
RP_m_coupler=Paso_Coupler_alloc(Paso_FCTransportProblem_borrowConnector(fctp),blockSize); |
157 |
uTilde_n_coupler=Paso_Coupler_alloc(Paso_FCTransportProblem_borrowConnector(fctp),blockSize); |
158 |
u_m_coupler=Paso_Coupler_alloc(Paso_FCTransportProblem_borrowConnector(fctp),blockSize); |
159 |
flux_matrix_m=Paso_SystemMatrix_alloc(fctp->transport_matrix->type, |
160 |
fctp->transport_matrix->pattern, |
161 |
fctp->transport_matrix->row_block_size, |
162 |
fctp->transport_matrix->col_block_size); |
163 |
|
164 |
if (Paso_noError()) { |
165 |
/* |
166 |
* Preparation: |
167 |
* |
168 |
*/ |
169 |
Paso_FCT_setSource(n, source, sourceN, sourceP); |
170 |
/* |
171 |
* let the show begin!!!! |
172 |
* |
173 |
*/ |
174 |
t=0; |
175 |
i_substeps=0; |
176 |
Paso_Copy(n,u,fctp->u); |
177 |
norm_u_m=Paso_lsup(n,u, fctp->mpi_info); |
178 |
/* while(i_substeps<n_substeps && Paso_noError()) { */ |
179 |
if (fctp->dt_max < LARGE_POSITIVE_FLOAT) { |
180 |
dt2=MIN(dt_max,dt); |
181 |
} else { |
182 |
dt2=dt; |
183 |
} |
184 |
while(t<dt && Paso_noError()) { |
185 |
printf("substep step %d at t=%e (step size= %e)\n",i_substeps+1,t+dt2,dt2); |
186 |
Paso_FCT_setUp(fctp,dt2,sourceN,sourceP,b_n,uTilde_n,uTilde_n_coupler,QN_n,QN_n_coupler,QP_n,QP_n_coupler, |
187 |
options,&pp); |
188 |
/* now the iteration starts */ |
189 |
m=0; |
190 |
converged=FALSE; |
191 |
max_m_reached=FALSE; |
192 |
/* tolerance? */ |
193 |
while ( (!converged) && (! max_m_reached) && Paso_noError()) { |
194 |
/* set z_m */ |
195 |
Paso_FCT_setUpRightHandSide(fctp,dt2,u,u_m_coupler,z_m,flux_matrix_m,uTilde_n_coupler,b_n, |
196 |
QN_n_coupler,QP_n_coupler,RN_m,RN_m_coupler,RP_m,RP_m_coupler,sourceN,&pp); |
197 |
/* |
198 |
* now we solve the linear system to get the correction dt: |
199 |
* |
200 |
*/ |
201 |
if (fctp->theta > 0) { |
202 |
omega=1./(dt2*fctp->theta); |
203 |
Paso_Solver_solvePreconditioner(fctp->iteration_matrix,du_m,z_m); |
204 |
Paso_Update(n,1.,u,omega,du_m); |
205 |
} else { |
206 |
omega=1; |
207 |
#pragma omp parallel for private(i,mass,rtmp) |
208 |
for (i = 0; i < n; ++i) { |
209 |
mass=fctp->lumped_mass_matrix[i]; |
210 |
if (ABS(mass)>0.) { |
211 |
rtmp=z_m[i]/mass; |
212 |
} else { |
213 |
rtmp=0; |
214 |
} |
215 |
du_m[i]=rtmp; |
216 |
u[i]+=rtmp; |
217 |
} |
218 |
} |
219 |
norm_u_m=Paso_lsup(n,u, fctp->mpi_info); |
220 |
norm_du_m=Paso_lsup(n,du_m, fctp->mpi_info)*omega; |
221 |
printf("iteration step %d completed: norm increment= %e (tolerance = %e)\n",m+1, norm_du_m, rtol * norm_u_m + atol); |
222 |
|
223 |
max_m_reached=(m>max_m); |
224 |
converged=(norm_du_m <= rtol * norm_u_m + atol); |
225 |
m++; |
226 |
} |
227 |
if (converged) { |
228 |
Failed=0; |
229 |
/* #pragma omp parallel for schedule(static) private(i) */ |
230 |
Paso_Copy(n,fctp->u,u); |
231 |
i_substeps++; |
232 |
t+=dt2; |
233 |
if (fctp->dt_max < LARGE_POSITIVE_FLOAT) { |
234 |
dt2=MIN3(dt_max,dt2*1.5,dt-t); |
235 |
} else { |
236 |
dt2=MIN(dt2*1.5,dt-t); |
237 |
} |
238 |
} else if (max_m_reached) { |
239 |
/* if FAILURES_MAX failures in a row: give up */ |
240 |
if (Failed > FAILURES_MAX) { |
241 |
Paso_setError(VALUE_ERROR,"Paso_SolverFCT_solve: no convergence after time step reduction."); |
242 |
} else { |
243 |
printf("no convergence in Paso_Solver_NewtonGMRES: Trying smaller time step size."); |
244 |
dt2=dt*0.5; |
245 |
Failed++; |
246 |
} |
247 |
} |
248 |
|
249 |
} |
250 |
/* |
251 |
* clean-up: |
252 |
* |
253 |
*/ |
254 |
MEMFREE(z_m); |
255 |
MEMFREE(du_m); |
256 |
TMPMEMFREE(b_n); |
257 |
Paso_SystemMatrix_free(flux_matrix_m); |
258 |
TMPMEMFREE(sourceP); |
259 |
TMPMEMFREE(sourceN); |
260 |
TMPMEMFREE(uTilde_n); |
261 |
TMPMEMFREE(QN_n); |
262 |
TMPMEMFREE(QP_n); |
263 |
TMPMEMFREE(RN_m); |
264 |
TMPMEMFREE(RP_m); |
265 |
Paso_Coupler_free(QN_n_coupler); |
266 |
Paso_Coupler_free(QP_n_coupler); |
267 |
Paso_Coupler_free(RN_m_coupler); |
268 |
Paso_Coupler_free(RP_m_coupler); |
269 |
Paso_Coupler_free(uTilde_n_coupler); |
270 |
Paso_Coupler_free(u_m_coupler); |
271 |
options->absolute_tolerance=atol; |
272 |
options->tolerance=rtol; |
273 |
} |
274 |
} |
275 |
double Paso_FCTransportProblem_getSafeTimeStepSize(Paso_FCTransportProblem* fctp) |
276 |
{ |
277 |
dim_t i, n; |
278 |
double dt_max, dt_max_loc; |
279 |
register double l_ii,m; |
280 |
n=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix); |
281 |
if (! fctp->valid_matrices) { |
282 |
fctp->dt_max=LARGE_POSITIVE_FLOAT; |
283 |
/* extract the row sum of the advective part */ |
284 |
Paso_SystemMatrix_rowSum(fctp->mass_matrix,fctp->lumped_mass_matrix); |
285 |
|
286 |
/* set low order transport operator */ |
287 |
Paso_FCTransportProblem_setLowOrderOperator(fctp); |
288 |
/* |
289 |
* calculate time step size: |
290 |
*/ |
291 |
dt_max=LARGE_POSITIVE_FLOAT; |
292 |
if (fctp->theta < 1.) { |
293 |
#pragma omp parallel private(dt_max_loc) |
294 |
{ |
295 |
dt_max_loc=LARGE_POSITIVE_FLOAT; |
296 |
#pragma omp for schedule(static) private(i,l_ii,m) |
297 |
for (i=0;i<n;++i) { |
298 |
l_ii=fctp->main_diagonal_low_order_transport_matrix[i]; |
299 |
m=fctp->lumped_mass_matrix[i]; |
300 |
if ( (l_ii<0 && m>0.) || (l_ii>0 && m<0) ) { |
301 |
dt_max_loc=MIN(dt_max_loc,-m/l_ii); |
302 |
} |
303 |
} |
304 |
#pragma omp critical |
305 |
{ |
306 |
dt_max=MIN(dt_max,dt_max_loc); |
307 |
} |
308 |
} |
309 |
#ifdef PASO_MPI |
310 |
dt_max_loc = dt_max; |
311 |
MPI_Allreduce(&dt_max_loc, &dt_max, 1, MPI_DOUBLE, MPI_MIN, fctp->mpi_info->comm); |
312 |
#endif |
313 |
if (dt_max<LARGE_POSITIVE_FLOAT) dt_max*=1./(1.-fctp->theta); |
314 |
} |
315 |
if (dt_max <= 0.) { |
316 |
Paso_setError(TYPE_ERROR,"Paso_SolverFCT_solve: dt must be positive."); |
317 |
} else { |
318 |
if (dt_max<LARGE_POSITIVE_FLOAT) |
319 |
printf("maximum time step size is %e (theta = %e).\n",dt_max,fctp->theta); |
320 |
} |
321 |
fctp->dt_max=dt_max; |
322 |
fctp->valid_matrices=Paso_noError(); |
323 |
} |
324 |
return fctp->dt_max; |
325 |
} |
326 |
void Paso_FCT_setUp(Paso_FCTransportProblem* fctp, const double dt, const double *sourceN, const double *sourceP, double* b, double* uTilde, |
327 |
Paso_Coupler* uTilde_coupler, double *QN, Paso_Coupler* QN_coupler, double *QP, Paso_Coupler* QP_coupler, |
328 |
Paso_Options* options, Paso_Performance* pp) |
329 |
{ |
330 |
dim_t i; |
331 |
const dim_t n=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix); |
332 |
double omega, factor; |
333 |
register double m, u_tilde_i, rtmp4; |
334 |
/* distribute u */ |
335 |
Paso_Coupler_startCollect(fctp->u_coupler,fctp->u); |
336 |
Paso_Coupler_finishCollect(fctp->u_coupler); |
337 |
/* |
338 |
* b^n[i]=m u^n[i] + dt*(1-theta) sum_{j <> i} l_{ij}*(u^n[j]-u^n[i]) + dt*sourceP[i] |
339 |
* |
340 |
* note that iteration_matrix stores the negative values of the |
341 |
* low order transport matrix l therefore a=-dt*(1-fctp->theta) is used. |
342 |
* |
343 |
*/ |
344 |
Paso_SolverFCT_setMuPaLuPbQ(b,fctp->lumped_mass_matrix,fctp->u_coupler, |
345 |
-dt*(1-fctp->theta),fctp->iteration_matrix,dt,sourceP); |
346 |
/* |
347 |
* uTilde[i]=b[i]/m[i] |
348 |
* |
349 |
* fctp->iteration_matrix[i,i]=m[i]/(dt theta) + \frac{1}{\theta} \frac{q^-[i]}-l[i,i] |
350 |
* |
351 |
*/ |
352 |
if (fctp->theta > 0) { |
353 |
Paso_solve_free(fctp->iteration_matrix); |
354 |
omega=1./(dt*fctp->theta); |
355 |
factor=dt*omega; |
356 |
#pragma omp parallel for private(i,m,u_tilde_i,rtmp4) |
357 |
for (i = 0; i < n; ++i) { |
358 |
m=fctp->lumped_mass_matrix[i]; |
359 |
if (ABS(m)>0.) { |
360 |
u_tilde_i=b[i]/m; |
361 |
} else { |
362 |
u_tilde_i=fctp->u[i]; |
363 |
} |
364 |
rtmp4=m*omega-fctp->main_diagonal_low_order_transport_matrix[i]; |
365 |
if (ABS(u_tilde_i)>0) rtmp4+=sourceN[i]*factor/u_tilde_i; |
366 |
fctp->iteration_matrix->mainBlock->val[fctp->main_iptr[i]]=rtmp4; |
367 |
uTilde[i]=u_tilde_i; |
368 |
} |
369 |
Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER_INIT); |
370 |
Paso_Solver_setPreconditioner(fctp->iteration_matrix,options); |
371 |
Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER_INIT); |
372 |
} else { |
373 |
#pragma omp parallel for private(i,m,u_tilde_i) |
374 |
for (i = 0; i < n; ++i) { |
375 |
m=fctp->lumped_mass_matrix[i]; |
376 |
if (ABS(m)>0.) { |
377 |
u_tilde_i=b[i]/m; |
378 |
} else { |
379 |
u_tilde_i=fctp->u[i]; |
380 |
} |
381 |
uTilde[i]=u_tilde_i; |
382 |
} |
383 |
/* no update of iteration_matrix required! */ |
384 |
} /* end (fctp->theta > 0) */ |
385 |
|
386 |
/* distribute uTilde: */ |
387 |
Paso_Coupler_startCollect(uTilde_coupler,uTilde); |
388 |
Paso_Coupler_finishCollect(uTilde_coupler); |
389 |
/* |
390 |
* calculate QP[i] max_{j} (\tilde{u}[j]- \tilde{u}[i] ) |
391 |
* QN[i] min_{j} (\tilde{u}[j]- \tilde{u}[i] ) |
392 |
* |
393 |
*/ |
394 |
Paso_SolverFCT_setQs(uTilde_coupler,QN,QP,fctp->iteration_matrix); |
395 |
Paso_Coupler_startCollect(QN_coupler,QN); |
396 |
Paso_Coupler_startCollect(QP_coupler,QP); |
397 |
Paso_Coupler_finishCollect(QN_coupler); |
398 |
Paso_Coupler_finishCollect(QP_coupler); |
399 |
} |