1 |
/* $Id:$ */ |
2 |
|
3 |
/******************************************************* |
4 |
* |
5 |
* Copyright 2007,2008 by University of Queensland |
6 |
* |
7 |
* http://esscc.uq.edu_m.au |
8 |
* Primary Business: Queensland, Australia |
9 |
* Licensed under the Open Software License version 3.0 |
10 |
* http://www.opensource.org/licenses/osl-3.0.php |
11 |
* |
12 |
*******************************************************/ |
13 |
|
14 |
/**************************************************************/ |
15 |
|
16 |
/* Paso: Flux correction transport solver |
17 |
* |
18 |
* solves Mu_t=Ku+q |
19 |
* u(0) >= u_min |
20 |
* |
21 |
* Warning: the program assums sum_{j} k_{ij}=0!!! |
22 |
* |
23 |
*/ |
24 |
/**************************************************************/ |
25 |
|
26 |
/* Author: l.gross@uq.edu_m.au */ |
27 |
|
28 |
/**************************************************************/ |
29 |
|
30 |
#include "Paso.h" |
31 |
#include "Solver.h" |
32 |
#include "SolverFCT.h" |
33 |
#include "escript/blocktimer.h" |
34 |
#ifdef _OPENMP |
35 |
#include <omp.h> |
36 |
#endif |
37 |
#ifdef PASO_MPI |
38 |
#include <mpi.h> |
39 |
#endif |
40 |
|
41 |
double Paso_FCTransportProblem_getSafeTimeStepSize(Paso_FCTransportProblem* fctp) |
42 |
{ |
43 |
dim_t i, n_rows; |
44 |
double dt_max, dt_max_loc; |
45 |
register double rtmp1,rtmp2; |
46 |
n_rows=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix); |
47 |
if (! fctp->valid_matrices) { |
48 |
fctp->dt_max=LARGE_POSITIVE_FLOAT; |
49 |
|
50 |
|
51 |
/* extract the row sum of the advective part */ |
52 |
Paso_SystemMatrix_rowSum(fctp->mass_matrix,fctp->lumped_mass_matrix); |
53 |
|
54 |
/* set low order transport operator */ |
55 |
Paso_FCTransportProblem_setLowOrderOperator(fctp); |
56 |
/* |
57 |
* calculate time step size: |
58 |
*/ |
59 |
dt_max=LARGE_POSITIVE_FLOAT; |
60 |
if (fctp->theta < 1.) { |
61 |
#pragma omp parallel private(dt_max_loc) |
62 |
{ |
63 |
dt_max_loc=LARGE_POSITIVE_FLOAT; |
64 |
#pragma omp for schedule(static) private(i,rtmp1,rtmp2) |
65 |
for (i=0;i<n_rows;++i) { |
66 |
rtmp1=fctp->main_diagonal_low_order_transport_matrix[i]; |
67 |
rtmp2=fctp->lumped_mass_matrix[i]; |
68 |
if ( (rtmp1<0 && rtmp2>=0.) || (rtmp1>0 && rtmp2<=0.) ) { |
69 |
dt_max_loc=MIN(dt_max_loc,-rtmp2/rtmp1); |
70 |
} |
71 |
} |
72 |
#pragma omp critical |
73 |
{ |
74 |
dt_max=MIN(dt_max,dt_max_loc); |
75 |
} |
76 |
} |
77 |
#ifdef PASO_MPI |
78 |
dt_max_loc = dt_max; |
79 |
MPI_Allreduce(&dt_max_loc, &dt_max, 1, MPI_DOUBLE, MPI_MIN, fctp->mpi_info->comm); |
80 |
#endif |
81 |
if (dt_max<LARGE_POSITIVE_FLOAT) dt_max*=1./(1.-fctp->theta); |
82 |
} |
83 |
if (dt_max <= 0.) { |
84 |
Paso_setError(TYPE_ERROR,"Paso_SolverFCT_solve: dt must be positive."); |
85 |
} else { |
86 |
if (dt_max<LARGE_POSITIVE_FLOAT) printf("maximum time step size is %e (theta = %e).\n",dt_max,fctp->theta); |
87 |
} |
88 |
fctp->dt_max=dt_max; |
89 |
fctp->valid_matrices=Paso_noError(); |
90 |
} |
91 |
return fctp->dt_max; |
92 |
} |
93 |
|
94 |
|
95 |
|
96 |
|
97 |
void Paso_SolverFCT_solve(Paso_FCTransportProblem* fctp, double* u, double dt, double* source, Paso_Options* options) { |
98 |
|
99 |
index_t i, j; |
100 |
int n_substeps,n, m; |
101 |
double dt_max, omega, dt2,t; |
102 |
double local_norm[2],norm[2],local_norm_u,local_norm_du,norm_u,norm_du, tolerance; |
103 |
register double rtmp1,rtmp2,rtmp3,rtmp4, rtmp; |
104 |
double *b_n=NULL, *sourceP=NULL, *sourceN=NULL, *uTilde_n=NULL, *QN_n=NULL, *QP_n=NULL, *RN_m=NULL, *RP_m=NULL, *z_m=NULL, *du_m=NULL; |
105 |
Paso_SystemMatrix *flux_matrix=NULL; |
106 |
dim_t n_rows=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix); |
107 |
bool_t converged; |
108 |
if (dt<=0.) { |
109 |
Paso_setError(TYPE_ERROR,"Paso_SolverFCT_solve: dt must be positive."); |
110 |
} |
111 |
dt_max=Paso_FCTransportProblem_getSafeTimeStepSize(fctp); |
112 |
/* |
113 |
* allocate memory |
114 |
* |
115 |
*/ |
116 |
Paso_SystemMatrix_allocBuffer(fctp->iteration_matrix); |
117 |
b_n=TMPMEMALLOC(n_rows,double); |
118 |
Paso_checkPtr(b_n); |
119 |
sourceP=TMPMEMALLOC(n_rows,double); |
120 |
Paso_checkPtr(sourceP); |
121 |
sourceN=TMPMEMALLOC(n_rows,double); |
122 |
Paso_checkPtr(sourceN); |
123 |
uTilde_n=TMPMEMALLOC(n_rows,double); |
124 |
Paso_checkPtr(uTilde_n); |
125 |
QN_n=TMPMEMALLOC(n_rows,double); |
126 |
Paso_checkPtr(QN_n); |
127 |
QP_n=TMPMEMALLOC(n_rows,double); |
128 |
Paso_checkPtr(QP_n); |
129 |
RN_m=TMPMEMALLOC(n_rows,double); |
130 |
Paso_checkPtr(RN_m); |
131 |
RP_m=TMPMEMALLOC(n_rows,double); |
132 |
Paso_checkPtr(RP_m); |
133 |
z_m=TMPMEMALLOC(n_rows,double); |
134 |
Paso_checkPtr(z_m); |
135 |
du_m=TMPMEMALLOC(n_rows,double); |
136 |
Paso_checkPtr(du_m); |
137 |
flux_matrix=Paso_SystemMatrix_alloc(fctp->transport_matrix->type, |
138 |
fctp->transport_matrix->pattern, |
139 |
fctp->transport_matrix->row_block_size, |
140 |
fctp->transport_matrix->col_block_size); |
141 |
if (Paso_noError()) { |
142 |
Paso_SystemMatrix_allocBuffer(flux_matrix); |
143 |
Paso_SystemMatrix_allocBuffer(fctp->iteration_matrix); |
144 |
/* |
145 |
* Preparation: |
146 |
* |
147 |
*/ |
148 |
|
149 |
/* decide on substepping */ |
150 |
if (fctp->dt_max < LARGE_POSITIVE_FLOAT) { |
151 |
n_substeps=ceil(dt/dt_max); |
152 |
} else { |
153 |
n_substeps=1; |
154 |
} |
155 |
dt2=dt/n_substeps; |
156 |
printf("%d time steps of size is %e (theta = %e, dt_max=%e).\n",n_substeps, dt2,fctp->theta, dt_max); |
157 |
/* |
158 |
* seperate source into positive and negative part: |
159 |
*/ |
160 |
#pragma omp parallel for private(i,rtmp) |
161 |
for (i = 0; i < n_rows; ++i) { |
162 |
rtmp=source[i]; |
163 |
if (rtmp <0) { |
164 |
sourceN[i]=-rtmp; |
165 |
sourceP[i]=0; |
166 |
} else { |
167 |
sourceN[i]= 0; |
168 |
sourceP[i]= rtmp; |
169 |
} |
170 |
} |
171 |
/* for (i = 0; i < n_rows; ++i) printf("%d : %e \n",i,source[i],sourceP[i],sourceN[i]) */ |
172 |
/* |
173 |
* let the show begin!!!! |
174 |
* |
175 |
*/ |
176 |
t=dt2; |
177 |
n=0; |
178 |
tolerance=options->tolerance; |
179 |
while(n<n_substeps && Paso_noError()) { |
180 |
printf("substep step %d at t=%e\n",n+1,t); |
181 |
#pragma omp parallel for private(i) |
182 |
for (i = 0; i < n_rows; ++i) { |
183 |
u[i]=fctp->u[i]; |
184 |
} |
185 |
/* |
186 |
* b^n[i]=m u^n[i] + dt2*(1-theta) sum_{j <> i} l_{ij}*(u^n[j]-u^n[i]) + dt2*sourceP[i] |
187 |
* |
188 |
* note that iteration_matrix stores the negative values of the |
189 |
* low order transport matrix l therefore a=-dt2*(1-fctp->theta) is used. |
190 |
* |
191 |
*/ |
192 |
Paso_SolverFCT_setMuPaLuPbQ(b_n,fctp->lumped_mass_matrix,u, |
193 |
-dt2*(1-fctp->theta),fctp->iteration_matrix,dt2,sourceP); |
194 |
/* |
195 |
* uTilde_n[i]=b[i]/m[i] |
196 |
* |
197 |
* a[i,i]=m[i]/(dt2 theta) + \frac{1}{\theta} \frac{q^-[i]}-l[i,i] |
198 |
* |
199 |
*/ |
200 |
if (fctp->theta > 0) { |
201 |
Paso_solve_free(fctp->iteration_matrix); |
202 |
omega=1./(dt2*fctp->theta); |
203 |
rtmp2=dt2*omega; |
204 |
#pragma omp parallel for private(i,rtmp,rtmp3,rtmp4) |
205 |
for (i = 0; i < n_rows; ++i) { |
206 |
rtmp=fctp->lumped_mass_matrix[i]; |
207 |
if (ABS(rtmp)>0.) { |
208 |
rtmp3=b_n[i]/rtmp; |
209 |
} else { |
210 |
rtmp3=u[i]; |
211 |
} |
212 |
rtmp4=rtmp*omega-fctp->main_diagonal_low_order_transport_matrix[i]; |
213 |
if (ABS(rtmp3)>0) rtmp4+=sourceN[i]*rtmp2/rtmp3; |
214 |
fctp->iteration_matrix->mainBlock->val[fctp->main_iptr[i]]=rtmp4; |
215 |
uTilde_n[i]=rtmp3; |
216 |
} |
217 |
} else { |
218 |
#pragma omp parallel for private(i,rtmp,rtmp3) |
219 |
for (i = 0; i < n_rows; ++i) { |
220 |
rtmp=fctp->lumped_mass_matrix[i]; |
221 |
if (ABS(rtmp)>0.) { |
222 |
rtmp3=b_n[i]/rtmp; |
223 |
} else { |
224 |
rtmp3=u[i]; |
225 |
} |
226 |
uTilde_n[i]=rtmp3; |
227 |
} |
228 |
omega=1.; |
229 |
/* no update of iteration_matrix retquired! */ |
230 |
} /* end (fctp->theta > 0) */ |
231 |
/* |
232 |
* calculate QP_n[i] max_{j} (\tilde{u}[j]- \tilde{u}[i] ) |
233 |
* QN_n[i] min_{j} (\tilde{u}[j]- \tilde{u}[i] ) |
234 |
* |
235 |
*/ |
236 |
Paso_SolverFCT_setQs(uTilde_n,QN_n,QP_n,fctp->iteration_matrix); |
237 |
/* |
238 |
* now we enter the mation on a time-step: |
239 |
* |
240 |
*/ |
241 |
m=0; |
242 |
converged=FALSE; |
243 |
while ( (!converged) && (m<500) && Paso_noError()) { |
244 |
printf("iteration step %d\n",m+1); |
245 |
/* |
246 |
* set the ant diffusion fluxes: |
247 |
* |
248 |
*/ |
249 |
Paso_FCTransportProblem_setAntiDiffusionFlux(dt2,fctp,flux_matrix,u,fctp->u); |
250 |
/* |
251 |
* apply pre flux-correction: f_{ij}:=0 if f_{ij}*(\tilde{u}[i]- \tilde{u}[j])<=0 |
252 |
* |
253 |
* this is not entirely correct!!!!! |
254 |
* |
255 |
*/ |
256 |
Paso_FCTransportProblem_applyPreAntiDiffusionCorrection(flux_matrix,uTilde_n); |
257 |
/* |
258 |
* set flux limms RN_m,RP_m |
259 |
* |
260 |
*/ |
261 |
Paso_FCTransportProblem_setRs(flux_matrix,fctp->lumped_mass_matrix,QN_n,QP_n,RN_m,RP_m); |
262 |
/* |
263 |
* z_m[i]=b_n[i] - (m_i*u[i] - dt2*theta*sum_{j<>i} l_{ij} (u[j]-u[i]) + dt2 q^-[i]) |
264 |
* |
265 |
* note that iteration_matrix stores the negative values of the |
266 |
* low order transport matrix l therefore a=dt2*fctp->theta is used. |
267 |
*/ |
268 |
|
269 |
Paso_SolverFCT_setMuPaLuPbQ(z_m,fctp->lumped_mass_matrix,u, |
270 |
dt2*fctp->theta,fctp->iteration_matrix,dt2,sourceN); |
271 |
#pragma omp parallel for private(i) |
272 |
for (i = 0; i < n_rows; ++i) z_m[i]=b_n[i]-z_m[i]; |
273 |
|
274 |
/* add corrected fluxes into z_m */ |
275 |
Paso_FCTransportProblem_addCorrectedFluxes(z_m,flux_matrix,RN_m,RP_m); |
276 |
/* |
277 |
* now we solve the linear system to get the correction dt2: |
278 |
* |
279 |
*/ |
280 |
if (fctp->theta > 0) { |
281 |
/* set the right hand side of the linear system: */ |
282 |
options->tolerance=1.e-2; |
283 |
Paso_solve(fctp->iteration_matrix,du_m,z_m,options); |
284 |
/* TODO: check errors ! */ |
285 |
} else { |
286 |
#pragma omp parallel for private(i,rtmp,rtmp1) |
287 |
for (i = 0; i < n_rows; ++i) { |
288 |
rtmp=fctp->lumped_mass_matrix[i]; |
289 |
if (ABS(rtmp)>0.) { |
290 |
rtmp1=z_m[i]/rtmp; |
291 |
} else { |
292 |
rtmp1=0; |
293 |
} |
294 |
du_m[i]=rtmp1; |
295 |
} |
296 |
} |
297 |
/* |
298 |
* update u and calculate norm of du_m and the new u: |
299 |
* |
300 |
*/ |
301 |
norm_u=0.; |
302 |
norm_du=0.; |
303 |
#pragma omp parallel private(local_norm_u,local_norm_du) |
304 |
{ |
305 |
local_norm_u=0.; |
306 |
local_norm_du=0.; |
307 |
#pragma omp for schedule(static) private(i) |
308 |
for (i=0;i<n_rows;++i) { |
309 |
u[i]+=omega*du_m[i]; |
310 |
local_norm_u=MAX(local_norm_u,ABS(u[i])); |
311 |
local_norm_du=MAX(local_norm_du,ABS(du_m[i])); |
312 |
} |
313 |
#pragma omp critical |
314 |
{ |
315 |
norm_u=MAX(norm_u,local_norm_u); |
316 |
norm_du=MAX(norm_du,local_norm_du); |
317 |
} |
318 |
} |
319 |
#ifdef PASO_MPI |
320 |
local_norm[0]=norm_u; |
321 |
local_norm[1]=norm_du; |
322 |
MPI_Allreduce(local_norm,norm, 2, MPI_DOUBLE, MPI_MAX, fctp->mpi_info->comm); |
323 |
norm_u=norm[0]; |
324 |
norm_du=norm[1]; |
325 |
#endif |
326 |
norm_du*=omega; |
327 |
converged=(norm_du <= tolerance * norm_u); |
328 |
m++; |
329 |
printf("iteration step %d: norm u and du_m : %e %e\n",m,norm_u,norm_du); |
330 |
/* TODO: check if du_m has been redu_mced */ |
331 |
} /* end of inner iteration */ |
332 |
#pragma omp parallel for schedule(static) private(i) |
333 |
for (i=0;i<n_rows;++i) fctp->u[i]=u[i]; |
334 |
n++; |
335 |
} /* end of time integration */ |
336 |
#pragma omp parallel for schedule(static) private(i) |
337 |
for (i=0;i<n_rows;++i) u[i]=fctp->u[i]+fctp->u_min; |
338 |
/* TODO: update u_min ? */ |
339 |
|
340 |
} |
341 |
/* |
342 |
* clean-up: |
343 |
* |
344 |
*/ |
345 |
TMPMEMFREE(b_n); |
346 |
Paso_SystemMatrix_free(flux_matrix); |
347 |
TMPMEMFREE(sourceP); |
348 |
TMPMEMFREE(sourceN); |
349 |
TMPMEMFREE(uTilde_n); |
350 |
TMPMEMFREE(QN_n); |
351 |
TMPMEMFREE(QP_n); |
352 |
TMPMEMFREE(RN_m); |
353 |
TMPMEMFREE(RP_m); |
354 |
TMPMEMFREE(z_m); |
355 |
TMPMEMFREE(du_m); |
356 |
} |