1 |
/* $Id:$ */ |
2 |
|
3 |
/******************************************************* |
4 |
* |
5 |
* Copyright 2007,2008 by University of Queensland |
6 |
* |
7 |
* http://esscc.uq.edu_m.au |
8 |
* Primary Business: Queensland, Australia |
9 |
* Licensed under the Open Software License version 3.0 |
10 |
* http://www.opensource.org/licenses/osl-3.0.php |
11 |
* |
12 |
*******************************************************/ |
13 |
|
14 |
/**************************************************************/ |
15 |
|
16 |
/* Paso: Flux correction transport solver |
17 |
* |
18 |
* solves Mu_t=Ku+q |
19 |
* u(0) >= u_min |
20 |
* |
21 |
* Warning: the program assums sum_{j} k_{ij}=0!!! |
22 |
* |
23 |
*/ |
24 |
/**************************************************************/ |
25 |
|
26 |
/* Author: l.gross@uq.edu_m.au */ |
27 |
|
28 |
/**************************************************************/ |
29 |
|
30 |
#include "Paso.h" |
31 |
#include "Solver.h" |
32 |
#include "SolverFCT.h" |
33 |
#include "escript/blocktimer.h" |
34 |
#ifdef _OPENMP |
35 |
#include <omp.h> |
36 |
#endif |
37 |
#ifdef PASO_MPI |
38 |
#include <mpi.h> |
39 |
#endif |
40 |
|
41 |
double Paso_FCTransportProblem_getSafeTimeStepSize(Paso_FCTransportProblem* fctp) |
42 |
{ |
43 |
dim_t i, n_rows; |
44 |
double dt_max, dt_max_loc; |
45 |
register double rtmp1,rtmp2; |
46 |
n_rows=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix); |
47 |
if (! fctp->valid_matrices) { |
48 |
fctp->dt_max=LARGE_POSITIVE_FLOAT; |
49 |
/* extract the row sum of the advective part */ |
50 |
Paso_SystemMatrix_rowSum(fctp->mass_matrix,fctp->lumped_mass_matrix); |
51 |
|
52 |
/* set low order transport operator */ |
53 |
Paso_FCTransportProblem_setLowOrderOperator(fctp); |
54 |
/* |
55 |
* calculate time step size: |
56 |
*/ |
57 |
dt_max=LARGE_POSITIVE_FLOAT; |
58 |
if (fctp->theta < 1.) { |
59 |
#pragma omp parallel private(dt_max_loc) |
60 |
{ |
61 |
dt_max_loc=LARGE_POSITIVE_FLOAT; |
62 |
#pragma omp for schedule(static) private(i,rtmp1,rtmp2) |
63 |
for (i=0;i<n_rows;++i) { |
64 |
rtmp1=fctp->main_diagonal_low_order_transport_matrix[i]; |
65 |
rtmp2=fctp->lumped_mass_matrix[i]; |
66 |
if ( (rtmp1<0 && rtmp2>=0.) || (rtmp1>0 && rtmp2<=0.) ) { |
67 |
dt_max_loc=MIN(dt_max_loc,-rtmp2/rtmp1); |
68 |
} |
69 |
} |
70 |
#pragma omp critical |
71 |
{ |
72 |
dt_max=MIN(dt_max,dt_max_loc); |
73 |
} |
74 |
} |
75 |
#ifdef PASO_MPI |
76 |
dt_max_loc = dt_max; |
77 |
MPI_Allreduce(&dt_max_loc, &dt_max, 1, MPI_DOUBLE, MPI_MIN, fctp->mpi_info->comm); |
78 |
#endif |
79 |
if (dt_max<LARGE_POSITIVE_FLOAT) dt_max*=1./(1.-fctp->theta); |
80 |
} |
81 |
if (dt_max <= 0.) { |
82 |
Paso_setError(TYPE_ERROR,"Paso_SolverFCT_solve: dt must be positive."); |
83 |
} else { |
84 |
if (dt_max<LARGE_POSITIVE_FLOAT) printf("maximum time step size is %e (theta = %e).\n",dt_max,fctp->theta); |
85 |
} |
86 |
fctp->dt_max=dt_max; |
87 |
fctp->valid_matrices=Paso_noError(); |
88 |
} |
89 |
return fctp->dt_max; |
90 |
} |
91 |
|
92 |
|
93 |
|
94 |
|
95 |
void Paso_SolverFCT_solve(Paso_FCTransportProblem* fctp, double* u, double dt, double* source, Paso_Options* options) { |
96 |
|
97 |
index_t i; |
98 |
long n_substeps,n, m; |
99 |
double dt_max, omega, dt2,t; |
100 |
double local_norm_u,local_norm_du,norm_u,norm_du, tolerance; |
101 |
register double rtmp1,rtmp2,rtmp3,rtmp4, rtmp; |
102 |
double *b_n=NULL, *sourceP=NULL, *sourceN=NULL, *uTilde_n=NULL, *QN_n=NULL, *QP_n=NULL, *RN_m=NULL, *RP_m=NULL, *z_m=NULL, *du_m=NULL; |
103 |
Paso_SystemMatrix *flux_matrix=NULL; |
104 |
dim_t n_rows=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix); |
105 |
bool_t converged; |
106 |
#ifdef PASO_MPI |
107 |
double local_norm[2],norm[2]; |
108 |
#endif |
109 |
|
110 |
if (dt<=0.) { |
111 |
Paso_setError(TYPE_ERROR,"Paso_SolverFCT_solve: dt must be positive."); |
112 |
} |
113 |
dt_max=Paso_FCTransportProblem_getSafeTimeStepSize(fctp); |
114 |
/* |
115 |
* allocate memory |
116 |
* |
117 |
*/ |
118 |
Paso_SystemMatrix_allocBuffer(fctp->iteration_matrix); |
119 |
b_n=TMPMEMALLOC(n_rows,double); |
120 |
Paso_checkPtr(b_n); |
121 |
sourceP=TMPMEMALLOC(n_rows,double); |
122 |
Paso_checkPtr(sourceP); |
123 |
sourceN=TMPMEMALLOC(n_rows,double); |
124 |
Paso_checkPtr(sourceN); |
125 |
uTilde_n=TMPMEMALLOC(n_rows,double); |
126 |
Paso_checkPtr(uTilde_n); |
127 |
QN_n=TMPMEMALLOC(n_rows,double); |
128 |
Paso_checkPtr(QN_n); |
129 |
QP_n=TMPMEMALLOC(n_rows,double); |
130 |
Paso_checkPtr(QP_n); |
131 |
RN_m=TMPMEMALLOC(n_rows,double); |
132 |
Paso_checkPtr(RN_m); |
133 |
RP_m=TMPMEMALLOC(n_rows,double); |
134 |
Paso_checkPtr(RP_m); |
135 |
z_m=TMPMEMALLOC(n_rows,double); |
136 |
Paso_checkPtr(z_m); |
137 |
du_m=TMPMEMALLOC(n_rows,double); |
138 |
Paso_checkPtr(du_m); |
139 |
flux_matrix=Paso_SystemMatrix_alloc(fctp->transport_matrix->type, |
140 |
fctp->transport_matrix->pattern, |
141 |
fctp->transport_matrix->row_block_size, |
142 |
fctp->transport_matrix->col_block_size); |
143 |
if (Paso_noError()) { |
144 |
Paso_SystemMatrix_allocBuffer(flux_matrix); |
145 |
Paso_SystemMatrix_allocBuffer(fctp->iteration_matrix); |
146 |
/* |
147 |
* Preparation: |
148 |
* |
149 |
*/ |
150 |
|
151 |
/* decide on substepping */ |
152 |
if (fctp->dt_max < LARGE_POSITIVE_FLOAT) { |
153 |
n_substeps=(long)ceil(dt/dt_max); |
154 |
} else { |
155 |
n_substeps=1; |
156 |
} |
157 |
dt2=dt/n_substeps; |
158 |
printf("%d time steps of size is %e (theta = %e, dt_max=%e).\n",n_substeps, dt2,fctp->theta, dt_max); |
159 |
/* |
160 |
* seperate source into positive and negative part: |
161 |
*/ |
162 |
#pragma omp parallel for private(i,rtmp) |
163 |
for (i = 0; i < n_rows; ++i) { |
164 |
rtmp=source[i]; |
165 |
if (rtmp <0) { |
166 |
sourceN[i]=-rtmp; |
167 |
sourceP[i]=0; |
168 |
} else { |
169 |
sourceN[i]= 0; |
170 |
sourceP[i]= rtmp; |
171 |
} |
172 |
} |
173 |
/* for (i = 0; i < n_rows; ++i) printf("%d : %e \n",i,source[i],sourceP[i],sourceN[i]) */ |
174 |
/* |
175 |
* let the show begin!!!! |
176 |
* |
177 |
*/ |
178 |
t=dt2; |
179 |
n=0; |
180 |
tolerance=options->tolerance; |
181 |
while(n<n_substeps && Paso_noError()) { |
182 |
printf("substep step %d at t=%e\n",n+1,t); |
183 |
#pragma omp parallel for private(i) |
184 |
for (i = 0; i < n_rows; ++i) { |
185 |
u[i]=fctp->u[i]; |
186 |
} |
187 |
/* |
188 |
* b^n[i]=m u^n[i] + dt2*(1-theta) sum_{j <> i} l_{ij}*(u^n[j]-u^n[i]) + dt2*sourceP[i] |
189 |
* |
190 |
* note that iteration_matrix stores the negative values of the |
191 |
* low order transport matrix l therefore a=-dt2*(1-fctp->theta) is used. |
192 |
* |
193 |
*/ |
194 |
Paso_SolverFCT_setMuPaLuPbQ(b_n,fctp->lumped_mass_matrix,u, |
195 |
-dt2*(1-fctp->theta),fctp->iteration_matrix,dt2,sourceP); |
196 |
/* |
197 |
* uTilde_n[i]=b[i]/m[i] |
198 |
* |
199 |
* a[i,i]=m[i]/(dt2 theta) + \frac{1}{\theta} \frac{q^-[i]}-l[i,i] |
200 |
* |
201 |
*/ |
202 |
if (fctp->theta > 0) { |
203 |
Paso_solve_free(fctp->iteration_matrix); |
204 |
omega=1./(dt2*fctp->theta); |
205 |
rtmp2=dt2*omega; |
206 |
#pragma omp parallel for private(i,rtmp,rtmp3,rtmp4) |
207 |
for (i = 0; i < n_rows; ++i) { |
208 |
rtmp=fctp->lumped_mass_matrix[i]; |
209 |
if (ABS(rtmp)>0.) { |
210 |
rtmp3=b_n[i]/rtmp; |
211 |
} else { |
212 |
rtmp3=u[i]; |
213 |
} |
214 |
rtmp4=rtmp*omega-fctp->main_diagonal_low_order_transport_matrix[i]; |
215 |
if (ABS(rtmp3)>0) rtmp4+=sourceN[i]*rtmp2/rtmp3; |
216 |
fctp->iteration_matrix->mainBlock->val[fctp->main_iptr[i]]=rtmp4; |
217 |
uTilde_n[i]=rtmp3; |
218 |
} |
219 |
} else { |
220 |
#pragma omp parallel for private(i,rtmp,rtmp3) |
221 |
for (i = 0; i < n_rows; ++i) { |
222 |
rtmp=fctp->lumped_mass_matrix[i]; |
223 |
if (ABS(rtmp)>0.) { |
224 |
rtmp3=b_n[i]/rtmp; |
225 |
} else { |
226 |
rtmp3=u[i]; |
227 |
} |
228 |
uTilde_n[i]=rtmp3; |
229 |
} |
230 |
omega=1.; |
231 |
/* no update of iteration_matrix retquired! */ |
232 |
} /* end (fctp->theta > 0) */ |
233 |
/* |
234 |
* calculate QP_n[i] max_{j} (\tilde{u}[j]- \tilde{u}[i] ) |
235 |
* QN_n[i] min_{j} (\tilde{u}[j]- \tilde{u}[i] ) |
236 |
* |
237 |
*/ |
238 |
Paso_SolverFCT_setQs(uTilde_n,QN_n,QP_n,fctp->iteration_matrix); |
239 |
/* |
240 |
* now we enter the mation on a time-step: |
241 |
* |
242 |
*/ |
243 |
m=0; |
244 |
converged=FALSE; |
245 |
while ( (!converged) && (m<500) && Paso_noError()) { |
246 |
printf("iteration step %d\n",m+1); |
247 |
/* |
248 |
* set the ant diffusion fluxes: |
249 |
* |
250 |
*/ |
251 |
Paso_FCTransportProblem_setAntiDiffusionFlux(dt2,fctp,flux_matrix,u,fctp->u); |
252 |
/* |
253 |
* apply pre flux-correction: f_{ij}:=0 if f_{ij}*(\tilde{u}[i]- \tilde{u}[j])<=0 |
254 |
* |
255 |
* this is not entirely correct!!!!! |
256 |
* |
257 |
*/ |
258 |
Paso_FCTransportProblem_applyPreAntiDiffusionCorrection(flux_matrix,uTilde_n); |
259 |
/* |
260 |
* set flux limms RN_m,RP_m |
261 |
* |
262 |
*/ |
263 |
Paso_FCTransportProblem_setRs(flux_matrix,fctp->lumped_mass_matrix,QN_n,QP_n,RN_m,RP_m); |
264 |
/* |
265 |
* z_m[i]=b_n[i] - (m_i*u[i] - dt2*theta*sum_{j<>i} l_{ij} (u[j]-u[i]) + dt2 q^-[i]) |
266 |
* |
267 |
* note that iteration_matrix stores the negative values of the |
268 |
* low order transport matrix l therefore a=dt2*fctp->theta is used. |
269 |
*/ |
270 |
|
271 |
Paso_SolverFCT_setMuPaLuPbQ(z_m,fctp->lumped_mass_matrix,u, |
272 |
dt2*fctp->theta,fctp->iteration_matrix,dt2,sourceN); |
273 |
#pragma omp parallel for private(i) |
274 |
for (i = 0; i < n_rows; ++i) z_m[i]=b_n[i]-z_m[i]; |
275 |
|
276 |
/* add corrected fluxes into z_m */ |
277 |
Paso_FCTransportProblem_addCorrectedFluxes(z_m,flux_matrix,RN_m,RP_m); |
278 |
/* |
279 |
* now we solve the linear system to get the correction dt2: |
280 |
* |
281 |
*/ |
282 |
if (fctp->theta > 0) { |
283 |
/* set the right hand side of the linear system: */ |
284 |
options->tolerance=1.e-2; |
285 |
Paso_solve(fctp->iteration_matrix,du_m,z_m,options); |
286 |
/* TODO: check errors ! */ |
287 |
} else { |
288 |
#pragma omp parallel for private(i,rtmp,rtmp1) |
289 |
for (i = 0; i < n_rows; ++i) { |
290 |
rtmp=fctp->lumped_mass_matrix[i]; |
291 |
if (ABS(rtmp)>0.) { |
292 |
rtmp1=z_m[i]/rtmp; |
293 |
} else { |
294 |
rtmp1=0; |
295 |
} |
296 |
du_m[i]=rtmp1; |
297 |
} |
298 |
} |
299 |
/* |
300 |
* update u and calculate norm of du_m and the new u: |
301 |
* |
302 |
*/ |
303 |
norm_u=0.; |
304 |
norm_du=0.; |
305 |
#pragma omp parallel private(local_norm_u,local_norm_du) |
306 |
{ |
307 |
local_norm_u=0.; |
308 |
local_norm_du=0.; |
309 |
#pragma omp for schedule(static) private(i) |
310 |
for (i=0;i<n_rows;++i) { |
311 |
u[i]+=omega*du_m[i]; |
312 |
local_norm_u=MAX(local_norm_u,ABS(u[i])); |
313 |
local_norm_du=MAX(local_norm_du,ABS(du_m[i])); |
314 |
} |
315 |
#pragma omp critical |
316 |
{ |
317 |
norm_u=MAX(norm_u,local_norm_u); |
318 |
norm_du=MAX(norm_du,local_norm_du); |
319 |
} |
320 |
} |
321 |
#ifdef PASO_MPI |
322 |
local_norm[0]=norm_u; |
323 |
local_norm[1]=norm_du; |
324 |
MPI_Allreduce(local_norm,norm, 2, MPI_DOUBLE, MPI_MAX, fctp->mpi_info->comm); |
325 |
norm_u=norm[0]; |
326 |
norm_du=norm[1]; |
327 |
#endif |
328 |
norm_du*=omega; |
329 |
converged=(norm_du <= tolerance * norm_u); |
330 |
m++; |
331 |
printf("iteration step %d: norm u and du_m : %e %e\n",m,norm_u,norm_du); |
332 |
/* TODO: check if du_m has been redu_mced */ |
333 |
} /* end of inner iteration */ |
334 |
#pragma omp parallel for schedule(static) private(i) |
335 |
for (i=0;i<n_rows;++i) fctp->u[i]=u[i]; |
336 |
n++; |
337 |
} /* end of time integration */ |
338 |
#pragma omp parallel for schedule(static) private(i) |
339 |
for (i=0;i<n_rows;++i) u[i]=fctp->u[i]+fctp->u_min; |
340 |
/* TODO: update u_min ? */ |
341 |
|
342 |
} |
343 |
/* |
344 |
* clean-up: |
345 |
* |
346 |
*/ |
347 |
TMPMEMFREE(b_n); |
348 |
Paso_SystemMatrix_free(flux_matrix); |
349 |
TMPMEMFREE(sourceP); |
350 |
TMPMEMFREE(sourceN); |
351 |
TMPMEMFREE(uTilde_n); |
352 |
TMPMEMFREE(QN_n); |
353 |
TMPMEMFREE(QP_n); |
354 |
TMPMEMFREE(RN_m); |
355 |
TMPMEMFREE(RP_m); |
356 |
TMPMEMFREE(z_m); |
357 |
TMPMEMFREE(du_m); |
358 |
} |