1 |
/* $Id:$ */ |
2 |
|
3 |
/******************************************************* |
4 |
* |
5 |
* Copyright 2007,2008 by University of Queensland |
6 |
* |
7 |
* http://esscc.uq.edu_m.au |
8 |
* Primary Business: Queensland, Australia |
9 |
* Licensed under the Open Software License version 3.0 |
10 |
* http://www.opensource.org/licenses/osl-3.0.php |
11 |
* |
12 |
*******************************************************/ |
13 |
|
14 |
/**************************************************************/ |
15 |
|
16 |
/* Paso: Flux correction transport solver |
17 |
* |
18 |
* solves Mu_t=Ku+q |
19 |
* u(0) >=0 |
20 |
* |
21 |
* Warning: the program assums sum_{j} k_{ij}=0!!! |
22 |
* |
23 |
*/ |
24 |
/**************************************************************/ |
25 |
|
26 |
/* Author: l.gross@uq.edu_m.au */ |
27 |
|
28 |
/**************************************************************/ |
29 |
|
30 |
#include "Paso.h" |
31 |
#include "Solver.h" |
32 |
#include "SolverFCT.h" |
33 |
#include "escript/blocktimer.h" |
34 |
#ifdef _OPENMP |
35 |
#include <omp.h> |
36 |
#endif |
37 |
#ifdef PASO_MPI |
38 |
#include <mpi.h> |
39 |
#endif |
40 |
|
41 |
double Paso_FCTransportProblem_getSafeTimeStepSize(Paso_FCTransportProblem* fctp) |
42 |
{ |
43 |
dim_t i, n_rows; |
44 |
double dt_max, dt_max_loc; |
45 |
register double rtmp1,rtmp2; |
46 |
n_rows=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix); |
47 |
if (! fctp->valid_matrices) { |
48 |
fctp->dt_max=LARGE_POSITIVE_FLOAT; |
49 |
/* extract the row sum of the advective part */ |
50 |
Paso_SystemMatrix_rowSum(fctp->mass_matrix,fctp->lumped_mass_matrix); |
51 |
|
52 |
/* set low order transport operator */ |
53 |
Paso_FCTransportProblem_setLowOrderOperator(fctp); |
54 |
/* |
55 |
* calculate time step size: |
56 |
*/ |
57 |
dt_max=LARGE_POSITIVE_FLOAT; |
58 |
if (fctp->theta < 1.) { |
59 |
#pragma omp parallel private(dt_max_loc) |
60 |
{ |
61 |
dt_max_loc=LARGE_POSITIVE_FLOAT; |
62 |
#pragma omp for schedule(static) private(i,rtmp1,rtmp2) |
63 |
for (i=0;i<n_rows;++i) { |
64 |
rtmp1=fctp->main_diagonal_low_order_transport_matrix[i]; |
65 |
rtmp2=fctp->lumped_mass_matrix[i]; |
66 |
if ( (rtmp1<0 && rtmp2>0.) || (rtmp1>0 && rtmp2<0) ) { |
67 |
dt_max_loc=MIN(dt_max_loc,-rtmp2/rtmp1); |
68 |
} |
69 |
} |
70 |
#pragma omp critical |
71 |
{ |
72 |
dt_max=MIN(dt_max,dt_max_loc); |
73 |
} |
74 |
} |
75 |
#ifdef PASO_MPI |
76 |
dt_max_loc = dt_max; |
77 |
MPI_Allreduce(&dt_max_loc, &dt_max, 1, MPI_DOUBLE, MPI_MIN, fctp->mpi_info->comm); |
78 |
#endif |
79 |
if (dt_max<LARGE_POSITIVE_FLOAT) dt_max*=1./(1.-fctp->theta); |
80 |
} |
81 |
if (dt_max <= 0.) { |
82 |
Paso_setError(TYPE_ERROR,"Paso_SolverFCT_solve: dt must be positive."); |
83 |
} else { |
84 |
if (dt_max<LARGE_POSITIVE_FLOAT) printf("maximum time step size is %e (theta = %e).\n",dt_max,fctp->theta); |
85 |
} |
86 |
fctp->dt_max=dt_max; |
87 |
fctp->valid_matrices=Paso_noError(); |
88 |
} |
89 |
return fctp->dt_max; |
90 |
} |
91 |
|
92 |
|
93 |
|
94 |
|
95 |
void Paso_SolverFCT_solve(Paso_FCTransportProblem* fctp, double* u, double dt, double* source, Paso_Options* options) { |
96 |
index_t i; |
97 |
long n_substeps,n, m; |
98 |
double dt_max, omega, dt2,t; |
99 |
double local_norm_u,local_norm_du,norm_u,norm_du, tolerance; |
100 |
register double rtmp1,rtmp2,rtmp3,rtmp4, rtmp; |
101 |
double *b_n=NULL, *sourceP=NULL, *sourceN=NULL, *uTilde_n=NULL, *QN_n=NULL, *QP_n=NULL, *RN_m=NULL, *RP_m=NULL, *z_m=NULL, *du_m=NULL,*u_m=NULL; |
102 |
Paso_Coupler* QN_n_coupler=NULL, *QP_n_coupler=NULL, *uTilde_n_coupler=NULL, *RN_m_coupler=NULL, *RP_m_coupler=NULL, *u_m_coupler=NULL; |
103 |
Paso_SystemMatrix *flux_matrix=NULL; |
104 |
dim_t n_rows=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix); |
105 |
bool_t converged; |
106 |
dim_t max_m=500; |
107 |
double inner_tol=1.e-2; |
108 |
dim_t blockSize=Paso_FCTransportProblem_getBlockSize(fctp); |
109 |
if (dt<=0.) { |
110 |
Paso_setError(TYPE_ERROR,"Paso_SolverFCT_solve: dt must be positive."); |
111 |
} |
112 |
dt_max=Paso_FCTransportProblem_getSafeTimeStepSize(fctp); |
113 |
/* |
114 |
* allocate memory |
115 |
* |
116 |
*/ |
117 |
u_m=TMPMEMALLOC(n_rows,double); |
118 |
Paso_checkPtr(u_m); |
119 |
b_n=TMPMEMALLOC(n_rows,double); |
120 |
Paso_checkPtr(b_n); |
121 |
sourceP=TMPMEMALLOC(n_rows,double); |
122 |
Paso_checkPtr(sourceP); |
123 |
sourceN=TMPMEMALLOC(n_rows,double); |
124 |
Paso_checkPtr(sourceN); |
125 |
uTilde_n=TMPMEMALLOC(n_rows,double); |
126 |
Paso_checkPtr(uTilde_n); |
127 |
QN_n=TMPMEMALLOC(n_rows,double); |
128 |
Paso_checkPtr(QN_n); |
129 |
QP_n=TMPMEMALLOC(n_rows,double); |
130 |
Paso_checkPtr(QP_n); |
131 |
RN_m=TMPMEMALLOC(n_rows,double); |
132 |
Paso_checkPtr(RN_m); |
133 |
RP_m=TMPMEMALLOC(n_rows,double); |
134 |
Paso_checkPtr(RP_m); |
135 |
z_m=TMPMEMALLOC(n_rows,double); |
136 |
Paso_checkPtr(z_m); |
137 |
du_m=TMPMEMALLOC(n_rows,double); |
138 |
Paso_checkPtr(du_m); |
139 |
QN_n_coupler=Paso_Coupler_alloc(Paso_FCTransportProblem_borrowConnector(fctp),blockSize); |
140 |
QP_n_coupler=Paso_Coupler_alloc(Paso_FCTransportProblem_borrowConnector(fctp),blockSize); |
141 |
RN_m_coupler=Paso_Coupler_alloc(Paso_FCTransportProblem_borrowConnector(fctp),blockSize); |
142 |
RP_m_coupler=Paso_Coupler_alloc(Paso_FCTransportProblem_borrowConnector(fctp),blockSize); |
143 |
uTilde_n_coupler=Paso_Coupler_alloc(Paso_FCTransportProblem_borrowConnector(fctp),blockSize); |
144 |
u_m_coupler=Paso_Coupler_alloc(Paso_FCTransportProblem_borrowConnector(fctp),blockSize); |
145 |
flux_matrix=Paso_SystemMatrix_alloc(fctp->transport_matrix->type, |
146 |
fctp->transport_matrix->pattern, |
147 |
fctp->transport_matrix->row_block_size, |
148 |
fctp->transport_matrix->col_block_size); |
149 |
if (Paso_noError()) { |
150 |
#ifdef PASO_MPI |
151 |
double local_norm[2], norm[2]; |
152 |
#endif |
153 |
/* |
154 |
* Preparation: |
155 |
* |
156 |
*/ |
157 |
|
158 |
/* decide on substepping */ |
159 |
if (fctp->dt_max < LARGE_POSITIVE_FLOAT) { |
160 |
n_substeps=(long)ceil(dt/dt_max); |
161 |
} else { |
162 |
n_substeps=1; |
163 |
} |
164 |
dt2=dt/n_substeps; |
165 |
printf("%ld time steps of size is %e (theta = %e, dt_max=%e).\n",n_substeps, dt2,fctp->theta, dt_max); |
166 |
/* |
167 |
* seperate source into positive and negative part: |
168 |
*/ |
169 |
#pragma omp parallel for private(i,rtmp) |
170 |
for (i = 0; i < n_rows; ++i) { |
171 |
rtmp=source[i]; |
172 |
if (rtmp <0) { |
173 |
sourceN[i]=-rtmp; |
174 |
sourceP[i]=0; |
175 |
} else { |
176 |
sourceN[i]= 0; |
177 |
sourceP[i]= rtmp; |
178 |
} |
179 |
} |
180 |
u_m_coupler->data=u_m; |
181 |
/* for (i = 0; i < n_rows; ++i) printf("%d : %e \n",i,source[i],sourceP[i],sourceN[i]) */ |
182 |
/* |
183 |
* let the show begin!!!! |
184 |
* |
185 |
*/ |
186 |
t=dt2; |
187 |
n=0; |
188 |
tolerance=options->tolerance; |
189 |
while(n<n_substeps && Paso_noError()) { |
190 |
|
191 |
printf("substep step %ld at t=%e\n",n+1,t); |
192 |
/* |
193 |
* b^n[i]=m u^n[i] + dt2*(1-theta) sum_{j <> i} l_{ij}*(u^n[j]-u^n[i]) + dt2*sourceP[i] |
194 |
* |
195 |
* note that iteration_matrix stores the negative values of the |
196 |
* low order transport matrix l therefore a=-dt2*(1-fctp->theta) is used. |
197 |
* |
198 |
*/ |
199 |
Paso_SolverFCT_setMuPaLuPbQ(b_n,fctp->lumped_mass_matrix,fctp->u_coupler, |
200 |
-dt2*(1-fctp->theta),fctp->iteration_matrix,dt2,sourceP); |
201 |
/* |
202 |
* uTilde_n[i]=b[i]/m[i] |
203 |
* |
204 |
* fctp->iteration_matrix[i,i]=m[i]/(dt2 theta) + \frac{1}{\theta} \frac{q^-[i]}-l[i,i] |
205 |
* |
206 |
*/ |
207 |
if (fctp->theta > 0) { |
208 |
Paso_solve_free(fctp->iteration_matrix); |
209 |
omega=1./(dt2*fctp->theta); |
210 |
rtmp2=dt2*omega; |
211 |
#pragma omp parallel for private(i,rtmp,rtmp3,rtmp4) |
212 |
for (i = 0; i < n_rows; ++i) { |
213 |
rtmp=fctp->lumped_mass_matrix[i]; |
214 |
if (ABS(rtmp)>0.) { |
215 |
rtmp3=b_n[i]/rtmp; |
216 |
} else { |
217 |
rtmp3=fctp->u[i]; |
218 |
} |
219 |
rtmp4=rtmp*omega-fctp->main_diagonal_low_order_transport_matrix[i]; |
220 |
if (ABS(rtmp3)>0) rtmp4+=sourceN[i]*rtmp2/rtmp3; |
221 |
fctp->iteration_matrix->mainBlock->val[fctp->main_iptr[i]]=rtmp4; |
222 |
uTilde_n[i]=rtmp3; |
223 |
} |
224 |
} else { |
225 |
#pragma omp parallel for private(i,rtmp,rtmp3) |
226 |
for (i = 0; i < n_rows; ++i) { |
227 |
rtmp=fctp->lumped_mass_matrix[i]; |
228 |
if (ABS(rtmp)>0.) { |
229 |
rtmp3=b_n[i]/rtmp; |
230 |
} else { |
231 |
rtmp3=fctp->u[i]; |
232 |
} |
233 |
uTilde_n[i]=rtmp3; |
234 |
} |
235 |
omega=1.; |
236 |
/* no update of iteration_matrix required! */ |
237 |
} /* end (fctp->theta > 0) */ |
238 |
|
239 |
/* distribute uTilde_n: */ |
240 |
Paso_Coupler_startCollect(uTilde_n_coupler,uTilde_n); |
241 |
Paso_Coupler_finishCollect(uTilde_n_coupler); |
242 |
/* |
243 |
* calculate QP_n[i] max_{j} (\tilde{u}[j]- \tilde{u}[i] ) |
244 |
* QN_n[i] min_{j} (\tilde{u}[j]- \tilde{u}[i] ) |
245 |
* |
246 |
*/ |
247 |
Paso_SolverFCT_setQs(uTilde_n_coupler,QN_n,QP_n,fctp->iteration_matrix); |
248 |
Paso_Coupler_startCollect(QN_n_coupler,QN_n); |
249 |
Paso_Coupler_startCollect(QP_n_coupler,QP_n); |
250 |
Paso_Coupler_finishCollect(QN_n_coupler); |
251 |
Paso_Coupler_finishCollect(QP_n_coupler); |
252 |
/* |
253 |
* now we enter the iteration on a time-step: |
254 |
* |
255 |
*/ |
256 |
Paso_Coupler_copyAll(fctp->u_coupler, u_m_coupler); |
257 |
/* REPLACE BY JACOBEAN FREE NEWTON */ |
258 |
m=0; |
259 |
converged=FALSE; |
260 |
while ( (!converged) && (m<max_m) && Paso_noError()) { |
261 |
printf("iteration step %ld\n",m+1); |
262 |
/* |
263 |
* set the ant diffusion fluxes: |
264 |
* |
265 |
*/ |
266 |
Paso_FCTransportProblem_setAntiDiffusionFlux(dt2,fctp,flux_matrix,u_m_coupler); |
267 |
/* |
268 |
* apply pre flux-correction: f_{ij}:=0 if f_{ij}*(\tilde{u}[i]- \tilde{u}[j])<=0 |
269 |
* |
270 |
*/ |
271 |
Paso_FCTransportProblem_applyPreAntiDiffusionCorrection(flux_matrix,uTilde_n_coupler); |
272 |
/* |
273 |
* set flux limiters RN_m,RP_m |
274 |
* |
275 |
*/ |
276 |
Paso_FCTransportProblem_setRs(flux_matrix,fctp->lumped_mass_matrix,QN_n_coupler,QP_n_coupler,RN_m,RP_m); |
277 |
Paso_Coupler_startCollect(RN_m_coupler,RN_m); |
278 |
Paso_Coupler_startCollect(RP_m_coupler,RP_m); |
279 |
/* |
280 |
* z_m[i]=b_n[i] - (m_i*u[i] - dt2*theta*sum_{j<>i} l_{ij} (u[j]-u[i]) + dt2 q^-[i]) |
281 |
* |
282 |
* note that iteration_matrix stores the negative values of the |
283 |
* low order transport matrix l therefore a=dt2*fctp->theta is used. |
284 |
*/ |
285 |
|
286 |
Paso_SolverFCT_setMuPaLuPbQ(z_m,fctp->lumped_mass_matrix, u_m_coupler, |
287 |
dt2*fctp->theta,fctp->iteration_matrix,dt2,sourceN); |
288 |
/* for (i = 0; i < n_rows; ++i) { |
289 |
* printf("%d: %e %e\n",i,z_m[i], b_n[i]); |
290 |
* } |
291 |
*/ |
292 |
#pragma omp parallel for private(i) |
293 |
for (i = 0; i < n_rows; ++i) z_m[i]=b_n[i]-z_m[i]; |
294 |
|
295 |
Paso_Coupler_finishCollect(RN_m_coupler); |
296 |
Paso_Coupler_finishCollect(RP_m_coupler); |
297 |
/* add corrected fluxes into z_m */ |
298 |
Paso_FCTransportProblem_addCorrectedFluxes(z_m,flux_matrix,RN_m_coupler,RP_m_coupler); |
299 |
/* |
300 |
* now we solve the linear system to get the correction dt2: |
301 |
* |
302 |
*/ |
303 |
if (fctp->theta > 0) { |
304 |
/* set the right hand side of the linear system: */ |
305 |
options->tolerance=inner_tol; |
306 |
options->verbose=TRUE; |
307 |
Paso_solve(fctp->iteration_matrix,du_m,z_m,options); |
308 |
/* TODO: check errors ! */ |
309 |
} else { |
310 |
#pragma omp parallel for private(i,rtmp,rtmp1) |
311 |
for (i = 0; i < n_rows; ++i) { |
312 |
rtmp=fctp->lumped_mass_matrix[i]; |
313 |
if (ABS(rtmp)>0.) { |
314 |
rtmp1=z_m[i]/rtmp; |
315 |
} else { |
316 |
rtmp1=0; |
317 |
} |
318 |
du_m[i]=rtmp1; |
319 |
} |
320 |
} |
321 |
/* |
322 |
* update u and calculate norm of du_m and the new u: |
323 |
* |
324 |
*/ |
325 |
norm_u=0.; |
326 |
norm_du=0.; |
327 |
#pragma omp parallel private(local_norm_u,local_norm_du) |
328 |
{ |
329 |
local_norm_u=0.; |
330 |
local_norm_du=0.; |
331 |
#pragma omp for schedule(static) private(i) |
332 |
for (i=0;i<n_rows;++i) { |
333 |
u_m[i]+=omega*du_m[i]; |
334 |
local_norm_u=MAX(local_norm_u,ABS(u_m[i])); |
335 |
local_norm_du=MAX(local_norm_du,ABS(du_m[i])); |
336 |
} |
337 |
#pragma omp critical |
338 |
{ |
339 |
norm_u=MAX(norm_u,local_norm_u); |
340 |
norm_du=MAX(norm_du,local_norm_du); |
341 |
} |
342 |
} |
343 |
Paso_Coupler_startCollect(u_m_coupler,u_m); |
344 |
#ifdef PASO_MPI |
345 |
local_norm[0]=norm_u; |
346 |
local_norm[1]=norm_du; |
347 |
MPI_Allreduce(local_norm,norm, 2, MPI_DOUBLE, MPI_MAX, fctp->mpi_info->comm); |
348 |
norm_u=norm[0]; |
349 |
norm_du=norm[1]; |
350 |
#endif |
351 |
norm_du*=omega; |
352 |
converged=(norm_du <= tolerance * norm_u); |
353 |
m++; |
354 |
printf("iteration step %ld: norm u and du_m : %e %e\n",m,norm_u,norm_du); |
355 |
/* TODO: check if du_m has been redu_mced */ |
356 |
Paso_Coupler_finishCollect(u_m_coupler); |
357 |
} /* end of inner iteration */ |
358 |
SWAP(fctp->u,u_m,double*); |
359 |
SWAP(fctp->u_coupler,u_m_coupler,Paso_Coupler*); |
360 |
n++; |
361 |
} /* end of time integration */ |
362 |
options->tolerance=tolerance; |
363 |
#pragma omp parallel for schedule(static) private(i) |
364 |
for (i=0;i<n_rows;++i) u[i]=fctp->u[i]; |
365 |
} |
366 |
/* |
367 |
* clean-up: |
368 |
* |
369 |
*/ |
370 |
TMPMEMFREE(b_n); |
371 |
Paso_SystemMatrix_free(flux_matrix); |
372 |
TMPMEMFREE(sourceP); |
373 |
TMPMEMFREE(sourceN); |
374 |
TMPMEMFREE(uTilde_n); |
375 |
TMPMEMFREE(QN_n); |
376 |
TMPMEMFREE(QP_n); |
377 |
TMPMEMFREE(RN_m); |
378 |
TMPMEMFREE(RP_m); |
379 |
TMPMEMFREE(z_m); |
380 |
TMPMEMFREE(du_m); |
381 |
TMPMEMFREE(u_m); |
382 |
Paso_Coupler_free(QN_n_coupler); |
383 |
Paso_Coupler_free(QP_n_coupler); |
384 |
Paso_Coupler_free(RN_m_coupler); |
385 |
Paso_Coupler_free(RP_m_coupler); |
386 |
Paso_Coupler_free(uTilde_n_coupler); |
387 |
Paso_Coupler_free(u_m_coupler); |
388 |
} |