/[escript]/trunk/paso/src/FCTSolver.c
ViewVC logotype

Contents of /trunk/paso/src/FCTSolver.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3490 - (show annotations)
Wed Mar 30 02:24:33 2011 UTC (8 years, 4 months ago) by caltinay
File MIME type: text/plain
File size: 15206 byte(s)
More gcc-4.6 fixes (mostly initialized-but-unused-var warnings)

1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2010 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14
15 /**************************************************************/
16
17 /* Paso: Transport solver with flux corretion (L is row sum zero)
18 *
19 * - Mv_t=Lv v(0)=u
20 *
21 * to return v(dt)
22 *
23 */
24 /**************************************************************/
25
26 /* Author: l.gross@uq.edu.au */
27
28 /**************************************************************/
29
30 #include "FCTSolver.h"
31 #include "Solver.h"
32 #include "PasoUtil.h"
33
34 Paso_Function* Paso_FCTSolver_Function_alloc(Paso_TransportProblem *fctp, Paso_Options* options)
35 {
36 const dim_t n=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix);
37 const dim_t blockSize=Paso_TransportProblem_getBlockSize(fctp);
38 Paso_Function * out=NULL;
39 Paso_FCTSolver *more=NULL;
40 out=MEMALLOC(1,Paso_Function);
41 if (! Esys_checkPtr(out)) {
42 out->kind=FCT;
43 out->mpi_info=Esys_MPIInfo_getReference(fctp->mpi_info);
44 out->n=n;
45 out->b=TMPMEMALLOC(n,double); /*b_m */
46 Esys_checkPtr(out->b);
47 out->tmp=TMPMEMALLOC(n,double); /* z_m */
48 Esys_checkPtr(out->tmp);
49
50 more=MEMALLOC(1,Paso_FCTSolver);
51 out->more=(void*) more;
52 if (! Esys_checkPtr(more)) {
53 more->transportproblem=Paso_TransportProblem_getReference(fctp);
54 more->uTilde_n=TMPMEMALLOC(n,double);
55 Esys_checkPtr(more->uTilde_n);
56 more->QN_n=TMPMEMALLOC(n,double);
57 Esys_checkPtr(more->QN_n);
58 more->QP_n=TMPMEMALLOC(n,double);
59 Esys_checkPtr(more->QP_n);
60 more->RN_m=TMPMEMALLOC(n,double);
61 Esys_checkPtr(more->RN_m);
62 more->RP_m=TMPMEMALLOC(n,double);
63 Esys_checkPtr(more->RP_m);
64 more->QN_n_coupler=Paso_Coupler_alloc(Paso_TransportProblem_borrowConnector(fctp),blockSize);
65 more->QP_n_coupler=Paso_Coupler_alloc(Paso_TransportProblem_borrowConnector(fctp),blockSize);
66 more->RN_m_coupler=Paso_Coupler_alloc(Paso_TransportProblem_borrowConnector(fctp),blockSize);
67 more->RP_m_coupler=Paso_Coupler_alloc(Paso_TransportProblem_borrowConnector(fctp),blockSize);
68 more->uTilde_n_coupler=Paso_Coupler_alloc(Paso_TransportProblem_borrowConnector(fctp),blockSize);
69 more->u_m_coupler=Paso_Coupler_alloc(Paso_TransportProblem_borrowConnector(fctp),blockSize);
70 more->flux_matrix_m=Paso_SystemMatrix_alloc(fctp->transport_matrix->type,
71 fctp->transport_matrix->pattern,
72 fctp->transport_matrix->row_block_size,
73 fctp->transport_matrix->col_block_size, TRUE);
74
75 }
76 }
77 if (Esys_noError()) {
78 return out;
79 } else {
80 Paso_FCTSolver_Function_free(out);
81 return NULL;
82 }
83 }
84
85 void Paso_FCTSolver_Function_free(Paso_Function * in)
86 {
87 Paso_FCTSolver *more=NULL;
88 if (in!=NULL) {
89 Esys_MPIInfo_free(in->mpi_info);
90 MEMFREE(in->tmp);
91 MEMFREE(in->b);
92 more=(Paso_FCTSolver *) (in->more);
93 if (NULL !=more) {
94 Paso_TransportProblem_free(more->transportproblem);
95 Paso_SystemMatrix_free(more->flux_matrix_m);
96 TMPMEMFREE(more->uTilde_n);
97 TMPMEMFREE(more->QN_n);
98 TMPMEMFREE(more->QP_n);
99 TMPMEMFREE(more->RN_m);
100 TMPMEMFREE(more->RP_m);
101 Paso_Coupler_free(more->QN_n_coupler);
102 Paso_Coupler_free(more->QP_n_coupler);
103 Paso_Coupler_free(more->RN_m_coupler);
104 Paso_Coupler_free(more->RP_m_coupler);
105 Paso_Coupler_free(more->uTilde_n_coupler);
106 Paso_Coupler_free(more->u_m_coupler);
107 }
108 MEMFREE(in);
109 }
110 }
111 double Paso_FCTSolver_getSafeTimeStepSize(Paso_TransportProblem* fctp)
112 {
113 dim_t i, n;
114 double dt_max=LARGE_POSITIVE_FLOAT, dt_max_loc;
115 register double l_ii,m_i,al_ii;
116 index_t fail=0, fail_loc;
117 n=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix);
118 /* set low order transport operator */
119 Paso_FCTSolver_setLowOrderOperator(fctp);
120
121 if (Esys_noError()) {
122 /*
123 * calculate time step size:
124 */
125 dt_max=LARGE_POSITIVE_FLOAT;
126 fail=0;
127 #pragma omp parallel private(dt_max_loc, fail_loc)
128 {
129 dt_max_loc=LARGE_POSITIVE_FLOAT;
130 fail_loc=0;
131 #pragma omp for schedule(static) private(i,l_ii,m_i, al_ii)
132 for (i=0;i<n;++i) {
133 l_ii=fctp->main_diagonal_low_order_transport_matrix[i];
134 m_i=fctp->lumped_mass_matrix[i];
135 al_ii=ABS(l_ii); /* al_ii should be negative, abs is taken to avoid problems with almost zero but positive values for l_ii */
136 if ( (m_i > 0) ) {
137 if (al_ii>0) dt_max_loc=MIN(dt_max_loc,m_i/al_ii);
138 } else {
139 fail_loc=-1;
140 }
141 }
142 #pragma omp critical
143 {
144 dt_max=MIN(dt_max,dt_max_loc);
145 fail=MIN(fail, fail_loc);
146 }
147 }
148 #ifdef ESYS_MPI
149 {
150 double rtmp_loc[2], rtmp[2];
151 rtmp_loc[0]=dt_max;
152 rtmp_loc[1]= (double) fail;
153 MPI_Allreduce(rtmp_loc, rtmp, 2, MPI_DOUBLE, MPI_MIN, fctp->mpi_info->comm);
154 dt_max=rtmp[0];
155 fail = rtmp[1] < 0 ? -1 : 0;
156 }
157 #endif
158 if (fail < 0 ) {
159 Esys_setError(VALUE_ERROR, "Paso_FCTSolver_getSafeTimeStepSize: negative mass matrix entries detected.");
160 return -1;
161 } else {
162 if (dt_max<LARGE_POSITIVE_FLOAT) dt_max*=fctp->dt_factor;
163 }
164 }
165 return dt_max;
166 }
167 /*
168 * evaluates F as
169 * (m_i*omega - l_ij)*(F_j/omega)
170 * =(m u^n[i] + dt*(1-theta) sum_{j <> i} l_{ij}*(u^n[j]-u^n[i]))-(m_i*u[i] - dt*theta*sum_{j<>i} l_{ij} (u[j]-u[i]) )- antidiffusion
171 *
172 */
173 err_t Paso_FCTSolver_Function_call(Paso_Function * F,double* value, const double* arg, Paso_Performance *pp)
174 {
175 Paso_Options options;
176 Paso_FCTSolver *more=(Paso_FCTSolver *) (F->more);
177 Paso_Options_setDefaults(&options);
178 options.verbose=TRUE;
179
180 /* set z_m in tmp */
181 Paso_FCTSolver_setUpRightHandSide(more->transportproblem,more->dt,arg,more->u_m_coupler,F->tmp,more->flux_matrix_m,
182 more->uTilde_n_coupler,F->b,
183 more->QN_n_coupler,more->QP_n_coupler,more->RN_m,
184 more->RN_m_coupler,more->RP_m,more->RP_m_coupler,pp);
185 /* apply preconditoner do get du = */
186 /* Paso_Solver(more->transportproblem->iteration_matrix,value,F->tmp,&options,pp); */
187
188
189 Paso_SystemMatrix_solvePreconditioner(more->transportproblem->iteration_matrix,value,F->tmp);
190 return NO_ERROR;
191 }
192
193 err_t Paso_FCTSolver_solve(Paso_Function* F, double* u, double dt, Paso_Options* options, Paso_Performance *pp)
194 {
195 const dim_t num_critical_rates_max=3; /* number of rates >=critical_rate accepted before divergence is triggered */
196 const double critical_rate=0.8; /* expected value of convergence rate */
197
198 double norm_u_tilde, ATOL, norm_du=LARGE_POSITIVE_FLOAT, norm_du_old, *du=NULL, rate;
199 err_t errorCode=SOLVER_NO_ERROR;
200 Paso_FCTSolver *more=(Paso_FCTSolver *) (F->more);
201 const double omega=1./(dt*Paso_Transport_getTheta(more->transportproblem));
202 Paso_TransportProblem* fctp=more->transportproblem;
203 const dim_t n=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix);
204 const double atol=options->absolute_tolerance;
205 const double rtol=options->tolerance;
206 const dim_t max_m=options->iter_max;
207 dim_t m=0, num_critical_rates=0 ;
208 bool_t converged=FALSE, max_m_reached=FALSE,diverged=FALSE;
209
210 /* tolerance? */
211 more->dt=dt;
212 Paso_FCTSolver_setUp(fctp,dt,u, F->b,more->uTilde_n,more->uTilde_n_coupler,more->QN_n,more->QN_n_coupler,more->QP_n,more->QP_n_coupler,
213 options,pp);
214
215
216 norm_u_tilde=Paso_lsup(n,more->uTilde_n,F->mpi_info);
217 ATOL= rtol * norm_u_tilde + atol ;
218 if (options->verbose) printf("Paso_FCTSolver_solve: iteration starts u_tilda lsup = %e (abs. tol = %e)\n",norm_u_tilde,ATOL);
219 if (more->transportproblem->useBackwardEuler && FALSE ) {
220 options->absolute_tolerance = ATOL/omega;
221 options->tolerance=0;
222 errorCode=Paso_Solver_NewtonGMRES(F,u,options,pp);
223 options->absolute_tolerance=atol;
224 options->tolerance=rtol;
225 } else {
226 m=0;
227 du=MEMALLOC(n,double);
228 if (Esys_checkPtr(du)) {
229 errorCode=SOLVER_MEMORY_ERROR;
230 } else {
231 /* tolerance? */
232
233 while ( (!converged) && (!diverged) && (! max_m_reached) && Esys_noError()) {
234 errorCode=Paso_FCTSolver_Function_call(F,du, u, pp);
235 options->num_iter++;
236 Paso_Update(n,1.,u,omega,du);
237 norm_du_old=norm_du;
238 norm_du=Paso_lsup(n,du, fctp->mpi_info);
239 if (m ==0) {
240 if (options->verbose) printf("Paso_FCTSolver_solve: step %d: increment= %e\n",m+1, norm_du*omega);
241
242 } else {
243 if (norm_du_old > 0.) {
244 rate=norm_du/norm_du_old;
245 } else if (norm_du <= 0.) {
246 rate=0.;
247 } else {
248 rate=LARGE_POSITIVE_FLOAT;
249 }
250 if (options->verbose) printf("Paso_FCTSolver_solve: step %d: increment= %e (rate = %e)\n",m+1, norm_du*omega, rate);
251 num_critical_rates+=( rate<critical_rate ? 0 : 1);
252 }
253
254 max_m_reached=(m>max_m);
255 diverged = (num_critical_rates >= num_critical_rates_max);
256 converged=(norm_du*omega <= ATOL) ;
257 m++;
258 }
259 }
260 MEMFREE(du);
261 if (errorCode == SOLVER_NO_ERROR) {
262 if (converged) {
263 if (options->verbose) printf("Paso_FCTSolver_solve: iteration is completed.\n");
264 errorCode=SOLVER_NO_ERROR;
265 } else if (diverged) {
266 if (options->verbose) printf("Paso_FCTSolver_solve: divergence.\n");
267 errorCode=SOLVER_DIVERGENCE;
268 } else if (max_m_reached) {
269 if (options->verbose) printf("Paso_FCTSolver_solve: maximum number of iteration steps reached.\n");
270 errorCode=SOLVER_MAXITER_REACHED;
271 }
272 }
273 }
274 return errorCode;
275 }
276
277
278
279 err_t Paso_FCTSolver_setUpRightHandSide(Paso_TransportProblem* fctp, const double dt, const double *u_m,
280 Paso_Coupler* u_m_coupler, double * z_m,
281 Paso_SystemMatrix* flux_matrix, Paso_Coupler* uTilde_coupler, const double *b,
282 Paso_Coupler* QN_coupler, Paso_Coupler* QP_coupler,
283 double *RN_m, Paso_Coupler* RN_m_coupler, double* RP_m, Paso_Coupler* RP_m_coupler,
284 Paso_Performance* pp)
285 {
286 const double omega=dt* Paso_Transport_getTheta(fctp);
287 const dim_t n=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix);
288 /* distribute u */
289 Paso_Coupler_startCollect(u_m_coupler,u_m);
290 Paso_Coupler_finishCollect(u_m_coupler);
291 /*
292 * set the ant diffusion fluxes:
293 *
294 */
295 Paso_FCTSolver_setAntiDiffusionFlux(dt,fctp,flux_matrix,u_m_coupler);
296 /*
297 * apply pre flux-correction: f_{ij}:=0 if f_{ij}*(\tilde{u}[i]- \tilde{u}[j])<=0
298 *
299 */
300 Paso_FCTSolver_applyPreAntiDiffusionCorrection(flux_matrix,uTilde_coupler);
301 /*
302 * set flux limiters RN_m,RP_m
303 *
304 */
305 Paso_FCTSolver_setRs(flux_matrix,fctp->lumped_mass_matrix,QN_coupler,QP_coupler,RN_m,RP_m);
306 Paso_Coupler_startCollect(RN_m_coupler,RN_m);
307 Paso_Coupler_startCollect(RP_m_coupler,RP_m);
308 /*
309 * z_m[i]=b[i] - (m_i*u_m[i] - dt*theta*sum_{j<>i} l_{ij} (u_m[j]-u_m[i]) )
310 *
311 * note that iteration_matrix stores the negative values of the
312 * low order transport matrix l therefore a=dt*theta is used.
313 */
314 Paso_FCTSolver_setMuPaLu(z_m, fctp->lumped_mass_matrix, u_m_coupler, omega, fctp->iteration_matrix);
315 /*{
316 int kk;
317 for (kk=0;kk<n;kk++) printf("z_m %d : %e %e -> %e\n",kk,z_m[kk], b[kk],z_m[kk]-b[kk]);
318 }
319 */
320 /* z_m=b-z_m */
321 Paso_Update(n,-1.,z_m,1.,b);
322
323 Paso_Coupler_finishCollect(RN_m_coupler);
324 Paso_Coupler_finishCollect(RP_m_coupler);
325 /* add corrected fluxes into z_m */
326 Paso_FCTSolver_addCorrectedFluxes(z_m,flux_matrix,RN_m_coupler,RP_m_coupler);
327 return SOLVER_NO_ERROR;
328 }
329
330 void Paso_FCTSolver_Function_initialize(const double dt, Paso_TransportProblem* fctp, Paso_Options* options, Paso_Performance* pp)
331 {
332 dim_t i;
333 const index_t* main_iptr=Paso_TransportProblem_borrowMainDiagonalPointer(fctp);
334 const dim_t n=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix);
335 const double theta = Paso_Transport_getTheta(fctp);
336 const double omega=1./(dt* theta);
337 register double m, rtmp4;
338
339 /*
340 * fctp->iteration_matrix[i,i]=m[i]/(dt theta) -l[i,i]
341 *
342 */
343 Paso_solve_free(fctp->iteration_matrix);
344 #pragma omp parallel for private(i,m,rtmp4)
345 for (i = 0; i < n; ++i) {
346 m=fctp->lumped_mass_matrix[i];
347 rtmp4=m*omega-(fctp->main_diagonal_low_order_transport_matrix[i]);
348 fctp->iteration_matrix->mainBlock->val[main_iptr[i]]=rtmp4;
349 }
350 Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER_INIT);
351 Paso_SystemMatrix_setPreconditioner(fctp->iteration_matrix,options);
352 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER_INIT);
353 }
354
355 void Paso_FCTSolver_setUp(Paso_TransportProblem* fctp, const double dt, const double *u, double* b, double* uTilde,
356 Paso_Coupler* uTilde_coupler, double *QN, Paso_Coupler* QN_coupler, double *QP, Paso_Coupler* QP_coupler,
357 Paso_Options* options, Paso_Performance* pp)
358 {
359 dim_t i;
360 const dim_t n=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix);
361 register double m;
362 /* distribute u */
363 Paso_Coupler_startCollect(fctp->u_coupler,u);
364 Paso_Coupler_finishCollect(fctp->u_coupler);
365 /*
366 * b^n[i]=m u^n[i] + dt*(1-theta) sum_{j <> i} l_{ij}*(u^n[j]-u^n[i])
367 *
368 * note that iteration_matrix stores the negative values of the
369 * low order transport matrix l therefore a=-dt*(1-fctp->theta) is used.
370 *
371 */
372 if (fctp->useBackwardEuler) {
373 Paso_FCTSolver_setMuPaLu(b,fctp->lumped_mass_matrix,fctp->u_coupler,0.,fctp->iteration_matrix);
374 } else {
375 Paso_FCTSolver_setMuPaLu(b,fctp->lumped_mass_matrix,fctp->u_coupler,-dt*0.5,fctp->iteration_matrix);
376 }
377 /*
378 * uTilde[i]=b[i]/m[i]
379 *
380 */
381 #pragma omp parallel for private(i,m)
382 for (i = 0; i < n; ++i) {
383 m=fctp->lumped_mass_matrix[i];
384 uTilde[i]=b[i]/m;
385 }
386 /* distribute uTilde: */
387 Paso_Coupler_startCollect(uTilde_coupler,uTilde);
388 Paso_Coupler_finishCollect(uTilde_coupler);
389 /*
390 * calculate QP[i] max_{j} (\tilde{u}[j]- \tilde{u}[i] )
391 * QN[i] min_{j} (\tilde{u}[j]- \tilde{u}[i] )
392 *
393 */
394 Paso_FCTSolver_setQs(uTilde_coupler,QN,QP,fctp->iteration_matrix);
395 Paso_Coupler_startCollect(QN_coupler,QN);
396 Paso_Coupler_startCollect(QP_coupler,QP);
397 Paso_Coupler_finishCollect(QN_coupler);
398 Paso_Coupler_finishCollect(QP_coupler);
399 }

  ViewVC Help
Powered by ViewVC 1.1.26