/[escript]/trunk/paso/src/FCT_Solver.c
ViewVC logotype

Contents of /trunk/paso/src/FCT_Solver.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4286 - (show annotations)
Thu Mar 7 04:28:11 2013 UTC (6 years, 6 months ago) by caltinay
File MIME type: text/plain
File size: 30846 byte(s)
Assorted spelling fixes.

1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2013 by University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Open Software License version 3.0
9 * http://www.opensource.org/licenses/osl-3.0.php
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development since 2012 by School of Earth Sciences
13 *
14 *****************************************************************************/
15
16
17 /************************************************************************************/
18
19 /* Paso: Transport solver with flux correction (L is row sum zero)
20 *
21 * - Mv_t=Lv v(0)=u
22 *
23 * to return v(dt)
24 *
25 */
26 /************************************************************************************/
27
28 /* Author: l.gross@uq.edu.au */
29
30 /************************************************************************************/
31
32 #include "FCT_Solver.h"
33 #include "Preconditioner.h"
34 #include "PasoUtil.h"
35
36
37 Paso_FCT_Solver* Paso_FCT_Solver_alloc(Paso_TransportProblem *fctp, Paso_Options* options)
38 {
39 Paso_FCT_Solver* out=NULL;
40 const dim_t blockSize=Paso_TransportProblem_getBlockSize(fctp);
41 const dim_t n = Paso_TransportProblem_getTotalNumRows(fctp);
42
43 out=MEMALLOC(1,Paso_FCT_Solver);
44 if (! Esys_checkPtr(out)) {
45 out->transportproblem = Paso_TransportProblem_getReference(fctp);
46 out->mpi_info = Esys_MPIInfo_getReference(fctp->mpi_info);
47 out->flux_limiter = Paso_FCT_FluxLimiter_alloc(fctp);
48 out->b = MEMALLOC(n, double);
49 if ( (options->ode_solver == PASO_CRANK_NICOLSON) || (options->ode_solver == PASO_BACKWARD_EULER) ) {
50 out->du = MEMALLOC(n, double);
51 out->z = MEMALLOC(n, double);
52 } else {
53 out->du = NULL;
54 out->z=NULL;
55 }
56 out->u_coupler = Paso_Coupler_alloc(Paso_TransportProblem_borrowConnector(fctp), blockSize);
57 out->u_old_coupler = Paso_Coupler_alloc(Paso_TransportProblem_borrowConnector(fctp), blockSize);
58 out->omega=0;
59
60 if ( options->ode_solver == PASO_LINEAR_CRANK_NICOLSON ) {
61 out->method = PASO_LINEAR_CRANK_NICOLSON;
62 } else if ( options->ode_solver == PASO_CRANK_NICOLSON ) {
63 out->method = PASO_CRANK_NICOLSON;
64 } else if ( options->ode_solver == PASO_BACKWARD_EULER ) {
65 out->method = PASO_BACKWARD_EULER;
66 } else {
67 Esys_setError(VALUE_ERROR, "Paso_FCT_Solver_alloc: unknown integration scheme.");
68 out->method = UNKNOWN;
69 }
70
71 }
72
73 if (Esys_noError()) {
74 return out;
75 } else {
76 Paso_FCT_Solver_free(out);
77 return NULL;
78 }
79
80 }
81
82 void Paso_FCT_Solver_free(Paso_FCT_Solver *in)
83 {
84 if (in != NULL) {
85 Paso_TransportProblem_free(in->transportproblem);
86 Paso_FCT_FluxLimiter_free(in->flux_limiter);
87 Esys_MPIInfo_free(in->mpi_info);
88 Paso_Coupler_free(in->u_old_coupler);
89 Paso_Coupler_free(in->u_coupler);
90
91 MEMFREE(in->b);
92 MEMFREE(in->z);
93 MEMFREE(in->du);
94 MEMFREE(in);
95
96 }
97 }
98
99 double Paso_FCT_Solver_getSafeTimeStepSize(Paso_TransportProblem* fctp)
100 {
101 dim_t i, n;
102 double dt_max_loc=LARGE_POSITIVE_FLOAT;
103 double dt_max=LARGE_POSITIVE_FLOAT;
104 n=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix);
105 /* set low order transport operator */
106 Paso_FCT_setLowOrderOperator(fctp);
107
108 if (Esys_noError()) {
109 /*
110 * calculate time step size:
111 */
112 dt_max=LARGE_POSITIVE_FLOAT;
113 #pragma omp parallel private(i, dt_max_loc)
114 {
115 dt_max_loc=LARGE_POSITIVE_FLOAT;
116 #pragma omp for schedule(static)
117 for (i=0;i<n;++i) {
118 const double l_ii=fctp->main_diagonal_low_order_transport_matrix[i];
119 const double m_i=fctp->lumped_mass_matrix[i];
120 if ( m_i > 0 ) {
121 if (l_ii<0) dt_max_loc=MIN(dt_max_loc,m_i/(-l_ii));
122 }
123 }
124 #pragma omp critical
125 {
126 dt_max=MIN(dt_max,dt_max_loc);
127 }
128 }
129 #ifdef ESYS_MPI
130 {
131 dt_max_loc=dt_max;
132 MPI_Allreduce(&dt_max_loc, &dt_max, 1, MPI_DOUBLE, MPI_MIN, fctp->mpi_info->comm);
133 }
134 #endif
135 if (dt_max<LARGE_POSITIVE_FLOAT) dt_max*=2.;
136 }
137 return dt_max;
138 }
139
140 /* modifies the main diagonal of the iteration matrix to introduce new dt */
141 void Paso_FCT_Solver_initialize(const double dt, Paso_FCT_Solver *fct_solver, Paso_Options* options, Paso_Performance* pp)
142 {
143 Paso_TransportProblem* fctp = fct_solver->transportproblem;
144 const index_t* main_iptr=Paso_TransportProblem_borrowMainDiagonalPointer(fctp);
145 const dim_t n=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix);
146 const double theta = Paso_FCT_Solver_getTheta(fct_solver);
147 const double omega=1./(dt* theta);
148 dim_t i;
149 Paso_Options options2;
150
151
152
153 Paso_solve_free(fctp->iteration_matrix);
154 /*
155 * fctp->iteration_matrix[i,i]=m[i]/(dt theta) -l[i,i]
156 *
157 */
158 fct_solver->omega=omega;
159 fct_solver->dt = dt;
160 #pragma omp parallel for private(i)
161 for (i = 0; i < n; ++i) {
162 const double m_i=fctp->lumped_mass_matrix[i];
163 const double l_ii = fctp->main_diagonal_low_order_transport_matrix[i];
164 if ( m_i > 0 ) {
165 fctp->iteration_matrix->mainBlock->val[main_iptr[i]] = m_i * omega - l_ii;
166 } else {
167 fctp->iteration_matrix->mainBlock->val[main_iptr[i]] = ABS(m_i * omega - l_ii)/(EPSILON*EPSILON);
168 }
169 }
170
171 /* allocate preconditioner/solver */
172 Paso_Options_setDefaults(&options2);
173 options2.verbose = options->verbose;
174 if (fct_solver->method == PASO_LINEAR_CRANK_NICOLSON ) {
175 options2.preconditioner = PASO_GS;
176 } else {
177 options2.preconditioner = PASO_JACOBI;
178 /* options2.preconditioner = PASO_GS; */
179 }
180 options2.use_local_preconditioner = FALSE;
181 options2.sweeps=-1;
182
183 Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER_INIT);
184 Paso_SystemMatrix_setPreconditioner(fctp->iteration_matrix, &options2);
185 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER_INIT);
186 }
187
188 /* entry point for update procedures */
189 err_t Paso_FCT_Solver_update(Paso_FCT_Solver *fct_solver, double* u, double *u_old, Paso_Options* options, Paso_Performance *pp)
190 {
191 const index_t method=fct_solver->method;
192 err_t err_out = SOLVER_NO_ERROR;
193
194
195 if (method == PASO_LINEAR_CRANK_NICOLSON) {
196 err_out=Paso_FCT_Solver_update_LCN(fct_solver, u, u_old, options, pp);
197
198 } else if (method == PASO_CRANK_NICOLSON) {
199 err_out=Paso_FCT_Solver_updateNL(fct_solver, u, u_old, options, pp);
200
201 } else if (method == PASO_BACKWARD_EULER) {
202 err_out=Paso_FCT_Solver_updateNL(fct_solver, u, u_old, options, pp);
203 } else {
204 err_out = SOLVER_INPUT_ERROR;
205 }
206 return err_out;
207
208 }
209
210 /* linear crank-nicolson update */
211 err_t Paso_FCT_Solver_update_LCN(Paso_FCT_Solver *fct_solver, double * u, double *u_old, Paso_Options* options, Paso_Performance *pp)
212 {
213 double const dt = fct_solver->dt;
214 dim_t sweep_max, i;
215 double *b = fct_solver->b;
216 double const RTOL = options->tolerance;
217 const dim_t n=Paso_TransportProblem_getTotalNumRows(fct_solver->transportproblem);
218 Paso_SystemMatrix * iteration_matrix = fct_solver->transportproblem->iteration_matrix;
219 const index_t* main_iptr=Paso_TransportProblem_borrowMainDiagonalPointer(fct_solver->transportproblem);
220 err_t errorCode = SOLVER_NO_ERROR;
221 double norm_u_tilde;
222
223 Paso_Coupler_startCollect(fct_solver->u_old_coupler,u_old);
224 Paso_Coupler_finishCollect(fct_solver->u_old_coupler);
225
226 /* b[i]=m*u_tilde[i] = m u_old[i] + dt/2 sum_{j <> i} l_{ij}*(u_old[j]-u_old[i])
227 = u_tilde[i] = u_old[i] where constraint m<0.
228 * note that iteration_matrix stores the negative values of the
229 * low order transport matrix l. Therefore a=-dt*0.5 is used. */
230
231 Paso_FCT_Solver_setMuPaLu(b, fct_solver->transportproblem->lumped_mass_matrix,
232 fct_solver->u_old_coupler, -dt*0.5, iteration_matrix);
233 /* solve for u_tilde : u_tilda = m^{-1} * b */
234 Paso_FCT_FluxLimiter_setU_tilda(fct_solver->flux_limiter, b);
235 /* u_tilda_connector is completed */
236
237 /* calculate anti-diffusive fluxes for u_tilda */
238 Paso_FCT_setAntiDiffusionFlux_linearCN(fct_solver->flux_limiter->antidiffusive_fluxes,
239 fct_solver->transportproblem, dt,
240 fct_solver->flux_limiter->u_tilde_coupler,
241 fct_solver->u_old_coupler);
242
243
244 /* b_i += sum_{j} limitation factor_{ij} * antidiffusive_flux_{ij} */
245 Paso_FCT_FluxLimiter_addLimitedFluxes_Start(fct_solver->flux_limiter);
246 Paso_FCT_FluxLimiter_addLimitedFluxes_Complete(fct_solver->flux_limiter, b);
247
248 Paso_Scale(n, b,fct_solver->omega );
249 /* solve (m-dt/2*L) u = b in the form (omega*m-L) u = b * omega with omega*dt/2=1 */
250 #pragma omp for private(i) schedule(static)
251 for (i = 0; i < n; ++i) {
252 if (! (fct_solver->transportproblem->lumped_mass_matrix[i] > 0 ) ) {
253 b[i] = fct_solver->flux_limiter->u_tilde[i]
254 * fct_solver->transportproblem->iteration_matrix->mainBlock->val[main_iptr[i]];
255 }
256 }
257 /* initial guess is u<- -u + 2*u_tilde */
258 Paso_Update(n, -1., u, 2., fct_solver->flux_limiter->u_tilde);
259
260 sweep_max = MAX((int) (- 2 * log(RTOL)/log(2.)-0.5),1);
261 norm_u_tilde=Paso_lsup(n, fct_solver->flux_limiter->u_tilde, fct_solver->flux_limiter->mpi_info);
262 if (options->verbose) {
263 printf("Paso_FCT_Solver_update_LCN: u_tilda lsup = %e (rtol = %e, max. sweeps = %d)\n",norm_u_tilde,RTOL * norm_u_tilde ,sweep_max);
264 }
265 errorCode = Paso_Preconditioner_Smoother_solve_byTolerance( iteration_matrix, ((Paso_Preconditioner*) (iteration_matrix->solver_p))->gs,
266 u, b, RTOL, &sweep_max, TRUE);
267 if (errorCode == PRECONDITIONER_NO_ERROR) {
268 if (options->verbose) printf("Paso_FCT_Solver_update_LCN: convergence after %d Gauss-Seidel steps.\n",sweep_max);
269 errorCode=SOLVER_NO_ERROR;
270 } else {
271 if (options->verbose) printf("Paso_FCT_Solver_update_LCN: Gauss-Seidel failed within %d steps (rel. tolerance %e).\n",sweep_max,RTOL);
272 errorCode= SOLVER_MAXITER_REACHED;
273 }
274 return errorCode;
275
276 }
277
278 err_t Paso_FCT_Solver_updateNL(Paso_FCT_Solver *fct_solver, double* u, double *u_old, Paso_Options* options, Paso_Performance *pp)
279 {
280 const dim_t num_critical_rates_max=3; /* number of rates >=critical_rate accepted before divergence is triggered */
281 const double critical_rate=0.95; /* expected value of convergence rate */
282
283 double *b = fct_solver->b;
284 double *z = fct_solver->z;
285 double *du = fct_solver->du;
286 double const dt = fct_solver->dt;
287 Paso_TransportProblem* fctp = fct_solver->transportproblem;
288 Paso_FCT_FluxLimiter* flux_limiter = fct_solver->flux_limiter;
289 dim_t i;
290 double norm_u_tilde, ATOL, norm_du=LARGE_POSITIVE_FLOAT, norm_du_old, rate=1.;
291 err_t errorCode=SOLVER_NO_ERROR;
292 const dim_t n=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix);
293 const double atol=options->absolute_tolerance;
294 const double rtol=options->tolerance;
295 const dim_t max_m=options->iter_max;
296 dim_t m=0, num_critical_rates=0 ;
297 /* ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// */
298
299 bool_t converged=FALSE, max_m_reached=FALSE,diverged=FALSE;
300 options->num_iter=0;
301
302 Paso_Coupler_startCollect(fct_solver->u_old_coupler,u_old);
303 Paso_Coupler_finishCollect(fct_solver->u_old_coupler);
304 /* prepare u_tilda and flux limiter */
305 if ( fct_solver->method == PASO_BACKWARD_EULER ) {
306 /* b[i]=m_i* u_old[i] */
307 #pragma omp for private(i) schedule(static)
308 for (i = 0; i < n; ++i) {
309 if (fctp->lumped_mass_matrix[i] > 0 ) {
310 b[i]=u_old[i]* fctp->lumped_mass_matrix[i];
311 } else {
312 b[i]=u_old[i];
313 }
314 }
315 } else {
316 /* b[i]=m_i* u_old[i] + dt/2 sum_{j <> i} l_{ij}*(u_old[j]-u_old[i]) = m_i * u_tilde_i where m_i>0
317 * = u_old[i] otherwise
318 * note that iteration_matrix stores the negative values of the
319 * low order transport matrix l. Therefore a=-dt*0.5 is used. */
320 Paso_FCT_Solver_setMuPaLu(b,fctp->lumped_mass_matrix,fct_solver->u_old_coupler,-dt*0.5,fctp->iteration_matrix);
321 }
322 Paso_FCT_FluxLimiter_setU_tilda(flux_limiter, b); /* u_tilda = m^{-1} b */
323 /* u_tilda_connector is completed */
324 /********************************************************************************************************************************************/
325 /* calculate stopping criterion */
326 norm_u_tilde=Paso_lsup(n, flux_limiter->u_tilde, flux_limiter->mpi_info);
327 ATOL= rtol * norm_u_tilde + atol ;
328 if (options->verbose) printf("Paso_FCT_Solver_updateNL: iteration starts u_tilda lsup = %e (abs. tol = %e)\n",norm_u_tilde,ATOL);
329 /* ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// */
330
331 /* u_old is an initial guess for u*/
332 Paso_Copy(n,u,u_old);
333
334 while ( (!converged) && (!diverged) && (! max_m_reached) && Esys_noError()) {
335 Paso_Coupler_startCollect(fct_solver->u_coupler,u);
336 Paso_Coupler_finishCollect(fct_solver->u_coupler);
337
338 /* set antidiffusive_flux_{ij} for u */
339 if (fct_solver->method == PASO_BACKWARD_EULER) {
340 Paso_FCT_setAntiDiffusionFlux_BE(fct_solver->flux_limiter->antidiffusive_fluxes, fctp, dt, fct_solver->u_coupler, fct_solver->u_old_coupler);
341 } else {
342 Paso_FCT_setAntiDiffusionFlux_CN(fct_solver->flux_limiter->antidiffusive_fluxes, fctp, dt, fct_solver->u_coupler, fct_solver->u_old_coupler);
343 }
344 /* start the calculation of the limitation factors_{fct_solver->ij} */
345 Paso_FCT_FluxLimiter_addLimitedFluxes_Start(flux_limiter); /* uses u_tilde */
346
347 /*
348 * z_m[i]=b[i] - (m_i*u[i] - omega*sum_{j<>i} l_{ij} (u[j]-u[i]) ) where m_i>0
349 * ==b[i] - u[i] = u_tilda[i]-u[i] =0 otherwise
350 *
351 * omega = dt/2 or dt .
352 *
353 * note that iteration_matrix stores the negative values of the
354 * low order transport matrix l. Therefore a=dt*theta is used.
355 */
356 if (fct_solver-> method == PASO_BACKWARD_EULER) {
357 Paso_FCT_Solver_setMuPaLu(z, fctp->lumped_mass_matrix, fct_solver->u_coupler, dt, fctp->iteration_matrix);
358 } else {
359 Paso_FCT_Solver_setMuPaLu(z, fctp->lumped_mass_matrix, fct_solver->u_coupler, dt/2, fctp->iteration_matrix);
360 }
361
362
363 Paso_Update(n,-1.,z,1.,b); /* z=b-z */
364
365 /* z_i += sum_{j} limitation factor_{ij} * antidiffusive_flux_{ij} */
366 Paso_FCT_FluxLimiter_addLimitedFluxes_Complete(flux_limiter, z);
367
368 /* we solve (m/omega - L ) * du = z */
369 if (fct_solver->method == PASO_BACKWARD_EULER) {
370 dim_t cntIter = options->iter_max;
371 double tol= Paso_l2(n, z, fctp->mpi_info) ;
372
373 if ( m ==0) {
374 tol *=0.5;
375 } else {
376 tol *= MIN(MAX(rate*rate, 1e-2), 0.5);
377 }
378 /* use BiCGStab with jacobi preconditioner ( m - omega * L ) */
379 Paso_zeroes(n,du);
380 errorCode = Paso_Solver_BiCGStab(fctp->iteration_matrix, z, du, &cntIter, &tol, pp);
381
382 /* errorCode = Paso_Solver_GMRES(fctp->iteration_matrix, z, du, &cntIter, &tol, 10, 2000, pp); */
383 if (options->verbose) printf("Paso_FCT_Solver_updateNL: BiCGStab is completed after %d steps (residual =%e).\n",cntIter, tol);
384 options->num_iter+=cntIter;
385 if ( errorCode != SOLVER_NO_ERROR) break;
386 } else {
387 /* just use the main diagonal of (m/omega - L ) */
388
389 Paso_Preconditioner_Smoother_solve(fctp->iteration_matrix, ((Paso_Preconditioner*) (fctp->iteration_matrix->solver_p))->jacobi,
390 du, z, 1, FALSE);
391
392 options->num_iter++;
393 }
394
395
396 Paso_Update(n,1.,u,fct_solver->omega,du);
397 norm_du_old=norm_du;
398 norm_du=Paso_lsup(n,du, fctp->mpi_info);
399 if (m ==0) {
400 if (options->verbose) printf("Paso_FCT_Solver_updateNL: step %d: increment= %e\n",m+1, norm_du * fct_solver->omega);
401 } else {
402 if (norm_du_old > 0.) {
403 rate=norm_du/norm_du_old;
404 } else if (norm_du <= 0.) {
405 rate=0.;
406 } else {
407 rate=LARGE_POSITIVE_FLOAT;
408 }
409 if (options->verbose) printf("Paso_FCT_Solver_updateNL: step %d: increment= %e (rate = %e)\n",m+1, norm_du * fct_solver->omega, rate);
410 num_critical_rates+=( rate<critical_rate ? 0 : 1);
411 max_m_reached=(m>max_m);
412 diverged = (num_critical_rates >= num_critical_rates_max);
413 converged=(norm_du * fct_solver->omega <= ATOL) ;
414 }
415 m++;
416 } /* end of while loop */
417 if (errorCode == SOLVER_NO_ERROR) {
418 if (converged) {
419 if (options->verbose) printf("Paso_FCT_Solver_updateNL: iteration is completed.\n");
420 errorCode=SOLVER_NO_ERROR;
421 } else if (diverged) {
422 if (options->verbose) printf("Paso_FCT_Solver_updateNL: divergence.\n");
423 errorCode=SOLVER_DIVERGENCE;
424 } else if (max_m_reached) {
425 if (options->verbose) printf("Paso_FCT_Solver_updateNL: maximum number of iteration steps reached.\n");
426 errorCode=SOLVER_MAXITER_REACHED;
427 }
428
429 }
430 return errorCode;
431 }
432
433
434 /*
435 * AntiDiffusionFlux:
436 *
437 * f_{ij} = (m_{ij} - dt (1-theta) d_{ij}) (u_old[j]-u_old[i]) - (m_{ij} + dt theta d_{ij}) (u[j]-u[i])
438 *
439 * m=fc->mass matrix
440 * d=artificial diffusion matrix = L - K = - fc->iteration matrix - fc->transport matrix (away from main diagonal)
441 *
442 * for CN : theta =0.5
443 * for BE : theta = 1.
444 */
445
446 void Paso_FCT_setAntiDiffusionFlux_CN(Paso_SystemMatrix *flux_matrix,
447 const Paso_TransportProblem* fct,
448 const double dt,
449 const Paso_Coupler* u_coupler,
450 const Paso_Coupler* u_old_coupler)
451 {
452 dim_t i;
453 index_t iptr_ij;
454
455 const double *u = Paso_Coupler_borrowLocalData(u_coupler);
456 const double *u_old= Paso_Coupler_borrowLocalData(u_old_coupler);
457 const double *remote_u=Paso_Coupler_borrowRemoteData(u_coupler);
458 const double *remote_u_old=Paso_Coupler_borrowRemoteData(u_old_coupler);
459 const double dt_half= dt/2;
460 const Paso_SystemMatrixPattern *pattern=fct->iteration_matrix->pattern;
461 const dim_t n=Paso_SystemMatrix_getTotalNumRows(fct->iteration_matrix);
462
463 #pragma omp parallel for schedule(static) private(i, iptr_ij)
464 for (i = 0; i < n; ++i) {
465 const double u_i = u[i];
466 const double u_old_i = u_old[i];
467
468 #pragma ivdep
469 for (iptr_ij=(pattern->mainPattern->ptr[i]);iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
470 const index_t j = pattern->mainPattern->index[iptr_ij];
471 const double m_ij = fct->mass_matrix->mainBlock->val[iptr_ij];
472 const double d_ij = fct->transport_matrix->mainBlock->val[iptr_ij]+fct->iteration_matrix->mainBlock->val[iptr_ij]; /* this is in fact -d_ij */
473 const double u_old_j = u_old[j];
474 const double u_j = u[j];
475
476 /* (m_{ij} - dt (1-theta) d_{ij}) (u_old[j]-u_old[i]) - (m_{ij} + dt theta d_{ij}) (u[j]-u[i]) */
477 flux_matrix->mainBlock->val[iptr_ij]=(m_ij+dt_half*d_ij)*(u_old_j-u_old_i) - (m_ij-dt_half*d_ij)*(u_j-u_i);
478
479 }
480 #pragma ivdep
481 for (iptr_ij=(pattern->col_couplePattern->ptr[i]);iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
482 const index_t j = pattern->col_couplePattern->index[iptr_ij];
483 const double m_ij = fct->mass_matrix->col_coupleBlock->val[iptr_ij];
484 const double d_ij = fct->transport_matrix->col_coupleBlock->val[iptr_ij]+fct->iteration_matrix->col_coupleBlock->val[iptr_ij]; /* this is in fact -d_ij */
485 const double u_old_j = remote_u_old[j];
486 const double u_j = remote_u[j];
487 flux_matrix->col_coupleBlock->val[iptr_ij]=(m_ij+dt_half*d_ij)*(u_old_j-u_old_i)- (m_ij-dt_half*d_ij)*(u_j-u_i);
488 }
489 }
490 }
491
492 void Paso_FCT_setAntiDiffusionFlux_BE(Paso_SystemMatrix *flux_matrix,
493 const Paso_TransportProblem* fct,
494 const double dt,
495 const Paso_Coupler* u_coupler,
496 const Paso_Coupler* u_old_coupler)
497 {
498 dim_t i;
499 index_t iptr_ij;
500
501 const double *u=Paso_Coupler_borrowLocalData(u_coupler);
502 const double *u_old= Paso_Coupler_borrowLocalData(u_old_coupler);
503 const double *remote_u=Paso_Coupler_borrowRemoteData(u_coupler);
504 const double *remote_u_old=Paso_Coupler_borrowRemoteData(u_old_coupler);
505 const Paso_SystemMatrixPattern *pattern=fct->iteration_matrix->pattern;
506 const dim_t n=Paso_SystemMatrix_getTotalNumRows(fct->iteration_matrix);
507
508 #pragma omp parallel for schedule(static) private(i, iptr_ij)
509 for (i = 0; i < n; ++i) {
510 const double u_i = u[i];
511 const double u_old_i = u_old[i];
512 #pragma ivdep
513 for (iptr_ij=pattern->mainPattern->ptr[i]; iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
514
515 const index_t j = pattern->mainPattern->index[iptr_ij];
516 const double m_ij = fct->mass_matrix->mainBlock->val[iptr_ij];
517 const double d_ij = fct->transport_matrix->mainBlock->val[iptr_ij]+fct->iteration_matrix->mainBlock->val[iptr_ij]; /* this is in fact -d_ij */
518 const double u_old_j = u_old[j];
519 const double u_j = u[j];
520
521 flux_matrix->mainBlock->val[iptr_ij]=m_ij*(u_old_j-u_old_i)- (m_ij-dt*d_ij)*(u_j-u_i);
522 }
523 #pragma ivdep
524 for (iptr_ij=pattern->col_couplePattern->ptr[i]; iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
525 const index_t j = pattern->col_couplePattern->index[iptr_ij];
526 const double m_ij = fct->mass_matrix->col_coupleBlock->val[iptr_ij]; /* this is in fact -d_ij */
527 const double d_ij = fct->transport_matrix->col_coupleBlock->val[iptr_ij]+fct->iteration_matrix->col_coupleBlock->val[iptr_ij];
528 const double u_old_j = remote_u_old[j];
529 const double u_j = remote_u[j];
530
531 flux_matrix->col_coupleBlock->val[iptr_ij]=m_ij*(u_old_j-u_old_i)- (m_ij-dt*d_ij)*(u_j-u_i);
532 }
533 }
534 }
535
536 /* special version of the anti-diffusive fluxes for the linear Crank-Nicolson scheme
537 * in fact this is evaluated for u = 2*u_tilde - u_old which is the predictor
538 * of the solution of the stabilized problem at time dt using the forward Euler scheme
539 *
540 * f_{ij} = (m_{ij} - dt/2 d_{ij}) (u_old[j]-u_old[i]) - (m_{ij} + dt/2 d_{ij}) (u[j]-u[i])
541 * = (m_{ij} - dt/2 d_{ij}) * (u_old[j]-u_old[i]) - (m_{ij} + dt/2 d_{ij}) * ( 2*(u_tilde[j]-u_tilde[i]) - (u_old[j] -u_old [i]) )
542 * = 2* m_{ij} * ( (u_old[j]-u_tilde[j] - (u_old[i]) - u_tilde[i]) ) - dt d_{ij} * (u_tilde[j]-u_tilde[i])
543 *
544 */
545
546 void Paso_FCT_setAntiDiffusionFlux_linearCN(Paso_SystemMatrix *flux_matrix,
547 const Paso_TransportProblem* fct,
548 const double dt,
549 const Paso_Coupler* u_tilde_coupler,
550 const Paso_Coupler* u_old_coupler)
551 {
552 dim_t i;
553 index_t iptr_ij;
554
555 const double *u_tilde=Paso_Coupler_borrowLocalData(u_tilde_coupler);
556 const double *u_old= Paso_Coupler_borrowLocalData(u_old_coupler);
557 const double *remote_u_tilde=Paso_Coupler_borrowRemoteData(u_tilde_coupler);
558 const double *remote_u_old=Paso_Coupler_borrowRemoteData(u_old_coupler);
559 const Paso_SystemMatrixPattern *pattern=fct->iteration_matrix->pattern;
560 const dim_t n=Paso_SystemMatrix_getTotalNumRows(fct->iteration_matrix);
561
562 #pragma omp parallel for schedule(static) private(i, iptr_ij)
563 for (i = 0; i < n; ++i) {
564 const double u_tilde_i = u_tilde[i];
565 const double u_old_i = u_old[i];
566 const double du_i = u_tilde_i - u_old_i;
567 #pragma ivdep
568 for (iptr_ij=(pattern->mainPattern->ptr[i]);iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
569
570 const index_t j = pattern->mainPattern->index[iptr_ij];
571 const double m_ij = fct->mass_matrix->mainBlock->val[iptr_ij];
572 const double d_ij = fct->transport_matrix->mainBlock->val[iptr_ij]+fct->iteration_matrix->mainBlock->val[iptr_ij]; /* this is in fact -d_ij */
573 const double u_tilde_j = u_tilde[j];
574 const double u_old_j = u_old[j];
575 const double du_j = u_tilde_j - u_old_j;
576
577 flux_matrix->mainBlock->val[iptr_ij]= 2 * m_ij * ( du_i - du_j ) - dt * d_ij * ( u_tilde_i - u_tilde_j);
578 }
579 #pragma ivdep
580 for (iptr_ij=(pattern->col_couplePattern->ptr[i]);iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
581
582 const index_t j = pattern->col_couplePattern->index[iptr_ij];
583 const double m_ij = fct->mass_matrix->col_coupleBlock->val[iptr_ij];
584 const double d_ij = fct->transport_matrix->col_coupleBlock->val[iptr_ij]+fct->iteration_matrix->col_coupleBlock->val[iptr_ij];/* this is in fact -d_ij */
585 const double u_tilde_j = remote_u_tilde[j];
586 const double u_old_j = remote_u_old[j];
587 const double du_j = u_tilde_j - u_old_j;
588
589 flux_matrix->col_coupleBlock->val[iptr_ij]= 2 * m_ij * ( du_i - du_j ) - dt * d_ij * ( u_tilde_i - u_tilde_j);
590
591 }
592 }
593
594 }
595
596 /************************************************************************************/
597
598 /* Creates the low order transport matrix and stores its negative values
599 * into the iteration_matrix except for the main diagonal which is stored
600 * separately.
601 * If fc->iteration_matrix==NULL, fc->iteration_matrix is allocated
602 *
603 * a=transport_matrix
604 * b= low_order_transport_matrix = - iteration_matrix
605 * c=main diagonal low_order_transport_matrix
606 * initialize c[i] mit a[i,i]
607 *
608 * d_ij=max(0,-a[i,j],-a[j,i])
609 * b[i,j]=-(a[i,j]+d_ij)
610 * c[i]-=d_ij
611 */
612
613 void Paso_FCT_setLowOrderOperator(Paso_TransportProblem * fc) {
614
615 dim_t i;
616 index_t iptr_ij, iptr_ji;
617 const index_t* main_iptr=Paso_TransportProblem_borrowMainDiagonalPointer(fc);
618
619 if (fc->iteration_matrix==NULL) {
620 fc->iteration_matrix=Paso_SystemMatrix_alloc(fc->transport_matrix->type,
621 fc->transport_matrix->pattern,
622 fc->transport_matrix->row_block_size,
623 fc->transport_matrix->col_block_size, TRUE);
624 }
625
626 if (Esys_noError()) {
627 const Paso_SystemMatrixPattern *pattern=fc->iteration_matrix->pattern;
628 const dim_t n=Paso_SystemMatrix_getTotalNumRows(fc->iteration_matrix);
629 #pragma omp parallel for private(i, iptr_ij, iptr_ji) schedule(static)
630 for (i = 0; i < n; ++i) {
631 double sum=fc->transport_matrix->mainBlock->val[main_iptr[i]];
632
633 /* printf("sum[%d] = %e -> ", i, sum); */
634 /* look at a[i,j] */
635 for (iptr_ij=pattern->mainPattern->ptr[i];iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
636 const index_t j = pattern->mainPattern->index[iptr_ij];
637 const double rtmp1 = fc->transport_matrix->mainBlock->val[iptr_ij];
638 if (j!=i) {
639 /* find entry a[j,i] */
640 #pragma ivdep
641 for (iptr_ji=pattern->mainPattern->ptr[j]; iptr_ji<pattern->mainPattern->ptr[j+1]; ++iptr_ji) {
642
643 if ( pattern->mainPattern->index[iptr_ji] == i) {
644 const double rtmp2=fc->transport_matrix->mainBlock->val[iptr_ji];
645 /*
646 printf("a[%d,%d]=%e\n",i,j,rtmp1);
647 printf("a[%d,%d]=%e\n",j,i,rtmp2);
648 */
649
650 const double d_ij=-MIN3(0.,rtmp1,rtmp2);
651 fc->iteration_matrix->mainBlock->val[iptr_ij]=-(rtmp1+d_ij);
652 /* printf("l[%d,%d]=%e\n",i,j,fc->iteration_matrix->mainBlock->val[iptr_ij]); */
653 sum-=d_ij;
654 break;
655 }
656 }
657 }
658 }
659 for (iptr_ij=pattern->col_couplePattern->ptr[i];iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
660 const index_t j = pattern->col_couplePattern->index[iptr_ij];
661 const double rtmp1 = fc->transport_matrix->col_coupleBlock->val[iptr_ij];
662 /* find entry a[j,i] */
663 #pragma ivdep
664 for (iptr_ji=pattern->row_couplePattern->ptr[j]; iptr_ji<pattern->row_couplePattern->ptr[j+1]; ++iptr_ji) {
665 if (pattern->row_couplePattern->index[iptr_ji]==i) {
666 const double rtmp2=fc->transport_matrix->row_coupleBlock->val[iptr_ji];
667 const double d_ij=-MIN3(0.,rtmp1,rtmp2);
668 fc->iteration_matrix->col_coupleBlock->val[iptr_ij]=-(rtmp1+d_ij);
669 fc->iteration_matrix->row_coupleBlock->val[iptr_ji]=-(rtmp2+d_ij);
670 sum-=d_ij;
671 break;
672 }
673 }
674 }
675 /* set main diagonal entry */
676 fc->main_diagonal_low_order_transport_matrix[i]=sum;
677 /* printf("%e \n", sum); */
678 }
679
680 }
681 }
682
683 /*
684 * out_i=m_i u_i + a * \sum_{j <> i} l_{ij} (u_j-u_i) where m_i>0
685 * = u_i where m_i<=0
686 *
687 */
688 void Paso_FCT_Solver_setMuPaLu(double* out,
689 const double* M,
690 const Paso_Coupler* u_coupler,
691 const double a,
692 const Paso_SystemMatrix *L)
693 {
694 dim_t i;
695 const Paso_SystemMatrixPattern *pattern = L->pattern;
696 const double *u=Paso_Coupler_borrowLocalData(u_coupler);
697 const double *remote_u=Paso_Coupler_borrowRemoteData(u_coupler);
698 index_t iptr_ij;
699 const dim_t n=Paso_SystemMatrix_getTotalNumRows(L);
700
701 #pragma omp parallel for private(i) schedule(static)
702 for (i = 0; i < n; ++i) {
703 if ( M[i] > 0.) {
704 out[i]=M[i]*u[i];
705 } else {
706 out[i]=u[i];
707 }
708 }
709 if (ABS(a)>0) {
710 #pragma omp parallel for schedule(static) private(i, iptr_ij)
711 for (i = 0; i < n; ++i) {
712 if ( M[i] > 0.) {
713 double sum=0;
714 const double u_i=u[i];
715 #pragma ivdep
716 for (iptr_ij=(pattern->mainPattern->ptr[i]);iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
717 const index_t j=pattern->mainPattern->index[iptr_ij];
718 const double l_ij=L->mainBlock->val[iptr_ij];
719 sum+=l_ij*(u[j]-u_i);
720
721 }
722 #pragma ivdep
723 for (iptr_ij=(pattern->col_couplePattern->ptr[i]);iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
724 const index_t j=pattern->col_couplePattern->index[iptr_ij];
725 const double l_ij=L->col_coupleBlock->val[iptr_ij];
726 sum+=l_ij*(remote_u[j]-u_i);
727 }
728 out[i]+=a*sum;
729 }
730 }
731 }
732 }
733
734 /* ************************************************************************************************************************************************* */
735
736
737
738
739
740
741

  ViewVC Help
Powered by ViewVC 1.1.26