/[escript]/branches/symbolic_from_3470/paso/src/FCT_Solver.c
ViewVC logotype

Contents of /branches/symbolic_from_3470/paso/src/FCT_Solver.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3868 - (show annotations)
Thu Mar 15 06:07:08 2012 UTC (7 years, 1 month ago) by caltinay
File MIME type: text/plain
File size: 30598 byte(s)
Update to latest trunk

1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2010 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14
15 /**************************************************************/
16
17 /* Paso: Transport solver with flux correction (L is row sum zero)
18 *
19 * - Mv_t=Lv v(0)=u
20 *
21 * to return v(dt)
22 *
23 */
24 /**************************************************************/
25
26 /* Author: l.gross@uq.edu.au */
27
28 /**************************************************************/
29
30 #include "FCT_Solver.h"
31 #include "Preconditioner.h"
32 #include "PasoUtil.h"
33
34
35 Paso_FCT_Solver* Paso_FCT_Solver_alloc(Paso_TransportProblem *fctp, Paso_Options* options)
36 {
37 Paso_FCT_Solver* out=NULL;
38 const dim_t blockSize=Paso_TransportProblem_getBlockSize(fctp);
39 const dim_t n = Paso_TransportProblem_getTotalNumRows(fctp);
40
41 out=MEMALLOC(1,Paso_FCT_Solver);
42 if (! Esys_checkPtr(out)) {
43 out->transportproblem = Paso_TransportProblem_getReference(fctp);
44 out->mpi_info = Esys_MPIInfo_getReference(fctp->mpi_info);
45 out->flux_limiter = Paso_FCT_FluxLimiter_alloc(fctp);
46 out->b = MEMALLOC(n, double);
47 if ( (options->ode_solver == PASO_CRANK_NICOLSON) || (options->ode_solver == PASO_BACKWARD_EULER) ) {
48 out->du = MEMALLOC(n, double);
49 out->z = MEMALLOC(n, double);
50 } else {
51 out->du = NULL;
52 out->z=NULL;
53 }
54 out->u_coupler = Paso_Coupler_alloc(Paso_TransportProblem_borrowConnector(fctp), blockSize);
55 out->u_old_coupler = Paso_Coupler_alloc(Paso_TransportProblem_borrowConnector(fctp), blockSize);
56 out->omega=0;
57
58 if ( options->ode_solver == PASO_LINEAR_CRANK_NICOLSON ) {
59 out->method = PASO_LINEAR_CRANK_NICOLSON;
60 } else if ( options->ode_solver == PASO_CRANK_NICOLSON ) {
61 out->method = PASO_CRANK_NICOLSON;
62 } else if ( options->ode_solver == PASO_BACKWARD_EULER ) {
63 out->method = PASO_BACKWARD_EULER;
64 } else {
65 Esys_setError(VALUE_ERROR, "Paso_FCT_Solver_alloc: unknown integration scheme.");
66 out->method = UNKNOWN;
67 }
68
69 }
70
71 if (Esys_noError()) {
72 return out;
73 } else {
74 Paso_FCT_Solver_free(out);
75 return NULL;
76 }
77
78 }
79
80 void Paso_FCT_Solver_free(Paso_FCT_Solver *in)
81 {
82 if (in != NULL) {
83 Paso_TransportProblem_free(in->transportproblem);
84 Paso_FCT_FluxLimiter_free(in->flux_limiter);
85 Esys_MPIInfo_free(in->mpi_info);
86 Paso_Coupler_free(in->u_old_coupler);
87 Paso_Coupler_free(in->u_coupler);
88
89 MEMFREE(in->b);
90 MEMFREE(in->z);
91 MEMFREE(in->du);
92 MEMFREE(in);
93
94 }
95 }
96
97 double Paso_FCT_Solver_getSafeTimeStepSize(Paso_TransportProblem* fctp)
98 {
99 dim_t i, n;
100 double dt_max_loc=LARGE_POSITIVE_FLOAT;
101 double dt_max=LARGE_POSITIVE_FLOAT;
102 n=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix);
103 /* set low order transport operator */
104 Paso_FCT_setLowOrderOperator(fctp);
105
106 if (Esys_noError()) {
107 /*
108 * calculate time step size:
109 */
110 dt_max=LARGE_POSITIVE_FLOAT;
111 #pragma omp parallel private(i, dt_max_loc)
112 {
113 dt_max_loc=LARGE_POSITIVE_FLOAT;
114 #pragma omp for schedule(static)
115 for (i=0;i<n;++i) {
116 const double l_ii=fctp->main_diagonal_low_order_transport_matrix[i];
117 const double m_i=fctp->lumped_mass_matrix[i];
118 if ( m_i > 0 ) {
119 if (l_ii<0) dt_max_loc=MIN(dt_max_loc,m_i/(-l_ii));
120 }
121 }
122 #pragma omp critical
123 {
124 dt_max=MIN(dt_max,dt_max_loc);
125 }
126 }
127 #ifdef ESYS_MPI
128 {
129 dt_max_loc=dt_max;
130 MPI_Allreduce(&dt_max_loc, &dt_max, 1, MPI_DOUBLE, MPI_MIN, fctp->mpi_info->comm);
131 }
132 #endif
133 if (dt_max<LARGE_POSITIVE_FLOAT) dt_max*=2.;
134 }
135 return dt_max;
136 }
137
138 /* modifies the main diagonal of the iteration matrix to introduce new dt */
139 void Paso_FCT_Solver_initialize(const double dt, Paso_FCT_Solver *fct_solver, Paso_Options* options, Paso_Performance* pp)
140 {
141 Paso_TransportProblem* fctp = fct_solver->transportproblem;
142 const index_t* main_iptr=Paso_TransportProblem_borrowMainDiagonalPointer(fctp);
143 const dim_t n=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix);
144 const double theta = Paso_FCT_Solver_getTheta(fct_solver);
145 const double omega=1./(dt* theta);
146 dim_t i;
147 Paso_Options options2;
148
149
150
151 Paso_solve_free(fctp->iteration_matrix);
152 /*
153 * fctp->iteration_matrix[i,i]=m[i]/(dt theta) -l[i,i]
154 *
155 */
156 fct_solver->omega=omega;
157 fct_solver->dt = dt;
158 #pragma omp parallel for private(i)
159 for (i = 0; i < n; ++i) {
160 const double m_i=fctp->lumped_mass_matrix[i];
161 const double l_ii = fctp->main_diagonal_low_order_transport_matrix[i];
162 if ( m_i > 0 ) {
163 fctp->iteration_matrix->mainBlock->val[main_iptr[i]] = m_i * omega - l_ii;
164 } else {
165 fctp->iteration_matrix->mainBlock->val[main_iptr[i]] = ABS(m_i * omega - l_ii)/(EPSILON*EPSILON);
166 }
167 }
168
169 /* allocate preconditioner/solver */
170 Paso_Options_setDefaults(&options2);
171 options2.verbose = options->verbose;
172 if (fct_solver->method == PASO_LINEAR_CRANK_NICOLSON ) {
173 options2.preconditioner = PASO_GS;
174 } else {
175 options2.preconditioner = PASO_JACOBI;
176 /* options2.preconditioner = PASO_GS; */
177 }
178 options2.use_local_preconditioner = FALSE;
179 options2.sweeps=-1;
180
181 Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER_INIT);
182 Paso_SystemMatrix_setPreconditioner(fctp->iteration_matrix, &options2);
183 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER_INIT);
184 }
185
186 /* entry point for update proceedures */
187 err_t Paso_FCT_Solver_update(Paso_FCT_Solver *fct_solver, double* u, double *u_old, Paso_Options* options, Paso_Performance *pp)
188 {
189 const index_t method=fct_solver->method;
190 err_t err_out = SOLVER_NO_ERROR;
191
192
193 if (method == PASO_LINEAR_CRANK_NICOLSON) {
194 err_out=Paso_FCT_Solver_update_LCN(fct_solver, u, u_old, options, pp);
195
196 } else if (method == PASO_CRANK_NICOLSON) {
197 err_out=Paso_FCT_Solver_updateNL(fct_solver, u, u_old, options, pp);
198
199 } else if (method == PASO_BACKWARD_EULER) {
200 err_out=Paso_FCT_Solver_updateNL(fct_solver, u, u_old, options, pp);
201 } else {
202 err_out = SOLVER_INPUT_ERROR;
203 }
204 return err_out;
205
206 }
207
208 /* linear crank-nicolson update */
209 err_t Paso_FCT_Solver_update_LCN(Paso_FCT_Solver *fct_solver, double * u, double *u_old, Paso_Options* options, Paso_Performance *pp)
210 {
211 double const dt = fct_solver->dt;
212 dim_t sweep_max, i;
213 double *b = fct_solver->b;
214 double const RTOL = options->tolerance;
215 const dim_t n=Paso_TransportProblem_getTotalNumRows(fct_solver->transportproblem);
216 Paso_SystemMatrix * iteration_matrix = fct_solver->transportproblem->iteration_matrix;
217 const index_t* main_iptr=Paso_TransportProblem_borrowMainDiagonalPointer(fct_solver->transportproblem);
218 err_t errorCode = SOLVER_NO_ERROR;
219 double norm_u_tilde;
220
221 Paso_Coupler_startCollect(fct_solver->u_old_coupler,u_old);
222 Paso_Coupler_finishCollect(fct_solver->u_old_coupler);
223
224 /* b[i]=m*u_tilde[i] = m u_old[i] + dt/2 sum_{j <> i} l_{ij}*(u_old[j]-u_old[i])
225 = u_tilde[i] = u_old[i] where constraint m<0.
226 * note that iteration_matrix stores the negative values of the
227 * low order transport matrix l. Therefore a=-dt*0.5 is used. */
228
229 Paso_FCT_Solver_setMuPaLu(b, fct_solver->transportproblem->lumped_mass_matrix,
230 fct_solver->u_old_coupler, -dt*0.5, iteration_matrix);
231 /* solve for u_tilde : u_tilda = m^{-1} * b */
232 Paso_FCT_FluxLimiter_setU_tilda(fct_solver->flux_limiter, b);
233 /* u_tilda_connector is completed */
234
235 /* calculate anti-diffusive fluxes for u_tilda */
236 Paso_FCT_setAntiDiffusionFlux_linearCN(fct_solver->flux_limiter->antidiffusive_fluxes,
237 fct_solver->transportproblem, dt,
238 fct_solver->flux_limiter->u_tilde_coupler,
239 fct_solver->u_old_coupler);
240
241
242 /* b_i += sum_{j} limitation factor_{ij} * antidiffusive_flux_{ij} */
243 Paso_FCT_FluxLimiter_addLimitedFluxes_Start(fct_solver->flux_limiter);
244 Paso_FCT_FluxLimiter_addLimitedFluxes_Complete(fct_solver->flux_limiter, b);
245
246 Paso_Scale(n, b,fct_solver->omega );
247 /* solve (m-dt/2*L) u = b in the form (omega*m-L) u = b * omega with omega*dt/2=1 */
248 #pragma omp for private(i) schedule(static)
249 for (i = 0; i < n; ++i) {
250 if (! (fct_solver->transportproblem->lumped_mass_matrix[i] > 0 ) ) {
251 b[i] = fct_solver->flux_limiter->u_tilde[i]
252 * fct_solver->transportproblem->iteration_matrix->mainBlock->val[main_iptr[i]];
253 }
254 }
255 /* initial guess is u<- -u + 2*u_tilde */
256 Paso_Update(n, -1., u, 2., fct_solver->flux_limiter->u_tilde);
257
258 sweep_max = MAX((int) (- 2 * log(RTOL)/log(2.)-0.5),1);
259 norm_u_tilde=Paso_lsup(n, fct_solver->flux_limiter->u_tilde, fct_solver->flux_limiter->mpi_info);
260 if (options->verbose) {
261 printf("Paso_FCT_Solver_update_LCN: u_tilda lsup = %e (rtol = %e, max. sweeps = %d)\n",norm_u_tilde,RTOL * norm_u_tilde ,sweep_max);
262 }
263 errorCode = Paso_Preconditioner_Smoother_solve_byTolerance( iteration_matrix, ((Paso_Preconditioner*) (iteration_matrix->solver_p))->gs,
264 u, b, RTOL, &sweep_max, TRUE);
265 if (errorCode == PRECONDITIONER_NO_ERROR) {
266 if (options->verbose) printf("Paso_FCT_Solver_update_LCN: convergence after %d Gauss-Seidel steps.\n",sweep_max);
267 errorCode=SOLVER_NO_ERROR;
268 } else {
269 if (options->verbose) printf("Paso_FCT_Solver_update_LCN: Gauss-Seidel failed within %d stesp (rel. tolerance %e).\n",sweep_max,RTOL);
270 errorCode= SOLVER_MAXITER_REACHED;
271 }
272 return errorCode;
273
274 }
275
276 err_t Paso_FCT_Solver_updateNL(Paso_FCT_Solver *fct_solver, double* u, double *u_old, Paso_Options* options, Paso_Performance *pp)
277 {
278 const dim_t num_critical_rates_max=3; /* number of rates >=critical_rate accepted before divergence is triggered */
279 const double critical_rate=0.95; /* expected value of convergence rate */
280
281 double *b = fct_solver->b;
282 double *z = fct_solver->z;
283 double *du = fct_solver->du;
284 double const dt = fct_solver->dt;
285 Paso_TransportProblem* fctp = fct_solver->transportproblem;
286 Paso_FCT_FluxLimiter* flux_limiter = fct_solver->flux_limiter;
287 dim_t i;
288 double norm_u_tilde, ATOL, norm_du=LARGE_POSITIVE_FLOAT, norm_du_old, rate=1.;
289 err_t errorCode=SOLVER_NO_ERROR;
290 const dim_t n=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix);
291 const double atol=options->absolute_tolerance;
292 const double rtol=options->tolerance;
293 const dim_t max_m=options->iter_max;
294 dim_t m=0, num_critical_rates=0 ;
295 /* ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// */
296
297 bool_t converged=FALSE, max_m_reached=FALSE,diverged=FALSE;
298 options->num_iter=0;
299
300 Paso_Coupler_startCollect(fct_solver->u_old_coupler,u_old);
301 Paso_Coupler_finishCollect(fct_solver->u_old_coupler);
302 /* prepare u_tilda and flux limiter */
303 if ( fct_solver->method == PASO_BACKWARD_EULER ) {
304 /* b[i]=m_i* u_old[i] */
305 #pragma omp for private(i) schedule(static)
306 for (i = 0; i < n; ++i) {
307 if (fctp->lumped_mass_matrix[i] > 0 ) {
308 b[i]=u_old[i]* fctp->lumped_mass_matrix[i];
309 } else {
310 b[i]=u_old[i];
311 }
312 }
313 } else {
314 /* b[i]=m_i* u_old[i] + dt/2 sum_{j <> i} l_{ij}*(u_old[j]-u_old[i]) = m_i * u_tilde_i where m_i>0
315 * = u_old[i] otherwise
316 * note that iteration_matrix stores the negative values of the
317 * low order transport matrix l. Therefore a=-dt*0.5 is used. */
318 Paso_FCT_Solver_setMuPaLu(b,fctp->lumped_mass_matrix,fct_solver->u_old_coupler,-dt*0.5,fctp->iteration_matrix);
319 }
320 Paso_FCT_FluxLimiter_setU_tilda(flux_limiter, b); /* u_tilda = m^{-1} b */
321 /* u_tilda_connector is completed */
322 /**********************************************************************************************************************/
323 /* calculate stopping criterium */
324 norm_u_tilde=Paso_lsup(n, flux_limiter->u_tilde, flux_limiter->mpi_info);
325 ATOL= rtol * norm_u_tilde + atol ;
326 if (options->verbose) printf("Paso_FCT_Solver_updateNL: iteration starts u_tilda lsup = %e (abs. tol = %e)\n",norm_u_tilde,ATOL);
327 /* ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// */
328
329 /* u_old is an initial guess for u*/
330 Paso_Copy(n,u,u_old);
331
332 while ( (!converged) && (!diverged) && (! max_m_reached) && Esys_noError()) {
333 Paso_Coupler_startCollect(fct_solver->u_coupler,u);
334 Paso_Coupler_finishCollect(fct_solver->u_coupler);
335
336 /* set antidiffusive_flux_{ij} for u */
337 if (fct_solver->method == PASO_BACKWARD_EULER) {
338 Paso_FCT_setAntiDiffusionFlux_BE(fct_solver->flux_limiter->antidiffusive_fluxes, fctp, dt, fct_solver->u_coupler, fct_solver->u_old_coupler);
339 } else {
340 Paso_FCT_setAntiDiffusionFlux_CN(fct_solver->flux_limiter->antidiffusive_fluxes, fctp, dt, fct_solver->u_coupler, fct_solver->u_old_coupler);
341 }
342 /* start the calculation of the limitation factors_{fct_solver->ij} */
343 Paso_FCT_FluxLimiter_addLimitedFluxes_Start(flux_limiter); /* uses u_tilde */
344
345 /*
346 * z_m[i]=b[i] - (m_i*u[i] - omega*sum_{j<>i} l_{ij} (u[j]-u[i]) ) where m_i>0
347 * ==b[i] - u[i] = u_tilda[i]-u[i] =0 otherwise
348 *
349 * omega = dt/2 or dt .
350 *
351 * note that iteration_matrix stores the negative values of the
352 * low order transport matrix l. Therefore a=dt*theta is used.
353 */
354 if (fct_solver-> method == PASO_BACKWARD_EULER) {
355 Paso_FCT_Solver_setMuPaLu(z, fctp->lumped_mass_matrix, fct_solver->u_coupler, dt, fctp->iteration_matrix);
356 } else {
357 Paso_FCT_Solver_setMuPaLu(z, fctp->lumped_mass_matrix, fct_solver->u_coupler, dt/2, fctp->iteration_matrix);
358 }
359
360
361 Paso_Update(n,-1.,z,1.,b); /* z=b-z */
362
363 /* z_i += sum_{j} limitation factor_{ij} * antidiffusive_flux_{ij} */
364 Paso_FCT_FluxLimiter_addLimitedFluxes_Complete(flux_limiter, z);
365
366 /* we solve (m/omega - L ) * du = z */
367 if (fct_solver->method == PASO_BACKWARD_EULER) {
368 dim_t cntIter = options->iter_max;
369 double tol= Paso_l2(n, z, fctp->mpi_info) ;
370
371 if ( m ==0) {
372 tol *=0.5;
373 } else {
374 tol *= MIN(MAX(rate*rate, 1e-2), 0.5);
375 }
376 /* use BiCGSTab with jacobi preconditioner ( m - omega * L ) */
377 Paso_zeroes(n,du);
378 errorCode = Paso_Solver_BiCGStab(fctp->iteration_matrix, z, du, &cntIter, &tol, pp);
379
380 /* errorCode = Paso_Solver_GMRES(fctp->iteration_matrix, z, du, &cntIter, &tol, 10, 2000, pp); */
381 if (options->verbose) printf("Paso_FCT_Solver_updateNL: BiCGStab is completed after %d steps (residual =%e).\n",cntIter, tol);
382 options->num_iter+=cntIter;
383 if ( errorCode != SOLVER_NO_ERROR) break;
384 } else {
385 /* just use the main diagonal of (m/omega - L ) */
386
387 Paso_Preconditioner_Smoother_solve(fctp->iteration_matrix, ((Paso_Preconditioner*) (fctp->iteration_matrix->solver_p))->jacobi,
388 du, z, 1, FALSE);
389
390 options->num_iter++;
391 }
392
393
394 Paso_Update(n,1.,u,fct_solver->omega,du);
395 norm_du_old=norm_du;
396 norm_du=Paso_lsup(n,du, fctp->mpi_info);
397 if (m ==0) {
398 if (options->verbose) printf("Paso_FCT_Solver_updateNL: step %d: increment= %e\n",m+1, norm_du * fct_solver->omega);
399 } else {
400 if (norm_du_old > 0.) {
401 rate=norm_du/norm_du_old;
402 } else if (norm_du <= 0.) {
403 rate=0.;
404 } else {
405 rate=LARGE_POSITIVE_FLOAT;
406 }
407 if (options->verbose) printf("Paso_FCT_Solver_updateNL: step %d: increment= %e (rate = %e)\n",m+1, norm_du * fct_solver->omega, rate);
408 num_critical_rates+=( rate<critical_rate ? 0 : 1);
409 max_m_reached=(m>max_m);
410 diverged = (num_critical_rates >= num_critical_rates_max);
411 converged=(norm_du * fct_solver->omega <= ATOL) ;
412 }
413 m++;
414 } /* end of while loop */
415 if (errorCode == SOLVER_NO_ERROR) {
416 if (converged) {
417 if (options->verbose) printf("Paso_FCT_Solver_updateNL: iteration is completed.\n");
418 errorCode=SOLVER_NO_ERROR;
419 } else if (diverged) {
420 if (options->verbose) printf("Paso_FCT_Solver_updateNL: divergence.\n");
421 errorCode=SOLVER_DIVERGENCE;
422 } else if (max_m_reached) {
423 if (options->verbose) printf("Paso_FCT_Solver_updateNL: maximum number of iteration steps reached.\n");
424 errorCode=SOLVER_MAXITER_REACHED;
425 }
426
427 }
428 return errorCode;
429 }
430
431
432 /*
433 * AntiDiffusionFlux:
434 *
435 * f_{ij} = (m_{ij} - dt (1-theta) d_{ij}) (u_old[j]-u_old[i]) - (m_{ij} + dt theta d_{ij}) (u[j]-u[i])
436 *
437 * m=fc->mass matrix
438 * d=artifical diffusion matrix = L - K = - fc->iteration matrix - fc->transport matrix (away from main diagonal)
439 *
440 * for CN : theta =0.5
441 * for BE : theta = 1.
442 */
443
444 void Paso_FCT_setAntiDiffusionFlux_CN(Paso_SystemMatrix *flux_matrix,
445 const Paso_TransportProblem* fct,
446 const double dt,
447 const Paso_Coupler* u_coupler,
448 const Paso_Coupler* u_old_coupler)
449 {
450 dim_t i;
451 index_t iptr_ij;
452
453 const double *u = Paso_Coupler_borrowLocalData(u_coupler);
454 const double *u_old= Paso_Coupler_borrowLocalData(u_old_coupler);
455 const double *remote_u=Paso_Coupler_borrowRemoteData(u_coupler);
456 const double *remote_u_old=Paso_Coupler_borrowRemoteData(u_old_coupler);
457 const double dt_half= dt/2;
458 const Paso_SystemMatrixPattern *pattern=fct->iteration_matrix->pattern;
459 const dim_t n=Paso_SystemMatrix_getTotalNumRows(fct->iteration_matrix);
460
461 #pragma omp parallel for schedule(static) private(i, iptr_ij)
462 for (i = 0; i < n; ++i) {
463 const double u_i = u[i];
464 const double u_old_i = u_old[i];
465
466 #pragma ivdep
467 for (iptr_ij=(pattern->mainPattern->ptr[i]);iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
468 const index_t j = pattern->mainPattern->index[iptr_ij];
469 const double m_ij = fct->mass_matrix->mainBlock->val[iptr_ij];
470 const double d_ij = fct->transport_matrix->mainBlock->val[iptr_ij]+fct->iteration_matrix->mainBlock->val[iptr_ij]; /* this is in fact -d_ij */
471 const double u_old_j = u_old[j];
472 const double u_j = u[j];
473
474 /* (m_{ij} - dt (1-theta) d_{ij}) (u_old[j]-u_old[i]) - (m_{ij} + dt theta d_{ij}) (u[j]-u[i]) */
475 flux_matrix->mainBlock->val[iptr_ij]=(m_ij+dt_half*d_ij)*(u_old_j-u_old_i) - (m_ij-dt_half*d_ij)*(u_j-u_i);
476
477 }
478 #pragma ivdep
479 for (iptr_ij=(pattern->col_couplePattern->ptr[i]);iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
480 const index_t j = pattern->col_couplePattern->index[iptr_ij];
481 const double m_ij = fct->mass_matrix->col_coupleBlock->val[iptr_ij];
482 const double d_ij = fct->transport_matrix->col_coupleBlock->val[iptr_ij]+fct->iteration_matrix->col_coupleBlock->val[iptr_ij]; /* this is in fact -d_ij */
483 const double u_old_j = remote_u_old[j];
484 const double u_j = remote_u[j];
485 flux_matrix->col_coupleBlock->val[iptr_ij]=(m_ij+dt_half*d_ij)*(u_old_j-u_old_i)- (m_ij-dt_half*d_ij)*(u_j-u_i);
486 }
487 }
488 }
489
490 void Paso_FCT_setAntiDiffusionFlux_BE(Paso_SystemMatrix *flux_matrix,
491 const Paso_TransportProblem* fct,
492 const double dt,
493 const Paso_Coupler* u_coupler,
494 const Paso_Coupler* u_old_coupler)
495 {
496 dim_t i;
497 index_t iptr_ij;
498
499 const double *u=Paso_Coupler_borrowLocalData(u_coupler);
500 const double *u_old= Paso_Coupler_borrowLocalData(u_old_coupler);
501 const double *remote_u=Paso_Coupler_borrowRemoteData(u_coupler);
502 const double *remote_u_old=Paso_Coupler_borrowRemoteData(u_old_coupler);
503 const Paso_SystemMatrixPattern *pattern=fct->iteration_matrix->pattern;
504 const dim_t n=Paso_SystemMatrix_getTotalNumRows(fct->iteration_matrix);
505
506 #pragma omp parallel for schedule(static) private(i, iptr_ij)
507 for (i = 0; i < n; ++i) {
508 const double u_i = u[i];
509 const double u_old_i = u_old[i];
510 #pragma ivdep
511 for (iptr_ij=pattern->mainPattern->ptr[i]; iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
512
513 const index_t j = pattern->mainPattern->index[iptr_ij];
514 const double m_ij = fct->mass_matrix->mainBlock->val[iptr_ij];
515 const double d_ij = fct->transport_matrix->mainBlock->val[iptr_ij]+fct->iteration_matrix->mainBlock->val[iptr_ij]; /* this is in fact -d_ij */
516 const double u_old_j = u_old[j];
517 const double u_j = u[j];
518
519 flux_matrix->mainBlock->val[iptr_ij]=m_ij*(u_old_j-u_old_i)- (m_ij-dt*d_ij)*(u_j-u_i);
520 }
521 #pragma ivdep
522 for (iptr_ij=pattern->col_couplePattern->ptr[i]; iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
523 const index_t j = pattern->col_couplePattern->index[iptr_ij];
524 const double m_ij = fct->mass_matrix->col_coupleBlock->val[iptr_ij]; /* this is in fact -d_ij */
525 const double d_ij = fct->transport_matrix->col_coupleBlock->val[iptr_ij]+fct->iteration_matrix->col_coupleBlock->val[iptr_ij];
526 const double u_old_j = remote_u_old[j];
527 const double u_j = remote_u[j];
528
529 flux_matrix->col_coupleBlock->val[iptr_ij]=m_ij*(u_old_j-u_old_i)- (m_ij-dt*d_ij)*(u_j-u_i);
530 }
531 }
532 }
533
534 /* special version of the ant-diffusive fluxes for the linear Crank-Nicolson scheme
535 * in fact this is evaluated for u = 2*u_tilde - u_old which is the predictor
536 * of the solution of the the stabilized problem at time dt using the forward Euler scheme
537 *
538 * f_{ij} = (m_{ij} - dt/2 d_{ij}) (u_old[j]-u_old[i]) - (m_{ij} + dt/2 d_{ij}) (u[j]-u[i])
539 * = (m_{ij} - dt/2 d_{ij}) * (u_old[j]-u_old[i]) - (m_{ij} + dt/2 d_{ij}) * ( 2*(u_tilde[j]-u_tilde[i]) - (u_old[j] -u_old [i]) )
540 * = 2* m_{ij} * ( (u_old[j]-u_tilde[j] - (u_old[i]) - u_tilde[i]) ) - dt d_{ij} * (u_tilde[j]-u_tilde[i])
541 *
542 */
543
544 void Paso_FCT_setAntiDiffusionFlux_linearCN(Paso_SystemMatrix *flux_matrix,
545 const Paso_TransportProblem* fct,
546 const double dt,
547 const Paso_Coupler* u_tilde_coupler,
548 const Paso_Coupler* u_old_coupler)
549 {
550 dim_t i;
551 index_t iptr_ij;
552
553 const double *u_tilde=Paso_Coupler_borrowLocalData(u_tilde_coupler);
554 const double *u_old= Paso_Coupler_borrowLocalData(u_old_coupler);
555 const double *remote_u_tilde=Paso_Coupler_borrowRemoteData(u_tilde_coupler);
556 const double *remote_u_old=Paso_Coupler_borrowRemoteData(u_old_coupler);
557 const Paso_SystemMatrixPattern *pattern=fct->iteration_matrix->pattern;
558 const dim_t n=Paso_SystemMatrix_getTotalNumRows(fct->iteration_matrix);
559
560 #pragma omp parallel for schedule(static) private(i, iptr_ij)
561 for (i = 0; i < n; ++i) {
562 const double u_tilde_i = u_tilde[i];
563 const double u_old_i = u_old[i];
564 const double du_i = u_tilde_i - u_old_i;
565 #pragma ivdep
566 for (iptr_ij=(pattern->mainPattern->ptr[i]);iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
567
568 const index_t j = pattern->mainPattern->index[iptr_ij];
569 const double m_ij = fct->mass_matrix->mainBlock->val[iptr_ij];
570 const double d_ij = fct->transport_matrix->mainBlock->val[iptr_ij]+fct->iteration_matrix->mainBlock->val[iptr_ij]; /* this is in fact -d_ij */
571 const double u_tilde_j = u_tilde[j];
572 const double u_old_j = u_old[j];
573 const double du_j = u_tilde_j - u_old_j;
574
575 flux_matrix->mainBlock->val[iptr_ij]= 2 * m_ij * ( du_i - du_j ) - dt * d_ij * ( u_tilde_i - u_tilde_j);
576 }
577 #pragma ivdep
578 for (iptr_ij=(pattern->col_couplePattern->ptr[i]);iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
579
580 const index_t j = pattern->col_couplePattern->index[iptr_ij];
581 const double m_ij = fct->mass_matrix->col_coupleBlock->val[iptr_ij];
582 const double d_ij = fct->transport_matrix->col_coupleBlock->val[iptr_ij]+fct->iteration_matrix->col_coupleBlock->val[iptr_ij];/* this is in fact -d_ij */
583 const double u_tilde_j = remote_u_tilde[j];
584 const double u_old_j = remote_u_old[j];
585 const double du_j = u_tilde_j - u_old_j;
586
587 flux_matrix->col_coupleBlock->val[iptr_ij]= 2 * m_ij * ( du_i - du_j ) - dt * d_ij * ( u_tilde_i - u_tilde_j);
588
589 }
590 }
591
592 }
593
594 /**************************************************************/
595
596 /* Creates the low order transport matrix and stores its negative values
597 * into the iteration_matrix except for the main diagonal which is stored
598 * separately.
599 * If fc->iteration_matrix==NULL, fc->iteration_matrix is allocated
600 *
601 * a=transport_matrix
602 * b= low_order_transport_matrix = - iteration_matrix
603 * c=main diagonal low_order_transport_matrix
604 * initialize c[i] mit a[i,i]
605 *
606 * d_ij=max(0,-a[i,j],-a[j,i])
607 * b[i,j]=-(a[i,j]+d_ij)
608 * c[i]-=d_ij
609 */
610
611 void Paso_FCT_setLowOrderOperator(Paso_TransportProblem * fc) {
612
613 dim_t i;
614 index_t iptr_ij, iptr_ji;
615 const index_t* main_iptr=Paso_TransportProblem_borrowMainDiagonalPointer(fc);
616
617 if (fc->iteration_matrix==NULL) {
618 fc->iteration_matrix=Paso_SystemMatrix_alloc(fc->transport_matrix->type,
619 fc->transport_matrix->pattern,
620 fc->transport_matrix->row_block_size,
621 fc->transport_matrix->col_block_size, TRUE);
622 }
623
624 if (Esys_noError()) {
625 const Paso_SystemMatrixPattern *pattern=fc->iteration_matrix->pattern;
626 const dim_t n=Paso_SystemMatrix_getTotalNumRows(fc->iteration_matrix);
627 #pragma omp parallel for private(i, iptr_ij, iptr_ji) schedule(static)
628 for (i = 0; i < n; ++i) {
629 double sum=fc->transport_matrix->mainBlock->val[main_iptr[i]];
630
631 /* printf("sum[%d] = %e -> ", i, sum); */
632 /* look at a[i,j] */
633 for (iptr_ij=pattern->mainPattern->ptr[i];iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
634 const index_t j = pattern->mainPattern->index[iptr_ij];
635 const double rtmp1 = fc->transport_matrix->mainBlock->val[iptr_ij];
636 if (j!=i) {
637 /* find entry a[j,i] */
638 #pragma ivdep
639 for (iptr_ji=pattern->mainPattern->ptr[j]; iptr_ji<pattern->mainPattern->ptr[j+1]; ++iptr_ji) {
640
641 if ( pattern->mainPattern->index[iptr_ji] == i) {
642 const double rtmp2=fc->transport_matrix->mainBlock->val[iptr_ji];
643 /*
644 printf("a[%d,%d]=%e\n",i,j,rtmp1);
645 printf("a[%d,%d]=%e\n",j,i,rtmp2);
646 */
647
648 const double d_ij=-MIN3(0.,rtmp1,rtmp2);
649 fc->iteration_matrix->mainBlock->val[iptr_ij]=-(rtmp1+d_ij);
650 /* printf("l[%d,%d]=%e\n",i,j,fc->iteration_matrix->mainBlock->val[iptr_ij]); */
651 sum-=d_ij;
652 break;
653 }
654 }
655 }
656 }
657 for (iptr_ij=pattern->col_couplePattern->ptr[i];iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
658 const index_t j = pattern->col_couplePattern->index[iptr_ij];
659 const double rtmp1 = fc->transport_matrix->col_coupleBlock->val[iptr_ij];
660 /* find entry a[j,i] */
661 #pragma ivdep
662 for (iptr_ji=pattern->row_couplePattern->ptr[j]; iptr_ji<pattern->row_couplePattern->ptr[j+1]; ++iptr_ji) {
663 if (pattern->row_couplePattern->index[iptr_ji]==i) {
664 const double rtmp2=fc->transport_matrix->row_coupleBlock->val[iptr_ji];
665 const double d_ij=-MIN3(0.,rtmp1,rtmp2);
666 fc->iteration_matrix->col_coupleBlock->val[iptr_ij]=-(rtmp1+d_ij);
667 fc->iteration_matrix->row_coupleBlock->val[iptr_ji]=-(rtmp2+d_ij);
668 sum-=d_ij;
669 break;
670 }
671 }
672 }
673 /* set main diagonal entry */
674 fc->main_diagonal_low_order_transport_matrix[i]=sum;
675 /* printf("%e \n", sum); */
676 }
677
678 }
679 }
680
681 /*
682 * out_i=m_i u_i + a * \sum_{j <> i} l_{ij} (u_j-u_i) where m_i>0
683 * = u_i where m_i<=0
684 *
685 */
686 void Paso_FCT_Solver_setMuPaLu(double* out,
687 const double* M,
688 const Paso_Coupler* u_coupler,
689 const double a,
690 const Paso_SystemMatrix *L)
691 {
692 dim_t i;
693 const Paso_SystemMatrixPattern *pattern = L->pattern;
694 const double *u=Paso_Coupler_borrowLocalData(u_coupler);
695 const double *remote_u=Paso_Coupler_borrowRemoteData(u_coupler);
696 index_t iptr_ij;
697 const dim_t n=Paso_SystemMatrix_getTotalNumRows(L);
698
699 #pragma omp parallel for private(i) schedule(static)
700 for (i = 0; i < n; ++i) {
701 if ( M[i] > 0.) {
702 out[i]=M[i]*u[i];
703 } else {
704 out[i]=u[i];
705 }
706 }
707 if (ABS(a)>0) {
708 #pragma omp parallel for schedule(static) private(i, iptr_ij)
709 for (i = 0; i < n; ++i) {
710 if ( M[i] > 0.) {
711 double sum=0;
712 const double u_i=u[i];
713 #pragma ivdep
714 for (iptr_ij=(pattern->mainPattern->ptr[i]);iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
715 const index_t j=pattern->mainPattern->index[iptr_ij];
716 const double l_ij=L->mainBlock->val[iptr_ij];
717 sum+=l_ij*(u[j]-u_i);
718
719 }
720 #pragma ivdep
721 for (iptr_ij=(pattern->col_couplePattern->ptr[i]);iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
722 const index_t j=pattern->col_couplePattern->index[iptr_ij];
723 const double l_ij=L->col_coupleBlock->val[iptr_ij];
724 sum+=l_ij*(remote_u[j]-u_i);
725 }
726 out[i]+=a*sum;
727 }
728 }
729 }
730 }
731
732 /* *************************************************************************************************************************** */
733
734
735
736
737
738
739

  ViewVC Help
Powered by ViewVC 1.1.26