/[escript]/trunk/paso/src/FCT_Solver.cpp
ViewVC logotype

Contents of /trunk/paso/src/FCT_Solver.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4843 - (show annotations)
Tue Apr 8 05:32:07 2014 UTC (6 years, 1 month ago) by caltinay
File size: 31104 byte(s)
checkpointing paso::Preconditioner.

1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2014 by University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Open Software License version 3.0
9 * http://www.opensource.org/licenses/osl-3.0.php
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development 2012-2013 by School of Earth Sciences
13 * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 *
15 *****************************************************************************/
16
17
18 /****************************************************************************/
19
20 /* Paso: Transport solver with flux correction (L is row sum zero)
21 *
22 * - Mv_t=Lv v(0)=u
23 *
24 * to return v(dt)
25 *
26 */
27 /****************************************************************************/
28
29 /* Author: l.gross@uq.edu.au */
30
31 /****************************************************************************/
32
33 #include "FCT_Solver.h"
34 #include "Preconditioner.h"
35 #include "PasoUtil.h"
36
37
38 Paso_FCT_Solver* Paso_FCT_Solver_alloc(Paso_TransportProblem *fctp, Paso_Options* options)
39 {
40 Paso_FCT_Solver* out=NULL;
41 const dim_t blockSize=Paso_TransportProblem_getBlockSize(fctp);
42 const dim_t n = Paso_TransportProblem_getTotalNumRows(fctp);
43
44 out=new Paso_FCT_Solver;
45 out->transportproblem = Paso_TransportProblem_getReference(fctp);
46 out->mpi_info = Esys_MPIInfo_getReference(fctp->mpi_info);
47 out->flux_limiter = Paso_FCT_FluxLimiter_alloc(fctp);
48 out->b = new double[n];
49 if ( (options->ode_solver == PASO_CRANK_NICOLSON) || (options->ode_solver == PASO_BACKWARD_EULER) ) {
50 out->du = new double[n];
51 out->z = new double[n];
52 } else {
53 out->du = NULL;
54 out->z=NULL;
55 }
56 out->u_coupler.reset(new paso::Coupler(Paso_TransportProblem_borrowConnector(fctp), blockSize));
57 out->u_old_coupler.reset(new paso::Coupler(Paso_TransportProblem_borrowConnector(fctp), blockSize));
58 out->omega=0;
59
60 if ( options->ode_solver == PASO_LINEAR_CRANK_NICOLSON ) {
61 out->method = PASO_LINEAR_CRANK_NICOLSON;
62 } else if ( options->ode_solver == PASO_CRANK_NICOLSON ) {
63 out->method = PASO_CRANK_NICOLSON;
64 } else if ( options->ode_solver == PASO_BACKWARD_EULER ) {
65 out->method = PASO_BACKWARD_EULER;
66 } else {
67 Esys_setError(VALUE_ERROR, "Paso_FCT_Solver_alloc: unknown integration scheme.");
68 out->method = UNKNOWN;
69 }
70
71 if (Esys_noError()) {
72 return out;
73 } else {
74 Paso_FCT_Solver_free(out);
75 return NULL;
76 }
77 }
78
79 void Paso_FCT_Solver_free(Paso_FCT_Solver *in)
80 {
81 if (in != NULL) {
82 Paso_TransportProblem_free(in->transportproblem);
83 Paso_FCT_FluxLimiter_free(in->flux_limiter);
84 Esys_MPIInfo_free(in->mpi_info);
85 delete[] in->b;
86 delete[] in->z;
87 delete[] in->du;
88 delete in;
89 }
90 }
91
92 double Paso_FCT_Solver_getSafeTimeStepSize(Paso_TransportProblem* fctp)
93 {
94 dim_t i;
95 double dt_max_loc=LARGE_POSITIVE_FLOAT;
96 double dt_max=LARGE_POSITIVE_FLOAT;
97 const dim_t n = fctp->transport_matrix->getTotalNumRows();
98 /* set low order transport operator */
99 Paso_FCT_setLowOrderOperator(fctp);
100
101 if (Esys_noError()) {
102 /*
103 * calculate time step size:
104 */
105 dt_max=LARGE_POSITIVE_FLOAT;
106 #pragma omp parallel private(i, dt_max_loc)
107 {
108 dt_max_loc=LARGE_POSITIVE_FLOAT;
109 #pragma omp for schedule(static)
110 for (i=0;i<n;++i) {
111 const double l_ii=fctp->main_diagonal_low_order_transport_matrix[i];
112 const double m_i=fctp->lumped_mass_matrix[i];
113 if ( m_i > 0 ) {
114 if (l_ii<0) dt_max_loc=MIN(dt_max_loc,m_i/(-l_ii));
115 }
116 }
117 #pragma omp critical
118 {
119 dt_max=MIN(dt_max,dt_max_loc);
120 }
121 }
122 #ifdef ESYS_MPI
123 {
124 dt_max_loc=dt_max;
125 MPI_Allreduce(&dt_max_loc, &dt_max, 1, MPI_DOUBLE, MPI_MIN, fctp->mpi_info->comm);
126 }
127 #endif
128 if (dt_max<LARGE_POSITIVE_FLOAT) dt_max*=2.;
129 }
130 return dt_max;
131 }
132
133 /* modifies the main diagonal of the iteration matrix to introduce new dt */
134 void Paso_FCT_Solver_initialize(const double dt, Paso_FCT_Solver *fct_solver, Paso_Options* options, Paso_Performance* pp)
135 {
136 Paso_TransportProblem* fctp = fct_solver->transportproblem;
137 const index_t* main_iptr=Paso_TransportProblem_borrowMainDiagonalPointer(fctp);
138 const dim_t n = fctp->transport_matrix->getTotalNumRows();
139 const double theta = Paso_FCT_Solver_getTheta(fct_solver);
140 const double omega=1./(dt* theta);
141 dim_t i;
142 Paso_Options options2;
143
144
145
146 Paso_solve_free(fctp->iteration_matrix.get());
147 /*
148 * fctp->iteration_matrix[i,i]=m[i]/(dt theta) -l[i,i]
149 *
150 */
151 fct_solver->omega=omega;
152 fct_solver->dt = dt;
153 #pragma omp parallel for private(i)
154 for (i = 0; i < n; ++i) {
155 const double m_i=fctp->lumped_mass_matrix[i];
156 const double l_ii = fctp->main_diagonal_low_order_transport_matrix[i];
157 if ( m_i > 0 ) {
158 fctp->iteration_matrix->mainBlock->val[main_iptr[i]] = m_i * omega - l_ii;
159 } else {
160 fctp->iteration_matrix->mainBlock->val[main_iptr[i]] = ABS(m_i * omega - l_ii)/(EPSILON*EPSILON);
161 }
162 }
163
164 /* allocate preconditioner/solver */
165 Paso_Options_setDefaults(&options2);
166 options2.verbose = options->verbose;
167 if (fct_solver->method == PASO_LINEAR_CRANK_NICOLSON ) {
168 options2.preconditioner = PASO_GS;
169 } else {
170 options2.preconditioner = PASO_JACOBI;
171 /* options2.preconditioner = PASO_GS; */
172 }
173 options2.use_local_preconditioner = FALSE;
174 options2.sweeps=-1;
175
176 Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER_INIT);
177 fctp->iteration_matrix->setPreconditioner(&options2);
178 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER_INIT);
179 }
180
181 /* entry point for update procedures */
182 err_t Paso_FCT_Solver_update(Paso_FCT_Solver *fct_solver, double* u, double *u_old, Paso_Options* options, Paso_Performance *pp)
183 {
184 const index_t method=fct_solver->method;
185 err_t err_out = SOLVER_NO_ERROR;
186
187 if (method == PASO_LINEAR_CRANK_NICOLSON) {
188 err_out=Paso_FCT_Solver_update_LCN(fct_solver, u, u_old, options, pp);
189 } else if (method == PASO_CRANK_NICOLSON) {
190 err_out=Paso_FCT_Solver_updateNL(fct_solver, u, u_old, options, pp);
191 } else if (method == PASO_BACKWARD_EULER) {
192 err_out=Paso_FCT_Solver_updateNL(fct_solver, u, u_old, options, pp);
193 } else {
194 err_out = SOLVER_INPUT_ERROR;
195 }
196 return err_out;
197 }
198
199 /* linear crank-nicolson update */
200 err_t Paso_FCT_Solver_update_LCN(Paso_FCT_Solver *fct_solver, double * u, double *u_old, Paso_Options* options, Paso_Performance *pp)
201 {
202 double const dt = fct_solver->dt;
203 dim_t sweep_max, i;
204 double *b = fct_solver->b;
205 double const RTOL = options->tolerance;
206 const dim_t n=Paso_TransportProblem_getTotalNumRows(fct_solver->transportproblem);
207 paso::SystemMatrix_ptr iteration_matrix(fct_solver->transportproblem->iteration_matrix);
208 const index_t* main_iptr=Paso_TransportProblem_borrowMainDiagonalPointer(fct_solver->transportproblem);
209 err_t errorCode = SOLVER_NO_ERROR;
210 double norm_u_tilde;
211
212 fct_solver->u_old_coupler->startCollect(u_old);
213 fct_solver->u_old_coupler->finishCollect();
214
215 /* b[i]=m*u_tilde[i] = m u_old[i] + dt/2 sum_{j <> i} l_{ij}*(u_old[j]-u_old[i])
216 = u_tilde[i] = u_old[i] where constraint m<0.
217 * note that iteration_matrix stores the negative values of the
218 * low order transport matrix l. Therefore a=-dt*0.5 is used. */
219
220 Paso_FCT_Solver_setMuPaLu(b, fct_solver->transportproblem->lumped_mass_matrix,
221 fct_solver->u_old_coupler, -dt*0.5, iteration_matrix);
222 /* solve for u_tilde : u_tilda = m^{-1} * b */
223 Paso_FCT_FluxLimiter_setU_tilda(fct_solver->flux_limiter, b);
224 /* u_tilda_connector is completed */
225
226 /* calculate anti-diffusive fluxes for u_tilda */
227 Paso_FCT_setAntiDiffusionFlux_linearCN(fct_solver->flux_limiter->antidiffusive_fluxes,
228 fct_solver->transportproblem, dt,
229 fct_solver->flux_limiter->u_tilde_coupler,
230 fct_solver->u_old_coupler);
231
232
233 /* b_i += sum_{j} limitation factor_{ij} * antidiffusive_flux_{ij} */
234 Paso_FCT_FluxLimiter_addLimitedFluxes_Start(fct_solver->flux_limiter);
235 Paso_FCT_FluxLimiter_addLimitedFluxes_Complete(fct_solver->flux_limiter, b);
236
237 Paso_Scale(n, b,fct_solver->omega );
238 /* solve (m-dt/2*L) u = b in the form (omega*m-L) u = b * omega with omega*dt/2=1 */
239 #pragma omp for private(i) schedule(static)
240 for (i = 0; i < n; ++i) {
241 if (! (fct_solver->transportproblem->lumped_mass_matrix[i] > 0 ) ) {
242 b[i] = fct_solver->flux_limiter->u_tilde[i]
243 * fct_solver->transportproblem->iteration_matrix->mainBlock->val[main_iptr[i]];
244 }
245 }
246 /* initial guess is u<- -u + 2*u_tilde */
247 Paso_Update(n, -1., u, 2., fct_solver->flux_limiter->u_tilde);
248
249 sweep_max = MAX((int) (- 2 * log(RTOL)/log(2.)-0.5),1);
250 norm_u_tilde=Paso_lsup(n, fct_solver->flux_limiter->u_tilde, fct_solver->flux_limiter->mpi_info);
251 if (options->verbose) {
252 printf("Paso_FCT_Solver_update_LCN: u_tilda lsup = %e (rtol = %e, max. sweeps = %d)\n",norm_u_tilde,RTOL * norm_u_tilde ,sweep_max);
253 }
254 errorCode = paso::Preconditioner_Smoother_solve_byTolerance( iteration_matrix, ((paso::Preconditioner*) (iteration_matrix->solver_p))->gs,
255 u, b, RTOL, &sweep_max, TRUE);
256 if (errorCode == PRECONDITIONER_NO_ERROR) {
257 if (options->verbose) printf("Paso_FCT_Solver_update_LCN: convergence after %d Gauss-Seidel steps.\n",sweep_max);
258 errorCode=SOLVER_NO_ERROR;
259 } else {
260 if (options->verbose) printf("Paso_FCT_Solver_update_LCN: Gauss-Seidel failed within %d stesp (rel. tolerance %e).\n",sweep_max,RTOL);
261 errorCode= SOLVER_MAXITER_REACHED;
262 }
263 return errorCode;
264
265 }
266
267 err_t Paso_FCT_Solver_updateNL(Paso_FCT_Solver *fct_solver, double* u, double *u_old, Paso_Options* options, Paso_Performance *pp)
268 {
269 const dim_t num_critical_rates_max=3; /* number of rates >=critical_rate accepted before divergence is triggered */
270 const double critical_rate=0.95; /* expected value of convergence rate */
271
272 double *b = fct_solver->b;
273 double *z = fct_solver->z;
274 double *du = fct_solver->du;
275 double const dt = fct_solver->dt;
276 Paso_TransportProblem* fctp = fct_solver->transportproblem;
277 Paso_FCT_FluxLimiter* flux_limiter = fct_solver->flux_limiter;
278 dim_t i;
279 double norm_u_tilde, ATOL, norm_du=LARGE_POSITIVE_FLOAT, norm_du_old, rate=1.;
280 err_t errorCode=SOLVER_NO_ERROR;
281 const dim_t n = fctp->transport_matrix->getTotalNumRows();
282 const double atol=options->absolute_tolerance;
283 const double rtol=options->tolerance;
284 const dim_t max_m=options->iter_max;
285 dim_t m=0, num_critical_rates=0 ;
286 /* ////////////////////////////////////////////////////////////////////// */
287
288 bool converged=FALSE, max_m_reached=FALSE,diverged=FALSE;
289 options->num_iter=0;
290
291 fct_solver->u_old_coupler->startCollect(u_old);
292 fct_solver->u_old_coupler->finishCollect();
293 /* prepare u_tilda and flux limiter */
294 if ( fct_solver->method == PASO_BACKWARD_EULER ) {
295 /* b[i]=m_i* u_old[i] */
296 #pragma omp for private(i) schedule(static)
297 for (i = 0; i < n; ++i) {
298 if (fctp->lumped_mass_matrix[i] > 0 ) {
299 b[i]=u_old[i]* fctp->lumped_mass_matrix[i];
300 } else {
301 b[i]=u_old[i];
302 }
303 }
304 } else {
305 /* b[i]=m_i* u_old[i] + dt/2 sum_{j <> i} l_{ij}*(u_old[j]-u_old[i]) = m_i * u_tilde_i where m_i>0
306 * = u_old[i] otherwise
307 * note that iteration_matrix stores the negative values of the
308 * low order transport matrix l. Therefore a=-dt*0.5 is used. */
309 Paso_FCT_Solver_setMuPaLu(b,fctp->lumped_mass_matrix,fct_solver->u_old_coupler,-dt*0.5,fctp->iteration_matrix);
310 }
311 Paso_FCT_FluxLimiter_setU_tilda(flux_limiter, b); /* u_tilda = m^{-1} b */
312 /* u_tilda_connector is completed */
313 /************************************************************************/
314 /* calculate stopping criterion */
315 norm_u_tilde=Paso_lsup(n, flux_limiter->u_tilde, flux_limiter->mpi_info);
316 ATOL= rtol * norm_u_tilde + atol ;
317 if (options->verbose) printf("Paso_FCT_Solver_updateNL: iteration starts u_tilda lsup = %e (abs. tol = %e)\n",norm_u_tilde,ATOL);
318 /* ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// */
319
320 /* u_old is an initial guess for u*/
321 Paso_Copy(n,u,u_old);
322
323 while ( (!converged) && (!diverged) && (! max_m_reached) && Esys_noError()) {
324 fct_solver->u_coupler->startCollect(u);
325 fct_solver->u_coupler->finishCollect();
326
327 /* set antidiffusive_flux_{ij} for u */
328 if (fct_solver->method == PASO_BACKWARD_EULER) {
329 Paso_FCT_setAntiDiffusionFlux_BE(fct_solver->flux_limiter->antidiffusive_fluxes, fctp, dt, fct_solver->u_coupler, fct_solver->u_old_coupler);
330 } else {
331 Paso_FCT_setAntiDiffusionFlux_CN(fct_solver->flux_limiter->antidiffusive_fluxes, fctp, dt, fct_solver->u_coupler, fct_solver->u_old_coupler);
332 }
333 /* start the calculation of the limitation factors_{fct_solver->ij} */
334 Paso_FCT_FluxLimiter_addLimitedFluxes_Start(flux_limiter); /* uses u_tilde */
335
336 /*
337 * z_m[i]=b[i] - (m_i*u[i] - omega*sum_{j<>i} l_{ij} (u[j]-u[i]) ) where m_i>0
338 * ==b[i] - u[i] = u_tilda[i]-u[i] =0 otherwise
339 *
340 * omega = dt/2 or dt .
341 *
342 * note that iteration_matrix stores the negative values of the
343 * low order transport matrix l. Therefore a=dt*theta is used.
344 */
345 if (fct_solver-> method == PASO_BACKWARD_EULER) {
346 Paso_FCT_Solver_setMuPaLu(z, fctp->lumped_mass_matrix, fct_solver->u_coupler, dt, fctp->iteration_matrix);
347 } else {
348 Paso_FCT_Solver_setMuPaLu(z, fctp->lumped_mass_matrix, fct_solver->u_coupler, dt/2, fctp->iteration_matrix);
349 }
350
351 Paso_Update(n,-1.,z,1.,b); /* z=b-z */
352
353 /* z_i += sum_{j} limitation factor_{ij} * antidiffusive_flux_{ij} */
354 Paso_FCT_FluxLimiter_addLimitedFluxes_Complete(flux_limiter, z);
355
356 /* we solve (m/omega - L ) * du = z */
357 if (fct_solver->method == PASO_BACKWARD_EULER) {
358 dim_t cntIter = options->iter_max;
359 double tol= Paso_l2(n, z, fctp->mpi_info) ;
360
361 if ( m ==0) {
362 tol *=0.5;
363 } else {
364 tol *= MIN(MAX(rate*rate, 1e-2), 0.5);
365 }
366 /* use BiCGSTab with jacobi preconditioner ( m - omega * L ) */
367 Paso_zeroes(n,du);
368 errorCode = Paso_Solver_BiCGStab(fctp->iteration_matrix, z, du, &cntIter, &tol, pp);
369
370 /* errorCode = Paso_Solver_GMRES(fctp->iteration_matrix, z, du, &cntIter, &tol, 10, 2000, pp); */
371 if (options->verbose) printf("Paso_FCT_Solver_updateNL: BiCGStab is completed after %d steps (residual =%e).\n",cntIter, tol);
372 options->num_iter+=cntIter;
373 if ( errorCode != SOLVER_NO_ERROR) break;
374 } else {
375 /* just use the main diagonal of (m/omega - L ) */
376
377 paso::Preconditioner_Smoother_solve(fctp->iteration_matrix, ((paso::Preconditioner*) (fctp->iteration_matrix->solver_p))->jacobi,
378 du, z, 1, FALSE);
379
380 options->num_iter++;
381 }
382
383 Paso_Update(n,1.,u,fct_solver->omega,du);
384 norm_du_old=norm_du;
385 norm_du=Paso_lsup(n,du, fctp->mpi_info);
386 if (m ==0) {
387 if (options->verbose) printf("Paso_FCT_Solver_updateNL: step %d: increment= %e\n",m+1, norm_du * fct_solver->omega);
388 } else {
389 if (norm_du_old > 0.) {
390 rate=norm_du/norm_du_old;
391 } else if (norm_du <= 0.) {
392 rate=0.;
393 } else {
394 rate=LARGE_POSITIVE_FLOAT;
395 }
396 if (options->verbose) printf("Paso_FCT_Solver_updateNL: step %d: increment= %e (rate = %e)\n",m+1, norm_du * fct_solver->omega, rate);
397 num_critical_rates+=( rate<critical_rate ? 0 : 1);
398 max_m_reached=(m>max_m);
399 diverged = (num_critical_rates >= num_critical_rates_max);
400 converged=(norm_du * fct_solver->omega <= ATOL);
401 }
402 m++;
403 } /* end of while loop */
404 if (errorCode == SOLVER_NO_ERROR) {
405 if (converged) {
406 if (options->verbose) printf("Paso_FCT_Solver_updateNL: iteration is completed.\n");
407 errorCode=SOLVER_NO_ERROR;
408 } else if (diverged) {
409 if (options->verbose) printf("Paso_FCT_Solver_updateNL: divergence.\n");
410 errorCode=SOLVER_DIVERGENCE;
411 } else if (max_m_reached) {
412 if (options->verbose) printf("Paso_FCT_Solver_updateNL: maximum number of iteration steps reached.\n");
413 errorCode=SOLVER_MAXITER_REACHED;
414 }
415 }
416 return errorCode;
417 }
418
419
420 /*
421 * AntiDiffusionFlux:
422 *
423 * f_{ij} = (m_{ij} - dt (1-theta) d_{ij}) (u_old[j]-u_old[i]) - (m_{ij} + dt theta d_{ij}) (u[j]-u[i])
424 *
425 * m=fc->mass matrix
426 * d=artificial diffusion matrix = L - K = - fc->iteration matrix - fc->transport matrix (away from main diagonal)
427 *
428 * for CN : theta =0.5
429 * for BE : theta = 1.
430 */
431
432 void Paso_FCT_setAntiDiffusionFlux_CN(paso::SystemMatrix_ptr flux_matrix,
433 const Paso_TransportProblem* fct,
434 const double dt,
435 paso::const_Coupler_ptr u_coupler,
436 paso::const_Coupler_ptr u_old_coupler)
437 {
438 dim_t i;
439 index_t iptr_ij;
440
441 const double *u = u_coupler->borrowLocalData();
442 const double *u_old= u_old_coupler->borrowLocalData();
443 const double *remote_u=u_coupler->borrowRemoteData();
444 const double *remote_u_old=u_old_coupler->borrowRemoteData();
445 const double dt_half= dt/2;
446 paso::const_SystemMatrixPattern_ptr pattern(fct->iteration_matrix->pattern);
447 const dim_t n = fct->iteration_matrix->getTotalNumRows();
448
449 #pragma omp parallel for schedule(static) private(i, iptr_ij)
450 for (i = 0; i < n; ++i) {
451 const double u_i = u[i];
452 const double u_old_i = u_old[i];
453
454 #pragma ivdep
455 for (iptr_ij=(pattern->mainPattern->ptr[i]);iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
456 const index_t j = pattern->mainPattern->index[iptr_ij];
457 const double m_ij = fct->mass_matrix->mainBlock->val[iptr_ij];
458 const double d_ij = fct->transport_matrix->mainBlock->val[iptr_ij]+fct->iteration_matrix->mainBlock->val[iptr_ij]; /* this is in fact -d_ij */
459 const double u_old_j = u_old[j];
460 const double u_j = u[j];
461
462 /* (m_{ij} - dt (1-theta) d_{ij}) (u_old[j]-u_old[i]) - (m_{ij} + dt theta d_{ij}) (u[j]-u[i]) */
463 flux_matrix->mainBlock->val[iptr_ij]=(m_ij+dt_half*d_ij)*(u_old_j-u_old_i) - (m_ij-dt_half*d_ij)*(u_j-u_i);
464
465 }
466 #pragma ivdep
467 for (iptr_ij=(pattern->col_couplePattern->ptr[i]);iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
468 const index_t j = pattern->col_couplePattern->index[iptr_ij];
469 const double m_ij = fct->mass_matrix->col_coupleBlock->val[iptr_ij];
470 const double d_ij = fct->transport_matrix->col_coupleBlock->val[iptr_ij]+fct->iteration_matrix->col_coupleBlock->val[iptr_ij]; /* this is in fact -d_ij */
471 const double u_old_j = remote_u_old[j];
472 const double u_j = remote_u[j];
473 flux_matrix->col_coupleBlock->val[iptr_ij]=(m_ij+dt_half*d_ij)*(u_old_j-u_old_i)- (m_ij-dt_half*d_ij)*(u_j-u_i);
474 }
475 }
476 }
477
478 void Paso_FCT_setAntiDiffusionFlux_BE(paso::SystemMatrix_ptr flux_matrix,
479 const Paso_TransportProblem* fct,
480 const double dt,
481 paso::const_Coupler_ptr u_coupler,
482 paso::const_Coupler_ptr u_old_coupler)
483 {
484 dim_t i;
485 index_t iptr_ij;
486
487 const double *u = u_coupler->borrowLocalData();
488 const double *u_old = u_old_coupler->borrowLocalData();
489 const double *remote_u = u_coupler->borrowRemoteData();
490 const double *remote_u_old = u_old_coupler->borrowRemoteData();
491 paso::const_SystemMatrixPattern_ptr pattern(fct->iteration_matrix->pattern);
492 const dim_t n = fct->iteration_matrix->getTotalNumRows();
493
494 #pragma omp parallel for schedule(static) private(i, iptr_ij)
495 for (i = 0; i < n; ++i) {
496 const double u_i = u[i];
497 const double u_old_i = u_old[i];
498 #pragma ivdep
499 for (iptr_ij=pattern->mainPattern->ptr[i]; iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
500
501 const index_t j = pattern->mainPattern->index[iptr_ij];
502 const double m_ij = fct->mass_matrix->mainBlock->val[iptr_ij];
503 const double d_ij = fct->transport_matrix->mainBlock->val[iptr_ij]+fct->iteration_matrix->mainBlock->val[iptr_ij]; /* this is in fact -d_ij */
504 const double u_old_j = u_old[j];
505 const double u_j = u[j];
506
507 flux_matrix->mainBlock->val[iptr_ij]=m_ij*(u_old_j-u_old_i)- (m_ij-dt*d_ij)*(u_j-u_i);
508 }
509 #pragma ivdep
510 for (iptr_ij=pattern->col_couplePattern->ptr[i]; iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
511 const index_t j = pattern->col_couplePattern->index[iptr_ij];
512 const double m_ij = fct->mass_matrix->col_coupleBlock->val[iptr_ij]; /* this is in fact -d_ij */
513 const double d_ij = fct->transport_matrix->col_coupleBlock->val[iptr_ij]+fct->iteration_matrix->col_coupleBlock->val[iptr_ij];
514 const double u_old_j = remote_u_old[j];
515 const double u_j = remote_u[j];
516
517 flux_matrix->col_coupleBlock->val[iptr_ij]=m_ij*(u_old_j-u_old_i)- (m_ij-dt*d_ij)*(u_j-u_i);
518 }
519 }
520 }
521
522 /* special version of the ant-diffusive fluxes for the linear Crank-Nicolson scheme
523 * in fact this is evaluated for u = 2*u_tilde - u_old which is the predictor
524 * of the solution of the the stabilised problem at time dt using the forward Euler scheme
525 *
526 * f_{ij} = (m_{ij} - dt/2 d_{ij}) (u_old[j]-u_old[i]) - (m_{ij} + dt/2 d_{ij}) (u[j]-u[i])
527 * = (m_{ij} - dt/2 d_{ij}) * (u_old[j]-u_old[i]) - (m_{ij} + dt/2 d_{ij}) * ( 2*(u_tilde[j]-u_tilde[i]) - (u_old[j] -u_old [i]) )
528 * = 2* m_{ij} * ( (u_old[j]-u_tilde[j] - (u_old[i]) - u_tilde[i]) ) - dt d_{ij} * (u_tilde[j]-u_tilde[i])
529 *
530 */
531
532 void Paso_FCT_setAntiDiffusionFlux_linearCN(paso::SystemMatrix_ptr flux_matrix,
533 const Paso_TransportProblem* fct, const double dt,
534 paso::const_Coupler_ptr u_tilde_coupler,
535 paso::const_Coupler_ptr u_old_coupler)
536 {
537 dim_t i;
538 index_t iptr_ij;
539
540 const double *u_tilde = u_tilde_coupler->borrowLocalData();
541 const double *u_old = u_old_coupler->borrowLocalData();
542 const double *remote_u_tilde = u_tilde_coupler->borrowRemoteData();
543 const double *remote_u_old = u_old_coupler->borrowRemoteData();
544 paso::const_SystemMatrixPattern_ptr pattern(fct->iteration_matrix->pattern);
545 const dim_t n = fct->iteration_matrix->getTotalNumRows();
546
547 #pragma omp parallel for schedule(static) private(i, iptr_ij)
548 for (i = 0; i < n; ++i) {
549 const double u_tilde_i = u_tilde[i];
550 const double u_old_i = u_old[i];
551 const double du_i = u_tilde_i - u_old_i;
552 #pragma ivdep
553 for (iptr_ij=(pattern->mainPattern->ptr[i]);iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
554
555 const index_t j = pattern->mainPattern->index[iptr_ij];
556 const double m_ij = fct->mass_matrix->mainBlock->val[iptr_ij];
557 const double d_ij = fct->transport_matrix->mainBlock->val[iptr_ij]+fct->iteration_matrix->mainBlock->val[iptr_ij]; /* this is in fact -d_ij */
558 const double u_tilde_j = u_tilde[j];
559 const double u_old_j = u_old[j];
560 const double du_j = u_tilde_j - u_old_j;
561
562 flux_matrix->mainBlock->val[iptr_ij]= 2 * m_ij * ( du_i - du_j ) - dt * d_ij * ( u_tilde_i - u_tilde_j);
563 }
564 #pragma ivdep
565 for (iptr_ij=(pattern->col_couplePattern->ptr[i]);iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
566
567 const index_t j = pattern->col_couplePattern->index[iptr_ij];
568 const double m_ij = fct->mass_matrix->col_coupleBlock->val[iptr_ij];
569 const double d_ij = fct->transport_matrix->col_coupleBlock->val[iptr_ij]+fct->iteration_matrix->col_coupleBlock->val[iptr_ij];/* this is in fact -d_ij */
570 const double u_tilde_j = remote_u_tilde[j];
571 const double u_old_j = remote_u_old[j];
572 const double du_j = u_tilde_j - u_old_j;
573
574 flux_matrix->col_coupleBlock->val[iptr_ij]= 2 * m_ij * ( du_i - du_j ) - dt * d_ij * ( u_tilde_i - u_tilde_j);
575
576 }
577 }
578
579 }
580
581 /****************************************************************************/
582
583 /* Creates the low order transport matrix and stores its negative values
584 * into the iteration_matrix except for the main diagonal which is stored
585 * separately.
586 * If fc->iteration_matrix==NULL, fc->iteration_matrix is allocated
587 *
588 * a=transport_matrix
589 * b= low_order_transport_matrix = - iteration_matrix
590 * c=main diagonal low_order_transport_matrix
591 * initialise c[i] mit a[i,i]
592 *
593 * d_ij=max(0,-a[i,j],-a[j,i])
594 * b[i,j]=-(a[i,j]+d_ij)
595 * c[i]-=d_ij
596 */
597
598 void Paso_FCT_setLowOrderOperator(Paso_TransportProblem * fc)
599 {
600 dim_t i;
601 index_t iptr_ij, iptr_ji;
602 const index_t* main_iptr=Paso_TransportProblem_borrowMainDiagonalPointer(fc);
603
604 if (!fc->iteration_matrix.get()) {
605 fc->iteration_matrix.reset(new paso::SystemMatrix(
606 fc->transport_matrix->type, fc->transport_matrix->pattern,
607 fc->transport_matrix->row_block_size,
608 fc->transport_matrix->col_block_size, true));
609 }
610
611 if (Esys_noError()) {
612 paso::const_SystemMatrixPattern_ptr pattern(fc->iteration_matrix->pattern);
613 const dim_t n = fc->iteration_matrix->getTotalNumRows();
614 #pragma omp parallel for private(i, iptr_ij, iptr_ji) schedule(static)
615 for (i = 0; i < n; ++i) {
616 double sum=fc->transport_matrix->mainBlock->val[main_iptr[i]];
617
618 /* printf("sum[%d] = %e -> ", i, sum); */
619 /* look at a[i,j] */
620 for (iptr_ij=pattern->mainPattern->ptr[i];iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
621 const index_t j = pattern->mainPattern->index[iptr_ij];
622 const double rtmp1 = fc->transport_matrix->mainBlock->val[iptr_ij];
623 if (j!=i) {
624 /* find entry a[j,i] */
625 #pragma ivdep
626 for (iptr_ji=pattern->mainPattern->ptr[j]; iptr_ji<pattern->mainPattern->ptr[j+1]; ++iptr_ji) {
627
628 if ( pattern->mainPattern->index[iptr_ji] == i) {
629 const double rtmp2=fc->transport_matrix->mainBlock->val[iptr_ji];
630 /*
631 printf("a[%d,%d]=%e\n",i,j,rtmp1);
632 printf("a[%d,%d]=%e\n",j,i,rtmp2);
633 */
634
635 const double d_ij=-MIN3(0.,rtmp1,rtmp2);
636 fc->iteration_matrix->mainBlock->val[iptr_ij]=-(rtmp1+d_ij);
637 /* printf("l[%d,%d]=%e\n",i,j,fc->iteration_matrix->mainBlock->val[iptr_ij]); */
638 sum-=d_ij;
639 break;
640 }
641 }
642 }
643 }
644 for (iptr_ij=pattern->col_couplePattern->ptr[i];iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
645 const index_t j = pattern->col_couplePattern->index[iptr_ij];
646 const double rtmp1 = fc->transport_matrix->col_coupleBlock->val[iptr_ij];
647 /* find entry a[j,i] */
648 #pragma ivdep
649 for (iptr_ji=pattern->row_couplePattern->ptr[j]; iptr_ji<pattern->row_couplePattern->ptr[j+1]; ++iptr_ji) {
650 if (pattern->row_couplePattern->index[iptr_ji]==i) {
651 const double rtmp2=fc->transport_matrix->row_coupleBlock->val[iptr_ji];
652 const double d_ij=-MIN3(0.,rtmp1,rtmp2);
653 fc->iteration_matrix->col_coupleBlock->val[iptr_ij]=-(rtmp1+d_ij);
654 fc->iteration_matrix->row_coupleBlock->val[iptr_ji]=-(rtmp2+d_ij);
655 sum-=d_ij;
656 break;
657 }
658 }
659 }
660 /* set main diagonal entry */
661 fc->main_diagonal_low_order_transport_matrix[i]=sum;
662 /* printf("%e \n", sum); */
663 }
664
665 }
666 }
667
668 /*
669 * out_i=m_i u_i + a * \sum_{j <> i} l_{ij} (u_j-u_i) where m_i>0
670 * = u_i where m_i<=0
671 *
672 */
673 void Paso_FCT_Solver_setMuPaLu(double* out,
674 const double* M,
675 paso::const_Coupler_ptr u_coupler,
676 const double a,
677 paso::const_SystemMatrix_ptr L)
678 {
679 dim_t i;
680 paso::const_SystemMatrixPattern_ptr pattern(L->pattern);
681 const double *u = u_coupler->borrowLocalData();
682 const double *remote_u = u_coupler->borrowRemoteData();
683 index_t iptr_ij;
684 const dim_t n = L->getTotalNumRows();
685
686 #pragma omp parallel for private(i) schedule(static)
687 for (i = 0; i < n; ++i) {
688 if ( M[i] > 0.) {
689 out[i]=M[i]*u[i];
690 } else {
691 out[i]=u[i];
692 }
693 }
694 if (ABS(a)>0) {
695 #pragma omp parallel for schedule(static) private(i, iptr_ij)
696 for (i = 0; i < n; ++i) {
697 if ( M[i] > 0.) {
698 double sum=0;
699 const double u_i=u[i];
700 #pragma ivdep
701 for (iptr_ij=(pattern->mainPattern->ptr[i]);iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
702 const index_t j=pattern->mainPattern->index[iptr_ij];
703 const double l_ij=L->mainBlock->val[iptr_ij];
704 sum+=l_ij*(u[j]-u_i);
705 }
706 #pragma ivdep
707 for (iptr_ij=(pattern->col_couplePattern->ptr[i]);iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
708 const index_t j=pattern->col_couplePattern->index[iptr_ij];
709 const double l_ij=L->col_coupleBlock->val[iptr_ij];
710 sum+=l_ij*(remote_u[j]-u_i);
711 }
712 out[i]+=a*sum;
713 }
714 }
715 }
716 }
717

Properties

Name Value
svn:mergeinfo /branches/amg_from_3530/paso/src/FCT_Solver.cpp:3531-3826 /branches/lapack2681/paso/src/FCT_Solver.cpp:2682-2741 /branches/pasowrap/paso/src/FCT_Solver.cpp:3661-3674 /branches/py3_attempt2/paso/src/FCT_Solver.cpp:3871-3891 /branches/restext/paso/src/FCT_Solver.cpp:2610-2624 /branches/ripleygmg_from_3668/paso/src/FCT_Solver.cpp:3669-3791 /branches/stage3.0/paso/src/FCT_Solver.cpp:2569-2590 /branches/symbolic_from_3470/paso/src/FCT_Solver.cpp:3471-3974 /branches/symbolic_from_3470/ripley/test/python/paso/src/FCT_Solver.cpp:3517-3974 /release/3.0/paso/src/FCT_Solver.cpp:2591-2601 /trunk/paso/src/FCT_Solver.cpp:4257-4344 /trunk/ripley/test/python/paso/src/FCT_Solver.cpp:3480-3515

  ViewVC Help
Powered by ViewVC 1.1.26