/[escript]/trunk/paso/src/FCT_Solver.cpp
ViewVC logotype

Contents of /trunk/paso/src/FCT_Solver.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4867 - (show annotations)
Fri Apr 11 12:29:49 2014 UTC (6 years, 1 month ago) by caltinay
File size: 27952 byte(s)
paso: starting to polish

1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2014 by University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Open Software License version 3.0
9 * http://www.opensource.org/licenses/osl-3.0.php
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development 2012-2013 by School of Earth Sciences
13 * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 *
15 *****************************************************************************/
16
17
18 /****************************************************************************/
19
20 /* Paso: Transport solver with flux correction (L is row sum zero)
21 *
22 * - Mv_t=Lv v(0)=u
23 *
24 * to return v(dt)
25 *
26 */
27 /****************************************************************************/
28
29 /* Author: l.gross@uq.edu.au */
30
31 /****************************************************************************/
32
33 #include "FCT_Solver.h"
34 #include "Preconditioner.h"
35 #include "PasoUtil.h"
36
37 namespace paso {
38
39 FCT_Solver::FCT_Solver(const_TransportProblem_ptr tp, Options* options) :
40 transportproblem(tp),
41 omega(0),
42 z(NULL),
43 du(NULL)
44 {
45 const dim_t blockSize = tp->getBlockSize();
46 const dim_t n = tp->getTotalNumRows();
47 mpi_info = Esys_MPIInfo_getReference(tp->mpi_info);
48 flux_limiter = new FCT_FluxLimiter(tp);
49 b = new double[n];
50 if (options->ode_solver == PASO_CRANK_NICOLSON || options->ode_solver == PASO_BACKWARD_EULER) {
51 du = new double[n];
52 z = new double[n];
53 }
54 u_coupler.reset(new Coupler(tp->borrowConnector(), blockSize));
55 u_old_coupler.reset(new Coupler(tp->borrowConnector(), blockSize));
56
57 if (options->ode_solver == PASO_LINEAR_CRANK_NICOLSON) {
58 method = PASO_LINEAR_CRANK_NICOLSON;
59 } else if (options->ode_solver == PASO_CRANK_NICOLSON) {
60 method = PASO_CRANK_NICOLSON;
61 } else if (options->ode_solver == PASO_BACKWARD_EULER) {
62 method = PASO_BACKWARD_EULER;
63 } else {
64 Esys_setError(VALUE_ERROR, "FCT_Solver: unknown integration scheme.");
65 method = UNKNOWN;
66 }
67 }
68
69 FCT_Solver::~FCT_Solver()
70 {
71 delete flux_limiter;
72 Esys_MPIInfo_free(mpi_info);
73 delete[] b;
74 delete[] z;
75 delete[] du;
76 }
77
78 // modifies the main diagonal of the iteration matrix to introduce new dt
79 void FCT_Solver::initialize(double _dt, Options* options, Paso_Performance* pp)
80 {
81 const_TransportProblem_ptr fctp(transportproblem);
82 const index_t* main_iptr = fctp->borrowMainDiagonalPointer();
83 const dim_t n = fctp->transport_matrix->getTotalNumRows();
84 const double theta = getTheta();
85 omega = 1. / (_dt * theta);
86 dim_t i;
87 Options options2;
88
89 Paso_solve_free(fctp->iteration_matrix.get());
90 // fctp->iteration_matrix[i,i]=m[i]/(dt theta) -l[i,i]
91 dt = _dt;
92 #pragma omp parallel for private(i)
93 for (i = 0; i < n; ++i) {
94 const double m_i = fctp->lumped_mass_matrix[i];
95 const double l_ii = fctp->main_diagonal_low_order_transport_matrix[i];
96 if ( m_i > 0 ) {
97 fctp->iteration_matrix->mainBlock->val[main_iptr[i]] = m_i * omega - l_ii;
98 } else {
99 fctp->iteration_matrix->mainBlock->val[main_iptr[i]] = ABS(m_i * omega - l_ii)/(EPSILON*EPSILON);
100 }
101 }
102
103 // allocate preconditioner/solver
104 options2.verbose = options->verbose;
105 if (method == PASO_LINEAR_CRANK_NICOLSON) {
106 options2.preconditioner = PASO_GS;
107 } else {
108 options2.preconditioner = PASO_JACOBI;
109 //options2.preconditioner = PASO_GS;
110 }
111 options2.use_local_preconditioner = false;
112 options2.sweeps = -1;
113
114 Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER_INIT);
115 fctp->iteration_matrix->setPreconditioner(&options2);
116 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER_INIT);
117 }
118
119 // entry point for update procedures
120 err_t FCT_Solver::update(double* u, double* u_old, Options* options, Paso_Performance* pp)
121 {
122 err_t err_out = SOLVER_NO_ERROR;
123
124 if (method == PASO_LINEAR_CRANK_NICOLSON) {
125 err_out = updateLCN(u, u_old, options, pp);
126 } else if (method == PASO_CRANK_NICOLSON) {
127 err_out = updateNL(u, u_old, options, pp);
128 } else if (method == PASO_BACKWARD_EULER) {
129 err_out = updateNL(u, u_old, options, pp);
130 } else {
131 err_out = SOLVER_INPUT_ERROR;
132 }
133 return err_out;
134 }
135
136 /// linear crank-nicolson update
137 err_t FCT_Solver::updateLCN(double* u, double* u_old, Options* options, Paso_Performance *pp)
138 {
139 dim_t sweep_max, i;
140 double const RTOL = options->tolerance;
141 const dim_t n = transportproblem->getTotalNumRows();
142 SystemMatrix_ptr iteration_matrix(transportproblem->iteration_matrix);
143 const index_t* main_iptr = transportproblem->borrowMainDiagonalPointer();
144 err_t errorCode = SOLVER_NO_ERROR;
145 double norm_u_tilde;
146
147 u_old_coupler->startCollect(u_old);
148 u_old_coupler->finishCollect();
149
150 // b[i]=m*u_tilde[i] = m u_old[i] + dt/2 sum_{j <> i} l_{ij}*(u_old[j]-u_old[i])
151 // = u_tilde[i] = u_old[i] where constraint m<0.
152 // note that iteration_matrix stores the negative values of the
153 // low order transport matrix l. Therefore a=-dt*0.5 is used.
154
155 setMuPaLu(b, u_old_coupler, -dt*0.5);
156 /* solve for u_tilde : u_tilda = m^{-1} * b */
157 flux_limiter->setU_tilde(b);
158 // u_tilde_connector is completed
159
160 // calculate anti-diffusive fluxes for u_tilde
161 setAntiDiffusionFlux_linearCN(flux_limiter->antidiffusive_fluxes);
162
163 /* b_i += sum_{j} limitation factor_{ij} * antidiffusive_flux_{ij} */
164 flux_limiter->addLimitedFluxes_Start();
165 flux_limiter->addLimitedFluxes_Complete(b);
166
167 util::scale(n, b, omega);
168 // solve (m-dt/2*L) u = b in the form (omega*m-L) u = b * omega with omega*dt/2=1
169 #pragma omp for private(i) schedule(static)
170 for (i = 0; i < n; ++i) {
171 if (!(transportproblem->lumped_mass_matrix[i] > 0)) {
172 b[i] = flux_limiter->u_tilde[i]
173 * transportproblem->iteration_matrix->mainBlock->val[main_iptr[i]];
174 }
175 }
176 // initial guess is u<- -u + 2*u_tilde
177 util::update(n, -1., u, 2., flux_limiter->u_tilde);
178
179 sweep_max = MAX((int) (- 2 * log(RTOL)/log(2.)-0.5),1);
180 norm_u_tilde = util::lsup(n, flux_limiter->u_tilde, flux_limiter->mpi_info);
181 if (options->verbose) {
182 printf("FCT_Solver::updateLCN: u_tilde lsup = %e (rtol = %e, max. sweeps = %d)\n", norm_u_tilde, RTOL*norm_u_tilde, sweep_max);
183 }
184 errorCode = Preconditioner_Smoother_solve_byTolerance(iteration_matrix,
185 ((Preconditioner*)(iteration_matrix->solver_p))->gs, u, b, RTOL,
186 &sweep_max, true);
187 if (errorCode == PRECONDITIONER_NO_ERROR) {
188 if (options->verbose) printf("FCT_Solver::updateLCN: convergence after %d Gauss-Seidel steps.\n",sweep_max);
189 errorCode = SOLVER_NO_ERROR;
190 } else {
191 if (options->verbose) printf("FCT_Solver::updateLCN: Gauss-Seidel failed within %d stesp (rel. tolerance %e).\n",sweep_max,RTOL);
192 errorCode = SOLVER_MAXITER_REACHED;
193 }
194 return errorCode;
195 }
196
197 err_t FCT_Solver::updateNL(double* u, double *u_old, Options* options,
198 Paso_Performance *pp)
199 {
200 // number of rates >=critical_rate accepted before divergence is triggered
201 const dim_t num_critical_rates_max = 3;
202 // expected value of convergence rate
203 const double critical_rate = 0.95;
204
205 const_TransportProblem_ptr fctp(transportproblem);
206 dim_t i;
207 double norm_u_tilde, norm_du=LARGE_POSITIVE_FLOAT, norm_du_old, rate=1.;
208 const dim_t n = fctp->transport_matrix->getTotalNumRows();
209 const double atol = options->absolute_tolerance;
210 const double rtol = options->tolerance;
211 const dim_t max_m = options->iter_max;
212 dim_t m = 0, num_critical_rates = 0;
213 err_t errorCode = SOLVER_NO_ERROR;
214 bool converged=false, max_m_reached=false, diverged=false;
215 /* //////////////////////////////////////////////////////////////////// */
216
217 options->num_iter=0;
218 u_old_coupler->startCollect(u_old);
219 u_old_coupler->finishCollect();
220 // prepare u_tilde and flux limiter
221 if (method == PASO_BACKWARD_EULER) {
222 // b[i]=m_i* u_old[i]
223 #pragma omp for private(i) schedule(static)
224 for (i = 0; i < n; ++i) {
225 if (fctp->lumped_mass_matrix[i] > 0 ) {
226 b[i]=u_old[i]* fctp->lumped_mass_matrix[i];
227 } else {
228 b[i]=u_old[i];
229 }
230 }
231 } else {
232 /* b[i]=m_i* u_old[i] + dt/2 sum_{j <> i} l_{ij}*(u_old[j]-u_old[i]) = m_i * u_tilde_i where m_i>0
233 * = u_old[i] otherwise
234 * note that iteration_matrix stores the negative values of the
235 * low order transport matrix l. Therefore a=-dt*0.5 is used. */
236 setMuPaLu(b, u_old_coupler, -dt*0.5);
237 }
238 flux_limiter->setU_tilde(b); // u_tilde = m^{-1} b */
239 // u_tilde_connector is completed
240 /************************************************************************/
241 // calculate stopping criterion
242 norm_u_tilde=util::lsup(n, flux_limiter->u_tilde, flux_limiter->mpi_info);
243 const double ATOL = rtol * norm_u_tilde + atol;
244 if (options->verbose)
245 printf("FCT_Solver::updateNL: iteration starts u_tilde lsup = %e "
246 "(abs. tol = %e)\n", norm_u_tilde, ATOL);
247
248 // u_old is an initial guess for u
249 util::copy(n, u, u_old);
250
251 while (!converged && !diverged && !max_m_reached && Esys_noError()) {
252 u_coupler->startCollect(u);
253 u_coupler->finishCollect();
254
255 // set antidiffusive_flux_{ij} for u
256 if (method == PASO_BACKWARD_EULER) {
257 setAntiDiffusionFlux_BE(flux_limiter->antidiffusive_fluxes);
258 } else {
259 setAntiDiffusionFlux_CN(flux_limiter->antidiffusive_fluxes);
260 }
261 // start the calculation of the limitation factors_{fct_solver->ij}
262 flux_limiter->addLimitedFluxes_Start(); // uses u_tilde
263
264 /*
265 * z_m[i]=b[i] - (m_i*u[i] - omega*sum_{j<>i} l_{ij} (u[j]-u[i]) ) where m_i>0
266 * ==b[i] - u[i] = u_tilda[i]-u[i] =0 otherwise
267 *
268 * omega = dt/2 or dt .
269 *
270 * note that iteration_matrix stores the negative values of the
271 * low order transport matrix l. Therefore a=dt*theta is used.
272 */
273 if (method == PASO_BACKWARD_EULER) {
274 setMuPaLu(z, u_coupler, dt);
275 } else {
276 setMuPaLu(z, u_coupler, dt/2);
277 }
278
279 util::update(n, -1., z, 1., b); // z=b-z
280
281 // z_i += sum_{j} limitation factor_{ij} * antidiffusive_flux_{ij}
282 flux_limiter->addLimitedFluxes_Complete(z);
283
284 // we solve (m/omega - L ) * du = z
285 if (method == PASO_BACKWARD_EULER) {
286 dim_t cntIter = options->iter_max;
287 double tol= util::l2(n, z, fctp->mpi_info) ;
288
289 if (m==0) {
290 tol *= 0.5;
291 } else {
292 tol *= MIN(MAX(rate*rate, 1e-2), 0.5);
293 }
294 // use BiCGStab with Jacobi preconditioner ( m - omega * L )
295 util::zeroes(n,du);
296 errorCode = Solver_BiCGStab(fctp->iteration_matrix, z, du, &cntIter, &tol, pp);
297
298 // errorCode = Solver_GMRES(fctp->iteration_matrix, z, du, &cntIter, &tol, 10, 2000, pp);
299 if (options->verbose)
300 printf("FCT_Solver::updateNL: BiCGStab completed after %d steps (residual =%e).\n",cntIter, tol);
301 options->num_iter += cntIter;
302 if (errorCode != SOLVER_NO_ERROR) break;
303 } else {
304 // just use the main diagonal of (m/omega - L )
305
306 Preconditioner_Smoother_solve(fctp->iteration_matrix,
307 ((Preconditioner*) (fctp->iteration_matrix->solver_p))->jacobi,
308 du, z, 1, false);
309
310 options->num_iter++;
311 }
312
313 util::update(n, 1., u, omega, du);
314 norm_du_old = norm_du;
315 norm_du = util::lsup(n, du, fctp->mpi_info);
316 if (m == 0) {
317 if (options->verbose)
318 printf("FCT_Solver::updateNL: step %d: increment= %e\n", m+1,
319 norm_du * omega);
320 } else {
321 if (norm_du_old > 0.) {
322 rate = norm_du/norm_du_old;
323 } else if (norm_du <= 0.) {
324 rate = 0.;
325 } else {
326 rate = LARGE_POSITIVE_FLOAT;
327 }
328 if (options->verbose)
329 printf("FCT_Solver::updateNL: step %d: increment= %e (rate = %e)\n",m+1, norm_du * omega, rate);
330 num_critical_rates += (rate<critical_rate ? 0 : 1);
331 max_m_reached = (m>max_m);
332 diverged = (num_critical_rates >= num_critical_rates_max);
333 converged = (norm_du * omega <= ATOL);
334 }
335 m++;
336 } // end of while loop
337 if (errorCode == SOLVER_NO_ERROR) {
338 if (converged) {
339 if (options->verbose)
340 printf("FCT_Solver::updateNL: iteration is completed.\n");
341 errorCode = SOLVER_NO_ERROR;
342 } else if (diverged) {
343 if (options->verbose)
344 printf("FCT_Solver::updateNL: divergence.\n");
345 errorCode = SOLVER_DIVERGENCE;
346 } else if (max_m_reached) {
347 if (options->verbose)
348 printf("FCT_Solver::updateNL: maximum number of iteration steps reached.\n");
349 errorCode = SOLVER_MAXITER_REACHED;
350 }
351 }
352 return errorCode;
353 }
354
355
356 /*
357 * AntiDiffusionFlux:
358 *
359 * f_{ij} = (m_{ij} - dt (1-theta) d_{ij}) (u_old[j]-u_old[i]) - (m_{ij} + dt theta d_{ij}) (u[j]-u[i])
360 *
361 * m=fc->mass matrix
362 * d=artificial diffusion matrix = L - K = - fc->iteration matrix - fc->transport matrix (away from main diagonal)
363 *
364 * for CN : theta = 0.5
365 * for BE : theta = 1.
366 */
367
368 void FCT_Solver::setAntiDiffusionFlux_CN(SystemMatrix_ptr flux_matrix)
369 {
370 const double* u = u_coupler->borrowLocalData();
371 const double* u_old = u_old_coupler->borrowLocalData();
372 const double* remote_u = u_coupler->borrowRemoteData();
373 const double* remote_u_old = u_old_coupler->borrowRemoteData();
374 const double dt_half = dt/2;
375 const_TransportProblem_ptr fct(transportproblem);
376 const_SystemMatrixPattern_ptr pattern(fct->iteration_matrix->pattern);
377 const dim_t n = fct->iteration_matrix->getTotalNumRows();
378
379 #pragma omp parallel for
380 for (dim_t i = 0; i < n; ++i) {
381 const double u_i = u[i];
382 const double u_old_i = u_old[i];
383
384 #pragma ivdep
385 for (index_t iptr_ij = pattern->mainPattern->ptr[i];
386 iptr_ij < pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
387 const index_t j = pattern->mainPattern->index[iptr_ij];
388 const double m_ij = fct->mass_matrix->mainBlock->val[iptr_ij];
389 // this is in fact -d_ij
390 const double d_ij = fct->transport_matrix->mainBlock->val[iptr_ij]+
391 fct->iteration_matrix->mainBlock->val[iptr_ij];
392 const double u_old_j = u_old[j];
393 const double u_j = u[j];
394
395 // (m_{ij} - dt (1-theta) d_{ij}) (u_old[j]-u_old[i]) - (m_{ij} + dt theta d_{ij}) (u[j]-u[i])
396 flux_matrix->mainBlock->val[iptr_ij] =
397 (m_ij+dt_half*d_ij)*(u_old_j-u_old_i) -
398 (m_ij-dt_half*d_ij)*(u_j-u_i);
399
400 }
401 #pragma ivdep
402 for (index_t iptr_ij = pattern->col_couplePattern->ptr[i];
403 iptr_ij < pattern->col_couplePattern->ptr[i+1]; iptr_ij++) {
404 const index_t j = pattern->col_couplePattern->index[iptr_ij];
405 const double m_ij = fct->mass_matrix->col_coupleBlock->val[iptr_ij];
406 // this is in fact -d_ij
407 const double d_ij =
408 fct->transport_matrix->col_coupleBlock->val[iptr_ij] +
409 fct->iteration_matrix->col_coupleBlock->val[iptr_ij];
410 const double u_old_j = remote_u_old[j];
411 const double u_j = remote_u[j];
412 flux_matrix->col_coupleBlock->val[iptr_ij] =
413 (m_ij+dt_half*d_ij)*(u_old_j-u_old_i) -
414 (m_ij-dt_half*d_ij)*(u_j-u_i);
415 }
416 }
417 }
418
419 void FCT_Solver::setAntiDiffusionFlux_BE(SystemMatrix_ptr flux_matrix)
420 {
421 const double* u = u_coupler->borrowLocalData();
422 const double* u_old = u_old_coupler->borrowLocalData();
423 const double* remote_u = u_coupler->borrowRemoteData();
424 const double* remote_u_old = u_old_coupler->borrowRemoteData();
425 const_TransportProblem_ptr fct(transportproblem);
426 const_SystemMatrixPattern_ptr pattern(fct->iteration_matrix->pattern);
427 const dim_t n = fct->iteration_matrix->getTotalNumRows();
428
429 #pragma omp parallel for
430 for (dim_t i = 0; i < n; ++i) {
431 const double u_i = u[i];
432 const double u_old_i = u_old[i];
433 #pragma ivdep
434 for (index_t iptr_ij = pattern->mainPattern->ptr[i];
435 iptr_ij < pattern->mainPattern->ptr[i+1]; iptr_ij++) {
436 const index_t j = pattern->mainPattern->index[iptr_ij];
437 const double m_ij = fct->mass_matrix->mainBlock->val[iptr_ij];
438 // this is in fact -d_ij
439 const double d_ij = fct->transport_matrix->mainBlock->val[iptr_ij]+
440 fct->iteration_matrix->mainBlock->val[iptr_ij];
441 const double u_old_j = u_old[j];
442 const double u_j = u[j];
443
444 flux_matrix->mainBlock->val[iptr_ij] =
445 m_ij*(u_old_j-u_old_i) - (m_ij-dt*d_ij)*(u_j-u_i);
446 }
447 #pragma ivdep
448 for (index_t iptr_ij = pattern->col_couplePattern->ptr[i];
449 iptr_ij < pattern->col_couplePattern->ptr[i+1]; iptr_ij++) {
450 const index_t j = pattern->col_couplePattern->index[iptr_ij];
451 const double m_ij = fct->mass_matrix->col_coupleBlock->val[iptr_ij];
452 // this is in fact -d_ij
453 const double d_ij =
454 fct->transport_matrix->col_coupleBlock->val[iptr_ij] +
455 fct->iteration_matrix->col_coupleBlock->val[iptr_ij];
456 const double u_old_j = remote_u_old[j];
457 const double u_j = remote_u[j];
458
459 flux_matrix->col_coupleBlock->val[iptr_ij] =
460 m_ij*(u_old_j-u_old_i) - (m_ij-dt*d_ij)*(u_j-u_i);
461 }
462 }
463 }
464
465 /* special version of the ant-diffusive fluxes for the linear Crank-Nicolson
466 * scheme. In fact this is evaluated for u = 2*u_tilde - u_old which is the
467 * predictor of the solution of the the stabilised problem at time dt using
468 * the forward Euler scheme
469 *
470 * f_{ij} = (m_{ij} - dt/2 d_{ij}) (u_old[j]-u_old[i]) - (m_{ij} + dt/2 d_{ij}) (u[j]-u[i])
471 * = (m_{ij} - dt/2 d_{ij}) * (u_old[j]-u_old[i]) - (m_{ij} + dt/2 d_{ij}) * ( 2*(u_tilde[j]-u_tilde[i]) - (u_old[j] -u_old [i]) )
472 * = 2* m_{ij} * ( (u_old[j]-u_tilde[j] - (u_old[i]) - u_tilde[i]) ) - dt d_{ij} * (u_tilde[j]-u_tilde[i])
473 *
474 */
475 void FCT_Solver::setAntiDiffusionFlux_linearCN(SystemMatrix_ptr flux_matrix)
476 {
477 const_Coupler_ptr u_tilde_coupler(flux_limiter->u_tilde_coupler);
478 const double* u_tilde = u_tilde_coupler->borrowLocalData();
479 const double* u_old = u_old_coupler->borrowLocalData();
480 const double* remote_u_tilde = u_tilde_coupler->borrowRemoteData();
481 const double* remote_u_old = u_old_coupler->borrowRemoteData();
482 const_TransportProblem_ptr fct(transportproblem);
483 const_SystemMatrixPattern_ptr pattern(fct->iteration_matrix->pattern);
484 const dim_t n = fct->iteration_matrix->getTotalNumRows();
485
486 #pragma omp parallel for
487 for (dim_t i = 0; i < n; ++i) {
488 const double u_tilde_i = u_tilde[i];
489 const double u_old_i = u_old[i];
490 const double du_i = u_tilde_i - u_old_i;
491 #pragma ivdep
492 for (index_t iptr_ij = pattern->mainPattern->ptr[i];
493 iptr_ij < pattern->mainPattern->ptr[i+1]; iptr_ij++) {
494 const index_t j = pattern->mainPattern->index[iptr_ij];
495 const double m_ij = fct->mass_matrix->mainBlock->val[iptr_ij];
496 // this is in fact -d_ij
497 const double d_ij = fct->transport_matrix->mainBlock->val[iptr_ij]+
498 fct->iteration_matrix->mainBlock->val[iptr_ij];
499 const double u_tilde_j = u_tilde[j];
500 const double u_old_j = u_old[j];
501 const double du_j = u_tilde_j - u_old_j;
502
503 flux_matrix->mainBlock->val[iptr_ij] = 2 * m_ij * ( du_i - du_j ) -
504 dt * d_ij * ( u_tilde_i - u_tilde_j);
505 }
506 #pragma ivdep
507 for (index_t iptr_ij=pattern->col_couplePattern->ptr[i];
508 iptr_ij<pattern->col_couplePattern->ptr[i+1]; iptr_ij++) {
509
510 const index_t j = pattern->col_couplePattern->index[iptr_ij];
511 const double m_ij = fct->mass_matrix->col_coupleBlock->val[iptr_ij];
512 // this is in fact -d_ij
513 const double d_ij =
514 fct->transport_matrix->col_coupleBlock->val[iptr_ij] +
515 fct->iteration_matrix->col_coupleBlock->val[iptr_ij];
516 const double u_tilde_j = remote_u_tilde[j];
517 const double u_old_j = remote_u_old[j];
518 const double du_j = u_tilde_j - u_old_j;
519
520 flux_matrix->col_coupleBlock->val[iptr_ij] =
521 2*m_ij * ( du_i - du_j ) - dt * d_ij * (u_tilde_i - u_tilde_j);
522 }
523 }
524 }
525
526 /****************************************************************************/
527
528 double FCT_Solver::getSafeTimeStepSize(TransportProblem_ptr fctp)
529 {
530 double dt_max = LARGE_POSITIVE_FLOAT;
531 const dim_t n = fctp->transport_matrix->getTotalNumRows();
532
533 // set low order transport operator
534 setLowOrderOperator(fctp);
535
536 if (Esys_noError()) {
537 // calculate time step size
538 dt_max = LARGE_POSITIVE_FLOAT;
539 #pragma omp parallel
540 {
541 double dt_max_loc = LARGE_POSITIVE_FLOAT;
542 #pragma omp for schedule(static)
543 for (dim_t i=0; i<n; ++i) {
544 const double l_ii = fctp->main_diagonal_low_order_transport_matrix[i];
545 const double m_i = fctp->lumped_mass_matrix[i];
546 if (m_i > 0) {
547 if (l_ii<0)
548 dt_max_loc = MIN(dt_max_loc,m_i/(-l_ii));
549 }
550 }
551 #pragma omp critical
552 {
553 dt_max = MIN(dt_max,dt_max_loc);
554 }
555 }
556 #ifdef ESYS_MPI
557 double dt_max_loc = dt_max;
558 MPI_Allreduce(&dt_max_loc, &dt_max, 1, MPI_DOUBLE, MPI_MIN, fctp->mpi_info->comm);
559 #endif
560 if (dt_max < LARGE_POSITIVE_FLOAT)
561 dt_max *= 2.;
562 }
563 return dt_max;
564 }
565
566 /* Creates the low order transport matrix and stores its negative values
567 * into the iteration_matrix except for the main diagonal which is stored
568 * separately.
569 * If fc->iteration_matrix==NULL, fc->iteration_matrix is allocated
570 *
571 * a=transport_matrix
572 * b= low_order_transport_matrix = - iteration_matrix
573 * c=main diagonal low_order_transport_matrix
574 * initialise c[i] mit a[i,i]
575 *
576 * d_ij=max(0,-a[i,j],-a[j,i])
577 * b[i,j]=-(a[i,j]+d_ij)
578 * c[i]-=d_ij
579 */
580
581 void FCT_Solver::setLowOrderOperator(TransportProblem_ptr fc)
582 {
583 const index_t* main_iptr = fc->borrowMainDiagonalPointer();
584
585 if (!fc->iteration_matrix.get()) {
586 fc->iteration_matrix.reset(new SystemMatrix(
587 fc->transport_matrix->type, fc->transport_matrix->pattern,
588 fc->transport_matrix->row_block_size,
589 fc->transport_matrix->col_block_size, true));
590 }
591
592 const_SystemMatrixPattern_ptr pattern(fc->iteration_matrix->pattern);
593 const dim_t n = fc->iteration_matrix->getTotalNumRows();
594 #pragma omp parallel for
595 for (dim_t i = 0; i < n; ++i) {
596 double sum = fc->transport_matrix->mainBlock->val[main_iptr[i]];
597 //printf("sum[%d] = %e -> ", i, sum);
598
599 // look at a[i,j]
600 for (index_t iptr_ij=pattern->mainPattern->ptr[i];iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
601 const index_t j = pattern->mainPattern->index[iptr_ij];
602 const double rtmp1 = fc->transport_matrix->mainBlock->val[iptr_ij];
603 if (j != i) {
604 // find entry a[j,i]
605 #pragma ivdep
606 for (index_t iptr_ji=pattern->mainPattern->ptr[j]; iptr_ji<pattern->mainPattern->ptr[j+1]; ++iptr_ji) {
607
608 if (pattern->mainPattern->index[iptr_ji] == i) {
609 const double rtmp2=fc->transport_matrix->mainBlock->val[iptr_ji];
610 //printf("a[%d,%d]=%e\n",i,j,rtmp1);
611 //printf("a[%d,%d]=%e\n",j,i,rtmp2);
612 const double d_ij=-MIN3(0.,rtmp1,rtmp2);
613 fc->iteration_matrix->mainBlock->val[iptr_ij]=-(rtmp1+d_ij);
614 //printf("l[%d,%d]=%e\n",i,j,fc->iteration_matrix->mainBlock->val[iptr_ij]);
615 sum-=d_ij;
616 break;
617 }
618 }
619 }
620 }
621 for (index_t iptr_ij=pattern->col_couplePattern->ptr[i];iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
622 const index_t j = pattern->col_couplePattern->index[iptr_ij];
623 const double rtmp1 = fc->transport_matrix->col_coupleBlock->val[iptr_ij];
624 // find entry a[j,i]
625 #pragma ivdep
626 for (index_t iptr_ji=pattern->row_couplePattern->ptr[j]; iptr_ji<pattern->row_couplePattern->ptr[j+1]; ++iptr_ji) {
627 if (pattern->row_couplePattern->index[iptr_ji]==i) {
628 const double rtmp2=fc->transport_matrix->row_coupleBlock->val[iptr_ji];
629 const double d_ij=-MIN3(0.,rtmp1,rtmp2);
630 fc->iteration_matrix->col_coupleBlock->val[iptr_ij]=-(rtmp1+d_ij);
631 fc->iteration_matrix->row_coupleBlock->val[iptr_ji]=-(rtmp2+d_ij);
632 sum-=d_ij;
633 break;
634 }
635 }
636 }
637 // set main diagonal entry
638 fc->main_diagonal_low_order_transport_matrix[i] = sum;
639 //printf("%e\n", sum);
640 }
641 }
642
643 /*
644 * out_i=m_i u_i + a * \sum_{j <> i} l_{ij} (u_j-u_i) where m_i>0
645 * = u_i where m_i<=0
646 *
647 */
648 void FCT_Solver::setMuPaLu(double* out, const_Coupler_ptr coupler, double a)
649 {
650 const_SystemMatrix_ptr L(transportproblem->iteration_matrix);
651 const double* M = transportproblem->lumped_mass_matrix;
652 const_SystemMatrixPattern_ptr pattern(L->pattern);
653 const double* u = coupler->borrowLocalData();
654 const double* remote_u = coupler->borrowRemoteData();
655 const dim_t n = L->getTotalNumRows();
656
657 #pragma omp parallel for
658 for (dim_t i = 0; i < n; ++i) {
659 if (M[i] > 0.) {
660 out[i] = M[i]*u[i];
661 } else {
662 out[i] = u[i];
663 }
664 }
665 if (ABS(a) > 0) {
666 #pragma omp parallel for
667 for (dim_t i = 0; i < n; ++i) {
668 if (M[i] > 0.) {
669 double sum = 0;
670 const double u_i = u[i];
671 #pragma ivdep
672 for (index_t iptr_ij = pattern->mainPattern->ptr[i];
673 iptr_ij < pattern->mainPattern->ptr[i+1];
674 iptr_ij++) {
675 const index_t j = pattern->mainPattern->index[iptr_ij];
676 const double l_ij = L->mainBlock->val[iptr_ij];
677 sum += l_ij*(u[j]-u_i);
678 }
679 #pragma ivdep
680 for (index_t iptr_ij = pattern->col_couplePattern->ptr[i];
681 iptr_ij < pattern->col_couplePattern->ptr[i+1];
682 iptr_ij++) {
683 const index_t j=pattern->col_couplePattern->index[iptr_ij];
684 const double l_ij = L->col_coupleBlock->val[iptr_ij];
685 sum += l_ij*(remote_u[j]-u_i);
686 }
687 out[i] += a*sum;
688 }
689 }
690 }
691 }
692
693 } // namespace paso
694

Properties

Name Value
svn:mergeinfo /branches/amg_from_3530/paso/src/FCT_Solver.cpp:3531-3826 /branches/lapack2681/paso/src/FCT_Solver.cpp:2682-2741 /branches/pasowrap/paso/src/FCT_Solver.cpp:3661-3674 /branches/py3_attempt2/paso/src/FCT_Solver.cpp:3871-3891 /branches/restext/paso/src/FCT_Solver.cpp:2610-2624 /branches/ripleygmg_from_3668/paso/src/FCT_Solver.cpp:3669-3791 /branches/stage3.0/paso/src/FCT_Solver.cpp:2569-2590 /branches/symbolic_from_3470/paso/src/FCT_Solver.cpp:3471-3974 /branches/symbolic_from_3470/ripley/test/python/paso/src/FCT_Solver.cpp:3517-3974 /release/3.0/paso/src/FCT_Solver.cpp:2591-2601 /trunk/paso/src/FCT_Solver.cpp:4257-4344 /trunk/ripley/test/python/paso/src/FCT_Solver.cpp:3480-3515

  ViewVC Help
Powered by ViewVC 1.1.26