/[escript]/branches/trilinos_from_5897/paso/src/FCT_Solver.cpp
ViewVC logotype

Contents of /branches/trilinos_from_5897/paso/src/FCT_Solver.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 5933 - (show annotations)
Wed Feb 17 23:53:30 2016 UTC (22 months ago) by caltinay
File size: 28505 byte(s)
sync with trunk.

1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2016 by The University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Open Software License version 3.0
9 * http://www.opensource.org/licenses/osl-3.0.php
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development 2012-2013 by School of Earth Sciences
13 * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 *
15 *****************************************************************************/
16
17
18 /****************************************************************************/
19
20 /* Paso: Transport solver with flux correction (L is row sum zero)
21 *
22 * - Mv_t=Lv v(0)=u
23 *
24 * to return v(dt)
25 *
26 */
27 /****************************************************************************/
28
29 /* Author: l.gross@uq.edu.au */
30
31 /****************************************************************************/
32
33 #include <iostream>
34 #include "FCT_Solver.h"
35 #include "Preconditioner.h"
36 #include "PasoUtil.h"
37
38 namespace paso {
39
40 FCT_Solver::FCT_Solver(const_TransportProblem_ptr tp, Options* options) :
41 transportproblem(tp),
42 omega(0),
43 z(NULL),
44 du(NULL)
45 {
46 const dim_t blockSize = tp->getBlockSize();
47 const dim_t n = tp->getTotalNumRows();
48 mpi_info = tp->mpi_info;
49 flux_limiter = new FCT_FluxLimiter(tp);
50 b = new double[n];
51 if (options->ode_solver == PASO_CRANK_NICOLSON || options->ode_solver == PASO_BACKWARD_EULER) {
52 du = new double[n];
53 z = new double[n];
54 }
55 u_coupler.reset(new Coupler(tp->borrowConnector(), blockSize));
56 u_old_coupler.reset(new Coupler(tp->borrowConnector(), blockSize));
57
58 if (options->ode_solver == PASO_LINEAR_CRANK_NICOLSON) {
59 method = PASO_LINEAR_CRANK_NICOLSON;
60 } else if (options->ode_solver == PASO_CRANK_NICOLSON) {
61 method = PASO_CRANK_NICOLSON;
62 } else if (options->ode_solver == PASO_BACKWARD_EULER) {
63 method = PASO_BACKWARD_EULER;
64 } else {
65 Esys_setError(VALUE_ERROR, "FCT_Solver: unknown integration scheme.");
66 method = UNKNOWN;
67 }
68 }
69
70 FCT_Solver::~FCT_Solver()
71 {
72 delete flux_limiter;
73 delete[] b;
74 delete[] z;
75 delete[] du;
76 }
77
78 // modifies the main diagonal of the iteration matrix to introduce new dt
79 void FCT_Solver::initialize(double _dt, Options* options, Performance* pp)
80 {
81 const_TransportProblem_ptr fctp(transportproblem);
82 const index_t* main_iptr = fctp->borrowMainDiagonalPointer();
83 const dim_t n = fctp->transport_matrix->getTotalNumRows();
84 const double theta = getTheta();
85 omega = 1. / (_dt * theta);
86 dim_t i;
87 Options options2;
88
89 solve_free(fctp->iteration_matrix.get());
90 // fctp->iteration_matrix[i,i]=m[i]/(dt theta) -l[i,i]
91 dt = _dt;
92 #pragma omp parallel for private(i)
93 for (i = 0; i < n; ++i) {
94 const double m_i = fctp->lumped_mass_matrix[i];
95 const double l_ii = fctp->main_diagonal_low_order_transport_matrix[i];
96 if ( m_i > 0 ) {
97 fctp->iteration_matrix->mainBlock->val[main_iptr[i]] = m_i * omega - l_ii;
98 } else {
99 fctp->iteration_matrix->mainBlock->val[main_iptr[i]] = std::abs(m_i * omega - l_ii)/(EPSILON*EPSILON);
100 }
101 }
102
103 // allocate preconditioner/solver
104 options2.verbose = options->verbose;
105 if (method == PASO_LINEAR_CRANK_NICOLSON) {
106 options2.preconditioner = PASO_GS;
107 } else {
108 options2.preconditioner = PASO_JACOBI;
109 //options2.preconditioner = PASO_GS;
110 }
111 options2.use_local_preconditioner = false;
112 options2.sweeps = -1;
113
114 Performance_startMonitor(pp, PERFORMANCE_PRECONDITIONER_INIT);
115 fctp->iteration_matrix->setPreconditioner(&options2);
116 Performance_stopMonitor(pp, PERFORMANCE_PRECONDITIONER_INIT);
117 }
118
119 // entry point for update procedures
120 err_t FCT_Solver::update(double* u, double* u_old, Options* options, Performance* pp)
121 {
122 err_t err_out = SOLVER_NO_ERROR;
123
124 if (method == PASO_LINEAR_CRANK_NICOLSON) {
125 err_out = updateLCN(u, u_old, options, pp);
126 } else if (method == PASO_CRANK_NICOLSON) {
127 err_out = updateNL(u, u_old, options, pp);
128 } else if (method == PASO_BACKWARD_EULER) {
129 err_out = updateNL(u, u_old, options, pp);
130 } else {
131 err_out = SOLVER_INPUT_ERROR;
132 }
133 return err_out;
134 }
135
136 /// linear crank-nicolson update
137 err_t FCT_Solver::updateLCN(double* u, double* u_old, Options* options, Performance* pp)
138 {
139 dim_t sweep_max, i;
140 double const RTOL = options->tolerance;
141 const dim_t n = transportproblem->getTotalNumRows();
142 SystemMatrix_ptr iteration_matrix(transportproblem->iteration_matrix);
143 const index_t* main_iptr = transportproblem->borrowMainDiagonalPointer();
144 err_t errorCode = SOLVER_NO_ERROR;
145 double norm_u_tilde;
146
147 u_old_coupler->startCollect(u_old);
148 u_old_coupler->finishCollect();
149
150 // b[i]=m*u_tilde[i] = m u_old[i] + dt/2 sum_{j <> i} l_{ij}*(u_old[j]-u_old[i])
151 // = u_tilde[i] = u_old[i] where constraint m<0.
152 // note that iteration_matrix stores the negative values of the
153 // low order transport matrix l. Therefore a=-dt*0.5 is used.
154
155 setMuPaLu(b, u_old_coupler, -dt*0.5);
156 /* solve for u_tilde : u_tilda = m^{-1} * b */
157 flux_limiter->setU_tilde(b);
158 // u_tilde_connector is completed
159
160 // calculate anti-diffusive fluxes for u_tilde
161 setAntiDiffusionFlux_linearCN(flux_limiter->antidiffusive_fluxes);
162
163 /* b_i += sum_{j} limitation factor_{ij} * antidiffusive_flux_{ij} */
164 flux_limiter->addLimitedFluxes_Start();
165 flux_limiter->addLimitedFluxes_Complete(b);
166
167 util::scale(n, b, omega);
168 // solve (m-dt/2*L) u = b in the form (omega*m-L) u = b * omega with omega*dt/2=1
169 #pragma omp for private(i) schedule(static)
170 for (i = 0; i < n; ++i) {
171 if (!(transportproblem->lumped_mass_matrix[i] > 0)) {
172 b[i] = flux_limiter->u_tilde[i]
173 * transportproblem->iteration_matrix->mainBlock->val[main_iptr[i]];
174 }
175 }
176 // initial guess is u<- -u + 2*u_tilde
177 util::update(n, -1., u, 2., flux_limiter->u_tilde);
178
179 sweep_max = std::max((int) (- 2 * log(RTOL)/log(2.)-0.5),1);
180 norm_u_tilde = util::lsup(n, flux_limiter->u_tilde, flux_limiter->mpi_info);
181 if (options->verbose) {
182 std::cout << "FCT_Solver::updateLCN: u_tilde lsup = " << norm_u_tilde
183 << " (rtol = " << RTOL*norm_u_tilde << ", max. sweeps = "
184 << sweep_max << ")" << std::endl;
185 }
186 errorCode = Preconditioner_Smoother_solve_byTolerance(iteration_matrix,
187 ((Preconditioner*)(iteration_matrix->solver_p))->gs, u, b, RTOL,
188 &sweep_max, true);
189 if (errorCode == PRECONDITIONER_NO_ERROR) {
190 if (options->verbose)
191 std::cout << "FCT_Solver::updateLCN: convergence after "
192 << sweep_max << " Gauss-Seidel steps." << std::endl;
193 errorCode = SOLVER_NO_ERROR;
194 } else {
195 if (options->verbose)
196 std::cout << "FCT_Solver::updateLCN: Gauss-Seidel failed within "
197 << sweep_max << " steps (rel. tolerance " << RTOL << ")."
198 << std::endl;
199 errorCode = SOLVER_MAXITER_REACHED;
200 }
201 return errorCode;
202 }
203
204 err_t FCT_Solver::updateNL(double* u, double* u_old, Options* options,
205 Performance* pp)
206 {
207 // number of rates >=critical_rate accepted before divergence is triggered
208 const dim_t num_critical_rates_max = 3;
209 // expected value of convergence rate
210 const double critical_rate = 0.95;
211
212 const_TransportProblem_ptr fctp(transportproblem);
213 dim_t i;
214 double norm_u_tilde, norm_du=LARGE_POSITIVE_FLOAT, norm_du_old, rate=1.;
215 const dim_t n = fctp->transport_matrix->getTotalNumRows();
216 const double atol = options->absolute_tolerance;
217 const double rtol = options->tolerance;
218 const dim_t max_m = options->iter_max;
219 dim_t m = 0, num_critical_rates = 0;
220 err_t errorCode = SOLVER_NO_ERROR;
221 bool converged=false, max_m_reached=false, diverged=false;
222 /* //////////////////////////////////////////////////////////////////// */
223
224 options->num_iter=0;
225 u_old_coupler->startCollect(u_old);
226 u_old_coupler->finishCollect();
227 // prepare u_tilde and flux limiter
228 if (method == PASO_BACKWARD_EULER) {
229 // b[i]=m_i* u_old[i]
230 #pragma omp for private(i) schedule(static)
231 for (i = 0; i < n; ++i) {
232 if (fctp->lumped_mass_matrix[i] > 0 ) {
233 b[i]=u_old[i]* fctp->lumped_mass_matrix[i];
234 } else {
235 b[i]=u_old[i];
236 }
237 }
238 } else {
239 /* b[i]=m_i* u_old[i] + dt/2 sum_{j <> i} l_{ij}*(u_old[j]-u_old[i]) = m_i * u_tilde_i where m_i>0
240 * = u_old[i] otherwise
241 * note that iteration_matrix stores the negative values of the
242 * low order transport matrix l. Therefore a=-dt*0.5 is used. */
243 setMuPaLu(b, u_old_coupler, -dt*0.5);
244 }
245 flux_limiter->setU_tilde(b); // u_tilde = m^{-1} b */
246 // u_tilde_connector is completed
247 /************************************************************************/
248 // calculate stopping criterion
249 norm_u_tilde=util::lsup(n, flux_limiter->u_tilde, flux_limiter->mpi_info);
250 const double ATOL = rtol * norm_u_tilde + atol;
251 if (options->verbose)
252 std::cout << "FCT_Solver::updateNL: iteration starts u_tilde lsup = "
253 << norm_u_tilde << " (abs. tol = " << ATOL << ")" << std::endl;
254
255 // u_old is an initial guess for u
256 util::copy(n, u, u_old);
257
258 while (!converged && !diverged && !max_m_reached && Esys_noError()) {
259 u_coupler->startCollect(u);
260 u_coupler->finishCollect();
261
262 // set antidiffusive_flux_{ij} for u
263 if (method == PASO_BACKWARD_EULER) {
264 setAntiDiffusionFlux_BE(flux_limiter->antidiffusive_fluxes);
265 } else {
266 setAntiDiffusionFlux_CN(flux_limiter->antidiffusive_fluxes);
267 }
268 // start the calculation of the limitation factors_{fct_solver->ij}
269 flux_limiter->addLimitedFluxes_Start(); // uses u_tilde
270
271 /*
272 * z_m[i]=b[i] - (m_i*u[i] - omega*sum_{j<>i} l_{ij} (u[j]-u[i]) ) where m_i>0
273 * ==b[i] - u[i] = u_tilda[i]-u[i] =0 otherwise
274 *
275 * omega = dt/2 or dt .
276 *
277 * note that iteration_matrix stores the negative values of the
278 * low order transport matrix l. Therefore a=dt*theta is used.
279 */
280 if (method == PASO_BACKWARD_EULER) {
281 setMuPaLu(z, u_coupler, dt);
282 } else {
283 setMuPaLu(z, u_coupler, dt/2);
284 }
285
286 util::update(n, -1., z, 1., b); // z=b-z
287
288 // z_i += sum_{j} limitation factor_{ij} * antidiffusive_flux_{ij}
289 flux_limiter->addLimitedFluxes_Complete(z);
290
291 // we solve (m/omega - L ) * du = z
292 if (method == PASO_BACKWARD_EULER) {
293 dim_t cntIter = options->iter_max;
294 double tol= util::l2(n, z, fctp->mpi_info) ;
295
296 if (m==0) {
297 tol *= 0.5;
298 } else {
299 tol *= std::min(std::max(rate*rate, 1e-2), 0.5);
300 }
301 // use BiCGStab with Jacobi preconditioner ( m - omega * L )
302 util::zeroes(n,du);
303 errorCode = Solver_BiCGStab(fctp->iteration_matrix, z, du, &cntIter, &tol, pp);
304
305 // errorCode = Solver_GMRES(fctp->iteration_matrix, z, du, &cntIter, &tol, 10, 2000, pp);
306 if (options->verbose)
307 std::cout << "FCT_Solver::updateNL: BiCGStab completed after "
308 << cntIter << " steps (residual = " << tol << ")." << std::endl;
309 options->num_iter += cntIter;
310 if (errorCode != SOLVER_NO_ERROR) break;
311 } else {
312 // just use the main diagonal of (m/omega - L )
313
314 Preconditioner_Smoother_solve(fctp->iteration_matrix,
315 ((Preconditioner*) (fctp->iteration_matrix->solver_p))->jacobi,
316 du, z, 1, false);
317
318 options->num_iter++;
319 }
320
321 util::update(n, 1., u, omega, du);
322 norm_du_old = norm_du;
323 norm_du = util::lsup(n, du, fctp->mpi_info);
324 if (m == 0) {
325 if (options->verbose)
326 std::cout << "FCT_Solver::updateNL: step " << m+1
327 << ": increment = " << norm_du * omega << std::endl;
328 } else {
329 if (norm_du_old > 0.) {
330 rate = norm_du/norm_du_old;
331 } else if (norm_du <= 0.) {
332 rate = 0.;
333 } else {
334 rate = LARGE_POSITIVE_FLOAT;
335 }
336 if (options->verbose)
337 std::cout << "FCT_Solver::updateNL: step " << m+1
338 << ": increment= " << norm_du * omega << " (rate = "
339 << rate << ")" << std::endl;
340 num_critical_rates += (rate<critical_rate ? 0 : 1);
341 max_m_reached = (m>max_m);
342 diverged = (num_critical_rates >= num_critical_rates_max);
343 converged = (norm_du * omega <= ATOL);
344 }
345 m++;
346 } // end of while loop
347 if (errorCode == SOLVER_NO_ERROR) {
348 if (converged) {
349 if (options->verbose)
350 std::cout << "FCT_Solver::updateNL: iteration is completed." << std::endl;
351 errorCode = SOLVER_NO_ERROR;
352 } else if (diverged) {
353 if (options->verbose)
354 std::cout << "FCT_Solver::updateNL: divergence." << std::endl;
355 errorCode = SOLVER_DIVERGENCE;
356 } else if (max_m_reached) {
357 if (options->verbose)
358 std::cout << "FCT_Solver::updateNL: maximum number of iteration steps reached." << std::endl;
359 errorCode = SOLVER_MAXITER_REACHED;
360 }
361 }
362 return errorCode;
363 }
364
365
366 /*
367 * AntiDiffusionFlux:
368 *
369 * f_{ij} = (m_{ij} - dt (1-theta) d_{ij}) (u_old[j]-u_old[i]) - (m_{ij} + dt theta d_{ij}) (u[j]-u[i])
370 *
371 * m=fc->mass matrix
372 * d=artificial diffusion matrix = L - K = - fc->iteration matrix - fc->transport matrix (away from main diagonal)
373 *
374 * for CN : theta = 0.5
375 * for BE : theta = 1.
376 */
377
378 void FCT_Solver::setAntiDiffusionFlux_CN(SystemMatrix_ptr flux_matrix)
379 {
380 const double* u = u_coupler->borrowLocalData();
381 const double* u_old = u_old_coupler->borrowLocalData();
382 const double* remote_u = u_coupler->borrowRemoteData();
383 const double* remote_u_old = u_old_coupler->borrowRemoteData();
384 const double dt_half = dt/2;
385 const_TransportProblem_ptr fct(transportproblem);
386 const_SystemMatrixPattern_ptr pattern(fct->iteration_matrix->pattern);
387 const dim_t n = fct->iteration_matrix->getTotalNumRows();
388
389 #pragma omp parallel for
390 for (dim_t i = 0; i < n; ++i) {
391 const double u_i = u[i];
392 const double u_old_i = u_old[i];
393
394 #pragma ivdep
395 for (index_t iptr_ij = pattern->mainPattern->ptr[i];
396 iptr_ij < pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
397 const index_t j = pattern->mainPattern->index[iptr_ij];
398 const double m_ij = fct->mass_matrix->mainBlock->val[iptr_ij];
399 // this is in fact -d_ij
400 const double d_ij = fct->transport_matrix->mainBlock->val[iptr_ij]+
401 fct->iteration_matrix->mainBlock->val[iptr_ij];
402 const double u_old_j = u_old[j];
403 const double u_j = u[j];
404
405 // (m_{ij} - dt (1-theta) d_{ij}) (u_old[j]-u_old[i]) - (m_{ij} + dt theta d_{ij}) (u[j]-u[i])
406 flux_matrix->mainBlock->val[iptr_ij] =
407 (m_ij+dt_half*d_ij)*(u_old_j-u_old_i) -
408 (m_ij-dt_half*d_ij)*(u_j-u_i);
409
410 }
411 #pragma ivdep
412 for (index_t iptr_ij = pattern->col_couplePattern->ptr[i];
413 iptr_ij < pattern->col_couplePattern->ptr[i+1]; iptr_ij++) {
414 const index_t j = pattern->col_couplePattern->index[iptr_ij];
415 const double m_ij = fct->mass_matrix->col_coupleBlock->val[iptr_ij];
416 // this is in fact -d_ij
417 const double d_ij =
418 fct->transport_matrix->col_coupleBlock->val[iptr_ij] +
419 fct->iteration_matrix->col_coupleBlock->val[iptr_ij];
420 const double u_old_j = remote_u_old[j];
421 const double u_j = remote_u[j];
422 flux_matrix->col_coupleBlock->val[iptr_ij] =
423 (m_ij+dt_half*d_ij)*(u_old_j-u_old_i) -
424 (m_ij-dt_half*d_ij)*(u_j-u_i);
425 }
426 }
427 }
428
429 void FCT_Solver::setAntiDiffusionFlux_BE(SystemMatrix_ptr flux_matrix)
430 {
431 const double* u = u_coupler->borrowLocalData();
432 const double* u_old = u_old_coupler->borrowLocalData();
433 const double* remote_u = u_coupler->borrowRemoteData();
434 const double* remote_u_old = u_old_coupler->borrowRemoteData();
435 const_TransportProblem_ptr fct(transportproblem);
436 const_SystemMatrixPattern_ptr pattern(fct->iteration_matrix->pattern);
437 const dim_t n = fct->iteration_matrix->getTotalNumRows();
438
439 #pragma omp parallel for
440 for (dim_t i = 0; i < n; ++i) {
441 const double u_i = u[i];
442 const double u_old_i = u_old[i];
443 #pragma ivdep
444 for (index_t iptr_ij = pattern->mainPattern->ptr[i];
445 iptr_ij < pattern->mainPattern->ptr[i+1]; iptr_ij++) {
446 const index_t j = pattern->mainPattern->index[iptr_ij];
447 const double m_ij = fct->mass_matrix->mainBlock->val[iptr_ij];
448 // this is in fact -d_ij
449 const double d_ij = fct->transport_matrix->mainBlock->val[iptr_ij]+
450 fct->iteration_matrix->mainBlock->val[iptr_ij];
451 const double u_old_j = u_old[j];
452 const double u_j = u[j];
453
454 flux_matrix->mainBlock->val[iptr_ij] =
455 m_ij*(u_old_j-u_old_i) - (m_ij-dt*d_ij)*(u_j-u_i);
456 }
457 #pragma ivdep
458 for (index_t iptr_ij = pattern->col_couplePattern->ptr[i];
459 iptr_ij < pattern->col_couplePattern->ptr[i+1]; iptr_ij++) {
460 const index_t j = pattern->col_couplePattern->index[iptr_ij];
461 const double m_ij = fct->mass_matrix->col_coupleBlock->val[iptr_ij];
462 // this is in fact -d_ij
463 const double d_ij =
464 fct->transport_matrix->col_coupleBlock->val[iptr_ij] +
465 fct->iteration_matrix->col_coupleBlock->val[iptr_ij];
466 const double u_old_j = remote_u_old[j];
467 const double u_j = remote_u[j];
468
469 flux_matrix->col_coupleBlock->val[iptr_ij] =
470 m_ij*(u_old_j-u_old_i) - (m_ij-dt*d_ij)*(u_j-u_i);
471 }
472 }
473 }
474
475 /* special version of the ant-diffusive fluxes for the linear Crank-Nicolson
476 * scheme. In fact this is evaluated for u = 2*u_tilde - u_old which is the
477 * predictor of the solution of the the stabilised problem at time dt using
478 * the forward Euler scheme
479 *
480 * f_{ij} = (m_{ij} - dt/2 d_{ij}) (u_old[j]-u_old[i]) - (m_{ij} + dt/2 d_{ij}) (u[j]-u[i])
481 * = (m_{ij} - dt/2 d_{ij}) * (u_old[j]-u_old[i]) - (m_{ij} + dt/2 d_{ij}) * ( 2*(u_tilde[j]-u_tilde[i]) - (u_old[j] -u_old [i]) )
482 * = 2* m_{ij} * ( (u_old[j]-u_tilde[j] - (u_old[i]) - u_tilde[i]) ) - dt d_{ij} * (u_tilde[j]-u_tilde[i])
483 *
484 */
485 void FCT_Solver::setAntiDiffusionFlux_linearCN(SystemMatrix_ptr flux_matrix)
486 {
487 const_Coupler_ptr u_tilde_coupler(flux_limiter->u_tilde_coupler);
488 const double* u_tilde = u_tilde_coupler->borrowLocalData();
489 const double* u_old = u_old_coupler->borrowLocalData();
490 const double* remote_u_tilde = u_tilde_coupler->borrowRemoteData();
491 const double* remote_u_old = u_old_coupler->borrowRemoteData();
492 const_TransportProblem_ptr fct(transportproblem);
493 const_SystemMatrixPattern_ptr pattern(fct->iteration_matrix->pattern);
494 const dim_t n = fct->iteration_matrix->getTotalNumRows();
495
496 #pragma omp parallel for
497 for (dim_t i = 0; i < n; ++i) {
498 const double u_tilde_i = u_tilde[i];
499 const double u_old_i = u_old[i];
500 const double du_i = u_tilde_i - u_old_i;
501 #pragma ivdep
502 for (index_t iptr_ij = pattern->mainPattern->ptr[i];
503 iptr_ij < pattern->mainPattern->ptr[i+1]; iptr_ij++) {
504 const index_t j = pattern->mainPattern->index[iptr_ij];
505 const double m_ij = fct->mass_matrix->mainBlock->val[iptr_ij];
506 // this is in fact -d_ij
507 const double d_ij = fct->transport_matrix->mainBlock->val[iptr_ij]+
508 fct->iteration_matrix->mainBlock->val[iptr_ij];
509 const double u_tilde_j = u_tilde[j];
510 const double u_old_j = u_old[j];
511 const double du_j = u_tilde_j - u_old_j;
512
513 flux_matrix->mainBlock->val[iptr_ij] = 2 * m_ij * ( du_i - du_j ) -
514 dt * d_ij * ( u_tilde_i - u_tilde_j);
515 }
516 #pragma ivdep
517 for (index_t iptr_ij=pattern->col_couplePattern->ptr[i];
518 iptr_ij<pattern->col_couplePattern->ptr[i+1]; iptr_ij++) {
519
520 const index_t j = pattern->col_couplePattern->index[iptr_ij];
521 const double m_ij = fct->mass_matrix->col_coupleBlock->val[iptr_ij];
522 // this is in fact -d_ij
523 const double d_ij =
524 fct->transport_matrix->col_coupleBlock->val[iptr_ij] +
525 fct->iteration_matrix->col_coupleBlock->val[iptr_ij];
526 const double u_tilde_j = remote_u_tilde[j];
527 const double u_old_j = remote_u_old[j];
528 const double du_j = u_tilde_j - u_old_j;
529
530 flux_matrix->col_coupleBlock->val[iptr_ij] =
531 2*m_ij * ( du_i - du_j ) - dt * d_ij * (u_tilde_i - u_tilde_j);
532 }
533 }
534 }
535
536 /****************************************************************************/
537
538 double FCT_Solver::getSafeTimeStepSize(const_TransportProblem_ptr fctp)
539 {
540 double dt_max = LARGE_POSITIVE_FLOAT;
541 const dim_t n = fctp->transport_matrix->getTotalNumRows();
542
543 // set low order transport operator
544 setLowOrderOperator(boost::const_pointer_cast<TransportProblem>(fctp));
545
546 if (Esys_noError()) {
547 // calculate time step size
548 dt_max = LARGE_POSITIVE_FLOAT;
549 #pragma omp parallel
550 {
551 double dt_max_loc = LARGE_POSITIVE_FLOAT;
552 #pragma omp for schedule(static)
553 for (dim_t i=0; i<n; ++i) {
554 const double l_ii = fctp->main_diagonal_low_order_transport_matrix[i];
555 const double m_i = fctp->lumped_mass_matrix[i];
556 if (m_i > 0) {
557 if (l_ii<0)
558 dt_max_loc = std::min(dt_max_loc,m_i/(-l_ii));
559 }
560 }
561 #pragma omp critical
562 {
563 dt_max = std::min(dt_max,dt_max_loc);
564 }
565 }
566 #ifdef ESYS_MPI
567 double dt_max_loc = dt_max;
568 MPI_Allreduce(&dt_max_loc, &dt_max, 1, MPI_DOUBLE, MPI_MIN, fctp->mpi_info->comm);
569 #endif
570 if (dt_max < LARGE_POSITIVE_FLOAT)
571 dt_max *= 2.;
572 }
573 return dt_max;
574 }
575
576 /* Creates the low order transport matrix and stores its negative values
577 * into the iteration_matrix except for the main diagonal which is stored
578 * separately.
579 * If fc->iteration_matrix==NULL, fc->iteration_matrix is allocated
580 *
581 * a=transport_matrix
582 * b= low_order_transport_matrix = - iteration_matrix
583 * c=main diagonal low_order_transport_matrix
584 * initialise c[i] mit a[i,i]
585 *
586 * d_ij=max(0,-a[i,j],-a[j,i])
587 * b[i,j]=-(a[i,j]+d_ij)
588 * c[i]-=d_ij
589 */
590
591 void FCT_Solver::setLowOrderOperator(TransportProblem_ptr fc)
592 {
593 const index_t* main_iptr = fc->borrowMainDiagonalPointer();
594
595 if (!fc->iteration_matrix.get()) {
596 fc->iteration_matrix.reset(new SystemMatrix(
597 fc->transport_matrix->type, fc->transport_matrix->pattern,
598 fc->transport_matrix->row_block_size,
599 fc->transport_matrix->col_block_size, true,
600 fc->transport_matrix->getRowFunctionSpace(),
601 fc->transport_matrix->getColumnFunctionSpace()));
602 }
603
604 const_SystemMatrixPattern_ptr pattern(fc->iteration_matrix->pattern);
605 const dim_t n = fc->iteration_matrix->getTotalNumRows();
606 #pragma omp parallel for
607 for (dim_t i = 0; i < n; ++i) {
608 double sum = fc->transport_matrix->mainBlock->val[main_iptr[i]];
609 //printf("sum[%d] = %e -> ", i, sum);
610
611 // look at a[i,j]
612 for (index_t iptr_ij=pattern->mainPattern->ptr[i];iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
613 const index_t j = pattern->mainPattern->index[iptr_ij];
614 const double rtmp1 = fc->transport_matrix->mainBlock->val[iptr_ij];
615 if (j != i) {
616 // find entry a[j,i]
617 #pragma ivdep
618 for (index_t iptr_ji=pattern->mainPattern->ptr[j]; iptr_ji<pattern->mainPattern->ptr[j+1]; ++iptr_ji) {
619
620 if (pattern->mainPattern->index[iptr_ji] == i) {
621 const double rtmp2=fc->transport_matrix->mainBlock->val[iptr_ji];
622 //printf("a[%d,%d]=%e\n",i,j,rtmp1);
623 //printf("a[%d,%d]=%e\n",j,i,rtmp2);
624 const double d_ij=-MIN3(0.,rtmp1,rtmp2);
625 fc->iteration_matrix->mainBlock->val[iptr_ij]=-(rtmp1+d_ij);
626 //printf("l[%d,%d]=%e\n",i,j,fc->iteration_matrix->mainBlock->val[iptr_ij]);
627 sum-=d_ij;
628 break;
629 }
630 }
631 }
632 }
633 for (index_t iptr_ij=pattern->col_couplePattern->ptr[i];iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
634 const index_t j = pattern->col_couplePattern->index[iptr_ij];
635 const double rtmp1 = fc->transport_matrix->col_coupleBlock->val[iptr_ij];
636 // find entry a[j,i]
637 #pragma ivdep
638 for (index_t iptr_ji=pattern->row_couplePattern->ptr[j]; iptr_ji<pattern->row_couplePattern->ptr[j+1]; ++iptr_ji) {
639 if (pattern->row_couplePattern->index[iptr_ji]==i) {
640 const double rtmp2=fc->transport_matrix->row_coupleBlock->val[iptr_ji];
641 const double d_ij=-MIN3(0.,rtmp1,rtmp2);
642 fc->iteration_matrix->col_coupleBlock->val[iptr_ij]=-(rtmp1+d_ij);
643 fc->iteration_matrix->row_coupleBlock->val[iptr_ji]=-(rtmp2+d_ij);
644 sum-=d_ij;
645 break;
646 }
647 }
648 }
649 // set main diagonal entry
650 fc->main_diagonal_low_order_transport_matrix[i] = sum;
651 //printf("%e\n", sum);
652 }
653 }
654
655 /*
656 * out_i=m_i u_i + a * \sum_{j <> i} l_{ij} (u_j-u_i) where m_i>0
657 * = u_i where m_i<=0
658 *
659 */
660 void FCT_Solver::setMuPaLu(double* out, const_Coupler_ptr coupler, double a)
661 {
662 const_SystemMatrix_ptr L(transportproblem->iteration_matrix);
663 const double* M = transportproblem->lumped_mass_matrix;
664 const_SystemMatrixPattern_ptr pattern(L->pattern);
665 const double* u = coupler->borrowLocalData();
666 const double* remote_u = coupler->borrowRemoteData();
667 const dim_t n = L->getTotalNumRows();
668
669 #pragma omp parallel for
670 for (dim_t i = 0; i < n; ++i) {
671 if (M[i] > 0.) {
672 out[i] = M[i]*u[i];
673 } else {
674 out[i] = u[i];
675 }
676 }
677 if (std::abs(a) > 0) {
678 #pragma omp parallel for
679 for (dim_t i = 0; i < n; ++i) {
680 if (M[i] > 0.) {
681 double sum = 0;
682 const double u_i = u[i];
683 #pragma ivdep
684 for (index_t iptr_ij = pattern->mainPattern->ptr[i];
685 iptr_ij < pattern->mainPattern->ptr[i+1];
686 iptr_ij++) {
687 const index_t j = pattern->mainPattern->index[iptr_ij];
688 const double l_ij = L->mainBlock->val[iptr_ij];
689 sum += l_ij*(u[j]-u_i);
690 }
691 #pragma ivdep
692 for (index_t iptr_ij = pattern->col_couplePattern->ptr[i];
693 iptr_ij < pattern->col_couplePattern->ptr[i+1];
694 iptr_ij++) {
695 const index_t j=pattern->col_couplePattern->index[iptr_ij];
696 const double l_ij = L->col_coupleBlock->val[iptr_ij];
697 sum += l_ij*(remote_u[j]-u_i);
698 }
699 out[i] += a*sum;
700 }
701 }
702 }
703 }
704
705 } // namespace paso
706

Properties

Name Value
svn:mergeinfo /branches/4.0fordebian/paso/src/FCT_Solver.cpp:5567-5588 /branches/amg_from_3530/paso/src/FCT_Solver.cpp:3531-3826 /branches/lapack2681/paso/src/FCT_Solver.cpp:2682-2741 /branches/pasowrap/paso/src/FCT_Solver.cpp:3661-3674 /branches/py3_attempt2/paso/src/FCT_Solver.cpp:3871-3891 /branches/restext/paso/src/FCT_Solver.cpp:2610-2624 /branches/ripleygmg_from_3668/paso/src/FCT_Solver.cpp:3669-3791 /branches/stage3.0/paso/src/FCT_Solver.cpp:2569-2590 /branches/symbolic_from_3470/paso/src/FCT_Solver.cpp:3471-3974 /branches/symbolic_from_3470/ripley/test/python/paso/src/FCT_Solver.cpp:3517-3974 /release/3.0/paso/src/FCT_Solver.cpp:2591-2601 /release/4.0/paso/src/FCT_Solver.cpp:5380-5406 /trunk/paso/src/FCT_Solver.cpp:4257-4344,5898-5932 /trunk/ripley/test/python/paso/src/FCT_Solver.cpp:3480-3515

  ViewVC Help
Powered by ViewVC 1.1.26