/[escript]/trunk/paso/src/FCT_Solver.cpp
ViewVC logotype

Contents of /trunk/paso/src/FCT_Solver.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 6939 - (show annotations)
Mon Jan 20 03:37:18 2020 UTC (4 months, 1 week ago) by uqaeller
File size: 28688 byte(s)
Updated the copyright header.


1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2020 by The University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Apache License, version 2.0
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development 2012-2013 by School of Earth Sciences
13 * Development from 2014-2017 by Centre for Geoscience Computing (GeoComp)
14 * Development from 2019 by School of Earth and Environmental Sciences
15 **
16 *****************************************************************************/
17
18
19 /****************************************************************************/
20
21 /* Paso: Transport solver with flux correction (L is row sum zero)
22 *
23 * - Mv_t=Lv v(0)=u
24 *
25 * to return v(dt)
26 *
27 */
28 /****************************************************************************/
29
30 /* Author: l.gross@uq.edu.au */
31
32 /****************************************************************************/
33
34 #include "FCT_Solver.h"
35 #include "PasoUtil.h"
36 #include "Preconditioner.h"
37
38 #include <iostream>
39
40 #define MIN3(_arg1_,_arg2_,_arg3_) std::min(_arg1_, std::min(_arg2_,_arg3_))
41
42 namespace paso {
43
44 static const real_t LARGE_POSITIVE_FLOAT = escript::DataTypes::real_t_max();
45
46 FCT_Solver::FCT_Solver(const_TransportProblem_ptr tp, Options* options) :
47 transportproblem(tp),
48 omega(0),
49 z(NULL),
50 du(NULL)
51 {
52 const dim_t blockSize = tp->getBlockSize();
53 const dim_t n = tp->getTotalNumRows();
54 mpi_info = tp->mpi_info;
55 flux_limiter = new FCT_FluxLimiter(tp);
56 b = new double[n];
57 if (options->ode_solver == PASO_CRANK_NICOLSON || options->ode_solver == PASO_BACKWARD_EULER) {
58 du = new double[n];
59 z = new double[n];
60 }
61 u_coupler.reset(new Coupler<real_t>(tp->borrowConnector(), blockSize, mpi_info));
62 u_old_coupler.reset(new Coupler<real_t>(tp->borrowConnector(), blockSize, mpi_info));
63
64 if (options->ode_solver == PASO_LINEAR_CRANK_NICOLSON) {
65 method = PASO_LINEAR_CRANK_NICOLSON;
66 } else if (options->ode_solver == PASO_CRANK_NICOLSON) {
67 method = PASO_CRANK_NICOLSON;
68 } else if (options->ode_solver == PASO_BACKWARD_EULER) {
69 method = PASO_BACKWARD_EULER;
70 } else {
71 throw PasoException("FCT_Solver: unknown integration scheme.");
72 }
73 }
74
75 FCT_Solver::~FCT_Solver()
76 {
77 delete flux_limiter;
78 delete[] b;
79 delete[] z;
80 delete[] du;
81 }
82
83 // modifies the main diagonal of the iteration matrix to introduce new dt
84 void FCT_Solver::initialize(double _dt, Options* options, Performance* pp)
85 {
86 const real_t EPSILON = escript::DataTypes::real_t_eps();
87 const_TransportProblem_ptr fctp(transportproblem);
88 const index_t* main_iptr = fctp->borrowMainDiagonalPointer();
89 const dim_t n = fctp->transport_matrix->getTotalNumRows();
90 const double theta = getTheta();
91 omega = 1. / (_dt * theta);
92 dim_t i;
93 Options options2;
94
95 solve_free(fctp->iteration_matrix.get());
96 // fctp->iteration_matrix[i,i]=m[i]/(dt theta) -l[i,i]
97 dt = _dt;
98 #pragma omp parallel for private(i)
99 for (i = 0; i < n; ++i) {
100 const double m_i = fctp->lumped_mass_matrix[i];
101 const double l_ii = fctp->main_diagonal_low_order_transport_matrix[i];
102 if ( m_i > 0 ) {
103 fctp->iteration_matrix->mainBlock->val[main_iptr[i]] = m_i * omega - l_ii;
104 } else {
105 fctp->iteration_matrix->mainBlock->val[main_iptr[i]] = std::abs(m_i * omega - l_ii)/(EPSILON*EPSILON);
106 }
107 }
108
109 // allocate preconditioner/solver
110 options2.verbose = options->verbose;
111 if (method == PASO_LINEAR_CRANK_NICOLSON) {
112 options2.preconditioner = PASO_GS;
113 } else {
114 options2.preconditioner = PASO_JACOBI;
115 //options2.preconditioner = PASO_GS;
116 }
117 options2.use_local_preconditioner = false;
118 options2.sweeps = -1;
119
120 Performance_startMonitor(pp, PERFORMANCE_PRECONDITIONER_INIT);
121 fctp->iteration_matrix->setPreconditioner(&options2);
122 Performance_stopMonitor(pp, PERFORMANCE_PRECONDITIONER_INIT);
123 }
124
125 // entry point for update procedures
126 SolverResult FCT_Solver::update(double* u, double* u_old, Options* options,
127 Performance* pp)
128 {
129 SolverResult err_out = NoError;
130
131 if (method == PASO_LINEAR_CRANK_NICOLSON) {
132 err_out = updateLCN(u, u_old, options, pp);
133 } else if (method == PASO_CRANK_NICOLSON) {
134 err_out = updateNL(u, u_old, options, pp);
135 } else if (method == PASO_BACKWARD_EULER) {
136 err_out = updateNL(u, u_old, options, pp);
137 } else {
138 err_out = InputError;
139 }
140 return err_out;
141 }
142
143 /// linear crank-nicolson update
144 SolverResult FCT_Solver::updateLCN(double* u, double* u_old, Options* options,
145 Performance* pp)
146 {
147 dim_t sweep_max, i;
148 double const RTOL = options->tolerance;
149 const dim_t n = transportproblem->getTotalNumRows();
150 SystemMatrix_ptr iteration_matrix(transportproblem->iteration_matrix);
151 const index_t* main_iptr = transportproblem->borrowMainDiagonalPointer();
152 SolverResult errorCode = NoError;
153 double norm_u_tilde;
154
155 u_old_coupler->startCollect(u_old);
156 u_old_coupler->finishCollect();
157
158 // b[i]=m*u_tilde[i] = m u_old[i] + dt/2 sum_{j <> i} l_{ij}*(u_old[j]-u_old[i])
159 // = u_tilde[i] = u_old[i] where constraint m<0.
160 // note that iteration_matrix stores the negative values of the
161 // low order transport matrix l. Therefore a=-dt*0.5 is used.
162
163 setMuPaLu(b, u_old_coupler, -dt*0.5);
164 /* solve for u_tilde : u_tilda = m^{-1} * b */
165 flux_limiter->setU_tilde(b);
166 // u_tilde_connector is completed
167
168 // calculate anti-diffusive fluxes for u_tilde
169 setAntiDiffusionFlux_linearCN(flux_limiter->antidiffusive_fluxes);
170
171 /* b_i += sum_{j} limitation factor_{ij} * antidiffusive_flux_{ij} */
172 flux_limiter->addLimitedFluxes_Start();
173 flux_limiter->addLimitedFluxes_Complete(b);
174
175 util::scale(n, b, omega);
176 // solve (m-dt/2*L) u = b in the form (omega*m-L) u = b * omega with omega*dt/2=1
177 #pragma omp for private(i) schedule(static)
178 for (i = 0; i < n; ++i) {
179 if (!(transportproblem->lumped_mass_matrix[i] > 0)) {
180 b[i] = flux_limiter->u_tilde[i]
181 * transportproblem->iteration_matrix->mainBlock->val[main_iptr[i]];
182 }
183 }
184 // initial guess is u<- -u + 2*u_tilde
185 util::update(n, -1., u, 2., flux_limiter->u_tilde);
186
187 sweep_max = std::max((int) (- 2 * log(RTOL)/log(2.)-0.5),1);
188 norm_u_tilde = util::lsup(n, flux_limiter->u_tilde, flux_limiter->mpi_info);
189 if (options->verbose) {
190 std::cout << "FCT_Solver::updateLCN: u_tilde lsup = " << norm_u_tilde
191 << " (rtol = " << RTOL*norm_u_tilde << ", max. sweeps = "
192 << sweep_max << ")" << std::endl;
193 }
194 errorCode = Preconditioner_Smoother_solve_byTolerance(iteration_matrix,
195 ((Preconditioner*)(iteration_matrix->solver_p))->gs, u, b, RTOL,
196 &sweep_max, true);
197 if (errorCode == NoError) {
198 if (options->verbose)
199 std::cout << "FCT_Solver::updateLCN: convergence after "
200 << sweep_max << " Gauss-Seidel steps." << std::endl;
201 errorCode = NoError;
202 } else {
203 if (options->verbose)
204 std::cout << "FCT_Solver::updateLCN: Gauss-Seidel failed within "
205 << sweep_max << " steps (rel. tolerance " << RTOL << ")."
206 << std::endl;
207 errorCode = MaxIterReached;
208 }
209 return errorCode;
210 }
211
212 SolverResult FCT_Solver::updateNL(double* u, double* u_old, Options* options,
213 Performance* pp)
214 {
215 // number of rates >=critical_rate accepted before divergence is triggered
216 const dim_t num_critical_rates_max = 3;
217 // expected value of convergence rate
218 const double critical_rate = 0.95;
219
220 const_TransportProblem_ptr fctp(transportproblem);
221 dim_t i;
222 double norm_u_tilde, norm_du=LARGE_POSITIVE_FLOAT, norm_du_old, rate=1.;
223 const dim_t n = fctp->transport_matrix->getTotalNumRows();
224 const double atol = options->absolute_tolerance;
225 const double rtol = options->tolerance;
226 const dim_t max_m = options->iter_max;
227 dim_t m = 0, num_critical_rates = 0;
228 SolverResult errorCode = NoError;
229 bool converged=false, max_m_reached=false, diverged=false;
230 /* //////////////////////////////////////////////////////////////////// */
231
232 options->num_iter=0;
233 u_old_coupler->startCollect(u_old);
234 u_old_coupler->finishCollect();
235 // prepare u_tilde and flux limiter
236 if (method == PASO_BACKWARD_EULER) {
237 // b[i]=m_i* u_old[i]
238 #pragma omp for private(i) schedule(static)
239 for (i = 0; i < n; ++i) {
240 if (fctp->lumped_mass_matrix[i] > 0 ) {
241 b[i]=u_old[i]* fctp->lumped_mass_matrix[i];
242 } else {
243 b[i]=u_old[i];
244 }
245 }
246 } else {
247 /* b[i]=m_i* u_old[i] + dt/2 sum_{j <> i} l_{ij}*(u_old[j]-u_old[i]) = m_i * u_tilde_i where m_i>0
248 * = u_old[i] otherwise
249 * note that iteration_matrix stores the negative values of the
250 * low order transport matrix l. Therefore a=-dt*0.5 is used. */
251 setMuPaLu(b, u_old_coupler, -dt*0.5);
252 }
253 flux_limiter->setU_tilde(b); // u_tilde = m^{-1} b */
254 // u_tilde_connector is completed
255 /************************************************************************/
256 // calculate stopping criterion
257 norm_u_tilde=util::lsup(n, flux_limiter->u_tilde, flux_limiter->mpi_info);
258 const double ATOL = rtol * norm_u_tilde + atol;
259 if (options->verbose)
260 std::cout << "FCT_Solver::updateNL: iteration starts u_tilde lsup = "
261 << norm_u_tilde << " (abs. tol = " << ATOL << ")" << std::endl;
262
263 // u_old is an initial guess for u
264 util::copy(n, u, u_old);
265
266 while (!converged && !diverged && !max_m_reached) {
267 u_coupler->startCollect(u);
268 u_coupler->finishCollect();
269
270 // set antidiffusive_flux_{ij} for u
271 if (method == PASO_BACKWARD_EULER) {
272 setAntiDiffusionFlux_BE(flux_limiter->antidiffusive_fluxes);
273 } else {
274 setAntiDiffusionFlux_CN(flux_limiter->antidiffusive_fluxes);
275 }
276 // start the calculation of the limitation factors_{fct_solver->ij}
277 flux_limiter->addLimitedFluxes_Start(); // uses u_tilde
278
279 /*
280 * z_m[i]=b[i] - (m_i*u[i] - omega*sum_{j<>i} l_{ij} (u[j]-u[i]) ) where m_i>0
281 * ==b[i] - u[i] = u_tilda[i]-u[i] =0 otherwise
282 *
283 * omega = dt/2 or dt .
284 *
285 * note that iteration_matrix stores the negative values of the
286 * low order transport matrix l. Therefore a=dt*theta is used.
287 */
288 if (method == PASO_BACKWARD_EULER) {
289 setMuPaLu(z, u_coupler, dt);
290 } else {
291 setMuPaLu(z, u_coupler, dt/2);
292 }
293
294 util::update(n, -1., z, 1., b); // z=b-z
295
296 // z_i += sum_{j} limitation factor_{ij} * antidiffusive_flux_{ij}
297 flux_limiter->addLimitedFluxes_Complete(z);
298
299 // we solve (m/omega - L ) * du = z
300 if (method == PASO_BACKWARD_EULER) {
301 dim_t cntIter = options->iter_max;
302 double tol= util::l2(n, z, fctp->mpi_info) ;
303
304 if (m==0) {
305 tol *= 0.5;
306 } else {
307 tol *= std::min(std::max(rate*rate, 1e-2), 0.5);
308 }
309 // use BiCGStab with Jacobi preconditioner ( m - omega * L )
310 util::zeroes(n,du);
311 errorCode = Solver_BiCGStab(fctp->iteration_matrix, z, du, &cntIter, &tol, pp);
312
313 // errorCode = Solver_GMRES(fctp->iteration_matrix, z, du, &cntIter, &tol, 10, 2000, pp);
314 if (options->verbose)
315 std::cout << "FCT_Solver::updateNL: BiCGStab completed after "
316 << cntIter << " steps (residual = " << tol << ")." << std::endl;
317 options->num_iter += cntIter;
318 if (errorCode != NoError) break;
319 } else {
320 // just use the main diagonal of (m/omega - L )
321
322 Preconditioner_Smoother_solve(fctp->iteration_matrix,
323 ((Preconditioner*) (fctp->iteration_matrix->solver_p))->jacobi,
324 du, z, 1, false);
325
326 options->num_iter++;
327 }
328
329 util::update(n, 1., u, omega, du);
330 norm_du_old = norm_du;
331 norm_du = util::lsup(n, du, fctp->mpi_info);
332 if (m == 0) {
333 if (options->verbose)
334 std::cout << "FCT_Solver::updateNL: step " << m+1
335 << ": increment = " << norm_du * omega << std::endl;
336 } else {
337 if (norm_du_old > 0.) {
338 rate = norm_du/norm_du_old;
339 } else if (norm_du <= 0.) {
340 rate = 0.;
341 } else {
342 rate = LARGE_POSITIVE_FLOAT;
343 }
344 if (options->verbose)
345 std::cout << "FCT_Solver::updateNL: step " << m+1
346 << ": increment= " << norm_du * omega << " (rate = "
347 << rate << ")" << std::endl;
348 num_critical_rates += (rate<critical_rate ? 0 : 1);
349 max_m_reached = (m>max_m);
350 diverged = (num_critical_rates >= num_critical_rates_max);
351 converged = (norm_du * omega <= ATOL);
352 }
353 m++;
354 } // end of while loop
355 if (errorCode == NoError) {
356 if (converged) {
357 if (options->verbose)
358 std::cout << "FCT_Solver::updateNL: iteration is completed." << std::endl;
359 errorCode = NoError;
360 } else if (diverged) {
361 if (options->verbose)
362 std::cout << "FCT_Solver::updateNL: divergence." << std::endl;
363 errorCode = Divergence;
364 } else if (max_m_reached) {
365 if (options->verbose)
366 std::cout << "FCT_Solver::updateNL: maximum number of iteration steps reached." << std::endl;
367 errorCode = MaxIterReached;
368 }
369 }
370 return errorCode;
371 }
372
373
374 /*
375 * AntiDiffusionFlux:
376 *
377 * f_{ij} = (m_{ij} - dt (1-theta) d_{ij}) (u_old[j]-u_old[i]) - (m_{ij} + dt theta d_{ij}) (u[j]-u[i])
378 *
379 * m=fc->mass matrix
380 * d=artificial diffusion matrix = L - K = - fc->iteration matrix - fc->transport matrix (away from main diagonal)
381 *
382 * for CN : theta = 0.5
383 * for BE : theta = 1.
384 */
385
386 void FCT_Solver::setAntiDiffusionFlux_CN(SystemMatrix_ptr flux_matrix)
387 {
388 const double* u = u_coupler->borrowLocalData();
389 const double* u_old = u_old_coupler->borrowLocalData();
390 const double* remote_u = u_coupler->borrowRemoteData();
391 const double* remote_u_old = u_old_coupler->borrowRemoteData();
392 const double dt_half = dt/2;
393 const_TransportProblem_ptr fct(transportproblem);
394 const_SystemMatrixPattern_ptr pattern(fct->iteration_matrix->pattern);
395 const dim_t n = fct->iteration_matrix->getTotalNumRows();
396
397 #pragma omp parallel for
398 for (dim_t i = 0; i < n; ++i) {
399 const double u_i = u[i];
400 const double u_old_i = u_old[i];
401
402 #pragma ivdep
403 for (index_t iptr_ij = pattern->mainPattern->ptr[i];
404 iptr_ij < pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
405 const index_t j = pattern->mainPattern->index[iptr_ij];
406 const double m_ij = fct->mass_matrix->mainBlock->val[iptr_ij];
407 // this is in fact -d_ij
408 const double d_ij = fct->transport_matrix->mainBlock->val[iptr_ij]+
409 fct->iteration_matrix->mainBlock->val[iptr_ij];
410 const double u_old_j = u_old[j];
411 const double u_j = u[j];
412
413 // (m_{ij} - dt (1-theta) d_{ij}) (u_old[j]-u_old[i]) - (m_{ij} + dt theta d_{ij}) (u[j]-u[i])
414 flux_matrix->mainBlock->val[iptr_ij] =
415 (m_ij+dt_half*d_ij)*(u_old_j-u_old_i) -
416 (m_ij-dt_half*d_ij)*(u_j-u_i);
417
418 }
419 #pragma ivdep
420 for (index_t iptr_ij = pattern->col_couplePattern->ptr[i];
421 iptr_ij < pattern->col_couplePattern->ptr[i+1]; iptr_ij++) {
422 const index_t j = pattern->col_couplePattern->index[iptr_ij];
423 const double m_ij = fct->mass_matrix->col_coupleBlock->val[iptr_ij];
424 // this is in fact -d_ij
425 const double d_ij =
426 fct->transport_matrix->col_coupleBlock->val[iptr_ij] +
427 fct->iteration_matrix->col_coupleBlock->val[iptr_ij];
428 const double u_old_j = remote_u_old[j];
429 const double u_j = remote_u[j];
430 flux_matrix->col_coupleBlock->val[iptr_ij] =
431 (m_ij+dt_half*d_ij)*(u_old_j-u_old_i) -
432 (m_ij-dt_half*d_ij)*(u_j-u_i);
433 }
434 }
435 }
436
437 void FCT_Solver::setAntiDiffusionFlux_BE(SystemMatrix_ptr flux_matrix)
438 {
439 const double* u = u_coupler->borrowLocalData();
440 const double* u_old = u_old_coupler->borrowLocalData();
441 const double* remote_u = u_coupler->borrowRemoteData();
442 const double* remote_u_old = u_old_coupler->borrowRemoteData();
443 const_TransportProblem_ptr fct(transportproblem);
444 const_SystemMatrixPattern_ptr pattern(fct->iteration_matrix->pattern);
445 const dim_t n = fct->iteration_matrix->getTotalNumRows();
446
447 #pragma omp parallel for
448 for (dim_t i = 0; i < n; ++i) {
449 const double u_i = u[i];
450 const double u_old_i = u_old[i];
451 #pragma ivdep
452 for (index_t iptr_ij = pattern->mainPattern->ptr[i];
453 iptr_ij < pattern->mainPattern->ptr[i+1]; iptr_ij++) {
454 const index_t j = pattern->mainPattern->index[iptr_ij];
455 const double m_ij = fct->mass_matrix->mainBlock->val[iptr_ij];
456 // this is in fact -d_ij
457 const double d_ij = fct->transport_matrix->mainBlock->val[iptr_ij]+
458 fct->iteration_matrix->mainBlock->val[iptr_ij];
459 const double u_old_j = u_old[j];
460 const double u_j = u[j];
461
462 flux_matrix->mainBlock->val[iptr_ij] =
463 m_ij*(u_old_j-u_old_i) - (m_ij-dt*d_ij)*(u_j-u_i);
464 }
465 #pragma ivdep
466 for (index_t iptr_ij = pattern->col_couplePattern->ptr[i];
467 iptr_ij < pattern->col_couplePattern->ptr[i+1]; iptr_ij++) {
468 const index_t j = pattern->col_couplePattern->index[iptr_ij];
469 const double m_ij = fct->mass_matrix->col_coupleBlock->val[iptr_ij];
470 // this is in fact -d_ij
471 const double d_ij =
472 fct->transport_matrix->col_coupleBlock->val[iptr_ij] +
473 fct->iteration_matrix->col_coupleBlock->val[iptr_ij];
474 const double u_old_j = remote_u_old[j];
475 const double u_j = remote_u[j];
476
477 flux_matrix->col_coupleBlock->val[iptr_ij] =
478 m_ij*(u_old_j-u_old_i) - (m_ij-dt*d_ij)*(u_j-u_i);
479 }
480 }
481 }
482
483 /* special version of the ant-diffusive fluxes for the linear Crank-Nicolson
484 * scheme. In fact this is evaluated for u = 2*u_tilde - u_old which is the
485 * predictor of the solution of the the stabilised problem at time dt using
486 * the forward Euler scheme
487 *
488 * f_{ij} = (m_{ij} - dt/2 d_{ij}) (u_old[j]-u_old[i]) - (m_{ij} + dt/2 d_{ij}) (u[j]-u[i])
489 * = (m_{ij} - dt/2 d_{ij}) * (u_old[j]-u_old[i]) - (m_{ij} + dt/2 d_{ij}) * ( 2*(u_tilde[j]-u_tilde[i]) - (u_old[j] -u_old [i]) )
490 * = 2* m_{ij} * ( (u_old[j]-u_tilde[j] - (u_old[i]) - u_tilde[i]) ) - dt d_{ij} * (u_tilde[j]-u_tilde[i])
491 *
492 */
493 void FCT_Solver::setAntiDiffusionFlux_linearCN(SystemMatrix_ptr flux_matrix)
494 {
495 const_Coupler_ptr<real_t> u_tilde_coupler(flux_limiter->u_tilde_coupler);
496 const double* u_tilde = u_tilde_coupler->borrowLocalData();
497 const double* u_old = u_old_coupler->borrowLocalData();
498 const double* remote_u_tilde = u_tilde_coupler->borrowRemoteData();
499 const double* remote_u_old = u_old_coupler->borrowRemoteData();
500 const_TransportProblem_ptr fct(transportproblem);
501 const_SystemMatrixPattern_ptr pattern(fct->iteration_matrix->pattern);
502 const dim_t n = fct->iteration_matrix->getTotalNumRows();
503
504 #pragma omp parallel for
505 for (dim_t i = 0; i < n; ++i) {
506 const double u_tilde_i = u_tilde[i];
507 const double u_old_i = u_old[i];
508 const double du_i = u_tilde_i - u_old_i;
509 #pragma ivdep
510 for (index_t iptr_ij = pattern->mainPattern->ptr[i];
511 iptr_ij < pattern->mainPattern->ptr[i+1]; iptr_ij++) {
512 const index_t j = pattern->mainPattern->index[iptr_ij];
513 const double m_ij = fct->mass_matrix->mainBlock->val[iptr_ij];
514 // this is in fact -d_ij
515 const double d_ij = fct->transport_matrix->mainBlock->val[iptr_ij]+
516 fct->iteration_matrix->mainBlock->val[iptr_ij];
517 const double u_tilde_j = u_tilde[j];
518 const double u_old_j = u_old[j];
519 const double du_j = u_tilde_j - u_old_j;
520
521 flux_matrix->mainBlock->val[iptr_ij] = 2 * m_ij * ( du_i - du_j ) -
522 dt * d_ij * ( u_tilde_i - u_tilde_j);
523 }
524 #pragma ivdep
525 for (index_t iptr_ij=pattern->col_couplePattern->ptr[i];
526 iptr_ij<pattern->col_couplePattern->ptr[i+1]; iptr_ij++) {
527
528 const index_t j = pattern->col_couplePattern->index[iptr_ij];
529 const double m_ij = fct->mass_matrix->col_coupleBlock->val[iptr_ij];
530 // this is in fact -d_ij
531 const double d_ij =
532 fct->transport_matrix->col_coupleBlock->val[iptr_ij] +
533 fct->iteration_matrix->col_coupleBlock->val[iptr_ij];
534 const double u_tilde_j = remote_u_tilde[j];
535 const double u_old_j = remote_u_old[j];
536 const double du_j = u_tilde_j - u_old_j;
537
538 flux_matrix->col_coupleBlock->val[iptr_ij] =
539 2*m_ij * ( du_i - du_j ) - dt * d_ij * (u_tilde_i - u_tilde_j);
540 }
541 }
542 }
543
544 /****************************************************************************/
545
546 double FCT_Solver::getSafeTimeStepSize(const_TransportProblem_ptr fctp)
547 {
548 double dt_max = LARGE_POSITIVE_FLOAT;
549 const dim_t n = fctp->transport_matrix->getTotalNumRows();
550
551 // set low order transport operator
552 setLowOrderOperator(boost::const_pointer_cast<TransportProblem>(fctp));
553
554 // calculate time step size
555 dt_max = LARGE_POSITIVE_FLOAT;
556 #pragma omp parallel
557 {
558 double dt_max_loc = LARGE_POSITIVE_FLOAT;
559 #pragma omp for schedule(static)
560 for (dim_t i=0; i<n; ++i) {
561 const double l_ii = fctp->main_diagonal_low_order_transport_matrix[i];
562 const double m_i = fctp->lumped_mass_matrix[i];
563 if (m_i > 0) {
564 if (l_ii<0)
565 dt_max_loc = std::min(dt_max_loc,m_i/(-l_ii));
566 }
567 }
568 #pragma omp critical
569 {
570 dt_max = std::min(dt_max,dt_max_loc);
571 }
572 }
573 #ifdef ESYS_MPI
574 double dt_max_loc = dt_max;
575 MPI_Allreduce(&dt_max_loc, &dt_max, 1, MPI_DOUBLE, MPI_MIN, fctp->mpi_info->comm);
576 #endif
577 if (dt_max < LARGE_POSITIVE_FLOAT)
578 dt_max *= 2.;
579
580 return dt_max;
581 }
582
583 /* Creates the low order transport matrix and stores its negative values
584 * into the iteration_matrix except for the main diagonal which is stored
585 * separately.
586 * If fc->iteration_matrix==NULL, fc->iteration_matrix is allocated
587 *
588 * a=transport_matrix
589 * b= low_order_transport_matrix = - iteration_matrix
590 * c=main diagonal low_order_transport_matrix
591 * initialise c[i] mit a[i,i]
592 *
593 * d_ij=max(0,-a[i,j],-a[j,i])
594 * b[i,j]=-(a[i,j]+d_ij)
595 * c[i]-=d_ij
596 */
597
598 void FCT_Solver::setLowOrderOperator(TransportProblem_ptr fc)
599 {
600 const index_t* main_iptr = fc->borrowMainDiagonalPointer();
601
602 if (!fc->iteration_matrix.get()) {
603 fc->iteration_matrix.reset(new SystemMatrix(
604 fc->transport_matrix->type, fc->transport_matrix->pattern,
605 fc->transport_matrix->row_block_size,
606 fc->transport_matrix->col_block_size, true,
607 fc->transport_matrix->getRowFunctionSpace(),
608 fc->transport_matrix->getColumnFunctionSpace()));
609 }
610
611 const_SystemMatrixPattern_ptr pattern(fc->iteration_matrix->pattern);
612 const dim_t n = fc->iteration_matrix->getTotalNumRows();
613 #pragma omp parallel for
614 for (dim_t i = 0; i < n; ++i) {
615 double sum = fc->transport_matrix->mainBlock->val[main_iptr[i]];
616 //printf("sum[%d] = %e -> ", i, sum);
617
618 // look at a[i,j]
619 for (index_t iptr_ij=pattern->mainPattern->ptr[i];iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
620 const index_t j = pattern->mainPattern->index[iptr_ij];
621 const double rtmp1 = fc->transport_matrix->mainBlock->val[iptr_ij];
622 if (j != i) {
623 // find entry a[j,i]
624 #pragma ivdep
625 for (index_t iptr_ji=pattern->mainPattern->ptr[j]; iptr_ji<pattern->mainPattern->ptr[j+1]; ++iptr_ji) {
626
627 if (pattern->mainPattern->index[iptr_ji] == i) {
628 const double rtmp2=fc->transport_matrix->mainBlock->val[iptr_ji];
629 //printf("a[%d,%d]=%e\n",i,j,rtmp1);
630 //printf("a[%d,%d]=%e\n",j,i,rtmp2);
631 const double d_ij=-MIN3(0.,rtmp1,rtmp2);
632 fc->iteration_matrix->mainBlock->val[iptr_ij]=-(rtmp1+d_ij);
633 //printf("l[%d,%d]=%e\n",i,j,fc->iteration_matrix->mainBlock->val[iptr_ij]);
634 sum-=d_ij;
635 break;
636 }
637 }
638 }
639 }
640 for (index_t iptr_ij=pattern->col_couplePattern->ptr[i];iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
641 const index_t j = pattern->col_couplePattern->index[iptr_ij];
642 const double rtmp1 = fc->transport_matrix->col_coupleBlock->val[iptr_ij];
643 // find entry a[j,i]
644 #pragma ivdep
645 for (index_t iptr_ji=pattern->row_couplePattern->ptr[j]; iptr_ji<pattern->row_couplePattern->ptr[j+1]; ++iptr_ji) {
646 if (pattern->row_couplePattern->index[iptr_ji]==i) {
647 const double rtmp2=fc->transport_matrix->row_coupleBlock->val[iptr_ji];
648 const double d_ij=-MIN3(0.,rtmp1,rtmp2);
649 fc->iteration_matrix->col_coupleBlock->val[iptr_ij]=-(rtmp1+d_ij);
650 fc->iteration_matrix->row_coupleBlock->val[iptr_ji]=-(rtmp2+d_ij);
651 sum-=d_ij;
652 break;
653 }
654 }
655 }
656 // set main diagonal entry
657 fc->main_diagonal_low_order_transport_matrix[i] = sum;
658 //printf("%e\n", sum);
659 }
660 }
661
662 /*
663 * out_i=m_i u_i + a * \sum_{j <> i} l_{ij} (u_j-u_i) where m_i>0
664 * = u_i where m_i<=0
665 *
666 */
667 void FCT_Solver::setMuPaLu(double* out, const_Coupler_ptr<real_t> coupler, double a)
668 {
669 const_SystemMatrix_ptr L(transportproblem->iteration_matrix);
670 const double* M = transportproblem->lumped_mass_matrix;
671 const_SystemMatrixPattern_ptr pattern(L->pattern);
672 const double* u = coupler->borrowLocalData();
673 const double* remote_u = coupler->borrowRemoteData();
674 const dim_t n = L->getTotalNumRows();
675
676 #pragma omp parallel for
677 for (dim_t i = 0; i < n; ++i) {
678 if (M[i] > 0.) {
679 out[i] = M[i]*u[i];
680 } else {
681 out[i] = u[i];
682 }
683 }
684 if (std::abs(a) > 0) {
685 #pragma omp parallel for
686 for (dim_t i = 0; i < n; ++i) {
687 if (M[i] > 0.) {
688 double sum = 0;
689 const double u_i = u[i];
690 #pragma ivdep
691 for (index_t iptr_ij = pattern->mainPattern->ptr[i];
692 iptr_ij < pattern->mainPattern->ptr[i+1];
693 iptr_ij++) {
694 const index_t j = pattern->mainPattern->index[iptr_ij];
695 const double l_ij = L->mainBlock->val[iptr_ij];
696 sum += l_ij*(u[j]-u_i);
697 }
698 #pragma ivdep
699 for (index_t iptr_ij = pattern->col_couplePattern->ptr[i];
700 iptr_ij < pattern->col_couplePattern->ptr[i+1];
701 iptr_ij++) {
702 const index_t j=pattern->col_couplePattern->index[iptr_ij];
703 const double l_ij = L->col_coupleBlock->val[iptr_ij];
704 sum += l_ij*(remote_u[j]-u_i);
705 }
706 out[i] += a*sum;
707 }
708 }
709 }
710 }
711
712 } // namespace paso
713

Properties

Name Value
svn:mergeinfo /branches/4.0fordebian/paso/src/FCT_Solver.cpp:5567-5588 /branches/amg_from_3530/paso/src/FCT_Solver.cpp:3531-3826 /branches/lapack2681/paso/src/FCT_Solver.cpp:2682-2741 /branches/pasowrap/paso/src/FCT_Solver.cpp:3661-3674 /branches/py3_attempt2/paso/src/FCT_Solver.cpp:3871-3891 /branches/restext/paso/src/FCT_Solver.cpp:2610-2624 /branches/ripleygmg_from_3668/paso/src/FCT_Solver.cpp:3669-3791 /branches/stage3.0/paso/src/FCT_Solver.cpp:2569-2590 /branches/symbolic_from_3470/paso/src/FCT_Solver.cpp:3471-3974 /branches/symbolic_from_3470/ripley/test/python/paso/src/FCT_Solver.cpp:3517-3974 /branches/trilinos_from_5897/paso/src/FCT_Solver.cpp:5898-6118 /release/3.0/paso/src/FCT_Solver.cpp:2591-2601 /release/4.0/paso/src/FCT_Solver.cpp:5380-5406 /trunk/paso/src/FCT_Solver.cpp:4257-4344 /trunk/ripley/test/python/paso/src/FCT_Solver.cpp:3480-3515

  ViewVC Help
Powered by ViewVC 1.1.26