/[escript]/trunk/paso/src/FCT_Solver.cpp
ViewVC logotype

Contents of /trunk/paso/src/FCT_Solver.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 5179 - (show annotations)
Thu Sep 25 07:18:20 2014 UTC (5 years, 8 months ago) by caltinay
File size: 28301 byte(s)
Work towards allowing non-int index type.

1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2014 by University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Open Software License version 3.0
9 * http://www.opensource.org/licenses/osl-3.0.php
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development 2012-2013 by School of Earth Sciences
13 * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 *
15 *****************************************************************************/
16
17
18 /****************************************************************************/
19
20 /* Paso: Transport solver with flux correction (L is row sum zero)
21 *
22 * - Mv_t=Lv v(0)=u
23 *
24 * to return v(dt)
25 *
26 */
27 /****************************************************************************/
28
29 /* Author: l.gross@uq.edu.au */
30
31 /****************************************************************************/
32
33 #include "FCT_Solver.h"
34 #include "Preconditioner.h"
35 #include "PasoUtil.h"
36
37 namespace paso {
38
39 FCT_Solver::FCT_Solver(const_TransportProblem_ptr tp, Options* options) :
40 transportproblem(tp),
41 omega(0),
42 z(NULL),
43 du(NULL)
44 {
45 const dim_t blockSize = tp->getBlockSize();
46 const dim_t n = tp->getTotalNumRows();
47 mpi_info = tp->mpi_info;
48 flux_limiter = new FCT_FluxLimiter(tp);
49 b = new double[n];
50 if (options->ode_solver == PASO_CRANK_NICOLSON || options->ode_solver == PASO_BACKWARD_EULER) {
51 du = new double[n];
52 z = new double[n];
53 }
54 u_coupler.reset(new Coupler(tp->borrowConnector(), blockSize));
55 u_old_coupler.reset(new Coupler(tp->borrowConnector(), blockSize));
56
57 if (options->ode_solver == PASO_LINEAR_CRANK_NICOLSON) {
58 method = PASO_LINEAR_CRANK_NICOLSON;
59 } else if (options->ode_solver == PASO_CRANK_NICOLSON) {
60 method = PASO_CRANK_NICOLSON;
61 } else if (options->ode_solver == PASO_BACKWARD_EULER) {
62 method = PASO_BACKWARD_EULER;
63 } else {
64 Esys_setError(VALUE_ERROR, "FCT_Solver: unknown integration scheme.");
65 method = UNKNOWN;
66 }
67 }
68
69 FCT_Solver::~FCT_Solver()
70 {
71 delete flux_limiter;
72 delete[] b;
73 delete[] z;
74 delete[] du;
75 }
76
77 // modifies the main diagonal of the iteration matrix to introduce new dt
78 void FCT_Solver::initialize(double _dt, Options* options, Performance* pp)
79 {
80 const_TransportProblem_ptr fctp(transportproblem);
81 const index_t* main_iptr = fctp->borrowMainDiagonalPointer();
82 const dim_t n = fctp->transport_matrix->getTotalNumRows();
83 const double theta = getTheta();
84 omega = 1. / (_dt * theta);
85 dim_t i;
86 Options options2;
87
88 solve_free(fctp->iteration_matrix.get());
89 // fctp->iteration_matrix[i,i]=m[i]/(dt theta) -l[i,i]
90 dt = _dt;
91 #pragma omp parallel for private(i)
92 for (i = 0; i < n; ++i) {
93 const double m_i = fctp->lumped_mass_matrix[i];
94 const double l_ii = fctp->main_diagonal_low_order_transport_matrix[i];
95 if ( m_i > 0 ) {
96 fctp->iteration_matrix->mainBlock->val[main_iptr[i]] = m_i * omega - l_ii;
97 } else {
98 fctp->iteration_matrix->mainBlock->val[main_iptr[i]] = std::abs(m_i * omega - l_ii)/(EPSILON*EPSILON);
99 }
100 }
101
102 // allocate preconditioner/solver
103 options2.verbose = options->verbose;
104 if (method == PASO_LINEAR_CRANK_NICOLSON) {
105 options2.preconditioner = PASO_GS;
106 } else {
107 options2.preconditioner = PASO_JACOBI;
108 //options2.preconditioner = PASO_GS;
109 }
110 options2.use_local_preconditioner = false;
111 options2.sweeps = -1;
112
113 Performance_startMonitor(pp, PERFORMANCE_PRECONDITIONER_INIT);
114 fctp->iteration_matrix->setPreconditioner(&options2);
115 Performance_stopMonitor(pp, PERFORMANCE_PRECONDITIONER_INIT);
116 }
117
118 // entry point for update procedures
119 err_t FCT_Solver::update(double* u, double* u_old, Options* options, Performance* pp)
120 {
121 err_t err_out = SOLVER_NO_ERROR;
122
123 if (method == PASO_LINEAR_CRANK_NICOLSON) {
124 err_out = updateLCN(u, u_old, options, pp);
125 } else if (method == PASO_CRANK_NICOLSON) {
126 err_out = updateNL(u, u_old, options, pp);
127 } else if (method == PASO_BACKWARD_EULER) {
128 err_out = updateNL(u, u_old, options, pp);
129 } else {
130 err_out = SOLVER_INPUT_ERROR;
131 }
132 return err_out;
133 }
134
135 /// linear crank-nicolson update
136 err_t FCT_Solver::updateLCN(double* u, double* u_old, Options* options, Performance* pp)
137 {
138 dim_t sweep_max, i;
139 double const RTOL = options->tolerance;
140 const dim_t n = transportproblem->getTotalNumRows();
141 SystemMatrix_ptr iteration_matrix(transportproblem->iteration_matrix);
142 const index_t* main_iptr = transportproblem->borrowMainDiagonalPointer();
143 err_t errorCode = SOLVER_NO_ERROR;
144 double norm_u_tilde;
145
146 u_old_coupler->startCollect(u_old);
147 u_old_coupler->finishCollect();
148
149 // b[i]=m*u_tilde[i] = m u_old[i] + dt/2 sum_{j <> i} l_{ij}*(u_old[j]-u_old[i])
150 // = u_tilde[i] = u_old[i] where constraint m<0.
151 // note that iteration_matrix stores the negative values of the
152 // low order transport matrix l. Therefore a=-dt*0.5 is used.
153
154 setMuPaLu(b, u_old_coupler, -dt*0.5);
155 /* solve for u_tilde : u_tilda = m^{-1} * b */
156 flux_limiter->setU_tilde(b);
157 // u_tilde_connector is completed
158
159 // calculate anti-diffusive fluxes for u_tilde
160 setAntiDiffusionFlux_linearCN(flux_limiter->antidiffusive_fluxes);
161
162 /* b_i += sum_{j} limitation factor_{ij} * antidiffusive_flux_{ij} */
163 flux_limiter->addLimitedFluxes_Start();
164 flux_limiter->addLimitedFluxes_Complete(b);
165
166 util::scale(n, b, omega);
167 // solve (m-dt/2*L) u = b in the form (omega*m-L) u = b * omega with omega*dt/2=1
168 #pragma omp for private(i) schedule(static)
169 for (i = 0; i < n; ++i) {
170 if (!(transportproblem->lumped_mass_matrix[i] > 0)) {
171 b[i] = flux_limiter->u_tilde[i]
172 * transportproblem->iteration_matrix->mainBlock->val[main_iptr[i]];
173 }
174 }
175 // initial guess is u<- -u + 2*u_tilde
176 util::update(n, -1., u, 2., flux_limiter->u_tilde);
177
178 sweep_max = std::max((int) (- 2 * log(RTOL)/log(2.)-0.5),1);
179 norm_u_tilde = util::lsup(n, flux_limiter->u_tilde, flux_limiter->mpi_info);
180 if (options->verbose) {
181 std::cout << "FCT_Solver::updateLCN: u_tilde lsup = " << norm_u_tilde
182 << " (rtol = " << RTOL*norm_u_tilde << ", max. sweeps = "
183 << sweep_max << ")" << std::endl;
184 }
185 errorCode = Preconditioner_Smoother_solve_byTolerance(iteration_matrix,
186 ((Preconditioner*)(iteration_matrix->solver_p))->gs, u, b, RTOL,
187 &sweep_max, true);
188 if (errorCode == PRECONDITIONER_NO_ERROR) {
189 if (options->verbose)
190 std::cout << "FCT_Solver::updateLCN: convergence after "
191 << sweep_max << " Gauss-Seidel steps." << std::endl;
192 errorCode = SOLVER_NO_ERROR;
193 } else {
194 if (options->verbose)
195 std::cout << "FCT_Solver::updateLCN: Gauss-Seidel failed within "
196 << sweep_max << " steps (rel. tolerance " << RTOL << ")."
197 << std::endl;
198 errorCode = SOLVER_MAXITER_REACHED;
199 }
200 return errorCode;
201 }
202
203 err_t FCT_Solver::updateNL(double* u, double* u_old, Options* options,
204 Performance* pp)
205 {
206 // number of rates >=critical_rate accepted before divergence is triggered
207 const dim_t num_critical_rates_max = 3;
208 // expected value of convergence rate
209 const double critical_rate = 0.95;
210
211 const_TransportProblem_ptr fctp(transportproblem);
212 dim_t i;
213 double norm_u_tilde, norm_du=LARGE_POSITIVE_FLOAT, norm_du_old, rate=1.;
214 const dim_t n = fctp->transport_matrix->getTotalNumRows();
215 const double atol = options->absolute_tolerance;
216 const double rtol = options->tolerance;
217 const dim_t max_m = options->iter_max;
218 dim_t m = 0, num_critical_rates = 0;
219 err_t errorCode = SOLVER_NO_ERROR;
220 bool converged=false, max_m_reached=false, diverged=false;
221 /* //////////////////////////////////////////////////////////////////// */
222
223 options->num_iter=0;
224 u_old_coupler->startCollect(u_old);
225 u_old_coupler->finishCollect();
226 // prepare u_tilde and flux limiter
227 if (method == PASO_BACKWARD_EULER) {
228 // b[i]=m_i* u_old[i]
229 #pragma omp for private(i) schedule(static)
230 for (i = 0; i < n; ++i) {
231 if (fctp->lumped_mass_matrix[i] > 0 ) {
232 b[i]=u_old[i]* fctp->lumped_mass_matrix[i];
233 } else {
234 b[i]=u_old[i];
235 }
236 }
237 } else {
238 /* b[i]=m_i* u_old[i] + dt/2 sum_{j <> i} l_{ij}*(u_old[j]-u_old[i]) = m_i * u_tilde_i where m_i>0
239 * = u_old[i] otherwise
240 * note that iteration_matrix stores the negative values of the
241 * low order transport matrix l. Therefore a=-dt*0.5 is used. */
242 setMuPaLu(b, u_old_coupler, -dt*0.5);
243 }
244 flux_limiter->setU_tilde(b); // u_tilde = m^{-1} b */
245 // u_tilde_connector is completed
246 /************************************************************************/
247 // calculate stopping criterion
248 norm_u_tilde=util::lsup(n, flux_limiter->u_tilde, flux_limiter->mpi_info);
249 const double ATOL = rtol * norm_u_tilde + atol;
250 if (options->verbose)
251 std::cout << "FCT_Solver::updateNL: iteration starts u_tilde lsup = "
252 << norm_u_tilde << " (abs. tol = " << ATOL << ")" << std::endl;
253
254 // u_old is an initial guess for u
255 util::copy(n, u, u_old);
256
257 while (!converged && !diverged && !max_m_reached && Esys_noError()) {
258 u_coupler->startCollect(u);
259 u_coupler->finishCollect();
260
261 // set antidiffusive_flux_{ij} for u
262 if (method == PASO_BACKWARD_EULER) {
263 setAntiDiffusionFlux_BE(flux_limiter->antidiffusive_fluxes);
264 } else {
265 setAntiDiffusionFlux_CN(flux_limiter->antidiffusive_fluxes);
266 }
267 // start the calculation of the limitation factors_{fct_solver->ij}
268 flux_limiter->addLimitedFluxes_Start(); // uses u_tilde
269
270 /*
271 * z_m[i]=b[i] - (m_i*u[i] - omega*sum_{j<>i} l_{ij} (u[j]-u[i]) ) where m_i>0
272 * ==b[i] - u[i] = u_tilda[i]-u[i] =0 otherwise
273 *
274 * omega = dt/2 or dt .
275 *
276 * note that iteration_matrix stores the negative values of the
277 * low order transport matrix l. Therefore a=dt*theta is used.
278 */
279 if (method == PASO_BACKWARD_EULER) {
280 setMuPaLu(z, u_coupler, dt);
281 } else {
282 setMuPaLu(z, u_coupler, dt/2);
283 }
284
285 util::update(n, -1., z, 1., b); // z=b-z
286
287 // z_i += sum_{j} limitation factor_{ij} * antidiffusive_flux_{ij}
288 flux_limiter->addLimitedFluxes_Complete(z);
289
290 // we solve (m/omega - L ) * du = z
291 if (method == PASO_BACKWARD_EULER) {
292 dim_t cntIter = options->iter_max;
293 double tol= util::l2(n, z, fctp->mpi_info) ;
294
295 if (m==0) {
296 tol *= 0.5;
297 } else {
298 tol *= std::min(std::max(rate*rate, 1e-2), 0.5);
299 }
300 // use BiCGStab with Jacobi preconditioner ( m - omega * L )
301 util::zeroes(n,du);
302 errorCode = Solver_BiCGStab(fctp->iteration_matrix, z, du, &cntIter, &tol, pp);
303
304 // errorCode = Solver_GMRES(fctp->iteration_matrix, z, du, &cntIter, &tol, 10, 2000, pp);
305 if (options->verbose)
306 std::cout << "FCT_Solver::updateNL: BiCGStab completed after "
307 << cntIter << " steps (residual = " << tol << ")." << std::endl;
308 options->num_iter += cntIter;
309 if (errorCode != SOLVER_NO_ERROR) break;
310 } else {
311 // just use the main diagonal of (m/omega - L )
312
313 Preconditioner_Smoother_solve(fctp->iteration_matrix,
314 ((Preconditioner*) (fctp->iteration_matrix->solver_p))->jacobi,
315 du, z, 1, false);
316
317 options->num_iter++;
318 }
319
320 util::update(n, 1., u, omega, du);
321 norm_du_old = norm_du;
322 norm_du = util::lsup(n, du, fctp->mpi_info);
323 if (m == 0) {
324 if (options->verbose)
325 std::cout << "FCT_Solver::updateNL: step " << m+1
326 << ": increment = " << norm_du * omega << std::endl;
327 } else {
328 if (norm_du_old > 0.) {
329 rate = norm_du/norm_du_old;
330 } else if (norm_du <= 0.) {
331 rate = 0.;
332 } else {
333 rate = LARGE_POSITIVE_FLOAT;
334 }
335 if (options->verbose)
336 std::cout << "FCT_Solver::updateNL: step " << m+1
337 << ": increment= " << norm_du * omega << " (rate = "
338 << rate << ")" << std::endl;
339 num_critical_rates += (rate<critical_rate ? 0 : 1);
340 max_m_reached = (m>max_m);
341 diverged = (num_critical_rates >= num_critical_rates_max);
342 converged = (norm_du * omega <= ATOL);
343 }
344 m++;
345 } // end of while loop
346 if (errorCode == SOLVER_NO_ERROR) {
347 if (converged) {
348 if (options->verbose)
349 std::cout << "FCT_Solver::updateNL: iteration is completed." << std::endl;
350 errorCode = SOLVER_NO_ERROR;
351 } else if (diverged) {
352 if (options->verbose)
353 std::cout << "FCT_Solver::updateNL: divergence." << std::endl;
354 errorCode = SOLVER_DIVERGENCE;
355 } else if (max_m_reached) {
356 if (options->verbose)
357 std::cout << "FCT_Solver::updateNL: maximum number of iteration steps reached." << std::endl;
358 errorCode = SOLVER_MAXITER_REACHED;
359 }
360 }
361 return errorCode;
362 }
363
364
365 /*
366 * AntiDiffusionFlux:
367 *
368 * f_{ij} = (m_{ij} - dt (1-theta) d_{ij}) (u_old[j]-u_old[i]) - (m_{ij} + dt theta d_{ij}) (u[j]-u[i])
369 *
370 * m=fc->mass matrix
371 * d=artificial diffusion matrix = L - K = - fc->iteration matrix - fc->transport matrix (away from main diagonal)
372 *
373 * for CN : theta = 0.5
374 * for BE : theta = 1.
375 */
376
377 void FCT_Solver::setAntiDiffusionFlux_CN(SystemMatrix_ptr flux_matrix)
378 {
379 const double* u = u_coupler->borrowLocalData();
380 const double* u_old = u_old_coupler->borrowLocalData();
381 const double* remote_u = u_coupler->borrowRemoteData();
382 const double* remote_u_old = u_old_coupler->borrowRemoteData();
383 const double dt_half = dt/2;
384 const_TransportProblem_ptr fct(transportproblem);
385 const_SystemMatrixPattern_ptr pattern(fct->iteration_matrix->pattern);
386 const dim_t n = fct->iteration_matrix->getTotalNumRows();
387
388 #pragma omp parallel for
389 for (dim_t i = 0; i < n; ++i) {
390 const double u_i = u[i];
391 const double u_old_i = u_old[i];
392
393 #pragma ivdep
394 for (index_t iptr_ij = pattern->mainPattern->ptr[i];
395 iptr_ij < pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
396 const index_t j = pattern->mainPattern->index[iptr_ij];
397 const double m_ij = fct->mass_matrix->mainBlock->val[iptr_ij];
398 // this is in fact -d_ij
399 const double d_ij = fct->transport_matrix->mainBlock->val[iptr_ij]+
400 fct->iteration_matrix->mainBlock->val[iptr_ij];
401 const double u_old_j = u_old[j];
402 const double u_j = u[j];
403
404 // (m_{ij} - dt (1-theta) d_{ij}) (u_old[j]-u_old[i]) - (m_{ij} + dt theta d_{ij}) (u[j]-u[i])
405 flux_matrix->mainBlock->val[iptr_ij] =
406 (m_ij+dt_half*d_ij)*(u_old_j-u_old_i) -
407 (m_ij-dt_half*d_ij)*(u_j-u_i);
408
409 }
410 #pragma ivdep
411 for (index_t iptr_ij = pattern->col_couplePattern->ptr[i];
412 iptr_ij < pattern->col_couplePattern->ptr[i+1]; iptr_ij++) {
413 const index_t j = pattern->col_couplePattern->index[iptr_ij];
414 const double m_ij = fct->mass_matrix->col_coupleBlock->val[iptr_ij];
415 // this is in fact -d_ij
416 const double d_ij =
417 fct->transport_matrix->col_coupleBlock->val[iptr_ij] +
418 fct->iteration_matrix->col_coupleBlock->val[iptr_ij];
419 const double u_old_j = remote_u_old[j];
420 const double u_j = remote_u[j];
421 flux_matrix->col_coupleBlock->val[iptr_ij] =
422 (m_ij+dt_half*d_ij)*(u_old_j-u_old_i) -
423 (m_ij-dt_half*d_ij)*(u_j-u_i);
424 }
425 }
426 }
427
428 void FCT_Solver::setAntiDiffusionFlux_BE(SystemMatrix_ptr flux_matrix)
429 {
430 const double* u = u_coupler->borrowLocalData();
431 const double* u_old = u_old_coupler->borrowLocalData();
432 const double* remote_u = u_coupler->borrowRemoteData();
433 const double* remote_u_old = u_old_coupler->borrowRemoteData();
434 const_TransportProblem_ptr fct(transportproblem);
435 const_SystemMatrixPattern_ptr pattern(fct->iteration_matrix->pattern);
436 const dim_t n = fct->iteration_matrix->getTotalNumRows();
437
438 #pragma omp parallel for
439 for (dim_t i = 0; i < n; ++i) {
440 const double u_i = u[i];
441 const double u_old_i = u_old[i];
442 #pragma ivdep
443 for (index_t iptr_ij = pattern->mainPattern->ptr[i];
444 iptr_ij < pattern->mainPattern->ptr[i+1]; iptr_ij++) {
445 const index_t j = pattern->mainPattern->index[iptr_ij];
446 const double m_ij = fct->mass_matrix->mainBlock->val[iptr_ij];
447 // this is in fact -d_ij
448 const double d_ij = fct->transport_matrix->mainBlock->val[iptr_ij]+
449 fct->iteration_matrix->mainBlock->val[iptr_ij];
450 const double u_old_j = u_old[j];
451 const double u_j = u[j];
452
453 flux_matrix->mainBlock->val[iptr_ij] =
454 m_ij*(u_old_j-u_old_i) - (m_ij-dt*d_ij)*(u_j-u_i);
455 }
456 #pragma ivdep
457 for (index_t iptr_ij = pattern->col_couplePattern->ptr[i];
458 iptr_ij < pattern->col_couplePattern->ptr[i+1]; iptr_ij++) {
459 const index_t j = pattern->col_couplePattern->index[iptr_ij];
460 const double m_ij = fct->mass_matrix->col_coupleBlock->val[iptr_ij];
461 // this is in fact -d_ij
462 const double d_ij =
463 fct->transport_matrix->col_coupleBlock->val[iptr_ij] +
464 fct->iteration_matrix->col_coupleBlock->val[iptr_ij];
465 const double u_old_j = remote_u_old[j];
466 const double u_j = remote_u[j];
467
468 flux_matrix->col_coupleBlock->val[iptr_ij] =
469 m_ij*(u_old_j-u_old_i) - (m_ij-dt*d_ij)*(u_j-u_i);
470 }
471 }
472 }
473
474 /* special version of the ant-diffusive fluxes for the linear Crank-Nicolson
475 * scheme. In fact this is evaluated for u = 2*u_tilde - u_old which is the
476 * predictor of the solution of the the stabilised problem at time dt using
477 * the forward Euler scheme
478 *
479 * f_{ij} = (m_{ij} - dt/2 d_{ij}) (u_old[j]-u_old[i]) - (m_{ij} + dt/2 d_{ij}) (u[j]-u[i])
480 * = (m_{ij} - dt/2 d_{ij}) * (u_old[j]-u_old[i]) - (m_{ij} + dt/2 d_{ij}) * ( 2*(u_tilde[j]-u_tilde[i]) - (u_old[j] -u_old [i]) )
481 * = 2* m_{ij} * ( (u_old[j]-u_tilde[j] - (u_old[i]) - u_tilde[i]) ) - dt d_{ij} * (u_tilde[j]-u_tilde[i])
482 *
483 */
484 void FCT_Solver::setAntiDiffusionFlux_linearCN(SystemMatrix_ptr flux_matrix)
485 {
486 const_Coupler_ptr u_tilde_coupler(flux_limiter->u_tilde_coupler);
487 const double* u_tilde = u_tilde_coupler->borrowLocalData();
488 const double* u_old = u_old_coupler->borrowLocalData();
489 const double* remote_u_tilde = u_tilde_coupler->borrowRemoteData();
490 const double* remote_u_old = u_old_coupler->borrowRemoteData();
491 const_TransportProblem_ptr fct(transportproblem);
492 const_SystemMatrixPattern_ptr pattern(fct->iteration_matrix->pattern);
493 const dim_t n = fct->iteration_matrix->getTotalNumRows();
494
495 #pragma omp parallel for
496 for (dim_t i = 0; i < n; ++i) {
497 const double u_tilde_i = u_tilde[i];
498 const double u_old_i = u_old[i];
499 const double du_i = u_tilde_i - u_old_i;
500 #pragma ivdep
501 for (index_t iptr_ij = pattern->mainPattern->ptr[i];
502 iptr_ij < pattern->mainPattern->ptr[i+1]; iptr_ij++) {
503 const index_t j = pattern->mainPattern->index[iptr_ij];
504 const double m_ij = fct->mass_matrix->mainBlock->val[iptr_ij];
505 // this is in fact -d_ij
506 const double d_ij = fct->transport_matrix->mainBlock->val[iptr_ij]+
507 fct->iteration_matrix->mainBlock->val[iptr_ij];
508 const double u_tilde_j = u_tilde[j];
509 const double u_old_j = u_old[j];
510 const double du_j = u_tilde_j - u_old_j;
511
512 flux_matrix->mainBlock->val[iptr_ij] = 2 * m_ij * ( du_i - du_j ) -
513 dt * d_ij * ( u_tilde_i - u_tilde_j);
514 }
515 #pragma ivdep
516 for (index_t iptr_ij=pattern->col_couplePattern->ptr[i];
517 iptr_ij<pattern->col_couplePattern->ptr[i+1]; iptr_ij++) {
518
519 const index_t j = pattern->col_couplePattern->index[iptr_ij];
520 const double m_ij = fct->mass_matrix->col_coupleBlock->val[iptr_ij];
521 // this is in fact -d_ij
522 const double d_ij =
523 fct->transport_matrix->col_coupleBlock->val[iptr_ij] +
524 fct->iteration_matrix->col_coupleBlock->val[iptr_ij];
525 const double u_tilde_j = remote_u_tilde[j];
526 const double u_old_j = remote_u_old[j];
527 const double du_j = u_tilde_j - u_old_j;
528
529 flux_matrix->col_coupleBlock->val[iptr_ij] =
530 2*m_ij * ( du_i - du_j ) - dt * d_ij * (u_tilde_i - u_tilde_j);
531 }
532 }
533 }
534
535 /****************************************************************************/
536
537 double FCT_Solver::getSafeTimeStepSize(TransportProblem_ptr fctp)
538 {
539 double dt_max = LARGE_POSITIVE_FLOAT;
540 const dim_t n = fctp->transport_matrix->getTotalNumRows();
541
542 // set low order transport operator
543 setLowOrderOperator(fctp);
544
545 if (Esys_noError()) {
546 // calculate time step size
547 dt_max = LARGE_POSITIVE_FLOAT;
548 #pragma omp parallel
549 {
550 double dt_max_loc = LARGE_POSITIVE_FLOAT;
551 #pragma omp for schedule(static)
552 for (dim_t i=0; i<n; ++i) {
553 const double l_ii = fctp->main_diagonal_low_order_transport_matrix[i];
554 const double m_i = fctp->lumped_mass_matrix[i];
555 if (m_i > 0) {
556 if (l_ii<0)
557 dt_max_loc = std::min(dt_max_loc,m_i/(-l_ii));
558 }
559 }
560 #pragma omp critical
561 {
562 dt_max = std::min(dt_max,dt_max_loc);
563 }
564 }
565 #ifdef ESYS_MPI
566 double dt_max_loc = dt_max;
567 MPI_Allreduce(&dt_max_loc, &dt_max, 1, MPI_DOUBLE, MPI_MIN, fctp->mpi_info->comm);
568 #endif
569 if (dt_max < LARGE_POSITIVE_FLOAT)
570 dt_max *= 2.;
571 }
572 return dt_max;
573 }
574
575 /* Creates the low order transport matrix and stores its negative values
576 * into the iteration_matrix except for the main diagonal which is stored
577 * separately.
578 * If fc->iteration_matrix==NULL, fc->iteration_matrix is allocated
579 *
580 * a=transport_matrix
581 * b= low_order_transport_matrix = - iteration_matrix
582 * c=main diagonal low_order_transport_matrix
583 * initialise c[i] mit a[i,i]
584 *
585 * d_ij=max(0,-a[i,j],-a[j,i])
586 * b[i,j]=-(a[i,j]+d_ij)
587 * c[i]-=d_ij
588 */
589
590 void FCT_Solver::setLowOrderOperator(TransportProblem_ptr fc)
591 {
592 const index_t* main_iptr = fc->borrowMainDiagonalPointer();
593
594 if (!fc->iteration_matrix.get()) {
595 fc->iteration_matrix.reset(new SystemMatrix(
596 fc->transport_matrix->type, fc->transport_matrix->pattern,
597 fc->transport_matrix->row_block_size,
598 fc->transport_matrix->col_block_size, true));
599 }
600
601 const_SystemMatrixPattern_ptr pattern(fc->iteration_matrix->pattern);
602 const dim_t n = fc->iteration_matrix->getTotalNumRows();
603 #pragma omp parallel for
604 for (dim_t i = 0; i < n; ++i) {
605 double sum = fc->transport_matrix->mainBlock->val[main_iptr[i]];
606 //printf("sum[%d] = %e -> ", i, sum);
607
608 // look at a[i,j]
609 for (index_t iptr_ij=pattern->mainPattern->ptr[i];iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
610 const index_t j = pattern->mainPattern->index[iptr_ij];
611 const double rtmp1 = fc->transport_matrix->mainBlock->val[iptr_ij];
612 if (j != i) {
613 // find entry a[j,i]
614 #pragma ivdep
615 for (index_t iptr_ji=pattern->mainPattern->ptr[j]; iptr_ji<pattern->mainPattern->ptr[j+1]; ++iptr_ji) {
616
617 if (pattern->mainPattern->index[iptr_ji] == i) {
618 const double rtmp2=fc->transport_matrix->mainBlock->val[iptr_ji];
619 //printf("a[%d,%d]=%e\n",i,j,rtmp1);
620 //printf("a[%d,%d]=%e\n",j,i,rtmp2);
621 const double d_ij=-MIN3(0.,rtmp1,rtmp2);
622 fc->iteration_matrix->mainBlock->val[iptr_ij]=-(rtmp1+d_ij);
623 //printf("l[%d,%d]=%e\n",i,j,fc->iteration_matrix->mainBlock->val[iptr_ij]);
624 sum-=d_ij;
625 break;
626 }
627 }
628 }
629 }
630 for (index_t iptr_ij=pattern->col_couplePattern->ptr[i];iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
631 const index_t j = pattern->col_couplePattern->index[iptr_ij];
632 const double rtmp1 = fc->transport_matrix->col_coupleBlock->val[iptr_ij];
633 // find entry a[j,i]
634 #pragma ivdep
635 for (index_t iptr_ji=pattern->row_couplePattern->ptr[j]; iptr_ji<pattern->row_couplePattern->ptr[j+1]; ++iptr_ji) {
636 if (pattern->row_couplePattern->index[iptr_ji]==i) {
637 const double rtmp2=fc->transport_matrix->row_coupleBlock->val[iptr_ji];
638 const double d_ij=-MIN3(0.,rtmp1,rtmp2);
639 fc->iteration_matrix->col_coupleBlock->val[iptr_ij]=-(rtmp1+d_ij);
640 fc->iteration_matrix->row_coupleBlock->val[iptr_ji]=-(rtmp2+d_ij);
641 sum-=d_ij;
642 break;
643 }
644 }
645 }
646 // set main diagonal entry
647 fc->main_diagonal_low_order_transport_matrix[i] = sum;
648 //printf("%e\n", sum);
649 }
650 }
651
652 /*
653 * out_i=m_i u_i + a * \sum_{j <> i} l_{ij} (u_j-u_i) where m_i>0
654 * = u_i where m_i<=0
655 *
656 */
657 void FCT_Solver::setMuPaLu(double* out, const_Coupler_ptr coupler, double a)
658 {
659 const_SystemMatrix_ptr L(transportproblem->iteration_matrix);
660 const double* M = transportproblem->lumped_mass_matrix;
661 const_SystemMatrixPattern_ptr pattern(L->pattern);
662 const double* u = coupler->borrowLocalData();
663 const double* remote_u = coupler->borrowRemoteData();
664 const dim_t n = L->getTotalNumRows();
665
666 #pragma omp parallel for
667 for (dim_t i = 0; i < n; ++i) {
668 if (M[i] > 0.) {
669 out[i] = M[i]*u[i];
670 } else {
671 out[i] = u[i];
672 }
673 }
674 if (std::abs(a) > 0) {
675 #pragma omp parallel for
676 for (dim_t i = 0; i < n; ++i) {
677 if (M[i] > 0.) {
678 double sum = 0;
679 const double u_i = u[i];
680 #pragma ivdep
681 for (index_t iptr_ij = pattern->mainPattern->ptr[i];
682 iptr_ij < pattern->mainPattern->ptr[i+1];
683 iptr_ij++) {
684 const index_t j = pattern->mainPattern->index[iptr_ij];
685 const double l_ij = L->mainBlock->val[iptr_ij];
686 sum += l_ij*(u[j]-u_i);
687 }
688 #pragma ivdep
689 for (index_t iptr_ij = pattern->col_couplePattern->ptr[i];
690 iptr_ij < pattern->col_couplePattern->ptr[i+1];
691 iptr_ij++) {
692 const index_t j=pattern->col_couplePattern->index[iptr_ij];
693 const double l_ij = L->col_coupleBlock->val[iptr_ij];
694 sum += l_ij*(remote_u[j]-u_i);
695 }
696 out[i] += a*sum;
697 }
698 }
699 }
700 }
701
702 } // namespace paso
703

Properties

Name Value
svn:mergeinfo /branches/amg_from_3530/paso/src/FCT_Solver.cpp:3531-3826 /branches/lapack2681/paso/src/FCT_Solver.cpp:2682-2741 /branches/pasowrap/paso/src/FCT_Solver.cpp:3661-3674 /branches/py3_attempt2/paso/src/FCT_Solver.cpp:3871-3891 /branches/restext/paso/src/FCT_Solver.cpp:2610-2624 /branches/ripleygmg_from_3668/paso/src/FCT_Solver.cpp:3669-3791 /branches/stage3.0/paso/src/FCT_Solver.cpp:2569-2590 /branches/symbolic_from_3470/paso/src/FCT_Solver.cpp:3471-3974 /branches/symbolic_from_3470/ripley/test/python/paso/src/FCT_Solver.cpp:3517-3974 /release/3.0/paso/src/FCT_Solver.cpp:2591-2601 /trunk/paso/src/FCT_Solver.cpp:4257-4344 /trunk/ripley/test/python/paso/src/FCT_Solver.cpp:3480-3515

  ViewVC Help
Powered by ViewVC 1.1.26