/[escript]/trunk/paso/src/FCT_Solver.cpp
ViewVC logotype

Contents of /trunk/paso/src/FCT_Solver.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4934 - (show annotations)
Tue May 13 00:28:11 2014 UTC (6 years ago) by jfenwick
File size: 27904 byte(s)
This commit is brought to you by the number 4934 and the tool "meld".
Merge of partially complete split world code from branch.



1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2014 by University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Open Software License version 3.0
9 * http://www.opensource.org/licenses/osl-3.0.php
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development 2012-2013 by School of Earth Sciences
13 * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 *
15 *****************************************************************************/
16
17
18 /****************************************************************************/
19
20 /* Paso: Transport solver with flux correction (L is row sum zero)
21 *
22 * - Mv_t=Lv v(0)=u
23 *
24 * to return v(dt)
25 *
26 */
27 /****************************************************************************/
28
29 /* Author: l.gross@uq.edu.au */
30
31 /****************************************************************************/
32
33 #include "FCT_Solver.h"
34 #include "Preconditioner.h"
35 #include "PasoUtil.h"
36
37 namespace paso {
38
39 FCT_Solver::FCT_Solver(const_TransportProblem_ptr tp, Options* options) :
40 transportproblem(tp),
41 omega(0),
42 z(NULL),
43 du(NULL)
44 {
45 const dim_t blockSize = tp->getBlockSize();
46 const dim_t n = tp->getTotalNumRows();
47 mpi_info = tp->mpi_info;
48 flux_limiter = new FCT_FluxLimiter(tp);
49 b = new double[n];
50 if (options->ode_solver == PASO_CRANK_NICOLSON || options->ode_solver == PASO_BACKWARD_EULER) {
51 du = new double[n];
52 z = new double[n];
53 }
54 u_coupler.reset(new Coupler(tp->borrowConnector(), blockSize));
55 u_old_coupler.reset(new Coupler(tp->borrowConnector(), blockSize));
56
57 if (options->ode_solver == PASO_LINEAR_CRANK_NICOLSON) {
58 method = PASO_LINEAR_CRANK_NICOLSON;
59 } else if (options->ode_solver == PASO_CRANK_NICOLSON) {
60 method = PASO_CRANK_NICOLSON;
61 } else if (options->ode_solver == PASO_BACKWARD_EULER) {
62 method = PASO_BACKWARD_EULER;
63 } else {
64 Esys_setError(VALUE_ERROR, "FCT_Solver: unknown integration scheme.");
65 method = UNKNOWN;
66 }
67 }
68
69 FCT_Solver::~FCT_Solver()
70 {
71 delete flux_limiter;
72 delete[] b;
73 delete[] z;
74 delete[] du;
75 }
76
77 // modifies the main diagonal of the iteration matrix to introduce new dt
78 void FCT_Solver::initialize(double _dt, Options* options, Performance* pp)
79 {
80 const_TransportProblem_ptr fctp(transportproblem);
81 const index_t* main_iptr = fctp->borrowMainDiagonalPointer();
82 const dim_t n = fctp->transport_matrix->getTotalNumRows();
83 const double theta = getTheta();
84 omega = 1. / (_dt * theta);
85 dim_t i;
86 Options options2;
87
88 solve_free(fctp->iteration_matrix.get());
89 // fctp->iteration_matrix[i,i]=m[i]/(dt theta) -l[i,i]
90 dt = _dt;
91 #pragma omp parallel for private(i)
92 for (i = 0; i < n; ++i) {
93 const double m_i = fctp->lumped_mass_matrix[i];
94 const double l_ii = fctp->main_diagonal_low_order_transport_matrix[i];
95 if ( m_i > 0 ) {
96 fctp->iteration_matrix->mainBlock->val[main_iptr[i]] = m_i * omega - l_ii;
97 } else {
98 fctp->iteration_matrix->mainBlock->val[main_iptr[i]] = std::abs(m_i * omega - l_ii)/(EPSILON*EPSILON);
99 }
100 }
101
102 // allocate preconditioner/solver
103 options2.verbose = options->verbose;
104 if (method == PASO_LINEAR_CRANK_NICOLSON) {
105 options2.preconditioner = PASO_GS;
106 } else {
107 options2.preconditioner = PASO_JACOBI;
108 //options2.preconditioner = PASO_GS;
109 }
110 options2.use_local_preconditioner = false;
111 options2.sweeps = -1;
112
113 Performance_startMonitor(pp, PERFORMANCE_PRECONDITIONER_INIT);
114 fctp->iteration_matrix->setPreconditioner(&options2);
115 Performance_stopMonitor(pp, PERFORMANCE_PRECONDITIONER_INIT);
116 }
117
118 // entry point for update procedures
119 err_t FCT_Solver::update(double* u, double* u_old, Options* options, Performance* pp)
120 {
121 err_t err_out = SOLVER_NO_ERROR;
122
123 if (method == PASO_LINEAR_CRANK_NICOLSON) {
124 err_out = updateLCN(u, u_old, options, pp);
125 } else if (method == PASO_CRANK_NICOLSON) {
126 err_out = updateNL(u, u_old, options, pp);
127 } else if (method == PASO_BACKWARD_EULER) {
128 err_out = updateNL(u, u_old, options, pp);
129 } else {
130 err_out = SOLVER_INPUT_ERROR;
131 }
132 return err_out;
133 }
134
135 /// linear crank-nicolson update
136 err_t FCT_Solver::updateLCN(double* u, double* u_old, Options* options, Performance* pp)
137 {
138 dim_t sweep_max, i;
139 double const RTOL = options->tolerance;
140 const dim_t n = transportproblem->getTotalNumRows();
141 SystemMatrix_ptr iteration_matrix(transportproblem->iteration_matrix);
142 const index_t* main_iptr = transportproblem->borrowMainDiagonalPointer();
143 err_t errorCode = SOLVER_NO_ERROR;
144 double norm_u_tilde;
145
146 u_old_coupler->startCollect(u_old);
147 u_old_coupler->finishCollect();
148
149 // b[i]=m*u_tilde[i] = m u_old[i] + dt/2 sum_{j <> i} l_{ij}*(u_old[j]-u_old[i])
150 // = u_tilde[i] = u_old[i] where constraint m<0.
151 // note that iteration_matrix stores the negative values of the
152 // low order transport matrix l. Therefore a=-dt*0.5 is used.
153
154 setMuPaLu(b, u_old_coupler, -dt*0.5);
155 /* solve for u_tilde : u_tilda = m^{-1} * b */
156 flux_limiter->setU_tilde(b);
157 // u_tilde_connector is completed
158
159 // calculate anti-diffusive fluxes for u_tilde
160 setAntiDiffusionFlux_linearCN(flux_limiter->antidiffusive_fluxes);
161
162 /* b_i += sum_{j} limitation factor_{ij} * antidiffusive_flux_{ij} */
163 flux_limiter->addLimitedFluxes_Start();
164 flux_limiter->addLimitedFluxes_Complete(b);
165
166 util::scale(n, b, omega);
167 // solve (m-dt/2*L) u = b in the form (omega*m-L) u = b * omega with omega*dt/2=1
168 #pragma omp for private(i) schedule(static)
169 for (i = 0; i < n; ++i) {
170 if (!(transportproblem->lumped_mass_matrix[i] > 0)) {
171 b[i] = flux_limiter->u_tilde[i]
172 * transportproblem->iteration_matrix->mainBlock->val[main_iptr[i]];
173 }
174 }
175 // initial guess is u<- -u + 2*u_tilde
176 util::update(n, -1., u, 2., flux_limiter->u_tilde);
177
178 sweep_max = std::max((int) (- 2 * log(RTOL)/log(2.)-0.5),1);
179 norm_u_tilde = util::lsup(n, flux_limiter->u_tilde, flux_limiter->mpi_info);
180 if (options->verbose) {
181 printf("FCT_Solver::updateLCN: u_tilde lsup = %e (rtol = %e, max. sweeps = %d)\n", norm_u_tilde, RTOL*norm_u_tilde, sweep_max);
182 }
183 errorCode = Preconditioner_Smoother_solve_byTolerance(iteration_matrix,
184 ((Preconditioner*)(iteration_matrix->solver_p))->gs, u, b, RTOL,
185 &sweep_max, true);
186 if (errorCode == PRECONDITIONER_NO_ERROR) {
187 if (options->verbose) printf("FCT_Solver::updateLCN: convergence after %d Gauss-Seidel steps.\n",sweep_max);
188 errorCode = SOLVER_NO_ERROR;
189 } else {
190 if (options->verbose) printf("FCT_Solver::updateLCN: Gauss-Seidel failed within %d stesp (rel. tolerance %e).\n",sweep_max,RTOL);
191 errorCode = SOLVER_MAXITER_REACHED;
192 }
193 return errorCode;
194 }
195
196 err_t FCT_Solver::updateNL(double* u, double* u_old, Options* options,
197 Performance* pp)
198 {
199 // number of rates >=critical_rate accepted before divergence is triggered
200 const dim_t num_critical_rates_max = 3;
201 // expected value of convergence rate
202 const double critical_rate = 0.95;
203
204 const_TransportProblem_ptr fctp(transportproblem);
205 dim_t i;
206 double norm_u_tilde, norm_du=LARGE_POSITIVE_FLOAT, norm_du_old, rate=1.;
207 const dim_t n = fctp->transport_matrix->getTotalNumRows();
208 const double atol = options->absolute_tolerance;
209 const double rtol = options->tolerance;
210 const dim_t max_m = options->iter_max;
211 dim_t m = 0, num_critical_rates = 0;
212 err_t errorCode = SOLVER_NO_ERROR;
213 bool converged=false, max_m_reached=false, diverged=false;
214 /* //////////////////////////////////////////////////////////////////// */
215
216 options->num_iter=0;
217 u_old_coupler->startCollect(u_old);
218 u_old_coupler->finishCollect();
219 // prepare u_tilde and flux limiter
220 if (method == PASO_BACKWARD_EULER) {
221 // b[i]=m_i* u_old[i]
222 #pragma omp for private(i) schedule(static)
223 for (i = 0; i < n; ++i) {
224 if (fctp->lumped_mass_matrix[i] > 0 ) {
225 b[i]=u_old[i]* fctp->lumped_mass_matrix[i];
226 } else {
227 b[i]=u_old[i];
228 }
229 }
230 } else {
231 /* b[i]=m_i* u_old[i] + dt/2 sum_{j <> i} l_{ij}*(u_old[j]-u_old[i]) = m_i * u_tilde_i where m_i>0
232 * = u_old[i] otherwise
233 * note that iteration_matrix stores the negative values of the
234 * low order transport matrix l. Therefore a=-dt*0.5 is used. */
235 setMuPaLu(b, u_old_coupler, -dt*0.5);
236 }
237 flux_limiter->setU_tilde(b); // u_tilde = m^{-1} b */
238 // u_tilde_connector is completed
239 /************************************************************************/
240 // calculate stopping criterion
241 norm_u_tilde=util::lsup(n, flux_limiter->u_tilde, flux_limiter->mpi_info);
242 const double ATOL = rtol * norm_u_tilde + atol;
243 if (options->verbose)
244 printf("FCT_Solver::updateNL: iteration starts u_tilde lsup = %e "
245 "(abs. tol = %e)\n", norm_u_tilde, ATOL);
246
247 // u_old is an initial guess for u
248 util::copy(n, u, u_old);
249
250 while (!converged && !diverged && !max_m_reached && Esys_noError()) {
251 u_coupler->startCollect(u);
252 u_coupler->finishCollect();
253
254 // set antidiffusive_flux_{ij} for u
255 if (method == PASO_BACKWARD_EULER) {
256 setAntiDiffusionFlux_BE(flux_limiter->antidiffusive_fluxes);
257 } else {
258 setAntiDiffusionFlux_CN(flux_limiter->antidiffusive_fluxes);
259 }
260 // start the calculation of the limitation factors_{fct_solver->ij}
261 flux_limiter->addLimitedFluxes_Start(); // uses u_tilde
262
263 /*
264 * z_m[i]=b[i] - (m_i*u[i] - omega*sum_{j<>i} l_{ij} (u[j]-u[i]) ) where m_i>0
265 * ==b[i] - u[i] = u_tilda[i]-u[i] =0 otherwise
266 *
267 * omega = dt/2 or dt .
268 *
269 * note that iteration_matrix stores the negative values of the
270 * low order transport matrix l. Therefore a=dt*theta is used.
271 */
272 if (method == PASO_BACKWARD_EULER) {
273 setMuPaLu(z, u_coupler, dt);
274 } else {
275 setMuPaLu(z, u_coupler, dt/2);
276 }
277
278 util::update(n, -1., z, 1., b); // z=b-z
279
280 // z_i += sum_{j} limitation factor_{ij} * antidiffusive_flux_{ij}
281 flux_limiter->addLimitedFluxes_Complete(z);
282
283 // we solve (m/omega - L ) * du = z
284 if (method == PASO_BACKWARD_EULER) {
285 dim_t cntIter = options->iter_max;
286 double tol= util::l2(n, z, fctp->mpi_info) ;
287
288 if (m==0) {
289 tol *= 0.5;
290 } else {
291 tol *= std::min(std::max(rate*rate, 1e-2), 0.5);
292 }
293 // use BiCGStab with Jacobi preconditioner ( m - omega * L )
294 util::zeroes(n,du);
295 errorCode = Solver_BiCGStab(fctp->iteration_matrix, z, du, &cntIter, &tol, pp);
296
297 // errorCode = Solver_GMRES(fctp->iteration_matrix, z, du, &cntIter, &tol, 10, 2000, pp);
298 if (options->verbose)
299 printf("FCT_Solver::updateNL: BiCGStab completed after %d steps (residual =%e).\n",cntIter, tol);
300 options->num_iter += cntIter;
301 if (errorCode != SOLVER_NO_ERROR) break;
302 } else {
303 // just use the main diagonal of (m/omega - L )
304
305 Preconditioner_Smoother_solve(fctp->iteration_matrix,
306 ((Preconditioner*) (fctp->iteration_matrix->solver_p))->jacobi,
307 du, z, 1, false);
308
309 options->num_iter++;
310 }
311
312 util::update(n, 1., u, omega, du);
313 norm_du_old = norm_du;
314 norm_du = util::lsup(n, du, fctp->mpi_info);
315 if (m == 0) {
316 if (options->verbose)
317 printf("FCT_Solver::updateNL: step %d: increment= %e\n", m+1,
318 norm_du * omega);
319 } else {
320 if (norm_du_old > 0.) {
321 rate = norm_du/norm_du_old;
322 } else if (norm_du <= 0.) {
323 rate = 0.;
324 } else {
325 rate = LARGE_POSITIVE_FLOAT;
326 }
327 if (options->verbose)
328 printf("FCT_Solver::updateNL: step %d: increment= %e (rate = %e)\n",m+1, norm_du * omega, rate);
329 num_critical_rates += (rate<critical_rate ? 0 : 1);
330 max_m_reached = (m>max_m);
331 diverged = (num_critical_rates >= num_critical_rates_max);
332 converged = (norm_du * omega <= ATOL);
333 }
334 m++;
335 } // end of while loop
336 if (errorCode == SOLVER_NO_ERROR) {
337 if (converged) {
338 if (options->verbose)
339 printf("FCT_Solver::updateNL: iteration is completed.\n");
340 errorCode = SOLVER_NO_ERROR;
341 } else if (diverged) {
342 if (options->verbose)
343 printf("FCT_Solver::updateNL: divergence.\n");
344 errorCode = SOLVER_DIVERGENCE;
345 } else if (max_m_reached) {
346 if (options->verbose)
347 printf("FCT_Solver::updateNL: maximum number of iteration steps reached.\n");
348 errorCode = SOLVER_MAXITER_REACHED;
349 }
350 }
351 return errorCode;
352 }
353
354
355 /*
356 * AntiDiffusionFlux:
357 *
358 * f_{ij} = (m_{ij} - dt (1-theta) d_{ij}) (u_old[j]-u_old[i]) - (m_{ij} + dt theta d_{ij}) (u[j]-u[i])
359 *
360 * m=fc->mass matrix
361 * d=artificial diffusion matrix = L - K = - fc->iteration matrix - fc->transport matrix (away from main diagonal)
362 *
363 * for CN : theta = 0.5
364 * for BE : theta = 1.
365 */
366
367 void FCT_Solver::setAntiDiffusionFlux_CN(SystemMatrix_ptr flux_matrix)
368 {
369 const double* u = u_coupler->borrowLocalData();
370 const double* u_old = u_old_coupler->borrowLocalData();
371 const double* remote_u = u_coupler->borrowRemoteData();
372 const double* remote_u_old = u_old_coupler->borrowRemoteData();
373 const double dt_half = dt/2;
374 const_TransportProblem_ptr fct(transportproblem);
375 const_SystemMatrixPattern_ptr pattern(fct->iteration_matrix->pattern);
376 const dim_t n = fct->iteration_matrix->getTotalNumRows();
377
378 #pragma omp parallel for
379 for (dim_t i = 0; i < n; ++i) {
380 const double u_i = u[i];
381 const double u_old_i = u_old[i];
382
383 #pragma ivdep
384 for (index_t iptr_ij = pattern->mainPattern->ptr[i];
385 iptr_ij < pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
386 const index_t j = pattern->mainPattern->index[iptr_ij];
387 const double m_ij = fct->mass_matrix->mainBlock->val[iptr_ij];
388 // this is in fact -d_ij
389 const double d_ij = fct->transport_matrix->mainBlock->val[iptr_ij]+
390 fct->iteration_matrix->mainBlock->val[iptr_ij];
391 const double u_old_j = u_old[j];
392 const double u_j = u[j];
393
394 // (m_{ij} - dt (1-theta) d_{ij}) (u_old[j]-u_old[i]) - (m_{ij} + dt theta d_{ij}) (u[j]-u[i])
395 flux_matrix->mainBlock->val[iptr_ij] =
396 (m_ij+dt_half*d_ij)*(u_old_j-u_old_i) -
397 (m_ij-dt_half*d_ij)*(u_j-u_i);
398
399 }
400 #pragma ivdep
401 for (index_t iptr_ij = pattern->col_couplePattern->ptr[i];
402 iptr_ij < pattern->col_couplePattern->ptr[i+1]; iptr_ij++) {
403 const index_t j = pattern->col_couplePattern->index[iptr_ij];
404 const double m_ij = fct->mass_matrix->col_coupleBlock->val[iptr_ij];
405 // this is in fact -d_ij
406 const double d_ij =
407 fct->transport_matrix->col_coupleBlock->val[iptr_ij] +
408 fct->iteration_matrix->col_coupleBlock->val[iptr_ij];
409 const double u_old_j = remote_u_old[j];
410 const double u_j = remote_u[j];
411 flux_matrix->col_coupleBlock->val[iptr_ij] =
412 (m_ij+dt_half*d_ij)*(u_old_j-u_old_i) -
413 (m_ij-dt_half*d_ij)*(u_j-u_i);
414 }
415 }
416 }
417
418 void FCT_Solver::setAntiDiffusionFlux_BE(SystemMatrix_ptr flux_matrix)
419 {
420 const double* u = u_coupler->borrowLocalData();
421 const double* u_old = u_old_coupler->borrowLocalData();
422 const double* remote_u = u_coupler->borrowRemoteData();
423 const double* remote_u_old = u_old_coupler->borrowRemoteData();
424 const_TransportProblem_ptr fct(transportproblem);
425 const_SystemMatrixPattern_ptr pattern(fct->iteration_matrix->pattern);
426 const dim_t n = fct->iteration_matrix->getTotalNumRows();
427
428 #pragma omp parallel for
429 for (dim_t i = 0; i < n; ++i) {
430 const double u_i = u[i];
431 const double u_old_i = u_old[i];
432 #pragma ivdep
433 for (index_t iptr_ij = pattern->mainPattern->ptr[i];
434 iptr_ij < pattern->mainPattern->ptr[i+1]; iptr_ij++) {
435 const index_t j = pattern->mainPattern->index[iptr_ij];
436 const double m_ij = fct->mass_matrix->mainBlock->val[iptr_ij];
437 // this is in fact -d_ij
438 const double d_ij = fct->transport_matrix->mainBlock->val[iptr_ij]+
439 fct->iteration_matrix->mainBlock->val[iptr_ij];
440 const double u_old_j = u_old[j];
441 const double u_j = u[j];
442
443 flux_matrix->mainBlock->val[iptr_ij] =
444 m_ij*(u_old_j-u_old_i) - (m_ij-dt*d_ij)*(u_j-u_i);
445 }
446 #pragma ivdep
447 for (index_t iptr_ij = pattern->col_couplePattern->ptr[i];
448 iptr_ij < pattern->col_couplePattern->ptr[i+1]; iptr_ij++) {
449 const index_t j = pattern->col_couplePattern->index[iptr_ij];
450 const double m_ij = fct->mass_matrix->col_coupleBlock->val[iptr_ij];
451 // this is in fact -d_ij
452 const double d_ij =
453 fct->transport_matrix->col_coupleBlock->val[iptr_ij] +
454 fct->iteration_matrix->col_coupleBlock->val[iptr_ij];
455 const double u_old_j = remote_u_old[j];
456 const double u_j = remote_u[j];
457
458 flux_matrix->col_coupleBlock->val[iptr_ij] =
459 m_ij*(u_old_j-u_old_i) - (m_ij-dt*d_ij)*(u_j-u_i);
460 }
461 }
462 }
463
464 /* special version of the ant-diffusive fluxes for the linear Crank-Nicolson
465 * scheme. In fact this is evaluated for u = 2*u_tilde - u_old which is the
466 * predictor of the solution of the the stabilised problem at time dt using
467 * the forward Euler scheme
468 *
469 * f_{ij} = (m_{ij} - dt/2 d_{ij}) (u_old[j]-u_old[i]) - (m_{ij} + dt/2 d_{ij}) (u[j]-u[i])
470 * = (m_{ij} - dt/2 d_{ij}) * (u_old[j]-u_old[i]) - (m_{ij} + dt/2 d_{ij}) * ( 2*(u_tilde[j]-u_tilde[i]) - (u_old[j] -u_old [i]) )
471 * = 2* m_{ij} * ( (u_old[j]-u_tilde[j] - (u_old[i]) - u_tilde[i]) ) - dt d_{ij} * (u_tilde[j]-u_tilde[i])
472 *
473 */
474 void FCT_Solver::setAntiDiffusionFlux_linearCN(SystemMatrix_ptr flux_matrix)
475 {
476 const_Coupler_ptr u_tilde_coupler(flux_limiter->u_tilde_coupler);
477 const double* u_tilde = u_tilde_coupler->borrowLocalData();
478 const double* u_old = u_old_coupler->borrowLocalData();
479 const double* remote_u_tilde = u_tilde_coupler->borrowRemoteData();
480 const double* remote_u_old = u_old_coupler->borrowRemoteData();
481 const_TransportProblem_ptr fct(transportproblem);
482 const_SystemMatrixPattern_ptr pattern(fct->iteration_matrix->pattern);
483 const dim_t n = fct->iteration_matrix->getTotalNumRows();
484
485 #pragma omp parallel for
486 for (dim_t i = 0; i < n; ++i) {
487 const double u_tilde_i = u_tilde[i];
488 const double u_old_i = u_old[i];
489 const double du_i = u_tilde_i - u_old_i;
490 #pragma ivdep
491 for (index_t iptr_ij = pattern->mainPattern->ptr[i];
492 iptr_ij < pattern->mainPattern->ptr[i+1]; iptr_ij++) {
493 const index_t j = pattern->mainPattern->index[iptr_ij];
494 const double m_ij = fct->mass_matrix->mainBlock->val[iptr_ij];
495 // this is in fact -d_ij
496 const double d_ij = fct->transport_matrix->mainBlock->val[iptr_ij]+
497 fct->iteration_matrix->mainBlock->val[iptr_ij];
498 const double u_tilde_j = u_tilde[j];
499 const double u_old_j = u_old[j];
500 const double du_j = u_tilde_j - u_old_j;
501
502 flux_matrix->mainBlock->val[iptr_ij] = 2 * m_ij * ( du_i - du_j ) -
503 dt * d_ij * ( u_tilde_i - u_tilde_j);
504 }
505 #pragma ivdep
506 for (index_t iptr_ij=pattern->col_couplePattern->ptr[i];
507 iptr_ij<pattern->col_couplePattern->ptr[i+1]; iptr_ij++) {
508
509 const index_t j = pattern->col_couplePattern->index[iptr_ij];
510 const double m_ij = fct->mass_matrix->col_coupleBlock->val[iptr_ij];
511 // this is in fact -d_ij
512 const double d_ij =
513 fct->transport_matrix->col_coupleBlock->val[iptr_ij] +
514 fct->iteration_matrix->col_coupleBlock->val[iptr_ij];
515 const double u_tilde_j = remote_u_tilde[j];
516 const double u_old_j = remote_u_old[j];
517 const double du_j = u_tilde_j - u_old_j;
518
519 flux_matrix->col_coupleBlock->val[iptr_ij] =
520 2*m_ij * ( du_i - du_j ) - dt * d_ij * (u_tilde_i - u_tilde_j);
521 }
522 }
523 }
524
525 /****************************************************************************/
526
527 double FCT_Solver::getSafeTimeStepSize(TransportProblem_ptr fctp)
528 {
529 double dt_max = LARGE_POSITIVE_FLOAT;
530 const dim_t n = fctp->transport_matrix->getTotalNumRows();
531
532 // set low order transport operator
533 setLowOrderOperator(fctp);
534
535 if (Esys_noError()) {
536 // calculate time step size
537 dt_max = LARGE_POSITIVE_FLOAT;
538 #pragma omp parallel
539 {
540 double dt_max_loc = LARGE_POSITIVE_FLOAT;
541 #pragma omp for schedule(static)
542 for (dim_t i=0; i<n; ++i) {
543 const double l_ii = fctp->main_diagonal_low_order_transport_matrix[i];
544 const double m_i = fctp->lumped_mass_matrix[i];
545 if (m_i > 0) {
546 if (l_ii<0)
547 dt_max_loc = std::min(dt_max_loc,m_i/(-l_ii));
548 }
549 }
550 #pragma omp critical
551 {
552 dt_max = std::min(dt_max,dt_max_loc);
553 }
554 }
555 #ifdef ESYS_MPI
556 double dt_max_loc = dt_max;
557 MPI_Allreduce(&dt_max_loc, &dt_max, 1, MPI_DOUBLE, MPI_MIN, fctp->mpi_info->comm);
558 #endif
559 if (dt_max < LARGE_POSITIVE_FLOAT)
560 dt_max *= 2.;
561 }
562 return dt_max;
563 }
564
565 /* Creates the low order transport matrix and stores its negative values
566 * into the iteration_matrix except for the main diagonal which is stored
567 * separately.
568 * If fc->iteration_matrix==NULL, fc->iteration_matrix is allocated
569 *
570 * a=transport_matrix
571 * b= low_order_transport_matrix = - iteration_matrix
572 * c=main diagonal low_order_transport_matrix
573 * initialise c[i] mit a[i,i]
574 *
575 * d_ij=max(0,-a[i,j],-a[j,i])
576 * b[i,j]=-(a[i,j]+d_ij)
577 * c[i]-=d_ij
578 */
579
580 void FCT_Solver::setLowOrderOperator(TransportProblem_ptr fc)
581 {
582 const index_t* main_iptr = fc->borrowMainDiagonalPointer();
583
584 if (!fc->iteration_matrix.get()) {
585 fc->iteration_matrix.reset(new SystemMatrix(
586 fc->transport_matrix->type, fc->transport_matrix->pattern,
587 fc->transport_matrix->row_block_size,
588 fc->transport_matrix->col_block_size, true));
589 }
590
591 const_SystemMatrixPattern_ptr pattern(fc->iteration_matrix->pattern);
592 const dim_t n = fc->iteration_matrix->getTotalNumRows();
593 #pragma omp parallel for
594 for (dim_t i = 0; i < n; ++i) {
595 double sum = fc->transport_matrix->mainBlock->val[main_iptr[i]];
596 //printf("sum[%d] = %e -> ", i, sum);
597
598 // look at a[i,j]
599 for (index_t iptr_ij=pattern->mainPattern->ptr[i];iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
600 const index_t j = pattern->mainPattern->index[iptr_ij];
601 const double rtmp1 = fc->transport_matrix->mainBlock->val[iptr_ij];
602 if (j != i) {
603 // find entry a[j,i]
604 #pragma ivdep
605 for (index_t iptr_ji=pattern->mainPattern->ptr[j]; iptr_ji<pattern->mainPattern->ptr[j+1]; ++iptr_ji) {
606
607 if (pattern->mainPattern->index[iptr_ji] == i) {
608 const double rtmp2=fc->transport_matrix->mainBlock->val[iptr_ji];
609 //printf("a[%d,%d]=%e\n",i,j,rtmp1);
610 //printf("a[%d,%d]=%e\n",j,i,rtmp2);
611 const double d_ij=-MIN3(0.,rtmp1,rtmp2);
612 fc->iteration_matrix->mainBlock->val[iptr_ij]=-(rtmp1+d_ij);
613 //printf("l[%d,%d]=%e\n",i,j,fc->iteration_matrix->mainBlock->val[iptr_ij]);
614 sum-=d_ij;
615 break;
616 }
617 }
618 }
619 }
620 for (index_t iptr_ij=pattern->col_couplePattern->ptr[i];iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
621 const index_t j = pattern->col_couplePattern->index[iptr_ij];
622 const double rtmp1 = fc->transport_matrix->col_coupleBlock->val[iptr_ij];
623 // find entry a[j,i]
624 #pragma ivdep
625 for (index_t iptr_ji=pattern->row_couplePattern->ptr[j]; iptr_ji<pattern->row_couplePattern->ptr[j+1]; ++iptr_ji) {
626 if (pattern->row_couplePattern->index[iptr_ji]==i) {
627 const double rtmp2=fc->transport_matrix->row_coupleBlock->val[iptr_ji];
628 const double d_ij=-MIN3(0.,rtmp1,rtmp2);
629 fc->iteration_matrix->col_coupleBlock->val[iptr_ij]=-(rtmp1+d_ij);
630 fc->iteration_matrix->row_coupleBlock->val[iptr_ji]=-(rtmp2+d_ij);
631 sum-=d_ij;
632 break;
633 }
634 }
635 }
636 // set main diagonal entry
637 fc->main_diagonal_low_order_transport_matrix[i] = sum;
638 //printf("%e\n", sum);
639 }
640 }
641
642 /*
643 * out_i=m_i u_i + a * \sum_{j <> i} l_{ij} (u_j-u_i) where m_i>0
644 * = u_i where m_i<=0
645 *
646 */
647 void FCT_Solver::setMuPaLu(double* out, const_Coupler_ptr coupler, double a)
648 {
649 const_SystemMatrix_ptr L(transportproblem->iteration_matrix);
650 const double* M = transportproblem->lumped_mass_matrix;
651 const_SystemMatrixPattern_ptr pattern(L->pattern);
652 const double* u = coupler->borrowLocalData();
653 const double* remote_u = coupler->borrowRemoteData();
654 const dim_t n = L->getTotalNumRows();
655
656 #pragma omp parallel for
657 for (dim_t i = 0; i < n; ++i) {
658 if (M[i] > 0.) {
659 out[i] = M[i]*u[i];
660 } else {
661 out[i] = u[i];
662 }
663 }
664 if (std::abs(a) > 0) {
665 #pragma omp parallel for
666 for (dim_t i = 0; i < n; ++i) {
667 if (M[i] > 0.) {
668 double sum = 0;
669 const double u_i = u[i];
670 #pragma ivdep
671 for (index_t iptr_ij = pattern->mainPattern->ptr[i];
672 iptr_ij < pattern->mainPattern->ptr[i+1];
673 iptr_ij++) {
674 const index_t j = pattern->mainPattern->index[iptr_ij];
675 const double l_ij = L->mainBlock->val[iptr_ij];
676 sum += l_ij*(u[j]-u_i);
677 }
678 #pragma ivdep
679 for (index_t iptr_ij = pattern->col_couplePattern->ptr[i];
680 iptr_ij < pattern->col_couplePattern->ptr[i+1];
681 iptr_ij++) {
682 const index_t j=pattern->col_couplePattern->index[iptr_ij];
683 const double l_ij = L->col_coupleBlock->val[iptr_ij];
684 sum += l_ij*(remote_u[j]-u_i);
685 }
686 out[i] += a*sum;
687 }
688 }
689 }
690 }
691
692 } // namespace paso
693

Properties

Name Value
svn:mergeinfo /branches/amg_from_3530/paso/src/FCT_Solver.cpp:3531-3826 /branches/lapack2681/paso/src/FCT_Solver.cpp:2682-2741 /branches/pasowrap/paso/src/FCT_Solver.cpp:3661-3674 /branches/py3_attempt2/paso/src/FCT_Solver.cpp:3871-3891 /branches/restext/paso/src/FCT_Solver.cpp:2610-2624 /branches/ripleygmg_from_3668/paso/src/FCT_Solver.cpp:3669-3791 /branches/stage3.0/paso/src/FCT_Solver.cpp:2569-2590 /branches/symbolic_from_3470/paso/src/FCT_Solver.cpp:3471-3974 /branches/symbolic_from_3470/ripley/test/python/paso/src/FCT_Solver.cpp:3517-3974 /release/3.0/paso/src/FCT_Solver.cpp:2591-2601 /trunk/paso/src/FCT_Solver.cpp:4257-4344 /trunk/ripley/test/python/paso/src/FCT_Solver.cpp:3480-3515

  ViewVC Help
Powered by ViewVC 1.1.26