/[escript]/trunk/paso/src/FCT_Solver.cpp
ViewVC logotype

Annotation of /trunk/paso/src/FCT_Solver.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4934 - (hide annotations)
Tue May 13 00:28:11 2014 UTC (6 years ago) by jfenwick
File size: 27904 byte(s)
This commit is brought to you by the number 4934 and the tool "meld".
Merge of partially complete split world code from branch.



1 gross 3793
2 jfenwick 3981 /*****************************************************************************
3 gross 3793 *
4 jfenwick 4657 * Copyright (c) 2003-2014 by University of Queensland
5 jfenwick 3981 * http://www.uq.edu.au
6 gross 3793 *
7     * Primary Business: Queensland, Australia
8     * Licensed under the Open Software License version 3.0
9     * http://www.opensource.org/licenses/osl-3.0.php
10     *
11 jfenwick 3981 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 jfenwick 4657 * Development 2012-2013 by School of Earth Sciences
13     * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 jfenwick 3981 *
15     *****************************************************************************/
16 gross 3793
17    
18 caltinay 4843 /****************************************************************************/
19 gross 3793
20 caltinay 4843 /* Paso: Transport solver with flux correction (L is row sum zero)
21 gross 3793 *
22     * - Mv_t=Lv v(0)=u
23     *
24     * to return v(dt)
25     *
26     */
27 caltinay 4843 /****************************************************************************/
28 gross 3793
29     /* Author: l.gross@uq.edu.au */
30    
31 caltinay 4843 /****************************************************************************/
32 gross 3793
33     #include "FCT_Solver.h"
34     #include "Preconditioner.h"
35     #include "PasoUtil.h"
36    
37 caltinay 4867 namespace paso {
38 gross 3793
39 caltinay 4867 FCT_Solver::FCT_Solver(const_TransportProblem_ptr tp, Options* options) :
40     transportproblem(tp),
41     omega(0),
42     z(NULL),
43     du(NULL)
44 gross 3793 {
45 caltinay 4867 const dim_t blockSize = tp->getBlockSize();
46     const dim_t n = tp->getTotalNumRows();
47 jfenwick 4934 mpi_info = tp->mpi_info;
48 caltinay 4867 flux_limiter = new FCT_FluxLimiter(tp);
49     b = new double[n];
50     if (options->ode_solver == PASO_CRANK_NICOLSON || options->ode_solver == PASO_BACKWARD_EULER) {
51     du = new double[n];
52     z = new double[n];
53 caltinay 4843 }
54 caltinay 4867 u_coupler.reset(new Coupler(tp->borrowConnector(), blockSize));
55     u_old_coupler.reset(new Coupler(tp->borrowConnector(), blockSize));
56 caltinay 4843
57 caltinay 4867 if (options->ode_solver == PASO_LINEAR_CRANK_NICOLSON) {
58     method = PASO_LINEAR_CRANK_NICOLSON;
59     } else if (options->ode_solver == PASO_CRANK_NICOLSON) {
60     method = PASO_CRANK_NICOLSON;
61     } else if (options->ode_solver == PASO_BACKWARD_EULER) {
62     method = PASO_BACKWARD_EULER;
63 caltinay 4843 } else {
64 caltinay 4867 Esys_setError(VALUE_ERROR, "FCT_Solver: unknown integration scheme.");
65     method = UNKNOWN;
66 caltinay 4843 }
67 gross 3793 }
68    
69 caltinay 4867 FCT_Solver::~FCT_Solver()
70 gross 3793 {
71 caltinay 4867 delete flux_limiter;
72     delete[] b;
73     delete[] z;
74     delete[] du;
75 gross 3793 }
76    
77 caltinay 4867 // modifies the main diagonal of the iteration matrix to introduce new dt
78 caltinay 4869 void FCT_Solver::initialize(double _dt, Options* options, Performance* pp)
79 gross 3793 {
80 caltinay 4867 const_TransportProblem_ptr fctp(transportproblem);
81     const index_t* main_iptr = fctp->borrowMainDiagonalPointer();
82     const dim_t n = fctp->transport_matrix->getTotalNumRows();
83     const double theta = getTheta();
84     omega = 1. / (_dt * theta);
85     dim_t i;
86     Options options2;
87 caltinay 4843
88 caltinay 4869 solve_free(fctp->iteration_matrix.get());
89 caltinay 4867 // fctp->iteration_matrix[i,i]=m[i]/(dt theta) -l[i,i]
90     dt = _dt;
91 gross 3793 #pragma omp parallel for private(i)
92     for (i = 0; i < n; ++i) {
93 caltinay 4867 const double m_i = fctp->lumped_mass_matrix[i];
94     const double l_ii = fctp->main_diagonal_low_order_transport_matrix[i];
95     if ( m_i > 0 ) {
96     fctp->iteration_matrix->mainBlock->val[main_iptr[i]] = m_i * omega - l_ii;
97     } else {
98 caltinay 4869 fctp->iteration_matrix->mainBlock->val[main_iptr[i]] = std::abs(m_i * omega - l_ii)/(EPSILON*EPSILON);
99 caltinay 4867 }
100 gross 3793 }
101 caltinay 4843
102 caltinay 4867 // allocate preconditioner/solver
103 gross 3793 options2.verbose = options->verbose;
104 caltinay 4867 if (method == PASO_LINEAR_CRANK_NICOLSON) {
105 gross 3793 options2.preconditioner = PASO_GS;
106 caltinay 4867 } else {
107 caltinay 4843 options2.preconditioner = PASO_JACOBI;
108 caltinay 4867 //options2.preconditioner = PASO_GS;
109 gross 3793 }
110 caltinay 4867 options2.use_local_preconditioner = false;
111     options2.sweeps = -1;
112 caltinay 4843
113 caltinay 4869 Performance_startMonitor(pp, PERFORMANCE_PRECONDITIONER_INIT);
114 caltinay 4836 fctp->iteration_matrix->setPreconditioner(&options2);
115 caltinay 4869 Performance_stopMonitor(pp, PERFORMANCE_PRECONDITIONER_INIT);
116 gross 3793 }
117    
118 caltinay 4867 // entry point for update procedures
119 caltinay 4869 err_t FCT_Solver::update(double* u, double* u_old, Options* options, Performance* pp)
120 caltinay 4843 {
121 gross 3793 err_t err_out = SOLVER_NO_ERROR;
122 caltinay 4843
123 gross 3793 if (method == PASO_LINEAR_CRANK_NICOLSON) {
124 caltinay 4867 err_out = updateLCN(u, u_old, options, pp);
125 gross 3793 } else if (method == PASO_CRANK_NICOLSON) {
126 caltinay 4867 err_out = updateNL(u, u_old, options, pp);
127 gross 3793 } else if (method == PASO_BACKWARD_EULER) {
128 caltinay 4867 err_out = updateNL(u, u_old, options, pp);
129 gross 3793 } else {
130     err_out = SOLVER_INPUT_ERROR;
131     }
132     return err_out;
133     }
134    
135 caltinay 4867 /// linear crank-nicolson update
136 caltinay 4869 err_t FCT_Solver::updateLCN(double* u, double* u_old, Options* options, Performance* pp)
137 gross 3793 {
138 gross 3823 dim_t sweep_max, i;
139 gross 3793 double const RTOL = options->tolerance;
140 caltinay 4867 const dim_t n = transportproblem->getTotalNumRows();
141     SystemMatrix_ptr iteration_matrix(transportproblem->iteration_matrix);
142     const index_t* main_iptr = transportproblem->borrowMainDiagonalPointer();
143 gross 3793 err_t errorCode = SOLVER_NO_ERROR;
144 gross 3823 double norm_u_tilde;
145 caltinay 4843
146 caltinay 4867 u_old_coupler->startCollect(u_old);
147     u_old_coupler->finishCollect();
148 caltinay 4843
149 caltinay 4867 // b[i]=m*u_tilde[i] = m u_old[i] + dt/2 sum_{j <> i} l_{ij}*(u_old[j]-u_old[i])
150     // = u_tilde[i] = u_old[i] where constraint m<0.
151     // note that iteration_matrix stores the negative values of the
152     // low order transport matrix l. Therefore a=-dt*0.5 is used.
153 caltinay 4843
154 caltinay 4867 setMuPaLu(b, u_old_coupler, -dt*0.5);
155 caltinay 4843 /* solve for u_tilde : u_tilda = m^{-1} * b */
156 caltinay 4867 flux_limiter->setU_tilde(b);
157     // u_tilde_connector is completed
158 caltinay 4843
159 caltinay 4867 // calculate anti-diffusive fluxes for u_tilde
160     setAntiDiffusionFlux_linearCN(flux_limiter->antidiffusive_fluxes);
161 gross 3807
162 caltinay 4867 /* b_i += sum_{j} limitation factor_{ij} * antidiffusive_flux_{ij} */
163     flux_limiter->addLimitedFluxes_Start();
164     flux_limiter->addLimitedFluxes_Complete(b);
165 gross 3807
166 caltinay 4867 util::scale(n, b, omega);
167     // solve (m-dt/2*L) u = b in the form (omega*m-L) u = b * omega with omega*dt/2=1
168 caltinay 4843 #pragma omp for private(i) schedule(static)
169 caltinay 4867 for (i = 0; i < n; ++i) {
170     if (!(transportproblem->lumped_mass_matrix[i] > 0)) {
171     b[i] = flux_limiter->u_tilde[i]
172     * transportproblem->iteration_matrix->mainBlock->val[main_iptr[i]];
173 caltinay 4843 }
174 caltinay 4867 }
175     // initial guess is u<- -u + 2*u_tilde
176     util::update(n, -1., u, 2., flux_limiter->u_tilde);
177 gross 3823
178 caltinay 4869 sweep_max = std::max((int) (- 2 * log(RTOL)/log(2.)-0.5),1);
179 caltinay 4867 norm_u_tilde = util::lsup(n, flux_limiter->u_tilde, flux_limiter->mpi_info);
180     if (options->verbose) {
181     printf("FCT_Solver::updateLCN: u_tilde lsup = %e (rtol = %e, max. sweeps = %d)\n", norm_u_tilde, RTOL*norm_u_tilde, sweep_max);
182     }
183     errorCode = Preconditioner_Smoother_solve_byTolerance(iteration_matrix,
184     ((Preconditioner*)(iteration_matrix->solver_p))->gs, u, b, RTOL,
185     &sweep_max, true);
186     if (errorCode == PRECONDITIONER_NO_ERROR) {
187     if (options->verbose) printf("FCT_Solver::updateLCN: convergence after %d Gauss-Seidel steps.\n",sweep_max);
188     errorCode = SOLVER_NO_ERROR;
189     } else {
190     if (options->verbose) printf("FCT_Solver::updateLCN: Gauss-Seidel failed within %d stesp (rel. tolerance %e).\n",sweep_max,RTOL);
191     errorCode = SOLVER_MAXITER_REACHED;
192     }
193     return errorCode;
194 caltinay 4843 }
195    
196 caltinay 4869 err_t FCT_Solver::updateNL(double* u, double* u_old, Options* options,
197     Performance* pp)
198 gross 3793 {
199 caltinay 4867 // number of rates >=critical_rate accepted before divergence is triggered
200     const dim_t num_critical_rates_max = 3;
201     // expected value of convergence rate
202     const double critical_rate = 0.95;
203 gross 3794
204 caltinay 4867 const_TransportProblem_ptr fctp(transportproblem);
205     dim_t i;
206     double norm_u_tilde, norm_du=LARGE_POSITIVE_FLOAT, norm_du_old, rate=1.;
207     const dim_t n = fctp->transport_matrix->getTotalNumRows();
208     const double atol = options->absolute_tolerance;
209     const double rtol = options->tolerance;
210     const dim_t max_m = options->iter_max;
211     dim_t m = 0, num_critical_rates = 0;
212     err_t errorCode = SOLVER_NO_ERROR;
213     bool converged=false, max_m_reached=false, diverged=false;
214     /* //////////////////////////////////////////////////////////////////// */
215 caltinay 4843
216 caltinay 4867 options->num_iter=0;
217     u_old_coupler->startCollect(u_old);
218     u_old_coupler->finishCollect();
219     // prepare u_tilde and flux limiter
220     if (method == PASO_BACKWARD_EULER) {
221     // b[i]=m_i* u_old[i]
222     #pragma omp for private(i) schedule(static)
223     for (i = 0; i < n; ++i) {
224     if (fctp->lumped_mass_matrix[i] > 0 ) {
225     b[i]=u_old[i]* fctp->lumped_mass_matrix[i];
226     } else {
227     b[i]=u_old[i];
228     }
229     }
230 gross 3793 } else {
231 gross 3822 /* b[i]=m_i* u_old[i] + dt/2 sum_{j <> i} l_{ij}*(u_old[j]-u_old[i]) = m_i * u_tilde_i where m_i>0
232 caltinay 4843 * = u_old[i] otherwise
233     * note that iteration_matrix stores the negative values of the
234 gross 3793 * low order transport matrix l. Therefore a=-dt*0.5 is used. */
235 caltinay 4867 setMuPaLu(b, u_old_coupler, -dt*0.5);
236 caltinay 4843 }
237 caltinay 4867 flux_limiter->setU_tilde(b); // u_tilde = m^{-1} b */
238     // u_tilde_connector is completed
239 caltinay 4843 /************************************************************************/
240 caltinay 4867 // calculate stopping criterion
241     norm_u_tilde=util::lsup(n, flux_limiter->u_tilde, flux_limiter->mpi_info);
242     const double ATOL = rtol * norm_u_tilde + atol;
243     if (options->verbose)
244     printf("FCT_Solver::updateNL: iteration starts u_tilde lsup = %e "
245     "(abs. tol = %e)\n", norm_u_tilde, ATOL);
246 gross 3793
247 caltinay 4867 // u_old is an initial guess for u
248     util::copy(n, u, u_old);
249 caltinay 4843
250 caltinay 4867 while (!converged && !diverged && !max_m_reached && Esys_noError()) {
251     u_coupler->startCollect(u);
252     u_coupler->finishCollect();
253 gross 3793
254 caltinay 4867 // set antidiffusive_flux_{ij} for u
255     if (method == PASO_BACKWARD_EULER) {
256     setAntiDiffusionFlux_BE(flux_limiter->antidiffusive_fluxes);
257 caltinay 4843 } else {
258 caltinay 4867 setAntiDiffusionFlux_CN(flux_limiter->antidiffusive_fluxes);
259 gross 3793 }
260 caltinay 4867 // start the calculation of the limitation factors_{fct_solver->ij}
261     flux_limiter->addLimitedFluxes_Start(); // uses u_tilde
262 gross 3793
263     /*
264 caltinay 4843 * z_m[i]=b[i] - (m_i*u[i] - omega*sum_{j<>i} l_{ij} (u[j]-u[i]) ) where m_i>0
265     * ==b[i] - u[i] = u_tilda[i]-u[i] =0 otherwise
266     *
267     * omega = dt/2 or dt .
268     *
269     * note that iteration_matrix stores the negative values of the
270     * low order transport matrix l. Therefore a=dt*theta is used.
271     */
272 caltinay 4867 if (method == PASO_BACKWARD_EULER) {
273     setMuPaLu(z, u_coupler, dt);
274 caltinay 4843 } else {
275 caltinay 4867 setMuPaLu(z, u_coupler, dt/2);
276 caltinay 4843 }
277 gross 3793
278 caltinay 4867 util::update(n, -1., z, 1., b); // z=b-z
279 gross 3793
280 caltinay 4867 // z_i += sum_{j} limitation factor_{ij} * antidiffusive_flux_{ij}
281     flux_limiter->addLimitedFluxes_Complete(z);
282 gross 3793
283 caltinay 4867 // we solve (m/omega - L ) * du = z
284     if (method == PASO_BACKWARD_EULER) {
285     dim_t cntIter = options->iter_max;
286     double tol= util::l2(n, z, fctp->mpi_info) ;
287 gross 3793
288 caltinay 4867 if (m==0) {
289     tol *= 0.5;
290     } else {
291 caltinay 4869 tol *= std::min(std::max(rate*rate, 1e-2), 0.5);
292 caltinay 4867 }
293     // use BiCGStab with Jacobi preconditioner ( m - omega * L )
294     util::zeroes(n,du);
295     errorCode = Solver_BiCGStab(fctp->iteration_matrix, z, du, &cntIter, &tol, pp);
296 caltinay 4843
297 caltinay 4867 // errorCode = Solver_GMRES(fctp->iteration_matrix, z, du, &cntIter, &tol, 10, 2000, pp);
298     if (options->verbose)
299     printf("FCT_Solver::updateNL: BiCGStab completed after %d steps (residual =%e).\n",cntIter, tol);
300     options->num_iter += cntIter;
301     if (errorCode != SOLVER_NO_ERROR) break;
302     } else {
303     // just use the main diagonal of (m/omega - L )
304 caltinay 4843
305 caltinay 4867 Preconditioner_Smoother_solve(fctp->iteration_matrix,
306     ((Preconditioner*) (fctp->iteration_matrix->solver_p))->jacobi,
307     du, z, 1, false);
308 caltinay 4843
309 caltinay 4867 options->num_iter++;
310     }
311 caltinay 4843
312 caltinay 4867 util::update(n, 1., u, omega, du);
313     norm_du_old = norm_du;
314     norm_du = util::lsup(n, du, fctp->mpi_info);
315     if (m == 0) {
316     if (options->verbose)
317     printf("FCT_Solver::updateNL: step %d: increment= %e\n", m+1,
318     norm_du * omega);
319     } else {
320     if (norm_du_old > 0.) {
321     rate = norm_du/norm_du_old;
322     } else if (norm_du <= 0.) {
323     rate = 0.;
324     } else {
325     rate = LARGE_POSITIVE_FLOAT;
326     }
327     if (options->verbose)
328     printf("FCT_Solver::updateNL: step %d: increment= %e (rate = %e)\n",m+1, norm_du * omega, rate);
329     num_critical_rates += (rate<critical_rate ? 0 : 1);
330     max_m_reached = (m>max_m);
331     diverged = (num_critical_rates >= num_critical_rates_max);
332     converged = (norm_du * omega <= ATOL);
333     }
334     m++;
335     } // end of while loop
336     if (errorCode == SOLVER_NO_ERROR) {
337     if (converged) {
338     if (options->verbose)
339     printf("FCT_Solver::updateNL: iteration is completed.\n");
340     errorCode = SOLVER_NO_ERROR;
341     } else if (diverged) {
342     if (options->verbose)
343     printf("FCT_Solver::updateNL: divergence.\n");
344     errorCode = SOLVER_DIVERGENCE;
345     } else if (max_m_reached) {
346     if (options->verbose)
347     printf("FCT_Solver::updateNL: maximum number of iteration steps reached.\n");
348     errorCode = SOLVER_MAXITER_REACHED;
349     }
350 gross 3793 }
351     return errorCode;
352     }
353    
354    
355 caltinay 4843 /*
356     * AntiDiffusionFlux:
357 gross 3793 *
358     * f_{ij} = (m_{ij} - dt (1-theta) d_{ij}) (u_old[j]-u_old[i]) - (m_{ij} + dt theta d_{ij}) (u[j]-u[i])
359 caltinay 4843 *
360 gross 3793 * m=fc->mass matrix
361 jfenwick 4345 * d=artificial diffusion matrix = L - K = - fc->iteration matrix - fc->transport matrix (away from main diagonal)
362 gross 3793 *
363 caltinay 4867 * for CN : theta = 0.5
364 gross 3793 * for BE : theta = 1.
365     */
366    
367 caltinay 4867 void FCT_Solver::setAntiDiffusionFlux_CN(SystemMatrix_ptr flux_matrix)
368 gross 3793 {
369 caltinay 4867 const double* u = u_coupler->borrowLocalData();
370     const double* u_old = u_old_coupler->borrowLocalData();
371     const double* remote_u = u_coupler->borrowRemoteData();
372     const double* remote_u_old = u_old_coupler->borrowRemoteData();
373     const double dt_half = dt/2;
374     const_TransportProblem_ptr fct(transportproblem);
375     const_SystemMatrixPattern_ptr pattern(fct->iteration_matrix->pattern);
376     const dim_t n = fct->iteration_matrix->getTotalNumRows();
377 caltinay 4843
378 caltinay 4867 #pragma omp parallel for
379     for (dim_t i = 0; i < n; ++i) {
380     const double u_i = u[i];
381     const double u_old_i = u_old[i];
382 gross 3793
383 caltinay 4867 #pragma ivdep
384     for (index_t iptr_ij = pattern->mainPattern->ptr[i];
385     iptr_ij < pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
386     const index_t j = pattern->mainPattern->index[iptr_ij];
387     const double m_ij = fct->mass_matrix->mainBlock->val[iptr_ij];
388     // this is in fact -d_ij
389     const double d_ij = fct->transport_matrix->mainBlock->val[iptr_ij]+
390     fct->iteration_matrix->mainBlock->val[iptr_ij];
391     const double u_old_j = u_old[j];
392     const double u_j = u[j];
393 gross 3793
394 caltinay 4867 // (m_{ij} - dt (1-theta) d_{ij}) (u_old[j]-u_old[i]) - (m_{ij} + dt theta d_{ij}) (u[j]-u[i])
395     flux_matrix->mainBlock->val[iptr_ij] =
396     (m_ij+dt_half*d_ij)*(u_old_j-u_old_i) -
397     (m_ij-dt_half*d_ij)*(u_j-u_i);
398 gross 3793
399 caltinay 4867 }
400     #pragma ivdep
401     for (index_t iptr_ij = pattern->col_couplePattern->ptr[i];
402     iptr_ij < pattern->col_couplePattern->ptr[i+1]; iptr_ij++) {
403     const index_t j = pattern->col_couplePattern->index[iptr_ij];
404     const double m_ij = fct->mass_matrix->col_coupleBlock->val[iptr_ij];
405     // this is in fact -d_ij
406     const double d_ij =
407     fct->transport_matrix->col_coupleBlock->val[iptr_ij] +
408     fct->iteration_matrix->col_coupleBlock->val[iptr_ij];
409     const double u_old_j = remote_u_old[j];
410     const double u_j = remote_u[j];
411     flux_matrix->col_coupleBlock->val[iptr_ij] =
412     (m_ij+dt_half*d_ij)*(u_old_j-u_old_i) -
413     (m_ij-dt_half*d_ij)*(u_j-u_i);
414     }
415     }
416 gross 3793 }
417    
418 caltinay 4867 void FCT_Solver::setAntiDiffusionFlux_BE(SystemMatrix_ptr flux_matrix)
419 gross 3793 {
420 caltinay 4867 const double* u = u_coupler->borrowLocalData();
421     const double* u_old = u_old_coupler->borrowLocalData();
422     const double* remote_u = u_coupler->borrowRemoteData();
423     const double* remote_u_old = u_old_coupler->borrowRemoteData();
424     const_TransportProblem_ptr fct(transportproblem);
425     const_SystemMatrixPattern_ptr pattern(fct->iteration_matrix->pattern);
426     const dim_t n = fct->iteration_matrix->getTotalNumRows();
427 caltinay 4843
428 caltinay 4867 #pragma omp parallel for
429     for (dim_t i = 0; i < n; ++i) {
430     const double u_i = u[i];
431     const double u_old_i = u_old[i];
432     #pragma ivdep
433     for (index_t iptr_ij = pattern->mainPattern->ptr[i];
434     iptr_ij < pattern->mainPattern->ptr[i+1]; iptr_ij++) {
435     const index_t j = pattern->mainPattern->index[iptr_ij];
436     const double m_ij = fct->mass_matrix->mainBlock->val[iptr_ij];
437     // this is in fact -d_ij
438     const double d_ij = fct->transport_matrix->mainBlock->val[iptr_ij]+
439     fct->iteration_matrix->mainBlock->val[iptr_ij];
440     const double u_old_j = u_old[j];
441     const double u_j = u[j];
442 gross 3793
443 caltinay 4867 flux_matrix->mainBlock->val[iptr_ij] =
444     m_ij*(u_old_j-u_old_i) - (m_ij-dt*d_ij)*(u_j-u_i);
445     }
446     #pragma ivdep
447     for (index_t iptr_ij = pattern->col_couplePattern->ptr[i];
448     iptr_ij < pattern->col_couplePattern->ptr[i+1]; iptr_ij++) {
449     const index_t j = pattern->col_couplePattern->index[iptr_ij];
450     const double m_ij = fct->mass_matrix->col_coupleBlock->val[iptr_ij];
451     // this is in fact -d_ij
452     const double d_ij =
453     fct->transport_matrix->col_coupleBlock->val[iptr_ij] +
454     fct->iteration_matrix->col_coupleBlock->val[iptr_ij];
455     const double u_old_j = remote_u_old[j];
456     const double u_j = remote_u[j];
457 gross 3793
458 caltinay 4867 flux_matrix->col_coupleBlock->val[iptr_ij] =
459     m_ij*(u_old_j-u_old_i) - (m_ij-dt*d_ij)*(u_j-u_i);
460     }
461     }
462 gross 3793 }
463    
464 caltinay 4867 /* special version of the ant-diffusive fluxes for the linear Crank-Nicolson
465     * scheme. In fact this is evaluated for u = 2*u_tilde - u_old which is the
466     * predictor of the solution of the the stabilised problem at time dt using
467     * the forward Euler scheme
468 caltinay 4843 *
469 caltinay 4867 * f_{ij} = (m_{ij} - dt/2 d_{ij}) (u_old[j]-u_old[i]) - (m_{ij} + dt/2 d_{ij}) (u[j]-u[i])
470 gross 3793 * = (m_{ij} - dt/2 d_{ij}) * (u_old[j]-u_old[i]) - (m_{ij} + dt/2 d_{ij}) * ( 2*(u_tilde[j]-u_tilde[i]) - (u_old[j] -u_old [i]) )
471 caltinay 4843 * = 2* m_{ij} * ( (u_old[j]-u_tilde[j] - (u_old[i]) - u_tilde[i]) ) - dt d_{ij} * (u_tilde[j]-u_tilde[i])
472     *
473 gross 3793 */
474 caltinay 4867 void FCT_Solver::setAntiDiffusionFlux_linearCN(SystemMatrix_ptr flux_matrix)
475 gross 3793 {
476 caltinay 4867 const_Coupler_ptr u_tilde_coupler(flux_limiter->u_tilde_coupler);
477     const double* u_tilde = u_tilde_coupler->borrowLocalData();
478     const double* u_old = u_old_coupler->borrowLocalData();
479     const double* remote_u_tilde = u_tilde_coupler->borrowRemoteData();
480     const double* remote_u_old = u_old_coupler->borrowRemoteData();
481     const_TransportProblem_ptr fct(transportproblem);
482     const_SystemMatrixPattern_ptr pattern(fct->iteration_matrix->pattern);
483     const dim_t n = fct->iteration_matrix->getTotalNumRows();
484 caltinay 4843
485 caltinay 4867 #pragma omp parallel for
486     for (dim_t i = 0; i < n; ++i) {
487     const double u_tilde_i = u_tilde[i];
488     const double u_old_i = u_old[i];
489     const double du_i = u_tilde_i - u_old_i;
490     #pragma ivdep
491     for (index_t iptr_ij = pattern->mainPattern->ptr[i];
492     iptr_ij < pattern->mainPattern->ptr[i+1]; iptr_ij++) {
493     const index_t j = pattern->mainPattern->index[iptr_ij];
494     const double m_ij = fct->mass_matrix->mainBlock->val[iptr_ij];
495     // this is in fact -d_ij
496     const double d_ij = fct->transport_matrix->mainBlock->val[iptr_ij]+
497     fct->iteration_matrix->mainBlock->val[iptr_ij];
498     const double u_tilde_j = u_tilde[j];
499     const double u_old_j = u_old[j];
500     const double du_j = u_tilde_j - u_old_j;
501 gross 3793
502 caltinay 4867 flux_matrix->mainBlock->val[iptr_ij] = 2 * m_ij * ( du_i - du_j ) -
503     dt * d_ij * ( u_tilde_i - u_tilde_j);
504     }
505     #pragma ivdep
506     for (index_t iptr_ij=pattern->col_couplePattern->ptr[i];
507     iptr_ij<pattern->col_couplePattern->ptr[i+1]; iptr_ij++) {
508 caltinay 4843
509 caltinay 4867 const index_t j = pattern->col_couplePattern->index[iptr_ij];
510     const double m_ij = fct->mass_matrix->col_coupleBlock->val[iptr_ij];
511     // this is in fact -d_ij
512     const double d_ij =
513     fct->transport_matrix->col_coupleBlock->val[iptr_ij] +
514     fct->iteration_matrix->col_coupleBlock->val[iptr_ij];
515     const double u_tilde_j = remote_u_tilde[j];
516     const double u_old_j = remote_u_old[j];
517     const double du_j = u_tilde_j - u_old_j;
518 caltinay 4843
519 caltinay 4867 flux_matrix->col_coupleBlock->val[iptr_ij] =
520     2*m_ij * ( du_i - du_j ) - dt * d_ij * (u_tilde_i - u_tilde_j);
521     }
522     }
523     }
524 caltinay 4843
525 caltinay 4867 /****************************************************************************/
526 gross 3793
527 caltinay 4867 double FCT_Solver::getSafeTimeStepSize(TransportProblem_ptr fctp)
528     {
529     double dt_max = LARGE_POSITIVE_FLOAT;
530     const dim_t n = fctp->transport_matrix->getTotalNumRows();
531 caltinay 4843
532 caltinay 4867 // set low order transport operator
533     setLowOrderOperator(fctp);
534 gross 3793
535 caltinay 4867 if (Esys_noError()) {
536     // calculate time step size
537     dt_max = LARGE_POSITIVE_FLOAT;
538     #pragma omp parallel
539     {
540     double dt_max_loc = LARGE_POSITIVE_FLOAT;
541     #pragma omp for schedule(static)
542     for (dim_t i=0; i<n; ++i) {
543     const double l_ii = fctp->main_diagonal_low_order_transport_matrix[i];
544     const double m_i = fctp->lumped_mass_matrix[i];
545     if (m_i > 0) {
546     if (l_ii<0)
547 caltinay 4869 dt_max_loc = std::min(dt_max_loc,m_i/(-l_ii));
548 caltinay 4867 }
549     }
550     #pragma omp critical
551     {
552 caltinay 4869 dt_max = std::min(dt_max,dt_max_loc);
553 caltinay 4867 }
554     }
555     #ifdef ESYS_MPI
556     double dt_max_loc = dt_max;
557     MPI_Allreduce(&dt_max_loc, &dt_max, 1, MPI_DOUBLE, MPI_MIN, fctp->mpi_info->comm);
558     #endif
559     if (dt_max < LARGE_POSITIVE_FLOAT)
560     dt_max *= 2.;
561     }
562     return dt_max;
563 gross 3793 }
564    
565 caltinay 4843 /* Creates the low order transport matrix and stores its negative values
566 gross 3793 * into the iteration_matrix except for the main diagonal which is stored
567     * separately.
568 caltinay 4843 * If fc->iteration_matrix==NULL, fc->iteration_matrix is allocated
569 gross 3793 *
570 caltinay 4843 * a=transport_matrix
571 gross 3793 * b= low_order_transport_matrix = - iteration_matrix
572 caltinay 4843 * c=main diagonal low_order_transport_matrix
573     * initialise c[i] mit a[i,i]
574 gross 3793 *
575 caltinay 4843 * d_ij=max(0,-a[i,j],-a[j,i])
576     * b[i,j]=-(a[i,j]+d_ij)
577     * c[i]-=d_ij
578 gross 3793 */
579    
580 caltinay 4867 void FCT_Solver::setLowOrderOperator(TransportProblem_ptr fc)
581 caltinay 4843 {
582 caltinay 4867 const index_t* main_iptr = fc->borrowMainDiagonalPointer();
583 gross 3793
584 caltinay 4867 if (!fc->iteration_matrix.get()) {
585     fc->iteration_matrix.reset(new SystemMatrix(
586 caltinay 4836 fc->transport_matrix->type, fc->transport_matrix->pattern,
587     fc->transport_matrix->row_block_size,
588     fc->transport_matrix->col_block_size, true));
589 caltinay 4867 }
590 gross 3793
591 caltinay 4867 const_SystemMatrixPattern_ptr pattern(fc->iteration_matrix->pattern);
592     const dim_t n = fc->iteration_matrix->getTotalNumRows();
593     #pragma omp parallel for
594     for (dim_t i = 0; i < n; ++i) {
595     double sum = fc->transport_matrix->mainBlock->val[main_iptr[i]];
596     //printf("sum[%d] = %e -> ", i, sum);
597 caltinay 4843
598 caltinay 4867 // look at a[i,j]
599     for (index_t iptr_ij=pattern->mainPattern->ptr[i];iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
600     const index_t j = pattern->mainPattern->index[iptr_ij];
601     const double rtmp1 = fc->transport_matrix->mainBlock->val[iptr_ij];
602     if (j != i) {
603     // find entry a[j,i]
604     #pragma ivdep
605     for (index_t iptr_ji=pattern->mainPattern->ptr[j]; iptr_ji<pattern->mainPattern->ptr[j+1]; ++iptr_ji) {
606 caltinay 4843
607 caltinay 4867 if (pattern->mainPattern->index[iptr_ji] == i) {
608 caltinay 4843 const double rtmp2=fc->transport_matrix->mainBlock->val[iptr_ji];
609 caltinay 4867 //printf("a[%d,%d]=%e\n",i,j,rtmp1);
610     //printf("a[%d,%d]=%e\n",j,i,rtmp2);
611 caltinay 3795 const double d_ij=-MIN3(0.,rtmp1,rtmp2);
612 gross 3793 fc->iteration_matrix->mainBlock->val[iptr_ij]=-(rtmp1+d_ij);
613 caltinay 4867 //printf("l[%d,%d]=%e\n",i,j,fc->iteration_matrix->mainBlock->val[iptr_ij]);
614 gross 3793 sum-=d_ij;
615     break;
616     }
617 caltinay 4867 }
618     }
619     }
620     for (index_t iptr_ij=pattern->col_couplePattern->ptr[i];iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
621     const index_t j = pattern->col_couplePattern->index[iptr_ij];
622     const double rtmp1 = fc->transport_matrix->col_coupleBlock->val[iptr_ij];
623     // find entry a[j,i]
624     #pragma ivdep
625     for (index_t iptr_ji=pattern->row_couplePattern->ptr[j]; iptr_ji<pattern->row_couplePattern->ptr[j+1]; ++iptr_ji) {
626     if (pattern->row_couplePattern->index[iptr_ji]==i) {
627     const double rtmp2=fc->transport_matrix->row_coupleBlock->val[iptr_ji];
628     const double d_ij=-MIN3(0.,rtmp1,rtmp2);
629     fc->iteration_matrix->col_coupleBlock->val[iptr_ij]=-(rtmp1+d_ij);
630     fc->iteration_matrix->row_coupleBlock->val[iptr_ji]=-(rtmp2+d_ij);
631     sum-=d_ij;
632     break;
633     }
634     }
635     }
636     // set main diagonal entry
637     fc->main_diagonal_low_order_transport_matrix[i] = sum;
638     //printf("%e\n", sum);
639     }
640 gross 3793 }
641    
642     /*
643 gross 3822 * out_i=m_i u_i + a * \sum_{j <> i} l_{ij} (u_j-u_i) where m_i>0
644     * = u_i where m_i<=0
645 gross 3793 *
646     */
647 caltinay 4867 void FCT_Solver::setMuPaLu(double* out, const_Coupler_ptr coupler, double a)
648 gross 3793 {
649 caltinay 4867 const_SystemMatrix_ptr L(transportproblem->iteration_matrix);
650     const double* M = transportproblem->lumped_mass_matrix;
651     const_SystemMatrixPattern_ptr pattern(L->pattern);
652     const double* u = coupler->borrowLocalData();
653     const double* remote_u = coupler->borrowRemoteData();
654     const dim_t n = L->getTotalNumRows();
655 gross 3793
656 caltinay 4867 #pragma omp parallel for
657     for (dim_t i = 0; i < n; ++i) {
658     if (M[i] > 0.) {
659     out[i] = M[i]*u[i];
660     } else {
661     out[i] = u[i];
662 caltinay 4843 }
663 caltinay 4867 }
664 caltinay 4869 if (std::abs(a) > 0) {
665 caltinay 4867 #pragma omp parallel for
666     for (dim_t i = 0; i < n; ++i) {
667     if (M[i] > 0.) {
668     double sum = 0;
669     const double u_i = u[i];
670     #pragma ivdep
671     for (index_t iptr_ij = pattern->mainPattern->ptr[i];
672     iptr_ij < pattern->mainPattern->ptr[i+1];
673     iptr_ij++) {
674     const index_t j = pattern->mainPattern->index[iptr_ij];
675     const double l_ij = L->mainBlock->val[iptr_ij];
676     sum += l_ij*(u[j]-u_i);
677     }
678     #pragma ivdep
679     for (index_t iptr_ij = pattern->col_couplePattern->ptr[i];
680     iptr_ij < pattern->col_couplePattern->ptr[i+1];
681     iptr_ij++) {
682     const index_t j=pattern->col_couplePattern->index[iptr_ij];
683     const double l_ij = L->col_coupleBlock->val[iptr_ij];
684     sum += l_ij*(remote_u[j]-u_i);
685     }
686     out[i] += a*sum;
687     }
688     }
689     }
690 gross 3793 }
691    
692 caltinay 4867 } // namespace paso
693    

Properties

Name Value
svn:mergeinfo /branches/amg_from_3530/paso/src/FCT_Solver.cpp:3531-3826 /branches/lapack2681/paso/src/FCT_Solver.cpp:2682-2741 /branches/pasowrap/paso/src/FCT_Solver.cpp:3661-3674 /branches/py3_attempt2/paso/src/FCT_Solver.cpp:3871-3891 /branches/restext/paso/src/FCT_Solver.cpp:2610-2624 /branches/ripleygmg_from_3668/paso/src/FCT_Solver.cpp:3669-3791 /branches/stage3.0/paso/src/FCT_Solver.cpp:2569-2590 /branches/symbolic_from_3470/paso/src/FCT_Solver.cpp:3471-3974 /branches/symbolic_from_3470/ripley/test/python/paso/src/FCT_Solver.cpp:3517-3974 /release/3.0/paso/src/FCT_Solver.cpp:2591-2601 /trunk/paso/src/FCT_Solver.cpp:4257-4344 /trunk/ripley/test/python/paso/src/FCT_Solver.cpp:3480-3515

  ViewVC Help
Powered by ViewVC 1.1.26