# Contents of /trunk/paso/src/SolverFCT_solve.c

Revision 1981 - (show annotations)
Thu Nov 6 05:27:33 2008 UTC (11 years, 3 months ago) by jfenwick
File MIME type: text/plain
File size: 15462 byte(s)
More warning removal.


 1 2 /******************************************************* 3 * 4 * Copyright (c) 2003-2008 by University of Queensland 5 * Earth Systems Science Computational Center (ESSCC) 6 * http://www.uq.edu.au/esscc 7 * 8 * Primary Business: Queensland, Australia 9 * Licensed under the Open Software License version 3.0 10 * http://www.opensource.org/licenses/osl-3.0.php 11 * 12 *******************************************************/ 13 14 15 /**************************************************************/ 16 17 /* Paso: Flux correction transport solver 18 * 19 * solves Mu_t=Ku+q 20 * u(0) >=0 21 * 22 * Warning: the program assums sum_{j} k_{ij}=0!!! 23 * 24 */ 25 /**************************************************************/ 26 27 /* Author: l.gross@uq.edu.au */ 28 29 /**************************************************************/ 30 31 #include "Paso.h" 32 #include "Solver.h" 33 #include "SolverFCT.h" 34 #include "PasoUtil.h" 35 #include "escript/blocktimer.h" 36 #ifdef _OPENMP 37 #include 38 #endif 39 #ifdef PASO_MPI 40 #include 41 #endif 42 43 /* 44 * inserts the source term into the problem 45 */ 46 void Paso_FCT_setSource(const dim_t n,const double *source, double* sourceN, double* sourceP) 47 { 48 dim_t i; 49 register double rtmp; 50 /* 51 * seperate source into positive and negative part: 52 */ 53 #pragma omp parallel for private(i,rtmp) 54 for (i = 0; i < n; ++i) { 55 rtmp=source[i]; 56 if (rtmp <0) { 57 sourceN[i]=-rtmp; 58 sourceP[i]=0; 59 } else { 60 sourceN[i]= 0; 61 sourceP[i]= rtmp; 62 } 63 } 64 } 65 66 err_t Paso_FCT_setUpRightHandSide(Paso_FCTransportProblem* fctp, const double dt, const double *u_m, Paso_Coupler* u_m_coupler, double * z_m, 67 Paso_SystemMatrix* flux_matrix, Paso_Coupler* uTilde_coupler, const double *b, 68 Paso_Coupler* QN_coupler, Paso_Coupler* QP_coupler, 69 double *RN_m, Paso_Coupler* RN_m_coupler, double* RP_m, Paso_Coupler* RP_m_coupler, const double *sourceN, 70 Paso_Performance* pp) 71 { 72 const dim_t n=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix); 73 /* distribute u */ 74 Paso_Coupler_startCollect(u_m_coupler,u_m); 75 Paso_Coupler_finishCollect(u_m_coupler); 76 /* 77 * set the ant diffusion fluxes: 78 * 79 */ 80 Paso_FCTransportProblem_setAntiDiffusionFlux(dt,fctp,flux_matrix,u_m_coupler); 81 /* 82 * apply pre flux-correction: f_{ij}:=0 if f_{ij}*(\tilde{u}[i]- \tilde{u}[j])<=0 83 * 84 */ 85 Paso_FCTransportProblem_applyPreAntiDiffusionCorrection(flux_matrix,uTilde_coupler); 86 /* 87 * set flux limiters RN_m,RP_m 88 * 89 */ 90 Paso_FCTransportProblem_setRs(flux_matrix,fctp->lumped_mass_matrix,QN_coupler,QP_coupler,RN_m,RP_m); 91 Paso_Coupler_startCollect(RN_m_coupler,RN_m); 92 Paso_Coupler_startCollect(RP_m_coupler,RP_m); 93 /* 94 * z_m[i]=b[i] - (m_i*u_m[i] - dt*theta*sum_{j<>i} l_{ij} (u_m[j]-u_m[i]) + dt q^-[i]) 95 * 96 * note that iteration_matrix stores the negative values of the 97 * low order transport matrix l therefore a=dt*fctp->theta is used. 98 */ 99 Paso_SolverFCT_setMuPaLuPbQ(z_m,fctp->lumped_mass_matrix, u_m_coupler,dt*fctp->theta,fctp->iteration_matrix,dt,sourceN); 100 /* z_m=b-z_m */ 101 Paso_Update(n,-1.,z_m,1.,b); 102 103 Paso_Coupler_finishCollect(RN_m_coupler); 104 Paso_Coupler_finishCollect(RP_m_coupler); 105 /* add corrected fluxes into z_m */ 106 Paso_FCTransportProblem_addCorrectedFluxes(z_m,flux_matrix,RN_m_coupler,RP_m_coupler); 107 return NO_ERROR; 108 } 109 110 void Paso_SolverFCT_solve(Paso_FCTransportProblem* fctp, double* u, double dt, double* source, Paso_Options* options) { 111 const dim_t FAILURES_MAX=5; 112 dim_t m, i_substeps, Failed=0, i; 113 double *z_m=NULL, *b_n=NULL, *sourceP=NULL, *sourceN=NULL, *uTilde_n=NULL, *QN_n=NULL, *QP_n=NULL, *RN_m=NULL, *RP_m=NULL, *du_m=NULL; 114 Paso_Coupler *QN_n_coupler=NULL, *QP_n_coupler=NULL, *RN_m_coupler=NULL, *RP_m_coupler=NULL, *uTilde_n_coupler=NULL, *u_m_coupler=NULL; 115 Paso_SystemMatrix *flux_matrix_m=NULL; 116 double dt_max, dt2,t, norm_u_m, omega, norm_du_m; 117 register double mass, rtmp; 118 const dim_t n=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix); 119 dim_t blockSize=Paso_FCTransportProblem_getBlockSize(fctp); 120 const double atol=options->absolute_tolerance; 121 const double rtol=options->tolerance; 122 const dim_t max_m=options->iter_max; 123 Paso_Performance pp; 124 bool_t converged=FALSE, max_m_reached=FALSE; 125 if (dt<=0.) { 126 Paso_setError(TYPE_ERROR,"Paso_SolverFCT_solve: dt must be positive."); 127 } 128 dt_max=Paso_FCTransportProblem_getSafeTimeStepSize(fctp); 129 /* 130 * allocate memory 131 * 132 */ 133 z_m=TMPMEMALLOC(n,double); 134 Paso_checkPtr(z_m); 135 du_m=TMPMEMALLOC(n,double); 136 Paso_checkPtr(du_m); 137 b_n=TMPMEMALLOC(n,double); 138 Paso_checkPtr(b_n); 139 sourceP=TMPMEMALLOC(n,double); 140 Paso_checkPtr(sourceP); 141 sourceN=TMPMEMALLOC(n,double); 142 Paso_checkPtr(sourceN); 143 uTilde_n=TMPMEMALLOC(n,double); 144 Paso_checkPtr(uTilde_n); 145 QN_n=TMPMEMALLOC(n,double); 146 Paso_checkPtr(QN_n); 147 QP_n=TMPMEMALLOC(n,double); 148 Paso_checkPtr(QP_n); 149 RN_m=TMPMEMALLOC(n,double); 150 Paso_checkPtr(RN_m); 151 RP_m=TMPMEMALLOC(n,double); 152 Paso_checkPtr(RP_m); 153 QN_n_coupler=Paso_Coupler_alloc(Paso_FCTransportProblem_borrowConnector(fctp),blockSize); 154 QP_n_coupler=Paso_Coupler_alloc(Paso_FCTransportProblem_borrowConnector(fctp),blockSize); 155 RN_m_coupler=Paso_Coupler_alloc(Paso_FCTransportProblem_borrowConnector(fctp),blockSize); 156 RP_m_coupler=Paso_Coupler_alloc(Paso_FCTransportProblem_borrowConnector(fctp),blockSize); 157 uTilde_n_coupler=Paso_Coupler_alloc(Paso_FCTransportProblem_borrowConnector(fctp),blockSize); 158 u_m_coupler=Paso_Coupler_alloc(Paso_FCTransportProblem_borrowConnector(fctp),blockSize); 159 flux_matrix_m=Paso_SystemMatrix_alloc(fctp->transport_matrix->type, 160 fctp->transport_matrix->pattern, 161 fctp->transport_matrix->row_block_size, 162 fctp->transport_matrix->col_block_size); 163 164 if (Paso_noError()) { 165 /* 166 * Preparation: 167 * 168 */ 169 Paso_FCT_setSource(n, source, sourceN, sourceP); 170 /* 171 * let the show begin!!!! 172 * 173 */ 174 t=0; 175 i_substeps=0; 176 Paso_Copy(n,u,fctp->u); 177 norm_u_m=Paso_lsup(n,u, fctp->mpi_info); 178 /* while(i_substepsdt_max < LARGE_POSITIVE_FLOAT) { 180 dt2=MIN(dt_max,dt); 181 } else { 182 dt2=dt; 183 } 184 while(t
theta > 0) { 202 omega=1./(dt2*fctp->theta); 203 Paso_Solver_solvePreconditioner(fctp->iteration_matrix,du_m,z_m); 204 Paso_Update(n,1.,u,omega,du_m); 205 } else { 206 omega=1; 207 #pragma omp parallel for private(i,mass,rtmp) 208 for (i = 0; i < n; ++i) { 209 mass=fctp->lumped_mass_matrix[i]; 210 if (ABS(mass)>0.) { 211 rtmp=z_m[i]/mass; 212 } else { 213 rtmp=0; 214 } 215 du_m[i]=rtmp; 216 u[i]+=rtmp; 217 } 218 } 219 norm_u_m=Paso_lsup(n,u, fctp->mpi_info); 220 norm_du_m=Paso_lsup(n,du_m, fctp->mpi_info)*omega; 221 printf("iteration step %d completed: norm increment= %e (tolerance = %e)\n",m+1, norm_du_m, rtol * norm_u_m + atol); 222 223 max_m_reached=(m>max_m); 224 converged=(norm_du_m <= rtol * norm_u_m + atol); 225 m++; 226 } 227 if (converged) { 228 Failed=0; 229 /* #pragma omp parallel for schedule(static) private(i) */ 230 Paso_Copy(n,fctp->u,u); 231 i_substeps++; 232 t+=dt2; 233 if (fctp->dt_max < LARGE_POSITIVE_FLOAT) { 234 dt2=MIN3(dt_max,dt2*1.5,dt-t); 235 } else { 236 dt2=MIN(dt2*1.5,dt-t); 237 } 238 } else if (max_m_reached) { 239 /* if FAILURES_MAX failures in a row: give up */ 240 if (Failed > FAILURES_MAX) { 241 Paso_setError(VALUE_ERROR,"Paso_SolverFCT_solve: no convergence after time step reduction."); 242 } else { 243 printf("no convergence in Paso_Solver_NewtonGMRES: Trying smaller time step size."); 244 dt2=dt*0.5; 245 Failed++; 246 } 247 } 248 249 } 250 /* 251 * clean-up: 252 * 253 */ 254 MEMFREE(z_m); 255 MEMFREE(du_m); 256 TMPMEMFREE(b_n); 257 Paso_SystemMatrix_free(flux_matrix_m); 258 TMPMEMFREE(sourceP); 259 TMPMEMFREE(sourceN); 260 TMPMEMFREE(uTilde_n); 261 TMPMEMFREE(QN_n); 262 TMPMEMFREE(QP_n); 263 TMPMEMFREE(RN_m); 264 TMPMEMFREE(RP_m); 265 Paso_Coupler_free(QN_n_coupler); 266 Paso_Coupler_free(QP_n_coupler); 267 Paso_Coupler_free(RN_m_coupler); 268 Paso_Coupler_free(RP_m_coupler); 269 Paso_Coupler_free(uTilde_n_coupler); 270 Paso_Coupler_free(u_m_coupler); 271 options->absolute_tolerance=atol; 272 options->tolerance=rtol; 273 } 274 } 275 double Paso_FCTransportProblem_getSafeTimeStepSize(Paso_FCTransportProblem* fctp) 276 { 277 dim_t i, n; 278 double dt_max, dt_max_loc; 279 register double l_ii,m; 280 n=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix); 281 if (! fctp->valid_matrices) { 282 fctp->dt_max=LARGE_POSITIVE_FLOAT; 283 /* extract the row sum of the advective part */ 284 Paso_SystemMatrix_rowSum(fctp->mass_matrix,fctp->lumped_mass_matrix); 285 286 /* set low order transport operator */ 287 Paso_FCTransportProblem_setLowOrderOperator(fctp); 288 /* 289 * calculate time step size: 290 */ 291 dt_max=LARGE_POSITIVE_FLOAT; 292 if (fctp->theta < 1.) { 293 #pragma omp parallel private(dt_max_loc) 294 { 295 dt_max_loc=LARGE_POSITIVE_FLOAT; 296 #pragma omp for schedule(static) private(i,l_ii,m) 297 for (i=0;imain_diagonal_low_order_transport_matrix[i]; 299 m=fctp->lumped_mass_matrix[i]; 300 if ( (l_ii<0 && m>0.) || (l_ii>0 && m<0) ) { 301 dt_max_loc=MIN(dt_max_loc,-m/l_ii); 302 } 303 } 304 #pragma omp critical 305 { 306 dt_max=MIN(dt_max,dt_max_loc); 307 } 308 } 309 #ifdef PASO_MPI 310 dt_max_loc = dt_max; 311 MPI_Allreduce(&dt_max_loc, &dt_max, 1, MPI_DOUBLE, MPI_MIN, fctp->mpi_info->comm); 312 #endif 313 if (dt_maxtheta); 314 } 315 if (dt_max <= 0.) { 316 Paso_setError(TYPE_ERROR,"Paso_SolverFCT_solve: dt must be positive."); 317 } else { 318 if (dt_maxtheta); 320 } 321 fctp->dt_max=dt_max; 322 fctp->valid_matrices=Paso_noError(); 323 } 324 return fctp->dt_max; 325 } 326 void Paso_FCT_setUp(Paso_FCTransportProblem* fctp, const double dt, const double *sourceN, const double *sourceP, double* b, double* uTilde, 327 Paso_Coupler* uTilde_coupler, double *QN, Paso_Coupler* QN_coupler, double *QP, Paso_Coupler* QP_coupler, 328 Paso_Options* options, Paso_Performance* pp) 329 { 330 dim_t i; 331 const dim_t n=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix); 332 double omega, factor; 333 register double m, u_tilde_i, rtmp4; 334 /* distribute u */ 335 Paso_Coupler_startCollect(fctp->u_coupler,fctp->u); 336 Paso_Coupler_finishCollect(fctp->u_coupler); 337 /* 338 * b^n[i]=m u^n[i] + dt*(1-theta) sum_{j <> i} l_{ij}*(u^n[j]-u^n[i]) + dt*sourceP[i] 339 * 340 * note that iteration_matrix stores the negative values of the 341 * low order transport matrix l therefore a=-dt*(1-fctp->theta) is used. 342 * 343 */ 344 Paso_SolverFCT_setMuPaLuPbQ(b,fctp->lumped_mass_matrix,fctp->u_coupler, 345 -dt*(1-fctp->theta),fctp->iteration_matrix,dt,sourceP); 346 /* 347 * uTilde[i]=b[i]/m[i] 348 * 349 * fctp->iteration_matrix[i,i]=m[i]/(dt theta) + \frac{1}{\theta} \frac{q^-[i]}-l[i,i] 350 * 351 */ 352 if (fctp->theta > 0) { 353 Paso_solve_free(fctp->iteration_matrix); 354 omega=1./(dt*fctp->theta); 355 factor=dt*omega; 356 #pragma omp parallel for private(i,m,u_tilde_i,rtmp4) 357 for (i = 0; i < n; ++i) { 358 m=fctp->lumped_mass_matrix[i]; 359 if (ABS(m)>0.) { 360 u_tilde_i=b[i]/m; 361 } else { 362 u_tilde_i=fctp->u[i]; 363 } 364 rtmp4=m*omega-fctp->main_diagonal_low_order_transport_matrix[i]; 365 if (ABS(u_tilde_i)>0) rtmp4+=sourceN[i]*factor/u_tilde_i; 366 fctp->iteration_matrix->mainBlock->val[fctp->main_iptr[i]]=rtmp4; 367 uTilde[i]=u_tilde_i; 368 } 369 Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER_INIT); 370 Paso_Solver_setPreconditioner(fctp->iteration_matrix,options); 371 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER_INIT); 372 } else { 373 #pragma omp parallel for private(i,m,u_tilde_i) 374 for (i = 0; i < n; ++i) { 375 m=fctp->lumped_mass_matrix[i]; 376 if (ABS(m)>0.) { 377 u_tilde_i=b[i]/m; 378 } else { 379 u_tilde_i=fctp->u[i]; 380 } 381 uTilde[i]=u_tilde_i; 382 } 383 /* no update of iteration_matrix required! */ 384 } /* end (fctp->theta > 0) */ 385 386 /* distribute uTilde: */ 387 Paso_Coupler_startCollect(uTilde_coupler,uTilde); 388 Paso_Coupler_finishCollect(uTilde_coupler); 389 /* 390 * calculate QP[i] max_{j} (\tilde{u}[j]- \tilde{u}[i] ) 391 * QN[i] min_{j} (\tilde{u}[j]- \tilde{u}[i] ) 392 * 393 */ 394 Paso_SolverFCT_setQs(uTilde_coupler,QN,QP,fctp->iteration_matrix); 395 Paso_Coupler_startCollect(QN_coupler,QN); 396 Paso_Coupler_startCollect(QP_coupler,QP); 397 Paso_Coupler_finishCollect(QN_coupler); 398 Paso_Coupler_finishCollect(QP_coupler); 399 }