# Contents of /trunk/paso/src/SolverFCT_solve.c

Revision 2551 - (show annotations)
Thu Jul 23 09:19:15 2009 UTC (10 years, 2 months ago) by gross
File MIME type: text/plain
File size: 15375 byte(s)
a problem with the sparse matrix unrolling fixed.

 1 2 /******************************************************* 3 * 4 * Copyright (c) 2003-2009 by University of Queensland 5 * Earth Systems Science Computational Center (ESSCC) 6 * http://www.uq.edu.au/esscc 7 * 8 * Primary Business: Queensland, Australia 9 * Licensed under the Open Software License version 3.0 10 * http://www.opensource.org/licenses/osl-3.0.php 11 * 12 *******************************************************/ 13 14 15 /**************************************************************/ 16 17 /* Paso: Flux correction transport solver 18 * 19 * solves Mu_t=Ku+q 20 * u(0) >=0 21 * 22 * Warning: the program assums sum_{j} k_{ij}=0!!! 23 * 24 */ 25 /**************************************************************/ 26 27 /* Author: l.gross@uq.edu.au */ 28 29 /**************************************************************/ 30 31 #include "Paso.h" 32 #include "Solver.h" 33 #include "SolverFCT.h" 34 #include "PasoUtil.h" 35 #include "esysUtils/blocktimer.h" 36 #ifdef _OPENMP 37 #include 38 #endif 39 #ifdef PASO_MPI 40 #include 41 #endif 42 43 /* 44 * inserts the source term into the problem 45 */ 46 void Paso_FCT_setSource(const dim_t n,const double *source, double* sourceN, double* sourceP) 47 { 48 dim_t i; 49 register double rtmp; 50 /* 51 * seperate source into positive and negative part: 52 */ 53 #pragma omp parallel for private(i,rtmp) 54 for (i = 0; i < n; ++i) { 55 rtmp=source[i]; 56 if (rtmp <0) { 57 sourceN[i]=-rtmp; 58 sourceP[i]=0; 59 } else { 60 sourceN[i]= 0; 61 sourceP[i]= rtmp; 62 } 63 } 64 } 65 66 err_t Paso_FCT_setUpRightHandSide(Paso_FCTransportProblem* fctp, const double dt, const double *u_m, Paso_Coupler* u_m_coupler, double * z_m, 67 Paso_SystemMatrix* flux_matrix, Paso_Coupler* uTilde_coupler, const double *b, 68 Paso_Coupler* QN_coupler, Paso_Coupler* QP_coupler, 69 double *RN_m, Paso_Coupler* RN_m_coupler, double* RP_m, Paso_Coupler* RP_m_coupler, const double *sourceN, 70 Paso_Performance* pp) 71 { 72 const dim_t n=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix); 73 /* distribute u */ 74 Paso_Coupler_startCollect(u_m_coupler,u_m); 75 Paso_Coupler_finishCollect(u_m_coupler); 76 /* 77 * set the ant diffusion fluxes: 78 * 79 */ 80 Paso_FCTransportProblem_setAntiDiffusionFlux(dt,fctp,flux_matrix,u_m_coupler); 81 /* 82 * apply pre flux-correction: f_{ij}:=0 if f_{ij}*(\tilde{u}[i]- \tilde{u}[j])<=0 83 * 84 */ 85 Paso_FCTransportProblem_applyPreAntiDiffusionCorrection(flux_matrix,uTilde_coupler); 86 /* 87 * set flux limiters RN_m,RP_m 88 * 89 */ 90 Paso_FCTransportProblem_setRs(flux_matrix,fctp->lumped_mass_matrix,QN_coupler,QP_coupler,RN_m,RP_m); 91 Paso_Coupler_startCollect(RN_m_coupler,RN_m); 92 Paso_Coupler_startCollect(RP_m_coupler,RP_m); 93 /* 94 * z_m[i]=b[i] - (m_i*u_m[i] - dt*theta*sum_{j<>i} l_{ij} (u_m[j]-u_m[i]) + dt q^-[i]) 95 * 96 * note that iteration_matrix stores the negative values of the 97 * low order transport matrix l therefore a=dt*fctp->theta is used. 98 */ 99 Paso_SolverFCT_setMuPaLuPbQ(z_m,fctp->lumped_mass_matrix, u_m_coupler,dt*fctp->theta,fctp->iteration_matrix,dt,sourceN); 100 /* z_m=b-z_m */ 101 Paso_Update(n,-1.,z_m,1.,b); 102 103 Paso_Coupler_finishCollect(RN_m_coupler); 104 Paso_Coupler_finishCollect(RP_m_coupler); 105 /* add corrected fluxes into z_m */ 106 Paso_FCTransportProblem_addCorrectedFluxes(z_m,flux_matrix,RN_m_coupler,RP_m_coupler); 107 return NO_ERROR; 108 } 109 110 void Paso_SolverFCT_solve(Paso_FCTransportProblem* fctp, double* u, double dt, double* source, Paso_Options* options) { 111 const dim_t FAILURES_MAX=100; 112 dim_t m, i_substeps, Failed=0, i; 113 double *z_m=NULL, *b_n=NULL, *sourceP=NULL, *sourceN=NULL, *uTilde_n=NULL, *QN_n=NULL, *QP_n=NULL, *RN_m=NULL, *RP_m=NULL, *du_m=NULL; 114 Paso_Coupler *QN_n_coupler=NULL, *QP_n_coupler=NULL, *RN_m_coupler=NULL, *RP_m_coupler=NULL, *uTilde_n_coupler=NULL, *u_m_coupler=NULL; 115 Paso_SystemMatrix *flux_matrix_m=NULL; 116 double dt_max, dt2,t, norm_u_m, omega, norm_du_m; 117 register double mass, rtmp; 118 const dim_t n=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix); 119 dim_t blockSize=Paso_FCTransportProblem_getBlockSize(fctp); 120 const double atol=options->absolute_tolerance; 121 const double rtol=options->tolerance; 122 const dim_t max_m=options->iter_max; 123 Paso_Performance pp; 124 bool_t converged=FALSE, max_m_reached=FALSE; 125 if (dt<=0.) { 126 Paso_setError(TYPE_ERROR,"Paso_SolverFCT_solve: dt must be positive."); 127 } 128 dt_max=Paso_FCTransportProblem_getSafeTimeStepSize(fctp); 129 /* 130 * allocate memory 131 * 132 */ 133 z_m=TMPMEMALLOC(n,double); 134 Paso_checkPtr(z_m); 135 du_m=TMPMEMALLOC(n,double); 136 Paso_checkPtr(du_m); 137 b_n=TMPMEMALLOC(n,double); 138 Paso_checkPtr(b_n); 139 sourceP=TMPMEMALLOC(n,double); 140 Paso_checkPtr(sourceP); 141 sourceN=TMPMEMALLOC(n,double); 142 Paso_checkPtr(sourceN); 143 uTilde_n=TMPMEMALLOC(n,double); 144 Paso_checkPtr(uTilde_n); 145 QN_n=TMPMEMALLOC(n,double); 146 Paso_checkPtr(QN_n); 147 QP_n=TMPMEMALLOC(n,double); 148 Paso_checkPtr(QP_n); 149 RN_m=TMPMEMALLOC(n,double); 150 Paso_checkPtr(RN_m); 151 RP_m=TMPMEMALLOC(n,double); 152 Paso_checkPtr(RP_m); 153 QN_n_coupler=Paso_Coupler_alloc(Paso_FCTransportProblem_borrowConnector(fctp),blockSize); 154 QP_n_coupler=Paso_Coupler_alloc(Paso_FCTransportProblem_borrowConnector(fctp),blockSize); 155 RN_m_coupler=Paso_Coupler_alloc(Paso_FCTransportProblem_borrowConnector(fctp),blockSize); 156 RP_m_coupler=Paso_Coupler_alloc(Paso_FCTransportProblem_borrowConnector(fctp),blockSize); 157 uTilde_n_coupler=Paso_Coupler_alloc(Paso_FCTransportProblem_borrowConnector(fctp),blockSize); 158 u_m_coupler=Paso_Coupler_alloc(Paso_FCTransportProblem_borrowConnector(fctp),blockSize); 159 flux_matrix_m=Paso_SystemMatrix_alloc(fctp->transport_matrix->type, 160 fctp->transport_matrix->pattern, 161 fctp->transport_matrix->row_block_size, 162 fctp->transport_matrix->col_block_size, TRUE); 163 164 if (Paso_noError()) { 165 /* 166 * Preparation: 167 * 168 */ 169 Paso_FCT_setSource(n, source, sourceN, sourceP); 170 /* 171 * let the show begin!!!! 172 * 173 */ 174 t=0; 175 i_substeps=0; 176 norm_u_m=Paso_lsup(n,fctp->u, fctp->mpi_info); 177 /* while(i_substepsdt_max < LARGE_POSITIVE_FLOAT) { 179 dt2=MIN(dt_max,dt); 180 } else { 181 dt2=dt; 182 } 183 while( (tverbose) printf("substep step %d at t=%e (step size= %e)\n",i_substeps+1,t+dt2,dt2); 185 Paso_FCT_setUp(fctp,dt2,sourceN,sourceP,b_n,uTilde_n,uTilde_n_coupler,QN_n,QN_n_coupler,QP_n,QP_n_coupler, 186 options,&pp); 187 Paso_Copy(n,u,fctp->u); 188 /* now the iteration starts */ 189 m=0; 190 converged=FALSE; 191 max_m_reached=FALSE; 192 /* tolerance? */ 193 while ( (!converged) && (! max_m_reached) && Paso_noError()) { 194 /* set z_m */ 195 Paso_FCT_setUpRightHandSide(fctp,dt2,u,u_m_coupler,z_m,flux_matrix_m,uTilde_n_coupler,b_n, 196 QN_n_coupler,QP_n_coupler,RN_m,RN_m_coupler,RP_m,RP_m_coupler,sourceN,&pp); 197 /* 198 * now we solve the linear system to get the correction dt: 199 * 200 */ 201 /* for (i=0;itheta > 0) { 203 omega=1./(dt2*fctp->theta); 204 Paso_Solver_solvePreconditioner(fctp->iteration_matrix,du_m,z_m); 205 Paso_Update(n,1.,u,omega,du_m); 206 } else { 207 omega=1; 208 #pragma omp parallel for private(i,mass,rtmp) 209 for (i = 0; i < n; ++i) { 210 mass=fctp->lumped_mass_matrix[i]; 211 if (ABS(mass)>0.) { 212 rtmp=z_m[i]/mass; 213 } else { 214 rtmp=0; 215 } 216 du_m[i]=rtmp; 217 u[i]+=rtmp; 218 } 219 } 220 norm_u_m=Paso_lsup(n,u, fctp->mpi_info); 221 norm_du_m=Paso_lsup(n,du_m, fctp->mpi_info)*omega; 222 if (options->verbose) printf("iteration step %d completed: norm increment= %e (tolerance = %e)\n",m+1, norm_du_m, rtol * norm_u_m + atol); 223 224 max_m_reached=(m>max_m); 225 converged=(norm_du_m <= rtol * norm_u_m + atol); 226 m++; 227 } 228 if (converged) { 229 Failed=0; 230 /* #pragma omp parallel for schedule(static) private(i) */ 231 Paso_Copy(n,fctp->u,u); 232 i_substeps++; 233 t+=dt2; 234 if (fctp->dt_max < LARGE_POSITIVE_FLOAT) { 235 dt2=MIN3(dt_max,dt2*1.1,dt-t); 236 } else { 237 dt2=MIN(dt2*1.1,dt-t); 238 } 239 } else if (max_m_reached) { 240 /* if FAILURES_MAX failures in a row: give up */ 241 if (Failed > FAILURES_MAX) { 242 Paso_setError(VALUE_ERROR,"Paso_SolverFCT_solve: no convergence after time step reduction."); 243 } else { 244 if (options->verbose) printf("no convergence in Paso_Solver_NewtonGMRES: Trying smaller time step size."); 245 dt2*=0.5; 246 Failed++; 247 } 248 } 249 } 250 /* 251 * clean-up: 252 * 253 */ 254 MEMFREE(z_m); 255 MEMFREE(du_m); 256 TMPMEMFREE(b_n); 257 Paso_SystemMatrix_free(flux_matrix_m); 258 TMPMEMFREE(sourceP); 259 TMPMEMFREE(sourceN); 260 TMPMEMFREE(uTilde_n); 261 TMPMEMFREE(QN_n); 262 TMPMEMFREE(QP_n); 263 TMPMEMFREE(RN_m); 264 TMPMEMFREE(RP_m); 265 Paso_Coupler_free(QN_n_coupler); 266 Paso_Coupler_free(QP_n_coupler); 267 Paso_Coupler_free(RN_m_coupler); 268 Paso_Coupler_free(RP_m_coupler); 269 Paso_Coupler_free(uTilde_n_coupler); 270 Paso_Coupler_free(u_m_coupler); 271 options->absolute_tolerance=atol; 272 options->tolerance=rtol; 273 } 274 } 275 double Paso_FCTransportProblem_getSafeTimeStepSize(Paso_FCTransportProblem* fctp) 276 { 277 dim_t i, n; 278 double dt_max, dt_max_loc; 279 register double l_ii,m; 280 n=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix); 281 if ( ! fctp->valid_matrices) { 282 /* extract the row sum of the advective part */ 283 Paso_SystemMatrix_rowSum(fctp->mass_matrix,fctp->lumped_mass_matrix); 284 285 /* set low order transport operator */ 286 Paso_FCTransportProblem_setLowOrderOperator(fctp); 287 288 if (Paso_noError()) { 289 /* 290 * calculate time step size: 291 */ 292 dt_max=LARGE_POSITIVE_FLOAT; 293 #pragma omp parallel private(dt_max_loc) 294 { 295 dt_max_loc=LARGE_POSITIVE_FLOAT; 296 #pragma omp for schedule(static) private(i,l_ii,m) 297 for (i=0;imain_diagonal_low_order_transport_matrix[i]; 299 m=fctp->lumped_mass_matrix[i]; 300 if ( (l_ii<0 && m>0.) || (l_ii>0 && m<0) ) { 301 dt_max_loc=MIN(dt_max_loc,-m/l_ii); 302 } 303 } 304 #pragma omp critical 305 { 306 dt_max=MIN(dt_max,dt_max_loc); 307 } 308 } 309 #ifdef PASO_MPI 310 dt_max_loc = dt_max; 311 MPI_Allreduce(&dt_max_loc, &dt_max, 1, MPI_DOUBLE, MPI_MIN, fctp->mpi_info->comm); 312 #endif 313 if (dt_maxdt_factor; 314 if (dt_max <= 0.) { 315 Paso_setError(TYPE_ERROR,"Paso_SolverFCT_solve: dt must be positive."); 316 } 317 fctp->dt_max=dt_max; 318 fctp->valid_matrices=Paso_noError(); 319 } 320 } 321 return fctp->dt_max; 322 } 323 void Paso_FCT_setUp(Paso_FCTransportProblem* fctp, const double dt, const double *sourceN, const double *sourceP, double* b, double* uTilde, 324 Paso_Coupler* uTilde_coupler, double *QN, Paso_Coupler* QN_coupler, double *QP, Paso_Coupler* QP_coupler, 325 Paso_Options* options, Paso_Performance* pp) 326 { 327 dim_t i; 328 const dim_t n=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix); 329 double omega, factor; 330 register double m, u_tilde_i, rtmp4; 331 /* distribute u */ 332 Paso_Coupler_startCollect(fctp->u_coupler,fctp->u); 333 Paso_Coupler_finishCollect(fctp->u_coupler); 334 /* 335 * b^n[i]=m u^n[i] + dt*(1-theta) sum_{j <> i} l_{ij}*(u^n[j]-u^n[i]) + dt*sourceP[i] 336 * 337 * note that iteration_matrix stores the negative values of the 338 * low order transport matrix l therefore a=-dt*(1-fctp->theta) is used. 339 * 340 */ 341 Paso_SolverFCT_setMuPaLuPbQ(b,fctp->lumped_mass_matrix,fctp->u_coupler, 342 -dt*(1-fctp->theta),fctp->iteration_matrix,dt,sourceP); 343 /* 344 * uTilde[i]=b[i]/m[i] 345 * 346 * fctp->iteration_matrix[i,i]=m[i]/(dt theta) + \frac{1}{\theta} \frac{q^-[i]}-l[i,i] 347 * 348 */ 349 if (fctp->theta > 0) { 350 Paso_solve_free(fctp->iteration_matrix); 351 omega=1./(dt*fctp->theta); 352 factor=dt*omega; 353 #pragma omp parallel for private(i,m,u_tilde_i,rtmp4) 354 for (i = 0; i < n; ++i) { 355 m=fctp->lumped_mass_matrix[i]; 356 if (ABS(m)>0.) { 357 u_tilde_i=b[i]/m; 358 } else { 359 u_tilde_i=fctp->u[i]; 360 } 361 rtmp4=m*omega-fctp->main_diagonal_low_order_transport_matrix[i]; 362 if (ABS(u_tilde_i)>0) rtmp4+=sourceN[i]*factor/u_tilde_i; 363 fctp->iteration_matrix->mainBlock->val[fctp->main_iptr[i]]=rtmp4; 364 uTilde[i]=u_tilde_i; 365 } 366 Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER_INIT); 367 Paso_Solver_setPreconditioner(fctp->iteration_matrix,options); 368 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER_INIT); 369 } else { 370 #pragma omp parallel for private(i,m,u_tilde_i) 371 for (i = 0; i < n; ++i) { 372 m=fctp->lumped_mass_matrix[i]; 373 if (ABS(m)>0.) { 374 u_tilde_i=b[i]/m; 375 } else { 376 u_tilde_i=fctp->u[i]; 377 } 378 uTilde[i]=u_tilde_i; 379 } 380 /* no update of iteration_matrix required! */ 381 } /* end (fctp->theta > 0) */ 382 383 /* distribute uTilde: */ 384 Paso_Coupler_startCollect(uTilde_coupler,uTilde); 385 Paso_Coupler_finishCollect(uTilde_coupler); 386 /* 387 * calculate QP[i] max_{j} (\tilde{u}[j]- \tilde{u}[i] ) 388 * QN[i] min_{j} (\tilde{u}[j]- \tilde{u}[i] ) 389 * 390 */ 391 Paso_SolverFCT_setQs(uTilde_coupler,QN,QP,fctp->iteration_matrix); 392 Paso_Coupler_startCollect(QN_coupler,QN); 393 Paso_Coupler_startCollect(QP_coupler,QP); 394 Paso_Coupler_finishCollect(QN_coupler); 395 Paso_Coupler_finishCollect(QP_coupler); 396 }