# Contents of /trunk/paso/src/SolverFCT_solve.c

Revision 1407 - (show annotations)
Mon Feb 4 06:45:48 2008 UTC (12 years ago) by gross
File MIME type: text/plain
File size: 14676 byte(s)
new upwinding algorithm (still fails)

 1 /* $Id:$ */ 2 3 /******************************************************* 4 * 5 * Copyright 2007,2008 by University of Queensland 6 * 7 * http://esscc.uq.edu_m.au 8 * Primary Business: Queensland, Australia 9 * Licensed under the Open Software License version 3.0 10 * http://www.opensource.org/licenses/osl-3.0.php 11 * 12 *******************************************************/ 13 14 /**************************************************************/ 15 16 /* Paso: Flux correction transport solver 17 * 18 * solves Mu_t=Ku+q 19 * u(0) >= u_min 20 * 21 * Warning: the program assums sum_{j} k_{ij}=0!!! 22 * 23 */ 24 /**************************************************************/ 25 26 /* Author: l.gross@uq.edu_m.au */ 27 28 /**************************************************************/ 29 30 #include "Paso.h" 31 #include "Solver.h" 32 #include "SolverFCT.h" 33 #include "escript/blocktimer.h" 34 #ifdef _OPENMP 35 #include 36 #endif 37 #ifdef PASO_MPI 38 #include 39 #endif 40 41 double Paso_FCTransportProblem_getSafeTimeStepSize(Paso_FCTransportProblem* fctp) 42 { 43 dim_t i, n_rows; 44 double dt_max, dt_max_loc; 45 register double rtmp1,rtmp2; 46 n_rows=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix); 47 if (! fctp->valid_matrices) { 48 fctp->dt_max=LARGE_POSITIVE_FLOAT; 49 50 51 /* extract the row sum of the advective part */ 52 Paso_SystemMatrix_rowSum(fctp->mass_matrix,fctp->lumped_mass_matrix); 53 54 /* set low order transport operator */ 55 Paso_FCTransportProblem_setLowOrderOperator(fctp); 56 /* 57 * calculate time step size: 58 */ 59 dt_max=LARGE_POSITIVE_FLOAT; 60 if (fctp->theta < 1.) { 61 #pragma omp parallel private(dt_max_loc) 62 { 63 dt_max_loc=LARGE_POSITIVE_FLOAT; 64 #pragma omp for schedule(static) private(i,rtmp1,rtmp2) 65 for (i=0;imain_diagonal_low_order_transport_matrix[i]; 67 rtmp2=fctp->lumped_mass_matrix[i]; 68 if ( (rtmp1<0 && rtmp2>=0.) || (rtmp1>0 && rtmp2<=0.) ) { 69 dt_max_loc=MIN(dt_max_loc,-rtmp2/rtmp1); 70 } 71 } 72 #pragma omp critical 73 { 74 dt_max=MIN(dt_max,dt_max_loc); 75 } 76 } 77 #ifdef PASO_MPI 78 dt_max_loc = dt_max; 79 MPI_Allreduce(&dt_max_loc, &dt_max, 1, MPI_DOUBLE, MPI_MIN, fctp->mpi_info->comm); 80 #endif 81 if (dt_maxtheta); 82 } 83 if (dt_max <= 0.) { 84 Paso_setError(TYPE_ERROR,"Paso_SolverFCT_solve: dt must be positive."); 85 } else { 86 if (dt_maxtheta); 87 } 88 fctp->dt_max=dt_max; 89 fctp->valid_matrices=Paso_noError(); 90 } 91 return fctp->dt_max; 92 } 93 94 95 96 97 void Paso_SolverFCT_solve(Paso_FCTransportProblem* fctp, double* u, double dt, double* source, Paso_Options* options) { 98 99 index_t i, j; 100 int n_substeps,n, m; 101 double dt_max, omega, dt2,t; 102 double local_norm[2],norm[2],local_norm_u,local_norm_du,norm_u,norm_du, tolerance; 103 register double rtmp1,rtmp2,rtmp3,rtmp4, rtmp; 104 double *b_n=NULL, *sourceP=NULL, *sourceN=NULL, *uTilde_n=NULL, *QN_n=NULL, *QP_n=NULL, *RN_m=NULL, *RP_m=NULL, *z_m=NULL, *du_m=NULL; 105 Paso_SystemMatrix *flux_matrix=NULL; 106 dim_t n_rows=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix); 107 bool_t converged; 108 if (dt<=0.) { 109 Paso_setError(TYPE_ERROR,"Paso_SolverFCT_solve: dt must be positive."); 110 } 111 dt_max=Paso_FCTransportProblem_getSafeTimeStepSize(fctp); 112 /* 113 * allocate memory 114 * 115 */ 116 Paso_SystemMatrix_allocBuffer(fctp->iteration_matrix); 117 b_n=TMPMEMALLOC(n_rows,double); 118 Paso_checkPtr(b_n); 119 sourceP=TMPMEMALLOC(n_rows,double); 120 Paso_checkPtr(sourceP); 121 sourceN=TMPMEMALLOC(n_rows,double); 122 Paso_checkPtr(sourceN); 123 uTilde_n=TMPMEMALLOC(n_rows,double); 124 Paso_checkPtr(uTilde_n); 125 QN_n=TMPMEMALLOC(n_rows,double); 126 Paso_checkPtr(QN_n); 127 QP_n=TMPMEMALLOC(n_rows,double); 128 Paso_checkPtr(QP_n); 129 RN_m=TMPMEMALLOC(n_rows,double); 130 Paso_checkPtr(RN_m); 131 RP_m=TMPMEMALLOC(n_rows,double); 132 Paso_checkPtr(RP_m); 133 z_m=TMPMEMALLOC(n_rows,double); 134 Paso_checkPtr(z_m); 135 du_m=TMPMEMALLOC(n_rows,double); 136 Paso_checkPtr(du_m); 137 flux_matrix=Paso_SystemMatrix_alloc(fctp->transport_matrix->type, 138 fctp->transport_matrix->pattern, 139 fctp->transport_matrix->row_block_size, 140 fctp->transport_matrix->col_block_size); 141 if (Paso_noError()) { 142 Paso_SystemMatrix_allocBuffer(flux_matrix); 143 Paso_SystemMatrix_allocBuffer(fctp->iteration_matrix); 144 /* 145 * Preparation: 146 * 147 */ 148 149 /* decide on substepping */ 150 if (fctp->dt_max < LARGE_POSITIVE_FLOAT) { 151 n_substeps=ceil(dt/dt_max); 152 } else { 153 n_substeps=1; 154 } 155 dt2=dt/n_substeps; 156 printf("%d time steps of size is %e (theta = %e, dt_max=%e).\n",n_substeps, dt2,fctp->theta, dt_max); 157 158 /* 159 * seperate source into positive and negative part: 160 */ 161 printf("FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF\n"); 162 #pragma omp parallel for private(i,rtmp) 163 for (i = 0; i < n_rows; ++i) { 164 rtmp=source[i]; 165 if (rtmp <0) { 166 sourceP[i]=-rtmp; 167 } else { 168 sourceN[i]= rtmp; 169 } 170 } 171 printf("FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF\n"); 172 /* 173 * now the show can begin: 174 * 175 */ 176 #pragma omp parallel for schedule(static) private(i) 177 for (i=0;iu[i]-fctp->u_min; 178 printf("FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF\n"); 179 180 t=dt2; 181 n=0; 182 tolerance=options->tolerance; 183 while(n i} l_{ij}*(u^n[j]-u^n[i]) + dt*sourceP[i] 188 * 189 * note that iteration_matrix stores the negative values of the 190 * low order transport matrix l therefore a=-dt*(1-fctp->theta) is used. 191 * 192 */ 193 Paso_SolverFCT_setMuPaLuPbQ(b_n,fctp->lumped_mass_matrix,u, 194 -dt*(1-fctp->theta),fctp->iteration_matrix,dt,sourceP); 195 /* 196 * uTilde_n[i]=b[i]/m[i] 197 * 198 * a[i,i]=m[i]/(dt theta) + \frac{1}{\theta} \frac{q^-[i]}-l[i,i] 199 * 200 */ 201 printf("FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF\n"); 202 if (fctp->theta > 0) { 203 Paso_solve_free(fctp->iteration_matrix); 204 rtmp1=1./(dt*fctp->theta); 205 rtmp2=1./fctp->theta; 206 #pragma omp parallel for private(i,rtmp,rtmp3,rtmp4) 207 for (i = 0; i < n_rows; ++i) { 208 rtmp=fctp->lumped_mass_matrix[i]; 209 if (ABS(rtmp)>0.) { 210 rtmp3=b_n[i]/rtmp; 211 } else { 212 rtmp3=u[i]; 213 } 214 rtmp4=rtmp*rtmp1-fctp->main_diagonal_low_order_transport_matrix[i]; 215 if (ABS(rtmp3)>0) rtmp4+=sourceN[i]*rtmp2/rtmp3; 216 fctp->iteration_matrix->mainBlock->val[fctp->main_iptr[i]]=rtmp4; 217 uTilde_n[i]=rtmp3; 218 } 219 } else { 220 #pragma omp parallel for private(i,rtmp,rtmp3) 221 for (i = 0; i < n_rows; ++i) { 222 rtmp=fctp->lumped_mass_matrix[i]; 223 if (ABS(rtmp)>0.) { 224 rtmp3=b_n[i]/rtmp; 225 } else { 226 rtmp3=u[i]; 227 } 228 uTilde_n[i]=rtmp3; 229 } 230 /* no update of iteration_matrix retquired! */ 231 } /* end (fctp->theta > 0) */ 232 /* 233 * calculate QP_n[i] max_{j} (\tilde{u}[j]- \tilde{u}[i] ) 234 * QN_n[i] min_{j} (\tilde{u}[j]- \tilde{u}[i] ) 235 * 236 */ 237 Paso_SolverFCT_setQs(uTilde_n,QN_n,QP_n,fctp->iteration_matrix); 238 /* 239 * now we enter the mation on a time-step: 240 * 241 */ 242 m=0; 243 converged=FALSE; 244 while ( (!converged) && (m<50) && Paso_noError()) { 245 printf("iteration step %d\n",m+1); 246 /* 247 * set the ant diffusion fluxes: 248 * 249 * initially we set f_{ij} = - dt d_{ij} (u[j]-u[i]) 250 * and then f_{ij} += omega (m_{ij} - dt (1-theta) d_{ij}) (du[j]-du[i]) 251 */ 252 if (m==0) { 253 Paso_SystemMatrix_setValues(flux_matrix,0.); 254 Paso_FCTransportProblem_updateAntiDiffusionFlux(fctp,flux_matrix,0.,-dt,u); 255 } else { 256 Paso_FCTransportProblem_updateAntiDiffusionFlux(fctp,flux_matrix, 257 omega,-omega*dt*(1.-fctp->theta),du_m); 258 } 259 /* 260 * apply pre flux-correction: f_{ij}:=0 if f_{ij}*(\tilde{u}[i]- \tilde{u}[j])<=0 261 * 262 * this is not entirely correct!!!!! 263 * 264 */ 265 Paso_FCTransportProblem_applyPreAntiDiffusionCorrection(flux_matrix,uTilde_n); 266 /* 267 * set flux limms RN_m,RP_m 268 * 269 */ 270 Paso_FCTransportProblem_setRs(flux_matrix,fctp->lumped_mass_matrix,QN_n,QP_n,RN_m,RP_m); 271 /* 272 * z_m[i]=m_i*u[i] - dt*theta*sum_{j<>i} l_{ij} (u[j]-u[i]) + dt q^-[i] 273 * 274 * note that iteration_matrix stores the negative values of the 275 * low order transport matrix l therefore a=dt*fctp->theta is used. 276 */ 277 Paso_SolverFCT_setMuPaLuPbQ(z_m,fctp->lumped_mass_matrix,u, 278 dt*fctp->theta,fctp->iteration_matrix,dt,sourceN); 279 280 /* add corrected fluxes into z_m */ 281 Paso_FCTransportProblem_addCorrectedFluxes(z_m,flux_matrix,RN_m,RP_m); 282 /* 283 * now we solve the linear system to get the correction dt: 284 * 285 */ 286 if (fctp->theta > 0) { 287 /* set the right hand side of the linear system: */ 288 #pragma omp parallel for private(i) 289 for (i = 0; i < n_rows; ++i) { 290 z_m[i]=b_n[i]-z_m[i]; 291 } 292 options->tolerance=1.e-3; 293 Paso_solve(fctp->iteration_matrix,du_m,z_m,options); 294 /* TODO: check errors ! */ 295 omega=dt*fctp->theta; 296 } else { 297 #pragma omp parallel for private(i,rtmp,rtmp1) 298 for (i = 0; i < n_rows; ++i) { 299 rtmp=fctp->lumped_mass_matrix[i]; 300 if (ABS(rtmp)>0.) { 301 rtmp1=(b_n[i]-z_m[i])/rtmp; 302 } else { 303 rtmp1=0; 304 } 305 du_m[i]=rtmp1; 306 } 307 omega=1.; 308 } 309 /* 310 * update u and calculate norm of du_m and the new u: 311 * 312 */ 313 norm_u=0.; 314 norm_du=0.; 315 #pragma omp parallel private(local_norm_u,local_norm_du) 316 { 317 local_norm_u=0.; 318 local_norm_du=0.; 319 #pragma omp for schedule(static) private(i) 320 for (i=0;impi_info->comm); 335 norm_u=norm[0]; 336 norm_du=norm[1]; 337 #endif 338 norm_du*=omega; 339 converged=(norm_du <= tolerance * norm_u); 340 m++; 341 printf("iteration step %d: norm u and du_m : %e %e\n",m,norm_u,norm_du); 342 /* TODO: check if du_m has been redu_mced */ 343 } /* end of inner mation */ 344 } /* end of time integration */ 345 #pragma omp parallel for schedule(static) private(i) 346 for (i=0;iu[i]=u[i]+fctp->u_min; 347 /* TODO: update u_min */ 348 349 } 350 /* 351 * clean-up: 352 * 353 */ 354 TMPMEMFREE(b_n); 355 Paso_SystemMatrix_free(flux_matrix); 356 TMPMEMFREE(sourceP); 357 TMPMEMFREE(sourceN); 358 TMPMEMFREE(uTilde_n); 359 TMPMEMFREE(QN_n); 360 TMPMEMFREE(QP_n); 361 TMPMEMFREE(RN_m); 362 TMPMEMFREE(RP_m); 363 TMPMEMFREE(z_m); 364 TMPMEMFREE(du_m); 365 }