# Contents of /trunk/paso/src/SolverFCT_solve.c

Revision 1562 - (show annotations)
Wed May 21 13:04:40 2008 UTC (11 years, 9 months ago) by gross
File MIME type: text/plain
File size: 15787 byte(s)
The algebraic upwinding with MPI. The case of boundary constraint needs still some attention.


 1 /* $Id:$ */ 2 3 /******************************************************* 4 * 5 * Copyright 2007,2008 by University of Queensland 6 * 7 * http://esscc.uq.edu_m.au 8 * Primary Business: Queensland, Australia 9 * Licensed under the Open Software License version 3.0 10 * http://www.opensource.org/licenses/osl-3.0.php 11 * 12 *******************************************************/ 13 14 /**************************************************************/ 15 16 /* Paso: Flux correction transport solver 17 * 18 * solves Mu_t=Ku+q 19 * u(0) >= u_min 20 * 21 * Warning: the program assums sum_{j} k_{ij}=0!!! 22 * 23 */ 24 /**************************************************************/ 25 26 /* Author: l.gross@uq.edu_m.au */ 27 28 /**************************************************************/ 29 30 #include "Paso.h" 31 #include "Solver.h" 32 #include "SolverFCT.h" 33 #include "escript/blocktimer.h" 34 #ifdef _OPENMP 35 #include 36 #endif 37 #ifdef PASO_MPI 38 #include 39 #endif 40 41 double Paso_FCTransportProblem_getSafeTimeStepSize(Paso_FCTransportProblem* fctp) 42 { 43 dim_t i, n_rows; 44 double dt_max, dt_max_loc; 45 register double rtmp1,rtmp2; 46 n_rows=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix); 47 if (! fctp->valid_matrices) { 48 fctp->dt_max=LARGE_POSITIVE_FLOAT; 49 /* extract the row sum of the advective part */ 50 Paso_SystemMatrix_rowSum(fctp->mass_matrix,fctp->lumped_mass_matrix); 51 52 /* set low order transport operator */ 53 Paso_FCTransportProblem_setLowOrderOperator(fctp); 54 /* 55 * calculate time step size: 56 */ 57 dt_max=LARGE_POSITIVE_FLOAT; 58 if (fctp->theta < 1.) { 59 #pragma omp parallel private(dt_max_loc) 60 { 61 dt_max_loc=LARGE_POSITIVE_FLOAT; 62 #pragma omp for schedule(static) private(i,rtmp1,rtmp2) 63 for (i=0;imain_diagonal_low_order_transport_matrix[i]; 65 rtmp2=fctp->lumped_mass_matrix[i]; 66 if ( (rtmp1<0 && rtmp2>0.) || (rtmp1>0 && rtmp2<0.) ) { 67 dt_max_loc=MIN(dt_max_loc,-rtmp2/rtmp1); 68 } 69 } 70 #pragma omp critical 71 { 72 dt_max=MIN(dt_max,dt_max_loc); 73 } 74 } 75 #ifdef PASO_MPI 76 dt_max_loc = dt_max; 77 MPI_Allreduce(&dt_max_loc, &dt_max, 1, MPI_DOUBLE, MPI_MIN, fctp->mpi_info->comm); 78 #endif 79 if (dt_maxtheta); 80 } 81 if (dt_max <= 0.) { 82 Paso_setError(TYPE_ERROR,"Paso_SolverFCT_solve: dt must be positive."); 83 } else { 84 if (dt_maxtheta); 85 } 86 fctp->dt_max=dt_max; 87 fctp->valid_matrices=Paso_noError(); 88 } 89 return fctp->dt_max; 90 } 91 92 93 94 95 void Paso_SolverFCT_solve(Paso_FCTransportProblem* fctp, double* u, double dt, double* source, Paso_Options* options) { 96 index_t i, j; 97 int n_substeps,n, m; 98 double dt_max, omega, dt2,t; 99 double local_norm[2],norm[2],local_norm_u,local_norm_du,norm_u,norm_du, tolerance; 100 register double rtmp1,rtmp2,rtmp3,rtmp4, rtmp; 101 double *b_n=NULL, *sourceP=NULL, *sourceN=NULL, *uTilde_n=NULL, *QN_n=NULL, *QP_n=NULL, *RN_m=NULL, *RP_m=NULL, *z_m=NULL, *du_m=NULL,*u_m=NULL; 102 Paso_Coupler* QN_n_coupler=NULL, *QP_n_coupler=NULL, *uTilde_n_coupler=NULL, *RN_m_coupler=NULL, *RP_m_coupler=NULL, *u_m_coupler=NULL; 103 Paso_SystemMatrix *flux_matrix=NULL; 104 dim_t n_rows=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix); 105 bool_t converged; 106 dim_t max_m=500; 107 double inner_tol=1.e-2; 108 dim_t blockSize=Paso_FCTransportProblem_getBlockSize(fctp); 109 if (dt<=0.) { 110 Paso_setError(TYPE_ERROR,"Paso_SolverFCT_solve: dt must be positive."); 111 } 112 dt_max=Paso_FCTransportProblem_getSafeTimeStepSize(fctp); 113 /* 114 * allocate memory 115 * 116 */ 117 u_m=TMPMEMALLOC(n_rows,double); 118 Paso_checkPtr(u_m); 119 b_n=TMPMEMALLOC(n_rows,double); 120 Paso_checkPtr(b_n); 121 sourceP=TMPMEMALLOC(n_rows,double); 122 Paso_checkPtr(sourceP); 123 sourceN=TMPMEMALLOC(n_rows,double); 124 Paso_checkPtr(sourceN); 125 uTilde_n=TMPMEMALLOC(n_rows,double); 126 Paso_checkPtr(uTilde_n); 127 QN_n=TMPMEMALLOC(n_rows,double); 128 Paso_checkPtr(QN_n); 129 QP_n=TMPMEMALLOC(n_rows,double); 130 Paso_checkPtr(QP_n); 131 RN_m=TMPMEMALLOC(n_rows,double); 132 Paso_checkPtr(RN_m); 133 RP_m=TMPMEMALLOC(n_rows,double); 134 Paso_checkPtr(RP_m); 135 z_m=TMPMEMALLOC(n_rows,double); 136 Paso_checkPtr(z_m); 137 du_m=TMPMEMALLOC(n_rows,double); 138 Paso_checkPtr(du_m); 139 QN_n_coupler=Paso_Coupler_alloc(Paso_FCTransportProblem_borrowConnector(fctp),blockSize); 140 QP_n_coupler=Paso_Coupler_alloc(Paso_FCTransportProblem_borrowConnector(fctp),blockSize); 141 RN_m_coupler=Paso_Coupler_alloc(Paso_FCTransportProblem_borrowConnector(fctp),blockSize); 142 RP_m_coupler=Paso_Coupler_alloc(Paso_FCTransportProblem_borrowConnector(fctp),blockSize); 143 uTilde_n_coupler=Paso_Coupler_alloc(Paso_FCTransportProblem_borrowConnector(fctp),blockSize); 144 u_m_coupler=Paso_Coupler_alloc(Paso_FCTransportProblem_borrowConnector(fctp),blockSize); 145 flux_matrix=Paso_SystemMatrix_alloc(fctp->transport_matrix->type, 146 fctp->transport_matrix->pattern, 147 fctp->transport_matrix->row_block_size, 148 fctp->transport_matrix->col_block_size); 149 if (Paso_noError()) { 150 /* 151 * Preparation: 152 * 153 */ 154 155 /* decide on substepping */ 156 if (fctp->dt_max < LARGE_POSITIVE_FLOAT) { 157 n_substeps=ceil(dt/dt_max); 158 } else { 159 n_substeps=1; 160 } 161 dt2=dt/n_substeps; 162 printf("%d time steps of size is %e (theta = %e, dt_max=%e).\n",n_substeps, dt2,fctp->theta, dt_max); 163 /* 164 * seperate source into positive and negative part: 165 */ 166 #pragma omp parallel for private(i,rtmp) 167 for (i = 0; i < n_rows; ++i) { 168 rtmp=source[i]; 169 if (rtmp <0) { 170 sourceN[i]=-rtmp; 171 sourceP[i]=0; 172 } else { 173 sourceN[i]= 0; 174 sourceP[i]= rtmp; 175 } 176 } 177 u_m_coupler->data=u_m; 178 /* for (i = 0; i < n_rows; ++i) printf("%d : %e \n",i,source[i],sourceP[i],sourceN[i]) */ 179 /* 180 * let the show begin!!!! 181 * 182 */ 183 t=dt2; 184 n=0; 185 tolerance=options->tolerance; 186 while(n i} l_{ij}*(u^n[j]-u^n[i]) + dt2*sourceP[i] 191 * 192 * note that iteration_matrix stores the negative values of the 193 * low order transport matrix l therefore a=-dt2*(1-fctp->theta) is used. 194 * 195 */ 196 Paso_SolverFCT_setMuPaLuPbQ(b_n,fctp->lumped_mass_matrix,fctp->u_coupler, 197 -dt2*(1-fctp->theta),fctp->iteration_matrix,dt2,sourceP); 198 /* 199 * uTilde_n[i]=b[i]/m[i] 200 * 201 * fctp->iteration_matrix[i,i]=m[i]/(dt2 theta) + \frac{1}{\theta} \frac{q^-[i]}-l[i,i] 202 * 203 */ 204 if (fctp->theta > 0) { 205 Paso_solve_free(fctp->iteration_matrix); 206 omega=1./(dt2*fctp->theta); 207 rtmp2=dt2*omega; 208 #pragma omp parallel for private(i,rtmp,rtmp3,rtmp4) 209 for (i = 0; i < n_rows; ++i) { 210 rtmp=fctp->lumped_mass_matrix[i]; 211 if (ABS(rtmp)>0.) { 212 rtmp3=b_n[i]/rtmp; 213 } else { 214 rtmp3=fctp->u[i]; 215 } 216 rtmp4=rtmp*omega-fctp->main_diagonal_low_order_transport_matrix[i]; 217 if (ABS(rtmp3)>0) rtmp4+=sourceN[i]*rtmp2/rtmp3; 218 fctp->iteration_matrix->mainBlock->val[fctp->main_iptr[i]]=rtmp4; 219 uTilde_n[i]=rtmp3; 220 } 221 } else { 222 #pragma omp parallel for private(i,rtmp,rtmp3) 223 for (i = 0; i < n_rows; ++i) { 224 rtmp=fctp->lumped_mass_matrix[i]; 225 if (ABS(rtmp)>0.) { 226 rtmp3=b_n[i]/rtmp; 227 } else { 228 rtmp3=fctp->u[i]; 229 } 230 uTilde_n[i]=rtmp3; 231 } 232 omega=1.; 233 /* no update of iteration_matrix required! */ 234 } /* end (fctp->theta > 0) */ 235 236 /* distribute uTilde_n: */ 237 Paso_Coupler_startCollect(uTilde_n_coupler,uTilde_n); 238 Paso_Coupler_finishCollect(uTilde_n_coupler); 239 /* 240 * calculate QP_n[i] max_{j} (\tilde{u}[j]- \tilde{u}[i] ) 241 * QN_n[i] min_{j} (\tilde{u}[j]- \tilde{u}[i] ) 242 * 243 */ 244 Paso_SolverFCT_setQs(uTilde_n_coupler,QN_n,QP_n,fctp->iteration_matrix); 245 Paso_Coupler_startCollect(QN_n_coupler,QN_n); 246 Paso_Coupler_startCollect(QP_n_coupler,QP_n); 247 Paso_Coupler_finishCollect(QN_n_coupler); 248 Paso_Coupler_finishCollect(QP_n_coupler); 249 /* 250 * now we enter the iteration on a time-step: 251 * 252 */ 253 Paso_Coupler_copyAll(fctp->u_coupler, u_m_coupler); 254 /* REPLACE BY JACOBEAN FREE NEWTON */ 255 m=0; 256 converged=FALSE; 257 while ( (!converged) && (mlumped_mass_matrix,QN_n_coupler,QP_n_coupler,RN_m,RP_m); 274 Paso_Coupler_startCollect(RN_m_coupler,RN_m); 275 Paso_Coupler_startCollect(RP_m_coupler,RP_m); 276 /* 277 * z_m[i]=b_n[i] - (m_i*u[i] - dt2*theta*sum_{j<>i} l_{ij} (u[j]-u[i]) + dt2 q^-[i]) 278 * 279 * note that iteration_matrix stores the negative values of the 280 * low order transport matrix l therefore a=dt2*fctp->theta is used. 281 */ 282 283 Paso_SolverFCT_setMuPaLuPbQ(z_m,fctp->lumped_mass_matrix, u_m_coupler, 284 dt2*fctp->theta,fctp->iteration_matrix,dt2,sourceN); 285 /* for (i = 0; i < n_rows; ++i) { 286 * printf("%d: %e %e\n",i,z_m[i], b_n[i]); 287 * } 288 */ 289 #pragma omp parallel for private(i) 290 for (i = 0; i < n_rows; ++i) z_m[i]=b_n[i]-z_m[i]; 291 292 Paso_Coupler_finishCollect(RN_m_coupler); 293 Paso_Coupler_finishCollect(RP_m_coupler); 294 /* add corrected fluxes into z_m */ 295 Paso_FCTransportProblem_addCorrectedFluxes(z_m,flux_matrix,RN_m_coupler,RP_m_coupler); 296 /* 297 * now we solve the linear system to get the correction dt2: 298 * 299 */ 300 if (fctp->theta > 0) { 301 /* set the right hand side of the linear system: */ 302 options->tolerance=inner_tol; 303 options->verbose=TRUE; 304 Paso_solve(fctp->iteration_matrix,du_m,z_m,options); 305 /* TODO: check errors ! */ 306 } else { 307 #pragma omp parallel for private(i,rtmp,rtmp1) 308 for (i = 0; i < n_rows; ++i) { 309 rtmp=fctp->lumped_mass_matrix[i]; 310 if (ABS(rtmp)>0.) { 311 rtmp1=z_m[i]/rtmp; 312 } else { 313 rtmp1=0; 314 } 315 du_m[i]=rtmp1; 316 } 317 } 318 /* 319 * update u and calculate norm of du_m and the new u: 320 * 321 */ 322 norm_u=0.; 323 norm_du=0.; 324 #pragma omp parallel private(local_norm_u,local_norm_du) 325 { 326 local_norm_u=0.; 327 local_norm_du=0.; 328 #pragma omp for schedule(static) private(i) 329 for (i=0;impi_info->comm); 345 norm_u=norm[0]; 346 norm_du=norm[1]; 347 #endif 348 norm_du*=omega; 349 converged=(norm_du <= tolerance * norm_u); 350 m++; 351 printf("iteration step %d: norm u and du_m : %e %e\n",m,norm_u,norm_du); 352 /* TODO: check if du_m has been redu_mced */ 353 Paso_Coupler_finishCollect(u_m_coupler); 354 } /* end of inner iteration */ 355 SWAP(fctp->u,u_m,double*); 356 SWAP(fctp->u_coupler,u_m_coupler,Paso_Coupler*); 357 n++; 358 } /* end of time integration */ 359 options->tolerance=tolerance; 360 #pragma omp parallel for schedule(static) private(i) 361 for (i=0;iu[i]+fctp->u_min; 362 } 363 /* 364 * clean-up: 365 * 366 */ 367 TMPMEMFREE(b_n); 368 Paso_SystemMatrix_free(flux_matrix); 369 TMPMEMFREE(sourceP); 370 TMPMEMFREE(sourceN); 371 TMPMEMFREE(uTilde_n); 372 TMPMEMFREE(QN_n); 373 TMPMEMFREE(QP_n); 374 TMPMEMFREE(RN_m); 375 TMPMEMFREE(RP_m); 376 TMPMEMFREE(z_m); 377 TMPMEMFREE(du_m); 378 TMPMEMFREE(u_m); 379 Paso_Coupler_free(QN_n_coupler); 380 Paso_Coupler_free(QP_n_coupler); 381 Paso_Coupler_free(RN_m_coupler); 382 Paso_Coupler_free(RP_m_coupler); 383 Paso_Coupler_free(uTilde_n_coupler); 384 Paso_Coupler_free(u_m_coupler); 385 }