# Contents of /trunk/paso/src/SolverFCT_FluxControl.c

Revision 2551 - (show annotations)
Thu Jul 23 09:19:15 2009 UTC (10 years, 1 month ago) by gross
File MIME type: text/plain
File size: 18685 byte(s)
a problem with the sparse matrix unrolling fixed.

 1 2 /******************************************************* 3 * 4 * Copyright (c) 2003-2009 by University of Queensland 5 * Earth Systems Science Computational Center (ESSCC) 6 * http://www.uq.edu.au/esscc 7 * 8 * Primary Business: Queensland, Australia 9 * Licensed under the Open Software License version 3.0 10 * http://www.opensource.org/licenses/osl-3.0.php 11 * 12 *******************************************************/ 13 14 15 /**************************************************************/ 16 17 /* Paso: FluxControl */ 18 19 /**************************************************************/ 20 21 /* Author: l.gross@uq.edu.au */ 22 23 /**************************************************************/ 24 25 26 #include "Paso.h" 27 #include "SolverFCT.h" 28 #include "PasoUtil.h" 29 30 31 /**************************************************************/ 32 33 /* create the low order transport matrix and stores it negative values 34 * into the iteration_matrix accept the main diagonal which is stored seperately 35 * if fc->iteration_matrix==NULL, fc->iteration_matrix is allocated 36 * 37 * a=transport_matrix 38 * b= low_order_transport_matrix = - iteration_matrix 39 * c=main diagonal low_order_transport_matrix 40 * initialize c[i] mit a[i,i] 41 * d_ij=max(0,-a[i,j],-a[j,i]) 42 * b[i,j]=-(a[i,j]+d_ij) 43 * c[i]-=d_ij 44 */ 45 46 void Paso_FCTransportProblem_setLowOrderOperator(Paso_FCTransportProblem * fc) { 47 dim_t n=Paso_SystemMatrix_getTotalNumRows(fc->transport_matrix),i; 48 index_t iptr_ij,j,iptr_ji; 49 Paso_SystemMatrixPattern *pattern; 50 register double d_ij, sum, rtmp1, rtmp2; 51 52 if (fc==NULL) return; 53 if (fc->iteration_matrix==NULL) { 54 fc->iteration_matrix=Paso_SystemMatrix_alloc(fc->transport_matrix->type, 55 fc->transport_matrix->pattern, 56 fc->transport_matrix->row_block_size, 57 fc->transport_matrix->col_block_size, TRUE); 58 } 59 60 if (Paso_noError()) { 61 pattern=fc->iteration_matrix->pattern; 62 n=Paso_SystemMatrix_getTotalNumRows(fc->iteration_matrix); 63 #pragma omp parallel for private(i,sum,iptr_ij,j,iptr_ji,rtmp1, rtmp2,d_ij) schedule(static) 64 for (i = 0; i < n; ++i) { 65 sum=fc->transport_matrix->mainBlock->val[fc->main_iptr[i]]; 66 /* look at a[i,j] */ 67 #pragma ivdep 68 for (iptr_ij=pattern->mainPattern->ptr[i];iptr_ijmainPattern->ptr[i+1]; ++iptr_ij) { 69 j=pattern->mainPattern->index[iptr_ij]; 70 if (j!=i) { 71 /* find entry a[j,i] */ 72 for (iptr_ji=pattern->mainPattern->ptr[j]; iptr_jimainPattern->ptr[j+1]; ++iptr_ji) { 73 if (pattern->mainPattern->index[iptr_ji]==i) { 74 rtmp1=fc->transport_matrix->mainBlock->val[iptr_ij]; 75 rtmp2=fc->transport_matrix->mainBlock->val[iptr_ji]; 76 d_ij=-MIN3(0.,rtmp1,rtmp2); 77 fc->iteration_matrix->mainBlock->val[iptr_ij]=-(rtmp1+d_ij); 78 sum-=d_ij; 79 break; 80 } 81 } 82 } 83 } 84 #pragma ivdep 85 for (iptr_ij=pattern->col_couplePattern->ptr[i];iptr_ijcol_couplePattern->ptr[i+1]; ++iptr_ij) { 86 j=pattern->col_couplePattern->index[iptr_ij]; 87 /* find entry a[j,i] */ 88 for (iptr_ji=pattern->row_couplePattern->ptr[j]; iptr_jirow_couplePattern->ptr[j+1]; ++iptr_ji) { 89 if (pattern->row_couplePattern->index[iptr_ji]==i) { 90 rtmp1=fc->transport_matrix->col_coupleBlock->val[iptr_ij]; 91 rtmp2=fc->transport_matrix->row_coupleBlock->val[iptr_ji]; 92 d_ij=-MIN3(0.,rtmp1,rtmp2); 93 fc->iteration_matrix->col_coupleBlock->val[iptr_ij]=-(rtmp1+d_ij); 94 fc->iteration_matrix->row_coupleBlock->val[iptr_ji]=-(rtmp2+d_ij); 95 sum-=d_ij; 96 break; 97 } 98 } 99 } 100 /* set main diagonal entry */ 101 fc->main_diagonal_low_order_transport_matrix[i]=sum; 102 } 103 } 104 } 105 /* 106 * out_i=m_i u_i + alpha \sum_{j <> i} l_{ij} (u_j-u_i) + beta q_i 107 * 108 */ 109 void Paso_SolverFCT_setMuPaLuPbQ(double* out, 110 const double* M, 111 const Paso_Coupler* u_coupler, 112 const double a, 113 const Paso_SystemMatrix *L, 114 const double b, 115 const double* Q) 116 { 117 dim_t n, i, j; 118 Paso_SystemMatrixPattern *pattern; 119 const double *u=Paso_Coupler_borrowLocalData(u_coupler); 120 const double *remote_u=Paso_Coupler_borrowRemoteData(u_coupler); 121 register double sum, u_i, l_ij; 122 register index_t iptr_ij; 123 124 n=Paso_SystemMatrix_getTotalNumRows(L); 125 126 #pragma omp parallel for private(i) schedule(static) 127 for (i = 0; i < n; ++i) { 128 out[i]=M[i]*u[i]+b*Q[i]; 129 } 130 if (ABS(a)>0) { 131 pattern=L->pattern; 132 #pragma omp parallel for schedule(static) private(i, sum, u_i, iptr_ij, j, l_ij) 133 for (i = 0; i < n; ++i) { 134 sum=0; 135 u_i=u[i]; 136 #pragma ivdep 137 for (iptr_ij=(pattern->mainPattern->ptr[i]);iptr_ijmainPattern->ptr[i+1]; ++iptr_ij) { 138 j=pattern->mainPattern->index[iptr_ij]; 139 l_ij=L->mainBlock->val[iptr_ij]; 140 sum+=l_ij*(u[j]-u_i); 141 } 142 #pragma ivdep 143 for (iptr_ij=(pattern->col_couplePattern->ptr[i]);iptr_ijcol_couplePattern->ptr[i+1]; ++iptr_ij) { 144 j=pattern->col_couplePattern->index[iptr_ij]; 145 l_ij=L->col_coupleBlock->val[iptr_ij]; 146 sum+=l_ij*(remote_u[j]-u_i); 147 } 148 out[i]+=a*sum; 149 } 150 } 151 } 152 /* 153 * calculate QP[i] max_{i in L->pattern[i]} (u[j]-u[i] ) 154 * QN[i] min_{i in L->pattern[i]} (u[j]-u[i] ) 155 * 156 */ 157 void Paso_SolverFCT_setQs(const Paso_Coupler* u_coupler,double* QN, double* QP, const Paso_SystemMatrix *L) 158 { 159 dim_t n, i, j; 160 Paso_SystemMatrixPattern *pattern; 161 const double *u=Paso_Coupler_borrowLocalData(u_coupler); 162 const double *remote_u=Paso_Coupler_borrowRemoteData(u_coupler); 163 register double u_i, u_min_i, u_max_i, u_j; 164 register index_t iptr_ij; 165 n=Paso_SystemMatrix_getTotalNumRows(L); 166 pattern=L->pattern; 167 #pragma omp parallel for schedule(static) private(i, u_i, u_min_i, u_max_i, j, u_j, iptr_ij) 168 for (i = 0; i < n; ++i) { 169 u_i=u[i]; 170 u_min_i=u_i; 171 u_max_i=u_i; 172 #pragma ivdep 173 for (iptr_ij=(pattern->mainPattern->ptr[i]);iptr_ijmainPattern->ptr[i+1]; ++iptr_ij) { 174 j=pattern->mainPattern->index[iptr_ij]; 175 u_j=u[j]; 176 u_min_i=MIN(u_min_i,u_j); 177 u_max_i=MAX(u_max_i,u_j); 178 } 179 #pragma ivdep 180 for (iptr_ij=(pattern->col_couplePattern->ptr[i]);iptr_ijcol_couplePattern->ptr[i+1]; ++iptr_ij) { 181 j=pattern->col_couplePattern->index[iptr_ij]; 182 u_j=remote_u[j]; 183 u_min_i=MIN(u_min_i,u_j); 184 u_max_i=MAX(u_max_i,u_j); 185 } 186 QN[i]=u_min_i-u_i; 187 QP[i]=u_max_i-u_i; 188 } 189 } 190 191 /* 192 * 193 * f_{ij} = (m_{ij} - dt (1-theta) d_{ij}) (u_last[j]-u_last[i]) - (m_{ij} + dt theta d_{ij}) (u[j]-u[i]) 194 * 195 * m=fc->mass matrix 196 * d=artifical diffusion matrix = L - K = - fc->iteration matrix - fc->transport matrix (away from main diagonal) 197 */ 198 void Paso_FCTransportProblem_setAntiDiffusionFlux(const double dt, const Paso_FCTransportProblem * fc, Paso_SystemMatrix *flux_matrix, const Paso_Coupler* u_coupler) 199 { 200 dim_t n, j, i; 201 index_t iptr_ij; 202 const double *u=Paso_Coupler_borrowLocalData(u_coupler); 203 const double *u_last= Paso_Coupler_borrowLocalData(fc->u_coupler); 204 const double *remote_u=Paso_Coupler_borrowRemoteData(u_coupler); 205 const double *remote_u_last=Paso_Coupler_borrowRemoteData(fc->u_coupler); 206 const double f1=- dt * (1.-fc->theta); 207 const double f2= dt * fc->theta; 208 register double m_ij, d_ij, u_i, u_last_i, d_u_last, d_u; 209 const Paso_SystemMatrixPattern *pattern=fc->iteration_matrix->pattern; 210 n=Paso_SystemMatrix_getTotalNumRows(fc->iteration_matrix); 211 if ( (ABS(f1) >0 ) ) { 212 if ( (ABS(f2) >0 ) ) { 213 #pragma omp parallel for schedule(static) private(i, u_i, u_last_i, iptr_ij, j,m_ij,d_ij, d_u_last, d_u) 214 for (i = 0; i < n; ++i) { 215 u_i=u[i]; 216 u_last_i=u_last[i]; 217 #pragma ivdep 218 for (iptr_ij=(pattern->mainPattern->ptr[i]);iptr_ijmainPattern->ptr[i+1]; ++iptr_ij) { 219 j=pattern->mainPattern->index[iptr_ij]; 220 m_ij=fc->mass_matrix->mainBlock->val[iptr_ij]; 221 d_ij=-(fc->transport_matrix->mainBlock->val[iptr_ij]+ 222 fc->iteration_matrix->mainBlock->val[iptr_ij]); 223 d_u=u[j]-u_i; 224 d_u_last=u_last[j]-u_last_i; 225 flux_matrix->mainBlock->val[iptr_ij]=(m_ij+f1*d_ij)*d_u_last- (m_ij+f2*d_ij)*d_u; 226 } 227 #pragma ivdep 228 for (iptr_ij=(pattern->col_couplePattern->ptr[i]);iptr_ijcol_couplePattern->ptr[i+1]; ++iptr_ij) { 229 j=pattern->col_couplePattern->index[iptr_ij]; 230 m_ij=fc->mass_matrix->col_coupleBlock->val[iptr_ij]; 231 d_ij=-(fc->transport_matrix->col_coupleBlock->val[iptr_ij]+ 232 fc->iteration_matrix->col_coupleBlock->val[iptr_ij]); 233 d_u=remote_u[j]-u_i; 234 d_u_last=remote_u_last[j]-u_last_i; 235 flux_matrix->col_coupleBlock->val[iptr_ij]=(m_ij+f1*d_ij)*d_u_last- (m_ij+f2*d_ij)*d_u; 236 } 237 } 238 } else { 239 #pragma omp parallel for schedule(static) private(i, u_i, u_last_i, iptr_ij, j,m_ij,d_ij, d_u_last, d_u) 240 for (i = 0; i < n; ++i) { 241 u_i=u[i]; 242 u_last_i=u_last[i]; 243 #pragma ivdep 244 for (iptr_ij=(pattern->mainPattern->ptr[i]);iptr_ijmainPattern->ptr[i+1]; ++iptr_ij) { 245 j=pattern->mainPattern->index[iptr_ij]; 246 m_ij=fc->mass_matrix->mainBlock->val[iptr_ij]; 247 d_ij=-(fc->transport_matrix->mainBlock->val[iptr_ij]+ 248 fc->iteration_matrix->mainBlock->val[iptr_ij]); 249 d_u=u[j]-u_i; 250 d_u_last=u_last[j]-u_last_i; 251 flux_matrix->mainBlock->val[iptr_ij]=(m_ij+f1*d_ij)*d_u_last-m_ij*d_u; 252 } 253 #pragma ivdep 254 for (iptr_ij=(pattern->col_couplePattern->ptr[i]);iptr_ijcol_couplePattern->ptr[i+1]; ++iptr_ij) { 255 j=pattern->col_couplePattern->index[iptr_ij]; 256 m_ij=fc->mass_matrix->col_coupleBlock->val[iptr_ij]; 257 d_ij=-(fc->transport_matrix->col_coupleBlock->val[iptr_ij]+ 258 fc->iteration_matrix->col_coupleBlock->val[iptr_ij]); 259 d_u=remote_u[j]-u_i; 260 d_u_last=remote_u_last[j]-u_last_i; 261 flux_matrix->col_coupleBlock->val[iptr_ij]=(m_ij+f1*d_ij)*d_u_last-m_ij*d_u; 262 } 263 } 264 } 265 } else { 266 if ( (ABS(f2) >0 ) ) { 267 #pragma omp parallel for schedule(static) private(i, u_i, u_last_i, iptr_ij, j,m_ij,d_ij, d_u_last, d_u) 268 for (i = 0; i < n; ++i) { 269 u_i=u[i]; 270 u_last_i=u_last[i]; 271 #pragma ivdep 272 for (iptr_ij=(pattern->mainPattern->ptr[i]);iptr_ijmainPattern->ptr[i+1]; ++iptr_ij) { 273 j=pattern->mainPattern->index[iptr_ij]; 274 m_ij=fc->mass_matrix->mainBlock->val[iptr_ij]; 275 d_ij=-(fc->transport_matrix->mainBlock->val[iptr_ij]+ 276 fc->iteration_matrix->mainBlock->val[iptr_ij]); 277 d_u=u[j]-u_i; 278 d_u_last=u_last[j]-u_last_i; 279 flux_matrix->mainBlock->val[iptr_ij]=m_ij*d_u_last- (m_ij+f2*d_ij)*d_u; 280 } 281 #pragma ivdep 282 for (iptr_ij=(pattern->col_couplePattern->ptr[i]);iptr_ijcol_couplePattern->ptr[i+1]; ++iptr_ij) { 283 j=pattern->col_couplePattern->index[iptr_ij]; 284 m_ij=fc->mass_matrix->col_coupleBlock->val[iptr_ij]; 285 d_ij=-(fc->transport_matrix->col_coupleBlock->val[iptr_ij]+ 286 fc->iteration_matrix->col_coupleBlock->val[iptr_ij]); 287 d_u=remote_u[j]-u_i; 288 d_u_last=remote_u_last[j]-u_last_i; 289 flux_matrix->col_coupleBlock->val[iptr_ij]=m_ij*d_u_last- (m_ij+f2*d_ij)*d_u; 290 } 291 } 292 } else { 293 #pragma omp parallel for schedule(static) private(i, u_i, u_last_i, iptr_ij, j,m_ij,d_ij, d_u_last, d_u) 294 for (i = 0; i < n; ++i) { 295 u_i=u[i]; 296 u_last_i=u_last[i]; 297 #pragma ivdep 298 for (iptr_ij=(pattern->mainPattern->ptr[i]);iptr_ijmainPattern->ptr[i+1]; ++iptr_ij) { 299 j=pattern->mainPattern->index[iptr_ij]; 300 m_ij=fc->mass_matrix->mainBlock->val[iptr_ij]; 301 d_ij=-(fc->transport_matrix->mainBlock->val[iptr_ij]+ 302 fc->iteration_matrix->mainBlock->val[iptr_ij]); 303 d_u=u[j]-u_i; 304 d_u_last=u_last[j]-u_last_i; 305 flux_matrix->mainBlock->val[iptr_ij]=m_ij*(d_u_last-d_u); 306 } 307 #pragma ivdep 308 for (iptr_ij=(pattern->col_couplePattern->ptr[i]);iptr_ijcol_couplePattern->ptr[i+1]; ++iptr_ij) { 309 j=pattern->col_couplePattern->index[iptr_ij]; 310 m_ij=fc->mass_matrix->col_coupleBlock->val[iptr_ij]; 311 d_ij=-(fc->transport_matrix->col_coupleBlock->val[iptr_ij]+ 312 fc->iteration_matrix->col_coupleBlock->val[iptr_ij]); 313 d_u=remote_u[j]-u_i; 314 d_u_last=remote_u_last[j]-u_last_i; 315 flux_matrix->col_coupleBlock->val[iptr_ij]=m_ij*(d_u_last-d_u); 316 } 317 } 318 } 319 } 320 } 321 /* 322 * apply pre flux-correction: f_{ij}:=0 if f_{ij}*(u_[i]-u[j])<=0 323 * 324 */ 325 void Paso_FCTransportProblem_applyPreAntiDiffusionCorrection(Paso_SystemMatrix *f,const Paso_Coupler* u_coupler) 326 { 327 dim_t n, i, j; 328 Paso_SystemMatrixPattern *pattern; 329 const double *u=Paso_Coupler_borrowLocalData(u_coupler); 330 const double *remote_u=Paso_Coupler_borrowRemoteData(u_coupler); 331 register double u_i, f_ij; 332 register index_t iptr_ij; 333 334 n=Paso_SystemMatrix_getTotalNumRows(f); 335 pattern=f->pattern; 336 #pragma omp parallel for schedule(static) private(i, u_i, iptr_ij, j, f_ij) 337 for (i = 0; i < n; ++i) { 338 u_i=u[i]; 339 #pragma ivdep 340 for (iptr_ij=(pattern->mainPattern->ptr[i]);iptr_ijmainPattern->ptr[i+1]; ++iptr_ij) { 341 j=pattern->mainPattern->index[iptr_ij]; 342 f_ij=f->mainBlock->val[iptr_ij]; 343 if (f_ij * (u_i-u[j]) <= 0) f->mainBlock->val[iptr_ij]=0; 344 } 345 #pragma ivdep 346 for (iptr_ij=(pattern->col_couplePattern->ptr[i]);iptr_ijcol_couplePattern->ptr[i+1]; ++iptr_ij) { 347 j=pattern->col_couplePattern->index[iptr_ij]; 348 f_ij=f->col_coupleBlock->val[iptr_ij]; 349 if (f_ij * (u_i-remote_u[j]) <= 0) f->col_coupleBlock->val[iptr_ij]=0; 350 } 351 } 352 } 353 354 355 void Paso_FCTransportProblem_setRs(const Paso_SystemMatrix *f,const double* lumped_mass_matrix, 356 const Paso_Coupler* QN_coupler,const Paso_Coupler* QP_coupler,double* RN,double* RP) 357 { 358 dim_t n, i, j; 359 Paso_SystemMatrixPattern *pattern; 360 const double *QN=Paso_Coupler_borrowLocalData(QN_coupler); 361 const double *QP=Paso_Coupler_borrowLocalData(QP_coupler); 362 register double f_ij, PP_i, PN_i; 363 register index_t iptr_ij; 364 365 n=Paso_SystemMatrix_getTotalNumRows(f); 366 pattern=f->pattern; 367 #pragma omp parallel for schedule(static) private(i, iptr_ij, j, f_ij, PP_i, PN_i) 368 for (i = 0; i < n; ++i) { 369 PP_i=0.; 370 PN_i=0.; 371 #pragma ivdep 372 for (iptr_ij=(pattern->mainPattern->ptr[i]);iptr_ijmainPattern->ptr[i+1]; ++iptr_ij) { 373 j=pattern->mainPattern->index[iptr_ij]; 374 if (i != j ) { 375 f_ij=f->mainBlock->val[iptr_ij]; 376 if (f_ij <=0) { 377 PN_i+=f_ij; 378 } else { 379 PP_i+=f_ij; 380 } 381 } 382 } 383 #pragma ivdep 384 for (iptr_ij=(pattern->col_couplePattern->ptr[i]);iptr_ijcol_couplePattern->ptr[i+1]; ++iptr_ij) { 385 j=pattern->col_couplePattern->index[iptr_ij]; 386 f_ij=f->col_coupleBlock->val[iptr_ij]; 387 if (f_ij <=0 ) { 388 PN_i+=f_ij; 389 } else { 390 PP_i+=f_ij; 391 } 392 } 393 if (PN_i<0) { 394 RN[i]=MIN(1,QN[i]*lumped_mass_matrix[i]/PN_i); 395 } else { 396 RN[i]=1.; 397 } 398 if (PP_i>0) { 399 RP[i]=MIN(1,QP[i]*lumped_mass_matrix[i]/PP_i); 400 } else { 401 RP[i]=1.; 402 } 403 } 404 405 } 406 407 void Paso_FCTransportProblem_addCorrectedFluxes(double* f,const Paso_SystemMatrix *flux_matrix, const Paso_Coupler* RN_coupler, const Paso_Coupler* RP_coupler) 408 { 409 dim_t n, i, j; 410 Paso_SystemMatrixPattern *pattern; 411 register double RN_i, RP_i, f_i, f_ij; 412 register index_t iptr_ij; 413 const double *RN=Paso_Coupler_borrowLocalData(RN_coupler); 414 const double *remote_RN=Paso_Coupler_borrowRemoteData(RN_coupler); 415 const double *RP=Paso_Coupler_borrowLocalData(RP_coupler); 416 const double *remote_RP=Paso_Coupler_borrowRemoteData(RP_coupler); 417 n=Paso_SystemMatrix_getTotalNumRows(flux_matrix); 418 419 pattern=flux_matrix->pattern; 420 #pragma omp parallel for schedule(static) private(i, RN_i, RP_i, iptr_ij, j, f_ij, f_i) 421 for (i = 0; i < n; ++i) { 422 RN_i=RN[i]; 423 RP_i=RP[i]; 424 f_i=0; 425 #pragma ivdep 426 for (iptr_ij=(pattern->mainPattern->ptr[i]);iptr_ijmainPattern->ptr[i+1]; ++iptr_ij) { 427 j=pattern->mainPattern->index[iptr_ij]; 428 f_ij=flux_matrix->mainBlock->val[iptr_ij]; 429 if (f_ij >=0) { 430 f_i+=f_ij*MIN(RP_i,RN[j]); 431 } else { 432 f_i+=f_ij*MIN(RN_i,RP[j]); 433 } 434 } 435 #pragma ivdep 436 for (iptr_ij=(pattern->col_couplePattern->ptr[i]);iptr_ijcol_couplePattern->ptr[i+1]; ++iptr_ij) { 437 j=pattern->col_couplePattern->index[iptr_ij]; 438 f_ij=flux_matrix->col_coupleBlock->val[iptr_ij]; 439 if (f_ij >=0) { 440 f_i+=f_ij*MIN(RP_i,remote_RN[j]); 441 }else { 442 f_i+=f_ij*MIN(RN_i,remote_RP[j]); 443 } 444 } 445 f[i]+=f_i; 446 } 447 }