/[escript]/trunk/paso/src/FCT_Solver.c
ViewVC logotype

Diff of /trunk/paso/src/FCT_Solver.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 3794 by gross, Wed Feb 1 23:02:26 2012 UTC revision 3823 by gross, Mon Feb 13 08:09:32 2012 UTC
# Line 97  void Paso_FCT_Solver_free(Paso_FCT_Solve Line 97  void Paso_FCT_Solver_free(Paso_FCT_Solve
97  double Paso_FCT_Solver_getSafeTimeStepSize(Paso_TransportProblem* fctp)  double Paso_FCT_Solver_getSafeTimeStepSize(Paso_TransportProblem* fctp)
98  {  {
99     dim_t i, n;     dim_t i, n;
100       double dt_max_loc=LARGE_POSITIVE_FLOAT;
101     double dt_max=LARGE_POSITIVE_FLOAT;     double dt_max=LARGE_POSITIVE_FLOAT;
    index_t fail=0;  
102     n=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix);     n=Paso_SystemMatrix_getTotalNumRows(fctp->transport_matrix);
103     /* set low order transport operator */     /* set low order transport operator */
104     Paso_FCT_setLowOrderOperator(fctp);     Paso_FCT_setLowOrderOperator(fctp);
# Line 108  double Paso_FCT_Solver_getSafeTimeStepSi Line 108  double Paso_FCT_Solver_getSafeTimeStepSi
108           *  calculate time step size:                                                     *  calculate time step size:                                          
109          */          */
110          dt_max=LARGE_POSITIVE_FLOAT;          dt_max=LARGE_POSITIVE_FLOAT;
111      fail=0;          #pragma omp parallel private(i, dt_max_loc)
         #pragma omp parallel private(i)  
112          {          {
113                 double dt_max_loc=LARGE_POSITIVE_FLOAT;  
            index_t fail_loc=0;  
114                 #pragma omp for schedule(static)                 #pragma omp for schedule(static)
115                 for (i=0;i<n;++i) {                 for (i=0;i<n;++i) {
116                    const register double l_ii=fctp->main_diagonal_low_order_transport_matrix[i];                    const double l_ii=fctp->main_diagonal_low_order_transport_matrix[i];
117                    const register double m_i=fctp->lumped_mass_matrix[i];                    const double m_i=fctp->lumped_mass_matrix[i];
118            if ( (m_i > 0) ) {            if ( m_i > 0 ) {
119                if (l_ii<0) dt_max_loc=MIN(dt_max_loc,m_i/(-l_ii));                if (l_ii<0) dt_max_loc=MIN(dt_max_loc,m_i/(-l_ii));
120            } else {                    }
121                fail_loc=-1;             }
           }  
                }  
122                 #pragma omp critical                 #pragma omp critical
123                 {                 {
124                    dt_max=MIN(dt_max,dt_max_loc);                    dt_max=MIN(dt_max,dt_max_loc);
           fail=MIN(fail, fail_loc);  
125                 }                 }
126          }          }
127          #ifdef ESYS_MPI          #ifdef ESYS_MPI
128          {          {
129             double rtmp_loc[2], rtmp[2];                 dt_max_loc=dt_max;
130                 rtmp_loc[0]=dt_max;                 MPI_Allreduce(&dt_max_loc, &dt_max, 1, MPI_DOUBLE, MPI_MIN, fctp->mpi_info->comm);
            rtmp_loc[1]= (double) fail;  
                MPI_Allreduce(rtmp_loc, rtmp, 2, MPI_DOUBLE, MPI_MIN, fctp->mpi_info->comm);  
            dt_max=rtmp[0];  
            fail = rtmp[1] < 0 ? -1 : 0;  
131      }      }
132          #endif          #endif
133          if (fail < 0 ) {      if (dt_max<LARGE_POSITIVE_FLOAT) dt_max*=2.;
        Esys_setError(VALUE_ERROR, "Paso_FCTSolver_getSafeTimeStepSize: negative mass matrix entries detected.");  
        return -1;  
     } else {  
         if (dt_max<LARGE_POSITIVE_FLOAT) dt_max*=2.;  
     }  
134     }     }
135     return dt_max;     return dt_max;
136  }  }
# Line 171  void Paso_FCT_Solver_initialize(const do Line 157  void Paso_FCT_Solver_initialize(const do
157      fct_solver->dt = dt;      fct_solver->dt = dt;
158      #pragma omp parallel for private(i)      #pragma omp parallel for private(i)
159      for (i = 0; i < n; ++i) {      for (i = 0; i < n; ++i) {
160             const register double m=fctp->lumped_mass_matrix[i];             const double m_i=fctp->lumped_mass_matrix[i];
161         const register double l_ii = fctp->main_diagonal_low_order_transport_matrix[i];         const double l_ii = fctp->main_diagonal_low_order_transport_matrix[i];
162             fctp->iteration_matrix->mainBlock->val[main_iptr[i]] = m * omega - l_ii;         if ( m_i > 0 ) {
163                   fctp->iteration_matrix->mainBlock->val[main_iptr[i]] = m_i * omega - l_ii;
164           } else {
165               fctp->iteration_matrix->mainBlock->val[main_iptr[i]] = ABS(m_i * omega - l_ii)/(EPSILON*EPSILON);
166           }
167      }      }
168            
169      /* allocate preconditioner/solver */      /* allocate preconditioner/solver */
# Line 186  void Paso_FCT_Solver_initialize(const do Line 176  void Paso_FCT_Solver_initialize(const do
176  /* options2.preconditioner = PASO_GS; */  /* options2.preconditioner = PASO_GS; */
177      }      }
178      options2.use_local_preconditioner = FALSE;      options2.use_local_preconditioner = FALSE;
179      options2.sweeps=1;      options2.sweeps=-1;
180            
181      Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER_INIT);      Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER_INIT);
182      Paso_SystemMatrix_setPreconditioner(fctp->iteration_matrix, &options2);      Paso_SystemMatrix_setPreconditioner(fctp->iteration_matrix, &options2);
# Line 219  err_t Paso_FCT_Solver_update(Paso_FCT_So Line 209  err_t Paso_FCT_Solver_update(Paso_FCT_So
209  err_t Paso_FCT_Solver_update_LCN(Paso_FCT_Solver *fct_solver, double * u, double *u_old, Paso_Options* options, Paso_Performance *pp)  err_t Paso_FCT_Solver_update_LCN(Paso_FCT_Solver *fct_solver, double * u, double *u_old, Paso_Options* options, Paso_Performance *pp)
210  {  {
211      double const dt = fct_solver->dt;      double const dt = fct_solver->dt;
212      dim_t sweep_max;      dim_t sweep_max, i;
213      double *b = fct_solver->b;      double *b = fct_solver->b;
214      double const RTOL = options->tolerance;      double const RTOL = options->tolerance;
215      const dim_t n=Paso_TransportProblem_getTotalNumRows(fct_solver->transportproblem);      const dim_t n=Paso_TransportProblem_getTotalNumRows(fct_solver->transportproblem);
216      Paso_SystemMatrix * iteration_matrix = fct_solver->transportproblem->iteration_matrix;      Paso_SystemMatrix * iteration_matrix = fct_solver->transportproblem->iteration_matrix;
217        const index_t*  main_iptr=Paso_TransportProblem_borrowMainDiagonalPointer(fct_solver->transportproblem);
218      err_t errorCode = SOLVER_NO_ERROR;      err_t errorCode = SOLVER_NO_ERROR;
219        double norm_u_tilde;
220        
     
221      Paso_Coupler_startCollect(fct_solver->u_old_coupler,u_old);      Paso_Coupler_startCollect(fct_solver->u_old_coupler,u_old);
222      Paso_Coupler_finishCollect(fct_solver->u_old_coupler);      Paso_Coupler_finishCollect(fct_solver->u_old_coupler);
223            
224      /* b[i]=m*u_tilde[i] = m u_old[i] + dt/2 sum_{j <> i} l_{ij}*(u_old[j]-u_old[i])      /* b[i]=m*u_tilde[i] = m u_old[i] + dt/2 sum_{j <> i} l_{ij}*(u_old[j]-u_old[i])  
225               = u_tilde[i]   = u_old[i] where constraint m<0.
226          * note that iteration_matrix stores the negative values of the          * note that iteration_matrix stores the negative values of the
227          * low order transport matrix l. Therefore a=-dt*0.5 is used. */          * low order transport matrix l. Therefore a=-dt*0.5 is used. */
228                
229      Paso_FCT_Solver_setMuPaLu(b, fct_solver->transportproblem->lumped_mass_matrix,      Paso_FCT_Solver_setMuPaLu(b, fct_solver->transportproblem->lumped_mass_matrix,
230                   fct_solver->u_old_coupler, -dt*0.5, iteration_matrix);                   fct_solver->u_old_coupler, -dt*0.5, iteration_matrix);
       
231      /* solve for u_tilde : u_tilda = m^{-1} * b   */      /* solve for u_tilde : u_tilda = m^{-1} * b   */
232      Paso_FCT_FluxLimiter_setU_tilda(fct_solver->flux_limiter, b);      Paso_FCT_FluxLimiter_setU_tilda(fct_solver->flux_limiter, b);
233      /* u_tilda_connector is completed */      /* u_tilda_connector is completed */
234          
235      /* calculate anti-diffusive fluxes for u_tilda */      /* calculate anti-diffusive fluxes for u_tilda */
236      Paso_FCT_setAntiDiffusionFlux_linearCN(fct_solver->flux_limiter->antidiffusive_fluxes,      Paso_FCT_setAntiDiffusionFlux_linearCN(fct_solver->flux_limiter->antidiffusive_fluxes,
237                         fct_solver->transportproblem, dt,                         fct_solver->transportproblem, dt,
238                         fct_solver->flux_limiter->u_tilde_coupler,                         fct_solver->flux_limiter->u_tilde_coupler,
239                         fct_solver->u_old_coupler);                         fct_solver->u_old_coupler);
240      
241    
242     /* b_i += sum_{j} limitation factor_{ij} * antidiffusive_flux_{ij} */     /* b_i += sum_{j} limitation factor_{ij} * antidiffusive_flux_{ij} */
243     Paso_FCT_FluxLimiter_addLimitedFluxes_Start(fct_solver->flux_limiter);     Paso_FCT_FluxLimiter_addLimitedFluxes_Start(fct_solver->flux_limiter);
244     Paso_FCT_FluxLimiter_addLimitedFluxes_Complete(fct_solver->flux_limiter, b);     Paso_FCT_FluxLimiter_addLimitedFluxes_Complete(fct_solver->flux_limiter, b);
245        
246         Paso_Scale(n, b,fct_solver->omega );    
247     /* solve (m-dt/2*L) u = b */       /* solve (m-dt/2*L) u = b in the form (omega*m-L) u = b * omega with omega*dt/2=1 */
248       #pragma omp for private(i) schedule(static)
249       for (i = 0; i < n; ++i) {
250           if (! (fct_solver->transportproblem->lumped_mass_matrix[i] > 0 ) ) {
251         b[i] = fct_solver->flux_limiter->u_tilde[i]
252             * fct_solver->transportproblem->iteration_matrix->mainBlock->val[main_iptr[i]];
253           }
254       }
255     /* initial guess is u<- -u + 2*u_tilde */     /* initial guess is u<- -u + 2*u_tilde */
256     Paso_Update(n, -1., u, 2., fct_solver->flux_limiter->u_tilde);     Paso_Update(n, -1., u, 2., fct_solver->flux_limiter->u_tilde);
257    
258     sweep_max = MAX((int) (- 2 * log(RTOL)/log(2.)-0.5),1);     sweep_max = MAX((int) (- 2 * log(RTOL)/log(2.)-0.5),1);
259         norm_u_tilde=Paso_lsup(n, fct_solver->flux_limiter->u_tilde, fct_solver->flux_limiter->mpi_info);
260       if (options->verbose) {
261           printf("Paso_FCT_Solver_update_LCN: u_tilda lsup = %e (rtol = %e, max. sweeps = %d)\n",norm_u_tilde,RTOL * norm_u_tilde ,sweep_max);
262       }
263     errorCode = Paso_Preconditioner_Smoother_solve_byTolerance( iteration_matrix,  ((Paso_Preconditioner*) (iteration_matrix->solver_p))->gs,     errorCode = Paso_Preconditioner_Smoother_solve_byTolerance( iteration_matrix,  ((Paso_Preconditioner*) (iteration_matrix->solver_p))->gs,
264                             u, b, RTOL, &sweep_max, TRUE);                             u, b, RTOL, &sweep_max, TRUE);
265     if (errorCode == PRECONDITIONER_NO_ERROR) {     if (errorCode == PRECONDITIONER_NO_ERROR) {
# Line 301  err_t Paso_FCT_Solver_updateNL(Paso_FCT_ Line 304  err_t Paso_FCT_Solver_updateNL(Paso_FCT_
304            /* b[i]=m_i* u_old[i] */            /* b[i]=m_i* u_old[i] */
305            #pragma omp for private(i) schedule(static)            #pragma omp for private(i) schedule(static)
306            for (i = 0; i < n; ++i) {            for (i = 0; i < n; ++i) {
307                 b[i]=u_old[i]* fctp->lumped_mass_matrix[i];             if (fctp->lumped_mass_matrix[i] > 0 ) {
308               b[i]=u_old[i]* fctp->lumped_mass_matrix[i];
309               } else {
310               b[i]=u_old[i];
311               }
312            }            }
313      } else {      } else {
314         /* b[i]=m_i* u_old[i] + dt/2 sum_{j <> i} l_{ij}*(u_old[j]-u_old[i]) = m_i * u_tilde_i         /* b[i]=m_i* u_old[i] + dt/2 sum_{j <> i} l_{ij}*(u_old[j]-u_old[i]) = m_i * u_tilde_i where m_i>0
315        *     = u_old[i]  otherwise
316          * note that iteration_matrix stores the negative values of the          * note that iteration_matrix stores the negative values of the
317          * low order transport matrix l. Therefore a=-dt*0.5 is used. */          * low order transport matrix l. Therefore a=-dt*0.5 is used. */
318         Paso_FCT_Solver_setMuPaLu(b,fctp->lumped_mass_matrix,fct_solver->u_old_coupler,-dt*0.5,fctp->iteration_matrix);         Paso_FCT_Solver_setMuPaLu(b,fctp->lumped_mass_matrix,fct_solver->u_old_coupler,-dt*0.5,fctp->iteration_matrix);
319      }              }        
320      Paso_FCT_FluxLimiter_setU_tilda(flux_limiter, b); /* u_tilda = m^{-1} b */      Paso_FCT_FluxLimiter_setU_tilda(flux_limiter, b); /* u_tilda = m^{-1} b */
321      /* u_tilda_connector is completed */      /* u_tilda_connector is completed */
     
322      /**********************************************************************************************************************/        /**********************************************************************************************************************/  
323      /* calculate stopping criterium */      /* calculate stopping criterium */
324      norm_u_tilde=Paso_lsup(n, flux_limiter->u_tilde, flux_limiter->mpi_info);      norm_u_tilde=Paso_lsup(n, flux_limiter->u_tilde, flux_limiter->mpi_info);
# Line 336  err_t Paso_FCT_Solver_updateNL(Paso_FCT_ Line 343  err_t Paso_FCT_Solver_updateNL(Paso_FCT_
343           Paso_FCT_FluxLimiter_addLimitedFluxes_Start(flux_limiter); /* uses u_tilde */             Paso_FCT_FluxLimiter_addLimitedFluxes_Start(flux_limiter); /* uses u_tilde */  
344    
345           /*           /*
346        * z_m[i]=b[i] - (m_i*u[i] - omega*sum_{j<>i} l_{ij} (u[j]-u[i]) ) omega = dt/2 or dt .        * z_m[i]=b[i] - (m_i*u[i] - omega*sum_{j<>i} l_{ij} (u[j]-u[i]) ) where m_i>0
347          *       ==b[i] - u[i] = u_tilda[i]-u[i] =0 otherwise
348          *
349          * omega = dt/2 or dt .
350        *        *
351        * note that iteration_matrix stores the negative values of the        * note that iteration_matrix stores the negative values of the
352        * low order transport matrix l. Therefore a=dt*theta is used.        * low order transport matrix l. Therefore a=dt*theta is used.
# Line 350  err_t Paso_FCT_Solver_updateNL(Paso_FCT_ Line 360  err_t Paso_FCT_Solver_updateNL(Paso_FCT_
360    
361        Paso_Update(n,-1.,z,1.,b);  /* z=b-z */        Paso_Update(n,-1.,z,1.,b);  /* z=b-z */
362    
   
363        /* z_i += sum_{j} limitation factor_{ij} * antidiffusive_flux_{ij} */        /* z_i += sum_{j} limitation factor_{ij} * antidiffusive_flux_{ij} */
364        Paso_FCT_FluxLimiter_addLimitedFluxes_Complete(flux_limiter, z);        Paso_FCT_FluxLimiter_addLimitedFluxes_Complete(flux_limiter, z);
365        
# Line 379  err_t Paso_FCT_Solver_updateNL(Paso_FCT_ Line 388  err_t Paso_FCT_Solver_updateNL(Paso_FCT_
388                        du, z, 1, FALSE);                        du, z, 1, FALSE);
389            options->num_iter++;            options->num_iter++;
390         }                   }          
391            
392    
393         Paso_Update(n,1.,u,fct_solver->omega,du);         Paso_Update(n,1.,u,fct_solver->omega,du);
394         norm_du_old=norm_du;         norm_du_old=norm_du;
# Line 450  void Paso_FCT_setAntiDiffusionFlux_CN(Pa Line 459  void Paso_FCT_setAntiDiffusionFlux_CN(Pa
459    
460    #pragma omp parallel for schedule(static) private(i, iptr_ij)    #pragma omp parallel for schedule(static) private(i, iptr_ij)
461    for (i = 0; i < n; ++i) {    for (i = 0; i < n; ++i) {
462        const register double u_i     = u[i];        const double u_i     = u[i];
463        const register double u_old_i = u_old[i];        const double u_old_i = u_old[i];
464    
465        #pragma ivdep        #pragma ivdep
466        for (iptr_ij=(pattern->mainPattern->ptr[i]);iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {        for (iptr_ij=(pattern->mainPattern->ptr[i]);iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
467      const register index_t j      = pattern->mainPattern->index[iptr_ij];      const index_t j      = pattern->mainPattern->index[iptr_ij];
468      const register double m_ij    = fct->mass_matrix->mainBlock->val[iptr_ij];      const double m_ij    = fct->mass_matrix->mainBlock->val[iptr_ij];
469      const register double d_ij    = fct->transport_matrix->mainBlock->val[iptr_ij]+fct->iteration_matrix->mainBlock->val[iptr_ij]; /* this is in fact -d_ij */      const double d_ij    = fct->transport_matrix->mainBlock->val[iptr_ij]+fct->iteration_matrix->mainBlock->val[iptr_ij]; /* this is in fact -d_ij */
470      const register double u_old_j = u_old[j];      const double u_old_j = u_old[j];
471      const register double u_j     = u[j];      const double u_j     = u[j];
472            
473      /* (m_{ij} - dt (1-theta) d_{ij}) (u_old[j]-u_old[i]) - (m_{ij} + dt theta d_{ij}) (u[j]-u[i]) */      /* (m_{ij} - dt (1-theta) d_{ij}) (u_old[j]-u_old[i]) - (m_{ij} + dt theta d_{ij}) (u[j]-u[i]) */
474      flux_matrix->mainBlock->val[iptr_ij]=(m_ij+dt_half*d_ij)*(u_old_j-u_old_i) - (m_ij-dt_half*d_ij)*(u_j-u_i);      flux_matrix->mainBlock->val[iptr_ij]=(m_ij+dt_half*d_ij)*(u_old_j-u_old_i) - (m_ij-dt_half*d_ij)*(u_j-u_i);
# Line 467  void Paso_FCT_setAntiDiffusionFlux_CN(Pa Line 476  void Paso_FCT_setAntiDiffusionFlux_CN(Pa
476        }        }
477        #pragma ivdep        #pragma ivdep
478        for (iptr_ij=(pattern->col_couplePattern->ptr[i]);iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {        for (iptr_ij=(pattern->col_couplePattern->ptr[i]);iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
479      const register index_t j      = pattern->col_couplePattern->index[iptr_ij];      const index_t j      = pattern->col_couplePattern->index[iptr_ij];
480      const register double m_ij    = fct->mass_matrix->col_coupleBlock->val[iptr_ij];      const double m_ij    = fct->mass_matrix->col_coupleBlock->val[iptr_ij];
481      const register double d_ij    = fct->transport_matrix->col_coupleBlock->val[iptr_ij]+fct->iteration_matrix->col_coupleBlock->val[iptr_ij]; /* this is in fact -d_ij */      const double d_ij    = fct->transport_matrix->col_coupleBlock->val[iptr_ij]+fct->iteration_matrix->col_coupleBlock->val[iptr_ij]; /* this is in fact -d_ij */
482          const register double u_old_j = remote_u_old[j];          const double u_old_j = remote_u_old[j];
483      const register double u_j     = remote_u[j];      const double u_j     = remote_u[j];
484      flux_matrix->col_coupleBlock->val[iptr_ij]=(m_ij+dt_half*d_ij)*(u_old_j-u_old_i)- (m_ij-dt_half*d_ij)*(u_j-u_i);      flux_matrix->col_coupleBlock->val[iptr_ij]=(m_ij+dt_half*d_ij)*(u_old_j-u_old_i)- (m_ij-dt_half*d_ij)*(u_j-u_i);
485        }        }
486    }    }
# Line 495  void Paso_FCT_setAntiDiffusionFlux_BE(Pa Line 504  void Paso_FCT_setAntiDiffusionFlux_BE(Pa
504    
505    #pragma omp parallel for schedule(static) private(i, iptr_ij)    #pragma omp parallel for schedule(static) private(i, iptr_ij)
506    for (i = 0; i < n; ++i) {    for (i = 0; i < n; ++i) {
507        const register double u_i     = u[i];        const double u_i     = u[i];
508        const register double u_old_i = u_old[i];        const double u_old_i = u_old[i];
509        #pragma ivdep        #pragma ivdep
510        for (iptr_ij=pattern->mainPattern->ptr[i]; iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {        for (iptr_ij=pattern->mainPattern->ptr[i]; iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
511    
512      const register index_t j      = pattern->mainPattern->index[iptr_ij];      const index_t j      = pattern->mainPattern->index[iptr_ij];
513      const register double m_ij    = fct->mass_matrix->mainBlock->val[iptr_ij];      const double m_ij    = fct->mass_matrix->mainBlock->val[iptr_ij];
514      const register double d_ij    = fct->transport_matrix->mainBlock->val[iptr_ij]+fct->iteration_matrix->mainBlock->val[iptr_ij]; /* this is in fact -d_ij */      const double d_ij    = fct->transport_matrix->mainBlock->val[iptr_ij]+fct->iteration_matrix->mainBlock->val[iptr_ij]; /* this is in fact -d_ij */
515      const register double u_old_j = u_old[j];      const double u_old_j = u_old[j];
516      const register double u_j     = u[j];      const double u_j     = u[j];
517            
518      flux_matrix->mainBlock->val[iptr_ij]=m_ij*(u_old_j-u_old_i)- (m_ij-dt*d_ij)*(u_j-u_i);      flux_matrix->mainBlock->val[iptr_ij]=m_ij*(u_old_j-u_old_i)- (m_ij-dt*d_ij)*(u_j-u_i);
519        }        }
520        #pragma ivdep        #pragma ivdep
521        for (iptr_ij=pattern->col_couplePattern->ptr[i]; iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {        for (iptr_ij=pattern->col_couplePattern->ptr[i]; iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
522      const register index_t j      = pattern->col_couplePattern->index[iptr_ij];      const index_t j      = pattern->col_couplePattern->index[iptr_ij];
523      const register double m_ij    = fct->mass_matrix->col_coupleBlock->val[iptr_ij]; /* this is in fact -d_ij */      const double m_ij    = fct->mass_matrix->col_coupleBlock->val[iptr_ij]; /* this is in fact -d_ij */
524      const register double d_ij    = fct->transport_matrix->col_coupleBlock->val[iptr_ij]+fct->iteration_matrix->col_coupleBlock->val[iptr_ij];      const double d_ij    = fct->transport_matrix->col_coupleBlock->val[iptr_ij]+fct->iteration_matrix->col_coupleBlock->val[iptr_ij];
525          const register double u_old_j = remote_u_old[j];          const double u_old_j = remote_u_old[j];
526      const register double u_j     = remote_u[j];      const double u_j     = remote_u[j];
527            
528      flux_matrix->col_coupleBlock->val[iptr_ij]=m_ij*(u_old_j-u_old_i)- (m_ij-dt*d_ij)*(u_j-u_i);      flux_matrix->col_coupleBlock->val[iptr_ij]=m_ij*(u_old_j-u_old_i)- (m_ij-dt*d_ij)*(u_j-u_i);
529        }        }
# Line 549  void Paso_FCT_setAntiDiffusionFlux_linea Line 558  void Paso_FCT_setAntiDiffusionFlux_linea
558    
559    #pragma omp parallel for schedule(static) private(i, iptr_ij)    #pragma omp parallel for schedule(static) private(i, iptr_ij)
560    for (i = 0; i < n; ++i) {    for (i = 0; i < n; ++i) {
561        const register double u_tilde_i = u_tilde[i];        const double u_tilde_i = u_tilde[i];
562        const register double u_old_i   = u_old[i];        const double u_old_i   = u_old[i];
563        const register double du_i      = u_old_i - u_tilde_i;        const double du_i      = u_tilde_i - u_old_i;
564        #pragma ivdep        #pragma ivdep
565        for (iptr_ij=(pattern->mainPattern->ptr[i]);iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {        for (iptr_ij=(pattern->mainPattern->ptr[i]);iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
566            
567        const register index_t j      = pattern->mainPattern->index[iptr_ij];        const index_t j      = pattern->mainPattern->index[iptr_ij];
568        const register double m_ij    = fct->mass_matrix->mainBlock->val[iptr_ij];        const double m_ij    = fct->mass_matrix->mainBlock->val[iptr_ij];
569        const register double d_ij    = fct->transport_matrix->mainBlock->val[iptr_ij]+fct->iteration_matrix->mainBlock->val[iptr_ij]; /* this is in fact -d_ij */        const double d_ij    = fct->transport_matrix->mainBlock->val[iptr_ij]+fct->iteration_matrix->mainBlock->val[iptr_ij]; /* this is in fact -d_ij */
570            const register double u_tilde_j = u_tilde[j];            const double u_tilde_j = u_tilde[j];
571        const register double u_old_j = u_old[j];        const double u_old_j = u_old[j];
572        const register double du_j    = u_old_j - u_tilde_j;        const double du_j    = u_tilde_j - u_old_j;
573                
574        flux_matrix->mainBlock->val[iptr_ij]=2 * m_ij * ( du_i - du_j ) + dt * d_ij * ( u_tilde_j - u_tilde_i);        flux_matrix->mainBlock->val[iptr_ij]= 2 * m_ij * ( du_i - du_j ) - dt * d_ij * ( u_tilde_i - u_tilde_j);
   
575        }        }
576        #pragma ivdep        #pragma ivdep
577        for (iptr_ij=(pattern->col_couplePattern->ptr[i]);iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {        for (iptr_ij=(pattern->col_couplePattern->ptr[i]);iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
578            
579      const register index_t j      = pattern->col_couplePattern->index[iptr_ij];      const index_t j      = pattern->col_couplePattern->index[iptr_ij];
580      const register double m_ij    = fct->mass_matrix->col_coupleBlock->val[iptr_ij];      const double m_ij    = fct->mass_matrix->col_coupleBlock->val[iptr_ij];
581      const register double d_ij    = fct->transport_matrix->col_coupleBlock->val[iptr_ij]+fct->iteration_matrix->col_coupleBlock->val[iptr_ij];/* this is in fact -d_ij */      const double d_ij    = fct->transport_matrix->col_coupleBlock->val[iptr_ij]+fct->iteration_matrix->col_coupleBlock->val[iptr_ij];/* this is in fact -d_ij */
582          const register double u_tilde_j = remote_u_tilde[j];          const double u_tilde_j = remote_u_tilde[j];
583      const register double u_old_j = remote_u_old[j];      const double u_old_j = remote_u_old[j];
584      const register double du_j    = u_old_j - u_tilde_j;          const double du_j    = u_tilde_j - u_old_j;
585    
586      flux_matrix->col_coupleBlock->val[iptr_ij]= 2 * m_ij * ( du_i - du_j ) + dt * d_ij *  ( u_tilde_j - u_tilde_i);      flux_matrix->col_coupleBlock->val[iptr_ij]= 2 * m_ij * ( du_i - du_j ) - dt * d_ij *  ( u_tilde_i - u_tilde_j);
587            
588        }        }
589    }    }
# Line 622  void Paso_FCT_setLowOrderOperator(Paso_T Line 630  void Paso_FCT_setLowOrderOperator(Paso_T
630  /* printf("sum[%d] = %e -> ", i, sum); */  /* printf("sum[%d] = %e -> ", i, sum); */
631            /* look at a[i,j] */            /* look at a[i,j] */
632            for (iptr_ij=pattern->mainPattern->ptr[i];iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {            for (iptr_ij=pattern->mainPattern->ptr[i];iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
633                const register index_t j    = pattern->mainPattern->index[iptr_ij];                const index_t j    = pattern->mainPattern->index[iptr_ij];
634                const register double rtmp1 = fc->transport_matrix->mainBlock->val[iptr_ij];                const double rtmp1 = fc->transport_matrix->mainBlock->val[iptr_ij];
635            if (j!=i) {            if (j!=i) {
636                   /* find entry a[j,i] */                   /* find entry a[j,i] */
637                   #pragma ivdep                   #pragma ivdep
638                   for (iptr_ji=pattern->mainPattern->ptr[j]; iptr_ji<pattern->mainPattern->ptr[j+1]; ++iptr_ji) {                   for (iptr_ji=pattern->mainPattern->ptr[j]; iptr_ji<pattern->mainPattern->ptr[j+1]; ++iptr_ji) {
639                        
640                      if ( pattern->mainPattern->index[iptr_ji] == i) {                      if ( pattern->mainPattern->index[iptr_ji] == i) {
641                  const register double rtmp2=fc->transport_matrix->mainBlock->val[iptr_ji];                  const double rtmp2=fc->transport_matrix->mainBlock->val[iptr_ji];
642  /*  /*
643  printf("a[%d,%d]=%e\n",i,j,rtmp1);  printf("a[%d,%d]=%e\n",i,j,rtmp1);
644  printf("a[%d,%d]=%e\n",j,i,rtmp2);  printf("a[%d,%d]=%e\n",j,i,rtmp2);
645  */  */
646    
647                          const register double d_ij=-MIN3(0.,rtmp1,rtmp2);                          const double d_ij=-MIN3(0.,rtmp1,rtmp2);
648                          fc->iteration_matrix->mainBlock->val[iptr_ij]=-(rtmp1+d_ij);                          fc->iteration_matrix->mainBlock->val[iptr_ij]=-(rtmp1+d_ij);
649  /* printf("l[%d,%d]=%e\n",i,j,fc->iteration_matrix->mainBlock->val[iptr_ij]); */  /* printf("l[%d,%d]=%e\n",i,j,fc->iteration_matrix->mainBlock->val[iptr_ij]); */
650                          sum-=d_ij;                          sum-=d_ij;
# Line 646  printf("a[%d,%d]=%e\n",j,i,rtmp2); Line 654  printf("a[%d,%d]=%e\n",j,i,rtmp2);
654               }               }
655            }            }
656            for (iptr_ij=pattern->col_couplePattern->ptr[i];iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {            for (iptr_ij=pattern->col_couplePattern->ptr[i];iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
657                const register index_t    j = pattern->col_couplePattern->index[iptr_ij];                const index_t    j = pattern->col_couplePattern->index[iptr_ij];
658                const register double  rtmp1 = fc->transport_matrix->col_coupleBlock->val[iptr_ij];                const double  rtmp1 = fc->transport_matrix->col_coupleBlock->val[iptr_ij];
659            /* find entry a[j,i] */            /* find entry a[j,i] */
660                #pragma ivdep                #pragma ivdep
661                for (iptr_ji=pattern->row_couplePattern->ptr[j]; iptr_ji<pattern->row_couplePattern->ptr[j+1]; ++iptr_ji) {                for (iptr_ji=pattern->row_couplePattern->ptr[j]; iptr_ji<pattern->row_couplePattern->ptr[j+1]; ++iptr_ji) {
662                      if (pattern->row_couplePattern->index[iptr_ji]==i) {                      if (pattern->row_couplePattern->index[iptr_ji]==i) {
663                          const register double rtmp2=fc->transport_matrix->row_coupleBlock->val[iptr_ji];                          const double rtmp2=fc->transport_matrix->row_coupleBlock->val[iptr_ji];
664                          const register double d_ij=-MIN3(0.,rtmp1,rtmp2);                          const double d_ij=-MIN3(0.,rtmp1,rtmp2);
665                          fc->iteration_matrix->col_coupleBlock->val[iptr_ij]=-(rtmp1+d_ij);                          fc->iteration_matrix->col_coupleBlock->val[iptr_ij]=-(rtmp1+d_ij);
666                          fc->iteration_matrix->row_coupleBlock->val[iptr_ji]=-(rtmp2+d_ij);                          fc->iteration_matrix->row_coupleBlock->val[iptr_ji]=-(rtmp2+d_ij);
667                          sum-=d_ij;                          sum-=d_ij;
# Line 670  printf("a[%d,%d]=%e\n",j,i,rtmp2); Line 678  printf("a[%d,%d]=%e\n",j,i,rtmp2);
678  }  }
679    
680  /*  /*
681   * out_i=m_i u_i + a * \sum_{j <> i} l_{ij} (u_j-u_i)   * out_i=m_i u_i + a * \sum_{j <> i} l_{ij} (u_j-u_i) where m_i>0
682     *       = u_i                                        where m_i<=0
683   *   *
684   */   */
685  void Paso_FCT_Solver_setMuPaLu(double* out,  void Paso_FCT_Solver_setMuPaLu(double* out,
# Line 683  void Paso_FCT_Solver_setMuPaLu(double* o Line 692  void Paso_FCT_Solver_setMuPaLu(double* o
692    const Paso_SystemMatrixPattern *pattern = L->pattern;    const Paso_SystemMatrixPattern *pattern = L->pattern;
693    const double *u=Paso_Coupler_borrowLocalData(u_coupler);    const double *u=Paso_Coupler_borrowLocalData(u_coupler);
694    const double *remote_u=Paso_Coupler_borrowRemoteData(u_coupler);    const double *remote_u=Paso_Coupler_borrowRemoteData(u_coupler);
695    register index_t iptr_ij;    index_t iptr_ij;
696    const dim_t n=Paso_SystemMatrix_getTotalNumRows(L);    const dim_t n=Paso_SystemMatrix_getTotalNumRows(L);
697    
698    #pragma omp parallel for private(i) schedule(static)    #pragma omp parallel for private(i) schedule(static)
699    for (i = 0; i < n; ++i) {    for (i = 0; i < n; ++i) {
700        out[i]=M[i]*u[i];        if ( M[i] > 0.) {
701              out[i]=M[i]*u[i];
702          } else {
703          out[i]=u[i];
704          }
705    }    }
706    if (ABS(a)>0) {    if (ABS(a)>0) {
707        #pragma omp parallel for schedule(static) private(i, iptr_ij)        #pragma omp parallel for schedule(static) private(i, iptr_ij)
708        for (i = 0; i < n; ++i) {        for (i = 0; i < n; ++i) {
709            register double sum=0;      if ( M[i] > 0.) {
710            const register double u_i=u[i];            double sum=0;
711              const double u_i=u[i];
712            #pragma ivdep            #pragma ivdep
713        for (iptr_ij=(pattern->mainPattern->ptr[i]);iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {        for (iptr_ij=(pattern->mainPattern->ptr[i]);iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
714                 const index_t j=pattern->mainPattern->index[iptr_ij];                 const index_t j=pattern->mainPattern->index[iptr_ij];
715                 const register double l_ij=L->mainBlock->val[iptr_ij];                 const double l_ij=L->mainBlock->val[iptr_ij];
716                 sum+=l_ij*(u[j]-u_i);                 sum+=l_ij*(u[j]-u_i);
717                        
718            }            }
719            #pragma ivdep            #pragma ivdep
720        for (iptr_ij=(pattern->col_couplePattern->ptr[i]);iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {        for (iptr_ij=(pattern->col_couplePattern->ptr[i]);iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
721                 const index_t j=pattern->col_couplePattern->index[iptr_ij];                 const index_t j=pattern->col_couplePattern->index[iptr_ij];
722                 const register double l_ij=L->col_coupleBlock->val[iptr_ij];                 const double l_ij=L->col_coupleBlock->val[iptr_ij];
723                 sum+=l_ij*(remote_u[j]-u_i);                 sum+=l_ij*(remote_u[j]-u_i);
724            }            }
725            out[i]+=a*sum;            out[i]+=a*sum;
726        }
727        }        }
728    }    }
729  }  }

Legend:
Removed from v.3794  
changed lines
  Added in v.3823

  ViewVC Help
Powered by ViewVC 1.1.26