/[escript]/trunk/paso/src/FCT_Solver.cpp
ViewVC logotype

Diff of /trunk/paso/src/FCT_Solver.cpp

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 3794 by gross, Wed Feb 1 23:02:26 2012 UTC revision 3795 by caltinay, Thu Feb 2 05:54:34 2012 UTC
# Line 115  double Paso_FCT_Solver_getSafeTimeStepSi Line 115  double Paso_FCT_Solver_getSafeTimeStepSi
115             index_t fail_loc=0;             index_t fail_loc=0;
116                 #pragma omp for schedule(static)                 #pragma omp for schedule(static)
117                 for (i=0;i<n;++i) {                 for (i=0;i<n;++i) {
118                    const register double l_ii=fctp->main_diagonal_low_order_transport_matrix[i];                    const double l_ii=fctp->main_diagonal_low_order_transport_matrix[i];
119                    const register double m_i=fctp->lumped_mass_matrix[i];                    const double m_i=fctp->lumped_mass_matrix[i];
120            if ( (m_i > 0) ) {            if ( (m_i > 0) ) {
121                if (l_ii<0) dt_max_loc=MIN(dt_max_loc,m_i/(-l_ii));                if (l_ii<0) dt_max_loc=MIN(dt_max_loc,m_i/(-l_ii));
122            } else {            } else {
# Line 171  void Paso_FCT_Solver_initialize(const do Line 171  void Paso_FCT_Solver_initialize(const do
171      fct_solver->dt = dt;      fct_solver->dt = dt;
172      #pragma omp parallel for private(i)      #pragma omp parallel for private(i)
173      for (i = 0; i < n; ++i) {      for (i = 0; i < n; ++i) {
174             const register double m=fctp->lumped_mass_matrix[i];             const double m=fctp->lumped_mass_matrix[i];
175         const register double l_ii = fctp->main_diagonal_low_order_transport_matrix[i];         const double l_ii = fctp->main_diagonal_low_order_transport_matrix[i];
176             fctp->iteration_matrix->mainBlock->val[main_iptr[i]] = m * omega - l_ii;             fctp->iteration_matrix->mainBlock->val[main_iptr[i]] = m * omega - l_ii;
177      }      }
178            
# Line 450  void Paso_FCT_setAntiDiffusionFlux_CN(Pa Line 450  void Paso_FCT_setAntiDiffusionFlux_CN(Pa
450    
451    #pragma omp parallel for schedule(static) private(i, iptr_ij)    #pragma omp parallel for schedule(static) private(i, iptr_ij)
452    for (i = 0; i < n; ++i) {    for (i = 0; i < n; ++i) {
453        const register double u_i     = u[i];        const double u_i     = u[i];
454        const register double u_old_i = u_old[i];        const double u_old_i = u_old[i];
455    
456        #pragma ivdep        #pragma ivdep
457        for (iptr_ij=(pattern->mainPattern->ptr[i]);iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {        for (iptr_ij=(pattern->mainPattern->ptr[i]);iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
458      const register index_t j      = pattern->mainPattern->index[iptr_ij];      const index_t j      = pattern->mainPattern->index[iptr_ij];
459      const register double m_ij    = fct->mass_matrix->mainBlock->val[iptr_ij];      const double m_ij    = fct->mass_matrix->mainBlock->val[iptr_ij];
460      const register double d_ij    = fct->transport_matrix->mainBlock->val[iptr_ij]+fct->iteration_matrix->mainBlock->val[iptr_ij]; /* this is in fact -d_ij */      const double d_ij    = fct->transport_matrix->mainBlock->val[iptr_ij]+fct->iteration_matrix->mainBlock->val[iptr_ij]; /* this is in fact -d_ij */
461      const register double u_old_j = u_old[j];      const double u_old_j = u_old[j];
462      const register double u_j     = u[j];      const double u_j     = u[j];
463            
464      /* (m_{ij} - dt (1-theta) d_{ij}) (u_old[j]-u_old[i]) - (m_{ij} + dt theta d_{ij}) (u[j]-u[i]) */      /* (m_{ij} - dt (1-theta) d_{ij}) (u_old[j]-u_old[i]) - (m_{ij} + dt theta d_{ij}) (u[j]-u[i]) */
465      flux_matrix->mainBlock->val[iptr_ij]=(m_ij+dt_half*d_ij)*(u_old_j-u_old_i) - (m_ij-dt_half*d_ij)*(u_j-u_i);      flux_matrix->mainBlock->val[iptr_ij]=(m_ij+dt_half*d_ij)*(u_old_j-u_old_i) - (m_ij-dt_half*d_ij)*(u_j-u_i);
# Line 467  void Paso_FCT_setAntiDiffusionFlux_CN(Pa Line 467  void Paso_FCT_setAntiDiffusionFlux_CN(Pa
467        }        }
468        #pragma ivdep        #pragma ivdep
469        for (iptr_ij=(pattern->col_couplePattern->ptr[i]);iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {        for (iptr_ij=(pattern->col_couplePattern->ptr[i]);iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
470      const register index_t j      = pattern->col_couplePattern->index[iptr_ij];      const index_t j      = pattern->col_couplePattern->index[iptr_ij];
471      const register double m_ij    = fct->mass_matrix->col_coupleBlock->val[iptr_ij];      const double m_ij    = fct->mass_matrix->col_coupleBlock->val[iptr_ij];
472      const register double d_ij    = fct->transport_matrix->col_coupleBlock->val[iptr_ij]+fct->iteration_matrix->col_coupleBlock->val[iptr_ij]; /* this is in fact -d_ij */      const double d_ij    = fct->transport_matrix->col_coupleBlock->val[iptr_ij]+fct->iteration_matrix->col_coupleBlock->val[iptr_ij]; /* this is in fact -d_ij */
473          const register double u_old_j = remote_u_old[j];          const double u_old_j = remote_u_old[j];
474      const register double u_j     = remote_u[j];      const double u_j     = remote_u[j];
475      flux_matrix->col_coupleBlock->val[iptr_ij]=(m_ij+dt_half*d_ij)*(u_old_j-u_old_i)- (m_ij-dt_half*d_ij)*(u_j-u_i);      flux_matrix->col_coupleBlock->val[iptr_ij]=(m_ij+dt_half*d_ij)*(u_old_j-u_old_i)- (m_ij-dt_half*d_ij)*(u_j-u_i);
476        }        }
477    }    }
# Line 495  void Paso_FCT_setAntiDiffusionFlux_BE(Pa Line 495  void Paso_FCT_setAntiDiffusionFlux_BE(Pa
495    
496    #pragma omp parallel for schedule(static) private(i, iptr_ij)    #pragma omp parallel for schedule(static) private(i, iptr_ij)
497    for (i = 0; i < n; ++i) {    for (i = 0; i < n; ++i) {
498        const register double u_i     = u[i];        const double u_i     = u[i];
499        const register double u_old_i = u_old[i];        const double u_old_i = u_old[i];
500        #pragma ivdep        #pragma ivdep
501        for (iptr_ij=pattern->mainPattern->ptr[i]; iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {        for (iptr_ij=pattern->mainPattern->ptr[i]; iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
502    
503      const register index_t j      = pattern->mainPattern->index[iptr_ij];      const index_t j      = pattern->mainPattern->index[iptr_ij];
504      const register double m_ij    = fct->mass_matrix->mainBlock->val[iptr_ij];      const double m_ij    = fct->mass_matrix->mainBlock->val[iptr_ij];
505      const register double d_ij    = fct->transport_matrix->mainBlock->val[iptr_ij]+fct->iteration_matrix->mainBlock->val[iptr_ij]; /* this is in fact -d_ij */      const double d_ij    = fct->transport_matrix->mainBlock->val[iptr_ij]+fct->iteration_matrix->mainBlock->val[iptr_ij]; /* this is in fact -d_ij */
506      const register double u_old_j = u_old[j];      const double u_old_j = u_old[j];
507      const register double u_j     = u[j];      const double u_j     = u[j];
508            
509      flux_matrix->mainBlock->val[iptr_ij]=m_ij*(u_old_j-u_old_i)- (m_ij-dt*d_ij)*(u_j-u_i);      flux_matrix->mainBlock->val[iptr_ij]=m_ij*(u_old_j-u_old_i)- (m_ij-dt*d_ij)*(u_j-u_i);
510        }        }
511        #pragma ivdep        #pragma ivdep
512        for (iptr_ij=pattern->col_couplePattern->ptr[i]; iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {        for (iptr_ij=pattern->col_couplePattern->ptr[i]; iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
513      const register index_t j      = pattern->col_couplePattern->index[iptr_ij];      const index_t j      = pattern->col_couplePattern->index[iptr_ij];
514      const register double m_ij    = fct->mass_matrix->col_coupleBlock->val[iptr_ij]; /* this is in fact -d_ij */      const double m_ij    = fct->mass_matrix->col_coupleBlock->val[iptr_ij]; /* this is in fact -d_ij */
515      const register double d_ij    = fct->transport_matrix->col_coupleBlock->val[iptr_ij]+fct->iteration_matrix->col_coupleBlock->val[iptr_ij];      const double d_ij    = fct->transport_matrix->col_coupleBlock->val[iptr_ij]+fct->iteration_matrix->col_coupleBlock->val[iptr_ij];
516          const register double u_old_j = remote_u_old[j];          const double u_old_j = remote_u_old[j];
517      const register double u_j     = remote_u[j];      const double u_j     = remote_u[j];
518            
519      flux_matrix->col_coupleBlock->val[iptr_ij]=m_ij*(u_old_j-u_old_i)- (m_ij-dt*d_ij)*(u_j-u_i);      flux_matrix->col_coupleBlock->val[iptr_ij]=m_ij*(u_old_j-u_old_i)- (m_ij-dt*d_ij)*(u_j-u_i);
520        }        }
# Line 549  void Paso_FCT_setAntiDiffusionFlux_linea Line 549  void Paso_FCT_setAntiDiffusionFlux_linea
549    
550    #pragma omp parallel for schedule(static) private(i, iptr_ij)    #pragma omp parallel for schedule(static) private(i, iptr_ij)
551    for (i = 0; i < n; ++i) {    for (i = 0; i < n; ++i) {
552        const register double u_tilde_i = u_tilde[i];        const double u_tilde_i = u_tilde[i];
553        const register double u_old_i   = u_old[i];        const double u_old_i   = u_old[i];
554        const register double du_i      = u_old_i - u_tilde_i;        const double du_i      = u_old_i - u_tilde_i;
555        #pragma ivdep        #pragma ivdep
556        for (iptr_ij=(pattern->mainPattern->ptr[i]);iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {        for (iptr_ij=(pattern->mainPattern->ptr[i]);iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
557            
558        const register index_t j      = pattern->mainPattern->index[iptr_ij];        const index_t j      = pattern->mainPattern->index[iptr_ij];
559        const register double m_ij    = fct->mass_matrix->mainBlock->val[iptr_ij];        const double m_ij    = fct->mass_matrix->mainBlock->val[iptr_ij];
560        const register double d_ij    = fct->transport_matrix->mainBlock->val[iptr_ij]+fct->iteration_matrix->mainBlock->val[iptr_ij]; /* this is in fact -d_ij */        const double d_ij    = fct->transport_matrix->mainBlock->val[iptr_ij]+fct->iteration_matrix->mainBlock->val[iptr_ij]; /* this is in fact -d_ij */
561            const register double u_tilde_j = u_tilde[j];            const double u_tilde_j = u_tilde[j];
562        const register double u_old_j = u_old[j];        const double u_old_j = u_old[j];
563        const register double du_j    = u_old_j - u_tilde_j;        const double du_j    = u_old_j - u_tilde_j;
564                
565        flux_matrix->mainBlock->val[iptr_ij]=2 * m_ij * ( du_i - du_j ) + dt * d_ij * ( u_tilde_j - u_tilde_i);        flux_matrix->mainBlock->val[iptr_ij]=2 * m_ij * ( du_i - du_j ) + dt * d_ij * ( u_tilde_j - u_tilde_i);
566    
# Line 568  void Paso_FCT_setAntiDiffusionFlux_linea Line 568  void Paso_FCT_setAntiDiffusionFlux_linea
568        #pragma ivdep        #pragma ivdep
569        for (iptr_ij=(pattern->col_couplePattern->ptr[i]);iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {        for (iptr_ij=(pattern->col_couplePattern->ptr[i]);iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
570            
571      const register index_t j      = pattern->col_couplePattern->index[iptr_ij];      const index_t j      = pattern->col_couplePattern->index[iptr_ij];
572      const register double m_ij    = fct->mass_matrix->col_coupleBlock->val[iptr_ij];      const double m_ij    = fct->mass_matrix->col_coupleBlock->val[iptr_ij];
573      const register double d_ij    = fct->transport_matrix->col_coupleBlock->val[iptr_ij]+fct->iteration_matrix->col_coupleBlock->val[iptr_ij];/* this is in fact -d_ij */      const double d_ij    = fct->transport_matrix->col_coupleBlock->val[iptr_ij]+fct->iteration_matrix->col_coupleBlock->val[iptr_ij];/* this is in fact -d_ij */
574          const register double u_tilde_j = remote_u_tilde[j];          const double u_tilde_j = remote_u_tilde[j];
575      const register double u_old_j = remote_u_old[j];      const double u_old_j = remote_u_old[j];
576      const register double du_j    = u_old_j - u_tilde_j;          const double du_j    = u_old_j - u_tilde_j;
577    
578      flux_matrix->col_coupleBlock->val[iptr_ij]= 2 * m_ij * ( du_i - du_j ) + dt * d_ij *  ( u_tilde_j - u_tilde_i);      flux_matrix->col_coupleBlock->val[iptr_ij]= 2 * m_ij * ( du_i - du_j ) + dt * d_ij *  ( u_tilde_j - u_tilde_i);
579            
# Line 622  void Paso_FCT_setLowOrderOperator(Paso_T Line 622  void Paso_FCT_setLowOrderOperator(Paso_T
622  /* printf("sum[%d] = %e -> ", i, sum); */  /* printf("sum[%d] = %e -> ", i, sum); */
623            /* look at a[i,j] */            /* look at a[i,j] */
624            for (iptr_ij=pattern->mainPattern->ptr[i];iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {            for (iptr_ij=pattern->mainPattern->ptr[i];iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
625                const register index_t j    = pattern->mainPattern->index[iptr_ij];                const index_t j    = pattern->mainPattern->index[iptr_ij];
626                const register double rtmp1 = fc->transport_matrix->mainBlock->val[iptr_ij];                const double rtmp1 = fc->transport_matrix->mainBlock->val[iptr_ij];
627            if (j!=i) {            if (j!=i) {
628                   /* find entry a[j,i] */                   /* find entry a[j,i] */
629                   #pragma ivdep                   #pragma ivdep
630                   for (iptr_ji=pattern->mainPattern->ptr[j]; iptr_ji<pattern->mainPattern->ptr[j+1]; ++iptr_ji) {                   for (iptr_ji=pattern->mainPattern->ptr[j]; iptr_ji<pattern->mainPattern->ptr[j+1]; ++iptr_ji) {
631                        
632                      if ( pattern->mainPattern->index[iptr_ji] == i) {                      if ( pattern->mainPattern->index[iptr_ji] == i) {
633                  const register double rtmp2=fc->transport_matrix->mainBlock->val[iptr_ji];                  const double rtmp2=fc->transport_matrix->mainBlock->val[iptr_ji];
634  /*  /*
635  printf("a[%d,%d]=%e\n",i,j,rtmp1);  printf("a[%d,%d]=%e\n",i,j,rtmp1);
636  printf("a[%d,%d]=%e\n",j,i,rtmp2);  printf("a[%d,%d]=%e\n",j,i,rtmp2);
637  */  */
638    
639                          const register double d_ij=-MIN3(0.,rtmp1,rtmp2);                          const double d_ij=-MIN3(0.,rtmp1,rtmp2);
640                          fc->iteration_matrix->mainBlock->val[iptr_ij]=-(rtmp1+d_ij);                          fc->iteration_matrix->mainBlock->val[iptr_ij]=-(rtmp1+d_ij);
641  /* printf("l[%d,%d]=%e\n",i,j,fc->iteration_matrix->mainBlock->val[iptr_ij]); */  /* printf("l[%d,%d]=%e\n",i,j,fc->iteration_matrix->mainBlock->val[iptr_ij]); */
642                          sum-=d_ij;                          sum-=d_ij;
# Line 646  printf("a[%d,%d]=%e\n",j,i,rtmp2); Line 646  printf("a[%d,%d]=%e\n",j,i,rtmp2);
646               }               }
647            }            }
648            for (iptr_ij=pattern->col_couplePattern->ptr[i];iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {            for (iptr_ij=pattern->col_couplePattern->ptr[i];iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
649                const register index_t    j = pattern->col_couplePattern->index[iptr_ij];                const index_t    j = pattern->col_couplePattern->index[iptr_ij];
650                const register double  rtmp1 = fc->transport_matrix->col_coupleBlock->val[iptr_ij];                const double  rtmp1 = fc->transport_matrix->col_coupleBlock->val[iptr_ij];
651            /* find entry a[j,i] */            /* find entry a[j,i] */
652                #pragma ivdep                #pragma ivdep
653                for (iptr_ji=pattern->row_couplePattern->ptr[j]; iptr_ji<pattern->row_couplePattern->ptr[j+1]; ++iptr_ji) {                for (iptr_ji=pattern->row_couplePattern->ptr[j]; iptr_ji<pattern->row_couplePattern->ptr[j+1]; ++iptr_ji) {
654                      if (pattern->row_couplePattern->index[iptr_ji]==i) {                      if (pattern->row_couplePattern->index[iptr_ji]==i) {
655                          const register double rtmp2=fc->transport_matrix->row_coupleBlock->val[iptr_ji];                          const double rtmp2=fc->transport_matrix->row_coupleBlock->val[iptr_ji];
656                          const register double d_ij=-MIN3(0.,rtmp1,rtmp2);                          const double d_ij=-MIN3(0.,rtmp1,rtmp2);
657                          fc->iteration_matrix->col_coupleBlock->val[iptr_ij]=-(rtmp1+d_ij);                          fc->iteration_matrix->col_coupleBlock->val[iptr_ij]=-(rtmp1+d_ij);
658                          fc->iteration_matrix->row_coupleBlock->val[iptr_ji]=-(rtmp2+d_ij);                          fc->iteration_matrix->row_coupleBlock->val[iptr_ji]=-(rtmp2+d_ij);
659                          sum-=d_ij;                          sum-=d_ij;
# Line 683  void Paso_FCT_Solver_setMuPaLu(double* o Line 683  void Paso_FCT_Solver_setMuPaLu(double* o
683    const Paso_SystemMatrixPattern *pattern = L->pattern;    const Paso_SystemMatrixPattern *pattern = L->pattern;
684    const double *u=Paso_Coupler_borrowLocalData(u_coupler);    const double *u=Paso_Coupler_borrowLocalData(u_coupler);
685    const double *remote_u=Paso_Coupler_borrowRemoteData(u_coupler);    const double *remote_u=Paso_Coupler_borrowRemoteData(u_coupler);
686    register index_t iptr_ij;    index_t iptr_ij;
687    const dim_t n=Paso_SystemMatrix_getTotalNumRows(L);    const dim_t n=Paso_SystemMatrix_getTotalNumRows(L);
688    
689    #pragma omp parallel for private(i) schedule(static)    #pragma omp parallel for private(i) schedule(static)
# Line 693  void Paso_FCT_Solver_setMuPaLu(double* o Line 693  void Paso_FCT_Solver_setMuPaLu(double* o
693    if (ABS(a)>0) {    if (ABS(a)>0) {
694        #pragma omp parallel for schedule(static) private(i, iptr_ij)        #pragma omp parallel for schedule(static) private(i, iptr_ij)
695        for (i = 0; i < n; ++i) {        for (i = 0; i < n; ++i) {
696            register double sum=0;            double sum=0;
697            const register double u_i=u[i];            const double u_i=u[i];
698            #pragma ivdep            #pragma ivdep
699        for (iptr_ij=(pattern->mainPattern->ptr[i]);iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {        for (iptr_ij=(pattern->mainPattern->ptr[i]);iptr_ij<pattern->mainPattern->ptr[i+1]; ++iptr_ij) {
700                 const index_t j=pattern->mainPattern->index[iptr_ij];                 const index_t j=pattern->mainPattern->index[iptr_ij];
701                 const register double l_ij=L->mainBlock->val[iptr_ij];                 const double l_ij=L->mainBlock->val[iptr_ij];
702                 sum+=l_ij*(u[j]-u_i);                 sum+=l_ij*(u[j]-u_i);
703                        
704            }            }
705            #pragma ivdep            #pragma ivdep
706        for (iptr_ij=(pattern->col_couplePattern->ptr[i]);iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {        for (iptr_ij=(pattern->col_couplePattern->ptr[i]);iptr_ij<pattern->col_couplePattern->ptr[i+1]; ++iptr_ij) {
707                 const index_t j=pattern->col_couplePattern->index[iptr_ij];                 const index_t j=pattern->col_couplePattern->index[iptr_ij];
708                 const register double l_ij=L->col_coupleBlock->val[iptr_ij];                 const double l_ij=L->col_coupleBlock->val[iptr_ij];
709                 sum+=l_ij*(remote_u[j]-u_i);                 sum+=l_ij*(remote_u[j]-u_i);
710            }            }
711            out[i]+=a*sum;            out[i]+=a*sum;

Legend:
Removed from v.3794  
changed lines
  Added in v.3795

  ViewVC Help
Powered by ViewVC 1.1.26