/[escript]/trunk/paso/src/BiCGStab.c
ViewVC logotype

Diff of /trunk/paso/src/BiCGStab.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 929 by gross, Wed Jan 17 07:41:13 2007 UTC revision 1028 by gross, Wed Mar 14 00:15:24 2007 UTC
# Line 86  err_t Paso_Solver_BiCGStab( Line 86  err_t Paso_Solver_BiCGStab(
86    double beta,norm_of_residual,sum_1,sum_2,sum_3,sum_4,norm_of_residual_global;    double beta,norm_of_residual,sum_1,sum_2,sum_3,sum_4,norm_of_residual_global;
87    double alpha, omega, omegaNumtr, omegaDenumtr, rho, tol, rho1;    double alpha, omega, omegaNumtr, omegaDenumtr, rho, tol, rho1;
88    dim_t num_iter=0,maxit,num_iter_global;    dim_t num_iter=0,maxit,num_iter_global;
89    dim_t i0;    dim_t i0,n;
90    bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE;    bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE;
91    dim_t status = SOLVER_NO_ERROR;    dim_t status = SOLVER_NO_ERROR;
92      double *resid = tolerance;
93    /* adapt original routine parameters */    /* adapt original routine parameters */
94    dim_t n = A->num_cols * A-> col_block_size;;    n = A->num_cols * A-> col_block_size;;
   double * resid = tolerance;  
95    
96    /* Executable Statements */    /* Executable Statements */
97    
# Line 154  err_t Paso_Solver_BiCGStab( Line 153  err_t Paso_Solver_BiCGStab(
153      omegaDenumtr = 0.0;      omegaDenumtr = 0.0;
154        }        }
155        #pragma omp barrier        #pragma omp barrier
       #pragma ivdep  
156        #pragma omp for private(i0) reduction(+:sum_1) schedule(static)        #pragma omp for private(i0) reduction(+:sum_1) schedule(static)
157        for (i0 = 0; i0 < n; i0++) sum_1 += rtld[i0] * r[i0];        for (i0 = 0; i0 < n; i0++) sum_1 += rtld[i0] * r[i0];
158        rho = sum_1;        rho = sum_1;
# Line 164  err_t Paso_Solver_BiCGStab( Line 162  err_t Paso_Solver_BiCGStab(
162                
163      if (num_iter > 1) {      if (num_iter > 1) {
164        beta = rho / rho1 * (alpha / omega);        beta = rho / rho1 * (alpha / omega);
           #pragma ivdep  
165            #pragma omp for private(i0) schedule(static)            #pragma omp for private(i0) schedule(static)
166        for (i0 = 0; i0 < n; i0++) p[i0] = r[i0] + beta * (p[i0] - omega * v[i0]);        for (i0 = 0; i0 < n; i0++) p[i0] = r[i0] + beta * (p[i0] - omega * v[i0]);
167      } else {      } else {
           #pragma ivdep  
168            #pragma omp for private(i0) schedule(static)            #pragma omp for private(i0) schedule(static)
169        for (i0 = 0; i0 < n; i0++) p[i0] = r[i0];        for (i0 = 0; i0 < n; i0++) p[i0] = r[i0];
170      }      }
# Line 178  err_t Paso_Solver_BiCGStab( Line 174  err_t Paso_Solver_BiCGStab(
174          Paso_Solver_solvePreconditioner(A,&phat[0], &p[0]);          Paso_Solver_solvePreconditioner(A,&phat[0], &p[0]);
175      Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, &phat[0],ZERO, &v[0]);      Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, &phat[0],ZERO, &v[0]);
176        
         // #pragma ivdep  
177          #pragma omp for private(i0) reduction(+:sum_2) schedule(static)          #pragma omp for private(i0) reduction(+:sum_2) schedule(static)
178      for (i0 = 0; i0 < n; i0++) sum_2 += rtld[i0] * v[i0];      for (i0 = 0; i0 < n; i0++) sum_2 += rtld[i0] * v[i0];
179          if (! (breakFlag = (ABS(sum_2) <= TOLERANCE_FOR_SCALARS))) {          if (! (breakFlag = (ABS(sum_2) <= TOLERANCE_FOR_SCALARS))) {
180         alpha = rho / sum_2;         alpha = rho / sum_2;
181    
            // #pragma ivdep  
182             #pragma omp for private(i0) reduction(+:sum_3) schedule(static)             #pragma omp for private(i0) reduction(+:sum_3) schedule(static)
183         for (i0 = 0; i0 < n; i0++) {         for (i0 = 0; i0 < n; i0++) {
184           r[i0] -= alpha * v[i0];           r[i0] -= alpha * v[i0];
# Line 195  err_t Paso_Solver_BiCGStab( Line 189  err_t Paso_Solver_BiCGStab(
189                    
190         /*        Early check for tolerance. */         /*        Early check for tolerance. */
191         if ( (convergeFlag = (norm_of_residual <= tol)) ) {         if ( (convergeFlag = (norm_of_residual <= tol)) ) {
              // #pragma ivdep  
192               #pragma omp for  private(i0) schedule(static)               #pragma omp for  private(i0) schedule(static)
193           for (i0 = 0; i0 < n; i0++) x[i0] += alpha * phat[i0];           for (i0 = 0; i0 < n; i0++) x[i0] += alpha * phat[i0];
194           maxIterFlag = FALSE;           maxIterFlag = FALSE;
# Line 205  err_t Paso_Solver_BiCGStab( Line 198  err_t Paso_Solver_BiCGStab(
198               Paso_Solver_solvePreconditioner(A,&shat[0], &s[0]);               Paso_Solver_solvePreconditioner(A,&shat[0], &s[0]);
199           Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, &shat[0],ZERO,&t[0]);           Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, &shat[0],ZERO,&t[0]);
200        
              // #pragma ivdep  
201               #pragma omp for private(i0) reduction(+:omegaNumtr,omegaDenumtr) schedule(static)               #pragma omp for private(i0) reduction(+:omegaNumtr,omegaDenumtr) schedule(static)
202           for (i0 = 0; i0 < n; i0++) {           for (i0 = 0; i0 < n; i0++) {
203             omegaNumtr +=t[i0] * s[i0];             omegaNumtr +=t[i0] * s[i0];
# Line 214  err_t Paso_Solver_BiCGStab( Line 206  err_t Paso_Solver_BiCGStab(
206               if (! (breakFlag = (ABS(omegaDenumtr) <= TOLERANCE_FOR_SCALARS))) {               if (! (breakFlag = (ABS(omegaDenumtr) <= TOLERANCE_FOR_SCALARS))) {
207              omega = omegaNumtr / omegaDenumtr;              omega = omegaNumtr / omegaDenumtr;
208        
                 // #pragma ivdep  
209                  #pragma omp for private(i0) reduction(+:sum_4) schedule(static)                  #pragma omp for private(i0) reduction(+:sum_4) schedule(static)
210              for (i0 = 0; i0 < n; i0++) {              for (i0 = 0; i0 < n; i0++) {
211                x[i0] += alpha * phat[i0] + omega * shat[i0];                x[i0] += alpha * phat[i0] + omega * shat[i0];

Legend:
Removed from v.929  
changed lines
  Added in v.1028

  ViewVC Help
Powered by ViewVC 1.1.26