/[escript]/trunk/paso/src/BiCGStab.c
ViewVC logotype

Diff of /trunk/paso/src/BiCGStab.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 929 by gross, Wed Jan 17 07:41:13 2007 UTC revision 1630 by trankine, Sat Jul 12 07:15:32 2008 UTC
# Line 1  Line 1 
1    
2  /* $Id$ */  /* $Id$ */
3    
4  /*  /*******************************************************
5  ********************************************************************************   *
6  *               Copyright   2006 by ACcESS MNRF                                *   *           Copyright 2003-2007 by ACceSS MNRF
7  *                                                                              *   *       Copyright 2007 by University of Queensland
8  *                 http://www.access.edu.au                                     *   *
9  *           Primary Business: Queensland, Australia                            *   *                http://esscc.uq.edu.au
10  *     Licensed under the Open Software License version 3.0             *   *        Primary Business: Queensland, Australia
11  *        http://www.opensource.org/licenses/osl-3.0.php                        *   *  Licensed under the Open Software License version 3.0
12  ********************************************************************************   *     http://www.opensource.org/licenses/osl-3.0.php
13  */   *
14     *******************************************************/
15    
16  /*  /*
17     Crude modifications and translations for Paso by Matt Davies and Lutz Gross     Crude modifications and translations for Paso by Matt Davies and Lutz Gross
# Line 82  err_t Paso_Solver_BiCGStab( Line 84  err_t Paso_Solver_BiCGStab(
84    
85    
86    /* Local variables */    /* Local variables */
87    double *rtld=NULL,*p=NULL,*v=NULL,*t=NULL,*phat=NULL,*shat=NULL,*s=NULL;    double *rtld=NULL,*p=NULL,*v=NULL,*t=NULL,*phat=NULL,*shat=NULL,*s=NULL, *buf1=NULL, *buf0=NULL;
88    double beta,norm_of_residual,sum_1,sum_2,sum_3,sum_4,norm_of_residual_global;    double beta,norm_of_residual,sum_1,sum_2,sum_3,sum_4,norm_of_residual_global;
89    double alpha, omega, omegaNumtr, omegaDenumtr, rho, tol, rho1;    double alpha, omega, omegaNumtr, omegaDenumtr, rho, tol, rho1;
90    #ifdef PASO_MPI
91      double loc_sum[2], sum[2];
92    #endif
93    dim_t num_iter=0,maxit,num_iter_global;    dim_t num_iter=0,maxit,num_iter_global;
94    dim_t i0;    dim_t i0;
95    bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE;    bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE;
96    dim_t status = SOLVER_NO_ERROR;    dim_t status = SOLVER_NO_ERROR;
97      double *resid = tolerance;
98    /* adapt original routine parameters */    dim_t n = Paso_SystemMatrix_getTotalNumRows(A);
   dim_t n = A->num_cols * A-> col_block_size;;  
   double * resid = tolerance;  
99    
100    /* Executable Statements */    /* Executable Statements */
101    
# Line 104  err_t Paso_Solver_BiCGStab( Line 107  err_t Paso_Solver_BiCGStab(
107    phat=TMPMEMALLOC(n,double);    phat=TMPMEMALLOC(n,double);
108    shat=TMPMEMALLOC(n,double);    shat=TMPMEMALLOC(n,double);
109    s=TMPMEMALLOC(n,double);    s=TMPMEMALLOC(n,double);
   
110    /*     Test the input parameters. */    /*     Test the input parameters. */
111    
112    if (n < 0) {    if (n < 0) {
# Line 117  err_t Paso_Solver_BiCGStab( Line 119  err_t Paso_Solver_BiCGStab(
119      maxit = *iter;      maxit = *iter;
120      tol = *resid;      tol = *resid;
121        
122      #pragma omp parallel firstprivate(maxit,tol) \      num_iter =0;
123         private(rho,omega,num_iter,norm_of_residual,beta,alpha,rho1, convergeFlag,maxIterFlag,breakFlag)      convergeFlag=FALSE;
124      {      maxIterFlag=FALSE;
125        num_iter =0;      breakFlag=FALSE;
126        convergeFlag=FALSE;  
127        maxIterFlag=FALSE;      /* initialize arrays */
128        breakFlag=FALSE;  
129        #pragma omp parallel for private(i0) schedule(static)
130        /* initialize arrays */      for (i0 = 0; i0 < n; i0++) {
     
       #pragma omp for private(i0) schedule(static)  
       for (i0 = 0; i0 < n; i0++) {  
131      rtld[i0]=0;      rtld[i0]=0;
132      p[i0]=0;      p[i0]=0;
133      v[i0]=0;      v[i0]=0;
134      t[i0]=0;      t[i0]=0;
135      phat[i0]=0;      phat[i0]=0;
136      shat[i0]=0;      shat[i0]=0;
137        }          rtld[i0] = r[i0];
138        #pragma omp for private(i0) schedule(static)      }
       for (i0 = 0; i0 < n; i0++) rtld[i0] = r[i0];  
139        
140        /*     Perform BiConjugate Gradient Stabilized iteration. */      /*     Perform BiConjugate Gradient Stabilized iteration. */
141        
142      L10:      L10:
143        ++(num_iter);        ++(num_iter);
       #pragma omp barrier  
       #pragma omp master  
       {  
144      sum_1 = 0;      sum_1 = 0;
145      sum_2 = 0;      sum_2 = 0;
146      sum_3 = 0;      sum_3 = 0;
147      sum_4 = 0;      sum_4 = 0;
148      omegaNumtr = 0.0;      omegaNumtr = 0.0;
149      omegaDenumtr = 0.0;        omegaDenumtr = 0.0;
150        }        #pragma omp parallel for private(i0) reduction(+:sum_1) schedule(static)
       #pragma omp barrier  
       #pragma ivdep  
       #pragma omp for private(i0) reduction(+:sum_1) schedule(static)  
151        for (i0 = 0; i0 < n; i0++) sum_1 += rtld[i0] * r[i0];        for (i0 = 0; i0 < n; i0++) sum_1 += rtld[i0] * r[i0];
152          #ifdef PASO_MPI
153              loc_sum[0] = sum_1;
154              MPI_Allreduce(loc_sum, &sum_1, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
155          #endif
156        rho = sum_1;        rho = sum_1;
157                
158        if (! (breakFlag = (ABS(rho) <= TOLERANCE_FOR_SCALARS))) {        if (! (breakFlag = (ABS(rho) <= TOLERANCE_FOR_SCALARS))) {
# Line 164  err_t Paso_Solver_BiCGStab( Line 160  err_t Paso_Solver_BiCGStab(
160                
161      if (num_iter > 1) {      if (num_iter > 1) {
162        beta = rho / rho1 * (alpha / omega);        beta = rho / rho1 * (alpha / omega);
163            #pragma ivdep            #pragma omp parallel for private(i0) schedule(static)
           #pragma omp for private(i0) schedule(static)  
164        for (i0 = 0; i0 < n; i0++) p[i0] = r[i0] + beta * (p[i0] - omega * v[i0]);        for (i0 = 0; i0 < n; i0++) p[i0] = r[i0] + beta * (p[i0] - omega * v[i0]);
165      } else {      } else {
166            #pragma ivdep            #pragma omp parallel for private(i0) schedule(static)
           #pragma omp for private(i0) schedule(static)  
167        for (i0 = 0; i0 < n; i0++) p[i0] = r[i0];        for (i0 = 0; i0 < n; i0++) p[i0] = r[i0];
168      }      }
169        
# Line 178  err_t Paso_Solver_BiCGStab( Line 172  err_t Paso_Solver_BiCGStab(
172          Paso_Solver_solvePreconditioner(A,&phat[0], &p[0]);          Paso_Solver_solvePreconditioner(A,&phat[0], &p[0]);
173      Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, &phat[0],ZERO, &v[0]);      Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, &phat[0],ZERO, &v[0]);
174        
175          // #pragma ivdep          #pragma omp parallel for private(i0) reduction(+:sum_2) schedule(static)
         #pragma omp for private(i0) reduction(+:sum_2) schedule(static)  
176      for (i0 = 0; i0 < n; i0++) sum_2 += rtld[i0] * v[i0];      for (i0 = 0; i0 < n; i0++) sum_2 += rtld[i0] * v[i0];
177            #ifdef PASO_MPI
178               loc_sum[0] = sum_2;
179                MPI_Allreduce(loc_sum, &sum_2, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
180            #endif
181          if (! (breakFlag = (ABS(sum_2) <= TOLERANCE_FOR_SCALARS))) {          if (! (breakFlag = (ABS(sum_2) <= TOLERANCE_FOR_SCALARS))) {
182         alpha = rho / sum_2;         alpha = rho / sum_2;
183    
184             // #pragma ivdep             #pragma omp parallel for private(i0) reduction(+:sum_3) schedule(static)
            #pragma omp for private(i0) reduction(+:sum_3) schedule(static)  
185         for (i0 = 0; i0 < n; i0++) {         for (i0 = 0; i0 < n; i0++) {
186           r[i0] -= alpha * v[i0];           r[i0] -= alpha * v[i0];
187           s[i0] = r[i0];           s[i0] = r[i0];
188           sum_3 += s[i0] * s[i0];           sum_3 += s[i0] * s[i0];
189         }         }
190               #ifdef PASO_MPI
191                   loc_sum[0] = sum_3;
192                   MPI_Allreduce(loc_sum, &sum_3, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
193               #endif
194         norm_of_residual = sqrt(sum_3);         norm_of_residual = sqrt(sum_3);
195                    
196         /*        Early check for tolerance. */         /*        Early check for tolerance. */
197         if ( (convergeFlag = (norm_of_residual <= tol)) ) {         if ( (convergeFlag = (norm_of_residual <= tol)) ) {
198               // #pragma ivdep               #pragma omp parallel for  private(i0) schedule(static)
              #pragma omp for  private(i0) schedule(static)  
199           for (i0 = 0; i0 < n; i0++) x[i0] += alpha * phat[i0];           for (i0 = 0; i0 < n; i0++) x[i0] += alpha * phat[i0];
200           maxIterFlag = FALSE;           maxIterFlag = FALSE;
201           breakFlag = FALSE;           breakFlag = FALSE;
# Line 205  err_t Paso_Solver_BiCGStab( Line 204  err_t Paso_Solver_BiCGStab(
204               Paso_Solver_solvePreconditioner(A,&shat[0], &s[0]);               Paso_Solver_solvePreconditioner(A,&shat[0], &s[0]);
205           Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, &shat[0],ZERO,&t[0]);           Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, &shat[0],ZERO,&t[0]);
206        
207               // #pragma ivdep               #pragma omp parallel for private(i0) reduction(+:omegaNumtr,omegaDenumtr) schedule(static)
              #pragma omp for private(i0) reduction(+:omegaNumtr,omegaDenumtr) schedule(static)  
208           for (i0 = 0; i0 < n; i0++) {           for (i0 = 0; i0 < n; i0++) {
209             omegaNumtr +=t[i0] * s[i0];             omegaNumtr +=t[i0] * s[i0];
210             omegaDenumtr += t[i0] * t[i0];             omegaDenumtr += t[i0] * t[i0];
211           }           }
212                 #ifdef PASO_MPI
213                    loc_sum[0] = omegaNumtr;
214                    loc_sum[1] = omegaDenumtr;
215                    MPI_Allreduce(loc_sum, sum, 2, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
216                    omegaNumtr=sum[0];
217                    omegaDenumtr=sum[1];
218                 #endif
219               if (! (breakFlag = (ABS(omegaDenumtr) <= TOLERANCE_FOR_SCALARS))) {               if (! (breakFlag = (ABS(omegaDenumtr) <= TOLERANCE_FOR_SCALARS))) {
220              omega = omegaNumtr / omegaDenumtr;              omega = omegaNumtr / omegaDenumtr;
221        
222                  // #pragma ivdep                  #pragma omp parallel for private(i0) reduction(+:sum_4) schedule(static)
                 #pragma omp for private(i0) reduction(+:sum_4) schedule(static)  
223              for (i0 = 0; i0 < n; i0++) {              for (i0 = 0; i0 < n; i0++) {
224                x[i0] += alpha * phat[i0] + omega * shat[i0];                x[i0] += alpha * phat[i0] + omega * shat[i0];
225                r[i0] -= omega * t[i0];                r[i0] = s[i0]-omega * t[i0];
226                sum_4 += r[i0] * r[i0];                sum_4 += r[i0] * r[i0];
227              }              }
228                    #ifdef PASO_MPI
229                       loc_sum[0] = sum_4;
230                        MPI_Allreduce(loc_sum, &sum_4, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
231                    #endif
232              norm_of_residual = sqrt(sum_4);              norm_of_residual = sqrt(sum_4);
233              convergeFlag = norm_of_residual <= tol;              convergeFlag = norm_of_residual <= tol;
234              maxIterFlag = num_iter == maxit;              maxIterFlag = num_iter == maxit;
# Line 234  err_t Paso_Solver_BiCGStab( Line 242  err_t Paso_Solver_BiCGStab(
242      }      }
243        }        }
244        /* end of iteration */        /* end of iteration */
245        #pragma omp master        num_iter_global=num_iter;
246        {        norm_of_residual_global=norm_of_residual;
247      num_iter_global=num_iter;        if (maxIterFlag) {
     norm_of_residual_global=norm_of_residual;  
     if (maxIterFlag) {  
248          status = SOLVER_MAXITER_REACHED;          status = SOLVER_MAXITER_REACHED;
249      } else if (breakFlag) {        } else if (breakFlag) {
250          status = SOLVER_BREAKDOWN;          status = SOLVER_BREAKDOWN;
     }  
251        }        }
     }  /* end of parallel region */  
252    }    }
253    TMPMEMFREE(rtld);    TMPMEMFREE(rtld);
254    TMPMEMFREE(p);    TMPMEMFREE(p);
# Line 259  err_t Paso_Solver_BiCGStab( Line 263  err_t Paso_Solver_BiCGStab(
263    /*     End of BICGSTAB */    /*     End of BICGSTAB */
264    return status;    return status;
265  }  }
 /*  
  * $Log$  
  * Revision 1.2  2005/09/15 03:44:40  jgs  
  * Merge of development branch dev-02 back to main trunk on 2005-09-15  
  *  
  * Revision 1.1.2.1  2005/09/05 06:29:49  gross  
  * These files have been extracted from finley to define a stand alone libray for iterative  
  * linear solvers on the ALTIX. main entry through Paso_solve. this version compiles but  
  * has not been tested yet.  
  *  
  *  
  */  

Legend:
Removed from v.929  
changed lines
  Added in v.1630

  ViewVC Help
Powered by ViewVC 1.1.26