/[escript]/trunk/paso/src/BiCGStab.c
ViewVC logotype

Diff of /trunk/paso/src/BiCGStab.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 929 by gross, Wed Jan 17 07:41:13 2007 UTC revision 1986 by jfenwick, Thu Nov 6 23:33:14 2008 UTC
# Line 1  Line 1 
 /* $Id$ */  
1    
2  /*  /*******************************************************
3  ********************************************************************************  *
4  *               Copyright   2006 by ACcESS MNRF                                *  * Copyright (c) 2003-2008 by University of Queensland
5  *                                                                              *  * Earth Systems Science Computational Center (ESSCC)
6  *                 http://www.access.edu.au                                     *  * http://www.uq.edu.au/esscc
7  *           Primary Business: Queensland, Australia                            *  *
8  *     Licensed under the Open Software License version 3.0             *  * Primary Business: Queensland, Australia
9  *        http://www.opensource.org/licenses/osl-3.0.php                        *  * Licensed under the Open Software License version 3.0
10  ********************************************************************************  * http://www.opensource.org/licenses/osl-3.0.php
11  */  *
12    *******************************************************/
13    
14    
15  /*  /*
16     Crude modifications and translations for Paso by Matt Davies and Lutz Gross     Crude modifications and translations for Paso by Matt Davies and Lutz Gross
# Line 82  err_t Paso_Solver_BiCGStab( Line 83  err_t Paso_Solver_BiCGStab(
83    
84    
85    /* Local variables */    /* Local variables */
86    double *rtld=NULL,*p=NULL,*v=NULL,*t=NULL,*phat=NULL,*shat=NULL,*s=NULL;    double *rtld=NULL,*p=NULL,*v=NULL,*t=NULL,*phat=NULL,*shat=NULL,*s=NULL;/*, *buf1=NULL, *buf0=NULL;*/
87    double beta,norm_of_residual,sum_1,sum_2,sum_3,sum_4,norm_of_residual_global;    double beta,norm_of_residual=0,sum_1,sum_2,sum_3,sum_4,norm_of_residual_global=0;
88    double alpha, omega, omegaNumtr, omegaDenumtr, rho, tol, rho1;    double alpha=0, omega=0, omegaNumtr, omegaDenumtr, rho, tol, rho1=0;
89    dim_t num_iter=0,maxit,num_iter_global;  #ifdef PASO_MPI
90      double loc_sum[2], sum[2];
91    #endif
92      dim_t num_iter=0,maxit,num_iter_global=0;
93    dim_t i0;    dim_t i0;
94    bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE;    bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE;
95    dim_t status = SOLVER_NO_ERROR;    dim_t status = SOLVER_NO_ERROR;
96      double *resid = tolerance;
97    /* adapt original routine parameters */    dim_t n = Paso_SystemMatrix_getTotalNumRows(A);
   dim_t n = A->num_cols * A-> col_block_size;;  
   double * resid = tolerance;  
98    
99    /* Executable Statements */    /* Executable Statements */
100    
# Line 104  err_t Paso_Solver_BiCGStab( Line 106  err_t Paso_Solver_BiCGStab(
106    phat=TMPMEMALLOC(n,double);    phat=TMPMEMALLOC(n,double);
107    shat=TMPMEMALLOC(n,double);    shat=TMPMEMALLOC(n,double);
108    s=TMPMEMALLOC(n,double);    s=TMPMEMALLOC(n,double);
   
109    /*     Test the input parameters. */    /*     Test the input parameters. */
110    
111    if (n < 0) {    if (n < 0) {
# Line 117  err_t Paso_Solver_BiCGStab( Line 118  err_t Paso_Solver_BiCGStab(
118      maxit = *iter;      maxit = *iter;
119      tol = *resid;      tol = *resid;
120        
121      #pragma omp parallel firstprivate(maxit,tol) \      num_iter =0;
122         private(rho,omega,num_iter,norm_of_residual,beta,alpha,rho1, convergeFlag,maxIterFlag,breakFlag)      convergeFlag=FALSE;
123      {      maxIterFlag=FALSE;
124        num_iter =0;      breakFlag=FALSE;
125        convergeFlag=FALSE;  
126        maxIterFlag=FALSE;      /* initialize arrays */
127        breakFlag=FALSE;  
128        #pragma omp parallel for private(i0) schedule(static)
129        /* initialize arrays */      for (i0 = 0; i0 < n; i0++) {
     
       #pragma omp for private(i0) schedule(static)  
       for (i0 = 0; i0 < n; i0++) {  
130      rtld[i0]=0;      rtld[i0]=0;
131      p[i0]=0;      p[i0]=0;
132      v[i0]=0;      v[i0]=0;
133      t[i0]=0;      t[i0]=0;
134      phat[i0]=0;      phat[i0]=0;
135      shat[i0]=0;      shat[i0]=0;
136        }          rtld[i0] = r[i0];
137        #pragma omp for private(i0) schedule(static)      }
       for (i0 = 0; i0 < n; i0++) rtld[i0] = r[i0];  
138        
139        /*     Perform BiConjugate Gradient Stabilized iteration. */      /*     Perform BiConjugate Gradient Stabilized iteration. */
140        
141      L10:      L10:
142        ++(num_iter);        ++(num_iter);
       #pragma omp barrier  
       #pragma omp master  
       {  
143      sum_1 = 0;      sum_1 = 0;
144      sum_2 = 0;      sum_2 = 0;
145      sum_3 = 0;      sum_3 = 0;
146      sum_4 = 0;      sum_4 = 0;
147      omegaNumtr = 0.0;      omegaNumtr = 0.0;
148      omegaDenumtr = 0.0;        omegaDenumtr = 0.0;
149        }        #pragma omp parallel for private(i0) reduction(+:sum_1) schedule(static)
       #pragma omp barrier  
       #pragma ivdep  
       #pragma omp for private(i0) reduction(+:sum_1) schedule(static)  
150        for (i0 = 0; i0 < n; i0++) sum_1 += rtld[i0] * r[i0];        for (i0 = 0; i0 < n; i0++) sum_1 += rtld[i0] * r[i0];
151          #ifdef PASO_MPI
152              loc_sum[0] = sum_1;
153              MPI_Allreduce(loc_sum, &sum_1, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
154          #endif
155        rho = sum_1;        rho = sum_1;
156                
157        if (! (breakFlag = (ABS(rho) <= TOLERANCE_FOR_SCALARS))) {        if (! (breakFlag = (ABS(rho) <= TOLERANCE_FOR_SCALARS))) {
# Line 164  err_t Paso_Solver_BiCGStab( Line 159  err_t Paso_Solver_BiCGStab(
159                
160      if (num_iter > 1) {      if (num_iter > 1) {
161        beta = rho / rho1 * (alpha / omega);        beta = rho / rho1 * (alpha / omega);
162            #pragma ivdep            #pragma omp parallel for private(i0) schedule(static)
           #pragma omp for private(i0) schedule(static)  
163        for (i0 = 0; i0 < n; i0++) p[i0] = r[i0] + beta * (p[i0] - omega * v[i0]);        for (i0 = 0; i0 < n; i0++) p[i0] = r[i0] + beta * (p[i0] - omega * v[i0]);
164      } else {      } else {
165            #pragma ivdep            #pragma omp parallel for private(i0) schedule(static)
           #pragma omp for private(i0) schedule(static)  
166        for (i0 = 0; i0 < n; i0++) p[i0] = r[i0];        for (i0 = 0; i0 < n; i0++) p[i0] = r[i0];
167      }      }
168        
169      /*        Compute direction adjusting vector PHAT and scalar ALPHA. */      /*        Compute direction adjusting vector PHAT and scalar ALPHA. */
170        
171          Paso_Solver_solvePreconditioner(A,&phat[0], &p[0]);          Paso_Solver_solvePreconditioner(A,&phat[0], &p[0]);
172      Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, &phat[0],ZERO, &v[0]);      Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, A, &phat[0],PASO_ZERO, &v[0]);
173        
174          // #pragma ivdep          #pragma omp parallel for private(i0) reduction(+:sum_2) schedule(static)
         #pragma omp for private(i0) reduction(+:sum_2) schedule(static)  
175      for (i0 = 0; i0 < n; i0++) sum_2 += rtld[i0] * v[i0];      for (i0 = 0; i0 < n; i0++) sum_2 += rtld[i0] * v[i0];
176            #ifdef PASO_MPI
177               loc_sum[0] = sum_2;
178                MPI_Allreduce(loc_sum, &sum_2, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
179            #endif
180          if (! (breakFlag = (ABS(sum_2) <= TOLERANCE_FOR_SCALARS))) {          if (! (breakFlag = (ABS(sum_2) <= TOLERANCE_FOR_SCALARS))) {
181         alpha = rho / sum_2;         alpha = rho / sum_2;
182    
183             // #pragma ivdep             #pragma omp parallel for private(i0) reduction(+:sum_3) schedule(static)
            #pragma omp for private(i0) reduction(+:sum_3) schedule(static)  
184         for (i0 = 0; i0 < n; i0++) {         for (i0 = 0; i0 < n; i0++) {
185           r[i0] -= alpha * v[i0];           r[i0] -= alpha * v[i0];
186           s[i0] = r[i0];           s[i0] = r[i0];
187           sum_3 += s[i0] * s[i0];           sum_3 += s[i0] * s[i0];
188         }         }
189               #ifdef PASO_MPI
190                   loc_sum[0] = sum_3;
191                   MPI_Allreduce(loc_sum, &sum_3, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
192               #endif
193         norm_of_residual = sqrt(sum_3);         norm_of_residual = sqrt(sum_3);
194                    
195         /*        Early check for tolerance. */         /*        Early check for tolerance. */
196         if ( (convergeFlag = (norm_of_residual <= tol)) ) {         if ( (convergeFlag = (norm_of_residual <= tol)) ) {
197               // #pragma ivdep               #pragma omp parallel for  private(i0) schedule(static)
              #pragma omp for  private(i0) schedule(static)  
198           for (i0 = 0; i0 < n; i0++) x[i0] += alpha * phat[i0];           for (i0 = 0; i0 < n; i0++) x[i0] += alpha * phat[i0];
199           maxIterFlag = FALSE;           maxIterFlag = FALSE;
200           breakFlag = FALSE;           breakFlag = FALSE;
201         } else {         } else {
202           /*           Compute stabilizer vector SHAT and scalar OMEGA. */           /*           Compute stabilizer vector SHAT and scalar OMEGA. */
203               Paso_Solver_solvePreconditioner(A,&shat[0], &s[0]);               Paso_Solver_solvePreconditioner(A,&shat[0], &s[0]);
204           Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, &shat[0],ZERO,&t[0]);           Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, A, &shat[0],PASO_ZERO,&t[0]);
205        
206               // #pragma ivdep               #pragma omp parallel for private(i0) reduction(+:omegaNumtr,omegaDenumtr) schedule(static)
              #pragma omp for private(i0) reduction(+:omegaNumtr,omegaDenumtr) schedule(static)  
207           for (i0 = 0; i0 < n; i0++) {           for (i0 = 0; i0 < n; i0++) {
208             omegaNumtr +=t[i0] * s[i0];             omegaNumtr +=t[i0] * s[i0];
209             omegaDenumtr += t[i0] * t[i0];             omegaDenumtr += t[i0] * t[i0];
210           }           }
211                 #ifdef PASO_MPI
212                    loc_sum[0] = omegaNumtr;
213                    loc_sum[1] = omegaDenumtr;
214                    MPI_Allreduce(loc_sum, sum, 2, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
215                    omegaNumtr=sum[0];
216                    omegaDenumtr=sum[1];
217                 #endif
218               if (! (breakFlag = (ABS(omegaDenumtr) <= TOLERANCE_FOR_SCALARS))) {               if (! (breakFlag = (ABS(omegaDenumtr) <= TOLERANCE_FOR_SCALARS))) {
219              omega = omegaNumtr / omegaDenumtr;              omega = omegaNumtr / omegaDenumtr;
220        
221                  // #pragma ivdep                  #pragma omp parallel for private(i0) reduction(+:sum_4) schedule(static)
                 #pragma omp for private(i0) reduction(+:sum_4) schedule(static)  
222              for (i0 = 0; i0 < n; i0++) {              for (i0 = 0; i0 < n; i0++) {
223                x[i0] += alpha * phat[i0] + omega * shat[i0];                x[i0] += alpha * phat[i0] + omega * shat[i0];
224                r[i0] -= omega * t[i0];                r[i0] = s[i0]-omega * t[i0];
225                sum_4 += r[i0] * r[i0];                sum_4 += r[i0] * r[i0];
226              }              }
227                    #ifdef PASO_MPI
228                       loc_sum[0] = sum_4;
229                        MPI_Allreduce(loc_sum, &sum_4, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
230                    #endif
231              norm_of_residual = sqrt(sum_4);              norm_of_residual = sqrt(sum_4);
232              convergeFlag = norm_of_residual <= tol;              convergeFlag = norm_of_residual <= tol;
233              maxIterFlag = num_iter == maxit;              maxIterFlag = num_iter == maxit;
# Line 234  err_t Paso_Solver_BiCGStab( Line 241  err_t Paso_Solver_BiCGStab(
241      }      }
242        }        }
243        /* end of iteration */        /* end of iteration */
244        #pragma omp master        num_iter_global=num_iter;
245        {        norm_of_residual_global=norm_of_residual;
246      num_iter_global=num_iter;        if (maxIterFlag) {
     norm_of_residual_global=norm_of_residual;  
     if (maxIterFlag) {  
247          status = SOLVER_MAXITER_REACHED;          status = SOLVER_MAXITER_REACHED;
248      } else if (breakFlag) {        } else if (breakFlag) {
249          status = SOLVER_BREAKDOWN;          status = SOLVER_BREAKDOWN;
     }  
250        }        }
     }  /* end of parallel region */  
251    }    }
252    TMPMEMFREE(rtld);    TMPMEMFREE(rtld);
253    TMPMEMFREE(p);    TMPMEMFREE(p);
# Line 259  err_t Paso_Solver_BiCGStab( Line 262  err_t Paso_Solver_BiCGStab(
262    /*     End of BICGSTAB */    /*     End of BICGSTAB */
263    return status;    return status;
264  }  }
 /*  
  * $Log$  
  * Revision 1.2  2005/09/15 03:44:40  jgs  
  * Merge of development branch dev-02 back to main trunk on 2005-09-15  
  *  
  * Revision 1.1.2.1  2005/09/05 06:29:49  gross  
  * These files have been extracted from finley to define a stand alone libray for iterative  
  * linear solvers on the ALTIX. main entry through Paso_solve. this version compiles but  
  * has not been tested yet.  
  *  
  *  
  */  

Legend:
Removed from v.929  
changed lines
  Added in v.1986

  ViewVC Help
Powered by ViewVC 1.1.26