/[escript]/trunk/paso/src/BiCGStab.c
ViewVC logotype

Diff of /trunk/paso/src/BiCGStab.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 929 by gross, Wed Jan 17 07:41:13 2007 UTC revision 1553 by gross, Thu May 8 09:38:07 2008 UTC
# Line 1  Line 1 
1    
2  /* $Id$ */  /* $Id$ */
3    
4  /*  /*******************************************************
5  ********************************************************************************   *
6  *               Copyright   2006 by ACcESS MNRF                                *   *           Copyright 2003-2007 by ACceSS MNRF
7  *                                                                              *   *       Copyright 2007 by University of Queensland
8  *                 http://www.access.edu.au                                     *   *
9  *           Primary Business: Queensland, Australia                            *   *                http://esscc.uq.edu.au
10  *     Licensed under the Open Software License version 3.0             *   *        Primary Business: Queensland, Australia
11  *        http://www.opensource.org/licenses/osl-3.0.php                        *   *  Licensed under the Open Software License version 3.0
12  ********************************************************************************   *     http://www.opensource.org/licenses/osl-3.0.php
13  */   *
14     *******************************************************/
15    
16  /*  /*
17     Crude modifications and translations for Paso by Matt Davies and Lutz Gross     Crude modifications and translations for Paso by Matt Davies and Lutz Gross
# Line 82  err_t Paso_Solver_BiCGStab( Line 84  err_t Paso_Solver_BiCGStab(
84    
85    
86    /* Local variables */    /* Local variables */
87    double *rtld=NULL,*p=NULL,*v=NULL,*t=NULL,*phat=NULL,*shat=NULL,*s=NULL;    double *rtld=NULL,*p=NULL,*v=NULL,*t=NULL,*phat=NULL,*shat=NULL,*s=NULL, *buf1=NULL, *buf0=NULL;
88    double beta,norm_of_residual,sum_1,sum_2,sum_3,sum_4,norm_of_residual_global;    double beta,norm_of_residual,sum_1,sum_2,sum_3,sum_4,norm_of_residual_global;
89    double alpha, omega, omegaNumtr, omegaDenumtr, rho, tol, rho1;    double alpha, omega, omegaNumtr, omegaDenumtr, rho, tol, rho1, loc_sum[2], sum[2];
90    dim_t num_iter=0,maxit,num_iter_global;    dim_t num_iter=0,maxit,num_iter_global;
91    dim_t i0;    dim_t i0;
92    bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE;    bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE;
93    dim_t status = SOLVER_NO_ERROR;    dim_t status = SOLVER_NO_ERROR;
94      double *resid = tolerance;
95    /* adapt original routine parameters */    dim_t n = Paso_SystemMatrix_getTotalNumRows(A);
   dim_t n = A->num_cols * A-> col_block_size;;  
   double * resid = tolerance;  
96    
97    /* Executable Statements */    /* Executable Statements */
98    
# Line 104  err_t Paso_Solver_BiCGStab( Line 104  err_t Paso_Solver_BiCGStab(
104    phat=TMPMEMALLOC(n,double);    phat=TMPMEMALLOC(n,double);
105    shat=TMPMEMALLOC(n,double);    shat=TMPMEMALLOC(n,double);
106    s=TMPMEMALLOC(n,double);    s=TMPMEMALLOC(n,double);
   
107    /*     Test the input parameters. */    /*     Test the input parameters. */
108    
109    if (n < 0) {    if (n < 0) {
# Line 154  err_t Paso_Solver_BiCGStab( Line 153  err_t Paso_Solver_BiCGStab(
153      omegaDenumtr = 0.0;      omegaDenumtr = 0.0;
154        }        }
155        #pragma omp barrier        #pragma omp barrier
       #pragma ivdep  
156        #pragma omp for private(i0) reduction(+:sum_1) schedule(static)        #pragma omp for private(i0) reduction(+:sum_1) schedule(static)
157        for (i0 = 0; i0 < n; i0++) sum_1 += rtld[i0] * r[i0];        for (i0 = 0; i0 < n; i0++) sum_1 += rtld[i0] * r[i0];
158           #ifdef PASO_MPI
159             #pragma omp master
160             {
161                loc_sum[0] = sum_1;
162                MPI_Allreduce(loc_sum, &sum_1, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
163             }
164          #endif
165        rho = sum_1;        rho = sum_1;
166                
167        if (! (breakFlag = (ABS(rho) <= TOLERANCE_FOR_SCALARS))) {        if (! (breakFlag = (ABS(rho) <= TOLERANCE_FOR_SCALARS))) {
# Line 164  err_t Paso_Solver_BiCGStab( Line 169  err_t Paso_Solver_BiCGStab(
169                
170      if (num_iter > 1) {      if (num_iter > 1) {
171        beta = rho / rho1 * (alpha / omega);        beta = rho / rho1 * (alpha / omega);
           #pragma ivdep  
172            #pragma omp for private(i0) schedule(static)            #pragma omp for private(i0) schedule(static)
173        for (i0 = 0; i0 < n; i0++) p[i0] = r[i0] + beta * (p[i0] - omega * v[i0]);        for (i0 = 0; i0 < n; i0++) p[i0] = r[i0] + beta * (p[i0] - omega * v[i0]);
174      } else {      } else {
           #pragma ivdep  
175            #pragma omp for private(i0) schedule(static)            #pragma omp for private(i0) schedule(static)
176        for (i0 = 0; i0 < n; i0++) p[i0] = r[i0];        for (i0 = 0; i0 < n; i0++) p[i0] = r[i0];
177      }      }
# Line 178  err_t Paso_Solver_BiCGStab( Line 181  err_t Paso_Solver_BiCGStab(
181          Paso_Solver_solvePreconditioner(A,&phat[0], &p[0]);          Paso_Solver_solvePreconditioner(A,&phat[0], &p[0]);
182      Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, &phat[0],ZERO, &v[0]);      Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, &phat[0],ZERO, &v[0]);
183        
         // #pragma ivdep  
184          #pragma omp for private(i0) reduction(+:sum_2) schedule(static)          #pragma omp for private(i0) reduction(+:sum_2) schedule(static)
185      for (i0 = 0; i0 < n; i0++) sum_2 += rtld[i0] * v[i0];      for (i0 = 0; i0 < n; i0++) sum_2 += rtld[i0] * v[i0];
186            #ifdef PASO_MPI
187                #pragma omp master
188                {
189                   loc_sum[0] = sum_2;
190                   MPI_Allreduce(loc_sum, &sum_2, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
191                }
192            #endif
193          if (! (breakFlag = (ABS(sum_2) <= TOLERANCE_FOR_SCALARS))) {          if (! (breakFlag = (ABS(sum_2) <= TOLERANCE_FOR_SCALARS))) {
194         alpha = rho / sum_2;         alpha = rho / sum_2;
195    
            // #pragma ivdep  
196             #pragma omp for private(i0) reduction(+:sum_3) schedule(static)             #pragma omp for private(i0) reduction(+:sum_3) schedule(static)
197         for (i0 = 0; i0 < n; i0++) {         for (i0 = 0; i0 < n; i0++) {
198           r[i0] -= alpha * v[i0];           r[i0] -= alpha * v[i0];
199           s[i0] = r[i0];           s[i0] = r[i0];
200           sum_3 += s[i0] * s[i0];           sum_3 += s[i0] * s[i0];
201         }         }
202               #ifdef PASO_MPI
203                   #pragma omp master
204                   {
205                      loc_sum[0] = sum_3;
206                      MPI_Allreduce(loc_sum, &sum_3, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
207                   }
208               #endif
209         norm_of_residual = sqrt(sum_3);         norm_of_residual = sqrt(sum_3);
210                    
211         /*        Early check for tolerance. */         /*        Early check for tolerance. */
212         if ( (convergeFlag = (norm_of_residual <= tol)) ) {         if ( (convergeFlag = (norm_of_residual <= tol)) ) {
              // #pragma ivdep  
213               #pragma omp for  private(i0) schedule(static)               #pragma omp for  private(i0) schedule(static)
214           for (i0 = 0; i0 < n; i0++) x[i0] += alpha * phat[i0];           for (i0 = 0; i0 < n; i0++) x[i0] += alpha * phat[i0];
215           maxIterFlag = FALSE;           maxIterFlag = FALSE;
# Line 205  err_t Paso_Solver_BiCGStab( Line 219  err_t Paso_Solver_BiCGStab(
219               Paso_Solver_solvePreconditioner(A,&shat[0], &s[0]);               Paso_Solver_solvePreconditioner(A,&shat[0], &s[0]);
220           Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, &shat[0],ZERO,&t[0]);           Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, &shat[0],ZERO,&t[0]);
221        
              // #pragma ivdep  
222               #pragma omp for private(i0) reduction(+:omegaNumtr,omegaDenumtr) schedule(static)               #pragma omp for private(i0) reduction(+:omegaNumtr,omegaDenumtr) schedule(static)
223           for (i0 = 0; i0 < n; i0++) {           for (i0 = 0; i0 < n; i0++) {
224             omegaNumtr +=t[i0] * s[i0];             omegaNumtr +=t[i0] * s[i0];
225             omegaDenumtr += t[i0] * t[i0];             omegaDenumtr += t[i0] * t[i0];
226           }           }
227                 #ifdef PASO_MPI
228                     #pragma omp master
229                     {
230                        loc_sum[0] = omegaNumtr;
231                        loc_sum[1] = omegaDenumtr;
232                        MPI_Allreduce(loc_sum, sum, 2, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
233                        omegaNumtr=sum[0];
234                        omegaDenumtr=sum[1];
235                     }
236                 #endif
237               if (! (breakFlag = (ABS(omegaDenumtr) <= TOLERANCE_FOR_SCALARS))) {               if (! (breakFlag = (ABS(omegaDenumtr) <= TOLERANCE_FOR_SCALARS))) {
238              omega = omegaNumtr / omegaDenumtr;              omega = omegaNumtr / omegaDenumtr;
239        
                 // #pragma ivdep  
240                  #pragma omp for private(i0) reduction(+:sum_4) schedule(static)                  #pragma omp for private(i0) reduction(+:sum_4) schedule(static)
241              for (i0 = 0; i0 < n; i0++) {              for (i0 = 0; i0 < n; i0++) {
242                x[i0] += alpha * phat[i0] + omega * shat[i0];                x[i0] += alpha * phat[i0] + omega * shat[i0];
243                r[i0] -= omega * t[i0];                r[i0] = s[i0]-omega * t[i0];
244                sum_4 += r[i0] * r[i0];                sum_4 += r[i0] * r[i0];
245              }              }
246                    #ifdef PASO_MPI
247                        #pragma omp master
248                        {
249                           loc_sum[0] = sum_4;
250                           MPI_Allreduce(loc_sum, &sum_4, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
251                        }
252                    #endif
253              norm_of_residual = sqrt(sum_4);              norm_of_residual = sqrt(sum_4);
254              convergeFlag = norm_of_residual <= tol;              convergeFlag = norm_of_residual <= tol;
255              maxIterFlag = num_iter == maxit;              maxIterFlag = num_iter == maxit;
# Line 259  err_t Paso_Solver_BiCGStab( Line 288  err_t Paso_Solver_BiCGStab(
288    /*     End of BICGSTAB */    /*     End of BICGSTAB */
289    return status;    return status;
290  }  }
 /*  
  * $Log$  
  * Revision 1.2  2005/09/15 03:44:40  jgs  
  * Merge of development branch dev-02 back to main trunk on 2005-09-15  
  *  
  * Revision 1.1.2.1  2005/09/05 06:29:49  gross  
  * These files have been extracted from finley to define a stand alone libray for iterative  
  * linear solvers on the ALTIX. main entry through Paso_solve. this version compiles but  
  * has not been tested yet.  
  *  
  *  
  */  

Legend:
Removed from v.929  
changed lines
  Added in v.1553

  ViewVC Help
Powered by ViewVC 1.1.26