Annotation of /trunk/paso/src/BiCGStab.c

Revision 1630 - (hide annotations)
Sat Jul 12 07:15:32 2008 UTC (14 years, 1 month ago) by trankine
File MIME type: text/plain
File size: 8296 byte(s)
```Merge resulted in double declaration.
```
 1 ksteube 1312 2 jgs 150 /* \$Id\$ */ 3 4 ksteube 1312 /******************************************************* 5 * 6 * Copyright 2003-2007 by ACceSS MNRF 7 * Copyright 2007 by University of Queensland 8 * 9 * http://esscc.uq.edu.au 10 * Primary Business: Queensland, Australia 11 * Licensed under the Open Software License version 3.0 12 * http://www.opensource.org/licenses/osl-3.0.php 13 * 14 *******************************************************/ 15 dhawcroft 631 16 /* 17 jgs 150 Crude modifications and translations for Paso by Matt Davies and Lutz Gross 18 */ 19 20 gross 700 #include "Paso.h" 21 #include "SystemMatrix.h" 22 jgs 150 #include "Solver.h" 23 #ifdef _OPENMP 24 #include 25 #endif 26 27 /* -- Iterative template routine -- 28 * Univ. of Tennessee and Oak Ridge National Laboratory 29 * October 1, 1993 30 * Details of this algorithm are described in "Templates for the 31 * Solution of Linear Systems: Building Blocks for Iterative 32 * Methods", Barrett, Berry, Chan, Demmel, Donato, Dongarra, 33 * Eijkhout, Pozo, Romine, and van der Vorst, SIAM Publications, 34 * 1993. (ftp netlib2.cs.utk.edu; cd linalg; get templates.ps). 35 * 36 * Purpose 37 * ======= 38 * 39 * BICGSTAB solves the linear system A*x = b using the 40 * BiConjugate Gradient Stabilized iterative method with 41 * preconditioning. 42 * 43 * Convergence test: norm( b - A*x )< TOL. 44 * For other measures, see the above reference. 45 * 46 * Arguments 47 * ========= 48 * 49 * A (input) 50 * 51 * R (input) DOUBLE PRECISION array, dimension N. 52 * On entry, residual of inital guess X 53 * 54 * X (input/output) DOUBLE PRECISION array, dimension N. 55 * On input, the initial guess. 56 * 57 * ITER (input/output) INT 58 * On input, the maximum iterations to be performed. 59 * On output, actual number of iterations performed. 60 * 61 * RESID (input/output) DOUBLE PRECISION 62 * On input, the allowable convergence measure for 63 * norm( b - A*x ) 64 * On output, the final value of this measure. 65 * 66 * return value 67 * 68 * = SOLVER_NO_ERROR: Successful exit. Iterated approximate solution returned. 69 * = SOLVER_MAXITER_REACHED 70 * = SOLVER_INPUT_ERROR Illegal parameter: 71 * = SOLVER_BREAKDOWN: If parameters RHO or OMEGA become smaller 72 * = SOLVER_MEMORY_ERROR : If parameters RHO or OMEGA become smaller 73 * 74 * ============================================================== 75 */ 76 77 err_t Paso_Solver_BiCGStab( 78 Paso_SystemMatrix * A, 79 double * r, 80 double * x, 81 dim_t *iter, 82 gross 584 double * tolerance, 83 Paso_Performance* pp) { 84 jgs 150 85 86 /* Local variables */ 87 ksteube 1312 double *rtld=NULL,*p=NULL,*v=NULL,*t=NULL,*phat=NULL,*shat=NULL,*s=NULL, *buf1=NULL, *buf0=NULL; 88 jgs 150 double beta,norm_of_residual,sum_1,sum_2,sum_3,sum_4,norm_of_residual_global; 89 trankine 1630 double alpha, omega, omegaNumtr, omegaDenumtr, rho, tol, rho1; 90 phornby 1628 #ifdef PASO_MPI 91 double loc_sum[2], sum[2]; 92 #endif 93 jgs 150 dim_t num_iter=0,maxit,num_iter_global; 94 ksteube 1312 dim_t i0; 95 jgs 150 bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE; 96 dim_t status = SOLVER_NO_ERROR; 97 gross 1028 double *resid = tolerance; 98 ksteube 1312 dim_t n = Paso_SystemMatrix_getTotalNumRows(A); 99 jgs 150 100 /* Executable Statements */ 101 102 /* allocate memory: */ 103 rtld=TMPMEMALLOC(n,double); 104 p=TMPMEMALLOC(n,double); 105 v=TMPMEMALLOC(n,double); 106 t=TMPMEMALLOC(n,double); 107 phat=TMPMEMALLOC(n,double); 108 shat=TMPMEMALLOC(n,double); 109 s=TMPMEMALLOC(n,double); 110 /* Test the input parameters. */ 111 112 if (n < 0) { 113 status = SOLVER_INPUT_ERROR; 114 } else if (rtld==NULL || p==NULL || v==NULL || t==NULL || phat==NULL || shat==NULL || s==NULL) { 115 status = SOLVER_MEMORY_ERROR; 116 } else { 117 118 /* now bicgstab starts : */ 119 maxit = *iter; 120 tol = *resid; 121 122 gross 1556 num_iter =0; 123 convergeFlag=FALSE; 124 maxIterFlag=FALSE; 125 breakFlag=FALSE; 126 jgs 150 127 gross 1556 /* initialize arrays */ 128 129 #pragma omp parallel for private(i0) schedule(static) 130 for (i0 = 0; i0 < n; i0++) { 131 jgs 150 rtld[i0]=0; 132 p[i0]=0; 133 v[i0]=0; 134 t[i0]=0; 135 phat[i0]=0; 136 shat[i0]=0; 137 gross 1556 rtld[i0] = r[i0]; 138 } 139 jgs 150 140 gross 1556 /* Perform BiConjugate Gradient Stabilized iteration. */ 141 jgs 150 142 L10: 143 ++(num_iter); 144 sum_1 = 0; 145 sum_2 = 0; 146 sum_3 = 0; 147 sum_4 = 0; 148 omegaNumtr = 0.0; 149 gross 1556 omegaDenumtr = 0.0; 150 #pragma omp parallel for private(i0) reduction(+:sum_1) schedule(static) 151 jgs 150 for (i0 = 0; i0 < n; i0++) sum_1 += rtld[i0] * r[i0]; 152 gross 1556 #ifdef PASO_MPI 153 loc_sum[0] = sum_1; 154 MPI_Allreduce(loc_sum, &sum_1, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm); 155 gross 1553 #endif 156 jgs 150 rho = sum_1; 157 158 if (! (breakFlag = (ABS(rho) <= TOLERANCE_FOR_SCALARS))) { 159 /* Compute vector P. */ 160 161 if (num_iter > 1) { 162 beta = rho / rho1 * (alpha / omega); 163 gross 1556 #pragma omp parallel for private(i0) schedule(static) 164 jgs 150 for (i0 = 0; i0 < n; i0++) p[i0] = r[i0] + beta * (p[i0] - omega * v[i0]); 165 } else { 166 gross 1556 #pragma omp parallel for private(i0) schedule(static) 167 jgs 150 for (i0 = 0; i0 < n; i0++) p[i0] = r[i0]; 168 } 169 170 /* Compute direction adjusting vector PHAT and scalar ALPHA. */ 171 172 Paso_Solver_solvePreconditioner(A,&phat[0], &p[0]); 173 gross 415 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, &phat[0],ZERO, &v[0]); 174 jgs 150 175 gross 1556 #pragma omp parallel for private(i0) reduction(+:sum_2) schedule(static) 176 jgs 150 for (i0 = 0; i0 < n; i0++) sum_2 += rtld[i0] * v[i0]; 177 gross 1553 #ifdef PASO_MPI 178 gross 1556 loc_sum[0] = sum_2; 179 MPI_Allreduce(loc_sum, &sum_2, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm); 180 gross 1553 #endif 181 jgs 150 if (! (breakFlag = (ABS(sum_2) <= TOLERANCE_FOR_SCALARS))) { 182 alpha = rho / sum_2; 183 184 gross 1556 #pragma omp parallel for private(i0) reduction(+:sum_3) schedule(static) 185 jgs 150 for (i0 = 0; i0 < n; i0++) { 186 r[i0] -= alpha * v[i0]; 187 s[i0] = r[i0]; 188 sum_3 += s[i0] * s[i0]; 189 } 190 gross 1553 #ifdef PASO_MPI 191 gross 1556 loc_sum[0] = sum_3; 192 MPI_Allreduce(loc_sum, &sum_3, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm); 193 gross 1553 #endif 194 jgs 150 norm_of_residual = sqrt(sum_3); 195 196 /* Early check for tolerance. */ 197 if ( (convergeFlag = (norm_of_residual <= tol)) ) { 198 gross 1556 #pragma omp parallel for private(i0) schedule(static) 199 jgs 150 for (i0 = 0; i0 < n; i0++) x[i0] += alpha * phat[i0]; 200 maxIterFlag = FALSE; 201 breakFlag = FALSE; 202 } else { 203 /* Compute stabilizer vector SHAT and scalar OMEGA. */ 204 Paso_Solver_solvePreconditioner(A,&shat[0], &s[0]); 205 gross 415 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, &shat[0],ZERO,&t[0]); 206 jgs 150 207 gross 1556 #pragma omp parallel for private(i0) reduction(+:omegaNumtr,omegaDenumtr) schedule(static) 208 jgs 150 for (i0 = 0; i0 < n; i0++) { 209 omegaNumtr +=t[i0] * s[i0]; 210 omegaDenumtr += t[i0] * t[i0]; 211 } 212 gross 1553 #ifdef PASO_MPI 213 gross 1556 loc_sum[0] = omegaNumtr; 214 loc_sum[1] = omegaDenumtr; 215 MPI_Allreduce(loc_sum, sum, 2, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm); 216 omegaNumtr=sum[0]; 217 omegaDenumtr=sum[1]; 218 gross 1553 #endif 219 jgs 150 if (! (breakFlag = (ABS(omegaDenumtr) <= TOLERANCE_FOR_SCALARS))) { 220 omega = omegaNumtr / omegaDenumtr; 221 222 gross 1556 #pragma omp parallel for private(i0) reduction(+:sum_4) schedule(static) 223 jgs 150 for (i0 = 0; i0 < n; i0++) { 224 x[i0] += alpha * phat[i0] + omega * shat[i0]; 225 artak 1425 r[i0] = s[i0]-omega * t[i0]; 226 jgs 150 sum_4 += r[i0] * r[i0]; 227 } 228 gross 1553 #ifdef PASO_MPI 229 gross 1556 loc_sum[0] = sum_4; 230 MPI_Allreduce(loc_sum, &sum_4, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm); 231 gross 1553 #endif 232 jgs 150 norm_of_residual = sqrt(sum_4); 233 convergeFlag = norm_of_residual <= tol; 234 maxIterFlag = num_iter == maxit; 235 breakFlag = (ABS(omega) <= TOLERANCE_FOR_SCALARS); 236 } 237 } 238 } 239 if (!(convergeFlag || maxIterFlag || breakFlag)) { 240 rho1 = rho; 241 goto L10; 242 } 243 } 244 /* end of iteration */ 245 gross 1556 num_iter_global=num_iter; 246 norm_of_residual_global=norm_of_residual; 247 if (maxIterFlag) { 248 jgs 150 status = SOLVER_MAXITER_REACHED; 249 gross 1556 } else if (breakFlag) { 250 jgs 150 status = SOLVER_BREAKDOWN; 251 } 252 } 253 TMPMEMFREE(rtld); 254 TMPMEMFREE(p); 255 TMPMEMFREE(v); 256 TMPMEMFREE(t); 257 TMPMEMFREE(phat); 258 TMPMEMFREE(shat); 259 TMPMEMFREE(s); 260 *iter=num_iter_global; 261 *resid=norm_of_residual_global; 262 263 /* End of BICGSTAB */ 264 return status; 265 }

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

 ViewVC Help Powered by ViewVC 1.1.26