# Annotation of /branches/doubleplusgood/paso/src/TFQMR.cpp

Revision 1974 - (hide annotations)
Thu Nov 6 02:40:10 2008 UTC (11 years, 2 months ago) by jfenwick
Original Path: trunk/paso/src/TFQMR.c
File MIME type: text/plain
File size: 7358 byte(s)
```Changing ZERO and ONE in Solver.h to PASO_ZERO and PASO_ONE.
Changed three static vars to #defines
```
 1 artak 1703 2 /******************************************************* 3 ksteube 1811 * 4 * Copyright (c) 2003-2008 by University of Queensland 5 * Earth Systems Science Computational Center (ESSCC) 6 * http://www.uq.edu.au/esscc 7 * 8 * Primary Business: Queensland, Australia 9 * Licensed under the Open Software License version 3.0 10 * http://www.opensource.org/licenses/osl-3.0.php 11 * 12 *******************************************************/ 13 artak 1703 14 ksteube 1811 15 artak 1703 /* TFQMR iterations */ 16 17 #include "SystemMatrix.h" 18 #include "Paso.h" 19 #include "Solver.h" 20 #include "PasoUtil.h" 21 22 #ifdef _OPENMP 23 #include 24 #endif 25 26 #ifdef PASO_MPI 27 #include 28 #endif 29 30 /* 31 * 32 * Purpose 33 * ======= 34 * 35 * TFQMR solves the linear system A*x = b 36 * 37 * Convergence test: norm( b - A*x )< TOL. 38 * For other measures, see the above reference. 39 * 40 * Arguments 41 * ========= 42 * 43 * r (input) DOUBLE PRECISION array, dimension N. 44 * On entry, residual of inital guess x 45 * 46 * x (input/output) DOUBLE PRECISION array, dimension N. 47 * On input, the initial guess. 48 * 49 * ITER (input/output) INT 50 * On input, the maximum iterations to be performed. 51 * On output, actual number of iterations performed. 52 * 53 * INFO (output) INT 54 * 55 * = SOLVER_NO_ERROR: Successful exit. Iterated approximate solution returned. 56 * = SOLVEr_MAXITER_REACHED 57 * = SOLVER_INPUT_ERROR Illegal parameter: 58 * = SOLVEr_BREAKDOWN: If parameters rHO or OMEGA become smaller 59 * = SOLVER_MEMORY_ERROR : If parameters rHO or OMEGA become smaller 60 * 61 * ============================================================== 62 */ 63 64 /* #define PASO_DYNAMIC_SCHEDULING_MVM */ 65 66 #if defined PASO_DYNAMIC_SCHEDULING_MVM && defined __OPENMP 67 #define USE_DYNAMIC_SCHEDULING 68 #endif 69 70 err_t Paso_Solver_TFQMR( 71 Paso_SystemMatrix * A, 72 double * r, 73 double * x, 74 dim_t *iter, 75 double * tolerance, 76 Paso_Performance* pp) { 77 78 /* Local variables */ 79 artak 1706 80 int m=1; 81 int j=0; 82 83 artak 1703 dim_t num_iter=0,maxit; 84 bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE; 85 err_t status = SOLVER_NO_ERROR; 86 dim_t n = Paso_SystemMatrix_getTotalNumRows(A); 87 gross 1778 double *u1=NULL, *u2=NULL, *y1=NULL, *y2=NULL, *d=NULL, *w=NULL, *v=NULL, *v_old=NULL; 88 artak 1703 89 double eta,theta,tau,rho,beta,alpha,sigma,rhon,c; 90 91 double norm_of_residual; 92 artak 1706 93 artak 1703 /* */ 94 /*-----------------------------------------------------------------*/ 95 /* */ 96 /* Start of Calculation : */ 97 /* --------------------- */ 98 /* */ 99 /* */ 100 u1=TMPMEMALLOC(n,double); 101 u2=TMPMEMALLOC(n,double); 102 y1=TMPMEMALLOC(n,double); 103 y2=TMPMEMALLOC(n,double); 104 d=TMPMEMALLOC(n,double); 105 w=TMPMEMALLOC(n,double); 106 v=TMPMEMALLOC(n,double); 107 v_old=TMPMEMALLOC(n,double); 108 109 110 if (u1 ==NULL || u2== NULL || y1 == NULL || y2== NULL || d==NULL || w==NULL || v==NULL || v_old==NULL) { 111 status=SOLVER_MEMORY_ERROR; 112 } 113 114 maxit = *iter; 115 116 /* Test the input parameters. */ 117 if (n < 0 || maxit<=0 ) { 118 status=SOLVER_INPUT_ERROR; 119 } 120 121 Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER); 122 Paso_Solver_solvePreconditioner(A,r,r); 123 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER); 124 125 Performance_startMonitor(pp,PERFORMANCE_SOLVER); 126 127 Paso_zeroes(n,u2); 128 Paso_zeroes(n,y2); 129 130 Paso_Copy(n,w,r); 131 Paso_Copy(n,y1,r); 132 133 Paso_zeroes(n,d); 134 135 Performance_stopMonitor(pp,PERFORMANCE_SOLVER); 136 Performance_startMonitor(pp,PERFORMANCE_MVM); 137 jfenwick 1974 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, A, y1,PASO_ZERO,v); 138 artak 1703 Performance_stopMonitor(pp,PERFORMANCE_MVM); 139 Performance_startMonitor(pp,PERFORMANCE_SOLVER); 140 141 Performance_stopMonitor(pp,PERFORMANCE_SOLVER); 142 Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER); 143 Paso_Solver_solvePreconditioner(A,v,v); 144 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER); 145 Performance_startMonitor(pp,PERFORMANCE_SOLVER); 146 147 Paso_Copy(n,u1,v); 148 149 theta = 0.0; 150 eta = 0.0; 151 152 tau = Paso_l2(n,r,A->mpi_info); 153 154 rho = tau * tau; 155 artak 1706 156 artak 1703 norm_of_residual=tau*sqrt ( m + 1 ); 157 158 while (!(convergeFlag || maxIterFlag || breakFlag || (status !=SOLVER_NO_ERROR) )) 159 { 160 161 162 sigma=Paso_InnerProduct(n,r,v,A->mpi_info); 163 164 if (! (breakFlag = (ABS(sigma) == 0.))) { 165 166 alpha = rho / sigma; 167 168 for (j=0; j<=1; j=j+1) 169 { 170 /* Compute y2 and u2 only if you have to */ 171 if ( j == 1 ){ 172 Paso_LinearCombination(n,y2,1.,y1,-alpha,v); 173 174 Performance_stopMonitor(pp,PERFORMANCE_SOLVER); 175 Performance_startMonitor(pp,PERFORMANCE_MVM); 176 jfenwick 1974 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, A, y2,PASO_ZERO,u2); 177 artak 1703 Performance_stopMonitor(pp,PERFORMANCE_MVM); 178 Performance_startMonitor(pp,PERFORMANCE_SOLVER); 179 180 Performance_stopMonitor(pp,PERFORMANCE_SOLVER); 181 Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER); 182 Paso_Solver_solvePreconditioner(A,u2,u2); 183 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER); 184 Performance_startMonitor(pp,PERFORMANCE_SOLVER); 185 } 186 m = 2 * (num_iter+1) - 2 + (j+1); 187 if (j==0) { 188 Paso_Update(n,1.,w,-alpha,u1); 189 Paso_Update(n,( theta * theta * eta / alpha ),d,1.,y1); 190 } 191 if (j==1) { 192 Paso_Update(n,1.,w,-alpha,u2); 193 Paso_Update(n,( theta * theta * eta / alpha ),d,1.,y2); 194 } 195 196 theta =Paso_l2(n,w,A->mpi_info)/tau; 197 c = 1.0 / sqrt ( 1.0 + theta * theta ); 198 tau = tau * theta * c; 199 eta = c * c * alpha; 200 Paso_Update(n,1.,x,eta,d); 201 } 202 203 breakFlag = (ABS(rho) == 0); 204 205 rhon = Paso_InnerProduct(n,r,w,A->mpi_info); 206 beta = rhon / rho; 207 rho = rhon; 208 209 Paso_LinearCombination(n,y1,1.,w,beta,y2); 210 211 Performance_stopMonitor(pp,PERFORMANCE_SOLVER); 212 artak 1711 213 artak 1703 Performance_startMonitor(pp,PERFORMANCE_MVM); 214 jfenwick 1974 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, A, y1,PASO_ZERO,u1); 215 artak 1703 Performance_stopMonitor(pp,PERFORMANCE_MVM); 216 217 Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER); 218 Paso_Solver_solvePreconditioner(A,u1,u1); 219 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER); 220 artak 1711 221 artak 1703 Performance_startMonitor(pp,PERFORMANCE_SOLVER); 222 223 Paso_Copy(n,v_old,v); 224 225 Paso_Update(n,beta,v_old,1,u2); 226 Paso_LinearCombination(n,v,1.,u1,beta,v_old); 227 } 228 maxIterFlag = (num_iter > maxit); 229 norm_of_residual=tau*sqrt ( m + 1 ); 230 convergeFlag=(norm_of_residual<(*tolerance)); 231 232 233 if (maxIterFlag) { 234 status = SOLVER_MAXITER_REACHED; 235 } else if (breakFlag) { 236 status = SOLVER_BREAKDOWN; 237 } 238 ++(num_iter); 239 } 240 /* end of iteration */ 241 242 Performance_stopMonitor(pp,PERFORMANCE_SOLVER); 243 TMPMEMFREE(u1); 244 TMPMEMFREE(u2); 245 TMPMEMFREE(y1); 246 TMPMEMFREE(y2); 247 TMPMEMFREE(d); 248 TMPMEMFREE(w); 249 TMPMEMFREE(v); 250 TMPMEMFREE(v_old); 251 252 *iter=num_iter; 253 *tolerance=norm_of_residual; 254 255 /* End of TFQMR */ 256 return status; 257 ksteube 1708 }