# Annotation of /trunk/paso/src/TFQMR.c

Revision 1711 - (hide annotations)
Tue Aug 19 03:37:25 2008 UTC (11 years, 6 months ago) by artak
File MIME type: text/plain
File size: 7398 byte(s)
```minor
```
 1 artak 1703 2 /******************************************************* 3 * 4 * Copyright 2003-2007 by ACceSS MNRF 5 * Copyright 2007 by University of Queensland 6 * 7 * http://esscc.uq.edu.au 8 * Primary Business: Queensland, Australia 9 * Licensed under the Open Software License version 3.0 10 * http://www.opensource.org/licenses/osl-3.0.php 11 * 12 *******************************************************/ 13 14 /* TFQMR iterations */ 15 16 #include "SystemMatrix.h" 17 #include "Paso.h" 18 #include "Solver.h" 19 #include "PasoUtil.h" 20 21 #ifdef _OPENMP 22 #include 23 #endif 24 25 #ifdef PASO_MPI 26 #include 27 #endif 28 29 /* 30 * 31 * Purpose 32 * ======= 33 * 34 * TFQMR solves the linear system A*x = b 35 * 36 * Convergence test: norm( b - A*x )< TOL. 37 * For other measures, see the above reference. 38 * 39 * Arguments 40 * ========= 41 * 42 * r (input) DOUBLE PRECISION array, dimension N. 43 * On entry, residual of inital guess x 44 * 45 * x (input/output) DOUBLE PRECISION array, dimension N. 46 * On input, the initial guess. 47 * 48 * ITER (input/output) INT 49 * On input, the maximum iterations to be performed. 50 * On output, actual number of iterations performed. 51 * 52 * INFO (output) INT 53 * 54 * = SOLVER_NO_ERROR: Successful exit. Iterated approximate solution returned. 55 * = SOLVEr_MAXITER_REACHED 56 * = SOLVER_INPUT_ERROR Illegal parameter: 57 * = SOLVEr_BREAKDOWN: If parameters rHO or OMEGA become smaller 58 * = SOLVER_MEMORY_ERROR : If parameters rHO or OMEGA become smaller 59 * 60 * ============================================================== 61 */ 62 63 /* #define PASO_DYNAMIC_SCHEDULING_MVM */ 64 65 #if defined PASO_DYNAMIC_SCHEDULING_MVM && defined __OPENMP 66 #define USE_DYNAMIC_SCHEDULING 67 #endif 68 69 err_t Paso_Solver_TFQMR( 70 Paso_SystemMatrix * A, 71 double * r, 72 double * x, 73 dim_t *iter, 74 double * tolerance, 75 Paso_Performance* pp) { 76 77 /* Local variables */ 78 artak 1706 79 int m=1; 80 int j=0; 81 82 artak 1703 dim_t num_iter=0,maxit; 83 bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE; 84 err_t status = SOLVER_NO_ERROR; 85 dim_t n = Paso_SystemMatrix_getTotalNumRows(A); 86 double *u1=NULL, *u2=NULL, *y1=NULL, *y2=NULL, *d=NULL, *w=NULL, *v=NULL, *v_old=NULL, *tmp=NULL ; 87 88 double eta,theta,tau,rho,beta,alpha,sigma,rhon,c; 89 90 double norm_of_residual; 91 artak 1706 92 artak 1703 /* */ 93 /*-----------------------------------------------------------------*/ 94 /* */ 95 /* Start of Calculation : */ 96 /* --------------------- */ 97 /* */ 98 /* */ 99 u1=TMPMEMALLOC(n,double); 100 u2=TMPMEMALLOC(n,double); 101 y1=TMPMEMALLOC(n,double); 102 y2=TMPMEMALLOC(n,double); 103 d=TMPMEMALLOC(n,double); 104 w=TMPMEMALLOC(n,double); 105 v=TMPMEMALLOC(n,double); 106 v_old=TMPMEMALLOC(n,double); 107 108 109 tmp=TMPMEMALLOC(n,double); 110 111 112 if (u1 ==NULL || u2== NULL || y1 == NULL || y2== NULL || d==NULL || w==NULL || v==NULL || v_old==NULL) { 113 status=SOLVER_MEMORY_ERROR; 114 } 115 116 maxit = *iter; 117 118 /* Test the input parameters. */ 119 if (n < 0 || maxit<=0 ) { 120 status=SOLVER_INPUT_ERROR; 121 } 122 123 Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER); 124 Paso_Solver_solvePreconditioner(A,r,r); 125 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER); 126 127 Performance_startMonitor(pp,PERFORMANCE_SOLVER); 128 129 Paso_zeroes(n,u2); 130 Paso_zeroes(n,y2); 131 132 Paso_Copy(n,w,r); 133 Paso_Copy(n,y1,r); 134 135 Paso_zeroes(n,d); 136 137 Performance_stopMonitor(pp,PERFORMANCE_SOLVER); 138 Performance_startMonitor(pp,PERFORMANCE_MVM); 139 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, y1,ZERO,v); 140 Performance_stopMonitor(pp,PERFORMANCE_MVM); 141 Performance_startMonitor(pp,PERFORMANCE_SOLVER); 142 143 Performance_stopMonitor(pp,PERFORMANCE_SOLVER); 144 Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER); 145 Paso_Solver_solvePreconditioner(A,v,v); 146 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER); 147 Performance_startMonitor(pp,PERFORMANCE_SOLVER); 148 149 Paso_Copy(n,u1,v); 150 151 theta = 0.0; 152 eta = 0.0; 153 154 tau = Paso_l2(n,r,A->mpi_info); 155 156 rho = tau * tau; 157 artak 1706 158 artak 1703 norm_of_residual=tau*sqrt ( m + 1 ); 159 160 while (!(convergeFlag || maxIterFlag || breakFlag || (status !=SOLVER_NO_ERROR) )) 161 { 162 163 164 sigma=Paso_InnerProduct(n,r,v,A->mpi_info); 165 166 if (! (breakFlag = (ABS(sigma) == 0.))) { 167 168 alpha = rho / sigma; 169 170 for (j=0; j<=1; j=j+1) 171 { 172 /* Compute y2 and u2 only if you have to */ 173 if ( j == 1 ){ 174 Paso_LinearCombination(n,y2,1.,y1,-alpha,v); 175 176 Performance_stopMonitor(pp,PERFORMANCE_SOLVER); 177 Performance_startMonitor(pp,PERFORMANCE_MVM); 178 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, y2,ZERO,u2); 179 Performance_stopMonitor(pp,PERFORMANCE_MVM); 180 Performance_startMonitor(pp,PERFORMANCE_SOLVER); 181 182 Performance_stopMonitor(pp,PERFORMANCE_SOLVER); 183 Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER); 184 Paso_Solver_solvePreconditioner(A,u2,u2); 185 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER); 186 Performance_startMonitor(pp,PERFORMANCE_SOLVER); 187 } 188 m = 2 * (num_iter+1) - 2 + (j+1); 189 if (j==0) { 190 Paso_Update(n,1.,w,-alpha,u1); 191 Paso_Update(n,( theta * theta * eta / alpha ),d,1.,y1); 192 } 193 if (j==1) { 194 Paso_Update(n,1.,w,-alpha,u2); 195 Paso_Update(n,( theta * theta * eta / alpha ),d,1.,y2); 196 } 197 198 theta =Paso_l2(n,w,A->mpi_info)/tau; 199 c = 1.0 / sqrt ( 1.0 + theta * theta ); 200 tau = tau * theta * c; 201 eta = c * c * alpha; 202 Paso_Update(n,1.,x,eta,d); 203 } 204 205 breakFlag = (ABS(rho) == 0); 206 207 rhon = Paso_InnerProduct(n,r,w,A->mpi_info); 208 beta = rhon / rho; 209 rho = rhon; 210 211 Paso_LinearCombination(n,y1,1.,w,beta,y2); 212 213 Performance_stopMonitor(pp,PERFORMANCE_SOLVER); 214 artak 1711 215 artak 1703 Performance_startMonitor(pp,PERFORMANCE_MVM); 216 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, y1,ZERO,u1); 217 Performance_stopMonitor(pp,PERFORMANCE_MVM); 218 219 Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER); 220 Paso_Solver_solvePreconditioner(A,u1,u1); 221 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER); 222 artak 1711 223 artak 1703 Performance_startMonitor(pp,PERFORMANCE_SOLVER); 224 225 Paso_Copy(n,v_old,v); 226 227 Paso_Update(n,beta,v_old,1,u2); 228 Paso_LinearCombination(n,v,1.,u1,beta,v_old); 229 } 230 maxIterFlag = (num_iter > maxit); 231 norm_of_residual=tau*sqrt ( m + 1 ); 232 convergeFlag=(norm_of_residual<(*tolerance)); 233 234 235 if (maxIterFlag) { 236 status = SOLVER_MAXITER_REACHED; 237 } else if (breakFlag) { 238 status = SOLVER_BREAKDOWN; 239 } 240 ++(num_iter); 241 } 242 /* end of iteration */ 243 244 Performance_stopMonitor(pp,PERFORMANCE_SOLVER); 245 TMPMEMFREE(u1); 246 TMPMEMFREE(u2); 247 TMPMEMFREE(y1); 248 TMPMEMFREE(y2); 249 TMPMEMFREE(d); 250 TMPMEMFREE(w); 251 TMPMEMFREE(v); 252 TMPMEMFREE(v_old); 253 254 *iter=num_iter; 255 *tolerance=norm_of_residual; 256 257 /* End of TFQMR */ 258 return status; 259 ksteube 1708 }