# Annotation of /trunk/paso/src/TFQMR.c

Revision 1778 - (hide annotations)
Tue Sep 9 07:46:02 2008 UTC (11 years, 5 months ago) by gross
File MIME type: text/plain
File size: 7351 byte(s)
```memry leak fixed
```
 1 artak 1703 2 /******************************************************* 3 * 4 * Copyright 2003-2007 by ACceSS MNRF 5 * Copyright 2007 by University of Queensland 6 * 7 * http://esscc.uq.edu.au 8 * Primary Business: Queensland, Australia 9 * Licensed under the Open Software License version 3.0 10 * http://www.opensource.org/licenses/osl-3.0.php 11 * 12 *******************************************************/ 13 14 /* TFQMR iterations */ 15 16 #include "SystemMatrix.h" 17 #include "Paso.h" 18 #include "Solver.h" 19 #include "PasoUtil.h" 20 21 #ifdef _OPENMP 22 #include 23 #endif 24 25 #ifdef PASO_MPI 26 #include 27 #endif 28 29 /* 30 * 31 * Purpose 32 * ======= 33 * 34 * TFQMR solves the linear system A*x = b 35 * 36 * Convergence test: norm( b - A*x )< TOL. 37 * For other measures, see the above reference. 38 * 39 * Arguments 40 * ========= 41 * 42 * r (input) DOUBLE PRECISION array, dimension N. 43 * On entry, residual of inital guess x 44 * 45 * x (input/output) DOUBLE PRECISION array, dimension N. 46 * On input, the initial guess. 47 * 48 * ITER (input/output) INT 49 * On input, the maximum iterations to be performed. 50 * On output, actual number of iterations performed. 51 * 52 * INFO (output) INT 53 * 54 * = SOLVER_NO_ERROR: Successful exit. Iterated approximate solution returned. 55 * = SOLVEr_MAXITER_REACHED 56 * = SOLVER_INPUT_ERROR Illegal parameter: 57 * = SOLVEr_BREAKDOWN: If parameters rHO or OMEGA become smaller 58 * = SOLVER_MEMORY_ERROR : If parameters rHO or OMEGA become smaller 59 * 60 * ============================================================== 61 */ 62 63 /* #define PASO_DYNAMIC_SCHEDULING_MVM */ 64 65 #if defined PASO_DYNAMIC_SCHEDULING_MVM && defined __OPENMP 66 #define USE_DYNAMIC_SCHEDULING 67 #endif 68 69 err_t Paso_Solver_TFQMR( 70 Paso_SystemMatrix * A, 71 double * r, 72 double * x, 73 dim_t *iter, 74 double * tolerance, 75 Paso_Performance* pp) { 76 77 /* Local variables */ 78 artak 1706 79 int m=1; 80 int j=0; 81 82 artak 1703 dim_t num_iter=0,maxit; 83 bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE; 84 err_t status = SOLVER_NO_ERROR; 85 dim_t n = Paso_SystemMatrix_getTotalNumRows(A); 86 gross 1778 double *u1=NULL, *u2=NULL, *y1=NULL, *y2=NULL, *d=NULL, *w=NULL, *v=NULL, *v_old=NULL; 87 artak 1703 88 double eta,theta,tau,rho,beta,alpha,sigma,rhon,c; 89 90 double norm_of_residual; 91 artak 1706 92 artak 1703 /* */ 93 /*-----------------------------------------------------------------*/ 94 /* */ 95 /* Start of Calculation : */ 96 /* --------------------- */ 97 /* */ 98 /* */ 99 u1=TMPMEMALLOC(n,double); 100 u2=TMPMEMALLOC(n,double); 101 y1=TMPMEMALLOC(n,double); 102 y2=TMPMEMALLOC(n,double); 103 d=TMPMEMALLOC(n,double); 104 w=TMPMEMALLOC(n,double); 105 v=TMPMEMALLOC(n,double); 106 v_old=TMPMEMALLOC(n,double); 107 108 109 if (u1 ==NULL || u2== NULL || y1 == NULL || y2== NULL || d==NULL || w==NULL || v==NULL || v_old==NULL) { 110 status=SOLVER_MEMORY_ERROR; 111 } 112 113 maxit = *iter; 114 115 /* Test the input parameters. */ 116 if (n < 0 || maxit<=0 ) { 117 status=SOLVER_INPUT_ERROR; 118 } 119 120 Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER); 121 Paso_Solver_solvePreconditioner(A,r,r); 122 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER); 123 124 Performance_startMonitor(pp,PERFORMANCE_SOLVER); 125 126 Paso_zeroes(n,u2); 127 Paso_zeroes(n,y2); 128 129 Paso_Copy(n,w,r); 130 Paso_Copy(n,y1,r); 131 132 Paso_zeroes(n,d); 133 134 Performance_stopMonitor(pp,PERFORMANCE_SOLVER); 135 Performance_startMonitor(pp,PERFORMANCE_MVM); 136 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, y1,ZERO,v); 137 Performance_stopMonitor(pp,PERFORMANCE_MVM); 138 Performance_startMonitor(pp,PERFORMANCE_SOLVER); 139 140 Performance_stopMonitor(pp,PERFORMANCE_SOLVER); 141 Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER); 142 Paso_Solver_solvePreconditioner(A,v,v); 143 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER); 144 Performance_startMonitor(pp,PERFORMANCE_SOLVER); 145 146 Paso_Copy(n,u1,v); 147 148 theta = 0.0; 149 eta = 0.0; 150 151 tau = Paso_l2(n,r,A->mpi_info); 152 153 rho = tau * tau; 154 artak 1706 155 artak 1703 norm_of_residual=tau*sqrt ( m + 1 ); 156 157 while (!(convergeFlag || maxIterFlag || breakFlag || (status !=SOLVER_NO_ERROR) )) 158 { 159 160 161 sigma=Paso_InnerProduct(n,r,v,A->mpi_info); 162 163 if (! (breakFlag = (ABS(sigma) == 0.))) { 164 165 alpha = rho / sigma; 166 167 for (j=0; j<=1; j=j+1) 168 { 169 /* Compute y2 and u2 only if you have to */ 170 if ( j == 1 ){ 171 Paso_LinearCombination(n,y2,1.,y1,-alpha,v); 172 173 Performance_stopMonitor(pp,PERFORMANCE_SOLVER); 174 Performance_startMonitor(pp,PERFORMANCE_MVM); 175 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, y2,ZERO,u2); 176 Performance_stopMonitor(pp,PERFORMANCE_MVM); 177 Performance_startMonitor(pp,PERFORMANCE_SOLVER); 178 179 Performance_stopMonitor(pp,PERFORMANCE_SOLVER); 180 Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER); 181 Paso_Solver_solvePreconditioner(A,u2,u2); 182 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER); 183 Performance_startMonitor(pp,PERFORMANCE_SOLVER); 184 } 185 m = 2 * (num_iter+1) - 2 + (j+1); 186 if (j==0) { 187 Paso_Update(n,1.,w,-alpha,u1); 188 Paso_Update(n,( theta * theta * eta / alpha ),d,1.,y1); 189 } 190 if (j==1) { 191 Paso_Update(n,1.,w,-alpha,u2); 192 Paso_Update(n,( theta * theta * eta / alpha ),d,1.,y2); 193 } 194 195 theta =Paso_l2(n,w,A->mpi_info)/tau; 196 c = 1.0 / sqrt ( 1.0 + theta * theta ); 197 tau = tau * theta * c; 198 eta = c * c * alpha; 199 Paso_Update(n,1.,x,eta,d); 200 } 201 202 breakFlag = (ABS(rho) == 0); 203 204 rhon = Paso_InnerProduct(n,r,w,A->mpi_info); 205 beta = rhon / rho; 206 rho = rhon; 207 208 Paso_LinearCombination(n,y1,1.,w,beta,y2); 209 210 Performance_stopMonitor(pp,PERFORMANCE_SOLVER); 211 artak 1711 212 artak 1703 Performance_startMonitor(pp,PERFORMANCE_MVM); 213 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, y1,ZERO,u1); 214 Performance_stopMonitor(pp,PERFORMANCE_MVM); 215 216 Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER); 217 Paso_Solver_solvePreconditioner(A,u1,u1); 218 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER); 219 artak 1711 220 artak 1703 Performance_startMonitor(pp,PERFORMANCE_SOLVER); 221 222 Paso_Copy(n,v_old,v); 223 224 Paso_Update(n,beta,v_old,1,u2); 225 Paso_LinearCombination(n,v,1.,u1,beta,v_old); 226 } 227 maxIterFlag = (num_iter > maxit); 228 norm_of_residual=tau*sqrt ( m + 1 ); 229 convergeFlag=(norm_of_residual<(*tolerance)); 230 231 232 if (maxIterFlag) { 233 status = SOLVER_MAXITER_REACHED; 234 } else if (breakFlag) { 235 status = SOLVER_BREAKDOWN; 236 } 237 ++(num_iter); 238 } 239 /* end of iteration */ 240 241 Performance_stopMonitor(pp,PERFORMANCE_SOLVER); 242 TMPMEMFREE(u1); 243 TMPMEMFREE(u2); 244 TMPMEMFREE(y1); 245 TMPMEMFREE(y2); 246 TMPMEMFREE(d); 247 TMPMEMFREE(w); 248 TMPMEMFREE(v); 249 TMPMEMFREE(v_old); 250 251 *iter=num_iter; 252 *tolerance=norm_of_residual; 253 254 /* End of TFQMR */ 255 return status; 256 ksteube 1708 }