# Contents of /trunk/paso/src/TFQMR.c

Revision 4154 - (show annotations)
Tue Jan 22 09:30:23 2013 UTC (7 years, 2 months ago) by jfenwick
File MIME type: text/plain
File size: 8009 byte(s)
```Round 1 of copyright fixes
```
 1 2 /***************************************************************************** 3 * 4 * Copyright (c) 2003-2013 by University of Queensland 5 * http://www.uq.edu.au 6 * 7 * Primary Business: Queensland, Australia 8 * Licensed under the Open Software License version 3.0 9 * http://www.opensource.org/licenses/osl-3.0.php 10 * 11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC) 12 * Development since 2012 by School of Earth Sciences 13 * 14 *****************************************************************************/ 15 16 17 /* TFQMR iterations */ 18 19 #include "SystemMatrix.h" 20 #include "Paso.h" 21 #include "Solver.h" 22 #include "PasoUtil.h" 23 24 #ifdef _OPENMP 25 #include 26 #endif 27 28 #ifdef ESYS_MPI 29 #include 30 #endif 31 32 /* 33 * 34 * Purpose 35 * ======= 36 * 37 * TFQMR solves the linear system A*x = b 38 * 39 * Convergence test: norm( b - A*x )< TOL. 40 * For other measures, see the above reference. 41 * 42 * Arguments 43 * ========= 44 * 45 * r (input) DOUBLE PRECISION array, dimension N. 46 * 47 * 48 * x (input/output) DOUBLE PRECISION array, dimension N. 49 * 50 * 51 * ITER (input/output) INT 52 * On input, the maximum iterations to be performed. 53 * On output, actual number of iterations performed. 54 * 55 * INFO (output) INT 56 * 57 * = SOLVER_NO_ERROR: Successful exit. Iterated approximate solution returned. 58 * = SOLVER_MAXITER_REACHED 59 * = SOLVER_INPUT_ERROR Illegal parameter: 60 * = SOLVER_BREAKDOWN: If parameters RHO or OMEGA become smaller 61 * = SOLVER_MEMORY_ERROR : If parameters RHO or OMEGA become smaller 62 * 63 * ============================================================== 64 */ 65 66 /* #define PASO_DYNAMIC_SCHEDULING_MVM */ 67 68 #if defined PASO_DYNAMIC_SCHEDULING_MVM && defined __OPENMP 69 #define USE_DYNAMIC_SCHEDULING 70 #endif 71 72 err_t Paso_Solver_TFQMR( 73 Paso_SystemMatrix * A, 74 double * r, 75 double * x, 76 dim_t *iter, 77 double * tolerance, 78 Paso_Performance* pp) { 79 80 /* Local variables */ 81 82 int m=1; 83 int j=0; 84 85 dim_t num_iter=0,maxit; 86 bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE; 87 err_t status = SOLVER_NO_ERROR; 88 dim_t n = Paso_SystemMatrix_getTotalNumRows(A); 89 double *u1=NULL, *u2=NULL, *y1=NULL, *y2=NULL, *d=NULL, *w=NULL, *v=NULL, *temp_vector=NULL,*res=NULL; 90 91 double eta,theta,tau,rho,beta,alpha,sigma,rhon,c; 92 93 double norm_of_residual; 94 95 /* */ 96 /*-----------------------------------------------------------------*/ 97 /* */ 98 /* Start of Calculation : */ 99 /* --------------------- */ 100 /* */ 101 /* */ 102 u1=TMPMEMALLOC(n,double); 103 u2=TMPMEMALLOC(n,double); 104 y1=TMPMEMALLOC(n,double); 105 y2=TMPMEMALLOC(n,double); 106 d=TMPMEMALLOC(n,double); 107 w=TMPMEMALLOC(n,double); 108 v=TMPMEMALLOC(n,double); 109 temp_vector=TMPMEMALLOC(n,double); 110 res=TMPMEMALLOC(n,double); 111 112 if (u1 ==NULL || u2== NULL || y1 == NULL || y2== NULL || d==NULL || w==NULL || v==NULL ) { 113 status=SOLVER_MEMORY_ERROR; 114 } 115 116 maxit = *iter; 117 118 /* Test the input parameters. */ 119 if (n < 0 || maxit<=0 ) { 120 status=SOLVER_INPUT_ERROR; 121 } 122 123 Paso_zeroes(n,x); 124 125 Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER); 126 Paso_SystemMatrix_solvePreconditioner(A,res,r); 127 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER); 128 129 Performance_startMonitor(pp,PERFORMANCE_SOLVER); 130 131 Paso_zeroes(n,u2); 132 Paso_zeroes(n,y2); 133 134 Paso_Copy(n,w,res); 135 Paso_Copy(n,y1,res); 136 137 Paso_zeroes(n,d); 138 139 Performance_stopMonitor(pp,PERFORMANCE_SOLVER); 140 Performance_startMonitor(pp,PERFORMANCE_MVM); 141 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, A, y1,PASO_ZERO,temp_vector); 142 Performance_stopMonitor(pp,PERFORMANCE_MVM); 143 Performance_startMonitor(pp,PERFORMANCE_SOLVER); 144 145 Performance_stopMonitor(pp,PERFORMANCE_SOLVER); 146 Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER); 147 Paso_SystemMatrix_solvePreconditioner(A,v,temp_vector); 148 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER); 149 Performance_startMonitor(pp,PERFORMANCE_SOLVER); 150 /* v = P^{-1} * A y1 */ 151 152 Paso_Copy(n,u1,v); 153 154 theta = 0.0; 155 eta = 0.0; 156 157 tau = Paso_l2(n,res,A->mpi_info); 158 159 rho = tau * tau; 160 161 norm_of_residual=tau; 162 163 while (!(convergeFlag || maxIterFlag || breakFlag || (status !=SOLVER_NO_ERROR) )) 164 { 165 166 167 sigma=Paso_InnerProduct(n,res,v,A->mpi_info); 168 169 if (! (breakFlag = (ABS(sigma) == 0.))) { 170 171 alpha = rho / sigma; 172 173 for (j=0; j<=1; j=j+1) 174 { 175 /* Compute y2 and u2 only if you have to */ 176 if ( j == 1 ){ 177 Paso_LinearCombination(n,y2,PASO_ONE,y1,-alpha,v); /* y2 = y1 - alpha*v */ 178 179 Performance_stopMonitor(pp,PERFORMANCE_SOLVER); 180 Performance_startMonitor(pp,PERFORMANCE_MVM); 181 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, A, y2,PASO_ZERO,temp_vector); 182 Performance_stopMonitor(pp,PERFORMANCE_MVM); 183 Performance_startMonitor(pp,PERFORMANCE_SOLVER); 184 185 Performance_stopMonitor(pp,PERFORMANCE_SOLVER); 186 Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER); 187 Paso_SystemMatrix_solvePreconditioner(A,u2,temp_vector); 188 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER); 189 Performance_startMonitor(pp,PERFORMANCE_SOLVER); 190 /* u2 = P^{-1} * A y2 */ 191 } 192 m = 2 * (num_iter+1) - 2 + (j+1); 193 194 if (j==0) { 195 Paso_Update(n,1.,w,-alpha,u1); /* w = w - alpha * u1 */ 196 Paso_Update(n,( theta * theta * eta / alpha ),d,1.,y1); /* d = ( theta * theta * eta / alpha )*d + y1 */ 197 } 198 if (j==1) { 199 Paso_Update(n,1.,w,-alpha,u2); /* w = w - -alpha * u2 */ 200 Paso_Update(n,( theta * theta * eta / alpha ),d,1.,y2); /* d = ( theta * theta * eta / alpha )*d + y2 */ 201 } 202 203 theta =Paso_l2(n,w,A->mpi_info)/tau; 204 /*printf("tau = %e, %e %e\n",tau, Paso_l2(n,w,A->mpi_info)/tau, theta);*/ 205 c = PASO_ONE / sqrt ( PASO_ONE + theta * theta ); 206 tau = tau * theta * c; 207 eta = c * c * alpha; 208 Paso_Update(n,1.,x,eta,d); 209 } 210 211 breakFlag = (ABS(rho) == 0); 212 213 rhon = Paso_InnerProduct(n,res,w,A->mpi_info); 214 beta = rhon / rho; 215 rho = rhon; 216 217 Paso_LinearCombination(n,y1, PASO_ONE,w,beta,y2); /* y1 = w + beta * y2 */ 218 219 Performance_stopMonitor(pp,PERFORMANCE_SOLVER); 220 Performance_startMonitor(pp,PERFORMANCE_MVM); 221 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, A, y1,PASO_ZERO,temp_vector); 222 Performance_stopMonitor(pp,PERFORMANCE_MVM); 223 224 Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER); 225 Paso_SystemMatrix_solvePreconditioner(A,u1,temp_vector); 226 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER); 227 Performance_startMonitor(pp,PERFORMANCE_SOLVER); 228 /* u1 = P^{-1} * A y1 */ 229 230 Paso_LinearCombination(n,temp_vector,PASO_ONE,u2,beta,v); /* t = u2 + beta * v */ 231 Paso_LinearCombination(n,v,PASO_ONE,u1,beta,temp_vector); /* v = u1 + beta * t */ 232 } 233 maxIterFlag = (num_iter > maxit); 234 norm_of_residual=tau*sqrt ( (double) (m + 1 ) ); 235 convergeFlag=(norm_of_residual<(*tolerance)); 236 237 238 if (maxIterFlag) { 239 status = SOLVER_MAXITER_REACHED; 240 } else if (breakFlag) { 241 status = SOLVER_BREAKDOWN; 242 } 243 ++(num_iter); 244 } 245 /* end of iteration */ 246 247 Performance_stopMonitor(pp,PERFORMANCE_SOLVER); 248 TMPMEMFREE(u1); 249 TMPMEMFREE(u2); 250 TMPMEMFREE(y1); 251 TMPMEMFREE(y2); 252 TMPMEMFREE(d); 253 TMPMEMFREE(w); 254 TMPMEMFREE(v); 255 TMPMEMFREE(temp_vector); 256 TMPMEMFREE(res); 257 *iter=num_iter; 258 *tolerance=norm_of_residual; 259 260 /* End of TFQMR */ 261 return status; 262 }