# Contents of /trunk/paso/src/TFQMR.c

Revision 3981 - (show annotations)
Fri Sep 21 02:47:54 2012 UTC (7 years, 6 months ago) by jfenwick
File MIME type: text/plain
File size: 7657 byte(s)
```First pass of updating copyright notices
```
 1 2 /***************************************************************************** 3 * 4 * Copyright (c) 2003-2012 by University of Queensland 5 * http://www.uq.edu.au 6 * 7 * Primary Business: Queensland, Australia 8 * Licensed under the Open Software License version 3.0 9 * http://www.opensource.org/licenses/osl-3.0.php 10 * 11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC) 12 * Development since 2012 by School of Earth Sciences 13 * 14 *****************************************************************************/ 15 16 17 /* TFQMR iterations */ 18 19 #include "SystemMatrix.h" 20 #include "Paso.h" 21 #include "Solver.h" 22 #include "PasoUtil.h" 23 24 #ifdef _OPENMP 25 #include 26 #endif 27 28 #ifdef ESYS_MPI 29 #include 30 #endif 31 32 /* 33 * 34 * Purpose 35 * ======= 36 * 37 * TFQMR solves the linear system A*x = b 38 * 39 * Convergence test: norm( b - A*x )< TOL. 40 * For other measures, see the above reference. 41 * 42 * Arguments 43 * ========= 44 * 45 * r (input) DOUBLE PRECISION array, dimension N. 46 * 47 * 48 * x (input/output) DOUBLE PRECISION array, dimension N. 49 * 50 * 51 * ITER (input/output) INT 52 * On input, the maximum iterations to be performed. 53 * On output, actual number of iterations performed. 54 * 55 * INFO (output) INT 56 * 57 * = SOLVER_NO_ERROR: Successful exit. Iterated approximate solution returned. 58 * = SOLVER_MAXITER_REACHED 59 * = SOLVER_INPUT_ERROR Illegal parameter: 60 * = SOLVER_BREAKDOWN: If parameters RHO or OMEGA become smaller 61 * = SOLVER_MEMORY_ERROR : If parameters RHO or OMEGA become smaller 62 * 63 * ============================================================== 64 */ 65 66 /* #define PASO_DYNAMIC_SCHEDULING_MVM */ 67 68 #if defined PASO_DYNAMIC_SCHEDULING_MVM && defined __OPENMP 69 #define USE_DYNAMIC_SCHEDULING 70 #endif 71 72 err_t Paso_Solver_TFQMR( 73 Paso_SystemMatrix * A, 74 double * r, 75 double * x, 76 dim_t *iter, 77 double * tolerance, 78 Paso_Performance* pp) { 79 80 /* Local variables */ 81 82 int m=1; 83 int j=0; 84 85 dim_t num_iter=0,maxit; 86 bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE; 87 err_t status = SOLVER_NO_ERROR; 88 dim_t n = Paso_SystemMatrix_getTotalNumRows(A); 89 double *u1=NULL, *u2=NULL, *y1=NULL, *y2=NULL, *d=NULL, *w=NULL, *v=NULL, *v_old=NULL, *temp_vector=NULL,*res=NULL; 90 91 double eta,theta,tau,rho,beta,alpha,sigma,rhon,c; 92 93 double norm_of_residual; 94 95 /* */ 96 /*-----------------------------------------------------------------*/ 97 /* */ 98 /* Start of Calculation : */ 99 /* --------------------- */ 100 /* */ 101 /* */ 102 u1=TMPMEMALLOC(n,double); 103 u2=TMPMEMALLOC(n,double); 104 y1=TMPMEMALLOC(n,double); 105 y2=TMPMEMALLOC(n,double); 106 d=TMPMEMALLOC(n,double); 107 w=TMPMEMALLOC(n,double); 108 v=TMPMEMALLOC(n,double); 109 v_old=TMPMEMALLOC(n,double); 110 temp_vector=TMPMEMALLOC(n,double); 111 res=TMPMEMALLOC(n,double); 112 113 if (u1 ==NULL || u2== NULL || y1 == NULL || y2== NULL || d==NULL || w==NULL || v==NULL || v_old==NULL) { 114 status=SOLVER_MEMORY_ERROR; 115 } 116 117 maxit = *iter; 118 119 /* Test the input parameters. */ 120 if (n < 0 || maxit<=0 ) { 121 status=SOLVER_INPUT_ERROR; 122 } 123 124 Paso_zeroes(n,x); 125 126 Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER); 127 Paso_SystemMatrix_solvePreconditioner(A,res,r); 128 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER); 129 130 Performance_startMonitor(pp,PERFORMANCE_SOLVER); 131 132 Paso_zeroes(n,u2); 133 Paso_zeroes(n,y2); 134 135 Paso_Copy(n,w,res); 136 Paso_Copy(n,y1,res); 137 138 Paso_zeroes(n,d); 139 140 Performance_stopMonitor(pp,PERFORMANCE_SOLVER); 141 Performance_startMonitor(pp,PERFORMANCE_MVM); 142 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, A, y1,PASO_ZERO,temp_vector); 143 Performance_stopMonitor(pp,PERFORMANCE_MVM); 144 Performance_startMonitor(pp,PERFORMANCE_SOLVER); 145 146 Performance_stopMonitor(pp,PERFORMANCE_SOLVER); 147 Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER); 148 Paso_SystemMatrix_solvePreconditioner(A,v,temp_vector); 149 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER); 150 Performance_startMonitor(pp,PERFORMANCE_SOLVER); 151 152 Paso_Copy(n,u1,v); 153 154 theta = 0.0; 155 eta = 0.0; 156 157 tau = Paso_l2(n,res,A->mpi_info); 158 159 rho = tau * tau; 160 161 norm_of_residual=tau*sqrt ( m + 1 ); 162 163 while (!(convergeFlag || maxIterFlag || breakFlag || (status !=SOLVER_NO_ERROR) )) 164 { 165 166 167 sigma=Paso_InnerProduct(n,res,v,A->mpi_info); 168 169 if (! (breakFlag = (ABS(sigma) == 0.))) { 170 171 alpha = rho / sigma; 172 173 for (j=0; j<=1; j=j+1) 174 { 175 /* Compute y2 and u2 only if you have to */ 176 if ( j == 1 ){ 177 Paso_LinearCombination(n,y2,1.,y1,-alpha,v); 178 179 Performance_stopMonitor(pp,PERFORMANCE_SOLVER); 180 Performance_startMonitor(pp,PERFORMANCE_MVM); 181 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, A, y2,PASO_ZERO,temp_vector); 182 Performance_stopMonitor(pp,PERFORMANCE_MVM); 183 Performance_startMonitor(pp,PERFORMANCE_SOLVER); 184 185 Performance_stopMonitor(pp,PERFORMANCE_SOLVER); 186 Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER); 187 Paso_SystemMatrix_solvePreconditioner(A,u2,temp_vector); 188 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER); 189 Performance_startMonitor(pp,PERFORMANCE_SOLVER); 190 } 191 m = 2 * (num_iter+1) - 2 + (j+1); 192 if (j==0) { 193 Paso_Update(n,1.,w,-alpha,u1); 194 Paso_Update(n,( theta * theta * eta / alpha ),d,1.,y1); 195 } 196 if (j==1) { 197 Paso_Update(n,1.,w,-alpha,u2); 198 Paso_Update(n,( theta * theta * eta / alpha ),d,1.,y2); 199 } 200 201 theta =Paso_l2(n,w,A->mpi_info)/tau; 202 c = 1.0 / sqrt ( 1.0 + theta * theta ); 203 tau = tau * theta * c; 204 eta = c * c * alpha; 205 Paso_Update(n,1.,x,eta,d); 206 } 207 208 breakFlag = (ABS(rho) == 0); 209 210 rhon = Paso_InnerProduct(n,res,w,A->mpi_info); 211 beta = rhon / rho; 212 rho = rhon; 213 214 Paso_LinearCombination(n,y1,1.,w,beta,y2); 215 216 Performance_stopMonitor(pp,PERFORMANCE_SOLVER); 217 218 Performance_startMonitor(pp,PERFORMANCE_MVM); 219 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, A, y1,PASO_ZERO,temp_vector); 220 Performance_stopMonitor(pp,PERFORMANCE_MVM); 221 222 Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER); 223 Paso_SystemMatrix_solvePreconditioner(A,u1,temp_vector); 224 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER); 225 226 Performance_startMonitor(pp,PERFORMANCE_SOLVER); 227 228 Paso_Copy(n,v_old,v); 229 230 Paso_Update(n,beta,v_old,1,u2); 231 Paso_LinearCombination(n,v,1.,u1,beta,v_old); 232 } 233 maxIterFlag = (num_iter > maxit); 234 norm_of_residual=tau*sqrt ( m + 1 ); 235 convergeFlag=(norm_of_residual<(*tolerance)); 236 237 238 if (maxIterFlag) { 239 status = SOLVER_MAXITER_REACHED; 240 } else if (breakFlag) { 241 status = SOLVER_BREAKDOWN; 242 } 243 ++(num_iter); 244 } 245 /* end of iteration */ 246 247 Performance_stopMonitor(pp,PERFORMANCE_SOLVER); 248 TMPMEMFREE(u1); 249 TMPMEMFREE(u2); 250 TMPMEMFREE(y1); 251 TMPMEMFREE(y2); 252 TMPMEMFREE(d); 253 TMPMEMFREE(w); 254 TMPMEMFREE(v); 255 TMPMEMFREE(v_old); 256 TMPMEMFREE(temp_vector); 257 TMPMEMFREE(res); 258 *iter=num_iter; 259 *tolerance=norm_of_residual; 260 261 /* End of TFQMR */ 262 return status; 263 }