# Annotation of /branches/doubleplusgood/paso/src/TFQMR.cpp

Revision 3642 - (hide annotations)
Thu Oct 27 03:41:51 2011 UTC (8 years, 2 months ago) by caltinay
Original Path: trunk/paso/src/TFQMR.c
File MIME type: text/plain
File size: 7538 byte(s)
```Assorted spelling/comment fixes in paso.

```
 1 artak 1703 2 /******************************************************* 3 ksteube 1811 * 4 jfenwick 2881 * Copyright (c) 2003-2010 by University of Queensland 5 ksteube 1811 * Earth Systems Science Computational Center (ESSCC) 6 * http://www.uq.edu.au/esscc 7 * 8 * Primary Business: Queensland, Australia 9 * Licensed under the Open Software License version 3.0 10 * http://www.opensource.org/licenses/osl-3.0.php 11 * 12 *******************************************************/ 13 artak 1703 14 ksteube 1811 15 artak 1703 /* TFQMR iterations */ 16 17 #include "SystemMatrix.h" 18 #include "Paso.h" 19 #include "Solver.h" 20 #include "PasoUtil.h" 21 22 #ifdef _OPENMP 23 #include 24 #endif 25 26 jfenwick 3259 #ifdef ESYS_MPI 27 artak 1703 #include 28 #endif 29 30 /* 31 * 32 * Purpose 33 * ======= 34 * 35 * TFQMR solves the linear system A*x = b 36 * 37 * Convergence test: norm( b - A*x )< TOL. 38 * For other measures, see the above reference. 39 * 40 * Arguments 41 * ========= 42 * 43 * r (input) DOUBLE PRECISION array, dimension N. 44 artak 2557 * 45 artak 1703 * 46 * x (input/output) DOUBLE PRECISION array, dimension N. 47 artak 2557 * 48 artak 1703 * 49 * ITER (input/output) INT 50 * On input, the maximum iterations to be performed. 51 * On output, actual number of iterations performed. 52 * 53 * INFO (output) INT 54 * 55 * = SOLVER_NO_ERROR: Successful exit. Iterated approximate solution returned. 56 caltinay 3642 * = SOLVER_MAXITER_REACHED 57 artak 1703 * = SOLVER_INPUT_ERROR Illegal parameter: 58 caltinay 3642 * = SOLVER_BREAKDOWN: If parameters RHO or OMEGA become smaller 59 * = SOLVER_MEMORY_ERROR : If parameters RHO or OMEGA become smaller 60 artak 1703 * 61 * ============================================================== 62 */ 63 64 /* #define PASO_DYNAMIC_SCHEDULING_MVM */ 65 66 #if defined PASO_DYNAMIC_SCHEDULING_MVM && defined __OPENMP 67 #define USE_DYNAMIC_SCHEDULING 68 #endif 69 70 err_t Paso_Solver_TFQMR( 71 Paso_SystemMatrix * A, 72 double * r, 73 double * x, 74 dim_t *iter, 75 double * tolerance, 76 Paso_Performance* pp) { 77 78 /* Local variables */ 79 artak 1706 80 int m=1; 81 int j=0; 82 83 artak 1703 dim_t num_iter=0,maxit; 84 bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE; 85 err_t status = SOLVER_NO_ERROR; 86 dim_t n = Paso_SystemMatrix_getTotalNumRows(A); 87 artak 2562 double *u1=NULL, *u2=NULL, *y1=NULL, *y2=NULL, *d=NULL, *w=NULL, *v=NULL, *v_old=NULL, *temp_vector=NULL,*res=NULL; 88 artak 1703 89 double eta,theta,tau,rho,beta,alpha,sigma,rhon,c; 90 91 double norm_of_residual; 92 artak 1706 93 artak 1703 /* */ 94 /*-----------------------------------------------------------------*/ 95 /* */ 96 /* Start of Calculation : */ 97 /* --------------------- */ 98 /* */ 99 /* */ 100 u1=TMPMEMALLOC(n,double); 101 u2=TMPMEMALLOC(n,double); 102 y1=TMPMEMALLOC(n,double); 103 y2=TMPMEMALLOC(n,double); 104 d=TMPMEMALLOC(n,double); 105 w=TMPMEMALLOC(n,double); 106 v=TMPMEMALLOC(n,double); 107 v_old=TMPMEMALLOC(n,double); 108 artak 2562 temp_vector=TMPMEMALLOC(n,double); 109 res=TMPMEMALLOC(n,double); 110 artak 1703 111 if (u1 ==NULL || u2== NULL || y1 == NULL || y2== NULL || d==NULL || w==NULL || v==NULL || v_old==NULL) { 112 status=SOLVER_MEMORY_ERROR; 113 } 114 115 maxit = *iter; 116 117 /* Test the input parameters. */ 118 if (n < 0 || maxit<=0 ) { 119 status=SOLVER_INPUT_ERROR; 120 } 121 122 artak 2826 Paso_zeroes(n,x); 123 124 artak 1703 Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER); 125 gross 3120 Paso_SystemMatrix_solvePreconditioner(A,res,r); 126 artak 1703 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER); 127 128 Performance_startMonitor(pp,PERFORMANCE_SOLVER); 129 130 Paso_zeroes(n,u2); 131 Paso_zeroes(n,y2); 132 133 artak 2562 Paso_Copy(n,w,res); 134 Paso_Copy(n,y1,res); 135 artak 1703 136 Paso_zeroes(n,d); 137 138 Performance_stopMonitor(pp,PERFORMANCE_SOLVER); 139 Performance_startMonitor(pp,PERFORMANCE_MVM); 140 artak 2562 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, A, y1,PASO_ZERO,temp_vector); 141 artak 1703 Performance_stopMonitor(pp,PERFORMANCE_MVM); 142 Performance_startMonitor(pp,PERFORMANCE_SOLVER); 143 144 Performance_stopMonitor(pp,PERFORMANCE_SOLVER); 145 Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER); 146 gross 3120 Paso_SystemMatrix_solvePreconditioner(A,v,temp_vector); 147 artak 1703 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER); 148 Performance_startMonitor(pp,PERFORMANCE_SOLVER); 149 150 Paso_Copy(n,u1,v); 151 152 theta = 0.0; 153 eta = 0.0; 154 155 artak 2562 tau = Paso_l2(n,res,A->mpi_info); 156 artak 1703 157 rho = tau * tau; 158 artak 1706 159 artak 1703 norm_of_residual=tau*sqrt ( m + 1 ); 160 161 while (!(convergeFlag || maxIterFlag || breakFlag || (status !=SOLVER_NO_ERROR) )) 162 { 163 164 165 artak 2562 sigma=Paso_InnerProduct(n,res,v,A->mpi_info); 166 artak 1703 167 if (! (breakFlag = (ABS(sigma) == 0.))) { 168 169 alpha = rho / sigma; 170 171 for (j=0; j<=1; j=j+1) 172 { 173 /* Compute y2 and u2 only if you have to */ 174 if ( j == 1 ){ 175 Paso_LinearCombination(n,y2,1.,y1,-alpha,v); 176 177 Performance_stopMonitor(pp,PERFORMANCE_SOLVER); 178 Performance_startMonitor(pp,PERFORMANCE_MVM); 179 artak 2562 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, A, y2,PASO_ZERO,temp_vector); 180 artak 1703 Performance_stopMonitor(pp,PERFORMANCE_MVM); 181 Performance_startMonitor(pp,PERFORMANCE_SOLVER); 182 183 Performance_stopMonitor(pp,PERFORMANCE_SOLVER); 184 Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER); 185 gross 3120 Paso_SystemMatrix_solvePreconditioner(A,u2,temp_vector); 186 artak 1703 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER); 187 Performance_startMonitor(pp,PERFORMANCE_SOLVER); 188 } 189 m = 2 * (num_iter+1) - 2 + (j+1); 190 if (j==0) { 191 Paso_Update(n,1.,w,-alpha,u1); 192 Paso_Update(n,( theta * theta * eta / alpha ),d,1.,y1); 193 } 194 if (j==1) { 195 Paso_Update(n,1.,w,-alpha,u2); 196 Paso_Update(n,( theta * theta * eta / alpha ),d,1.,y2); 197 } 198 199 theta =Paso_l2(n,w,A->mpi_info)/tau; 200 c = 1.0 / sqrt ( 1.0 + theta * theta ); 201 tau = tau * theta * c; 202 eta = c * c * alpha; 203 Paso_Update(n,1.,x,eta,d); 204 } 205 206 breakFlag = (ABS(rho) == 0); 207 208 artak 2562 rhon = Paso_InnerProduct(n,res,w,A->mpi_info); 209 artak 1703 beta = rhon / rho; 210 rho = rhon; 211 212 Paso_LinearCombination(n,y1,1.,w,beta,y2); 213 214 Performance_stopMonitor(pp,PERFORMANCE_SOLVER); 215 artak 1711 216 artak 1703 Performance_startMonitor(pp,PERFORMANCE_MVM); 217 artak 2562 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, A, y1,PASO_ZERO,temp_vector); 218 artak 1703 Performance_stopMonitor(pp,PERFORMANCE_MVM); 219 220 Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER); 221 gross 3120 Paso_SystemMatrix_solvePreconditioner(A,u1,temp_vector); 222 artak 1703 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER); 223 artak 1711 224 artak 1703 Performance_startMonitor(pp,PERFORMANCE_SOLVER); 225 226 Paso_Copy(n,v_old,v); 227 228 Paso_Update(n,beta,v_old,1,u2); 229 Paso_LinearCombination(n,v,1.,u1,beta,v_old); 230 } 231 maxIterFlag = (num_iter > maxit); 232 norm_of_residual=tau*sqrt ( m + 1 ); 233 convergeFlag=(norm_of_residual<(*tolerance)); 234 235 236 if (maxIterFlag) { 237 status = SOLVER_MAXITER_REACHED; 238 } else if (breakFlag) { 239 status = SOLVER_BREAKDOWN; 240 } 241 ++(num_iter); 242 } 243 /* end of iteration */ 244 245 Performance_stopMonitor(pp,PERFORMANCE_SOLVER); 246 TMPMEMFREE(u1); 247 TMPMEMFREE(u2); 248 TMPMEMFREE(y1); 249 TMPMEMFREE(y2); 250 TMPMEMFREE(d); 251 TMPMEMFREE(w); 252 TMPMEMFREE(v); 253 TMPMEMFREE(v_old); 254 artak 2562 TMPMEMFREE(temp_vector); 255 TMPMEMFREE(res); 256 artak 1703 *iter=num_iter; 257 *tolerance=norm_of_residual; 258 259 /* End of TFQMR */ 260 return status; 261 ksteube 1708 }

 ViewVC Help Powered by ViewVC 1.1.26