Annotation of /branches/doubleplusgood/paso/src/TFQMR.cpp

Revision 4275 - (hide annotations)
Tue Mar 5 09:32:52 2013 UTC (6 years, 10 months ago) by jfenwick
File size: 7928 byte(s)
```some experiments
```
 1 artak 1703 2 jfenwick 3981 /***************************************************************************** 3 ksteube 1811 * 4 jfenwick 4154 * Copyright (c) 2003-2013 by University of Queensland 5 jfenwick 3981 6 ksteube 1811 * 7 * Primary Business: Queensland, Australia 8 * Licensed under the Open Software License version 3.0 9 10 * 11 jfenwick 3981 * Development until 2012 by Earth Systems Science Computational Center (ESSCC) 12 * Development since 2012 by School of Earth Sciences 13 * 14 *****************************************************************************/ 15 artak 1703 16 ksteube 1811 17 artak 1703 /* TFQMR iterations */ 18 19 #include "SystemMatrix.h" 20 #include "Paso.h" 21 #include "Solver.h" 22 #include "PasoUtil.h" 23 24 #ifdef _OPENMP 25 #include 26 #endif 27 28 jfenwick 3259 #ifdef ESYS_MPI 29 artak 1703 #include 30 #endif 31 32 /* 33 * 34 * Purpose 35 * ======= 36 * 37 * TFQMR solves the linear system A*x = b 38 * 39 * Convergence test: norm( b - A*x )< TOL. 40 * For other measures, see the above reference. 41 * 42 * Arguments 43 * ========= 44 * 45 * r (input) DOUBLE PRECISION array, dimension N. 46 artak 2557 * 47 artak 1703 * 48 * x (input/output) DOUBLE PRECISION array, dimension N. 49 artak 2557 * 50 artak 1703 * 51 * ITER (input/output) INT 52 * On input, the maximum iterations to be performed. 53 * On output, actual number of iterations performed. 54 * 55 * INFO (output) INT 56 * 57 * = SOLVER_NO_ERROR: Successful exit. Iterated approximate solution returned. 58 caltinay 3642 * = SOLVER_MAXITER_REACHED 59 artak 1703 * = SOLVER_INPUT_ERROR Illegal parameter: 60 caltinay 3642 * = SOLVER_BREAKDOWN: If parameters RHO or OMEGA become smaller 61 * = SOLVER_MEMORY_ERROR : If parameters RHO or OMEGA become smaller 62 artak 1703 * 63 * ============================================================== 64 */ 65 66 /* #define PASO_DYNAMIC_SCHEDULING_MVM */ 67 68 #if defined PASO_DYNAMIC_SCHEDULING_MVM && defined __OPENMP 69 #define USE_DYNAMIC_SCHEDULING 70 #endif 71 72 err_t Paso_Solver_TFQMR( 73 Paso_SystemMatrix * A, 74 double * r, 75 double * x, 76 dim_t *iter, 77 double * tolerance, 78 Paso_Performance* pp) { 79 80 /* Local variables */ 81 artak 1706 82 int m=1; 83 int j=0; 84 85 artak 1703 dim_t num_iter=0,maxit; 86 bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE; 87 err_t status = SOLVER_NO_ERROR; 88 dim_t n = Paso_SystemMatrix_getTotalNumRows(A); 89 gross 4093 double *u1=NULL, *u2=NULL, *y1=NULL, *y2=NULL, *d=NULL, *w=NULL, *v=NULL, *temp_vector=NULL,*res=NULL; 90 artak 1703 91 double eta,theta,tau,rho,beta,alpha,sigma,rhon,c; 92 93 double norm_of_residual; 94 artak 1706 95 artak 1703 /* */ 96 /*-----------------------------------------------------------------*/ 97 /* */ 98 /* Start of Calculation : */ 99 /* --------------------- */ 100 /* */ 101 /* */ 102 jfenwick 4275 u1=new double[n]; 103 u2=new double[n]; 104 y1=new double[n]; 105 y2=new double[n]; 106 d=new double[n]; 107 w=new double[n]; 108 v=new double[n]; 109 temp_vector=new double[n]; 110 res=new double[n]; 111 artak 1703 112 gross 4093 if (u1 ==NULL || u2== NULL || y1 == NULL || y2== NULL || d==NULL || w==NULL || v==NULL ) { 113 artak 1703 status=SOLVER_MEMORY_ERROR; 114 } 115 116 maxit = *iter; 117 118 /* Test the input parameters. */ 119 if (n < 0 || maxit<=0 ) { 120 status=SOLVER_INPUT_ERROR; 121 } 122 123 artak 2826 Paso_zeroes(n,x); 124 125 artak 1703 Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER); 126 gross 3120 Paso_SystemMatrix_solvePreconditioner(A,res,r); 127 artak 1703 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER); 128 129 Performance_startMonitor(pp,PERFORMANCE_SOLVER); 130 131 Paso_zeroes(n,u2); 132 Paso_zeroes(n,y2); 133 134 artak 2562 Paso_Copy(n,w,res); 135 Paso_Copy(n,y1,res); 136 artak 1703 137 Paso_zeroes(n,d); 138 139 Performance_stopMonitor(pp,PERFORMANCE_SOLVER); 140 Performance_startMonitor(pp,PERFORMANCE_MVM); 141 artak 2562 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, A, y1,PASO_ZERO,temp_vector); 142 artak 1703 Performance_stopMonitor(pp,PERFORMANCE_MVM); 143 Performance_startMonitor(pp,PERFORMANCE_SOLVER); 144 145 Performance_stopMonitor(pp,PERFORMANCE_SOLVER); 146 Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER); 147 gross 3120 Paso_SystemMatrix_solvePreconditioner(A,v,temp_vector); 148 artak 1703 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER); 149 Performance_startMonitor(pp,PERFORMANCE_SOLVER); 150 gross 4093 /* v = P^{-1} * A y1 */ 151 artak 1703 152 Paso_Copy(n,u1,v); 153 154 theta = 0.0; 155 eta = 0.0; 156 157 artak 2562 tau = Paso_l2(n,res,A->mpi_info); 158 artak 1703 159 rho = tau * tau; 160 artak 1706 161 gross 4093 norm_of_residual=tau; 162 artak 1703 163 while (!(convergeFlag || maxIterFlag || breakFlag || (status !=SOLVER_NO_ERROR) )) 164 { 165 166 167 artak 2562 sigma=Paso_InnerProduct(n,res,v,A->mpi_info); 168 artak 1703 169 if (! (breakFlag = (ABS(sigma) == 0.))) { 170 171 alpha = rho / sigma; 172 173 for (j=0; j<=1; j=j+1) 174 { 175 /* Compute y2 and u2 only if you have to */ 176 if ( j == 1 ){ 177 gross 4093 Paso_LinearCombination(n,y2,PASO_ONE,y1,-alpha,v); /* y2 = y1 - alpha*v */ 178 artak 1703 179 Performance_stopMonitor(pp,PERFORMANCE_SOLVER); 180 Performance_startMonitor(pp,PERFORMANCE_MVM); 181 artak 2562 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, A, y2,PASO_ZERO,temp_vector); 182 artak 1703 Performance_stopMonitor(pp,PERFORMANCE_MVM); 183 Performance_startMonitor(pp,PERFORMANCE_SOLVER); 184 185 Performance_stopMonitor(pp,PERFORMANCE_SOLVER); 186 Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER); 187 gross 4093 Paso_SystemMatrix_solvePreconditioner(A,u2,temp_vector); 188 artak 1703 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER); 189 Performance_startMonitor(pp,PERFORMANCE_SOLVER); 190 gross 4093 /* u2 = P^{-1} * A y2 */ 191 artak 1703 } 192 m = 2 * (num_iter+1) - 2 + (j+1); 193 gross 4093 194 artak 1703 if (j==0) { 195 gross 4093 Paso_Update(n,1.,w,-alpha,u1); /* w = w - alpha * u1 */ 196 Paso_Update(n,( theta * theta * eta / alpha ),d,1.,y1); /* d = ( theta * theta * eta / alpha )*d + y1 */ 197 artak 1703 } 198 if (j==1) { 199 gross 4093 Paso_Update(n,1.,w,-alpha,u2); /* w = w - -alpha * u2 */ 200 Paso_Update(n,( theta * theta * eta / alpha ),d,1.,y2); /* d = ( theta * theta * eta / alpha )*d + y2 */ 201 artak 1703 } 202 203 theta =Paso_l2(n,w,A->mpi_info)/tau; 204 caltinay 4095 /*printf("tau = %e, %e %e\n",tau, Paso_l2(n,w,A->mpi_info)/tau, theta);*/ 205 gross 4093 c = PASO_ONE / sqrt ( PASO_ONE + theta * theta ); 206 artak 1703 tau = tau * theta * c; 207 eta = c * c * alpha; 208 Paso_Update(n,1.,x,eta,d); 209 } 210 211 breakFlag = (ABS(rho) == 0); 212 213 artak 2562 rhon = Paso_InnerProduct(n,res,w,A->mpi_info); 214 artak 1703 beta = rhon / rho; 215 rho = rhon; 216 217 gross 4093 Paso_LinearCombination(n,y1, PASO_ONE,w,beta,y2); /* y1 = w + beta * y2 */ 218 artak 1703 219 Performance_stopMonitor(pp,PERFORMANCE_SOLVER); 220 Performance_startMonitor(pp,PERFORMANCE_MVM); 221 artak 2562 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, A, y1,PASO_ZERO,temp_vector); 222 artak 1703 Performance_stopMonitor(pp,PERFORMANCE_MVM); 223 224 Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER); 225 gross 3120 Paso_SystemMatrix_solvePreconditioner(A,u1,temp_vector); 226 artak 1703 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER); 227 Performance_startMonitor(pp,PERFORMANCE_SOLVER); 228 gross 4093 /* u1 = P^{-1} * A y1 */ 229 artak 1703 230 gross 4093 Paso_LinearCombination(n,temp_vector,PASO_ONE,u2,beta,v); /* t = u2 + beta * v */ 231 Paso_LinearCombination(n,v,PASO_ONE,u1,beta,temp_vector); /* v = u1 + beta * t */ 232 artak 1703 } 233 maxIterFlag = (num_iter > maxit); 234 gross 4093 norm_of_residual=tau*sqrt ( (double) (m + 1 ) ); 235 artak 1703 convergeFlag=(norm_of_residual<(*tolerance)); 236 237 238 if (maxIterFlag) { 239 status = SOLVER_MAXITER_REACHED; 240 } else if (breakFlag) { 241 status = SOLVER_BREAKDOWN; 242 } 243 ++(num_iter); 244 } 245 /* end of iteration */ 246 247 Performance_stopMonitor(pp,PERFORMANCE_SOLVER); 248 jfenwick 4275 delete[] (u1); 249 delete[] (u2); 250 delete[] (y1); 251 delete[] (y2); 252 delete[] (d); 253 delete[] (w); 254 delete[] (v); 255 delete[] (temp_vector); 256 delete[] (res); 257 artak 1703 *iter=num_iter; 258 *tolerance=norm_of_residual; 259 260 /* End of TFQMR */ 261 return status; 262 ksteube 1708 }