1 |
/******************************************************* |
2 |
* |
3 |
* Copyright (c) 2003-2010 by University of Queensland |
4 |
* Earth Systems Science Computational Center (ESSCC) |
5 |
* http://www.uq.edu.au/esscc |
6 |
* |
7 |
* Primary Business: Queensland, Australia |
8 |
* Licensed under the Open Software License version 3.0 |
9 |
* http://www.opensource.org/licenses/osl-3.0.php |
10 |
* |
11 |
*******************************************************/ |
12 |
|
13 |
|
14 |
/**************************************************************/ |
15 |
|
16 |
/* Paso: SystemMatrix: controls iterative linear system solvers */ |
17 |
|
18 |
/**************************************************************/ |
19 |
|
20 |
/* Copyrights by ACcESS Australia 2003/04 */ |
21 |
/* Author: Lutz Gross, l.gross@uq.edu.au */ |
22 |
|
23 |
/**************************************************************/ |
24 |
|
25 |
#include "Paso.h" |
26 |
#include "SystemMatrix.h" |
27 |
#include "Solver.h" |
28 |
#include "esysUtils/blocktimer.h" |
29 |
|
30 |
/***********************************************************************************/ |
31 |
|
32 |
/* free space */ |
33 |
|
34 |
void Paso_Solver_free(Paso_SystemMatrix* A) { |
35 |
Paso_SystemMatrix_freePreconditioner(A); |
36 |
} |
37 |
/* call the iterative solver: */ |
38 |
|
39 |
void Paso_Solver(Paso_SystemMatrix* A,double* x,double* b, |
40 |
Paso_Options* options,Paso_Performance* pp) { |
41 |
|
42 |
double norm2_of_b,tol,tolerance,time_iter,net_time_start; |
43 |
double *r=NULL,norm2_of_residual,last_norm2_of_residual,norm_max_of_b; |
44 |
double norm2_of_b_local,norm_max_of_b_local,norm2_of_residual_local; |
45 |
double norm_max_of_residual_local,norm_max_of_residual; |
46 |
double last_norm_max_of_residual; |
47 |
#ifdef ESYS_MPI |
48 |
double loc_norm; |
49 |
#endif |
50 |
dim_t i,totIter=0,cntIter,method; |
51 |
bool_t finalizeIteration; |
52 |
err_t errorCode=SOLVER_NO_ERROR; |
53 |
const dim_t numSol = Paso_SystemMatrix_getTotalNumCols(A); |
54 |
const dim_t numEqua = Paso_SystemMatrix_getTotalNumRows(A); |
55 |
double blocktimer_precond, blocktimer_start = blocktimer_time(); |
56 |
double *x0=NULL; |
57 |
|
58 |
|
59 |
Esys_resetError(); |
60 |
tolerance=options->tolerance; |
61 |
if (tolerance < 100.* EPSILON) { |
62 |
Esys_setError(VALUE_ERROR,"Paso_Solver: Tolerance is too small."); |
63 |
} |
64 |
if (tolerance >1.) { |
65 |
Esys_setError(VALUE_ERROR,"Paso_Solver: Tolerance mut be less than one."); |
66 |
} |
67 |
method=Paso_Options_getSolver(options->method,PASO_PASO,options->symmetric,A->mpi_info); |
68 |
/* check matrix type */ |
69 |
if ((A->type & MATRIX_FORMAT_CSC) || (A->type & MATRIX_FORMAT_OFFSET1) ) { |
70 |
Esys_setError(TYPE_ERROR,"Paso_Solver: Iterative solver requires CSR format with unsymmetric storage scheme and index offset 0."); |
71 |
} |
72 |
if (A->col_block_size != A->row_block_size) { |
73 |
Esys_setError(TYPE_ERROR,"Paso_Solver: Iterative solver requires row and column block sizes to be equal."); |
74 |
} |
75 |
if (Paso_SystemMatrix_getGlobalNumCols(A) != Paso_SystemMatrix_getGlobalNumRows(A)) { |
76 |
Esys_setError(TYPE_ERROR,"Paso_Solver: Iterative solver requires a square matrix."); |
77 |
return; |
78 |
} |
79 |
/*if (A->block_size != 1 && options->preconditioner==PASO_AMG) { |
80 |
Esys_setError(TYPE_ERROR,"Paso_Solver: AMG on systems not supported yet."); |
81 |
} |
82 |
*/ |
83 |
time_iter=Esys_timer(); |
84 |
/* this for testing only */ |
85 |
if (method==PASO_NONLINEAR_GMRES) { |
86 |
Paso_Function* F=NULL; |
87 |
F=Paso_Function_LinearSystem_alloc(A,b,options); |
88 |
Paso_SystemMatrix_solvePreconditioner(A,x,b); |
89 |
errorCode=Paso_Solver_NewtonGMRES(F,x,options,pp); |
90 |
if (errorCode!=NO_ERROR) { |
91 |
Esys_setError(SYSTEM_ERROR,"Paso_Solver_NewtonGMRES: an error has occured."); |
92 |
} |
93 |
Paso_Function_LinearSystem_free(F); |
94 |
return; |
95 |
} |
96 |
|
97 |
r=TMPMEMALLOC(numEqua,double); |
98 |
x0=TMPMEMALLOC(numEqua,double); |
99 |
Esys_checkPtr(r); |
100 |
Esys_checkPtr(x0); |
101 |
Paso_SystemMatrix_balance(A); |
102 |
options->num_level=0; |
103 |
options->num_inner_iter=0; |
104 |
|
105 |
/* ========================= */ |
106 |
if (Esys_noError()) { |
107 |
Performance_startMonitor(pp,PERFORMANCE_ALL); |
108 |
|
109 |
Paso_SystemMatrix_applyBalance(A, r, b, TRUE); |
110 |
/* get the norm of the right hand side */ |
111 |
norm2_of_b=0.; |
112 |
norm_max_of_b=0.; |
113 |
#pragma omp parallel private(norm2_of_b_local,norm_max_of_b_local) |
114 |
{ |
115 |
norm2_of_b_local=0.; |
116 |
norm_max_of_b_local=0.; |
117 |
#pragma omp for private(i) schedule(static) |
118 |
for (i = 0; i < numEqua ; ++i) { |
119 |
norm2_of_b_local += r[i] * r[i]; |
120 |
norm_max_of_b_local = MAX(ABS(r[i]),norm_max_of_b_local); |
121 |
} |
122 |
#pragma omp critical |
123 |
{ |
124 |
norm2_of_b += norm2_of_b_local; |
125 |
norm_max_of_b = MAX(norm_max_of_b_local,norm_max_of_b); |
126 |
} |
127 |
} |
128 |
/* TODO: use one call */ |
129 |
#ifdef ESYS_MPI |
130 |
{ |
131 |
loc_norm = norm2_of_b; |
132 |
MPI_Allreduce(&loc_norm,&norm2_of_b, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm); |
133 |
loc_norm = norm_max_of_b; |
134 |
MPI_Allreduce(&loc_norm,&norm_max_of_b, 1, MPI_DOUBLE, MPI_MAX, A->mpi_info->comm); |
135 |
} |
136 |
#endif |
137 |
norm2_of_b=sqrt(norm2_of_b); |
138 |
/* if norm2_of_b==0 we are ready: x=0 */ |
139 |
if ( IS_NAN(norm2_of_b) || IS_NAN(norm_max_of_b) ) { |
140 |
Esys_setError(VALUE_ERROR,"Paso_Solver: Matrix or right hand side contains undefined values."); |
141 |
} else if (norm2_of_b <=0.) { |
142 |
|
143 |
#pragma omp parallel for private(i) schedule(static) |
144 |
for (i = 0; i < numSol; i++) x[i]=0.; |
145 |
if (options->verbose) printf("right hand side is identical zero.\n"); |
146 |
|
147 |
} else { |
148 |
|
149 |
if (options->verbose) { |
150 |
printf("Paso_Solver: l2/lmax-norm of right hand side is %e/%e.\n",norm2_of_b,norm_max_of_b); |
151 |
printf("Paso_Solver: l2/lmax-stopping criterion is %e/%e.\n",norm2_of_b*tolerance,norm_max_of_b*tolerance); |
152 |
switch (method) { |
153 |
case PASO_BICGSTAB: |
154 |
printf("Paso_Solver: Iterative method is BiCGStab.\n"); |
155 |
break; |
156 |
case PASO_PCG: |
157 |
printf("Paso_Solver: Iterative method is PCG.\n"); |
158 |
break; |
159 |
case PASO_TFQMR: |
160 |
printf("Paso_Solver: Iterative method is TFQMR.\n"); |
161 |
break; |
162 |
case PASO_MINRES: |
163 |
printf("Paso_Solver: Iterative method is MINRES.\n"); |
164 |
break; |
165 |
case PASO_PRES20: |
166 |
printf("Paso_Solver: Iterative method is PRES20.\n"); |
167 |
break; |
168 |
case PASO_GMRES: |
169 |
if (options->restart>0) { |
170 |
printf("Paso_Solver: Iterative method is GMRES(%d,%d)\n",options->truncation,options->restart); |
171 |
} else { |
172 |
printf("Paso_Solver: Iterative method is GMRES(%d)\n",options->truncation); |
173 |
} |
174 |
break; |
175 |
} |
176 |
|
177 |
} |
178 |
|
179 |
/* construct the preconditioner */ |
180 |
blocktimer_precond = blocktimer_time(); |
181 |
Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER_INIT); |
182 |
Paso_SystemMatrix_setPreconditioner(A,options); |
183 |
Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER_INIT); |
184 |
blocktimer_increment("Paso_Solver_setPreconditioner()", blocktimer_precond); |
185 |
options->set_up_time=Esys_timer()-time_iter; |
186 |
if (Esys_noError()) { |
187 |
|
188 |
|
189 |
/* get an initial guess by evaluating the preconditioner */ |
190 |
Paso_SystemMatrix_solvePreconditioner(A,x,r); |
191 |
|
192 |
totIter = 1; |
193 |
finalizeIteration = FALSE; |
194 |
last_norm2_of_residual=norm2_of_b; |
195 |
last_norm_max_of_residual=norm_max_of_b; |
196 |
net_time_start=Esys_timer(); |
197 |
|
198 |
/* Loop */ |
199 |
while (! finalizeIteration) { |
200 |
cntIter = options->iter_max - totIter; |
201 |
finalizeIteration = TRUE; |
202 |
|
203 |
/* Set initial residual. */ |
204 |
if (totIter>1) Paso_SystemMatrix_applyBalance(A, r, b, TRUE); /* in the first iteration r = balance * b already */ |
205 |
|
206 |
Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(DBLE(-1), A, x, DBLE(1), r); |
207 |
|
208 |
norm2_of_residual = 0; |
209 |
norm_max_of_residual = 0; |
210 |
#pragma omp parallel private(norm2_of_residual_local,norm_max_of_residual_local) |
211 |
{ |
212 |
norm2_of_residual_local = 0; |
213 |
norm_max_of_residual_local = 0; |
214 |
#pragma omp for private(i) schedule(static) |
215 |
for (i = 0; i < numEqua; i++) { |
216 |
norm2_of_residual_local+= r[i] * r[i]; |
217 |
norm_max_of_residual_local=MAX(ABS(r[i]),norm_max_of_residual_local); |
218 |
} |
219 |
#pragma omp critical |
220 |
{ |
221 |
norm2_of_residual += norm2_of_residual_local; |
222 |
norm_max_of_residual = MAX(norm_max_of_residual_local,norm_max_of_residual); |
223 |
} |
224 |
} |
225 |
/* TODO: use one call */ |
226 |
#ifdef ESYS_MPI |
227 |
loc_norm = norm2_of_residual; |
228 |
MPI_Allreduce(&loc_norm,&norm2_of_residual, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm); |
229 |
loc_norm = norm_max_of_residual; |
230 |
MPI_Allreduce(&loc_norm,&norm_max_of_residual, 1, MPI_DOUBLE, MPI_MAX, A->mpi_info->comm); |
231 |
#endif |
232 |
norm2_of_residual =sqrt(norm2_of_residual); |
233 |
options->residual_norm=norm2_of_residual; |
234 |
|
235 |
if (options->verbose) printf("Paso_Solver: Step %5d: l2/lmax-norm of residual is %e/%e",totIter,norm2_of_residual,norm_max_of_residual); |
236 |
|
237 |
if (totIter>1 && norm2_of_residual>=last_norm2_of_residual && norm_max_of_residual>=last_norm_max_of_residual) { |
238 |
|
239 |
if (options->verbose) printf(" divergence!\n"); |
240 |
Esys_setError(DIVERGED, "Paso_Solver: No improvement during iteration. Iterative solver gives up."); |
241 |
|
242 |
} else { |
243 |
if (norm2_of_residual>tolerance*norm2_of_b || norm_max_of_residual>tolerance*norm_max_of_b ) { |
244 |
|
245 |
tol=tolerance*MIN(norm2_of_b,0.1*norm2_of_residual/norm_max_of_residual*norm_max_of_b); |
246 |
if (options->verbose) printf(" (new tolerance = %e).\n",tol); |
247 |
|
248 |
last_norm2_of_residual=norm2_of_residual; |
249 |
last_norm_max_of_residual=norm_max_of_residual; |
250 |
|
251 |
/* call the solver */ |
252 |
switch (method) { |
253 |
case PASO_BICGSTAB: |
254 |
errorCode = Paso_Solver_BiCGStab(A, r, x, &cntIter, &tol, pp); |
255 |
break; |
256 |
case PASO_PCG: |
257 |
errorCode = Paso_Solver_PCG(A, r, x, &cntIter, &tol, pp); |
258 |
break; |
259 |
case PASO_TFQMR: |
260 |
tol=tolerance*norm2_of_residual/norm2_of_b; |
261 |
errorCode = Paso_Solver_TFQMR(A, r, x0, &cntIter, &tol, pp); |
262 |
#pragma omp for private(i) schedule(static) |
263 |
for (i = 0; i < numEqua; i++) { |
264 |
x[i]+= x0[i]; |
265 |
} |
266 |
break; |
267 |
case PASO_MINRES: |
268 |
/* tol=tolerance*norm2_of_residual/norm2_of_b; */ |
269 |
errorCode = Paso_Solver_MINRES(A, r, x, &cntIter, &tol, pp); |
270 |
break; |
271 |
case PASO_PRES20: |
272 |
errorCode = Paso_Solver_GMRES(A, r, x, &cntIter, &tol,5,20, pp); |
273 |
break; |
274 |
case PASO_GMRES: |
275 |
errorCode = Paso_Solver_GMRES(A, r, x, &cntIter, &tol,options->truncation,options->restart, pp); |
276 |
break; |
277 |
} |
278 |
|
279 |
totIter += cntIter; |
280 |
|
281 |
/* error handling */ |
282 |
if (errorCode==SOLVER_NO_ERROR) { |
283 |
finalizeIteration = FALSE; |
284 |
} else if (errorCode==SOLVER_MAXITER_REACHED) { |
285 |
Esys_setError(DIVERGED,"Paso_Solver: maximum number of iteration step reached.\nReturned solution does not fulfil stopping criterion."); |
286 |
if (options->verbose) printf("Paso_Solver: Maximum number of iterations reached.!\n"); |
287 |
} else if (errorCode == SOLVER_INPUT_ERROR ) { |
288 |
Esys_setError(SYSTEM_ERROR,"Paso_Solver: illegal dimension in iterative solver."); |
289 |
if (options->verbose) printf("Paso_Solver: Internal error!\n"); |
290 |
} else if (errorCode == SOLVER_NEGATIVE_NORM_ERROR) { |
291 |
Esys_setError(VALUE_ERROR,"Paso_Solver: negative energy norm (try other solver or preconditioner)."); |
292 |
if (options->verbose) printf("Paso_Solver: negative energy norm (try other solver or preconditioner)!\n"); |
293 |
} else if ( errorCode == SOLVER_BREAKDOWN ) { |
294 |
if (cntIter <= 1) { |
295 |
Esys_setError(ZERO_DIVISION_ERROR, "Paso_Solver: fatal break down in iterative solver."); |
296 |
if (options->verbose) printf("Paso_Solver: Uncurable break down!\n"); |
297 |
} else { |
298 |
if (options->verbose) printf("Paso_Solver: Breakdown at iter %d (residual = %e). Restarting ...\n", totIter, tol); |
299 |
finalizeIteration = FALSE; |
300 |
} |
301 |
} else { |
302 |
Esys_setError(SYSTEM_ERROR,"Paso_Solver:generic error in solver."); |
303 |
if (options->verbose) printf("Paso_Solver: generic error in solver!\n"); |
304 |
} |
305 |
} else { |
306 |
if (options->verbose) printf(". convergence! \n"); |
307 |
options->converged=TRUE; |
308 |
} |
309 |
} |
310 |
} /* while */ |
311 |
options->net_time=Esys_timer()-net_time_start; |
312 |
} |
313 |
options->num_iter=totIter; |
314 |
Paso_SystemMatrix_applyBalanceInPlace(A, x, FALSE); |
315 |
} |
316 |
|
317 |
} |
318 |
MEMFREE(r); |
319 |
MEMFREE(x0); |
320 |
options->time=Esys_timer()-time_iter; |
321 |
Performance_stopMonitor(pp,PERFORMANCE_ALL); |
322 |
blocktimer_increment("Paso_Solver()", blocktimer_start); |
323 |
} |