/[escript]/trunk/paso/src/Solver.c
ViewVC logotype

Contents of /trunk/paso/src/Solver.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 2078 - (show annotations)
Thu Nov 20 16:10:10 2008 UTC (10 years, 5 months ago) by phornby
File MIME type: text/plain
File size: 13000 byte(s)
Two changes.

1. Move blocktimer from escript to esysUtils.
2. Make it possible to link to paso as a DLL or .so.

Should have no effect on 'nix's

In respect of 1., blocktimer had begun to spring up everywhere, so
for the moment I thought it best to move it to the only other library that
pops up all over the place.

In respect of 2., paso needed to be a DLL in order to use the windows intelc /fast
option, which does aggressive multi-file optimisations. Even in its current form, it either
vectorises or parallelises  hundreds more loops in the esys system than appear in the pragmas.

In achieving 2. I have not been too delicate in adding

PASO_DLL_API

declarations to the .h files in paso/src. Only toward the end of the process of
the conversion, when the number of linker errors dropped below 20, say, did I choosy about what
functions in a header I declared PASO_DLL_API. As a result, there are likely to be many routines
declared as external functions symbols that are in fact internal to the paso DLL. 
Why is this an issue?  It prevents the intelc compiler from getting aggressive on the paso module.
With pain there is sometimes gain. At least all the DLL rules in windows give good
(non-microsoft) compiler writers a chance to really shine.

So, if you should see a PASO_DLL_API on a function in a paso header file,
and think to yourself, "that function is only called in paso, why export it?", then feel free to
delete the PASO_DLL_API export declaration.

Here's hoping for no breakage.....
1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2008 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14
15 /**************************************************************/
16
17 /* Paso: SystemMatrix: controls iterative linear system solvers */
18
19 /**************************************************************/
20
21 /* Copyrights by ACcESS Australia 2003/04 */
22 /* Author: gross@access.edu.au */
23
24 /**************************************************************/
25
26 #include "Paso.h"
27 #include "SystemMatrix.h"
28 #include "Solver.h"
29 #include "esysUtils/blocktimer.h"
30
31 /***********************************************************************************/
32
33 /* free space */
34
35 void Paso_Solver_free(Paso_SystemMatrix* A) {
36 Paso_Preconditioner_free(A->solver);
37 A->solver=NULL;
38 }
39 /* call the iterative solver: */
40
41 void Paso_Solver(Paso_SystemMatrix* A,double* x,double* b,
42 Paso_Options* options,Paso_Performance* pp) {
43
44 double norm2_of_b,tol,tolerance,time_iter;
45 double *r=NULL,norm2_of_residual,last_norm2_of_residual,norm_max_of_b;
46 double norm2_of_b_local,norm_max_of_b_local,norm2_of_residual_local;
47 double norm_max_of_residual_local,norm_max_of_residual;
48 double last_norm_max_of_residual,*scaling;
49 #ifdef PASO_MPI
50 double loc_norm;
51 #endif
52 dim_t i,totIter,cntIter,method;
53 bool_t finalizeIteration;
54 err_t errorCode=NO_ERROR;
55 dim_t numSol = Paso_SystemMatrix_getTotalNumCols(A);
56 dim_t numEqua = Paso_SystemMatrix_getTotalNumRows(A);
57 double blocktimer_precond, blocktimer_start = blocktimer_time();
58
59 tolerance=MAX(options->tolerance,EPSILON);
60 Paso_resetError();
61 method=Paso_Options_getSolver(options->method,PASO_PASO,options->symmetric);
62 /* check matrix type */
63 if ((A->type & MATRIX_FORMAT_CSC) || (A->type & MATRIX_FORMAT_OFFSET1) || (A->type & MATRIX_FORMAT_SYM) ) {
64 Paso_setError(TYPE_ERROR,"Iterative solver requires CSR format with unsymmetric storage scheme and index offset 0.");
65 }
66 if (A->col_block_size != A->row_block_size) {
67 Paso_setError(TYPE_ERROR,"Iterative solver requires row and column block sizes to be equal.");
68 }
69 if (Paso_SystemMatrix_getGlobalNumCols(A) != Paso_SystemMatrix_getGlobalNumRows(A)) {
70 Paso_setError(TYPE_ERROR,"Iterative solver requires a square matrix.");
71 return;
72 }
73 /* this for testing only */
74 if (method==PASO_NONLINEAR_GMRES) {
75 Paso_Function* F=NULL;
76 F=Paso_Function_LinearSystem_alloc(A,b,options);
77 Paso_Solver_solvePreconditioner(A,x,b);
78 errorCode=Paso_Solver_NewtonGMRES(F,x,options,pp);
79 if (errorCode!=NO_ERROR) {
80 Paso_setError(SYSTEM_ERROR,"Paso_Solver_NewtonGMRES: an error has occured.");
81 }
82 Paso_Function_LinearSystem_free(F);
83 return;
84 }
85 /* ========================= */
86 Performance_startMonitor(pp,PERFORMANCE_ALL);
87 if (Paso_noError()) {
88 /* get normalization */
89 scaling=Paso_SystemMatrix_borrowNormalization(A);
90 if (Paso_noError()) {
91 /* get the norm of the right hand side */
92 norm2_of_b=0.;
93 norm_max_of_b=0.;
94 #pragma omp parallel private(norm2_of_b_local,norm_max_of_b_local)
95 {
96 norm2_of_b_local=0.;
97 norm_max_of_b_local=0.;
98 #pragma omp for private(i) schedule(static)
99 for (i = 0; i < numEqua ; ++i) {
100 norm2_of_b_local += b[i] * b[i];
101 norm_max_of_b_local = MAX(ABS(scaling[i]*b[i]),norm_max_of_b_local);
102 }
103 #pragma omp critical
104 {
105 norm2_of_b += norm2_of_b_local;
106 norm_max_of_b = MAX(norm_max_of_b_local,norm_max_of_b);
107 }
108 }
109 /* TODO: use one call */
110 #ifdef PASO_MPI
111 {
112 loc_norm = norm2_of_b;
113 MPI_Allreduce(&loc_norm,&norm2_of_b, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
114 loc_norm = norm_max_of_b;
115 MPI_Allreduce(&loc_norm,&norm_max_of_b, 1, MPI_DOUBLE, MPI_MAX, A->mpi_info->comm);
116 }
117 #endif
118 norm2_of_b=sqrt(norm2_of_b);
119
120 /* if norm2_of_b==0 we are ready: x=0 */
121 if (norm2_of_b <=0.) {
122 #pragma omp parallel for private(i) schedule(static)
123 for (i = 0; i < numSol; i++) x[i]=0.;
124 if (options->verbose) printf("right hand side is identical zero.\n");
125 } else {
126 if (options->verbose) {
127 printf("l2/lmax-norm of right hand side is %e/%e.\n",norm2_of_b,norm_max_of_b);
128 printf("l2/lmax-stopping criterion is %e/%e.\n",norm2_of_b*tolerance,norm_max_of_b*tolerance);
129 switch (method) {
130 case PASO_BICGSTAB:
131 printf("Iterative method is BiCGStab.\n");
132 break;
133 case PASO_PCG:
134 printf("Iterative method is PCG.\n");
135 break;
136 case PASO_TFQMR:
137 printf("Iterative method is TFQMR.\n");
138 break;
139 case PASO_MINRES:
140 printf("Iterative method is MINRES.\n");
141 break;
142 case PASO_PRES20:
143 printf("Iterative method is PRES20.\n");
144 break;
145 case PASO_GMRES:
146 if (options->restart>0) {
147 printf("Iterative method is GMRES(%d,%d)\n",options->truncation,options->restart);
148 } else {
149 printf("Iterative method is GMRES(%d)\n",options->truncation);
150 }
151 break;
152 }
153 }
154 /* construct the preconditioner */
155
156 blocktimer_precond = blocktimer_time();
157 Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER_INIT);
158 Paso_Solver_setPreconditioner(A,options);
159 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER_INIT);
160 blocktimer_increment("Paso_Solver_setPreconditioner()", blocktimer_precond);
161 if (! Paso_noError()) return;
162 time_iter=Paso_timer();
163 /* get an initial guess by evaluating the preconditioner */
164 Paso_Solver_solvePreconditioner(A,x,b);
165 /* start the iteration process :*/
166 r=TMPMEMALLOC(numEqua,double);
167 Paso_checkPtr(r);
168 if (Paso_noError()) {
169 totIter = 0;
170 finalizeIteration = FALSE;
171 last_norm2_of_residual=norm2_of_b;
172 last_norm_max_of_residual=norm_max_of_b;
173 /* Loop */
174 while (! finalizeIteration) {
175 cntIter = options->iter_max - totIter;
176 finalizeIteration = TRUE;
177 /* Set initial residual. */
178 norm2_of_residual = 0;
179 norm_max_of_residual = 0;
180 #pragma omp parallel for private(i) schedule(static)
181 for (i = 0; i < numEqua; i++) r[i]=b[i];
182 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(DBLE(-1), A, x, DBLE(1), r);
183
184 #pragma omp parallel private(norm2_of_residual_local,norm_max_of_residual_local)
185 {
186 norm2_of_residual_local = 0;
187 norm_max_of_residual_local = 0;
188 #pragma omp for private(i) schedule(static)
189 for (i = 0; i < numEqua; i++) {
190 norm2_of_residual_local+= r[i] * r[i];
191 norm_max_of_residual_local=MAX(ABS(scaling[i]*r[i]),norm_max_of_residual_local);
192 }
193 #pragma omp critical
194 {
195 norm2_of_residual += norm2_of_residual_local;
196 norm_max_of_residual = MAX(norm_max_of_residual_local,norm_max_of_residual);
197 }
198 }
199 /* TODO: use one call */
200 #ifdef PASO_MPI
201 loc_norm = norm2_of_residual;
202 MPI_Allreduce(&loc_norm,&norm2_of_residual, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
203 loc_norm = norm_max_of_residual;
204 MPI_Allreduce(&loc_norm,&norm_max_of_residual, 1, MPI_DOUBLE, MPI_MAX, A->mpi_info->comm);
205 #endif
206 norm2_of_residual =sqrt(norm2_of_residual);
207
208 if (options->verbose) printf("Step %5d: l2/lmax-norm of residual is %e/%e",totIter,norm2_of_residual,norm_max_of_residual);
209 if (totIter>0 && norm2_of_residual>=last_norm2_of_residual && norm_max_of_residual>=last_norm_max_of_residual) {
210 if (options->verbose) printf(" divergence!\n");
211 Paso_setError(WARNING, "No improvement during iteration. Iterative solver gives up.");
212 } else {
213 /* */
214 if (norm2_of_residual>tolerance*norm2_of_b || norm_max_of_residual>tolerance*norm_max_of_b ) {
215 tol=tolerance*MIN(norm2_of_b,0.1*norm2_of_residual/norm_max_of_residual*norm_max_of_b);
216 if (options->verbose) printf(" (new tolerance = %e).\n",tol);
217 last_norm2_of_residual=norm2_of_residual;
218 last_norm_max_of_residual=norm_max_of_residual;
219 /* call the solver */
220 switch (method) {
221 case PASO_BICGSTAB:
222 errorCode = Paso_Solver_BiCGStab(A, r, x, &cntIter, &tol, pp);
223 break;
224 case PASO_PCG:
225 errorCode = Paso_Solver_PCG(A, r, x, &cntIter, &tol, pp);
226 break;
227 case PASO_TFQMR:
228 errorCode = Paso_Solver_TFQMR(A, r, x, &cntIter, &tol, pp);
229 break;
230 case PASO_MINRES:
231 errorCode = Paso_Solver_MINRES(A, r, x, &cntIter, &tol, pp);
232 break;
233 case PASO_PRES20:
234 errorCode = Paso_Solver_GMRES(A, r, x, &cntIter, &tol,5,20, pp);
235 break;
236 case PASO_GMRES:
237 errorCode = Paso_Solver_GMRES(A, r, x, &cntIter, &tol,options->truncation,options->restart, pp);
238 break;
239 }
240 totIter += cntIter;
241 /* error handling */
242 if (errorCode==NO_ERROR) {
243 finalizeIteration = FALSE;
244 } else if (errorCode==SOLVER_MAXITER_REACHED) {
245 Paso_setError(WARNING,"maximum number of iteration step reached.\nReturned solution does not fulfil stopping criterion.");
246 if (options->verbose) printf("Maximum number of iterations reached.!\n");
247 } else if (errorCode == SOLVER_INPUT_ERROR ) {
248 Paso_setError(SYSTEM_ERROR,"illegal dimension in iterative solver.");
249 if (options->verbose) printf("Internal error!\n");
250 } else if ( errorCode == SOLVER_BREAKDOWN ) {
251 if (cntIter <= 1) {
252 Paso_setError(ZERO_DIVISION_ERROR, "fatal break down in iterative solver.");
253 if (options->verbose) printf("Uncurable break down!\n");
254 } else {
255 if (options->verbose) printf("Breakdown at iter %d (residual = %e). Restarting ...\n", cntIter+totIter, tol);
256 finalizeIteration = FALSE;
257 }
258 } else {
259 if (options->verbose) printf(". convergence! \n");
260 }
261 }
262 } /* while */
263 }
264 MEMFREE(r);
265 time_iter=Paso_timer()-time_iter;
266 if (options->verbose) {
267 printf("\ntiming: solver: %.4e sec\n",time_iter);
268 if (totIter>0) printf("timing: per iteration step: %.4e sec\n",time_iter/totIter);
269 }
270 }
271 }
272 }
273 }
274 Performance_stopMonitor(pp,PERFORMANCE_ALL);
275 blocktimer_increment("Paso_Solver()", blocktimer_start);
276 }

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26