/[escript]/trunk/paso/src/Solver.c
ViewVC logotype

Annotation of /trunk/paso/src/Solver.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 2078 - (hide annotations)
Thu Nov 20 16:10:10 2008 UTC (10 years, 5 months ago) by phornby
File MIME type: text/plain
File size: 13000 byte(s)
Two changes.

1. Move blocktimer from escript to esysUtils.
2. Make it possible to link to paso as a DLL or .so.

Should have no effect on 'nix's

In respect of 1., blocktimer had begun to spring up everywhere, so
for the moment I thought it best to move it to the only other library that
pops up all over the place.

In respect of 2., paso needed to be a DLL in order to use the windows intelc /fast
option, which does aggressive multi-file optimisations. Even in its current form, it either
vectorises or parallelises  hundreds more loops in the esys system than appear in the pragmas.

In achieving 2. I have not been too delicate in adding

PASO_DLL_API

declarations to the .h files in paso/src. Only toward the end of the process of
the conversion, when the number of linker errors dropped below 20, say, did I choosy about what
functions in a header I declared PASO_DLL_API. As a result, there are likely to be many routines
declared as external functions symbols that are in fact internal to the paso DLL. 
Why is this an issue?  It prevents the intelc compiler from getting aggressive on the paso module.
With pain there is sometimes gain. At least all the DLL rules in windows give good
(non-microsoft) compiler writers a chance to really shine.

So, if you should see a PASO_DLL_API on a function in a paso header file,
and think to yourself, "that function is only called in paso, why export it?", then feel free to
delete the PASO_DLL_API export declaration.

Here's hoping for no breakage.....
1 ksteube 1312
2     /*******************************************************
3 ksteube 1811 *
4     * Copyright (c) 2003-2008 by University of Queensland
5     * Earth Systems Science Computational Center (ESSCC)
6     * http://www.uq.edu.au/esscc
7     *
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13 dhawcroft 631
14 ksteube 1811
15 jgs 150 /**************************************************************/
16    
17     /* Paso: SystemMatrix: controls iterative linear system solvers */
18    
19     /**************************************************************/
20    
21     /* Copyrights by ACcESS Australia 2003/04 */
22     /* Author: gross@access.edu.au */
23    
24     /**************************************************************/
25    
26 gross 700 #include "Paso.h"
27     #include "SystemMatrix.h"
28 jgs 150 #include "Solver.h"
29 phornby 2078 #include "esysUtils/blocktimer.h"
30 jgs 150
31     /***********************************************************************************/
32    
33     /* free space */
34    
35     void Paso_Solver_free(Paso_SystemMatrix* A) {
36 phornby 1628 Paso_Preconditioner_free(A->solver);
37     A->solver=NULL;
38 jgs 150 }
39     /* call the iterative solver: */
40    
41 phornby 1628 void Paso_Solver(Paso_SystemMatrix* A,double* x,double* b,
42     Paso_Options* options,Paso_Performance* pp) {
43    
44     double norm2_of_b,tol,tolerance,time_iter;
45     double *r=NULL,norm2_of_residual,last_norm2_of_residual,norm_max_of_b;
46     double norm2_of_b_local,norm_max_of_b_local,norm2_of_residual_local;
47     double norm_max_of_residual_local,norm_max_of_residual;
48     double last_norm_max_of_residual,*scaling;
49     #ifdef PASO_MPI
50     double loc_norm;
51     #endif
52     dim_t i,totIter,cntIter,method;
53     bool_t finalizeIteration;
54 jfenwick 1981 err_t errorCode=NO_ERROR;
55 phornby 1628 dim_t numSol = Paso_SystemMatrix_getTotalNumCols(A);
56     dim_t numEqua = Paso_SystemMatrix_getTotalNumRows(A);
57 ksteube 1707 double blocktimer_precond, blocktimer_start = blocktimer_time();
58 gross 415
59 gross 1639 tolerance=MAX(options->tolerance,EPSILON);
60     Paso_resetError();
61     method=Paso_Options_getSolver(options->method,PASO_PASO,options->symmetric);
62     /* check matrix type */
63     if ((A->type & MATRIX_FORMAT_CSC) || (A->type & MATRIX_FORMAT_OFFSET1) || (A->type & MATRIX_FORMAT_SYM) ) {
64     Paso_setError(TYPE_ERROR,"Iterative solver requires CSR format with unsymmetric storage scheme and index offset 0.");
65     }
66     if (A->col_block_size != A->row_block_size) {
67     Paso_setError(TYPE_ERROR,"Iterative solver requires row and column block sizes to be equal.");
68     }
69     if (Paso_SystemMatrix_getGlobalNumCols(A) != Paso_SystemMatrix_getGlobalNumRows(A)) {
70 artak 1787 Paso_setError(TYPE_ERROR,"Iterative solver requires a square matrix.");
71 gross 1639 return;
72     }
73     /* this for testing only */
74     if (method==PASO_NONLINEAR_GMRES) {
75     Paso_Function* F=NULL;
76     F=Paso_Function_LinearSystem_alloc(A,b,options);
77 gross 1798 Paso_Solver_solvePreconditioner(A,x,b);
78 gross 1639 errorCode=Paso_Solver_NewtonGMRES(F,x,options,pp);
79 gross 1798 if (errorCode!=NO_ERROR) {
80 gross 1639 Paso_setError(SYSTEM_ERROR,"Paso_Solver_NewtonGMRES: an error has occured.");
81     }
82     Paso_Function_LinearSystem_free(F);
83 gross 1798 return;
84 gross 1639 }
85     /* ========================= */
86     Performance_startMonitor(pp,PERFORMANCE_ALL);
87     if (Paso_noError()) {
88     /* get normalization */
89     scaling=Paso_SystemMatrix_borrowNormalization(A);
90     if (Paso_noError()) {
91     /* get the norm of the right hand side */
92     norm2_of_b=0.;
93     norm_max_of_b=0.;
94     #pragma omp parallel private(norm2_of_b_local,norm_max_of_b_local)
95     {
96     norm2_of_b_local=0.;
97     norm_max_of_b_local=0.;
98     #pragma omp for private(i) schedule(static)
99     for (i = 0; i < numEqua ; ++i) {
100     norm2_of_b_local += b[i] * b[i];
101     norm_max_of_b_local = MAX(ABS(scaling[i]*b[i]),norm_max_of_b_local);
102     }
103     #pragma omp critical
104     {
105     norm2_of_b += norm2_of_b_local;
106     norm_max_of_b = MAX(norm_max_of_b_local,norm_max_of_b);
107     }
108     }
109     /* TODO: use one call */
110     #ifdef PASO_MPI
111     {
112     loc_norm = norm2_of_b;
113     MPI_Allreduce(&loc_norm,&norm2_of_b, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
114     loc_norm = norm_max_of_b;
115     MPI_Allreduce(&loc_norm,&norm_max_of_b, 1, MPI_DOUBLE, MPI_MAX, A->mpi_info->comm);
116     }
117     #endif
118     norm2_of_b=sqrt(norm2_of_b);
119 jgs 150
120 phornby 1628 /* if norm2_of_b==0 we are ready: x=0 */
121     if (norm2_of_b <=0.) {
122 gross 1798 #pragma omp parallel for private(i) schedule(static)
123 phornby 1628 for (i = 0; i < numSol; i++) x[i]=0.;
124     if (options->verbose) printf("right hand side is identical zero.\n");
125     } else {
126     if (options->verbose) {
127     printf("l2/lmax-norm of right hand side is %e/%e.\n",norm2_of_b,norm_max_of_b);
128     printf("l2/lmax-stopping criterion is %e/%e.\n",norm2_of_b*tolerance,norm_max_of_b*tolerance);
129     switch (method) {
130     case PASO_BICGSTAB:
131     printf("Iterative method is BiCGStab.\n");
132     break;
133     case PASO_PCG:
134     printf("Iterative method is PCG.\n");
135     break;
136 artak 1703 case PASO_TFQMR:
137     printf("Iterative method is TFQMR.\n");
138     break;
139 artak 1787 case PASO_MINRES:
140     printf("Iterative method is MINRES.\n");
141     break;
142 phornby 1628 case PASO_PRES20:
143     printf("Iterative method is PRES20.\n");
144     break;
145     case PASO_GMRES:
146     if (options->restart>0) {
147     printf("Iterative method is GMRES(%d,%d)\n",options->truncation,options->restart);
148     } else {
149     printf("Iterative method is GMRES(%d)\n",options->truncation);
150 gross 584 }
151 phornby 1628 break;
152     }
153     }
154     /* construct the preconditioner */
155 gross 584
156 ksteube 1707 blocktimer_precond = blocktimer_time();
157 phornby 1628 Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER_INIT);
158     Paso_Solver_setPreconditioner(A,options);
159     Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER_INIT);
160 ksteube 1707 blocktimer_increment("Paso_Solver_setPreconditioner()", blocktimer_precond);
161 phornby 1628 if (! Paso_noError()) return;
162 ksteube 1312 time_iter=Paso_timer();
163 gross 584 /* get an initial guess by evaluating the preconditioner */
164 gross 1556 Paso_Solver_solvePreconditioner(A,x,b);
165 gross 584 /* start the iteration process :*/
166 ksteube 1312 r=TMPMEMALLOC(numEqua,double);
167     Paso_checkPtr(r);
168     if (Paso_noError()) {
169     totIter = 0;
170     finalizeIteration = FALSE;
171     last_norm2_of_residual=norm2_of_b;
172     last_norm_max_of_residual=norm_max_of_b;
173 artak 1787 /* Loop */
174 ksteube 1312 while (! finalizeIteration) {
175     cntIter = options->iter_max - totIter;
176     finalizeIteration = TRUE;
177     /* Set initial residual. */
178     norm2_of_residual = 0;
179     norm_max_of_residual = 0;
180 gross 1556 #pragma omp parallel for private(i) schedule(static)
181     for (i = 0; i < numEqua; i++) r[i]=b[i];
182     Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(DBLE(-1), A, x, DBLE(1), r);
183    
184 ksteube 1312 #pragma omp parallel private(norm2_of_residual_local,norm_max_of_residual_local)
185     {
186     norm2_of_residual_local = 0;
187     norm_max_of_residual_local = 0;
188     #pragma omp for private(i) schedule(static)
189     for (i = 0; i < numEqua; i++) {
190     norm2_of_residual_local+= r[i] * r[i];
191     norm_max_of_residual_local=MAX(ABS(scaling[i]*r[i]),norm_max_of_residual_local);
192     }
193     #pragma omp critical
194     {
195     norm2_of_residual += norm2_of_residual_local;
196     norm_max_of_residual = MAX(norm_max_of_residual_local,norm_max_of_residual);
197     }
198 gross 584 }
199 ksteube 1312 /* TODO: use one call */
200     #ifdef PASO_MPI
201     loc_norm = norm2_of_residual;
202     MPI_Allreduce(&loc_norm,&norm2_of_residual, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
203     loc_norm = norm_max_of_residual;
204     MPI_Allreduce(&loc_norm,&norm_max_of_residual, 1, MPI_DOUBLE, MPI_MAX, A->mpi_info->comm);
205     #endif
206     norm2_of_residual =sqrt(norm2_of_residual);
207    
208 phornby 1628 if (options->verbose) printf("Step %5d: l2/lmax-norm of residual is %e/%e",totIter,norm2_of_residual,norm_max_of_residual);
209     if (totIter>0 && norm2_of_residual>=last_norm2_of_residual && norm_max_of_residual>=last_norm_max_of_residual) {
210     if (options->verbose) printf(" divergence!\n");
211     Paso_setError(WARNING, "No improvement during iteration. Iterative solver gives up.");
212     } else {
213     /* */
214     if (norm2_of_residual>tolerance*norm2_of_b || norm_max_of_residual>tolerance*norm_max_of_b ) {
215     tol=tolerance*MIN(norm2_of_b,0.1*norm2_of_residual/norm_max_of_residual*norm_max_of_b);
216     if (options->verbose) printf(" (new tolerance = %e).\n",tol);
217     last_norm2_of_residual=norm2_of_residual;
218     last_norm_max_of_residual=norm_max_of_residual;
219     /* call the solver */
220     switch (method) {
221     case PASO_BICGSTAB:
222     errorCode = Paso_Solver_BiCGStab(A, r, x, &cntIter, &tol, pp);
223     break;
224     case PASO_PCG:
225 artak 1819 errorCode = Paso_Solver_PCG(A, r, x, &cntIter, &tol, pp);
226 phornby 1628 break;
227 artak 1703 case PASO_TFQMR:
228 artak 1787 errorCode = Paso_Solver_TFQMR(A, r, x, &cntIter, &tol, pp);
229 artak 1703 break;
230 artak 1787 case PASO_MINRES:
231 artak 1862 errorCode = Paso_Solver_MINRES(A, r, x, &cntIter, &tol, pp);
232 artak 1787 break;
233 phornby 1628 case PASO_PRES20:
234     errorCode = Paso_Solver_GMRES(A, r, x, &cntIter, &tol,5,20, pp);
235     break;
236     case PASO_GMRES:
237     errorCode = Paso_Solver_GMRES(A, r, x, &cntIter, &tol,options->truncation,options->restart, pp);
238     break;
239     }
240     totIter += cntIter;
241     /* error handling */
242     if (errorCode==NO_ERROR) {
243     finalizeIteration = FALSE;
244     } else if (errorCode==SOLVER_MAXITER_REACHED) {
245     Paso_setError(WARNING,"maximum number of iteration step reached.\nReturned solution does not fulfil stopping criterion.");
246     if (options->verbose) printf("Maximum number of iterations reached.!\n");
247     } else if (errorCode == SOLVER_INPUT_ERROR ) {
248     Paso_setError(SYSTEM_ERROR,"illegal dimension in iterative solver.");
249     if (options->verbose) printf("Internal error!\n");
250     } else if ( errorCode == SOLVER_BREAKDOWN ) {
251     if (cntIter <= 1) {
252     Paso_setError(ZERO_DIVISION_ERROR, "fatal break down in iterative solver.");
253     if (options->verbose) printf("Uncurable break down!\n");
254     } else {
255     if (options->verbose) printf("Breakdown at iter %d (residual = %e). Restarting ...\n", cntIter+totIter, tol);
256     finalizeIteration = FALSE;
257 ksteube 1312 }
258     } else {
259     if (options->verbose) printf(". convergence! \n");
260     }
261 jgs 150 }
262 ksteube 1312 } /* while */
263     }
264 gross 584 MEMFREE(r);
265     time_iter=Paso_timer()-time_iter;
266     if (options->verbose) {
267 gross 1639 printf("\ntiming: solver: %.4e sec\n",time_iter);
268 gross 584 if (totIter>0) printf("timing: per iteration step: %.4e sec\n",time_iter/totIter);
269 jgs 150 }
270     }
271     }
272 gross 584 }
273 phornby 1628 }
274     Performance_stopMonitor(pp,PERFORMANCE_ALL);
275     blocktimer_increment("Paso_Solver()", blocktimer_start);
276 jgs 150 }

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26