/[escript]/trunk-mpi-branch/paso/src/Solver.c
ViewVC logotype

Annotation of /trunk-mpi-branch/paso/src/Solver.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1196 - (hide annotations)
Fri Jun 15 03:45:48 2007 UTC (11 years, 11 months ago) by ksteube
File MIME type: text/plain
File size: 13459 byte(s)
Use of PAPI on solver is now enabled with papi_instrument_solver=1 in scons/ess_options.py.
Can instrument other blocks of code with blockpapi.c.
Added interval timers to grad, integrate and Assemble_PDE.

1 jgs 150 /* $Id$ */
2    
3 dhawcroft 631
4     /*
5     ********************************************************************************
6 dhawcroft 633 * Copyright 2006 by ACcESS MNRF *
7 dhawcroft 631 * *
8     * http://www.access.edu.au *
9     * Primary Business: Queensland, Australia *
10     * Licensed under the Open Software License version 3.0 *
11     * http://www.opensource.org/licenses/osl-3.0.php *
12     ********************************************************************************
13     */
14    
15 jgs 150 /**************************************************************/
16    
17     /* Paso: SystemMatrix: controls iterative linear system solvers */
18    
19     /**************************************************************/
20    
21     /* Copyrights by ACcESS Australia 2003/04 */
22     /* Author: gross@access.edu.au */
23    
24     /**************************************************************/
25    
26 gross 700 #include "Paso.h"
27     #include "SystemMatrix.h"
28 jgs 150 #include "Solver.h"
29 ksteube 1196 #include "escript/blocktimer.h"
30 jgs 150
31     /***********************************************************************************/
32    
33     /* free space */
34    
35     void Paso_Solver_free(Paso_SystemMatrix* A) {
36 gross 425 Paso_Preconditioner_free(A->solver);
37     A->solver=NULL;
38 jgs 150 }
39     /* call the iterative solver: */
40    
41 gross 584 void Paso_Solver(Paso_SystemMatrix* A,double* x,double* b,Paso_Options* options,Paso_Performance* pp) {
42     double norm2_of_b,tol,tolerance,time_iter,*r=NULL,norm2_of_residual,last_norm2_of_residual,norm_max_of_b,
43 jgs 150 norm2_of_b_local,norm_max_of_b_local,norm2_of_residual_local,norm_max_of_residual_local,norm_max_of_residual,
44 gross 1087 last_norm_max_of_residual,*scaling, *buffer0, *buffer1, loc_norm;
45 gross 415 dim_t i,totIter,cntIter,method;
46     bool_t finalizeIteration;
47     err_t errorCode;
48 gross 1011 dim_t myNumSol = A->myNumCols * A-> col_block_size;
49 gross 1087 dim_t maxNumSol = A->maxNumCols * A-> col_block_size;
50 gross 1011 dim_t myNumEqua = A->myNumRows * A-> row_block_size;
51 gross 1087 dim_t maxNumEqua = A->maxNumRows * A-> row_block_size;
52 ksteube 1196 double blocktimer_start = blocktimer_time();
53 gross 415
54     tolerance=MAX(options->tolerance,EPSILON);
55     Paso_resetError();
56     method=Paso_Options_getSolver(options->method,PASO_PASO,options->symmetric);
57     /* check matrix type */
58     if ((A->type & MATRIX_FORMAT_CSC) || (A->type & MATRIX_FORMAT_OFFSET1) || (A->type & MATRIX_FORMAT_SYM) ) {
59     Paso_setError(TYPE_ERROR,"Iterative solver requires CSR format with unsymmetric storage scheme and index offset 0.");
60 jgs 150 }
61 gross 415 if (A->col_block_size != A->row_block_size) {
62     Paso_setError(TYPE_ERROR,"Iterative solver requires row and column block sizes to be equal.");
63     }
64 gross 1011 if (A->numCols != A->numRows) {
65 gross 415 Paso_setError(TYPE_ERROR,"Iterative solver requires requires a square matrix.");
66     return;
67 jgs 150 }
68 gross 584 Performance_startMonitor(pp,PERFORMANCE_ALL);
69 jgs 150 if (Paso_noError()) {
70     /* get normalization */
71     scaling=Paso_SystemMatrix_borrowNormalization(A);
72 gross 584 if (Paso_noError()) {
73     /* get the norm of the right hand side */
74     norm2_of_b=0.;
75     norm_max_of_b=0.;
76     #pragma omp parallel private(norm2_of_b_local,norm_max_of_b_local)
77     {
78 jgs 150 norm2_of_b_local=0.;
79     norm_max_of_b_local=0.;
80     #pragma omp for private(i) schedule(static)
81 gross 1011 for (i = 0; i < myNumEqua ; ++i) {
82 jgs 150 norm2_of_b_local += b[i] * b[i];
83     norm_max_of_b_local = MAX(ABS(scaling[i]*b[i]),norm_max_of_b_local);
84     }
85     #pragma omp critical
86     {
87     norm2_of_b += norm2_of_b_local;
88     norm_max_of_b = MAX(norm_max_of_b_local,norm_max_of_b);
89     }
90 gross 584 }
91 gross 1087 /* TODO: use one call */
92     #ifdef PASO_MPI
93     {
94     loc_norm = norm2_of_b;
95     MPI_Allreduce(&loc_norm,&norm2_of_b, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
96     loc_norm = norm_max_of_b;
97     MPI_Allreduce(&loc_norm,&norm_max_of_b, 1, MPI_DOUBLE, MPI_MAX, A->mpi_info->comm);
98     }
99     #endif
100 gross 584 norm2_of_b=sqrt(norm2_of_b);
101 jgs 150
102 gross 584 /* if norm2_of_b==0 we are ready: x=0 */
103     if (norm2_of_b <=0.) {
104     #pragma omp parallel for private(i) schedule(static)
105 gross 1011 for (i = 0; i < myNumSol; i++) x[i]=0.;
106 gross 584 if (options->verbose) printf("right hand side is identical zero.\n");
107     } else {
108     if (options->verbose) {
109     printf("l2/lmax-norm of right hand side is %e/%e.\n",norm2_of_b,norm_max_of_b);
110     printf("l2/lmax-stopping criterion is %e/%e.\n",norm2_of_b*tolerance,norm_max_of_b*tolerance);
111     switch (method) {
112     case PASO_BICGSTAB:
113     printf("Iterative method is BiCGStab.\n");
114     break;
115     case PASO_PCG:
116     printf("Iterative method is PCG.\n");
117     break;
118     case PASO_PRES20:
119     printf("Iterative method is PRES20.\n");
120     break;
121     case PASO_GMRES:
122     if (options->restart>0) {
123     printf("Iterative method is GMRES(%d,%d)\n",options->truncation,options->restart);
124     } else {
125     printf("Iterative method is GMRES(%d)\n",options->truncation);
126     }
127     break;
128     }
129     }
130     /* construct the preconditioner */
131    
132     Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER_INIT);
133     Paso_Solver_setPreconditioner(A,options);
134     Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER_INIT);
135     if (! Paso_noError()) return;
136    
137     /* get an initial guess by evaluating the preconditioner */
138     time_iter=Paso_timer();
139     #pragma omp parallel
140 gross 1087 {
141     Paso_Solver_solvePreconditioner(A,x,b);
142     }
143 gross 584 if (! Paso_noError()) return;
144     /* start the iteration process :*/
145 gross 1087 r=TMPMEMALLOC(maxNumEqua,double);
146 gross 584 if (Paso_checkPtr(r)) return;
147 gross 1087 if (A->mpi_info->size>1) {
148     buffer1=TMPMEMALLOC(maxNumSol,double);
149     buffer0=TMPMEMALLOC(maxNumSol,double);
150     if (Paso_checkPtr(buffer1) || Paso_checkPtr(buffer0)) {
151     TMPMEMFREE(r);
152     MEMFREE(buffer0);
153     MEMFREE(buffer1);
154     return;
155     }
156     } else {
157     buffer0=NULL;
158     buffer1=NULL;
159     }
160 gross 584 totIter = 0;
161     finalizeIteration = FALSE;
162     last_norm2_of_residual=norm2_of_b;
163     last_norm_max_of_residual=norm_max_of_b;
164     while (! finalizeIteration) {
165     cntIter = options->iter_max - totIter;
166     finalizeIteration = TRUE;
167     /* Set initial residual. */
168     norm2_of_residual = 0;
169     norm_max_of_residual = 0;
170     #pragma omp parallel private(norm2_of_residual_local,norm_max_of_residual_local)
171     {
172     #pragma omp for private(i) schedule(static)
173 gross 1011 for (i = 0; i < myNumEqua; i++) r[i]=b[i];
174 gross 584
175 gross 1087 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(DBLE(-1), A, x, DBLE(1), r, buffer0, buffer1);
176 gross 584
177     norm2_of_residual_local = 0;
178     norm_max_of_residual_local = 0;
179     #pragma omp for private(i) schedule(static)
180 gross 1011 for (i = 0; i < myNumEqua; i++) {
181 gross 584 norm2_of_residual_local+= r[i] * r[i];
182     norm_max_of_residual_local=MAX(ABS(scaling[i]*r[i]),norm_max_of_residual_local);
183     }
184     #pragma omp critical
185     {
186     norm2_of_residual += norm2_of_residual_local;
187     norm_max_of_residual = MAX(norm_max_of_residual_local,norm_max_of_residual);
188     }
189     }
190 gross 1087 /* TODO: use one call */
191     #ifdef PASO_MPI
192     {
193     loc_norm = norm2_of_residual;
194     MPI_Allreduce(&loc_norm,&norm2_of_residual, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
195     loc_norm = norm_max_of_residual;
196     MPI_Allreduce(&loc_norm,&norm_max_of_residual, 1, MPI_DOUBLE, MPI_MAX, A->mpi_info->comm);
197     }
198     #endif
199 gross 584 norm2_of_residual =sqrt(norm2_of_residual);
200    
201     if (options->verbose) printf("Step %5d: l2/lmax-norm of residual is %e/%e",totIter,norm2_of_residual,norm_max_of_residual);
202     if (totIter>0 && norm2_of_residual>=last_norm2_of_residual && norm_max_of_residual>=last_norm_max_of_residual) {
203     if (options->verbose) printf(" divergence!\n");
204     Paso_setError(WARNING, "No improvement during iteration. Iterative solver gives up.");
205     } else {
206     /* */
207     if (norm2_of_residual>tolerance*norm2_of_b || norm_max_of_residual>tolerance*norm_max_of_b ) {
208     tol=tolerance*MIN(norm2_of_b,0.1*norm2_of_residual/norm_max_of_residual*norm_max_of_b);
209     if (options->verbose) printf(" (new tolerance = %e).\n",tol);
210     last_norm2_of_residual=norm2_of_residual;
211     last_norm_max_of_residual=norm_max_of_residual;
212     /* call the solver */
213     switch (method) {
214     case PASO_BICGSTAB:
215 gross 1087 errorCode = Paso_Solver_BiCGStab(A, r, x, &cntIter, &tol,buffer0, buffer1, pp);
216 gross 584 break;
217     case PASO_PCG:
218 gross 1087 errorCode = Paso_Solver_PCG(A, r, x, &cntIter, &tol, buffer0, buffer1, pp);
219 gross 584 break;
220     case PASO_PRES20:
221 gross 1087 errorCode = Paso_Solver_GMRES(A, r, x, &cntIter, &tol,5,20, buffer0, buffer1, pp);
222 gross 584 break;
223     case PASO_GMRES:
224 gross 1087 errorCode = Paso_Solver_GMRES(A, r, x, &cntIter, &tol,options->truncation,options->restart, buffer0, buffer1, pp);
225 gross 584 break;
226     }
227     totIter += cntIter;
228    
229     /* error handling */
230     if (errorCode==NO_ERROR) {
231     finalizeIteration = FALSE;
232     } else if (errorCode==SOLVER_MAXITER_REACHED) {
233     Paso_setError(WARNING,"maximum number of iteration step reached.\nReturned solution does not fulfil stopping criterion.");
234     if (options->verbose) printf("Maximum number of iterations reached.!\n");
235     } else if (errorCode == SOLVER_INPUT_ERROR ) {
236     Paso_setError(SYSTEM_ERROR,"illegal dimension in iterative solver.");
237     if (options->verbose) printf("Internal error!\n");
238     } else if ( errorCode == SOLVER_BREAKDOWN ) {
239     if (cntIter <= 1) {
240     Paso_setError(ZERO_DIVISION_ERROR, "fatal break down in iterative solver.");
241     if (options->verbose) printf("Uncurable break down!\n");
242     } else {
243     if (options->verbose) printf("Breakdown at iter %d (residual = %e). Restarting ...\n", cntIter+totIter, tol);
244     finalizeIteration = FALSE;
245     }
246     } else if (errorCode == SOLVER_MEMORY_ERROR) {
247     Paso_setError(MEMORY_ERROR,"memory allocation failed.");
248     if (options->verbose) printf("Memory allocation failed!\n");
249     } else if (errorCode !=SOLVER_NO_ERROR ) {
250     Paso_setError(SYSTEM_ERROR,"unidentified error in iterative solver.");
251     if (options->verbose) printf("Unidentified error!\n");
252     }
253 jgs 150 } else {
254 gross 584 if (options->verbose) printf(". convergence! \n");
255 jgs 150 }
256 gross 584 }
257     } /* while */
258     MEMFREE(r);
259 gross 1087 MEMFREE(buffer0);
260     MEMFREE(buffer1);
261 gross 584 time_iter=Paso_timer()-time_iter;
262     if (options->verbose) {
263     printf("timing: solver: %.4e sec\n",time_iter);
264     if (totIter>0) printf("timing: per iteration step: %.4e sec\n",time_iter/totIter);
265 jgs 150 }
266     }
267     }
268 gross 584 }
269     Performance_stopMonitor(pp,PERFORMANCE_ALL);
270 ksteube 1196 blocktimer_increment("Paso_Solver()", blocktimer_start);
271 jgs 150 }

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26