/[escript]/trunk/paso/src/GMRES.c
ViewVC logotype

Annotation of /trunk/paso/src/GMRES.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 682 - (hide annotations)
Mon Mar 27 02:43:09 2006 UTC (13 years, 11 months ago) by robwdcock
Original Path: trunk/paso/src/Solvers/GMRES.c
File MIME type: text/plain
File size: 23868 byte(s)
+ NEW BUILD SYSTEM

This commit contains the new build system with cross-platform support.
Most things work are before though you can have more control.

ENVIRONMENT settings have changed:
+ You no longer require LD_LIBRARY_PATH or PYTHONPATH to point to the
esysroot for building and testing performed via scons
+ ACcESS altix users: It is recommended you change your modules to load
the latest intel compiler and other libraries required by boost to match
the setup in svn (you can override). The correct modules are as follows

module load intel_cc.9.0.026
export
MODULEPATH=${MODULEPATH}:/data/raid2/toolspp4/modulefiles/gcc-3.3.6
module load boost/1.33.0/python-2.4.1
module load python/2.4.1
module load numarray/1.3.3


1 jgs 150 /* $Id$ */
2    
3 dhawcroft 631
4 jgs 150 /*
5 dhawcroft 631 ********************************************************************************
6 dhawcroft 633 * Copyright 2006 by ACcESS MNRF *
7 dhawcroft 631 * *
8     * http://www.access.edu.au *
9     * Primary Business: Queensland, Australia *
10     * Licensed under the Open Software License version 3.0 *
11     * http://www.opensource.org/licenses/osl-3.0.php *
12     ********************************************************************************
13     */
14    
15     /*
16 jgs 150 * Purpose
17     * =======
18     *
19     * GMRES solves the linear system A*x=b using the
20     * truncated and restered GMRES method with preconditioning.
21     *
22     * Convergence test: norm( b - A*x )< TOL.
23     *
24     *
25     *
26     * Arguments
27     * =========
28     *
29 jgs 154 * r (input/output) double array, dimension n.
30 jgs 150 * On entry, residual of inital guess X
31     *
32 jgs 154 * x (input/output) double array, dimension n.
33 jgs 150 * On input, the initial guess.
34     *
35     * iter (input/output) int
36     * On input, the maximum num_iterations to be performed.
37     * On output, actual number of num_iterations performed.
38     *
39     * tolerance (input/output) DOUBLE PRECISION
40     * On input, the allowable convergence measure for
41     * norm( b - A*x )
42     * On output, the final value of this measure.
43     *
44 jgs 154 * Length_of_recursion (input) gives the number of residual to be kept in orthogonalization process
45 jgs 150 *
46     * restart (input) If restart>0, iteration is resterted a after restart steps.
47     *
48     * INFO (output) int
49     *
50     * =SOLVER_NO_ERROR: Successful exit. num_iterated approximate solution returned.
51 jgs 154 * =SOLVER_MAXNUM_ITER_REACHED
52 jgs 150 * =SOLVER_INPUT_ERROR Illegal parameter:
53 jgs 154 * =SOLVER_BREAKDOWN: bad luck!
54     * =SOLVER_MEMORY_ERROR : no memory available
55 jgs 150 *
56     * ==============================================================
57     */
58    
59 robwdcock 682 #include "../Common.h"
60     #include "../SystemMatrix.h"
61 jgs 150 #include "Solver.h"
62     #ifdef _OPENMP
63     #include <omp.h>
64     #endif
65    
66     err_t Paso_Solver_GMRES(
67     Paso_SystemMatrix * A,
68     double * r,
69     double * x,
70     dim_t *iter,
71 gross 584 double * tolerance,dim_t Length_of_recursion,dim_t restart,
72     Paso_Performance* pp) {
73 jgs 150
74     /* Local variables */
75    
76 jgs 154 double *AP,*X_PRES[MAX(Length_of_recursion,0)+1],*R_PRES[MAX(Length_of_recursion,0)+1],*P_PRES[MAX(Length_of_recursion,0)+1];
77     double P_PRES_dot_AP[MAX(Length_of_recursion,0)],R_PRES_dot_P_PRES[MAX(Length_of_recursion,0)+1],BREAKF[MAX(Length_of_recursion,0)+1],ALPHA[MAX(Length_of_recursion,0)];
78     double P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3,P_PRES_dot_AP4,P_PRES_dot_AP5,P_PRES_dot_AP6,R_PRES_dot_P,abs_RP,breakf0;
79     double tol,Factor,sum_BREAKF,gamma,SC1,SC2,norm_of_residual,diff,L2_R,Norm_of_residual_global;
80     double *save_XPRES, *save_P_PRES, *save_R_PRES,save_R_PRES_dot_P_PRES;
81     dim_t maxit,Num_iter_global,num_iter_restart,num_iter;
82     dim_t i,z,order;
83 jgs 150 bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE,restartFlag=FALSE;
84 jgs 154 err_t Status=SOLVER_NO_ERROR;
85 jgs 150
86     /* adapt original routine parameters */
87    
88 jgs 154 dim_t n=A->num_cols * A-> col_block_size;
89     dim_t Length_of_mem=MAX(Length_of_recursion,0)+1;
90 jgs 150
91     /* Test the input parameters. */
92 jgs 154 if (restart>0) restart=MAX(Length_of_recursion,restart);
93     if (n < 0 || Length_of_recursion<=0) {
94 jgs 150 return SOLVER_INPUT_ERROR;
95     }
96    
97     /* allocate memory: */
98    
99 jgs 154 AP=TMPMEMALLOC(n,double);
100     if (AP==NULL) Status=SOLVER_MEMORY_ERROR;
101     for (i=0;i<Length_of_mem;i++) {
102     X_PRES[i]=TMPMEMALLOC(n,double);
103     R_PRES[i]=TMPMEMALLOC(n,double);
104     P_PRES[i]=TMPMEMALLOC(n,double);
105     if (X_PRES[i]==NULL || R_PRES[i]==NULL || P_PRES[i]==NULL) Status=SOLVER_MEMORY_ERROR;
106 jgs 150 }
107 jgs 154 if ( Status ==SOLVER_NO_ERROR ) {
108 jgs 150
109     /* now PRES starts : */
110     maxit=*iter;
111     tol=*tolerance;
112    
113 jgs 154 #pragma omp parallel firstprivate(maxit,tol,convergeFlag,maxIterFlag,breakFlag) \
114     private(num_iter,i,num_iter_restart,order,sum_BREAKF,gamma,restartFlag,norm_of_residual,abs_RP,breakf0,\
115     save_XPRES,save_P_PRES,save_R_PRES,save_R_PRES_dot_P_PRES)
116 jgs 150 {
117     /* initialization */
118    
119     restartFlag=TRUE;
120     num_iter=0;
121     #pragma omp for private(z) schedule(static) nowait
122 jgs 154 for (z=0; z < n; ++z) AP[z]=0;
123     for(i=0;i<Length_of_mem;++i) {
124 jgs 150 #pragma omp for private(z) schedule(static) nowait
125 jgs 154 for (z=0; z < n; ++z) {
126     P_PRES[i][z]=0;
127     R_PRES[i][z]=0;
128     X_PRES[i][z]=0;
129     }
130 jgs 150 }
131    
132 jgs 154 while (! (convergeFlag || maxIterFlag || breakFlag)) {
133 jgs 150 #pragma omp barrier
134     if (restartFlag) {
135 jgs 154 #pragma omp master
136     BREAKF[0]=ONE;
137 jgs 150 #pragma omp for private(z) schedule(static) nowait
138 jgs 154 for (z=0; z < n; ++z) {
139     R_PRES[0][z]=r[z];
140     X_PRES[0][z]=x[z];
141     }
142 jgs 150 num_iter_restart=0;
143     restartFlag=FALSE;
144     }
145     ++num_iter;
146     ++num_iter_restart;
147 jgs 154 /* order is the dimension of the space on which the residual is minimized: */
148     order=MIN(num_iter_restart,Length_of_recursion);
149 jgs 150 /***
150 jgs 154 *** calculate new search direction P from R_PRES
151 jgs 150 ***/
152 gross 432 #pragma omp barrier
153 jgs 154 Paso_Solver_solvePreconditioner(A,&P_PRES[0][0], &R_PRES[0][0]);
154 jgs 150 /***
155     *** apply A to P to get AP
156     ***/
157 gross 432 #pragma omp barrier
158 gross 415 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, &P_PRES[0][0],ZERO, &AP[0]);
159 jgs 150 /***
160     ***** calculation of the norm of R and the scalar products of
161     *** the residuals and A*P:
162     ***/
163 jgs 154 if (order==0) {
164 jgs 150 #pragma omp master
165 jgs 154 R_PRES_dot_P=ZERO;
166 jgs 150 #pragma omp barrier
167 jgs 154 #pragma omp for private(z) reduction(+:R_PRES_dot_P) schedule(static)
168     for (z=0;z<n;++z)
169     R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
170 jgs 150 #pragma omp master
171 jgs 154 R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
172     } else if (order==1) {
173     #pragma omp master
174 jgs 150 {
175 jgs 154 R_PRES_dot_P=ZERO;
176     P_PRES_dot_AP0=ZERO;
177 jgs 150 }
178     #pragma omp barrier
179 jgs 154 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0) schedule(static)
180     for (z=0;z<n;++z) {
181     R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
182     P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
183 jgs 150 }
184     #pragma omp master
185     {
186 jgs 154 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
187     R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
188 jgs 150 }
189 jgs 154 } else if (order==2) {
190 jgs 150 #pragma omp master
191     {
192 jgs 154 R_PRES_dot_P=ZERO;
193     P_PRES_dot_AP0=ZERO;
194     P_PRES_dot_AP1=ZERO;
195 jgs 150 }
196     #pragma omp barrier
197 jgs 154 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1) schedule(static)
198     for (z=0;z<n;++z) {
199     R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
200     P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
201     P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
202 jgs 150 }
203     #pragma omp master
204     {
205 jgs 154 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
206     P_PRES_dot_AP[1]=P_PRES_dot_AP1;
207     R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
208 jgs 150 }
209 jgs 154 } else if (order==3) {
210 jgs 150 #pragma omp master
211     {
212 jgs 154 R_PRES_dot_P=ZERO;
213     P_PRES_dot_AP0=ZERO;
214     P_PRES_dot_AP1=ZERO;
215     P_PRES_dot_AP2=ZERO;
216 jgs 150 }
217     #pragma omp barrier
218 jgs 154 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2) schedule(static)
219     for (z=0;z<n;++z) {
220     R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
221     P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
222     P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
223     P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
224 jgs 150 }
225     #pragma omp master
226     {
227 jgs 154 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
228     P_PRES_dot_AP[1]=P_PRES_dot_AP1;
229     P_PRES_dot_AP[2]=P_PRES_dot_AP2;
230     R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
231 jgs 150 }
232 jgs 154 } else if (order==4) {
233 jgs 150 #pragma omp master
234     {
235 jgs 154 R_PRES_dot_P=ZERO;
236     P_PRES_dot_AP0=ZERO;
237     P_PRES_dot_AP1=ZERO;
238     P_PRES_dot_AP2=ZERO;
239     P_PRES_dot_AP3=ZERO;
240 jgs 150 }
241     #pragma omp barrier
242 jgs 154 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3) schedule(static)
243     for (z=0;z<n;++z) {
244     R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
245     P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
246     P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
247     P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
248     P_PRES_dot_AP3+=P_PRES[3][z]*AP[z];
249 jgs 150 }
250     #pragma omp master
251     {
252 jgs 154 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
253     P_PRES_dot_AP[1]=P_PRES_dot_AP1;
254     P_PRES_dot_AP[2]=P_PRES_dot_AP2;
255     P_PRES_dot_AP[3]=P_PRES_dot_AP3;
256     R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
257 jgs 150 }
258 jgs 154 } else if (order==5) {
259 jgs 150 #pragma omp master
260     {
261 jgs 154 R_PRES_dot_P=ZERO;
262     P_PRES_dot_AP0=ZERO;
263     P_PRES_dot_AP1=ZERO;
264     P_PRES_dot_AP2=ZERO;
265     P_PRES_dot_AP3=ZERO;
266     P_PRES_dot_AP4=ZERO;
267 jgs 150 }
268     #pragma omp barrier
269 jgs 154 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3,P_PRES_dot_AP4) schedule(static)
270     for (z=0;z<n;++z) {
271     R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
272     P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
273     P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
274     P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
275     P_PRES_dot_AP3+=P_PRES[3][z]*AP[z];
276     P_PRES_dot_AP4+=P_PRES[4][z]*AP[z];
277     }
278 jgs 150 #pragma omp master
279     {
280 jgs 154 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
281     P_PRES_dot_AP[1]=P_PRES_dot_AP1;
282     P_PRES_dot_AP[2]=P_PRES_dot_AP2;
283     P_PRES_dot_AP[3]=P_PRES_dot_AP3;
284     P_PRES_dot_AP[4]=P_PRES_dot_AP4;
285     R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
286 jgs 150 }
287 jgs 154 } else if (order==6) {
288 jgs 150 #pragma omp master
289     {
290 jgs 154 R_PRES_dot_P=ZERO;
291     P_PRES_dot_AP0=ZERO;
292     P_PRES_dot_AP1=ZERO;
293     P_PRES_dot_AP2=ZERO;
294     P_PRES_dot_AP3=ZERO;
295     P_PRES_dot_AP4=ZERO;
296     P_PRES_dot_AP5=ZERO;
297 jgs 150 }
298     #pragma omp barrier
299 jgs 154 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3,P_PRES_dot_AP4,P_PRES_dot_AP5) schedule(static)
300     for (z=0;z<n;++z) {
301     R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
302     P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
303     P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
304     P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
305     P_PRES_dot_AP3+=P_PRES[3][z]*AP[z];
306     P_PRES_dot_AP4+=P_PRES[4][z]*AP[z];
307     P_PRES_dot_AP5+=P_PRES[5][z]*AP[z];
308 jgs 150 }
309     #pragma omp master
310     {
311 jgs 154 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
312     P_PRES_dot_AP[1]=P_PRES_dot_AP1;
313     P_PRES_dot_AP[2]=P_PRES_dot_AP2;
314     P_PRES_dot_AP[3]=P_PRES_dot_AP3;
315     P_PRES_dot_AP[4]=P_PRES_dot_AP4;
316     P_PRES_dot_AP[5]=P_PRES_dot_AP5;
317     R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
318 jgs 150 }
319 jgs 154 } else if (order>6) {
320 jgs 150 #pragma omp master
321     {
322 jgs 154 R_PRES_dot_P=ZERO;
323     P_PRES_dot_AP0=ZERO;
324     P_PRES_dot_AP1=ZERO;
325     P_PRES_dot_AP2=ZERO;
326     P_PRES_dot_AP3=ZERO;
327     P_PRES_dot_AP4=ZERO;
328     P_PRES_dot_AP5=ZERO;
329     P_PRES_dot_AP6=ZERO;
330 jgs 150 }
331     #pragma omp barrier
332 jgs 154 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3,P_PRES_dot_AP4,P_PRES_dot_AP5,P_PRES_dot_AP6) schedule(static)
333     for (z=0;z<n;++z) {
334     R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
335     P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
336     P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
337     P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
338     P_PRES_dot_AP3+=P_PRES[3][z]*AP[z];
339     P_PRES_dot_AP4+=P_PRES[4][z]*AP[z];
340     P_PRES_dot_AP5+=P_PRES[5][z]*AP[z];
341     P_PRES_dot_AP6+=P_PRES[6][z]*AP[z];
342     }
343     #pragma omp master
344     {
345     P_PRES_dot_AP[0]=P_PRES_dot_AP0;
346     P_PRES_dot_AP[1]=P_PRES_dot_AP1;
347     P_PRES_dot_AP[2]=P_PRES_dot_AP2;
348     P_PRES_dot_AP[3]=P_PRES_dot_AP3;
349     P_PRES_dot_AP[4]=P_PRES_dot_AP4;
350     P_PRES_dot_AP[5]=P_PRES_dot_AP5;
351     P_PRES_dot_AP[6]=P_PRES_dot_AP6;
352     R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
353    
354     P_PRES_dot_AP0=ZERO;
355 jgs 150 }
356 jgs 154 for (i=7;i<order;++i) {
357     #pragma omp barrier
358     #pragma omp for private(z) reduction(+:P_PRES_dot_AP0) schedule(static)
359     for (z=0;z<n;++z) P_PRES_dot_AP0+=P_PRES[i][z]*AP[z];
360 jgs 150 #pragma omp master
361     {
362 jgs 154 P_PRES_dot_AP[i]=P_PRES_dot_AP0;
363     P_PRES_dot_AP0=ZERO;
364 jgs 150 }
365     }
366     }
367 jgs 154 /* this fixes a problem with the intel compiler */
368     #pragma omp master
369     P_PRES_dot_AP0=R_PRES_dot_P_PRES[0];
370     /*** if sum_BREAKF is equal to zero a breakdown occurs.
371     *** iteration procedure can be continued but R_PRES is not the
372     *** residual of X_PRES approximation.
373     ***/
374 gross 432 #pragma omp barrier
375 jgs 154 sum_BREAKF=0.;
376     for (i=0;i<order;++i) sum_BREAKF +=BREAKF[i];
377     breakFlag=!((ABS(P_PRES_dot_AP0) > ZERO) && (sum_BREAKF >ZERO));
378     if (!breakFlag) {
379     breakFlag=FALSE;
380     /***
381     ***** X_PRES and R_PRES are moved to memory:
382     ***/
383     #pragma omp master
384 jgs 150 {
385 jgs 154 Factor=0.;
386     for (i=0;i<order;++i) {
387     ALPHA[i]=-P_PRES_dot_AP[i]/R_PRES_dot_P_PRES[i];
388     Factor+=BREAKF[i]*ALPHA[i];
389     }
390    
391     save_R_PRES_dot_P_PRES=R_PRES_dot_P_PRES[Length_of_mem-1];
392     save_R_PRES=R_PRES[Length_of_mem-1];
393     save_XPRES=X_PRES[Length_of_mem-1];
394     save_P_PRES=P_PRES[Length_of_mem-1];
395     for (i=Length_of_mem-1;i>0;--i) {
396     BREAKF[i]=BREAKF[i-1];
397     R_PRES_dot_P_PRES[i]=R_PRES_dot_P_PRES[i-1];
398     R_PRES[i]=R_PRES[i-1];
399     X_PRES[i]=X_PRES[i-1];
400     P_PRES[i]=P_PRES[i-1];
401     }
402     R_PRES_dot_P_PRES[0]=save_R_PRES_dot_P_PRES;
403     R_PRES[0]=save_R_PRES;
404     X_PRES[0]=save_XPRES;
405     P_PRES[0]=save_P_PRES;
406    
407     if (ABS(Factor)<=ZERO) {
408     Factor=1.;
409     BREAKF[0]=ZERO;
410     } else {
411     Factor=1./Factor;
412     BREAKF[0]=ONE;
413     }
414     for (i=0;i<order;++i) ALPHA[i]*=Factor;
415 jgs 150 }
416     /*
417 jgs 154 ***** update of solution X_PRES and its residual R_PRES:
418 jgs 150 ***
419     *** P is used to accumulate X and AP to accumulate R. X and R
420     *** are still needed until they are put into the X and R memory
421 jgs 154 *** R_PRES and X_PRES
422 jgs 150 ***
423     **/
424     #pragma omp barrier
425 jgs 154 breakf0=BREAKF[0];
426     if (order==0) {
427 jgs 150 #pragma omp for private(z) schedule(static)
428 jgs 154 for (z=0;z<n;++z) {
429     R_PRES[0][z]= Factor* AP[z];
430     X_PRES[0][z]=-Factor*P_PRES[1][z];
431     }
432     } else if (order==1) {
433 jgs 150 #pragma omp for private(z) schedule(static)
434 jgs 154 for (z=0;z<n;++z) {
435     R_PRES[0][z]= Factor* AP[z]+ALPHA[0]*R_PRES[1][z];
436     X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z];
437     }
438     } else if (order==2) {
439 jgs 150 #pragma omp for private(z) schedule(static)
440 jgs 154 for (z=0;z<n;++z) {
441     R_PRES[0][z]= Factor* AP[z]+ALPHA[0]*R_PRES[1][z]
442     +ALPHA[1]*R_PRES[2][z];
443     X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
444     +ALPHA[1]*X_PRES[2][z];
445     }
446     } else if (order==3) {
447 jgs 150 #pragma omp for private(z) schedule(static)
448 jgs 154 for (z=0;z<n;++z) {
449     R_PRES[0][z]= Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
450     +ALPHA[1]*R_PRES[2][z]
451     +ALPHA[2]*R_PRES[3][z];
452     X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
453     +ALPHA[1]*X_PRES[2][z]
454     +ALPHA[2]*X_PRES[3][z];
455     }
456     } else if (order==4) {
457 jgs 150 #pragma omp for private(z) schedule(static)
458 jgs 154 for (z=0;z<n;++z) {
459     R_PRES[0][z]= Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
460     +ALPHA[1]*R_PRES[2][z]
461     +ALPHA[2]*R_PRES[3][z]
462     +ALPHA[3]*R_PRES[4][z];
463     X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
464     +ALPHA[1]*X_PRES[2][z]
465     +ALPHA[2]*X_PRES[3][z]
466     +ALPHA[3]*X_PRES[4][z];
467     }
468     } else if (order==5) {
469 jgs 150 #pragma omp for private(z) schedule(static)
470 jgs 154 for (z=0;z<n;++z) {
471     R_PRES[0][z]=Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
472     +ALPHA[1]*R_PRES[2][z]
473     +ALPHA[2]*R_PRES[3][z]
474     +ALPHA[3]*R_PRES[4][z]
475     +ALPHA[4]*R_PRES[5][z];
476     X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
477     +ALPHA[1]*X_PRES[2][z]
478     +ALPHA[2]*X_PRES[3][z]
479     +ALPHA[3]*X_PRES[4][z]
480     +ALPHA[4]*X_PRES[5][z];
481     }
482     } else if (order==6) {
483 jgs 150 #pragma omp for private(z) schedule(static)
484 jgs 154 for (z=0;z<n;++z) {
485     R_PRES[0][z]=Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
486     +ALPHA[1]*R_PRES[2][z]
487     +ALPHA[2]*R_PRES[3][z]
488     +ALPHA[3]*R_PRES[4][z]
489     +ALPHA[4]*R_PRES[5][z]
490     +ALPHA[5]*R_PRES[6][z];
491     X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
492     +ALPHA[1]*X_PRES[2][z]
493     +ALPHA[2]*X_PRES[3][z]
494     +ALPHA[3]*X_PRES[4][z]
495     +ALPHA[4]*X_PRES[5][z]
496     +ALPHA[5]*X_PRES[6][z];
497     }
498     } else if (order>6) {
499 jgs 150 #pragma omp for private(z) schedule(static)
500 jgs 154 for (z=0;z<n;++z) {
501     R_PRES[0][z]=Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
502     +ALPHA[1]*R_PRES[2][z]
503     +ALPHA[2]*R_PRES[3][z]
504     +ALPHA[3]*R_PRES[4][z]
505     +ALPHA[4]*R_PRES[5][z]
506     +ALPHA[5]*R_PRES[6][z]
507     +ALPHA[6]*R_PRES[7][z];
508     X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
509     +ALPHA[1]*X_PRES[2][z]
510     +ALPHA[2]*X_PRES[3][z]
511     +ALPHA[3]*X_PRES[4][z]
512     +ALPHA[4]*X_PRES[5][z]
513     +ALPHA[5]*X_PRES[6][z]
514     +ALPHA[6]*X_PRES[7][z];
515     }
516     for (i=7;i<order;++i) {
517 jgs 150 #pragma omp for private(z) schedule(static)
518 jgs 154 for (z=0;z<n;++z) {
519     R_PRES[0][z]+=ALPHA[i]*R_PRES[i+1][z];
520     X_PRES[0][z]+=ALPHA[i]*X_PRES[i+1][z];
521     }
522 jgs 150 }
523     }
524 jgs 154 if (breakf0>0.) {
525 jgs 150 /***
526 jgs 154 ***** calculate gamma from min_(gamma){|R+gamma*(R_PRES-R)|_2}:
527 jgs 150 ***/
528     #pragma omp master
529     {
530     SC1=ZERO;
531     SC2=ZERO;
532     L2_R=ZERO;
533     }
534     #pragma omp barrier
535     #pragma omp for private(z,diff) reduction(+:SC1,SC2) schedule(static)
536 jgs 154 for (z=0;z<n;++z) {
537     diff=R_PRES[0][z]-r[z];
538 jgs 150 SC1+=diff*diff;
539     SC2+=diff*r[z];
540     }
541 jgs 154 gamma=(SC1<=ZERO) ? ZERO : -SC2/SC1;
542 jgs 150 #pragma omp for private(z) reduction(+:L2_R) schedule(static)
543 jgs 154 for (z=0;z<n;++z) {
544     x[z]+=gamma*(X_PRES[0][z]-x[z]);
545     r[z]+=gamma*(R_PRES[0][z]-r[z]);
546 jgs 150 L2_R+=r[z]*r[z];
547     }
548     norm_of_residual=sqrt(L2_R);
549     convergeFlag = (norm_of_residual <= tol);
550 jgs 154 if (restart>0) restartFlag=(num_iter_restart >= restart);
551     } else {
552     convergeFlag=FALSE;
553     restartFlag=FALSE;
554 jgs 150 }
555 jgs 154 maxIterFlag = (num_iter >= maxit);
556     }
557     }
558     /* end of iteration */
559     #pragma omp master
560     {
561     Norm_of_residual_global=norm_of_residual;
562     Num_iter_global=num_iter;
563 jgs 150 if (maxIterFlag) {
564 jgs 154 Status = SOLVER_MAXITER_REACHED;
565 jgs 150 } else if (breakFlag) {
566 jgs 154 Status = SOLVER_BREAKDOWN;
567     }
568 jgs 150 }
569     } /* end of parallel region */
570     TMPMEMFREE(AP);
571 jgs 154 for (i=0;i<Length_of_recursion;i++) {
572     TMPMEMFREE(X_PRES[i]);
573     TMPMEMFREE(R_PRES[i]);
574     TMPMEMFREE(P_PRES[i]);
575 jgs 150 }
576 jgs 154 *iter=Num_iter_global;
577     *tolerance=Norm_of_residual_global;
578 jgs 150 }
579 jgs 154 return Status;
580 jgs 150 }

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26