/[escript]/trunk/paso/src/GMRES.c
ViewVC logotype

Annotation of /trunk/paso/src/GMRES.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 633 - (hide annotations)
Thu Mar 23 05:37:00 2006 UTC (13 years, 11 months ago) by dhawcroft
Original Path: trunk/paso/src/Solvers/GMRES.c
File MIME type: text/plain
File size: 23862 byte(s)


1 jgs 150 /* $Id$ */
2    
3 dhawcroft 631
4 jgs 150 /*
5 dhawcroft 631 ********************************************************************************
6 dhawcroft 633 * Copyright 2006 by ACcESS MNRF *
7 dhawcroft 631 * *
8     * http://www.access.edu.au *
9     * Primary Business: Queensland, Australia *
10     * Licensed under the Open Software License version 3.0 *
11     * http://www.opensource.org/licenses/osl-3.0.php *
12     ********************************************************************************
13     */
14    
15     /*
16 jgs 150 * Purpose
17     * =======
18     *
19     * GMRES solves the linear system A*x=b using the
20     * truncated and restered GMRES method with preconditioning.
21     *
22     * Convergence test: norm( b - A*x )< TOL.
23     *
24     *
25     *
26     * Arguments
27     * =========
28     *
29 jgs 154 * r (input/output) double array, dimension n.
30 jgs 150 * On entry, residual of inital guess X
31     *
32 jgs 154 * x (input/output) double array, dimension n.
33 jgs 150 * On input, the initial guess.
34     *
35     * iter (input/output) int
36     * On input, the maximum num_iterations to be performed.
37     * On output, actual number of num_iterations performed.
38     *
39     * tolerance (input/output) DOUBLE PRECISION
40     * On input, the allowable convergence measure for
41     * norm( b - A*x )
42     * On output, the final value of this measure.
43     *
44 jgs 154 * Length_of_recursion (input) gives the number of residual to be kept in orthogonalization process
45 jgs 150 *
46     * restart (input) If restart>0, iteration is resterted a after restart steps.
47     *
48     * INFO (output) int
49     *
50     * =SOLVER_NO_ERROR: Successful exit. num_iterated approximate solution returned.
51 jgs 154 * =SOLVER_MAXNUM_ITER_REACHED
52 jgs 150 * =SOLVER_INPUT_ERROR Illegal parameter:
53 jgs 154 * =SOLVER_BREAKDOWN: bad luck!
54     * =SOLVER_MEMORY_ERROR : no memory available
55 jgs 150 *
56     * ==============================================================
57     */
58    
59     #include "Common.h"
60     #include "SystemMatrix.h"
61     #include "Solver.h"
62     #ifdef _OPENMP
63     #include <omp.h>
64     #endif
65    
66     err_t Paso_Solver_GMRES(
67     Paso_SystemMatrix * A,
68     double * r,
69     double * x,
70     dim_t *iter,
71 gross 584 double * tolerance,dim_t Length_of_recursion,dim_t restart,
72     Paso_Performance* pp) {
73 jgs 150
74     /* Local variables */
75    
76 jgs 154 double *AP,*X_PRES[MAX(Length_of_recursion,0)+1],*R_PRES[MAX(Length_of_recursion,0)+1],*P_PRES[MAX(Length_of_recursion,0)+1];
77     double P_PRES_dot_AP[MAX(Length_of_recursion,0)],R_PRES_dot_P_PRES[MAX(Length_of_recursion,0)+1],BREAKF[MAX(Length_of_recursion,0)+1],ALPHA[MAX(Length_of_recursion,0)];
78     double P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3,P_PRES_dot_AP4,P_PRES_dot_AP5,P_PRES_dot_AP6,R_PRES_dot_P,abs_RP,breakf0;
79     double tol,Factor,sum_BREAKF,gamma,SC1,SC2,norm_of_residual,diff,L2_R,Norm_of_residual_global;
80     double *save_XPRES, *save_P_PRES, *save_R_PRES,save_R_PRES_dot_P_PRES;
81     dim_t maxit,Num_iter_global,num_iter_restart,num_iter;
82     dim_t i,z,order;
83 jgs 150 bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE,restartFlag=FALSE;
84 jgs 154 err_t Status=SOLVER_NO_ERROR;
85 jgs 150
86     /* adapt original routine parameters */
87    
88 jgs 154 dim_t n=A->num_cols * A-> col_block_size;
89     dim_t Length_of_mem=MAX(Length_of_recursion,0)+1;
90 jgs 150
91     /* Test the input parameters. */
92 jgs 154 if (restart>0) restart=MAX(Length_of_recursion,restart);
93     if (n < 0 || Length_of_recursion<=0) {
94 jgs 150 return SOLVER_INPUT_ERROR;
95     }
96    
97     /* allocate memory: */
98    
99 jgs 154 AP=TMPMEMALLOC(n,double);
100     if (AP==NULL) Status=SOLVER_MEMORY_ERROR;
101     for (i=0;i<Length_of_mem;i++) {
102     X_PRES[i]=TMPMEMALLOC(n,double);
103     R_PRES[i]=TMPMEMALLOC(n,double);
104     P_PRES[i]=TMPMEMALLOC(n,double);
105     if (X_PRES[i]==NULL || R_PRES[i]==NULL || P_PRES[i]==NULL) Status=SOLVER_MEMORY_ERROR;
106 jgs 150 }
107 jgs 154 if ( Status ==SOLVER_NO_ERROR ) {
108 jgs 150
109     /* now PRES starts : */
110     maxit=*iter;
111     tol=*tolerance;
112    
113 jgs 154 #pragma omp parallel firstprivate(maxit,tol,convergeFlag,maxIterFlag,breakFlag) \
114     private(num_iter,i,num_iter_restart,order,sum_BREAKF,gamma,restartFlag,norm_of_residual,abs_RP,breakf0,\
115     save_XPRES,save_P_PRES,save_R_PRES,save_R_PRES_dot_P_PRES)
116 jgs 150 {
117     /* initialization */
118    
119     restartFlag=TRUE;
120     num_iter=0;
121     #pragma omp for private(z) schedule(static) nowait
122 jgs 154 for (z=0; z < n; ++z) AP[z]=0;
123     for(i=0;i<Length_of_mem;++i) {
124 jgs 150 #pragma omp for private(z) schedule(static) nowait
125 jgs 154 for (z=0; z < n; ++z) {
126     P_PRES[i][z]=0;
127     R_PRES[i][z]=0;
128     X_PRES[i][z]=0;
129     }
130 jgs 150 }
131    
132 jgs 154 while (! (convergeFlag || maxIterFlag || breakFlag)) {
133 jgs 150 #pragma omp barrier
134     if (restartFlag) {
135 jgs 154 #pragma omp master
136     BREAKF[0]=ONE;
137 jgs 150 #pragma omp for private(z) schedule(static) nowait
138 jgs 154 for (z=0; z < n; ++z) {
139     R_PRES[0][z]=r[z];
140     X_PRES[0][z]=x[z];
141     }
142 jgs 150 num_iter_restart=0;
143     restartFlag=FALSE;
144     }
145     ++num_iter;
146     ++num_iter_restart;
147 jgs 154 /* order is the dimension of the space on which the residual is minimized: */
148     order=MIN(num_iter_restart,Length_of_recursion);
149 jgs 150 /***
150 jgs 154 *** calculate new search direction P from R_PRES
151 jgs 150 ***/
152 gross 432 #pragma omp barrier
153 jgs 154 Paso_Solver_solvePreconditioner(A,&P_PRES[0][0], &R_PRES[0][0]);
154 jgs 150 /***
155     *** apply A to P to get AP
156     ***/
157 gross 432 #pragma omp barrier
158 gross 415 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, &P_PRES[0][0],ZERO, &AP[0]);
159 jgs 150 /***
160     ***** calculation of the norm of R and the scalar products of
161     *** the residuals and A*P:
162     ***/
163 jgs 154 if (order==0) {
164 jgs 150 #pragma omp master
165 jgs 154 R_PRES_dot_P=ZERO;
166 jgs 150 #pragma omp barrier
167 jgs 154 #pragma omp for private(z) reduction(+:R_PRES_dot_P) schedule(static)
168     for (z=0;z<n;++z)
169     R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
170 jgs 150 #pragma omp master
171 jgs 154 R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
172     } else if (order==1) {
173     #pragma omp master
174 jgs 150 {
175 jgs 154 R_PRES_dot_P=ZERO;
176     P_PRES_dot_AP0=ZERO;
177 jgs 150 }
178     #pragma omp barrier
179 jgs 154 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0) schedule(static)
180     for (z=0;z<n;++z) {
181     R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
182     P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
183 jgs 150 }
184     #pragma omp master
185     {
186 jgs 154 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
187     R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
188 jgs 150 }
189 jgs 154 } else if (order==2) {
190 jgs 150 #pragma omp master
191     {
192 jgs 154 R_PRES_dot_P=ZERO;
193     P_PRES_dot_AP0=ZERO;
194     P_PRES_dot_AP1=ZERO;
195 jgs 150 }
196     #pragma omp barrier
197 jgs 154 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1) schedule(static)
198     for (z=0;z<n;++z) {
199     R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
200     P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
201     P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
202 jgs 150 }
203     #pragma omp master
204     {
205 jgs 154 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
206     P_PRES_dot_AP[1]=P_PRES_dot_AP1;
207     R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
208 jgs 150 }
209 jgs 154 } else if (order==3) {
210 jgs 150 #pragma omp master
211     {
212 jgs 154 R_PRES_dot_P=ZERO;
213     P_PRES_dot_AP0=ZERO;
214     P_PRES_dot_AP1=ZERO;
215     P_PRES_dot_AP2=ZERO;
216 jgs 150 }
217     #pragma omp barrier
218 jgs 154 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2) schedule(static)
219     for (z=0;z<n;++z) {
220     R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
221     P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
222     P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
223     P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
224 jgs 150 }
225     #pragma omp master
226     {
227 jgs 154 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
228     P_PRES_dot_AP[1]=P_PRES_dot_AP1;
229     P_PRES_dot_AP[2]=P_PRES_dot_AP2;
230     R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
231 jgs 150 }
232 jgs 154 } else if (order==4) {
233 jgs 150 #pragma omp master
234     {
235 jgs 154 R_PRES_dot_P=ZERO;
236     P_PRES_dot_AP0=ZERO;
237     P_PRES_dot_AP1=ZERO;
238     P_PRES_dot_AP2=ZERO;
239     P_PRES_dot_AP3=ZERO;
240 jgs 150 }
241     #pragma omp barrier
242 jgs 154 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3) schedule(static)
243     for (z=0;z<n;++z) {
244     R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
245     P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
246     P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
247     P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
248     P_PRES_dot_AP3+=P_PRES[3][z]*AP[z];
249 jgs 150 }
250     #pragma omp master
251     {
252 jgs 154 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
253     P_PRES_dot_AP[1]=P_PRES_dot_AP1;
254     P_PRES_dot_AP[2]=P_PRES_dot_AP2;
255     P_PRES_dot_AP[3]=P_PRES_dot_AP3;
256     R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
257 jgs 150 }
258 jgs 154 } else if (order==5) {
259 jgs 150 #pragma omp master
260     {
261 jgs 154 R_PRES_dot_P=ZERO;
262     P_PRES_dot_AP0=ZERO;
263     P_PRES_dot_AP1=ZERO;
264     P_PRES_dot_AP2=ZERO;
265     P_PRES_dot_AP3=ZERO;
266     P_PRES_dot_AP4=ZERO;
267 jgs 150 }
268     #pragma omp barrier
269 jgs 154 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3,P_PRES_dot_AP4) schedule(static)
270     for (z=0;z<n;++z) {
271     R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
272     P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
273     P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
274     P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
275     P_PRES_dot_AP3+=P_PRES[3][z]*AP[z];
276     P_PRES_dot_AP4+=P_PRES[4][z]*AP[z];
277     }
278 jgs 150 #pragma omp master
279     {
280 jgs 154 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
281     P_PRES_dot_AP[1]=P_PRES_dot_AP1;
282     P_PRES_dot_AP[2]=P_PRES_dot_AP2;
283     P_PRES_dot_AP[3]=P_PRES_dot_AP3;
284     P_PRES_dot_AP[4]=P_PRES_dot_AP4;
285     R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
286 jgs 150 }
287 jgs 154 } else if (order==6) {
288 jgs 150 #pragma omp master
289     {
290 jgs 154 R_PRES_dot_P=ZERO;
291     P_PRES_dot_AP0=ZERO;
292     P_PRES_dot_AP1=ZERO;
293     P_PRES_dot_AP2=ZERO;
294     P_PRES_dot_AP3=ZERO;
295     P_PRES_dot_AP4=ZERO;
296     P_PRES_dot_AP5=ZERO;
297 jgs 150 }
298     #pragma omp barrier
299 jgs 154 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3,P_PRES_dot_AP4,P_PRES_dot_AP5) schedule(static)
300     for (z=0;z<n;++z) {
301     R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
302     P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
303     P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
304     P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
305     P_PRES_dot_AP3+=P_PRES[3][z]*AP[z];
306     P_PRES_dot_AP4+=P_PRES[4][z]*AP[z];
307     P_PRES_dot_AP5+=P_PRES[5][z]*AP[z];
308 jgs 150 }
309     #pragma omp master
310     {
311 jgs 154 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
312     P_PRES_dot_AP[1]=P_PRES_dot_AP1;
313     P_PRES_dot_AP[2]=P_PRES_dot_AP2;
314     P_PRES_dot_AP[3]=P_PRES_dot_AP3;
315     P_PRES_dot_AP[4]=P_PRES_dot_AP4;
316     P_PRES_dot_AP[5]=P_PRES_dot_AP5;
317     R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
318 jgs 150 }
319 jgs 154 } else if (order>6) {
320 jgs 150 #pragma omp master
321     {
322 jgs 154 R_PRES_dot_P=ZERO;
323     P_PRES_dot_AP0=ZERO;
324     P_PRES_dot_AP1=ZERO;
325     P_PRES_dot_AP2=ZERO;
326     P_PRES_dot_AP3=ZERO;
327     P_PRES_dot_AP4=ZERO;
328     P_PRES_dot_AP5=ZERO;
329     P_PRES_dot_AP6=ZERO;
330 jgs 150 }
331     #pragma omp barrier
332 jgs 154 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3,P_PRES_dot_AP4,P_PRES_dot_AP5,P_PRES_dot_AP6) schedule(static)
333     for (z=0;z<n;++z) {
334     R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
335     P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
336     P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
337     P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
338     P_PRES_dot_AP3+=P_PRES[3][z]*AP[z];
339     P_PRES_dot_AP4+=P_PRES[4][z]*AP[z];
340     P_PRES_dot_AP5+=P_PRES[5][z]*AP[z];
341     P_PRES_dot_AP6+=P_PRES[6][z]*AP[z];
342     }
343     #pragma omp master
344     {
345     P_PRES_dot_AP[0]=P_PRES_dot_AP0;
346     P_PRES_dot_AP[1]=P_PRES_dot_AP1;
347     P_PRES_dot_AP[2]=P_PRES_dot_AP2;
348     P_PRES_dot_AP[3]=P_PRES_dot_AP3;
349     P_PRES_dot_AP[4]=P_PRES_dot_AP4;
350     P_PRES_dot_AP[5]=P_PRES_dot_AP5;
351     P_PRES_dot_AP[6]=P_PRES_dot_AP6;
352     R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
353    
354     P_PRES_dot_AP0=ZERO;
355 jgs 150 }
356 jgs 154 for (i=7;i<order;++i) {
357     #pragma omp barrier
358     #pragma omp for private(z) reduction(+:P_PRES_dot_AP0) schedule(static)
359     for (z=0;z<n;++z) P_PRES_dot_AP0+=P_PRES[i][z]*AP[z];
360 jgs 150 #pragma omp master
361     {
362 jgs 154 P_PRES_dot_AP[i]=P_PRES_dot_AP0;
363     P_PRES_dot_AP0=ZERO;
364 jgs 150 }
365     }
366     }
367 jgs 154 /* this fixes a problem with the intel compiler */
368     #pragma omp master
369     P_PRES_dot_AP0=R_PRES_dot_P_PRES[0];
370     /*** if sum_BREAKF is equal to zero a breakdown occurs.
371     *** iteration procedure can be continued but R_PRES is not the
372     *** residual of X_PRES approximation.
373     ***/
374 gross 432 #pragma omp barrier
375 jgs 154 sum_BREAKF=0.;
376     for (i=0;i<order;++i) sum_BREAKF +=BREAKF[i];
377     breakFlag=!((ABS(P_PRES_dot_AP0) > ZERO) && (sum_BREAKF >ZERO));
378     if (!breakFlag) {
379     breakFlag=FALSE;
380     /***
381     ***** X_PRES and R_PRES are moved to memory:
382     ***/
383     #pragma omp master
384 jgs 150 {
385 jgs 154 Factor=0.;
386     for (i=0;i<order;++i) {
387     ALPHA[i]=-P_PRES_dot_AP[i]/R_PRES_dot_P_PRES[i];
388     Factor+=BREAKF[i]*ALPHA[i];
389     }
390    
391     save_R_PRES_dot_P_PRES=R_PRES_dot_P_PRES[Length_of_mem-1];
392     save_R_PRES=R_PRES[Length_of_mem-1];
393     save_XPRES=X_PRES[Length_of_mem-1];
394     save_P_PRES=P_PRES[Length_of_mem-1];
395     for (i=Length_of_mem-1;i>0;--i) {
396     BREAKF[i]=BREAKF[i-1];
397     R_PRES_dot_P_PRES[i]=R_PRES_dot_P_PRES[i-1];
398     R_PRES[i]=R_PRES[i-1];
399     X_PRES[i]=X_PRES[i-1];
400     P_PRES[i]=P_PRES[i-1];
401     }
402     R_PRES_dot_P_PRES[0]=save_R_PRES_dot_P_PRES;
403     R_PRES[0]=save_R_PRES;
404     X_PRES[0]=save_XPRES;
405     P_PRES[0]=save_P_PRES;
406    
407     if (ABS(Factor)<=ZERO) {
408     Factor=1.;
409     BREAKF[0]=ZERO;
410     } else {
411     Factor=1./Factor;
412     BREAKF[0]=ONE;
413     }
414     for (i=0;i<order;++i) ALPHA[i]*=Factor;
415 jgs 150 }
416     /*
417 jgs 154 ***** update of solution X_PRES and its residual R_PRES:
418 jgs 150 ***
419     *** P is used to accumulate X and AP to accumulate R. X and R
420     *** are still needed until they are put into the X and R memory
421 jgs 154 *** R_PRES and X_PRES
422 jgs 150 ***
423     **/
424     #pragma omp barrier
425 jgs 154 breakf0=BREAKF[0];
426     if (order==0) {
427 jgs 150 #pragma omp for private(z) schedule(static)
428 jgs 154 for (z=0;z<n;++z) {
429     R_PRES[0][z]= Factor* AP[z];
430     X_PRES[0][z]=-Factor*P_PRES[1][z];
431     }
432     } else if (order==1) {
433 jgs 150 #pragma omp for private(z) schedule(static)
434 jgs 154 for (z=0;z<n;++z) {
435     R_PRES[0][z]= Factor* AP[z]+ALPHA[0]*R_PRES[1][z];
436     X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z];
437     }
438     } else if (order==2) {
439 jgs 150 #pragma omp for private(z) schedule(static)
440 jgs 154 for (z=0;z<n;++z) {
441     R_PRES[0][z]= Factor* AP[z]+ALPHA[0]*R_PRES[1][z]
442     +ALPHA[1]*R_PRES[2][z];
443     X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
444     +ALPHA[1]*X_PRES[2][z];
445     }
446     } else if (order==3) {
447 jgs 150 #pragma omp for private(z) schedule(static)
448 jgs 154 for (z=0;z<n;++z) {
449     R_PRES[0][z]= Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
450     +ALPHA[1]*R_PRES[2][z]
451     +ALPHA[2]*R_PRES[3][z];
452     X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
453     +ALPHA[1]*X_PRES[2][z]
454     +ALPHA[2]*X_PRES[3][z];
455     }
456     } else if (order==4) {
457 jgs 150 #pragma omp for private(z) schedule(static)
458 jgs 154 for (z=0;z<n;++z) {
459     R_PRES[0][z]= Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
460     +ALPHA[1]*R_PRES[2][z]
461     +ALPHA[2]*R_PRES[3][z]
462     +ALPHA[3]*R_PRES[4][z];
463     X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
464     +ALPHA[1]*X_PRES[2][z]
465     +ALPHA[2]*X_PRES[3][z]
466     +ALPHA[3]*X_PRES[4][z];
467     }
468     } else if (order==5) {
469 jgs 150 #pragma omp for private(z) schedule(static)
470 jgs 154 for (z=0;z<n;++z) {
471     R_PRES[0][z]=Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
472     +ALPHA[1]*R_PRES[2][z]
473     +ALPHA[2]*R_PRES[3][z]
474     +ALPHA[3]*R_PRES[4][z]
475     +ALPHA[4]*R_PRES[5][z];
476     X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
477     +ALPHA[1]*X_PRES[2][z]
478     +ALPHA[2]*X_PRES[3][z]
479     +ALPHA[3]*X_PRES[4][z]
480     +ALPHA[4]*X_PRES[5][z];
481     }
482     } else if (order==6) {
483 jgs 150 #pragma omp for private(z) schedule(static)
484 jgs 154 for (z=0;z<n;++z) {
485     R_PRES[0][z]=Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
486     +ALPHA[1]*R_PRES[2][z]
487     +ALPHA[2]*R_PRES[3][z]
488     +ALPHA[3]*R_PRES[4][z]
489     +ALPHA[4]*R_PRES[5][z]
490     +ALPHA[5]*R_PRES[6][z];
491     X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
492     +ALPHA[1]*X_PRES[2][z]
493     +ALPHA[2]*X_PRES[3][z]
494     +ALPHA[3]*X_PRES[4][z]
495     +ALPHA[4]*X_PRES[5][z]
496     +ALPHA[5]*X_PRES[6][z];
497     }
498     } else if (order>6) {
499 jgs 150 #pragma omp for private(z) schedule(static)
500 jgs 154 for (z=0;z<n;++z) {
501     R_PRES[0][z]=Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
502     +ALPHA[1]*R_PRES[2][z]
503     +ALPHA[2]*R_PRES[3][z]
504     +ALPHA[3]*R_PRES[4][z]
505     +ALPHA[4]*R_PRES[5][z]
506     +ALPHA[5]*R_PRES[6][z]
507     +ALPHA[6]*R_PRES[7][z];
508     X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
509     +ALPHA[1]*X_PRES[2][z]
510     +ALPHA[2]*X_PRES[3][z]
511     +ALPHA[3]*X_PRES[4][z]
512     +ALPHA[4]*X_PRES[5][z]
513     +ALPHA[5]*X_PRES[6][z]
514     +ALPHA[6]*X_PRES[7][z];
515     }
516     for (i=7;i<order;++i) {
517 jgs 150 #pragma omp for private(z) schedule(static)
518 jgs 154 for (z=0;z<n;++z) {
519     R_PRES[0][z]+=ALPHA[i]*R_PRES[i+1][z];
520     X_PRES[0][z]+=ALPHA[i]*X_PRES[i+1][z];
521     }
522 jgs 150 }
523     }
524 jgs 154 if (breakf0>0.) {
525 jgs 150 /***
526 jgs 154 ***** calculate gamma from min_(gamma){|R+gamma*(R_PRES-R)|_2}:
527 jgs 150 ***/
528     #pragma omp master
529     {
530     SC1=ZERO;
531     SC2=ZERO;
532     L2_R=ZERO;
533     }
534     #pragma omp barrier
535     #pragma omp for private(z,diff) reduction(+:SC1,SC2) schedule(static)
536 jgs 154 for (z=0;z<n;++z) {
537     diff=R_PRES[0][z]-r[z];
538 jgs 150 SC1+=diff*diff;
539     SC2+=diff*r[z];
540     }
541 jgs 154 gamma=(SC1<=ZERO) ? ZERO : -SC2/SC1;
542 jgs 150 #pragma omp for private(z) reduction(+:L2_R) schedule(static)
543 jgs 154 for (z=0;z<n;++z) {
544     x[z]+=gamma*(X_PRES[0][z]-x[z]);
545     r[z]+=gamma*(R_PRES[0][z]-r[z]);
546 jgs 150 L2_R+=r[z]*r[z];
547     }
548     norm_of_residual=sqrt(L2_R);
549     convergeFlag = (norm_of_residual <= tol);
550 jgs 154 if (restart>0) restartFlag=(num_iter_restart >= restart);
551     } else {
552     convergeFlag=FALSE;
553     restartFlag=FALSE;
554 jgs 150 }
555 jgs 154 maxIterFlag = (num_iter >= maxit);
556     }
557     }
558     /* end of iteration */
559     #pragma omp master
560     {
561     Norm_of_residual_global=norm_of_residual;
562     Num_iter_global=num_iter;
563 jgs 150 if (maxIterFlag) {
564 jgs 154 Status = SOLVER_MAXITER_REACHED;
565 jgs 150 } else if (breakFlag) {
566 jgs 154 Status = SOLVER_BREAKDOWN;
567     }
568 jgs 150 }
569     } /* end of parallel region */
570     TMPMEMFREE(AP);
571 jgs 154 for (i=0;i<Length_of_recursion;i++) {
572     TMPMEMFREE(X_PRES[i]);
573     TMPMEMFREE(R_PRES[i]);
574     TMPMEMFREE(P_PRES[i]);
575 jgs 150 }
576 jgs 154 *iter=Num_iter_global;
577     *tolerance=Norm_of_residual_global;
578 jgs 150 }
579 jgs 154 return Status;
580 jgs 150 }

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26