/[escript]/trunk/paso/src/GMRES.c
ViewVC logotype

Annotation of /trunk/paso/src/GMRES.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 584 - (hide annotations)
Thu Mar 9 23:03:38 2006 UTC (13 years, 11 months ago) by gross
Original Path: trunk/paso/src/Solvers/GMRES.c
File MIME type: text/plain
File size: 23216 byte(s)
eigenvalues: compiles and passes tests on altix now
1 jgs 150 /* $Id$ */
2    
3     /*
4     * Purpose
5     * =======
6     *
7     * GMRES solves the linear system A*x=b using the
8     * truncated and restered GMRES method with preconditioning.
9     *
10     * Convergence test: norm( b - A*x )< TOL.
11     *
12     *
13     *
14     * Arguments
15     * =========
16     *
17 jgs 154 * r (input/output) double array, dimension n.
18 jgs 150 * On entry, residual of inital guess X
19     *
20 jgs 154 * x (input/output) double array, dimension n.
21 jgs 150 * On input, the initial guess.
22     *
23     * iter (input/output) int
24     * On input, the maximum num_iterations to be performed.
25     * On output, actual number of num_iterations performed.
26     *
27     * tolerance (input/output) DOUBLE PRECISION
28     * On input, the allowable convergence measure for
29     * norm( b - A*x )
30     * On output, the final value of this measure.
31     *
32 jgs 154 * Length_of_recursion (input) gives the number of residual to be kept in orthogonalization process
33 jgs 150 *
34     * restart (input) If restart>0, iteration is resterted a after restart steps.
35     *
36     * INFO (output) int
37     *
38     * =SOLVER_NO_ERROR: Successful exit. num_iterated approximate solution returned.
39 jgs 154 * =SOLVER_MAXNUM_ITER_REACHED
40 jgs 150 * =SOLVER_INPUT_ERROR Illegal parameter:
41 jgs 154 * =SOLVER_BREAKDOWN: bad luck!
42     * =SOLVER_MEMORY_ERROR : no memory available
43 jgs 150 *
44     * ==============================================================
45     */
46    
47     #include "Common.h"
48     #include "SystemMatrix.h"
49     #include "Solver.h"
50     #ifdef _OPENMP
51     #include <omp.h>
52     #endif
53    
54     err_t Paso_Solver_GMRES(
55     Paso_SystemMatrix * A,
56     double * r,
57     double * x,
58     dim_t *iter,
59 gross 584 double * tolerance,dim_t Length_of_recursion,dim_t restart,
60     Paso_Performance* pp) {
61 jgs 150
62     /* Local variables */
63    
64 jgs 154 double *AP,*X_PRES[MAX(Length_of_recursion,0)+1],*R_PRES[MAX(Length_of_recursion,0)+1],*P_PRES[MAX(Length_of_recursion,0)+1];
65     double P_PRES_dot_AP[MAX(Length_of_recursion,0)],R_PRES_dot_P_PRES[MAX(Length_of_recursion,0)+1],BREAKF[MAX(Length_of_recursion,0)+1],ALPHA[MAX(Length_of_recursion,0)];
66     double P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3,P_PRES_dot_AP4,P_PRES_dot_AP5,P_PRES_dot_AP6,R_PRES_dot_P,abs_RP,breakf0;
67     double tol,Factor,sum_BREAKF,gamma,SC1,SC2,norm_of_residual,diff,L2_R,Norm_of_residual_global;
68     double *save_XPRES, *save_P_PRES, *save_R_PRES,save_R_PRES_dot_P_PRES;
69     dim_t maxit,Num_iter_global,num_iter_restart,num_iter;
70     dim_t i,z,order;
71 jgs 150 bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE,restartFlag=FALSE;
72 jgs 154 err_t Status=SOLVER_NO_ERROR;
73 jgs 150
74     /* adapt original routine parameters */
75    
76 jgs 154 dim_t n=A->num_cols * A-> col_block_size;
77     dim_t Length_of_mem=MAX(Length_of_recursion,0)+1;
78 jgs 150
79     /* Test the input parameters. */
80 jgs 154 if (restart>0) restart=MAX(Length_of_recursion,restart);
81     if (n < 0 || Length_of_recursion<=0) {
82 jgs 150 return SOLVER_INPUT_ERROR;
83     }
84    
85     /* allocate memory: */
86    
87 jgs 154 AP=TMPMEMALLOC(n,double);
88     if (AP==NULL) Status=SOLVER_MEMORY_ERROR;
89     for (i=0;i<Length_of_mem;i++) {
90     X_PRES[i]=TMPMEMALLOC(n,double);
91     R_PRES[i]=TMPMEMALLOC(n,double);
92     P_PRES[i]=TMPMEMALLOC(n,double);
93     if (X_PRES[i]==NULL || R_PRES[i]==NULL || P_PRES[i]==NULL) Status=SOLVER_MEMORY_ERROR;
94 jgs 150 }
95 jgs 154 if ( Status ==SOLVER_NO_ERROR ) {
96 jgs 150
97     /* now PRES starts : */
98     maxit=*iter;
99     tol=*tolerance;
100    
101 jgs 154 #pragma omp parallel firstprivate(maxit,tol,convergeFlag,maxIterFlag,breakFlag) \
102     private(num_iter,i,num_iter_restart,order,sum_BREAKF,gamma,restartFlag,norm_of_residual,abs_RP,breakf0,\
103     save_XPRES,save_P_PRES,save_R_PRES,save_R_PRES_dot_P_PRES)
104 jgs 150 {
105     /* initialization */
106    
107     restartFlag=TRUE;
108     num_iter=0;
109     #pragma omp for private(z) schedule(static) nowait
110 jgs 154 for (z=0; z < n; ++z) AP[z]=0;
111     for(i=0;i<Length_of_mem;++i) {
112 jgs 150 #pragma omp for private(z) schedule(static) nowait
113 jgs 154 for (z=0; z < n; ++z) {
114     P_PRES[i][z]=0;
115     R_PRES[i][z]=0;
116     X_PRES[i][z]=0;
117     }
118 jgs 150 }
119    
120 jgs 154 while (! (convergeFlag || maxIterFlag || breakFlag)) {
121 jgs 150 #pragma omp barrier
122     if (restartFlag) {
123 jgs 154 #pragma omp master
124     BREAKF[0]=ONE;
125 jgs 150 #pragma omp for private(z) schedule(static) nowait
126 jgs 154 for (z=0; z < n; ++z) {
127     R_PRES[0][z]=r[z];
128     X_PRES[0][z]=x[z];
129     }
130 jgs 150 num_iter_restart=0;
131     restartFlag=FALSE;
132     }
133     ++num_iter;
134     ++num_iter_restart;
135 jgs 154 /* order is the dimension of the space on which the residual is minimized: */
136     order=MIN(num_iter_restart,Length_of_recursion);
137 jgs 150 /***
138 jgs 154 *** calculate new search direction P from R_PRES
139 jgs 150 ***/
140 gross 432 #pragma omp barrier
141 jgs 154 Paso_Solver_solvePreconditioner(A,&P_PRES[0][0], &R_PRES[0][0]);
142 jgs 150 /***
143     *** apply A to P to get AP
144     ***/
145 gross 432 #pragma omp barrier
146 gross 415 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, &P_PRES[0][0],ZERO, &AP[0]);
147 jgs 150 /***
148     ***** calculation of the norm of R and the scalar products of
149     *** the residuals and A*P:
150     ***/
151 jgs 154 if (order==0) {
152 jgs 150 #pragma omp master
153 jgs 154 R_PRES_dot_P=ZERO;
154 jgs 150 #pragma omp barrier
155 jgs 154 #pragma omp for private(z) reduction(+:R_PRES_dot_P) schedule(static)
156     for (z=0;z<n;++z)
157     R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
158 jgs 150 #pragma omp master
159 jgs 154 R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
160     } else if (order==1) {
161     #pragma omp master
162 jgs 150 {
163 jgs 154 R_PRES_dot_P=ZERO;
164     P_PRES_dot_AP0=ZERO;
165 jgs 150 }
166     #pragma omp barrier
167 jgs 154 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0) schedule(static)
168     for (z=0;z<n;++z) {
169     R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
170     P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
171 jgs 150 }
172     #pragma omp master
173     {
174 jgs 154 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
175     R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
176 jgs 150 }
177 jgs 154 } else if (order==2) {
178 jgs 150 #pragma omp master
179     {
180 jgs 154 R_PRES_dot_P=ZERO;
181     P_PRES_dot_AP0=ZERO;
182     P_PRES_dot_AP1=ZERO;
183 jgs 150 }
184     #pragma omp barrier
185 jgs 154 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1) schedule(static)
186     for (z=0;z<n;++z) {
187     R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
188     P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
189     P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
190 jgs 150 }
191     #pragma omp master
192     {
193 jgs 154 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
194     P_PRES_dot_AP[1]=P_PRES_dot_AP1;
195     R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
196 jgs 150 }
197 jgs 154 } else if (order==3) {
198 jgs 150 #pragma omp master
199     {
200 jgs 154 R_PRES_dot_P=ZERO;
201     P_PRES_dot_AP0=ZERO;
202     P_PRES_dot_AP1=ZERO;
203     P_PRES_dot_AP2=ZERO;
204 jgs 150 }
205     #pragma omp barrier
206 jgs 154 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2) schedule(static)
207     for (z=0;z<n;++z) {
208     R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
209     P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
210     P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
211     P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
212 jgs 150 }
213     #pragma omp master
214     {
215 jgs 154 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
216     P_PRES_dot_AP[1]=P_PRES_dot_AP1;
217     P_PRES_dot_AP[2]=P_PRES_dot_AP2;
218     R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
219 jgs 150 }
220 jgs 154 } else if (order==4) {
221 jgs 150 #pragma omp master
222     {
223 jgs 154 R_PRES_dot_P=ZERO;
224     P_PRES_dot_AP0=ZERO;
225     P_PRES_dot_AP1=ZERO;
226     P_PRES_dot_AP2=ZERO;
227     P_PRES_dot_AP3=ZERO;
228 jgs 150 }
229     #pragma omp barrier
230 jgs 154 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3) schedule(static)
231     for (z=0;z<n;++z) {
232     R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
233     P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
234     P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
235     P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
236     P_PRES_dot_AP3+=P_PRES[3][z]*AP[z];
237 jgs 150 }
238     #pragma omp master
239     {
240 jgs 154 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
241     P_PRES_dot_AP[1]=P_PRES_dot_AP1;
242     P_PRES_dot_AP[2]=P_PRES_dot_AP2;
243     P_PRES_dot_AP[3]=P_PRES_dot_AP3;
244     R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
245 jgs 150 }
246 jgs 154 } else if (order==5) {
247 jgs 150 #pragma omp master
248     {
249 jgs 154 R_PRES_dot_P=ZERO;
250     P_PRES_dot_AP0=ZERO;
251     P_PRES_dot_AP1=ZERO;
252     P_PRES_dot_AP2=ZERO;
253     P_PRES_dot_AP3=ZERO;
254     P_PRES_dot_AP4=ZERO;
255 jgs 150 }
256     #pragma omp barrier
257 jgs 154 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3,P_PRES_dot_AP4) schedule(static)
258     for (z=0;z<n;++z) {
259     R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
260     P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
261     P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
262     P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
263     P_PRES_dot_AP3+=P_PRES[3][z]*AP[z];
264     P_PRES_dot_AP4+=P_PRES[4][z]*AP[z];
265     }
266 jgs 150 #pragma omp master
267     {
268 jgs 154 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
269     P_PRES_dot_AP[1]=P_PRES_dot_AP1;
270     P_PRES_dot_AP[2]=P_PRES_dot_AP2;
271     P_PRES_dot_AP[3]=P_PRES_dot_AP3;
272     P_PRES_dot_AP[4]=P_PRES_dot_AP4;
273     R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
274 jgs 150 }
275 jgs 154 } else if (order==6) {
276 jgs 150 #pragma omp master
277     {
278 jgs 154 R_PRES_dot_P=ZERO;
279     P_PRES_dot_AP0=ZERO;
280     P_PRES_dot_AP1=ZERO;
281     P_PRES_dot_AP2=ZERO;
282     P_PRES_dot_AP3=ZERO;
283     P_PRES_dot_AP4=ZERO;
284     P_PRES_dot_AP5=ZERO;
285 jgs 150 }
286     #pragma omp barrier
287 jgs 154 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3,P_PRES_dot_AP4,P_PRES_dot_AP5) schedule(static)
288     for (z=0;z<n;++z) {
289     R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
290     P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
291     P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
292     P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
293     P_PRES_dot_AP3+=P_PRES[3][z]*AP[z];
294     P_PRES_dot_AP4+=P_PRES[4][z]*AP[z];
295     P_PRES_dot_AP5+=P_PRES[5][z]*AP[z];
296 jgs 150 }
297     #pragma omp master
298     {
299 jgs 154 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
300     P_PRES_dot_AP[1]=P_PRES_dot_AP1;
301     P_PRES_dot_AP[2]=P_PRES_dot_AP2;
302     P_PRES_dot_AP[3]=P_PRES_dot_AP3;
303     P_PRES_dot_AP[4]=P_PRES_dot_AP4;
304     P_PRES_dot_AP[5]=P_PRES_dot_AP5;
305     R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
306 jgs 150 }
307 jgs 154 } else if (order>6) {
308 jgs 150 #pragma omp master
309     {
310 jgs 154 R_PRES_dot_P=ZERO;
311     P_PRES_dot_AP0=ZERO;
312     P_PRES_dot_AP1=ZERO;
313     P_PRES_dot_AP2=ZERO;
314     P_PRES_dot_AP3=ZERO;
315     P_PRES_dot_AP4=ZERO;
316     P_PRES_dot_AP5=ZERO;
317     P_PRES_dot_AP6=ZERO;
318 jgs 150 }
319     #pragma omp barrier
320 jgs 154 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3,P_PRES_dot_AP4,P_PRES_dot_AP5,P_PRES_dot_AP6) schedule(static)
321     for (z=0;z<n;++z) {
322     R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
323     P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
324     P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
325     P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
326     P_PRES_dot_AP3+=P_PRES[3][z]*AP[z];
327     P_PRES_dot_AP4+=P_PRES[4][z]*AP[z];
328     P_PRES_dot_AP5+=P_PRES[5][z]*AP[z];
329     P_PRES_dot_AP6+=P_PRES[6][z]*AP[z];
330     }
331     #pragma omp master
332     {
333     P_PRES_dot_AP[0]=P_PRES_dot_AP0;
334     P_PRES_dot_AP[1]=P_PRES_dot_AP1;
335     P_PRES_dot_AP[2]=P_PRES_dot_AP2;
336     P_PRES_dot_AP[3]=P_PRES_dot_AP3;
337     P_PRES_dot_AP[4]=P_PRES_dot_AP4;
338     P_PRES_dot_AP[5]=P_PRES_dot_AP5;
339     P_PRES_dot_AP[6]=P_PRES_dot_AP6;
340     R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
341    
342     P_PRES_dot_AP0=ZERO;
343 jgs 150 }
344 jgs 154 for (i=7;i<order;++i) {
345     #pragma omp barrier
346     #pragma omp for private(z) reduction(+:P_PRES_dot_AP0) schedule(static)
347     for (z=0;z<n;++z) P_PRES_dot_AP0+=P_PRES[i][z]*AP[z];
348 jgs 150 #pragma omp master
349     {
350 jgs 154 P_PRES_dot_AP[i]=P_PRES_dot_AP0;
351     P_PRES_dot_AP0=ZERO;
352 jgs 150 }
353     }
354     }
355 jgs 154 /* this fixes a problem with the intel compiler */
356     #pragma omp master
357     P_PRES_dot_AP0=R_PRES_dot_P_PRES[0];
358     /*** if sum_BREAKF is equal to zero a breakdown occurs.
359     *** iteration procedure can be continued but R_PRES is not the
360     *** residual of X_PRES approximation.
361     ***/
362 gross 432 #pragma omp barrier
363 jgs 154 sum_BREAKF=0.;
364     for (i=0;i<order;++i) sum_BREAKF +=BREAKF[i];
365     breakFlag=!((ABS(P_PRES_dot_AP0) > ZERO) && (sum_BREAKF >ZERO));
366     if (!breakFlag) {
367     breakFlag=FALSE;
368     /***
369     ***** X_PRES and R_PRES are moved to memory:
370     ***/
371     #pragma omp master
372 jgs 150 {
373 jgs 154 Factor=0.;
374     for (i=0;i<order;++i) {
375     ALPHA[i]=-P_PRES_dot_AP[i]/R_PRES_dot_P_PRES[i];
376     Factor+=BREAKF[i]*ALPHA[i];
377     }
378    
379     save_R_PRES_dot_P_PRES=R_PRES_dot_P_PRES[Length_of_mem-1];
380     save_R_PRES=R_PRES[Length_of_mem-1];
381     save_XPRES=X_PRES[Length_of_mem-1];
382     save_P_PRES=P_PRES[Length_of_mem-1];
383     for (i=Length_of_mem-1;i>0;--i) {
384     BREAKF[i]=BREAKF[i-1];
385     R_PRES_dot_P_PRES[i]=R_PRES_dot_P_PRES[i-1];
386     R_PRES[i]=R_PRES[i-1];
387     X_PRES[i]=X_PRES[i-1];
388     P_PRES[i]=P_PRES[i-1];
389     }
390     R_PRES_dot_P_PRES[0]=save_R_PRES_dot_P_PRES;
391     R_PRES[0]=save_R_PRES;
392     X_PRES[0]=save_XPRES;
393     P_PRES[0]=save_P_PRES;
394    
395     if (ABS(Factor)<=ZERO) {
396     Factor=1.;
397     BREAKF[0]=ZERO;
398     } else {
399     Factor=1./Factor;
400     BREAKF[0]=ONE;
401     }
402     for (i=0;i<order;++i) ALPHA[i]*=Factor;
403 jgs 150 }
404     /*
405 jgs 154 ***** update of solution X_PRES and its residual R_PRES:
406 jgs 150 ***
407     *** P is used to accumulate X and AP to accumulate R. X and R
408     *** are still needed until they are put into the X and R memory
409 jgs 154 *** R_PRES and X_PRES
410 jgs 150 ***
411     **/
412     #pragma omp barrier
413 jgs 154 breakf0=BREAKF[0];
414     if (order==0) {
415 jgs 150 #pragma omp for private(z) schedule(static)
416 jgs 154 for (z=0;z<n;++z) {
417     R_PRES[0][z]= Factor* AP[z];
418     X_PRES[0][z]=-Factor*P_PRES[1][z];
419     }
420     } else if (order==1) {
421 jgs 150 #pragma omp for private(z) schedule(static)
422 jgs 154 for (z=0;z<n;++z) {
423     R_PRES[0][z]= Factor* AP[z]+ALPHA[0]*R_PRES[1][z];
424     X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z];
425     }
426     } else if (order==2) {
427 jgs 150 #pragma omp for private(z) schedule(static)
428 jgs 154 for (z=0;z<n;++z) {
429     R_PRES[0][z]= Factor* AP[z]+ALPHA[0]*R_PRES[1][z]
430     +ALPHA[1]*R_PRES[2][z];
431     X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
432     +ALPHA[1]*X_PRES[2][z];
433     }
434     } else if (order==3) {
435 jgs 150 #pragma omp for private(z) schedule(static)
436 jgs 154 for (z=0;z<n;++z) {
437     R_PRES[0][z]= Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
438     +ALPHA[1]*R_PRES[2][z]
439     +ALPHA[2]*R_PRES[3][z];
440     X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
441     +ALPHA[1]*X_PRES[2][z]
442     +ALPHA[2]*X_PRES[3][z];
443     }
444     } else if (order==4) {
445 jgs 150 #pragma omp for private(z) schedule(static)
446 jgs 154 for (z=0;z<n;++z) {
447     R_PRES[0][z]= Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
448     +ALPHA[1]*R_PRES[2][z]
449     +ALPHA[2]*R_PRES[3][z]
450     +ALPHA[3]*R_PRES[4][z];
451     X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
452     +ALPHA[1]*X_PRES[2][z]
453     +ALPHA[2]*X_PRES[3][z]
454     +ALPHA[3]*X_PRES[4][z];
455     }
456     } else if (order==5) {
457 jgs 150 #pragma omp for private(z) schedule(static)
458 jgs 154 for (z=0;z<n;++z) {
459     R_PRES[0][z]=Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
460     +ALPHA[1]*R_PRES[2][z]
461     +ALPHA[2]*R_PRES[3][z]
462     +ALPHA[3]*R_PRES[4][z]
463     +ALPHA[4]*R_PRES[5][z];
464     X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
465     +ALPHA[1]*X_PRES[2][z]
466     +ALPHA[2]*X_PRES[3][z]
467     +ALPHA[3]*X_PRES[4][z]
468     +ALPHA[4]*X_PRES[5][z];
469     }
470     } else if (order==6) {
471 jgs 150 #pragma omp for private(z) schedule(static)
472 jgs 154 for (z=0;z<n;++z) {
473     R_PRES[0][z]=Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
474     +ALPHA[1]*R_PRES[2][z]
475     +ALPHA[2]*R_PRES[3][z]
476     +ALPHA[3]*R_PRES[4][z]
477     +ALPHA[4]*R_PRES[5][z]
478     +ALPHA[5]*R_PRES[6][z];
479     X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
480     +ALPHA[1]*X_PRES[2][z]
481     +ALPHA[2]*X_PRES[3][z]
482     +ALPHA[3]*X_PRES[4][z]
483     +ALPHA[4]*X_PRES[5][z]
484     +ALPHA[5]*X_PRES[6][z];
485     }
486     } else if (order>6) {
487 jgs 150 #pragma omp for private(z) schedule(static)
488 jgs 154 for (z=0;z<n;++z) {
489     R_PRES[0][z]=Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
490     +ALPHA[1]*R_PRES[2][z]
491     +ALPHA[2]*R_PRES[3][z]
492     +ALPHA[3]*R_PRES[4][z]
493     +ALPHA[4]*R_PRES[5][z]
494     +ALPHA[5]*R_PRES[6][z]
495     +ALPHA[6]*R_PRES[7][z];
496     X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
497     +ALPHA[1]*X_PRES[2][z]
498     +ALPHA[2]*X_PRES[3][z]
499     +ALPHA[3]*X_PRES[4][z]
500     +ALPHA[4]*X_PRES[5][z]
501     +ALPHA[5]*X_PRES[6][z]
502     +ALPHA[6]*X_PRES[7][z];
503     }
504     for (i=7;i<order;++i) {
505 jgs 150 #pragma omp for private(z) schedule(static)
506 jgs 154 for (z=0;z<n;++z) {
507     R_PRES[0][z]+=ALPHA[i]*R_PRES[i+1][z];
508     X_PRES[0][z]+=ALPHA[i]*X_PRES[i+1][z];
509     }
510 jgs 150 }
511     }
512 jgs 154 if (breakf0>0.) {
513 jgs 150 /***
514 jgs 154 ***** calculate gamma from min_(gamma){|R+gamma*(R_PRES-R)|_2}:
515 jgs 150 ***/
516     #pragma omp master
517     {
518     SC1=ZERO;
519     SC2=ZERO;
520     L2_R=ZERO;
521     }
522     #pragma omp barrier
523     #pragma omp for private(z,diff) reduction(+:SC1,SC2) schedule(static)
524 jgs 154 for (z=0;z<n;++z) {
525     diff=R_PRES[0][z]-r[z];
526 jgs 150 SC1+=diff*diff;
527     SC2+=diff*r[z];
528     }
529 jgs 154 gamma=(SC1<=ZERO) ? ZERO : -SC2/SC1;
530 jgs 150 #pragma omp for private(z) reduction(+:L2_R) schedule(static)
531 jgs 154 for (z=0;z<n;++z) {
532     x[z]+=gamma*(X_PRES[0][z]-x[z]);
533     r[z]+=gamma*(R_PRES[0][z]-r[z]);
534 jgs 150 L2_R+=r[z]*r[z];
535     }
536     norm_of_residual=sqrt(L2_R);
537     convergeFlag = (norm_of_residual <= tol);
538 jgs 154 if (restart>0) restartFlag=(num_iter_restart >= restart);
539     } else {
540     convergeFlag=FALSE;
541     restartFlag=FALSE;
542 jgs 150 }
543 jgs 154 maxIterFlag = (num_iter >= maxit);
544     }
545     }
546     /* end of iteration */
547     #pragma omp master
548     {
549     Norm_of_residual_global=norm_of_residual;
550     Num_iter_global=num_iter;
551 jgs 150 if (maxIterFlag) {
552 jgs 154 Status = SOLVER_MAXITER_REACHED;
553 jgs 150 } else if (breakFlag) {
554 jgs 154 Status = SOLVER_BREAKDOWN;
555     }
556 jgs 150 }
557     } /* end of parallel region */
558     TMPMEMFREE(AP);
559 jgs 154 for (i=0;i<Length_of_recursion;i++) {
560     TMPMEMFREE(X_PRES[i]);
561     TMPMEMFREE(R_PRES[i]);
562     TMPMEMFREE(P_PRES[i]);
563 jgs 150 }
564 jgs 154 *iter=Num_iter_global;
565     *tolerance=Norm_of_residual_global;
566 jgs 150 }
567 jgs 154 return Status;
568 jgs 150 }

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26