/[escript]/trunk/paso/src/GMRES.c
ViewVC logotype

Annotation of /trunk/paso/src/GMRES.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 432 - (hide annotations)
Fri Jan 13 07:38:54 2006 UTC (14 years, 1 month ago) by gross
Original Path: trunk/paso/src/Solvers/GMRES.c
File MIME type: text/plain
File size: 23190 byte(s)
some fixes for openmp
1 jgs 150 /* $Id$ */
2    
3     /*
4     * Purpose
5     * =======
6     *
7     * GMRES solves the linear system A*x=b using the
8     * truncated and restered GMRES method with preconditioning.
9     *
10     * Convergence test: norm( b - A*x )< TOL.
11     *
12     *
13     *
14     * Arguments
15     * =========
16     *
17 jgs 154 * r (input/output) double array, dimension n.
18 jgs 150 * On entry, residual of inital guess X
19     *
20 jgs 154 * x (input/output) double array, dimension n.
21 jgs 150 * On input, the initial guess.
22     *
23     * iter (input/output) int
24     * On input, the maximum num_iterations to be performed.
25     * On output, actual number of num_iterations performed.
26     *
27     * tolerance (input/output) DOUBLE PRECISION
28     * On input, the allowable convergence measure for
29     * norm( b - A*x )
30     * On output, the final value of this measure.
31     *
32 jgs 154 * Length_of_recursion (input) gives the number of residual to be kept in orthogonalization process
33 jgs 150 *
34     * restart (input) If restart>0, iteration is resterted a after restart steps.
35     *
36     * INFO (output) int
37     *
38     * =SOLVER_NO_ERROR: Successful exit. num_iterated approximate solution returned.
39 jgs 154 * =SOLVER_MAXNUM_ITER_REACHED
40 jgs 150 * =SOLVER_INPUT_ERROR Illegal parameter:
41 jgs 154 * =SOLVER_BREAKDOWN: bad luck!
42     * =SOLVER_MEMORY_ERROR : no memory available
43 jgs 150 *
44     * ==============================================================
45     */
46    
47     #include "Common.h"
48     #include "SystemMatrix.h"
49     #include "Solver.h"
50     #ifdef _OPENMP
51     #include <omp.h>
52     #endif
53    
54     err_t Paso_Solver_GMRES(
55     Paso_SystemMatrix * A,
56     double * r,
57     double * x,
58     dim_t *iter,
59 jgs 154 double * tolerance,dim_t Length_of_recursion,dim_t restart) {
60 jgs 150
61     /* Local variables */
62    
63 jgs 154 double *AP,*X_PRES[MAX(Length_of_recursion,0)+1],*R_PRES[MAX(Length_of_recursion,0)+1],*P_PRES[MAX(Length_of_recursion,0)+1];
64     double P_PRES_dot_AP[MAX(Length_of_recursion,0)],R_PRES_dot_P_PRES[MAX(Length_of_recursion,0)+1],BREAKF[MAX(Length_of_recursion,0)+1],ALPHA[MAX(Length_of_recursion,0)];
65     double P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3,P_PRES_dot_AP4,P_PRES_dot_AP5,P_PRES_dot_AP6,R_PRES_dot_P,abs_RP,breakf0;
66     double tol,Factor,sum_BREAKF,gamma,SC1,SC2,norm_of_residual,diff,L2_R,Norm_of_residual_global;
67     double *save_XPRES, *save_P_PRES, *save_R_PRES,save_R_PRES_dot_P_PRES;
68     dim_t maxit,Num_iter_global,num_iter_restart,num_iter;
69     dim_t i,z,order;
70 jgs 150 bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE,restartFlag=FALSE;
71 jgs 154 err_t Status=SOLVER_NO_ERROR;
72 jgs 150
73     /* adapt original routine parameters */
74    
75 jgs 154 dim_t n=A->num_cols * A-> col_block_size;
76     dim_t Length_of_mem=MAX(Length_of_recursion,0)+1;
77 jgs 150
78     /* Test the input parameters. */
79 jgs 154 if (restart>0) restart=MAX(Length_of_recursion,restart);
80     if (n < 0 || Length_of_recursion<=0) {
81 jgs 150 return SOLVER_INPUT_ERROR;
82     }
83    
84     /* allocate memory: */
85    
86 jgs 154 AP=TMPMEMALLOC(n,double);
87     if (AP==NULL) Status=SOLVER_MEMORY_ERROR;
88     for (i=0;i<Length_of_mem;i++) {
89     X_PRES[i]=TMPMEMALLOC(n,double);
90     R_PRES[i]=TMPMEMALLOC(n,double);
91     P_PRES[i]=TMPMEMALLOC(n,double);
92     if (X_PRES[i]==NULL || R_PRES[i]==NULL || P_PRES[i]==NULL) Status=SOLVER_MEMORY_ERROR;
93 jgs 150 }
94 jgs 154 if ( Status ==SOLVER_NO_ERROR ) {
95 jgs 150
96     /* now PRES starts : */
97     maxit=*iter;
98     tol=*tolerance;
99    
100 jgs 154 #pragma omp parallel firstprivate(maxit,tol,convergeFlag,maxIterFlag,breakFlag) \
101     private(num_iter,i,num_iter_restart,order,sum_BREAKF,gamma,restartFlag,norm_of_residual,abs_RP,breakf0,\
102     save_XPRES,save_P_PRES,save_R_PRES,save_R_PRES_dot_P_PRES)
103 jgs 150 {
104     /* initialization */
105    
106     restartFlag=TRUE;
107     num_iter=0;
108     #pragma omp for private(z) schedule(static) nowait
109 jgs 154 for (z=0; z < n; ++z) AP[z]=0;
110     for(i=0;i<Length_of_mem;++i) {
111 jgs 150 #pragma omp for private(z) schedule(static) nowait
112 jgs 154 for (z=0; z < n; ++z) {
113     P_PRES[i][z]=0;
114     R_PRES[i][z]=0;
115     X_PRES[i][z]=0;
116     }
117 jgs 150 }
118    
119 jgs 154 while (! (convergeFlag || maxIterFlag || breakFlag)) {
120 jgs 150 #pragma omp barrier
121     if (restartFlag) {
122 jgs 154 #pragma omp master
123     BREAKF[0]=ONE;
124 jgs 150 #pragma omp for private(z) schedule(static) nowait
125 jgs 154 for (z=0; z < n; ++z) {
126     R_PRES[0][z]=r[z];
127     X_PRES[0][z]=x[z];
128     }
129 jgs 150 num_iter_restart=0;
130     restartFlag=FALSE;
131     }
132     ++num_iter;
133     ++num_iter_restart;
134 jgs 154 /* order is the dimension of the space on which the residual is minimized: */
135     order=MIN(num_iter_restart,Length_of_recursion);
136 jgs 150 /***
137 jgs 154 *** calculate new search direction P from R_PRES
138 jgs 150 ***/
139 gross 432 #pragma omp barrier
140 jgs 154 Paso_Solver_solvePreconditioner(A,&P_PRES[0][0], &R_PRES[0][0]);
141 jgs 150 /***
142     *** apply A to P to get AP
143     ***/
144 gross 432 #pragma omp barrier
145 gross 415 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, &P_PRES[0][0],ZERO, &AP[0]);
146 jgs 150 /***
147     ***** calculation of the norm of R and the scalar products of
148     *** the residuals and A*P:
149     ***/
150 jgs 154 if (order==0) {
151 jgs 150 #pragma omp master
152 jgs 154 R_PRES_dot_P=ZERO;
153 jgs 150 #pragma omp barrier
154 jgs 154 #pragma omp for private(z) reduction(+:R_PRES_dot_P) schedule(static)
155     for (z=0;z<n;++z)
156     R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
157 jgs 150 #pragma omp master
158 jgs 154 R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
159     } else if (order==1) {
160     #pragma omp master
161 jgs 150 {
162 jgs 154 R_PRES_dot_P=ZERO;
163     P_PRES_dot_AP0=ZERO;
164 jgs 150 }
165     #pragma omp barrier
166 jgs 154 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0) schedule(static)
167     for (z=0;z<n;++z) {
168     R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
169     P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
170 jgs 150 }
171     #pragma omp master
172     {
173 jgs 154 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
174     R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
175 jgs 150 }
176 jgs 154 } else if (order==2) {
177 jgs 150 #pragma omp master
178     {
179 jgs 154 R_PRES_dot_P=ZERO;
180     P_PRES_dot_AP0=ZERO;
181     P_PRES_dot_AP1=ZERO;
182 jgs 150 }
183     #pragma omp barrier
184 jgs 154 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1) schedule(static)
185     for (z=0;z<n;++z) {
186     R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
187     P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
188     P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
189 jgs 150 }
190     #pragma omp master
191     {
192 jgs 154 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
193     P_PRES_dot_AP[1]=P_PRES_dot_AP1;
194     R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
195 jgs 150 }
196 jgs 154 } else if (order==3) {
197 jgs 150 #pragma omp master
198     {
199 jgs 154 R_PRES_dot_P=ZERO;
200     P_PRES_dot_AP0=ZERO;
201     P_PRES_dot_AP1=ZERO;
202     P_PRES_dot_AP2=ZERO;
203 jgs 150 }
204     #pragma omp barrier
205 jgs 154 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2) schedule(static)
206     for (z=0;z<n;++z) {
207     R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
208     P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
209     P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
210     P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
211 jgs 150 }
212     #pragma omp master
213     {
214 jgs 154 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
215     P_PRES_dot_AP[1]=P_PRES_dot_AP1;
216     P_PRES_dot_AP[2]=P_PRES_dot_AP2;
217     R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
218 jgs 150 }
219 jgs 154 } else if (order==4) {
220 jgs 150 #pragma omp master
221     {
222 jgs 154 R_PRES_dot_P=ZERO;
223     P_PRES_dot_AP0=ZERO;
224     P_PRES_dot_AP1=ZERO;
225     P_PRES_dot_AP2=ZERO;
226     P_PRES_dot_AP3=ZERO;
227 jgs 150 }
228     #pragma omp barrier
229 jgs 154 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3) schedule(static)
230     for (z=0;z<n;++z) {
231     R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
232     P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
233     P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
234     P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
235     P_PRES_dot_AP3+=P_PRES[3][z]*AP[z];
236 jgs 150 }
237     #pragma omp master
238     {
239 jgs 154 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
240     P_PRES_dot_AP[1]=P_PRES_dot_AP1;
241     P_PRES_dot_AP[2]=P_PRES_dot_AP2;
242     P_PRES_dot_AP[3]=P_PRES_dot_AP3;
243     R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
244 jgs 150 }
245 jgs 154 } else if (order==5) {
246 jgs 150 #pragma omp master
247     {
248 jgs 154 R_PRES_dot_P=ZERO;
249     P_PRES_dot_AP0=ZERO;
250     P_PRES_dot_AP1=ZERO;
251     P_PRES_dot_AP2=ZERO;
252     P_PRES_dot_AP3=ZERO;
253     P_PRES_dot_AP4=ZERO;
254 jgs 150 }
255     #pragma omp barrier
256 jgs 154 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3,P_PRES_dot_AP4) schedule(static)
257     for (z=0;z<n;++z) {
258     R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
259     P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
260     P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
261     P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
262     P_PRES_dot_AP3+=P_PRES[3][z]*AP[z];
263     P_PRES_dot_AP4+=P_PRES[4][z]*AP[z];
264     }
265 jgs 150 #pragma omp master
266     {
267 jgs 154 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
268     P_PRES_dot_AP[1]=P_PRES_dot_AP1;
269     P_PRES_dot_AP[2]=P_PRES_dot_AP2;
270     P_PRES_dot_AP[3]=P_PRES_dot_AP3;
271     P_PRES_dot_AP[4]=P_PRES_dot_AP4;
272     R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
273 jgs 150 }
274 jgs 154 } else if (order==6) {
275 jgs 150 #pragma omp master
276     {
277 jgs 154 R_PRES_dot_P=ZERO;
278     P_PRES_dot_AP0=ZERO;
279     P_PRES_dot_AP1=ZERO;
280     P_PRES_dot_AP2=ZERO;
281     P_PRES_dot_AP3=ZERO;
282     P_PRES_dot_AP4=ZERO;
283     P_PRES_dot_AP5=ZERO;
284 jgs 150 }
285     #pragma omp barrier
286 jgs 154 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3,P_PRES_dot_AP4,P_PRES_dot_AP5) schedule(static)
287     for (z=0;z<n;++z) {
288     R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
289     P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
290     P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
291     P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
292     P_PRES_dot_AP3+=P_PRES[3][z]*AP[z];
293     P_PRES_dot_AP4+=P_PRES[4][z]*AP[z];
294     P_PRES_dot_AP5+=P_PRES[5][z]*AP[z];
295 jgs 150 }
296     #pragma omp master
297     {
298 jgs 154 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
299     P_PRES_dot_AP[1]=P_PRES_dot_AP1;
300     P_PRES_dot_AP[2]=P_PRES_dot_AP2;
301     P_PRES_dot_AP[3]=P_PRES_dot_AP3;
302     P_PRES_dot_AP[4]=P_PRES_dot_AP4;
303     P_PRES_dot_AP[5]=P_PRES_dot_AP5;
304     R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
305 jgs 150 }
306 jgs 154 } else if (order>6) {
307 jgs 150 #pragma omp master
308     {
309 jgs 154 R_PRES_dot_P=ZERO;
310     P_PRES_dot_AP0=ZERO;
311     P_PRES_dot_AP1=ZERO;
312     P_PRES_dot_AP2=ZERO;
313     P_PRES_dot_AP3=ZERO;
314     P_PRES_dot_AP4=ZERO;
315     P_PRES_dot_AP5=ZERO;
316     P_PRES_dot_AP6=ZERO;
317 jgs 150 }
318     #pragma omp barrier
319 jgs 154 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3,P_PRES_dot_AP4,P_PRES_dot_AP5,P_PRES_dot_AP6) schedule(static)
320     for (z=0;z<n;++z) {
321     R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
322     P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
323     P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
324     P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
325     P_PRES_dot_AP3+=P_PRES[3][z]*AP[z];
326     P_PRES_dot_AP4+=P_PRES[4][z]*AP[z];
327     P_PRES_dot_AP5+=P_PRES[5][z]*AP[z];
328     P_PRES_dot_AP6+=P_PRES[6][z]*AP[z];
329     }
330     #pragma omp master
331     {
332     P_PRES_dot_AP[0]=P_PRES_dot_AP0;
333     P_PRES_dot_AP[1]=P_PRES_dot_AP1;
334     P_PRES_dot_AP[2]=P_PRES_dot_AP2;
335     P_PRES_dot_AP[3]=P_PRES_dot_AP3;
336     P_PRES_dot_AP[4]=P_PRES_dot_AP4;
337     P_PRES_dot_AP[5]=P_PRES_dot_AP5;
338     P_PRES_dot_AP[6]=P_PRES_dot_AP6;
339     R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
340    
341     P_PRES_dot_AP0=ZERO;
342 jgs 150 }
343 jgs 154 for (i=7;i<order;++i) {
344     #pragma omp barrier
345     #pragma omp for private(z) reduction(+:P_PRES_dot_AP0) schedule(static)
346     for (z=0;z<n;++z) P_PRES_dot_AP0+=P_PRES[i][z]*AP[z];
347 jgs 150 #pragma omp master
348     {
349 jgs 154 P_PRES_dot_AP[i]=P_PRES_dot_AP0;
350     P_PRES_dot_AP0=ZERO;
351 jgs 150 }
352     }
353     }
354 jgs 154 /* this fixes a problem with the intel compiler */
355     #pragma omp master
356     P_PRES_dot_AP0=R_PRES_dot_P_PRES[0];
357     /*** if sum_BREAKF is equal to zero a breakdown occurs.
358     *** iteration procedure can be continued but R_PRES is not the
359     *** residual of X_PRES approximation.
360     ***/
361 gross 432 #pragma omp barrier
362 jgs 154 sum_BREAKF=0.;
363     for (i=0;i<order;++i) sum_BREAKF +=BREAKF[i];
364     breakFlag=!((ABS(P_PRES_dot_AP0) > ZERO) && (sum_BREAKF >ZERO));
365     if (!breakFlag) {
366     breakFlag=FALSE;
367     /***
368     ***** X_PRES and R_PRES are moved to memory:
369     ***/
370     #pragma omp master
371 jgs 150 {
372 jgs 154 Factor=0.;
373     for (i=0;i<order;++i) {
374     ALPHA[i]=-P_PRES_dot_AP[i]/R_PRES_dot_P_PRES[i];
375     Factor+=BREAKF[i]*ALPHA[i];
376     }
377    
378     save_R_PRES_dot_P_PRES=R_PRES_dot_P_PRES[Length_of_mem-1];
379     save_R_PRES=R_PRES[Length_of_mem-1];
380     save_XPRES=X_PRES[Length_of_mem-1];
381     save_P_PRES=P_PRES[Length_of_mem-1];
382     for (i=Length_of_mem-1;i>0;--i) {
383     BREAKF[i]=BREAKF[i-1];
384     R_PRES_dot_P_PRES[i]=R_PRES_dot_P_PRES[i-1];
385     R_PRES[i]=R_PRES[i-1];
386     X_PRES[i]=X_PRES[i-1];
387     P_PRES[i]=P_PRES[i-1];
388     }
389     R_PRES_dot_P_PRES[0]=save_R_PRES_dot_P_PRES;
390     R_PRES[0]=save_R_PRES;
391     X_PRES[0]=save_XPRES;
392     P_PRES[0]=save_P_PRES;
393    
394     if (ABS(Factor)<=ZERO) {
395     Factor=1.;
396     BREAKF[0]=ZERO;
397     } else {
398     Factor=1./Factor;
399     BREAKF[0]=ONE;
400     }
401     for (i=0;i<order;++i) ALPHA[i]*=Factor;
402 jgs 150 }
403     /*
404 jgs 154 ***** update of solution X_PRES and its residual R_PRES:
405 jgs 150 ***
406     *** P is used to accumulate X and AP to accumulate R. X and R
407     *** are still needed until they are put into the X and R memory
408 jgs 154 *** R_PRES and X_PRES
409 jgs 150 ***
410     **/
411     #pragma omp barrier
412 jgs 154 breakf0=BREAKF[0];
413     if (order==0) {
414 jgs 150 #pragma omp for private(z) schedule(static)
415 jgs 154 for (z=0;z<n;++z) {
416     R_PRES[0][z]= Factor* AP[z];
417     X_PRES[0][z]=-Factor*P_PRES[1][z];
418     }
419     } else if (order==1) {
420 jgs 150 #pragma omp for private(z) schedule(static)
421 jgs 154 for (z=0;z<n;++z) {
422     R_PRES[0][z]= Factor* AP[z]+ALPHA[0]*R_PRES[1][z];
423     X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z];
424     }
425     } else if (order==2) {
426 jgs 150 #pragma omp for private(z) schedule(static)
427 jgs 154 for (z=0;z<n;++z) {
428     R_PRES[0][z]= Factor* AP[z]+ALPHA[0]*R_PRES[1][z]
429     +ALPHA[1]*R_PRES[2][z];
430     X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
431     +ALPHA[1]*X_PRES[2][z];
432     }
433     } else if (order==3) {
434 jgs 150 #pragma omp for private(z) schedule(static)
435 jgs 154 for (z=0;z<n;++z) {
436     R_PRES[0][z]= Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
437     +ALPHA[1]*R_PRES[2][z]
438     +ALPHA[2]*R_PRES[3][z];
439     X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
440     +ALPHA[1]*X_PRES[2][z]
441     +ALPHA[2]*X_PRES[3][z];
442     }
443     } else if (order==4) {
444 jgs 150 #pragma omp for private(z) schedule(static)
445 jgs 154 for (z=0;z<n;++z) {
446     R_PRES[0][z]= Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
447     +ALPHA[1]*R_PRES[2][z]
448     +ALPHA[2]*R_PRES[3][z]
449     +ALPHA[3]*R_PRES[4][z];
450     X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
451     +ALPHA[1]*X_PRES[2][z]
452     +ALPHA[2]*X_PRES[3][z]
453     +ALPHA[3]*X_PRES[4][z];
454     }
455     } else if (order==5) {
456 jgs 150 #pragma omp for private(z) schedule(static)
457 jgs 154 for (z=0;z<n;++z) {
458     R_PRES[0][z]=Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
459     +ALPHA[1]*R_PRES[2][z]
460     +ALPHA[2]*R_PRES[3][z]
461     +ALPHA[3]*R_PRES[4][z]
462     +ALPHA[4]*R_PRES[5][z];
463     X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
464     +ALPHA[1]*X_PRES[2][z]
465     +ALPHA[2]*X_PRES[3][z]
466     +ALPHA[3]*X_PRES[4][z]
467     +ALPHA[4]*X_PRES[5][z];
468     }
469     } else if (order==6) {
470 jgs 150 #pragma omp for private(z) schedule(static)
471 jgs 154 for (z=0;z<n;++z) {
472     R_PRES[0][z]=Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
473     +ALPHA[1]*R_PRES[2][z]
474     +ALPHA[2]*R_PRES[3][z]
475     +ALPHA[3]*R_PRES[4][z]
476     +ALPHA[4]*R_PRES[5][z]
477     +ALPHA[5]*R_PRES[6][z];
478     X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
479     +ALPHA[1]*X_PRES[2][z]
480     +ALPHA[2]*X_PRES[3][z]
481     +ALPHA[3]*X_PRES[4][z]
482     +ALPHA[4]*X_PRES[5][z]
483     +ALPHA[5]*X_PRES[6][z];
484     }
485     } else if (order>6) {
486 jgs 150 #pragma omp for private(z) schedule(static)
487 jgs 154 for (z=0;z<n;++z) {
488     R_PRES[0][z]=Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
489     +ALPHA[1]*R_PRES[2][z]
490     +ALPHA[2]*R_PRES[3][z]
491     +ALPHA[3]*R_PRES[4][z]
492     +ALPHA[4]*R_PRES[5][z]
493     +ALPHA[5]*R_PRES[6][z]
494     +ALPHA[6]*R_PRES[7][z];
495     X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
496     +ALPHA[1]*X_PRES[2][z]
497     +ALPHA[2]*X_PRES[3][z]
498     +ALPHA[3]*X_PRES[4][z]
499     +ALPHA[4]*X_PRES[5][z]
500     +ALPHA[5]*X_PRES[6][z]
501     +ALPHA[6]*X_PRES[7][z];
502     }
503     for (i=7;i<order;++i) {
504 jgs 150 #pragma omp for private(z) schedule(static)
505 jgs 154 for (z=0;z<n;++z) {
506     R_PRES[0][z]+=ALPHA[i]*R_PRES[i+1][z];
507     X_PRES[0][z]+=ALPHA[i]*X_PRES[i+1][z];
508     }
509 jgs 150 }
510     }
511 jgs 154 if (breakf0>0.) {
512 jgs 150 /***
513 jgs 154 ***** calculate gamma from min_(gamma){|R+gamma*(R_PRES-R)|_2}:
514 jgs 150 ***/
515     #pragma omp master
516     {
517     SC1=ZERO;
518     SC2=ZERO;
519     L2_R=ZERO;
520     }
521     #pragma omp barrier
522     #pragma omp for private(z,diff) reduction(+:SC1,SC2) schedule(static)
523 jgs 154 for (z=0;z<n;++z) {
524     diff=R_PRES[0][z]-r[z];
525 jgs 150 SC1+=diff*diff;
526     SC2+=diff*r[z];
527     }
528 jgs 154 gamma=(SC1<=ZERO) ? ZERO : -SC2/SC1;
529 jgs 150 #pragma omp for private(z) reduction(+:L2_R) schedule(static)
530 jgs 154 for (z=0;z<n;++z) {
531     x[z]+=gamma*(X_PRES[0][z]-x[z]);
532     r[z]+=gamma*(R_PRES[0][z]-r[z]);
533 jgs 150 L2_R+=r[z]*r[z];
534     }
535     norm_of_residual=sqrt(L2_R);
536     convergeFlag = (norm_of_residual <= tol);
537 jgs 154 if (restart>0) restartFlag=(num_iter_restart >= restart);
538     } else {
539     convergeFlag=FALSE;
540     restartFlag=FALSE;
541 jgs 150 }
542 jgs 154 maxIterFlag = (num_iter >= maxit);
543     }
544     }
545     /* end of iteration */
546     #pragma omp master
547     {
548     Norm_of_residual_global=norm_of_residual;
549     Num_iter_global=num_iter;
550 jgs 150 if (maxIterFlag) {
551 jgs 154 Status = SOLVER_MAXITER_REACHED;
552 jgs 150 } else if (breakFlag) {
553 jgs 154 Status = SOLVER_BREAKDOWN;
554     }
555 jgs 150 }
556     } /* end of parallel region */
557     TMPMEMFREE(AP);
558 jgs 154 for (i=0;i<Length_of_recursion;i++) {
559     TMPMEMFREE(X_PRES[i]);
560     TMPMEMFREE(R_PRES[i]);
561     TMPMEMFREE(P_PRES[i]);
562 jgs 150 }
563 jgs 154 *iter=Num_iter_global;
564     *tolerance=Norm_of_residual_global;
565 jgs 150 }
566 jgs 154 return Status;
567 jgs 150 }

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26