/[escript]/trunk/paso/src/GMRES.c
ViewVC logotype

Annotation of /trunk/paso/src/GMRES.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 415 - (hide annotations)
Wed Jan 4 05:37:33 2006 UTC (14 years, 1 month ago) by gross
Original Path: trunk/paso/src/Solvers/GMRES.c
File MIME type: text/plain
File size: 23133 byte(s)
a better way of representing the matrix format type is introduced. this is needed for the Paradiso and UMFPACK interface
1 jgs 150 /* $Id$ */
2    
3     /*
4     * Purpose
5     * =======
6     *
7     * GMRES solves the linear system A*x=b using the
8     * truncated and restered GMRES method with preconditioning.
9     *
10     * Convergence test: norm( b - A*x )< TOL.
11     *
12     *
13     *
14     * Arguments
15     * =========
16     *
17 jgs 154 * r (input/output) double array, dimension n.
18 jgs 150 * On entry, residual of inital guess X
19     *
20 jgs 154 * x (input/output) double array, dimension n.
21 jgs 150 * On input, the initial guess.
22     *
23     * iter (input/output) int
24     * On input, the maximum num_iterations to be performed.
25     * On output, actual number of num_iterations performed.
26     *
27     * tolerance (input/output) DOUBLE PRECISION
28     * On input, the allowable convergence measure for
29     * norm( b - A*x )
30     * On output, the final value of this measure.
31     *
32 jgs 154 * Length_of_recursion (input) gives the number of residual to be kept in orthogonalization process
33 jgs 150 *
34     * restart (input) If restart>0, iteration is resterted a after restart steps.
35     *
36     * INFO (output) int
37     *
38     * =SOLVER_NO_ERROR: Successful exit. num_iterated approximate solution returned.
39 jgs 154 * =SOLVER_MAXNUM_ITER_REACHED
40 jgs 150 * =SOLVER_INPUT_ERROR Illegal parameter:
41 jgs 154 * =SOLVER_BREAKDOWN: bad luck!
42     * =SOLVER_MEMORY_ERROR : no memory available
43 jgs 150 *
44     * ==============================================================
45     */
46    
47     #include "Common.h"
48     #include "SystemMatrix.h"
49     #include "Solver.h"
50     #ifdef _OPENMP
51     #include <omp.h>
52     #endif
53    
54     err_t Paso_Solver_GMRES(
55     Paso_SystemMatrix * A,
56     double * r,
57     double * x,
58     dim_t *iter,
59 jgs 154 double * tolerance,dim_t Length_of_recursion,dim_t restart) {
60 jgs 150
61     /* Local variables */
62    
63 jgs 154 double *AP,*X_PRES[MAX(Length_of_recursion,0)+1],*R_PRES[MAX(Length_of_recursion,0)+1],*P_PRES[MAX(Length_of_recursion,0)+1];
64     double P_PRES_dot_AP[MAX(Length_of_recursion,0)],R_PRES_dot_P_PRES[MAX(Length_of_recursion,0)+1],BREAKF[MAX(Length_of_recursion,0)+1],ALPHA[MAX(Length_of_recursion,0)];
65     double P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3,P_PRES_dot_AP4,P_PRES_dot_AP5,P_PRES_dot_AP6,R_PRES_dot_P,abs_RP,breakf0;
66     double tol,Factor,sum_BREAKF,gamma,SC1,SC2,norm_of_residual,diff,L2_R,Norm_of_residual_global;
67     double *save_XPRES, *save_P_PRES, *save_R_PRES,save_R_PRES_dot_P_PRES;
68     dim_t maxit,Num_iter_global,num_iter_restart,num_iter;
69     dim_t i,z,order;
70 jgs 150 bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE,restartFlag=FALSE;
71 jgs 154 err_t Status=SOLVER_NO_ERROR;
72 jgs 150
73     /* adapt original routine parameters */
74    
75 jgs 154 dim_t n=A->num_cols * A-> col_block_size;
76     dim_t Length_of_mem=MAX(Length_of_recursion,0)+1;
77 jgs 150
78     /* Test the input parameters. */
79 jgs 154 if (restart>0) restart=MAX(Length_of_recursion,restart);
80     if (n < 0 || Length_of_recursion<=0) {
81 jgs 150 return SOLVER_INPUT_ERROR;
82     }
83    
84     /* allocate memory: */
85    
86 jgs 154 AP=TMPMEMALLOC(n,double);
87     if (AP==NULL) Status=SOLVER_MEMORY_ERROR;
88     for (i=0;i<Length_of_mem;i++) {
89     X_PRES[i]=TMPMEMALLOC(n,double);
90     R_PRES[i]=TMPMEMALLOC(n,double);
91     P_PRES[i]=TMPMEMALLOC(n,double);
92     if (X_PRES[i]==NULL || R_PRES[i]==NULL || P_PRES[i]==NULL) Status=SOLVER_MEMORY_ERROR;
93 jgs 150 }
94 jgs 154 if ( Status ==SOLVER_NO_ERROR ) {
95 jgs 150
96     /* now PRES starts : */
97     maxit=*iter;
98     tol=*tolerance;
99    
100 jgs 154 #pragma omp parallel firstprivate(maxit,tol,convergeFlag,maxIterFlag,breakFlag) \
101     private(num_iter,i,num_iter_restart,order,sum_BREAKF,gamma,restartFlag,norm_of_residual,abs_RP,breakf0,\
102     save_XPRES,save_P_PRES,save_R_PRES,save_R_PRES_dot_P_PRES)
103 jgs 150 {
104     /* initialization */
105    
106     restartFlag=TRUE;
107     num_iter=0;
108     #pragma omp for private(z) schedule(static) nowait
109 jgs 154 for (z=0; z < n; ++z) AP[z]=0;
110     for(i=0;i<Length_of_mem;++i) {
111 jgs 150 #pragma omp for private(z) schedule(static) nowait
112 jgs 154 for (z=0; z < n; ++z) {
113     P_PRES[i][z]=0;
114     R_PRES[i][z]=0;
115     X_PRES[i][z]=0;
116     }
117 jgs 150 }
118    
119 jgs 154 while (! (convergeFlag || maxIterFlag || breakFlag)) {
120 jgs 150 #pragma omp barrier
121     if (restartFlag) {
122 jgs 154 #pragma omp master
123     BREAKF[0]=ONE;
124 jgs 150 #pragma omp for private(z) schedule(static) nowait
125 jgs 154 for (z=0; z < n; ++z) {
126     R_PRES[0][z]=r[z];
127     X_PRES[0][z]=x[z];
128     }
129 jgs 150 num_iter_restart=0;
130     restartFlag=FALSE;
131     }
132     ++num_iter;
133     ++num_iter_restart;
134 jgs 154 /* order is the dimension of the space on which the residual is minimized: */
135     order=MIN(num_iter_restart,Length_of_recursion);
136 jgs 150 /***
137 jgs 154 *** calculate new search direction P from R_PRES
138 jgs 150 ***/
139 jgs 154 Paso_Solver_solvePreconditioner(A,&P_PRES[0][0], &R_PRES[0][0]);
140 jgs 150 /***
141     *** apply A to P to get AP
142     ***/
143 gross 415 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, &P_PRES[0][0],ZERO, &AP[0]);
144 jgs 150 /***
145     ***** calculation of the norm of R and the scalar products of
146     *** the residuals and A*P:
147     ***/
148 jgs 154 if (order==0) {
149 jgs 150 #pragma omp master
150 jgs 154 R_PRES_dot_P=ZERO;
151 jgs 150 #pragma omp barrier
152 jgs 154 #pragma omp for private(z) reduction(+:R_PRES_dot_P) schedule(static)
153     for (z=0;z<n;++z)
154     R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
155 jgs 150 #pragma omp master
156 jgs 154 R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
157     } else if (order==1) {
158     #pragma omp master
159 jgs 150 {
160 jgs 154 R_PRES_dot_P=ZERO;
161     P_PRES_dot_AP0=ZERO;
162 jgs 150 }
163     #pragma omp barrier
164 jgs 154 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0) schedule(static)
165     for (z=0;z<n;++z) {
166     R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
167     P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
168 jgs 150 }
169     #pragma omp master
170     {
171 jgs 154 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
172     R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
173 jgs 150 }
174 jgs 154 } else if (order==2) {
175 jgs 150 #pragma omp master
176     {
177 jgs 154 R_PRES_dot_P=ZERO;
178     P_PRES_dot_AP0=ZERO;
179     P_PRES_dot_AP1=ZERO;
180 jgs 150 }
181     #pragma omp barrier
182 jgs 154 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1) schedule(static)
183     for (z=0;z<n;++z) {
184     R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
185     P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
186     P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
187 jgs 150 }
188     #pragma omp master
189     {
190 jgs 154 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
191     P_PRES_dot_AP[1]=P_PRES_dot_AP1;
192     R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
193 jgs 150 }
194 jgs 154 } else if (order==3) {
195 jgs 150 #pragma omp master
196     {
197 jgs 154 R_PRES_dot_P=ZERO;
198     P_PRES_dot_AP0=ZERO;
199     P_PRES_dot_AP1=ZERO;
200     P_PRES_dot_AP2=ZERO;
201 jgs 150 }
202     #pragma omp barrier
203 jgs 154 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2) schedule(static)
204     for (z=0;z<n;++z) {
205     R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
206     P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
207     P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
208     P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
209 jgs 150 }
210     #pragma omp master
211     {
212 jgs 154 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
213     P_PRES_dot_AP[1]=P_PRES_dot_AP1;
214     P_PRES_dot_AP[2]=P_PRES_dot_AP2;
215     R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
216 jgs 150 }
217 jgs 154 } else if (order==4) {
218 jgs 150 #pragma omp master
219     {
220 jgs 154 R_PRES_dot_P=ZERO;
221     P_PRES_dot_AP0=ZERO;
222     P_PRES_dot_AP1=ZERO;
223     P_PRES_dot_AP2=ZERO;
224     P_PRES_dot_AP3=ZERO;
225 jgs 150 }
226     #pragma omp barrier
227 jgs 154 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3) schedule(static)
228     for (z=0;z<n;++z) {
229     R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
230     P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
231     P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
232     P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
233     P_PRES_dot_AP3+=P_PRES[3][z]*AP[z];
234 jgs 150 }
235     #pragma omp master
236     {
237 jgs 154 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
238     P_PRES_dot_AP[1]=P_PRES_dot_AP1;
239     P_PRES_dot_AP[2]=P_PRES_dot_AP2;
240     P_PRES_dot_AP[3]=P_PRES_dot_AP3;
241     R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
242 jgs 150 }
243 jgs 154 } else if (order==5) {
244 jgs 150 #pragma omp master
245     {
246 jgs 154 R_PRES_dot_P=ZERO;
247     P_PRES_dot_AP0=ZERO;
248     P_PRES_dot_AP1=ZERO;
249     P_PRES_dot_AP2=ZERO;
250     P_PRES_dot_AP3=ZERO;
251     P_PRES_dot_AP4=ZERO;
252 jgs 150 }
253     #pragma omp barrier
254 jgs 154 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3,P_PRES_dot_AP4) schedule(static)
255     for (z=0;z<n;++z) {
256     R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
257     P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
258     P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
259     P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
260     P_PRES_dot_AP3+=P_PRES[3][z]*AP[z];
261     P_PRES_dot_AP4+=P_PRES[4][z]*AP[z];
262     }
263 jgs 150 #pragma omp master
264     {
265 jgs 154 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
266     P_PRES_dot_AP[1]=P_PRES_dot_AP1;
267     P_PRES_dot_AP[2]=P_PRES_dot_AP2;
268     P_PRES_dot_AP[3]=P_PRES_dot_AP3;
269     P_PRES_dot_AP[4]=P_PRES_dot_AP4;
270     R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
271 jgs 150 }
272 jgs 154 } else if (order==6) {
273 jgs 150 #pragma omp master
274     {
275 jgs 154 R_PRES_dot_P=ZERO;
276     P_PRES_dot_AP0=ZERO;
277     P_PRES_dot_AP1=ZERO;
278     P_PRES_dot_AP2=ZERO;
279     P_PRES_dot_AP3=ZERO;
280     P_PRES_dot_AP4=ZERO;
281     P_PRES_dot_AP5=ZERO;
282 jgs 150 }
283     #pragma omp barrier
284 jgs 154 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3,P_PRES_dot_AP4,P_PRES_dot_AP5) schedule(static)
285     for (z=0;z<n;++z) {
286     R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
287     P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
288     P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
289     P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
290     P_PRES_dot_AP3+=P_PRES[3][z]*AP[z];
291     P_PRES_dot_AP4+=P_PRES[4][z]*AP[z];
292     P_PRES_dot_AP5+=P_PRES[5][z]*AP[z];
293 jgs 150 }
294     #pragma omp master
295     {
296 jgs 154 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
297     P_PRES_dot_AP[1]=P_PRES_dot_AP1;
298     P_PRES_dot_AP[2]=P_PRES_dot_AP2;
299     P_PRES_dot_AP[3]=P_PRES_dot_AP3;
300     P_PRES_dot_AP[4]=P_PRES_dot_AP4;
301     P_PRES_dot_AP[5]=P_PRES_dot_AP5;
302     R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
303 jgs 150 }
304 jgs 154 } else if (order>6) {
305 jgs 150 #pragma omp master
306     {
307 jgs 154 R_PRES_dot_P=ZERO;
308     P_PRES_dot_AP0=ZERO;
309     P_PRES_dot_AP1=ZERO;
310     P_PRES_dot_AP2=ZERO;
311     P_PRES_dot_AP3=ZERO;
312     P_PRES_dot_AP4=ZERO;
313     P_PRES_dot_AP5=ZERO;
314     P_PRES_dot_AP6=ZERO;
315 jgs 150 }
316     #pragma omp barrier
317 jgs 154 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3,P_PRES_dot_AP4,P_PRES_dot_AP5,P_PRES_dot_AP6) schedule(static)
318     for (z=0;z<n;++z) {
319     R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
320     P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
321     P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
322     P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
323     P_PRES_dot_AP3+=P_PRES[3][z]*AP[z];
324     P_PRES_dot_AP4+=P_PRES[4][z]*AP[z];
325     P_PRES_dot_AP5+=P_PRES[5][z]*AP[z];
326     P_PRES_dot_AP6+=P_PRES[6][z]*AP[z];
327     }
328     #pragma omp master
329     {
330     P_PRES_dot_AP[0]=P_PRES_dot_AP0;
331     P_PRES_dot_AP[1]=P_PRES_dot_AP1;
332     P_PRES_dot_AP[2]=P_PRES_dot_AP2;
333     P_PRES_dot_AP[3]=P_PRES_dot_AP3;
334     P_PRES_dot_AP[4]=P_PRES_dot_AP4;
335     P_PRES_dot_AP[5]=P_PRES_dot_AP5;
336     P_PRES_dot_AP[6]=P_PRES_dot_AP6;
337     R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
338    
339     P_PRES_dot_AP0=ZERO;
340 jgs 150 }
341 jgs 154 for (i=7;i<order;++i) {
342     #pragma omp barrier
343     #pragma omp for private(z) reduction(+:P_PRES_dot_AP0) schedule(static)
344     for (z=0;z<n;++z) P_PRES_dot_AP0+=P_PRES[i][z]*AP[z];
345 jgs 150 #pragma omp master
346     {
347 jgs 154 P_PRES_dot_AP[i]=P_PRES_dot_AP0;
348     P_PRES_dot_AP0=ZERO;
349 jgs 150 }
350     }
351     }
352 jgs 154 /* this fixes a problem with the intel compiler */
353     #pragma omp master
354     P_PRES_dot_AP0=R_PRES_dot_P_PRES[0];
355     #pragma omp barrier
356     /*** if sum_BREAKF is equal to zero a breakdown occurs.
357     *** iteration procedure can be continued but R_PRES is not the
358     *** residual of X_PRES approximation.
359     ***/
360     sum_BREAKF=0.;
361     for (i=0;i<order;++i) sum_BREAKF +=BREAKF[i];
362     breakFlag=!((ABS(P_PRES_dot_AP0) > ZERO) && (sum_BREAKF >ZERO));
363     if (!breakFlag) {
364     breakFlag=FALSE;
365     /***
366     ***** X_PRES and R_PRES are moved to memory:
367     ***/
368     #pragma omp master
369 jgs 150 {
370 jgs 154 Factor=0.;
371     for (i=0;i<order;++i) {
372     ALPHA[i]=-P_PRES_dot_AP[i]/R_PRES_dot_P_PRES[i];
373     Factor+=BREAKF[i]*ALPHA[i];
374     }
375    
376     save_R_PRES_dot_P_PRES=R_PRES_dot_P_PRES[Length_of_mem-1];
377     save_R_PRES=R_PRES[Length_of_mem-1];
378     save_XPRES=X_PRES[Length_of_mem-1];
379     save_P_PRES=P_PRES[Length_of_mem-1];
380     for (i=Length_of_mem-1;i>0;--i) {
381     BREAKF[i]=BREAKF[i-1];
382     R_PRES_dot_P_PRES[i]=R_PRES_dot_P_PRES[i-1];
383     R_PRES[i]=R_PRES[i-1];
384     X_PRES[i]=X_PRES[i-1];
385     P_PRES[i]=P_PRES[i-1];
386     }
387     R_PRES_dot_P_PRES[0]=save_R_PRES_dot_P_PRES;
388     R_PRES[0]=save_R_PRES;
389     X_PRES[0]=save_XPRES;
390     P_PRES[0]=save_P_PRES;
391    
392     if (ABS(Factor)<=ZERO) {
393     Factor=1.;
394     BREAKF[0]=ZERO;
395     } else {
396     Factor=1./Factor;
397     BREAKF[0]=ONE;
398     }
399     for (i=0;i<order;++i) ALPHA[i]*=Factor;
400 jgs 150 }
401     /*
402 jgs 154 ***** update of solution X_PRES and its residual R_PRES:
403 jgs 150 ***
404     *** P is used to accumulate X and AP to accumulate R. X and R
405     *** are still needed until they are put into the X and R memory
406 jgs 154 *** R_PRES and X_PRES
407 jgs 150 ***
408     **/
409     #pragma omp barrier
410 jgs 154 breakf0=BREAKF[0];
411     if (order==0) {
412 jgs 150 #pragma omp for private(z) schedule(static)
413 jgs 154 for (z=0;z<n;++z) {
414     R_PRES[0][z]= Factor* AP[z];
415     X_PRES[0][z]=-Factor*P_PRES[1][z];
416     }
417     } else if (order==1) {
418 jgs 150 #pragma omp for private(z) schedule(static)
419 jgs 154 for (z=0;z<n;++z) {
420     R_PRES[0][z]= Factor* AP[z]+ALPHA[0]*R_PRES[1][z];
421     X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z];
422     }
423     } else if (order==2) {
424 jgs 150 #pragma omp for private(z) schedule(static)
425 jgs 154 for (z=0;z<n;++z) {
426     R_PRES[0][z]= Factor* AP[z]+ALPHA[0]*R_PRES[1][z]
427     +ALPHA[1]*R_PRES[2][z];
428     X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
429     +ALPHA[1]*X_PRES[2][z];
430     }
431     } else if (order==3) {
432 jgs 150 #pragma omp for private(z) schedule(static)
433 jgs 154 for (z=0;z<n;++z) {
434     R_PRES[0][z]= Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
435     +ALPHA[1]*R_PRES[2][z]
436     +ALPHA[2]*R_PRES[3][z];
437     X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
438     +ALPHA[1]*X_PRES[2][z]
439     +ALPHA[2]*X_PRES[3][z];
440     }
441     } else if (order==4) {
442 jgs 150 #pragma omp for private(z) schedule(static)
443 jgs 154 for (z=0;z<n;++z) {
444     R_PRES[0][z]= Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
445     +ALPHA[1]*R_PRES[2][z]
446     +ALPHA[2]*R_PRES[3][z]
447     +ALPHA[3]*R_PRES[4][z];
448     X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
449     +ALPHA[1]*X_PRES[2][z]
450     +ALPHA[2]*X_PRES[3][z]
451     +ALPHA[3]*X_PRES[4][z];
452     }
453     } else if (order==5) {
454 jgs 150 #pragma omp for private(z) schedule(static)
455 jgs 154 for (z=0;z<n;++z) {
456     R_PRES[0][z]=Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
457     +ALPHA[1]*R_PRES[2][z]
458     +ALPHA[2]*R_PRES[3][z]
459     +ALPHA[3]*R_PRES[4][z]
460     +ALPHA[4]*R_PRES[5][z];
461     X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
462     +ALPHA[1]*X_PRES[2][z]
463     +ALPHA[2]*X_PRES[3][z]
464     +ALPHA[3]*X_PRES[4][z]
465     +ALPHA[4]*X_PRES[5][z];
466     }
467     } else if (order==6) {
468 jgs 150 #pragma omp for private(z) schedule(static)
469 jgs 154 for (z=0;z<n;++z) {
470     R_PRES[0][z]=Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
471     +ALPHA[1]*R_PRES[2][z]
472     +ALPHA[2]*R_PRES[3][z]
473     +ALPHA[3]*R_PRES[4][z]
474     +ALPHA[4]*R_PRES[5][z]
475     +ALPHA[5]*R_PRES[6][z];
476     X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
477     +ALPHA[1]*X_PRES[2][z]
478     +ALPHA[2]*X_PRES[3][z]
479     +ALPHA[3]*X_PRES[4][z]
480     +ALPHA[4]*X_PRES[5][z]
481     +ALPHA[5]*X_PRES[6][z];
482     }
483     } else if (order>6) {
484 jgs 150 #pragma omp for private(z) schedule(static)
485 jgs 154 for (z=0;z<n;++z) {
486     R_PRES[0][z]=Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
487     +ALPHA[1]*R_PRES[2][z]
488     +ALPHA[2]*R_PRES[3][z]
489     +ALPHA[3]*R_PRES[4][z]
490     +ALPHA[4]*R_PRES[5][z]
491     +ALPHA[5]*R_PRES[6][z]
492     +ALPHA[6]*R_PRES[7][z];
493     X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
494     +ALPHA[1]*X_PRES[2][z]
495     +ALPHA[2]*X_PRES[3][z]
496     +ALPHA[3]*X_PRES[4][z]
497     +ALPHA[4]*X_PRES[5][z]
498     +ALPHA[5]*X_PRES[6][z]
499     +ALPHA[6]*X_PRES[7][z];
500     }
501     for (i=7;i<order;++i) {
502 jgs 150 #pragma omp for private(z) schedule(static)
503 jgs 154 for (z=0;z<n;++z) {
504     R_PRES[0][z]+=ALPHA[i]*R_PRES[i+1][z];
505     X_PRES[0][z]+=ALPHA[i]*X_PRES[i+1][z];
506     }
507 jgs 150 }
508     }
509 jgs 154 if (breakf0>0.) {
510 jgs 150 /***
511 jgs 154 ***** calculate gamma from min_(gamma){|R+gamma*(R_PRES-R)|_2}:
512 jgs 150 ***/
513     #pragma omp master
514     {
515     SC1=ZERO;
516     SC2=ZERO;
517     L2_R=ZERO;
518     }
519     #pragma omp barrier
520     #pragma omp for private(z,diff) reduction(+:SC1,SC2) schedule(static)
521 jgs 154 for (z=0;z<n;++z) {
522     diff=R_PRES[0][z]-r[z];
523 jgs 150 SC1+=diff*diff;
524     SC2+=diff*r[z];
525     }
526 jgs 154 gamma=(SC1<=ZERO) ? ZERO : -SC2/SC1;
527 jgs 150 #pragma omp for private(z) reduction(+:L2_R) schedule(static)
528 jgs 154 for (z=0;z<n;++z) {
529     x[z]+=gamma*(X_PRES[0][z]-x[z]);
530     r[z]+=gamma*(R_PRES[0][z]-r[z]);
531 jgs 150 L2_R+=r[z]*r[z];
532     }
533     norm_of_residual=sqrt(L2_R);
534     convergeFlag = (norm_of_residual <= tol);
535 jgs 154 if (restart>0) restartFlag=(num_iter_restart >= restart);
536     } else {
537     convergeFlag=FALSE;
538     restartFlag=FALSE;
539 jgs 150 }
540 jgs 154 maxIterFlag = (num_iter >= maxit);
541     }
542     }
543     /* end of iteration */
544     #pragma omp master
545     {
546     Norm_of_residual_global=norm_of_residual;
547     Num_iter_global=num_iter;
548 jgs 150 if (maxIterFlag) {
549 jgs 154 Status = SOLVER_MAXITER_REACHED;
550 jgs 150 } else if (breakFlag) {
551 jgs 154 Status = SOLVER_BREAKDOWN;
552     }
553 jgs 150 }
554     } /* end of parallel region */
555     TMPMEMFREE(AP);
556 jgs 154 for (i=0;i<Length_of_recursion;i++) {
557     TMPMEMFREE(X_PRES[i]);
558     TMPMEMFREE(R_PRES[i]);
559     TMPMEMFREE(P_PRES[i]);
560 jgs 150 }
561 jgs 154 *iter=Num_iter_global;
562     *tolerance=Norm_of_residual_global;
563 jgs 150 }
564 jgs 154 return Status;
565 jgs 150 }
566    

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26