/[escript]/trunk/paso/src/GMRES.c
ViewVC logotype

Annotation of /trunk/paso/src/GMRES.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1028 - (hide annotations)
Wed Mar 14 00:15:24 2007 UTC (12 years, 11 months ago) by gross
File MIME type: text/plain
File size: 24375 byte(s)
modifications to be compliant with _WIN32. The substitutes for asinh, acosh, atanh are still missing (erf will through an exception)
1 jgs 150 /* $Id$ */
2    
3 dhawcroft 631
4 jgs 150 /*
5 dhawcroft 631 ********************************************************************************
6 dhawcroft 633 * Copyright 2006 by ACcESS MNRF *
7 dhawcroft 631 * *
8     * http://www.access.edu.au *
9     * Primary Business: Queensland, Australia *
10     * Licensed under the Open Software License version 3.0 *
11     * http://www.opensource.org/licenses/osl-3.0.php *
12     ********************************************************************************
13     */
14    
15     /*
16 jgs 150 * Purpose
17     * =======
18     *
19     * GMRES solves the linear system A*x=b using the
20     * truncated and restered GMRES method with preconditioning.
21     *
22     * Convergence test: norm( b - A*x )< TOL.
23     *
24     *
25     *
26     * Arguments
27     * =========
28     *
29 jgs 154 * r (input/output) double array, dimension n.
30 jgs 150 * On entry, residual of inital guess X
31     *
32 jgs 154 * x (input/output) double array, dimension n.
33 jgs 150 * On input, the initial guess.
34     *
35     * iter (input/output) int
36     * On input, the maximum num_iterations to be performed.
37     * On output, actual number of num_iterations performed.
38     *
39     * tolerance (input/output) DOUBLE PRECISION
40     * On input, the allowable convergence measure for
41     * norm( b - A*x )
42     * On output, the final value of this measure.
43     *
44 jgs 154 * Length_of_recursion (input) gives the number of residual to be kept in orthogonalization process
45 jgs 150 *
46     * restart (input) If restart>0, iteration is resterted a after restart steps.
47     *
48     * INFO (output) int
49     *
50     * =SOLVER_NO_ERROR: Successful exit. num_iterated approximate solution returned.
51 jgs 154 * =SOLVER_MAXNUM_ITER_REACHED
52 jgs 150 * =SOLVER_INPUT_ERROR Illegal parameter:
53 jgs 154 * =SOLVER_BREAKDOWN: bad luck!
54     * =SOLVER_MEMORY_ERROR : no memory available
55 jgs 150 *
56     * ==============================================================
57     */
58    
59 gross 700 #include "Common.h"
60     #include "SystemMatrix.h"
61 jgs 150 #include "Solver.h"
62     #ifdef _OPENMP
63     #include <omp.h>
64     #endif
65    
66     err_t Paso_Solver_GMRES(
67     Paso_SystemMatrix * A,
68     double * r,
69     double * x,
70     dim_t *iter,
71 gross 584 double * tolerance,dim_t Length_of_recursion,dim_t restart,
72     Paso_Performance* pp) {
73 jgs 150
74     /* Local variables */
75    
76 gross 1028 double *AP,**X_PRES,**R_PRES,**P_PRES;
77     double *P_PRES_dot_AP,*R_PRES_dot_P_PRES,*BREAKF,*ALPHA;
78 jgs 154 double P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3,P_PRES_dot_AP4,P_PRES_dot_AP5,P_PRES_dot_AP6,R_PRES_dot_P,abs_RP,breakf0;
79     double tol,Factor,sum_BREAKF,gamma,SC1,SC2,norm_of_residual,diff,L2_R,Norm_of_residual_global;
80     double *save_XPRES, *save_P_PRES, *save_R_PRES,save_R_PRES_dot_P_PRES;
81     dim_t maxit,Num_iter_global,num_iter_restart,num_iter;
82 gross 1028 dim_t i,z,order,n, Length_of_mem;
83 jgs 150 bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE,restartFlag=FALSE;
84 jgs 154 err_t Status=SOLVER_NO_ERROR;
85 jgs 150
86 gross 1028
87 jgs 150 /* adapt original routine parameters */
88    
89 gross 1028 n=A->num_cols * A-> col_block_size;
90     Length_of_mem=MAX(Length_of_recursion,0)+1;
91 jgs 150
92 gross 1028
93 jgs 150 /* Test the input parameters. */
94 jgs 154 if (restart>0) restart=MAX(Length_of_recursion,restart);
95     if (n < 0 || Length_of_recursion<=0) {
96 jgs 150 return SOLVER_INPUT_ERROR;
97     }
98    
99     /* allocate memory: */
100 gross 1028 X_PRES=TMPMEMALLOC(Length_of_mem,double*);
101     R_PRES=TMPMEMALLOC(Length_of_mem,double*);
102     P_PRES=TMPMEMALLOC(Length_of_mem,double*);
103     P_PRES_dot_AP=TMPMEMALLOC(Length_of_mem,double);
104     R_PRES_dot_P_PRES=TMPMEMALLOC(Length_of_mem,double);
105     BREAKF=TMPMEMALLOC(Length_of_mem,double);
106     ALPHA=TMPMEMALLOC(Length_of_mem,double);
107 jgs 154 AP=TMPMEMALLOC(n,double);
108 gross 1028 if (AP==NULL || X_PRES ==NULL || R_PRES == NULL || P_PRES == NULL ||
109     P_PRES_dot_AP == NULL || R_PRES_dot_P_PRES ==NULL || BREAKF == NULL || ALPHA == NULL) {
110     Status=SOLVER_MEMORY_ERROR;
111     } else {
112     for (i=0;i<Length_of_mem;i++) {
113     X_PRES[i]=TMPMEMALLOC(n,double);
114     R_PRES[i]=TMPMEMALLOC(n,double);
115     P_PRES[i]=TMPMEMALLOC(n,double);
116     if (X_PRES[i]==NULL || R_PRES[i]==NULL || P_PRES[i]==NULL) Status=SOLVER_MEMORY_ERROR;
117     }
118 jgs 150 }
119 jgs 154 if ( Status ==SOLVER_NO_ERROR ) {
120 jgs 150
121     /* now PRES starts : */
122     maxit=*iter;
123     tol=*tolerance;
124    
125 jgs 154 #pragma omp parallel firstprivate(maxit,tol,convergeFlag,maxIterFlag,breakFlag) \
126     private(num_iter,i,num_iter_restart,order,sum_BREAKF,gamma,restartFlag,norm_of_residual,abs_RP,breakf0,\
127     save_XPRES,save_P_PRES,save_R_PRES,save_R_PRES_dot_P_PRES)
128 jgs 150 {
129     /* initialization */
130    
131     restartFlag=TRUE;
132     num_iter=0;
133     #pragma omp for private(z) schedule(static) nowait
134 jgs 154 for (z=0; z < n; ++z) AP[z]=0;
135     for(i=0;i<Length_of_mem;++i) {
136 jgs 150 #pragma omp for private(z) schedule(static) nowait
137 jgs 154 for (z=0; z < n; ++z) {
138     P_PRES[i][z]=0;
139     R_PRES[i][z]=0;
140     X_PRES[i][z]=0;
141     }
142 jgs 150 }
143    
144 jgs 154 while (! (convergeFlag || maxIterFlag || breakFlag)) {
145 jgs 150 #pragma omp barrier
146     if (restartFlag) {
147 jgs 154 #pragma omp master
148     BREAKF[0]=ONE;
149 jgs 150 #pragma omp for private(z) schedule(static) nowait
150 jgs 154 for (z=0; z < n; ++z) {
151     R_PRES[0][z]=r[z];
152     X_PRES[0][z]=x[z];
153     }
154 jgs 150 num_iter_restart=0;
155     restartFlag=FALSE;
156     }
157     ++num_iter;
158     ++num_iter_restart;
159 jgs 154 /* order is the dimension of the space on which the residual is minimized: */
160     order=MIN(num_iter_restart,Length_of_recursion);
161 jgs 150 /***
162 jgs 154 *** calculate new search direction P from R_PRES
163 jgs 150 ***/
164 gross 432 #pragma omp barrier
165 jgs 154 Paso_Solver_solvePreconditioner(A,&P_PRES[0][0], &R_PRES[0][0]);
166 jgs 150 /***
167     *** apply A to P to get AP
168     ***/
169 gross 432 #pragma omp barrier
170 gross 415 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, &P_PRES[0][0],ZERO, &AP[0]);
171 jgs 150 /***
172     ***** calculation of the norm of R and the scalar products of
173     *** the residuals and A*P:
174     ***/
175 jgs 154 if (order==0) {
176 jgs 150 #pragma omp master
177 jgs 154 R_PRES_dot_P=ZERO;
178 jgs 150 #pragma omp barrier
179 jgs 154 #pragma omp for private(z) reduction(+:R_PRES_dot_P) schedule(static)
180     for (z=0;z<n;++z)
181     R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
182 jgs 150 #pragma omp master
183 jgs 154 R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
184     } else if (order==1) {
185     #pragma omp master
186 jgs 150 {
187 jgs 154 R_PRES_dot_P=ZERO;
188     P_PRES_dot_AP0=ZERO;
189 jgs 150 }
190     #pragma omp barrier
191 jgs 154 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0) schedule(static)
192     for (z=0;z<n;++z) {
193     R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
194     P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
195 jgs 150 }
196     #pragma omp master
197     {
198 jgs 154 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
199     R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
200 jgs 150 }
201 jgs 154 } else if (order==2) {
202 jgs 150 #pragma omp master
203     {
204 jgs 154 R_PRES_dot_P=ZERO;
205     P_PRES_dot_AP0=ZERO;
206     P_PRES_dot_AP1=ZERO;
207 jgs 150 }
208     #pragma omp barrier
209 jgs 154 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1) schedule(static)
210     for (z=0;z<n;++z) {
211     R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
212     P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
213     P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
214 jgs 150 }
215     #pragma omp master
216     {
217 jgs 154 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
218     P_PRES_dot_AP[1]=P_PRES_dot_AP1;
219     R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
220 jgs 150 }
221 jgs 154 } else if (order==3) {
222 jgs 150 #pragma omp master
223     {
224 jgs 154 R_PRES_dot_P=ZERO;
225     P_PRES_dot_AP0=ZERO;
226     P_PRES_dot_AP1=ZERO;
227     P_PRES_dot_AP2=ZERO;
228 jgs 150 }
229     #pragma omp barrier
230 jgs 154 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2) schedule(static)
231     for (z=0;z<n;++z) {
232     R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
233     P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
234     P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
235     P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
236 jgs 150 }
237     #pragma omp master
238     {
239 jgs 154 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
240     P_PRES_dot_AP[1]=P_PRES_dot_AP1;
241     P_PRES_dot_AP[2]=P_PRES_dot_AP2;
242     R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
243 jgs 150 }
244 jgs 154 } else if (order==4) {
245 jgs 150 #pragma omp master
246     {
247 jgs 154 R_PRES_dot_P=ZERO;
248     P_PRES_dot_AP0=ZERO;
249     P_PRES_dot_AP1=ZERO;
250     P_PRES_dot_AP2=ZERO;
251     P_PRES_dot_AP3=ZERO;
252 jgs 150 }
253     #pragma omp barrier
254 jgs 154 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3) schedule(static)
255     for (z=0;z<n;++z) {
256     R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
257     P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
258     P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
259     P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
260     P_PRES_dot_AP3+=P_PRES[3][z]*AP[z];
261 jgs 150 }
262     #pragma omp master
263     {
264 jgs 154 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
265     P_PRES_dot_AP[1]=P_PRES_dot_AP1;
266     P_PRES_dot_AP[2]=P_PRES_dot_AP2;
267     P_PRES_dot_AP[3]=P_PRES_dot_AP3;
268     R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
269 jgs 150 }
270 jgs 154 } else if (order==5) {
271 jgs 150 #pragma omp master
272     {
273 jgs 154 R_PRES_dot_P=ZERO;
274     P_PRES_dot_AP0=ZERO;
275     P_PRES_dot_AP1=ZERO;
276     P_PRES_dot_AP2=ZERO;
277     P_PRES_dot_AP3=ZERO;
278     P_PRES_dot_AP4=ZERO;
279 jgs 150 }
280     #pragma omp barrier
281 jgs 154 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3,P_PRES_dot_AP4) schedule(static)
282     for (z=0;z<n;++z) {
283     R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
284     P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
285     P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
286     P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
287     P_PRES_dot_AP3+=P_PRES[3][z]*AP[z];
288     P_PRES_dot_AP4+=P_PRES[4][z]*AP[z];
289     }
290 jgs 150 #pragma omp master
291     {
292 jgs 154 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
293     P_PRES_dot_AP[1]=P_PRES_dot_AP1;
294     P_PRES_dot_AP[2]=P_PRES_dot_AP2;
295     P_PRES_dot_AP[3]=P_PRES_dot_AP3;
296     P_PRES_dot_AP[4]=P_PRES_dot_AP4;
297     R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
298 jgs 150 }
299 jgs 154 } else if (order==6) {
300 jgs 150 #pragma omp master
301     {
302 jgs 154 R_PRES_dot_P=ZERO;
303     P_PRES_dot_AP0=ZERO;
304     P_PRES_dot_AP1=ZERO;
305     P_PRES_dot_AP2=ZERO;
306     P_PRES_dot_AP3=ZERO;
307     P_PRES_dot_AP4=ZERO;
308     P_PRES_dot_AP5=ZERO;
309 jgs 150 }
310     #pragma omp barrier
311 jgs 154 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3,P_PRES_dot_AP4,P_PRES_dot_AP5) schedule(static)
312     for (z=0;z<n;++z) {
313     R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
314     P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
315     P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
316     P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
317     P_PRES_dot_AP3+=P_PRES[3][z]*AP[z];
318     P_PRES_dot_AP4+=P_PRES[4][z]*AP[z];
319     P_PRES_dot_AP5+=P_PRES[5][z]*AP[z];
320 jgs 150 }
321     #pragma omp master
322     {
323 jgs 154 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
324     P_PRES_dot_AP[1]=P_PRES_dot_AP1;
325     P_PRES_dot_AP[2]=P_PRES_dot_AP2;
326     P_PRES_dot_AP[3]=P_PRES_dot_AP3;
327     P_PRES_dot_AP[4]=P_PRES_dot_AP4;
328     P_PRES_dot_AP[5]=P_PRES_dot_AP5;
329     R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
330 jgs 150 }
331 jgs 154 } else if (order>6) {
332 jgs 150 #pragma omp master
333     {
334 jgs 154 R_PRES_dot_P=ZERO;
335     P_PRES_dot_AP0=ZERO;
336     P_PRES_dot_AP1=ZERO;
337     P_PRES_dot_AP2=ZERO;
338     P_PRES_dot_AP3=ZERO;
339     P_PRES_dot_AP4=ZERO;
340     P_PRES_dot_AP5=ZERO;
341     P_PRES_dot_AP6=ZERO;
342 jgs 150 }
343     #pragma omp barrier
344 jgs 154 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3,P_PRES_dot_AP4,P_PRES_dot_AP5,P_PRES_dot_AP6) schedule(static)
345     for (z=0;z<n;++z) {
346     R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
347     P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
348     P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
349     P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
350     P_PRES_dot_AP3+=P_PRES[3][z]*AP[z];
351     P_PRES_dot_AP4+=P_PRES[4][z]*AP[z];
352     P_PRES_dot_AP5+=P_PRES[5][z]*AP[z];
353     P_PRES_dot_AP6+=P_PRES[6][z]*AP[z];
354     }
355     #pragma omp master
356     {
357     P_PRES_dot_AP[0]=P_PRES_dot_AP0;
358     P_PRES_dot_AP[1]=P_PRES_dot_AP1;
359     P_PRES_dot_AP[2]=P_PRES_dot_AP2;
360     P_PRES_dot_AP[3]=P_PRES_dot_AP3;
361     P_PRES_dot_AP[4]=P_PRES_dot_AP4;
362     P_PRES_dot_AP[5]=P_PRES_dot_AP5;
363     P_PRES_dot_AP[6]=P_PRES_dot_AP6;
364     R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
365    
366     P_PRES_dot_AP0=ZERO;
367 jgs 150 }
368 jgs 154 for (i=7;i<order;++i) {
369     #pragma omp barrier
370     #pragma omp for private(z) reduction(+:P_PRES_dot_AP0) schedule(static)
371     for (z=0;z<n;++z) P_PRES_dot_AP0+=P_PRES[i][z]*AP[z];
372 jgs 150 #pragma omp master
373     {
374 jgs 154 P_PRES_dot_AP[i]=P_PRES_dot_AP0;
375     P_PRES_dot_AP0=ZERO;
376 jgs 150 }
377     }
378     }
379 jgs 154 /* this fixes a problem with the intel compiler */
380     #pragma omp master
381     P_PRES_dot_AP0=R_PRES_dot_P_PRES[0];
382     /*** if sum_BREAKF is equal to zero a breakdown occurs.
383     *** iteration procedure can be continued but R_PRES is not the
384     *** residual of X_PRES approximation.
385     ***/
386 gross 432 #pragma omp barrier
387 jgs 154 sum_BREAKF=0.;
388     for (i=0;i<order;++i) sum_BREAKF +=BREAKF[i];
389     breakFlag=!((ABS(P_PRES_dot_AP0) > ZERO) && (sum_BREAKF >ZERO));
390     if (!breakFlag) {
391     breakFlag=FALSE;
392     /***
393     ***** X_PRES and R_PRES are moved to memory:
394     ***/
395     #pragma omp master
396 jgs 150 {
397 jgs 154 Factor=0.;
398     for (i=0;i<order;++i) {
399     ALPHA[i]=-P_PRES_dot_AP[i]/R_PRES_dot_P_PRES[i];
400     Factor+=BREAKF[i]*ALPHA[i];
401     }
402    
403     save_R_PRES_dot_P_PRES=R_PRES_dot_P_PRES[Length_of_mem-1];
404     save_R_PRES=R_PRES[Length_of_mem-1];
405     save_XPRES=X_PRES[Length_of_mem-1];
406     save_P_PRES=P_PRES[Length_of_mem-1];
407     for (i=Length_of_mem-1;i>0;--i) {
408     BREAKF[i]=BREAKF[i-1];
409     R_PRES_dot_P_PRES[i]=R_PRES_dot_P_PRES[i-1];
410     R_PRES[i]=R_PRES[i-1];
411     X_PRES[i]=X_PRES[i-1];
412     P_PRES[i]=P_PRES[i-1];
413     }
414     R_PRES_dot_P_PRES[0]=save_R_PRES_dot_P_PRES;
415     R_PRES[0]=save_R_PRES;
416     X_PRES[0]=save_XPRES;
417     P_PRES[0]=save_P_PRES;
418    
419     if (ABS(Factor)<=ZERO) {
420     Factor=1.;
421     BREAKF[0]=ZERO;
422     } else {
423     Factor=1./Factor;
424     BREAKF[0]=ONE;
425     }
426     for (i=0;i<order;++i) ALPHA[i]*=Factor;
427 jgs 150 }
428     /*
429 jgs 154 ***** update of solution X_PRES and its residual R_PRES:
430 jgs 150 ***
431     *** P is used to accumulate X and AP to accumulate R. X and R
432     *** are still needed until they are put into the X and R memory
433 jgs 154 *** R_PRES and X_PRES
434 jgs 150 ***
435     **/
436     #pragma omp barrier
437 jgs 154 breakf0=BREAKF[0];
438     if (order==0) {
439 jgs 150 #pragma omp for private(z) schedule(static)
440 jgs 154 for (z=0;z<n;++z) {
441     R_PRES[0][z]= Factor* AP[z];
442     X_PRES[0][z]=-Factor*P_PRES[1][z];
443     }
444     } else if (order==1) {
445 jgs 150 #pragma omp for private(z) schedule(static)
446 jgs 154 for (z=0;z<n;++z) {
447     R_PRES[0][z]= Factor* AP[z]+ALPHA[0]*R_PRES[1][z];
448     X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z];
449     }
450     } else if (order==2) {
451 jgs 150 #pragma omp for private(z) schedule(static)
452 jgs 154 for (z=0;z<n;++z) {
453     R_PRES[0][z]= Factor* AP[z]+ALPHA[0]*R_PRES[1][z]
454     +ALPHA[1]*R_PRES[2][z];
455     X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
456     +ALPHA[1]*X_PRES[2][z];
457     }
458     } else if (order==3) {
459 jgs 150 #pragma omp for private(z) schedule(static)
460 jgs 154 for (z=0;z<n;++z) {
461     R_PRES[0][z]= Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
462     +ALPHA[1]*R_PRES[2][z]
463     +ALPHA[2]*R_PRES[3][z];
464     X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
465     +ALPHA[1]*X_PRES[2][z]
466     +ALPHA[2]*X_PRES[3][z];
467     }
468     } else if (order==4) {
469 jgs 150 #pragma omp for private(z) schedule(static)
470 jgs 154 for (z=0;z<n;++z) {
471     R_PRES[0][z]= Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
472     +ALPHA[1]*R_PRES[2][z]
473     +ALPHA[2]*R_PRES[3][z]
474     +ALPHA[3]*R_PRES[4][z];
475     X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
476     +ALPHA[1]*X_PRES[2][z]
477     +ALPHA[2]*X_PRES[3][z]
478     +ALPHA[3]*X_PRES[4][z];
479     }
480     } else if (order==5) {
481 jgs 150 #pragma omp for private(z) schedule(static)
482 jgs 154 for (z=0;z<n;++z) {
483     R_PRES[0][z]=Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
484     +ALPHA[1]*R_PRES[2][z]
485     +ALPHA[2]*R_PRES[3][z]
486     +ALPHA[3]*R_PRES[4][z]
487     +ALPHA[4]*R_PRES[5][z];
488     X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
489     +ALPHA[1]*X_PRES[2][z]
490     +ALPHA[2]*X_PRES[3][z]
491     +ALPHA[3]*X_PRES[4][z]
492     +ALPHA[4]*X_PRES[5][z];
493     }
494     } else if (order==6) {
495 jgs 150 #pragma omp for private(z) schedule(static)
496 jgs 154 for (z=0;z<n;++z) {
497     R_PRES[0][z]=Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
498     +ALPHA[1]*R_PRES[2][z]
499     +ALPHA[2]*R_PRES[3][z]
500     +ALPHA[3]*R_PRES[4][z]
501     +ALPHA[4]*R_PRES[5][z]
502     +ALPHA[5]*R_PRES[6][z];
503     X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
504     +ALPHA[1]*X_PRES[2][z]
505     +ALPHA[2]*X_PRES[3][z]
506     +ALPHA[3]*X_PRES[4][z]
507     +ALPHA[4]*X_PRES[5][z]
508     +ALPHA[5]*X_PRES[6][z];
509     }
510     } else if (order>6) {
511 jgs 150 #pragma omp for private(z) schedule(static)
512 jgs 154 for (z=0;z<n;++z) {
513     R_PRES[0][z]=Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
514     +ALPHA[1]*R_PRES[2][z]
515     +ALPHA[2]*R_PRES[3][z]
516     +ALPHA[3]*R_PRES[4][z]
517     +ALPHA[4]*R_PRES[5][z]
518     +ALPHA[5]*R_PRES[6][z]
519     +ALPHA[6]*R_PRES[7][z];
520     X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
521     +ALPHA[1]*X_PRES[2][z]
522     +ALPHA[2]*X_PRES[3][z]
523     +ALPHA[3]*X_PRES[4][z]
524     +ALPHA[4]*X_PRES[5][z]
525     +ALPHA[5]*X_PRES[6][z]
526     +ALPHA[6]*X_PRES[7][z];
527     }
528     for (i=7;i<order;++i) {
529 jgs 150 #pragma omp for private(z) schedule(static)
530 jgs 154 for (z=0;z<n;++z) {
531     R_PRES[0][z]+=ALPHA[i]*R_PRES[i+1][z];
532     X_PRES[0][z]+=ALPHA[i]*X_PRES[i+1][z];
533     }
534 jgs 150 }
535     }
536 jgs 154 if (breakf0>0.) {
537 jgs 150 /***
538 jgs 154 ***** calculate gamma from min_(gamma){|R+gamma*(R_PRES-R)|_2}:
539 jgs 150 ***/
540     #pragma omp master
541     {
542     SC1=ZERO;
543     SC2=ZERO;
544     L2_R=ZERO;
545     }
546     #pragma omp barrier
547     #pragma omp for private(z,diff) reduction(+:SC1,SC2) schedule(static)
548 jgs 154 for (z=0;z<n;++z) {
549     diff=R_PRES[0][z]-r[z];
550 jgs 150 SC1+=diff*diff;
551     SC2+=diff*r[z];
552     }
553 jgs 154 gamma=(SC1<=ZERO) ? ZERO : -SC2/SC1;
554 jgs 150 #pragma omp for private(z) reduction(+:L2_R) schedule(static)
555 jgs 154 for (z=0;z<n;++z) {
556     x[z]+=gamma*(X_PRES[0][z]-x[z]);
557     r[z]+=gamma*(R_PRES[0][z]-r[z]);
558 jgs 150 L2_R+=r[z]*r[z];
559     }
560     norm_of_residual=sqrt(L2_R);
561     convergeFlag = (norm_of_residual <= tol);
562 jgs 154 if (restart>0) restartFlag=(num_iter_restart >= restart);
563     } else {
564     convergeFlag=FALSE;
565     restartFlag=FALSE;
566 jgs 150 }
567 jgs 154 maxIterFlag = (num_iter >= maxit);
568     }
569     }
570     /* end of iteration */
571     #pragma omp master
572     {
573     Norm_of_residual_global=norm_of_residual;
574     Num_iter_global=num_iter;
575 jgs 150 if (maxIterFlag) {
576 jgs 154 Status = SOLVER_MAXITER_REACHED;
577 jgs 150 } else if (breakFlag) {
578 jgs 154 Status = SOLVER_BREAKDOWN;
579     }
580 jgs 150 }
581     } /* end of parallel region */
582 jgs 154 for (i=0;i<Length_of_recursion;i++) {
583     TMPMEMFREE(X_PRES[i]);
584     TMPMEMFREE(R_PRES[i]);
585     TMPMEMFREE(P_PRES[i]);
586 jgs 150 }
587 gross 1028 TMPMEMFREE(AP);
588     TMPMEMFREE(X_PRES);
589     TMPMEMFREE(R_PRES);
590     TMPMEMFREE(P_PRES);
591     TMPMEMFREE(P_PRES_dot_AP);
592     TMPMEMFREE(R_PRES_dot_P_PRES);
593     TMPMEMFREE(BREAKF);
594     TMPMEMFREE(ALPHA);
595 jgs 154 *iter=Num_iter_global;
596     *tolerance=Norm_of_residual_global;
597 jgs 150 }
598 jgs 154 return Status;
599 jgs 150 }

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26