/[escript]/trunk/paso/src/GMRES.c
ViewVC logotype

Annotation of /trunk/paso/src/GMRES.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1384 - (hide annotations)
Fri Jan 11 02:29:38 2008 UTC (12 years, 1 month ago) by phornby
Original Path: temp_trunk_copy/paso/src/GMRES.c
File MIME type: text/plain
File size: 24161 byte(s)
Make a temp copy of the trunk before checking in the windows changes


1 ksteube 1312
2 jgs 150 /* $Id$ */
3    
4 ksteube 1312 /*******************************************************
5     *
6     * Copyright 2003-2007 by ACceSS MNRF
7     * Copyright 2007 by University of Queensland
8     *
9     * http://esscc.uq.edu.au
10     * Primary Business: Queensland, Australia
11     * Licensed under the Open Software License version 3.0
12     * http://www.opensource.org/licenses/osl-3.0.php
13     *
14     *******************************************************/
15 dhawcroft 631
16 jgs 150 /*
17     * Purpose
18     * =======
19     *
20     * GMRES solves the linear system A*x=b using the
21     * truncated and restered GMRES method with preconditioning.
22     *
23     * Convergence test: norm( b - A*x )< TOL.
24     *
25     *
26     *
27     * Arguments
28     * =========
29     *
30 jgs 154 * r (input/output) double array, dimension n.
31 jgs 150 * On entry, residual of inital guess X
32     *
33 jgs 154 * x (input/output) double array, dimension n.
34 jgs 150 * On input, the initial guess.
35     *
36     * iter (input/output) int
37     * On input, the maximum num_iterations to be performed.
38     * On output, actual number of num_iterations performed.
39     *
40     * tolerance (input/output) DOUBLE PRECISION
41     * On input, the allowable convergence measure for
42     * norm( b - A*x )
43     * On output, the final value of this measure.
44     *
45 jgs 154 * Length_of_recursion (input) gives the number of residual to be kept in orthogonalization process
46 jgs 150 *
47     * restart (input) If restart>0, iteration is resterted a after restart steps.
48     *
49     * INFO (output) int
50     *
51     * =SOLVER_NO_ERROR: Successful exit. num_iterated approximate solution returned.
52 jgs 154 * =SOLVER_MAXNUM_ITER_REACHED
53 jgs 150 * =SOLVER_INPUT_ERROR Illegal parameter:
54 jgs 154 * =SOLVER_BREAKDOWN: bad luck!
55     * =SOLVER_MEMORY_ERROR : no memory available
56 jgs 150 *
57     * ==============================================================
58     */
59    
60 gross 700 #include "Common.h"
61     #include "SystemMatrix.h"
62 jgs 150 #include "Solver.h"
63     #ifdef _OPENMP
64     #include <omp.h>
65     #endif
66    
67     err_t Paso_Solver_GMRES(
68     Paso_SystemMatrix * A,
69     double * r,
70     double * x,
71     dim_t *iter,
72 gross 584 double * tolerance,dim_t Length_of_recursion,dim_t restart,
73     Paso_Performance* pp) {
74 jgs 150
75     /* Local variables */
76    
77 gross 1028 double *AP,**X_PRES,**R_PRES,**P_PRES;
78     double *P_PRES_dot_AP,*R_PRES_dot_P_PRES,*BREAKF,*ALPHA;
79 jgs 154 double P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3,P_PRES_dot_AP4,P_PRES_dot_AP5,P_PRES_dot_AP6,R_PRES_dot_P,abs_RP,breakf0;
80     double tol,Factor,sum_BREAKF,gamma,SC1,SC2,norm_of_residual,diff,L2_R,Norm_of_residual_global;
81     double *save_XPRES, *save_P_PRES, *save_R_PRES,save_R_PRES_dot_P_PRES;
82     dim_t maxit,Num_iter_global,num_iter_restart,num_iter;
83 gross 1028 dim_t i,z,order,n, Length_of_mem;
84 jgs 150 bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE,restartFlag=FALSE;
85 jgs 154 err_t Status=SOLVER_NO_ERROR;
86 jgs 150
87 gross 1028
88 jgs 150 /* adapt original routine parameters */
89 ksteube 1312 n = Paso_SystemMatrix_getTotalNumRows(A);
90 gross 1028 Length_of_mem=MAX(Length_of_recursion,0)+1;
91 jgs 150
92     /* Test the input parameters. */
93 jgs 154 if (restart>0) restart=MAX(Length_of_recursion,restart);
94     if (n < 0 || Length_of_recursion<=0) {
95 jgs 150 return SOLVER_INPUT_ERROR;
96     }
97    
98     /* allocate memory: */
99 gross 1028 X_PRES=TMPMEMALLOC(Length_of_mem,double*);
100     R_PRES=TMPMEMALLOC(Length_of_mem,double*);
101     P_PRES=TMPMEMALLOC(Length_of_mem,double*);
102     P_PRES_dot_AP=TMPMEMALLOC(Length_of_mem,double);
103     R_PRES_dot_P_PRES=TMPMEMALLOC(Length_of_mem,double);
104     BREAKF=TMPMEMALLOC(Length_of_mem,double);
105     ALPHA=TMPMEMALLOC(Length_of_mem,double);
106 jgs 154 AP=TMPMEMALLOC(n,double);
107 gross 1028 if (AP==NULL || X_PRES ==NULL || R_PRES == NULL || P_PRES == NULL ||
108     P_PRES_dot_AP == NULL || R_PRES_dot_P_PRES ==NULL || BREAKF == NULL || ALPHA == NULL) {
109     Status=SOLVER_MEMORY_ERROR;
110     } else {
111     for (i=0;i<Length_of_mem;i++) {
112     X_PRES[i]=TMPMEMALLOC(n,double);
113     R_PRES[i]=TMPMEMALLOC(n,double);
114     P_PRES[i]=TMPMEMALLOC(n,double);
115     if (X_PRES[i]==NULL || R_PRES[i]==NULL || P_PRES[i]==NULL) Status=SOLVER_MEMORY_ERROR;
116     }
117 jgs 150 }
118 jgs 154 if ( Status ==SOLVER_NO_ERROR ) {
119 jgs 150
120     /* now PRES starts : */
121     maxit=*iter;
122     tol=*tolerance;
123    
124 jgs 154 #pragma omp parallel firstprivate(maxit,tol,convergeFlag,maxIterFlag,breakFlag) \
125     private(num_iter,i,num_iter_restart,order,sum_BREAKF,gamma,restartFlag,norm_of_residual,abs_RP,breakf0,\
126     save_XPRES,save_P_PRES,save_R_PRES,save_R_PRES_dot_P_PRES)
127 jgs 150 {
128     /* initialization */
129    
130     restartFlag=TRUE;
131     num_iter=0;
132     #pragma omp for private(z) schedule(static) nowait
133 jgs 154 for (z=0; z < n; ++z) AP[z]=0;
134     for(i=0;i<Length_of_mem;++i) {
135 jgs 150 #pragma omp for private(z) schedule(static) nowait
136 jgs 154 for (z=0; z < n; ++z) {
137     P_PRES[i][z]=0;
138     R_PRES[i][z]=0;
139     X_PRES[i][z]=0;
140     }
141 jgs 150 }
142    
143 jgs 154 while (! (convergeFlag || maxIterFlag || breakFlag)) {
144 jgs 150 #pragma omp barrier
145     if (restartFlag) {
146 jgs 154 #pragma omp master
147     BREAKF[0]=ONE;
148 jgs 150 #pragma omp for private(z) schedule(static) nowait
149 jgs 154 for (z=0; z < n; ++z) {
150     R_PRES[0][z]=r[z];
151     X_PRES[0][z]=x[z];
152     }
153 jgs 150 num_iter_restart=0;
154     restartFlag=FALSE;
155     }
156     ++num_iter;
157     ++num_iter_restart;
158 jgs 154 /* order is the dimension of the space on which the residual is minimized: */
159     order=MIN(num_iter_restart,Length_of_recursion);
160 jgs 150 /***
161 jgs 154 *** calculate new search direction P from R_PRES
162 jgs 150 ***/
163 gross 432 #pragma omp barrier
164 jgs 154 Paso_Solver_solvePreconditioner(A,&P_PRES[0][0], &R_PRES[0][0]);
165 jgs 150 /***
166     *** apply A to P to get AP
167     ***/
168 gross 432 #pragma omp barrier
169 gross 415 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, &P_PRES[0][0],ZERO, &AP[0]);
170 jgs 150 /***
171     ***** calculation of the norm of R and the scalar products of
172     *** the residuals and A*P:
173     ***/
174 jgs 154 if (order==0) {
175 jgs 150 #pragma omp master
176 jgs 154 R_PRES_dot_P=ZERO;
177 jgs 150 #pragma omp barrier
178 jgs 154 #pragma omp for private(z) reduction(+:R_PRES_dot_P) schedule(static)
179     for (z=0;z<n;++z)
180     R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
181 jgs 150 #pragma omp master
182 jgs 154 R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
183     } else if (order==1) {
184     #pragma omp master
185 jgs 150 {
186 jgs 154 R_PRES_dot_P=ZERO;
187     P_PRES_dot_AP0=ZERO;
188 jgs 150 }
189     #pragma omp barrier
190 jgs 154 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0) schedule(static)
191     for (z=0;z<n;++z) {
192     R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
193     P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
194 jgs 150 }
195     #pragma omp master
196     {
197 jgs 154 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
198     R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
199 jgs 150 }
200 jgs 154 } else if (order==2) {
201 jgs 150 #pragma omp master
202     {
203 jgs 154 R_PRES_dot_P=ZERO;
204     P_PRES_dot_AP0=ZERO;
205     P_PRES_dot_AP1=ZERO;
206 jgs 150 }
207     #pragma omp barrier
208 jgs 154 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1) schedule(static)
209     for (z=0;z<n;++z) {
210     R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
211     P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
212     P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
213 jgs 150 }
214     #pragma omp master
215     {
216 jgs 154 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
217     P_PRES_dot_AP[1]=P_PRES_dot_AP1;
218     R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
219 jgs 150 }
220 jgs 154 } else if (order==3) {
221 jgs 150 #pragma omp master
222     {
223 jgs 154 R_PRES_dot_P=ZERO;
224     P_PRES_dot_AP0=ZERO;
225     P_PRES_dot_AP1=ZERO;
226     P_PRES_dot_AP2=ZERO;
227 jgs 150 }
228     #pragma omp barrier
229 jgs 154 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2) schedule(static)
230     for (z=0;z<n;++z) {
231     R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
232     P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
233     P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
234     P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
235 jgs 150 }
236     #pragma omp master
237     {
238 jgs 154 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
239     P_PRES_dot_AP[1]=P_PRES_dot_AP1;
240     P_PRES_dot_AP[2]=P_PRES_dot_AP2;
241     R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
242 jgs 150 }
243 jgs 154 } else if (order==4) {
244 jgs 150 #pragma omp master
245     {
246 jgs 154 R_PRES_dot_P=ZERO;
247     P_PRES_dot_AP0=ZERO;
248     P_PRES_dot_AP1=ZERO;
249     P_PRES_dot_AP2=ZERO;
250     P_PRES_dot_AP3=ZERO;
251 jgs 150 }
252     #pragma omp barrier
253 jgs 154 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3) schedule(static)
254     for (z=0;z<n;++z) {
255     R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
256     P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
257     P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
258     P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
259     P_PRES_dot_AP3+=P_PRES[3][z]*AP[z];
260 jgs 150 }
261     #pragma omp master
262     {
263 jgs 154 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
264     P_PRES_dot_AP[1]=P_PRES_dot_AP1;
265     P_PRES_dot_AP[2]=P_PRES_dot_AP2;
266     P_PRES_dot_AP[3]=P_PRES_dot_AP3;
267     R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
268 jgs 150 }
269 jgs 154 } else if (order==5) {
270 jgs 150 #pragma omp master
271     {
272 jgs 154 R_PRES_dot_P=ZERO;
273     P_PRES_dot_AP0=ZERO;
274     P_PRES_dot_AP1=ZERO;
275     P_PRES_dot_AP2=ZERO;
276     P_PRES_dot_AP3=ZERO;
277     P_PRES_dot_AP4=ZERO;
278 jgs 150 }
279     #pragma omp barrier
280 jgs 154 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3,P_PRES_dot_AP4) schedule(static)
281     for (z=0;z<n;++z) {
282     R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
283     P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
284     P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
285     P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
286     P_PRES_dot_AP3+=P_PRES[3][z]*AP[z];
287     P_PRES_dot_AP4+=P_PRES[4][z]*AP[z];
288     }
289 jgs 150 #pragma omp master
290     {
291 jgs 154 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
292     P_PRES_dot_AP[1]=P_PRES_dot_AP1;
293     P_PRES_dot_AP[2]=P_PRES_dot_AP2;
294     P_PRES_dot_AP[3]=P_PRES_dot_AP3;
295     P_PRES_dot_AP[4]=P_PRES_dot_AP4;
296     R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
297 jgs 150 }
298 jgs 154 } else if (order==6) {
299 jgs 150 #pragma omp master
300     {
301 jgs 154 R_PRES_dot_P=ZERO;
302     P_PRES_dot_AP0=ZERO;
303     P_PRES_dot_AP1=ZERO;
304     P_PRES_dot_AP2=ZERO;
305     P_PRES_dot_AP3=ZERO;
306     P_PRES_dot_AP4=ZERO;
307     P_PRES_dot_AP5=ZERO;
308 jgs 150 }
309     #pragma omp barrier
310 jgs 154 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3,P_PRES_dot_AP4,P_PRES_dot_AP5) schedule(static)
311     for (z=0;z<n;++z) {
312     R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
313     P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
314     P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
315     P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
316     P_PRES_dot_AP3+=P_PRES[3][z]*AP[z];
317     P_PRES_dot_AP4+=P_PRES[4][z]*AP[z];
318     P_PRES_dot_AP5+=P_PRES[5][z]*AP[z];
319 jgs 150 }
320     #pragma omp master
321     {
322 jgs 154 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
323     P_PRES_dot_AP[1]=P_PRES_dot_AP1;
324     P_PRES_dot_AP[2]=P_PRES_dot_AP2;
325     P_PRES_dot_AP[3]=P_PRES_dot_AP3;
326     P_PRES_dot_AP[4]=P_PRES_dot_AP4;
327     P_PRES_dot_AP[5]=P_PRES_dot_AP5;
328     R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
329 jgs 150 }
330 jgs 154 } else if (order>6) {
331 jgs 150 #pragma omp master
332     {
333 jgs 154 R_PRES_dot_P=ZERO;
334     P_PRES_dot_AP0=ZERO;
335     P_PRES_dot_AP1=ZERO;
336     P_PRES_dot_AP2=ZERO;
337     P_PRES_dot_AP3=ZERO;
338     P_PRES_dot_AP4=ZERO;
339     P_PRES_dot_AP5=ZERO;
340     P_PRES_dot_AP6=ZERO;
341 jgs 150 }
342     #pragma omp barrier
343 jgs 154 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3,P_PRES_dot_AP4,P_PRES_dot_AP5,P_PRES_dot_AP6) schedule(static)
344     for (z=0;z<n;++z) {
345     R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
346     P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
347     P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
348     P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
349     P_PRES_dot_AP3+=P_PRES[3][z]*AP[z];
350     P_PRES_dot_AP4+=P_PRES[4][z]*AP[z];
351     P_PRES_dot_AP5+=P_PRES[5][z]*AP[z];
352     P_PRES_dot_AP6+=P_PRES[6][z]*AP[z];
353     }
354     #pragma omp master
355     {
356     P_PRES_dot_AP[0]=P_PRES_dot_AP0;
357     P_PRES_dot_AP[1]=P_PRES_dot_AP1;
358     P_PRES_dot_AP[2]=P_PRES_dot_AP2;
359     P_PRES_dot_AP[3]=P_PRES_dot_AP3;
360     P_PRES_dot_AP[4]=P_PRES_dot_AP4;
361     P_PRES_dot_AP[5]=P_PRES_dot_AP5;
362     P_PRES_dot_AP[6]=P_PRES_dot_AP6;
363     R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
364    
365     P_PRES_dot_AP0=ZERO;
366 jgs 150 }
367 jgs 154 for (i=7;i<order;++i) {
368     #pragma omp barrier
369     #pragma omp for private(z) reduction(+:P_PRES_dot_AP0) schedule(static)
370     for (z=0;z<n;++z) P_PRES_dot_AP0+=P_PRES[i][z]*AP[z];
371 jgs 150 #pragma omp master
372     {
373 jgs 154 P_PRES_dot_AP[i]=P_PRES_dot_AP0;
374     P_PRES_dot_AP0=ZERO;
375 jgs 150 }
376     }
377     }
378 jgs 154 /* this fixes a problem with the intel compiler */
379     #pragma omp master
380     P_PRES_dot_AP0=R_PRES_dot_P_PRES[0];
381     /*** if sum_BREAKF is equal to zero a breakdown occurs.
382     *** iteration procedure can be continued but R_PRES is not the
383     *** residual of X_PRES approximation.
384     ***/
385 gross 432 #pragma omp barrier
386 jgs 154 sum_BREAKF=0.;
387     for (i=0;i<order;++i) sum_BREAKF +=BREAKF[i];
388     breakFlag=!((ABS(P_PRES_dot_AP0) > ZERO) && (sum_BREAKF >ZERO));
389     if (!breakFlag) {
390     breakFlag=FALSE;
391     /***
392     ***** X_PRES and R_PRES are moved to memory:
393     ***/
394     #pragma omp master
395 jgs 150 {
396 jgs 154 Factor=0.;
397     for (i=0;i<order;++i) {
398     ALPHA[i]=-P_PRES_dot_AP[i]/R_PRES_dot_P_PRES[i];
399     Factor+=BREAKF[i]*ALPHA[i];
400     }
401    
402     save_R_PRES_dot_P_PRES=R_PRES_dot_P_PRES[Length_of_mem-1];
403     save_R_PRES=R_PRES[Length_of_mem-1];
404     save_XPRES=X_PRES[Length_of_mem-1];
405     save_P_PRES=P_PRES[Length_of_mem-1];
406     for (i=Length_of_mem-1;i>0;--i) {
407     BREAKF[i]=BREAKF[i-1];
408     R_PRES_dot_P_PRES[i]=R_PRES_dot_P_PRES[i-1];
409     R_PRES[i]=R_PRES[i-1];
410     X_PRES[i]=X_PRES[i-1];
411     P_PRES[i]=P_PRES[i-1];
412     }
413     R_PRES_dot_P_PRES[0]=save_R_PRES_dot_P_PRES;
414     R_PRES[0]=save_R_PRES;
415     X_PRES[0]=save_XPRES;
416     P_PRES[0]=save_P_PRES;
417    
418     if (ABS(Factor)<=ZERO) {
419     Factor=1.;
420     BREAKF[0]=ZERO;
421     } else {
422     Factor=1./Factor;
423     BREAKF[0]=ONE;
424     }
425     for (i=0;i<order;++i) ALPHA[i]*=Factor;
426 jgs 150 }
427     /*
428 jgs 154 ***** update of solution X_PRES and its residual R_PRES:
429 jgs 150 ***
430     *** P is used to accumulate X and AP to accumulate R. X and R
431     *** are still needed until they are put into the X and R memory
432 jgs 154 *** R_PRES and X_PRES
433 jgs 150 ***
434     **/
435     #pragma omp barrier
436 jgs 154 breakf0=BREAKF[0];
437     if (order==0) {
438 jgs 150 #pragma omp for private(z) schedule(static)
439 jgs 154 for (z=0;z<n;++z) {
440     R_PRES[0][z]= Factor* AP[z];
441     X_PRES[0][z]=-Factor*P_PRES[1][z];
442     }
443     } else if (order==1) {
444 jgs 150 #pragma omp for private(z) schedule(static)
445 jgs 154 for (z=0;z<n;++z) {
446     R_PRES[0][z]= Factor* AP[z]+ALPHA[0]*R_PRES[1][z];
447     X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z];
448     }
449     } else if (order==2) {
450 jgs 150 #pragma omp for private(z) schedule(static)
451 jgs 154 for (z=0;z<n;++z) {
452     R_PRES[0][z]= Factor* AP[z]+ALPHA[0]*R_PRES[1][z]
453     +ALPHA[1]*R_PRES[2][z];
454     X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
455     +ALPHA[1]*X_PRES[2][z];
456     }
457     } else if (order==3) {
458 jgs 150 #pragma omp for private(z) schedule(static)
459 jgs 154 for (z=0;z<n;++z) {
460     R_PRES[0][z]= Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
461     +ALPHA[1]*R_PRES[2][z]
462     +ALPHA[2]*R_PRES[3][z];
463     X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
464     +ALPHA[1]*X_PRES[2][z]
465     +ALPHA[2]*X_PRES[3][z];
466     }
467     } else if (order==4) {
468 jgs 150 #pragma omp for private(z) schedule(static)
469 jgs 154 for (z=0;z<n;++z) {
470     R_PRES[0][z]= Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
471     +ALPHA[1]*R_PRES[2][z]
472     +ALPHA[2]*R_PRES[3][z]
473     +ALPHA[3]*R_PRES[4][z];
474     X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
475     +ALPHA[1]*X_PRES[2][z]
476     +ALPHA[2]*X_PRES[3][z]
477     +ALPHA[3]*X_PRES[4][z];
478     }
479     } else if (order==5) {
480 jgs 150 #pragma omp for private(z) schedule(static)
481 jgs 154 for (z=0;z<n;++z) {
482     R_PRES[0][z]=Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
483     +ALPHA[1]*R_PRES[2][z]
484     +ALPHA[2]*R_PRES[3][z]
485     +ALPHA[3]*R_PRES[4][z]
486     +ALPHA[4]*R_PRES[5][z];
487     X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
488     +ALPHA[1]*X_PRES[2][z]
489     +ALPHA[2]*X_PRES[3][z]
490     +ALPHA[3]*X_PRES[4][z]
491     +ALPHA[4]*X_PRES[5][z];
492     }
493     } else if (order==6) {
494 jgs 150 #pragma omp for private(z) schedule(static)
495 jgs 154 for (z=0;z<n;++z) {
496     R_PRES[0][z]=Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
497     +ALPHA[1]*R_PRES[2][z]
498     +ALPHA[2]*R_PRES[3][z]
499     +ALPHA[3]*R_PRES[4][z]
500     +ALPHA[4]*R_PRES[5][z]
501     +ALPHA[5]*R_PRES[6][z];
502     X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
503     +ALPHA[1]*X_PRES[2][z]
504     +ALPHA[2]*X_PRES[3][z]
505     +ALPHA[3]*X_PRES[4][z]
506     +ALPHA[4]*X_PRES[5][z]
507     +ALPHA[5]*X_PRES[6][z];
508     }
509     } else if (order>6) {
510 jgs 150 #pragma omp for private(z) schedule(static)
511 jgs 154 for (z=0;z<n;++z) {
512     R_PRES[0][z]=Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
513     +ALPHA[1]*R_PRES[2][z]
514     +ALPHA[2]*R_PRES[3][z]
515     +ALPHA[3]*R_PRES[4][z]
516     +ALPHA[4]*R_PRES[5][z]
517     +ALPHA[5]*R_PRES[6][z]
518     +ALPHA[6]*R_PRES[7][z];
519     X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
520     +ALPHA[1]*X_PRES[2][z]
521     +ALPHA[2]*X_PRES[3][z]
522     +ALPHA[3]*X_PRES[4][z]
523     +ALPHA[4]*X_PRES[5][z]
524     +ALPHA[5]*X_PRES[6][z]
525     +ALPHA[6]*X_PRES[7][z];
526     }
527     for (i=7;i<order;++i) {
528 jgs 150 #pragma omp for private(z) schedule(static)
529 jgs 154 for (z=0;z<n;++z) {
530     R_PRES[0][z]+=ALPHA[i]*R_PRES[i+1][z];
531     X_PRES[0][z]+=ALPHA[i]*X_PRES[i+1][z];
532     }
533 jgs 150 }
534     }
535 jgs 154 if (breakf0>0.) {
536 jgs 150 /***
537 jgs 154 ***** calculate gamma from min_(gamma){|R+gamma*(R_PRES-R)|_2}:
538 jgs 150 ***/
539     #pragma omp master
540     {
541     SC1=ZERO;
542     SC2=ZERO;
543     L2_R=ZERO;
544     }
545     #pragma omp barrier
546     #pragma omp for private(z,diff) reduction(+:SC1,SC2) schedule(static)
547 jgs 154 for (z=0;z<n;++z) {
548     diff=R_PRES[0][z]-r[z];
549 jgs 150 SC1+=diff*diff;
550     SC2+=diff*r[z];
551     }
552 jgs 154 gamma=(SC1<=ZERO) ? ZERO : -SC2/SC1;
553 jgs 150 #pragma omp for private(z) reduction(+:L2_R) schedule(static)
554 jgs 154 for (z=0;z<n;++z) {
555     x[z]+=gamma*(X_PRES[0][z]-x[z]);
556     r[z]+=gamma*(R_PRES[0][z]-r[z]);
557 jgs 150 L2_R+=r[z]*r[z];
558     }
559     norm_of_residual=sqrt(L2_R);
560     convergeFlag = (norm_of_residual <= tol);
561 jgs 154 if (restart>0) restartFlag=(num_iter_restart >= restart);
562     } else {
563     convergeFlag=FALSE;
564     restartFlag=FALSE;
565 jgs 150 }
566 jgs 154 maxIterFlag = (num_iter >= maxit);
567     }
568     }
569     /* end of iteration */
570     #pragma omp master
571     {
572     Norm_of_residual_global=norm_of_residual;
573     Num_iter_global=num_iter;
574 jgs 150 if (maxIterFlag) {
575 jgs 154 Status = SOLVER_MAXITER_REACHED;
576 jgs 150 } else if (breakFlag) {
577 jgs 154 Status = SOLVER_BREAKDOWN;
578     }
579 jgs 150 }
580     } /* end of parallel region */
581 jgs 154 for (i=0;i<Length_of_recursion;i++) {
582     TMPMEMFREE(X_PRES[i]);
583     TMPMEMFREE(R_PRES[i]);
584     TMPMEMFREE(P_PRES[i]);
585 jgs 150 }
586 gross 1028 TMPMEMFREE(AP);
587     TMPMEMFREE(X_PRES);
588     TMPMEMFREE(R_PRES);
589     TMPMEMFREE(P_PRES);
590     TMPMEMFREE(P_PRES_dot_AP);
591     TMPMEMFREE(R_PRES_dot_P_PRES);
592     TMPMEMFREE(BREAKF);
593     TMPMEMFREE(ALPHA);
594 jgs 154 *iter=Num_iter_global;
595     *tolerance=Norm_of_residual_global;
596 jgs 150 }
597 jgs 154 return Status;
598 jgs 150 }

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26