/[escript]/trunk/paso/src/GMRES.c
ViewVC logotype

Annotation of /trunk/paso/src/GMRES.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1556 - (hide annotations)
Mon May 12 00:54:58 2008 UTC (11 years, 9 months ago) by gross
File MIME type: text/plain
File size: 22221 byte(s)
Modification to allow mixed mode execution. 
In order to keep the code portable accross platform all MPI calls within
parallel regions have been moved. 


1 ksteube 1312
2 jgs 150 /* $Id$ */
3    
4 ksteube 1312 /*******************************************************
5     *
6     * Copyright 2003-2007 by ACceSS MNRF
7     * Copyright 2007 by University of Queensland
8     *
9     * http://esscc.uq.edu.au
10     * Primary Business: Queensland, Australia
11     * Licensed under the Open Software License version 3.0
12     * http://www.opensource.org/licenses/osl-3.0.php
13     *
14     *******************************************************/
15 dhawcroft 631
16 jgs 150 /*
17     * Purpose
18     * =======
19     *
20     * GMRES solves the linear system A*x=b using the
21     * truncated and restered GMRES method with preconditioning.
22     *
23     * Convergence test: norm( b - A*x )< TOL.
24     *
25     *
26     *
27     * Arguments
28     * =========
29     *
30 jgs 154 * r (input/output) double array, dimension n.
31 jgs 150 * On entry, residual of inital guess X
32     *
33 jgs 154 * x (input/output) double array, dimension n.
34 jgs 150 * On input, the initial guess.
35     *
36     * iter (input/output) int
37     * On input, the maximum num_iterations to be performed.
38     * On output, actual number of num_iterations performed.
39     *
40     * tolerance (input/output) DOUBLE PRECISION
41     * On input, the allowable convergence measure for
42     * norm( b - A*x )
43     * On output, the final value of this measure.
44     *
45 jgs 154 * Length_of_recursion (input) gives the number of residual to be kept in orthogonalization process
46 jgs 150 *
47     * restart (input) If restart>0, iteration is resterted a after restart steps.
48     *
49     * INFO (output) int
50     *
51     * =SOLVER_NO_ERROR: Successful exit. num_iterated approximate solution returned.
52 jgs 154 * =SOLVER_MAXNUM_ITER_REACHED
53 jgs 150 * =SOLVER_INPUT_ERROR Illegal parameter:
54 jgs 154 * =SOLVER_BREAKDOWN: bad luck!
55     * =SOLVER_MEMORY_ERROR : no memory available
56 jgs 150 *
57     * ==============================================================
58     */
59    
60 gross 700 #include "Common.h"
61     #include "SystemMatrix.h"
62 jgs 150 #include "Solver.h"
63     #ifdef _OPENMP
64     #include <omp.h>
65     #endif
66    
67     err_t Paso_Solver_GMRES(
68     Paso_SystemMatrix * A,
69     double * r,
70     double * x,
71     dim_t *iter,
72 gross 584 double * tolerance,dim_t Length_of_recursion,dim_t restart,
73     Paso_Performance* pp) {
74 jgs 150
75     /* Local variables */
76    
77 gross 1028 double *AP,**X_PRES,**R_PRES,**P_PRES;
78     double *P_PRES_dot_AP,*R_PRES_dot_P_PRES,*BREAKF,*ALPHA;
79 jgs 154 double P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3,P_PRES_dot_AP4,P_PRES_dot_AP5,P_PRES_dot_AP6,R_PRES_dot_P,abs_RP,breakf0;
80     double tol,Factor,sum_BREAKF,gamma,SC1,SC2,norm_of_residual,diff,L2_R,Norm_of_residual_global;
81     double *save_XPRES, *save_P_PRES, *save_R_PRES,save_R_PRES_dot_P_PRES;
82     dim_t maxit,Num_iter_global,num_iter_restart,num_iter;
83 gross 1028 dim_t i,z,order,n, Length_of_mem;
84 jgs 150 bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE,restartFlag=FALSE;
85 jgs 154 err_t Status=SOLVER_NO_ERROR;
86 jgs 150
87 gross 1028
88 jgs 150 /* adapt original routine parameters */
89 ksteube 1312 n = Paso_SystemMatrix_getTotalNumRows(A);
90 gross 1028 Length_of_mem=MAX(Length_of_recursion,0)+1;
91 jgs 150
92     /* Test the input parameters. */
93 jgs 154 if (restart>0) restart=MAX(Length_of_recursion,restart);
94     if (n < 0 || Length_of_recursion<=0) {
95 jgs 150 return SOLVER_INPUT_ERROR;
96     }
97    
98     /* allocate memory: */
99 gross 1028 X_PRES=TMPMEMALLOC(Length_of_mem,double*);
100     R_PRES=TMPMEMALLOC(Length_of_mem,double*);
101     P_PRES=TMPMEMALLOC(Length_of_mem,double*);
102     P_PRES_dot_AP=TMPMEMALLOC(Length_of_mem,double);
103     R_PRES_dot_P_PRES=TMPMEMALLOC(Length_of_mem,double);
104     BREAKF=TMPMEMALLOC(Length_of_mem,double);
105     ALPHA=TMPMEMALLOC(Length_of_mem,double);
106 jgs 154 AP=TMPMEMALLOC(n,double);
107 gross 1028 if (AP==NULL || X_PRES ==NULL || R_PRES == NULL || P_PRES == NULL ||
108     P_PRES_dot_AP == NULL || R_PRES_dot_P_PRES ==NULL || BREAKF == NULL || ALPHA == NULL) {
109     Status=SOLVER_MEMORY_ERROR;
110     } else {
111     for (i=0;i<Length_of_mem;i++) {
112     X_PRES[i]=TMPMEMALLOC(n,double);
113     R_PRES[i]=TMPMEMALLOC(n,double);
114     P_PRES[i]=TMPMEMALLOC(n,double);
115     if (X_PRES[i]==NULL || R_PRES[i]==NULL || P_PRES[i]==NULL) Status=SOLVER_MEMORY_ERROR;
116     }
117 jgs 150 }
118 jgs 154 if ( Status ==SOLVER_NO_ERROR ) {
119 jgs 150
120     /* now PRES starts : */
121     maxit=*iter;
122     tol=*tolerance;
123    
124 gross 1556 /* initialization */
125 jgs 150
126     restartFlag=TRUE;
127     num_iter=0;
128 gross 1556 #pragma omp parallel private(i)
129     {
130     #pragma omp for private(z) schedule(static) nowait
131     for (z=0; z < n; ++z) AP[z]=0;
132     for(i=0;i<Length_of_mem;++i) {
133     #pragma omp for private(z) schedule(static) nowait
134     for (z=0; z < n; ++z) {
135     P_PRES[i][z]=0;
136     R_PRES[i][z]=0;
137     X_PRES[i][z]=0;
138     }
139     }
140 jgs 150 }
141    
142 jgs 154 while (! (convergeFlag || maxIterFlag || breakFlag)) {
143 jgs 150 if (restartFlag) {
144 jgs 154 BREAKF[0]=ONE;
145 gross 1556 #pragma omp parallel for private(z) schedule(static)
146 jgs 154 for (z=0; z < n; ++z) {
147     R_PRES[0][z]=r[z];
148     X_PRES[0][z]=x[z];
149     }
150 jgs 150 num_iter_restart=0;
151     restartFlag=FALSE;
152     }
153     ++num_iter;
154     ++num_iter_restart;
155 jgs 154 /* order is the dimension of the space on which the residual is minimized: */
156     order=MIN(num_iter_restart,Length_of_recursion);
157 jgs 150 /***
158 jgs 154 *** calculate new search direction P from R_PRES
159 jgs 150 ***/
160 jgs 154 Paso_Solver_solvePreconditioner(A,&P_PRES[0][0], &R_PRES[0][0]);
161 jgs 150 /***
162     *** apply A to P to get AP
163     ***/
164 gross 415 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, &P_PRES[0][0],ZERO, &AP[0]);
165 jgs 150 /***
166     ***** calculation of the norm of R and the scalar products of
167     *** the residuals and A*P:
168     ***/
169 jgs 154 if (order==0) {
170     R_PRES_dot_P=ZERO;
171 gross 1556 #pragma omp parallel for private(z) reduction(+:R_PRES_dot_P) schedule(static)
172     for (z=0;z<n;++z) R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
173 jgs 154 R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
174     } else if (order==1) {
175 gross 1556 R_PRES_dot_P=ZERO;
176     P_PRES_dot_AP0=ZERO;
177     #pragma omp parallel for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0) schedule(static)
178 jgs 154 for (z=0;z<n;++z) {
179     R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
180     P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
181 jgs 150 }
182 gross 1556 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
183     R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
184 jgs 154 } else if (order==2) {
185 gross 1556 R_PRES_dot_P=ZERO;
186     P_PRES_dot_AP0=ZERO;
187     P_PRES_dot_AP1=ZERO;
188     #pragma omp parallel for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1) schedule(static)
189 jgs 154 for (z=0;z<n;++z) {
190     R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
191     P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
192     P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
193 jgs 150 }
194 gross 1556 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
195     P_PRES_dot_AP[1]=P_PRES_dot_AP1;
196     R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
197 jgs 154 } else if (order==3) {
198 gross 1556 R_PRES_dot_P=ZERO;
199     P_PRES_dot_AP0=ZERO;
200     P_PRES_dot_AP1=ZERO;
201     P_PRES_dot_AP2=ZERO;
202     #pragma omp parallel for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2) schedule(static)
203 jgs 154 for (z=0;z<n;++z) {
204     R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
205     P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
206     P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
207     P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
208 jgs 150 }
209 gross 1556 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
210     P_PRES_dot_AP[1]=P_PRES_dot_AP1;
211     P_PRES_dot_AP[2]=P_PRES_dot_AP2;
212     R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
213 jgs 154 } else if (order==4) {
214 gross 1556 R_PRES_dot_P=ZERO;
215     P_PRES_dot_AP0=ZERO;
216     P_PRES_dot_AP1=ZERO;
217     P_PRES_dot_AP2=ZERO;
218     P_PRES_dot_AP3=ZERO;
219     #pragma omp parallel for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3) schedule(static)
220 jgs 154 for (z=0;z<n;++z) {
221     R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
222     P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
223     P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
224     P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
225     P_PRES_dot_AP3+=P_PRES[3][z]*AP[z];
226 jgs 150 }
227 gross 1556 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
228     P_PRES_dot_AP[1]=P_PRES_dot_AP1;
229     P_PRES_dot_AP[2]=P_PRES_dot_AP2;
230     P_PRES_dot_AP[3]=P_PRES_dot_AP3;
231     R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
232 jgs 154 } else if (order==5) {
233 gross 1556 R_PRES_dot_P=ZERO;
234     P_PRES_dot_AP0=ZERO;
235     P_PRES_dot_AP1=ZERO;
236     P_PRES_dot_AP2=ZERO;
237     P_PRES_dot_AP3=ZERO;
238     P_PRES_dot_AP4=ZERO;
239     #pragma omp parallel for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3,P_PRES_dot_AP4) schedule(static)
240 jgs 154 for (z=0;z<n;++z) {
241     R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
242     P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
243     P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
244     P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
245     P_PRES_dot_AP3+=P_PRES[3][z]*AP[z];
246     P_PRES_dot_AP4+=P_PRES[4][z]*AP[z];
247     }
248 gross 1556 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
249     P_PRES_dot_AP[1]=P_PRES_dot_AP1;
250     P_PRES_dot_AP[2]=P_PRES_dot_AP2;
251     P_PRES_dot_AP[3]=P_PRES_dot_AP3;
252     P_PRES_dot_AP[4]=P_PRES_dot_AP4;
253     R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
254 jgs 154 } else if (order==6) {
255 gross 1556 R_PRES_dot_P=ZERO;
256     P_PRES_dot_AP0=ZERO;
257     P_PRES_dot_AP1=ZERO;
258     P_PRES_dot_AP2=ZERO;
259     P_PRES_dot_AP3=ZERO;
260     P_PRES_dot_AP4=ZERO;
261     P_PRES_dot_AP5=ZERO;
262     #pragma omp parallel for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3,P_PRES_dot_AP4,P_PRES_dot_AP5) schedule(static)
263 jgs 154 for (z=0;z<n;++z) {
264     R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
265     P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
266     P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
267     P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
268     P_PRES_dot_AP3+=P_PRES[3][z]*AP[z];
269     P_PRES_dot_AP4+=P_PRES[4][z]*AP[z];
270     P_PRES_dot_AP5+=P_PRES[5][z]*AP[z];
271 jgs 150 }
272 gross 1556 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
273     P_PRES_dot_AP[1]=P_PRES_dot_AP1;
274     P_PRES_dot_AP[2]=P_PRES_dot_AP2;
275     P_PRES_dot_AP[3]=P_PRES_dot_AP3;
276     P_PRES_dot_AP[4]=P_PRES_dot_AP4;
277     P_PRES_dot_AP[5]=P_PRES_dot_AP5;
278     R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
279 jgs 154 } else if (order>6) {
280 gross 1556 R_PRES_dot_P=ZERO;
281     P_PRES_dot_AP0=ZERO;
282     P_PRES_dot_AP1=ZERO;
283     P_PRES_dot_AP2=ZERO;
284     P_PRES_dot_AP3=ZERO;
285     P_PRES_dot_AP4=ZERO;
286     P_PRES_dot_AP5=ZERO;
287     P_PRES_dot_AP6=ZERO;
288     #pragma omp parallel for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3,P_PRES_dot_AP4,P_PRES_dot_AP5,P_PRES_dot_AP6) schedule(static)
289 jgs 154 for (z=0;z<n;++z) {
290     R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
291     P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
292     P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
293     P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
294     P_PRES_dot_AP3+=P_PRES[3][z]*AP[z];
295     P_PRES_dot_AP4+=P_PRES[4][z]*AP[z];
296     P_PRES_dot_AP5+=P_PRES[5][z]*AP[z];
297     P_PRES_dot_AP6+=P_PRES[6][z]*AP[z];
298 gross 1556 }
299     P_PRES_dot_AP[0]=P_PRES_dot_AP0;
300     P_PRES_dot_AP[1]=P_PRES_dot_AP1;
301     P_PRES_dot_AP[2]=P_PRES_dot_AP2;
302     P_PRES_dot_AP[3]=P_PRES_dot_AP3;
303     P_PRES_dot_AP[4]=P_PRES_dot_AP4;
304     P_PRES_dot_AP[5]=P_PRES_dot_AP5;
305     P_PRES_dot_AP[6]=P_PRES_dot_AP6;
306     R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
307 jgs 154
308 gross 1556 P_PRES_dot_AP0=ZERO;
309 jgs 154 for (i=7;i<order;++i) {
310 gross 1556 #pragma omp parallel for private(z) reduction(+:P_PRES_dot_AP0) schedule(static)
311 jgs 154 for (z=0;z<n;++z) P_PRES_dot_AP0+=P_PRES[i][z]*AP[z];
312 gross 1556 P_PRES_dot_AP[i]=P_PRES_dot_AP0;
313     P_PRES_dot_AP0=ZERO;
314 jgs 150 }
315     }
316 jgs 154 /* this fixes a problem with the intel compiler */
317     P_PRES_dot_AP0=R_PRES_dot_P_PRES[0];
318     /*** if sum_BREAKF is equal to zero a breakdown occurs.
319     *** iteration procedure can be continued but R_PRES is not the
320     *** residual of X_PRES approximation.
321     ***/
322     sum_BREAKF=0.;
323     for (i=0;i<order;++i) sum_BREAKF +=BREAKF[i];
324     breakFlag=!((ABS(P_PRES_dot_AP0) > ZERO) && (sum_BREAKF >ZERO));
325     if (!breakFlag) {
326     breakFlag=FALSE;
327     /***
328     ***** X_PRES and R_PRES are moved to memory:
329     ***/
330 gross 1556 Factor=0.;
331     for (i=0;i<order;++i) {
332 jgs 154 ALPHA[i]=-P_PRES_dot_AP[i]/R_PRES_dot_P_PRES[i];
333     Factor+=BREAKF[i]*ALPHA[i];
334 gross 1556 }
335 jgs 154
336 gross 1556 save_R_PRES_dot_P_PRES=R_PRES_dot_P_PRES[Length_of_mem-1];
337     save_R_PRES=R_PRES[Length_of_mem-1];
338     save_XPRES=X_PRES[Length_of_mem-1];
339     save_P_PRES=P_PRES[Length_of_mem-1];
340     for (i=Length_of_mem-1;i>0;--i) {
341 jgs 154 BREAKF[i]=BREAKF[i-1];
342     R_PRES_dot_P_PRES[i]=R_PRES_dot_P_PRES[i-1];
343     R_PRES[i]=R_PRES[i-1];
344     X_PRES[i]=X_PRES[i-1];
345     P_PRES[i]=P_PRES[i-1];
346 gross 1556 }
347     R_PRES_dot_P_PRES[0]=save_R_PRES_dot_P_PRES;
348     R_PRES[0]=save_R_PRES;
349     X_PRES[0]=save_XPRES;
350     P_PRES[0]=save_P_PRES;
351 jgs 154
352 gross 1556 if (ABS(Factor)<=ZERO) {
353 jgs 154 Factor=1.;
354     BREAKF[0]=ZERO;
355 gross 1556 } else {
356 jgs 154 Factor=1./Factor;
357     BREAKF[0]=ONE;
358 jgs 150 }
359 gross 1556 for (i=0;i<order;++i) ALPHA[i]*=Factor;
360 jgs 150 /*
361 jgs 154 ***** update of solution X_PRES and its residual R_PRES:
362 jgs 150 ***
363     *** P is used to accumulate X and AP to accumulate R. X and R
364     *** are still needed until they are put into the X and R memory
365 jgs 154 *** R_PRES and X_PRES
366 jgs 150 ***
367     **/
368 jgs 154 breakf0=BREAKF[0];
369     if (order==0) {
370 gross 1556 #pragma omp parallel for private(z) schedule(static)
371 jgs 154 for (z=0;z<n;++z) {
372     R_PRES[0][z]= Factor* AP[z];
373     X_PRES[0][z]=-Factor*P_PRES[1][z];
374     }
375     } else if (order==1) {
376 gross 1556 #pragma omp parallel for private(z) schedule(static)
377 jgs 154 for (z=0;z<n;++z) {
378     R_PRES[0][z]= Factor* AP[z]+ALPHA[0]*R_PRES[1][z];
379     X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z];
380     }
381     } else if (order==2) {
382 gross 1556 #pragma omp parallel for private(z) schedule(static)
383 jgs 154 for (z=0;z<n;++z) {
384     R_PRES[0][z]= Factor* AP[z]+ALPHA[0]*R_PRES[1][z]
385     +ALPHA[1]*R_PRES[2][z];
386     X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
387     +ALPHA[1]*X_PRES[2][z];
388     }
389     } else if (order==3) {
390 gross 1556 #pragma omp parallel for private(z) schedule(static)
391 jgs 154 for (z=0;z<n;++z) {
392     R_PRES[0][z]= Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
393     +ALPHA[1]*R_PRES[2][z]
394     +ALPHA[2]*R_PRES[3][z];
395     X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
396     +ALPHA[1]*X_PRES[2][z]
397     +ALPHA[2]*X_PRES[3][z];
398     }
399     } else if (order==4) {
400 gross 1556 #pragma omp parallel for private(z) schedule(static)
401 jgs 154 for (z=0;z<n;++z) {
402     R_PRES[0][z]= Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
403     +ALPHA[1]*R_PRES[2][z]
404     +ALPHA[2]*R_PRES[3][z]
405     +ALPHA[3]*R_PRES[4][z];
406     X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
407     +ALPHA[1]*X_PRES[2][z]
408     +ALPHA[2]*X_PRES[3][z]
409     +ALPHA[3]*X_PRES[4][z];
410     }
411     } else if (order==5) {
412 gross 1556 #pragma omp parallel for private(z) schedule(static)
413 jgs 154 for (z=0;z<n;++z) {
414     R_PRES[0][z]=Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
415     +ALPHA[1]*R_PRES[2][z]
416     +ALPHA[2]*R_PRES[3][z]
417     +ALPHA[3]*R_PRES[4][z]
418     +ALPHA[4]*R_PRES[5][z];
419     X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
420     +ALPHA[1]*X_PRES[2][z]
421     +ALPHA[2]*X_PRES[3][z]
422     +ALPHA[3]*X_PRES[4][z]
423     +ALPHA[4]*X_PRES[5][z];
424     }
425     } else if (order==6) {
426 gross 1556 #pragma omp parallel for private(z) schedule(static)
427 jgs 154 for (z=0;z<n;++z) {
428     R_PRES[0][z]=Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
429     +ALPHA[1]*R_PRES[2][z]
430     +ALPHA[2]*R_PRES[3][z]
431     +ALPHA[3]*R_PRES[4][z]
432     +ALPHA[4]*R_PRES[5][z]
433     +ALPHA[5]*R_PRES[6][z];
434     X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
435     +ALPHA[1]*X_PRES[2][z]
436     +ALPHA[2]*X_PRES[3][z]
437     +ALPHA[3]*X_PRES[4][z]
438     +ALPHA[4]*X_PRES[5][z]
439     +ALPHA[5]*X_PRES[6][z];
440     }
441     } else if (order>6) {
442 gross 1556 #pragma omp parallel for private(z) schedule(static)
443 jgs 154 for (z=0;z<n;++z) {
444     R_PRES[0][z]=Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
445     +ALPHA[1]*R_PRES[2][z]
446     +ALPHA[2]*R_PRES[3][z]
447     +ALPHA[3]*R_PRES[4][z]
448     +ALPHA[4]*R_PRES[5][z]
449     +ALPHA[5]*R_PRES[6][z]
450     +ALPHA[6]*R_PRES[7][z];
451     X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
452     +ALPHA[1]*X_PRES[2][z]
453     +ALPHA[2]*X_PRES[3][z]
454     +ALPHA[3]*X_PRES[4][z]
455     +ALPHA[4]*X_PRES[5][z]
456     +ALPHA[5]*X_PRES[6][z]
457     +ALPHA[6]*X_PRES[7][z];
458     }
459     for (i=7;i<order;++i) {
460 gross 1556 #pragma omp parallel for private(z) schedule(static)
461 jgs 154 for (z=0;z<n;++z) {
462     R_PRES[0][z]+=ALPHA[i]*R_PRES[i+1][z];
463     X_PRES[0][z]+=ALPHA[i]*X_PRES[i+1][z];
464     }
465 jgs 150 }
466     }
467 jgs 154 if (breakf0>0.) {
468 jgs 150 /***
469 jgs 154 ***** calculate gamma from min_(gamma){|R+gamma*(R_PRES-R)|_2}:
470 jgs 150 ***/
471 gross 1556 SC1=ZERO;
472     SC2=ZERO;
473     L2_R=ZERO;
474     #pragma omp parallel for private(z,diff) reduction(+:SC1,SC2) schedule(static)
475 jgs 154 for (z=0;z<n;++z) {
476     diff=R_PRES[0][z]-r[z];
477 jgs 150 SC1+=diff*diff;
478     SC2+=diff*r[z];
479     }
480 jgs 154 gamma=(SC1<=ZERO) ? ZERO : -SC2/SC1;
481 gross 1556 #pragma omp parallel for private(z) reduction(+:L2_R) schedule(static)
482 jgs 154 for (z=0;z<n;++z) {
483     x[z]+=gamma*(X_PRES[0][z]-x[z]);
484     r[z]+=gamma*(R_PRES[0][z]-r[z]);
485 jgs 150 L2_R+=r[z]*r[z];
486     }
487     norm_of_residual=sqrt(L2_R);
488     convergeFlag = (norm_of_residual <= tol);
489 jgs 154 if (restart>0) restartFlag=(num_iter_restart >= restart);
490     } else {
491     convergeFlag=FALSE;
492     restartFlag=FALSE;
493 jgs 150 }
494 jgs 154 maxIterFlag = (num_iter >= maxit);
495     }
496     }
497     /* end of iteration */
498 gross 1556 Norm_of_residual_global=norm_of_residual;
499     Num_iter_global=num_iter;
500     if (maxIterFlag) {
501 jgs 154 Status = SOLVER_MAXITER_REACHED;
502 jgs 150 } else if (breakFlag) {
503 jgs 154 Status = SOLVER_BREAKDOWN;
504 gross 1556 }
505     }
506 jgs 154 for (i=0;i<Length_of_recursion;i++) {
507     TMPMEMFREE(X_PRES[i]);
508     TMPMEMFREE(R_PRES[i]);
509     TMPMEMFREE(P_PRES[i]);
510 jgs 150 }
511 gross 1028 TMPMEMFREE(AP);
512     TMPMEMFREE(X_PRES);
513     TMPMEMFREE(R_PRES);
514     TMPMEMFREE(P_PRES);
515     TMPMEMFREE(P_PRES_dot_AP);
516     TMPMEMFREE(R_PRES_dot_P_PRES);
517     TMPMEMFREE(BREAKF);
518     TMPMEMFREE(ALPHA);
519 jgs 154 *iter=Num_iter_global;
520     *tolerance=Norm_of_residual_global;
521     return Status;
522 jgs 150 }

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26