/[escript]/trunk/paso/src/GMRES.c
ViewVC logotype

Annotation of /trunk/paso/src/GMRES.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 150 - (hide annotations)
Thu Sep 15 03:44:45 2005 UTC (14 years, 5 months ago) by jgs
Original Path: trunk/esys2/paso/src/Solvers/GMRES.c
File MIME type: text/plain
File size: 24069 byte(s)
Merge of development branch dev-02 back to main trunk on 2005-09-15

1 jgs 150 /* $Id$ */
2    
3     /*
4     * Purpose
5     * =======
6     *
7     * GMRES solves the linear system A*x=b using the
8     * truncated and restered GMRES method with preconditioning.
9     *
10     * Convergence test: norm( b - A*x )< TOL.
11     *
12     *
13     *
14     * Arguments
15     * =========
16     *
17     * r (input/output) double array, dimension n_row.
18     * On entry, residual of inital guess X
19     *
20     * x (input/output) double array, dimension n_col.
21     * On input, the initial guess.
22     *
23     * iter (input/output) int
24     * On input, the maximum num_iterations to be performed.
25     * On output, actual number of num_iterations performed.
26     *
27     * tolerance (input/output) DOUBLE PRECISION
28     * On input, the allowable convergence measure for
29     * norm( b - A*x )
30     * On output, the final value of this measure.
31     *
32     * length_of_recursion (input) gives the number of residual to be kept in orthogonalization process
33     *
34     * restart (input) If restart>0, iteration is resterted a after restart steps.
35     *
36     * INFO (output) int
37     *
38     * =SOLVER_NO_ERROR: Successful exit. num_iterated approximate solution returned.
39     * =SOLVER_MAXNUM_ITEr_REACHED
40     * =SOLVER_INPUT_ERROR Illegal parameter:
41     * =SOLVER_BREAKDOWN: If parameters RHO or OMEGA become smaller
42     * =SOLVER_MEMORY_ERROR : If parameters RHO or OMEGA become smaller
43     *
44     * ==============================================================
45     */
46    
47     #include "Common.h"
48     #include "SystemMatrix.h"
49     #include "Solver.h"
50     #ifdef _OPENMP
51     #include <omp.h>
52     #endif
53    
54     err_t Paso_Solver_GMRES(
55     Paso_SystemMatrix * A,
56     double * r,
57     double * x,
58     dim_t *iter,
59     double * tolerance,dim_t length_of_recursion,dim_t restart) {
60    
61     /* Local variables */
62    
63     double *x_PRES,*r_PRES,*P,*AP,*x_PRES_MEM[MAX(length_of_recursion,0)],*r_PRES_MEM[MAX(length_of_recursion,0)];
64     double r_PRES_MEMdotAP[MAX(length_of_recursion,0)],L2_r_PRES_MEM[MAX(length_of_recursion,0)],BREAKF_MEM[MAX(length_of_recursion,0)];
65     double r_PRESdotAP,r_PRES_MEMdotAP0,r_PRES_MEMdotAP1,r_PRES_MEMdotAP2,r_PRES_MEMdotAP3,r_PRES_MEMdotAP4,L2_r_PRES,r_PRES_MEMdotAP5;
66     double *tmp,tol,BREAKF,factor,GAMMA,SC1,SC2,norm_of_residual,diff,L2_R,norm_of_residual_global;
67     dim_t maxit,num_iter_global,num_iter_restart,num_iter;
68     dim_t i,z,OLDEST,ORDER;
69     bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE,restartFlag=FALSE;
70     err_t status=SOLVER_NO_ERROR;
71    
72     /* adapt original routine parameters */
73    
74     dim_t n_col=A->num_cols * A-> col_block_size;
75     dim_t n_row=A->num_rows * A-> row_block_size;
76    
77     /* Test the input parameters. */
78     if (restart>0) restart=MAX(length_of_recursion,restart);
79     if (n_col < 0 || n_row<0 || length_of_recursion<=0) {
80     return SOLVER_INPUT_ERROR;
81     }
82    
83     /* allocate memory: */
84    
85     x_PRES=TMPMEMALLOC(n_col,double);
86     r_PRES=TMPMEMALLOC(n_row,double);
87     P=TMPMEMALLOC(n_col,double);
88     AP=TMPMEMALLOC(n_row,double);
89     if (x_PRES==NULL || r_PRES==NULL || P==NULL || AP==NULL) status=SOLVER_MEMORY_ERROR;
90     for (i=0;i<length_of_recursion;i++) {
91     x_PRES_MEM[i]=TMPMEMALLOC(n_col,double);
92     r_PRES_MEM[i]=TMPMEMALLOC(n_row,double);
93     if (x_PRES_MEM[i]==NULL || r_PRES_MEM[i]==NULL) status=SOLVER_MEMORY_ERROR;
94     }
95     if ( status ==SOLVER_NO_ERROR ) {
96    
97     /* now PRES starts : */
98     maxit=*iter;
99     tol=*tolerance;
100     norm_of_residual=tol;
101    
102     #pragma omp parallel firstprivate(maxit,tol) \
103     private(num_iter,i,num_iter_restart,ORDER,OLDEST,BREAKF,factor,GAMMA,restartFlag,convergeFlag,maxIterFlag,breakFlag)
104     {
105     /* initialization */
106    
107     restartFlag=TRUE;
108     num_iter=0;
109     #pragma omp for private(z) schedule(static) nowait
110     for (z=0; z < n_row; ++z) AP[z]=0;
111     #pragma omp for private(z) schedule(static) nowait
112     for (z=0; z < n_col; ++z) P[z]=0;
113     for(i=0;i<length_of_recursion;++i) {
114     #pragma omp for private(z) schedule(static) nowait
115     for (z=0; z < n_row; ++z) r_PRES_MEM[i][z]=0;
116     #pragma omp for private(z) schedule(static) nowait
117     for (z=0; z < n_col; ++z) x_PRES_MEM[i][z]=0;
118     }
119    
120     next:
121     #pragma omp barrier
122     if (restartFlag) {
123     #pragma omp for private(z) schedule(static) nowait
124     for (z=0; z < n_row; ++z) r_PRES[z]=r[z];
125     #pragma omp for private(z) schedule(static) nowait
126     for (z=0; z < n_col; ++z) x_PRES[z]=x[z];
127     num_iter_restart=0;
128     OLDEST=length_of_recursion-1;
129     BREAKF=ONE;
130     restartFlag=FALSE;
131     }
132     ++num_iter;
133     ++num_iter_restart;
134     /* ORDER is the dimension of the space on which the residual is minimized: */
135     ORDER=MIN(num_iter_restart-1,length_of_recursion);
136     /* OLDEST points to the oldest r_PRES and x_PRES in memory :*/
137     OLDEST=(OLDEST==length_of_recursion-1) ? 0 : OLDEST+1;
138    
139     /***
140     *** calculate new search direction P from r_PRES
141     ***/
142     Paso_Solver_solvePreconditioner(A,&P[0], &r_PRES[0]);
143     /***
144     *** apply A to P to get AP
145     ***/
146     Paso_SystemMatrix_MatrixVector(ONE, A, &P[0],ZERO, &AP[0]);
147     /***
148     ***** calculation of the norm of R and the scalar products of
149     *** the residuals and A*P:
150     ***/
151     if (ORDER==0) {
152     #pragma omp master
153     {
154     L2_r_PRES=ZERO;
155     r_PRESdotAP=ZERO;
156     }
157     #pragma omp barrier
158     #pragma omp for private(z) reduction(+:L2_r_PRES,r_PRESdotAP) schedule(static)
159     for (z=0;z<n_row;++z) {
160     L2_r_PRES+=r_PRES[z]*r_PRES[z];
161     r_PRESdotAP+=r_PRES[z]*AP[z];
162     }
163     } else if (ORDER==1) {
164     #pragma omp master
165     {
166     L2_r_PRES=ZERO;
167     r_PRESdotAP=ZERO;
168     r_PRES_MEMdotAP0=ZERO;
169     }
170     #pragma omp barrier
171     #pragma omp for private(z) reduction(+:L2_r_PRES,r_PRESdotAP,r_PRES_MEMdotAP0) schedule(static)
172     for (z=0;z<n_row;++z) {
173     L2_r_PRES+=r_PRES[z]*r_PRES[z];
174     r_PRESdotAP+=r_PRES[z]*AP[z];
175     r_PRES_MEMdotAP0+=r_PRES_MEM[0][z]*AP[z];
176     }
177     #pragma omp master
178     {
179     r_PRES_MEMdotAP[0]=r_PRES_MEMdotAP0;
180     }
181    
182     } else if (ORDER==2) {
183     #pragma omp master
184     {
185     L2_r_PRES=ZERO;
186     r_PRESdotAP=ZERO;
187     r_PRES_MEMdotAP0=ZERO;
188     r_PRES_MEMdotAP1=ZERO;
189     }
190     #pragma omp barrier
191     #pragma omp for private(z) reduction(+:L2_r_PRES,r_PRESdotAP,r_PRES_MEMdotAP0,r_PRES_MEMdotAP1) schedule(static)
192     for (z=0;z<n_row;++z) {
193     L2_r_PRES+=r_PRES[z]*r_PRES[z];
194     r_PRESdotAP+=r_PRES[z]*AP[z];
195     r_PRES_MEMdotAP0+=r_PRES_MEM[0][z]*AP[z];
196     r_PRES_MEMdotAP1+=r_PRES_MEM[1][z]*AP[z];
197     }
198     #pragma omp master
199     {
200     r_PRES_MEMdotAP[0]=r_PRES_MEMdotAP0;
201     r_PRES_MEMdotAP[1]=r_PRES_MEMdotAP1;
202     }
203    
204     } else if (ORDER==3) {
205     #pragma omp master
206     {
207     L2_r_PRES=ZERO;
208     r_PRESdotAP=ZERO;
209     r_PRES_MEMdotAP0=ZERO;
210     r_PRES_MEMdotAP1=ZERO;
211     r_PRES_MEMdotAP2=ZERO;
212     }
213     #pragma omp barrier
214     #pragma omp for private(z) reduction(+:L2_r_PRES,r_PRESdotAP,r_PRES_MEMdotAP0,r_PRES_MEMdotAP1,r_PRES_MEMdotAP2) schedule(static)
215     for (z=0;z<n_row;++z) {
216     L2_r_PRES+=r_PRES[z]*r_PRES[z];
217     r_PRESdotAP+=r_PRES[z]*AP[z];
218     r_PRES_MEMdotAP0+=r_PRES_MEM[0][z]*AP[z];
219     r_PRES_MEMdotAP1+=r_PRES_MEM[1][z]*AP[z];
220     r_PRES_MEMdotAP2+=r_PRES_MEM[2][z]*AP[z];
221     }
222     #pragma omp master
223     {
224     r_PRES_MEMdotAP[0]=r_PRES_MEMdotAP0;
225     r_PRES_MEMdotAP[1]=r_PRES_MEMdotAP1;
226     r_PRES_MEMdotAP[2]=r_PRES_MEMdotAP2;
227     }
228     } else if (ORDER==4) {
229     #pragma omp master
230     {
231     L2_r_PRES=ZERO;
232     r_PRESdotAP=ZERO;
233     r_PRES_MEMdotAP0=ZERO;
234     r_PRES_MEMdotAP1=ZERO;
235     r_PRES_MEMdotAP2=ZERO;
236     r_PRES_MEMdotAP3=ZERO;
237     }
238     #pragma omp barrier
239     #pragma omp for private(z) reduction(+:L2_r_PRES,r_PRESdotAP,r_PRES_MEMdotAP0,r_PRES_MEMdotAP1,r_PRES_MEMdotAP2,r_PRES_MEMdotAP3) schedule(static)
240     for (z=0;z<n_row;++z) {
241     L2_r_PRES+=r_PRES[z]*r_PRES[z];
242     r_PRESdotAP+=r_PRES[z]*AP[z];
243     r_PRES_MEMdotAP0+=r_PRES_MEM[0][z]*AP[z];
244     r_PRES_MEMdotAP1+=r_PRES_MEM[1][z]*AP[z];
245     r_PRES_MEMdotAP2+=r_PRES_MEM[2][z]*AP[z];
246     r_PRES_MEMdotAP3+=r_PRES_MEM[3][z]*AP[z];
247     }
248     #pragma omp master
249     {
250     r_PRES_MEMdotAP[0]=r_PRES_MEMdotAP0;
251     r_PRES_MEMdotAP[1]=r_PRES_MEMdotAP1;
252     r_PRES_MEMdotAP[2]=r_PRES_MEMdotAP2;
253     r_PRES_MEMdotAP[3]=r_PRES_MEMdotAP3;
254     }
255     } else if (ORDER==5) {
256     #pragma omp master
257     {
258     L2_r_PRES=ZERO;
259     r_PRESdotAP=ZERO;
260     r_PRES_MEMdotAP0=ZERO;
261     r_PRES_MEMdotAP1=ZERO;
262     r_PRES_MEMdotAP2=ZERO;
263     r_PRES_MEMdotAP3=ZERO;
264     r_PRES_MEMdotAP4=ZERO;
265     }
266     #pragma omp barrier
267     #pragma omp for private(z) reduction(+:L2_r_PRES,r_PRESdotAP,r_PRES_MEMdotAP0,r_PRES_MEMdotAP1,r_PRES_MEMdotAP2,r_PRES_MEMdotAP3,r_PRES_MEMdotAP4) schedule(static)
268     for (z=0;z<n_row;++z) {
269     L2_r_PRES+=r_PRES[z]*r_PRES[z];
270     r_PRESdotAP+=r_PRES[z]*AP[z];
271     r_PRES_MEMdotAP0+=r_PRES_MEM[0][z]*AP[z];
272     r_PRES_MEMdotAP1+=r_PRES_MEM[1][z]*AP[z];
273     r_PRES_MEMdotAP2+=r_PRES_MEM[2][z]*AP[z];
274     r_PRES_MEMdotAP3+=r_PRES_MEM[3][z]*AP[z];
275     r_PRES_MEMdotAP4+=r_PRES_MEM[4][z]*AP[z];
276     }
277     #pragma omp master
278     {
279     r_PRES_MEMdotAP[0]=r_PRES_MEMdotAP0;
280     r_PRES_MEMdotAP[1]=r_PRES_MEMdotAP1;
281     r_PRES_MEMdotAP[2]=r_PRES_MEMdotAP2;
282     r_PRES_MEMdotAP[3]=r_PRES_MEMdotAP3;
283     r_PRES_MEMdotAP[4]=r_PRES_MEMdotAP4;
284     }
285     } else if (ORDER==6) {
286     #pragma omp master
287     {
288     L2_r_PRES=ZERO;
289     r_PRESdotAP=ZERO;
290     r_PRES_MEMdotAP0=ZERO;
291     r_PRES_MEMdotAP1=ZERO;
292     r_PRES_MEMdotAP2=ZERO;
293     r_PRES_MEMdotAP3=ZERO;
294     r_PRES_MEMdotAP4=ZERO;
295     r_PRES_MEMdotAP5=ZERO;
296     }
297     #pragma omp barrier
298     #pragma omp for private(z) reduction(+:L2_r_PRES,r_PRESdotAP,r_PRES_MEMdotAP0,r_PRES_MEMdotAP1,r_PRES_MEMdotAP2,r_PRES_MEMdotAP3,r_PRES_MEMdotAP4,r_PRES_MEMdotAP5) schedule(static)
299     for (z=0;z<n_row;++z) {
300     L2_r_PRES+=r_PRES[z]*r_PRES[z];
301     r_PRESdotAP+=r_PRES[z]*AP[z];
302     r_PRES_MEMdotAP0+=r_PRES_MEM[0][z]*AP[z];
303     r_PRES_MEMdotAP1+=r_PRES_MEM[1][z]*AP[z];
304     r_PRES_MEMdotAP2+=r_PRES_MEM[2][z]*AP[z];
305     r_PRES_MEMdotAP3+=r_PRES_MEM[3][z]*AP[z];
306     r_PRES_MEMdotAP4+=r_PRES_MEM[4][z]*AP[z];
307     r_PRES_MEMdotAP5+=r_PRES_MEM[5][z]*AP[z];
308     }
309     #pragma omp master
310     {
311     r_PRES_MEMdotAP[0]=r_PRES_MEMdotAP0;
312     r_PRES_MEMdotAP[1]=r_PRES_MEMdotAP1;
313     r_PRES_MEMdotAP[2]=r_PRES_MEMdotAP2;
314     r_PRES_MEMdotAP[3]=r_PRES_MEMdotAP3;
315     r_PRES_MEMdotAP[4]=r_PRES_MEMdotAP4;
316     r_PRES_MEMdotAP[5]=r_PRES_MEMdotAP5;
317     }
318     } else {
319     #pragma omp master
320     {
321     L2_r_PRES=ZERO;
322     r_PRESdotAP=ZERO;
323     r_PRES_MEMdotAP0=ZERO;
324     }
325     #pragma omp barrier
326     #pragma omp for private(z) reduction(+:L2_r_PRES,r_PRESdotAP) schedule(static)
327     for (z=0;z<n_row;++z) {
328     L2_r_PRES+=r_PRES[z]*r_PRES[z];
329     r_PRESdotAP+=r_PRES[z]*AP[z];
330     }
331     for (i=0;i<ORDER;++i) {
332     #pragma omp for private(z) reduction(+:r_PRES_MEMdotAP0) schedule(static)
333     for (z=0;z<n_row;++z) r_PRES_MEMdotAP0+=r_PRES_MEM[i][z]*AP[z];
334     #pragma omp master
335     {
336     r_PRES_MEMdotAP[i]=r_PRES_MEMdotAP0;
337     r_PRES_MEMdotAP0=ZERO;
338     }
339     #pragma omp barrier
340     }
341     }
342     /* if L2_r_PRES=0 there is a fatal breakdown */
343     if (L2_r_PRES<=ZERO) {
344     breakFlag=TRUE;
345     } else {
346     /***
347     ***** calculation of the weights for the update of X:
348     */
349     #pragma omp master
350     {
351     r_PRESdotAP*= -ONE/L2_r_PRES;
352     for (i=0;i<ORDER;++i) r_PRES_MEMdotAP[i]*=-ONE/L2_r_PRES_MEM[i];
353     }
354     /*
355     ***** update of solution x_PRES and its residual r_PRES:
356     ***
357     *** P is used to accumulate X and AP to accumulate R. X and R
358     *** are still needed until they are put into the X and R memory
359     *** r_PRES_MEM and x_PRES_MEM
360     ***
361     **/
362     #pragma omp barrier
363     if (ORDER==0) {
364     #pragma omp for private(z) schedule(static)
365     for (z=0;z<n_row;++z)
366     AP[z]+=r_PRESdotAP*r_PRES[z];
367     #pragma omp for private(z) schedule(static)
368     for (z=0;z<n_col;++z)
369     P[z]=-P[z]+r_PRESdotAP*x_PRES[z];
370     } else if (ORDER==1) {
371     #pragma omp for private(z) schedule(static)
372     for (z=0;z<n_row;++z)
373     AP[z]+=r_PRESdotAP*r_PRES[z]+r_PRES_MEMdotAP[0]*r_PRES_MEM[0][z];
374     #pragma omp for private(z) schedule(static)
375     for (z=0;z<n_col;++z)
376     P[z]=-P[z]+r_PRESdotAP*x_PRES[z]+r_PRES_MEMdotAP[0]*x_PRES_MEM[0][z];
377     } else if (ORDER==2) {
378     #pragma omp for private(z) schedule(static)
379     for (z=0;z<n_row;++z)
380     AP[z]+=r_PRESdotAP*r_PRES[z]+r_PRES_MEMdotAP[0]*r_PRES_MEM[0][z]
381     +r_PRES_MEMdotAP[1]*r_PRES_MEM[1][z];
382     #pragma omp for private(z) schedule(static)
383     for (z=0;z<n_col;++z)
384     P[z]=-P[z]+r_PRESdotAP*x_PRES[z]+r_PRES_MEMdotAP[0]*x_PRES_MEM[0][z]
385     +r_PRES_MEMdotAP[1]*x_PRES_MEM[1][z];
386     } else if (ORDER==3) {
387     #pragma omp for private(z) schedule(static)
388     for (z=0;z<n_row;++z)
389     AP[z]+=r_PRESdotAP*r_PRES[z]+r_PRES_MEMdotAP[0]*r_PRES_MEM[0][z]
390     +r_PRES_MEMdotAP[1]*r_PRES_MEM[1][z]
391     +r_PRES_MEMdotAP[2]*r_PRES_MEM[2][z];
392     #pragma omp for private(z) schedule(static)
393     for (z=0;z<n_col;++z)
394     P[z]=-P[z]+r_PRESdotAP*x_PRES[z]+r_PRES_MEMdotAP[0]*x_PRES_MEM[0][z]
395     +r_PRES_MEMdotAP[1]*x_PRES_MEM[1][z]
396     +r_PRES_MEMdotAP[2]*x_PRES_MEM[2][z];
397     } else if (ORDER==4) {
398     #pragma omp for private(z) schedule(static)
399     for (z=0;z<n_row;++z)
400     AP[z]+=r_PRESdotAP*r_PRES[z]+r_PRES_MEMdotAP[0]*r_PRES_MEM[0][z]
401     +r_PRES_MEMdotAP[1]*r_PRES_MEM[1][z]
402     +r_PRES_MEMdotAP[2]*r_PRES_MEM[2][z]
403     +r_PRES_MEMdotAP[3]*r_PRES_MEM[3][z];
404     #pragma omp for private(z) schedule(static)
405     for (z=0;z<n_col;++z)
406     P[z]=-P[z]+r_PRESdotAP*x_PRES[z]+r_PRES_MEMdotAP[0]*x_PRES_MEM[0][z]
407     +r_PRES_MEMdotAP[1]*x_PRES_MEM[1][z]
408     +r_PRES_MEMdotAP[2]*x_PRES_MEM[2][z]
409     +r_PRES_MEMdotAP[3]*x_PRES_MEM[3][z];
410     } else if (ORDER==5) {
411     #pragma omp for private(z) schedule(static)
412     for (z=0;z<n_row;++z)
413     AP[z]+=r_PRESdotAP*r_PRES[z]+r_PRES_MEMdotAP[0]*r_PRES_MEM[0][z]
414     +r_PRES_MEMdotAP[1]*r_PRES_MEM[1][z]
415     +r_PRES_MEMdotAP[2]*r_PRES_MEM[2][z]
416     +r_PRES_MEMdotAP[3]*r_PRES_MEM[3][z]
417     +r_PRES_MEMdotAP[4]*r_PRES_MEM[4][z];
418     #pragma omp for private(z) schedule(static)
419     for (z=0;z<n_col;++z)
420     P[z]=-P[z]+r_PRESdotAP*x_PRES[z]+r_PRES_MEMdotAP[0]*x_PRES_MEM[0][z]
421     +r_PRES_MEMdotAP[1]*x_PRES_MEM[1][z]
422     +r_PRES_MEMdotAP[2]*x_PRES_MEM[2][z]
423     +r_PRES_MEMdotAP[3]*x_PRES_MEM[3][z]
424     +r_PRES_MEMdotAP[4]*x_PRES_MEM[4][z];
425     } else if (ORDER==6) {
426     #pragma omp for private(z) schedule(static)
427     for (z=0;z<n_row;++z)
428     AP[z]+=r_PRESdotAP*r_PRES[z]+r_PRES_MEMdotAP[0]*r_PRES_MEM[0][z]
429     +r_PRES_MEMdotAP[1]*r_PRES_MEM[1][z]
430     +r_PRES_MEMdotAP[2]*r_PRES_MEM[2][z]
431     +r_PRES_MEMdotAP[3]*r_PRES_MEM[3][z]
432     +r_PRES_MEMdotAP[4]*r_PRES_MEM[4][z]
433     +r_PRES_MEMdotAP[5]*r_PRES_MEM[5][z];
434     #pragma omp for private(z) schedule(static)
435     for (z=0;z<n_col;++z)
436     P[z]=-P[z]+r_PRESdotAP*x_PRES[z]+r_PRES_MEMdotAP[0]*x_PRES_MEM[0][z]
437     +r_PRES_MEMdotAP[1]*x_PRES_MEM[1][z]
438     +r_PRES_MEMdotAP[2]*x_PRES_MEM[2][z]
439     +r_PRES_MEMdotAP[3]*x_PRES_MEM[3][z]
440     +r_PRES_MEMdotAP[4]*x_PRES_MEM[4][z]
441     +r_PRES_MEMdotAP[5]*x_PRES_MEM[5][z];
442     } else {
443     #pragma omp for private(z) schedule(static)
444     for (z=0;z<n_row;++z)
445     AP[z]+=r_PRESdotAP*r_PRES[z];
446     #pragma omp for private(z) schedule(static)
447     for (z=0;z<n_col;++z)
448     P[z]=-P[z]+r_PRESdotAP*x_PRES[z];
449    
450     for (i=0;i<ORDER;++i) {
451     #pragma omp for private(z) schedule(static)
452     for (z=0;z<n_row;++z)
453     AP[z]+=r_PRES_MEMdotAP[i]*r_PRES_MEM[i][z];
454     #pragma omp for private(z) schedule(static)
455     for (z=0;z<n_col;++z)
456     P[z]+=r_PRES_MEMdotAP[i]*x_PRES_MEM[i][z];
457     }
458     }
459     /*** factor scales AP and P to make it a residual and
460     *** as solution approximation
461     ***
462     *** if factor is equal to zero a breakdown occurs. the
463     *** iteration procedure can be continued but r_PRES is not the
464     *** residual of x_PRES approximation.
465     ***/
466     factor=BREAKF*r_PRESdotAP;
467     for (i=0;i<ORDER;++i) factor +=BREAKF_MEM[i]*r_PRES_MEMdotAP[i];
468     /***
469     ***** x_PRES and r_PRES are moved to memory:
470     ***/
471     #pragma omp master
472     {
473     L2_r_PRES_MEM[OLDEST]=L2_r_PRES;
474     BREAKF_MEM[OLDEST]=BREAKF;
475     tmp=x_PRES; x_PRES=x_PRES_MEM[OLDEST];x_PRES_MEM[OLDEST]=tmp;
476     tmp=r_PRES; r_PRES=r_PRES_MEM[OLDEST];r_PRES_MEM[OLDEST]=tmp;
477     }
478     if (ABS(factor)<=ZERO) {
479     /* in case of a break down: */
480     BREAKF=ZERO;
481     #pragma omp for private(z) schedule(static)
482     for (z=0;z<n_row;++z) r_PRES[z]=AP[z];
483     #pragma omp for private(z) schedule(static)
484     for (z=0;z<n_col;++z) x_PRES[z]=P[z];
485     /* is there any progress */
486     breakFlag=TRUE;
487     #pragma omp barrier
488     for (i=0;i<ORDER;++i) if (BREAKF_MEM[i]>ZERO) breakFlag=FALSE;
489     convergeFlag = FALSE;
490     maxIterFlag = FALSE;
491     } else {
492     BREAKF=ONE;
493     breakFlag=FALSE;
494     /***
495     *** rescale P an AP and apply smooting:
496     ***
497     ***** calculate GAMMA from min_(GAMMA){|R+GAMMA*(r_PRES-R)|_2}:
498     ***/
499     #pragma omp master
500     {
501     SC1=ZERO;
502     SC2=ZERO;
503     L2_R=ZERO;
504     }
505     factor=ONE/factor;
506     #pragma omp barrier
507     #pragma omp for private(z,diff) reduction(+:SC1,SC2) schedule(static)
508     for (z=0;z<n_row;++z) {
509     r_PRES[z]=factor*AP[z];
510     diff=r_PRES[z]-r[z];
511     SC1+=diff*diff;
512     SC2+=diff*r[z];
513     }
514     GAMMA=(SC1<=ZERO) ? ZERO : -SC2/SC1;
515     #pragma omp for private(z) schedule(static)
516     for (z=0;z<n_col;++z) {
517     x_PRES[z]=factor* P[z];
518     x[z]+=GAMMA*(x_PRES[z]-x[z]);
519     }
520     #pragma omp for private(z) reduction(+:L2_R) schedule(static)
521     for (z=0;z<n_row;++z) {
522     r[z]+=GAMMA*(r_PRES[z]-r[z]);
523     L2_R+=r[z]*r[z];
524     }
525     norm_of_residual=sqrt(L2_R);
526     convergeFlag = (norm_of_residual <= tol);
527     maxIterFlag = (num_iter >= maxit);
528     }
529     }
530     if (restart>0) restartFlag=(num_iter_restart >= restart);
531     if (! (convergeFlag || maxIterFlag || breakFlag)) goto next;
532     /* end of iteration */
533     #pragma omp master
534     {
535     norm_of_residual_global=norm_of_residual;
536     num_iter_global=num_iter;
537     if (maxIterFlag) {
538     status = SOLVER_MAXITER_REACHED;
539     } else if (breakFlag) {
540     status = SOLVER_BREAKDOWN;
541     }
542     }
543     } /* end of parallel region */
544     TMPMEMFREE(x_PRES);
545     TMPMEMFREE(r_PRES);
546     TMPMEMFREE(P);
547     TMPMEMFREE(AP);
548     for (i=0;i<length_of_recursion;i++) {
549     TMPMEMFREE(x_PRES_MEM[i]);
550     TMPMEMFREE(r_PRES_MEM[i]);
551     }
552     *iter=num_iter_global;
553     *tolerance=norm_of_residual_global;
554     }
555     return status;
556     }
557    
558    
559     /*
560     * $Log$
561     * Revision 1.2 2005/09/15 03:44:40 jgs
562     * Merge of development branch dev-02 back to main trunk on 2005-09-15
563     *
564     * Revision 1.1.2.1 2005/09/05 06:29:49 gross
565     * These files have been extracted from finley to define a stand alone libray for iterative
566     * linear solvers on the ALTIX. main entry through Paso_solve. this version compiles but
567     * has not been tested yet.
568     *
569     *
570     */
571    
572    

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26