/[escript]/trunk/paso/src/GMRES.c
ViewVC logotype

Contents of /trunk/paso/src/GMRES.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 154 - (show annotations)
Mon Nov 7 05:51:17 2005 UTC (14 years, 3 months ago) by jgs
Original Path: trunk/esys2/paso/src/Solvers/GMRES.c
File MIME type: text/plain
File size: 23121 byte(s)
Merge of development branch dev-02 back to main trunk on 2005-11-07

1 /* $Id$ */
2
3 /*
4 * Purpose
5 * =======
6 *
7 * GMRES solves the linear system A*x=b using the
8 * truncated and restered GMRES method with preconditioning.
9 *
10 * Convergence test: norm( b - A*x )< TOL.
11 *
12 *
13 *
14 * Arguments
15 * =========
16 *
17 * r (input/output) double array, dimension n.
18 * On entry, residual of inital guess X
19 *
20 * x (input/output) double array, dimension n.
21 * On input, the initial guess.
22 *
23 * iter (input/output) int
24 * On input, the maximum num_iterations to be performed.
25 * On output, actual number of num_iterations performed.
26 *
27 * tolerance (input/output) DOUBLE PRECISION
28 * On input, the allowable convergence measure for
29 * norm( b - A*x )
30 * On output, the final value of this measure.
31 *
32 * Length_of_recursion (input) gives the number of residual to be kept in orthogonalization process
33 *
34 * restart (input) If restart>0, iteration is resterted a after restart steps.
35 *
36 * INFO (output) int
37 *
38 * =SOLVER_NO_ERROR: Successful exit. num_iterated approximate solution returned.
39 * =SOLVER_MAXNUM_ITER_REACHED
40 * =SOLVER_INPUT_ERROR Illegal parameter:
41 * =SOLVER_BREAKDOWN: bad luck!
42 * =SOLVER_MEMORY_ERROR : no memory available
43 *
44 * ==============================================================
45 */
46
47 #include "Common.h"
48 #include "SystemMatrix.h"
49 #include "Solver.h"
50 #ifdef _OPENMP
51 #include <omp.h>
52 #endif
53
54 err_t Paso_Solver_GMRES(
55 Paso_SystemMatrix * A,
56 double * r,
57 double * x,
58 dim_t *iter,
59 double * tolerance,dim_t Length_of_recursion,dim_t restart) {
60
61 /* Local variables */
62
63 double *AP,*X_PRES[MAX(Length_of_recursion,0)+1],*R_PRES[MAX(Length_of_recursion,0)+1],*P_PRES[MAX(Length_of_recursion,0)+1];
64 double P_PRES_dot_AP[MAX(Length_of_recursion,0)],R_PRES_dot_P_PRES[MAX(Length_of_recursion,0)+1],BREAKF[MAX(Length_of_recursion,0)+1],ALPHA[MAX(Length_of_recursion,0)];
65 double P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3,P_PRES_dot_AP4,P_PRES_dot_AP5,P_PRES_dot_AP6,R_PRES_dot_P,abs_RP,breakf0;
66 double tol,Factor,sum_BREAKF,gamma,SC1,SC2,norm_of_residual,diff,L2_R,Norm_of_residual_global;
67 double *save_XPRES, *save_P_PRES, *save_R_PRES,save_R_PRES_dot_P_PRES;
68 dim_t maxit,Num_iter_global,num_iter_restart,num_iter;
69 dim_t i,z,order;
70 bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE,restartFlag=FALSE;
71 err_t Status=SOLVER_NO_ERROR;
72
73 /* adapt original routine parameters */
74
75 dim_t n=A->num_cols * A-> col_block_size;
76 dim_t Length_of_mem=MAX(Length_of_recursion,0)+1;
77
78 /* Test the input parameters. */
79 if (restart>0) restart=MAX(Length_of_recursion,restart);
80 if (n < 0 || Length_of_recursion<=0) {
81 return SOLVER_INPUT_ERROR;
82 }
83
84 /* allocate memory: */
85
86 AP=TMPMEMALLOC(n,double);
87 if (AP==NULL) Status=SOLVER_MEMORY_ERROR;
88 for (i=0;i<Length_of_mem;i++) {
89 X_PRES[i]=TMPMEMALLOC(n,double);
90 R_PRES[i]=TMPMEMALLOC(n,double);
91 P_PRES[i]=TMPMEMALLOC(n,double);
92 if (X_PRES[i]==NULL || R_PRES[i]==NULL || P_PRES[i]==NULL) Status=SOLVER_MEMORY_ERROR;
93 }
94 if ( Status ==SOLVER_NO_ERROR ) {
95
96 /* now PRES starts : */
97 maxit=*iter;
98 tol=*tolerance;
99
100 #pragma omp parallel firstprivate(maxit,tol,convergeFlag,maxIterFlag,breakFlag) \
101 private(num_iter,i,num_iter_restart,order,sum_BREAKF,gamma,restartFlag,norm_of_residual,abs_RP,breakf0,\
102 save_XPRES,save_P_PRES,save_R_PRES,save_R_PRES_dot_P_PRES)
103 {
104 /* initialization */
105
106 restartFlag=TRUE;
107 num_iter=0;
108 #pragma omp for private(z) schedule(static) nowait
109 for (z=0; z < n; ++z) AP[z]=0;
110 for(i=0;i<Length_of_mem;++i) {
111 #pragma omp for private(z) schedule(static) nowait
112 for (z=0; z < n; ++z) {
113 P_PRES[i][z]=0;
114 R_PRES[i][z]=0;
115 X_PRES[i][z]=0;
116 }
117 }
118
119 while (! (convergeFlag || maxIterFlag || breakFlag)) {
120 #pragma omp barrier
121 if (restartFlag) {
122 #pragma omp master
123 BREAKF[0]=ONE;
124 #pragma omp for private(z) schedule(static) nowait
125 for (z=0; z < n; ++z) {
126 R_PRES[0][z]=r[z];
127 X_PRES[0][z]=x[z];
128 }
129 num_iter_restart=0;
130 restartFlag=FALSE;
131 }
132 ++num_iter;
133 ++num_iter_restart;
134 /* order is the dimension of the space on which the residual is minimized: */
135 order=MIN(num_iter_restart,Length_of_recursion);
136 /***
137 *** calculate new search direction P from R_PRES
138 ***/
139 Paso_Solver_solvePreconditioner(A,&P_PRES[0][0], &R_PRES[0][0]);
140 /***
141 *** apply A to P to get AP
142 ***/
143 Paso_SystemMatrix_MatrixVector(ONE, A, &P_PRES[0][0],ZERO, &AP[0]);
144 /***
145 ***** calculation of the norm of R and the scalar products of
146 *** the residuals and A*P:
147 ***/
148 if (order==0) {
149 #pragma omp master
150 R_PRES_dot_P=ZERO;
151 #pragma omp barrier
152 #pragma omp for private(z) reduction(+:R_PRES_dot_P) schedule(static)
153 for (z=0;z<n;++z)
154 R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
155 #pragma omp master
156 R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
157 } else if (order==1) {
158 #pragma omp master
159 {
160 R_PRES_dot_P=ZERO;
161 P_PRES_dot_AP0=ZERO;
162 }
163 #pragma omp barrier
164 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0) schedule(static)
165 for (z=0;z<n;++z) {
166 R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
167 P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
168 }
169 #pragma omp master
170 {
171 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
172 R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
173 }
174 } else if (order==2) {
175 #pragma omp master
176 {
177 R_PRES_dot_P=ZERO;
178 P_PRES_dot_AP0=ZERO;
179 P_PRES_dot_AP1=ZERO;
180 }
181 #pragma omp barrier
182 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1) schedule(static)
183 for (z=0;z<n;++z) {
184 R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
185 P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
186 P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
187 }
188 #pragma omp master
189 {
190 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
191 P_PRES_dot_AP[1]=P_PRES_dot_AP1;
192 R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
193 }
194 } else if (order==3) {
195 #pragma omp master
196 {
197 R_PRES_dot_P=ZERO;
198 P_PRES_dot_AP0=ZERO;
199 P_PRES_dot_AP1=ZERO;
200 P_PRES_dot_AP2=ZERO;
201 }
202 #pragma omp barrier
203 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2) schedule(static)
204 for (z=0;z<n;++z) {
205 R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
206 P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
207 P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
208 P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
209 }
210 #pragma omp master
211 {
212 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
213 P_PRES_dot_AP[1]=P_PRES_dot_AP1;
214 P_PRES_dot_AP[2]=P_PRES_dot_AP2;
215 R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
216 }
217 } else if (order==4) {
218 #pragma omp master
219 {
220 R_PRES_dot_P=ZERO;
221 P_PRES_dot_AP0=ZERO;
222 P_PRES_dot_AP1=ZERO;
223 P_PRES_dot_AP2=ZERO;
224 P_PRES_dot_AP3=ZERO;
225 }
226 #pragma omp barrier
227 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3) schedule(static)
228 for (z=0;z<n;++z) {
229 R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
230 P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
231 P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
232 P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
233 P_PRES_dot_AP3+=P_PRES[3][z]*AP[z];
234 }
235 #pragma omp master
236 {
237 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
238 P_PRES_dot_AP[1]=P_PRES_dot_AP1;
239 P_PRES_dot_AP[2]=P_PRES_dot_AP2;
240 P_PRES_dot_AP[3]=P_PRES_dot_AP3;
241 R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
242 }
243 } else if (order==5) {
244 #pragma omp master
245 {
246 R_PRES_dot_P=ZERO;
247 P_PRES_dot_AP0=ZERO;
248 P_PRES_dot_AP1=ZERO;
249 P_PRES_dot_AP2=ZERO;
250 P_PRES_dot_AP3=ZERO;
251 P_PRES_dot_AP4=ZERO;
252 }
253 #pragma omp barrier
254 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3,P_PRES_dot_AP4) schedule(static)
255 for (z=0;z<n;++z) {
256 R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
257 P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
258 P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
259 P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
260 P_PRES_dot_AP3+=P_PRES[3][z]*AP[z];
261 P_PRES_dot_AP4+=P_PRES[4][z]*AP[z];
262 }
263 #pragma omp master
264 {
265 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
266 P_PRES_dot_AP[1]=P_PRES_dot_AP1;
267 P_PRES_dot_AP[2]=P_PRES_dot_AP2;
268 P_PRES_dot_AP[3]=P_PRES_dot_AP3;
269 P_PRES_dot_AP[4]=P_PRES_dot_AP4;
270 R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
271 }
272 } else if (order==6) {
273 #pragma omp master
274 {
275 R_PRES_dot_P=ZERO;
276 P_PRES_dot_AP0=ZERO;
277 P_PRES_dot_AP1=ZERO;
278 P_PRES_dot_AP2=ZERO;
279 P_PRES_dot_AP3=ZERO;
280 P_PRES_dot_AP4=ZERO;
281 P_PRES_dot_AP5=ZERO;
282 }
283 #pragma omp barrier
284 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3,P_PRES_dot_AP4,P_PRES_dot_AP5) schedule(static)
285 for (z=0;z<n;++z) {
286 R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
287 P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
288 P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
289 P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
290 P_PRES_dot_AP3+=P_PRES[3][z]*AP[z];
291 P_PRES_dot_AP4+=P_PRES[4][z]*AP[z];
292 P_PRES_dot_AP5+=P_PRES[5][z]*AP[z];
293 }
294 #pragma omp master
295 {
296 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
297 P_PRES_dot_AP[1]=P_PRES_dot_AP1;
298 P_PRES_dot_AP[2]=P_PRES_dot_AP2;
299 P_PRES_dot_AP[3]=P_PRES_dot_AP3;
300 P_PRES_dot_AP[4]=P_PRES_dot_AP4;
301 P_PRES_dot_AP[5]=P_PRES_dot_AP5;
302 R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
303 }
304 } else if (order>6) {
305 #pragma omp master
306 {
307 R_PRES_dot_P=ZERO;
308 P_PRES_dot_AP0=ZERO;
309 P_PRES_dot_AP1=ZERO;
310 P_PRES_dot_AP2=ZERO;
311 P_PRES_dot_AP3=ZERO;
312 P_PRES_dot_AP4=ZERO;
313 P_PRES_dot_AP5=ZERO;
314 P_PRES_dot_AP6=ZERO;
315 }
316 #pragma omp barrier
317 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3,P_PRES_dot_AP4,P_PRES_dot_AP5,P_PRES_dot_AP6) schedule(static)
318 for (z=0;z<n;++z) {
319 R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
320 P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
321 P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
322 P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
323 P_PRES_dot_AP3+=P_PRES[3][z]*AP[z];
324 P_PRES_dot_AP4+=P_PRES[4][z]*AP[z];
325 P_PRES_dot_AP5+=P_PRES[5][z]*AP[z];
326 P_PRES_dot_AP6+=P_PRES[6][z]*AP[z];
327 }
328 #pragma omp master
329 {
330 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
331 P_PRES_dot_AP[1]=P_PRES_dot_AP1;
332 P_PRES_dot_AP[2]=P_PRES_dot_AP2;
333 P_PRES_dot_AP[3]=P_PRES_dot_AP3;
334 P_PRES_dot_AP[4]=P_PRES_dot_AP4;
335 P_PRES_dot_AP[5]=P_PRES_dot_AP5;
336 P_PRES_dot_AP[6]=P_PRES_dot_AP6;
337 R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
338
339 P_PRES_dot_AP0=ZERO;
340 }
341 for (i=7;i<order;++i) {
342 #pragma omp barrier
343 #pragma omp for private(z) reduction(+:P_PRES_dot_AP0) schedule(static)
344 for (z=0;z<n;++z) P_PRES_dot_AP0+=P_PRES[i][z]*AP[z];
345 #pragma omp master
346 {
347 P_PRES_dot_AP[i]=P_PRES_dot_AP0;
348 P_PRES_dot_AP0=ZERO;
349 }
350 }
351 }
352 /* this fixes a problem with the intel compiler */
353 #pragma omp master
354 P_PRES_dot_AP0=R_PRES_dot_P_PRES[0];
355 #pragma omp barrier
356 /*** if sum_BREAKF is equal to zero a breakdown occurs.
357 *** iteration procedure can be continued but R_PRES is not the
358 *** residual of X_PRES approximation.
359 ***/
360 sum_BREAKF=0.;
361 for (i=0;i<order;++i) sum_BREAKF +=BREAKF[i];
362 breakFlag=!((ABS(P_PRES_dot_AP0) > ZERO) && (sum_BREAKF >ZERO));
363 if (!breakFlag) {
364 breakFlag=FALSE;
365 /***
366 ***** X_PRES and R_PRES are moved to memory:
367 ***/
368 #pragma omp master
369 {
370 Factor=0.;
371 for (i=0;i<order;++i) {
372 ALPHA[i]=-P_PRES_dot_AP[i]/R_PRES_dot_P_PRES[i];
373 Factor+=BREAKF[i]*ALPHA[i];
374 }
375
376 save_R_PRES_dot_P_PRES=R_PRES_dot_P_PRES[Length_of_mem-1];
377 save_R_PRES=R_PRES[Length_of_mem-1];
378 save_XPRES=X_PRES[Length_of_mem-1];
379 save_P_PRES=P_PRES[Length_of_mem-1];
380 for (i=Length_of_mem-1;i>0;--i) {
381 BREAKF[i]=BREAKF[i-1];
382 R_PRES_dot_P_PRES[i]=R_PRES_dot_P_PRES[i-1];
383 R_PRES[i]=R_PRES[i-1];
384 X_PRES[i]=X_PRES[i-1];
385 P_PRES[i]=P_PRES[i-1];
386 }
387 R_PRES_dot_P_PRES[0]=save_R_PRES_dot_P_PRES;
388 R_PRES[0]=save_R_PRES;
389 X_PRES[0]=save_XPRES;
390 P_PRES[0]=save_P_PRES;
391
392 if (ABS(Factor)<=ZERO) {
393 Factor=1.;
394 BREAKF[0]=ZERO;
395 } else {
396 Factor=1./Factor;
397 BREAKF[0]=ONE;
398 }
399 for (i=0;i<order;++i) ALPHA[i]*=Factor;
400 }
401 /*
402 ***** update of solution X_PRES and its residual R_PRES:
403 ***
404 *** P is used to accumulate X and AP to accumulate R. X and R
405 *** are still needed until they are put into the X and R memory
406 *** R_PRES and X_PRES
407 ***
408 **/
409 #pragma omp barrier
410 breakf0=BREAKF[0];
411 if (order==0) {
412 #pragma omp for private(z) schedule(static)
413 for (z=0;z<n;++z) {
414 R_PRES[0][z]= Factor* AP[z];
415 X_PRES[0][z]=-Factor*P_PRES[1][z];
416 }
417 } else if (order==1) {
418 #pragma omp for private(z) schedule(static)
419 for (z=0;z<n;++z) {
420 R_PRES[0][z]= Factor* AP[z]+ALPHA[0]*R_PRES[1][z];
421 X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z];
422 }
423 } else if (order==2) {
424 #pragma omp for private(z) schedule(static)
425 for (z=0;z<n;++z) {
426 R_PRES[0][z]= Factor* AP[z]+ALPHA[0]*R_PRES[1][z]
427 +ALPHA[1]*R_PRES[2][z];
428 X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
429 +ALPHA[1]*X_PRES[2][z];
430 }
431 } else if (order==3) {
432 #pragma omp for private(z) schedule(static)
433 for (z=0;z<n;++z) {
434 R_PRES[0][z]= Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
435 +ALPHA[1]*R_PRES[2][z]
436 +ALPHA[2]*R_PRES[3][z];
437 X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
438 +ALPHA[1]*X_PRES[2][z]
439 +ALPHA[2]*X_PRES[3][z];
440 }
441 } else if (order==4) {
442 #pragma omp for private(z) schedule(static)
443 for (z=0;z<n;++z) {
444 R_PRES[0][z]= Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
445 +ALPHA[1]*R_PRES[2][z]
446 +ALPHA[2]*R_PRES[3][z]
447 +ALPHA[3]*R_PRES[4][z];
448 X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
449 +ALPHA[1]*X_PRES[2][z]
450 +ALPHA[2]*X_PRES[3][z]
451 +ALPHA[3]*X_PRES[4][z];
452 }
453 } else if (order==5) {
454 #pragma omp for private(z) schedule(static)
455 for (z=0;z<n;++z) {
456 R_PRES[0][z]=Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
457 +ALPHA[1]*R_PRES[2][z]
458 +ALPHA[2]*R_PRES[3][z]
459 +ALPHA[3]*R_PRES[4][z]
460 +ALPHA[4]*R_PRES[5][z];
461 X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
462 +ALPHA[1]*X_PRES[2][z]
463 +ALPHA[2]*X_PRES[3][z]
464 +ALPHA[3]*X_PRES[4][z]
465 +ALPHA[4]*X_PRES[5][z];
466 }
467 } else if (order==6) {
468 #pragma omp for private(z) schedule(static)
469 for (z=0;z<n;++z) {
470 R_PRES[0][z]=Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
471 +ALPHA[1]*R_PRES[2][z]
472 +ALPHA[2]*R_PRES[3][z]
473 +ALPHA[3]*R_PRES[4][z]
474 +ALPHA[4]*R_PRES[5][z]
475 +ALPHA[5]*R_PRES[6][z];
476 X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
477 +ALPHA[1]*X_PRES[2][z]
478 +ALPHA[2]*X_PRES[3][z]
479 +ALPHA[3]*X_PRES[4][z]
480 +ALPHA[4]*X_PRES[5][z]
481 +ALPHA[5]*X_PRES[6][z];
482 }
483 } else if (order>6) {
484 #pragma omp for private(z) schedule(static)
485 for (z=0;z<n;++z) {
486 R_PRES[0][z]=Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
487 +ALPHA[1]*R_PRES[2][z]
488 +ALPHA[2]*R_PRES[3][z]
489 +ALPHA[3]*R_PRES[4][z]
490 +ALPHA[4]*R_PRES[5][z]
491 +ALPHA[5]*R_PRES[6][z]
492 +ALPHA[6]*R_PRES[7][z];
493 X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
494 +ALPHA[1]*X_PRES[2][z]
495 +ALPHA[2]*X_PRES[3][z]
496 +ALPHA[3]*X_PRES[4][z]
497 +ALPHA[4]*X_PRES[5][z]
498 +ALPHA[5]*X_PRES[6][z]
499 +ALPHA[6]*X_PRES[7][z];
500 }
501 for (i=7;i<order;++i) {
502 #pragma omp for private(z) schedule(static)
503 for (z=0;z<n;++z) {
504 R_PRES[0][z]+=ALPHA[i]*R_PRES[i+1][z];
505 X_PRES[0][z]+=ALPHA[i]*X_PRES[i+1][z];
506 }
507 }
508 }
509 if (breakf0>0.) {
510 /***
511 ***** calculate gamma from min_(gamma){|R+gamma*(R_PRES-R)|_2}:
512 ***/
513 #pragma omp master
514 {
515 SC1=ZERO;
516 SC2=ZERO;
517 L2_R=ZERO;
518 }
519 #pragma omp barrier
520 #pragma omp for private(z,diff) reduction(+:SC1,SC2) schedule(static)
521 for (z=0;z<n;++z) {
522 diff=R_PRES[0][z]-r[z];
523 SC1+=diff*diff;
524 SC2+=diff*r[z];
525 }
526 gamma=(SC1<=ZERO) ? ZERO : -SC2/SC1;
527 #pragma omp for private(z) reduction(+:L2_R) schedule(static)
528 for (z=0;z<n;++z) {
529 x[z]+=gamma*(X_PRES[0][z]-x[z]);
530 r[z]+=gamma*(R_PRES[0][z]-r[z]);
531 L2_R+=r[z]*r[z];
532 }
533 norm_of_residual=sqrt(L2_R);
534 convergeFlag = (norm_of_residual <= tol);
535 if (restart>0) restartFlag=(num_iter_restart >= restart);
536 } else {
537 convergeFlag=FALSE;
538 restartFlag=FALSE;
539 }
540 maxIterFlag = (num_iter >= maxit);
541 }
542 }
543 /* end of iteration */
544 #pragma omp master
545 {
546 Norm_of_residual_global=norm_of_residual;
547 Num_iter_global=num_iter;
548 if (maxIterFlag) {
549 Status = SOLVER_MAXITER_REACHED;
550 } else if (breakFlag) {
551 Status = SOLVER_BREAKDOWN;
552 }
553 }
554 } /* end of parallel region */
555 TMPMEMFREE(AP);
556 for (i=0;i<Length_of_recursion;i++) {
557 TMPMEMFREE(X_PRES[i]);
558 TMPMEMFREE(R_PRES[i]);
559 TMPMEMFREE(P_PRES[i]);
560 }
561 *iter=Num_iter_global;
562 *tolerance=Norm_of_residual_global;
563 }
564 return Status;
565 }
566

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26