/[escript]/trunk/paso/src/Solvers/GMRES.c
ViewVC logotype

Contents of /trunk/paso/src/Solvers/GMRES.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 584 - (show annotations)
Thu Mar 9 23:03:38 2006 UTC (13 years, 1 month ago) by gross
File MIME type: text/plain
File size: 23216 byte(s)
eigenvalues: compiles and passes tests on altix now
1 /* $Id$ */
2
3 /*
4 * Purpose
5 * =======
6 *
7 * GMRES solves the linear system A*x=b using the
8 * truncated and restered GMRES method with preconditioning.
9 *
10 * Convergence test: norm( b - A*x )< TOL.
11 *
12 *
13 *
14 * Arguments
15 * =========
16 *
17 * r (input/output) double array, dimension n.
18 * On entry, residual of inital guess X
19 *
20 * x (input/output) double array, dimension n.
21 * On input, the initial guess.
22 *
23 * iter (input/output) int
24 * On input, the maximum num_iterations to be performed.
25 * On output, actual number of num_iterations performed.
26 *
27 * tolerance (input/output) DOUBLE PRECISION
28 * On input, the allowable convergence measure for
29 * norm( b - A*x )
30 * On output, the final value of this measure.
31 *
32 * Length_of_recursion (input) gives the number of residual to be kept in orthogonalization process
33 *
34 * restart (input) If restart>0, iteration is resterted a after restart steps.
35 *
36 * INFO (output) int
37 *
38 * =SOLVER_NO_ERROR: Successful exit. num_iterated approximate solution returned.
39 * =SOLVER_MAXNUM_ITER_REACHED
40 * =SOLVER_INPUT_ERROR Illegal parameter:
41 * =SOLVER_BREAKDOWN: bad luck!
42 * =SOLVER_MEMORY_ERROR : no memory available
43 *
44 * ==============================================================
45 */
46
47 #include "Common.h"
48 #include "SystemMatrix.h"
49 #include "Solver.h"
50 #ifdef _OPENMP
51 #include <omp.h>
52 #endif
53
54 err_t Paso_Solver_GMRES(
55 Paso_SystemMatrix * A,
56 double * r,
57 double * x,
58 dim_t *iter,
59 double * tolerance,dim_t Length_of_recursion,dim_t restart,
60 Paso_Performance* pp) {
61
62 /* Local variables */
63
64 double *AP,*X_PRES[MAX(Length_of_recursion,0)+1],*R_PRES[MAX(Length_of_recursion,0)+1],*P_PRES[MAX(Length_of_recursion,0)+1];
65 double P_PRES_dot_AP[MAX(Length_of_recursion,0)],R_PRES_dot_P_PRES[MAX(Length_of_recursion,0)+1],BREAKF[MAX(Length_of_recursion,0)+1],ALPHA[MAX(Length_of_recursion,0)];
66 double P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3,P_PRES_dot_AP4,P_PRES_dot_AP5,P_PRES_dot_AP6,R_PRES_dot_P,abs_RP,breakf0;
67 double tol,Factor,sum_BREAKF,gamma,SC1,SC2,norm_of_residual,diff,L2_R,Norm_of_residual_global;
68 double *save_XPRES, *save_P_PRES, *save_R_PRES,save_R_PRES_dot_P_PRES;
69 dim_t maxit,Num_iter_global,num_iter_restart,num_iter;
70 dim_t i,z,order;
71 bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE,restartFlag=FALSE;
72 err_t Status=SOLVER_NO_ERROR;
73
74 /* adapt original routine parameters */
75
76 dim_t n=A->num_cols * A-> col_block_size;
77 dim_t Length_of_mem=MAX(Length_of_recursion,0)+1;
78
79 /* Test the input parameters. */
80 if (restart>0) restart=MAX(Length_of_recursion,restart);
81 if (n < 0 || Length_of_recursion<=0) {
82 return SOLVER_INPUT_ERROR;
83 }
84
85 /* allocate memory: */
86
87 AP=TMPMEMALLOC(n,double);
88 if (AP==NULL) Status=SOLVER_MEMORY_ERROR;
89 for (i=0;i<Length_of_mem;i++) {
90 X_PRES[i]=TMPMEMALLOC(n,double);
91 R_PRES[i]=TMPMEMALLOC(n,double);
92 P_PRES[i]=TMPMEMALLOC(n,double);
93 if (X_PRES[i]==NULL || R_PRES[i]==NULL || P_PRES[i]==NULL) Status=SOLVER_MEMORY_ERROR;
94 }
95 if ( Status ==SOLVER_NO_ERROR ) {
96
97 /* now PRES starts : */
98 maxit=*iter;
99 tol=*tolerance;
100
101 #pragma omp parallel firstprivate(maxit,tol,convergeFlag,maxIterFlag,breakFlag) \
102 private(num_iter,i,num_iter_restart,order,sum_BREAKF,gamma,restartFlag,norm_of_residual,abs_RP,breakf0,\
103 save_XPRES,save_P_PRES,save_R_PRES,save_R_PRES_dot_P_PRES)
104 {
105 /* initialization */
106
107 restartFlag=TRUE;
108 num_iter=0;
109 #pragma omp for private(z) schedule(static) nowait
110 for (z=0; z < n; ++z) AP[z]=0;
111 for(i=0;i<Length_of_mem;++i) {
112 #pragma omp for private(z) schedule(static) nowait
113 for (z=0; z < n; ++z) {
114 P_PRES[i][z]=0;
115 R_PRES[i][z]=0;
116 X_PRES[i][z]=0;
117 }
118 }
119
120 while (! (convergeFlag || maxIterFlag || breakFlag)) {
121 #pragma omp barrier
122 if (restartFlag) {
123 #pragma omp master
124 BREAKF[0]=ONE;
125 #pragma omp for private(z) schedule(static) nowait
126 for (z=0; z < n; ++z) {
127 R_PRES[0][z]=r[z];
128 X_PRES[0][z]=x[z];
129 }
130 num_iter_restart=0;
131 restartFlag=FALSE;
132 }
133 ++num_iter;
134 ++num_iter_restart;
135 /* order is the dimension of the space on which the residual is minimized: */
136 order=MIN(num_iter_restart,Length_of_recursion);
137 /***
138 *** calculate new search direction P from R_PRES
139 ***/
140 #pragma omp barrier
141 Paso_Solver_solvePreconditioner(A,&P_PRES[0][0], &R_PRES[0][0]);
142 /***
143 *** apply A to P to get AP
144 ***/
145 #pragma omp barrier
146 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, &P_PRES[0][0],ZERO, &AP[0]);
147 /***
148 ***** calculation of the norm of R and the scalar products of
149 *** the residuals and A*P:
150 ***/
151 if (order==0) {
152 #pragma omp master
153 R_PRES_dot_P=ZERO;
154 #pragma omp barrier
155 #pragma omp for private(z) reduction(+:R_PRES_dot_P) schedule(static)
156 for (z=0;z<n;++z)
157 R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
158 #pragma omp master
159 R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
160 } else if (order==1) {
161 #pragma omp master
162 {
163 R_PRES_dot_P=ZERO;
164 P_PRES_dot_AP0=ZERO;
165 }
166 #pragma omp barrier
167 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0) schedule(static)
168 for (z=0;z<n;++z) {
169 R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
170 P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
171 }
172 #pragma omp master
173 {
174 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
175 R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
176 }
177 } else if (order==2) {
178 #pragma omp master
179 {
180 R_PRES_dot_P=ZERO;
181 P_PRES_dot_AP0=ZERO;
182 P_PRES_dot_AP1=ZERO;
183 }
184 #pragma omp barrier
185 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1) schedule(static)
186 for (z=0;z<n;++z) {
187 R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
188 P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
189 P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
190 }
191 #pragma omp master
192 {
193 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
194 P_PRES_dot_AP[1]=P_PRES_dot_AP1;
195 R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
196 }
197 } else if (order==3) {
198 #pragma omp master
199 {
200 R_PRES_dot_P=ZERO;
201 P_PRES_dot_AP0=ZERO;
202 P_PRES_dot_AP1=ZERO;
203 P_PRES_dot_AP2=ZERO;
204 }
205 #pragma omp barrier
206 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2) schedule(static)
207 for (z=0;z<n;++z) {
208 R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
209 P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
210 P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
211 P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
212 }
213 #pragma omp master
214 {
215 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
216 P_PRES_dot_AP[1]=P_PRES_dot_AP1;
217 P_PRES_dot_AP[2]=P_PRES_dot_AP2;
218 R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
219 }
220 } else if (order==4) {
221 #pragma omp master
222 {
223 R_PRES_dot_P=ZERO;
224 P_PRES_dot_AP0=ZERO;
225 P_PRES_dot_AP1=ZERO;
226 P_PRES_dot_AP2=ZERO;
227 P_PRES_dot_AP3=ZERO;
228 }
229 #pragma omp barrier
230 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3) schedule(static)
231 for (z=0;z<n;++z) {
232 R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
233 P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
234 P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
235 P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
236 P_PRES_dot_AP3+=P_PRES[3][z]*AP[z];
237 }
238 #pragma omp master
239 {
240 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
241 P_PRES_dot_AP[1]=P_PRES_dot_AP1;
242 P_PRES_dot_AP[2]=P_PRES_dot_AP2;
243 P_PRES_dot_AP[3]=P_PRES_dot_AP3;
244 R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
245 }
246 } else if (order==5) {
247 #pragma omp master
248 {
249 R_PRES_dot_P=ZERO;
250 P_PRES_dot_AP0=ZERO;
251 P_PRES_dot_AP1=ZERO;
252 P_PRES_dot_AP2=ZERO;
253 P_PRES_dot_AP3=ZERO;
254 P_PRES_dot_AP4=ZERO;
255 }
256 #pragma omp barrier
257 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3,P_PRES_dot_AP4) schedule(static)
258 for (z=0;z<n;++z) {
259 R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
260 P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
261 P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
262 P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
263 P_PRES_dot_AP3+=P_PRES[3][z]*AP[z];
264 P_PRES_dot_AP4+=P_PRES[4][z]*AP[z];
265 }
266 #pragma omp master
267 {
268 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
269 P_PRES_dot_AP[1]=P_PRES_dot_AP1;
270 P_PRES_dot_AP[2]=P_PRES_dot_AP2;
271 P_PRES_dot_AP[3]=P_PRES_dot_AP3;
272 P_PRES_dot_AP[4]=P_PRES_dot_AP4;
273 R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
274 }
275 } else if (order==6) {
276 #pragma omp master
277 {
278 R_PRES_dot_P=ZERO;
279 P_PRES_dot_AP0=ZERO;
280 P_PRES_dot_AP1=ZERO;
281 P_PRES_dot_AP2=ZERO;
282 P_PRES_dot_AP3=ZERO;
283 P_PRES_dot_AP4=ZERO;
284 P_PRES_dot_AP5=ZERO;
285 }
286 #pragma omp barrier
287 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3,P_PRES_dot_AP4,P_PRES_dot_AP5) schedule(static)
288 for (z=0;z<n;++z) {
289 R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
290 P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
291 P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
292 P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
293 P_PRES_dot_AP3+=P_PRES[3][z]*AP[z];
294 P_PRES_dot_AP4+=P_PRES[4][z]*AP[z];
295 P_PRES_dot_AP5+=P_PRES[5][z]*AP[z];
296 }
297 #pragma omp master
298 {
299 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
300 P_PRES_dot_AP[1]=P_PRES_dot_AP1;
301 P_PRES_dot_AP[2]=P_PRES_dot_AP2;
302 P_PRES_dot_AP[3]=P_PRES_dot_AP3;
303 P_PRES_dot_AP[4]=P_PRES_dot_AP4;
304 P_PRES_dot_AP[5]=P_PRES_dot_AP5;
305 R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
306 }
307 } else if (order>6) {
308 #pragma omp master
309 {
310 R_PRES_dot_P=ZERO;
311 P_PRES_dot_AP0=ZERO;
312 P_PRES_dot_AP1=ZERO;
313 P_PRES_dot_AP2=ZERO;
314 P_PRES_dot_AP3=ZERO;
315 P_PRES_dot_AP4=ZERO;
316 P_PRES_dot_AP5=ZERO;
317 P_PRES_dot_AP6=ZERO;
318 }
319 #pragma omp barrier
320 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3,P_PRES_dot_AP4,P_PRES_dot_AP5,P_PRES_dot_AP6) schedule(static)
321 for (z=0;z<n;++z) {
322 R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
323 P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
324 P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
325 P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
326 P_PRES_dot_AP3+=P_PRES[3][z]*AP[z];
327 P_PRES_dot_AP4+=P_PRES[4][z]*AP[z];
328 P_PRES_dot_AP5+=P_PRES[5][z]*AP[z];
329 P_PRES_dot_AP6+=P_PRES[6][z]*AP[z];
330 }
331 #pragma omp master
332 {
333 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
334 P_PRES_dot_AP[1]=P_PRES_dot_AP1;
335 P_PRES_dot_AP[2]=P_PRES_dot_AP2;
336 P_PRES_dot_AP[3]=P_PRES_dot_AP3;
337 P_PRES_dot_AP[4]=P_PRES_dot_AP4;
338 P_PRES_dot_AP[5]=P_PRES_dot_AP5;
339 P_PRES_dot_AP[6]=P_PRES_dot_AP6;
340 R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
341
342 P_PRES_dot_AP0=ZERO;
343 }
344 for (i=7;i<order;++i) {
345 #pragma omp barrier
346 #pragma omp for private(z) reduction(+:P_PRES_dot_AP0) schedule(static)
347 for (z=0;z<n;++z) P_PRES_dot_AP0+=P_PRES[i][z]*AP[z];
348 #pragma omp master
349 {
350 P_PRES_dot_AP[i]=P_PRES_dot_AP0;
351 P_PRES_dot_AP0=ZERO;
352 }
353 }
354 }
355 /* this fixes a problem with the intel compiler */
356 #pragma omp master
357 P_PRES_dot_AP0=R_PRES_dot_P_PRES[0];
358 /*** if sum_BREAKF is equal to zero a breakdown occurs.
359 *** iteration procedure can be continued but R_PRES is not the
360 *** residual of X_PRES approximation.
361 ***/
362 #pragma omp barrier
363 sum_BREAKF=0.;
364 for (i=0;i<order;++i) sum_BREAKF +=BREAKF[i];
365 breakFlag=!((ABS(P_PRES_dot_AP0) > ZERO) && (sum_BREAKF >ZERO));
366 if (!breakFlag) {
367 breakFlag=FALSE;
368 /***
369 ***** X_PRES and R_PRES are moved to memory:
370 ***/
371 #pragma omp master
372 {
373 Factor=0.;
374 for (i=0;i<order;++i) {
375 ALPHA[i]=-P_PRES_dot_AP[i]/R_PRES_dot_P_PRES[i];
376 Factor+=BREAKF[i]*ALPHA[i];
377 }
378
379 save_R_PRES_dot_P_PRES=R_PRES_dot_P_PRES[Length_of_mem-1];
380 save_R_PRES=R_PRES[Length_of_mem-1];
381 save_XPRES=X_PRES[Length_of_mem-1];
382 save_P_PRES=P_PRES[Length_of_mem-1];
383 for (i=Length_of_mem-1;i>0;--i) {
384 BREAKF[i]=BREAKF[i-1];
385 R_PRES_dot_P_PRES[i]=R_PRES_dot_P_PRES[i-1];
386 R_PRES[i]=R_PRES[i-1];
387 X_PRES[i]=X_PRES[i-1];
388 P_PRES[i]=P_PRES[i-1];
389 }
390 R_PRES_dot_P_PRES[0]=save_R_PRES_dot_P_PRES;
391 R_PRES[0]=save_R_PRES;
392 X_PRES[0]=save_XPRES;
393 P_PRES[0]=save_P_PRES;
394
395 if (ABS(Factor)<=ZERO) {
396 Factor=1.;
397 BREAKF[0]=ZERO;
398 } else {
399 Factor=1./Factor;
400 BREAKF[0]=ONE;
401 }
402 for (i=0;i<order;++i) ALPHA[i]*=Factor;
403 }
404 /*
405 ***** update of solution X_PRES and its residual R_PRES:
406 ***
407 *** P is used to accumulate X and AP to accumulate R. X and R
408 *** are still needed until they are put into the X and R memory
409 *** R_PRES and X_PRES
410 ***
411 **/
412 #pragma omp barrier
413 breakf0=BREAKF[0];
414 if (order==0) {
415 #pragma omp for private(z) schedule(static)
416 for (z=0;z<n;++z) {
417 R_PRES[0][z]= Factor* AP[z];
418 X_PRES[0][z]=-Factor*P_PRES[1][z];
419 }
420 } else if (order==1) {
421 #pragma omp for private(z) schedule(static)
422 for (z=0;z<n;++z) {
423 R_PRES[0][z]= Factor* AP[z]+ALPHA[0]*R_PRES[1][z];
424 X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z];
425 }
426 } else if (order==2) {
427 #pragma omp for private(z) schedule(static)
428 for (z=0;z<n;++z) {
429 R_PRES[0][z]= Factor* AP[z]+ALPHA[0]*R_PRES[1][z]
430 +ALPHA[1]*R_PRES[2][z];
431 X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
432 +ALPHA[1]*X_PRES[2][z];
433 }
434 } else if (order==3) {
435 #pragma omp for private(z) schedule(static)
436 for (z=0;z<n;++z) {
437 R_PRES[0][z]= Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
438 +ALPHA[1]*R_PRES[2][z]
439 +ALPHA[2]*R_PRES[3][z];
440 X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
441 +ALPHA[1]*X_PRES[2][z]
442 +ALPHA[2]*X_PRES[3][z];
443 }
444 } else if (order==4) {
445 #pragma omp for private(z) schedule(static)
446 for (z=0;z<n;++z) {
447 R_PRES[0][z]= Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
448 +ALPHA[1]*R_PRES[2][z]
449 +ALPHA[2]*R_PRES[3][z]
450 +ALPHA[3]*R_PRES[4][z];
451 X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
452 +ALPHA[1]*X_PRES[2][z]
453 +ALPHA[2]*X_PRES[3][z]
454 +ALPHA[3]*X_PRES[4][z];
455 }
456 } else if (order==5) {
457 #pragma omp for private(z) schedule(static)
458 for (z=0;z<n;++z) {
459 R_PRES[0][z]=Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
460 +ALPHA[1]*R_PRES[2][z]
461 +ALPHA[2]*R_PRES[3][z]
462 +ALPHA[3]*R_PRES[4][z]
463 +ALPHA[4]*R_PRES[5][z];
464 X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
465 +ALPHA[1]*X_PRES[2][z]
466 +ALPHA[2]*X_PRES[3][z]
467 +ALPHA[3]*X_PRES[4][z]
468 +ALPHA[4]*X_PRES[5][z];
469 }
470 } else if (order==6) {
471 #pragma omp for private(z) schedule(static)
472 for (z=0;z<n;++z) {
473 R_PRES[0][z]=Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
474 +ALPHA[1]*R_PRES[2][z]
475 +ALPHA[2]*R_PRES[3][z]
476 +ALPHA[3]*R_PRES[4][z]
477 +ALPHA[4]*R_PRES[5][z]
478 +ALPHA[5]*R_PRES[6][z];
479 X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
480 +ALPHA[1]*X_PRES[2][z]
481 +ALPHA[2]*X_PRES[3][z]
482 +ALPHA[3]*X_PRES[4][z]
483 +ALPHA[4]*X_PRES[5][z]
484 +ALPHA[5]*X_PRES[6][z];
485 }
486 } else if (order>6) {
487 #pragma omp for private(z) schedule(static)
488 for (z=0;z<n;++z) {
489 R_PRES[0][z]=Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
490 +ALPHA[1]*R_PRES[2][z]
491 +ALPHA[2]*R_PRES[3][z]
492 +ALPHA[3]*R_PRES[4][z]
493 +ALPHA[4]*R_PRES[5][z]
494 +ALPHA[5]*R_PRES[6][z]
495 +ALPHA[6]*R_PRES[7][z];
496 X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
497 +ALPHA[1]*X_PRES[2][z]
498 +ALPHA[2]*X_PRES[3][z]
499 +ALPHA[3]*X_PRES[4][z]
500 +ALPHA[4]*X_PRES[5][z]
501 +ALPHA[5]*X_PRES[6][z]
502 +ALPHA[6]*X_PRES[7][z];
503 }
504 for (i=7;i<order;++i) {
505 #pragma omp for private(z) schedule(static)
506 for (z=0;z<n;++z) {
507 R_PRES[0][z]+=ALPHA[i]*R_PRES[i+1][z];
508 X_PRES[0][z]+=ALPHA[i]*X_PRES[i+1][z];
509 }
510 }
511 }
512 if (breakf0>0.) {
513 /***
514 ***** calculate gamma from min_(gamma){|R+gamma*(R_PRES-R)|_2}:
515 ***/
516 #pragma omp master
517 {
518 SC1=ZERO;
519 SC2=ZERO;
520 L2_R=ZERO;
521 }
522 #pragma omp barrier
523 #pragma omp for private(z,diff) reduction(+:SC1,SC2) schedule(static)
524 for (z=0;z<n;++z) {
525 diff=R_PRES[0][z]-r[z];
526 SC1+=diff*diff;
527 SC2+=diff*r[z];
528 }
529 gamma=(SC1<=ZERO) ? ZERO : -SC2/SC1;
530 #pragma omp for private(z) reduction(+:L2_R) schedule(static)
531 for (z=0;z<n;++z) {
532 x[z]+=gamma*(X_PRES[0][z]-x[z]);
533 r[z]+=gamma*(R_PRES[0][z]-r[z]);
534 L2_R+=r[z]*r[z];
535 }
536 norm_of_residual=sqrt(L2_R);
537 convergeFlag = (norm_of_residual <= tol);
538 if (restart>0) restartFlag=(num_iter_restart >= restart);
539 } else {
540 convergeFlag=FALSE;
541 restartFlag=FALSE;
542 }
543 maxIterFlag = (num_iter >= maxit);
544 }
545 }
546 /* end of iteration */
547 #pragma omp master
548 {
549 Norm_of_residual_global=norm_of_residual;
550 Num_iter_global=num_iter;
551 if (maxIterFlag) {
552 Status = SOLVER_MAXITER_REACHED;
553 } else if (breakFlag) {
554 Status = SOLVER_BREAKDOWN;
555 }
556 }
557 } /* end of parallel region */
558 TMPMEMFREE(AP);
559 for (i=0;i<Length_of_recursion;i++) {
560 TMPMEMFREE(X_PRES[i]);
561 TMPMEMFREE(R_PRES[i]);
562 TMPMEMFREE(P_PRES[i]);
563 }
564 *iter=Num_iter_global;
565 *tolerance=Norm_of_residual_global;
566 }
567 return Status;
568 }

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26