/[escript]/trunk/paso/src/GMRES.c
ViewVC logotype

Contents of /trunk/paso/src/GMRES.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 682 - (show annotations)
Mon Mar 27 02:43:09 2006 UTC (13 years, 11 months ago) by robwdcock
Original Path: trunk/paso/src/Solvers/GMRES.c
File MIME type: text/plain
File size: 23868 byte(s)
+ NEW BUILD SYSTEM

This commit contains the new build system with cross-platform support.
Most things work are before though you can have more control.

ENVIRONMENT settings have changed:
+ You no longer require LD_LIBRARY_PATH or PYTHONPATH to point to the
esysroot for building and testing performed via scons
+ ACcESS altix users: It is recommended you change your modules to load
the latest intel compiler and other libraries required by boost to match
the setup in svn (you can override). The correct modules are as follows

module load intel_cc.9.0.026
export
MODULEPATH=${MODULEPATH}:/data/raid2/toolspp4/modulefiles/gcc-3.3.6
module load boost/1.33.0/python-2.4.1
module load python/2.4.1
module load numarray/1.3.3


1 /* $Id$ */
2
3
4 /*
5 ********************************************************************************
6 * Copyright 2006 by ACcESS MNRF *
7 * *
8 * http://www.access.edu.au *
9 * Primary Business: Queensland, Australia *
10 * Licensed under the Open Software License version 3.0 *
11 * http://www.opensource.org/licenses/osl-3.0.php *
12 ********************************************************************************
13 */
14
15 /*
16 * Purpose
17 * =======
18 *
19 * GMRES solves the linear system A*x=b using the
20 * truncated and restered GMRES method with preconditioning.
21 *
22 * Convergence test: norm( b - A*x )< TOL.
23 *
24 *
25 *
26 * Arguments
27 * =========
28 *
29 * r (input/output) double array, dimension n.
30 * On entry, residual of inital guess X
31 *
32 * x (input/output) double array, dimension n.
33 * On input, the initial guess.
34 *
35 * iter (input/output) int
36 * On input, the maximum num_iterations to be performed.
37 * On output, actual number of num_iterations performed.
38 *
39 * tolerance (input/output) DOUBLE PRECISION
40 * On input, the allowable convergence measure for
41 * norm( b - A*x )
42 * On output, the final value of this measure.
43 *
44 * Length_of_recursion (input) gives the number of residual to be kept in orthogonalization process
45 *
46 * restart (input) If restart>0, iteration is resterted a after restart steps.
47 *
48 * INFO (output) int
49 *
50 * =SOLVER_NO_ERROR: Successful exit. num_iterated approximate solution returned.
51 * =SOLVER_MAXNUM_ITER_REACHED
52 * =SOLVER_INPUT_ERROR Illegal parameter:
53 * =SOLVER_BREAKDOWN: bad luck!
54 * =SOLVER_MEMORY_ERROR : no memory available
55 *
56 * ==============================================================
57 */
58
59 #include "../Common.h"
60 #include "../SystemMatrix.h"
61 #include "Solver.h"
62 #ifdef _OPENMP
63 #include <omp.h>
64 #endif
65
66 err_t Paso_Solver_GMRES(
67 Paso_SystemMatrix * A,
68 double * r,
69 double * x,
70 dim_t *iter,
71 double * tolerance,dim_t Length_of_recursion,dim_t restart,
72 Paso_Performance* pp) {
73
74 /* Local variables */
75
76 double *AP,*X_PRES[MAX(Length_of_recursion,0)+1],*R_PRES[MAX(Length_of_recursion,0)+1],*P_PRES[MAX(Length_of_recursion,0)+1];
77 double P_PRES_dot_AP[MAX(Length_of_recursion,0)],R_PRES_dot_P_PRES[MAX(Length_of_recursion,0)+1],BREAKF[MAX(Length_of_recursion,0)+1],ALPHA[MAX(Length_of_recursion,0)];
78 double P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3,P_PRES_dot_AP4,P_PRES_dot_AP5,P_PRES_dot_AP6,R_PRES_dot_P,abs_RP,breakf0;
79 double tol,Factor,sum_BREAKF,gamma,SC1,SC2,norm_of_residual,diff,L2_R,Norm_of_residual_global;
80 double *save_XPRES, *save_P_PRES, *save_R_PRES,save_R_PRES_dot_P_PRES;
81 dim_t maxit,Num_iter_global,num_iter_restart,num_iter;
82 dim_t i,z,order;
83 bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE,restartFlag=FALSE;
84 err_t Status=SOLVER_NO_ERROR;
85
86 /* adapt original routine parameters */
87
88 dim_t n=A->num_cols * A-> col_block_size;
89 dim_t Length_of_mem=MAX(Length_of_recursion,0)+1;
90
91 /* Test the input parameters. */
92 if (restart>0) restart=MAX(Length_of_recursion,restart);
93 if (n < 0 || Length_of_recursion<=0) {
94 return SOLVER_INPUT_ERROR;
95 }
96
97 /* allocate memory: */
98
99 AP=TMPMEMALLOC(n,double);
100 if (AP==NULL) Status=SOLVER_MEMORY_ERROR;
101 for (i=0;i<Length_of_mem;i++) {
102 X_PRES[i]=TMPMEMALLOC(n,double);
103 R_PRES[i]=TMPMEMALLOC(n,double);
104 P_PRES[i]=TMPMEMALLOC(n,double);
105 if (X_PRES[i]==NULL || R_PRES[i]==NULL || P_PRES[i]==NULL) Status=SOLVER_MEMORY_ERROR;
106 }
107 if ( Status ==SOLVER_NO_ERROR ) {
108
109 /* now PRES starts : */
110 maxit=*iter;
111 tol=*tolerance;
112
113 #pragma omp parallel firstprivate(maxit,tol,convergeFlag,maxIterFlag,breakFlag) \
114 private(num_iter,i,num_iter_restart,order,sum_BREAKF,gamma,restartFlag,norm_of_residual,abs_RP,breakf0,\
115 save_XPRES,save_P_PRES,save_R_PRES,save_R_PRES_dot_P_PRES)
116 {
117 /* initialization */
118
119 restartFlag=TRUE;
120 num_iter=0;
121 #pragma omp for private(z) schedule(static) nowait
122 for (z=0; z < n; ++z) AP[z]=0;
123 for(i=0;i<Length_of_mem;++i) {
124 #pragma omp for private(z) schedule(static) nowait
125 for (z=0; z < n; ++z) {
126 P_PRES[i][z]=0;
127 R_PRES[i][z]=0;
128 X_PRES[i][z]=0;
129 }
130 }
131
132 while (! (convergeFlag || maxIterFlag || breakFlag)) {
133 #pragma omp barrier
134 if (restartFlag) {
135 #pragma omp master
136 BREAKF[0]=ONE;
137 #pragma omp for private(z) schedule(static) nowait
138 for (z=0; z < n; ++z) {
139 R_PRES[0][z]=r[z];
140 X_PRES[0][z]=x[z];
141 }
142 num_iter_restart=0;
143 restartFlag=FALSE;
144 }
145 ++num_iter;
146 ++num_iter_restart;
147 /* order is the dimension of the space on which the residual is minimized: */
148 order=MIN(num_iter_restart,Length_of_recursion);
149 /***
150 *** calculate new search direction P from R_PRES
151 ***/
152 #pragma omp barrier
153 Paso_Solver_solvePreconditioner(A,&P_PRES[0][0], &R_PRES[0][0]);
154 /***
155 *** apply A to P to get AP
156 ***/
157 #pragma omp barrier
158 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, &P_PRES[0][0],ZERO, &AP[0]);
159 /***
160 ***** calculation of the norm of R and the scalar products of
161 *** the residuals and A*P:
162 ***/
163 if (order==0) {
164 #pragma omp master
165 R_PRES_dot_P=ZERO;
166 #pragma omp barrier
167 #pragma omp for private(z) reduction(+:R_PRES_dot_P) schedule(static)
168 for (z=0;z<n;++z)
169 R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
170 #pragma omp master
171 R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
172 } else if (order==1) {
173 #pragma omp master
174 {
175 R_PRES_dot_P=ZERO;
176 P_PRES_dot_AP0=ZERO;
177 }
178 #pragma omp barrier
179 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0) schedule(static)
180 for (z=0;z<n;++z) {
181 R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
182 P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
183 }
184 #pragma omp master
185 {
186 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
187 R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
188 }
189 } else if (order==2) {
190 #pragma omp master
191 {
192 R_PRES_dot_P=ZERO;
193 P_PRES_dot_AP0=ZERO;
194 P_PRES_dot_AP1=ZERO;
195 }
196 #pragma omp barrier
197 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1) schedule(static)
198 for (z=0;z<n;++z) {
199 R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
200 P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
201 P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
202 }
203 #pragma omp master
204 {
205 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
206 P_PRES_dot_AP[1]=P_PRES_dot_AP1;
207 R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
208 }
209 } else if (order==3) {
210 #pragma omp master
211 {
212 R_PRES_dot_P=ZERO;
213 P_PRES_dot_AP0=ZERO;
214 P_PRES_dot_AP1=ZERO;
215 P_PRES_dot_AP2=ZERO;
216 }
217 #pragma omp barrier
218 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2) schedule(static)
219 for (z=0;z<n;++z) {
220 R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
221 P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
222 P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
223 P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
224 }
225 #pragma omp master
226 {
227 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
228 P_PRES_dot_AP[1]=P_PRES_dot_AP1;
229 P_PRES_dot_AP[2]=P_PRES_dot_AP2;
230 R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
231 }
232 } else if (order==4) {
233 #pragma omp master
234 {
235 R_PRES_dot_P=ZERO;
236 P_PRES_dot_AP0=ZERO;
237 P_PRES_dot_AP1=ZERO;
238 P_PRES_dot_AP2=ZERO;
239 P_PRES_dot_AP3=ZERO;
240 }
241 #pragma omp barrier
242 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3) schedule(static)
243 for (z=0;z<n;++z) {
244 R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
245 P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
246 P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
247 P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
248 P_PRES_dot_AP3+=P_PRES[3][z]*AP[z];
249 }
250 #pragma omp master
251 {
252 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
253 P_PRES_dot_AP[1]=P_PRES_dot_AP1;
254 P_PRES_dot_AP[2]=P_PRES_dot_AP2;
255 P_PRES_dot_AP[3]=P_PRES_dot_AP3;
256 R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
257 }
258 } else if (order==5) {
259 #pragma omp master
260 {
261 R_PRES_dot_P=ZERO;
262 P_PRES_dot_AP0=ZERO;
263 P_PRES_dot_AP1=ZERO;
264 P_PRES_dot_AP2=ZERO;
265 P_PRES_dot_AP3=ZERO;
266 P_PRES_dot_AP4=ZERO;
267 }
268 #pragma omp barrier
269 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3,P_PRES_dot_AP4) schedule(static)
270 for (z=0;z<n;++z) {
271 R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
272 P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
273 P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
274 P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
275 P_PRES_dot_AP3+=P_PRES[3][z]*AP[z];
276 P_PRES_dot_AP4+=P_PRES[4][z]*AP[z];
277 }
278 #pragma omp master
279 {
280 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
281 P_PRES_dot_AP[1]=P_PRES_dot_AP1;
282 P_PRES_dot_AP[2]=P_PRES_dot_AP2;
283 P_PRES_dot_AP[3]=P_PRES_dot_AP3;
284 P_PRES_dot_AP[4]=P_PRES_dot_AP4;
285 R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
286 }
287 } else if (order==6) {
288 #pragma omp master
289 {
290 R_PRES_dot_P=ZERO;
291 P_PRES_dot_AP0=ZERO;
292 P_PRES_dot_AP1=ZERO;
293 P_PRES_dot_AP2=ZERO;
294 P_PRES_dot_AP3=ZERO;
295 P_PRES_dot_AP4=ZERO;
296 P_PRES_dot_AP5=ZERO;
297 }
298 #pragma omp barrier
299 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3,P_PRES_dot_AP4,P_PRES_dot_AP5) schedule(static)
300 for (z=0;z<n;++z) {
301 R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
302 P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
303 P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
304 P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
305 P_PRES_dot_AP3+=P_PRES[3][z]*AP[z];
306 P_PRES_dot_AP4+=P_PRES[4][z]*AP[z];
307 P_PRES_dot_AP5+=P_PRES[5][z]*AP[z];
308 }
309 #pragma omp master
310 {
311 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
312 P_PRES_dot_AP[1]=P_PRES_dot_AP1;
313 P_PRES_dot_AP[2]=P_PRES_dot_AP2;
314 P_PRES_dot_AP[3]=P_PRES_dot_AP3;
315 P_PRES_dot_AP[4]=P_PRES_dot_AP4;
316 P_PRES_dot_AP[5]=P_PRES_dot_AP5;
317 R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
318 }
319 } else if (order>6) {
320 #pragma omp master
321 {
322 R_PRES_dot_P=ZERO;
323 P_PRES_dot_AP0=ZERO;
324 P_PRES_dot_AP1=ZERO;
325 P_PRES_dot_AP2=ZERO;
326 P_PRES_dot_AP3=ZERO;
327 P_PRES_dot_AP4=ZERO;
328 P_PRES_dot_AP5=ZERO;
329 P_PRES_dot_AP6=ZERO;
330 }
331 #pragma omp barrier
332 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3,P_PRES_dot_AP4,P_PRES_dot_AP5,P_PRES_dot_AP6) schedule(static)
333 for (z=0;z<n;++z) {
334 R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
335 P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
336 P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
337 P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
338 P_PRES_dot_AP3+=P_PRES[3][z]*AP[z];
339 P_PRES_dot_AP4+=P_PRES[4][z]*AP[z];
340 P_PRES_dot_AP5+=P_PRES[5][z]*AP[z];
341 P_PRES_dot_AP6+=P_PRES[6][z]*AP[z];
342 }
343 #pragma omp master
344 {
345 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
346 P_PRES_dot_AP[1]=P_PRES_dot_AP1;
347 P_PRES_dot_AP[2]=P_PRES_dot_AP2;
348 P_PRES_dot_AP[3]=P_PRES_dot_AP3;
349 P_PRES_dot_AP[4]=P_PRES_dot_AP4;
350 P_PRES_dot_AP[5]=P_PRES_dot_AP5;
351 P_PRES_dot_AP[6]=P_PRES_dot_AP6;
352 R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
353
354 P_PRES_dot_AP0=ZERO;
355 }
356 for (i=7;i<order;++i) {
357 #pragma omp barrier
358 #pragma omp for private(z) reduction(+:P_PRES_dot_AP0) schedule(static)
359 for (z=0;z<n;++z) P_PRES_dot_AP0+=P_PRES[i][z]*AP[z];
360 #pragma omp master
361 {
362 P_PRES_dot_AP[i]=P_PRES_dot_AP0;
363 P_PRES_dot_AP0=ZERO;
364 }
365 }
366 }
367 /* this fixes a problem with the intel compiler */
368 #pragma omp master
369 P_PRES_dot_AP0=R_PRES_dot_P_PRES[0];
370 /*** if sum_BREAKF is equal to zero a breakdown occurs.
371 *** iteration procedure can be continued but R_PRES is not the
372 *** residual of X_PRES approximation.
373 ***/
374 #pragma omp barrier
375 sum_BREAKF=0.;
376 for (i=0;i<order;++i) sum_BREAKF +=BREAKF[i];
377 breakFlag=!((ABS(P_PRES_dot_AP0) > ZERO) && (sum_BREAKF >ZERO));
378 if (!breakFlag) {
379 breakFlag=FALSE;
380 /***
381 ***** X_PRES and R_PRES are moved to memory:
382 ***/
383 #pragma omp master
384 {
385 Factor=0.;
386 for (i=0;i<order;++i) {
387 ALPHA[i]=-P_PRES_dot_AP[i]/R_PRES_dot_P_PRES[i];
388 Factor+=BREAKF[i]*ALPHA[i];
389 }
390
391 save_R_PRES_dot_P_PRES=R_PRES_dot_P_PRES[Length_of_mem-1];
392 save_R_PRES=R_PRES[Length_of_mem-1];
393 save_XPRES=X_PRES[Length_of_mem-1];
394 save_P_PRES=P_PRES[Length_of_mem-1];
395 for (i=Length_of_mem-1;i>0;--i) {
396 BREAKF[i]=BREAKF[i-1];
397 R_PRES_dot_P_PRES[i]=R_PRES_dot_P_PRES[i-1];
398 R_PRES[i]=R_PRES[i-1];
399 X_PRES[i]=X_PRES[i-1];
400 P_PRES[i]=P_PRES[i-1];
401 }
402 R_PRES_dot_P_PRES[0]=save_R_PRES_dot_P_PRES;
403 R_PRES[0]=save_R_PRES;
404 X_PRES[0]=save_XPRES;
405 P_PRES[0]=save_P_PRES;
406
407 if (ABS(Factor)<=ZERO) {
408 Factor=1.;
409 BREAKF[0]=ZERO;
410 } else {
411 Factor=1./Factor;
412 BREAKF[0]=ONE;
413 }
414 for (i=0;i<order;++i) ALPHA[i]*=Factor;
415 }
416 /*
417 ***** update of solution X_PRES and its residual R_PRES:
418 ***
419 *** P is used to accumulate X and AP to accumulate R. X and R
420 *** are still needed until they are put into the X and R memory
421 *** R_PRES and X_PRES
422 ***
423 **/
424 #pragma omp barrier
425 breakf0=BREAKF[0];
426 if (order==0) {
427 #pragma omp for private(z) schedule(static)
428 for (z=0;z<n;++z) {
429 R_PRES[0][z]= Factor* AP[z];
430 X_PRES[0][z]=-Factor*P_PRES[1][z];
431 }
432 } else if (order==1) {
433 #pragma omp for private(z) schedule(static)
434 for (z=0;z<n;++z) {
435 R_PRES[0][z]= Factor* AP[z]+ALPHA[0]*R_PRES[1][z];
436 X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z];
437 }
438 } else if (order==2) {
439 #pragma omp for private(z) schedule(static)
440 for (z=0;z<n;++z) {
441 R_PRES[0][z]= Factor* AP[z]+ALPHA[0]*R_PRES[1][z]
442 +ALPHA[1]*R_PRES[2][z];
443 X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
444 +ALPHA[1]*X_PRES[2][z];
445 }
446 } else if (order==3) {
447 #pragma omp for private(z) schedule(static)
448 for (z=0;z<n;++z) {
449 R_PRES[0][z]= Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
450 +ALPHA[1]*R_PRES[2][z]
451 +ALPHA[2]*R_PRES[3][z];
452 X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
453 +ALPHA[1]*X_PRES[2][z]
454 +ALPHA[2]*X_PRES[3][z];
455 }
456 } else if (order==4) {
457 #pragma omp for private(z) schedule(static)
458 for (z=0;z<n;++z) {
459 R_PRES[0][z]= Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
460 +ALPHA[1]*R_PRES[2][z]
461 +ALPHA[2]*R_PRES[3][z]
462 +ALPHA[3]*R_PRES[4][z];
463 X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
464 +ALPHA[1]*X_PRES[2][z]
465 +ALPHA[2]*X_PRES[3][z]
466 +ALPHA[3]*X_PRES[4][z];
467 }
468 } else if (order==5) {
469 #pragma omp for private(z) schedule(static)
470 for (z=0;z<n;++z) {
471 R_PRES[0][z]=Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
472 +ALPHA[1]*R_PRES[2][z]
473 +ALPHA[2]*R_PRES[3][z]
474 +ALPHA[3]*R_PRES[4][z]
475 +ALPHA[4]*R_PRES[5][z];
476 X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
477 +ALPHA[1]*X_PRES[2][z]
478 +ALPHA[2]*X_PRES[3][z]
479 +ALPHA[3]*X_PRES[4][z]
480 +ALPHA[4]*X_PRES[5][z];
481 }
482 } else if (order==6) {
483 #pragma omp for private(z) schedule(static)
484 for (z=0;z<n;++z) {
485 R_PRES[0][z]=Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
486 +ALPHA[1]*R_PRES[2][z]
487 +ALPHA[2]*R_PRES[3][z]
488 +ALPHA[3]*R_PRES[4][z]
489 +ALPHA[4]*R_PRES[5][z]
490 +ALPHA[5]*R_PRES[6][z];
491 X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
492 +ALPHA[1]*X_PRES[2][z]
493 +ALPHA[2]*X_PRES[3][z]
494 +ALPHA[3]*X_PRES[4][z]
495 +ALPHA[4]*X_PRES[5][z]
496 +ALPHA[5]*X_PRES[6][z];
497 }
498 } else if (order>6) {
499 #pragma omp for private(z) schedule(static)
500 for (z=0;z<n;++z) {
501 R_PRES[0][z]=Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
502 +ALPHA[1]*R_PRES[2][z]
503 +ALPHA[2]*R_PRES[3][z]
504 +ALPHA[3]*R_PRES[4][z]
505 +ALPHA[4]*R_PRES[5][z]
506 +ALPHA[5]*R_PRES[6][z]
507 +ALPHA[6]*R_PRES[7][z];
508 X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
509 +ALPHA[1]*X_PRES[2][z]
510 +ALPHA[2]*X_PRES[3][z]
511 +ALPHA[3]*X_PRES[4][z]
512 +ALPHA[4]*X_PRES[5][z]
513 +ALPHA[5]*X_PRES[6][z]
514 +ALPHA[6]*X_PRES[7][z];
515 }
516 for (i=7;i<order;++i) {
517 #pragma omp for private(z) schedule(static)
518 for (z=0;z<n;++z) {
519 R_PRES[0][z]+=ALPHA[i]*R_PRES[i+1][z];
520 X_PRES[0][z]+=ALPHA[i]*X_PRES[i+1][z];
521 }
522 }
523 }
524 if (breakf0>0.) {
525 /***
526 ***** calculate gamma from min_(gamma){|R+gamma*(R_PRES-R)|_2}:
527 ***/
528 #pragma omp master
529 {
530 SC1=ZERO;
531 SC2=ZERO;
532 L2_R=ZERO;
533 }
534 #pragma omp barrier
535 #pragma omp for private(z,diff) reduction(+:SC1,SC2) schedule(static)
536 for (z=0;z<n;++z) {
537 diff=R_PRES[0][z]-r[z];
538 SC1+=diff*diff;
539 SC2+=diff*r[z];
540 }
541 gamma=(SC1<=ZERO) ? ZERO : -SC2/SC1;
542 #pragma omp for private(z) reduction(+:L2_R) schedule(static)
543 for (z=0;z<n;++z) {
544 x[z]+=gamma*(X_PRES[0][z]-x[z]);
545 r[z]+=gamma*(R_PRES[0][z]-r[z]);
546 L2_R+=r[z]*r[z];
547 }
548 norm_of_residual=sqrt(L2_R);
549 convergeFlag = (norm_of_residual <= tol);
550 if (restart>0) restartFlag=(num_iter_restart >= restart);
551 } else {
552 convergeFlag=FALSE;
553 restartFlag=FALSE;
554 }
555 maxIterFlag = (num_iter >= maxit);
556 }
557 }
558 /* end of iteration */
559 #pragma omp master
560 {
561 Norm_of_residual_global=norm_of_residual;
562 Num_iter_global=num_iter;
563 if (maxIterFlag) {
564 Status = SOLVER_MAXITER_REACHED;
565 } else if (breakFlag) {
566 Status = SOLVER_BREAKDOWN;
567 }
568 }
569 } /* end of parallel region */
570 TMPMEMFREE(AP);
571 for (i=0;i<Length_of_recursion;i++) {
572 TMPMEMFREE(X_PRES[i]);
573 TMPMEMFREE(R_PRES[i]);
574 TMPMEMFREE(P_PRES[i]);
575 }
576 *iter=Num_iter_global;
577 *tolerance=Norm_of_residual_global;
578 }
579 return Status;
580 }

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26