/[escript]/trunk/paso/src/GMRES.c
ViewVC logotype

Contents of /trunk/paso/src/GMRES.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 700 - (show annotations)
Thu Apr 6 00:13:40 2006 UTC (13 years, 10 months ago) by gross
File MIME type: text/plain
File size: 23862 byte(s)
A few changes in the build mechanism and the file structure so scons can build release tar files:

  * paso/src/Solver has been moved to paso/src 
  * all test_.py are now run_.py files and are assumed to be passing python tests. they can run by 
    scons py_tests and are part of the release test set
  * escript/py_src/test_ are moved to escript/test/python and are installed in to the build directory 
    (rather then the PYTHONPATH).
  * all py files in test/python which don't start with run_ or test_ are now 'local_py_tests'. they are installed i
    by not run automatically.
  * CppUnitTest is now treated as a escript module (against previous decisions).
  * scons realse builds nor tar/zip files with relvant source code (src and tests in seperate files)

the python tests don't pass yet due to path problems.


1 /* $Id$ */
2
3
4 /*
5 ********************************************************************************
6 * Copyright 2006 by ACcESS MNRF *
7 * *
8 * http://www.access.edu.au *
9 * Primary Business: Queensland, Australia *
10 * Licensed under the Open Software License version 3.0 *
11 * http://www.opensource.org/licenses/osl-3.0.php *
12 ********************************************************************************
13 */
14
15 /*
16 * Purpose
17 * =======
18 *
19 * GMRES solves the linear system A*x=b using the
20 * truncated and restered GMRES method with preconditioning.
21 *
22 * Convergence test: norm( b - A*x )< TOL.
23 *
24 *
25 *
26 * Arguments
27 * =========
28 *
29 * r (input/output) double array, dimension n.
30 * On entry, residual of inital guess X
31 *
32 * x (input/output) double array, dimension n.
33 * On input, the initial guess.
34 *
35 * iter (input/output) int
36 * On input, the maximum num_iterations to be performed.
37 * On output, actual number of num_iterations performed.
38 *
39 * tolerance (input/output) DOUBLE PRECISION
40 * On input, the allowable convergence measure for
41 * norm( b - A*x )
42 * On output, the final value of this measure.
43 *
44 * Length_of_recursion (input) gives the number of residual to be kept in orthogonalization process
45 *
46 * restart (input) If restart>0, iteration is resterted a after restart steps.
47 *
48 * INFO (output) int
49 *
50 * =SOLVER_NO_ERROR: Successful exit. num_iterated approximate solution returned.
51 * =SOLVER_MAXNUM_ITER_REACHED
52 * =SOLVER_INPUT_ERROR Illegal parameter:
53 * =SOLVER_BREAKDOWN: bad luck!
54 * =SOLVER_MEMORY_ERROR : no memory available
55 *
56 * ==============================================================
57 */
58
59 #include "Common.h"
60 #include "SystemMatrix.h"
61 #include "Solver.h"
62 #ifdef _OPENMP
63 #include <omp.h>
64 #endif
65
66 err_t Paso_Solver_GMRES(
67 Paso_SystemMatrix * A,
68 double * r,
69 double * x,
70 dim_t *iter,
71 double * tolerance,dim_t Length_of_recursion,dim_t restart,
72 Paso_Performance* pp) {
73
74 /* Local variables */
75
76 double *AP,*X_PRES[MAX(Length_of_recursion,0)+1],*R_PRES[MAX(Length_of_recursion,0)+1],*P_PRES[MAX(Length_of_recursion,0)+1];
77 double P_PRES_dot_AP[MAX(Length_of_recursion,0)],R_PRES_dot_P_PRES[MAX(Length_of_recursion,0)+1],BREAKF[MAX(Length_of_recursion,0)+1],ALPHA[MAX(Length_of_recursion,0)];
78 double P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3,P_PRES_dot_AP4,P_PRES_dot_AP5,P_PRES_dot_AP6,R_PRES_dot_P,abs_RP,breakf0;
79 double tol,Factor,sum_BREAKF,gamma,SC1,SC2,norm_of_residual,diff,L2_R,Norm_of_residual_global;
80 double *save_XPRES, *save_P_PRES, *save_R_PRES,save_R_PRES_dot_P_PRES;
81 dim_t maxit,Num_iter_global,num_iter_restart,num_iter;
82 dim_t i,z,order;
83 bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE,restartFlag=FALSE;
84 err_t Status=SOLVER_NO_ERROR;
85
86 /* adapt original routine parameters */
87
88 dim_t n=A->num_cols * A-> col_block_size;
89 dim_t Length_of_mem=MAX(Length_of_recursion,0)+1;
90
91 /* Test the input parameters. */
92 if (restart>0) restart=MAX(Length_of_recursion,restart);
93 if (n < 0 || Length_of_recursion<=0) {
94 return SOLVER_INPUT_ERROR;
95 }
96
97 /* allocate memory: */
98
99 AP=TMPMEMALLOC(n,double);
100 if (AP==NULL) Status=SOLVER_MEMORY_ERROR;
101 for (i=0;i<Length_of_mem;i++) {
102 X_PRES[i]=TMPMEMALLOC(n,double);
103 R_PRES[i]=TMPMEMALLOC(n,double);
104 P_PRES[i]=TMPMEMALLOC(n,double);
105 if (X_PRES[i]==NULL || R_PRES[i]==NULL || P_PRES[i]==NULL) Status=SOLVER_MEMORY_ERROR;
106 }
107 if ( Status ==SOLVER_NO_ERROR ) {
108
109 /* now PRES starts : */
110 maxit=*iter;
111 tol=*tolerance;
112
113 #pragma omp parallel firstprivate(maxit,tol,convergeFlag,maxIterFlag,breakFlag) \
114 private(num_iter,i,num_iter_restart,order,sum_BREAKF,gamma,restartFlag,norm_of_residual,abs_RP,breakf0,\
115 save_XPRES,save_P_PRES,save_R_PRES,save_R_PRES_dot_P_PRES)
116 {
117 /* initialization */
118
119 restartFlag=TRUE;
120 num_iter=0;
121 #pragma omp for private(z) schedule(static) nowait
122 for (z=0; z < n; ++z) AP[z]=0;
123 for(i=0;i<Length_of_mem;++i) {
124 #pragma omp for private(z) schedule(static) nowait
125 for (z=0; z < n; ++z) {
126 P_PRES[i][z]=0;
127 R_PRES[i][z]=0;
128 X_PRES[i][z]=0;
129 }
130 }
131
132 while (! (convergeFlag || maxIterFlag || breakFlag)) {
133 #pragma omp barrier
134 if (restartFlag) {
135 #pragma omp master
136 BREAKF[0]=ONE;
137 #pragma omp for private(z) schedule(static) nowait
138 for (z=0; z < n; ++z) {
139 R_PRES[0][z]=r[z];
140 X_PRES[0][z]=x[z];
141 }
142 num_iter_restart=0;
143 restartFlag=FALSE;
144 }
145 ++num_iter;
146 ++num_iter_restart;
147 /* order is the dimension of the space on which the residual is minimized: */
148 order=MIN(num_iter_restart,Length_of_recursion);
149 /***
150 *** calculate new search direction P from R_PRES
151 ***/
152 #pragma omp barrier
153 Paso_Solver_solvePreconditioner(A,&P_PRES[0][0], &R_PRES[0][0]);
154 /***
155 *** apply A to P to get AP
156 ***/
157 #pragma omp barrier
158 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, &P_PRES[0][0],ZERO, &AP[0]);
159 /***
160 ***** calculation of the norm of R and the scalar products of
161 *** the residuals and A*P:
162 ***/
163 if (order==0) {
164 #pragma omp master
165 R_PRES_dot_P=ZERO;
166 #pragma omp barrier
167 #pragma omp for private(z) reduction(+:R_PRES_dot_P) schedule(static)
168 for (z=0;z<n;++z)
169 R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
170 #pragma omp master
171 R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
172 } else if (order==1) {
173 #pragma omp master
174 {
175 R_PRES_dot_P=ZERO;
176 P_PRES_dot_AP0=ZERO;
177 }
178 #pragma omp barrier
179 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0) schedule(static)
180 for (z=0;z<n;++z) {
181 R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
182 P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
183 }
184 #pragma omp master
185 {
186 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
187 R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
188 }
189 } else if (order==2) {
190 #pragma omp master
191 {
192 R_PRES_dot_P=ZERO;
193 P_PRES_dot_AP0=ZERO;
194 P_PRES_dot_AP1=ZERO;
195 }
196 #pragma omp barrier
197 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1) schedule(static)
198 for (z=0;z<n;++z) {
199 R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
200 P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
201 P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
202 }
203 #pragma omp master
204 {
205 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
206 P_PRES_dot_AP[1]=P_PRES_dot_AP1;
207 R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
208 }
209 } else if (order==3) {
210 #pragma omp master
211 {
212 R_PRES_dot_P=ZERO;
213 P_PRES_dot_AP0=ZERO;
214 P_PRES_dot_AP1=ZERO;
215 P_PRES_dot_AP2=ZERO;
216 }
217 #pragma omp barrier
218 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2) schedule(static)
219 for (z=0;z<n;++z) {
220 R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
221 P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
222 P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
223 P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
224 }
225 #pragma omp master
226 {
227 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
228 P_PRES_dot_AP[1]=P_PRES_dot_AP1;
229 P_PRES_dot_AP[2]=P_PRES_dot_AP2;
230 R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
231 }
232 } else if (order==4) {
233 #pragma omp master
234 {
235 R_PRES_dot_P=ZERO;
236 P_PRES_dot_AP0=ZERO;
237 P_PRES_dot_AP1=ZERO;
238 P_PRES_dot_AP2=ZERO;
239 P_PRES_dot_AP3=ZERO;
240 }
241 #pragma omp barrier
242 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3) schedule(static)
243 for (z=0;z<n;++z) {
244 R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
245 P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
246 P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
247 P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
248 P_PRES_dot_AP3+=P_PRES[3][z]*AP[z];
249 }
250 #pragma omp master
251 {
252 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
253 P_PRES_dot_AP[1]=P_PRES_dot_AP1;
254 P_PRES_dot_AP[2]=P_PRES_dot_AP2;
255 P_PRES_dot_AP[3]=P_PRES_dot_AP3;
256 R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
257 }
258 } else if (order==5) {
259 #pragma omp master
260 {
261 R_PRES_dot_P=ZERO;
262 P_PRES_dot_AP0=ZERO;
263 P_PRES_dot_AP1=ZERO;
264 P_PRES_dot_AP2=ZERO;
265 P_PRES_dot_AP3=ZERO;
266 P_PRES_dot_AP4=ZERO;
267 }
268 #pragma omp barrier
269 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3,P_PRES_dot_AP4) schedule(static)
270 for (z=0;z<n;++z) {
271 R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
272 P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
273 P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
274 P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
275 P_PRES_dot_AP3+=P_PRES[3][z]*AP[z];
276 P_PRES_dot_AP4+=P_PRES[4][z]*AP[z];
277 }
278 #pragma omp master
279 {
280 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
281 P_PRES_dot_AP[1]=P_PRES_dot_AP1;
282 P_PRES_dot_AP[2]=P_PRES_dot_AP2;
283 P_PRES_dot_AP[3]=P_PRES_dot_AP3;
284 P_PRES_dot_AP[4]=P_PRES_dot_AP4;
285 R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
286 }
287 } else if (order==6) {
288 #pragma omp master
289 {
290 R_PRES_dot_P=ZERO;
291 P_PRES_dot_AP0=ZERO;
292 P_PRES_dot_AP1=ZERO;
293 P_PRES_dot_AP2=ZERO;
294 P_PRES_dot_AP3=ZERO;
295 P_PRES_dot_AP4=ZERO;
296 P_PRES_dot_AP5=ZERO;
297 }
298 #pragma omp barrier
299 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3,P_PRES_dot_AP4,P_PRES_dot_AP5) schedule(static)
300 for (z=0;z<n;++z) {
301 R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
302 P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
303 P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
304 P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
305 P_PRES_dot_AP3+=P_PRES[3][z]*AP[z];
306 P_PRES_dot_AP4+=P_PRES[4][z]*AP[z];
307 P_PRES_dot_AP5+=P_PRES[5][z]*AP[z];
308 }
309 #pragma omp master
310 {
311 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
312 P_PRES_dot_AP[1]=P_PRES_dot_AP1;
313 P_PRES_dot_AP[2]=P_PRES_dot_AP2;
314 P_PRES_dot_AP[3]=P_PRES_dot_AP3;
315 P_PRES_dot_AP[4]=P_PRES_dot_AP4;
316 P_PRES_dot_AP[5]=P_PRES_dot_AP5;
317 R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
318 }
319 } else if (order>6) {
320 #pragma omp master
321 {
322 R_PRES_dot_P=ZERO;
323 P_PRES_dot_AP0=ZERO;
324 P_PRES_dot_AP1=ZERO;
325 P_PRES_dot_AP2=ZERO;
326 P_PRES_dot_AP3=ZERO;
327 P_PRES_dot_AP4=ZERO;
328 P_PRES_dot_AP5=ZERO;
329 P_PRES_dot_AP6=ZERO;
330 }
331 #pragma omp barrier
332 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3,P_PRES_dot_AP4,P_PRES_dot_AP5,P_PRES_dot_AP6) schedule(static)
333 for (z=0;z<n;++z) {
334 R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
335 P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
336 P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
337 P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
338 P_PRES_dot_AP3+=P_PRES[3][z]*AP[z];
339 P_PRES_dot_AP4+=P_PRES[4][z]*AP[z];
340 P_PRES_dot_AP5+=P_PRES[5][z]*AP[z];
341 P_PRES_dot_AP6+=P_PRES[6][z]*AP[z];
342 }
343 #pragma omp master
344 {
345 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
346 P_PRES_dot_AP[1]=P_PRES_dot_AP1;
347 P_PRES_dot_AP[2]=P_PRES_dot_AP2;
348 P_PRES_dot_AP[3]=P_PRES_dot_AP3;
349 P_PRES_dot_AP[4]=P_PRES_dot_AP4;
350 P_PRES_dot_AP[5]=P_PRES_dot_AP5;
351 P_PRES_dot_AP[6]=P_PRES_dot_AP6;
352 R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
353
354 P_PRES_dot_AP0=ZERO;
355 }
356 for (i=7;i<order;++i) {
357 #pragma omp barrier
358 #pragma omp for private(z) reduction(+:P_PRES_dot_AP0) schedule(static)
359 for (z=0;z<n;++z) P_PRES_dot_AP0+=P_PRES[i][z]*AP[z];
360 #pragma omp master
361 {
362 P_PRES_dot_AP[i]=P_PRES_dot_AP0;
363 P_PRES_dot_AP0=ZERO;
364 }
365 }
366 }
367 /* this fixes a problem with the intel compiler */
368 #pragma omp master
369 P_PRES_dot_AP0=R_PRES_dot_P_PRES[0];
370 /*** if sum_BREAKF is equal to zero a breakdown occurs.
371 *** iteration procedure can be continued but R_PRES is not the
372 *** residual of X_PRES approximation.
373 ***/
374 #pragma omp barrier
375 sum_BREAKF=0.;
376 for (i=0;i<order;++i) sum_BREAKF +=BREAKF[i];
377 breakFlag=!((ABS(P_PRES_dot_AP0) > ZERO) && (sum_BREAKF >ZERO));
378 if (!breakFlag) {
379 breakFlag=FALSE;
380 /***
381 ***** X_PRES and R_PRES are moved to memory:
382 ***/
383 #pragma omp master
384 {
385 Factor=0.;
386 for (i=0;i<order;++i) {
387 ALPHA[i]=-P_PRES_dot_AP[i]/R_PRES_dot_P_PRES[i];
388 Factor+=BREAKF[i]*ALPHA[i];
389 }
390
391 save_R_PRES_dot_P_PRES=R_PRES_dot_P_PRES[Length_of_mem-1];
392 save_R_PRES=R_PRES[Length_of_mem-1];
393 save_XPRES=X_PRES[Length_of_mem-1];
394 save_P_PRES=P_PRES[Length_of_mem-1];
395 for (i=Length_of_mem-1;i>0;--i) {
396 BREAKF[i]=BREAKF[i-1];
397 R_PRES_dot_P_PRES[i]=R_PRES_dot_P_PRES[i-1];
398 R_PRES[i]=R_PRES[i-1];
399 X_PRES[i]=X_PRES[i-1];
400 P_PRES[i]=P_PRES[i-1];
401 }
402 R_PRES_dot_P_PRES[0]=save_R_PRES_dot_P_PRES;
403 R_PRES[0]=save_R_PRES;
404 X_PRES[0]=save_XPRES;
405 P_PRES[0]=save_P_PRES;
406
407 if (ABS(Factor)<=ZERO) {
408 Factor=1.;
409 BREAKF[0]=ZERO;
410 } else {
411 Factor=1./Factor;
412 BREAKF[0]=ONE;
413 }
414 for (i=0;i<order;++i) ALPHA[i]*=Factor;
415 }
416 /*
417 ***** update of solution X_PRES and its residual R_PRES:
418 ***
419 *** P is used to accumulate X and AP to accumulate R. X and R
420 *** are still needed until they are put into the X and R memory
421 *** R_PRES and X_PRES
422 ***
423 **/
424 #pragma omp barrier
425 breakf0=BREAKF[0];
426 if (order==0) {
427 #pragma omp for private(z) schedule(static)
428 for (z=0;z<n;++z) {
429 R_PRES[0][z]= Factor* AP[z];
430 X_PRES[0][z]=-Factor*P_PRES[1][z];
431 }
432 } else if (order==1) {
433 #pragma omp for private(z) schedule(static)
434 for (z=0;z<n;++z) {
435 R_PRES[0][z]= Factor* AP[z]+ALPHA[0]*R_PRES[1][z];
436 X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z];
437 }
438 } else if (order==2) {
439 #pragma omp for private(z) schedule(static)
440 for (z=0;z<n;++z) {
441 R_PRES[0][z]= Factor* AP[z]+ALPHA[0]*R_PRES[1][z]
442 +ALPHA[1]*R_PRES[2][z];
443 X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
444 +ALPHA[1]*X_PRES[2][z];
445 }
446 } else if (order==3) {
447 #pragma omp for private(z) schedule(static)
448 for (z=0;z<n;++z) {
449 R_PRES[0][z]= Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
450 +ALPHA[1]*R_PRES[2][z]
451 +ALPHA[2]*R_PRES[3][z];
452 X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
453 +ALPHA[1]*X_PRES[2][z]
454 +ALPHA[2]*X_PRES[3][z];
455 }
456 } else if (order==4) {
457 #pragma omp for private(z) schedule(static)
458 for (z=0;z<n;++z) {
459 R_PRES[0][z]= Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
460 +ALPHA[1]*R_PRES[2][z]
461 +ALPHA[2]*R_PRES[3][z]
462 +ALPHA[3]*R_PRES[4][z];
463 X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
464 +ALPHA[1]*X_PRES[2][z]
465 +ALPHA[2]*X_PRES[3][z]
466 +ALPHA[3]*X_PRES[4][z];
467 }
468 } else if (order==5) {
469 #pragma omp for private(z) schedule(static)
470 for (z=0;z<n;++z) {
471 R_PRES[0][z]=Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
472 +ALPHA[1]*R_PRES[2][z]
473 +ALPHA[2]*R_PRES[3][z]
474 +ALPHA[3]*R_PRES[4][z]
475 +ALPHA[4]*R_PRES[5][z];
476 X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
477 +ALPHA[1]*X_PRES[2][z]
478 +ALPHA[2]*X_PRES[3][z]
479 +ALPHA[3]*X_PRES[4][z]
480 +ALPHA[4]*X_PRES[5][z];
481 }
482 } else if (order==6) {
483 #pragma omp for private(z) schedule(static)
484 for (z=0;z<n;++z) {
485 R_PRES[0][z]=Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
486 +ALPHA[1]*R_PRES[2][z]
487 +ALPHA[2]*R_PRES[3][z]
488 +ALPHA[3]*R_PRES[4][z]
489 +ALPHA[4]*R_PRES[5][z]
490 +ALPHA[5]*R_PRES[6][z];
491 X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
492 +ALPHA[1]*X_PRES[2][z]
493 +ALPHA[2]*X_PRES[3][z]
494 +ALPHA[3]*X_PRES[4][z]
495 +ALPHA[4]*X_PRES[5][z]
496 +ALPHA[5]*X_PRES[6][z];
497 }
498 } else if (order>6) {
499 #pragma omp for private(z) schedule(static)
500 for (z=0;z<n;++z) {
501 R_PRES[0][z]=Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
502 +ALPHA[1]*R_PRES[2][z]
503 +ALPHA[2]*R_PRES[3][z]
504 +ALPHA[3]*R_PRES[4][z]
505 +ALPHA[4]*R_PRES[5][z]
506 +ALPHA[5]*R_PRES[6][z]
507 +ALPHA[6]*R_PRES[7][z];
508 X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
509 +ALPHA[1]*X_PRES[2][z]
510 +ALPHA[2]*X_PRES[3][z]
511 +ALPHA[3]*X_PRES[4][z]
512 +ALPHA[4]*X_PRES[5][z]
513 +ALPHA[5]*X_PRES[6][z]
514 +ALPHA[6]*X_PRES[7][z];
515 }
516 for (i=7;i<order;++i) {
517 #pragma omp for private(z) schedule(static)
518 for (z=0;z<n;++z) {
519 R_PRES[0][z]+=ALPHA[i]*R_PRES[i+1][z];
520 X_PRES[0][z]+=ALPHA[i]*X_PRES[i+1][z];
521 }
522 }
523 }
524 if (breakf0>0.) {
525 /***
526 ***** calculate gamma from min_(gamma){|R+gamma*(R_PRES-R)|_2}:
527 ***/
528 #pragma omp master
529 {
530 SC1=ZERO;
531 SC2=ZERO;
532 L2_R=ZERO;
533 }
534 #pragma omp barrier
535 #pragma omp for private(z,diff) reduction(+:SC1,SC2) schedule(static)
536 for (z=0;z<n;++z) {
537 diff=R_PRES[0][z]-r[z];
538 SC1+=diff*diff;
539 SC2+=diff*r[z];
540 }
541 gamma=(SC1<=ZERO) ? ZERO : -SC2/SC1;
542 #pragma omp for private(z) reduction(+:L2_R) schedule(static)
543 for (z=0;z<n;++z) {
544 x[z]+=gamma*(X_PRES[0][z]-x[z]);
545 r[z]+=gamma*(R_PRES[0][z]-r[z]);
546 L2_R+=r[z]*r[z];
547 }
548 norm_of_residual=sqrt(L2_R);
549 convergeFlag = (norm_of_residual <= tol);
550 if (restart>0) restartFlag=(num_iter_restart >= restart);
551 } else {
552 convergeFlag=FALSE;
553 restartFlag=FALSE;
554 }
555 maxIterFlag = (num_iter >= maxit);
556 }
557 }
558 /* end of iteration */
559 #pragma omp master
560 {
561 Norm_of_residual_global=norm_of_residual;
562 Num_iter_global=num_iter;
563 if (maxIterFlag) {
564 Status = SOLVER_MAXITER_REACHED;
565 } else if (breakFlag) {
566 Status = SOLVER_BREAKDOWN;
567 }
568 }
569 } /* end of parallel region */
570 TMPMEMFREE(AP);
571 for (i=0;i<Length_of_recursion;i++) {
572 TMPMEMFREE(X_PRES[i]);
573 TMPMEMFREE(R_PRES[i]);
574 TMPMEMFREE(P_PRES[i]);
575 }
576 *iter=Num_iter_global;
577 *tolerance=Norm_of_residual_global;
578 }
579 return Status;
580 }

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26