/[escript]/trunk/paso/src/GMRES.c
ViewVC logotype

Contents of /trunk/paso/src/GMRES.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1028 - (show annotations)
Wed Mar 14 00:15:24 2007 UTC (12 years, 5 months ago) by gross
File MIME type: text/plain
File size: 24375 byte(s)
modifications to be compliant with _WIN32. The substitutes for asinh, acosh, atanh are still missing (erf will through an exception)
1 /* $Id$ */
2
3
4 /*
5 ********************************************************************************
6 * Copyright 2006 by ACcESS MNRF *
7 * *
8 * http://www.access.edu.au *
9 * Primary Business: Queensland, Australia *
10 * Licensed under the Open Software License version 3.0 *
11 * http://www.opensource.org/licenses/osl-3.0.php *
12 ********************************************************************************
13 */
14
15 /*
16 * Purpose
17 * =======
18 *
19 * GMRES solves the linear system A*x=b using the
20 * truncated and restered GMRES method with preconditioning.
21 *
22 * Convergence test: norm( b - A*x )< TOL.
23 *
24 *
25 *
26 * Arguments
27 * =========
28 *
29 * r (input/output) double array, dimension n.
30 * On entry, residual of inital guess X
31 *
32 * x (input/output) double array, dimension n.
33 * On input, the initial guess.
34 *
35 * iter (input/output) int
36 * On input, the maximum num_iterations to be performed.
37 * On output, actual number of num_iterations performed.
38 *
39 * tolerance (input/output) DOUBLE PRECISION
40 * On input, the allowable convergence measure for
41 * norm( b - A*x )
42 * On output, the final value of this measure.
43 *
44 * Length_of_recursion (input) gives the number of residual to be kept in orthogonalization process
45 *
46 * restart (input) If restart>0, iteration is resterted a after restart steps.
47 *
48 * INFO (output) int
49 *
50 * =SOLVER_NO_ERROR: Successful exit. num_iterated approximate solution returned.
51 * =SOLVER_MAXNUM_ITER_REACHED
52 * =SOLVER_INPUT_ERROR Illegal parameter:
53 * =SOLVER_BREAKDOWN: bad luck!
54 * =SOLVER_MEMORY_ERROR : no memory available
55 *
56 * ==============================================================
57 */
58
59 #include "Common.h"
60 #include "SystemMatrix.h"
61 #include "Solver.h"
62 #ifdef _OPENMP
63 #include <omp.h>
64 #endif
65
66 err_t Paso_Solver_GMRES(
67 Paso_SystemMatrix * A,
68 double * r,
69 double * x,
70 dim_t *iter,
71 double * tolerance,dim_t Length_of_recursion,dim_t restart,
72 Paso_Performance* pp) {
73
74 /* Local variables */
75
76 double *AP,**X_PRES,**R_PRES,**P_PRES;
77 double *P_PRES_dot_AP,*R_PRES_dot_P_PRES,*BREAKF,*ALPHA;
78 double P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3,P_PRES_dot_AP4,P_PRES_dot_AP5,P_PRES_dot_AP6,R_PRES_dot_P,abs_RP,breakf0;
79 double tol,Factor,sum_BREAKF,gamma,SC1,SC2,norm_of_residual,diff,L2_R,Norm_of_residual_global;
80 double *save_XPRES, *save_P_PRES, *save_R_PRES,save_R_PRES_dot_P_PRES;
81 dim_t maxit,Num_iter_global,num_iter_restart,num_iter;
82 dim_t i,z,order,n, Length_of_mem;
83 bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE,restartFlag=FALSE;
84 err_t Status=SOLVER_NO_ERROR;
85
86
87 /* adapt original routine parameters */
88
89 n=A->num_cols * A-> col_block_size;
90 Length_of_mem=MAX(Length_of_recursion,0)+1;
91
92
93 /* Test the input parameters. */
94 if (restart>0) restart=MAX(Length_of_recursion,restart);
95 if (n < 0 || Length_of_recursion<=0) {
96 return SOLVER_INPUT_ERROR;
97 }
98
99 /* allocate memory: */
100 X_PRES=TMPMEMALLOC(Length_of_mem,double*);
101 R_PRES=TMPMEMALLOC(Length_of_mem,double*);
102 P_PRES=TMPMEMALLOC(Length_of_mem,double*);
103 P_PRES_dot_AP=TMPMEMALLOC(Length_of_mem,double);
104 R_PRES_dot_P_PRES=TMPMEMALLOC(Length_of_mem,double);
105 BREAKF=TMPMEMALLOC(Length_of_mem,double);
106 ALPHA=TMPMEMALLOC(Length_of_mem,double);
107 AP=TMPMEMALLOC(n,double);
108 if (AP==NULL || X_PRES ==NULL || R_PRES == NULL || P_PRES == NULL ||
109 P_PRES_dot_AP == NULL || R_PRES_dot_P_PRES ==NULL || BREAKF == NULL || ALPHA == NULL) {
110 Status=SOLVER_MEMORY_ERROR;
111 } else {
112 for (i=0;i<Length_of_mem;i++) {
113 X_PRES[i]=TMPMEMALLOC(n,double);
114 R_PRES[i]=TMPMEMALLOC(n,double);
115 P_PRES[i]=TMPMEMALLOC(n,double);
116 if (X_PRES[i]==NULL || R_PRES[i]==NULL || P_PRES[i]==NULL) Status=SOLVER_MEMORY_ERROR;
117 }
118 }
119 if ( Status ==SOLVER_NO_ERROR ) {
120
121 /* now PRES starts : */
122 maxit=*iter;
123 tol=*tolerance;
124
125 #pragma omp parallel firstprivate(maxit,tol,convergeFlag,maxIterFlag,breakFlag) \
126 private(num_iter,i,num_iter_restart,order,sum_BREAKF,gamma,restartFlag,norm_of_residual,abs_RP,breakf0,\
127 save_XPRES,save_P_PRES,save_R_PRES,save_R_PRES_dot_P_PRES)
128 {
129 /* initialization */
130
131 restartFlag=TRUE;
132 num_iter=0;
133 #pragma omp for private(z) schedule(static) nowait
134 for (z=0; z < n; ++z) AP[z]=0;
135 for(i=0;i<Length_of_mem;++i) {
136 #pragma omp for private(z) schedule(static) nowait
137 for (z=0; z < n; ++z) {
138 P_PRES[i][z]=0;
139 R_PRES[i][z]=0;
140 X_PRES[i][z]=0;
141 }
142 }
143
144 while (! (convergeFlag || maxIterFlag || breakFlag)) {
145 #pragma omp barrier
146 if (restartFlag) {
147 #pragma omp master
148 BREAKF[0]=ONE;
149 #pragma omp for private(z) schedule(static) nowait
150 for (z=0; z < n; ++z) {
151 R_PRES[0][z]=r[z];
152 X_PRES[0][z]=x[z];
153 }
154 num_iter_restart=0;
155 restartFlag=FALSE;
156 }
157 ++num_iter;
158 ++num_iter_restart;
159 /* order is the dimension of the space on which the residual is minimized: */
160 order=MIN(num_iter_restart,Length_of_recursion);
161 /***
162 *** calculate new search direction P from R_PRES
163 ***/
164 #pragma omp barrier
165 Paso_Solver_solvePreconditioner(A,&P_PRES[0][0], &R_PRES[0][0]);
166 /***
167 *** apply A to P to get AP
168 ***/
169 #pragma omp barrier
170 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, &P_PRES[0][0],ZERO, &AP[0]);
171 /***
172 ***** calculation of the norm of R and the scalar products of
173 *** the residuals and A*P:
174 ***/
175 if (order==0) {
176 #pragma omp master
177 R_PRES_dot_P=ZERO;
178 #pragma omp barrier
179 #pragma omp for private(z) reduction(+:R_PRES_dot_P) schedule(static)
180 for (z=0;z<n;++z)
181 R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
182 #pragma omp master
183 R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
184 } else if (order==1) {
185 #pragma omp master
186 {
187 R_PRES_dot_P=ZERO;
188 P_PRES_dot_AP0=ZERO;
189 }
190 #pragma omp barrier
191 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0) schedule(static)
192 for (z=0;z<n;++z) {
193 R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
194 P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
195 }
196 #pragma omp master
197 {
198 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
199 R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
200 }
201 } else if (order==2) {
202 #pragma omp master
203 {
204 R_PRES_dot_P=ZERO;
205 P_PRES_dot_AP0=ZERO;
206 P_PRES_dot_AP1=ZERO;
207 }
208 #pragma omp barrier
209 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1) schedule(static)
210 for (z=0;z<n;++z) {
211 R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
212 P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
213 P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
214 }
215 #pragma omp master
216 {
217 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
218 P_PRES_dot_AP[1]=P_PRES_dot_AP1;
219 R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
220 }
221 } else if (order==3) {
222 #pragma omp master
223 {
224 R_PRES_dot_P=ZERO;
225 P_PRES_dot_AP0=ZERO;
226 P_PRES_dot_AP1=ZERO;
227 P_PRES_dot_AP2=ZERO;
228 }
229 #pragma omp barrier
230 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2) schedule(static)
231 for (z=0;z<n;++z) {
232 R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
233 P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
234 P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
235 P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
236 }
237 #pragma omp master
238 {
239 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
240 P_PRES_dot_AP[1]=P_PRES_dot_AP1;
241 P_PRES_dot_AP[2]=P_PRES_dot_AP2;
242 R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
243 }
244 } else if (order==4) {
245 #pragma omp master
246 {
247 R_PRES_dot_P=ZERO;
248 P_PRES_dot_AP0=ZERO;
249 P_PRES_dot_AP1=ZERO;
250 P_PRES_dot_AP2=ZERO;
251 P_PRES_dot_AP3=ZERO;
252 }
253 #pragma omp barrier
254 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3) schedule(static)
255 for (z=0;z<n;++z) {
256 R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
257 P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
258 P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
259 P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
260 P_PRES_dot_AP3+=P_PRES[3][z]*AP[z];
261 }
262 #pragma omp master
263 {
264 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
265 P_PRES_dot_AP[1]=P_PRES_dot_AP1;
266 P_PRES_dot_AP[2]=P_PRES_dot_AP2;
267 P_PRES_dot_AP[3]=P_PRES_dot_AP3;
268 R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
269 }
270 } else if (order==5) {
271 #pragma omp master
272 {
273 R_PRES_dot_P=ZERO;
274 P_PRES_dot_AP0=ZERO;
275 P_PRES_dot_AP1=ZERO;
276 P_PRES_dot_AP2=ZERO;
277 P_PRES_dot_AP3=ZERO;
278 P_PRES_dot_AP4=ZERO;
279 }
280 #pragma omp barrier
281 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3,P_PRES_dot_AP4) schedule(static)
282 for (z=0;z<n;++z) {
283 R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
284 P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
285 P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
286 P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
287 P_PRES_dot_AP3+=P_PRES[3][z]*AP[z];
288 P_PRES_dot_AP4+=P_PRES[4][z]*AP[z];
289 }
290 #pragma omp master
291 {
292 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
293 P_PRES_dot_AP[1]=P_PRES_dot_AP1;
294 P_PRES_dot_AP[2]=P_PRES_dot_AP2;
295 P_PRES_dot_AP[3]=P_PRES_dot_AP3;
296 P_PRES_dot_AP[4]=P_PRES_dot_AP4;
297 R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
298 }
299 } else if (order==6) {
300 #pragma omp master
301 {
302 R_PRES_dot_P=ZERO;
303 P_PRES_dot_AP0=ZERO;
304 P_PRES_dot_AP1=ZERO;
305 P_PRES_dot_AP2=ZERO;
306 P_PRES_dot_AP3=ZERO;
307 P_PRES_dot_AP4=ZERO;
308 P_PRES_dot_AP5=ZERO;
309 }
310 #pragma omp barrier
311 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3,P_PRES_dot_AP4,P_PRES_dot_AP5) schedule(static)
312 for (z=0;z<n;++z) {
313 R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
314 P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
315 P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
316 P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
317 P_PRES_dot_AP3+=P_PRES[3][z]*AP[z];
318 P_PRES_dot_AP4+=P_PRES[4][z]*AP[z];
319 P_PRES_dot_AP5+=P_PRES[5][z]*AP[z];
320 }
321 #pragma omp master
322 {
323 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
324 P_PRES_dot_AP[1]=P_PRES_dot_AP1;
325 P_PRES_dot_AP[2]=P_PRES_dot_AP2;
326 P_PRES_dot_AP[3]=P_PRES_dot_AP3;
327 P_PRES_dot_AP[4]=P_PRES_dot_AP4;
328 P_PRES_dot_AP[5]=P_PRES_dot_AP5;
329 R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
330 }
331 } else if (order>6) {
332 #pragma omp master
333 {
334 R_PRES_dot_P=ZERO;
335 P_PRES_dot_AP0=ZERO;
336 P_PRES_dot_AP1=ZERO;
337 P_PRES_dot_AP2=ZERO;
338 P_PRES_dot_AP3=ZERO;
339 P_PRES_dot_AP4=ZERO;
340 P_PRES_dot_AP5=ZERO;
341 P_PRES_dot_AP6=ZERO;
342 }
343 #pragma omp barrier
344 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3,P_PRES_dot_AP4,P_PRES_dot_AP5,P_PRES_dot_AP6) schedule(static)
345 for (z=0;z<n;++z) {
346 R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
347 P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
348 P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
349 P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
350 P_PRES_dot_AP3+=P_PRES[3][z]*AP[z];
351 P_PRES_dot_AP4+=P_PRES[4][z]*AP[z];
352 P_PRES_dot_AP5+=P_PRES[5][z]*AP[z];
353 P_PRES_dot_AP6+=P_PRES[6][z]*AP[z];
354 }
355 #pragma omp master
356 {
357 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
358 P_PRES_dot_AP[1]=P_PRES_dot_AP1;
359 P_PRES_dot_AP[2]=P_PRES_dot_AP2;
360 P_PRES_dot_AP[3]=P_PRES_dot_AP3;
361 P_PRES_dot_AP[4]=P_PRES_dot_AP4;
362 P_PRES_dot_AP[5]=P_PRES_dot_AP5;
363 P_PRES_dot_AP[6]=P_PRES_dot_AP6;
364 R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
365
366 P_PRES_dot_AP0=ZERO;
367 }
368 for (i=7;i<order;++i) {
369 #pragma omp barrier
370 #pragma omp for private(z) reduction(+:P_PRES_dot_AP0) schedule(static)
371 for (z=0;z<n;++z) P_PRES_dot_AP0+=P_PRES[i][z]*AP[z];
372 #pragma omp master
373 {
374 P_PRES_dot_AP[i]=P_PRES_dot_AP0;
375 P_PRES_dot_AP0=ZERO;
376 }
377 }
378 }
379 /* this fixes a problem with the intel compiler */
380 #pragma omp master
381 P_PRES_dot_AP0=R_PRES_dot_P_PRES[0];
382 /*** if sum_BREAKF is equal to zero a breakdown occurs.
383 *** iteration procedure can be continued but R_PRES is not the
384 *** residual of X_PRES approximation.
385 ***/
386 #pragma omp barrier
387 sum_BREAKF=0.;
388 for (i=0;i<order;++i) sum_BREAKF +=BREAKF[i];
389 breakFlag=!((ABS(P_PRES_dot_AP0) > ZERO) && (sum_BREAKF >ZERO));
390 if (!breakFlag) {
391 breakFlag=FALSE;
392 /***
393 ***** X_PRES and R_PRES are moved to memory:
394 ***/
395 #pragma omp master
396 {
397 Factor=0.;
398 for (i=0;i<order;++i) {
399 ALPHA[i]=-P_PRES_dot_AP[i]/R_PRES_dot_P_PRES[i];
400 Factor+=BREAKF[i]*ALPHA[i];
401 }
402
403 save_R_PRES_dot_P_PRES=R_PRES_dot_P_PRES[Length_of_mem-1];
404 save_R_PRES=R_PRES[Length_of_mem-1];
405 save_XPRES=X_PRES[Length_of_mem-1];
406 save_P_PRES=P_PRES[Length_of_mem-1];
407 for (i=Length_of_mem-1;i>0;--i) {
408 BREAKF[i]=BREAKF[i-1];
409 R_PRES_dot_P_PRES[i]=R_PRES_dot_P_PRES[i-1];
410 R_PRES[i]=R_PRES[i-1];
411 X_PRES[i]=X_PRES[i-1];
412 P_PRES[i]=P_PRES[i-1];
413 }
414 R_PRES_dot_P_PRES[0]=save_R_PRES_dot_P_PRES;
415 R_PRES[0]=save_R_PRES;
416 X_PRES[0]=save_XPRES;
417 P_PRES[0]=save_P_PRES;
418
419 if (ABS(Factor)<=ZERO) {
420 Factor=1.;
421 BREAKF[0]=ZERO;
422 } else {
423 Factor=1./Factor;
424 BREAKF[0]=ONE;
425 }
426 for (i=0;i<order;++i) ALPHA[i]*=Factor;
427 }
428 /*
429 ***** update of solution X_PRES and its residual R_PRES:
430 ***
431 *** P is used to accumulate X and AP to accumulate R. X and R
432 *** are still needed until they are put into the X and R memory
433 *** R_PRES and X_PRES
434 ***
435 **/
436 #pragma omp barrier
437 breakf0=BREAKF[0];
438 if (order==0) {
439 #pragma omp for private(z) schedule(static)
440 for (z=0;z<n;++z) {
441 R_PRES[0][z]= Factor* AP[z];
442 X_PRES[0][z]=-Factor*P_PRES[1][z];
443 }
444 } else if (order==1) {
445 #pragma omp for private(z) schedule(static)
446 for (z=0;z<n;++z) {
447 R_PRES[0][z]= Factor* AP[z]+ALPHA[0]*R_PRES[1][z];
448 X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z];
449 }
450 } else if (order==2) {
451 #pragma omp for private(z) schedule(static)
452 for (z=0;z<n;++z) {
453 R_PRES[0][z]= Factor* AP[z]+ALPHA[0]*R_PRES[1][z]
454 +ALPHA[1]*R_PRES[2][z];
455 X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
456 +ALPHA[1]*X_PRES[2][z];
457 }
458 } else if (order==3) {
459 #pragma omp for private(z) schedule(static)
460 for (z=0;z<n;++z) {
461 R_PRES[0][z]= Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
462 +ALPHA[1]*R_PRES[2][z]
463 +ALPHA[2]*R_PRES[3][z];
464 X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
465 +ALPHA[1]*X_PRES[2][z]
466 +ALPHA[2]*X_PRES[3][z];
467 }
468 } else if (order==4) {
469 #pragma omp for private(z) schedule(static)
470 for (z=0;z<n;++z) {
471 R_PRES[0][z]= Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
472 +ALPHA[1]*R_PRES[2][z]
473 +ALPHA[2]*R_PRES[3][z]
474 +ALPHA[3]*R_PRES[4][z];
475 X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
476 +ALPHA[1]*X_PRES[2][z]
477 +ALPHA[2]*X_PRES[3][z]
478 +ALPHA[3]*X_PRES[4][z];
479 }
480 } else if (order==5) {
481 #pragma omp for private(z) schedule(static)
482 for (z=0;z<n;++z) {
483 R_PRES[0][z]=Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
484 +ALPHA[1]*R_PRES[2][z]
485 +ALPHA[2]*R_PRES[3][z]
486 +ALPHA[3]*R_PRES[4][z]
487 +ALPHA[4]*R_PRES[5][z];
488 X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
489 +ALPHA[1]*X_PRES[2][z]
490 +ALPHA[2]*X_PRES[3][z]
491 +ALPHA[3]*X_PRES[4][z]
492 +ALPHA[4]*X_PRES[5][z];
493 }
494 } else if (order==6) {
495 #pragma omp for private(z) schedule(static)
496 for (z=0;z<n;++z) {
497 R_PRES[0][z]=Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
498 +ALPHA[1]*R_PRES[2][z]
499 +ALPHA[2]*R_PRES[3][z]
500 +ALPHA[3]*R_PRES[4][z]
501 +ALPHA[4]*R_PRES[5][z]
502 +ALPHA[5]*R_PRES[6][z];
503 X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
504 +ALPHA[1]*X_PRES[2][z]
505 +ALPHA[2]*X_PRES[3][z]
506 +ALPHA[3]*X_PRES[4][z]
507 +ALPHA[4]*X_PRES[5][z]
508 +ALPHA[5]*X_PRES[6][z];
509 }
510 } else if (order>6) {
511 #pragma omp for private(z) schedule(static)
512 for (z=0;z<n;++z) {
513 R_PRES[0][z]=Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
514 +ALPHA[1]*R_PRES[2][z]
515 +ALPHA[2]*R_PRES[3][z]
516 +ALPHA[3]*R_PRES[4][z]
517 +ALPHA[4]*R_PRES[5][z]
518 +ALPHA[5]*R_PRES[6][z]
519 +ALPHA[6]*R_PRES[7][z];
520 X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
521 +ALPHA[1]*X_PRES[2][z]
522 +ALPHA[2]*X_PRES[3][z]
523 +ALPHA[3]*X_PRES[4][z]
524 +ALPHA[4]*X_PRES[5][z]
525 +ALPHA[5]*X_PRES[6][z]
526 +ALPHA[6]*X_PRES[7][z];
527 }
528 for (i=7;i<order;++i) {
529 #pragma omp for private(z) schedule(static)
530 for (z=0;z<n;++z) {
531 R_PRES[0][z]+=ALPHA[i]*R_PRES[i+1][z];
532 X_PRES[0][z]+=ALPHA[i]*X_PRES[i+1][z];
533 }
534 }
535 }
536 if (breakf0>0.) {
537 /***
538 ***** calculate gamma from min_(gamma){|R+gamma*(R_PRES-R)|_2}:
539 ***/
540 #pragma omp master
541 {
542 SC1=ZERO;
543 SC2=ZERO;
544 L2_R=ZERO;
545 }
546 #pragma omp barrier
547 #pragma omp for private(z,diff) reduction(+:SC1,SC2) schedule(static)
548 for (z=0;z<n;++z) {
549 diff=R_PRES[0][z]-r[z];
550 SC1+=diff*diff;
551 SC2+=diff*r[z];
552 }
553 gamma=(SC1<=ZERO) ? ZERO : -SC2/SC1;
554 #pragma omp for private(z) reduction(+:L2_R) schedule(static)
555 for (z=0;z<n;++z) {
556 x[z]+=gamma*(X_PRES[0][z]-x[z]);
557 r[z]+=gamma*(R_PRES[0][z]-r[z]);
558 L2_R+=r[z]*r[z];
559 }
560 norm_of_residual=sqrt(L2_R);
561 convergeFlag = (norm_of_residual <= tol);
562 if (restart>0) restartFlag=(num_iter_restart >= restart);
563 } else {
564 convergeFlag=FALSE;
565 restartFlag=FALSE;
566 }
567 maxIterFlag = (num_iter >= maxit);
568 }
569 }
570 /* end of iteration */
571 #pragma omp master
572 {
573 Norm_of_residual_global=norm_of_residual;
574 Num_iter_global=num_iter;
575 if (maxIterFlag) {
576 Status = SOLVER_MAXITER_REACHED;
577 } else if (breakFlag) {
578 Status = SOLVER_BREAKDOWN;
579 }
580 }
581 } /* end of parallel region */
582 for (i=0;i<Length_of_recursion;i++) {
583 TMPMEMFREE(X_PRES[i]);
584 TMPMEMFREE(R_PRES[i]);
585 TMPMEMFREE(P_PRES[i]);
586 }
587 TMPMEMFREE(AP);
588 TMPMEMFREE(X_PRES);
589 TMPMEMFREE(R_PRES);
590 TMPMEMFREE(P_PRES);
591 TMPMEMFREE(P_PRES_dot_AP);
592 TMPMEMFREE(R_PRES_dot_P_PRES);
593 TMPMEMFREE(BREAKF);
594 TMPMEMFREE(ALPHA);
595 *iter=Num_iter_global;
596 *tolerance=Norm_of_residual_global;
597 }
598 return Status;
599 }

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26