/[escript]/trunk/paso/src/GMRES.c
ViewVC logotype

Contents of /trunk/paso/src/GMRES.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1388 - (show annotations)
Fri Jan 11 07:45:58 2008 UTC (11 years, 10 months ago) by trankine
File MIME type: text/plain
File size: 24161 byte(s)
And get the *(&(*&(* name right
1
2 /* $Id$ */
3
4 /*******************************************************
5 *
6 * Copyright 2003-2007 by ACceSS MNRF
7 * Copyright 2007 by University of Queensland
8 *
9 * http://esscc.uq.edu.au
10 * Primary Business: Queensland, Australia
11 * Licensed under the Open Software License version 3.0
12 * http://www.opensource.org/licenses/osl-3.0.php
13 *
14 *******************************************************/
15
16 /*
17 * Purpose
18 * =======
19 *
20 * GMRES solves the linear system A*x=b using the
21 * truncated and restered GMRES method with preconditioning.
22 *
23 * Convergence test: norm( b - A*x )< TOL.
24 *
25 *
26 *
27 * Arguments
28 * =========
29 *
30 * r (input/output) double array, dimension n.
31 * On entry, residual of inital guess X
32 *
33 * x (input/output) double array, dimension n.
34 * On input, the initial guess.
35 *
36 * iter (input/output) int
37 * On input, the maximum num_iterations to be performed.
38 * On output, actual number of num_iterations performed.
39 *
40 * tolerance (input/output) DOUBLE PRECISION
41 * On input, the allowable convergence measure for
42 * norm( b - A*x )
43 * On output, the final value of this measure.
44 *
45 * Length_of_recursion (input) gives the number of residual to be kept in orthogonalization process
46 *
47 * restart (input) If restart>0, iteration is resterted a after restart steps.
48 *
49 * INFO (output) int
50 *
51 * =SOLVER_NO_ERROR: Successful exit. num_iterated approximate solution returned.
52 * =SOLVER_MAXNUM_ITER_REACHED
53 * =SOLVER_INPUT_ERROR Illegal parameter:
54 * =SOLVER_BREAKDOWN: bad luck!
55 * =SOLVER_MEMORY_ERROR : no memory available
56 *
57 * ==============================================================
58 */
59
60 #include "Common.h"
61 #include "SystemMatrix.h"
62 #include "Solver.h"
63 #ifdef _OPENMP
64 #include <omp.h>
65 #endif
66
67 err_t Paso_Solver_GMRES(
68 Paso_SystemMatrix * A,
69 double * r,
70 double * x,
71 dim_t *iter,
72 double * tolerance,dim_t Length_of_recursion,dim_t restart,
73 Paso_Performance* pp) {
74
75 /* Local variables */
76
77 double *AP,**X_PRES,**R_PRES,**P_PRES;
78 double *P_PRES_dot_AP,*R_PRES_dot_P_PRES,*BREAKF,*ALPHA;
79 double P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3,P_PRES_dot_AP4,P_PRES_dot_AP5,P_PRES_dot_AP6,R_PRES_dot_P,abs_RP,breakf0;
80 double tol,Factor,sum_BREAKF,gamma,SC1,SC2,norm_of_residual,diff,L2_R,Norm_of_residual_global;
81 double *save_XPRES, *save_P_PRES, *save_R_PRES,save_R_PRES_dot_P_PRES;
82 dim_t maxit,Num_iter_global,num_iter_restart,num_iter;
83 dim_t i,z,order,n, Length_of_mem;
84 bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE,restartFlag=FALSE;
85 err_t Status=SOLVER_NO_ERROR;
86
87
88 /* adapt original routine parameters */
89 n = Paso_SystemMatrix_getTotalNumRows(A);
90 Length_of_mem=MAX(Length_of_recursion,0)+1;
91
92 /* Test the input parameters. */
93 if (restart>0) restart=MAX(Length_of_recursion,restart);
94 if (n < 0 || Length_of_recursion<=0) {
95 return SOLVER_INPUT_ERROR;
96 }
97
98 /* allocate memory: */
99 X_PRES=TMPMEMALLOC(Length_of_mem,double*);
100 R_PRES=TMPMEMALLOC(Length_of_mem,double*);
101 P_PRES=TMPMEMALLOC(Length_of_mem,double*);
102 P_PRES_dot_AP=TMPMEMALLOC(Length_of_mem,double);
103 R_PRES_dot_P_PRES=TMPMEMALLOC(Length_of_mem,double);
104 BREAKF=TMPMEMALLOC(Length_of_mem,double);
105 ALPHA=TMPMEMALLOC(Length_of_mem,double);
106 AP=TMPMEMALLOC(n,double);
107 if (AP==NULL || X_PRES ==NULL || R_PRES == NULL || P_PRES == NULL ||
108 P_PRES_dot_AP == NULL || R_PRES_dot_P_PRES ==NULL || BREAKF == NULL || ALPHA == NULL) {
109 Status=SOLVER_MEMORY_ERROR;
110 } else {
111 for (i=0;i<Length_of_mem;i++) {
112 X_PRES[i]=TMPMEMALLOC(n,double);
113 R_PRES[i]=TMPMEMALLOC(n,double);
114 P_PRES[i]=TMPMEMALLOC(n,double);
115 if (X_PRES[i]==NULL || R_PRES[i]==NULL || P_PRES[i]==NULL) Status=SOLVER_MEMORY_ERROR;
116 }
117 }
118 if ( Status ==SOLVER_NO_ERROR ) {
119
120 /* now PRES starts : */
121 maxit=*iter;
122 tol=*tolerance;
123
124 #pragma omp parallel firstprivate(maxit,tol,convergeFlag,maxIterFlag,breakFlag) \
125 private(num_iter,i,num_iter_restart,order,sum_BREAKF,gamma,restartFlag,norm_of_residual,abs_RP,breakf0,\
126 save_XPRES,save_P_PRES,save_R_PRES,save_R_PRES_dot_P_PRES)
127 {
128 /* initialization */
129
130 restartFlag=TRUE;
131 num_iter=0;
132 #pragma omp for private(z) schedule(static) nowait
133 for (z=0; z < n; ++z) AP[z]=0;
134 for(i=0;i<Length_of_mem;++i) {
135 #pragma omp for private(z) schedule(static) nowait
136 for (z=0; z < n; ++z) {
137 P_PRES[i][z]=0;
138 R_PRES[i][z]=0;
139 X_PRES[i][z]=0;
140 }
141 }
142
143 while (! (convergeFlag || maxIterFlag || breakFlag)) {
144 #pragma omp barrier
145 if (restartFlag) {
146 #pragma omp master
147 BREAKF[0]=ONE;
148 #pragma omp for private(z) schedule(static) nowait
149 for (z=0; z < n; ++z) {
150 R_PRES[0][z]=r[z];
151 X_PRES[0][z]=x[z];
152 }
153 num_iter_restart=0;
154 restartFlag=FALSE;
155 }
156 ++num_iter;
157 ++num_iter_restart;
158 /* order is the dimension of the space on which the residual is minimized: */
159 order=MIN(num_iter_restart,Length_of_recursion);
160 /***
161 *** calculate new search direction P from R_PRES
162 ***/
163 #pragma omp barrier
164 Paso_Solver_solvePreconditioner(A,&P_PRES[0][0], &R_PRES[0][0]);
165 /***
166 *** apply A to P to get AP
167 ***/
168 #pragma omp barrier
169 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, &P_PRES[0][0],ZERO, &AP[0]);
170 /***
171 ***** calculation of the norm of R and the scalar products of
172 *** the residuals and A*P:
173 ***/
174 if (order==0) {
175 #pragma omp master
176 R_PRES_dot_P=ZERO;
177 #pragma omp barrier
178 #pragma omp for private(z) reduction(+:R_PRES_dot_P) schedule(static)
179 for (z=0;z<n;++z)
180 R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
181 #pragma omp master
182 R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
183 } else if (order==1) {
184 #pragma omp master
185 {
186 R_PRES_dot_P=ZERO;
187 P_PRES_dot_AP0=ZERO;
188 }
189 #pragma omp barrier
190 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0) schedule(static)
191 for (z=0;z<n;++z) {
192 R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
193 P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
194 }
195 #pragma omp master
196 {
197 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
198 R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
199 }
200 } else if (order==2) {
201 #pragma omp master
202 {
203 R_PRES_dot_P=ZERO;
204 P_PRES_dot_AP0=ZERO;
205 P_PRES_dot_AP1=ZERO;
206 }
207 #pragma omp barrier
208 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1) schedule(static)
209 for (z=0;z<n;++z) {
210 R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
211 P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
212 P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
213 }
214 #pragma omp master
215 {
216 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
217 P_PRES_dot_AP[1]=P_PRES_dot_AP1;
218 R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
219 }
220 } else if (order==3) {
221 #pragma omp master
222 {
223 R_PRES_dot_P=ZERO;
224 P_PRES_dot_AP0=ZERO;
225 P_PRES_dot_AP1=ZERO;
226 P_PRES_dot_AP2=ZERO;
227 }
228 #pragma omp barrier
229 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2) schedule(static)
230 for (z=0;z<n;++z) {
231 R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
232 P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
233 P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
234 P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
235 }
236 #pragma omp master
237 {
238 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
239 P_PRES_dot_AP[1]=P_PRES_dot_AP1;
240 P_PRES_dot_AP[2]=P_PRES_dot_AP2;
241 R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
242 }
243 } else if (order==4) {
244 #pragma omp master
245 {
246 R_PRES_dot_P=ZERO;
247 P_PRES_dot_AP0=ZERO;
248 P_PRES_dot_AP1=ZERO;
249 P_PRES_dot_AP2=ZERO;
250 P_PRES_dot_AP3=ZERO;
251 }
252 #pragma omp barrier
253 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3) schedule(static)
254 for (z=0;z<n;++z) {
255 R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
256 P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
257 P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
258 P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
259 P_PRES_dot_AP3+=P_PRES[3][z]*AP[z];
260 }
261 #pragma omp master
262 {
263 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
264 P_PRES_dot_AP[1]=P_PRES_dot_AP1;
265 P_PRES_dot_AP[2]=P_PRES_dot_AP2;
266 P_PRES_dot_AP[3]=P_PRES_dot_AP3;
267 R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
268 }
269 } else if (order==5) {
270 #pragma omp master
271 {
272 R_PRES_dot_P=ZERO;
273 P_PRES_dot_AP0=ZERO;
274 P_PRES_dot_AP1=ZERO;
275 P_PRES_dot_AP2=ZERO;
276 P_PRES_dot_AP3=ZERO;
277 P_PRES_dot_AP4=ZERO;
278 }
279 #pragma omp barrier
280 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3,P_PRES_dot_AP4) schedule(static)
281 for (z=0;z<n;++z) {
282 R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
283 P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
284 P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
285 P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
286 P_PRES_dot_AP3+=P_PRES[3][z]*AP[z];
287 P_PRES_dot_AP4+=P_PRES[4][z]*AP[z];
288 }
289 #pragma omp master
290 {
291 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
292 P_PRES_dot_AP[1]=P_PRES_dot_AP1;
293 P_PRES_dot_AP[2]=P_PRES_dot_AP2;
294 P_PRES_dot_AP[3]=P_PRES_dot_AP3;
295 P_PRES_dot_AP[4]=P_PRES_dot_AP4;
296 R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
297 }
298 } else if (order==6) {
299 #pragma omp master
300 {
301 R_PRES_dot_P=ZERO;
302 P_PRES_dot_AP0=ZERO;
303 P_PRES_dot_AP1=ZERO;
304 P_PRES_dot_AP2=ZERO;
305 P_PRES_dot_AP3=ZERO;
306 P_PRES_dot_AP4=ZERO;
307 P_PRES_dot_AP5=ZERO;
308 }
309 #pragma omp barrier
310 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3,P_PRES_dot_AP4,P_PRES_dot_AP5) schedule(static)
311 for (z=0;z<n;++z) {
312 R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
313 P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
314 P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
315 P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
316 P_PRES_dot_AP3+=P_PRES[3][z]*AP[z];
317 P_PRES_dot_AP4+=P_PRES[4][z]*AP[z];
318 P_PRES_dot_AP5+=P_PRES[5][z]*AP[z];
319 }
320 #pragma omp master
321 {
322 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
323 P_PRES_dot_AP[1]=P_PRES_dot_AP1;
324 P_PRES_dot_AP[2]=P_PRES_dot_AP2;
325 P_PRES_dot_AP[3]=P_PRES_dot_AP3;
326 P_PRES_dot_AP[4]=P_PRES_dot_AP4;
327 P_PRES_dot_AP[5]=P_PRES_dot_AP5;
328 R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
329 }
330 } else if (order>6) {
331 #pragma omp master
332 {
333 R_PRES_dot_P=ZERO;
334 P_PRES_dot_AP0=ZERO;
335 P_PRES_dot_AP1=ZERO;
336 P_PRES_dot_AP2=ZERO;
337 P_PRES_dot_AP3=ZERO;
338 P_PRES_dot_AP4=ZERO;
339 P_PRES_dot_AP5=ZERO;
340 P_PRES_dot_AP6=ZERO;
341 }
342 #pragma omp barrier
343 #pragma omp for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3,P_PRES_dot_AP4,P_PRES_dot_AP5,P_PRES_dot_AP6) schedule(static)
344 for (z=0;z<n;++z) {
345 R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
346 P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
347 P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
348 P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
349 P_PRES_dot_AP3+=P_PRES[3][z]*AP[z];
350 P_PRES_dot_AP4+=P_PRES[4][z]*AP[z];
351 P_PRES_dot_AP5+=P_PRES[5][z]*AP[z];
352 P_PRES_dot_AP6+=P_PRES[6][z]*AP[z];
353 }
354 #pragma omp master
355 {
356 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
357 P_PRES_dot_AP[1]=P_PRES_dot_AP1;
358 P_PRES_dot_AP[2]=P_PRES_dot_AP2;
359 P_PRES_dot_AP[3]=P_PRES_dot_AP3;
360 P_PRES_dot_AP[4]=P_PRES_dot_AP4;
361 P_PRES_dot_AP[5]=P_PRES_dot_AP5;
362 P_PRES_dot_AP[6]=P_PRES_dot_AP6;
363 R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
364
365 P_PRES_dot_AP0=ZERO;
366 }
367 for (i=7;i<order;++i) {
368 #pragma omp barrier
369 #pragma omp for private(z) reduction(+:P_PRES_dot_AP0) schedule(static)
370 for (z=0;z<n;++z) P_PRES_dot_AP0+=P_PRES[i][z]*AP[z];
371 #pragma omp master
372 {
373 P_PRES_dot_AP[i]=P_PRES_dot_AP0;
374 P_PRES_dot_AP0=ZERO;
375 }
376 }
377 }
378 /* this fixes a problem with the intel compiler */
379 #pragma omp master
380 P_PRES_dot_AP0=R_PRES_dot_P_PRES[0];
381 /*** if sum_BREAKF is equal to zero a breakdown occurs.
382 *** iteration procedure can be continued but R_PRES is not the
383 *** residual of X_PRES approximation.
384 ***/
385 #pragma omp barrier
386 sum_BREAKF=0.;
387 for (i=0;i<order;++i) sum_BREAKF +=BREAKF[i];
388 breakFlag=!((ABS(P_PRES_dot_AP0) > ZERO) && (sum_BREAKF >ZERO));
389 if (!breakFlag) {
390 breakFlag=FALSE;
391 /***
392 ***** X_PRES and R_PRES are moved to memory:
393 ***/
394 #pragma omp master
395 {
396 Factor=0.;
397 for (i=0;i<order;++i) {
398 ALPHA[i]=-P_PRES_dot_AP[i]/R_PRES_dot_P_PRES[i];
399 Factor+=BREAKF[i]*ALPHA[i];
400 }
401
402 save_R_PRES_dot_P_PRES=R_PRES_dot_P_PRES[Length_of_mem-1];
403 save_R_PRES=R_PRES[Length_of_mem-1];
404 save_XPRES=X_PRES[Length_of_mem-1];
405 save_P_PRES=P_PRES[Length_of_mem-1];
406 for (i=Length_of_mem-1;i>0;--i) {
407 BREAKF[i]=BREAKF[i-1];
408 R_PRES_dot_P_PRES[i]=R_PRES_dot_P_PRES[i-1];
409 R_PRES[i]=R_PRES[i-1];
410 X_PRES[i]=X_PRES[i-1];
411 P_PRES[i]=P_PRES[i-1];
412 }
413 R_PRES_dot_P_PRES[0]=save_R_PRES_dot_P_PRES;
414 R_PRES[0]=save_R_PRES;
415 X_PRES[0]=save_XPRES;
416 P_PRES[0]=save_P_PRES;
417
418 if (ABS(Factor)<=ZERO) {
419 Factor=1.;
420 BREAKF[0]=ZERO;
421 } else {
422 Factor=1./Factor;
423 BREAKF[0]=ONE;
424 }
425 for (i=0;i<order;++i) ALPHA[i]*=Factor;
426 }
427 /*
428 ***** update of solution X_PRES and its residual R_PRES:
429 ***
430 *** P is used to accumulate X and AP to accumulate R. X and R
431 *** are still needed until they are put into the X and R memory
432 *** R_PRES and X_PRES
433 ***
434 **/
435 #pragma omp barrier
436 breakf0=BREAKF[0];
437 if (order==0) {
438 #pragma omp for private(z) schedule(static)
439 for (z=0;z<n;++z) {
440 R_PRES[0][z]= Factor* AP[z];
441 X_PRES[0][z]=-Factor*P_PRES[1][z];
442 }
443 } else if (order==1) {
444 #pragma omp for private(z) schedule(static)
445 for (z=0;z<n;++z) {
446 R_PRES[0][z]= Factor* AP[z]+ALPHA[0]*R_PRES[1][z];
447 X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z];
448 }
449 } else if (order==2) {
450 #pragma omp for private(z) schedule(static)
451 for (z=0;z<n;++z) {
452 R_PRES[0][z]= Factor* AP[z]+ALPHA[0]*R_PRES[1][z]
453 +ALPHA[1]*R_PRES[2][z];
454 X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
455 +ALPHA[1]*X_PRES[2][z];
456 }
457 } else if (order==3) {
458 #pragma omp for private(z) schedule(static)
459 for (z=0;z<n;++z) {
460 R_PRES[0][z]= Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
461 +ALPHA[1]*R_PRES[2][z]
462 +ALPHA[2]*R_PRES[3][z];
463 X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
464 +ALPHA[1]*X_PRES[2][z]
465 +ALPHA[2]*X_PRES[3][z];
466 }
467 } else if (order==4) {
468 #pragma omp for private(z) schedule(static)
469 for (z=0;z<n;++z) {
470 R_PRES[0][z]= Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
471 +ALPHA[1]*R_PRES[2][z]
472 +ALPHA[2]*R_PRES[3][z]
473 +ALPHA[3]*R_PRES[4][z];
474 X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
475 +ALPHA[1]*X_PRES[2][z]
476 +ALPHA[2]*X_PRES[3][z]
477 +ALPHA[3]*X_PRES[4][z];
478 }
479 } else if (order==5) {
480 #pragma omp for private(z) schedule(static)
481 for (z=0;z<n;++z) {
482 R_PRES[0][z]=Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
483 +ALPHA[1]*R_PRES[2][z]
484 +ALPHA[2]*R_PRES[3][z]
485 +ALPHA[3]*R_PRES[4][z]
486 +ALPHA[4]*R_PRES[5][z];
487 X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
488 +ALPHA[1]*X_PRES[2][z]
489 +ALPHA[2]*X_PRES[3][z]
490 +ALPHA[3]*X_PRES[4][z]
491 +ALPHA[4]*X_PRES[5][z];
492 }
493 } else if (order==6) {
494 #pragma omp for private(z) schedule(static)
495 for (z=0;z<n;++z) {
496 R_PRES[0][z]=Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
497 +ALPHA[1]*R_PRES[2][z]
498 +ALPHA[2]*R_PRES[3][z]
499 +ALPHA[3]*R_PRES[4][z]
500 +ALPHA[4]*R_PRES[5][z]
501 +ALPHA[5]*R_PRES[6][z];
502 X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
503 +ALPHA[1]*X_PRES[2][z]
504 +ALPHA[2]*X_PRES[3][z]
505 +ALPHA[3]*X_PRES[4][z]
506 +ALPHA[4]*X_PRES[5][z]
507 +ALPHA[5]*X_PRES[6][z];
508 }
509 } else if (order>6) {
510 #pragma omp for private(z) schedule(static)
511 for (z=0;z<n;++z) {
512 R_PRES[0][z]=Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
513 +ALPHA[1]*R_PRES[2][z]
514 +ALPHA[2]*R_PRES[3][z]
515 +ALPHA[3]*R_PRES[4][z]
516 +ALPHA[4]*R_PRES[5][z]
517 +ALPHA[5]*R_PRES[6][z]
518 +ALPHA[6]*R_PRES[7][z];
519 X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
520 +ALPHA[1]*X_PRES[2][z]
521 +ALPHA[2]*X_PRES[3][z]
522 +ALPHA[3]*X_PRES[4][z]
523 +ALPHA[4]*X_PRES[5][z]
524 +ALPHA[5]*X_PRES[6][z]
525 +ALPHA[6]*X_PRES[7][z];
526 }
527 for (i=7;i<order;++i) {
528 #pragma omp for private(z) schedule(static)
529 for (z=0;z<n;++z) {
530 R_PRES[0][z]+=ALPHA[i]*R_PRES[i+1][z];
531 X_PRES[0][z]+=ALPHA[i]*X_PRES[i+1][z];
532 }
533 }
534 }
535 if (breakf0>0.) {
536 /***
537 ***** calculate gamma from min_(gamma){|R+gamma*(R_PRES-R)|_2}:
538 ***/
539 #pragma omp master
540 {
541 SC1=ZERO;
542 SC2=ZERO;
543 L2_R=ZERO;
544 }
545 #pragma omp barrier
546 #pragma omp for private(z,diff) reduction(+:SC1,SC2) schedule(static)
547 for (z=0;z<n;++z) {
548 diff=R_PRES[0][z]-r[z];
549 SC1+=diff*diff;
550 SC2+=diff*r[z];
551 }
552 gamma=(SC1<=ZERO) ? ZERO : -SC2/SC1;
553 #pragma omp for private(z) reduction(+:L2_R) schedule(static)
554 for (z=0;z<n;++z) {
555 x[z]+=gamma*(X_PRES[0][z]-x[z]);
556 r[z]+=gamma*(R_PRES[0][z]-r[z]);
557 L2_R+=r[z]*r[z];
558 }
559 norm_of_residual=sqrt(L2_R);
560 convergeFlag = (norm_of_residual <= tol);
561 if (restart>0) restartFlag=(num_iter_restart >= restart);
562 } else {
563 convergeFlag=FALSE;
564 restartFlag=FALSE;
565 }
566 maxIterFlag = (num_iter >= maxit);
567 }
568 }
569 /* end of iteration */
570 #pragma omp master
571 {
572 Norm_of_residual_global=norm_of_residual;
573 Num_iter_global=num_iter;
574 if (maxIterFlag) {
575 Status = SOLVER_MAXITER_REACHED;
576 } else if (breakFlag) {
577 Status = SOLVER_BREAKDOWN;
578 }
579 }
580 } /* end of parallel region */
581 for (i=0;i<Length_of_recursion;i++) {
582 TMPMEMFREE(X_PRES[i]);
583 TMPMEMFREE(R_PRES[i]);
584 TMPMEMFREE(P_PRES[i]);
585 }
586 TMPMEMFREE(AP);
587 TMPMEMFREE(X_PRES);
588 TMPMEMFREE(R_PRES);
589 TMPMEMFREE(P_PRES);
590 TMPMEMFREE(P_PRES_dot_AP);
591 TMPMEMFREE(R_PRES_dot_P_PRES);
592 TMPMEMFREE(BREAKF);
593 TMPMEMFREE(ALPHA);
594 *iter=Num_iter_global;
595 *tolerance=Norm_of_residual_global;
596 }
597 return Status;
598 }

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26