/[escript]/trunk/paso/src/GMRES.c
ViewVC logotype

Contents of /trunk/paso/src/GMRES.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 150 - (show annotations)
Thu Sep 15 03:44:45 2005 UTC (14 years, 5 months ago) by jgs
Original Path: trunk/esys2/paso/src/Solvers/GMRES.c
File MIME type: text/plain
File size: 24069 byte(s)
Merge of development branch dev-02 back to main trunk on 2005-09-15

1 /* $Id$ */
2
3 /*
4 * Purpose
5 * =======
6 *
7 * GMRES solves the linear system A*x=b using the
8 * truncated and restered GMRES method with preconditioning.
9 *
10 * Convergence test: norm( b - A*x )< TOL.
11 *
12 *
13 *
14 * Arguments
15 * =========
16 *
17 * r (input/output) double array, dimension n_row.
18 * On entry, residual of inital guess X
19 *
20 * x (input/output) double array, dimension n_col.
21 * On input, the initial guess.
22 *
23 * iter (input/output) int
24 * On input, the maximum num_iterations to be performed.
25 * On output, actual number of num_iterations performed.
26 *
27 * tolerance (input/output) DOUBLE PRECISION
28 * On input, the allowable convergence measure for
29 * norm( b - A*x )
30 * On output, the final value of this measure.
31 *
32 * length_of_recursion (input) gives the number of residual to be kept in orthogonalization process
33 *
34 * restart (input) If restart>0, iteration is resterted a after restart steps.
35 *
36 * INFO (output) int
37 *
38 * =SOLVER_NO_ERROR: Successful exit. num_iterated approximate solution returned.
39 * =SOLVER_MAXNUM_ITEr_REACHED
40 * =SOLVER_INPUT_ERROR Illegal parameter:
41 * =SOLVER_BREAKDOWN: If parameters RHO or OMEGA become smaller
42 * =SOLVER_MEMORY_ERROR : If parameters RHO or OMEGA become smaller
43 *
44 * ==============================================================
45 */
46
47 #include "Common.h"
48 #include "SystemMatrix.h"
49 #include "Solver.h"
50 #ifdef _OPENMP
51 #include <omp.h>
52 #endif
53
54 err_t Paso_Solver_GMRES(
55 Paso_SystemMatrix * A,
56 double * r,
57 double * x,
58 dim_t *iter,
59 double * tolerance,dim_t length_of_recursion,dim_t restart) {
60
61 /* Local variables */
62
63 double *x_PRES,*r_PRES,*P,*AP,*x_PRES_MEM[MAX(length_of_recursion,0)],*r_PRES_MEM[MAX(length_of_recursion,0)];
64 double r_PRES_MEMdotAP[MAX(length_of_recursion,0)],L2_r_PRES_MEM[MAX(length_of_recursion,0)],BREAKF_MEM[MAX(length_of_recursion,0)];
65 double r_PRESdotAP,r_PRES_MEMdotAP0,r_PRES_MEMdotAP1,r_PRES_MEMdotAP2,r_PRES_MEMdotAP3,r_PRES_MEMdotAP4,L2_r_PRES,r_PRES_MEMdotAP5;
66 double *tmp,tol,BREAKF,factor,GAMMA,SC1,SC2,norm_of_residual,diff,L2_R,norm_of_residual_global;
67 dim_t maxit,num_iter_global,num_iter_restart,num_iter;
68 dim_t i,z,OLDEST,ORDER;
69 bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE,restartFlag=FALSE;
70 err_t status=SOLVER_NO_ERROR;
71
72 /* adapt original routine parameters */
73
74 dim_t n_col=A->num_cols * A-> col_block_size;
75 dim_t n_row=A->num_rows * A-> row_block_size;
76
77 /* Test the input parameters. */
78 if (restart>0) restart=MAX(length_of_recursion,restart);
79 if (n_col < 0 || n_row<0 || length_of_recursion<=0) {
80 return SOLVER_INPUT_ERROR;
81 }
82
83 /* allocate memory: */
84
85 x_PRES=TMPMEMALLOC(n_col,double);
86 r_PRES=TMPMEMALLOC(n_row,double);
87 P=TMPMEMALLOC(n_col,double);
88 AP=TMPMEMALLOC(n_row,double);
89 if (x_PRES==NULL || r_PRES==NULL || P==NULL || AP==NULL) status=SOLVER_MEMORY_ERROR;
90 for (i=0;i<length_of_recursion;i++) {
91 x_PRES_MEM[i]=TMPMEMALLOC(n_col,double);
92 r_PRES_MEM[i]=TMPMEMALLOC(n_row,double);
93 if (x_PRES_MEM[i]==NULL || r_PRES_MEM[i]==NULL) status=SOLVER_MEMORY_ERROR;
94 }
95 if ( status ==SOLVER_NO_ERROR ) {
96
97 /* now PRES starts : */
98 maxit=*iter;
99 tol=*tolerance;
100 norm_of_residual=tol;
101
102 #pragma omp parallel firstprivate(maxit,tol) \
103 private(num_iter,i,num_iter_restart,ORDER,OLDEST,BREAKF,factor,GAMMA,restartFlag,convergeFlag,maxIterFlag,breakFlag)
104 {
105 /* initialization */
106
107 restartFlag=TRUE;
108 num_iter=0;
109 #pragma omp for private(z) schedule(static) nowait
110 for (z=0; z < n_row; ++z) AP[z]=0;
111 #pragma omp for private(z) schedule(static) nowait
112 for (z=0; z < n_col; ++z) P[z]=0;
113 for(i=0;i<length_of_recursion;++i) {
114 #pragma omp for private(z) schedule(static) nowait
115 for (z=0; z < n_row; ++z) r_PRES_MEM[i][z]=0;
116 #pragma omp for private(z) schedule(static) nowait
117 for (z=0; z < n_col; ++z) x_PRES_MEM[i][z]=0;
118 }
119
120 next:
121 #pragma omp barrier
122 if (restartFlag) {
123 #pragma omp for private(z) schedule(static) nowait
124 for (z=0; z < n_row; ++z) r_PRES[z]=r[z];
125 #pragma omp for private(z) schedule(static) nowait
126 for (z=0; z < n_col; ++z) x_PRES[z]=x[z];
127 num_iter_restart=0;
128 OLDEST=length_of_recursion-1;
129 BREAKF=ONE;
130 restartFlag=FALSE;
131 }
132 ++num_iter;
133 ++num_iter_restart;
134 /* ORDER is the dimension of the space on which the residual is minimized: */
135 ORDER=MIN(num_iter_restart-1,length_of_recursion);
136 /* OLDEST points to the oldest r_PRES and x_PRES in memory :*/
137 OLDEST=(OLDEST==length_of_recursion-1) ? 0 : OLDEST+1;
138
139 /***
140 *** calculate new search direction P from r_PRES
141 ***/
142 Paso_Solver_solvePreconditioner(A,&P[0], &r_PRES[0]);
143 /***
144 *** apply A to P to get AP
145 ***/
146 Paso_SystemMatrix_MatrixVector(ONE, A, &P[0],ZERO, &AP[0]);
147 /***
148 ***** calculation of the norm of R and the scalar products of
149 *** the residuals and A*P:
150 ***/
151 if (ORDER==0) {
152 #pragma omp master
153 {
154 L2_r_PRES=ZERO;
155 r_PRESdotAP=ZERO;
156 }
157 #pragma omp barrier
158 #pragma omp for private(z) reduction(+:L2_r_PRES,r_PRESdotAP) schedule(static)
159 for (z=0;z<n_row;++z) {
160 L2_r_PRES+=r_PRES[z]*r_PRES[z];
161 r_PRESdotAP+=r_PRES[z]*AP[z];
162 }
163 } else if (ORDER==1) {
164 #pragma omp master
165 {
166 L2_r_PRES=ZERO;
167 r_PRESdotAP=ZERO;
168 r_PRES_MEMdotAP0=ZERO;
169 }
170 #pragma omp barrier
171 #pragma omp for private(z) reduction(+:L2_r_PRES,r_PRESdotAP,r_PRES_MEMdotAP0) schedule(static)
172 for (z=0;z<n_row;++z) {
173 L2_r_PRES+=r_PRES[z]*r_PRES[z];
174 r_PRESdotAP+=r_PRES[z]*AP[z];
175 r_PRES_MEMdotAP0+=r_PRES_MEM[0][z]*AP[z];
176 }
177 #pragma omp master
178 {
179 r_PRES_MEMdotAP[0]=r_PRES_MEMdotAP0;
180 }
181
182 } else if (ORDER==2) {
183 #pragma omp master
184 {
185 L2_r_PRES=ZERO;
186 r_PRESdotAP=ZERO;
187 r_PRES_MEMdotAP0=ZERO;
188 r_PRES_MEMdotAP1=ZERO;
189 }
190 #pragma omp barrier
191 #pragma omp for private(z) reduction(+:L2_r_PRES,r_PRESdotAP,r_PRES_MEMdotAP0,r_PRES_MEMdotAP1) schedule(static)
192 for (z=0;z<n_row;++z) {
193 L2_r_PRES+=r_PRES[z]*r_PRES[z];
194 r_PRESdotAP+=r_PRES[z]*AP[z];
195 r_PRES_MEMdotAP0+=r_PRES_MEM[0][z]*AP[z];
196 r_PRES_MEMdotAP1+=r_PRES_MEM[1][z]*AP[z];
197 }
198 #pragma omp master
199 {
200 r_PRES_MEMdotAP[0]=r_PRES_MEMdotAP0;
201 r_PRES_MEMdotAP[1]=r_PRES_MEMdotAP1;
202 }
203
204 } else if (ORDER==3) {
205 #pragma omp master
206 {
207 L2_r_PRES=ZERO;
208 r_PRESdotAP=ZERO;
209 r_PRES_MEMdotAP0=ZERO;
210 r_PRES_MEMdotAP1=ZERO;
211 r_PRES_MEMdotAP2=ZERO;
212 }
213 #pragma omp barrier
214 #pragma omp for private(z) reduction(+:L2_r_PRES,r_PRESdotAP,r_PRES_MEMdotAP0,r_PRES_MEMdotAP1,r_PRES_MEMdotAP2) schedule(static)
215 for (z=0;z<n_row;++z) {
216 L2_r_PRES+=r_PRES[z]*r_PRES[z];
217 r_PRESdotAP+=r_PRES[z]*AP[z];
218 r_PRES_MEMdotAP0+=r_PRES_MEM[0][z]*AP[z];
219 r_PRES_MEMdotAP1+=r_PRES_MEM[1][z]*AP[z];
220 r_PRES_MEMdotAP2+=r_PRES_MEM[2][z]*AP[z];
221 }
222 #pragma omp master
223 {
224 r_PRES_MEMdotAP[0]=r_PRES_MEMdotAP0;
225 r_PRES_MEMdotAP[1]=r_PRES_MEMdotAP1;
226 r_PRES_MEMdotAP[2]=r_PRES_MEMdotAP2;
227 }
228 } else if (ORDER==4) {
229 #pragma omp master
230 {
231 L2_r_PRES=ZERO;
232 r_PRESdotAP=ZERO;
233 r_PRES_MEMdotAP0=ZERO;
234 r_PRES_MEMdotAP1=ZERO;
235 r_PRES_MEMdotAP2=ZERO;
236 r_PRES_MEMdotAP3=ZERO;
237 }
238 #pragma omp barrier
239 #pragma omp for private(z) reduction(+:L2_r_PRES,r_PRESdotAP,r_PRES_MEMdotAP0,r_PRES_MEMdotAP1,r_PRES_MEMdotAP2,r_PRES_MEMdotAP3) schedule(static)
240 for (z=0;z<n_row;++z) {
241 L2_r_PRES+=r_PRES[z]*r_PRES[z];
242 r_PRESdotAP+=r_PRES[z]*AP[z];
243 r_PRES_MEMdotAP0+=r_PRES_MEM[0][z]*AP[z];
244 r_PRES_MEMdotAP1+=r_PRES_MEM[1][z]*AP[z];
245 r_PRES_MEMdotAP2+=r_PRES_MEM[2][z]*AP[z];
246 r_PRES_MEMdotAP3+=r_PRES_MEM[3][z]*AP[z];
247 }
248 #pragma omp master
249 {
250 r_PRES_MEMdotAP[0]=r_PRES_MEMdotAP0;
251 r_PRES_MEMdotAP[1]=r_PRES_MEMdotAP1;
252 r_PRES_MEMdotAP[2]=r_PRES_MEMdotAP2;
253 r_PRES_MEMdotAP[3]=r_PRES_MEMdotAP3;
254 }
255 } else if (ORDER==5) {
256 #pragma omp master
257 {
258 L2_r_PRES=ZERO;
259 r_PRESdotAP=ZERO;
260 r_PRES_MEMdotAP0=ZERO;
261 r_PRES_MEMdotAP1=ZERO;
262 r_PRES_MEMdotAP2=ZERO;
263 r_PRES_MEMdotAP3=ZERO;
264 r_PRES_MEMdotAP4=ZERO;
265 }
266 #pragma omp barrier
267 #pragma omp for private(z) reduction(+:L2_r_PRES,r_PRESdotAP,r_PRES_MEMdotAP0,r_PRES_MEMdotAP1,r_PRES_MEMdotAP2,r_PRES_MEMdotAP3,r_PRES_MEMdotAP4) schedule(static)
268 for (z=0;z<n_row;++z) {
269 L2_r_PRES+=r_PRES[z]*r_PRES[z];
270 r_PRESdotAP+=r_PRES[z]*AP[z];
271 r_PRES_MEMdotAP0+=r_PRES_MEM[0][z]*AP[z];
272 r_PRES_MEMdotAP1+=r_PRES_MEM[1][z]*AP[z];
273 r_PRES_MEMdotAP2+=r_PRES_MEM[2][z]*AP[z];
274 r_PRES_MEMdotAP3+=r_PRES_MEM[3][z]*AP[z];
275 r_PRES_MEMdotAP4+=r_PRES_MEM[4][z]*AP[z];
276 }
277 #pragma omp master
278 {
279 r_PRES_MEMdotAP[0]=r_PRES_MEMdotAP0;
280 r_PRES_MEMdotAP[1]=r_PRES_MEMdotAP1;
281 r_PRES_MEMdotAP[2]=r_PRES_MEMdotAP2;
282 r_PRES_MEMdotAP[3]=r_PRES_MEMdotAP3;
283 r_PRES_MEMdotAP[4]=r_PRES_MEMdotAP4;
284 }
285 } else if (ORDER==6) {
286 #pragma omp master
287 {
288 L2_r_PRES=ZERO;
289 r_PRESdotAP=ZERO;
290 r_PRES_MEMdotAP0=ZERO;
291 r_PRES_MEMdotAP1=ZERO;
292 r_PRES_MEMdotAP2=ZERO;
293 r_PRES_MEMdotAP3=ZERO;
294 r_PRES_MEMdotAP4=ZERO;
295 r_PRES_MEMdotAP5=ZERO;
296 }
297 #pragma omp barrier
298 #pragma omp for private(z) reduction(+:L2_r_PRES,r_PRESdotAP,r_PRES_MEMdotAP0,r_PRES_MEMdotAP1,r_PRES_MEMdotAP2,r_PRES_MEMdotAP3,r_PRES_MEMdotAP4,r_PRES_MEMdotAP5) schedule(static)
299 for (z=0;z<n_row;++z) {
300 L2_r_PRES+=r_PRES[z]*r_PRES[z];
301 r_PRESdotAP+=r_PRES[z]*AP[z];
302 r_PRES_MEMdotAP0+=r_PRES_MEM[0][z]*AP[z];
303 r_PRES_MEMdotAP1+=r_PRES_MEM[1][z]*AP[z];
304 r_PRES_MEMdotAP2+=r_PRES_MEM[2][z]*AP[z];
305 r_PRES_MEMdotAP3+=r_PRES_MEM[3][z]*AP[z];
306 r_PRES_MEMdotAP4+=r_PRES_MEM[4][z]*AP[z];
307 r_PRES_MEMdotAP5+=r_PRES_MEM[5][z]*AP[z];
308 }
309 #pragma omp master
310 {
311 r_PRES_MEMdotAP[0]=r_PRES_MEMdotAP0;
312 r_PRES_MEMdotAP[1]=r_PRES_MEMdotAP1;
313 r_PRES_MEMdotAP[2]=r_PRES_MEMdotAP2;
314 r_PRES_MEMdotAP[3]=r_PRES_MEMdotAP3;
315 r_PRES_MEMdotAP[4]=r_PRES_MEMdotAP4;
316 r_PRES_MEMdotAP[5]=r_PRES_MEMdotAP5;
317 }
318 } else {
319 #pragma omp master
320 {
321 L2_r_PRES=ZERO;
322 r_PRESdotAP=ZERO;
323 r_PRES_MEMdotAP0=ZERO;
324 }
325 #pragma omp barrier
326 #pragma omp for private(z) reduction(+:L2_r_PRES,r_PRESdotAP) schedule(static)
327 for (z=0;z<n_row;++z) {
328 L2_r_PRES+=r_PRES[z]*r_PRES[z];
329 r_PRESdotAP+=r_PRES[z]*AP[z];
330 }
331 for (i=0;i<ORDER;++i) {
332 #pragma omp for private(z) reduction(+:r_PRES_MEMdotAP0) schedule(static)
333 for (z=0;z<n_row;++z) r_PRES_MEMdotAP0+=r_PRES_MEM[i][z]*AP[z];
334 #pragma omp master
335 {
336 r_PRES_MEMdotAP[i]=r_PRES_MEMdotAP0;
337 r_PRES_MEMdotAP0=ZERO;
338 }
339 #pragma omp barrier
340 }
341 }
342 /* if L2_r_PRES=0 there is a fatal breakdown */
343 if (L2_r_PRES<=ZERO) {
344 breakFlag=TRUE;
345 } else {
346 /***
347 ***** calculation of the weights for the update of X:
348 */
349 #pragma omp master
350 {
351 r_PRESdotAP*= -ONE/L2_r_PRES;
352 for (i=0;i<ORDER;++i) r_PRES_MEMdotAP[i]*=-ONE/L2_r_PRES_MEM[i];
353 }
354 /*
355 ***** update of solution x_PRES and its residual r_PRES:
356 ***
357 *** P is used to accumulate X and AP to accumulate R. X and R
358 *** are still needed until they are put into the X and R memory
359 *** r_PRES_MEM and x_PRES_MEM
360 ***
361 **/
362 #pragma omp barrier
363 if (ORDER==0) {
364 #pragma omp for private(z) schedule(static)
365 for (z=0;z<n_row;++z)
366 AP[z]+=r_PRESdotAP*r_PRES[z];
367 #pragma omp for private(z) schedule(static)
368 for (z=0;z<n_col;++z)
369 P[z]=-P[z]+r_PRESdotAP*x_PRES[z];
370 } else if (ORDER==1) {
371 #pragma omp for private(z) schedule(static)
372 for (z=0;z<n_row;++z)
373 AP[z]+=r_PRESdotAP*r_PRES[z]+r_PRES_MEMdotAP[0]*r_PRES_MEM[0][z];
374 #pragma omp for private(z) schedule(static)
375 for (z=0;z<n_col;++z)
376 P[z]=-P[z]+r_PRESdotAP*x_PRES[z]+r_PRES_MEMdotAP[0]*x_PRES_MEM[0][z];
377 } else if (ORDER==2) {
378 #pragma omp for private(z) schedule(static)
379 for (z=0;z<n_row;++z)
380 AP[z]+=r_PRESdotAP*r_PRES[z]+r_PRES_MEMdotAP[0]*r_PRES_MEM[0][z]
381 +r_PRES_MEMdotAP[1]*r_PRES_MEM[1][z];
382 #pragma omp for private(z) schedule(static)
383 for (z=0;z<n_col;++z)
384 P[z]=-P[z]+r_PRESdotAP*x_PRES[z]+r_PRES_MEMdotAP[0]*x_PRES_MEM[0][z]
385 +r_PRES_MEMdotAP[1]*x_PRES_MEM[1][z];
386 } else if (ORDER==3) {
387 #pragma omp for private(z) schedule(static)
388 for (z=0;z<n_row;++z)
389 AP[z]+=r_PRESdotAP*r_PRES[z]+r_PRES_MEMdotAP[0]*r_PRES_MEM[0][z]
390 +r_PRES_MEMdotAP[1]*r_PRES_MEM[1][z]
391 +r_PRES_MEMdotAP[2]*r_PRES_MEM[2][z];
392 #pragma omp for private(z) schedule(static)
393 for (z=0;z<n_col;++z)
394 P[z]=-P[z]+r_PRESdotAP*x_PRES[z]+r_PRES_MEMdotAP[0]*x_PRES_MEM[0][z]
395 +r_PRES_MEMdotAP[1]*x_PRES_MEM[1][z]
396 +r_PRES_MEMdotAP[2]*x_PRES_MEM[2][z];
397 } else if (ORDER==4) {
398 #pragma omp for private(z) schedule(static)
399 for (z=0;z<n_row;++z)
400 AP[z]+=r_PRESdotAP*r_PRES[z]+r_PRES_MEMdotAP[0]*r_PRES_MEM[0][z]
401 +r_PRES_MEMdotAP[1]*r_PRES_MEM[1][z]
402 +r_PRES_MEMdotAP[2]*r_PRES_MEM[2][z]
403 +r_PRES_MEMdotAP[3]*r_PRES_MEM[3][z];
404 #pragma omp for private(z) schedule(static)
405 for (z=0;z<n_col;++z)
406 P[z]=-P[z]+r_PRESdotAP*x_PRES[z]+r_PRES_MEMdotAP[0]*x_PRES_MEM[0][z]
407 +r_PRES_MEMdotAP[1]*x_PRES_MEM[1][z]
408 +r_PRES_MEMdotAP[2]*x_PRES_MEM[2][z]
409 +r_PRES_MEMdotAP[3]*x_PRES_MEM[3][z];
410 } else if (ORDER==5) {
411 #pragma omp for private(z) schedule(static)
412 for (z=0;z<n_row;++z)
413 AP[z]+=r_PRESdotAP*r_PRES[z]+r_PRES_MEMdotAP[0]*r_PRES_MEM[0][z]
414 +r_PRES_MEMdotAP[1]*r_PRES_MEM[1][z]
415 +r_PRES_MEMdotAP[2]*r_PRES_MEM[2][z]
416 +r_PRES_MEMdotAP[3]*r_PRES_MEM[3][z]
417 +r_PRES_MEMdotAP[4]*r_PRES_MEM[4][z];
418 #pragma omp for private(z) schedule(static)
419 for (z=0;z<n_col;++z)
420 P[z]=-P[z]+r_PRESdotAP*x_PRES[z]+r_PRES_MEMdotAP[0]*x_PRES_MEM[0][z]
421 +r_PRES_MEMdotAP[1]*x_PRES_MEM[1][z]
422 +r_PRES_MEMdotAP[2]*x_PRES_MEM[2][z]
423 +r_PRES_MEMdotAP[3]*x_PRES_MEM[3][z]
424 +r_PRES_MEMdotAP[4]*x_PRES_MEM[4][z];
425 } else if (ORDER==6) {
426 #pragma omp for private(z) schedule(static)
427 for (z=0;z<n_row;++z)
428 AP[z]+=r_PRESdotAP*r_PRES[z]+r_PRES_MEMdotAP[0]*r_PRES_MEM[0][z]
429 +r_PRES_MEMdotAP[1]*r_PRES_MEM[1][z]
430 +r_PRES_MEMdotAP[2]*r_PRES_MEM[2][z]
431 +r_PRES_MEMdotAP[3]*r_PRES_MEM[3][z]
432 +r_PRES_MEMdotAP[4]*r_PRES_MEM[4][z]
433 +r_PRES_MEMdotAP[5]*r_PRES_MEM[5][z];
434 #pragma omp for private(z) schedule(static)
435 for (z=0;z<n_col;++z)
436 P[z]=-P[z]+r_PRESdotAP*x_PRES[z]+r_PRES_MEMdotAP[0]*x_PRES_MEM[0][z]
437 +r_PRES_MEMdotAP[1]*x_PRES_MEM[1][z]
438 +r_PRES_MEMdotAP[2]*x_PRES_MEM[2][z]
439 +r_PRES_MEMdotAP[3]*x_PRES_MEM[3][z]
440 +r_PRES_MEMdotAP[4]*x_PRES_MEM[4][z]
441 +r_PRES_MEMdotAP[5]*x_PRES_MEM[5][z];
442 } else {
443 #pragma omp for private(z) schedule(static)
444 for (z=0;z<n_row;++z)
445 AP[z]+=r_PRESdotAP*r_PRES[z];
446 #pragma omp for private(z) schedule(static)
447 for (z=0;z<n_col;++z)
448 P[z]=-P[z]+r_PRESdotAP*x_PRES[z];
449
450 for (i=0;i<ORDER;++i) {
451 #pragma omp for private(z) schedule(static)
452 for (z=0;z<n_row;++z)
453 AP[z]+=r_PRES_MEMdotAP[i]*r_PRES_MEM[i][z];
454 #pragma omp for private(z) schedule(static)
455 for (z=0;z<n_col;++z)
456 P[z]+=r_PRES_MEMdotAP[i]*x_PRES_MEM[i][z];
457 }
458 }
459 /*** factor scales AP and P to make it a residual and
460 *** as solution approximation
461 ***
462 *** if factor is equal to zero a breakdown occurs. the
463 *** iteration procedure can be continued but r_PRES is not the
464 *** residual of x_PRES approximation.
465 ***/
466 factor=BREAKF*r_PRESdotAP;
467 for (i=0;i<ORDER;++i) factor +=BREAKF_MEM[i]*r_PRES_MEMdotAP[i];
468 /***
469 ***** x_PRES and r_PRES are moved to memory:
470 ***/
471 #pragma omp master
472 {
473 L2_r_PRES_MEM[OLDEST]=L2_r_PRES;
474 BREAKF_MEM[OLDEST]=BREAKF;
475 tmp=x_PRES; x_PRES=x_PRES_MEM[OLDEST];x_PRES_MEM[OLDEST]=tmp;
476 tmp=r_PRES; r_PRES=r_PRES_MEM[OLDEST];r_PRES_MEM[OLDEST]=tmp;
477 }
478 if (ABS(factor)<=ZERO) {
479 /* in case of a break down: */
480 BREAKF=ZERO;
481 #pragma omp for private(z) schedule(static)
482 for (z=0;z<n_row;++z) r_PRES[z]=AP[z];
483 #pragma omp for private(z) schedule(static)
484 for (z=0;z<n_col;++z) x_PRES[z]=P[z];
485 /* is there any progress */
486 breakFlag=TRUE;
487 #pragma omp barrier
488 for (i=0;i<ORDER;++i) if (BREAKF_MEM[i]>ZERO) breakFlag=FALSE;
489 convergeFlag = FALSE;
490 maxIterFlag = FALSE;
491 } else {
492 BREAKF=ONE;
493 breakFlag=FALSE;
494 /***
495 *** rescale P an AP and apply smooting:
496 ***
497 ***** calculate GAMMA from min_(GAMMA){|R+GAMMA*(r_PRES-R)|_2}:
498 ***/
499 #pragma omp master
500 {
501 SC1=ZERO;
502 SC2=ZERO;
503 L2_R=ZERO;
504 }
505 factor=ONE/factor;
506 #pragma omp barrier
507 #pragma omp for private(z,diff) reduction(+:SC1,SC2) schedule(static)
508 for (z=0;z<n_row;++z) {
509 r_PRES[z]=factor*AP[z];
510 diff=r_PRES[z]-r[z];
511 SC1+=diff*diff;
512 SC2+=diff*r[z];
513 }
514 GAMMA=(SC1<=ZERO) ? ZERO : -SC2/SC1;
515 #pragma omp for private(z) schedule(static)
516 for (z=0;z<n_col;++z) {
517 x_PRES[z]=factor* P[z];
518 x[z]+=GAMMA*(x_PRES[z]-x[z]);
519 }
520 #pragma omp for private(z) reduction(+:L2_R) schedule(static)
521 for (z=0;z<n_row;++z) {
522 r[z]+=GAMMA*(r_PRES[z]-r[z]);
523 L2_R+=r[z]*r[z];
524 }
525 norm_of_residual=sqrt(L2_R);
526 convergeFlag = (norm_of_residual <= tol);
527 maxIterFlag = (num_iter >= maxit);
528 }
529 }
530 if (restart>0) restartFlag=(num_iter_restart >= restart);
531 if (! (convergeFlag || maxIterFlag || breakFlag)) goto next;
532 /* end of iteration */
533 #pragma omp master
534 {
535 norm_of_residual_global=norm_of_residual;
536 num_iter_global=num_iter;
537 if (maxIterFlag) {
538 status = SOLVER_MAXITER_REACHED;
539 } else if (breakFlag) {
540 status = SOLVER_BREAKDOWN;
541 }
542 }
543 } /* end of parallel region */
544 TMPMEMFREE(x_PRES);
545 TMPMEMFREE(r_PRES);
546 TMPMEMFREE(P);
547 TMPMEMFREE(AP);
548 for (i=0;i<length_of_recursion;i++) {
549 TMPMEMFREE(x_PRES_MEM[i]);
550 TMPMEMFREE(r_PRES_MEM[i]);
551 }
552 *iter=num_iter_global;
553 *tolerance=norm_of_residual_global;
554 }
555 return status;
556 }
557
558
559 /*
560 * $Log$
561 * Revision 1.2 2005/09/15 03:44:40 jgs
562 * Merge of development branch dev-02 back to main trunk on 2005-09-15
563 *
564 * Revision 1.1.2.1 2005/09/05 06:29:49 gross
565 * These files have been extracted from finley to define a stand alone libray for iterative
566 * linear solvers on the ALTIX. main entry through Paso_solve. this version compiles but
567 * has not been tested yet.
568 *
569 *
570 */
571
572

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26