/[escript]/trunk/paso/src/GMRES.c
ViewVC logotype

Contents of /trunk/paso/src/GMRES.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1628 - (show annotations)
Fri Jul 11 13:12:46 2008 UTC (11 years, 7 months ago) by phornby
File MIME type: text/plain
File size: 22214 byte(s)

Merge in /branches/windows_from_1456_trunk_1620_merged_in branch.

You will find a preserved pre-merge trunk in tags under tags/trunk_at_1625.
That will be useful for diffing & checking on my stupidity.

Here is a list of the conflicts and their resolution at this
point in time.


=================================================================================
(LLWS == looks like white space).

finley/src/Assemble_addToSystemMatrix.c - resolve to branch - unused var. may be wrong.....
finley/src/CPPAdapter/SystemMatrixAdapter.cpp - resolve to branch - LLWS
finley/src/CPPAdapter/MeshAdapter.cpp - resolve to branch - LLWS
paso/src/PCG.c - resolve to branch - unused var fixes.
paso/src/SolverFCT.c - resolve to branch - LLWS
paso/src/FGMRES.c - resolve to branch - LLWS
paso/src/Common.h - resolve to trunk version. It's omp.h's include... not sure it's needed,
but for the sake of saftey.....
paso/src/Functions.c - resolve to branch version, indentation/tab removal and return error
on bad unimplemented Paso_FunctionCall.
paso/src/SolverFCT_solve.c - resolve to branch version, unused vars
paso/src/SparseMatrix_MatrixVector.c - resolve to branch version, unused vars.
escript/src/Utils.cpp - resloved to branch, needs WinSock2.h
escript/src/DataExpanded.cpp - resolved to branch version - LLWS
escript/src/DataFactory.cpp - resolve to branch version
=================================================================================

This currently passes tests on linux (debian), but is not checked on windows or Altix yet.

This checkin is to make a trunk I can check out for windows to do tests on it.

Known outstanding problem is in the operator=() method of exceptions
causing warning messages on the intel compilers.

May the God of doughnuts have mercy on my soul.


1
2 /* $Id$ */
3
4 /*******************************************************
5 *
6 * Copyright 2003-2007 by ACceSS MNRF
7 * Copyright 2007 by University of Queensland
8 *
9 * http://esscc.uq.edu.au
10 * Primary Business: Queensland, Australia
11 * Licensed under the Open Software License version 3.0
12 * http://www.opensource.org/licenses/osl-3.0.php
13 *
14 *******************************************************/
15
16 /*
17 * Purpose
18 * =======
19 *
20 * GMRES solves the linear system A*x=b using the
21 * truncated and restered GMRES method with preconditioning.
22 *
23 * Convergence test: norm( b - A*x )< TOL.
24 *
25 *
26 *
27 * Arguments
28 * =========
29 *
30 * r (input/output) double array, dimension n.
31 * On entry, residual of inital guess X
32 *
33 * x (input/output) double array, dimension n.
34 * On input, the initial guess.
35 *
36 * iter (input/output) int
37 * On input, the maximum num_iterations to be performed.
38 * On output, actual number of num_iterations performed.
39 *
40 * tolerance (input/output) DOUBLE PRECISION
41 * On input, the allowable convergence measure for
42 * norm( b - A*x )
43 * On output, the final value of this measure.
44 *
45 * Length_of_recursion (input) gives the number of residual to be kept in orthogonalization process
46 *
47 * restart (input) If restart>0, iteration is resterted a after restart steps.
48 *
49 * INFO (output) int
50 *
51 * =SOLVER_NO_ERROR: Successful exit. num_iterated approximate solution returned.
52 * =SOLVER_MAXNUM_ITER_REACHED
53 * =SOLVER_INPUT_ERROR Illegal parameter:
54 * =SOLVER_BREAKDOWN: bad luck!
55 * =SOLVER_MEMORY_ERROR : no memory available
56 *
57 * ==============================================================
58 */
59
60 #include "Common.h"
61 #include "SystemMatrix.h"
62 #include "Solver.h"
63 #ifdef _OPENMP
64 #include <omp.h>
65 #endif
66
67 err_t Paso_Solver_GMRES(
68 Paso_SystemMatrix * A,
69 double * r,
70 double * x,
71 dim_t *iter,
72 double * tolerance,dim_t Length_of_recursion,dim_t restart,
73 Paso_Performance* pp) {
74
75 /* Local variables */
76
77 double *AP,**X_PRES,**R_PRES,**P_PRES;
78 double *P_PRES_dot_AP,*R_PRES_dot_P_PRES,*BREAKF,*ALPHA;
79 double P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3,P_PRES_dot_AP4,P_PRES_dot_AP5,P_PRES_dot_AP6,R_PRES_dot_P,breakf0;
80 double tol,Factor,sum_BREAKF,gamma,SC1,SC2,norm_of_residual,diff,L2_R,Norm_of_residual_global;
81 double *save_XPRES, *save_P_PRES, *save_R_PRES,save_R_PRES_dot_P_PRES;
82 dim_t maxit,Num_iter_global,num_iter_restart,num_iter;
83 dim_t i,z,order,n, Length_of_mem;
84 bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE,restartFlag=FALSE;
85 err_t Status=SOLVER_NO_ERROR;
86
87
88 /* adapt original routine parameters */
89 n = Paso_SystemMatrix_getTotalNumRows(A);
90 Length_of_mem=MAX(Length_of_recursion,0)+1;
91
92 /* Test the input parameters. */
93 if (restart>0) restart=MAX(Length_of_recursion,restart);
94 if (n < 0 || Length_of_recursion<=0) {
95 return SOLVER_INPUT_ERROR;
96 }
97
98 /* allocate memory: */
99 X_PRES=TMPMEMALLOC(Length_of_mem,double*);
100 R_PRES=TMPMEMALLOC(Length_of_mem,double*);
101 P_PRES=TMPMEMALLOC(Length_of_mem,double*);
102 P_PRES_dot_AP=TMPMEMALLOC(Length_of_mem,double);
103 R_PRES_dot_P_PRES=TMPMEMALLOC(Length_of_mem,double);
104 BREAKF=TMPMEMALLOC(Length_of_mem,double);
105 ALPHA=TMPMEMALLOC(Length_of_mem,double);
106 AP=TMPMEMALLOC(n,double);
107 if (AP==NULL || X_PRES ==NULL || R_PRES == NULL || P_PRES == NULL ||
108 P_PRES_dot_AP == NULL || R_PRES_dot_P_PRES ==NULL || BREAKF == NULL || ALPHA == NULL) {
109 Status=SOLVER_MEMORY_ERROR;
110 } else {
111 for (i=0;i<Length_of_mem;i++) {
112 X_PRES[i]=TMPMEMALLOC(n,double);
113 R_PRES[i]=TMPMEMALLOC(n,double);
114 P_PRES[i]=TMPMEMALLOC(n,double);
115 if (X_PRES[i]==NULL || R_PRES[i]==NULL || P_PRES[i]==NULL) Status=SOLVER_MEMORY_ERROR;
116 }
117 }
118 if ( Status ==SOLVER_NO_ERROR ) {
119
120 /* now PRES starts : */
121 maxit=*iter;
122 tol=*tolerance;
123
124 /* initialization */
125
126 restartFlag=TRUE;
127 num_iter=0;
128 #pragma omp parallel private(i)
129 {
130 #pragma omp for private(z) schedule(static) nowait
131 for (z=0; z < n; ++z) AP[z]=0;
132 for(i=0;i<Length_of_mem;++i) {
133 #pragma omp for private(z) schedule(static) nowait
134 for (z=0; z < n; ++z) {
135 P_PRES[i][z]=0;
136 R_PRES[i][z]=0;
137 X_PRES[i][z]=0;
138 }
139 }
140 }
141
142 while (! (convergeFlag || maxIterFlag || breakFlag)) {
143 if (restartFlag) {
144 BREAKF[0]=ONE;
145 #pragma omp parallel for private(z) schedule(static)
146 for (z=0; z < n; ++z) {
147 R_PRES[0][z]=r[z];
148 X_PRES[0][z]=x[z];
149 }
150 num_iter_restart=0;
151 restartFlag=FALSE;
152 }
153 ++num_iter;
154 ++num_iter_restart;
155 /* order is the dimension of the space on which the residual is minimized: */
156 order=MIN(num_iter_restart,Length_of_recursion);
157 /***
158 *** calculate new search direction P from R_PRES
159 ***/
160 Paso_Solver_solvePreconditioner(A,&P_PRES[0][0], &R_PRES[0][0]);
161 /***
162 *** apply A to P to get AP
163 ***/
164 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, &P_PRES[0][0],ZERO, &AP[0]);
165 /***
166 ***** calculation of the norm of R and the scalar products of
167 *** the residuals and A*P:
168 ***/
169 if (order==0) {
170 R_PRES_dot_P=ZERO;
171 #pragma omp parallel for private(z) reduction(+:R_PRES_dot_P) schedule(static)
172 for (z=0;z<n;++z) R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
173 R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
174 } else if (order==1) {
175 R_PRES_dot_P=ZERO;
176 P_PRES_dot_AP0=ZERO;
177 #pragma omp parallel for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0) schedule(static)
178 for (z=0;z<n;++z) {
179 R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
180 P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
181 }
182 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
183 R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
184 } else if (order==2) {
185 R_PRES_dot_P=ZERO;
186 P_PRES_dot_AP0=ZERO;
187 P_PRES_dot_AP1=ZERO;
188 #pragma omp parallel for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1) schedule(static)
189 for (z=0;z<n;++z) {
190 R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
191 P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
192 P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
193 }
194 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
195 P_PRES_dot_AP[1]=P_PRES_dot_AP1;
196 R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
197 } else if (order==3) {
198 R_PRES_dot_P=ZERO;
199 P_PRES_dot_AP0=ZERO;
200 P_PRES_dot_AP1=ZERO;
201 P_PRES_dot_AP2=ZERO;
202 #pragma omp parallel for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2) schedule(static)
203 for (z=0;z<n;++z) {
204 R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
205 P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
206 P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
207 P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
208 }
209 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
210 P_PRES_dot_AP[1]=P_PRES_dot_AP1;
211 P_PRES_dot_AP[2]=P_PRES_dot_AP2;
212 R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
213 } else if (order==4) {
214 R_PRES_dot_P=ZERO;
215 P_PRES_dot_AP0=ZERO;
216 P_PRES_dot_AP1=ZERO;
217 P_PRES_dot_AP2=ZERO;
218 P_PRES_dot_AP3=ZERO;
219 #pragma omp parallel for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3) schedule(static)
220 for (z=0;z<n;++z) {
221 R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
222 P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
223 P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
224 P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
225 P_PRES_dot_AP3+=P_PRES[3][z]*AP[z];
226 }
227 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
228 P_PRES_dot_AP[1]=P_PRES_dot_AP1;
229 P_PRES_dot_AP[2]=P_PRES_dot_AP2;
230 P_PRES_dot_AP[3]=P_PRES_dot_AP3;
231 R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
232 } else if (order==5) {
233 R_PRES_dot_P=ZERO;
234 P_PRES_dot_AP0=ZERO;
235 P_PRES_dot_AP1=ZERO;
236 P_PRES_dot_AP2=ZERO;
237 P_PRES_dot_AP3=ZERO;
238 P_PRES_dot_AP4=ZERO;
239 #pragma omp parallel for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3,P_PRES_dot_AP4) schedule(static)
240 for (z=0;z<n;++z) {
241 R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
242 P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
243 P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
244 P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
245 P_PRES_dot_AP3+=P_PRES[3][z]*AP[z];
246 P_PRES_dot_AP4+=P_PRES[4][z]*AP[z];
247 }
248 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
249 P_PRES_dot_AP[1]=P_PRES_dot_AP1;
250 P_PRES_dot_AP[2]=P_PRES_dot_AP2;
251 P_PRES_dot_AP[3]=P_PRES_dot_AP3;
252 P_PRES_dot_AP[4]=P_PRES_dot_AP4;
253 R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
254 } else if (order==6) {
255 R_PRES_dot_P=ZERO;
256 P_PRES_dot_AP0=ZERO;
257 P_PRES_dot_AP1=ZERO;
258 P_PRES_dot_AP2=ZERO;
259 P_PRES_dot_AP3=ZERO;
260 P_PRES_dot_AP4=ZERO;
261 P_PRES_dot_AP5=ZERO;
262 #pragma omp parallel for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3,P_PRES_dot_AP4,P_PRES_dot_AP5) schedule(static)
263 for (z=0;z<n;++z) {
264 R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
265 P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
266 P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
267 P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
268 P_PRES_dot_AP3+=P_PRES[3][z]*AP[z];
269 P_PRES_dot_AP4+=P_PRES[4][z]*AP[z];
270 P_PRES_dot_AP5+=P_PRES[5][z]*AP[z];
271 }
272 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
273 P_PRES_dot_AP[1]=P_PRES_dot_AP1;
274 P_PRES_dot_AP[2]=P_PRES_dot_AP2;
275 P_PRES_dot_AP[3]=P_PRES_dot_AP3;
276 P_PRES_dot_AP[4]=P_PRES_dot_AP4;
277 P_PRES_dot_AP[5]=P_PRES_dot_AP5;
278 R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
279 } else if (order>6) {
280 R_PRES_dot_P=ZERO;
281 P_PRES_dot_AP0=ZERO;
282 P_PRES_dot_AP1=ZERO;
283 P_PRES_dot_AP2=ZERO;
284 P_PRES_dot_AP3=ZERO;
285 P_PRES_dot_AP4=ZERO;
286 P_PRES_dot_AP5=ZERO;
287 P_PRES_dot_AP6=ZERO;
288 #pragma omp parallel for private(z) reduction(+:R_PRES_dot_P,P_PRES_dot_AP0,P_PRES_dot_AP1,P_PRES_dot_AP2,P_PRES_dot_AP3,P_PRES_dot_AP4,P_PRES_dot_AP5,P_PRES_dot_AP6) schedule(static)
289 for (z=0;z<n;++z) {
290 R_PRES_dot_P+=R_PRES[0][z]*P_PRES[0][z];
291 P_PRES_dot_AP0+=P_PRES[0][z]*AP[z];
292 P_PRES_dot_AP1+=P_PRES[1][z]*AP[z];
293 P_PRES_dot_AP2+=P_PRES[2][z]*AP[z];
294 P_PRES_dot_AP3+=P_PRES[3][z]*AP[z];
295 P_PRES_dot_AP4+=P_PRES[4][z]*AP[z];
296 P_PRES_dot_AP5+=P_PRES[5][z]*AP[z];
297 P_PRES_dot_AP6+=P_PRES[6][z]*AP[z];
298 }
299 P_PRES_dot_AP[0]=P_PRES_dot_AP0;
300 P_PRES_dot_AP[1]=P_PRES_dot_AP1;
301 P_PRES_dot_AP[2]=P_PRES_dot_AP2;
302 P_PRES_dot_AP[3]=P_PRES_dot_AP3;
303 P_PRES_dot_AP[4]=P_PRES_dot_AP4;
304 P_PRES_dot_AP[5]=P_PRES_dot_AP5;
305 P_PRES_dot_AP[6]=P_PRES_dot_AP6;
306 R_PRES_dot_P_PRES[0]=R_PRES_dot_P;
307
308 P_PRES_dot_AP0=ZERO;
309 for (i=7;i<order;++i) {
310 #pragma omp parallel for private(z) reduction(+:P_PRES_dot_AP0) schedule(static)
311 for (z=0;z<n;++z) P_PRES_dot_AP0+=P_PRES[i][z]*AP[z];
312 P_PRES_dot_AP[i]=P_PRES_dot_AP0;
313 P_PRES_dot_AP0=ZERO;
314 }
315 }
316 /* this fixes a problem with the intel compiler */
317 P_PRES_dot_AP0=R_PRES_dot_P_PRES[0];
318 /*** if sum_BREAKF is equal to zero a breakdown occurs.
319 *** iteration procedure can be continued but R_PRES is not the
320 *** residual of X_PRES approximation.
321 ***/
322 sum_BREAKF=0.;
323 for (i=0;i<order;++i) sum_BREAKF +=BREAKF[i];
324 breakFlag=!((ABS(P_PRES_dot_AP0) > ZERO) && (sum_BREAKF >ZERO));
325 if (!breakFlag) {
326 breakFlag=FALSE;
327 /***
328 ***** X_PRES and R_PRES are moved to memory:
329 ***/
330 Factor=0.;
331 for (i=0;i<order;++i) {
332 ALPHA[i]=-P_PRES_dot_AP[i]/R_PRES_dot_P_PRES[i];
333 Factor+=BREAKF[i]*ALPHA[i];
334 }
335
336 save_R_PRES_dot_P_PRES=R_PRES_dot_P_PRES[Length_of_mem-1];
337 save_R_PRES=R_PRES[Length_of_mem-1];
338 save_XPRES=X_PRES[Length_of_mem-1];
339 save_P_PRES=P_PRES[Length_of_mem-1];
340 for (i=Length_of_mem-1;i>0;--i) {
341 BREAKF[i]=BREAKF[i-1];
342 R_PRES_dot_P_PRES[i]=R_PRES_dot_P_PRES[i-1];
343 R_PRES[i]=R_PRES[i-1];
344 X_PRES[i]=X_PRES[i-1];
345 P_PRES[i]=P_PRES[i-1];
346 }
347 R_PRES_dot_P_PRES[0]=save_R_PRES_dot_P_PRES;
348 R_PRES[0]=save_R_PRES;
349 X_PRES[0]=save_XPRES;
350 P_PRES[0]=save_P_PRES;
351
352 if (ABS(Factor)<=ZERO) {
353 Factor=1.;
354 BREAKF[0]=ZERO;
355 } else {
356 Factor=1./Factor;
357 BREAKF[0]=ONE;
358 }
359 for (i=0;i<order;++i) ALPHA[i]*=Factor;
360 /*
361 ***** update of solution X_PRES and its residual R_PRES:
362 ***
363 *** P is used to accumulate X and AP to accumulate R. X and R
364 *** are still needed until they are put into the X and R memory
365 *** R_PRES and X_PRES
366 ***
367 **/
368 breakf0=BREAKF[0];
369 if (order==0) {
370 #pragma omp parallel for private(z) schedule(static)
371 for (z=0;z<n;++z) {
372 R_PRES[0][z]= Factor* AP[z];
373 X_PRES[0][z]=-Factor*P_PRES[1][z];
374 }
375 } else if (order==1) {
376 #pragma omp parallel for private(z) schedule(static)
377 for (z=0;z<n;++z) {
378 R_PRES[0][z]= Factor* AP[z]+ALPHA[0]*R_PRES[1][z];
379 X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z];
380 }
381 } else if (order==2) {
382 #pragma omp parallel for private(z) schedule(static)
383 for (z=0;z<n;++z) {
384 R_PRES[0][z]= Factor* AP[z]+ALPHA[0]*R_PRES[1][z]
385 +ALPHA[1]*R_PRES[2][z];
386 X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
387 +ALPHA[1]*X_PRES[2][z];
388 }
389 } else if (order==3) {
390 #pragma omp parallel for private(z) schedule(static)
391 for (z=0;z<n;++z) {
392 R_PRES[0][z]= Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
393 +ALPHA[1]*R_PRES[2][z]
394 +ALPHA[2]*R_PRES[3][z];
395 X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
396 +ALPHA[1]*X_PRES[2][z]
397 +ALPHA[2]*X_PRES[3][z];
398 }
399 } else if (order==4) {
400 #pragma omp parallel for private(z) schedule(static)
401 for (z=0;z<n;++z) {
402 R_PRES[0][z]= Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
403 +ALPHA[1]*R_PRES[2][z]
404 +ALPHA[2]*R_PRES[3][z]
405 +ALPHA[3]*R_PRES[4][z];
406 X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
407 +ALPHA[1]*X_PRES[2][z]
408 +ALPHA[2]*X_PRES[3][z]
409 +ALPHA[3]*X_PRES[4][z];
410 }
411 } else if (order==5) {
412 #pragma omp parallel for private(z) schedule(static)
413 for (z=0;z<n;++z) {
414 R_PRES[0][z]=Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
415 +ALPHA[1]*R_PRES[2][z]
416 +ALPHA[2]*R_PRES[3][z]
417 +ALPHA[3]*R_PRES[4][z]
418 +ALPHA[4]*R_PRES[5][z];
419 X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
420 +ALPHA[1]*X_PRES[2][z]
421 +ALPHA[2]*X_PRES[3][z]
422 +ALPHA[3]*X_PRES[4][z]
423 +ALPHA[4]*X_PRES[5][z];
424 }
425 } else if (order==6) {
426 #pragma omp parallel for private(z) schedule(static)
427 for (z=0;z<n;++z) {
428 R_PRES[0][z]=Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
429 +ALPHA[1]*R_PRES[2][z]
430 +ALPHA[2]*R_PRES[3][z]
431 +ALPHA[3]*R_PRES[4][z]
432 +ALPHA[4]*R_PRES[5][z]
433 +ALPHA[5]*R_PRES[6][z];
434 X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
435 +ALPHA[1]*X_PRES[2][z]
436 +ALPHA[2]*X_PRES[3][z]
437 +ALPHA[3]*X_PRES[4][z]
438 +ALPHA[4]*X_PRES[5][z]
439 +ALPHA[5]*X_PRES[6][z];
440 }
441 } else if (order>6) {
442 #pragma omp parallel for private(z) schedule(static)
443 for (z=0;z<n;++z) {
444 R_PRES[0][z]=Factor*AP[z]+ALPHA[0]*R_PRES[1][z]
445 +ALPHA[1]*R_PRES[2][z]
446 +ALPHA[2]*R_PRES[3][z]
447 +ALPHA[3]*R_PRES[4][z]
448 +ALPHA[4]*R_PRES[5][z]
449 +ALPHA[5]*R_PRES[6][z]
450 +ALPHA[6]*R_PRES[7][z];
451 X_PRES[0][z]=-Factor*P_PRES[1][z]+ALPHA[0]*X_PRES[1][z]
452 +ALPHA[1]*X_PRES[2][z]
453 +ALPHA[2]*X_PRES[3][z]
454 +ALPHA[3]*X_PRES[4][z]
455 +ALPHA[4]*X_PRES[5][z]
456 +ALPHA[5]*X_PRES[6][z]
457 +ALPHA[6]*X_PRES[7][z];
458 }
459 for (i=7;i<order;++i) {
460 #pragma omp parallel for private(z) schedule(static)
461 for (z=0;z<n;++z) {
462 R_PRES[0][z]+=ALPHA[i]*R_PRES[i+1][z];
463 X_PRES[0][z]+=ALPHA[i]*X_PRES[i+1][z];
464 }
465 }
466 }
467 if (breakf0>0.) {
468 /***
469 ***** calculate gamma from min_(gamma){|R+gamma*(R_PRES-R)|_2}:
470 ***/
471 SC1=ZERO;
472 SC2=ZERO;
473 L2_R=ZERO;
474 #pragma omp parallel for private(z,diff) reduction(+:SC1,SC2) schedule(static)
475 for (z=0;z<n;++z) {
476 diff=R_PRES[0][z]-r[z];
477 SC1+=diff*diff;
478 SC2+=diff*r[z];
479 }
480 gamma=(SC1<=ZERO) ? ZERO : -SC2/SC1;
481 #pragma omp parallel for private(z) reduction(+:L2_R) schedule(static)
482 for (z=0;z<n;++z) {
483 x[z]+=gamma*(X_PRES[0][z]-x[z]);
484 r[z]+=gamma*(R_PRES[0][z]-r[z]);
485 L2_R+=r[z]*r[z];
486 }
487 norm_of_residual=sqrt(L2_R);
488 convergeFlag = (norm_of_residual <= tol);
489 if (restart>0) restartFlag=(num_iter_restart >= restart);
490 } else {
491 convergeFlag=FALSE;
492 restartFlag=FALSE;
493 }
494 maxIterFlag = (num_iter >= maxit);
495 }
496 }
497 /* end of iteration */
498 Norm_of_residual_global=norm_of_residual;
499 Num_iter_global=num_iter;
500 if (maxIterFlag) {
501 Status = SOLVER_MAXITER_REACHED;
502 } else if (breakFlag) {
503 Status = SOLVER_BREAKDOWN;
504 }
505 }
506 for (i=0;i<Length_of_recursion;i++) {
507 TMPMEMFREE(X_PRES[i]);
508 TMPMEMFREE(R_PRES[i]);
509 TMPMEMFREE(P_PRES[i]);
510 }
511 TMPMEMFREE(AP);
512 TMPMEMFREE(X_PRES);
513 TMPMEMFREE(R_PRES);
514 TMPMEMFREE(P_PRES);
515 TMPMEMFREE(P_PRES_dot_AP);
516 TMPMEMFREE(R_PRES_dot_P_PRES);
517 TMPMEMFREE(BREAKF);
518 TMPMEMFREE(ALPHA);
519 *iter=Num_iter_global;
520 *tolerance=Norm_of_residual_global;
521 return Status;
522 }

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26