1 |
/* $Id:$ */ |
2 |
|
3 |
/******************************************************* |
4 |
* |
5 |
* Copyright 2008 by University of Queensland |
6 |
* |
7 |
* http://esscc.uq.edu.au |
8 |
* Primary Business: Queensland, Australia |
9 |
* Licensed under the Open Software License version 3.0 |
10 |
* http://www.opensource.org/licenses/osl-3.0.php |
11 |
* |
12 |
*******************************************************/ |
13 |
/* |
14 |
* Purpose |
15 |
* ======= |
16 |
* |
17 |
* FGMRES solves the non-linear system f0+J_0*d=0 |
18 |
* where f0=F(x0), J_0 is the jacobian of F at x0. |
19 |
* |
20 |
* Convergence test: norm(f0+J_0*d)<=tolerance*norm(f0) |
21 |
* |
22 |
* Arguments |
23 |
* ========= |
24 |
* |
25 |
* Paso_Function * F evaluation of F (including any preconditioner) |
26 |
* |
27 |
* x0 (input) current point |
28 |
* |
29 |
* f0 (input) function F at current point x0 |
30 |
* |
31 |
* d (output) solution of f0+J0*d=0 with accuracy tolerance |
32 |
* |
33 |
* iter (input/output) |
34 |
* On input, the maximum num_iterations to be performed. |
35 |
* On output, actual number of num_iterations performed. |
36 |
* tolerance (input/output) |
37 |
* On input, the allowable convergence measure for norm(f0+J0*d)/norm(f0) |
38 |
* On output, the final value for norm(f0+J0*d) |
39 |
* return value |
40 |
* |
41 |
* =SOLVER_NO_ERROR: Successful exit. approximate solution returned. |
42 |
* =SOLVER_MAXNUM_ITER_REACHED |
43 |
* =SOLVER_INPUT_ERROR Illegal parameter: |
44 |
* =SOLVER_BREAKDOWN: bad luck! |
45 |
* =SOLVER_MEMORY_ERROR : no memory available |
46 |
* |
47 |
* ============================================================== |
48 |
*/ |
49 |
|
50 |
#include "Common.h" |
51 |
#include "Solver.h" |
52 |
#include "Util.h" |
53 |
#ifdef _OPENMP |
54 |
#include <omp.h> |
55 |
#endif |
56 |
|
57 |
err_t Paso_Solver_NLGMRES( |
58 |
Paso_Function * F, |
59 |
const double* f0, |
60 |
const double* x0, |
61 |
double * x, |
62 |
dim_t *iter, |
63 |
double* tolerance, |
64 |
Paso_Performance* pp) |
65 |
{ |
66 |
double static RENORMALIZATION_CONST=0.001; |
67 |
dim_t l=(*iter)+1, iter_max=*iter,k=0,n=F->local_n,i,j; |
68 |
double rel_tol=*tolerance, abs_tol, normf0, normv, normv2, hh, hr, nu, norm_of_residual=0.; |
69 |
bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE; |
70 |
double *h=NULL, **v=NULL, *c=NULL,*s=NULL,*g=NULL, *work=NULL; |
71 |
err_t Status=SOLVER_NO_ERROR; |
72 |
|
73 |
/* Test the input parameters. */ |
74 |
if (n < 0 || iter_max<=0 || l<1 || rel_tol<0) { |
75 |
return SOLVER_INPUT_ERROR; |
76 |
} |
77 |
Paso_zeroes(n,x); |
78 |
/* |
79 |
* allocate memory: |
80 |
*/ |
81 |
h=TMPMEMALLOC(l*l,double); |
82 |
v=TMPMEMALLOC(iter_max,double*); |
83 |
c=TMPMEMALLOC(l,double); |
84 |
s=TMPMEMALLOC(l,double); |
85 |
g=TMPMEMALLOC(l,double); |
86 |
work=TMPMEMALLOC(n,double); |
87 |
|
88 |
if (h==NULL || v ==NULL || c== NULL || s == NULL || g== NULL || work==NULL) { |
89 |
Status=SOLVER_MEMORY_ERROR; |
90 |
} else { |
91 |
for (i=0;i<iter_max;i++) v[i]=NULL; |
92 |
for (i=0;i<iter_max;i++) { |
93 |
v[i]=TMPMEMALLOC(n,double); |
94 |
if (v[i]==NULL) { |
95 |
Status=SOLVER_MEMORY_ERROR; |
96 |
break; |
97 |
} |
98 |
} |
99 |
} |
100 |
if ( Status ==SOLVER_NO_ERROR ) { |
101 |
/* |
102 |
* the show begins: |
103 |
*/ |
104 |
normf0=Paso_l2(n,f0,F->mpi_info); |
105 |
k=0; |
106 |
convergeFlag=(ABS(normf0)<=0); |
107 |
if (! convergeFlag) { |
108 |
abs_tol=rel_tol*normf0; |
109 |
Paso_zeroes(n,v[0]); |
110 |
Paso_Update(n,1.,v[0],-1./normf0,f0); |
111 |
g[0]=normf0; |
112 |
while(! (breakFlag || maxIterFlag || convergeFlag)) { |
113 |
k++; |
114 |
/* |
115 |
* call directional derivative function |
116 |
*/ |
117 |
Paso_FunctionDerivative(v[k],v[k-1],F,f0,x0,work); |
118 |
normv=Paso_l2(n,v[k],F->mpi_info); |
119 |
/* |
120 |
* Modified Gram-Schmidt |
121 |
*/ |
122 |
for (j=0;j<k;j++){ |
123 |
hh=Paso_InnerProduct(n,v[j],v[k],F->mpi_info); |
124 |
Paso_Update(n,1.,v[k],(-hh),v[j]); |
125 |
h[INDEX2(j,k-1,l)]=hh; |
126 |
} |
127 |
normv2=Paso_l2(n,v[k],F->mpi_info); |
128 |
h[INDEX2(k,k-1,l)]=normv2; |
129 |
/* |
130 |
* reorthogonalize |
131 |
*/ |
132 |
if (! (normv + RENORMALIZATION_CONST*normv2 > normv)) { |
133 |
for (j=0;j<k;j++){ |
134 |
hr=Paso_InnerProduct(n,v[j],v[k],F->mpi_info); |
135 |
h[INDEX2(j,k-1,l)]+=hr; |
136 |
Paso_Update(n,1.,v[k],(-hr),v[j]); |
137 |
} |
138 |
normv2=Paso_l2(n,v[k],F->mpi_info); |
139 |
h[INDEX2(k,k-1,l)]=normv2; |
140 |
} |
141 |
/* |
142 |
* watch out for happy breakdown |
143 |
*/ |
144 |
if(normv2 > 0.) { |
145 |
Paso_Update(n,1./normv2,v[k],0.,v[k]); |
146 |
} |
147 |
/* |
148 |
* Form and store the information for the new Givens rotation |
149 |
*/ |
150 |
ApplyGivensRotations(k,&h[INDEX2(0,k-1,l)],c,s); |
151 |
/* |
152 |
* Don't divide by zero if solution has been found |
153 |
*/ |
154 |
g[k]=0; |
155 |
nu=sqrt(h[INDEX2(k-1,k-1,l)]*h[INDEX2(k-1,k-1,l)]+h[INDEX2(k,k-1,l)]*h[INDEX2(k,k-1,l)]); |
156 |
if (nu>0) { |
157 |
c[k-1]=h[INDEX2(k-1,k-1,l)]/nu; |
158 |
s[k-1]=-h[k,k-1]/nu; |
159 |
h[INDEX2(k-1,k-1,l)]=c[k-1]*h[INDEX2(k-1,k-1,l)]-s[k-1]*h[k,k-1]; |
160 |
h[INDEX2(k,k-1,l)]=0; |
161 |
ApplyGivensRotations(2,&(g[k-1]),&(c[k-1]),&(s[k-1])); |
162 |
} |
163 |
norm_of_residual=abs(g[k]); |
164 |
maxIterFlag = (k>=iter_max); |
165 |
convergeFlag = (abs(g[k]) <= abs_tol); |
166 |
printf("FGMRES step %d: error %e (tol=%e)\n",k,fabs(g[k]),abs_tol); |
167 |
} |
168 |
} |
169 |
/* |
170 |
* all done and ready for the forward substitution: |
171 |
*/ |
172 |
|
173 |
for (i=0;i<k;i++) { |
174 |
for (j=0;j<i;j++) { |
175 |
g[i]-=h[INDEX2(j,i,l)]*g[j]; |
176 |
} |
177 |
g[i]/=h[INDEX2(i,i,l)]; |
178 |
Paso_Update(n,1.,x,g[i],v[i]); |
179 |
} |
180 |
} |
181 |
/* |
182 |
* clean up: |
183 |
*/ |
184 |
if ( v !=NULL) { |
185 |
for (i=0;i<iter_max;i++) TMPMEMFREE(v); |
186 |
} |
187 |
TMPMEMFREE(h); |
188 |
TMPMEMFREE(v); |
189 |
TMPMEMFREE(c); |
190 |
TMPMEMFREE(s); |
191 |
TMPMEMFREE(g); |
192 |
TMPMEMFREE(work); |
193 |
*iter=k; |
194 |
*tolerance=norm_of_residual; |
195 |
return Status; |
196 |
} |
197 |
|