1 |
gross |
1476 |
/* $Id:$ */ |
2 |
|
|
|
3 |
|
|
/******************************************************* |
4 |
|
|
* |
5 |
|
|
* Copyright 2008 by University of Queensland |
6 |
|
|
* |
7 |
|
|
* http://esscc.uq.edu.au |
8 |
|
|
* Primary Business: Queensland, Australia |
9 |
|
|
* Licensed under the Open Software License version 3.0 |
10 |
|
|
* http://www.opensource.org/licenses/osl-3.0.php |
11 |
|
|
* |
12 |
|
|
*******************************************************/ |
13 |
|
|
/* |
14 |
|
|
* Purpose |
15 |
|
|
* ======= |
16 |
|
|
* |
17 |
|
|
* FGMRES solves the non-linear system f0+J_0*d=0 |
18 |
|
|
* where f0=F(x0), J_0 is the jacobian of F at x0. |
19 |
|
|
* |
20 |
|
|
* Convergence test: norm(f0+J_0*d)<=tolerance*norm(f0) |
21 |
|
|
* |
22 |
|
|
* Arguments |
23 |
|
|
* ========= |
24 |
|
|
* |
25 |
|
|
* Paso_Function * F evaluation of F (including any preconditioner) |
26 |
|
|
* |
27 |
|
|
* x0 (input) current point |
28 |
|
|
* |
29 |
|
|
* f0 (input) function F at current point x0 |
30 |
|
|
* |
31 |
|
|
* d (output) solution of f0+J0*d=0 with accuracy tolerance |
32 |
|
|
* |
33 |
|
|
* iter (input/output) |
34 |
|
|
* On input, the maximum num_iterations to be performed. |
35 |
|
|
* On output, actual number of num_iterations performed. |
36 |
|
|
* tolerance (input/output) |
37 |
|
|
* On input, the allowable convergence measure for norm(f0+J0*d)/norm(f0) |
38 |
|
|
* On output, the final value for norm(f0+J0*d) |
39 |
|
|
* return value |
40 |
|
|
* |
41 |
|
|
* =SOLVER_NO_ERROR: Successful exit. approximate solution returned. |
42 |
|
|
* =SOLVER_MAXNUM_ITER_REACHED |
43 |
|
|
* =SOLVER_INPUT_ERROR Illegal parameter: |
44 |
|
|
* =SOLVER_BREAKDOWN: bad luck! |
45 |
|
|
* =SOLVER_MEMORY_ERROR : no memory available |
46 |
|
|
* |
47 |
|
|
* ============================================================== |
48 |
|
|
*/ |
49 |
|
|
|
50 |
|
|
#include "Common.h" |
51 |
|
|
#include "Solver.h" |
52 |
gross |
1477 |
#include "Util.h" |
53 |
gross |
1476 |
#ifdef _OPENMP |
54 |
|
|
#include <omp.h> |
55 |
|
|
#endif |
56 |
|
|
|
57 |
|
|
err_t Paso_Solver_NLGMRES( |
58 |
|
|
Paso_Function * F, |
59 |
|
|
const double* f0, |
60 |
|
|
const double* x0, |
61 |
|
|
double * x, |
62 |
|
|
dim_t *iter, |
63 |
|
|
double* tolerance, |
64 |
|
|
Paso_Performance* pp) |
65 |
|
|
{ |
66 |
|
|
double static RENORMALIZATION_CONST=0.001; |
67 |
|
|
dim_t l=(*iter)+1, iter_max=*iter,k=0,n=F->local_n,i,j; |
68 |
|
|
double rel_tol=*tolerance, abs_tol, normf0, normv, normv2, hh, hr, nu, norm_of_residual=0.; |
69 |
|
|
bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE; |
70 |
|
|
double *h=NULL, **v=NULL, *c=NULL,*s=NULL,*g=NULL, *work=NULL; |
71 |
|
|
err_t Status=SOLVER_NO_ERROR; |
72 |
|
|
|
73 |
|
|
/* Test the input parameters. */ |
74 |
|
|
if (n < 0 || iter_max<=0 || l<1 || rel_tol<0) { |
75 |
|
|
return SOLVER_INPUT_ERROR; |
76 |
|
|
} |
77 |
|
|
Paso_zeroes(n,x); |
78 |
|
|
/* |
79 |
|
|
* allocate memory: |
80 |
|
|
*/ |
81 |
|
|
h=TMPMEMALLOC(l*l,double); |
82 |
|
|
v=TMPMEMALLOC(iter_max,double*); |
83 |
|
|
c=TMPMEMALLOC(l,double); |
84 |
|
|
s=TMPMEMALLOC(l,double); |
85 |
|
|
g=TMPMEMALLOC(l,double); |
86 |
|
|
work=TMPMEMALLOC(n,double); |
87 |
|
|
|
88 |
|
|
if (h==NULL || v ==NULL || c== NULL || s == NULL || g== NULL || work==NULL) { |
89 |
|
|
Status=SOLVER_MEMORY_ERROR; |
90 |
|
|
} else { |
91 |
|
|
for (i=0;i<iter_max;i++) v[i]=NULL; |
92 |
|
|
for (i=0;i<iter_max;i++) { |
93 |
|
|
v[i]=TMPMEMALLOC(n,double); |
94 |
|
|
if (v[i]==NULL) { |
95 |
|
|
Status=SOLVER_MEMORY_ERROR; |
96 |
|
|
break; |
97 |
|
|
} |
98 |
|
|
} |
99 |
|
|
} |
100 |
|
|
if ( Status ==SOLVER_NO_ERROR ) { |
101 |
|
|
/* |
102 |
|
|
* the show begins: |
103 |
|
|
*/ |
104 |
|
|
normf0=Paso_l2(n,f0,F->mpi_info); |
105 |
|
|
k=0; |
106 |
|
|
convergeFlag=(ABS(normf0)<=0); |
107 |
|
|
if (! convergeFlag) { |
108 |
|
|
abs_tol=rel_tol*normf0; |
109 |
|
|
Paso_zeroes(n,v[0]); |
110 |
|
|
Paso_Update(n,1.,v[0],-1./normf0,f0); |
111 |
|
|
g[0]=normf0; |
112 |
|
|
while(! (breakFlag || maxIterFlag || convergeFlag)) { |
113 |
|
|
k++; |
114 |
|
|
/* |
115 |
|
|
* call directional derivative function |
116 |
|
|
*/ |
117 |
|
|
Paso_FunctionDerivative(v[k],v[k-1],F,f0,x0,work); |
118 |
|
|
normv=Paso_l2(n,v[k],F->mpi_info); |
119 |
|
|
/* |
120 |
|
|
* Modified Gram-Schmidt |
121 |
|
|
*/ |
122 |
|
|
for (j=0;j<k;j++){ |
123 |
|
|
hh=Paso_InnerProduct(n,v[j],v[k],F->mpi_info); |
124 |
|
|
Paso_Update(n,1.,v[k],(-hh),v[j]); |
125 |
|
|
h[INDEX2(j,k-1,l)]=hh; |
126 |
|
|
} |
127 |
|
|
normv2=Paso_l2(n,v[k],F->mpi_info); |
128 |
|
|
h[INDEX2(k,k-1,l)]=normv2; |
129 |
|
|
/* |
130 |
|
|
* reorthogonalize |
131 |
|
|
*/ |
132 |
|
|
if (! (normv + RENORMALIZATION_CONST*normv2 > normv)) { |
133 |
|
|
for (j=0;j<k;j++){ |
134 |
|
|
hr=Paso_InnerProduct(n,v[j],v[k],F->mpi_info); |
135 |
|
|
h[INDEX2(j,k-1,l)]+=hr; |
136 |
|
|
Paso_Update(n,1.,v[k],(-hr),v[j]); |
137 |
|
|
} |
138 |
|
|
normv2=Paso_l2(n,v[k],F->mpi_info); |
139 |
|
|
h[INDEX2(k,k-1,l)]=normv2; |
140 |
|
|
} |
141 |
|
|
/* |
142 |
|
|
* watch out for happy breakdown |
143 |
|
|
*/ |
144 |
|
|
if(normv2 > 0.) { |
145 |
gross |
1478 |
Paso_Update(n,1./normv2,v[k],0.,v[k]); |
146 |
gross |
1476 |
} |
147 |
|
|
/* |
148 |
|
|
* Form and store the information for the new Givens rotation |
149 |
|
|
*/ |
150 |
gross |
1477 |
ApplyGivensRotations(k,&h[INDEX2(0,k-1,l)],c,s); |
151 |
gross |
1476 |
/* |
152 |
|
|
* Don't divide by zero if solution has been found |
153 |
|
|
*/ |
154 |
|
|
g[k]=0; |
155 |
|
|
nu=sqrt(h[INDEX2(k-1,k-1,l)]*h[INDEX2(k-1,k-1,l)]+h[INDEX2(k,k-1,l)]*h[INDEX2(k,k-1,l)]); |
156 |
|
|
if (nu>0) { |
157 |
|
|
c[k-1]=h[INDEX2(k-1,k-1,l)]/nu; |
158 |
|
|
s[k-1]=-h[k,k-1]/nu; |
159 |
|
|
h[INDEX2(k-1,k-1,l)]=c[k-1]*h[INDEX2(k-1,k-1,l)]-s[k-1]*h[k,k-1]; |
160 |
|
|
h[INDEX2(k,k-1,l)]=0; |
161 |
|
|
ApplyGivensRotations(2,&(g[k-1]),&(c[k-1]),&(s[k-1])); |
162 |
|
|
} |
163 |
|
|
norm_of_residual=abs(g[k]); |
164 |
|
|
maxIterFlag = (k>=iter_max); |
165 |
|
|
convergeFlag = (abs(g[k]) <= abs_tol); |
166 |
ksteube |
1512 |
printf("FGMRES step %d: error %e (tol=%e)\n",k,fabs(g[k]),abs_tol); |
167 |
gross |
1476 |
} |
168 |
|
|
} |
169 |
|
|
/* |
170 |
|
|
* all done and ready for the forward substitution: |
171 |
|
|
*/ |
172 |
|
|
|
173 |
|
|
for (i=0;i<k;i++) { |
174 |
|
|
for (j=0;j<i;j++) { |
175 |
|
|
g[i]-=h[INDEX2(j,i,l)]*g[j]; |
176 |
|
|
} |
177 |
|
|
g[i]/=h[INDEX2(i,i,l)]; |
178 |
gross |
1477 |
Paso_Update(n,1.,x,g[i],v[i]); |
179 |
gross |
1476 |
} |
180 |
|
|
} |
181 |
|
|
/* |
182 |
|
|
* clean up: |
183 |
|
|
*/ |
184 |
|
|
if ( v !=NULL) { |
185 |
|
|
for (i=0;i<iter_max;i++) TMPMEMFREE(v); |
186 |
|
|
} |
187 |
|
|
TMPMEMFREE(h); |
188 |
|
|
TMPMEMFREE(v); |
189 |
|
|
TMPMEMFREE(c); |
190 |
|
|
TMPMEMFREE(s); |
191 |
|
|
TMPMEMFREE(g); |
192 |
|
|
TMPMEMFREE(work); |
193 |
|
|
*iter=k; |
194 |
|
|
*tolerance=norm_of_residual; |
195 |
|
|
return Status; |
196 |
|
|
} |
197 |
|
|
|