1 |
|
2 |
/******************************************************* |
3 |
* |
4 |
* Copyright (c) 2003-2010 by University of Queensland |
5 |
* Earth Systems Science Computational Center (ESSCC) |
6 |
* http://www.uq.edu.au/esscc |
7 |
* |
8 |
* Primary Business: Queensland, Australia |
9 |
* Licensed under the Open Software License version 3.0 |
10 |
* http://www.opensource.org/licenses/osl-3.0.php |
11 |
* |
12 |
*******************************************************/ |
13 |
|
14 |
|
15 |
#include "Common.h" |
16 |
#include "Solver.h" |
17 |
#include "PasoUtil.h" |
18 |
#ifdef _OPENMP |
19 |
#include <omp.h> |
20 |
#endif |
21 |
|
22 |
err_t Paso_Solver_GMRES2( |
23 |
Paso_Function * F, |
24 |
const double* f0, |
25 |
const double* x0, |
26 |
double * dx, |
27 |
dim_t *iter, |
28 |
double* tolerance, |
29 |
Paso_Performance* pp) |
30 |
{ |
31 |
static double RENORMALIZATION_CONST=0.001; |
32 |
const dim_t l=(*iter)+1, iter_max=*iter; |
33 |
dim_t k=0,i,j; |
34 |
const dim_t n=F->n; |
35 |
const double rel_tol=*tolerance; |
36 |
double abs_tol, normf0, normv, normv2, hh, hr, nu, norm_of_residual=0.; |
37 |
bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE; |
38 |
double *h=NULL, **v=NULL, *c=NULL,*s=NULL,*g=NULL, *work=NULL; |
39 |
err_t Status=SOLVER_NO_ERROR; |
40 |
|
41 |
/* Test the input parameters. */ |
42 |
if (n < 0 || iter_max<=0 || l<1 || rel_tol<0) { |
43 |
return SOLVER_INPUT_ERROR; |
44 |
} |
45 |
Paso_zeroes(n,dx); |
46 |
/* |
47 |
* allocate memory: |
48 |
*/ |
49 |
h=TMPMEMALLOC(l*l,double); |
50 |
v=TMPMEMALLOC(l,double*); |
51 |
c=TMPMEMALLOC(l,double); |
52 |
s=TMPMEMALLOC(l,double); |
53 |
g=TMPMEMALLOC(l,double); |
54 |
work=TMPMEMALLOC(n,double); |
55 |
|
56 |
if (h==NULL || v ==NULL || c== NULL || s == NULL || g== NULL || work==NULL) { |
57 |
Status=SOLVER_MEMORY_ERROR; |
58 |
} |
59 |
if ( Status ==SOLVER_NO_ERROR ) { |
60 |
for (i=0;i<iter_max;i++) v[i]=NULL; |
61 |
/* |
62 |
* the show begins: |
63 |
*/ |
64 |
normf0=Paso_l2(n,f0,F->mpi_info); |
65 |
k=0; |
66 |
convergeFlag=(ABS(normf0)<=0); |
67 |
if (! convergeFlag) { |
68 |
abs_tol=rel_tol*normf0; |
69 |
printf("GMRES2 initial residual norm %e (rel. tol=%e)\n",normf0,rel_tol); |
70 |
v[0]=TMPMEMALLOC(n,double); |
71 |
if (v[0]==NULL) { |
72 |
Status=SOLVER_MEMORY_ERROR; |
73 |
} else { |
74 |
Paso_zeroes(n,v[0]); |
75 |
Paso_Update(n,1.,v[0],-1./normf0,f0); /* v = -1./normf0*f0 */ |
76 |
g[0]=normf0; |
77 |
} |
78 |
while( (! (breakFlag || maxIterFlag || convergeFlag)) && (Status ==SOLVER_NO_ERROR) ) { |
79 |
k++; |
80 |
v[k]=TMPMEMALLOC(n,double); |
81 |
if (v[k]==NULL) { |
82 |
Status=SOLVER_MEMORY_ERROR; |
83 |
} else { |
84 |
/* |
85 |
* call directional derivative function |
86 |
*/ |
87 |
Paso_FunctionDerivative(v[k],v[k-1],F,f0,x0,work,pp); |
88 |
normv=Paso_l2(n,v[k],F->mpi_info); |
89 |
/* |
90 |
* Modified Gram-Schmidt |
91 |
*/ |
92 |
for (j=0;j<k;j++){ |
93 |
hh=Paso_InnerProduct(n,v[j],v[k],F->mpi_info); |
94 |
Paso_Update(n,1.,v[k],(-hh),v[j]); /* v[k]-hh*v[j] */ |
95 |
h[INDEX2(j,k-1,l)]=hh; |
96 |
/* printf("%d : %d = %e\n",k,j,hh); */ |
97 |
} |
98 |
normv2=Paso_l2(n,v[k],F->mpi_info); |
99 |
h[INDEX2(k,k-1,l)]=normv2; |
100 |
/* |
101 |
* reorthogonalize |
102 |
*/ |
103 |
if (! (normv + RENORMALIZATION_CONST*normv2 > normv)) { |
104 |
/* printf("GMRES2: renormalization!"); */ |
105 |
for (j=0;j<k;j++){ |
106 |
hr=Paso_InnerProduct(n,v[j],v[k],F->mpi_info); |
107 |
|
108 |
h[INDEX2(j,k-1,l)]+=hr; |
109 |
Paso_Update(n,1.,v[k],(-hr),v[j]); |
110 |
} |
111 |
normv2=Paso_l2(n,v[k],F->mpi_info); |
112 |
h[INDEX2(k,k-1,l)]=normv2; |
113 |
} |
114 |
/* |
115 |
{int p,q; |
116 |
for (p=0;p<k+1;p++){ |
117 |
for (q=0;q<k+1;q++)printf("%e ",h[INDEX2(p,q,l)]); |
118 |
printf("\n"); |
119 |
|
120 |
} |
121 |
} |
122 |
*/ |
123 |
/* |
124 |
* watch out for happy breakdown |
125 |
*/ |
126 |
if(normv2 > 0.) { |
127 |
Paso_Update(n,1./normv2,v[k],0.,v[k]); /* normalize v[k] */ |
128 |
} |
129 |
/* |
130 |
* Form and store the information for the new Givens rotation |
131 |
*/ |
132 |
ApplyGivensRotations(k,&h[INDEX2(0,k-1,l)],c,s); |
133 |
|
134 |
/* |
135 |
* Don't divide by zero if solution has been found |
136 |
*/ |
137 |
g[k]=0; |
138 |
nu=sqrt(h[INDEX2(k-1,k-1,l)]*h[INDEX2(k-1,k-1,l)]+h[INDEX2(k,k-1,l)]*h[INDEX2(k,k-1,l)]); |
139 |
if (nu>0) { |
140 |
c[k-1]= h[INDEX2(k-1,k-1,l)]/nu; |
141 |
s[k-1]=-h[INDEX2(k,k-1,l)]/nu; |
142 |
h[INDEX2(k-1,k-1,l)]=c[k-1]*h[INDEX2(k-1,k-1,l)]-s[k-1]*h[INDEX2(k,k-1,l)]; |
143 |
h[INDEX2(k,k-1,l)]=0; |
144 |
ApplyGivensRotations(2,&(g[k-1]),&(c[k-1]),&(s[k-1])); |
145 |
} |
146 |
norm_of_residual=fabs(g[k]); |
147 |
maxIterFlag = (k>=iter_max); |
148 |
convergeFlag = (norm_of_residual <= abs_tol); |
149 |
printf("GMRES2 step %d: residual %e (abs. tol=%e)\n",k,fabs(g[k]),abs_tol); |
150 |
} |
151 |
} |
152 |
} |
153 |
/* |
154 |
* all done and ready for the forward substitution: |
155 |
*/ |
156 |
|
157 |
for (i=k-1;i>=0;--i) { |
158 |
for (j=i+1;j<k;j++) { |
159 |
g[i]-=h[INDEX2(i,j,l)]*g[j]; |
160 |
} |
161 |
g[i]/=h[INDEX2(i,i,l)]; |
162 |
Paso_Update(n,1.,dx,g[i],v[i]); /* dx = dx+g[i]*v[i] */ |
163 |
} |
164 |
} |
165 |
/* |
166 |
* clean up: |
167 |
*/ |
168 |
if ( v !=NULL) { |
169 |
for (i=0;i<iter_max;i++) TMPMEMFREE(v[i]); |
170 |
} |
171 |
TMPMEMFREE(h); |
172 |
TMPMEMFREE(v); |
173 |
TMPMEMFREE(c); |
174 |
TMPMEMFREE(s); |
175 |
TMPMEMFREE(g); |
176 |
TMPMEMFREE(work); |
177 |
*iter=k; |
178 |
*tolerance=norm_of_residual; |
179 |
return Status; |
180 |
} |
181 |
|