# Diff of /branches/doubleplusgood/paso/src/TFQMR.cpp

revision 1708 by ksteube, Thu Aug 14 22:42:24 2008 UTC | revision 4154 by jfenwick, Tue Jan 22 09:30:23 2013 UTC
# Line 1  Line 1
1
2  /*******************************************************  /*****************************************************************************
3   *  *
4   *           Copyright 2003-2007 by ACceSS MNRF  * Copyright (c) 2003-2013 by University of Queensland
5   *       Copyright 2007 by University of Queensland  * http://www.uq.edu.au
6   *  *
7   *                http://esscc.uq.edu.au  * Primary Business: Queensland, Australia
11   *  * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12   *******************************************************/  * Development since 2012 by School of Earth Sciences
13    *
14    *****************************************************************************/
15
16
17  /* TFQMR iterations */  /* TFQMR iterations */
18
# Line 22  Line 25
25  #include <omp.h>  #include <omp.h>
26  #endif  #endif
27
28  #ifdef PASO_MPI  #ifdef ESYS_MPI
29  #include <mpi.h>  #include <mpi.h>
30  #endif  #endif
31
# Line 40  Line 43
43  *  =========  *  =========
44  *  *
45  *  r       (input) DOUBLE PRECISION array, dimension N.  *  r       (input) DOUBLE PRECISION array, dimension N.
46  *          On entry, residual of initial guess x  *
47  *  *
48  *  x       (input/output) DOUBLE PRECISION array, dimension N.  *  x       (input/output) DOUBLE PRECISION array, dimension N.
49  *          On input, the initial guess.  *
50  *  *
51  *  ITER    (input/output) INT  *  ITER    (input/output) INT
52  *          On input, the maximum iterations to be performed.  *          On input, the maximum iterations to be performed.
# Line 52  Line 55
55  *  INFO    (output) INT  *  INFO    (output) INT
56  *  *
57  *          = SOLVER_NO_ERROR: Successful exit. Iterated approximate solution returned.  *          = SOLVER_NO_ERROR: Successful exit. Iterated approximate solution returned.
58  *          = SOLVEr_MAXITER_REACHED  *          = SOLVER_MAXITER_REACHED
59  *          = SOLVER_INPUT_ERROR Illegal parameter:  *          = SOLVER_INPUT_ERROR Illegal parameter:
60  *          = SOLVEr_BREAKDOWN: If parameters rHO or OMEGA become smaller  *          = SOLVER_BREAKDOWN: If parameters RHO or OMEGA become smaller
61  *          = SOLVER_MEMORY_ERROR : If parameters rHO or OMEGA become smaller  *          = SOLVER_MEMORY_ERROR : If parameters RHO or OMEGA become smaller
62  *  *
63  *  ==============================================================  *  ==============================================================
64  */  */
# Line 83  err_t Paso_Solver_TFQMR( Line 86  err_t Paso_Solver_TFQMR(
86    bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE;    bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE;
87    err_t status = SOLVER_NO_ERROR;    err_t status = SOLVER_NO_ERROR;
88    dim_t n = Paso_SystemMatrix_getTotalNumRows(A);    dim_t n = Paso_SystemMatrix_getTotalNumRows(A);
89    double  *u1=NULL, *u2=NULL, *y1=NULL, *y2=NULL, *d=NULL, *w=NULL, *v=NULL, *v_old=NULL, *tmp=NULL ;    double  *u1=NULL, *u2=NULL, *y1=NULL, *y2=NULL, *d=NULL, *w=NULL, *v=NULL, *temp_vector=NULL,*res=NULL;
90
91    double eta,theta,tau,rho,beta,alpha,sigma,rhon,c;    double eta,theta,tau,rho,beta,alpha,sigma,rhon,c;
92
# Line 103  err_t Paso_Solver_TFQMR( Line 106  err_t Paso_Solver_TFQMR(
106    d=TMPMEMALLOC(n,double);    d=TMPMEMALLOC(n,double);
107    w=TMPMEMALLOC(n,double);    w=TMPMEMALLOC(n,double);
108    v=TMPMEMALLOC(n,double);    v=TMPMEMALLOC(n,double);
109    v_old=TMPMEMALLOC(n,double);    temp_vector=TMPMEMALLOC(n,double);
110        res=TMPMEMALLOC(n,double);

tmp=TMPMEMALLOC(n,double);

111
112   if (u1 ==NULL || u2== NULL || y1 == NULL || y2== NULL || d==NULL || w==NULL || v==NULL || v_old==NULL) {   if (u1 ==NULL || u2== NULL || y1 == NULL || y2== NULL || d==NULL || w==NULL || v==NULL ) {
113       status=SOLVER_MEMORY_ERROR;       status=SOLVER_MEMORY_ERROR;
114    }    }
115
# Line 120  err_t Paso_Solver_TFQMR( Line 120  err_t Paso_Solver_TFQMR(
120      status=SOLVER_INPUT_ERROR;      status=SOLVER_INPUT_ERROR;
121    }    }
122
123      Paso_zeroes(n,x);
124
125    Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER);    Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER);
126    Paso_Solver_solvePreconditioner(A,r,r);    Paso_SystemMatrix_solvePreconditioner(A,res,r);
127    Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER);    Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER);
128
129    Performance_startMonitor(pp,PERFORMANCE_SOLVER);    Performance_startMonitor(pp,PERFORMANCE_SOLVER);
# Line 129  err_t Paso_Solver_TFQMR( Line 131  err_t Paso_Solver_TFQMR(
131    Paso_zeroes(n,u2);    Paso_zeroes(n,u2);
132    Paso_zeroes(n,y2);    Paso_zeroes(n,y2);
133
134    Paso_Copy(n,w,r);    Paso_Copy(n,w,res);
135    Paso_Copy(n,y1,r);    Paso_Copy(n,y1,res);
136
137    Paso_zeroes(n,d);    Paso_zeroes(n,d);
138
139    Performance_stopMonitor(pp,PERFORMANCE_SOLVER);    Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
140    Performance_startMonitor(pp,PERFORMANCE_MVM);    Performance_startMonitor(pp,PERFORMANCE_MVM);
141    Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, y1,ZERO,v);    Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, A, y1,PASO_ZERO,temp_vector);
142    Performance_stopMonitor(pp,PERFORMANCE_MVM);    Performance_stopMonitor(pp,PERFORMANCE_MVM);
143    Performance_startMonitor(pp,PERFORMANCE_SOLVER);    Performance_startMonitor(pp,PERFORMANCE_SOLVER);
144
145    Performance_stopMonitor(pp,PERFORMANCE_SOLVER);    Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
146    Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER);    Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER);
147    Paso_Solver_solvePreconditioner(A,v,v);    Paso_SystemMatrix_solvePreconditioner(A,v,temp_vector);
148    Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER);    Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER);
149    Performance_startMonitor(pp,PERFORMANCE_SOLVER);    Performance_startMonitor(pp,PERFORMANCE_SOLVER);
150      /* v = P^{-1} * A y1 */
151
152    Paso_Copy(n,u1,v);    Paso_Copy(n,u1,v);
153
154    theta = 0.0;    theta = 0.0;
155    eta = 0.0;    eta = 0.0;
156
157    tau = Paso_l2(n,r,A->mpi_info);    tau = Paso_l2(n,res,A->mpi_info);
158
159    rho = tau * tau;    rho = tau * tau;
160
161    norm_of_residual=tau*sqrt ( m + 1 );    norm_of_residual=tau;
162
163    while (!(convergeFlag || maxIterFlag || breakFlag || (status !=SOLVER_NO_ERROR) ))    while (!(convergeFlag || maxIterFlag || breakFlag || (status !=SOLVER_NO_ERROR) ))
164    {    {
165
166
167       sigma=Paso_InnerProduct(n,r,v,A->mpi_info);       sigma=Paso_InnerProduct(n,res,v,A->mpi_info);
168
169       if (! (breakFlag = (ABS(sigma) == 0.))) {       if (! (breakFlag = (ABS(sigma) == 0.))) {
170
# Line 171  err_t Paso_Solver_TFQMR( Line 174  err_t Paso_Solver_TFQMR(
174         {         {
175           /*  Compute y2 and u2 only if you have to */           /*  Compute y2 and u2 only if you have to */
176           if ( j == 1 ){           if ( j == 1 ){
177            Paso_LinearCombination(n,y2,1.,y1,-alpha,v);            Paso_LinearCombination(n,y2,PASO_ONE,y1,-alpha,v); /* y2 = y1 - alpha*v */
178
179            Performance_stopMonitor(pp,PERFORMANCE_SOLVER);            Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
180            Performance_startMonitor(pp,PERFORMANCE_MVM);            Performance_startMonitor(pp,PERFORMANCE_MVM);
181            Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, y2,ZERO,u2);            Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, A, y2,PASO_ZERO,temp_vector);
182            Performance_stopMonitor(pp,PERFORMANCE_MVM);            Performance_stopMonitor(pp,PERFORMANCE_MVM);
183            Performance_startMonitor(pp,PERFORMANCE_SOLVER);            Performance_startMonitor(pp,PERFORMANCE_SOLVER);
184
185            Performance_stopMonitor(pp,PERFORMANCE_SOLVER);            Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
186            Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER);            Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER);
187            Paso_Solver_solvePreconditioner(A,u2,u2);        Paso_SystemMatrix_solvePreconditioner(A,u2,temp_vector);
188            Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER);            Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER);
189            Performance_startMonitor(pp,PERFORMANCE_SOLVER);            Performance_startMonitor(pp,PERFORMANCE_SOLVER);
190              /* u2 = P^{-1} * A y2 */
191           }           }
192           m = 2 * (num_iter+1) - 2 + (j+1);           m = 2 * (num_iter+1) - 2 + (j+1);
193
194            if (j==0) {            if (j==0) {
195              Paso_Update(n,1.,w,-alpha,u1);              Paso_Update(n,1.,w,-alpha,u1); /* w = w - alpha * u1 */
196              Paso_Update(n,( theta * theta * eta / alpha ),d,1.,y1);              Paso_Update(n,( theta * theta * eta / alpha ),d,1.,y1); /* d = ( theta * theta * eta / alpha )*d + y1 */
197            }            }
198            if (j==1) {            if (j==1) {
199              Paso_Update(n,1.,w,-alpha,u2);              Paso_Update(n,1.,w,-alpha,u2);  /* w = w - alpha * u2 */
200              Paso_Update(n,( theta * theta * eta / alpha ),d,1.,y2);              Paso_Update(n,( theta * theta * eta / alpha ),d,1.,y2); /* d = ( theta * theta * eta / alpha )*d + y2 */
201            }            }
202
203           theta =Paso_l2(n,w,A->mpi_info)/tau;           theta =Paso_l2(n,w,A->mpi_info)/tau;
204           c = 1.0 / sqrt ( 1.0 + theta * theta );           /*printf("tau = %e, %e %e\n",tau, Paso_l2(n,w,A->mpi_info)/tau, theta);*/
205             c = PASO_ONE / sqrt ( PASO_ONE + theta * theta );
206           tau = tau * theta * c;           tau = tau * theta * c;
207           eta = c * c * alpha;           eta = c * c * alpha;
208           Paso_Update(n,1.,x,eta,d);                 Paso_Update(n,1.,x,eta,d);
# Line 204  err_t Paso_Solver_TFQMR( Line 210  err_t Paso_Solver_TFQMR(
210
211       breakFlag = (ABS(rho) == 0);       breakFlag = (ABS(rho) == 0);
212
213       rhon = Paso_InnerProduct(n,r,w,A->mpi_info);       rhon = Paso_InnerProduct(n,res,w,A->mpi_info);
214       beta = rhon / rho;       beta = rhon / rho;
215       rho = rhon;       rho = rhon;
216
217       Paso_LinearCombination(n,y1,1.,w,beta,y2);       Paso_LinearCombination(n,y1, PASO_ONE,w,beta,y2); /* y1 = w + beta * y2 */
218
219       Performance_stopMonitor(pp,PERFORMANCE_SOLVER);       Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
220       Performance_startMonitor(pp,PERFORMANCE_MVM);       Performance_startMonitor(pp,PERFORMANCE_MVM);
221       Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, y1,ZERO,u1);       Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, A, y1,PASO_ZERO,temp_vector);
222       Performance_stopMonitor(pp,PERFORMANCE_MVM);       Performance_stopMonitor(pp,PERFORMANCE_MVM);
Performance_startMonitor(pp,PERFORMANCE_SOLVER);
223
Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
224       Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER);       Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER);
225       Paso_Solver_solvePreconditioner(A,u1,u1);       Paso_SystemMatrix_solvePreconditioner(A,u1,temp_vector);
226       Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER);       Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER);
227       Performance_startMonitor(pp,PERFORMANCE_SOLVER);       Performance_startMonitor(pp,PERFORMANCE_SOLVER);
228         /*  u1 = P^{-1} * A y1 */
229
230       Paso_Copy(n,v_old,v);       Paso_LinearCombination(n,temp_vector,PASO_ONE,u2,beta,v); /* t = u2 + beta * v */
231                   Paso_LinearCombination(n,v,PASO_ONE,u1,beta,temp_vector); /* v = u1 + beta * t */
Paso_Update(n,beta,v_old,1,u2);
Paso_LinearCombination(n,v,1.,u1,beta,v_old);
232       }       }
233       maxIterFlag = (num_iter > maxit);       maxIterFlag = (num_iter > maxit);
234       norm_of_residual=tau*sqrt ( m + 1 );       norm_of_residual=tau*sqrt ( (double) (m + 1 ) );
235       convergeFlag=(norm_of_residual<(*tolerance));       convergeFlag=(norm_of_residual<(*tolerance));
236
237
# Line 249  err_t Paso_Solver_TFQMR( Line 252  err_t Paso_Solver_TFQMR(
252      TMPMEMFREE(d);      TMPMEMFREE(d);
253      TMPMEMFREE(w);      TMPMEMFREE(w);
254      TMPMEMFREE(v);      TMPMEMFREE(v);
255      TMPMEMFREE(v_old);      TMPMEMFREE(temp_vector);
256          TMPMEMFREE(res);
257      *iter=num_iter;      *iter=num_iter;
258      *tolerance=norm_of_residual;      *tolerance=norm_of_residual;
259

Legend:
 Removed from v.1708 | changed lines | Added in v.4154