--- trunk/paso/src/Solvers/PCG.c 2006/02/06 06:32:06 495 +++ temp_trunk_copy/paso/src/PCG.c 2008/01/11 02:29:38 1384 @@ -1,16 +1,32 @@ + /* \$Id\$ */ +/******************************************************* + * + * Copyright 2003-2007 by ACceSS MNRF + * Copyright 2007 by University of Queensland + * + * http://esscc.uq.edu.au + * Primary Business: Queensland, Australia + * Licensed under the Open Software License version 3.0 + * http://www.opensource.org/licenses/osl-3.0.php + * + *******************************************************/ /* PCG iterations */ #include "SystemMatrix.h" #include "Paso.h" #include "Solver.h" -/* #include */ + #ifdef _OPENMP #include #endif +#ifdef PASO_MPI +#include +#endif + /* * * Purpose @@ -52,7 +68,8 @@ double * r, double * x, dim_t *iter, - double * tolerance) { + double * tolerance, + Paso_Performance* pp) { /* Local variables */ @@ -60,10 +77,10 @@ dim_t i0; bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE; err_t status = SOLVER_NO_ERROR; - dim_t n = A->num_cols * A-> col_block_size; + dim_t n = Paso_SystemMatrix_getTotalNumRows(A); double *resid = tolerance, *rs=NULL, *p=NULL, *v=NULL, *x2=NULL ; double tau_old,tau,beta,delta,gamma_1,gamma_2,alpha,sum_1,sum_2,sum_3,sum_4,sum_5,tol; - double norm_of_residual,norm_of_residual_global; + double norm_of_residual,norm_of_residual_global, loc_sum[2], sum[2]; register double r_tmp,d,rs_tmp,x2_tmp,x_tmp; /* */ @@ -90,6 +107,7 @@ #pragma omp parallel firstprivate(maxit,tol,convergeFlag,maxIterFlag,breakFlag) \ private(tau_old,tau,beta,delta,gamma_1,gamma_2,alpha,norm_of_residual,num_iter) { + Performance_startMonitor(pp,PERFORMANCE_SOLVER); /* initialize data */ #pragma omp for private(i0) schedule(static) for (i0=0;i0mpi_info->comm); + } + #endif tau_old=tau; tau=sum_1; /* p=v+beta*p */ @@ -131,10 +162,22 @@ for (i0=0;i0mpi_info->comm); + } + #endif delta=sum_2; @@ -148,7 +191,17 @@ d=r[i0]-rs[i0]; sum_3+=d*d; sum_4+=d*rs[i0]; - } + } + #ifdef PASO_MPI + #pragma omp master + { + loc_sum[0] = sum_3; + loc_sum[1] = sum_4; + MPI_Allreduce(loc_sum, sum, 2, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm); + sum_3=sum[0]; + sum_4=sum[1]; + } + #endif gamma_1= ( (ABS(sum_3)<= ZERO) ? 0 : -sum_4/sum_3) ; gamma_2= ONE-gamma_1; #pragma omp for private(i0,x2_tmp,x_tmp,rs_tmp) schedule(static) @@ -159,6 +212,13 @@ } #pragma omp for private(i0) reduction(+:sum_5) schedule(static) for (i0=0;i0mpi_info->comm); + } + #endif norm_of_residual=sqrt(sum_5); convergeFlag = norm_of_residual <= tol; maxIterFlag = num_iter == maxit; @@ -175,7 +235,8 @@ } else if (breakFlag) { status = SOLVER_BREAKDOWN; } - } + } + Performance_stopMonitor(pp,PERFORMANCE_SOLVER); } /* end of parallel region */ TMPMEMFREE(rs); TMPMEMFREE(x2); @@ -187,16 +248,3 @@ /* End of PCG */ return status; } - -/* - * \$Log\$ - * Revision 1.2 2005/09/15 03:44:40 jgs - * Merge of development branch dev-02 back to main trunk on 2005-09-15 - * - * Revision 1.1.2.1 2005/09/05 06:29:49 gross - * These files have been extracted from finley to define a stand alone libray for iterative - * linear solvers on the ALTIX. main entry through Paso_solve. this version compiles but - * has not been tested yet. - * - * - */