--- trunk/paso/src/Solvers/PCG.c 2006/02/06 06:32:06 495 +++ branches/doubleplusgood/paso/src/PCG.cpp 2013/03/06 06:45:32 4280 @@ -1,4 +1,17 @@ -/* \$Id\$ */ + +/***************************************************************************** +* +* Copyright (c) 2003-2013 by University of Queensland +* http://www.uq.edu.au +* +* Primary Business: Queensland, Australia +* Licensed under the Open Software License version 3.0 +* http://www.opensource.org/licenses/osl-3.0.php +* +* Development until 2012 by Earth Systems Science Computational Center (ESSCC) +* Development since 2012 by School of Earth Sciences +* +*****************************************************************************/ /* PCG iterations */ @@ -6,18 +19,22 @@ #include "SystemMatrix.h" #include "Paso.h" #include "Solver.h" -/* #include */ + #ifdef _OPENMP #include #endif +#ifdef ESYS_MPI +#include +#endif + /* * * Purpose * ======= * * PCG solves the linear system A*x = b using the -* preconditioned conjugate gradient method plus a smoother +* preconditioned conjugate gradient method plus a smoother. * A has to be symmetric. * * Convergence test: norm( b - A*x )< TOL. @@ -27,7 +44,7 @@ * ========= * * r (input) DOUBLE PRECISION array, dimension N. -* On entry, residual of inital guess x +* On entry, residual of initial guess x. * * x (input/output) DOUBLE PRECISION array, dimension N. * On input, the initial guess. @@ -39,33 +56,65 @@ * INFO (output) INT * * = SOLVER_NO_ERROR: Successful exit. Iterated approximate solution returned. -* = SOLVEr_MAXITER_REACHED +* = SOLVER_MAXITER_REACHED * = SOLVER_INPUT_ERROR Illegal parameter: -* = SOLVEr_BREAKDOWN: If parameters rHO or OMEGA become smaller -* = SOLVER_MEMORY_ERROR : If parameters rHO or OMEGA become smaller +* = SOLVER_BREAKDOWN: If parameters RHO or OMEGA become smaller +* = SOLVER_MEMORY_ERROR : If parameters RHO or OMEGA become smaller * * ============================================================== */ +/* #define PASO_DYNAMIC_SCHEDULING_MVM */ + +#if defined PASO_DYNAMIC_SCHEDULING_MVM && defined __OPENMP +#define USE_DYNAMIC_SCHEDULING +#endif + err_t Paso_Solver_PCG( Paso_SystemMatrix * A, double * r, double * x, dim_t *iter, - double * tolerance) { - + double * tolerance, + Paso_Performance* pp) { /* Local variables */ - dim_t num_iter=0,maxit,num_iter_global; - dim_t i0; + dim_t num_iter=0,maxit,num_iter_global, len,rest, np, ipp; +#ifdef USE_DYNAMIC_SCHEDULING + dim_t chunk_size=-1; +#endif + register double ss,ss1; + dim_t i0, istart, iend; bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE; err_t status = SOLVER_NO_ERROR; - dim_t n = A->num_cols * A-> col_block_size; + dim_t n = Paso_SystemMatrix_getTotalNumRows(A); double *resid = tolerance, *rs=NULL, *p=NULL, *v=NULL, *x2=NULL ; double tau_old,tau,beta,delta,gamma_1,gamma_2,alpha,sum_1,sum_2,sum_3,sum_4,sum_5,tol; - double norm_of_residual,norm_of_residual_global; - register double r_tmp,d,rs_tmp,x2_tmp,x_tmp; +#ifdef ESYS_MPI + double loc_sum[2], sum[2]; +#endif + double norm_of_residual=0,norm_of_residual_global; + register double d; + + /* There should not be any executable code before this ifdef */ +#ifdef USE_DYNAMIC_SCHEDULING + + /* Watch out for these declarations (see above) */ + char* chksz_chr; + dim_t n_chunks; + + chksz_chr=getenv("PASO_CHUNK_SIZE_PCG"); + if (chksz_chr!=NULL) sscanf(chksz_chr, "%d",&chunk_size); + np=omp_get_max_threads(); + chunk_size=MIN(MAX(1,chunk_size),n/np); + n_chunks=n/chunk_size; + if (n_chunks*chunk_sizempi_info->comm); + #endif tau_old=tau; tau=sum_1; /* p=v+beta*p */ - if (num_iter==1) { - #pragma omp for private(i0) schedule(static) - for (i0=0;i0mpi_info->comm); + #endif delta=sum_2; - + alpha=tau/delta; if (! (breakFlag = (ABS(delta) <= TOLERANCE_FOR_SCALARS))) { - alpha=tau/delta; /* smoother */ - #pragma omp for private(i0) schedule(static) - for (i0=0;i0mpi_info->comm); + sum_3=sum[0]; + sum_4=sum[1]; + #endif + sum_5 = 0; + #pragma omp parallel private(i0, istart, iend, ipp, ss, gamma_1,gamma_2) + { + gamma_1= ( (ABS(sum_3)<= PASO_ZERO) ? 0 : -sum_4/sum_3) ; + gamma_2= PASO_ONE-gamma_1; + ss=0; + #ifdef USE_DYNAMIC_SCHEDULING + #pragma omp for schedule(dynamic, 1) + for (ipp=0; ipp < n_chunks; ++ipp) { + istart=chunk_size*ipp; + iend=MIN(istart+chunk_size,n); + #else + #pragma omp for schedule(static) + for (ipp=0; ipp mpi_info->comm); + #endif norm_of_residual=sqrt(sum_5); convergeFlag = norm_of_residual <= tol; - maxIterFlag = num_iter == maxit; + maxIterFlag = num_iter > maxit; breakFlag = (ABS(tau) <= TOLERANCE_FOR_SCALARS); } - } - /* end of iteration */ - #pragma omp master - { - num_iter_global=num_iter; - norm_of_residual_global=norm_of_residual; - if (maxIterFlag) { - status = SOLVER_MAXITER_REACHED; - } else if (breakFlag) { - status = SOLVER_BREAKDOWN; - } - } - } /* end of parallel region */ - TMPMEMFREE(rs); - TMPMEMFREE(x2); - TMPMEMFREE(v); - TMPMEMFREE(p); + } + /* end of iteration */ + num_iter_global=num_iter; + norm_of_residual_global=norm_of_residual; + if (maxIterFlag) { + status = SOLVER_MAXITER_REACHED; + } else if (breakFlag) { + status = SOLVER_BREAKDOWN; + } + Performance_stopMonitor(pp,PERFORMANCE_SOLVER); + delete[] rs; + delete[] x2; + delete[] v; + delete[] p; *iter=num_iter_global; *resid=norm_of_residual_global; } @@ -188,15 +399,3 @@ return status; } -/* - * \$Log\$ - * Revision 1.2 2005/09/15 03:44:40 jgs - * Merge of development branch dev-02 back to main trunk on 2005-09-15 - * - * Revision 1.1.2.1 2005/09/05 06:29:49 gross - * These files have been extracted from finley to define a stand alone libray for iterative - * linear solvers on the ALTIX. main entry through Paso_solve. this version compiles but - * has not been tested yet. - * - * - */