/[escript]/branches/diaplayground/ripley/src/RipleySystemMatrix.cpp
ViewVC logotype

Diff of /branches/diaplayground/ripley/src/RipleySystemMatrix.cpp

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 5145 by caltinay, Wed Sep 10 04:01:57 2014 UTC revision 5146 by caltinay, Thu Sep 11 06:29:28 2014 UTC
# Line 1  Line 1 
1    
2  /*****************************************************************************  /*****************************************************************************
3  *  *
4  * Copyright (c) 2003-2014 by University of Queensland  * Copyright (c) 2014 by University of Queensland
5  * http://www.uq.edu.au  * http://www.uq.edu.au
6  *  *
7  * Primary Business: Queensland, Australia  * Primary Business: Queensland, Australia
# Line 29  Line 29 
29  #include <cusp/krylov/lsqr.h>  #include <cusp/krylov/lsqr.h>
30  #include <cusp/precond/diagonal.h>  #include <cusp/precond/diagonal.h>
31    
 #define BLOCK_SIZE 128  
   
32  namespace bp = boost::python;  namespace bp = boost::python;
33    
34  namespace ripley {  namespace ripley {
# Line 57  void SystemMatrix::checkCUDA() Line 55  void SystemMatrix::checkCUDA()
55      cudaError err = cudaGetDeviceCount(&deviceCount);      cudaError err = cudaGetDeviceCount(&deviceCount);
56      if (deviceCount == 0 || err == cudaErrorNoDevice) {      if (deviceCount == 0 || err == cudaErrorNoDevice) {
57          std::cout << "Note: There is no device supporting CUDA" << std::endl;          std::cout << "Note: There is no device supporting CUDA" << std::endl;
58            cudaDevices.push_back(-1);
59          return;          return;
60      } else if (deviceCount == 1) {      } else if (deviceCount == 1) {
61          std::cout << "There is 1 GPU device" << std::endl;          std::cout << "There is 1 GPU device" << std::endl;
# Line 68  void SystemMatrix::checkCUDA() Line 67  void SystemMatrix::checkCUDA()
67          cudaDeviceProp deviceProp;          cudaDeviceProp deviceProp;
68          CUDA_SAFE_CALL(cudaGetDeviceProperties(&deviceProp, dev));          CUDA_SAFE_CALL(cudaGetDeviceProperties(&deviceProp, dev));
69    
70          std::cout << "\nDevice " << dev << ": \"" << deviceProp.name << "\""          std::cout << "\nDevice " << dev << ": \"" << deviceProp.name << "\" -- ";
                   << std::endl;  
71          if (deviceProp.major == 9999 && deviceProp.minor == 9999) {          if (deviceProp.major == 9999 && deviceProp.minor == 9999) {
72              std::cout << "  No CUDA support." << std::endl;              std::cout << "No CUDA support." << std::endl;
73          } else {          } else {
74              cudaDevices.push_back(dev);              cudaDevices.push_back(dev);
75              std::cout << "  Major revision number:                         "              std::cout << "Rev. " << deviceProp.major << "." << deviceProp.minor
76                        << deviceProp.major << std::endl;                        << ", Mem: "
77              std::cout << "  Minor revision number:                         "                        << deviceProp.totalGlobalMem << " bytes." << std::endl;
                       << deviceProp.minor << std::endl;  
             std::cout << "  Total amount of global memory:                 "  
                       << deviceProp.totalGlobalMem << " bytes" << std::endl;  
78          }          }
79      }      }
80      std::cout << std::endl;      // neither of the found devices supports CUDA:
81        if (cudaDevices.empty()) {
82            cudaDevices.push_back(-1);
83        }
84  #endif  #endif
85  }  }
86    
# Line 97  SystemMatrix::SystemMatrix(esysUtils::JM Line 95  SystemMatrix::SystemMatrix(esysUtils::JM
95      for (size_t i = 0; i < diagonalOffsets.size(); i++) {      for (size_t i = 0; i < diagonalOffsets.size(); i++) {
96          numEntries += blocksize*blocksize*(nRows-std::abs(diagonalOffsets[i]));          numEntries += blocksize*blocksize*(nRows-std::abs(diagonalOffsets[i]));
97      }      }
98      //std::cerr << "Matrix has " <<numEntries<<" entries."<<std::endl;      //std::cout << "Matrix has " <<numEntries<<" entries."<<std::endl;
   
     if (cudaDevices.size() == 0)  
         checkCUDA();  
99    
100      mat.resize(nRows*blocksize, numEntries, diagonalOffsets.size(), blocksize);      mat.resize(nRows*blocksize, numEntries, diagonalOffsets.size(), blocksize);
101      mat.diagonal_offsets.assign(diagonalOffsets.begin(), diagonalOffsets.end());      mat.diagonal_offsets.assign(diagonalOffsets.begin(), diagonalOffsets.end());
# Line 152  void SystemMatrix::ypAx(escript::Data& y Line 147  void SystemMatrix::ypAx(escript::Data& y
147      HostVectorType yy(mat.num_rows, 0.);      HostVectorType yy(mat.num_rows, 0.);
148      cusp::multiply(mat, xx, yy);      cusp::multiply(mat, xx, yy);
149      thrust::copy(yy.begin(), yy.end(), y_dp);      thrust::copy(yy.begin(), yy.end(), y_dp);
150      //std::cerr << "ypAx: " << gettime()-T0 << " seconds." << std::endl;      //std::cout << "ypAx: " << gettime()-T0 << " seconds." << std::endl;
151  }  }
152    
153  template<class LinearOperator,  template<class LinearOperator,
# Line 249  void SystemMatrix::setToSolution(escript Line 244  void SystemMatrix::setToSolution(escript
244    
245      if (sb.getSolverTarget() == escript::SO_TARGET_GPU) {      if (sb.getSolverTarget() == escript::SO_TARGET_GPU) {
246  #ifdef USE_CUDA  #ifdef USE_CUDA
247          if (cudaDevices.size() == 0) {          if (cudaDevices.empty()) {
248                checkCUDA();
249            }
250    
251            if (cudaDevices[0] == -1) {
252              throw RipleyException("solve: GPU-based solver requested but no "              throw RipleyException("solve: GPU-based solver requested but no "
253                                    "CUDA compatible device available.");                                    "CUDA compatible device available.");
254          }          }
255    
256          //TODO: give users options...          //TODO: give users options...
257          if (sb.isVerbose()) {          if (sb.isVerbose()) {
258              std::cerr << "Using CUDA device " << cudaDevices[0] << std::endl;              std::cout << "Using CUDA device " << cudaDevices[0] << std::endl;
259          }          }
260          cudaSetDevice(cudaDevices[0]);          cudaSetDevice(cudaDevices[0]);
261    
# Line 263  void SystemMatrix::setToSolution(escript Line 263  void SystemMatrix::setToSolution(escript
263          DeviceVectorType b(in_dp, in_dp+mat.num_rows);          DeviceVectorType b(in_dp, in_dp+mat.num_rows);
264          double host2dev = gettime()-T0;          double host2dev = gettime()-T0;
265          if (sb.isVerbose())          if (sb.isVerbose())
266              std::cerr << "Copy of b: " << host2dev << " seconds." << std::endl;              std::cout << "Copy of b: " << host2dev << " seconds." << std::endl;
267          if (matrixAltered) {          if (matrixAltered) {
268              T0 = gettime();              T0 = gettime();
269              dmat = mat;              dmat = mat;
270              host2dev = gettime()-T0;              host2dev = gettime()-T0;
271              if (sb.isVerbose())              if (sb.isVerbose())
272                  std::cerr << "Copy of A: " << host2dev << " seconds." << std::endl;                  std::cout << "Copy of A: " << host2dev << " seconds." << std::endl;
273              matrixAltered = false;              matrixAltered = false;
274          }          }
275          DeviceVectorType x(mat.num_rows, 0.);          DeviceVectorType x(mat.num_rows, 0.);
276          if (sb.isVerbose())          if (sb.isVerbose())
277              std::cerr << "Solving on CUDA device..." << std::endl;              std::cout << "Solving on CUDA device..." << std::endl;
278    
279          if (sb.getPreconditioner() == escript::SO_PRECONDITIONER_NONE) {          if (sb.getPreconditioner() == escript::SO_PRECONDITIONER_NONE) {
280              cusp::identity_operator<double, cusp::device_memory> M(mat.num_rows, mat.num_rows);              cusp::identity_operator<double, cusp::device_memory> M(mat.num_rows, mat.num_rows);
281              runSolver(dmat, x, b, M, sb);              runSolver(dmat, x, b, M, sb);
282          } else if (sb.getPreconditioner() == escript::SO_PRECONDITIONER_JACOBI) {          } else if (sb.getPreconditioner() == escript::SO_PRECONDITIONER_JACOBI) {
283              if (sb.isVerbose())              if (sb.isVerbose())
284                  std::cerr << "Using Jacobi preconditioner" << std::endl;                  std::cout << "Using Jacobi preconditioner" << std::endl;
285              // TODO: This should be cached as well but that's not supported              // TODO: This should be cached as well but that's not supported
286              // at the moment.              // at the moment.
287              cusp::precond::diagonal<double, cusp::device_memory> M(dmat);              cusp::precond::diagonal<double, cusp::device_memory> M(dmat);
# Line 294  void SystemMatrix::setToSolution(escript Line 294  void SystemMatrix::setToSolution(escript
294          thrust::copy(x.begin(), x.end(), out_dp);          thrust::copy(x.begin(), x.end(), out_dp);
295          const double copyTime = gettime()-T0;          const double copyTime = gettime()-T0;
296          if (sb.isVerbose())          if (sb.isVerbose())
297              std::cerr << "Copy of x: " << copyTime << " seconds." << std::endl;              std::cout << "Copy of x: " << copyTime << " seconds." << std::endl;
298  #else  #else
299          throw RipleyException("solve: GPU-based solver requested but escript "          throw RipleyException("solve: GPU-based solver requested but escript "
300                                "not compiled with CUDA.");                                "not compiled with CUDA.");
# Line 304  void SystemMatrix::setToSolution(escript Line 304  void SystemMatrix::setToSolution(escript
304          HostVectorType b(in_dp, in_dp+mat.num_rows);          HostVectorType b(in_dp, in_dp+mat.num_rows);
305          double copytime = gettime()-T0;          double copytime = gettime()-T0;
306          if (sb.isVerbose()) {          if (sb.isVerbose()) {
307              std::cerr << "Copy of b: " << copytime << " seconds." << std::endl;              std::cout << "Copy of b: " << copytime << " seconds." << std::endl;
308              std::cerr << "Solving on the CPU..." << std::endl;              std::cout << "Solving on the CPU..." << std::endl;
309          }          }
310          HostVectorType x(mat.num_rows, 0.);          HostVectorType x(mat.num_rows, 0.);
311          if (sb.getPreconditioner() == escript::SO_PRECONDITIONER_NONE) {          if (sb.getPreconditioner() == escript::SO_PRECONDITIONER_NONE) {
# Line 313  void SystemMatrix::setToSolution(escript Line 313  void SystemMatrix::setToSolution(escript
313              runSolver(mat, x, b, M, sb);              runSolver(mat, x, b, M, sb);
314          } else if (sb.getPreconditioner() == escript::SO_PRECONDITIONER_JACOBI) {          } else if (sb.getPreconditioner() == escript::SO_PRECONDITIONER_JACOBI) {
315              if (sb.isVerbose())              if (sb.isVerbose())
316                  std::cerr << "Using Jacobi preconditioner" << std::endl;                  std::cout << "Using Jacobi preconditioner" << std::endl;
317              // TODO: This should be cached as well but that's not supported              // TODO: This should be cached as well but that's not supported
318              // at the moment.              // at the moment.
319              cusp::precond::diagonal<double, cusp::host_memory> M(mat);              cusp::precond::diagonal<double, cusp::host_memory> M(mat);
# Line 326  void SystemMatrix::setToSolution(escript Line 326  void SystemMatrix::setToSolution(escript
326          thrust::copy(x.begin(), x.end(), out_dp);          thrust::copy(x.begin(), x.end(), out_dp);
327          const double copyTime = gettime()-T0;          const double copyTime = gettime()-T0;
328          if (sb.isVerbose()) {          if (sb.isVerbose()) {
329              std::cerr << "Copy of x: " << copyTime << " seconds." << std::endl;              std::cout << "Copy of x: " << copyTime << " seconds." << std::endl;
330          }          }
331      }      }
332  }  }
# Line 365  void SystemMatrix::nullifyRowsAndCols(es Line 365  void SystemMatrix::nullifyRowsAndCols(es
365              }              }
366          }          }
367      }      }
368      //std::cerr << "nullifyRowsAndCols: " << gettime()-T0 << " seconds." << std::endl;      //std::cout << "nullifyRowsAndCols: " << gettime()-T0 << " seconds." << std::endl;
369      matrixAltered = true;      matrixAltered = true;
370  }  }
371    

Legend:
Removed from v.5145  
changed lines
  Added in v.5146

  ViewVC Help
Powered by ViewVC 1.1.26