/[escript]/branches/diaplayground/ripley/src/RipleySystemMatrix.cpp
ViewVC logotype

Contents of /branches/diaplayground/ripley/src/RipleySystemMatrix.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 5136 - (show annotations)
Tue Sep 9 07:13:55 2014 UTC (4 years, 5 months ago) by caltinay
File size: 15896 byte(s)
ripley now supports paso solvers again and returns an appropriate matrix type
id. Changed the getSystemMatrixTypeId() method to take a full SolverBuddy
instance and made some other simplifications.

1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2014 by University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Open Software License version 3.0
9 * http://www.opensource.org/licenses/osl-3.0.php
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development 2012-2013 by School of Earth Sciences
13 * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 *
15 *****************************************************************************/
16
17 #include "RipleySystemMatrix.h"
18 #include "RipleyException.h"
19
20 #include <esysUtils/index.h>
21 #include <escript/Data.h>
22 #include <escript/SolverOptions.h>
23
24 #include <cusp/multiply.h>
25 #include <cusp/krylov/bicgstab.h>
26 #include <cusp/krylov/cg.h>
27 #include <cusp/krylov/gmres.h>
28 #include <cusp/krylov/lsqr.h>
29 #include <cusp/precond/diagonal.h>
30
31 #define BLOCK_SIZE 128
32
33 namespace bp = boost::python;
34
35 namespace ripley {
36
37 double gettime()
38 {
39 #ifdef _OPENMP
40 return omp_get_wtime();
41 #else
42 struct timeval tv;
43 gettimeofday(&tv, NULL);
44 suseconds_t ret = tv.tv_usec + tv.tv_sec*1e6;
45 return 1e-6*(double)ret;
46 #endif
47 }
48
49 std::vector<int> SystemMatrix::cudaDevices;
50
51 void SystemMatrix::checkCUDA()
52 {
53 #ifdef USE_CUDA
54 cudaDevices.clear();
55 int deviceCount;
56 cudaError err = cudaGetDeviceCount(&deviceCount);
57 if (deviceCount == 0 || err == cudaErrorNoDevice) {
58 std::cout << "Note: There is no device supporting CUDA" << std::endl;
59 return;
60 } else if (deviceCount == 1) {
61 std::cout << "There is 1 GPU device" << std::endl;
62 } else {
63 std::cout << "There are " << deviceCount << " GPU devices" << std::endl;
64 }
65
66 for (int dev = 0; dev < deviceCount; ++dev) {
67 cudaDeviceProp deviceProp;
68 CUDA_SAFE_CALL(cudaGetDeviceProperties(&deviceProp, dev));
69
70 std::cout << "\nDevice " << dev << ": \"" << deviceProp.name << "\""
71 << std::endl;
72 if (deviceProp.major == 9999 && deviceProp.minor == 9999) {
73 std::cout << " No CUDA support." << std::endl;
74 } else {
75 cudaDevices.push_back(dev);
76 std::cout << " Major revision number: "
77 << deviceProp.major << std::endl;
78 std::cout << " Minor revision number: "
79 << deviceProp.minor << std::endl;
80 std::cout << " Total amount of global memory: "
81 << deviceProp.totalGlobalMem << " bytes" << std::endl;
82 }
83 }
84 std::cout << std::endl;
85 #endif
86 }
87
88 SystemMatrix::SystemMatrix(esysUtils::JMPI mpiInfo, int blocksize,
89 const escript::FunctionSpace& fs, int nRows,
90 const IndexVector& diagonalOffsets) :
91 AbstractSystemMatrix(blocksize, fs, blocksize, fs),
92 m_mpiInfo(mpiInfo)
93 {
94 // count nonzero entries
95 int numEntries = 0;
96 for (size_t i = 0; i < diagonalOffsets.size(); i++) {
97 numEntries += blocksize*blocksize*(nRows-std::abs(diagonalOffsets[i]));
98 }
99 //std::cerr << "Matrix has " <<numEntries<<" entries."<<std::endl;
100
101 if (cudaDevices.size() == 0)
102 checkCUDA();
103
104 mat.resize(nRows*blocksize, numEntries, diagonalOffsets.size(), blocksize);
105 mat.diagonal_offsets.assign(diagonalOffsets.begin(), diagonalOffsets.end());
106 matrixAltered = true;
107 }
108
109 void SystemMatrix::add(const IndexVector& rowIdx,
110 const std::vector<double>& array)
111 {
112 const int blockSize = getBlockSize();
113 const int emSize = rowIdx.size();
114 for (int i=0; i<emSize; i++) {
115 for (int j=0; j<emSize; j++) {
116 const int revi = emSize-1-i;
117 const int diag = j%2 + revi%2 + 3*(j/2+revi/2 + j/4+revi/4);
118 for (int k=0; k<blockSize; k++) {
119 for (int m=0; m<blockSize; m++) {
120 const int row = rowIdx[i]*blockSize + k;
121 const int d = diag*blockSize + m;
122 const int srcIdx = INDEX4(k, m, i, j, blockSize, blockSize, emSize);
123 mat.values(row, d) += array[srcIdx];
124 }
125 }
126 }
127 }
128 matrixAltered = true;
129 }
130
131 void SystemMatrix::ypAx(escript::Data& y, escript::Data& x) const
132 {
133 if (x.getDataPointSize() != getBlockSize()) {
134 throw RipleyException("matrix vector product: block size does not match the number of components in input.");
135 } else if (y.getDataPointSize() != getBlockSize()) {
136 throw RipleyException("matrix vector product: block size does not match the number of components in output.");
137 } else if (x.getFunctionSpace() != getColumnFunctionSpace()) {
138 throw RipleyException("matrix vector product: matrix function space and function space of input don't match.");
139 } else if (y.getFunctionSpace() != getRowFunctionSpace()) {
140 throw RipleyException("matrix vector product: matrix function space and function space of output don't match.");
141 }
142
143 // expand data object if necessary to be able to grab the whole data
144 const_cast<escript::Data*>(&x)->expand();
145 y.expand();
146 y.requireWrite();
147 const double* x_dp = x.getSampleDataRO(0);
148 double* y_dp = y.getSampleDataRW(0);
149 double T0 = gettime();
150 HostVectorType xx(x_dp, x_dp+mat.num_rows);
151 HostVectorType yy(mat.num_rows, 0.);
152 cusp::multiply(mat, xx, yy);
153 thrust::copy(yy.begin(), yy.end(), y_dp);
154 //std::cerr << "ypAx: " << gettime()-T0 << " seconds." << std::endl;
155 }
156
157 template<class LinearOperator,
158 class Vector,
159 class Preconditioner>
160 void SystemMatrix::runSolver(LinearOperator& A, Vector& x, Vector& b,
161 Preconditioner& M, escript::SolverBuddy& sb) const
162 {
163 typedef typename LinearOperator::memory_space MemorySpace;
164 //cusp::verbose_monitor<double> monitor(b, sb.getIterMax(), sb.getTolerance(), sb.getAbsoluteTolerance());
165 cusp::default_monitor<double> monitor(b, sb.getIterMax(), sb.getTolerance(), sb.getAbsoluteTolerance());
166 int solver = sb.getSolverMethod();
167 if (solver == escript::SO_DEFAULT) {
168 if (sb.isSymmetric()) {
169 solver = escript::SO_METHOD_PCG;
170 } else {
171 solver = escript::SO_METHOD_BICGSTAB;
172 }
173 }
174
175 double T0 = gettime();
176 switch (solver) {
177 case escript::SO_DEFAULT:
178 case escript::SO_METHOD_PCG:
179 cusp::krylov::cg(A, x, b, monitor, M);
180 break;
181 case escript::SO_METHOD_LSQR:
182 cusp::krylov::lsqr(A, x, b, cusp::krylov::lsqr_parameters<double>(), monitor);
183 break;
184 case escript::SO_METHOD_BICGSTAB:
185 cusp::krylov::bicgstab(A, x, b, monitor, M);
186 break;
187 case escript::SO_METHOD_GMRES:
188 {
189 const int restart = (sb.getRestart()==0 ? 1000 : sb.getRestart());
190 if (restart < 1)
191 throw RipleyException("Invalid restart parameter for GMRES");
192 cusp::krylov::gmres(A, x, b, restart, monitor, M);
193 }
194 break;
195 case escript::SO_METHOD_PRES20:
196 {
197 const int restart = 20;
198 cusp::krylov::gmres(A, x, b, restart, monitor, M);
199 }
200 break;
201 default:
202 throw RipleyException("Unsupported solver.");
203 }
204 double solvertime = gettime()-T0;
205
206 if (monitor.converged()) {
207 if (sb.isVerbose()) {
208 std::cout << "Solver converged to " << monitor.relative_tolerance()
209 << " relative tolerance after " << monitor.iteration_count()
210 << " iterations and " << solvertime << " seconds."<< std::endl;
211 }
212 } else {
213 std::cout << "Solver reached iteration limit "
214 << monitor.iteration_limit() << " before converging"
215 << " to " << monitor.relative_tolerance() << " rel. tolerance."
216 << std::endl;
217 }
218 }
219
220 void SystemMatrix::setToSolution(escript::Data& out, escript::Data& in,
221 bp::object& options) const
222 {
223 if (m_mpiInfo->size > 1) {
224 throw RipleyException("solve: ripley's block diagonal matrix "
225 "is incompatible with MPI.");
226 }
227 if (out.getDataPointSize() != getBlockSize()) {
228 throw RipleyException("solve: block size does not match the number of components of solution.");
229 } else if (in.getDataPointSize() != getBlockSize()) {
230 throw RipleyException("solve: block size does not match the number of components of right hand side.");
231 } else if (out.getFunctionSpace() != getColumnFunctionSpace()) {
232 throw RipleyException("solve: matrix function space and function space of solution don't match.");
233 } else if (in.getFunctionSpace() != getRowFunctionSpace()) {
234 throw RipleyException("solve: matrix function space and function space of right hand side don't match.");
235 }
236
237 options.attr("resetDiagnostics")();
238 escript::SolverBuddy sb = bp::extract<escript::SolverBuddy>(options);
239 out.expand();
240 in.expand();
241
242 double* out_dp = out.getSampleDataRW(0);
243 const double* in_dp = in.getSampleDataRO(0);
244 double T0;
245
246 if (sb.getSolverTarget() == escript::SO_TARGET_GPU) {
247 #ifdef USE_CUDA
248 if (cudaDevices.size() == 0) {
249 throw RipleyException("solve: GPU-based solver requested but no "
250 "CUDA compatible device available.");
251 }
252 //TODO: give users options...
253 if (sb.isVerbose()) {
254 std::cerr << "Using CUDA device " << cudaDevices[0] << std::endl;
255 }
256 cudaSetDevice(cudaDevices[0]);
257
258 T0 = gettime();
259 DeviceVectorType b(in_dp, in_dp+mat.num_rows);
260 double host2dev = gettime()-T0;
261 if (sb.isVerbose())
262 std::cerr << "Copy of b: " << host2dev << " seconds." << std::endl;
263 if (matrixAltered) {
264 T0 = gettime();
265 dmat = mat;
266 host2dev = gettime()-T0;
267 if (sb.isVerbose())
268 std::cerr << "Copy of A: " << host2dev << " seconds." << std::endl;
269 matrixAltered = false;
270 }
271 DeviceVectorType x(mat.num_rows, 0.);
272 if (sb.isVerbose())
273 std::cerr << "Solving on CUDA device..." << std::endl;
274
275 if (sb.getPreconditioner() == escript::SO_PRECONDITIONER_NONE) {
276 cusp::identity_operator<double, cusp::device_memory> M(mat.num_rows, mat.num_rows);
277 runSolver(dmat, x, b, M, sb);
278 } else if (sb.getPreconditioner() == escript::SO_PRECONDITIONER_JACOBI) {
279 if (sb.isVerbose())
280 std::cerr << "Using Jacobi preconditioner" << std::endl;
281 // TODO: This should be cached as well but that's not supported
282 // at the moment.
283 cusp::precond::diagonal<double, cusp::device_memory> M(dmat);
284 runSolver(dmat, x, b, M, sb);
285 } else {
286 throw RipleyException("Unsupported preconditioner requested.");
287 }
288
289 T0 = gettime();
290 thrust::copy(x.begin(), x.end(), out_dp);
291 const double copyTime = gettime()-T0;
292 if (sb.isVerbose())
293 std::cerr << "Copy of x: " << copyTime << " seconds." << std::endl;
294 #else
295 throw RipleyException("solve: GPU-based solver requested but escript "
296 "not compiled with CUDA.");
297 #endif
298 } else { // CPU
299 T0 = gettime();
300 HostVectorType b(in_dp, in_dp+mat.num_rows);
301 double copytime = gettime()-T0;
302 if (sb.isVerbose()) {
303 std::cerr << "Copy of b: " << copytime << " seconds." << std::endl;
304 std::cerr << "Solving on the CPU..." << std::endl;
305 }
306 HostVectorType x(mat.num_rows, 0.);
307 if (sb.getPreconditioner() == escript::SO_PRECONDITIONER_NONE) {
308 cusp::identity_operator<double, cusp::host_memory> M(mat.num_rows, mat.num_rows);
309 runSolver(mat, x, b, M, sb);
310 } else if (sb.getPreconditioner() == escript::SO_PRECONDITIONER_JACOBI) {
311 if (sb.isVerbose())
312 std::cerr << "Using Jacobi preconditioner" << std::endl;
313 // TODO: This should be cached as well but that's not supported
314 // at the moment.
315 cusp::precond::diagonal<double, cusp::host_memory> M(mat);
316 runSolver(mat, x, b, M, sb);
317 } else {
318 throw RipleyException("Unsupported preconditioner requested.");
319 }
320
321 T0 = gettime();
322 thrust::copy(x.begin(), x.end(), out_dp);
323 const double copyTime = gettime()-T0;
324 if (sb.isVerbose()) {
325 std::cerr << "Copy of x: " << copyTime << " seconds." << std::endl;
326 }
327 }
328 }
329
330 void SystemMatrix::nullifyRowsAndCols(escript::Data& row_q,
331 escript::Data& col_q,
332 double mdv)
333 {
334 double T0 = gettime();
335 if (col_q.getDataPointSize() != getColumnBlockSize()) {
336 throw RipleyException("nullifyRowsAndCols: column block size does not match the number of components of column mask.");
337 } else if (row_q.getDataPointSize() != getRowBlockSize()) {
338 throw RipleyException("nullifyRowsAndCols: row block size does not match the number of components of row mask.");
339 } else if (col_q.getFunctionSpace() != getColumnFunctionSpace()) {
340 throw RipleyException("nullifyRowsAndCols: column function space and function space of column mask don't match.");
341 } else if (row_q.getFunctionSpace() != getRowFunctionSpace()) {
342 throw RipleyException("nullifyRowsAndCols: row function space and function space of row mask don't match.");
343 }
344
345 row_q.expand();
346 col_q.expand();
347 const double* rowMask = row_q.getSampleDataRO(0);
348 const double* colMask = col_q.getSampleDataRO(0);
349 const int blockSize = getBlockSize();
350 #pragma omp parallel for
351 for (int row=0; row < mat.num_rows; row++) {
352 for (int diag=0; diag < mat.diagonal_offsets.size(); diag++) {
353 const int col = blockSize*(row/blockSize)+mat.diagonal_offsets[diag]*blockSize;
354 if (col >= 0 && col <= mat.num_rows-blockSize) {
355 for (int i=0; i<blockSize; i++) {
356 if (rowMask[row] > 0. || colMask[col+i] > 0.) {
357 mat.values(row, diag*blockSize+i) =
358 (row==col+i ? mdv : 0);
359 }
360 }
361 }
362 }
363 }
364 //std::cerr << "nullifyRowsAndCols: " << gettime()-T0 << " seconds." << std::endl;
365 matrixAltered = true;
366 }
367
368 void SystemMatrix::saveMM(const std::string& filename) const
369 {
370 const int blockSize = getBlockSize();
371
372 std::ofstream f(filename.c_str());
373 f << "%%MatrixMarket matrix coordinate real general" << std::endl;
374 f << mat.num_rows << " " << mat.num_rows << " " << mat.num_entries << std::endl;
375 f.setf(std::ios_base::scientific, std::ios_base::floatfield);
376 f.precision(15);
377 for (int row=0; row < mat.num_rows; row++) {
378 for (int diag=0; diag < mat.diagonal_offsets.size(); diag++) {
379 const int col = blockSize*(row/blockSize)+mat.diagonal_offsets[diag]*blockSize;
380 if (col >= 0 && col <= mat.num_rows-blockSize) {
381 for (int i=0; i<blockSize; i++) {
382 f << row+1 << " " << col+i+1 << " "
383 << mat.values(row, diag*blockSize+i) << std::endl;
384 }
385 }
386 }
387 }
388 }
389
390 void SystemMatrix::saveHB(const std::string& filename) const
391 {
392 throw RipleyException("Harwell-Boeing interface not available.");
393 }
394
395 void SystemMatrix::resetValues()
396 {
397 mat.values.values.assign(mat.values.values.size(), 0.);
398 matrixAltered = true;
399 }
400
401 } // end of namespace

  ViewVC Help
Powered by ViewVC 1.1.26