
Contents of /branches/trilinos_from_5897/paso/src/SystemMatrix.cpp



Revision 6058 - Thu Mar 10 06:51:55 2016 UTC by caltinay

Added getPtr() to AbstractSystemMatrix so we can now use shared SystemMatrix
pointers rather than circumventing them.


/*****************************************************************************
 *
 * Copyright (c) 2003-2016 by The University of Queensland
 * http://www.uq.edu.au
 *
 * Primary Business: Queensland, Australia
 * Licensed under the Open Software License version 3.0
 * http://www.opensource.org/licenses/osl-3.0.php
 *
 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
 * Development 2012-2013 by School of Earth Sciences
 * Development from 2014 by Centre for Geoscience Computing (GeoComp)
 *
 *****************************************************************************/


/****************************************************************************/

/* Paso: SystemMatrix */

/****************************************************************************/

/* Author: Lutz Gross, l.gross@uq.edu.au */

/****************************************************************************/

#include "SystemMatrix.h"
#include "Options.h"
#include "PasoException.h"
#include "Preconditioner.h"
#include "Solver.h"

#include <escript/Data.h>

#include <cstring> // memcpy
#include <vector>

namespace paso {

SystemMatrix::SystemMatrix()
{
    throw PasoException("SystemMatrix: Illegal to generate default SystemMatrix.");
}

/// Allocates a SystemMatrix of given type using the given matrix pattern.
/// Values are initialized with zero.
/// If patternIsUnrolled is set and type & MATRIX_FORMAT_BLK1, it is assumed
/// that the pattern is already unrolled to match the requested block size
/// and offsets. Otherwise unrolling and offset adjustment will be performed.
SystemMatrix::SystemMatrix(SystemMatrixType ntype,
                           SystemMatrixPattern_ptr npattern, dim_t rowBlockSize,
                           dim_t colBlockSize, bool patternIsUnrolled,
                           const escript::FunctionSpace& rowFS,
                           const escript::FunctionSpace& colFS) :
    escript::AbstractSystemMatrix(rowBlockSize, rowFS, colBlockSize, colFS),
    type(ntype),
    logical_row_block_size(rowBlockSize),
    logical_col_block_size(colBlockSize),
    is_balanced(false),
    balance_vector(NULL),
    global_id(NULL),
    solver_package(PASO_PASO),
    solver_p(NULL)
{
    if (patternIsUnrolled) {
        if ((ntype & MATRIX_FORMAT_OFFSET1) != (npattern->type & MATRIX_FORMAT_OFFSET1)) {
            throw PasoException("SystemMatrix: requested offset and pattern offset do not match.");
        }
    }
    // do we need to apply unrolling?
    bool unroll
          // we don't like non-square blocks
        = (rowBlockSize != colBlockSize)
#ifndef USE_LAPACK
          // or any block size bigger than 3
        || (colBlockSize > 3)
#endif
          // or block size 1 was requested while the actual block size is not 1
        || ((ntype & MATRIX_FORMAT_BLK1) && colBlockSize > 1)
          // or the offsets don't match
        || ((ntype & MATRIX_FORMAT_OFFSET1) != (npattern->type & MATRIX_FORMAT_OFFSET1));

    SystemMatrixType pattern_format_out = (ntype & MATRIX_FORMAT_OFFSET1)
                                ? MATRIX_FORMAT_OFFSET1 : MATRIX_FORMAT_DEFAULT;

    mpi_info = npattern->mpi_info;

    if (ntype & MATRIX_FORMAT_CSC) {
        if (unroll) {
            if (patternIsUnrolled) {
                pattern = npattern;
            } else {
                pattern = npattern->unrollBlocks(pattern_format_out,
                                                 colBlockSize, rowBlockSize);
            }
            row_block_size = 1;
            col_block_size = 1;
        } else {
            pattern = npattern->unrollBlocks(pattern_format_out, 1, 1);
            row_block_size = rowBlockSize;
            col_block_size = colBlockSize;
        }
        row_distribution = pattern->input_distribution;
        col_distribution = pattern->output_distribution;
    } else {
        if (unroll) {
            if (patternIsUnrolled) {
                pattern = npattern;
            } else {
                pattern = npattern->unrollBlocks(pattern_format_out,
                                                 rowBlockSize, colBlockSize);
            }
            row_block_size = 1;
            col_block_size = 1;
        } else {
            pattern = npattern->unrollBlocks(pattern_format_out, 1, 1);
            row_block_size = rowBlockSize;
            col_block_size = colBlockSize;
        }
        row_distribution = pattern->output_distribution;
        col_distribution = pattern->input_distribution;
    }
    if (ntype & MATRIX_FORMAT_DIAGONAL_BLOCK) {
        block_size = std::min(row_block_size, col_block_size);
    } else {
        block_size = row_block_size*col_block_size;
    }
    col_coupler.reset(new Coupler(pattern->col_connector, col_block_size));
    row_coupler.reset(new Coupler(pattern->row_connector, row_block_size));
    mainBlock.reset(new SparseMatrix(type, pattern->mainPattern, row_block_size, col_block_size, true));
    col_coupleBlock.reset(new SparseMatrix(type, pattern->col_couplePattern, row_block_size, col_block_size, true));
    row_coupleBlock.reset(new SparseMatrix(type, pattern->row_couplePattern, row_block_size, col_block_size, true));
    const dim_t n_norm = std::max(mainBlock->numCols*col_block_size, mainBlock->numRows*row_block_size);
    balance_vector = new double[n_norm];
#pragma omp parallel for
    for (dim_t i=0; i<n_norm; ++i)
        balance_vector[i] = 1.;
}
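
// Usage sketch (illustrative only, not part of this file): constructing a
// matrix with 2x2 blocks from an existing pattern. The pattern variable and
// function spaces below are hypothetical placeholders.
//
//   SystemMatrixPattern_ptr pat = ...;   // supplied by the discretisation
//   SystemMatrix_ptr A(new SystemMatrix(MATRIX_FORMAT_DEFAULT, pat,
//                                       2, 2,   // row/col block size
//                                       false,  // pattern not yet unrolled
//                                       rowFS, colFS));
//   // With MATRIX_FORMAT_DEFAULT, matching offsets and square 2x2 blocks,
//   // none of the unroll conditions above hold, so the blocks are kept.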

// deallocates a SystemMatrix
SystemMatrix::~SystemMatrix()
{
    solve_free(this);
    delete[] balance_vector;
    delete[] global_id;
}

void SystemMatrix::setPreconditioner(Options* options)
{
    if (!solver_p) {
        SystemMatrix_ptr mat(boost::dynamic_pointer_cast<SystemMatrix>(getPtr()));
        solver_p = Preconditioner_alloc(mat, options);
    }
}

void SystemMatrix::solvePreconditioner(double* x, double* b)
{
    Preconditioner* prec = (Preconditioner*)solver_p;
    SystemMatrix_ptr mat(boost::dynamic_pointer_cast<SystemMatrix>(getPtr()));
    Preconditioner_solve(prec, mat, x, b);
}

void SystemMatrix::freePreconditioner()
{
    Preconditioner* prec = (Preconditioner*) solver_p;
    Preconditioner_free(prec);
    solver_p = NULL;
}

double SystemMatrix::getGlobalSize() const
{
    double global_size = 0;
    double my_size = mainBlock->getSize() + col_coupleBlock->getSize();
    if (mpi_info->size > 1) {
#ifdef ESYS_MPI
        MPI_Allreduce(&my_size, &global_size, 1, MPI_DOUBLE, MPI_SUM, mpi_info->comm);
#else
        global_size = my_size;
#endif
    } else {
        global_size = my_size;
    }
    return global_size;
}

index_t* SystemMatrix::borrowMainDiagonalPointer() const
{
    int fail = 0;
    index_t* out = mainBlock->borrowMainDiagonalPointer();
    if (out == NULL) fail = 1;
#ifdef ESYS_MPI
    int fail_loc = fail;
    MPI_Allreduce(&fail_loc, &fail, 1, MPI_INT, MPI_MAX, mpi_info->comm);
#endif
    if (fail > 0)
        throw PasoException("SystemMatrix::borrowMainDiagonalPointer: no main diagonal");
    return out;
}

void SystemMatrix::makeZeroRowSums(double* left_over)
{
    const dim_t n = pattern->getNumOutput();
    const dim_t nblk = block_size;
    const dim_t blk = row_block_size;
    const index_t* main_ptr = borrowMainDiagonalPointer();

    rowSum(left_over);
    // left_over now holds the row sum

#pragma omp parallel for
    for (index_t ir=0; ir<n; ir++) {
        for (index_t ib=0; ib<blk; ib++) {
            const index_t irow = ib+blk*ir;
            const double rtmp2 = mainBlock->val[main_ptr[ir]*nblk+ib+blk*ib];
            const double rtmp1 = rtmp2-left_over[irow];
            mainBlock->val[main_ptr[ir]*nblk+ib+blk*ib] = rtmp1;
            left_over[irow] = rtmp2-rtmp1;
        }
    }
}
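
// Worked example for the diagonal update above (a reading of the code, not a
// spec): within the dense block stored for row-block ir, the (ib,ib) diagonal
// component sits at offset ib + blk*ib = ib*(blk+1), hence the index
// main_ptr[ir]*nblk + ib + blk*ib. If a row sums to 5 and its diagonal entry
// is 3, the new diagonal becomes 3 - 5 = -2 and left_over[irow] is set to
// 3 - (-2) = 5, i.e. (up to round-off) the amount removed from the diagonal
// so that the row now sums to zero.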

void SystemMatrix::nullifyRows(double* mask_row, double main_diagonal_value)
{
    if (type & MATRIX_FORMAT_CSC) {
        throw PasoException("SystemMatrix::nullifyRows: Only CSR format is supported.");
    }

    if (col_block_size==1 && row_block_size==1) {
        startRowCollect(mask_row);
        mainBlock->nullifyRows_CSR_BLK1(mask_row, main_diagonal_value);
        col_coupleBlock->nullifyRows_CSR_BLK1(mask_row, 0.);
        double* remote_values = finishRowCollect();
        row_coupleBlock->nullifyRows_CSR_BLK1(remote_values, 0.);
    } else {
        startRowCollect(mask_row);
        mainBlock->nullifyRows_CSR(mask_row, main_diagonal_value);
        col_coupleBlock->nullifyRows_CSR(mask_row, 0.);
        double* remote_values = finishRowCollect();
        row_coupleBlock->nullifyRows_CSR(remote_values, 0.);
    }
}

void SystemMatrix::nullifyRowsAndCols(escript::Data& row_q,
                                      escript::Data& col_q,
                                      double main_diagonal_value)
{
    if (col_q.getDataPointSize() != getColumnBlockSize()) {
        throw PasoException("nullifyRowsAndCols: column block size does not match the number of components of column mask.");
    } else if (row_q.getDataPointSize() != getRowBlockSize()) {
        throw PasoException("nullifyRowsAndCols: row block size does not match the number of components of row mask.");
    } else if (col_q.getFunctionSpace() != getColumnFunctionSpace()) {
        throw PasoException("nullifyRowsAndCols: column function space and function space of column mask don't match.");
    } else if (row_q.getFunctionSpace() != getRowFunctionSpace()) {
        throw PasoException("nullifyRowsAndCols: row function space and function space of row mask don't match.");
    }
    row_q.expand();
    col_q.expand();
    row_q.requireWrite();
    col_q.requireWrite();
    double* mask_row = row_q.getSampleDataRW(0);
    double* mask_col = col_q.getSampleDataRW(0);

    if (mpi_info->size > 1) {
        if (type & MATRIX_FORMAT_CSC) {
            throw PasoException("SystemMatrix::nullifyRowsAndCols: "
                                "CSC is not supported with MPI.");
        }

        startColCollect(mask_col);
        startRowCollect(mask_row);
        if (col_block_size==1 && row_block_size==1) {
            mainBlock->nullifyRowsAndCols_CSR_BLK1(mask_row, mask_col, main_diagonal_value);
            double* remote_values = finishColCollect();
            col_coupleBlock->nullifyRowsAndCols_CSR_BLK1(mask_row, remote_values, 0.);
            remote_values = finishRowCollect();
            row_coupleBlock->nullifyRowsAndCols_CSR_BLK1(remote_values, mask_col, 0.);
        } else {
            mainBlock->nullifyRowsAndCols_CSR(mask_row, mask_col, main_diagonal_value);
            double* remote_values = finishColCollect();
            col_coupleBlock->nullifyRowsAndCols_CSR(mask_row, remote_values, 0.);
            remote_values = finishRowCollect();
            row_coupleBlock->nullifyRowsAndCols_CSR(remote_values, mask_col, 0.);
        }
    } else {
        if (col_block_size==1 && row_block_size==1) {
            if (type & MATRIX_FORMAT_CSC) {
                mainBlock->nullifyRowsAndCols_CSC_BLK1(mask_row, mask_col, main_diagonal_value);
            } else {
                mainBlock->nullifyRowsAndCols_CSR_BLK1(mask_row, mask_col, main_diagonal_value);
            }
        } else {
            if (type & MATRIX_FORMAT_CSC) {
                mainBlock->nullifyRowsAndCols_CSC(mask_row, mask_col, main_diagonal_value);
            } else {
                mainBlock->nullifyRowsAndCols_CSR(mask_row, mask_col, main_diagonal_value);
            }
        }
    }
}
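
// Minimal sketch of applying Dirichlet-style constraints with this method
// (hypothetical caller; how the masks are built depends on the application).
// As the names suggest, rows and columns where the mask is nonzero are wiped
// and the main diagonal in those rows is set to the given value:
//
//   escript::Data row_mask = ...;  // 1 where the row is constrained, else 0
//   escript::Data col_mask = ...;  // 1 where the column is constrained
//   A->nullifyRowsAndCols(row_mask, col_mask, 1.0);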

void SystemMatrix::resetValues()
{
    setValues(0.);
    solve_free(this);
}

void SystemMatrix::setToSolution(escript::Data& out, escript::Data& in,
                                 boost::python::object& options) const
{
    options.attr("resetDiagnostics")();
    Options paso_options(options);
    if (out.getDataPointSize() != getColumnBlockSize()) {
        throw PasoException("solve: column block size does not match the number of components of solution.");
    } else if (in.getDataPointSize() != getRowBlockSize()) {
        throw PasoException("solve: row block size does not match the number of components of right hand side.");
    } else if (out.getFunctionSpace() != getColumnFunctionSpace()) {
        throw PasoException("solve: column function space and function space of solution don't match.");
    } else if (in.getFunctionSpace() != getRowFunctionSpace()) {
        throw PasoException("solve: row function space and function space of right hand side don't match.");
    }
    out.expand();
    in.expand();
    out.requireWrite();
    in.requireWrite();
    double* out_dp = out.getSampleDataRW(0);
    double* in_dp = in.getSampleDataRW(0);
    solve(out_dp, in_dp, &paso_options);
    paso_options.updateEscriptDiagnostics(options);
}

void SystemMatrix::ypAx(escript::Data& y, escript::Data& x) const
{
    if (x.getDataPointSize() != getColumnBlockSize()) {
        throw PasoException("matrix vector product: column block size does not match the number of components in input.");
    } else if (y.getDataPointSize() != getRowBlockSize()) {
        throw PasoException("matrix vector product: row block size does not match the number of components in output.");
    } else if (x.getFunctionSpace() != getColumnFunctionSpace()) {
        throw PasoException("matrix vector product: column function space and function space of input don't match.");
    } else if (y.getFunctionSpace() != getRowFunctionSpace()) {
        throw PasoException("matrix vector product: row function space and function space of output don't match.");
    }
    x.expand();
    y.expand();
    x.requireWrite();
    y.requireWrite();
    double* x_dp = x.getSampleDataRW(0);
    double* y_dp = y.getSampleDataRW(0);
    MatrixVector(1., x_dp, 1., y_dp);
}
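
// As the name suggests, ypAx computes y += A*x: assuming Paso's usual
// MatrixVector semantics, MatrixVector(1., x_dp, 1., y_dp) is the
// alpha = 1, beta = 1 case of the general update y = alpha*A*x + beta*y.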

void SystemMatrix::copyColCoupleBlock()
{
    if (mpi_info->size == 1) {
        // nothing to do
        return;
    } else if (!row_coupleBlock) {
        throw PasoException("SystemMatrix::copyColCoupleBlock: "
                  "creation of row_coupleBlock pattern not supported yet.");
    } else if (row_coupler->in_use) {
        throw PasoException("SystemMatrix::copyColCoupleBlock: Coupler in use.");
    }

    // start receiving
    for (dim_t p=0; p<row_coupler->connector->recv->numNeighbors; p++) {
#ifdef ESYS_MPI
        const index_t irow1 = row_coupler->connector->recv->offsetInShared[p];
        const index_t irow2 = row_coupler->connector->recv->offsetInShared[p+1];
        const index_t a = row_coupleBlock->pattern->ptr[irow1];
        const index_t b = row_coupleBlock->pattern->ptr[irow2];

        MPI_Irecv(&row_coupleBlock->val[a*block_size], (b-a) * block_size,
                  MPI_DOUBLE, row_coupler->connector->recv->neighbor[p],
                  mpi_info->counter()+row_coupler->connector->recv->neighbor[p],
                  mpi_info->comm, &row_coupler->mpi_requests[p]);

#endif
    }

    // start sending
    index_t z0 = 0;
    double* send_buffer = new double[col_coupleBlock->len];
    const size_t block_size_size = block_size*sizeof(double);

    for (dim_t p=0; p<row_coupler->connector->send->numNeighbors; p++) {
        // j_min, j_max define the range of columns to be sent to processor p
        const index_t j_min = col_coupler->connector->recv->offsetInShared[p];
        const index_t j_max = col_coupler->connector->recv->offsetInShared[p+1];
        index_t z = z0;

        // run over the rows to be connected to processor p
        for (index_t rPtr=row_coupler->connector->send->offsetInShared[p];
                rPtr < row_coupler->connector->send->offsetInShared[p+1]; ++rPtr) {
            const index_t row = row_coupler->connector->send->shared[rPtr];

            // collect the entries in the col couple block referring to
            // columns on processor p
            for (index_t iPtr=col_coupleBlock->pattern->ptr[row];
                    iPtr < col_coupleBlock->pattern->ptr[row+1]; ++iPtr) {
                const index_t j = col_coupleBlock->pattern->index[iPtr];
                if (j_min <= j && j < j_max) {
                    memcpy(&send_buffer[z],
                           &col_coupleBlock->val[block_size*iPtr],
                           block_size_size);
                    z += block_size;
                }
            }
        }
#ifdef ESYS_MPI
        MPI_Issend(&send_buffer[z0], z-z0, MPI_DOUBLE,
                   row_coupler->connector->send->neighbor[p],
                   mpi_info->counter()+mpi_info->rank,
                   mpi_info->comm,
                   &row_coupler->mpi_requests[p+row_coupler->connector->recv->numNeighbors]);
#endif
        z0 = z;
    }

    // wait until everything is done
#ifdef ESYS_MPI
    mpi_info->incCounter(mpi_info->size);
    MPI_Waitall(row_coupler->connector->send->numNeighbors+row_coupler->connector->recv->numNeighbors,
                row_coupler->mpi_requests,
                row_coupler->mpi_stati);
#endif
    delete[] send_buffer;
}
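
// Note on the message tags used above: each receive is posted with tag
// counter()+neighbor[p] (counter plus the sender's rank) while each send uses
// counter()+rank (counter plus the local rank), so tags match pairwise even
// with several exchanges in flight; incCounter(size) then advances the
// counter past the whole tag window so the next exchange uses fresh tags.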

void SystemMatrix::applyBalanceInPlace(double* x, const bool RHS) const
{
    if (is_balanced) {
        if (RHS) {
            const dim_t nrow = getTotalNumRows();
#pragma omp parallel for
            for (index_t i=0; i<nrow; ++i) {
                x[i] *= balance_vector[i];
            }
        } else {
            const dim_t ncol = getTotalNumCols();
#pragma omp parallel for
            for (index_t i=0; i<ncol; ++i) {
                x[i] *= balance_vector[i];
            }
        }
    }
}

void SystemMatrix::applyBalance(double* x_out, const double* x, bool RHS) const
{
    if (is_balanced) {
        if (RHS) {
            const dim_t nrow = getTotalNumRows();
#pragma omp parallel for
            for (index_t i=0; i<nrow; ++i) {
                x_out[i] = x[i] * balance_vector[i];
            }
        } else {
            const dim_t ncol = getTotalNumCols();
#pragma omp parallel for
            for (index_t i=0; i<ncol; ++i) {
                x_out[i] = x[i] * balance_vector[i];
            }
        }
    }
}

void SystemMatrix::balance()
{
    const dim_t nrow = getTotalNumRows();

    if (!is_balanced) {
        if ((type & MATRIX_FORMAT_CSC) || (type & MATRIX_FORMAT_OFFSET1)) {
            throw PasoException("SystemMatrix_balance: No normalization "
                      "available for compressed sparse column or index offset 1.");
        }
        if (getGlobalNumRows() != getGlobalNumCols() ||
                row_block_size != col_block_size) {
            throw PasoException("SystemMatrix::balance: matrix needs to be a square matrix.");
        }
        // calculate absolute max value over each row
#pragma omp parallel for
        for (dim_t irow=0; irow<nrow; ++irow) {
            balance_vector[irow] = 0;
        }
        mainBlock->maxAbsRow_CSR_OFFSET0(balance_vector);
        if (col_coupleBlock->pattern->ptr != NULL) {
            col_coupleBlock->maxAbsRow_CSR_OFFSET0(balance_vector);
        }

        // set balancing vector
#pragma omp parallel for
        for (dim_t irow=0; irow<nrow; ++irow) {
            const double fac = balance_vector[irow];
            if (fac > 0) {
                balance_vector[irow] = sqrt(1./fac);
            } else {
                balance_vector[irow] = 1.;
            }
        }
        ///// rescale matrix /////
        // start exchange
        startCollect(balance_vector);
        // process main block
        mainBlock->applyDiagonal_CSR_OFFSET0(balance_vector, balance_vector);
        // finish exchange
        double* remote_values = finishCollect();
        // process couple block
        if (col_coupleBlock->pattern->ptr != NULL) {
            col_coupleBlock->applyDiagonal_CSR_OFFSET0(balance_vector, remote_values);
        }
        if (row_coupleBlock->pattern->ptr != NULL) {
            row_coupleBlock->applyDiagonal_CSR_OFFSET0(remote_values, balance_vector);
        }
        is_balanced = true;
    }
}
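
// In matrix terms, balance() replaces A by D*A*D with
// D = diag(1/sqrt(max_j |A_ij|)); for a symmetric matrix whose row maxima sit
// on the diagonal this scales the diagonal entries to 1 (all-zero rows are
// left unscaled). A minimal sketch of solving with the balanced system,
// assuming a hypothetical caller that manages the transformation itself:
//
//   A->balance();                            // A <- D*A*D
//   A->applyBalance(b_tilde, b, true);       // b_tilde = D*b
//   // ... solve (D*A*D) * x_tilde = D*b ...
//   A->applyBalanceInPlace(x_tilde, false);  // x = D*x_tilde solves A*x = b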

int SystemMatrix::getSystemMatrixTypeId(int solver, int preconditioner,
                                        int package, bool symmetry,
                                        const escript::JMPI& mpi_info)
{
    int out = -1;
    int true_package = Options::getPackage(Options::mapEscriptOption(solver),
                                           Options::mapEscriptOption(package),
                                           symmetry, mpi_info);

    switch (true_package) {
        case PASO_PASO:
            out = MATRIX_FORMAT_DEFAULT;
            break;

        case PASO_MKL:
            out = MATRIX_FORMAT_BLK1 | MATRIX_FORMAT_OFFSET1;
            break;

        case PASO_UMFPACK:
            if (mpi_info->size > 1) {
                throw PasoException("The selected solver UMFPACK "
                        "requires CSC format which is not supported with "
                        "more than one rank.");
            } else {
                out = MATRIX_FORMAT_CSC | MATRIX_FORMAT_BLK1;
            }
            break;

        default:
            throw PasoException("unknown package code");
    }
    return out;
}
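
// Usage sketch (hypothetical caller): the returned id is the ntype value
// later handed to the SystemMatrix constructor, so the storage layout already
// matches what the chosen backend needs (e.g. MKL wants unrolled blocks with
// 1-based indices, hence MATRIX_FORMAT_BLK1 | MATRIX_FORMAT_OFFSET1):
//
//   const int mt = SystemMatrix::getSystemMatrixTypeId(solver, prec,
//                                                      package, sym, mpi);
//   SystemMatrix_ptr A(new SystemMatrix(mt, pat, rbs, cbs, false,
//                                       rowFS, colFS));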

SparseMatrix_ptr SystemMatrix::mergeSystemMatrix() const
{
    const index_t n = mainBlock->numRows;

    if (mpi_info->size == 1) {
        index_t* ptr = new index_t[n];
#pragma omp parallel for
        for (index_t i=0; i<n; i++)
            ptr[i] = i;
        SparseMatrix_ptr out(mainBlock->getSubmatrix(n, n, ptr, ptr));
        delete[] ptr;
        return out;
    }

#ifdef ESYS_MPI
    const index_t size = mpi_info->size;
    const index_t rank = mpi_info->rank;

    // Merge main block and couple block to get the complete column entries
    // for each row allocated to current rank. Output (ptr, idx, val)
    // contains all info needed from current rank to merge a system matrix
    index_t *ptr, *idx;
    double *val;
    mergeMainAndCouple(&ptr, &idx, &val);

    std::vector<MPI_Request> mpi_requests(size*2);
    std::vector<MPI_Status> mpi_stati(size*2);

    // Now, pass all info to rank 0 and merge them into one sparse matrix
    if (rank == 0) {
        // First, copy local ptr values into ptr_global
        const index_t global_n = getGlobalNumRows();
        index_t* ptr_global = new index_t[global_n+1];
        memcpy(ptr_global, ptr, (n+1)*sizeof(index_t));
        delete[] ptr;
        index_t iptr = n+1;
        index_t* temp_n = new index_t[size];
        index_t* temp_len = new index_t[size];
        temp_n[0] = iptr;

        // Second, receive ptr values from other ranks
        for (index_t i=1; i<size; i++) {
            const index_t remote_n = row_distribution->first_component[i+1] -
                                     row_distribution->first_component[i];
            MPI_Irecv(&ptr_global[iptr], remote_n, MPI_INT, i,
                      mpi_info->counter()+i, mpi_info->comm,
                      &mpi_requests[i]);
            temp_n[i] = remote_n;
            iptr += remote_n;
        }
        mpi_info->incCounter(size);
        MPI_Waitall(size-1, &mpi_requests[1], &mpi_stati[0]);

        // Then, prepare to receive idx and val from other ranks
        index_t len = 0;
        index_t offset = -1;
        for (index_t i=0; i<size; i++) {
            if (temp_n[i] > 0) {
                offset += temp_n[i];
                len += ptr_global[offset];
                temp_len[i] = ptr_global[offset];
            } else
                temp_len[i] = 0;
        }

        index_t* idx_global = new index_t[len];
        iptr = temp_len[0];
        offset = n+1;
        for (index_t i=1; i<size; i++) {
            len = temp_len[i];
            MPI_Irecv(&idx_global[iptr], len, MPI_INT, i,
                      mpi_info->counter()+i,
                      mpi_info->comm, &mpi_requests[i]);
            const index_t remote_n = temp_n[i];
            for (index_t j=0; j<remote_n; j++) {
                ptr_global[j+offset] = ptr_global[j+offset] + iptr;
            }
            offset += remote_n;
            iptr += len;
        }
        memcpy(idx_global, idx, temp_len[0]*sizeof(index_t));
        delete[] idx;
        MPI_Waitall(size-1, &mpi_requests[1], &mpi_stati[0]);
        mpi_info->incCounter(size);
        delete[] temp_n;

        // Then generate the sparse matrix
        const index_t rowBlockSize = mainBlock->row_block_size;
        const index_t colBlockSize = mainBlock->col_block_size;
        Pattern_ptr pat(new Pattern(mainBlock->pattern->type,
                                    global_n, global_n, ptr_global, idx_global));
        SparseMatrix_ptr out(new SparseMatrix(mainBlock->type, pat,
                                              rowBlockSize, colBlockSize, false));

        // Finally, receive and copy the values
        iptr = temp_len[0] * block_size;
        for (index_t i=1; i<size; i++) {
            len = temp_len[i];
            MPI_Irecv(&out->val[iptr], len * block_size, MPI_DOUBLE, i,
                      mpi_info->counter()+i, mpi_info->comm,
                      &mpi_requests[i]);
            iptr += len*block_size;
        }
        memcpy(out->val, val, temp_len[0] * sizeof(double) * block_size);
        delete[] val;
        mpi_info->incCounter(size);
        MPI_Waitall(size-1, &mpi_requests[1], &mpi_stati[0]);
        delete[] temp_len;
        return out;

    } else { // it's not rank 0

        // First, send out the local ptr
        index_t tag = mpi_info->counter()+rank;
        MPI_Issend(&ptr[1], n, MPI_INT, 0, tag, mpi_info->comm,
                   &mpi_requests[0]);

        // Next, send out the local idx
        index_t len = ptr[n];
        tag += size;
        MPI_Issend(idx, len, MPI_INT, 0, tag, mpi_info->comm,
                   &mpi_requests[1]);

        // At last, send out the local val
        len *= block_size;
        tag += size;
        MPI_Issend(val, len, MPI_DOUBLE, 0, tag, mpi_info->comm,
                   &mpi_requests[2]);

        MPI_Waitall(3, &mpi_requests[0], &mpi_stati[0]);
        mpi_info->setCounter(tag + size - rank);
        delete[] ptr;
        delete[] idx;
        delete[] val;
    } // rank
#endif

    return SparseMatrix_ptr();
}
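
// Gather protocol in brief (a reading of the code above, not a spec): every
// rank first merges its main and couple blocks into local CSR arrays
// (ptr, idx, val). Rank 0 then receives the ptr segments (each rank sends
// &ptr[1], i.e. its cumulative counts without the leading zero, and the row
// counts are already known from row_distribution, so only a shift by iptr is
// needed), then the column indices, then the value blocks, and assembles a
// single global SparseMatrix. All other ranks return an empty
// SparseMatrix_ptr.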

} // namespace paso
