
Contents of /trunk/paso/src/MergedSolver.cpp



Revision 4829
Thu Apr 3 04:02:53 2014 UTC by caltinay
File size: 10412 bytes
checkpointing some SparseMatrix cleanup.

/*****************************************************************************
*
* Copyright (c) 2003-2014 by University of Queensland
* http://www.uq.edu.au
*
* Primary Business: Queensland, Australia
* Licensed under the Open Software License version 3.0
* http://www.opensource.org/licenses/osl-3.0.php
*
* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
* Development 2012-2013 by School of Earth Sciences
* Development from 2014 by Centre for Geoscience Computing (GeoComp)
*
*****************************************************************************/


/************************************************************************************/

/* Paso: AMG preconditioner (local version) */

/************************************************************************************/

/* Author: lgao@uq.edu.au, l.gross@uq.edu.au */

/************************************************************************************/


#include "Paso.h"
#include "MergedSolver.h"
#include "Preconditioner.h"
#include "PasoUtil.h"
#include "UMFPACK.h"
#include "MKL.h"
#include <stdio.h>

Paso_MergedSolver* Paso_MergedSolver_alloc(Paso_SystemMatrix *A, Paso_Options* options)
{
    const index_t rank = A->mpi_info->rank;
    const index_t size = A->mpi_info->size;
    const dim_t global_n = Paso_SystemMatrix_getGlobalNumRows(A);
    const dim_t n_block = A->mainBlock->row_block_size;
    const dim_t* dist = A->pattern->input_distribution->first_component;
    dim_t i;

    Paso_MergedSolver* out = NULL;
    paso::SparseMatrix_ptr A_temp(Paso_MergedSolver_mergeSystemMatrix(A));
    out = new Paso_MergedSolver;
    if (Esys_noError()) {
        /* merge matrix */
        out->mpi_info = Esys_MPIInfo_getReference(A->mpi_info);
        out->reordering = options->reordering;
        out->refinements = options->coarse_matrix_refinements;
        /*out->verbose = options->verbose; */
        out->verbose = FALSE;
        out->sweeps = options->pre_sweeps+options->post_sweeps;

        /* buffers, counts and offsets used to gather b onto (and scatter x
           from) rank 0 in Paso_MergedSolver_solve() */
        out->b = new double[global_n*n_block];
        out->x = new double[global_n*n_block];
        out->counts = new index_t[size];
        out->offset = new index_t[size];

#pragma omp parallel for private(i)
        for (i=0; i<size; i++) {
            const dim_t p = dist[i];
            out->counts[i] = (dist[i+1] - p)*n_block;
            out->offset[i] = p*n_block;
        }

        if (rank == 0) {
            /* solve locally */
#ifdef MKL
            out->A = A_temp->unroll(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_OFFSET1);
            out->A->solver_package = PASO_MKL;
#elif defined UMFPACK
            out->A = A_temp->unroll(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_CSC);
            out->A->solver_package = PASO_UMFPACK;
#else
            /* no direct solver available: fall back to a local smoother.
               NOTE: the original code used out->A here without assigning it
               first; taking the merged matrix A_temp as-is is an assumed fix. */
            out->A = A_temp;
            out->A->solver_p = Paso_Preconditioner_LocalSmoother_alloc(out->A, (options->smoother == PASO_JACOBI), out->verbose);
            out->A->solver_package = PASO_SMOOTHER;
#endif
        }
    }

    if (Esys_noError()) {
        return out;
    } else {
        Paso_MergedSolver_free(out);
        return NULL;
    }
}
93
void Paso_MergedSolver_free(Paso_MergedSolver* in)
{
    if (in != NULL) {
        delete[] in->x;
        delete[] in->b;
        delete[] in->counts;
        delete[] in->offset;
        delete in;
    }
}
104
/* Merge the system matrix, which is distributed across ranks, into a
   complete matrix on rank 0; the merged matrix is then solved on rank 0 only. */
paso::SparseMatrix_ptr Paso_MergedSolver_mergeSystemMatrix(Paso_SystemMatrix* A)
{
    index_t i, iptr, j, n, remote_n, global_n, len, offset, tag;
    index_t row_block_size, col_block_size, block_size;
    index_t size=A->mpi_info->size;
    index_t rank=A->mpi_info->rank;
    index_t *ptr=NULL, *idx=NULL, *ptr_global=NULL, *idx_global=NULL;
    index_t *temp_n=NULL, *temp_len=NULL;
    double *val=NULL;
    paso::SparseMatrix_ptr out;
#ifdef ESYS_MPI
    MPI_Request* mpi_requests=NULL;
    MPI_Status* mpi_stati=NULL;
#else
    int *mpi_requests=NULL, *mpi_stati=NULL;
#endif

    if (size == 1) {
        n = A->mainBlock->numRows;
        ptr = new index_t[n];
#pragma omp parallel for private(i)
        for (i=0; i<n; i++) ptr[i] = i;
        out = A->mainBlock->getSubmatrix(n, n, ptr, ptr);
        delete[] ptr;
        return out;
    }

    n = A->mainBlock->numRows;
    block_size = A->block_size;

    /* Merge MainBlock and CoupleBlock to get the complete column entries
       for each row allocated to the current rank. The output (ptr, idx, val)
       contains all the information this rank contributes to the merged
       system matrix. */
    Paso_SystemMatrix_mergeMainAndCouple(A, &ptr, &idx, &val);

#ifdef ESYS_MPI
    mpi_requests=new MPI_Request[size*2];
    mpi_stati=new MPI_Status[size*2];
#else
    mpi_requests=new int[size*2];
    mpi_stati=new int[size*2];
#endif

    /* Now pass all the information to rank 0 and merge it into one sparse
       matrix. */
    if (rank == 0) {
        /* First, copy local ptr values into ptr_global */
        global_n=Paso_SystemMatrix_getGlobalNumRows(A);
        ptr_global = new index_t[global_n+1];
        memcpy(ptr_global, ptr, (n+1) * sizeof(index_t));
        iptr = n+1;
        delete[] ptr;
        temp_n = new index_t[size];
        temp_len = new index_t[size];
        temp_n[0] = iptr;

        /* Second, receive ptr values from the other ranks */
        for (i=1; i<size; i++) {
            remote_n = A->row_distribution->first_component[i+1] -
                       A->row_distribution->first_component[i];
#ifdef ESYS_MPI
            MPI_Irecv(&(ptr_global[iptr]), remote_n, MPI_INT, i,
                      A->mpi_info->msg_tag_counter+i,
                      A->mpi_info->comm,
                      &mpi_requests[i]);
#endif
            temp_n[i] = remote_n;
            iptr += remote_n;
        }
#ifdef ESYS_MPI
        MPI_Waitall(size-1, &(mpi_requests[1]), mpi_stati);
#endif
        ESYS_MPI_INC_COUNTER(*(A->mpi_info), size);

        /* Then prepare to receive idx and val from the other ranks */
        len = 0;
        offset = -1;
        for (i=0; i<size; i++) {
            if (temp_n[i] > 0) {
                offset += temp_n[i];
                len += ptr_global[offset];
                temp_len[i] = ptr_global[offset];
            } else {
                temp_len[i] = 0;
            }
        }

        idx_global = new index_t[len];
        iptr = temp_len[0];
        offset = n+1;
        for (i=1; i<size; i++) {
            len = temp_len[i];
#ifdef ESYS_MPI
            MPI_Irecv(&(idx_global[iptr]), len, MPI_INT, i,
                      A->mpi_info->msg_tag_counter+i,
                      A->mpi_info->comm,
                      &mpi_requests[i]);
#endif
            remote_n = temp_n[i];
            for (j=0; j<remote_n; j++) {
                ptr_global[j+offset] = ptr_global[j+offset] + iptr;
            }
            offset += remote_n;
            iptr += len;
        }
        memcpy(idx_global, idx, temp_len[0] * sizeof(index_t));
        delete[] idx;
        row_block_size = A->mainBlock->row_block_size;
        col_block_size = A->mainBlock->col_block_size;
#ifdef ESYS_MPI
        MPI_Waitall(size-1, &(mpi_requests[1]), mpi_stati);
#endif
        ESYS_MPI_INC_COUNTER(*(A->mpi_info), size);
        delete[] temp_n;

        /* Then generate the sparse matrix */
        paso::Pattern_ptr pattern(new paso::Pattern(A->mainBlock->pattern->type,
                    global_n, global_n, ptr_global, idx_global));
        out.reset(new paso::SparseMatrix(A->mainBlock->type, pattern,
                    row_block_size, col_block_size, false));

        /* Finally, receive and copy the values */
        iptr = temp_len[0] * block_size;
        for (i=1; i<size; i++) {
            len = temp_len[i];
#ifdef ESYS_MPI
            MPI_Irecv(&(out->val[iptr]), len * block_size, MPI_DOUBLE, i,
                      A->mpi_info->msg_tag_counter+i,
                      A->mpi_info->comm,
                      &mpi_requests[i]);
#endif
            iptr += (len * block_size);
        }
        memcpy(out->val, val, temp_len[0] * sizeof(double) * block_size);
        delete[] val;
#ifdef ESYS_MPI
        MPI_Waitall(size-1, &(mpi_requests[1]), mpi_stati);
#endif
        ESYS_MPI_INC_COUNTER(*(A->mpi_info), size);
        delete[] temp_len;
    } else { /* not rank 0 */

        /* First, send out the local ptr */
        tag = A->mpi_info->msg_tag_counter+rank;
#ifdef ESYS_MPI
        MPI_Issend(&(ptr[1]), n, MPI_INT, 0, tag, A->mpi_info->comm,
                   &mpi_requests[0]);
#endif

        /* Next, send out the local idx */
        len = ptr[n];
        tag += size;
#ifdef ESYS_MPI
        MPI_Issend(idx, len, MPI_INT, 0, tag, A->mpi_info->comm,
                   &mpi_requests[1]);
#endif

        /* At last, send out the local val */
        len *= block_size;
        tag += size;
#ifdef ESYS_MPI
        MPI_Issend(val, len, MPI_DOUBLE, 0, tag, A->mpi_info->comm,
                   &mpi_requests[2]);

        MPI_Waitall(3, mpi_requests, mpi_stati);
#endif
        ESYS_MPI_SET_COUNTER(*(A->mpi_info), tag + size - rank);
        delete[] ptr;
        delete[] idx;
        delete[] val;
        out.reset();
    }

    delete[] mpi_requests;
    delete[] mpi_stati;
    return out;
}
284
285
void Paso_MergedSolver_solve(Paso_MergedSolver* ms, double* local_x, double* local_b)
{
    const index_t rank = ms->mpi_info->rank;
    const dim_t count = ms->counts[rank];

#ifdef ESYS_MPI
    {
        MPI_Gatherv(local_b, count, MPI_DOUBLE, ms->b, ms->counts, ms->offset, MPI_DOUBLE, 0, ms->mpi_info->comm);
    }
#else
    {
        dim_t i;
#pragma omp parallel for private(i)
        for (i=0; i<count; i++) {
            ms->b[i]=local_b[i];
            ms->x[i]=local_x[i];
        }
    }
#endif
    if (rank == 0) {
        switch (ms->A->solver_package) {
            case (PASO_MKL):
                Paso_MKL(ms->A, ms->x, ms->b, ms->reordering, ms->refinements, ms->verbose);
                break;
            case (PASO_UMFPACK):
                Paso_UMFPACK(ms->A, ms->x, ms->b, ms->refinements, ms->verbose);
                break;
            case (PASO_SMOOTHER):
                Paso_Preconditioner_LocalSmoother_solve(ms->A, reinterpret_cast<Paso_Preconditioner_LocalSmoother*>(ms->A->solver_p), ms->x, ms->b, ms->sweeps, FALSE);
                break;
        }
    }
#ifdef ESYS_MPI
    /* now we need to distribute the solution to all ranks */
    {
        MPI_Scatterv(ms->x, ms->counts, ms->offset, MPI_DOUBLE, local_x, count, MPI_DOUBLE, 0, ms->mpi_info->comm);
    }
#else
    {
        dim_t i;
#pragma omp parallel for private(i)
        for (i=0; i<count; i++) local_x[i]=ms->x[i];
    }
#endif
}
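
For reference, a minimal usage sketch of the gather-solve-scatter lifecycle implemented above. This is not part of the file; the surrounding objects (A as a Paso_SystemMatrix*, options as a Paso_Options*, and the rank-local arrays local_x/local_b sized to this rank's share of rows times the block size) are assumed to be provided by the caller:

    /* Sketch only: how a caller would drive the merged solver. */
    Paso_MergedSolver* ms = Paso_MergedSolver_alloc(A, options);  /* merges A onto rank 0 */
    if (ms != NULL) {
        /* gathers local_b onto rank 0, solves there, scatters the result into local_x */
        Paso_MergedSolver_solve(ms, local_x, local_b);
        Paso_MergedSolver_free(ms);
    }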

