/trunk/paso/src/MergedSolver.cpp

Revision 4829
Thu Apr 3 04:02:53 2014 UTC by caltinay
File size: 10412 byte(s)
checkpointing some SparseMatrix cleanup.

/*****************************************************************************
*
* Copyright (c) 2003-2014 by University of Queensland
* http://www.uq.edu.au
*
* Primary Business: Queensland, Australia
* Licensed under the Open Software License version 3.0
* http://www.opensource.org/licenses/osl-3.0.php
*
* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
* Development 2012-2013 by School of Earth Sciences
* Development from 2014 by Centre for Geoscience Computing (GeoComp)
*
*****************************************************************************/


/*****************************************************************************/

/* Paso: AMG preconditioner: merged solver for the coarse level system */

/*****************************************************************************/

/* Author: lgao@uq.edu.au, l.gross@uq.edu.au */

/*****************************************************************************/


#include "Paso.h"
#include "MergedSolver.h"
#include "Preconditioner.h"
#include "PasoUtil.h"
#include "UMFPACK.h"
#include "MKL.h"
#include <stdio.h>

Paso_MergedSolver* Paso_MergedSolver_alloc(Paso_SystemMatrix *A, Paso_Options* options)
{
    const index_t rank = A->mpi_info->rank;
    const index_t size = A->mpi_info->size;
    const dim_t global_n = Paso_SystemMatrix_getGlobalNumRows(A);
    const dim_t n_block = A->mainBlock->row_block_size;
    const dim_t* dist = A->pattern->input_distribution->first_component;
    dim_t i;

    Paso_MergedSolver* out = NULL;
    paso::SparseMatrix_ptr A_temp(Paso_MergedSolver_mergeSystemMatrix(A));
    out = new Paso_MergedSolver;
    if (Esys_noError()) {
        /* merge matrix */
        out->mpi_info = Esys_MPIInfo_getReference(A->mpi_info);
        out->reordering = options->reordering;
        out->refinements = options->coarse_matrix_refinements;
        /*out->verbose = options->verbose; */
        out->verbose = FALSE;
        out->sweeps = options->pre_sweeps+options->post_sweeps;

        /* First, gather x and b into rank 0 */
        out->b = new double[global_n*n_block];
        out->x = new double[global_n*n_block];
        out->counts = new index_t[size];
        out->offset = new index_t[size];

        #pragma omp parallel for private(i)
        for (i=0; i<size; i++) {
            const dim_t p = dist[i];
            out->counts[i] = (dist[i+1] - p)*n_block;
            out->offset[i] = p*n_block;
        }
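        /* Illustrative example (not from the original source): with size=2,
           n_block=1 and a row distribution dist={0,4,7} this gives
           counts={4,3} and offset={0,4}, i.e. rank 0 contributes rows 0..3
           and rank 1 rows 4..6 to the gathered vectors. */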

        if (rank == 0) {
            /* solve locally */
#ifdef MKL
            out->A = A_temp->unroll(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_OFFSET1);
            out->A->solver_package = PASO_MKL;
#elif defined UMFPACK
            out->A = A_temp->unroll(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_CSC);
            out->A->solver_package = PASO_UMFPACK;
#else
            /* no direct solver available: keep the merged matrix and solve
               with smoothing sweeps instead */
            out->A = A_temp;
            out->A->solver_p = Paso_Preconditioner_LocalSmoother_alloc(out->A, (options->smoother == PASO_JACOBI), out->verbose);
            out->A->solver_package = PASO_SMOOTHER;
#endif
        }
    }

    if (Esys_noError()) {
        return out;
    } else {
        Paso_MergedSolver_free(out);
        return NULL;
    }
}

void Paso_MergedSolver_free(Paso_MergedSolver* in)
{
    if (in!=NULL) {
        delete[] in->x;
        delete[] in->b;
        delete[] in->counts;
        delete[] in->offset;
        delete in;
    }
}

/* Merges the system matrix, which is distributed across the ranks, into a
   complete matrix on rank 0; the merged matrix is then solved on rank 0 only. */
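/* Illustrative sketch of the merge (not part of the original source): with two
   ranks holding 2 rows each in CSR form,
       rank 0: ptr={0,2,3}, idx={0,1,2}
       rank 1: ptr={0,1,3}, idx={1,2,3}
   rank 1 sends ptr[1..n] plus its idx/val arrays; after shifting rank 1's row
   pointers by rank 0's nonzero count (3) the merged matrix on rank 0 has
       ptr_global={0,2,3,4,6}, idx_global={0,1,2,1,2,3}. */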
paso::SparseMatrix_ptr Paso_MergedSolver_mergeSystemMatrix(Paso_SystemMatrix* A)
{
    index_t i, iptr, j, n, remote_n, global_n, len, offset, tag;
    index_t row_block_size, col_block_size, block_size;
    index_t size=A->mpi_info->size;
    index_t rank=A->mpi_info->rank;
    index_t *ptr=NULL, *idx=NULL, *ptr_global=NULL, *idx_global=NULL;
    index_t *temp_n=NULL, *temp_len=NULL;
    double *val=NULL;
    paso::SparseMatrix_ptr out;
#ifdef ESYS_MPI
    MPI_Request* mpi_requests=NULL;
    MPI_Status* mpi_stati=NULL;
#else
    int *mpi_requests=NULL, *mpi_stati=NULL;
#endif

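    /* With a single process the main block already holds the complete matrix,
       so a plain copy of it is returned. */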
    if (size == 1) {
        n = A->mainBlock->numRows;
        ptr = new index_t[n];
        #pragma omp parallel for private(i)
        for (i=0; i<n; i++) ptr[i] = i;
        out = A->mainBlock->getSubmatrix(n, n, ptr, ptr);
        delete[] ptr;
        return out;
    }

    n = A->mainBlock->numRows;
    block_size = A->block_size;

    /* Merge MainBlock and CoupleBlock to get the complete set of column
       entries for each row allocated to the current rank. The output
       (ptr, idx, val) contains all the information this rank contributes
       to the merged system matrix. */
    Paso_SystemMatrix_mergeMainAndCouple(A, &ptr, &idx, &val);

#ifdef ESYS_MPI
    mpi_requests=new MPI_Request[size*2];
    mpi_stati=new MPI_Status[size*2];
#else
    mpi_requests=new int[size*2];
    mpi_stati=new int[size*2];
#endif

    /* Now, pass all info to rank 0 and merge them into one sparse
       matrix */
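    /* Note added for clarity: the merge uses three rounds of non-blocking
       communication, matched via the shared message tag counter:
       (1) the row pointers ptr, (2) the column indices idx,
       (3) the matrix values val. */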
    if (rank == 0) {
        /* First, copy local ptr values into ptr_global */
        global_n=Paso_SystemMatrix_getGlobalNumRows(A);
        ptr_global = new index_t[global_n+1];
        memcpy(ptr_global, ptr, (n+1) * sizeof(index_t));
        iptr = n+1;
        delete[] ptr;
        temp_n = new index_t[size];
        temp_len = new index_t[size];
        temp_n[0] = iptr;

        /* Second, receive ptr values from other ranks */
        for (i=1; i<size; i++) {
            remote_n = A->row_distribution->first_component[i+1] -
                       A->row_distribution->first_component[i];
#ifdef ESYS_MPI
            MPI_Irecv(&(ptr_global[iptr]), remote_n, MPI_INT, i,
                      A->mpi_info->msg_tag_counter+i,
                      A->mpi_info->comm,
                      &mpi_requests[i]);
#endif
            temp_n[i] = remote_n;
            iptr += remote_n;
        }
#ifdef ESYS_MPI
        MPI_Waitall(size-1, &(mpi_requests[1]), mpi_stati);
#endif
        ESYS_MPI_INC_COUNTER(*(A->mpi_info), size);

        /* Then, prepare to receive idx and val from other ranks */
        len = 0;
        offset = -1;
        for (i=0; i<size; i++) {
            if (temp_n[i] > 0) {
                offset += temp_n[i];
                len += ptr_global[offset];
                temp_len[i] = ptr_global[offset];
            } else
                temp_len[i] = 0;
        }
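        /* Continuing the illustrative two-rank example from above
           (hypothetical numbers): temp_n={3,2} gives temp_len={3,3} and
           len=6, i.e. each rank contributes 3 nonzeros and idx_global/val
           need room for 6 entries. */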

        idx_global = new index_t[len];
        iptr = temp_len[0];
        offset = n+1;
        for (i=1; i<size; i++) {
            len = temp_len[i];
#ifdef ESYS_MPI
            MPI_Irecv(&(idx_global[iptr]), len, MPI_INT, i,
                      A->mpi_info->msg_tag_counter+i,
                      A->mpi_info->comm,
                      &mpi_requests[i]);
#endif
            remote_n = temp_n[i];
            for (j=0; j<remote_n; j++) {
                ptr_global[j+offset] = ptr_global[j+offset] + iptr;
            }
            offset += remote_n;
            iptr += len;
        }
        memcpy(idx_global, idx, temp_len[0] * sizeof(index_t));
        delete[] idx;
        row_block_size = A->mainBlock->row_block_size;
        col_block_size = A->mainBlock->col_block_size;
#ifdef ESYS_MPI
        MPI_Waitall(size-1, &(mpi_requests[1]), mpi_stati);
#endif
        ESYS_MPI_INC_COUNTER(*(A->mpi_info), size);
        delete[] temp_n;

        /* Then generate the sparse matrix */
        paso::Pattern_ptr pattern(new paso::Pattern(A->mainBlock->pattern->type,
                    global_n, global_n, ptr_global, idx_global));
        out.reset(new paso::SparseMatrix(A->mainBlock->type, pattern,
                    row_block_size, col_block_size, false));

        /* Finally, receive and copy the value */
        iptr = temp_len[0] * block_size;
        for (i=1; i<size; i++) {
            len = temp_len[i];
#ifdef ESYS_MPI
            MPI_Irecv(&(out->val[iptr]), len * block_size, MPI_DOUBLE, i,
                      A->mpi_info->msg_tag_counter+i,
                      A->mpi_info->comm,
                      &mpi_requests[i]);
#endif
            iptr += (len * block_size);
        }
        memcpy(out->val, val, temp_len[0] * sizeof(double) * block_size);
        delete[] val;
#ifdef ESYS_MPI
        MPI_Waitall(size-1, &(mpi_requests[1]), mpi_stati);
#endif
        ESYS_MPI_INC_COUNTER(*(A->mpi_info), size);
        delete[] temp_len;
    } else { /* it's not rank 0 */

        /* First, send out the local ptr */
        tag = A->mpi_info->msg_tag_counter+rank;
#ifdef ESYS_MPI
        MPI_Issend(&(ptr[1]), n, MPI_INT, 0, tag, A->mpi_info->comm,
                   &mpi_requests[0]);
#endif

        /* Next, send out the local idx */
        len = ptr[n];
        tag += size;
#ifdef ESYS_MPI
        MPI_Issend(idx, len, MPI_INT, 0, tag, A->mpi_info->comm,
                   &mpi_requests[1]);
#endif

        /* At last, send out the local val */
        len *= block_size;
        tag += size;
#ifdef ESYS_MPI
        MPI_Issend(val, len, MPI_DOUBLE, 0, tag, A->mpi_info->comm,
                   &mpi_requests[2]);

        MPI_Waitall(3, mpi_requests, mpi_stati);
#endif
        ESYS_MPI_SET_COUNTER(*(A->mpi_info), tag + size - rank);
        delete[] ptr;
        delete[] idx;
        delete[] val;
        out.reset();
    }

    delete[] mpi_requests;
    delete[] mpi_stati;
    return out;
}


void Paso_MergedSolver_solve(Paso_MergedSolver* ms, double* local_x, double* local_b)
{
    const index_t rank = ms->mpi_info->rank;
    const dim_t count = ms->counts[rank];

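    /* Gather the local right hand sides into ms->b on rank 0, using the
       counts and offsets computed in Paso_MergedSolver_alloc
       (comment added for clarity). */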
#ifdef ESYS_MPI
    {
        MPI_Gatherv(local_b, count, MPI_DOUBLE, ms->b, ms->counts, ms->offset, MPI_DOUBLE, 0, ms->mpi_info->comm);
    }
#else
    {
        dim_t i;
        #pragma omp parallel for private(i)
        for (i=0; i<count; i++) {
            ms->b[i]=local_b[i];
            ms->x[i]=local_x[i];
        }
    }
#endif
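    /* Rank 0 solves the merged system with the package selected in
       Paso_MergedSolver_alloc (comment added for clarity). */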
    if (rank == 0) {
        switch (ms->A->solver_package) {
            case (PASO_MKL):
                Paso_MKL(ms->A, ms->x, ms->b, ms->reordering, ms->refinements, ms->verbose);
                break;
            case (PASO_UMFPACK):
                Paso_UMFPACK(ms->A, ms->x, ms->b, ms->refinements, ms->verbose);
                break;
            case (PASO_SMOOTHER):
                Paso_Preconditioner_LocalSmoother_solve(ms->A, reinterpret_cast<Paso_Preconditioner_LocalSmoother*>(ms->A->solver_p), ms->x, ms->b, ms->sweeps, FALSE);
                break;
        }
    }
#ifdef ESYS_MPI
    /* now we need to distribute the solution to all ranks */
    {
        MPI_Scatterv(ms->x, ms->counts, ms->offset, MPI_DOUBLE, local_x, count, MPI_DOUBLE, 0, ms->mpi_info->comm);
    }
#else
    {
        dim_t i;
        #pragma omp parallel for private(i)
        for (i=0; i<count; i++) local_x[i]=ms->x[i];
    }
#endif
}
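
/* Usage sketch (illustrative only, not part of the original file): assuming a
   distributed Paso_SystemMatrix* A, a filled Paso_Options opts, and local
   arrays b and x of length
   A->mainBlock->numRows * A->mainBlock->row_block_size,
   a merged solve would look roughly like:

       Paso_MergedSolver* ms = Paso_MergedSolver_alloc(A, &opts);
       if (ms != NULL) {
           Paso_MergedSolver_solve(ms, x, b);  // gather b, solve on rank 0, scatter x
           Paso_MergedSolver_free(ms);
       }
*/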
