
Contents of /trunk/paso/src/SystemMatrix_copyRemoteCoupleBlock.cpp



Revision 4803
Wed Mar 26 06:52:28 2014 UTC by caltinay
File size: 10739 byte(s)
Removed obsolete wrappers for malloc and friends.
Paso_Pattern -> paso::Pattern

/*****************************************************************************
*
* Copyright (c) 2003-2014 by University of Queensland
* http://www.uq.edu.au
*
* Primary Business: Queensland, Australia
* Licensed under the Open Software License version 3.0
* http://www.opensource.org/licenses/osl-3.0.php
*
* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
* Development 2012-2013 by School of Earth Sciences
* Development from 2014 by Centre for Geoscience Computing (GeoComp)
*
*****************************************************************************/


/**********************************************************************************************/
/* Paso: SystemMatrix                                                                          */
/*                                                                                             */
/* Copies the entries of mainBlock and col_coupleBlock held on other ranks                     */
/* into remote_coupleBlock.                                                                    */
/*                                                                                             */
/* WARNING: this function uses the mpi_requests of the coupler attached to A.                  */
/*                                                                                             */
/**********************************************************************************************/

/* Copyrights by ACcESS Australia 2003 */
/* Author: Lin Gao, l.gao@uq.edu.au */

/**********************************************************************************/

#include "Paso.h"
#include "SystemMatrix.h"

#ifdef _OPENMP
#include <omp.h>
#endif

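/* Overview:
 *   1. gather the global IDs of all coupling columns from every rank
 *      (MPI_Allgatherv), so each rank knows which columns its neighbours hold;
 *   2. for every row shared with a neighbour, scan mainBlock and
 *      col_coupleBlock for entries whose global column ID appears in that
 *      neighbour's coupling-column list and pack them into send buffers;
 *   3. exchange row degrees, column indices and values with the row coupler's
 *      neighbours (MPI_Irecv/MPI_Issend/MPI_Waitall);
 *   4. assemble the received data into A->remote_coupleBlock.
 */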
void Paso_SystemMatrix_copyRemoteCoupleBlock(Paso_SystemMatrix* A, const bool recreatePattern){
    paso::Pattern *pattern=NULL;
    paso::Coupler *coupler=NULL;
    Paso_SharedComponents *send=NULL, *recv=NULL;
    double *cols=NULL, *send_buf=NULL;
    index_t *global_id=NULL, *cols_array=NULL, *ptr_ptr=NULL, *ptr_idx=NULL;
    index_t *send_idx=NULL, *send_offset=NULL, *recv_buf=NULL, *recv_offset=NULL;
    index_t i, j, k, l, m, n, p, q, j_ub, k_lb, k_ub, l_lb, l_ub, i0;
    index_t offset, len, overlapped_n, base, block_size, block_size_size;
    index_t num_main_cols, num_couple_cols, num_neighbors, row, neighbor;
    index_t *recv_degree=NULL, *send_degree=NULL;
    dim_t rank=A->mpi_info->rank, mpi_size=A->mpi_info->size;

    if (mpi_size == 1) return;

    if (recreatePattern) paso::SparseMatrix_free(A->remote_coupleBlock);

    if (A->remote_coupleBlock) return;

    /* start sending/receiving the unknowns' global IDs */
    num_main_cols = A->mainBlock->numCols;
    cols = new double[num_main_cols];
    offset = A->col_distribution->first_component[rank];
    #pragma omp parallel for private(i) schedule(static)
    for (i=0; i<num_main_cols; ++i) cols[i] = offset + i;
    if (A->global_id == NULL) {
        coupler=paso::Coupler_alloc(A->col_coupler->connector, 1);
        paso::Coupler_startCollect(coupler, cols);
    }
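    /* The collect started above ships the global IDs of this rank's main-block
     * columns to the neighbouring ranks; what arrives back (see finishCollect
     * below) is the global ID of every column in col_coupleBlock, which is
     * then cached in A->global_id. */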

    recv_buf = new index_t[mpi_size];
    recv_degree = new index_t[mpi_size];
    recv_offset = new index_t[mpi_size+1];
    #pragma omp parallel for private(i) schedule(static)
    for (i=0; i<mpi_size; i++){
        recv_buf[i] = 0;
        recv_degree[i] = 1;
        recv_offset[i] = i;
    }

    num_couple_cols = A->col_coupleBlock->numCols;
    overlapped_n = A->row_coupleBlock->numRows;
    send = A->row_coupler->connector->send;
    recv = A->row_coupler->connector->recv;
    num_neighbors = send->numNeighbors;
    block_size = A->block_size;
    block_size_size = block_size * sizeof(double);

    /* wait until the unknowns' global IDs have been received */
    if (A->global_id == NULL) {
        paso::Coupler_finishCollect(coupler);
        global_id = new index_t[num_couple_cols+1];
        #pragma omp parallel for private(i) schedule(static)
        for (i=0; i<num_couple_cols; ++i)
            global_id[i] = coupler->recv_buffer[i];
        A->global_id = global_id;
        paso::Coupler_free(coupler);
    } else
        global_id = A->global_id;

    /* distribute the number of columns in the current col_coupleBlock to all ranks */
    #ifdef ESYS_MPI
    MPI_Allgatherv(&num_couple_cols, 1, MPI_INT, recv_buf, recv_degree, recv_offset, MPI_INT, A->mpi_info->comm);
    #endif
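    /* recv_buf[r] now holds the number of coupling columns on rank r;
     * recv_degree/recv_offset were set to all-ones/identity above so that the
     * gather behaves like a plain MPI_Allgather of one int per rank. */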

    /* distribute the global IDs of the columns to be considered to all ranks */
    len = 0;
    for (i=0; i<mpi_size; i++){
        recv_degree[i] = recv_buf[i];
        recv_offset[i] = len;
        len += recv_buf[i];
    }
    recv_offset[mpi_size] = len;
    cols_array = new index_t[len];
    if (Esys_checkPtr(cols_array)) fprintf(stderr, "rank %d: allocation of cols_array failed\n", rank);

    #ifdef ESYS_MPI
    MPI_Allgatherv(global_id, num_couple_cols, MPI_INT, cols_array, recv_degree, recv_offset, MPI_INT, A->mpi_info->comm);
    #endif
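    /* cols_array now concatenates the global coupling-column IDs of all ranks;
     * the slice contributed by rank r is cols_array[recv_offset[r] .. recv_offset[r+1]). */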

    /* first, prepare the ptr_ptr to be received */
    ptr_ptr = new index_t[overlapped_n+1];
    for (p=0; p<recv->numNeighbors; p++) {
        row = recv->offsetInShared[p];
        i = recv->offsetInShared[p+1];
        #ifdef ESYS_MPI
        MPI_Irecv(&(ptr_ptr[row]), i-row, MPI_INT, recv->neighbor[p],
                  A->mpi_info->msg_tag_counter+recv->neighbor[p],
                  A->mpi_info->comm,
                  &(A->row_coupler->mpi_requests[p]));
        #endif
    }
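    /* A non-blocking receive is now posted for every receiving neighbour;
     * each delivers the per-row entry counts (row degrees) for the rows this
     * rank overlaps with that neighbour. */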

    /* now prepare the rows to be sent (the degree, the offset and the data) */
    p = send->offsetInShared[num_neighbors];
    len = 0;
    for (i=0; i<num_neighbors; i++) {
        /* #cols per row X #rows */
        len += recv_buf[send->neighbor[i]] *
                (send->offsetInShared[i+1] - send->offsetInShared[i]);
    }
    send_buf = new double[len*block_size];
    send_idx = new index_t[len];
    send_offset = new index_t[p+1];
    send_degree = new index_t[num_neighbors];
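    /* len is an upper bound on the number of entries to send: for each
     * neighbour it allows (number of coupling columns on that neighbour) x
     * (number of shared rows sent to it); the send buffers are sized to it. */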

    len = 0;
    base = 0;
    i0 = 0;
    for (p=0; p<num_neighbors; p++) {
        i = i0;
        neighbor = send->neighbor[p];
        l_ub = recv_offset[neighbor+1];
        l_lb = recv_offset[neighbor];
        j_ub = send->offsetInShared[p + 1];
        for (j=send->offsetInShared[p]; j<j_ub; j++) {
            row = send->shared[j];

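            /* The neighbour's coupling columns cols_array[l_lb..l_ub) are
             * sorted by global ID. The row's entries are merged against that
             * list in global column order: coupling-block entries whose
             * global ID lies below the row's first main-block entry, then the
             * main-block entries, then the remaining coupling-block entries.
             * Matches are copied into send_buf and their position within the
             * neighbour's column list is recorded in send_idx. */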
            /* check col_coupleBlock for data to be passed @row */
            l = l_lb;
            k_ub = A->col_coupleBlock->pattern->ptr[row+1];
            k = A->col_coupleBlock->pattern->ptr[row];
            q = A->mainBlock->pattern->index[A->mainBlock->pattern->ptr[row]] + offset;
            while (k<k_ub && l<l_ub) {
                m = global_id[A->col_coupleBlock->pattern->index[k]];
                if (m > q) break;
                n = cols_array[l];
                if (m == n) {
                    send_idx[len] = l - l_lb;
                    memcpy(&(send_buf[len*block_size]), &(A->col_coupleBlock->val[block_size*k]), block_size_size);
                    len++;
                    l++;
                    k++;
                } else if (m < n) {
                    k++;
                } else {
                    l++;
                }
            }
            k_lb = k;

            /* check mainBlock for data to be passed @row */
            k_ub = A->mainBlock->pattern->ptr[row+1];
            k = A->mainBlock->pattern->ptr[row];
            while (k<k_ub && l<l_ub) {
                m = A->mainBlock->pattern->index[k] + offset;
                n = cols_array[l];
                if (m == n) {
                    send_idx[len] = l - l_lb;
                    memcpy(&(send_buf[len*block_size]), &(A->mainBlock->val[block_size*k]), block_size_size);
                    len++;
                    l++;
                    k++;
                } else if (m < n) {
                    k++;
                } else {
                    l++;
                }
            }

            /* check the remaining part of col_coupleBlock for data to be passed @row */
            k_ub = A->col_coupleBlock->pattern->ptr[row+1];
            k = k_lb;
            while (k<k_ub && l<l_ub) {
                m = global_id[A->col_coupleBlock->pattern->index[k]];
                n = cols_array[l];
                if (m == n) {
                    send_idx[len] = l - l_lb;
                    memcpy(&(send_buf[len*block_size]), &(A->col_coupleBlock->val[block_size*k]), block_size_size);
                    len++;
                    l++;
                    k++;
                } else if (m < n) {
                    k++;
                } else {
                    l++;
                }
            }

            send_offset[i] = len - base;
            base = len;
            i++;
        }

        /* send the row degrees for neighbour p */
        #ifdef ESYS_MPI
        MPI_Issend(&(send_offset[i0]), i-i0, MPI_INT, send->neighbor[p],
                   A->mpi_info->msg_tag_counter+rank,
                   A->mpi_info->comm,
                   &(A->row_coupler->mpi_requests[p+recv->numNeighbors]));
        #endif
        send_degree[p] = len;
        i0 = i;
    }
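    /* Each neighbour has now been sent the per-row entry counts of its shared
     * rows; send_degree[p] records the cumulative number of packed entries up
     * to and including neighbour p and is used below to slice send_idx and
     * send_buf. */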

    #ifdef ESYS_MPI
    MPI_Waitall(A->row_coupler->connector->send->numNeighbors+A->row_coupler->connector->recv->numNeighbors,
                A->row_coupler->mpi_requests,
                A->row_coupler->mpi_stati);
    #endif
    ESYS_MPI_INC_COUNTER(*(A->mpi_info), mpi_size)

    len = 0;
    for (i=0; i<overlapped_n; i++) {
        p = ptr_ptr[i];
        ptr_ptr[i] = len;
        len += p;
    }
    ptr_ptr[overlapped_n] = len;
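    /* The received row degrees are converted in place into a CSR-style offset
     * array: ptr_ptr[i] is now the position of row i's first remote entry and
     * ptr_ptr[overlapped_n] the total number of remote entries. */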
    ptr_idx = new index_t[len];

    /* send/receive index array */
    j=0;
    for (p=0; p<recv->numNeighbors; p++) {
        i = ptr_ptr[recv->offsetInShared[p+1]] - ptr_ptr[recv->offsetInShared[p]];
        #ifdef ESYS_MPI
        if (i > 0)
            MPI_Irecv(&(ptr_idx[j]), i, MPI_INT, recv->neighbor[p],
                      A->mpi_info->msg_tag_counter+recv->neighbor[p],
                      A->mpi_info->comm,
                      &(A->row_coupler->mpi_requests[p]));
        #endif
        j += i;
    }

    j=0;
    for (p=0; p<num_neighbors; p++) {
        i = send_degree[p] - j;
        #ifdef ESYS_MPI
        if (i > 0)
            MPI_Issend(&(send_idx[j]), i, MPI_INT, send->neighbor[p],
                       A->mpi_info->msg_tag_counter+rank,
                       A->mpi_info->comm,
                       &(A->row_coupler->mpi_requests[p+recv->numNeighbors]));
        #endif
        j = send_degree[p];
    }

    #ifdef ESYS_MPI
    MPI_Waitall(A->row_coupler->connector->send->numNeighbors+A->row_coupler->connector->recv->numNeighbors,
                A->row_coupler->mpi_requests,
                A->row_coupler->mpi_stati);
    #endif
    ESYS_MPI_INC_COUNTER(*(A->mpi_info), mpi_size)
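    /* ptr_idx now lists, row by row, the local indices of the remote entries
     * within this rank's coupling columns (the sender computed them relative
     * to this rank's coupling-column list), so ptr_ptr/ptr_idx together form
     * the pattern of remote_coupleBlock. */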

    /* allocate the pattern and sparse matrix for remote_coupleBlock */
    pattern = paso::Pattern_alloc(A->row_coupleBlock->pattern->type,
                                  overlapped_n, num_couple_cols, ptr_ptr, ptr_idx);
    A->remote_coupleBlock = paso::SparseMatrix_alloc(A->row_coupleBlock->type,
                                  pattern, A->row_block_size, A->col_block_size,
                                  FALSE);
    paso::Pattern_free(pattern);
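    /* Note: this assumes SparseMatrix_alloc takes its own reference to the
     * (reference-counted) pattern, so releasing the local reference here does
     * not invalidate remote_coupleBlock. */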

    /* send/receive value array */
    j=0;
    for (p=0; p<recv->numNeighbors; p++) {
        i = ptr_ptr[recv->offsetInShared[p+1]] - ptr_ptr[recv->offsetInShared[p]];
        #ifdef ESYS_MPI
        if (i > 0)
            MPI_Irecv(&(A->remote_coupleBlock->val[j]), i * block_size,
                      MPI_DOUBLE, recv->neighbor[p],
                      A->mpi_info->msg_tag_counter+recv->neighbor[p],
                      A->mpi_info->comm,
                      &(A->row_coupler->mpi_requests[p]));
        #endif
        j += (i * block_size);
    }

    j=0;
    for (p=0; p<num_neighbors; p++) {
        i = send_degree[p] - j;
        #ifdef ESYS_MPI
        if (i > 0)
            MPI_Issend(&(send_buf[j*block_size]), i*block_size, MPI_DOUBLE, send->neighbor[p],
                       A->mpi_info->msg_tag_counter+rank,
                       A->mpi_info->comm,
                       &(A->row_coupler->mpi_requests[p+recv->numNeighbors]));
        #endif
        j = send_degree[p];
    }

    #ifdef ESYS_MPI
    MPI_Waitall(A->row_coupler->connector->send->numNeighbors+A->row_coupler->connector->recv->numNeighbors,
                A->row_coupler->mpi_requests,
                A->row_coupler->mpi_stati);
    #endif
    ESYS_MPI_INC_COUNTER(*(A->mpi_info), mpi_size)

    /* release all temp memory allocation */
    delete[] cols;
    delete[] cols_array;
    delete[] recv_offset;
    delete[] recv_degree;
    delete[] recv_buf;
    delete[] send_buf;
    delete[] send_offset;
    delete[] send_degree;
    delete[] send_idx;
}

