/[escript]/trunk/paso/src/SystemMatrix_copyRemoteCoupleBlock.cpp


Revision 4873 - Wed Apr 16 06:38:51 2014 UTC by caltinay
File size: 11442 byte(s)
Log message: whitespace only changes.

/*****************************************************************************
*
* Copyright (c) 2003-2014 by University of Queensland
* http://www.uq.edu.au
*
* Primary Business: Queensland, Australia
* Licensed under the Open Software License version 3.0
* http://www.opensource.org/licenses/osl-3.0.php
*
* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
* Development 2012-2013 by School of Earth Sciences
* Development from 2014 by Centre for Geoscience Computing (GeoComp)
*
*****************************************************************************/


/****************************************************************************/
/* Paso: SystemMatrix */
/* */
/* Copy the mainBlock and col_coupleBlock entries held on other ranks */
/* into remote_coupleBlock */
/* */
/* WARNING: function uses mpi_requests of the coupler attached to the matrix. */
/* */
/****************************************************************************/

/* Copyrights by ACcESS Australia 2003 */
/* Author: Lin Gao, l.gao@uq.edu.au */

/****************************************************************************/

#include "SystemMatrix.h"

#include <cstring> // memcpy

namespace paso {

void SystemMatrix::copyRemoteCoupleBlock(bool recreatePattern)
{
    if (mpi_info->size == 1)
        return;

    if (recreatePattern)
        remote_coupleBlock.reset();

    if (remote_coupleBlock)
        return;

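    /*
     * Outline of the MPI exchange below:
     *   1. gather the global IDs of this rank's coupling columns (via the
     *      column coupler) unless they are already cached in global_id,
     *   2. allgather the number and the global IDs of every rank's coupling
     *      columns into cols_array,
     *   3. for every row shared with a neighbour, collect the matching
     *      mainBlock/col_coupleBlock entries and exchange, in three rounds,
     *      the per-row counts, the column indices and the values,
     *   4. assemble the received data into remote_coupleBlock.
     */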
#ifdef ESYS_MPI
    // send/receive the unknowns' global IDs
    const dim_t rank = mpi_info->rank;
    const dim_t mpi_size = mpi_info->size;
    index_t num_main_cols = mainBlock->numCols;
    double* cols = new double[num_main_cols];
    const index_t offset = col_distribution->first_component[rank];
    #pragma omp parallel for
    for (index_t i=0; i<num_main_cols; ++i)
        cols[i] = offset + i;

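    // cols[i] is the global index of local main-block column i; the coupler
    // sends these values to the neighbouring ranks so that, once the collect
    // has finished, recv_buffer holds the global IDs of this rank's coupling
    // columns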
    Coupler_ptr coupler;
    if (!global_id) {
        coupler.reset(new Coupler(col_coupler->connector, 1));
        coupler->startCollect(cols);
    }

    index_t* recv_buf = new index_t[mpi_size];
    index_t* recv_degree = new index_t[mpi_size];
    index_t* recv_offset = new index_t[mpi_size+1];
    #pragma omp parallel for
    for (index_t i=0; i<mpi_size; i++) {
        recv_buf[i] = 0;
        recv_degree[i] = 1;
        recv_offset[i] = i;
    }

    index_t num_couple_cols = col_coupleBlock->numCols;
    const index_t overlapped_n = row_coupleBlock->numRows;
    SharedComponents_ptr send(row_coupler->connector->send);
    SharedComponents_ptr recv(row_coupler->connector->recv);
    index_t num_neighbors = send->numNeighbors;
    const size_t block_size_size = block_size * sizeof(double);

    // wait until the unknowns' global IDs have been received
    if (!global_id) {
        coupler->finishCollect();
        global_id = new index_t[num_couple_cols+1];
        #pragma omp parallel for
        for (index_t i=0; i<num_couple_cols; ++i)
            global_id[i] = coupler->recv_buffer[i];
        coupler.reset();
    }

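    // every rank contributes exactly one integer (its number of coupling
    // columns) to the following allgather, hence recv_degree[i]==1 and
    // recv_offset[i]==i as initialised above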
    // distribute the number of cols in current col_coupleBlock to all ranks
    MPI_Allgatherv(&num_couple_cols, 1, MPI_INT, recv_buf, recv_degree,
                   recv_offset, MPI_INT, mpi_info->comm);

    // distribute global_ids of cols to be considered to all ranks
    index_t len = 0;
    for (index_t i=0; i<mpi_size; i++) {
        recv_degree[i] = recv_buf[i];
        recv_offset[i] = len;
        len += recv_buf[i];
    }
    recv_offset[mpi_size] = len;
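    // cols_array holds, rank by rank, the global IDs of every rank's coupling
    // columns; recv_offset[r] marks where rank r's block starts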
    index_t* cols_array = new index_t[len];

    MPI_Allgatherv(global_id, num_couple_cols, MPI_INT, cols_array,
                   recv_degree, recv_offset, MPI_INT, mpi_info->comm);

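    // ptr_ptr[row] will receive, from the rank owning that overlapped row,
    // the number of matrix entries to be copied into remote_coupleBlock for
    // that row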
    // first, prepare the ptr_ptr to be received
    index_t* ptr_ptr = new index_t[overlapped_n+1];
    for (index_t p=0; p<recv->numNeighbors; p++) {
        const index_t row = recv->offsetInShared[p];
        const index_t i = recv->offsetInShared[p+1];
        MPI_Irecv(&(ptr_ptr[row]), i-row, MPI_INT, recv->neighbor[p],
                  mpi_info->msg_tag_counter+recv->neighbor[p],
                  mpi_info->comm,
                  &(row_coupler->mpi_requests[p]));
    }

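    // len is an upper bound on the number of entries to send: for each
    // neighbour, the number of coupling columns that neighbour requested
    // times the number of rows shared with it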
    // now prepare the rows to be sent (the degree, the offset and the data)
    index_t p = send->offsetInShared[num_neighbors];
    len = 0;
    for (index_t i=0; i<num_neighbors; i++) {
        // #cols per row X #rows
        len += recv_buf[send->neighbor[i]] *
               (send->offsetInShared[i+1] - send->offsetInShared[i]);
    }
    double* send_buf = new double[len*block_size];
    index_t* send_idx = new index_t[len];
    index_t* send_offset = new index_t[p+1];
    index_t* send_degree = new index_t[num_neighbors];

    index_t k, l, m, n, q;
    len = 0;
    index_t base = 0;
    index_t i0 = 0;
    for (p=0; p<num_neighbors; p++) {
        index_t i = i0;
        const index_t neighbor = send->neighbor[p];
        const index_t l_ub = recv_offset[neighbor+1];
        const index_t l_lb = recv_offset[neighbor];
        const index_t j_ub = send->offsetInShared[p + 1];
        for (index_t j=send->offsetInShared[p]; j<j_ub; j++) {
            const index_t row = send->shared[j];

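            // The loops below perform a sorted merge of the row's column
            // indices (in global numbering) against the neighbour's requested
            // columns cols_array[l_lb..l_ub), in three stages: col_coupleBlock
            // entries whose global ID lies below the first mainBlock column,
            // then the mainBlock entries, then the remaining col_coupleBlock
            // entries. send_idx stores the match position relative to l_lb,
            // i.e. the receiver's local coupling-column index.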
            // check col_coupleBlock for data to be passed @row
            l = l_lb;
            index_t k_ub = col_coupleBlock->pattern->ptr[row+1];
            k = col_coupleBlock->pattern->ptr[row];
            q = mainBlock->pattern->index[mainBlock->pattern->ptr[row]] + offset;
            while (k<k_ub && l<l_ub) {
                m = global_id[col_coupleBlock->pattern->index[k]];
                if (m > q) break;
                n = cols_array[l];
                if (m == n) {
                    send_idx[len] = l - l_lb;
                    memcpy(&send_buf[len*block_size],
                           &col_coupleBlock->val[block_size*k],
                           block_size_size);
                    len++;
                    l++;
                    k++;
                } else if (m < n) {
                    k++;
                } else {
                    l++;
                }
            }
            const index_t k_lb = k;

            // check mainBlock for data to be passed @row
            k_ub = mainBlock->pattern->ptr[row+1];
            k = mainBlock->pattern->ptr[row];
            while (k<k_ub && l<l_ub) {
                m = mainBlock->pattern->index[k] + offset;
                n = cols_array[l];
                if (m == n) {
                    send_idx[len] = l - l_lb;
                    memcpy(&send_buf[len*block_size],
                           &mainBlock->val[block_size*k], block_size_size);
                    len++;
                    l++;
                    k++;
                } else if (m < n) {
                    k++;
                } else {
                    l++;
                }
            }

            // check col_coupleBlock for data to be passed @row
            k_ub = col_coupleBlock->pattern->ptr[row+1];
            k = k_lb;
            while (k<k_ub && l<l_ub) {
                m = global_id[col_coupleBlock->pattern->index[k]];
                n = cols_array[l];
                if (m == n) {
                    send_idx[len] = l - l_lb;
                    memcpy(&send_buf[len*block_size],
                           &col_coupleBlock->val[block_size*k],
                           block_size_size);
                    len++;
                    l++;
                    k++;
                } else if (m < n) {
                    k++;
                } else {
                    l++;
                }
            }

            send_offset[i] = len - base;
            base = len;
            i++;
        }

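        // send the per-row entry counts for this neighbour; the receiver
        // stores them in ptr_ptr and later converts them into row offsets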
        /* sending */
        MPI_Issend(&send_offset[i0], i-i0, MPI_INT, send->neighbor[p],
                   mpi_info->msg_tag_counter+rank, mpi_info->comm,
                   &row_coupler->mpi_requests[p+recv->numNeighbors]);
        send_degree[p] = len;
        i0 = i;
    }

    MPI_Waitall(row_coupler->connector->send->numNeighbors +
                row_coupler->connector->recv->numNeighbors,
                row_coupler->mpi_requests, row_coupler->mpi_stati);
    ESYS_MPI_INC_COUNTER(*mpi_info, mpi_size)

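    // convert the received per-row counts in ptr_ptr into CSR-style row
    // offsets (exclusive prefix sum); len ends up as the total number of
    // entries in remote_coupleBlock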
    len = 0;
    for (index_t i=0; i<overlapped_n; i++) {
        p = ptr_ptr[i];
        ptr_ptr[i] = len;
        len += p;
    }
    ptr_ptr[overlapped_n] = len;
    index_t* ptr_idx = new index_t[len];

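    // the indices exchanged here are positions within the receiving rank's
    // own coupling-column list, so they can be used directly as column
    // indices for remote_coupleBlock's pattern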
    // send/receive index array
    index_t j=0;
    for (p=0; p<recv->numNeighbors; p++) {
        const index_t i = ptr_ptr[recv->offsetInShared[p+1]] -
                          ptr_ptr[recv->offsetInShared[p]];
        if (i > 0)
            MPI_Irecv(&ptr_idx[j], i, MPI_INT, recv->neighbor[p],
                      mpi_info->msg_tag_counter+recv->neighbor[p],
                      mpi_info->comm,
                      &row_coupler->mpi_requests[p]);
        j += i;
    }

    j=0;
    for (p=0; p<num_neighbors; p++) {
        const index_t i = send_degree[p] - j;
        if (i > 0)
            MPI_Issend(&send_idx[j], i, MPI_INT, send->neighbor[p],
                       mpi_info->msg_tag_counter+rank, mpi_info->comm,
                       &row_coupler->mpi_requests[p+recv->numNeighbors]);
        j = send_degree[p];
    }

    MPI_Waitall(row_coupler->connector->send->numNeighbors +
                row_coupler->connector->recv->numNeighbors,
                row_coupler->mpi_requests, row_coupler->mpi_stati);
    ESYS_MPI_INC_COUNTER(*mpi_info, mpi_size)

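    // ptr_ptr and ptr_idx are handed over to the new Pattern below and are
    // therefore not freed in the cleanup at the end of this function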
    // allocate pattern and sparse matrix for remote_coupleBlock
    Pattern_ptr pattern(new Pattern(row_coupleBlock->pattern->type,
                        overlapped_n, num_couple_cols, ptr_ptr, ptr_idx));
    remote_coupleBlock.reset(new SparseMatrix(row_coupleBlock->type,
                             pattern, row_block_size, col_block_size, false));

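    // the values arrive in the same order as the indices exchanged above, so
    // they are received directly into remote_coupleBlock->val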
    // send/receive value array
    j=0;
    for (p=0; p<recv->numNeighbors; p++) {
        const index_t i = ptr_ptr[recv->offsetInShared[p+1]] -
                          ptr_ptr[recv->offsetInShared[p]];
        if (i > 0)
            MPI_Irecv(&remote_coupleBlock->val[j], i * block_size,
                      MPI_DOUBLE, recv->neighbor[p],
                      mpi_info->msg_tag_counter+recv->neighbor[p],
                      mpi_info->comm,
                      &row_coupler->mpi_requests[p]);
        j += i*block_size;
    }

    j=0;
    for (p=0; p<num_neighbors; p++) {
        const index_t i = send_degree[p] - j;
        if (i > 0)
            MPI_Issend(&send_buf[j*block_size], i*block_size, MPI_DOUBLE,
                       send->neighbor[p], mpi_info->msg_tag_counter+rank,
                       mpi_info->comm,
                       &row_coupler->mpi_requests[p+recv->numNeighbors]);
        j = send_degree[p];
    }

    MPI_Waitall(row_coupler->connector->send->numNeighbors +
                row_coupler->connector->recv->numNeighbors,
                row_coupler->mpi_requests, row_coupler->mpi_stati);
    ESYS_MPI_INC_COUNTER(*mpi_info, mpi_size)

    // release all temporary memory allocations
    delete[] cols;
    delete[] cols_array;
    delete[] recv_offset;
    delete[] recv_degree;
    delete[] recv_buf;
    delete[] send_buf;
    delete[] send_offset;
    delete[] send_degree;
    delete[] send_idx;
#endif
}

} // namespace paso
