
Contents of /branches/trilinos_from_5897/paso/src/AMG_Restriction.cpp



Revision 5933
Wed Feb 17 23:53:30 2016 UTC by caltinay
File size: 14031 byte(s)
sync with trunk.

/*****************************************************************************
*
* Copyright (c) 2003-2016 by The University of Queensland
* http://www.uq.edu.au
*
* Primary Business: Queensland, Australia
* Licensed under the Open Software License version 3.0
* http://www.opensource.org/licenses/osl-3.0.php
*
* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
* Development 2012-2013 by School of Earth Sciences
* Development from 2014 by Centre for Geoscience Computing (GeoComp)
*
*****************************************************************************/


/****************************************************************************/

/* Paso: defines AMG Restriction Operator */

/****************************************************************************/

/* Author: Lin Gao, lgao@uq.edu.au */

/****************************************************************************/

#include "Paso.h"
#include "SparseMatrix.h"
#include "PasoUtil.h"
#include "Preconditioner.h"

#include <cstring> // memcpy

namespace paso {

/****************************************************************************

Methods necessary for the AMG preconditioner

Constructs the n_C x n Restriction matrix R from the prolongation matrix P.

R->mainBlock is the transpose of P->mainBlock, but we need
to recover R->col_coupleBlock from P's data held on other ranks.

*****************************************************************************/

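/* Background sketch (standard AMG, consistent with the comment above): the
   restriction operator is taken as the transpose of the prolongation,
   R = P^T, so that the coarse-level operator can be formed as the Galerkin
   product A_C = R * A * P = P^T * A * P. On a single rank this is just a
   sparse transpose; in the distributed case the columns stored in
   P->col_coupleBlock belong to coarse DOFs owned by other ranks, so the
   corresponding entries become rows of R on those ranks and have to be
   communicated, which is what the code below does. */
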
SystemMatrix_ptr Preconditioner_AMG_getRestriction(SystemMatrix_ptr P)
{
    esysUtils::JMPI& mpi_info=P->mpi_info;
    Distribution_ptr input_dist, output_dist;
    SharedComponents_ptr send, recv;
    Connector_ptr col_connector;
    const dim_t row_block_size=P->row_block_size;
    const dim_t col_block_size=P->col_block_size;
    const dim_t n=P->mainBlock->numRows;
    const dim_t n_C=P->mainBlock->numCols;
    index_t size=mpi_info->size, rank=mpi_info->rank, *dist=NULL;
    index_t *ptr=NULL, *idx=NULL, *degree_set=NULL, *offset_set=NULL;
    index_t *send_ptr=NULL, *recv_ptr=NULL, *recv_idx=NULL;
    index_t *temp=NULL, *where_p=NULL;
    index_t num_Pcouple_cols, num_Rcouple_cols, numNeighbors;
    index_t i, j, j_ub, k, p, iptr, iptr_ub, icb, irb;
    index_t block_size, copy_block_size, sum, offset, len, msgs;
    double *val=NULL, *data_set=NULL, *recv_val=NULL;
    index_t *shared=NULL, *offsetInShared=NULL;
    Esys_MPI_rank *neighbor=NULL;
#ifdef ESYS_MPI
    MPI_Request* mpi_requests=NULL;
    MPI_Status* mpi_stati=NULL;
#else
    int *mpi_requests=NULL, *mpi_stati=NULL;
#endif

    /* get main_block of R from the transpose of P->mainBlock */
    SparseMatrix_ptr main_block(P->mainBlock->getTranspose());

    /* prepare "ptr" for the col_coupleBlock of R: start by collecting the
       degree_set (associated with "ptr"), the offset_set (associated with
       "idx") and the data_set (associated with "val") that have to be sent
       to other ranks */
    SparseMatrix_ptr couple_block(P->col_coupleBlock);
    num_Pcouple_cols = couple_block->numCols;
    block_size = P->block_size;
    copy_block_size = block_size * sizeof(double);
    degree_set = new index_t[num_Pcouple_cols];
    send_ptr = new index_t[num_Pcouple_cols+1];
    memset(degree_set, 0, sizeof(index_t) * num_Pcouple_cols);
    for (i=0; i<n; i++) {
        iptr_ub = couple_block->pattern->ptr[i+1];
        for (iptr=couple_block->pattern->ptr[i]; iptr<iptr_ub; iptr++) {
            j = couple_block->pattern->index[iptr];
            degree_set[j]++;
        }
    }

    send_ptr[0] = 0;
    for (i=0; i<num_Pcouple_cols; i++) {
        send_ptr[i+1] = send_ptr[i] + degree_set[i];
    }
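    /* send_ptr is now the exclusive prefix sum over degree_set: the entries
       that couple to the j-th shared coarse column of P will be packed into
       offset_set/data_set in the range [send_ptr[j], send_ptr[j+1]). */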

    memset(degree_set, 0, sizeof(index_t) * num_Pcouple_cols);
    sum = couple_block->pattern->ptr[n];
    offset_set = new index_t[sum];
    data_set = new double[sum * block_size];
    offset = P->pattern->output_distribution->first_component[rank];

    if (P->type & MATRIX_FORMAT_DIAGONAL_BLOCK) {
        for (i=0; i<n; i++) {
            iptr_ub = couple_block->pattern->ptr[i+1];
            for (iptr=couple_block->pattern->ptr[i]; iptr<iptr_ub; iptr++) {
                j = couple_block->pattern->index[iptr];
                k = send_ptr[j] + degree_set[j];
                offset_set[k] = i + offset; /* now we have the global id for row i,
                                               which will be used as col index of R */
                memcpy(&(data_set[k*block_size]), &(couple_block->val[iptr*block_size]), copy_block_size);
                degree_set[j]++;
            }
        }
    } else {
        for (i=0; i<n; i++) {
            iptr_ub = couple_block->pattern->ptr[i+1];
            for (iptr=couple_block->pattern->ptr[i]; iptr<iptr_ub; iptr++) {
                j = couple_block->pattern->index[iptr];
                k = send_ptr[j] + degree_set[j];
                offset_set[k] = i + offset; /* now we have the global id for row i,
                                               which will be used as col index of R */
                for (irb=0; irb < row_block_size; irb++) {
                    for (icb=0; icb < col_block_size; icb++) {
                        data_set[k*block_size+icb+col_block_size*irb] = couple_block->val[iptr*block_size+irb+row_block_size*icb];
                    }
                }
                degree_set[j]++;
            }
        }
    }
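    /* Note on the loops above: because R holds the transpose of P, each
       row_block_size x col_block_size block taken from P->col_coupleBlock is
       written into data_set with its row and column block indices swapped;
       in the MATRIX_FORMAT_DIAGONAL_BLOCK case the blocks are diagonal, so a
       plain memcpy suffices. */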


#ifdef ESYS_MPI
    mpi_requests=new MPI_Request[size*4];
    mpi_stati=new MPI_Status[size*4];
#else
    mpi_requests=new int[size*4];
    mpi_stati=new int[size*4];
#endif

    /* send/receive degree_set to build the "ptr" for R->col_coupleBlock */
    msgs = 0;
    send = P->col_coupler->connector->send;
    recv = P->col_coupler->connector->recv;
    recv_ptr = new index_t[send->offsetInShared[send->numNeighbors]];
    for (p=0; p<send->numNeighbors; p++) {
        i = send->offsetInShared[p];
        j = send->offsetInShared[p+1];
        k = j - i;
        if (k > 0) {
#ifdef ESYS_MPI
            MPI_Irecv(&(recv_ptr[i]), k, MPI_INT, send->neighbor[p],
                      mpi_info->msg_tag_counter+send->neighbor[p],
                      mpi_info->comm, &mpi_requests[msgs]);
#endif
            msgs++;
        }
    }

    for (p=0; p<recv->numNeighbors; p++) {
        i = recv->offsetInShared[p];
        j = recv->offsetInShared[p+1];
        k = j - i;
        if (k > 0) {
#ifdef ESYS_MPI
            MPI_Issend(&(degree_set[i]), k, MPI_INT, recv->neighbor[p],
                       mpi_info->msg_tag_counter+rank, mpi_info->comm,
                       &mpi_requests[msgs]);
#endif
            msgs++;
        }
    }

#ifdef ESYS_MPI
    MPI_Waitall(msgs, mpi_requests, mpi_stati);
#endif
    ESYS_MPI_INC_COUNTER(*mpi_info, size)
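    /* The communication pattern of P is reused with the roles reversed: the
       coupled columns of P (P's receive list) are coarse DOFs owned by
       neighbouring ranks, so their degrees are sent back to those owners,
       while the degrees for coarse DOFs owned here (P's send list) are
       received into recv_ptr. */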

    delete[] degree_set;
    degree_set = new index_t[send->numNeighbors];
    memset(degree_set, 0, sizeof(index_t)*send->numNeighbors);
    for (p=0, sum=0; p<send->numNeighbors; p++) {
        iptr_ub = send->offsetInShared[p+1];
        for (iptr = send->offsetInShared[p]; iptr < iptr_ub; iptr++) {
            degree_set[p] += recv_ptr[iptr];
        }
        sum += degree_set[p];
    }

    /* send/receive offset_set and data_set to build the "idx" and "val"
       for R->col_coupleBlock */
    msgs = 0;
    recv_idx = new index_t[sum];
    recv_val = new double[sum * block_size];
    for (p=0, offset=0; p<send->numNeighbors; p++) {
        if (degree_set[p]) {
#ifdef ESYS_MPI
            MPI_Irecv(&(recv_idx[offset]), degree_set[p], MPI_INT,
                      send->neighbor[p], mpi_info->msg_tag_counter+send->neighbor[p],
                      mpi_info->comm, &mpi_requests[msgs]);
            msgs++;
            MPI_Irecv(&(recv_val[offset*block_size]), degree_set[p] * block_size,
                      MPI_DOUBLE, send->neighbor[p],
                      mpi_info->msg_tag_counter+send->neighbor[p]+size,
                      mpi_info->comm, &mpi_requests[msgs]);
            offset += degree_set[p];
#endif
            msgs++;
        }
    }

    for (p=0; p<recv->numNeighbors; p++) {
        i = recv->offsetInShared[p];
        j = recv->offsetInShared[p+1];
        k = send_ptr[j] - send_ptr[i];
        if (k > 0) {
#ifdef ESYS_MPI
            MPI_Issend(&(offset_set[send_ptr[i]]), k, MPI_INT,
                       recv->neighbor[p], mpi_info->msg_tag_counter+rank,
                       mpi_info->comm, &mpi_requests[msgs]);
            msgs++;
            MPI_Issend(&(data_set[send_ptr[i]*block_size]), k*block_size, MPI_DOUBLE,
                       recv->neighbor[p], mpi_info->msg_tag_counter+rank+size,
                       mpi_info->comm, &mpi_requests[msgs]);
#endif
            msgs++;
        }
    }

    len = send->offsetInShared[send->numNeighbors];
    temp = new index_t[len];
    memset(temp, 0, sizeof(index_t)*len);
    for (p=1; p<len; p++) {
        temp[p] = temp[p-1] + recv_ptr[p-1];
    }
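    /* temp[j] is the exclusive prefix sum of the received degrees, i.e. the
       offset within recv_idx/recv_val at which the entries contributed for
       the j-th entry of the send list start. */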

#ifdef ESYS_MPI
    MPI_Waitall(msgs, mpi_requests, mpi_stati);
#endif
    ESYS_MPI_INC_COUNTER(*mpi_info, 2*size)
    delete[] degree_set;
    delete[] offset_set;
    delete[] data_set;
    delete[] send_ptr;
    delete[] mpi_requests;
    delete[] mpi_stati;

    /* construct "ptr", "idx" and "val" for R->col_coupleBlock */
    ptr = new index_t[n_C + 1];
    idx = new index_t[sum];
    val = new double[sum*block_size];
    ptr[0] = 0;
    for (i=0; i<n_C; i++) {
        icb = 0;
        for (p=0; p<send->numNeighbors; p++) {
            k = send->offsetInShared[p+1];
            for (j=send->offsetInShared[p]; j<k; j++) {
                if (send->shared[j] == i) {
                    offset = ptr[i] + icb;
                    len = recv_ptr[j];
                    memcpy(&idx[offset], &recv_idx[temp[j]], sizeof(index_t)*len);
                    memcpy(&val[offset*block_size], &recv_val[temp[j]*block_size], sizeof(double)*len*block_size);
                    icb += len;
                    break;
                }
            }
        }
        ptr[i+1] = ptr[i] + icb;
    }
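    /* For every local coarse row i of R the loop above concatenates the
       contributions received from each neighbour that lists coarse DOF i in
       the send list, giving the CSR row pointer "ptr", the (still global)
       column indices "idx" and the values "val" of R->col_coupleBlock. */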
    sum = ptr[n_C];
    delete[] temp;
    delete[] recv_ptr;
    delete[] recv_val;

    /* count the number of cols (num_Rcouple_cols) in R->col_coupleBlock,
       and convert the global ids in "idx" into local ids */
    num_Rcouple_cols = 0;
    if (sum) {
        qsort(recv_idx, (size_t)sum, sizeof(index_t), util::comparIndex);
        num_Rcouple_cols = 1;
        i = recv_idx[0];
        for (j=1; j<sum; j++) {
            if (recv_idx[j] > i) {
                i = recv_idx[j];
                recv_idx[num_Rcouple_cols] = i;
                num_Rcouple_cols++;
            }
        }
        #pragma omp parallel for private(i,where_p) schedule(static)
        for (i=0; i<sum; i++) {
            where_p = (index_t *)bsearch(&(idx[i]), recv_idx, num_Rcouple_cols,
                                         sizeof(index_t), util::comparIndex);
            idx[i] = (index_t)(where_p - recv_idx);
        }
    }
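    /* After the sort/deduplication recv_idx holds the distinct global column
       ids in ascending order; each entry of "idx" is then replaced by its
       position in that list, i.e. by the local column id used inside
       R->col_coupleBlock. */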

    /* prepare the receiver for the col_connector */
    dist = P->pattern->output_distribution->first_component;
    offsetInShared = new index_t[size+1];
    shared = new index_t[num_Rcouple_cols];
    numNeighbors = send->numNeighbors;
    neighbor = send->neighbor;
    memset(offsetInShared, 0, sizeof(index_t) * (size+1));
    if (num_Rcouple_cols > 0) offset = dist[neighbor[0] + 1];
    for (i=0, p=0; i<num_Rcouple_cols; i++) {
        /* column i is received from rank neighbor[p] as long as its global id
           is still smaller than "offset"; otherwise it is received from rank
           neighbor[p+1] */
        while (recv_idx[i] >= offset) {
            p++;
            offsetInShared[p] = i;
            offset = dist[neighbor[p] + 1];
        }
        shared[i] = i + n; /* n is the number of cols in R->mainBlock */
    }
    #pragma omp parallel for private(i) schedule(static)
    for (i=p; i<numNeighbors; i++) {
        offsetInShared[i+1] = num_Rcouple_cols;
    }
    recv.reset(new SharedComponents(n, numNeighbors, neighbor, shared,
                                    offsetInShared, 1, 0, mpi_info));
    delete[] recv_idx;

    /* prepare the sender for the col_connector */
    delete[] shared;
    numNeighbors = P->col_coupler->connector->recv->numNeighbors;
    neighbor = P->col_coupler->connector->recv->neighbor;
    shared = new index_t[n * numNeighbors];
    Pattern_ptr couple_pattern(P->col_coupleBlock->pattern);
    sum=0;
    memset(offsetInShared, 0, sizeof(index_t) * (size+1));
    for (p=0; p<numNeighbors; p++) {
        j = P->col_coupler->connector->recv->offsetInShared[p];
        j_ub = P->col_coupler->connector->recv->offsetInShared[p+1];
        for (i=0; i<n; i++) {
            iptr = couple_pattern->ptr[i];
            iptr_ub = couple_pattern->ptr[i+1];
            for (; iptr<iptr_ub; iptr++) {
                k = couple_pattern->index[iptr];
                if (k >= j && k < j_ub) {
                    shared[sum] = i;
                    sum++;
                    break;
                }
            }
        }
        offsetInShared[p+1] = sum;
    }
    send.reset(new SharedComponents(n, numNeighbors, neighbor, shared,
                                    offsetInShared, 1, 0, mpi_info));
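    /* The sender of R's col_connector mirrors the receiver of P's
       col_connector: for each neighbour it lists the local rows of P (i.e.
       columns of R) whose col_coupleBlock entries reference coarse DOFs
       owned by that neighbour. */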

    /* build the col_connector based on sender and receiver */
    col_connector.reset(new Connector(send, recv));
    delete[] offsetInShared;
    delete[] shared;

    couple_pattern.reset(new Pattern(MATRIX_FORMAT_DEFAULT, n_C,
                                     num_Rcouple_cols, ptr, idx));

    input_dist.reset(new Distribution(mpi_info, dist, 1, 0));
    dist = P->pattern->input_distribution->first_component;
    output_dist.reset(new Distribution(mpi_info, dist, 1, 0));

    /* now we need to create the SystemMatrix.
       TO BE FIXED: at this stage we only construct the col_couple_pattern
       and the col_connector for the Restriction matrix R. To complete this,
       row_couple_pattern and row_connector need to be constructed as well */
    SystemMatrix_ptr out;
    SystemMatrixPattern_ptr pattern;
    if (Esys_noError()) {
        pattern.reset(new SystemMatrixPattern(MATRIX_FORMAT_DEFAULT,
                output_dist, input_dist, main_block->pattern, couple_pattern,
                couple_pattern, col_connector, col_connector));
        out.reset(new SystemMatrix(MATRIX_FORMAT_DIAGONAL_BLOCK, pattern,
                row_block_size, col_block_size, false,
                P->getRowFunctionSpace(), P->getColumnFunctionSpace()));
    }

    /* now fill in the matrix */
    memcpy(out->mainBlock->val, main_block->val,
           main_block->len * sizeof(double));
    memcpy(out->col_coupleBlock->val, val,
           out->col_coupleBlock->len * sizeof(double));
    delete[] val;

    if (!Esys_noError()) {
        out.reset();
    }
    return out;
}

} // namespace paso
