
Contents of /trunk/paso/src/AMG_Restriction.c



Revision 4286
Thu Mar 7 04:28:11 2013 UTC by caltinay
File MIME type: text/plain
File size: 13961 byte(s)
Assorted spelling fixes.

/*****************************************************************************
*
* Copyright (c) 2003-2013 by University of Queensland
* http://www.uq.edu.au
*
* Primary Business: Queensland, Australia
* Licensed under the Open Software License version 3.0
* http://www.opensource.org/licenses/osl-3.0.php
*
* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
* Development since 2012 by School of Earth Sciences
*
*****************************************************************************/


/************************************************************************************/

/* Paso: defines AMG Restriction Operator */

/************************************************************************************/

/* Author: Lin Gao, lgao@uq.edu.au */

/************************************************************************************/

#include "Paso.h"
#include "SparseMatrix.h"
#include "PasoUtil.h"
#include "Preconditioner.h"

/************************************************************************************

   Methods necessary for AMG preconditioner

   construct the n_C x n Restriction matrix R from the prolongation matrix P.

   R->mainBlock is the transpose of P->mainBlock, but we need
   to recover R->col_coupleBlock from P's data on other ranks.

*************************************************************************************/
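
/* Overview of the data flow: locally, R->mainBlock is simply the transpose of
   P->mainBlock (an n_C x n block). An entry of P->col_coupleBlock in row i
   refers to a coarse column owned by a neighbour rank; after transposition it
   belongs to a row of R owned by that neighbour. Its value, together with its
   global column id (i plus the first row id owned by this rank), is therefore
   shipped to the owning rank, which assembles it into R->col_coupleBlock. */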

Paso_SystemMatrix* Paso_Preconditioner_AMG_getRestriction(Paso_SystemMatrix* P)
{
   Esys_MPIInfo *mpi_info=Esys_MPIInfo_getReference(P->mpi_info);
   Paso_SparseMatrix *main_block=NULL, *couple_block=NULL;
   Paso_SystemMatrix *out=NULL;
   Paso_SystemMatrixPattern *pattern=NULL;
   Paso_Distribution *input_dist=NULL, *output_dist=NULL;
   Paso_SharedComponents *send=NULL, *recv=NULL;
   Paso_Connector *col_connector=NULL;
   Paso_Pattern *couple_pattern=NULL;
   const dim_t row_block_size=P->row_block_size;
   const dim_t col_block_size=P->col_block_size;
   const dim_t n=P->mainBlock->numRows;
   const dim_t n_C=P->mainBlock->numCols;
   index_t size=mpi_info->size, rank=mpi_info->rank, *dist=NULL;
   index_t *ptr=NULL, *idx=NULL, *degree_set=NULL, *offset_set=NULL;
   index_t *send_ptr=NULL, *recv_ptr=NULL, *recv_idx=NULL;
   index_t *temp=NULL, *where_p=NULL;
   index_t num_Pcouple_cols, num_Rcouple_cols, numNeighbors;
   index_t i, j, j_ub, k, p, iptr, iptr_ub, icb, irb;
   index_t block_size, copy_block_size, sum, offset, len, msgs;
   double *val=NULL, *data_set=NULL, *recv_val=NULL;
   index_t *shared=NULL, *offsetInShared=NULL;
   Esys_MPI_rank *neighbor=NULL;
#ifdef ESYS_MPI
   MPI_Request* mpi_requests=NULL;
   MPI_Status* mpi_stati=NULL;
#else
   int *mpi_requests=NULL, *mpi_stati=NULL;
#endif

   /* get main_block of R from the transpose of P->mainBlock */
   main_block = Paso_SparseMatrix_getTranspose(P->mainBlock);

   /* prepare "ptr" for the col_coupleBlock of R: start by collecting the
      degree_set (associated with "ptr"), offset_set (associated with
      "idx") and data_set (associated with "val") to be sent to other ranks */
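   /* illustrative example (hypothetical values): if the coupling columns
      referenced by the entries of P->col_coupleBlock are, row by row,
      {0, 2, 0} and num_Pcouple_cols==3, then degree_set becomes {2, 0, 1}
      and the prefix sums below give send_ptr = {0, 2, 2, 3} */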
   couple_block = P->col_coupleBlock;
   num_Pcouple_cols = couple_block->numCols;
   block_size = P->block_size;
   copy_block_size = block_size * sizeof(double);
   degree_set = TMPMEMALLOC(num_Pcouple_cols, index_t);
   send_ptr = TMPMEMALLOC(num_Pcouple_cols+1, index_t);
   memset(degree_set, 0, sizeof(index_t) * num_Pcouple_cols);
   for (i=0; i<n; i++) {
      iptr_ub = couple_block->pattern->ptr[i+1];
      for (iptr=couple_block->pattern->ptr[i]; iptr<iptr_ub; iptr++) {
         j = couple_block->pattern->index[iptr];
         degree_set[j]++;
      }
   }

   send_ptr[0] = 0;
   for (i=0; i<num_Pcouple_cols; i++) {
      send_ptr[i+1] = send_ptr[i] + degree_set[i];
   }

   memset(degree_set, 0, sizeof(index_t) * num_Pcouple_cols);
   sum = couple_block->pattern->ptr[n];
   offset_set = TMPMEMALLOC(sum, index_t);
   data_set = TMPMEMALLOC(sum * block_size, double);
   offset = P->pattern->output_distribution->first_component[rank];

   if (P->type & MATRIX_FORMAT_DIAGONAL_BLOCK) {
      for (i=0; i<n; i++) {
         iptr_ub = couple_block->pattern->ptr[i+1];
         for (iptr=couple_block->pattern->ptr[i]; iptr<iptr_ub; iptr++) {
            j = couple_block->pattern->index[iptr];
            k = send_ptr[j] + degree_set[j];
            offset_set[k] = i + offset; /* now we have the global id for row i,
                                           which will be used as col index of R */
            memcpy(&(data_set[k*block_size]), &(couple_block->val[iptr*block_size]), copy_block_size);
            degree_set[j]++;
         }
      }
   } else {
      for (i=0; i<n; i++) {
         iptr_ub = couple_block->pattern->ptr[i+1];
         for (iptr=couple_block->pattern->ptr[i]; iptr<iptr_ub; iptr++) {
            j = couple_block->pattern->index[iptr];
            k = send_ptr[j] + degree_set[j];
            offset_set[k] = i + offset; /* now we have the global id for row i,
                                           which will be used as col index of R */
            for (irb=0; irb<row_block_size; irb++) {
               for (icb=0; icb<col_block_size; icb++) {
                  data_set[k*block_size+icb+col_block_size*irb] = couple_block->val[iptr*block_size+irb+row_block_size*icb];
               }
            }
            degree_set[j]++;
         }
      }
   }
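   /* note: in the general case above each dense row_block_size x col_block_size
      block is transposed entry by entry; in the MATRIX_FORMAT_DIAGONAL_BLOCK
      case only the diagonal of each block is stored and a diagonal block equals
      its transpose, so the plain memcpy is sufficient */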


#ifdef ESYS_MPI
   mpi_requests=TMPMEMALLOC(size*4,MPI_Request);
   mpi_stati=TMPMEMALLOC(size*4,MPI_Status);
#else
   mpi_requests=TMPMEMALLOC(size*4,int);
   mpi_stati=TMPMEMALLOC(size*4,int);
#endif

   /* send/receive degree_set to build the "ptr" for R->col_coupleBlock */
   msgs = 0;
   send = P->col_coupler->connector->send;
   recv = P->col_coupler->connector->recv;
   recv_ptr = TMPMEMALLOC(send->offsetInShared[send->numNeighbors], index_t);
   for (p=0; p<send->numNeighbors; p++) {
      i = send->offsetInShared[p];
      j = send->offsetInShared[p+1];
      k = j - i;
      if (k > 0) {
#ifdef ESYS_MPI
         MPI_Irecv(&(recv_ptr[i]), k, MPI_INT, send->neighbor[p],
                   mpi_info->msg_tag_counter+send->neighbor[p],
                   mpi_info->comm, &mpi_requests[msgs]);
#endif
         msgs++;
      }
   }

   for (p=0; p<recv->numNeighbors; p++) {
      i = recv->offsetInShared[p];
      j = recv->offsetInShared[p+1];
      k = j - i;
      if (k > 0) {
#ifdef ESYS_MPI
         MPI_Issend(&(degree_set[i]), k, MPI_INT, recv->neighbor[p],
                    mpi_info->msg_tag_counter+rank, mpi_info->comm,
                    &mpi_requests[msgs]);
#endif
         msgs++;
      }
   }

#ifdef ESYS_MPI
   MPI_Waitall(msgs, mpi_requests, mpi_stati);
#endif
   mpi_info->msg_tag_counter += size;

   TMPMEMFREE(degree_set);
   degree_set = TMPMEMALLOC(send->numNeighbors, index_t);
   memset(degree_set, 0, sizeof(index_t)*send->numNeighbors);
   for (p=0, sum=0; p<send->numNeighbors; p++) {
      iptr_ub = send->offsetInShared[p+1];
      for (iptr = send->offsetInShared[p]; iptr < iptr_ub; iptr++) {
         degree_set[p] += recv_ptr[iptr];
      }
      sum += degree_set[p];
   }
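   /* note that the roles of P's col_coupler are reversed here: the counts are
      received from the "send" neighbours (the ranks that this rank supplies
      with its coarse columns) and sent to the "recv" neighbours (the owners of
      P's coupling columns). After the exchange, degree_set is reused to hold,
      for each neighbour in the send list, the total number of entries that
      will arrive from that rank, and "sum" is the overall total. */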

   /* send/receive offset_set and data_set to build the "idx" and "val"
      for R->col_coupleBlock */
   msgs = 0;
   recv_idx = TMPMEMALLOC(sum, index_t);
   recv_val = TMPMEMALLOC(sum * block_size, double);
   for (p=0, offset=0; p<send->numNeighbors; p++) {
      if (degree_set[p]) {
#ifdef ESYS_MPI
         MPI_Irecv(&(recv_idx[offset]), degree_set[p], MPI_INT,
                   send->neighbor[p], mpi_info->msg_tag_counter+send->neighbor[p],
                   mpi_info->comm, &mpi_requests[msgs]);
         msgs++;
         MPI_Irecv(&(recv_val[offset*block_size]), degree_set[p] * block_size,
                   MPI_DOUBLE, send->neighbor[p],
                   mpi_info->msg_tag_counter+send->neighbor[p]+size,
                   mpi_info->comm, &mpi_requests[msgs]);
         offset += degree_set[p];
#endif
         msgs++;
      }
   }

   for (p=0; p<recv->numNeighbors; p++) {
      i = recv->offsetInShared[p];
      j = recv->offsetInShared[p+1];
      k = send_ptr[j] - send_ptr[i];
      if (k > 0) {
#ifdef ESYS_MPI
         MPI_Issend(&(offset_set[send_ptr[i]]), k, MPI_INT,
                    recv->neighbor[p], mpi_info->msg_tag_counter+rank,
                    mpi_info->comm, &mpi_requests[msgs]);
         msgs++;
         MPI_Issend(&(data_set[send_ptr[i]*block_size]), k*block_size, MPI_DOUBLE,
                    recv->neighbor[p], mpi_info->msg_tag_counter+rank+size,
                    mpi_info->comm, &mpi_requests[msgs]);
#endif
         msgs++;
      }
   }

   len = send->offsetInShared[send->numNeighbors];
   temp = TMPMEMALLOC(len, index_t);
   memset(temp, 0, sizeof(index_t)*len);
   for (p=1; p<len; p++) {
      temp[p] = temp[p-1] + recv_ptr[p-1];
   }
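   /* temp[] is the exclusive prefix sum of recv_ptr[]: temp[j] is the offset in
      recv_idx/recv_val at which the entries received for the j-th shared
      component start; it is used below to scatter the received data into the
      rows of R->col_coupleBlock */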

#ifdef ESYS_MPI
   MPI_Waitall(msgs, mpi_requests, mpi_stati);
#endif
   mpi_info->msg_tag_counter += 2*size;
   TMPMEMFREE(degree_set);
   TMPMEMFREE(offset_set);
   TMPMEMFREE(data_set);
   TMPMEMFREE(send_ptr);
   TMPMEMFREE(mpi_requests);
   TMPMEMFREE(mpi_stati);

   /* construct "ptr", "idx" and "val" for R->col_coupleBlock */
   ptr = MEMALLOC(n_C + 1, index_t);
   idx = MEMALLOC(sum, index_t);
   val = MEMALLOC(sum*block_size, double);
   ptr[0] = 0;
   for (i=0; i<n_C; i++) {
      icb = 0;
      for (p=0; p<send->numNeighbors; p++) {
         k = send->offsetInShared[p+1];
         for (j=send->offsetInShared[p]; j<k; j++) {
            if (send->shared[j] == i) {
               offset = ptr[i] + icb;
               len = recv_ptr[j];
               memcpy(&idx[offset], &recv_idx[temp[j]], sizeof(index_t)*len);
               memcpy(&val[offset*block_size], &recv_val[temp[j]*block_size], sizeof(double)*len*block_size);
               icb += len;
               break;
            }
         }
      }
      ptr[i+1] = ptr[i] + icb;
   }
   sum = ptr[n_C];
   TMPMEMFREE(temp);
   TMPMEMFREE(recv_ptr);
   TMPMEMFREE(recv_val);

   /* count the number of cols (num_Rcouple_cols) in R->col_coupleBlock,
      and convert the global id in "idx" into local id */
   num_Rcouple_cols = 0;
   if (sum) {
#ifdef USE_QSORTG
      qsortG(recv_idx, (size_t)sum, sizeof(index_t), Paso_comparIndex);
#else
      qsort(recv_idx, (size_t)sum, sizeof(index_t), Paso_comparIndex);
#endif
      num_Rcouple_cols = 1;
      i = recv_idx[0];
      for (j=1; j<sum; j++) {
         if (recv_idx[j] > i) {
            i = recv_idx[j];
            recv_idx[num_Rcouple_cols] = i;
            num_Rcouple_cols++;
         }
      }
      #pragma omp parallel for private(i,where_p) schedule(static)
      for (i=0; i<sum; i++) {
         where_p = (index_t *)bsearch(&(idx[i]), recv_idx, num_Rcouple_cols,
                                      sizeof(index_t), Paso_comparIndex);
         idx[i] = (index_t)(where_p - recv_idx);
      }
   }
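   /* illustrative example (hypothetical values): if the received global column
      ids in idx[] are {7, 3, 7, 12}, sorting and compacting recv_idx leaves the
      distinct ids {3, 7, 12}, so num_Rcouple_cols==3 and the bsearch above maps
      idx[] to the local ids {1, 0, 1, 2} */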

   /* prepare the receiver for the col_connector */
   dist = P->pattern->output_distribution->first_component;
   offsetInShared = TMPMEMALLOC(size+1, index_t);
   shared = TMPMEMALLOC(num_Rcouple_cols, index_t);
   numNeighbors = send->numNeighbors;
   neighbor = send->neighbor;
   memset(offsetInShared, 0, sizeof(index_t) * (size+1));
   if (num_Rcouple_cols > 0) offset = dist[neighbor[0] + 1];
   for (i=0, p=0; i<num_Rcouple_cols; i++) {
      /* col i is received from rank neighbor[p] while it is still smaller
         than "offset"; otherwise it is received from rank neighbor[p+1] */
      while (recv_idx[i] >= offset) {
         p++;
         offsetInShared[p] = i;
         offset = dist[neighbor[p] + 1];
      }
      shared[i] = i + n; /* n is the number of cols in R->mainBlock */
   }
   #pragma omp parallel for private(i) schedule(static)
   for (i=p; i<numNeighbors; i++) {
      offsetInShared[i+1] = num_Rcouple_cols;
   }
   recv = Paso_SharedComponents_alloc(n, numNeighbors,
                                      neighbor, shared, offsetInShared, 1, 0, mpi_info);
   TMPMEMFREE(recv_idx);

   /* prepare the sender for the col_connector */
   TMPMEMFREE(shared);
   numNeighbors = P->col_coupler->connector->recv->numNeighbors;
   neighbor = P->col_coupler->connector->recv->neighbor;
   shared = TMPMEMALLOC(n * numNeighbors, index_t);
   couple_pattern = P->col_coupleBlock->pattern;
   sum=0;
   memset(offsetInShared, 0, sizeof(index_t) * (size+1));
   for (p=0; p<numNeighbors; p++) {
      j = P->col_coupler->connector->recv->offsetInShared[p];
      j_ub = P->col_coupler->connector->recv->offsetInShared[p+1];
      for (i=0; i<n; i++) {
         iptr = couple_pattern->ptr[i];
         iptr_ub = couple_pattern->ptr[i+1];
         for (; iptr<iptr_ub; iptr++) {
            k = couple_pattern->index[iptr];
            if (k >= j && k < j_ub) {
               shared[sum] = i;
               sum++;
               break;
            }
         }
      }
      offsetInShared[p+1] = sum;
   }
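   /* in effect, local column i of R (a row of P owned by this rank) is added to
      the send list for neighbour p whenever P->col_coupleBlock has an entry in
      row i within that neighbour's range of coupling columns, i.e. whenever a
      row of R held by rank neighbor[p] has an entry in column i */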
   send = Paso_SharedComponents_alloc(n, numNeighbors,
                                      neighbor, shared, offsetInShared, 1, 0, mpi_info);

   /* build the col_connector based on sender and receiver */
   col_connector = Paso_Connector_alloc(send, recv);
   Paso_SharedComponents_free(recv);
   Paso_SharedComponents_free(send);
   TMPMEMFREE(offsetInShared);
   TMPMEMFREE(shared);

   couple_pattern = Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT, n_C,
                                       num_Rcouple_cols, ptr, idx);

   input_dist = Paso_Distribution_alloc(mpi_info, dist, 1, 0);
   dist = P->pattern->input_distribution->first_component;
   output_dist = Paso_Distribution_alloc(mpi_info, dist, 1, 0);
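   /* note the swap relative to P: R's input distribution is P's output
      (fine-level) distribution and R's output distribution is P's input
      (coarse-level) distribution, as expected for a transpose */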

   /* now we need to create the System Matrix
      TO BE FIXED: at this stage, we have only constructed col_couple_pattern
      and col_connector for the Restriction matrix R. To be completed,
      row_couple_pattern and row_connector need to be constructed as well */
   if (Esys_noError()) {
      pattern = Paso_SystemMatrixPattern_alloc(MATRIX_FORMAT_DEFAULT,
                  output_dist, input_dist, main_block->pattern, couple_pattern,
                  couple_pattern, col_connector, col_connector);
      out = Paso_SystemMatrix_alloc(MATRIX_FORMAT_DIAGONAL_BLOCK, pattern,
                  row_block_size, col_block_size, FALSE);
   }

   /* now fill in the matrix */
   memcpy(out->mainBlock->val, main_block->val,
          main_block->len * sizeof(double));
   memcpy(out->col_coupleBlock->val, val,
          out->col_coupleBlock->len * sizeof(double));
   MEMFREE(val);

   /* clean up */
   Paso_SparseMatrix_free(main_block);
   Paso_SystemMatrixPattern_free(pattern);
   Paso_Pattern_free(couple_pattern);
   Paso_Connector_free(col_connector);
   Paso_Distribution_free(output_dist);
   Paso_Distribution_free(input_dist);

   if (Esys_noError()) {
      return out;
   } else {
      Paso_SystemMatrix_free(out);
      return NULL;
   }
}
