/[escript]/branches/symbolic_from_3470/paso/src/AMG_Restriction.c

Revision 3868
Thu Mar 15 06:07:08 2012 UTC by caltinay
File MIME type: text/plain
File size: 13732 byte(s)
Update to latest trunk

/*******************************************************
*
* Copyright (c) 2003-2011 by University of Queensland
* Earth Systems Science Computational Center (ESSCC)
* http://www.uq.edu.au/esscc
*
* Primary Business: Queensland, Australia
* Licensed under the Open Software License version 3.0
* http://www.opensource.org/licenses/osl-3.0.php
*
*******************************************************/


/**************************************************************/

/* Paso: defines AMG Restriction Operator */

/**************************************************************/

/* Author: Lin Gao, lgao@uq.edu.au */

/**************************************************************/

#include "Paso.h"
#include "SparseMatrix.h"
#include "PasoUtil.h"
#include "Preconditioner.h"

/**************************************************************

Methods necessary for the AMG preconditioner

Constructs the n_C x n Restriction matrix R from the Prolongation
matrix P.

R->mainBlock is the transpose of P->mainBlock, but we need to
recover R->col_coupleBlock from P's data held on other ranks.

***************************************************************/

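/* Background (annotation, not from the original source): in the AMG
   hierarchy the restriction moves fine-level residuals to the coarse level,
   and the Galerkin coarse operator is A_C = R * A * P with R = P^T, which is
   why R->mainBlock is obtained by a CSR transpose of P->mainBlock. The
   sketch below (kept out of the build; plain int/double arrays and a
   hypothetical name) shows the count / prefix-sum / scatter pattern of such
   a serial CSR transpose; the MPI code in this file repeats the same idea
   for the coupled part, except that counts and entries first have to be
   exchanged between ranks. */
#if 0
static void sketch_csr_transpose(int n_rows, int n_cols,
                                 const int *ptr, const int *idx, const double *val,
                                 int *t_ptr, int *t_idx, double *t_val)
{
   int i, iptr, k;
   /* count the entries per column of A = entries per row of A^T */
   for (i = 0; i <= n_cols; i++) t_ptr[i] = 0;
   for (iptr = 0; iptr < ptr[n_rows]; iptr++) t_ptr[idx[iptr] + 1]++;
   /* prefix sum turns the counts into row pointers of A^T */
   for (i = 0; i < n_cols; i++) t_ptr[i+1] += t_ptr[i];
   /* scatter: entry (i, idx[iptr]) of A becomes entry (idx[iptr], i) of A^T */
   for (i = 0; i < n_rows; i++) {
      for (iptr = ptr[i]; iptr < ptr[i+1]; iptr++) {
         k = t_ptr[idx[iptr]]++;
         t_idx[k] = i;
         t_val[k] = val[iptr];
      }
   }
   /* t_ptr was used as an insertion cursor, shift it back by one row */
   for (i = n_cols; i > 0; i--) t_ptr[i] = t_ptr[i-1];
   t_ptr[0] = 0;
}
#endif
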
Paso_SystemMatrix* Paso_Preconditioner_AMG_getRestriction(Paso_SystemMatrix* P)
{
   Esys_MPIInfo *mpi_info=Esys_MPIInfo_getReference(P->mpi_info);
   Paso_SparseMatrix *main_block=NULL, *couple_block=NULL;
   Paso_SystemMatrix *out=NULL;
   Paso_SystemMatrixPattern *pattern=NULL;
   Paso_Distribution *input_dist=NULL, *output_dist=NULL;
   Paso_SharedComponents *send=NULL, *recv=NULL;
   Paso_Connector *col_connector=NULL;
   Paso_Pattern *couple_pattern=NULL;
   const dim_t row_block_size=P->row_block_size;
   const dim_t col_block_size=P->col_block_size;
   const dim_t n=P->mainBlock->numRows;
   const dim_t n_C=P->mainBlock->numCols;
   index_t size=mpi_info->size, rank=mpi_info->rank, *dist=NULL;
   index_t *ptr=NULL, *idx=NULL, *degree_set=NULL, *offset_set=NULL;
   index_t *send_ptr=NULL, *recv_ptr=NULL, *recv_idx=NULL;
   index_t *temp=NULL, *where_p=NULL;
   index_t num_Pcouple_cols, num_Rcouple_cols, numNeighbors;
   index_t i, j, j_ub, k, p, iptr, iptr_ub, icb, irb;
   index_t block_size, copy_block_size, sum, offset, len, msgs;
   double *val=NULL, *data_set=NULL, *recv_val=NULL;
   index_t *shared=NULL, *offsetInShared=NULL;
   Esys_MPI_rank *neighbor=NULL;
#ifdef ESYS_MPI
   MPI_Request* mpi_requests=NULL;
   MPI_Status* mpi_stati=NULL;
#else
   int *mpi_requests=NULL, *mpi_stati=NULL;
#endif

   /* get main_block of R from the transpose of P->mainBlock */
   main_block = Paso_SparseMatrix_getTranspose(P->mainBlock);

   /* prepare "ptr" for the col_coupleBlock of R: start by gathering the
      degree_set (associated with "ptr"), offset_set (associated with "idx")
      and data_set (associated with "val") that have to be sent to the other
      ranks */
   couple_block = P->col_coupleBlock;
   num_Pcouple_cols = couple_block->numCols;
   block_size = P->block_size;
   copy_block_size = block_size * sizeof(double);
   degree_set = TMPMEMALLOC(num_Pcouple_cols, index_t);
   send_ptr = TMPMEMALLOC(num_Pcouple_cols+1, index_t);
   memset(degree_set, 0, sizeof(index_t) * num_Pcouple_cols);
   for (i=0; i<n; i++) {
      iptr_ub = couple_block->pattern->ptr[i+1];
      for (iptr=couple_block->pattern->ptr[i]; iptr<iptr_ub; iptr++) {
         j = couple_block->pattern->index[iptr];
         degree_set[j]++;
      }
   }
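
   /* degree_set[j] now counts the local rows of P that couple to column j of
      P->col_coupleBlock, i.e. how many entries this rank will contribute to
      the row of that (remotely owned) coarse point in R on its owner rank */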

   send_ptr[0] = 0;
   for (i=0; i<num_Pcouple_cols; i++) {
      send_ptr[i+1] = send_ptr[i] + degree_set[i];
   }

   memset(degree_set, 0, sizeof(index_t) * num_Pcouple_cols);
   sum = couple_block->pattern->ptr[n];
   offset_set = TMPMEMALLOC(sum, index_t);
   data_set = TMPMEMALLOC(sum * block_size, double);
   offset = P->pattern->output_distribution->first_component[rank];

   if (P->type & MATRIX_FORMAT_DIAGONAL_BLOCK) {
      for (i=0; i<n; i++) {
         iptr_ub = couple_block->pattern->ptr[i+1];
         for (iptr=couple_block->pattern->ptr[i]; iptr<iptr_ub; iptr++) {
            j = couple_block->pattern->index[iptr];
            k = send_ptr[j] + degree_set[j];
            offset_set[k] = i + offset; /* now we have the global id for row i,
                                           which will be used as col index of R */
            memcpy(&(data_set[k*block_size]), &(couple_block->val[iptr*block_size]), copy_block_size);
            degree_set[j]++;
         }
      }
   } else {
      for (i=0; i<n; i++) {
         iptr_ub = couple_block->pattern->ptr[i+1];
         for (iptr=couple_block->pattern->ptr[i]; iptr<iptr_ub; iptr++) {
            j = couple_block->pattern->index[iptr];
            k = send_ptr[j] + degree_set[j];
            offset_set[k] = i + offset; /* now we have the global id for row i,
                                           which will be used as col index of R */
            for (irb=0; irb < row_block_size; irb++) {
               for (icb=0; icb < col_block_size; icb++) {
                  data_set[k*block_size+icb+col_block_size*irb] = couple_block->val[iptr*block_size+irb+row_block_size*icb];
               }
            }
            degree_set[j]++;
         }
      }
   }
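
   /* offset_set now holds, ordered column by column of P->col_coupleBlock,
      the global row ids that will become column indices of R on the
      receiving ranks, and data_set holds the matching entries; in the
      non-diagonal case the row/column positions inside each block were
      swapped above to transpose the block itself */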


#ifdef ESYS_MPI
   mpi_requests=TMPMEMALLOC(size*4,MPI_Request);
   mpi_stati=TMPMEMALLOC(size*4,MPI_Status);
#else
   mpi_requests=TMPMEMALLOC(size*4,int);
   mpi_stati=TMPMEMALLOC(size*4,int);
#endif

   /* send/receive degree_set to build the "ptr" for R->col_coupleBlock */
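   /* note the role reversal that comes with transposing P: the shared points
      that P receives from a neighbour are exactly the ones R has to send
      back, so R's receive side is built from P's send list and vice versa */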
   msgs = 0;
   send = P->col_coupler->connector->send;
   recv = P->col_coupler->connector->recv;
   recv_ptr = TMPMEMALLOC(send->offsetInShared[send->numNeighbors], index_t);
   for (p=0; p<send->numNeighbors; p++) {
      i = send->offsetInShared[p];
      j = send->offsetInShared[p+1];
      k = j - i;
      if (k > 0) {
#ifdef ESYS_MPI
         MPI_Irecv(&(recv_ptr[i]), k, MPI_INT, send->neighbor[p],
                   mpi_info->msg_tag_counter+send->neighbor[p],
                   mpi_info->comm, &mpi_requests[msgs]);
#endif
         msgs++;
      }
   }

   for (p=0; p<recv->numNeighbors; p++) {
      i = recv->offsetInShared[p];
      j = recv->offsetInShared[p+1];
      k = j - i;
      if (k > 0) {
#ifdef ESYS_MPI
         MPI_Issend(&(degree_set[i]), k, MPI_INT, recv->neighbor[p],
                    mpi_info->msg_tag_counter+rank, mpi_info->comm,
                    &mpi_requests[msgs]);
#endif
         msgs++;
      }
   }

#ifdef ESYS_MPI
   MPI_Waitall(msgs, mpi_requests, mpi_stati);
#endif
   mpi_info->msg_tag_counter += size;

   TMPMEMFREE(degree_set);
   degree_set = TMPMEMALLOC(send->numNeighbors, index_t);
   memset(degree_set, 0, sizeof(index_t)*send->numNeighbors);
   for (p=0, sum=0; p<send->numNeighbors; p++) {
      iptr_ub = send->offsetInShared[p+1];
      for (iptr = send->offsetInShared[p]; iptr < iptr_ub; iptr++) {
         degree_set[p] += recv_ptr[iptr];
      }
      sum += degree_set[p];
   }
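
   /* degree_set[p] now holds the total number of coupling entries this rank
      will receive from send-neighbour p, and sum their overall count, i.e.
      the number of non-zeros in R->col_coupleBlock */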

   /* send/receive offset_set and data_set to build the "idx" and "val"
      for R->col_coupleBlock */
   msgs = 0;
   recv_idx = TMPMEMALLOC(sum, index_t);
   recv_val = TMPMEMALLOC(sum * block_size, double);
   for (p=0, offset=0; p<send->numNeighbors; p++) {
      if (degree_set[p]) {
#ifdef ESYS_MPI
         MPI_Irecv(&(recv_idx[offset]), degree_set[p], MPI_INT,
                   send->neighbor[p], mpi_info->msg_tag_counter+send->neighbor[p],
                   mpi_info->comm, &mpi_requests[msgs]);
         msgs++;
         MPI_Irecv(&(recv_val[offset*block_size]), degree_set[p] * block_size,
                   MPI_DOUBLE, send->neighbor[p],
                   mpi_info->msg_tag_counter+send->neighbor[p]+size,
                   mpi_info->comm, &mpi_requests[msgs]);
         offset += degree_set[p];
#endif
         msgs++;
      }
   }

   for (p=0; p<recv->numNeighbors; p++) {
      i = recv->offsetInShared[p];
      j = recv->offsetInShared[p+1];
      k = send_ptr[j] - send_ptr[i];
      if (k > 0) {
#ifdef ESYS_MPI
         MPI_Issend(&(offset_set[send_ptr[i]]), k, MPI_INT,
                    recv->neighbor[p], mpi_info->msg_tag_counter+rank,
                    mpi_info->comm, &mpi_requests[msgs]);
         msgs++;
         MPI_Issend(&(data_set[send_ptr[i]*block_size]), k*block_size, MPI_DOUBLE,
                    recv->neighbor[p], mpi_info->msg_tag_counter+rank+size,
                    mpi_info->comm, &mpi_requests[msgs]);
#endif
         msgs++;
      }
   }

   len = send->offsetInShared[send->numNeighbors];
   temp = TMPMEMALLOC(len, index_t);
   memset(temp, 0, sizeof(index_t)*len);
   for (p=1; p<len; p++) {
      temp[p] = temp[p-1] + recv_ptr[p-1];
   }
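
   /* temp[j] is the exclusive prefix sum of the received per-point degrees,
      i.e. the offset in recv_idx/recv_val at which the entries for shared
      point j start */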

#ifdef ESYS_MPI
   MPI_Waitall(msgs, mpi_requests, mpi_stati);
#endif
   mpi_info->msg_tag_counter += 2*size;
   TMPMEMFREE(degree_set);
   TMPMEMFREE(offset_set);
   TMPMEMFREE(data_set);
   TMPMEMFREE(send_ptr);
   TMPMEMFREE(mpi_requests);
   TMPMEMFREE(mpi_stati);

   /* construct "ptr", "idx" and "val" for R->col_coupleBlock */
   ptr = MEMALLOC(n_C + 1, index_t);
   idx = MEMALLOC(sum, index_t);
   val = MEMALLOC(sum*block_size, double);
   ptr[0] = 0;
   for (i=0; i<n_C; i++) {
      icb = 0;
      for (p=0; p<send->numNeighbors; p++) {
         k = send->offsetInShared[p+1];
         for (j=send->offsetInShared[p]; j<k; j++) {
            if (send->shared[j] == i) {
               offset = ptr[i] + icb;
               len = recv_ptr[j];
               memcpy(&idx[offset], &recv_idx[temp[j]], sizeof(index_t)*len);
               memcpy(&val[offset*block_size], &recv_val[temp[j]*block_size], sizeof(double)*len*block_size);
               icb += len;
               break;
            }
         }
      }
      ptr[i+1] = ptr[i] + icb;
   }
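
   /* each row i of R->col_coupleBlock has been assembled by concatenating,
      over all send-neighbours that list coarse point i as shared, the
      index/value run received from that neighbour; ptr[i+1]-ptr[i] is the
      row length */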
   sum = ptr[n_C];
   TMPMEMFREE(temp);
   TMPMEMFREE(recv_ptr);
   TMPMEMFREE(recv_val);

   /* count the number of cols (num_Rcouple_cols) in R->col_coupleBlock,
      and convert the global ids in "idx" into local ids */
   num_Rcouple_cols = 0;
   if (sum) {
#ifdef USE_QSORTG
      qsortG(recv_idx, (size_t)sum, sizeof(index_t), Paso_comparIndex);
#else
      qsort(recv_idx, (size_t)sum, sizeof(index_t), Paso_comparIndex);
#endif
      num_Rcouple_cols = 1;
      i = recv_idx[0];
      for (j=1; j<sum; j++) {
         if (recv_idx[j] > i) {
            i = recv_idx[j];
            recv_idx[num_Rcouple_cols] = i;
            num_Rcouple_cols++;
         }
      }
#pragma omp parallel for private(i,where_p) schedule(static)
      for (i=0; i<sum; i++) {
         where_p = (index_t *)bsearch(&(idx[i]), recv_idx, num_Rcouple_cols,
                                      sizeof(index_t), Paso_comparIndex);
         idx[i] = (index_t)(where_p - recv_idx);
      }
   }
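
   /* recv_idx[0..num_Rcouple_cols-1] is now the sorted list of distinct
      global column ids appearing in R->col_coupleBlock, and idx[] has been
      rewritten to hold positions in that list, i.e. local column ids in
      [0, num_Rcouple_cols) */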

   /* prepare the receiver for the col_connector */
   dist = P->pattern->output_distribution->first_component;
   offsetInShared = TMPMEMALLOC(size+1, index_t);
   shared = TMPMEMALLOC(num_Rcouple_cols, index_t);
   numNeighbors = send->numNeighbors;
   neighbor = send->neighbor;
   memset(offsetInShared, 0, sizeof(index_t) * (size+1));
   if (num_Rcouple_cols > 0) offset = dist[neighbor[0] + 1];
   for (i=0, p=0; i<num_Rcouple_cols; i++) {
      /* col i is received from rank neighbor[p] as long as its global id is
         still smaller than "offset"; otherwise it is received from rank
         neighbor[p+1] */
      while (recv_idx[i] >= offset) {
         p++;
         offsetInShared[p] = i;
         offset = dist[neighbor[p] + 1];
      }
      shared[i] = i + n; /* n is the number of cols in R->mainBlock */
   }
#pragma omp parallel for private(i) schedule(static)
   for (i=p; i<numNeighbors; i++) {
      offsetInShared[i+1] = num_Rcouple_cols;
   }
   recv = Paso_SharedComponents_alloc(n, numNeighbors,
                                      neighbor, shared, offsetInShared, 1, 0, mpi_info);
   TMPMEMFREE(recv_idx);

   /* prepare the sender for the col_connector */
   TMPMEMFREE(shared);
   numNeighbors = P->col_coupler->connector->recv->numNeighbors;
   neighbor = P->col_coupler->connector->recv->neighbor;
   shared = TMPMEMALLOC(n * numNeighbors, index_t);
   couple_pattern = P->col_coupleBlock->pattern;
   sum=0;
   memset(offsetInShared, 0, sizeof(index_t) * (size+1));
   for (p=0; p<numNeighbors; p++) {
      j = P->col_coupler->connector->recv->offsetInShared[p];
      j_ub = P->col_coupler->connector->recv->offsetInShared[p+1];
      for (i=0; i<n; i++) {
         iptr = couple_pattern->ptr[i];
         iptr_ub = couple_pattern->ptr[i+1];
         for (; iptr<iptr_ub; iptr++) {
            k = couple_pattern->index[iptr];
            if (k >= j && k < j_ub) {
               shared[sum] = i;
               sum++;
               break;
            }
         }
      }
      offsetInShared[p+1] = sum;
   }
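
   /* shared[] now lists, per neighbour p, the local points i whose row in
      P->col_coupleBlock references a coarse point in p's range; these are
      exactly the columns of R that neighbour p needs, so they make up R's
      send list */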
   send = Paso_SharedComponents_alloc(n, numNeighbors,
                                      neighbor, shared, offsetInShared, 1, 0, mpi_info);

   /* build the col_connector based on sender and receiver */
   col_connector = Paso_Connector_alloc(send, recv);
   Paso_SharedComponents_free(recv);
   Paso_SharedComponents_free(send);
   TMPMEMFREE(offsetInShared);
   TMPMEMFREE(shared);

   couple_pattern = Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT, n_C,
                                       num_Rcouple_cols, ptr, idx);

   input_dist = Paso_Distribution_alloc(mpi_info, dist, 1, 0);
   dist = P->pattern->input_distribution->first_component;
   output_dist = Paso_Distribution_alloc(mpi_info, dist, 1, 0);

   /* now we need to create the System Matrix.
      TO BE FIXED: at this stage we only construct the col_couple_pattern
      and col_connector for the Restriction matrix R. To complete it, the
      row_couple_pattern and row_connector need to be constructed as well */
   if (Esys_noError()) {
      pattern = Paso_SystemMatrixPattern_alloc(MATRIX_FORMAT_DEFAULT,
                output_dist, input_dist, main_block->pattern, couple_pattern,
                couple_pattern, col_connector, col_connector);
      out = Paso_SystemMatrix_alloc(MATRIX_FORMAT_DIAGONAL_BLOCK, pattern,
                row_block_size, col_block_size, FALSE);
   }

   /* now fill in the matrix */
   memcpy(out->mainBlock->val, main_block->val,
          main_block->len * sizeof(double));
   memcpy(out->col_coupleBlock->val, val,
          out->col_coupleBlock->len * sizeof(double));
   MEMFREE(val);

   /* clean up */
   Paso_SparseMatrix_free(main_block);
   Paso_SystemMatrixPattern_free(pattern);
   Paso_Pattern_free(couple_pattern);
   Paso_Connector_free(col_connector);
   Paso_Distribution_free(output_dist);
   Paso_Distribution_free(input_dist);

   if (Esys_noError()) {
      return out;
   } else {
      Paso_SystemMatrix_free(out);
      return NULL;
   }
}
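
/* Usage sketch (annotation, not from the original source): within the AMG
   setup the restriction is derived from the prolongation once the latter has
   been built, for example

      Paso_SystemMatrix* R = Paso_Preconditioner_AMG_getRestriction(P);

   R is then applied during the multigrid cycle to move fine-level residuals
   down to the coarse level. */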