/[escript]/trunk/paso/src/AMG_Restriction.c


Revision 4286 - Thu Mar 7 04:28:11 2013 UTC by caltinay
File MIME type: text/plain
File size: 13961 byte(s)
Assorted spelling fixes.

/*****************************************************************************
*
* Copyright (c) 2003-2013 by University of Queensland
* http://www.uq.edu.au
*
* Primary Business: Queensland, Australia
* Licensed under the Open Software License version 3.0
* http://www.opensource.org/licenses/osl-3.0.php
*
* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
* Development since 2012 by School of Earth Sciences
*
*****************************************************************************/


/************************************************************************************/

/* Paso: defines AMG Restriction Operator */

/************************************************************************************/

/* Author: Lin Gao, lgao@uq.edu.au */

/************************************************************************************/

#include "Paso.h"
#include "SparseMatrix.h"
#include "PasoUtil.h"
#include "Preconditioner.h"

/************************************************************************************

   Methods necessary for AMG preconditioner

   Construct the n_C x n restriction matrix R from the prolongation matrix P.

   R->mainBlock is the transpose of P->mainBlock, but we need to
   recover R->col_coupleBlock from P's data held on other ranks.

*************************************************************************************/

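/* Note on the distributed transpose: an entry P(i,j) stored in
   P->col_coupleBlock on this rank couples local row i to a coarse
   column j owned by another rank. After transposing, that entry becomes
   R(j,i) and belongs to the owner of j, so its value, together with the
   global id of i, has to be shipped to that rank. The code below packs
   these entries column by column (degree_set, offset_set, data_set) and
   exchanges them through the connector P already maintains, with the
   send and receive roles reversed. */
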
Paso_SystemMatrix* Paso_Preconditioner_AMG_getRestriction(Paso_SystemMatrix* P)
{
   Esys_MPIInfo *mpi_info=Esys_MPIInfo_getReference(P->mpi_info);
   Paso_SparseMatrix *main_block=NULL, *couple_block=NULL;
   Paso_SystemMatrix *out=NULL;
   Paso_SystemMatrixPattern *pattern=NULL;
   Paso_Distribution *input_dist=NULL, *output_dist=NULL;
   Paso_SharedComponents *send=NULL, *recv=NULL;
   Paso_Connector *col_connector=NULL;
   Paso_Pattern *couple_pattern=NULL;
   const dim_t row_block_size=P->row_block_size;
   const dim_t col_block_size=P->col_block_size;
   const dim_t n=P->mainBlock->numRows;
   const dim_t n_C=P->mainBlock->numCols;
   index_t size=mpi_info->size, rank=mpi_info->rank, *dist=NULL;
   index_t *ptr=NULL, *idx=NULL, *degree_set=NULL, *offset_set=NULL;
   index_t *send_ptr=NULL, *recv_ptr=NULL, *recv_idx=NULL;
   index_t *temp=NULL, *where_p=NULL;
   index_t num_Pcouple_cols, num_Rcouple_cols, numNeighbors;
   index_t i, j, j_ub, k, p, iptr, iptr_ub, icb, irb;
   index_t block_size, copy_block_size, sum, offset, len, msgs;
   double *val=NULL, *data_set=NULL, *recv_val=NULL;
   index_t *shared=NULL, *offsetInShared=NULL;
   Esys_MPI_rank *neighbor=NULL;
#ifdef ESYS_MPI
   MPI_Request* mpi_requests=NULL;
   MPI_Status* mpi_stati=NULL;
#else
   int *mpi_requests=NULL, *mpi_stati=NULL;
#endif

   /* get main_block of R from the transpose of P->mainBlock */
   main_block = Paso_SparseMatrix_getTranspose(P->mainBlock);

   /* prepare "ptr" for the col_coupleBlock of R: start by gathering the
      degree_set (associated with "ptr"), offset_set (associated with
      "idx") and data_set (associated with "val") to be sent to other ranks */
   couple_block = P->col_coupleBlock;
   num_Pcouple_cols = couple_block->numCols;
   block_size = P->block_size;
   copy_block_size = block_size * sizeof(double);
   degree_set = TMPMEMALLOC(num_Pcouple_cols, index_t);
   send_ptr = TMPMEMALLOC(num_Pcouple_cols+1, index_t);
   memset(degree_set, 0, sizeof(index_t) * num_Pcouple_cols);
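   /* count the entries in each column j of P->col_coupleBlock; after the
      transpose these are the lengths of the data runs this rank will
      send for the corresponding rows of R */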
   for (i=0; i<n; i++) {
      iptr_ub = couple_block->pattern->ptr[i+1];
      for (iptr=couple_block->pattern->ptr[i]; iptr<iptr_ub; iptr++) {
         j = couple_block->pattern->index[iptr];
         degree_set[j]++;
      }
   }
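
   /* exclusive prefix sum over the column degrees: send_ptr[j] is the
      offset at which column j's entries start in the packed buffers
      offset_set and data_set built below */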
   send_ptr[0] = 0;
   for (i=0; i<num_Pcouple_cols; i++) {
      send_ptr[i+1] = send_ptr[i] + degree_set[i];
   }

   memset(degree_set, 0, sizeof(index_t) * num_Pcouple_cols);
   sum = couple_block->pattern->ptr[n];
   offset_set = TMPMEMALLOC(sum, index_t);
   data_set = TMPMEMALLOC(sum * block_size, double);
   offset = P->pattern->output_distribution->first_component[rank];

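   /* pack global column ids (offset_set) and values (data_set) column by
      column. For MATRIX_FORMAT_DIAGONAL_BLOCK the block entries can be
      copied as they are; otherwise each dense row_block_size x
      col_block_size block is transposed entry by entry, because the
      blocks of R are the transposes of the blocks of P */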
   if (P->type & MATRIX_FORMAT_DIAGONAL_BLOCK) {
      for (i=0; i<n; i++) {
         iptr_ub = couple_block->pattern->ptr[i+1];
         for (iptr=couple_block->pattern->ptr[i]; iptr<iptr_ub; iptr++) {
            j = couple_block->pattern->index[iptr];
            k = send_ptr[j] + degree_set[j];
            offset_set[k] = i + offset; /* now we have the global id for row i,
                                           which will be used as col index of R */
            memcpy(&(data_set[k*block_size]), &(couple_block->val[iptr*block_size]), copy_block_size);
            degree_set[j]++;
         }
      }
   } else {
      for (i=0; i<n; i++) {
         iptr_ub = couple_block->pattern->ptr[i+1];
         for (iptr=couple_block->pattern->ptr[i]; iptr<iptr_ub; iptr++) {
            j = couple_block->pattern->index[iptr];
            k = send_ptr[j] + degree_set[j];
            offset_set[k] = i + offset; /* now we have the global id for row i,
                                           which will be used as col index of R */
            for (irb=0; irb<row_block_size; irb++) {
               for (icb=0; icb<col_block_size; icb++) {
                  data_set[k*block_size+icb+col_block_size*irb] = couple_block->val[iptr*block_size+irb+row_block_size*icb];
               }
            }
            degree_set[j]++;
         }
      }
   }

#ifdef ESYS_MPI
   mpi_requests=TMPMEMALLOC(size*4,MPI_Request);
   mpi_stati=TMPMEMALLOC(size*4,MPI_Status);
#else
   mpi_requests=TMPMEMALLOC(size*4,int);
   mpi_stati=TMPMEMALLOC(size*4,int);
#endif

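   /* note the reversed roles below: the ranks this process sends P data
      to are the ranks it receives R data from, so P's connector's send
      list drives the receives and its receive list drives the sends */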
   /* send/receive degree_set to build the "ptr" for R->col_coupleBlock */
   msgs = 0;
   send = P->col_coupler->connector->send;
   recv = P->col_coupler->connector->recv;
   recv_ptr = TMPMEMALLOC(send->offsetInShared[send->numNeighbors], index_t);
   for (p=0; p<send->numNeighbors; p++) {
      i = send->offsetInShared[p];
      j = send->offsetInShared[p+1];
      k = j - i;
      if (k > 0) {
#ifdef ESYS_MPI
         MPI_Irecv(&(recv_ptr[i]), k, MPI_INT, send->neighbor[p],
                mpi_info->msg_tag_counter+send->neighbor[p],
                mpi_info->comm, &mpi_requests[msgs]);
#endif
         msgs++;
      }
   }

   for (p=0; p<recv->numNeighbors; p++) {
      i = recv->offsetInShared[p];
      j = recv->offsetInShared[p+1];
      k = j - i;
      if (k > 0) {
#ifdef ESYS_MPI
         MPI_Issend(&(degree_set[i]), k, MPI_INT, recv->neighbor[p],
                mpi_info->msg_tag_counter+rank, mpi_info->comm,
                &mpi_requests[msgs]);
#endif
         msgs++;
      }
   }

#ifdef ESYS_MPI
   MPI_Waitall(msgs, mpi_requests, mpi_stati);
#endif
   mpi_info->msg_tag_counter += size;

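   /* collapse the received per-column degrees into one message size per
      neighbor; "sum" accumulates the total number of entries to receive */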
   TMPMEMFREE(degree_set);
   degree_set = TMPMEMALLOC(send->numNeighbors, index_t);
   memset(degree_set, 0, sizeof(index_t)*send->numNeighbors);
   for (p=0, sum=0; p<send->numNeighbors; p++) {
      iptr_ub = send->offsetInShared[p+1];
      for (iptr = send->offsetInShared[p]; iptr < iptr_ub; iptr++) {
         degree_set[p] += recv_ptr[iptr];
      }
      sum += degree_set[p];
   }

   /* send/receive offset_set and data_set to build the "idx" and "val"
      for R->col_coupleBlock */
   msgs = 0;
   recv_idx = TMPMEMALLOC(sum, index_t);
   recv_val = TMPMEMALLOC(sum * block_size, double);
   for (p=0, offset=0; p<send->numNeighbors; p++) {
      if (degree_set[p]) {
#ifdef ESYS_MPI
         MPI_Irecv(&(recv_idx[offset]), degree_set[p], MPI_INT,
                send->neighbor[p], mpi_info->msg_tag_counter+send->neighbor[p],
                mpi_info->comm, &mpi_requests[msgs]);
         msgs++;
         MPI_Irecv(&(recv_val[offset*block_size]), degree_set[p] * block_size,
                MPI_DOUBLE, send->neighbor[p],
                mpi_info->msg_tag_counter+send->neighbor[p]+size,
                mpi_info->comm, &mpi_requests[msgs]);
         offset += degree_set[p];
#endif
         msgs++;
      }
   }

   for (p=0; p<recv->numNeighbors; p++) {
      i = recv->offsetInShared[p];
      j = recv->offsetInShared[p+1];
      k = send_ptr[j] - send_ptr[i];
      if (k > 0) {
#ifdef ESYS_MPI
         MPI_Issend(&(offset_set[send_ptr[i]]), k, MPI_INT,
                recv->neighbor[p], mpi_info->msg_tag_counter+rank,
                mpi_info->comm, &mpi_requests[msgs]);
         msgs++;
         MPI_Issend(&(data_set[send_ptr[i]*block_size]), k*block_size, MPI_DOUBLE,
                recv->neighbor[p], mpi_info->msg_tag_counter+rank+size,
                mpi_info->comm, &mpi_requests[msgs]);
#endif
         msgs++;
      }
   }

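   /* "temp" is an exclusive prefix sum over the per-column receive
      counts: temp[j] is where the entries of shared column j start in
      recv_idx and recv_val */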
   len = send->offsetInShared[send->numNeighbors];
   temp = TMPMEMALLOC(len, index_t);
   memset(temp, 0, sizeof(index_t)*len);
   for (p=1; p<len; p++) {
      temp[p] = temp[p-1] + recv_ptr[p-1];
   }

#ifdef ESYS_MPI
   MPI_Waitall(msgs, mpi_requests, mpi_stati);
#endif
   mpi_info->msg_tag_counter += 2*size;
   TMPMEMFREE(degree_set);
   TMPMEMFREE(offset_set);
   TMPMEMFREE(data_set);
   TMPMEMFREE(send_ptr);
   TMPMEMFREE(mpi_requests);
   TMPMEMFREE(mpi_stati);

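   /* row i of R->col_coupleBlock collects everything received for coarse
      dof i: send->shared lists, per neighbor, the local coarse dofs that
      neighbor contributed to, so a scan over the shared list finds the
      matching slices of recv_idx/recv_val for each row */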
   /* construct "ptr", "idx" and "val" for R->col_coupleBlock */
   ptr = MEMALLOC(n_C + 1, index_t);
   idx = MEMALLOC(sum, index_t);
   val = MEMALLOC(sum*block_size, double);
   ptr[0] = 0;
   for (i=0; i<n_C; i++) {
      icb = 0;
      for (p=0; p<send->numNeighbors; p++) {
         k = send->offsetInShared[p+1];
         for (j=send->offsetInShared[p]; j<k; j++) {
            if (send->shared[j] == i) {
               offset = ptr[i] + icb;
               len = recv_ptr[j];
               memcpy(&idx[offset], &recv_idx[temp[j]], sizeof(index_t)*len);
               memcpy(&val[offset*block_size], &recv_val[temp[j]*block_size], sizeof(double)*len*block_size);
               icb += len;
               break;
            }
         }
      }
      ptr[i+1] = ptr[i] + icb;
   }
   sum = ptr[n_C];
   TMPMEMFREE(temp);
   TMPMEMFREE(recv_ptr);
   TMPMEMFREE(recv_val);

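   /* the received global ids are sorted and de-duplicated in place; a
      binary search then maps every global id in "idx" to its position in
      the sorted unique list, which becomes its local column id */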
   /* count the number of cols (num_Rcouple_cols) in R->col_coupleBlock,
      and convert the global ids in "idx" into local ids */
   num_Rcouple_cols = 0;
   if (sum) {
#ifdef USE_QSORTG
      qsortG(recv_idx, (size_t)sum, sizeof(index_t), Paso_comparIndex);
#else
      qsort(recv_idx, (size_t)sum, sizeof(index_t), Paso_comparIndex);
#endif
      num_Rcouple_cols = 1;
      i = recv_idx[0];
      for (j=1; j<sum; j++) {
         if (recv_idx[j] > i) {
            i = recv_idx[j];
            recv_idx[num_Rcouple_cols] = i;
            num_Rcouple_cols++;
         }
      }
      #pragma omp parallel for private(i,where_p) schedule(static)
      for (i=0; i<sum; i++) {
         where_p = (index_t *)bsearch(&(idx[i]), recv_idx, num_Rcouple_cols,
                                      sizeof(index_t), Paso_comparIndex);
         idx[i] = (index_t)(where_p - recv_idx);
      }
   }

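   /* each unique remote column is owned by the neighbor whose slice of
      the output distribution contains its global id; offsetInShared
      records where each neighbor's columns start in the shared list */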
   /* prepare the receiver for the col_connector */
   dist = P->pattern->output_distribution->first_component;
   offsetInShared = TMPMEMALLOC(size+1, index_t);
   shared = TMPMEMALLOC(num_Rcouple_cols, index_t);
   numNeighbors = send->numNeighbors;
   neighbor = send->neighbor;
   memset(offsetInShared, 0, sizeof(index_t) * (size+1));
   if (num_Rcouple_cols > 0) offset = dist[neighbor[0] + 1];
   for (i=0, p=0; i<num_Rcouple_cols; i++) {
      /* col i is received from rank neighbor[p] as long as its global id
         is still smaller than "offset"; otherwise it is received from a
         later neighbor */
      while (recv_idx[i] >= offset) {
         p++;
         offsetInShared[p] = i;
         offset = dist[neighbor[p] + 1];
      }
      shared[i] = i + n; /* n is the number of cols in R->mainBlock */
   }
   #pragma omp parallel for private(i) schedule(static)
   for (i=p; i<numNeighbors; i++) {
      offsetInShared[i+1] = num_Rcouple_cols;
   }
   recv = Paso_SharedComponents_alloc(n, numNeighbors,
                neighbor, shared, offsetInShared, 1, 0, mpi_info);
   TMPMEMFREE(recv_idx);

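   /* a local row i of R has to be sent to neighbor p whenever
      P->col_coupleBlock has an entry in row i that refers to one of the
      columns received from p; the scan below records those rows */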
   /* prepare the sender for the col_connector */
   TMPMEMFREE(shared);
   numNeighbors = P->col_coupler->connector->recv->numNeighbors;
   neighbor = P->col_coupler->connector->recv->neighbor;
   shared = TMPMEMALLOC(n * numNeighbors, index_t);
   couple_pattern = P->col_coupleBlock->pattern;
   sum=0;
   memset(offsetInShared, 0, sizeof(index_t) * (size+1));
   for (p=0; p<numNeighbors; p++) {
      j = P->col_coupler->connector->recv->offsetInShared[p];
      j_ub = P->col_coupler->connector->recv->offsetInShared[p+1];
      for (i=0; i<n; i++) {
         iptr = couple_pattern->ptr[i];
         iptr_ub = couple_pattern->ptr[i+1];
         for (; iptr<iptr_ub; iptr++) {
            k = couple_pattern->index[iptr];
            if (k >= j && k < j_ub) {
               shared[sum] = i;
               sum++;
               break;
            }
         }
      }
      offsetInShared[p+1] = sum;
   }
   send = Paso_SharedComponents_alloc(n, numNeighbors,
                neighbor, shared, offsetInShared, 1, 0, mpi_info);

   /* build the col_connector based on sender and receiver */
   col_connector = Paso_Connector_alloc(send, recv);
   Paso_SharedComponents_free(recv);
   Paso_SharedComponents_free(send);
   TMPMEMFREE(offsetInShared);
   TMPMEMFREE(shared);

   couple_pattern = Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT, n_C,
                num_Rcouple_cols, ptr, idx);

   input_dist = Paso_Distribution_alloc(mpi_info, dist, 1, 0);
   dist = P->pattern->input_distribution->first_component;
   output_dist = Paso_Distribution_alloc(mpi_info, dist, 1, 0);

   /* now we need to create the System Matrix.
      TO BE FIXED: at this stage we only construct the col_couple_pattern
      and col_connector for the restriction matrix R. To be complete,
      the row_couple_pattern and row_connector need to be constructed as well */
   if (Esys_noError()) {
      pattern = Paso_SystemMatrixPattern_alloc(MATRIX_FORMAT_DEFAULT,
                output_dist, input_dist, main_block->pattern, couple_pattern,
                couple_pattern, col_connector, col_connector);
      out = Paso_SystemMatrix_alloc(MATRIX_FORMAT_DIAGONAL_BLOCK, pattern,
                row_block_size, col_block_size, FALSE);
   }

   /* now fill in the matrix */
   memcpy(out->mainBlock->val, main_block->val,
          main_block->len * sizeof(double));
   memcpy(out->col_coupleBlock->val, val,
          out->col_coupleBlock->len * sizeof(double));
   MEMFREE(val);

   /* clean up */
   Paso_SparseMatrix_free(main_block);
   Paso_SystemMatrixPattern_free(pattern);
   Paso_Pattern_free(couple_pattern);
   Paso_Connector_free(col_connector);
   Paso_Distribution_free(output_dist);
   Paso_Distribution_free(input_dist);

   if (Esys_noError()) {
      return out;
   } else {
      Paso_SystemMatrix_free(out);
      return NULL;
   }
}
