/[escript]/trunk/paso/src/AMG_Prolongation.c
ViewVC logotype

Contents of /trunk/paso/src/AMG_Prolongation.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4286 - (show annotations)
Thu Mar 7 04:28:11 2013 UTC (6 years, 5 months ago) by caltinay
File MIME type: text/plain
File size: 52006 byte(s)
Assorted spelling fixes.

1 /*****************************************************************************
2 *
3 * Copyright (c) 2003-2013 by University of Queensland
4 * http://www.uq.edu.au
5 *
6 * Primary Business: Queensland, Australia
7 * Licensed under the Open Software License version 3.0
8 * http://www.opensource.org/licenses/osl-3.0.php
9 *
10 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
11 * Development since 2012 by School of Earth Sciences
12 *
13 *****************************************************************************/
14
15
16 /************************************************************************************/
17
18 /* Paso: defines AMG prolongation */
19
20 /************************************************************************************/
21
22 /* Author: Artak Amirbekyan, artak@uq.edu.au, l.gross@uq.edu.au */
23
24 /************************************************************************************/
25
26 #include "Paso.h"
27 #include "SparseMatrix.h"
28 #include "PasoUtil.h"
29 #include "Preconditioner.h"
30
31 /************************************************************************************
32
33 Methods necessary for AMG preconditioner
34
35 construct n x n_C the prolongation matrix P from A_p.
36
37 The columns in A_p to be considered are marked by counter_C[n] where
38 an unknown i to be considered in P is marked by 0<= counter_C[i] < n_C
39 and counter_C[i] gives the new column number in P.
40 S defines the strong connections.
41
42 The pattern of P is formed as follows:
43
44 If row i is in C (counter_C[i]>=0), then P[i,j]=1 if j==counter_C[i] or 0 otherwise.
45 If row i is not C, then P[i,j] <> 0 if counter_C[k]==j (k in C) and (i,k) is a strong connection.
46
47 Two settings for P are implemented (see below)
48
49 */
50
51 Paso_SystemMatrix* Paso_Preconditioner_AMG_getProlongation(Paso_SystemMatrix* A_p,
52 const index_t* offset_S, const dim_t* degree_S, const index_t* S,
53 const dim_t n_C, index_t* counter_C, const index_t interpolation_method)
54 {
55 Esys_MPIInfo *mpi_info=Esys_MPIInfo_getReference(A_p->mpi_info);
56 Paso_SystemMatrix *out=NULL;
57 Paso_SystemMatrixPattern *pattern=NULL;
58 Paso_Distribution *input_dist=NULL, *output_dist=NULL;
59 Paso_SharedComponents *send =NULL, *recv=NULL;
60 Paso_Connector *col_connector=NULL;
61 Paso_Pattern *main_pattern=NULL, *couple_pattern=NULL;
62 const dim_t row_block_size=A_p->row_block_size;
63 const dim_t col_block_size=A_p->col_block_size;
64 const dim_t my_n=A_p->mainBlock->numCols;
65 const dim_t overlap_n=A_p->col_coupleBlock->numCols;
66 const dim_t num_threads=omp_get_max_threads();
67 index_t size=mpi_info->size, *dist=NULL;
68 index_t *main_p=NULL, *couple_p=NULL, *main_idx=NULL, *couple_idx=NULL;
69 index_t *shared=NULL, *offsetInShared=NULL;
70 index_t *recv_shared=NULL, *send_shared=NULL;
71 index_t sum, i, j, k, l, p, q, iptr;
72 index_t my_n_C, global_label, num_neighbors;
73 #ifdef ESYS_MPI
74 index_t rank=mpi_info->rank;
75 #endif
76 Esys_MPI_rank *neighbor=NULL;
77 #ifdef ESYS_MPI
78 MPI_Request* mpi_requests=NULL;
79 MPI_Status* mpi_stati=NULL;
80 #else
81 int *mpi_requests=NULL, *mpi_stati=NULL;
82 #endif
83
84 /* number of C points in current distribution */
85 my_n_C = 0;
86 sum=0;
87 if (num_threads>1) {
88 #pragma omp parallel private(i,sum)
89 {
90 sum=0;
91 #pragma omp for schedule(static)
92 for (i=0;i<my_n;++i) {
93 if (counter_C[i] != -1) {
94 sum++;
95 }
96 }
97 #pragma omp critical
98 {
99 my_n_C += sum;
100 }
101 }
102 } else { /* num_threads=1 */
103 for (i=0;i<my_n;++i) {
104 if (counter_C[i] != -1) {
105 my_n_C++;
106 }
107 }
108 }
109
110 /* create row distribution (output_distribution) and col distribution
111 (input_distribution) */
112 /* ??? should I alloc an new Esys_MPIInfo object or reuse the one in
113 system matrix A. for now, I'm reuse A->mpi_info ??? */
114 dist = A_p->pattern->output_distribution->first_component;
115 output_dist=Paso_Distribution_alloc(mpi_info, dist, 1, 0);
116 dist = TMPMEMALLOC(size+1, index_t); /* now prepare for col distribution */
117 Esys_checkPtr(dist);
118 #ifdef ESYS_MPI
119 MPI_Allgather(&my_n_C, 1, MPI_INT, dist, 1, MPI_INT, mpi_info->comm);
120 #endif
121 global_label=0;
122 for (i=0; i<size; i++) {
123 k = dist[i];
124 dist[i] = global_label;
125 global_label += k;
126 }
127 dist[size] = global_label;
128
129 input_dist=Paso_Distribution_alloc(mpi_info, dist, 1, 0);
130 TMPMEMFREE(dist);
131
132 /* create pattern for mainBlock and coupleBlock */
133 main_p = MEMALLOC(my_n+1, index_t);
134 couple_p = MEMALLOC(my_n+1, index_t);
135 if (!(Esys_checkPtr(main_p) || Esys_checkPtr(couple_p))) {
136 /* count the number of entries per row in the Prolongation matrix :*/
137 #pragma omp parallel for private(i,l,k,iptr,j,p) schedule(static)
138 for (i=0; i<my_n; i++) {
139 l = 0;
140 if (counter_C[i]>=0) {
141 k = 1; /* i is a C unknown */
142 } else {
143 k = 0;
144 iptr = offset_S[i];
145 for (p=0; p<degree_S[i]; p++) {
146 j = S[iptr+p]; /* this is a strong connection */
147 if (counter_C[j]>=0) { /* and is in C */
148 if (j <my_n) k++;
149 else {
150 l++;
151 }
152 }
153 }
154 }
155 main_p[i] = k;
156 couple_p[i] = l;
157 }
158
159 /* number of unknowns in the col-coupleBlock of the interpolation matrix */
160 sum = 0;
161 for (i=0;i<overlap_n;++i) {
162 if (counter_C[i+my_n] > -1) {
163 counter_C[i+my_n] -= my_n_C;
164 sum++;
165 }
166 }
167
168 /* allocate and create index vector for prolongation: */
169 p = Paso_Util_cumsum(my_n, main_p);
170 main_p[my_n] = p;
171 main_idx = MEMALLOC(p, index_t);
172 p = Paso_Util_cumsum(my_n, couple_p);
173 couple_p[my_n] = p;
174 couple_idx = MEMALLOC(p, index_t);
175 if (!(Esys_checkPtr(main_idx) || Esys_checkPtr(couple_idx))) {
176 #pragma omp parallel for private(i,k,l,iptr,j,p) schedule(static)
177 for (i=0; i<my_n; i++) {
178 if (counter_C[i]>=0) {
179 main_idx[main_p[i]]=counter_C[i]; /* i is a C unknown */
180 } else {
181 k = 0;
182 l = 0;
183 iptr = offset_S[i];
184 for (p=0; p<degree_S[i]; p++) {
185 j = S[iptr+p]; /* this is a strong connection */
186 if (counter_C[j] >=0) { /* and is in C */
187 if (j < my_n) {
188 main_idx[main_p[i]+k] = counter_C[j];
189 k++;
190 } else {
191 couple_idx[couple_p[i]+l] = counter_C[j];
192 l++;
193 }
194 }
195 }
196 }
197 }
198 }
199 }
200
201 if (Esys_noError()) {
202 main_pattern = Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT, my_n,
203 my_n_C, main_p, main_idx);
204 couple_pattern = Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT, my_n,
205 sum, couple_p, couple_idx);
206 } else {
207 MEMFREE(main_p);
208 MEMFREE(main_idx);
209 MEMFREE(couple_p);
210 MEMFREE(couple_idx);
211 }
212
213 /* prepare the receiver for the col_connector.
214 Note that the allocation for "shared" assumes the send and receive buffer
215 of the interpolation matrix P is no larger than that of matrix A_p. */
216 neighbor = TMPMEMALLOC(size, Esys_MPI_rank);
217 offsetInShared = TMPMEMALLOC(size+1, index_t);
218 recv = A_p->col_coupler->connector->recv;
219 send = A_p->col_coupler->connector->send;
220 i = recv->numSharedComponents;
221 recv_shared = TMPMEMALLOC(i,index_t);
222 memset(recv_shared, 0, sizeof(index_t)*i);
223 k = send->numSharedComponents;
224 send_shared = TMPMEMALLOC(k,index_t);
225 if (k > i) i = k;
226 shared = TMPMEMALLOC(i, index_t);
227
228 #ifdef ESYS_MPI
229 mpi_requests=TMPMEMALLOC(size*2,MPI_Request);
230 mpi_stati=TMPMEMALLOC(size*2,MPI_Status);
231 #else
232 mpi_requests=TMPMEMALLOC(size*2,int);
233 mpi_stati=TMPMEMALLOC(size*2,int);
234 #endif
235
236 for (p=0; p<send->numNeighbors; p++) {
237 i = send->offsetInShared[p];
238 #ifdef ESYS_MPI
239 MPI_Irecv (&(send_shared[i]), send->offsetInShared[p+1]-i, MPI_INT,
240 send->neighbor[p], mpi_info->msg_tag_counter+send->neighbor[p],
241 mpi_info->comm, &mpi_requests[p]);
242 #endif
243 }
244
245 num_neighbors = 0;
246 q = 0;
247 p = recv->numNeighbors;
248 offsetInShared[0]=0;
249 for (i=0; i<p; i++) {
250 l = 0;
251 k = recv->offsetInShared[i+1];
252 for (j=recv->offsetInShared[i]; j<k; j++) {
253 if (counter_C[recv->shared[j]] > -1) {
254 shared[q] = my_n_C + q;
255 recv_shared[recv->shared[j]-my_n] = 1;
256 q++;
257 l = 1;
258 }
259 }
260 if (l == 1) {
261 iptr = recv->neighbor[i];
262 neighbor[num_neighbors] = iptr;
263 num_neighbors++;
264 offsetInShared[num_neighbors] = q;
265 }
266 #ifdef ESYS_MPI
267 MPI_Issend(&(recv_shared[recv->offsetInShared[i]]),
268 k-recv->offsetInShared[i], MPI_INT, recv->neighbor[i],
269 mpi_info->msg_tag_counter+rank, mpi_info->comm,
270 &mpi_requests[i+send->numNeighbors]);
271 #endif
272 }
273 recv = Paso_SharedComponents_alloc(my_n_C, num_neighbors, neighbor, shared,
274 offsetInShared, 1, 0, mpi_info);
275
276 /* now we can build the sender */
277 #ifdef ESYS_MPI
278 MPI_Waitall(recv->numNeighbors+send->numNeighbors, mpi_requests, mpi_stati);
279 #endif
280 mpi_info->msg_tag_counter += size;
281 TMPMEMFREE(mpi_requests);
282 TMPMEMFREE(mpi_stati);
283
284 num_neighbors = 0;
285 q = 0;
286 p = send->numNeighbors;
287 offsetInShared[0]=0;
288 for (i=0; i<p; i++) {
289 l = 0;
290 k = send->offsetInShared[i+1];
291 for (j=send->offsetInShared[i]; j<k; j++) {
292 if (send_shared[j] == 1) {
293 shared[q] = counter_C[send->shared[j]];
294 q++;
295 l = 1;
296 }
297 }
298 if (l == 1) {
299 iptr = send->neighbor[i];
300 neighbor[num_neighbors] = iptr;
301 num_neighbors++;
302 offsetInShared[num_neighbors] = q;
303 }
304 }
305
306 send = Paso_SharedComponents_alloc(my_n_C, num_neighbors, neighbor, shared,
307 offsetInShared, 1, 0, mpi_info);
308 col_connector = Paso_Connector_alloc(send, recv);
309 Paso_SharedComponents_free(recv);
310 Paso_SharedComponents_free(send);
311 TMPMEMFREE(recv_shared);
312 TMPMEMFREE(send_shared);
313 TMPMEMFREE(neighbor);
314 TMPMEMFREE(offsetInShared);
315 TMPMEMFREE(shared);
316
317 /* now we need to create the System Matrix
318 TO BE FIXED: at this stage, we only construction col_couple_pattern
319 and col_connector for interpolation matrix P. To be completed,
320 row_couple_pattern and row_connector need to be constructed as well */
321 if (Esys_noError()) {
322 pattern = Paso_SystemMatrixPattern_alloc(MATRIX_FORMAT_DEFAULT,
323 output_dist, input_dist, main_pattern, couple_pattern,
324 couple_pattern, col_connector, col_connector);
325 out = Paso_SystemMatrix_alloc(MATRIX_FORMAT_DIAGONAL_BLOCK, pattern,
326 row_block_size, col_block_size, FALSE);
327 }
328
329 /* now fill in the matrix */
330 if (Esys_noError()) {
331 if ((interpolation_method == PASO_CLASSIC_INTERPOLATION_WITH_FF_COUPLING)
332 || ( interpolation_method == PASO_CLASSIC_INTERPOLATION) ) {
333 if (row_block_size == 1) {
334 Paso_Preconditioner_AMG_setClassicProlongation(out, A_p, offset_S, degree_S, S, counter_C);
335 } else {
336 Paso_Preconditioner_AMG_setClassicProlongation_Block(out, A_p, offset_S, degree_S, S, counter_C);
337 }
338 } else {
339 if (row_block_size == 1) {
340 Paso_Preconditioner_AMG_setDirectProlongation(out, A_p, offset_S, degree_S, S, counter_C);
341 } else {
342 Paso_Preconditioner_AMG_setDirectProlongation_Block(out, A_p, offset_S, degree_S, S, counter_C);
343 }
344 }
345 }
346
347 /* clean up */
348 Paso_SystemMatrixPattern_free(pattern);
349 Paso_Pattern_free(main_pattern);
350 Paso_Pattern_free(couple_pattern);
351 Paso_Connector_free(col_connector);
352 Paso_Distribution_free(output_dist);
353 Paso_Distribution_free(input_dist);
354 if (Esys_noError()) {
355 return out;
356 } else {
357 Paso_SystemMatrix_free(out);
358 return NULL;
359 }
360
361 }
362
363 /*
364 Direct Prolongation:
365 -------------------
366
367 If row i is in C (counter_C[i]>=0), then P[i,j]=1 if j==counter_C[i] or 0 otherwise.
368 If row i is not C, then P[i,j] = - a[i] * A[i,k]/A[i,i] with j=counter_C[k]>=0 and k in S
369
370 and a[i]=
371 alpha[i] = sum_s min(A[i,s],0)/(sum_{s in S and C} min(A[i,s],0)) A[i,k]<0
372 or if
373 beta[i] = sum_s max(A[i,s],0)/(sum_{s in S and C} max(A[i,s],0)) A[i,k]>0
374
375
376 */
377
378 void Paso_Preconditioner_AMG_setDirectProlongation(Paso_SystemMatrix* P,
379 Paso_SystemMatrix* A, const index_t* offset_S, const dim_t* degree_S,
380 const index_t* S, const index_t *counter_C) {
381 Paso_SparseMatrix *main_block=P->mainBlock;
382 Paso_SparseMatrix *couple_block=P->col_coupleBlock;
383 Paso_Pattern *main_pattern=main_block->pattern;
384 Paso_Pattern *couple_pattern=couple_block->pattern;
385 const dim_t my_n=A->mainBlock->numRows;
386 index_t range;
387
388 dim_t i;
389 register double alpha, beta, sum_all_neg, sum_all_pos, sum_strong_neg, sum_strong_pos, A_ij, A_ii, rtmp;
390 register index_t iPtr, j, offset;
391 index_t *where_p, *start_p;
392
393 #pragma omp parallel for private(i,offset,sum_all_neg,sum_all_pos,sum_strong_neg,sum_strong_pos,A_ii,range,iPtr,j,A_ij,start_p,where_p,alpha,beta,rtmp) schedule(static)
394 for (i=0; i<my_n; i++) {
395 if (counter_C[i]>=0) {
396 offset = main_pattern->ptr[i];
397 main_block->val[offset]=1.; /* i is a C row */
398 } else if ((main_pattern->ptr[i + 1] > main_pattern->ptr[i]) ||
399 (couple_pattern->ptr[i +1] > couple_pattern->ptr[i])) {
400 /* if i is an F row we first calculate alpha and beta: */
401
402 sum_all_neg=0; /* sum of all negative values in row i of A */
403 sum_all_pos=0; /* sum of all positive values in row i of A */
404 sum_strong_neg=0; /* sum of all negative values A_ij where j is in C and strongly connected to i*/
405 sum_strong_pos=0; /* sum of all positive values A_ij where j is in C and strongly connected to i*/
406 A_ii=0;
407
408 /* first check the mainBlock */
409 range = A->mainBlock->pattern->ptr[i + 1];
410 for (iPtr=A->mainBlock->pattern->ptr[i]; iPtr<range; iPtr++) {
411 j=A->mainBlock->pattern->index[iPtr];
412 A_ij=A->mainBlock->val[iPtr];
413 if(j==i) {
414 A_ii=A_ij;
415 } else {
416
417 if(A_ij< 0) {
418 sum_all_neg+=A_ij;
419 } else {
420 sum_all_pos+=A_ij;
421 }
422
423 if (counter_C[j]>=0) {
424 /* is i strongly connected with j? We search for counter_C[j] in P[i,:] */
425 start_p=&(main_pattern->index[main_pattern->ptr[i]]);
426 where_p=(index_t*)bsearch(&(counter_C[j]), start_p,
427 main_pattern->ptr[i + 1] - main_pattern->ptr[i],
428 sizeof(index_t),
429 Paso_comparIndex);
430 if (! (where_p == NULL) ) { /* yes i strongly connected with j */
431 offset = main_pattern->ptr[i]+ (index_t)(where_p-start_p);
432 main_block->val[offset]=A_ij; /* will be modified later */
433 if (A_ij< 0) {
434 sum_strong_neg+=A_ij;
435 } else {
436 sum_strong_pos+=A_ij;
437 }
438 }
439 }
440
441 }
442 }
443
444 /* now we deal with the col_coupleBlock */
445 range = A->col_coupleBlock->pattern->ptr[i + 1];
446 for (iPtr=A->col_coupleBlock->pattern->ptr[i]; iPtr<range; iPtr++) {
447 j=A->col_coupleBlock->pattern->index[iPtr];
448 A_ij=A->col_coupleBlock->val[iPtr];
449 if(A_ij< 0) {
450 sum_all_neg+=A_ij;
451 } else {
452 sum_all_pos+=A_ij;
453 }
454
455 if (counter_C[j+my_n]>=0) {
456 /* is i strongly connected with j? We search for counter_C[j] in P[i,:] */
457 start_p=&(couple_pattern->index[couple_pattern->ptr[i]]);
458 where_p=(index_t*)bsearch(&(counter_C[j+my_n]), start_p,
459 couple_pattern->ptr[i + 1] - couple_pattern->ptr[i],
460 sizeof(index_t),
461 Paso_comparIndex);
462 if (! (where_p == NULL) ) { /* yes i strongly connect with j */
463 offset = couple_pattern->ptr[i]+ (index_t)(where_p-start_p);
464 couple_block->val[offset]=A_ij; /* will be modified later */
465 if (A_ij< 0) {
466 sum_strong_neg+=A_ij;
467 } else {
468 sum_strong_pos+=A_ij;
469 }
470 }
471 }
472 }
473
474 if(sum_strong_neg<0) {
475 alpha= sum_all_neg/sum_strong_neg;
476 } else {
477 alpha=0;
478 }
479 if(sum_strong_pos>0) {
480 beta= sum_all_pos/sum_strong_pos;
481 } else {
482 beta=0;
483 A_ii+=sum_all_pos;
484 }
485 if ( A_ii > 0.) {
486 rtmp=(-1.)/A_ii;
487 alpha*=rtmp;
488 beta*=rtmp;
489 }
490
491 range = main_pattern->ptr[i + 1];
492 for (iPtr=main_pattern->ptr[i]; iPtr<range; iPtr++) {
493 A_ij=main_block->val[iPtr];
494 if (A_ij > 0 ) {
495 main_block->val[iPtr] = A_ij * beta;
496 } else {
497 main_block->val[iPtr] = A_ij * alpha;
498 }
499 }
500
501 range = couple_pattern->ptr[i + 1];
502 for (iPtr=couple_pattern->ptr[i]; iPtr<range; iPtr++) {
503 A_ij=couple_block->val[iPtr];
504 if (A_ij > 0 ) {
505 couple_block->val[iPtr] = A_ij * beta;
506 } else {
507 couple_block->val[iPtr] = A_ij * alpha;
508 }
509 }
510 }
511 }
512 }
513
514 void Paso_Preconditioner_AMG_setDirectProlongation_Block(Paso_SystemMatrix* P,
515 Paso_SystemMatrix* A, const index_t* offset_S, const dim_t* degree_S,
516 const index_t* S, const index_t *counter_C) {
517 Paso_SparseMatrix *main_block=P->mainBlock;
518 Paso_SparseMatrix *couple_block=P->col_coupleBlock;
519 Paso_Pattern *main_pattern=main_block->pattern;
520 Paso_Pattern *couple_pattern=couple_block->pattern;
521 const dim_t row_block_size=A->row_block_size;
522 const dim_t A_block = A->block_size;
523 const dim_t my_n=A->mainBlock->numRows;
524 index_t range;
525
526 dim_t i;
527 double *alpha, *beta, *sum_all_neg, *sum_all_pos, *sum_strong_neg, *sum_strong_pos, *A_ii;
528 register double A_ij, rtmp;
529 register index_t iPtr, j, offset, ib;
530 index_t *where_p, *start_p;
531
532 #pragma omp parallel private(i,offset,ib,sum_all_neg,sum_all_pos,sum_strong_neg,sum_strong_pos,A_ii,range,iPtr,j,A_ij,start_p,where_p,alpha,beta,rtmp)
533 {
534 sum_all_neg=TMPMEMALLOC(row_block_size, double); /* sum of all negative values in row i of A */
535 sum_all_pos=TMPMEMALLOC(row_block_size, double); /* sum of all positive values in row i of A */
536 sum_strong_neg=TMPMEMALLOC(row_block_size, double); /* sum of all negative values A_ij where j is in C and strongly connected to i*/
537 sum_strong_pos=TMPMEMALLOC(row_block_size, double); /* sum of all positive values A_ij where j is in C and strongly connected to i*/
538 alpha=TMPMEMALLOC(row_block_size, double);
539 beta=TMPMEMALLOC(row_block_size, double);
540 A_ii=TMPMEMALLOC(row_block_size, double);
541
542 #pragma omp for schedule(static)
543 for (i=0;i<my_n;++i) {
544 if (counter_C[i]>=0) { /* i is a C row */
545 offset = main_pattern->ptr[i];
546 for (ib =0; ib<row_block_size; ++ib)
547 main_block->val[row_block_size*offset+ib]=1.;
548 } else if ((main_pattern->ptr[i + 1] > main_pattern->ptr[i]) ||
549 (couple_pattern->ptr[i + 1] > couple_pattern->ptr[i])) {
550 /* if i is an F row we first calculate alpha and beta: */
551 for (ib =0; ib<row_block_size; ++ib) {
552 sum_all_neg[ib]=0;
553 sum_all_pos[ib]=0;
554 sum_strong_neg[ib]=0;
555 sum_strong_pos[ib]=0;
556 A_ii[ib]=0;
557 }
558
559 /* first check the mainBlock */
560 range = A->mainBlock->pattern->ptr[i + 1];
561 for (iPtr=A->mainBlock->pattern->ptr[i]; iPtr<range; iPtr++) {
562 j=A->mainBlock->pattern->index[iPtr];
563 if(j==i) {
564 for (ib =0; ib<row_block_size; ++ib)
565 A_ii[ib]=A->mainBlock->val[A_block*iPtr+ib+row_block_size*ib];
566 } else {
567 for (ib =0; ib<row_block_size; ++ib) {
568 A_ij=A->mainBlock->val[A_block*iPtr+ib+row_block_size*ib];
569 if(A_ij< 0) {
570 sum_all_neg[ib]+=A_ij;
571 } else {
572 sum_all_pos[ib]+=A_ij;
573 }
574 }
575
576 if (counter_C[j]>=0) {
577 /* is i strongly connected with j? We search for counter_C[j] in P[i,:] */
578 start_p=&(main_pattern->index[main_pattern->ptr[i]]);
579 where_p=(index_t*)bsearch(&(counter_C[j]), start_p,
580 main_pattern->ptr[i + 1]-main_pattern->ptr[i],
581 sizeof(index_t),
582 Paso_comparIndex);
583 if (! (where_p == NULL) ) { /* yes i strongly connected with j */
584 offset = main_pattern->ptr[i]+ (index_t)(where_p-start_p);
585 for (ib =0; ib<row_block_size; ++ib) {
586 A_ij=A->mainBlock->val[A_block*iPtr+ib+row_block_size*ib];
587 main_block->val[row_block_size*offset+ib]=A_ij; /* will be modified later */
588 if (A_ij< 0) {
589 sum_strong_neg[ib]+=A_ij;
590 } else {
591 sum_strong_pos[ib]+=A_ij;
592 }
593 }
594 }
595 }
596
597 }
598 }
599
600 /* now we deal with the col_coupleBlock */
601 range = A->col_coupleBlock->pattern->ptr[i + 1];
602 for (iPtr=A->col_coupleBlock->pattern->ptr[i]; iPtr<range; iPtr++) {
603 j=A->col_coupleBlock->pattern->index[iPtr];
604 for (ib =0; ib<row_block_size; ++ib) {
605 A_ij=A->col_coupleBlock->val[A_block*iPtr+ib+row_block_size*ib];
606 if(A_ij< 0) {
607 sum_all_neg[ib]+=A_ij;
608 } else {
609 sum_all_pos[ib]+=A_ij;
610 }
611 }
612
613 if (counter_C[j+my_n]>=0) {
614 /* is i strongly connect with j? We search for counter_C[j] in P[i,:] */
615 start_p=&(couple_pattern->index[couple_pattern->ptr[i]]);
616 where_p=(index_t*)bsearch(&(counter_C[j+my_n]), start_p,
617 couple_pattern->ptr[i + 1]-couple_pattern->ptr[i],
618 sizeof(index_t),
619 Paso_comparIndex);
620 if (! (where_p == NULL) ) { /* yes i strongly connect with j */
621 offset = couple_pattern->ptr[i]+ (index_t)(where_p-start_p);
622 for (ib =0; ib<row_block_size; ++ib) {
623 A_ij=A->col_coupleBlock->val[A_block*iPtr+ib+row_block_size*ib];
624 couple_block->val[row_block_size*offset+ib]=A_ij; /* will be modified later */
625
626 if (A_ij< 0) {
627 sum_strong_neg[ib]+=A_ij;
628 } else {
629 sum_strong_pos[ib]+=A_ij;
630 }
631 }
632 }
633 }
634 }
635
636 for (ib =0; ib<row_block_size; ++ib) {
637 if(sum_strong_neg[ib]<0) {
638 alpha[ib]= sum_all_neg[ib]/sum_strong_neg[ib];
639 } else {
640 alpha[ib]=0;
641 }
642 if(sum_strong_pos[ib]>0) {
643 beta[ib]= sum_all_pos[ib]/sum_strong_pos[ib];
644 } else {
645 beta[ib]=0;
646 A_ii[ib]+=sum_all_pos[ib];
647 }
648 if ( A_ii[ib] > 0.) {
649 rtmp=(-1./A_ii[ib]);
650 alpha[ib]*=rtmp;
651 beta[ib]*=rtmp;
652 }
653 }
654
655 range = main_pattern->ptr[i + 1];
656 for (iPtr=main_pattern->ptr[i]; iPtr<range; iPtr++) {
657 for (ib =0; ib<row_block_size; ++ib) {
658 A_ij=main_block->val[row_block_size*iPtr+ib];
659 if (A_ij > 0 ) {
660 main_block->val[row_block_size*iPtr+ib] = A_ij * beta[ib];
661 } else {
662 main_block->val[row_block_size*iPtr+ib] = A_ij * alpha[ib];
663 }
664 }
665 }
666
667 range = couple_pattern->ptr[i + 1];
668 for (iPtr=couple_pattern->ptr[i]; iPtr<range; iPtr++) {
669 for (ib =0; ib<row_block_size; ++ib) {
670 A_ij=couple_block->val[row_block_size*iPtr+ib];
671 if (A_ij > 0 ) {
672 couple_block->val[row_block_size*iPtr+ib] = A_ij * beta[ib];
673 } else {
674 couple_block->val[row_block_size*iPtr+ib] = A_ij * alpha[ib];
675 }
676 }
677 }
678
679
680 }
681 }/* end i loop */
682 TMPMEMFREE(sum_all_neg);
683 TMPMEMFREE(sum_all_pos);
684 TMPMEMFREE(sum_strong_neg);
685 TMPMEMFREE(sum_strong_pos);
686 TMPMEMFREE(alpha);
687 TMPMEMFREE(beta);
688 TMPMEMFREE(A_ii);
689 } /* end parallel region */
690 }
691
692 /*
693 Classic Prolongation:
694 -------------------
695
696 If row i is in C (counter_C[i]>=0), then P[i,j]=1 if j==counter_C[i] or 0 otherwise.
697 If row i is not C, then P[i,j] = - 1/a[i] * ( A[i,k] + sum_{l} A[i,l]*A+[l,k]/B[i,k])
698 where the summation over l is considering columns which are strongly connected
699 to i (l in S[i]) and not in C (counter_C[l]<0) and
700
701 B[i,k]=sum_{m in S_i and in C} A+[k,m]
702 a[i]=A[i,i]+sum{l not strongly connected to i} A[i,l]
703
704 A+[i,k]=A[i,k] if sign(A[i,k])==sign(A[i,i]) or 0 otherwise
705
706
707 */
708 void Paso_Preconditioner_AMG_setClassicProlongation(Paso_SystemMatrix* P,
709 Paso_SystemMatrix* A, const index_t* offset_S, const dim_t* degree_S,
710 const index_t* S, const index_t *counter_C) {
711 Paso_SparseMatrix *main_block=P->mainBlock;
712 Paso_SparseMatrix *couple_block=P->col_coupleBlock;
713 Paso_Pattern *main_pattern=main_block->pattern;
714 Paso_Pattern *couple_pattern=couple_block->pattern;
715 const dim_t my_n=A->mainBlock->numRows;
716 index_t range, range_j;
717
718 dim_t i, q;
719 double *D_s=NULL;
720 index_t *D_s_offset=NULL, iPtr, iPtr_j;
721 const index_t *ptr_main_A = Paso_SparseMatrix_borrowMainDiagonalPointer(A->mainBlock);
722 index_t *start_p_main_i, *start_p_couple_i;
723 dim_t degree_p_main_i, degree_p_couple_i;
724 dim_t len, ll, main_len, len_D_s;
725
726 len = A->col_coupleBlock->numCols;
727 len = MAX(A->remote_coupleBlock->numCols, len);
728 ll = len + my_n;
729 main_len = main_pattern->len;
730
731 #pragma omp parallel private(D_s,D_s_offset,i,start_p_main_i,start_p_couple_i,degree_p_main_i,degree_p_couple_i,range,iPtr,q,range_j,iPtr_j,len_D_s)
732 {
733 D_s=TMPMEMALLOC(ll,double);
734 D_s_offset=TMPMEMALLOC(ll,index_t);
735
736 #pragma omp for schedule(static)
737 for (i=0;i<my_n;++i) {
738 if (counter_C[i]>=0) {
739 main_block->val[main_pattern->ptr[i]]=1.; /* i is a C row */
740 } else if ((main_pattern->ptr[i + 1] > main_pattern->ptr[i]) ||
741 (couple_pattern->ptr[i + 1] > couple_pattern->ptr[i])) {
742 /* this loop sums up the weak connections in a and creates
743 a list of the strong connected columns which are not in
744 C (=no interpolation nodes) */
745 const index_t *start_s = &(S[offset_S[i]]);
746 const double A_ii = A->mainBlock->val[ptr_main_A[i]];
747 double a=A_ii;
748
749 start_p_main_i = &(main_pattern->index[main_pattern->ptr[i]]);
750 start_p_couple_i = &(couple_pattern->index[couple_pattern->ptr[i]]);
751 degree_p_main_i = main_pattern->ptr[i+1] - main_pattern->ptr[i];
752 degree_p_couple_i = couple_pattern->ptr[i+1] - couple_pattern->ptr[i];
753
754 /* first, check the mainBlock */
755 range = A->mainBlock->pattern->ptr[i + 1];
756 for (iPtr=A->mainBlock->pattern->ptr[i]; iPtr<range; iPtr++) {
757 const index_t j=A->mainBlock->pattern->index[iPtr];
758 const double A_ij=A->mainBlock->val[iPtr];
759 if ( (i!=j) && (degree_S[j]>0) ) {
760 /* is (i,j) a strong connection ?*/
761 const index_t *where_s=(index_t*)bsearch(&j, start_s,degree_S[i],sizeof(index_t), Paso_comparIndex);
762 if (where_s == NULL) { /* weak connections are accumulated */
763 a+=A_ij;
764 } else { /* yes i strongly connected with j */
765 if (counter_C[j]>=0) { /* j is an interpolation point : add A_ij into P */
766 const index_t *where_p=(index_t*)bsearch(&counter_C[j], start_p_main_i,degree_p_main_i, sizeof(index_t), Paso_comparIndex);
767 if (where_p == NULL) {
768 Esys_setError(SYSTEM_ERROR, "Paso_Preconditioner_setClassicProlongation: interpolation point is missing.");
769 } else {
770 const index_t offset = main_pattern->ptr[i]+ (index_t)(where_p-start_p_main_i);
771 main_block->val[offset]+=A_ij;
772 }
773 } else { /* j is not an interpolation point */
774 /* find all interpolation points m of k */
775 double s=0.;
776 len_D_s=0;
777
778 /* first, the mainBlock part */
779 range_j = A->mainBlock->pattern->ptr[j + 1];
780 for (iPtr_j=A->mainBlock->pattern->ptr[j]; iPtr_j<range_j; iPtr_j++) {
781 const double A_jm=A->mainBlock->val[iPtr_j];
782 const index_t m=A->mainBlock->pattern->index[iPtr_j];
783 /* is m an interpolation point ? */
784 const index_t *where_p_m=(index_t*)bsearch(&counter_C[m], start_p_main_i,degree_p_main_i, sizeof(index_t), Paso_comparIndex);
785 if (! (where_p_m==NULL)) {
786 const index_t offset_m = main_pattern->ptr[i]+ (index_t)(where_p_m-start_p_main_i);
787 if (! SAMESIGN(A_ii,A_jm)) {
788 D_s[len_D_s]=A_jm;
789 } else {
790 D_s[len_D_s]=0.;
791 }
792 D_s_offset[len_D_s]=offset_m;
793 len_D_s++;
794 }
795 }
796
797 /* then the coupleBlock part */
798 if (degree_p_couple_i) {
799 range_j = A->col_coupleBlock->pattern->ptr[j + 1];
800 for (iPtr_j=A->col_coupleBlock->pattern->ptr[j]; iPtr_j<range_j; iPtr_j++) {
801 const double A_jm=A->col_coupleBlock->val[iPtr_j];
802 const index_t m=A->col_coupleBlock->pattern->index[iPtr_j];
803 /* is m an interpolation point ? */
804 const index_t *where_p_m=(index_t*)bsearch(&counter_C[m+my_n], start_p_couple_i,degree_p_couple_i, sizeof(index_t), Paso_comparIndex);
805 if (! (where_p_m==NULL)) {
806 const index_t offset_m = couple_pattern->ptr[i]+ (index_t)(where_p_m-start_p_couple_i);
807 if (! SAMESIGN(A_ii,A_jm)) {
808 D_s[len_D_s]=A_jm;
809 } else {
810 D_s[len_D_s]=0.;
811 }
812 D_s_offset[len_D_s]=offset_m + main_len;
813 len_D_s++;
814 }
815 }
816 }
817
818 for (q=0;q<len_D_s;++q) s+=D_s[q];
819 if (ABS(s)>0) {
820 s=A_ij/s;
821 for (q=0;q<len_D_s;++q) {
822 if (D_s_offset[q] < main_len)
823 main_block->val[D_s_offset[q]]+=s*D_s[q];
824 else
825 couple_block->val[D_s_offset[q]-main_len]+=s*D_s[q];
826 }
827 } else {
828 a+=A_ij;
829 }
830 }
831 }
832 }
833 }
834
835 if (A->mpi_info->size > 1) {
836 /* now, deal with the coupleBlock */
837 range = A->col_coupleBlock->pattern->ptr[i + 1];
838 for (iPtr=A->col_coupleBlock->pattern->ptr[i]; iPtr<range; iPtr++) {
839 const index_t j=A->col_coupleBlock->pattern->index[iPtr];
840 const double A_ij=A->col_coupleBlock->val[iPtr];
841 if ( (i!=j) && (degree_S[j]>0) ) {
842 /* is (i,j) a strong connection ?*/
843 index_t t=j+my_n;
844 const index_t *where_s=(index_t*)bsearch(&t, start_s,degree_S[i],sizeof(index_t), Paso_comparIndex);
845 if (where_s == NULL) { /* weak connections are accumulated */
846 a+=A_ij;
847 } else { /* yes i strongly connect with j */
848 if (counter_C[t]>=0) { /* j is an interpolation point : add A_ij into P */
849 const index_t *where_p=(index_t*)bsearch(&counter_C[t], start_p_couple_i,degree_p_couple_i, sizeof(index_t), Paso_comparIndex);
850 if (where_p == NULL) {
851 Esys_setError(SYSTEM_ERROR, "Paso_Preconditioner_AMG_setClassicProlongation: interpolation point is missing.");
852 } else {
853 const index_t offset = couple_pattern->ptr[i]+ (index_t)(where_p-start_p_couple_i);
854 couple_block->val[offset]+=A_ij;
855 }
856 } else { /* j is not an interpolation point */
857 /* find all interpolation points m of k */
858 double s=0.;
859 len_D_s=0;
860
861 /* first, the row_coupleBlock part */
862 range_j = A->row_coupleBlock->pattern->ptr[j + 1];
863 for (iPtr_j=A->row_coupleBlock->pattern->ptr[j]; iPtr_j<range_j; iPtr_j++) {
864 const double A_jm=A->row_coupleBlock->val[iPtr_j];
865 const index_t m=A->row_coupleBlock->pattern->index[iPtr_j];
866 /* is m an interpolation point ? */
867 const index_t *where_p_m=(index_t*)bsearch(&counter_C[m], start_p_main_i,degree_p_main_i, sizeof(index_t), Paso_comparIndex);
868 if (! (where_p_m==NULL)) {
869 const index_t offset_m = main_pattern->ptr[i]+ (index_t)(where_p_m-start_p_main_i);
870 if (! SAMESIGN(A_ii,A_jm)) {
871 D_s[len_D_s]=A_jm;
872 } else {
873 D_s[len_D_s]=0.;
874 }
875 D_s_offset[len_D_s]=offset_m;
876 len_D_s++;
877 }
878 }
879
880 /* then the remote_coupleBlock part */
881 range_j = A->remote_coupleBlock->pattern->ptr[j + 1];
882 for (iPtr_j=A->remote_coupleBlock->pattern->ptr[j]; iPtr_j<range_j; iPtr_j++) {
883 const double A_jm=A->remote_coupleBlock->val[iPtr_j];
884 const index_t m=A->remote_coupleBlock->pattern->index[iPtr_j];
885 /* is m an interpolation point ? */
886 const index_t *where_p_m=(index_t*)bsearch(&counter_C[m+my_n], start_p_couple_i, degree_p_couple_i, sizeof(index_t), Paso_comparIndex);
887 if (! (where_p_m==NULL)) {
888 const index_t offset_m = couple_pattern->ptr[i]+ (index_t)(where_p_m-start_p_couple_i);
889 if (! SAMESIGN(A_ii,A_jm)) {
890 D_s[len_D_s]=A_jm;
891 } else {
892 D_s[len_D_s]=0.;
893 }
894 D_s_offset[len_D_s]=offset_m + main_len;
895 len_D_s++;
896 }
897 }
898
899 for (q=0;q<len_D_s;++q) s+=D_s[q];
900 if (ABS(s)>0) {
901 s=A_ij/s;
902 for (q=0;q<len_D_s;++q) {
903 if (D_s_offset[q] < main_len)
904 main_block->val[D_s_offset[q]]+=s*D_s[q];
905 else
906 couple_block->val[D_s_offset[q]-main_len]+=s*D_s[q];
907 }
908 } else {
909 a+=A_ij;
910 }
911 }
912 }
913 }
914 }
915 }
916
917 /* i has been processed, now we need to do some rescaling */
918 if (ABS(a)>0.) {
919 a=-1./a;
920 range = main_pattern->ptr[i + 1];
921 for (iPtr=main_pattern->ptr[i]; iPtr<range; iPtr++) {
922 main_block->val[iPtr]*=a;
923 }
924
925 range = couple_pattern->ptr[i + 1];
926 for (iPtr=couple_pattern->ptr[i]; iPtr<range; iPtr++) {
927 couple_block->val[iPtr]*=a;
928 }
929 }
930 }
931 } /* end of row i loop */
932 TMPMEMFREE(D_s);
933 TMPMEMFREE(D_s_offset);
934 } /* end of parallel region */
935 }
936
937 void Paso_Preconditioner_AMG_setClassicProlongation_Block(
938 Paso_SystemMatrix* P, Paso_SystemMatrix* A, const index_t* offset_S,
939 const dim_t* degree_S, const index_t* S, const index_t *counter_C) {
940 Paso_SparseMatrix *main_block=P->mainBlock;
941 Paso_SparseMatrix *couple_block=P->col_coupleBlock;
942 Paso_Pattern *main_pattern=main_block->pattern;
943 Paso_Pattern *couple_pattern=couple_block->pattern;
944 const dim_t row_block=A->row_block_size;
945 const dim_t my_n=A->mainBlock->numRows;
946 const dim_t A_block = A->block_size;
947 index_t range, range_j;
948
949 dim_t i, q, ib;
950 double *D_s=NULL;
951 index_t *D_s_offset=NULL, iPtr, iPtr_j;
952 index_t *start_p_main_i, *start_p_couple_i;
953 dim_t degree_p_main_i, degree_p_couple_i;
954 dim_t len, ll, main_len, len_D_s;
955 const index_t *ptr_main_A = Paso_SparseMatrix_borrowMainDiagonalPointer(A->mainBlock);
956
957 len = A->col_coupleBlock->numCols;
958 len = MAX(A->remote_coupleBlock->numCols, len);
959 ll = len + my_n;
960 main_len = main_pattern->len;
961 #pragma omp parallel private(D_s,D_s_offset,i,ib,start_p_main_i,start_p_couple_i,degree_p_main_i,degree_p_couple_i,range,iPtr,q,range_j,iPtr_j,len_D_s)
962 {
963 double *a=TMPMEMALLOC(row_block, double);
964 D_s=TMPMEMALLOC(row_block*ll,double);
965 D_s_offset=TMPMEMALLOC(row_block*ll,index_t);
966
967 #pragma omp for private(i) schedule(static)
968 for (i=0;i<my_n;++i) {
969 if (counter_C[i]>=0) {
970 const index_t offset = main_pattern->ptr[i];
971 for (ib =0; ib<row_block; ++ib) main_block->val[row_block*offset+ib]=1.; /* i is a C row */
972 } else if ((main_pattern->ptr[i + 1] > main_pattern->ptr[i]) ||
973 (couple_pattern->ptr[i + 1] > couple_pattern->ptr[i])) {
974 /* this loop sums up the weak connections in a and creates
975 a list of the strong connected columns which are not in
976 C (=no interpolation nodes) */
977 const index_t *start_s = &(S[offset_S[i]]);
978 const double *A_ii = &(A->mainBlock->val[ptr_main_A[i]*A_block]);
979 start_p_main_i = &(main_pattern->index[main_pattern->ptr[i]]);
980 start_p_couple_i = &(couple_pattern->index[couple_pattern->ptr[i]]);
981 degree_p_main_i = main_pattern->ptr[i+1] - main_pattern->ptr[i];
982 degree_p_couple_i = couple_pattern->ptr[i+1] - couple_pattern->ptr[i];
983 for (ib=0; ib<row_block; ib++) a[ib]=A_ii[(row_block+1)*ib];
984
985 /* first, check the mainBlock */
986 range = A->mainBlock->pattern->ptr[i + 1];
987 for (iPtr=A->mainBlock->pattern->ptr[i]; iPtr<range; iPtr++) {
988 const index_t j=A->mainBlock->pattern->index[iPtr];
989 const double* A_ij=&(A->mainBlock->val[iPtr*A_block]);
990
991 if ( (i!=j) && (degree_S[j]>0) ) {
992 /* is (i,j) a strong connection ?*/
993 const index_t *where_s=(index_t*)bsearch(&j, start_s,degree_S[i],sizeof(index_t), Paso_comparIndex);
994 if (where_s == NULL) { /* weak connections are accumulated */
995 for (ib=0; ib<row_block; ib++) a[ib]+=A_ij[(row_block+1)*ib];
996 } else { /* yes i strongly connected with j */
997 if (counter_C[j]>=0) { /* j is an interpolation point : add A_ij into P */
998 const index_t *where_p=(index_t*)bsearch(&counter_C[j], start_p_main_i,degree_p_main_i, sizeof(index_t), Paso_comparIndex);
999 if (where_p == NULL) {
1000 Esys_setError(SYSTEM_ERROR, "Paso_Preconditioner_AMG_setClassicProlongation_Block: interpolation point is missing.");
1001 } else {
1002 const index_t offset = main_pattern->ptr[i]+ (index_t)(where_p-start_p_main_i);
1003 for (ib=0; ib<row_block; ib++) main_block->val[offset*row_block+ib] +=A_ij[(row_block+1)*ib];
1004 }
1005 } else { /* j is not an interpolation point */
1006 /* find all interpolation points m of k */
1007 len_D_s=0;
1008
1009 /* first, the mainBlock part */
1010 range_j = A->mainBlock->pattern->ptr[j + 1];
1011 for (iPtr_j=A->mainBlock->pattern->ptr[j];iPtr_j<range_j; iPtr_j++) {
1012 const double* A_jm=&(A->mainBlock->val[iPtr_j*A_block]);
1013 const index_t m=A->mainBlock->pattern->index[iPtr_j];
1014 /* is m an interpolation point ? */
1015 const index_t *where_p_m=(index_t*)bsearch(&counter_C[m], start_p_main_i,degree_p_main_i, sizeof(index_t), Paso_comparIndex);
1016 if (! (where_p_m==NULL)) {
1017 const index_t offset_m = main_pattern->ptr[i]+ (index_t)(where_p_m-start_p_main_i);
1018 for (ib=0; ib<row_block; ib++) {
1019 if (! SAMESIGN(A_ii[(row_block+1)*ib],A_jm[(row_block+1)*ib]) ) {
1020 D_s[len_D_s*row_block+ib]=A_jm[(row_block+1)*ib];
1021 } else {
1022 D_s[len_D_s*row_block+ib]=0.;
1023 }
1024 }
1025 D_s_offset[len_D_s] = offset_m;
1026 len_D_s++;
1027 }
1028 }
1029
1030 /* then the coupleBlock part */
1031 range_j = A->col_coupleBlock->pattern->ptr[j+1];
1032 for (iPtr_j=A->col_coupleBlock->pattern->ptr[j];iPtr_j<range_j; iPtr_j++) {
1033 const double* A_jm=&(A->col_coupleBlock->val[iPtr_j*A_block]);
1034 const index_t m=A->col_coupleBlock->pattern->index[iPtr_j];
1035 /* is m an interpolation point ? */
1036 const index_t *where_p_m=(index_t*)bsearch(&counter_C[m+my_n], start_p_couple_i,degree_p_couple_i, sizeof(index_t), Paso_comparIndex);
1037 if (! (where_p_m==NULL)) {
1038 const index_t offset_m = couple_pattern->ptr[i]+ (index_t)(where_p_m-start_p_couple_i);
1039 for (ib=0; ib<row_block; ib++) {
1040 if (! SAMESIGN(A_ii[(row_block+1)*ib],A_jm[(row_block+1)*ib]) ) {
1041 D_s[len_D_s*row_block+ib]=A_jm[(row_block+1)*ib];
1042 } else {
1043 D_s[len_D_s*row_block+ib]=0.;
1044 }
1045 }
1046 D_s_offset[len_D_s]=offset_m + main_len;
1047 len_D_s++;
1048 }
1049 }
1050
1051 for (ib=0; ib<row_block; ib++) {
1052 double s=0;
1053 for (q=0;q<len_D_s;++q) s+=D_s[q*row_block+ib];
1054
1055 if (ABS(s)>0) {
1056 s=A_ij[(row_block+1)*ib]/s;
1057 for (q=0; q<len_D_s; q++) {
1058 if (D_s_offset[q] < main_len)
1059 main_block->val[D_s_offset[q]*row_block+ib]+=s*D_s[q*row_block+ib];
1060 else{
1061 couple_block->val[(D_s_offset[q]-main_len)*row_block+ib]+=s*D_s[q*row_block+ib];
1062 }
1063 }
1064 } else {
1065 a[ib]+=A_ij[(row_block+1)*ib];
1066 }
1067 }
1068 }
1069 }
1070 }
1071 }
1072
1073 if (A->mpi_info->size > 1) {
1074 /* now, deal with the coupleBlock */
1075 range = A->col_coupleBlock->pattern->ptr[i + 1];
1076 for (iPtr=A->col_coupleBlock->pattern->ptr[i]; iPtr<range; iPtr++) {
1077 const index_t j=A->col_coupleBlock->pattern->index[iPtr];
1078 const double* A_ij=&(A->col_coupleBlock->val[iPtr*A_block]);
1079
1080 if ( (i!=j) && (degree_S[j]>0) ) {
1081 /* is (i,j) a strong connection ?*/
1082 index_t t=j+my_n;
1083 const index_t *where_s=(index_t*)bsearch(&t, start_s,degree_S[i],sizeof(index_t), Paso_comparIndex);
1084 if (where_s == NULL) { /* weak connections are accumulated */
1085 for (ib=0; ib<row_block; ib++) a[ib]+=A_ij[(row_block+1)*ib];
1086 } else { /* yes i strongly connected with j */
1087 if (counter_C[t]>=0) { /* j is an interpolation point : add A_ij into P */
1088 const index_t *where_p=(index_t*)bsearch(&counter_C[t], start_p_couple_i,degree_p_couple_i, sizeof(index_t), Paso_comparIndex);
1089 if (where_p == NULL) {
1090 Esys_setError(SYSTEM_ERROR, "Paso_Preconditioner_AMG_setClassicProlongation_Block: interpolation point is missing.");
1091
1092 } else {
1093 const index_t offset = couple_pattern->ptr[i]+ (index_t)(where_p-start_p_couple_i);
1094 for (ib=0; ib<row_block; ib++) couple_block->val[offset*row_block+ib] +=A_ij[(row_block+1)*ib];
1095 }
1096 } else { /* j is not an interpolation point */
1097 /* find all interpolation points m of k */
1098 len_D_s=0;
1099
1100 /* first, the row_coupleBlock part */
1101 range_j = A->row_coupleBlock->pattern->ptr[j + 1];
1102 for (iPtr_j=A->row_coupleBlock->pattern->ptr[j];iPtr_j<range_j; iPtr_j++) {
1103 /* is m an interpolation point ? */
1104 index_t m, *where_p_m;
1105 double *A_jm;
1106 A_jm=&(A->row_coupleBlock->val[iPtr_j*A_block]);
1107 m=A->row_coupleBlock->pattern->index[iPtr_j];
1108
1109 where_p_m=(index_t*)bsearch(&counter_C[m], start_p_main_i,degree_p_main_i, sizeof(index_t), Paso_comparIndex);
1110 if (! (where_p_m==NULL)) {
1111 const index_t offset_m = main_pattern->ptr[i]+ (index_t)(where_p_m-start_p_main_i);
1112 for (ib=0; ib<row_block; ib++) {
1113 if (! SAMESIGN(A_ii[(row_block+1)*ib],A_jm[(row_block+1)*ib]) ) {
1114 D_s[len_D_s*row_block+ib]=A_jm[(row_block+1)*ib];
1115 } else {
1116 D_s[len_D_s*row_block+ib]=0.;
1117 }
1118 }
1119 D_s_offset[len_D_s]=offset_m;
1120 len_D_s++;
1121 }
1122 }
1123
1124 /* then the remote_coupleBlock part */
1125 if (degree_p_couple_i) {
1126 range_j = A->remote_coupleBlock->pattern->ptr[j + 1];
1127 for (iPtr_j=A->remote_coupleBlock->pattern->ptr[j];iPtr_j<range_j; iPtr_j++) {
1128 const double* A_jm=&(A->remote_coupleBlock->val[iPtr_j*A_block]);
1129 const index_t m=A->remote_coupleBlock->pattern->index[iPtr_j];
1130 /* is m an interpolation point ? */
1131 const index_t *where_p_m=(index_t*)bsearch(&counter_C[m+my_n], start_p_couple_i,degree_p_couple_i, sizeof(index_t), Paso_comparIndex);
1132 if (! (where_p_m==NULL)) {
1133 const index_t offset_m = couple_pattern->ptr[i]+ (index_t)(where_p_m-start_p_couple_i);
1134 for (ib=0; ib<row_block; ib++) {
1135 if (! SAMESIGN(A_ii[(row_block+1)*ib],A_jm[(row_block+1)*ib]) ) {
1136 D_s[len_D_s*row_block+ib]=A_jm[(row_block+1)*ib];
1137 } else {
1138 D_s[len_D_s*row_block+ib]=0.;
1139 }
1140 }
1141 D_s_offset[len_D_s]=offset_m + main_len;
1142 len_D_s++;
1143 }
1144 }
1145 }
1146
1147 for (ib=0; ib<row_block; ib++) {
1148 double s=0;
1149 for (q=0;q<len_D_s;++q) s+=D_s[q*row_block+ib];
1150
1151 if (ABS(s)>0) {
1152 s=A_ij[(row_block+1)*ib]/s;
1153 for (q=0;q<len_D_s;++q) {
1154 if (D_s_offset[q] < main_len)
1155 main_block->val[D_s_offset[q]*row_block+ib]+=s*D_s[q*row_block+ib];
1156 else
1157 couple_block->val[(D_s_offset[q]-main_len)*row_block+ib]+=s*D_s[q*row_block+ib];
1158 }
1159 } else {
1160 a[ib]+=A_ij[(row_block+1)*ib];
1161 }
1162 }
1163 }
1164 }
1165 }
1166 }
1167 }
1168
1169 /* i has been processed, now we need to do some rescaling */
1170 for (ib=0; ib<row_block; ib++) {
1171 register double a2=a[ib];
1172 if (ABS(a2)>0.) {
1173 a2=-1./a2;
1174 range = main_pattern->ptr[i + 1];
1175 for (iPtr=main_pattern->ptr[i]; iPtr<range; iPtr++) {
1176 main_block->val[iPtr*row_block+ib]*=a2;
1177 }
1178
1179 range = couple_pattern->ptr[i + 1];
1180 for (iPtr=couple_pattern->ptr[i]; iPtr<range; iPtr++) {
1181 couple_block->val[iPtr*row_block+ib]*=a2;
1182 }
1183 }
1184 }
1185 }
1186 } /* end of row i loop */
1187 TMPMEMFREE(D_s);
1188 TMPMEMFREE(D_s_offset);
1189 TMPMEMFREE(a);
1190 } /* end of parallel region */
1191 }

  ViewVC Help
Powered by ViewVC 1.1.26