# Contents of /trunk/paso/src/AMG_Prolongation.c

Revision 4286 - (show annotations)
Thu Mar 7 04:28:11 2013 UTC (6 years, 5 months ago) by caltinay
File MIME type: text/plain
File size: 52006 byte(s)
```Assorted spelling fixes.

```
 1 /***************************************************************************** 2 * 3 * Copyright (c) 2003-2013 by University of Queensland 4 * http://www.uq.edu.au 5 * 6 * Primary Business: Queensland, Australia 7 * Licensed under the Open Software License version 3.0 8 * http://www.opensource.org/licenses/osl-3.0.php 9 * 10 * Development until 2012 by Earth Systems Science Computational Center (ESSCC) 11 * Development since 2012 by School of Earth Sciences 12 * 13 *****************************************************************************/ 14 15 16 /************************************************************************************/ 17 18 /* Paso: defines AMG prolongation */ 19 20 /************************************************************************************/ 21 22 /* Author: Artak Amirbekyan, artak@uq.edu.au, l.gross@uq.edu.au */ 23 24 /************************************************************************************/ 25 26 #include "Paso.h" 27 #include "SparseMatrix.h" 28 #include "PasoUtil.h" 29 #include "Preconditioner.h" 30 31 /************************************************************************************ 32 33 Methods necessary for AMG preconditioner 34 35 construct n x n_C the prolongation matrix P from A_p. 36 37 The columns in A_p to be considered are marked by counter_C[n] where 38 an unknown i to be considered in P is marked by 0<= counter_C[i] < n_C 39 and counter_C[i] gives the new column number in P. 40 S defines the strong connections. 41 42 The pattern of P is formed as follows: 43 44 If row i is in C (counter_C[i]>=0), then P[i,j]=1 if j==counter_C[i] or 0 otherwise. 45 If row i is not C, then P[i,j] <> 0 if counter_C[k]==j (k in C) and (i,k) is a strong connection. 46 47 Two settings for P are implemented (see below) 48 49 */ 50 51 Paso_SystemMatrix* Paso_Preconditioner_AMG_getProlongation(Paso_SystemMatrix* A_p, 52 const index_t* offset_S, const dim_t* degree_S, const index_t* S, 53 const dim_t n_C, index_t* counter_C, const index_t interpolation_method) 54 { 55 Esys_MPIInfo *mpi_info=Esys_MPIInfo_getReference(A_p->mpi_info); 56 Paso_SystemMatrix *out=NULL; 57 Paso_SystemMatrixPattern *pattern=NULL; 58 Paso_Distribution *input_dist=NULL, *output_dist=NULL; 59 Paso_SharedComponents *send =NULL, *recv=NULL; 60 Paso_Connector *col_connector=NULL; 61 Paso_Pattern *main_pattern=NULL, *couple_pattern=NULL; 62 const dim_t row_block_size=A_p->row_block_size; 63 const dim_t col_block_size=A_p->col_block_size; 64 const dim_t my_n=A_p->mainBlock->numCols; 65 const dim_t overlap_n=A_p->col_coupleBlock->numCols; 66 const dim_t num_threads=omp_get_max_threads(); 67 index_t size=mpi_info->size, *dist=NULL; 68 index_t *main_p=NULL, *couple_p=NULL, *main_idx=NULL, *couple_idx=NULL; 69 index_t *shared=NULL, *offsetInShared=NULL; 70 index_t *recv_shared=NULL, *send_shared=NULL; 71 index_t sum, i, j, k, l, p, q, iptr; 72 index_t my_n_C, global_label, num_neighbors; 73 #ifdef ESYS_MPI 74 index_t rank=mpi_info->rank; 75 #endif 76 Esys_MPI_rank *neighbor=NULL; 77 #ifdef ESYS_MPI 78 MPI_Request* mpi_requests=NULL; 79 MPI_Status* mpi_stati=NULL; 80 #else 81 int *mpi_requests=NULL, *mpi_stati=NULL; 82 #endif 83 84 /* number of C points in current distribution */ 85 my_n_C = 0; 86 sum=0; 87 if (num_threads>1) { 88 #pragma omp parallel private(i,sum) 89 { 90 sum=0; 91 #pragma omp for schedule(static) 92 for (i=0;impi_info ??? */ 114 dist = A_p->pattern->output_distribution->first_component; 115 output_dist=Paso_Distribution_alloc(mpi_info, dist, 1, 0); 116 dist = TMPMEMALLOC(size+1, index_t); /* now prepare for col distribution */ 117 Esys_checkPtr(dist); 118 #ifdef ESYS_MPI 119 MPI_Allgather(&my_n_C, 1, MPI_INT, dist, 1, MPI_INT, mpi_info->comm); 120 #endif 121 global_label=0; 122 for (i=0; i=0) { 141 k = 1; /* i is a C unknown */ 142 } else { 143 k = 0; 144 iptr = offset_S[i]; 145 for (p=0; p=0) { /* and is in C */ 148 if (j -1) { 163 counter_C[i+my_n] -= my_n_C; 164 sum++; 165 } 166 } 167 168 /* allocate and create index vector for prolongation: */ 169 p = Paso_Util_cumsum(my_n, main_p); 170 main_p[my_n] = p; 171 main_idx = MEMALLOC(p, index_t); 172 p = Paso_Util_cumsum(my_n, couple_p); 173 couple_p[my_n] = p; 174 couple_idx = MEMALLOC(p, index_t); 175 if (!(Esys_checkPtr(main_idx) || Esys_checkPtr(couple_idx))) { 176 #pragma omp parallel for private(i,k,l,iptr,j,p) schedule(static) 177 for (i=0; i=0) { 179 main_idx[main_p[i]]=counter_C[i]; /* i is a C unknown */ 180 } else { 181 k = 0; 182 l = 0; 183 iptr = offset_S[i]; 184 for (p=0; p=0) { /* and is in C */ 187 if (j < my_n) { 188 main_idx[main_p[i]+k] = counter_C[j]; 189 k++; 190 } else { 191 couple_idx[couple_p[i]+l] = counter_C[j]; 192 l++; 193 } 194 } 195 } 196 } 197 } 198 } 199 } 200 201 if (Esys_noError()) { 202 main_pattern = Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT, my_n, 203 my_n_C, main_p, main_idx); 204 couple_pattern = Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT, my_n, 205 sum, couple_p, couple_idx); 206 } else { 207 MEMFREE(main_p); 208 MEMFREE(main_idx); 209 MEMFREE(couple_p); 210 MEMFREE(couple_idx); 211 } 212 213 /* prepare the receiver for the col_connector. 214 Note that the allocation for "shared" assumes the send and receive buffer 215 of the interpolation matrix P is no larger than that of matrix A_p. */ 216 neighbor = TMPMEMALLOC(size, Esys_MPI_rank); 217 offsetInShared = TMPMEMALLOC(size+1, index_t); 218 recv = A_p->col_coupler->connector->recv; 219 send = A_p->col_coupler->connector->send; 220 i = recv->numSharedComponents; 221 recv_shared = TMPMEMALLOC(i,index_t); 222 memset(recv_shared, 0, sizeof(index_t)*i); 223 k = send->numSharedComponents; 224 send_shared = TMPMEMALLOC(k,index_t); 225 if (k > i) i = k; 226 shared = TMPMEMALLOC(i, index_t); 227 228 #ifdef ESYS_MPI 229 mpi_requests=TMPMEMALLOC(size*2,MPI_Request); 230 mpi_stati=TMPMEMALLOC(size*2,MPI_Status); 231 #else 232 mpi_requests=TMPMEMALLOC(size*2,int); 233 mpi_stati=TMPMEMALLOC(size*2,int); 234 #endif 235 236 for (p=0; pnumNeighbors; p++) { 237 i = send->offsetInShared[p]; 238 #ifdef ESYS_MPI 239 MPI_Irecv (&(send_shared[i]), send->offsetInShared[p+1]-i, MPI_INT, 240 send->neighbor[p], mpi_info->msg_tag_counter+send->neighbor[p], 241 mpi_info->comm, &mpi_requests[p]); 242 #endif 243 } 244 245 num_neighbors = 0; 246 q = 0; 247 p = recv->numNeighbors; 248 offsetInShared[0]=0; 249 for (i=0; ioffsetInShared[i+1]; 252 for (j=recv->offsetInShared[i]; jshared[j]] > -1) { 254 shared[q] = my_n_C + q; 255 recv_shared[recv->shared[j]-my_n] = 1; 256 q++; 257 l = 1; 258 } 259 } 260 if (l == 1) { 261 iptr = recv->neighbor[i]; 262 neighbor[num_neighbors] = iptr; 263 num_neighbors++; 264 offsetInShared[num_neighbors] = q; 265 } 266 #ifdef ESYS_MPI 267 MPI_Issend(&(recv_shared[recv->offsetInShared[i]]), 268 k-recv->offsetInShared[i], MPI_INT, recv->neighbor[i], 269 mpi_info->msg_tag_counter+rank, mpi_info->comm, 270 &mpi_requests[i+send->numNeighbors]); 271 #endif 272 } 273 recv = Paso_SharedComponents_alloc(my_n_C, num_neighbors, neighbor, shared, 274 offsetInShared, 1, 0, mpi_info); 275 276 /* now we can build the sender */ 277 #ifdef ESYS_MPI 278 MPI_Waitall(recv->numNeighbors+send->numNeighbors, mpi_requests, mpi_stati); 279 #endif 280 mpi_info->msg_tag_counter += size; 281 TMPMEMFREE(mpi_requests); 282 TMPMEMFREE(mpi_stati); 283 284 num_neighbors = 0; 285 q = 0; 286 p = send->numNeighbors; 287 offsetInShared[0]=0; 288 for (i=0; ioffsetInShared[i+1]; 291 for (j=send->offsetInShared[i]; jshared[j]]; 294 q++; 295 l = 1; 296 } 297 } 298 if (l == 1) { 299 iptr = send->neighbor[i]; 300 neighbor[num_neighbors] = iptr; 301 num_neighbors++; 302 offsetInShared[num_neighbors] = q; 303 } 304 } 305 306 send = Paso_SharedComponents_alloc(my_n_C, num_neighbors, neighbor, shared, 307 offsetInShared, 1, 0, mpi_info); 308 col_connector = Paso_Connector_alloc(send, recv); 309 Paso_SharedComponents_free(recv); 310 Paso_SharedComponents_free(send); 311 TMPMEMFREE(recv_shared); 312 TMPMEMFREE(send_shared); 313 TMPMEMFREE(neighbor); 314 TMPMEMFREE(offsetInShared); 315 TMPMEMFREE(shared); 316 317 /* now we need to create the System Matrix 318 TO BE FIXED: at this stage, we only construction col_couple_pattern 319 and col_connector for interpolation matrix P. To be completed, 320 row_couple_pattern and row_connector need to be constructed as well */ 321 if (Esys_noError()) { 322 pattern = Paso_SystemMatrixPattern_alloc(MATRIX_FORMAT_DEFAULT, 323 output_dist, input_dist, main_pattern, couple_pattern, 324 couple_pattern, col_connector, col_connector); 325 out = Paso_SystemMatrix_alloc(MATRIX_FORMAT_DIAGONAL_BLOCK, pattern, 326 row_block_size, col_block_size, FALSE); 327 } 328 329 /* now fill in the matrix */ 330 if (Esys_noError()) { 331 if ((interpolation_method == PASO_CLASSIC_INTERPOLATION_WITH_FF_COUPLING) 332 || ( interpolation_method == PASO_CLASSIC_INTERPOLATION) ) { 333 if (row_block_size == 1) { 334 Paso_Preconditioner_AMG_setClassicProlongation(out, A_p, offset_S, degree_S, S, counter_C); 335 } else { 336 Paso_Preconditioner_AMG_setClassicProlongation_Block(out, A_p, offset_S, degree_S, S, counter_C); 337 } 338 } else { 339 if (row_block_size == 1) { 340 Paso_Preconditioner_AMG_setDirectProlongation(out, A_p, offset_S, degree_S, S, counter_C); 341 } else { 342 Paso_Preconditioner_AMG_setDirectProlongation_Block(out, A_p, offset_S, degree_S, S, counter_C); 343 } 344 } 345 } 346 347 /* clean up */ 348 Paso_SystemMatrixPattern_free(pattern); 349 Paso_Pattern_free(main_pattern); 350 Paso_Pattern_free(couple_pattern); 351 Paso_Connector_free(col_connector); 352 Paso_Distribution_free(output_dist); 353 Paso_Distribution_free(input_dist); 354 if (Esys_noError()) { 355 return out; 356 } else { 357 Paso_SystemMatrix_free(out); 358 return NULL; 359 } 360 361 } 362 363 /* 364 Direct Prolongation: 365 ------------------- 366 367 If row i is in C (counter_C[i]>=0), then P[i,j]=1 if j==counter_C[i] or 0 otherwise. 368 If row i is not C, then P[i,j] = - a[i] * A[i,k]/A[i,i] with j=counter_C[k]>=0 and k in S 369 370 and a[i]= 371 alpha[i] = sum_s min(A[i,s],0)/(sum_{s in S and C} min(A[i,s],0)) A[i,k]<0 372 or if 373 beta[i] = sum_s max(A[i,s],0)/(sum_{s in S and C} max(A[i,s],0)) A[i,k]>0 374 375 376 */ 377 378 void Paso_Preconditioner_AMG_setDirectProlongation(Paso_SystemMatrix* P, 379 Paso_SystemMatrix* A, const index_t* offset_S, const dim_t* degree_S, 380 const index_t* S, const index_t *counter_C) { 381 Paso_SparseMatrix *main_block=P->mainBlock; 382 Paso_SparseMatrix *couple_block=P->col_coupleBlock; 383 Paso_Pattern *main_pattern=main_block->pattern; 384 Paso_Pattern *couple_pattern=couple_block->pattern; 385 const dim_t my_n=A->mainBlock->numRows; 386 index_t range; 387 388 dim_t i; 389 register double alpha, beta, sum_all_neg, sum_all_pos, sum_strong_neg, sum_strong_pos, A_ij, A_ii, rtmp; 390 register index_t iPtr, j, offset; 391 index_t *where_p, *start_p; 392 393 #pragma omp parallel for private(i,offset,sum_all_neg,sum_all_pos,sum_strong_neg,sum_strong_pos,A_ii,range,iPtr,j,A_ij,start_p,where_p,alpha,beta,rtmp) schedule(static) 394 for (i=0; i=0) { 396 offset = main_pattern->ptr[i]; 397 main_block->val[offset]=1.; /* i is a C row */ 398 } else if ((main_pattern->ptr[i + 1] > main_pattern->ptr[i]) || 399 (couple_pattern->ptr[i +1] > couple_pattern->ptr[i])) { 400 /* if i is an F row we first calculate alpha and beta: */ 401 402 sum_all_neg=0; /* sum of all negative values in row i of A */ 403 sum_all_pos=0; /* sum of all positive values in row i of A */ 404 sum_strong_neg=0; /* sum of all negative values A_ij where j is in C and strongly connected to i*/ 405 sum_strong_pos=0; /* sum of all positive values A_ij where j is in C and strongly connected to i*/ 406 A_ii=0; 407 408 /* first check the mainBlock */ 409 range = A->mainBlock->pattern->ptr[i + 1]; 410 for (iPtr=A->mainBlock->pattern->ptr[i]; iPtrmainBlock->pattern->index[iPtr]; 412 A_ij=A->mainBlock->val[iPtr]; 413 if(j==i) { 414 A_ii=A_ij; 415 } else { 416 417 if(A_ij< 0) { 418 sum_all_neg+=A_ij; 419 } else { 420 sum_all_pos+=A_ij; 421 } 422 423 if (counter_C[j]>=0) { 424 /* is i strongly connected with j? We search for counter_C[j] in P[i,:] */ 425 start_p=&(main_pattern->index[main_pattern->ptr[i]]); 426 where_p=(index_t*)bsearch(&(counter_C[j]), start_p, 427 main_pattern->ptr[i + 1] - main_pattern->ptr[i], 428 sizeof(index_t), 429 Paso_comparIndex); 430 if (! (where_p == NULL) ) { /* yes i strongly connected with j */ 431 offset = main_pattern->ptr[i]+ (index_t)(where_p-start_p); 432 main_block->val[offset]=A_ij; /* will be modified later */ 433 if (A_ij< 0) { 434 sum_strong_neg+=A_ij; 435 } else { 436 sum_strong_pos+=A_ij; 437 } 438 } 439 } 440 441 } 442 } 443 444 /* now we deal with the col_coupleBlock */ 445 range = A->col_coupleBlock->pattern->ptr[i + 1]; 446 for (iPtr=A->col_coupleBlock->pattern->ptr[i]; iPtrcol_coupleBlock->pattern->index[iPtr]; 448 A_ij=A->col_coupleBlock->val[iPtr]; 449 if(A_ij< 0) { 450 sum_all_neg+=A_ij; 451 } else { 452 sum_all_pos+=A_ij; 453 } 454 455 if (counter_C[j+my_n]>=0) { 456 /* is i strongly connected with j? We search for counter_C[j] in P[i,:] */ 457 start_p=&(couple_pattern->index[couple_pattern->ptr[i]]); 458 where_p=(index_t*)bsearch(&(counter_C[j+my_n]), start_p, 459 couple_pattern->ptr[i + 1] - couple_pattern->ptr[i], 460 sizeof(index_t), 461 Paso_comparIndex); 462 if (! (where_p == NULL) ) { /* yes i strongly connect with j */ 463 offset = couple_pattern->ptr[i]+ (index_t)(where_p-start_p); 464 couple_block->val[offset]=A_ij; /* will be modified later */ 465 if (A_ij< 0) { 466 sum_strong_neg+=A_ij; 467 } else { 468 sum_strong_pos+=A_ij; 469 } 470 } 471 } 472 } 473 474 if(sum_strong_neg<0) { 475 alpha= sum_all_neg/sum_strong_neg; 476 } else { 477 alpha=0; 478 } 479 if(sum_strong_pos>0) { 480 beta= sum_all_pos/sum_strong_pos; 481 } else { 482 beta=0; 483 A_ii+=sum_all_pos; 484 } 485 if ( A_ii > 0.) { 486 rtmp=(-1.)/A_ii; 487 alpha*=rtmp; 488 beta*=rtmp; 489 } 490 491 range = main_pattern->ptr[i + 1]; 492 for (iPtr=main_pattern->ptr[i]; iPtrval[iPtr]; 494 if (A_ij > 0 ) { 495 main_block->val[iPtr] = A_ij * beta; 496 } else { 497 main_block->val[iPtr] = A_ij * alpha; 498 } 499 } 500 501 range = couple_pattern->ptr[i + 1]; 502 for (iPtr=couple_pattern->ptr[i]; iPtrval[iPtr]; 504 if (A_ij > 0 ) { 505 couple_block->val[iPtr] = A_ij * beta; 506 } else { 507 couple_block->val[iPtr] = A_ij * alpha; 508 } 509 } 510 } 511 } 512 } 513 514 void Paso_Preconditioner_AMG_setDirectProlongation_Block(Paso_SystemMatrix* P, 515 Paso_SystemMatrix* A, const index_t* offset_S, const dim_t* degree_S, 516 const index_t* S, const index_t *counter_C) { 517 Paso_SparseMatrix *main_block=P->mainBlock; 518 Paso_SparseMatrix *couple_block=P->col_coupleBlock; 519 Paso_Pattern *main_pattern=main_block->pattern; 520 Paso_Pattern *couple_pattern=couple_block->pattern; 521 const dim_t row_block_size=A->row_block_size; 522 const dim_t A_block = A->block_size; 523 const dim_t my_n=A->mainBlock->numRows; 524 index_t range; 525 526 dim_t i; 527 double *alpha, *beta, *sum_all_neg, *sum_all_pos, *sum_strong_neg, *sum_strong_pos, *A_ii; 528 register double A_ij, rtmp; 529 register index_t iPtr, j, offset, ib; 530 index_t *where_p, *start_p; 531 532 #pragma omp parallel private(i,offset,ib,sum_all_neg,sum_all_pos,sum_strong_neg,sum_strong_pos,A_ii,range,iPtr,j,A_ij,start_p,where_p,alpha,beta,rtmp) 533 { 534 sum_all_neg=TMPMEMALLOC(row_block_size, double); /* sum of all negative values in row i of A */ 535 sum_all_pos=TMPMEMALLOC(row_block_size, double); /* sum of all positive values in row i of A */ 536 sum_strong_neg=TMPMEMALLOC(row_block_size, double); /* sum of all negative values A_ij where j is in C and strongly connected to i*/ 537 sum_strong_pos=TMPMEMALLOC(row_block_size, double); /* sum of all positive values A_ij where j is in C and strongly connected to i*/ 538 alpha=TMPMEMALLOC(row_block_size, double); 539 beta=TMPMEMALLOC(row_block_size, double); 540 A_ii=TMPMEMALLOC(row_block_size, double); 541 542 #pragma omp for schedule(static) 543 for (i=0;i=0) { /* i is a C row */ 545 offset = main_pattern->ptr[i]; 546 for (ib =0; ibval[row_block_size*offset+ib]=1.; 548 } else if ((main_pattern->ptr[i + 1] > main_pattern->ptr[i]) || 549 (couple_pattern->ptr[i + 1] > couple_pattern->ptr[i])) { 550 /* if i is an F row we first calculate alpha and beta: */ 551 for (ib =0; ibmainBlock->pattern->ptr[i + 1]; 561 for (iPtr=A->mainBlock->pattern->ptr[i]; iPtrmainBlock->pattern->index[iPtr]; 563 if(j==i) { 564 for (ib =0; ibmainBlock->val[A_block*iPtr+ib+row_block_size*ib]; 566 } else { 567 for (ib =0; ibmainBlock->val[A_block*iPtr+ib+row_block_size*ib]; 569 if(A_ij< 0) { 570 sum_all_neg[ib]+=A_ij; 571 } else { 572 sum_all_pos[ib]+=A_ij; 573 } 574 } 575 576 if (counter_C[j]>=0) { 577 /* is i strongly connected with j? We search for counter_C[j] in P[i,:] */ 578 start_p=&(main_pattern->index[main_pattern->ptr[i]]); 579 where_p=(index_t*)bsearch(&(counter_C[j]), start_p, 580 main_pattern->ptr[i + 1]-main_pattern->ptr[i], 581 sizeof(index_t), 582 Paso_comparIndex); 583 if (! (where_p == NULL) ) { /* yes i strongly connected with j */ 584 offset = main_pattern->ptr[i]+ (index_t)(where_p-start_p); 585 for (ib =0; ibmainBlock->val[A_block*iPtr+ib+row_block_size*ib]; 587 main_block->val[row_block_size*offset+ib]=A_ij; /* will be modified later */ 588 if (A_ij< 0) { 589 sum_strong_neg[ib]+=A_ij; 590 } else { 591 sum_strong_pos[ib]+=A_ij; 592 } 593 } 594 } 595 } 596 597 } 598 } 599 600 /* now we deal with the col_coupleBlock */ 601 range = A->col_coupleBlock->pattern->ptr[i + 1]; 602 for (iPtr=A->col_coupleBlock->pattern->ptr[i]; iPtrcol_coupleBlock->pattern->index[iPtr]; 604 for (ib =0; ibcol_coupleBlock->val[A_block*iPtr+ib+row_block_size*ib]; 606 if(A_ij< 0) { 607 sum_all_neg[ib]+=A_ij; 608 } else { 609 sum_all_pos[ib]+=A_ij; 610 } 611 } 612 613 if (counter_C[j+my_n]>=0) { 614 /* is i strongly connect with j? We search for counter_C[j] in P[i,:] */ 615 start_p=&(couple_pattern->index[couple_pattern->ptr[i]]); 616 where_p=(index_t*)bsearch(&(counter_C[j+my_n]), start_p, 617 couple_pattern->ptr[i + 1]-couple_pattern->ptr[i], 618 sizeof(index_t), 619 Paso_comparIndex); 620 if (! (where_p == NULL) ) { /* yes i strongly connect with j */ 621 offset = couple_pattern->ptr[i]+ (index_t)(where_p-start_p); 622 for (ib =0; ibcol_coupleBlock->val[A_block*iPtr+ib+row_block_size*ib]; 624 couple_block->val[row_block_size*offset+ib]=A_ij; /* will be modified later */ 625 626 if (A_ij< 0) { 627 sum_strong_neg[ib]+=A_ij; 628 } else { 629 sum_strong_pos[ib]+=A_ij; 630 } 631 } 632 } 633 } 634 } 635 636 for (ib =0; ib0) { 643 beta[ib]= sum_all_pos[ib]/sum_strong_pos[ib]; 644 } else { 645 beta[ib]=0; 646 A_ii[ib]+=sum_all_pos[ib]; 647 } 648 if ( A_ii[ib] > 0.) { 649 rtmp=(-1./A_ii[ib]); 650 alpha[ib]*=rtmp; 651 beta[ib]*=rtmp; 652 } 653 } 654 655 range = main_pattern->ptr[i + 1]; 656 for (iPtr=main_pattern->ptr[i]; iPtrval[row_block_size*iPtr+ib]; 659 if (A_ij > 0 ) { 660 main_block->val[row_block_size*iPtr+ib] = A_ij * beta[ib]; 661 } else { 662 main_block->val[row_block_size*iPtr+ib] = A_ij * alpha[ib]; 663 } 664 } 665 } 666 667 range = couple_pattern->ptr[i + 1]; 668 for (iPtr=couple_pattern->ptr[i]; iPtrval[row_block_size*iPtr+ib]; 671 if (A_ij > 0 ) { 672 couple_block->val[row_block_size*iPtr+ib] = A_ij * beta[ib]; 673 } else { 674 couple_block->val[row_block_size*iPtr+ib] = A_ij * alpha[ib]; 675 } 676 } 677 } 678 679 680 } 681 }/* end i loop */ 682 TMPMEMFREE(sum_all_neg); 683 TMPMEMFREE(sum_all_pos); 684 TMPMEMFREE(sum_strong_neg); 685 TMPMEMFREE(sum_strong_pos); 686 TMPMEMFREE(alpha); 687 TMPMEMFREE(beta); 688 TMPMEMFREE(A_ii); 689 } /* end parallel region */ 690 } 691 692 /* 693 Classic Prolongation: 694 ------------------- 695 696 If row i is in C (counter_C[i]>=0), then P[i,j]=1 if j==counter_C[i] or 0 otherwise. 697 If row i is not C, then P[i,j] = - 1/a[i] * ( A[i,k] + sum_{l} A[i,l]*A+[l,k]/B[i,k]) 698 where the summation over l is considering columns which are strongly connected 699 to i (l in S[i]) and not in C (counter_C[l]<0) and 700 701 B[i,k]=sum_{m in S_i and in C} A+[k,m] 702 a[i]=A[i,i]+sum{l not strongly connected to i} A[i,l] 703 704 A+[i,k]=A[i,k] if sign(A[i,k])==sign(A[i,i]) or 0 otherwise 705 706 707 */ 708 void Paso_Preconditioner_AMG_setClassicProlongation(Paso_SystemMatrix* P, 709 Paso_SystemMatrix* A, const index_t* offset_S, const dim_t* degree_S, 710 const index_t* S, const index_t *counter_C) { 711 Paso_SparseMatrix *main_block=P->mainBlock; 712 Paso_SparseMatrix *couple_block=P->col_coupleBlock; 713 Paso_Pattern *main_pattern=main_block->pattern; 714 Paso_Pattern *couple_pattern=couple_block->pattern; 715 const dim_t my_n=A->mainBlock->numRows; 716 index_t range, range_j; 717 718 dim_t i, q; 719 double *D_s=NULL; 720 index_t *D_s_offset=NULL, iPtr, iPtr_j; 721 const index_t *ptr_main_A = Paso_SparseMatrix_borrowMainDiagonalPointer(A->mainBlock); 722 index_t *start_p_main_i, *start_p_couple_i; 723 dim_t degree_p_main_i, degree_p_couple_i; 724 dim_t len, ll, main_len, len_D_s; 725 726 len = A->col_coupleBlock->numCols; 727 len = MAX(A->remote_coupleBlock->numCols, len); 728 ll = len + my_n; 729 main_len = main_pattern->len; 730 731 #pragma omp parallel private(D_s,D_s_offset,i,start_p_main_i,start_p_couple_i,degree_p_main_i,degree_p_couple_i,range,iPtr,q,range_j,iPtr_j,len_D_s) 732 { 733 D_s=TMPMEMALLOC(ll,double); 734 D_s_offset=TMPMEMALLOC(ll,index_t); 735 736 #pragma omp for schedule(static) 737 for (i=0;i=0) { 739 main_block->val[main_pattern->ptr[i]]=1.; /* i is a C row */ 740 } else if ((main_pattern->ptr[i + 1] > main_pattern->ptr[i]) || 741 (couple_pattern->ptr[i + 1] > couple_pattern->ptr[i])) { 742 /* this loop sums up the weak connections in a and creates 743 a list of the strong connected columns which are not in 744 C (=no interpolation nodes) */ 745 const index_t *start_s = &(S[offset_S[i]]); 746 const double A_ii = A->mainBlock->val[ptr_main_A[i]]; 747 double a=A_ii; 748 749 start_p_main_i = &(main_pattern->index[main_pattern->ptr[i]]); 750 start_p_couple_i = &(couple_pattern->index[couple_pattern->ptr[i]]); 751 degree_p_main_i = main_pattern->ptr[i+1] - main_pattern->ptr[i]; 752 degree_p_couple_i = couple_pattern->ptr[i+1] - couple_pattern->ptr[i]; 753 754 /* first, check the mainBlock */ 755 range = A->mainBlock->pattern->ptr[i + 1]; 756 for (iPtr=A->mainBlock->pattern->ptr[i]; iPtrmainBlock->pattern->index[iPtr]; 758 const double A_ij=A->mainBlock->val[iPtr]; 759 if ( (i!=j) && (degree_S[j]>0) ) { 760 /* is (i,j) a strong connection ?*/ 761 const index_t *where_s=(index_t*)bsearch(&j, start_s,degree_S[i],sizeof(index_t), Paso_comparIndex); 762 if (where_s == NULL) { /* weak connections are accumulated */ 763 a+=A_ij; 764 } else { /* yes i strongly connected with j */ 765 if (counter_C[j]>=0) { /* j is an interpolation point : add A_ij into P */ 766 const index_t *where_p=(index_t*)bsearch(&counter_C[j], start_p_main_i,degree_p_main_i, sizeof(index_t), Paso_comparIndex); 767 if (where_p == NULL) { 768 Esys_setError(SYSTEM_ERROR, "Paso_Preconditioner_setClassicProlongation: interpolation point is missing."); 769 } else { 770 const index_t offset = main_pattern->ptr[i]+ (index_t)(where_p-start_p_main_i); 771 main_block->val[offset]+=A_ij; 772 } 773 } else { /* j is not an interpolation point */ 774 /* find all interpolation points m of k */ 775 double s=0.; 776 len_D_s=0; 777 778 /* first, the mainBlock part */ 779 range_j = A->mainBlock->pattern->ptr[j + 1]; 780 for (iPtr_j=A->mainBlock->pattern->ptr[j]; iPtr_jmainBlock->val[iPtr_j]; 782 const index_t m=A->mainBlock->pattern->index[iPtr_j]; 783 /* is m an interpolation point ? */ 784 const index_t *where_p_m=(index_t*)bsearch(&counter_C[m], start_p_main_i,degree_p_main_i, sizeof(index_t), Paso_comparIndex); 785 if (! (where_p_m==NULL)) { 786 const index_t offset_m = main_pattern->ptr[i]+ (index_t)(where_p_m-start_p_main_i); 787 if (! SAMESIGN(A_ii,A_jm)) { 788 D_s[len_D_s]=A_jm; 789 } else { 790 D_s[len_D_s]=0.; 791 } 792 D_s_offset[len_D_s]=offset_m; 793 len_D_s++; 794 } 795 } 796 797 /* then the coupleBlock part */ 798 if (degree_p_couple_i) { 799 range_j = A->col_coupleBlock->pattern->ptr[j + 1]; 800 for (iPtr_j=A->col_coupleBlock->pattern->ptr[j]; iPtr_jcol_coupleBlock->val[iPtr_j]; 802 const index_t m=A->col_coupleBlock->pattern->index[iPtr_j]; 803 /* is m an interpolation point ? */ 804 const index_t *where_p_m=(index_t*)bsearch(&counter_C[m+my_n], start_p_couple_i,degree_p_couple_i, sizeof(index_t), Paso_comparIndex); 805 if (! (where_p_m==NULL)) { 806 const index_t offset_m = couple_pattern->ptr[i]+ (index_t)(where_p_m-start_p_couple_i); 807 if (! SAMESIGN(A_ii,A_jm)) { 808 D_s[len_D_s]=A_jm; 809 } else { 810 D_s[len_D_s]=0.; 811 } 812 D_s_offset[len_D_s]=offset_m + main_len; 813 len_D_s++; 814 } 815 } 816 } 817 818 for (q=0;q0) { 820 s=A_ij/s; 821 for (q=0;qval[D_s_offset[q]]+=s*D_s[q]; 824 else 825 couple_block->val[D_s_offset[q]-main_len]+=s*D_s[q]; 826 } 827 } else { 828 a+=A_ij; 829 } 830 } 831 } 832 } 833 } 834 835 if (A->mpi_info->size > 1) { 836 /* now, deal with the coupleBlock */ 837 range = A->col_coupleBlock->pattern->ptr[i + 1]; 838 for (iPtr=A->col_coupleBlock->pattern->ptr[i]; iPtrcol_coupleBlock->pattern->index[iPtr]; 840 const double A_ij=A->col_coupleBlock->val[iPtr]; 841 if ( (i!=j) && (degree_S[j]>0) ) { 842 /* is (i,j) a strong connection ?*/ 843 index_t t=j+my_n; 844 const index_t *where_s=(index_t*)bsearch(&t, start_s,degree_S[i],sizeof(index_t), Paso_comparIndex); 845 if (where_s == NULL) { /* weak connections are accumulated */ 846 a+=A_ij; 847 } else { /* yes i strongly connect with j */ 848 if (counter_C[t]>=0) { /* j is an interpolation point : add A_ij into P */ 849 const index_t *where_p=(index_t*)bsearch(&counter_C[t], start_p_couple_i,degree_p_couple_i, sizeof(index_t), Paso_comparIndex); 850 if (where_p == NULL) { 851 Esys_setError(SYSTEM_ERROR, "Paso_Preconditioner_AMG_setClassicProlongation: interpolation point is missing."); 852 } else { 853 const index_t offset = couple_pattern->ptr[i]+ (index_t)(where_p-start_p_couple_i); 854 couple_block->val[offset]+=A_ij; 855 } 856 } else { /* j is not an interpolation point */ 857 /* find all interpolation points m of k */ 858 double s=0.; 859 len_D_s=0; 860 861 /* first, the row_coupleBlock part */ 862 range_j = A->row_coupleBlock->pattern->ptr[j + 1]; 863 for (iPtr_j=A->row_coupleBlock->pattern->ptr[j]; iPtr_jrow_coupleBlock->val[iPtr_j]; 865 const index_t m=A->row_coupleBlock->pattern->index[iPtr_j]; 866 /* is m an interpolation point ? */ 867 const index_t *where_p_m=(index_t*)bsearch(&counter_C[m], start_p_main_i,degree_p_main_i, sizeof(index_t), Paso_comparIndex); 868 if (! (where_p_m==NULL)) { 869 const index_t offset_m = main_pattern->ptr[i]+ (index_t)(where_p_m-start_p_main_i); 870 if (! SAMESIGN(A_ii,A_jm)) { 871 D_s[len_D_s]=A_jm; 872 } else { 873 D_s[len_D_s]=0.; 874 } 875 D_s_offset[len_D_s]=offset_m; 876 len_D_s++; 877 } 878 } 879 880 /* then the remote_coupleBlock part */ 881 range_j = A->remote_coupleBlock->pattern->ptr[j + 1]; 882 for (iPtr_j=A->remote_coupleBlock->pattern->ptr[j]; iPtr_jremote_coupleBlock->val[iPtr_j]; 884 const index_t m=A->remote_coupleBlock->pattern->index[iPtr_j]; 885 /* is m an interpolation point ? */ 886 const index_t *where_p_m=(index_t*)bsearch(&counter_C[m+my_n], start_p_couple_i, degree_p_couple_i, sizeof(index_t), Paso_comparIndex); 887 if (! (where_p_m==NULL)) { 888 const index_t offset_m = couple_pattern->ptr[i]+ (index_t)(where_p_m-start_p_couple_i); 889 if (! SAMESIGN(A_ii,A_jm)) { 890 D_s[len_D_s]=A_jm; 891 } else { 892 D_s[len_D_s]=0.; 893 } 894 D_s_offset[len_D_s]=offset_m + main_len; 895 len_D_s++; 896 } 897 } 898 899 for (q=0;q0) { 901 s=A_ij/s; 902 for (q=0;qval[D_s_offset[q]]+=s*D_s[q]; 905 else 906 couple_block->val[D_s_offset[q]-main_len]+=s*D_s[q]; 907 } 908 } else { 909 a+=A_ij; 910 } 911 } 912 } 913 } 914 } 915 } 916 917 /* i has been processed, now we need to do some rescaling */ 918 if (ABS(a)>0.) { 919 a=-1./a; 920 range = main_pattern->ptr[i + 1]; 921 for (iPtr=main_pattern->ptr[i]; iPtrval[iPtr]*=a; 923 } 924 925 range = couple_pattern->ptr[i + 1]; 926 for (iPtr=couple_pattern->ptr[i]; iPtrval[iPtr]*=a; 928 } 929 } 930 } 931 } /* end of row i loop */ 932 TMPMEMFREE(D_s); 933 TMPMEMFREE(D_s_offset); 934 } /* end of parallel region */ 935 } 936 937 void Paso_Preconditioner_AMG_setClassicProlongation_Block( 938 Paso_SystemMatrix* P, Paso_SystemMatrix* A, const index_t* offset_S, 939 const dim_t* degree_S, const index_t* S, const index_t *counter_C) { 940 Paso_SparseMatrix *main_block=P->mainBlock; 941 Paso_SparseMatrix *couple_block=P->col_coupleBlock; 942 Paso_Pattern *main_pattern=main_block->pattern; 943 Paso_Pattern *couple_pattern=couple_block->pattern; 944 const dim_t row_block=A->row_block_size; 945 const dim_t my_n=A->mainBlock->numRows; 946 const dim_t A_block = A->block_size; 947 index_t range, range_j; 948 949 dim_t i, q, ib; 950 double *D_s=NULL; 951 index_t *D_s_offset=NULL, iPtr, iPtr_j; 952 index_t *start_p_main_i, *start_p_couple_i; 953 dim_t degree_p_main_i, degree_p_couple_i; 954 dim_t len, ll, main_len, len_D_s; 955 const index_t *ptr_main_A = Paso_SparseMatrix_borrowMainDiagonalPointer(A->mainBlock); 956 957 len = A->col_coupleBlock->numCols; 958 len = MAX(A->remote_coupleBlock->numCols, len); 959 ll = len + my_n; 960 main_len = main_pattern->len; 961 #pragma omp parallel private(D_s,D_s_offset,i,ib,start_p_main_i,start_p_couple_i,degree_p_main_i,degree_p_couple_i,range,iPtr,q,range_j,iPtr_j,len_D_s) 962 { 963 double *a=TMPMEMALLOC(row_block, double); 964 D_s=TMPMEMALLOC(row_block*ll,double); 965 D_s_offset=TMPMEMALLOC(row_block*ll,index_t); 966 967 #pragma omp for private(i) schedule(static) 968 for (i=0;i=0) { 970 const index_t offset = main_pattern->ptr[i]; 971 for (ib =0; ibval[row_block*offset+ib]=1.; /* i is a C row */ 972 } else if ((main_pattern->ptr[i + 1] > main_pattern->ptr[i]) || 973 (couple_pattern->ptr[i + 1] > couple_pattern->ptr[i])) { 974 /* this loop sums up the weak connections in a and creates 975 a list of the strong connected columns which are not in 976 C (=no interpolation nodes) */ 977 const index_t *start_s = &(S[offset_S[i]]); 978 const double *A_ii = &(A->mainBlock->val[ptr_main_A[i]*A_block]); 979 start_p_main_i = &(main_pattern->index[main_pattern->ptr[i]]); 980 start_p_couple_i = &(couple_pattern->index[couple_pattern->ptr[i]]); 981 degree_p_main_i = main_pattern->ptr[i+1] - main_pattern->ptr[i]; 982 degree_p_couple_i = couple_pattern->ptr[i+1] - couple_pattern->ptr[i]; 983 for (ib=0; ibmainBlock->pattern->ptr[i + 1]; 987 for (iPtr=A->mainBlock->pattern->ptr[i]; iPtrmainBlock->pattern->index[iPtr]; 989 const double* A_ij=&(A->mainBlock->val[iPtr*A_block]); 990 991 if ( (i!=j) && (degree_S[j]>0) ) { 992 /* is (i,j) a strong connection ?*/ 993 const index_t *where_s=(index_t*)bsearch(&j, start_s,degree_S[i],sizeof(index_t), Paso_comparIndex); 994 if (where_s == NULL) { /* weak connections are accumulated */ 995 for (ib=0; ib=0) { /* j is an interpolation point : add A_ij into P */ 998 const index_t *where_p=(index_t*)bsearch(&counter_C[j], start_p_main_i,degree_p_main_i, sizeof(index_t), Paso_comparIndex); 999 if (where_p == NULL) { 1000 Esys_setError(SYSTEM_ERROR, "Paso_Preconditioner_AMG_setClassicProlongation_Block: interpolation point is missing."); 1001 } else { 1002 const index_t offset = main_pattern->ptr[i]+ (index_t)(where_p-start_p_main_i); 1003 for (ib=0; ibval[offset*row_block+ib] +=A_ij[(row_block+1)*ib]; 1004 } 1005 } else { /* j is not an interpolation point */ 1006 /* find all interpolation points m of k */ 1007 len_D_s=0; 1008 1009 /* first, the mainBlock part */ 1010 range_j = A->mainBlock->pattern->ptr[j + 1]; 1011 for (iPtr_j=A->mainBlock->pattern->ptr[j];iPtr_jmainBlock->val[iPtr_j*A_block]); 1013 const index_t m=A->mainBlock->pattern->index[iPtr_j]; 1014 /* is m an interpolation point ? */ 1015 const index_t *where_p_m=(index_t*)bsearch(&counter_C[m], start_p_main_i,degree_p_main_i, sizeof(index_t), Paso_comparIndex); 1016 if (! (where_p_m==NULL)) { 1017 const index_t offset_m = main_pattern->ptr[i]+ (index_t)(where_p_m-start_p_main_i); 1018 for (ib=0; ibcol_coupleBlock->pattern->ptr[j+1]; 1032 for (iPtr_j=A->col_coupleBlock->pattern->ptr[j];iPtr_jcol_coupleBlock->val[iPtr_j*A_block]); 1034 const index_t m=A->col_coupleBlock->pattern->index[iPtr_j]; 1035 /* is m an interpolation point ? */ 1036 const index_t *where_p_m=(index_t*)bsearch(&counter_C[m+my_n], start_p_couple_i,degree_p_couple_i, sizeof(index_t), Paso_comparIndex); 1037 if (! (where_p_m==NULL)) { 1038 const index_t offset_m = couple_pattern->ptr[i]+ (index_t)(where_p_m-start_p_couple_i); 1039 for (ib=0; ib0) { 1056 s=A_ij[(row_block+1)*ib]/s; 1057 for (q=0; qval[D_s_offset[q]*row_block+ib]+=s*D_s[q*row_block+ib]; 1060 else{ 1061 couple_block->val[(D_s_offset[q]-main_len)*row_block+ib]+=s*D_s[q*row_block+ib]; 1062 } 1063 } 1064 } else { 1065 a[ib]+=A_ij[(row_block+1)*ib]; 1066 } 1067 } 1068 } 1069 } 1070 } 1071 } 1072 1073 if (A->mpi_info->size > 1) { 1074 /* now, deal with the coupleBlock */ 1075 range = A->col_coupleBlock->pattern->ptr[i + 1]; 1076 for (iPtr=A->col_coupleBlock->pattern->ptr[i]; iPtrcol_coupleBlock->pattern->index[iPtr]; 1078 const double* A_ij=&(A->col_coupleBlock->val[iPtr*A_block]); 1079 1080 if ( (i!=j) && (degree_S[j]>0) ) { 1081 /* is (i,j) a strong connection ?*/ 1082 index_t t=j+my_n; 1083 const index_t *where_s=(index_t*)bsearch(&t, start_s,degree_S[i],sizeof(index_t), Paso_comparIndex); 1084 if (where_s == NULL) { /* weak connections are accumulated */ 1085 for (ib=0; ib=0) { /* j is an interpolation point : add A_ij into P */ 1088 const index_t *where_p=(index_t*)bsearch(&counter_C[t], start_p_couple_i,degree_p_couple_i, sizeof(index_t), Paso_comparIndex); 1089 if (where_p == NULL) { 1090 Esys_setError(SYSTEM_ERROR, "Paso_Preconditioner_AMG_setClassicProlongation_Block: interpolation point is missing."); 1091 1092 } else { 1093 const index_t offset = couple_pattern->ptr[i]+ (index_t)(where_p-start_p_couple_i); 1094 for (ib=0; ibval[offset*row_block+ib] +=A_ij[(row_block+1)*ib]; 1095 } 1096 } else { /* j is not an interpolation point */ 1097 /* find all interpolation points m of k */ 1098 len_D_s=0; 1099 1100 /* first, the row_coupleBlock part */ 1101 range_j = A->row_coupleBlock->pattern->ptr[j + 1]; 1102 for (iPtr_j=A->row_coupleBlock->pattern->ptr[j];iPtr_jrow_coupleBlock->val[iPtr_j*A_block]); 1107 m=A->row_coupleBlock->pattern->index[iPtr_j]; 1108 1109 where_p_m=(index_t*)bsearch(&counter_C[m], start_p_main_i,degree_p_main_i, sizeof(index_t), Paso_comparIndex); 1110 if (! (where_p_m==NULL)) { 1111 const index_t offset_m = main_pattern->ptr[i]+ (index_t)(where_p_m-start_p_main_i); 1112 for (ib=0; ibremote_coupleBlock->pattern->ptr[j + 1]; 1127 for (iPtr_j=A->remote_coupleBlock->pattern->ptr[j];iPtr_jremote_coupleBlock->val[iPtr_j*A_block]); 1129 const index_t m=A->remote_coupleBlock->pattern->index[iPtr_j]; 1130 /* is m an interpolation point ? */ 1131 const index_t *where_p_m=(index_t*)bsearch(&counter_C[m+my_n], start_p_couple_i,degree_p_couple_i, sizeof(index_t), Paso_comparIndex); 1132 if (! (where_p_m==NULL)) { 1133 const index_t offset_m = couple_pattern->ptr[i]+ (index_t)(where_p_m-start_p_couple_i); 1134 for (ib=0; ib0) { 1152 s=A_ij[(row_block+1)*ib]/s; 1153 for (q=0;qval[D_s_offset[q]*row_block+ib]+=s*D_s[q*row_block+ib]; 1156 else 1157 couple_block->val[(D_s_offset[q]-main_len)*row_block+ib]+=s*D_s[q*row_block+ib]; 1158 } 1159 } else { 1160 a[ib]+=A_ij[(row_block+1)*ib]; 1161 } 1162 } 1163 } 1164 } 1165 } 1166 } 1167 } 1168 1169 /* i has been processed, now we need to do some rescaling */ 1170 for (ib=0; ib0.) { 1173 a2=-1./a2; 1174 range = main_pattern->ptr[i + 1]; 1175 for (iPtr=main_pattern->ptr[i]; iPtrval[iPtr*row_block+ib]*=a2; 1177 } 1178 1179 range = couple_pattern->ptr[i + 1]; 1180 for (iPtr=couple_pattern->ptr[i]; iPtrval[iPtr*row_block+ib]*=a2; 1182 } 1183 } 1184 } 1185 } 1186 } /* end of row i loop */ 1187 TMPMEMFREE(D_s); 1188 TMPMEMFREE(D_s_offset); 1189 TMPMEMFREE(a); 1190 } /* end of parallel region */ 1191 }