/[escript]/trunk/paso/src/AMG.c
ViewVC logotype

Contents of /trunk/paso/src/AMG.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4286 - (show annotations)
Thu Mar 7 04:28:11 2013 UTC (6 years, 6 months ago) by caltinay
File MIME type: text/plain
File size: 29624 byte(s)
Assorted spelling fixes.

1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2013 by University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Open Software License version 3.0
9 * http://www.opensource.org/licenses/osl-3.0.php
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development since 2012 by School of Earth Sciences
13 *
14 *****************************************************************************/
15
16
17 /************************************************************************************/
18
19 /* Paso: AMG preconditioner (local version) */
20
21 /************************************************************************************/
22
23 /* Author: artak@uq.edu.au, l.gross@uq.edu.au */
24
25 /************************************************************************************/
26
27 #define SHOW_TIMING FALSE
28 #define MY_DEBUG 0
29 #define MY_DEBUG1 1
30
31 #include "Paso.h"
32 #include "Preconditioner.h"
33 #include "Options.h"
34 #include "PasoUtil.h"
35 #include "UMFPACK.h"
36 #include "MKL.h"
37 #include<stdio.h>
38
39
40 /************************************************************************************/
41
42 /* free all memory used by AMG */
43
44 void Paso_Preconditioner_AMG_free(Paso_Preconditioner_AMG * in) {
45 if (in!=NULL) {
46 Paso_Preconditioner_Smoother_free(in->Smoother);
47 Paso_SystemMatrix_free(in->P);
48 Paso_SystemMatrix_free(in->R);
49 Paso_SystemMatrix_free(in->A_C);
50 Paso_Preconditioner_AMG_free(in->AMG_C);
51 MEMFREE(in->r);
52 MEMFREE(in->x_C);
53 MEMFREE(in->b_C);
54 Paso_MergedSolver_free(in->merged_solver);
55
56 MEMFREE(in);
57 }
58 }
59
60 index_t Paso_Preconditioner_AMG_getMaxLevel(const Paso_Preconditioner_AMG * in) {
61 if (in->AMG_C == NULL) {
62 return in->level;
63 } else {
64 return Paso_Preconditioner_AMG_getMaxLevel(in->AMG_C);
65 }
66 }
67 double Paso_Preconditioner_AMG_getCoarseLevelSparsity(const Paso_Preconditioner_AMG * in) {
68 if (in->AMG_C == NULL) {
69 if (in->A_C == NULL) {
70 return 1.;
71 } else {
72 return Paso_SystemMatrix_getSparsity(in->A_C);
73 }
74 } else {
75 return Paso_Preconditioner_AMG_getCoarseLevelSparsity(in->AMG_C);
76 }
77 }
78 dim_t Paso_Preconditioner_AMG_getNumCoarseUnknwons(const Paso_Preconditioner_AMG * in) {
79 if (in->AMG_C == NULL) {
80 if (in->A_C == NULL) {
81 return 0;
82 } else {
83 return Paso_SystemMatrix_getTotalNumRows(in->A_C);
84 }
85 } else {
86 return Paso_Preconditioner_AMG_getNumCoarseUnknwons(in->AMG_C);
87 }
88 }
89 /***************************************************************************************
90
91 constructs AMG
92
93 ****************************************************************************************/
94 Paso_Preconditioner_AMG* Paso_Preconditioner_AMG_alloc(Paso_SystemMatrix *A_p,dim_t level,Paso_Options* options) {
95
96 Paso_Preconditioner_AMG* out=NULL;
97 Paso_SystemMatrix *A_C=NULL;
98 bool_t verbose=options->verbose;
99
100 const dim_t my_n=A_p->mainBlock->numRows;
101 const dim_t overlap_n=A_p->row_coupleBlock->numRows;
102
103 const dim_t n = my_n + overlap_n;
104
105 const dim_t n_block=A_p->row_block_size;
106 index_t* F_marker=NULL, *counter=NULL, *mask_C=NULL, *rows_in_F;
107 dim_t i, my_n_F, my_n_C, n_C, F_flag, *F_set=NULL, global_n_C=0, global_n_F=0, n_F;
108 double time0=0;
109 const double theta = options->coarsening_threshold;
110 const double tau = options->diagonal_dominance_threshold;
111 const double sparsity=Paso_SystemMatrix_getSparsity(A_p);
112 const dim_t global_n=Paso_SystemMatrix_getGlobalNumRows(A_p);
113
114
115 /*
116 is the input matrix A suitable for coarsening?
117
118 */
119 if ( (sparsity >= options->min_coarse_sparsity) ||
120 (global_n <= options->min_coarse_matrix_size) ||
121 (level > options->level_max) ) {
122
123 if (verbose) {
124 /*
125 print stopping condition:
126 - 'SPAR' = min_coarse_matrix_sparsity exceeded
127 - 'SIZE' = min_coarse_matrix_size exceeded
128 - 'LEVEL' = level_max exceeded
129 */
130 printf("Paso_Preconditioner: AMG: termination of coarsening by ");
131
132 if (sparsity >= options->min_coarse_sparsity)
133 printf("SPAR");
134
135 if (global_n <= options->min_coarse_matrix_size)
136 printf("SIZE");
137
138 if (level > options->level_max)
139 printf("LEVEL");
140
141 printf("\n");
142
143 printf("Paso_Preconditioner: AMG level %d (limit = %d) stopped. sparsity = %e (limit = %e), unknowns = %d (limit = %d)\n",
144 level, options->level_max, sparsity, options->min_coarse_sparsity, global_n, options->min_coarse_matrix_size);
145
146 }
147
148 return NULL;
149 } else {
150 /* Start Coarsening : */
151
152 /* this is the table for strong connections combining mainBlock, col_coupleBlock and row_coupleBlock */
153 const dim_t len_S=A_p->mainBlock->pattern->len + A_p->col_coupleBlock->pattern->len + A_p->row_coupleBlock->pattern->len + A_p->row_coupleBlock->numRows * A_p->col_coupleBlock->numCols;
154
155 dim_t* degree_S=TMPMEMALLOC(n, dim_t);
156 index_t *offset_S=TMPMEMALLOC(n, index_t);
157 index_t *S=TMPMEMALLOC(len_S, index_t);
158 dim_t* degree_ST=TMPMEMALLOC(n, dim_t);
159 index_t *offset_ST=TMPMEMALLOC(n, index_t);
160 index_t *ST=TMPMEMALLOC(len_S, index_t);
161
162
163 F_marker=TMPMEMALLOC(n,index_t);
164 counter=TMPMEMALLOC(n,index_t);
165
166 if ( !( Esys_checkPtr(F_marker) || Esys_checkPtr(counter) || Esys_checkPtr(degree_S) || Esys_checkPtr(offset_S) || Esys_checkPtr(S)
167 || Esys_checkPtr(degree_ST) || Esys_checkPtr(offset_ST) || Esys_checkPtr(ST) ) ) {
168 /*
169 make sure that corresponding values in the row_coupleBlock and col_coupleBlock are identical
170 */
171 Paso_SystemMatrix_copyColCoupleBlock(A_p);
172 Paso_SystemMatrix_copyRemoteCoupleBlock(A_p, FALSE);
173
174 /*
175 set splitting of unknowns:
176
177 */
178 time0=Esys_timer();
179 if (n_block>1) {
180 Paso_Preconditioner_AMG_setStrongConnections_Block(A_p, degree_S, offset_S, S, theta,tau);
181 } else {
182 Paso_Preconditioner_AMG_setStrongConnections(A_p, degree_S, offset_S, S, theta,tau);
183 }
184 Paso_Preconditioner_AMG_transposeStrongConnections(n, degree_S, offset_S, S, n, degree_ST, offset_ST, ST);
185 /* Paso_SystemMatrix_extendedRowsForST(A_p, degree_ST, offset_ST, ST);
186 */
187
188 Paso_Preconditioner_AMG_CIJPCoarsening(n,my_n,F_marker,
189 degree_S, offset_S, S, degree_ST, offset_ST, ST,
190 A_p->col_coupler->connector,A_p->col_distribution);
191
192
193 /* in BoomerAMG if interpolation is used FF connectivity is required */
194 /*MPI:
195 if (options->interpolation_method == PASO_CLASSIC_INTERPOLATION_WITH_FF_COUPLING)
196 Paso_Preconditioner_AMG_enforceFFConnectivity(n, A_p->pattern->ptr, degree_S, S, F_marker);
197 */
198
199 options->coarsening_selection_time=Esys_timer()-time0 + MAX(0, options->coarsening_selection_time);
200 if (Esys_noError() ) {
201 #pragma omp parallel for private(i) schedule(static)
202 for (i = 0; i < n; ++i) F_marker[i]=(F_marker[i] == PASO_AMG_IN_F);
203
204 /*
205 count number of unknowns to be eliminated:
206 */
207 my_n_F=Paso_Util_cumsum_maskedTrue(my_n,counter, F_marker);
208 n_F=Paso_Util_cumsum_maskedTrue(n,counter, F_marker);
209 /* collect my_n_F values on all processes, a direct solver should
210 be used if any my_n_F value is 0 */
211 F_set = TMPMEMALLOC(A_p->mpi_info->size, dim_t);
212 #ifdef ESYS_MPI
213 MPI_Allgather(&my_n_F, 1, MPI_INT, F_set, 1, MPI_INT, A_p->mpi_info->comm);
214 #endif
215 global_n_F=0;
216 F_flag = 1;
217 for (i=0; i<A_p->mpi_info->size; i++) {
218 global_n_F+=F_set[i];
219 if (F_set[i] == 0) F_flag = 0;
220 }
221 TMPMEMFREE(F_set);
222
223 my_n_C=my_n-my_n_F;
224 global_n_C=global_n-global_n_F;
225 if (verbose) printf("Paso_Preconditioner: AMG (non-local) level %d: %d unknowns are flagged for elimination. %d left.\n",level,global_n_F,global_n_C);
226
227
228 /* if ( n_F == 0 ) { is a nasty case. a direct solver should be used, return NULL */
229 if (F_flag == 0) {
230 out = NULL;
231 } else {
232 out=MEMALLOC(1,Paso_Preconditioner_AMG);
233 if (! Esys_checkPtr(out)) {
234 out->level = level;
235 out->A_C = NULL;
236 out->P = NULL;
237 out->R = NULL;
238 out->post_sweeps = options->post_sweeps;
239 out->pre_sweeps = options->pre_sweeps;
240 out->r = NULL;
241 out->x_C = NULL;
242 out->b_C = NULL;
243 out->AMG_C = NULL;
244 out->Smoother=NULL;
245 out->merged_solver=NULL;
246 }
247 mask_C=TMPMEMALLOC(n,index_t);
248 rows_in_F=TMPMEMALLOC(n_F,index_t);
249 Esys_checkPtr(mask_C);
250 Esys_checkPtr(rows_in_F);
251 if ( Esys_noError() ) {
252
253 out->Smoother = Paso_Preconditioner_Smoother_alloc(A_p, (options->smoother == PASO_JACOBI), 0, verbose);
254
255 if (global_n_C != 0) {
256 /* create mask of C nodes with value >-1, gives new id */
257 n_C=Paso_Util_cumsum_maskedFalse(n, mask_C, F_marker);
258 /* if nothing has been removed we have a diagonal dominant matrix and we just run a few steps of the smoother */
259
260 /* allocate helpers :*/
261 out->x_C=MEMALLOC(n_block*my_n_C,double);
262 out->b_C=MEMALLOC(n_block*my_n_C,double);
263 out->r=MEMALLOC(n_block*my_n,double);
264
265 Esys_checkPtr(out->r);
266 Esys_checkPtr(out->x_C);
267 Esys_checkPtr(out->b_C);
268
269 if ( Esys_noError() ) {
270 /* creates index for F:*/
271 #pragma omp parallel private(i)
272 {
273 #pragma omp for schedule(static)
274 for (i = 0; i < n; ++i) {
275 if (F_marker[i]) rows_in_F[counter[i]]=i;
276 }
277 }
278 /*
279 get Prolongation :
280 */
281
282 time0=Esys_timer();
283
284 out->P=Paso_Preconditioner_AMG_getProlongation(A_p,offset_S, degree_S,S,n_C,mask_C, options->interpolation_method);
285
286 }
287
288 /*
289 construct Restriction operator as transposed of Prolongation operator:
290 */
291
292 if ( Esys_noError()) {
293 time0=Esys_timer();
294
295 out->R=Paso_Preconditioner_AMG_getRestriction(out->P);
296
297 if (SHOW_TIMING) printf("timing: level %d: Paso_SystemMatrix_getTranspose: %e\n",level,Esys_timer()-time0);
298 }
299 /*
300 construct coarse level matrix:
301 */
302 if ( Esys_noError()) {
303 time0=Esys_timer();
304
305 A_C = Paso_Preconditioner_AMG_buildInterpolationOperator(A_p, out->P, out->R);
306
307 if (SHOW_TIMING) printf("timing: level %d : construct coarse matrix: %e\n",level,Esys_timer()-time0);
308 }
309
310 /*
311 construct coarser level:
312
313 */
314 if ( Esys_noError()) {
315 out->AMG_C=Paso_Preconditioner_AMG_alloc(A_C,level+1,options);
316 }
317
318 if ( Esys_noError()) {
319 out->A_C=A_C;
320 if ( out->AMG_C == NULL ) {
321 /* merge the system matrix into 1 rank when
322 it's not suitable coarsening due to the
323 total number of unknowns are too small */
324 out->merged_solver= Paso_MergedSolver_alloc(A_C, options);
325 }
326 }
327 }
328 }
329 TMPMEMFREE(mask_C);
330 TMPMEMFREE(rows_in_F);
331 }
332 }
333
334 }
335 TMPMEMFREE(counter);
336 TMPMEMFREE(F_marker);
337 TMPMEMFREE(degree_S);
338 TMPMEMFREE(offset_S);
339 TMPMEMFREE(S);
340 TMPMEMFREE(degree_ST);
341 TMPMEMFREE(offset_ST);
342 TMPMEMFREE(ST);
343
344 }
345
346 if (Esys_noError()) {
347 return out;
348 } else {
349 Paso_Preconditioner_AMG_free(out);
350 return NULL;
351 }
352 }
353
354
355 void Paso_Preconditioner_AMG_solve(Paso_SystemMatrix* A, Paso_Preconditioner_AMG * amg, double * x, double * b) {
356 const dim_t n = A->mainBlock->numRows * A->mainBlock->row_block_size;
357 double time0=0;
358 const dim_t post_sweeps=amg->post_sweeps;
359 const dim_t pre_sweeps=amg->pre_sweeps;
360
361 /* presmoothing */
362 time0=Esys_timer();
363 Paso_Preconditioner_Smoother_solve(A, amg->Smoother, x, b, pre_sweeps, FALSE);
364
365 time0=Esys_timer()-time0;
366 if (SHOW_TIMING) printf("timing: level %d: Presmoothing: %e\n",amg->level, time0);
367 /* end of presmoothing */
368
369 time0=Esys_timer();
370
371 Paso_Copy(n, amg->r, b); /* r <- b */
372 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(-1.,A,x,1.,amg->r); /*r=r-Ax*/
373 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(1.,amg->R,amg->r,0.,amg->b_C); /* b_c = R*r */
374
375 time0=Esys_timer()-time0;
376 /* coarse level solve */
377 if ( amg->AMG_C == NULL) {
378 time0=Esys_timer();
379 /* A_C is the coarsest level */
380 Paso_MergedSolver_solve(amg->merged_solver,amg->x_C, amg->b_C);
381 if (SHOW_TIMING) printf("timing: level %d: DIRECT SOLVER: %e\n",amg->level,Esys_timer()-time0);
382 } else {
383 Paso_Preconditioner_AMG_solve(amg->A_C, amg->AMG_C,amg->x_C,amg->b_C); /* x_C=AMG(b_C) */
384 }
385
386 time0=time0+Esys_timer();
387 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(1.,amg->P,amg->x_C,1.,x); /* x = x + P*x_c */
388
389 /*postsmoothing*/
390
391 /*solve Ax=b with initial guess x */
392 time0=Esys_timer();
393 Paso_Preconditioner_Smoother_solve(A, amg->Smoother, x, b, post_sweeps, TRUE);
394 time0=Esys_timer()-time0;
395 if (SHOW_TIMING) printf("timing: level %d: Postsmoothing: %e\n",amg->level,time0);
396 return;
397 }
398
399 /* theta = threshold for strong connections */
400 /* tau = threshold for diagonal dominance */
401
402 /*S_i={j \in N_i; i strongly coupled to j}
403
404 in the sense that |A_{ij}| >= theta * max_k |A_{ik}|
405 */
406
407 void Paso_Preconditioner_AMG_setStrongConnections(Paso_SystemMatrix* A,
408 dim_t *degree_S, index_t *offset_S, index_t *S,
409 const double theta, const double tau)
410 {
411
412 const dim_t my_n=A->mainBlock->numRows;
413 const dim_t overlap_n=A->row_coupleBlock->numRows;
414
415 index_t iptr, i;
416 double *threshold_p=NULL;
417
418 threshold_p = TMPMEMALLOC(2*my_n, double);
419
420 #pragma omp parallel for private(i,iptr) schedule(static)
421 for (i=0;i<my_n;++i) {
422
423 register double max_offdiagonal = 0.;
424 register double sum_row=0;
425 register double main_row=0;
426 register dim_t kdeg=0;
427 register const index_t koffset=A->mainBlock->pattern->ptr[i]+A->col_coupleBlock->pattern->ptr[i];
428
429
430 /* collect information for row i: */
431 #pragma ivdep
432 for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
433 register index_t j=A->mainBlock->pattern->index[iptr];
434 register double fnorm=ABS(A->mainBlock->val[iptr]);
435 if( j != i) {
436 max_offdiagonal = MAX(max_offdiagonal,fnorm);
437 sum_row+=fnorm;
438 } else {
439 main_row=fnorm;
440 }
441
442 }
443
444 #pragma ivdep
445 for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
446 register double fnorm=ABS(A->col_coupleBlock->val[iptr]);
447
448 max_offdiagonal = MAX(max_offdiagonal,fnorm);
449 sum_row+=fnorm;
450 }
451
452 /* inspect row i: */
453 {
454 const double threshold = theta*max_offdiagonal;
455 threshold_p[2*i+1]=threshold;
456 if (tau*main_row < sum_row) { /* no diagonal dominance */
457 threshold_p[2*i]=1;
458 #pragma ivdep
459 for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
460 register index_t j=A->mainBlock->pattern->index[iptr];
461 if(ABS(A->mainBlock->val[iptr])>threshold && i!=j) {
462 S[koffset+kdeg] = j;
463 kdeg++;
464 }
465 }
466 #pragma ivdep
467 for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
468 register index_t j=A->col_coupleBlock->pattern->index[iptr];
469 if(ABS(A->col_coupleBlock->val[iptr])>threshold) {
470 S[koffset+kdeg] = j + my_n;
471 kdeg++;
472 }
473 }
474 } else {
475 threshold_p[2*i]=-1;
476 }
477 }
478 offset_S[i]=koffset;
479 degree_S[i]=kdeg;
480 }
481
482 /* now we need to distribute the threshold and the diagonal dominance indicator */
483 if (A->mpi_info->size > 1) {
484
485 const index_t koffset_0=A->mainBlock->pattern->ptr[my_n]+A->col_coupleBlock->pattern->ptr[my_n]
486 -A->mainBlock->pattern->ptr[0]-A->col_coupleBlock->pattern->ptr[0];
487
488 double *remote_threshold=NULL;
489
490 Paso_Coupler* threshold_coupler=Paso_Coupler_alloc(A->row_coupler->connector ,2);
491 Paso_Coupler_startCollect(threshold_coupler,threshold_p);
492 Paso_Coupler_finishCollect(threshold_coupler);
493 remote_threshold=threshold_coupler->recv_buffer;
494
495 #pragma omp parallel for private(i,iptr) schedule(static)
496 for (i=0; i<overlap_n; i++) {
497 const double threshold = remote_threshold[2*i+1];
498 register dim_t kdeg=0;
499 register const index_t koffset=koffset_0+A->row_coupleBlock->pattern->ptr[i]+A->remote_coupleBlock->pattern->ptr[i];
500 if (remote_threshold[2*i]>0) {
501 #pragma ivdep
502 for (iptr=A->row_coupleBlock->pattern->ptr[i];iptr<A->row_coupleBlock->pattern->ptr[i+1]; ++iptr) {
503 register index_t j=A->row_coupleBlock->pattern->index[iptr];
504 if(ABS(A->row_coupleBlock->val[iptr])>threshold) {
505 S[koffset+kdeg] = j ;
506 kdeg++;
507 }
508 }
509
510 #pragma ivdep
511 for (iptr=A->remote_coupleBlock->pattern->ptr[i];iptr<A->remote_coupleBlock->pattern->ptr[i+1]; iptr++) {
512 register index_t j=A->remote_coupleBlock->pattern->index[iptr];
513 if(ABS(A->remote_coupleBlock->val[iptr])>threshold && i!=j) {
514 S[koffset+kdeg] = j + my_n;
515 kdeg++;
516 }
517 }
518 }
519 offset_S[i+my_n]=koffset;
520 degree_S[i+my_n]=kdeg;
521 }
522
523 Paso_Coupler_free(threshold_coupler);
524 }
525 TMPMEMFREE(threshold_p);
526 }
527
528 /* theta = threshold for strong connections */
529 /* tau = threshold for diagonal dominance */
530 /*S_i={j \in N_i; i strongly coupled to j}
531
532 in the sense that |A_{ij}|_F >= theta * max_k |A_{ik}|_F
533 */
534 void Paso_Preconditioner_AMG_setStrongConnections_Block(Paso_SystemMatrix* A,
535 dim_t *degree_S, index_t *offset_S, index_t *S,
536 const double theta, const double tau)
537
538 {
539 const dim_t block_size=A->block_size;
540 const dim_t my_n=A->mainBlock->numRows;
541 const dim_t overlap_n=A->row_coupleBlock->numRows;
542
543 index_t iptr, i, bi;
544 double *threshold_p=NULL;
545
546
547 threshold_p = TMPMEMALLOC(2*my_n, double);
548
549 #pragma omp parallel private(i,iptr,bi)
550 {
551
552 dim_t max_deg=0;
553 double *rtmp=NULL;
554
555 #pragma omp for schedule(static)
556 for (i=0;i<my_n;++i) max_deg=MAX(max_deg, A->mainBlock->pattern->ptr[i+1]-A->mainBlock->pattern->ptr[i]
557 +A->col_coupleBlock->pattern->ptr[i+1]-A->col_coupleBlock->pattern->ptr[i]);
558
559 rtmp=TMPMEMALLOC(max_deg, double);
560
561 #pragma omp for schedule(static)
562 for (i=0;i<my_n;++i) {
563 register double max_offdiagonal = 0.;
564 register double sum_row=0;
565 register double main_row=0;
566 register index_t rtmp_offset=-A->mainBlock->pattern->ptr[i];
567 register dim_t kdeg=0;
568 register const index_t koffset=A->mainBlock->pattern->ptr[i]+A->col_coupleBlock->pattern->ptr[i];
569
570 /* collect information for row i: */
571 for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
572 register index_t j=A->mainBlock->pattern->index[iptr];
573 register double fnorm=0;
574 #pragma ivdep
575 for(bi=0;bi<block_size;++bi) {
576 register double rtmp2= A->mainBlock->val[iptr*block_size+bi];
577 fnorm+=rtmp2*rtmp2;
578 }
579 fnorm=sqrt(fnorm);
580 rtmp[iptr+rtmp_offset]=fnorm;
581
582 if( j != i) {
583 max_offdiagonal = MAX(max_offdiagonal,fnorm);
584 sum_row+=fnorm;
585 } else {
586 main_row=fnorm;
587 }
588
589 }
590
591 rtmp_offset+=A->mainBlock->pattern->ptr[i+1]-A->col_coupleBlock->pattern->ptr[i];
592 for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
593 register double fnorm=0;
594 #pragma ivdep
595 for(bi=0;bi<block_size;++bi) {
596 register double rtmp2 = A->col_coupleBlock->val[iptr*block_size+bi];
597 fnorm+=rtmp2*rtmp2;
598 }
599 fnorm=sqrt(fnorm);
600
601 rtmp[iptr+rtmp_offset]=fnorm;
602 max_offdiagonal = MAX(max_offdiagonal,fnorm);
603 sum_row+=fnorm;
604 }
605
606
607 /* inspect row i: */
608 {
609 const double threshold = theta*max_offdiagonal;
610 rtmp_offset=-A->mainBlock->pattern->ptr[i];
611
612 threshold_p[2*i+1]=threshold;
613 if (tau*main_row < sum_row) { /* no diagonal dominance */
614 threshold_p[2*i]=1;
615 #pragma ivdep
616 for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
617 register index_t j=A->mainBlock->pattern->index[iptr];
618 if(rtmp[iptr+rtmp_offset] > threshold && i!=j) {
619 S[koffset+kdeg] = j;
620 kdeg++;
621 }
622 }
623 rtmp_offset+=A->mainBlock->pattern->ptr[i+1]-A->col_coupleBlock->pattern->ptr[i];
624 #pragma ivdep
625 for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
626 register index_t j=A->col_coupleBlock->pattern->index[iptr];
627 if( rtmp[iptr+rtmp_offset] >threshold) {
628 S[koffset+kdeg] = j + my_n;
629 kdeg++;
630 }
631 }
632 } else {
633 threshold_p[2*i]=-1;
634 }
635 }
636 offset_S[i]=koffset;
637 degree_S[i]=kdeg;
638 }
639 TMPMEMFREE(rtmp);
640 }
641 /* now we need to distribute the threshold and the diagonal dominance indicator */
642 if (A->mpi_info->size > 1) {
643
644 const index_t koffset_0=A->mainBlock->pattern->ptr[my_n]+A->col_coupleBlock->pattern->ptr[my_n]
645 -A->mainBlock->pattern->ptr[0]-A->col_coupleBlock->pattern->ptr[0];
646
647 double *remote_threshold=NULL;
648
649 Paso_Coupler* threshold_coupler=Paso_Coupler_alloc(A->row_coupler->connector ,2);
650 Paso_Coupler_startCollect(threshold_coupler,threshold_p);
651 Paso_Coupler_finishCollect(threshold_coupler);
652 remote_threshold=threshold_coupler->recv_buffer;
653
654 #pragma omp parallel for private(i,iptr) schedule(static)
655 for (i=0; i<overlap_n; i++) {
656
657 const double threshold2 = remote_threshold[2*i+1]*remote_threshold[2*i+1];
658 register dim_t kdeg=0;
659 register const index_t koffset=koffset_0+A->row_coupleBlock->pattern->ptr[i]+A->remote_coupleBlock->pattern->ptr[i];
660 if (remote_threshold[2*i]>0) {
661 #pragma ivdep
662 for (iptr=A->row_coupleBlock->pattern->ptr[i];iptr<A->row_coupleBlock->pattern->ptr[i+1]; ++iptr) {
663 register index_t j=A->row_coupleBlock->pattern->index[iptr];
664 register double fnorm2=0;
665 #pragma ivdepremote_threshold[2*i]
666 for(bi=0;bi<block_size;++bi) {
667 register double rtmp2 = A->row_coupleBlock->val[iptr*block_size+bi];
668 fnorm2+=rtmp2*rtmp2;
669 }
670
671 if(fnorm2 > threshold2 ) {
672 S[koffset+kdeg] = j ;
673 kdeg++;
674 }
675 }
676
677 #pragma ivdep
678 for (iptr=A->remote_coupleBlock->pattern->ptr[i];iptr<A->remote_coupleBlock->pattern->ptr[i+1]; ++iptr) {
679 register index_t j=A->remote_coupleBlock->pattern->index[iptr];
680 register double fnorm2=0;
681 #pragma ivdepremote_threshold[2*i]
682 for(bi=0;bi<block_size;++bi) {
683 register double rtmp2 = A->remote_coupleBlock->val[iptr*block_size+bi];
684 fnorm2+=rtmp2*rtmp2;
685 }
686 if(fnorm2 > threshold2 && i != j) {
687 S[koffset+kdeg] = j + my_n;
688 kdeg++;
689 }
690 }
691
692 }
693 offset_S[i+my_n]=koffset;
694 degree_S[i+my_n]=kdeg;
695 }
696 Paso_Coupler_free(threshold_coupler);
697 }
698 TMPMEMFREE(threshold_p);
699 }
700
701 void Paso_Preconditioner_AMG_transposeStrongConnections(const dim_t n, const dim_t* degree_S, const index_t* offset_S, const index_t* S,
702 const dim_t nT, dim_t* degree_ST, index_t* offset_ST,index_t* ST)
703 {
704 index_t i, j;
705 dim_t p;
706 dim_t len=0;
707 #pragma omp parallel for private(i) schedule(static)
708 for (i=0; i<nT ;++i) {
709 degree_ST[i]=0;
710 }
711 for (i=0; i<n ;++i) {
712 for (p=0; p<degree_S[i]; ++p) degree_ST[ S[offset_S[i]+p] ]++;
713 }
714 for (i=0; i<nT ;++i) {
715 offset_ST[i]=len;
716 len+=degree_ST[i];
717 degree_ST[i]=0;
718 }
719 for (i=0; i<n ;++i) {
720 for (p=0; p<degree_S[i]; ++p) {
721 j=S[offset_S[i]+p];
722 ST[offset_ST[j]+degree_ST[j]]=i;
723 degree_ST[j]++;
724 }
725 }
726 }
727
728 int compareindex(const void *a, const void *b)
729 {
730 return (*(int *)a - *(int *)b);
731 }
732
733 void Paso_Preconditioner_AMG_CIJPCoarsening(const dim_t n, const dim_t my_n, index_t*split_marker,
734 const dim_t* degree_S, const index_t* offset_S, const index_t* S,
735 const dim_t* degree_ST, const index_t* offset_ST, const index_t* ST,
736 Paso_Connector* col_connector, Paso_Distribution* col_dist)
737 {
738 dim_t i, numUndefined, iter=0;
739 index_t iptr, jptr, kptr;
740 double *random=NULL, *w=NULL, *Status=NULL;
741 index_t * ST_flag=NULL;
742
743 Paso_Coupler* w_coupler=Paso_Coupler_alloc(col_connector ,1);
744
745 w=TMPMEMALLOC(n, double);
746 Status=TMPMEMALLOC(n, double);
747 random = Paso_Distribution_createRandomVector(col_dist,1);
748 ST_flag=TMPMEMALLOC(offset_ST[n-1]+ degree_ST[n-1], index_t);
749
750 #pragma omp parallel for private(i)
751 for (i=0; i< my_n; ++i) {
752 w[i]=degree_ST[i]+random[i];
753 if (degree_ST[i] < 1) {
754 Status[i]=-100; /* F point */
755 } else {
756 Status[i]=1; /* status undefined */
757 }
758 }
759
760 #pragma omp parallel for private(i, iptr)
761 for (i=0; i< n; ++i) {
762 for( iptr =0 ; iptr < degree_ST[i]; ++iptr) {
763 ST_flag[offset_ST[i]+iptr]=1;
764 }
765 }
766
767
768 numUndefined = Paso_Distribution_numPositives(Status, col_dist, 1 );
769 /* printf(" coarsening loop start: num of undefined rows = %d \n",numUndefined); */
770 iter=0;
771 while (numUndefined > 0) {
772 Paso_Coupler_fillOverlap(n, w, w_coupler);
773
774 /* calculate the maximum value of neighbours following active strong connections:
775 w2[i]=MAX(w[k]) with k in ST[i] or S[i] and (i,k) connection is still active */
776 #pragma omp parallel for private(i, iptr)
777 for (i=0; i<my_n; ++i) {
778 if (Status[i]>0) { /* status is still undefined */
779
780 register bool_t inD=TRUE;
781 const double wi=w[i];
782
783 for( iptr =0 ; iptr < degree_S[i]; ++iptr) {
784 const index_t k=S[offset_S[i]+iptr];
785 const index_t* start_p = &ST[offset_ST[k]];
786 const index_t* where_p=(index_t*)bsearch(&i, start_p, degree_ST[k], sizeof(index_t), Paso_comparIndex);
787
788 if (ST_flag[offset_ST[k] + (index_t)(where_p-start_p)]>0) {
789 if (wi <= w[k] ) {
790 inD=FALSE;
791 break;
792 }
793 }
794
795 }
796
797 if (inD) {
798 for( iptr =0 ; iptr < degree_ST[i]; ++iptr) {
799 const index_t k=ST[offset_ST[i]+iptr];
800 if ( ST_flag[offset_ST[i]+iptr] > 0 ) {
801 if (wi <= w[k] ) {
802 inD=FALSE;
803 break;
804 }
805 }
806 }
807 }
808 if (inD) {
809 Status[i]=0.; /* is in D */
810 }
811 }
812
813 }
814
815 Paso_Coupler_fillOverlap(n, Status, w_coupler);
816
817
818 /* remove connection to D points :
819
820 for each i in D:
821 for each j in S_i:
822 w[j]--
823 ST_tag[j,i]=-1
824 for each j in ST[i]:
825 ST_tag[i,j]=-1
826 for each k in ST[j]:
827 if k in ST[i]:
828 w[j]--;
829 ST_tag[j,k]=-1
830
831 */
832 /* w is updated for local rows only */
833 {
834 #pragma omp parallel for private(i, jptr)
835 for (i=0; i< my_n; ++i) {
836
837 for (jptr=0; jptr<degree_ST[i]; ++jptr) {
838 const index_t j=ST[offset_ST[i]+jptr];
839 if ( (Status[j] == 0.) && (ST_flag[offset_ST[i]+jptr]>0) ) {
840 w[i]--;
841 ST_flag[offset_ST[i]+jptr]=-1;
842 }
843 }
844
845 }
846 #pragma omp parallel for private(i, jptr)
847 for (i=my_n; i< n; ++i) {
848 for (jptr=0; jptr<degree_ST[i]; ++jptr) {
849 const index_t j = ST[offset_ST[i]+jptr];
850 if ( Status[j] == 0. ) ST_flag[offset_ST[i]+jptr]=-1;
851 }
852 }
853
854
855 for (i=0; i< n; ++i) {
856 if ( Status[i] == 0. ) {
857
858 const index_t* start_p = &ST[offset_ST[i]];
859
860 for (jptr=0; jptr<degree_ST[i]; ++jptr) {
861 const index_t j=ST[offset_ST[i]+jptr];
862 ST_flag[offset_ST[i]+jptr]=-1;
863 for (kptr=0; kptr<degree_ST[j]; ++kptr) {
864 const index_t k=ST[offset_ST[j]+kptr];
865 if (NULL != bsearch(&k, start_p, degree_ST[i], sizeof(index_t), Paso_comparIndex) ) { /* k in ST[i] ? */
866 if (ST_flag[offset_ST[j]+kptr] >0) {
867 if (j< my_n ) {
868 w[j]--;
869 }
870 ST_flag[offset_ST[j]+kptr]=-1;
871 }
872 }
873 }
874 }
875 }
876 }
877 }
878
879 /* adjust status */
880 #pragma omp parallel for private(i)
881 for (i=0; i< my_n; ++i) {
882 if ( Status[i] == 0. ) {
883 Status[i] = -10; /* this is now a C point */
884 } else if (Status[i] == 1. && w[i]<1.) {
885 Status[i] = -100; /* this is now a F point */
886 }
887 }
888
889 i = numUndefined;
890 numUndefined = Paso_Distribution_numPositives(Status, col_dist, 1 );
891 if (numUndefined == i) {
892 Esys_setError(SYSTEM_ERROR, "Can NOT reduce numUndefined.");
893 return;
894 }
895
896 iter++;
897 /* printf(" coarsening loop %d: num of undefined rows = %d \n",iter, numUndefined); */
898
899 } /* end of while loop */
900
901 /* map to output :*/
902 Paso_Coupler_fillOverlap(n, Status, w_coupler);
903 #pragma omp parallel for private(i)
904 for (i=0; i< n; ++i) {
905 if (Status[i] > -50.) {
906 split_marker[i]=PASO_AMG_IN_C;
907 } else {
908 split_marker[i]=PASO_AMG_IN_F;
909 }
910 }
911 /* clean up : */
912 Paso_Coupler_free(w_coupler);
913 TMPMEMFREE(random);
914 TMPMEMFREE(w);
915 TMPMEMFREE(Status);
916 TMPMEMFREE(ST_flag);
917
918 return;
919 }
920

  ViewVC Help
Powered by ViewVC 1.1.26