/[escript]/trunk/paso/src/AMG.c
ViewVC logotype

Annotation of /trunk/paso/src/AMG.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4286 - (hide annotations)
Thu Mar 7 04:28:11 2013 UTC (6 years, 6 months ago) by caltinay
File MIME type: text/plain
File size: 29624 byte(s)
Assorted spelling fixes.

1 gross 3193
2 jfenwick 3981 /*****************************************************************************
3 gross 3193 *
4 jfenwick 4154 * Copyright (c) 2003-2013 by University of Queensland
5 jfenwick 3981 * http://www.uq.edu.au
6 gross 3193 *
7     * Primary Business: Queensland, Australia
8     * Licensed under the Open Software License version 3.0
9     * http://www.opensource.org/licenses/osl-3.0.php
10     *
11 jfenwick 3981 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12     * Development since 2012 by School of Earth Sciences
13     *
14     *****************************************************************************/
15 gross 3193
16    
17 jfenwick 3981 /************************************************************************************/
18 gross 3193
19 gross 3441 /* Paso: AMG preconditioner (local version) */
20 gross 3193
21 jfenwick 3981 /************************************************************************************/
22 gross 3193
23 caltinay 3642 /* Author: artak@uq.edu.au, l.gross@uq.edu.au */
24 gross 3193
25 jfenwick 3981 /************************************************************************************/
26 gross 3193
27 gross 3283 #define SHOW_TIMING FALSE
28 lgao 3827 #define MY_DEBUG 0
29     #define MY_DEBUG1 1
30 gross 3283
31 gross 3193 #include "Paso.h"
32     #include "Preconditioner.h"
33     #include "Options.h"
34     #include "PasoUtil.h"
35     #include "UMFPACK.h"
36     #include "MKL.h"
37 gross 3458 #include<stdio.h>
38 gross 3193
39 gross 3458
40 jfenwick 3981 /************************************************************************************/
41 gross 3193
42     /* free all memory used by AMG */
43    
44 gross 3441 void Paso_Preconditioner_AMG_free(Paso_Preconditioner_AMG * in) {
45 gross 3193 if (in!=NULL) {
46 gross 3441 Paso_Preconditioner_Smoother_free(in->Smoother);
47     Paso_SystemMatrix_free(in->P);
48     Paso_SystemMatrix_free(in->R);
49     Paso_SystemMatrix_free(in->A_C);
50     Paso_Preconditioner_AMG_free(in->AMG_C);
51 gross 3283 MEMFREE(in->r);
52     MEMFREE(in->x_C);
53     MEMFREE(in->b_C);
54 gross 3887 Paso_MergedSolver_free(in->merged_solver);
55 gross 3193
56 gross 3283 MEMFREE(in);
57 gross 3193 }
58     }
59    
60 gross 3441 index_t Paso_Preconditioner_AMG_getMaxLevel(const Paso_Preconditioner_AMG * in) {
61 gross 3402 if (in->AMG_C == NULL) {
62     return in->level;
63     } else {
64 gross 3441 return Paso_Preconditioner_AMG_getMaxLevel(in->AMG_C);
65 gross 3402 }
66     }
67 gross 3441 double Paso_Preconditioner_AMG_getCoarseLevelSparsity(const Paso_Preconditioner_AMG * in) {
68 gross 3403 if (in->AMG_C == NULL) {
69     if (in->A_C == NULL) {
70     return 1.;
71     } else {
72 gross 3442 return Paso_SystemMatrix_getSparsity(in->A_C);
73 gross 3403 }
74     } else {
75 gross 3441 return Paso_Preconditioner_AMG_getCoarseLevelSparsity(in->AMG_C);
76 gross 3403 }
77 gross 3402 }
78 gross 3441 dim_t Paso_Preconditioner_AMG_getNumCoarseUnknwons(const Paso_Preconditioner_AMG * in) {
79 gross 3402 if (in->AMG_C == NULL) {
80 gross 3403 if (in->A_C == NULL) {
81     return 0;
82     } else {
83 gross 3445 return Paso_SystemMatrix_getTotalNumRows(in->A_C);
84 gross 3403 }
85 gross 3402 } else {
86 gross 3441 return Paso_Preconditioner_AMG_getNumCoarseUnknwons(in->AMG_C);
87 gross 3402 }
88     }
89 jfenwick 3981 /***************************************************************************************
90 gross 3193
91 gross 3283 constructs AMG
92    
93 jfenwick 3981 ****************************************************************************************/
94 gross 3441 Paso_Preconditioner_AMG* Paso_Preconditioner_AMG_alloc(Paso_SystemMatrix *A_p,dim_t level,Paso_Options* options) {
95 gross 3193
96 gross 3441 Paso_Preconditioner_AMG* out=NULL;
97 lgao 3828 Paso_SystemMatrix *A_C=NULL;
98 gross 3193 bool_t verbose=options->verbose;
99 gross 3445
100 gross 3482 const dim_t my_n=A_p->mainBlock->numRows;
101     const dim_t overlap_n=A_p->row_coupleBlock->numRows;
102    
103 gross 3445 const dim_t n = my_n + overlap_n;
104    
105 gross 3193 const dim_t n_block=A_p->row_block_size;
106 lgao 3827 index_t* F_marker=NULL, *counter=NULL, *mask_C=NULL, *rows_in_F;
107 gross 3886 dim_t i, my_n_F, my_n_C, n_C, F_flag, *F_set=NULL, global_n_C=0, global_n_F=0, n_F;
108 gross 3283 double time0=0;
109     const double theta = options->coarsening_threshold;
110     const double tau = options->diagonal_dominance_threshold;
111 gross 3442 const double sparsity=Paso_SystemMatrix_getSparsity(A_p);
112 gross 3885 const dim_t global_n=Paso_SystemMatrix_getGlobalNumRows(A_p);
113 plaub 3436
114 lgao 3827
115 gross 3193 /*
116 caltinay 3642 is the input matrix A suitable for coarsening?
117 gross 3283
118 gross 3193 */
119 gross 3442 if ( (sparsity >= options->min_coarse_sparsity) ||
120 gross 3885 (global_n <= options->min_coarse_matrix_size) ||
121 gross 3442 (level > options->level_max) ) {
122 plaub 3436
123 gross 3445 if (verbose) {
124 plaub 3436 /*
125     print stopping condition:
126     - 'SPAR' = min_coarse_matrix_sparsity exceeded
127     - 'SIZE' = min_coarse_matrix_size exceeded
128     - 'LEVEL' = level_max exceeded
129     */
130 gross 3442 printf("Paso_Preconditioner: AMG: termination of coarsening by ");
131 plaub 3436
132 gross 3442 if (sparsity >= options->min_coarse_sparsity)
133 caltinay 3642 printf("SPAR");
134 plaub 3436
135 gross 3885 if (global_n <= options->min_coarse_matrix_size)
136 caltinay 3642 printf("SIZE");
137 plaub 3436
138     if (level > options->level_max)
139 caltinay 3642 printf("LEVEL");
140 plaub 3436
141     printf("\n");
142    
143     printf("Paso_Preconditioner: AMG level %d (limit = %d) stopped. sparsity = %e (limit = %e), unknowns = %d (limit = %d)\n",
144 gross 3885 level, options->level_max, sparsity, options->min_coarse_sparsity, global_n, options->min_coarse_matrix_size);
145 plaub 3436
146 gross 3445 }
147 plaub 3436
148 gross 3445 return NULL;
149     } else {
150 gross 3451 /* Start Coarsening : */
151 gross 3482
152     /* this is the table for strong connections combining mainBlock, col_coupleBlock and row_coupleBlock */
153 lgao 3827 const dim_t len_S=A_p->mainBlock->pattern->len + A_p->col_coupleBlock->pattern->len + A_p->row_coupleBlock->pattern->len + A_p->row_coupleBlock->numRows * A_p->col_coupleBlock->numCols;
154 gross 3482
155     dim_t* degree_S=TMPMEMALLOC(n, dim_t);
156     index_t *offset_S=TMPMEMALLOC(n, index_t);
157 gross 3451 index_t *S=TMPMEMALLOC(len_S, index_t);
158 gross 3482 dim_t* degree_ST=TMPMEMALLOC(n, dim_t);
159     index_t *offset_ST=TMPMEMALLOC(n, index_t);
160     index_t *ST=TMPMEMALLOC(len_S, index_t);
161    
162    
163 gross 3440 F_marker=TMPMEMALLOC(n,index_t);
164 gross 3283 counter=TMPMEMALLOC(n,index_t);
165 gross 3445
166 gross 3482 if ( !( Esys_checkPtr(F_marker) || Esys_checkPtr(counter) || Esys_checkPtr(degree_S) || Esys_checkPtr(offset_S) || Esys_checkPtr(S)
167     || Esys_checkPtr(degree_ST) || Esys_checkPtr(offset_ST) || Esys_checkPtr(ST) ) ) {
168 gross 3458 /*
169     make sure that corresponding values in the row_coupleBlock and col_coupleBlock are identical
170     */
171 gross 3482 Paso_SystemMatrix_copyColCoupleBlock(A_p);
172 lgao 3827 Paso_SystemMatrix_copyRemoteCoupleBlock(A_p, FALSE);
173 gross 3458
174     /*
175 caltinay 4286 set splitting of unknowns:
176 gross 3458
177 gross 3283 */
178     time0=Esys_timer();
179     if (n_block>1) {
180 gross 3445 Paso_Preconditioner_AMG_setStrongConnections_Block(A_p, degree_S, offset_S, S, theta,tau);
181 gross 3283 } else {
182 gross 3445 Paso_Preconditioner_AMG_setStrongConnections(A_p, degree_S, offset_S, S, theta,tau);
183 gross 3283 }
184 gross 3482 Paso_Preconditioner_AMG_transposeStrongConnections(n, degree_S, offset_S, S, n, degree_ST, offset_ST, ST);
185 lgao 3828 /* Paso_SystemMatrix_extendedRowsForST(A_p, degree_ST, offset_ST, ST);
186     */
187 lgao 3827
188 gross 3482 Paso_Preconditioner_AMG_CIJPCoarsening(n,my_n,F_marker,
189     degree_S, offset_S, S, degree_ST, offset_ST, ST,
190     A_p->col_coupler->connector,A_p->col_distribution);
191 gross 3449
192 gross 3442
193 caltinay 3642 /* in BoomerAMG if interpolation is used FF connectivity is required */
194 gross 3442 /*MPI:
195 gross 3440 if (options->interpolation_method == PASO_CLASSIC_INTERPOLATION_WITH_FF_COUPLING)
196     Paso_Preconditioner_AMG_enforceFFConnectivity(n, A_p->pattern->ptr, degree_S, S, F_marker);
197 gross 3442 */
198    
199 gross 3283 options->coarsening_selection_time=Esys_timer()-time0 + MAX(0, options->coarsening_selection_time);
200     if (Esys_noError() ) {
201     #pragma omp parallel for private(i) schedule(static)
202 gross 3440 for (i = 0; i < n; ++i) F_marker[i]=(F_marker[i] == PASO_AMG_IN_F);
203 gross 3283
204     /*
205 caltinay 4286 count number of unknowns to be eliminated:
206 gross 3283 */
207 gross 3886 my_n_F=Paso_Util_cumsum_maskedTrue(my_n,counter, F_marker);
208     n_F=Paso_Util_cumsum_maskedTrue(n,counter, F_marker);
209     /* collect my_n_F values on all processes, a direct solver should
210     be used if any my_n_F value is 0 */
211 lgao 3827 F_set = TMPMEMALLOC(A_p->mpi_info->size, dim_t);
212 lgao 3828 #ifdef ESYS_MPI
213 gross 3886 MPI_Allgather(&my_n_F, 1, MPI_INT, F_set, 1, MPI_INT, A_p->mpi_info->comm);
214 lgao 3828 #endif
215 gross 3885 global_n_F=0;
216 lgao 3827 F_flag = 1;
217     for (i=0; i<A_p->mpi_info->size; i++) {
218 gross 3885 global_n_F+=F_set[i];
219     if (F_set[i] == 0) F_flag = 0;
220 lgao 3827 }
221     TMPMEMFREE(F_set);
222 gross 3884
223 gross 3886 my_n_C=my_n-my_n_F;
224 gross 3885 global_n_C=global_n-global_n_F;
225     if (verbose) printf("Paso_Preconditioner: AMG (non-local) level %d: %d unknowns are flagged for elimination. %d left.\n",level,global_n_F,global_n_C);
226 gross 3884
227 lgao 3827
228 lgao 3828 /* if ( n_F == 0 ) { is a nasty case. a direct solver should be used, return NULL */
229 lgao 3827 if (F_flag == 0) {
230 gross 3283 out = NULL;
231     } else {
232 gross 3441 out=MEMALLOC(1,Paso_Preconditioner_AMG);
233 gross 3323 if (! Esys_checkPtr(out)) {
234 gross 3283 out->level = level;
235     out->A_C = NULL;
236     out->P = NULL;
237     out->R = NULL;
238 lgao 3828 out->post_sweeps = options->post_sweeps;
239     out->pre_sweeps = options->pre_sweeps;
240 gross 3283 out->r = NULL;
241     out->x_C = NULL;
242     out->b_C = NULL;
243     out->AMG_C = NULL;
244 gross 3323 out->Smoother=NULL;
245 gross 3887 out->merged_solver=NULL;
246 gross 3323 }
247     mask_C=TMPMEMALLOC(n,index_t);
248     rows_in_F=TMPMEMALLOC(n_F,index_t);
249     Esys_checkPtr(mask_C);
250     Esys_checkPtr(rows_in_F);
251     if ( Esys_noError() ) {
252    
253 lgao 3827 out->Smoother = Paso_Preconditioner_Smoother_alloc(A_p, (options->smoother == PASO_JACOBI), 0, verbose);
254 gross 3323
255 gross 3885 if (global_n_C != 0) {
256 gross 3886 /* create mask of C nodes with value >-1, gives new id */
257     n_C=Paso_Util_cumsum_maskedFalse(n, mask_C, F_marker);
258     /* if nothing has been removed we have a diagonal dominant matrix and we just run a few steps of the smoother */
259 gross 3283
260 gross 3315 /* allocate helpers :*/
261 gross 3886 out->x_C=MEMALLOC(n_block*my_n_C,double);
262     out->b_C=MEMALLOC(n_block*my_n_C,double);
263     out->r=MEMALLOC(n_block*my_n,double);
264 gross 3315
265     Esys_checkPtr(out->r);
266     Esys_checkPtr(out->x_C);
267     Esys_checkPtr(out->b_C);
268    
269 gross 3323 if ( Esys_noError() ) {
270     /* creates index for F:*/
271     #pragma omp parallel private(i)
272     {
273     #pragma omp for schedule(static)
274     for (i = 0; i < n; ++i) {
275 gross 3440 if (F_marker[i]) rows_in_F[counter[i]]=i;
276 gross 3323 }
277     }
278     /*
279 gross 3440 get Prolongation :
280 gross 3323 */
281 lgao 3827
282 gross 3323 time0=Esys_timer();
283 lgao 3827
284     out->P=Paso_Preconditioner_AMG_getProlongation(A_p,offset_S, degree_S,S,n_C,mask_C, options->interpolation_method);
285    
286 gross 3283 }
287 lgao 3827
288 gross 3323 /*
289 gross 3440 construct Restriction operator as transposed of Prolongation operator:
290 gross 3283 */
291 lgao 3827
292 gross 3315 if ( Esys_noError()) {
293     time0=Esys_timer();
294 lgao 3827
295     out->R=Paso_Preconditioner_AMG_getRestriction(out->P);
296    
297 gross 3441 if (SHOW_TIMING) printf("timing: level %d: Paso_SystemMatrix_getTranspose: %e\n",level,Esys_timer()-time0);
298 gross 3315 }
299 lgao 3827 /*
300     construct coarse level matrix:
301     */
302     if ( Esys_noError()) {
303     time0=Esys_timer();
304 gross 3442
305 lgao 3827 A_C = Paso_Preconditioner_AMG_buildInterpolationOperator(A_p, out->P, out->R);
306 gross 3315
307 lgao 3827 if (SHOW_TIMING) printf("timing: level %d : construct coarse matrix: %e\n",level,Esys_timer()-time0);
308     }
309    
310 gross 3283 /*
311 caltinay 4286 construct coarser level:
312 gross 3283
313     */
314     if ( Esys_noError()) {
315 gross 3441 out->AMG_C=Paso_Preconditioner_AMG_alloc(A_C,level+1,options);
316 gross 3315 }
317 lgao 3827
318 gross 3315 if ( Esys_noError()) {
319 gross 3887 out->A_C=A_C;
320 lgao 3827 if ( out->AMG_C == NULL ) {
321     /* merge the system matrix into 1 rank when
322     it's not suitable coarsening due to the
323     total number of unknowns are too small */
324 gross 3887 out->merged_solver= Paso_MergedSolver_alloc(A_C, options);
325 lgao 3827 }
326 gross 3283 }
327     }
328     }
329     TMPMEMFREE(mask_C);
330     TMPMEMFREE(rows_in_F);
331     }
332     }
333 gross 3445
334 gross 3193 }
335     TMPMEMFREE(counter);
336 gross 3440 TMPMEMFREE(F_marker);
337     TMPMEMFREE(degree_S);
338 gross 3445 TMPMEMFREE(offset_S);
339 gross 3303 TMPMEMFREE(S);
340 gross 3482 TMPMEMFREE(degree_ST);
341     TMPMEMFREE(offset_ST);
342     TMPMEMFREE(ST);
343    
344 gross 3445 }
345 gross 3193
346 jfenwick 3259 if (Esys_noError()) {
347 gross 3193 return out;
348     } else {
349 gross 3441 Paso_Preconditioner_AMG_free(out);
350 gross 3193 return NULL;
351     }
352     }
353    
354    
355 gross 3441 void Paso_Preconditioner_AMG_solve(Paso_SystemMatrix* A, Paso_Preconditioner_AMG * amg, double * x, double * b) {
356 lgao 3827 const dim_t n = A->mainBlock->numRows * A->mainBlock->row_block_size;
357 gross 3283 double time0=0;
358     const dim_t post_sweeps=amg->post_sweeps;
359     const dim_t pre_sweeps=amg->pre_sweeps;
360 gross 3193
361 gross 3283 /* presmoothing */
362     time0=Esys_timer();
363 gross 3442 Paso_Preconditioner_Smoother_solve(A, amg->Smoother, x, b, pre_sweeps, FALSE);
364 lgao 3827
365 gross 3283 time0=Esys_timer()-time0;
366 caltinay 3642 if (SHOW_TIMING) printf("timing: level %d: Presmoothing: %e\n",amg->level, time0);
367 gross 3283 /* end of presmoothing */
368    
369 gross 3886 time0=Esys_timer();
370 lgao 3827
371 gross 3886 Paso_Copy(n, amg->r, b); /* r <- b */
372     Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(-1.,A,x,1.,amg->r); /*r=r-Ax*/
373     Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(1.,amg->R,amg->r,0.,amg->b_C); /* b_c = R*r */
374 lgao 3827
375 gross 3886 time0=Esys_timer()-time0;
376     /* coarse level solve */
377     if ( amg->AMG_C == NULL) {
378 gross 3283 time0=Esys_timer();
379     /* A_C is the coarsest level */
380 gross 3887 Paso_MergedSolver_solve(amg->merged_solver,amg->x_C, amg->b_C);
381 gross 3283 if (SHOW_TIMING) printf("timing: level %d: DIRECT SOLVER: %e\n",amg->level,Esys_timer()-time0);
382 gross 3886 } else {
383 gross 3441 Paso_Preconditioner_AMG_solve(amg->A_C, amg->AMG_C,amg->x_C,amg->b_C); /* x_C=AMG(b_C) */
384 gross 3886 }
385 lgao 3827
386 gross 3886 time0=time0+Esys_timer();
387     Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(1.,amg->P,amg->x_C,1.,x); /* x = x + P*x_c */
388 lgao 3827
389 gross 3886 /*postsmoothing*/
390 gross 3283
391 gross 3886 /*solve Ax=b with initial guess x */
392     time0=Esys_timer();
393     Paso_Preconditioner_Smoother_solve(A, amg->Smoother, x, b, post_sweeps, TRUE);
394     time0=Esys_timer()-time0;
395     if (SHOW_TIMING) printf("timing: level %d: Postsmoothing: %e\n",amg->level,time0);
396     return;
397 gross 3283 }
398 gross 3193
399 gross 3283 /* theta = threshold for strong connections */
400     /* tau = threshold for diagonal dominance */
401 gross 3193
402 gross 3303 /*S_i={j \in N_i; i strongly coupled to j}
403    
404     in the sense that |A_{ij}| >= theta * max_k |A_{ik}|
405     */
406    
407 gross 3441 void Paso_Preconditioner_AMG_setStrongConnections(Paso_SystemMatrix* A,
408 gross 3445 dim_t *degree_S, index_t *offset_S, index_t *S,
409 gross 3303 const double theta, const double tau)
410 gross 3283 {
411 gross 3482
412     const dim_t my_n=A->mainBlock->numRows;
413     const dim_t overlap_n=A->row_coupleBlock->numRows;
414    
415 gross 3445 index_t iptr, i;
416 gross 3482 double *threshold_p=NULL;
417 gross 3193
418 gross 3482 threshold_p = TMPMEMALLOC(2*my_n, double);
419    
420     #pragma omp parallel for private(i,iptr) schedule(static)
421     for (i=0;i<my_n;++i) {
422    
423 gross 3445 register double max_offdiagonal = 0.;
424     register double sum_row=0;
425     register double main_row=0;
426 gross 3451 register dim_t kdeg=0;
427 caltinay 3489 register const index_t koffset=A->mainBlock->pattern->ptr[i]+A->col_coupleBlock->pattern->ptr[i];
428 lgao 3827
429 gross 3482
430     /* collect information for row i: */
431 gross 3283 #pragma ivdep
432 gross 3445 for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
433     register index_t j=A->mainBlock->pattern->index[iptr];
434     register double fnorm=ABS(A->mainBlock->val[iptr]);
435 gross 3283 if( j != i) {
436     max_offdiagonal = MAX(max_offdiagonal,fnorm);
437     sum_row+=fnorm;
438     } else {
439     main_row=fnorm;
440     }
441 gross 3445
442 gross 3283 }
443 lgao 3827
444 gross 3445 #pragma ivdep
445     for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
446     register double fnorm=ABS(A->col_coupleBlock->val[iptr]);
447 lgao 3827
448 gross 3445 max_offdiagonal = MAX(max_offdiagonal,fnorm);
449     sum_row+=fnorm;
450     }
451    
452 gross 3482 /* inspect row i: */
453 gross 3451 {
454     const double threshold = theta*max_offdiagonal;
455 gross 3482 threshold_p[2*i+1]=threshold;
456 caltinay 3642 if (tau*main_row < sum_row) { /* no diagonal dominance */
457 gross 3482 threshold_p[2*i]=1;
458 gross 3451 #pragma ivdep
459     for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
460     register index_t j=A->mainBlock->pattern->index[iptr];
461     if(ABS(A->mainBlock->val[iptr])>threshold && i!=j) {
462     S[koffset+kdeg] = j;
463     kdeg++;
464     }
465 gross 3283 }
466 lgao 3828 #pragma ivdep
467 gross 3451 for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
468     register index_t j=A->col_coupleBlock->pattern->index[iptr];
469     if(ABS(A->col_coupleBlock->val[iptr])>threshold) {
470     S[koffset+kdeg] = j + my_n;
471     kdeg++;
472     }
473 gross 3445 }
474 gross 3482 } else {
475     threshold_p[2*i]=-1;
476 gross 3451 }
477 gross 3440 }
478 gross 3445 offset_S[i]=koffset;
479 gross 3440 degree_S[i]=kdeg;
480 gross 3283 }
481 lgao 3827
482 gross 3482 /* now we need to distribute the threshold and the diagonal dominance indicator */
483     if (A->mpi_info->size > 1) {
484    
485     const index_t koffset_0=A->mainBlock->pattern->ptr[my_n]+A->col_coupleBlock->pattern->ptr[my_n]
486     -A->mainBlock->pattern->ptr[0]-A->col_coupleBlock->pattern->ptr[0];
487    
488     double *remote_threshold=NULL;
489    
490     Paso_Coupler* threshold_coupler=Paso_Coupler_alloc(A->row_coupler->connector ,2);
491     Paso_Coupler_startCollect(threshold_coupler,threshold_p);
492     Paso_Coupler_finishCollect(threshold_coupler);
493     remote_threshold=threshold_coupler->recv_buffer;
494    
495     #pragma omp parallel for private(i,iptr) schedule(static)
496     for (i=0; i<overlap_n; i++) {
497     const double threshold = remote_threshold[2*i+1];
498     register dim_t kdeg=0;
499 lgao 3827 register const index_t koffset=koffset_0+A->row_coupleBlock->pattern->ptr[i]+A->remote_coupleBlock->pattern->ptr[i];
500 gross 3482 if (remote_threshold[2*i]>0) {
501 lgao 3827 #pragma ivdep
502     for (iptr=A->row_coupleBlock->pattern->ptr[i];iptr<A->row_coupleBlock->pattern->ptr[i+1]; ++iptr) {
503 gross 3482 register index_t j=A->row_coupleBlock->pattern->index[iptr];
504     if(ABS(A->row_coupleBlock->val[iptr])>threshold) {
505     S[koffset+kdeg] = j ;
506     kdeg++;
507     }
508 lgao 3827 }
509 gross 3482
510 lgao 3827 #pragma ivdep
511     for (iptr=A->remote_coupleBlock->pattern->ptr[i];iptr<A->remote_coupleBlock->pattern->ptr[i+1]; iptr++) {
512     register index_t j=A->remote_coupleBlock->pattern->index[iptr];
513     if(ABS(A->remote_coupleBlock->val[iptr])>threshold && i!=j) {
514     S[koffset+kdeg] = j + my_n;
515     kdeg++;
516     }
517     }
518 gross 3482 }
519     offset_S[i+my_n]=koffset;
520     degree_S[i+my_n]=kdeg;
521     }
522    
523     Paso_Coupler_free(threshold_coupler);
524     }
525     TMPMEMFREE(threshold_p);
526 gross 3283 }
527 gross 3193
528 gross 3283 /* theta = threshold for strong connections */
529     /* tau = threshold for diagonal dominance */
530 gross 3303 /*S_i={j \in N_i; i strongly coupled to j}
531 gross 3283
532 gross 3303 in the sense that |A_{ij}|_F >= theta * max_k |A_{ik}|_F
533     */
534 gross 3441 void Paso_Preconditioner_AMG_setStrongConnections_Block(Paso_SystemMatrix* A,
535 gross 3445 dim_t *degree_S, index_t *offset_S, index_t *S,
536 gross 3303 const double theta, const double tau)
537    
538 gross 3283 {
539 lgao 3827 const dim_t block_size=A->block_size;
540 gross 3482 const dim_t my_n=A->mainBlock->numRows;
541     const dim_t overlap_n=A->row_coupleBlock->numRows;
542    
543 gross 3445 index_t iptr, i, bi;
544 gross 3482 double *threshold_p=NULL;
545 gross 3283
546    
547 gross 3482 threshold_p = TMPMEMALLOC(2*my_n, double);
548 lgao 3827
549     #pragma omp parallel private(i,iptr,bi)
550 gross 3482 {
551    
552     dim_t max_deg=0;
553     double *rtmp=NULL;
554 gross 3451
555 gross 3482 #pragma omp for schedule(static)
556     for (i=0;i<my_n;++i) max_deg=MAX(max_deg, A->mainBlock->pattern->ptr[i+1]-A->mainBlock->pattern->ptr[i]
557     +A->col_coupleBlock->pattern->ptr[i+1]-A->col_coupleBlock->pattern->ptr[i]);
558 gross 3193
559 gross 3482 rtmp=TMPMEMALLOC(max_deg, double);
560 gross 3283
561 gross 3482 #pragma omp for schedule(static)
562     for (i=0;i<my_n;++i) {
563     register double max_offdiagonal = 0.;
564     register double sum_row=0;
565     register double main_row=0;
566     register index_t rtmp_offset=-A->mainBlock->pattern->ptr[i];
567     register dim_t kdeg=0;
568 caltinay 3489 register const index_t koffset=A->mainBlock->pattern->ptr[i]+A->col_coupleBlock->pattern->ptr[i];
569 gross 3193
570 gross 3482 /* collect information for row i: */
571     for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
572     register index_t j=A->mainBlock->pattern->index[iptr];
573     register double fnorm=0;
574     #pragma ivdep
575 lgao 3827 for(bi=0;bi<block_size;++bi) {
576     register double rtmp2= A->mainBlock->val[iptr*block_size+bi];
577 gross 3482 fnorm+=rtmp2*rtmp2;
578 gross 3283 }
579 gross 3482 fnorm=sqrt(fnorm);
580     rtmp[iptr+rtmp_offset]=fnorm;
581    
582     if( j != i) {
583 gross 3445 max_offdiagonal = MAX(max_offdiagonal,fnorm);
584     sum_row+=fnorm;
585 gross 3482 } else {
586     main_row=fnorm;
587 gross 3445 }
588 gross 3482
589     }
590    
591     rtmp_offset+=A->mainBlock->pattern->ptr[i+1]-A->col_coupleBlock->pattern->ptr[i];
592     for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
593     register double fnorm=0;
594     #pragma ivdep
595 lgao 3827 for(bi=0;bi<block_size;++bi) {
596     register double rtmp2 = A->col_coupleBlock->val[iptr*block_size+bi];
597 gross 3482 fnorm+=rtmp2*rtmp2;
598 gross 3283 }
599 gross 3482 fnorm=sqrt(fnorm);
600    
601     rtmp[iptr+rtmp_offset]=fnorm;
602     max_offdiagonal = MAX(max_offdiagonal,fnorm);
603     sum_row+=fnorm;
604 gross 3283 }
605 gross 3482
606    
607     /* inspect row i: */
608     {
609     const double threshold = theta*max_offdiagonal;
610     rtmp_offset=-A->mainBlock->pattern->ptr[i];
611    
612     threshold_p[2*i+1]=threshold;
613 caltinay 3642 if (tau*main_row < sum_row) { /* no diagonal dominance */
614 gross 3482 threshold_p[2*i]=1;
615     #pragma ivdep
616     for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
617     register index_t j=A->mainBlock->pattern->index[iptr];
618     if(rtmp[iptr+rtmp_offset] > threshold && i!=j) {
619     S[koffset+kdeg] = j;
620     kdeg++;
621 gross 3351 }
622 gross 3283 }
623 gross 3482 rtmp_offset+=A->mainBlock->pattern->ptr[i+1]-A->col_coupleBlock->pattern->ptr[i];
624     #pragma ivdep
625     for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
626     register index_t j=A->col_coupleBlock->pattern->index[iptr];
627     if( rtmp[iptr+rtmp_offset] >threshold) {
628     S[koffset+kdeg] = j + my_n;
629     kdeg++;
630 gross 3351 }
631     }
632 gross 3482 } else {
633     threshold_p[2*i]=-1;
634     }
635     }
636     offset_S[i]=koffset;
637     degree_S[i]=kdeg;
638     }
639     TMPMEMFREE(rtmp);
640     }
641     /* now we need to distribute the threshold and the diagonal dominance indicator */
642     if (A->mpi_info->size > 1) {
643    
644     const index_t koffset_0=A->mainBlock->pattern->ptr[my_n]+A->col_coupleBlock->pattern->ptr[my_n]
645     -A->mainBlock->pattern->ptr[0]-A->col_coupleBlock->pattern->ptr[0];
646    
647     double *remote_threshold=NULL;
648    
649     Paso_Coupler* threshold_coupler=Paso_Coupler_alloc(A->row_coupler->connector ,2);
650     Paso_Coupler_startCollect(threshold_coupler,threshold_p);
651     Paso_Coupler_finishCollect(threshold_coupler);
652     remote_threshold=threshold_coupler->recv_buffer;
653    
654     #pragma omp parallel for private(i,iptr) schedule(static)
655     for (i=0; i<overlap_n; i++) {
656    
657     const double threshold2 = remote_threshold[2*i+1]*remote_threshold[2*i+1];
658     register dim_t kdeg=0;
659 lgao 3827 register const index_t koffset=koffset_0+A->row_coupleBlock->pattern->ptr[i]+A->remote_coupleBlock->pattern->ptr[i];
660 gross 3482 if (remote_threshold[2*i]>0) {
661     #pragma ivdep
662     for (iptr=A->row_coupleBlock->pattern->ptr[i];iptr<A->row_coupleBlock->pattern->ptr[i+1]; ++iptr) {
663     register index_t j=A->row_coupleBlock->pattern->index[iptr];
664     register double fnorm2=0;
665     #pragma ivdepremote_threshold[2*i]
666 lgao 3827 for(bi=0;bi<block_size;++bi) {
667     register double rtmp2 = A->row_coupleBlock->val[iptr*block_size+bi];
668 gross 3482 fnorm2+=rtmp2*rtmp2;
669 gross 3351 }
670 gross 3482
671     if(fnorm2 > threshold2 ) {
672     S[koffset+kdeg] = j ;
673     kdeg++;
674 gross 3351 }
675 gross 3283 }
676 lgao 3827
677     #pragma ivdep
678     for (iptr=A->remote_coupleBlock->pattern->ptr[i];iptr<A->remote_coupleBlock->pattern->ptr[i+1]; ++iptr) {
679     register index_t j=A->remote_coupleBlock->pattern->index[iptr];
680     register double fnorm2=0;
681     #pragma ivdepremote_threshold[2*i]
682     for(bi=0;bi<block_size;++bi) {
683     register double rtmp2 = A->remote_coupleBlock->val[iptr*block_size+bi];
684     fnorm2+=rtmp2*rtmp2;
685     }
686     if(fnorm2 > threshold2 && i != j) {
687     S[koffset+kdeg] = j + my_n;
688     kdeg++;
689     }
690     }
691 gross 3351
692 gross 3283 }
693 gross 3482 offset_S[i+my_n]=koffset;
694     degree_S[i+my_n]=kdeg;
695 gross 3283 }
696 gross 3482 Paso_Coupler_free(threshold_coupler);
697 gross 3283 }
698 gross 3482 TMPMEMFREE(threshold_p);
699 gross 3193 }
700 caltinay 3642
701 gross 3482 void Paso_Preconditioner_AMG_transposeStrongConnections(const dim_t n, const dim_t* degree_S, const index_t* offset_S, const index_t* S,
702     const dim_t nT, dim_t* degree_ST, index_t* offset_ST,index_t* ST)
703 gross 3440 {
704 gross 3482 index_t i, j;
705     dim_t p;
706     dim_t len=0;
707 gross 3485 #pragma omp parallel for private(i) schedule(static)
708 gross 3482 for (i=0; i<nT ;++i) {
709     degree_ST[i]=0;
710     }
711     for (i=0; i<n ;++i) {
712     for (p=0; p<degree_S[i]; ++p) degree_ST[ S[offset_S[i]+p] ]++;
713     }
714     for (i=0; i<nT ;++i) {
715     offset_ST[i]=len;
716     len+=degree_ST[i];
717     degree_ST[i]=0;
718     }
719     for (i=0; i<n ;++i) {
720     for (p=0; p<degree_S[i]; ++p) {
721     j=S[offset_S[i]+p];
722     ST[offset_ST[j]+degree_ST[j]]=i;
723     degree_ST[j]++;
724     }
725     }
726 gross 3440 }
727 gross 3442
728 lgao 3827 int compareindex(const void *a, const void *b)
729     {
730     return (*(int *)a - *(int *)b);
731     }
732    
733 gross 3482 void Paso_Preconditioner_AMG_CIJPCoarsening(const dim_t n, const dim_t my_n, index_t*split_marker,
734     const dim_t* degree_S, const index_t* offset_S, const index_t* S,
735     const dim_t* degree_ST, const index_t* offset_ST, const index_t* ST,
736     Paso_Connector* col_connector, Paso_Distribution* col_dist)
737 gross 3442 {
738 gross 3482 dim_t i, numUndefined, iter=0;
739     index_t iptr, jptr, kptr;
740     double *random=NULL, *w=NULL, *Status=NULL;
741     index_t * ST_flag=NULL;
742 gross 3445
743 gross 3482 Paso_Coupler* w_coupler=Paso_Coupler_alloc(col_connector ,1);
744    
745     w=TMPMEMALLOC(n, double);
746     Status=TMPMEMALLOC(n, double);
747     random = Paso_Distribution_createRandomVector(col_dist,1);
748     ST_flag=TMPMEMALLOC(offset_ST[n-1]+ degree_ST[n-1], index_t);
749 lgao 3827
750 gross 3482 #pragma omp parallel for private(i)
751     for (i=0; i< my_n; ++i) {
752     w[i]=degree_ST[i]+random[i];
753     if (degree_ST[i] < 1) {
754     Status[i]=-100; /* F point */
755     } else {
756     Status[i]=1; /* status undefined */
757     }
758     }
759 lgao 3827
760 gross 3482 #pragma omp parallel for private(i, iptr)
761     for (i=0; i< n; ++i) {
762     for( iptr =0 ; iptr < degree_ST[i]; ++iptr) {
763     ST_flag[offset_ST[i]+iptr]=1;
764     }
765     }
766    
767 gross 3445
768 gross 3482 numUndefined = Paso_Distribution_numPositives(Status, col_dist, 1 );
769 gross 3884 /* printf(" coarsening loop start: num of undefined rows = %d \n",numUndefined); */
770 gross 3482 iter=0;
771     while (numUndefined > 0) {
772     Paso_Coupler_fillOverlap(n, w, w_coupler);
773 lgao 3827
774 caltinay 3642 /* calculate the maximum value of neighbours following active strong connections:
775     w2[i]=MAX(w[k]) with k in ST[i] or S[i] and (i,k) connection is still active */
776 gross 3482 #pragma omp parallel for private(i, iptr)
777     for (i=0; i<my_n; ++i) {
778     if (Status[i]>0) { /* status is still undefined */
779 gross 3449
780 gross 3482 register bool_t inD=TRUE;
781     const double wi=w[i];
782    
783     for( iptr =0 ; iptr < degree_S[i]; ++iptr) {
784     const index_t k=S[offset_S[i]+iptr];
785     const index_t* start_p = &ST[offset_ST[k]];
786     const index_t* where_p=(index_t*)bsearch(&i, start_p, degree_ST[k], sizeof(index_t), Paso_comparIndex);
787    
788 lgao 3827 if (ST_flag[offset_ST[k] + (index_t)(where_p-start_p)]>0) {
789 gross 3482 if (wi <= w[k] ) {
790     inD=FALSE;
791     break;
792     }
793     }
794    
795     }
796    
797     if (inD) {
798     for( iptr =0 ; iptr < degree_ST[i]; ++iptr) {
799     const index_t k=ST[offset_ST[i]+iptr];
800     if ( ST_flag[offset_ST[i]+iptr] > 0 ) {
801     if (wi <= w[k] ) {
802     inD=FALSE;
803     break;
804     }
805     }
806     }
807     }
808     if (inD) {
809     Status[i]=0.; /* is in D */
810     }
811     }
812    
813 gross 3449 }
814    
815 gross 3482 Paso_Coupler_fillOverlap(n, Status, w_coupler);
816 gross 3442
817    
818 gross 3482 /* remove connection to D points :
819    
820     for each i in D:
821     for each j in S_i:
822     w[j]--
823     ST_tag[j,i]=-1
824     for each j in ST[i]:
825     ST_tag[i,j]=-1
826     for each k in ST[j]:
827     if k in ST[i]:
828     w[j]--;
829     ST_tag[j,k]=-1
830    
831     */
832     /* w is updated for local rows only */
833     {
834 gross 3485 #pragma omp parallel for private(i, jptr)
835 gross 3482 for (i=0; i< my_n; ++i) {
836 gross 3445
837 gross 3482 for (jptr=0; jptr<degree_ST[i]; ++jptr) {
838     const index_t j=ST[offset_ST[i]+jptr];
839     if ( (Status[j] == 0.) && (ST_flag[offset_ST[i]+jptr]>0) ) {
840     w[i]--;
841     ST_flag[offset_ST[i]+jptr]=-1;
842     }
843     }
844    
845     }
846 gross 3485 #pragma omp parallel for private(i, jptr)
847 gross 3482 for (i=my_n; i< n; ++i) {
848     for (jptr=0; jptr<degree_ST[i]; ++jptr) {
849     const index_t j = ST[offset_ST[i]+jptr];
850     if ( Status[j] == 0. ) ST_flag[offset_ST[i]+jptr]=-1;
851     }
852     }
853 gross 3442
854 gross 3482
855     for (i=0; i< n; ++i) {
856     if ( Status[i] == 0. ) {
857 gross 3445
858 gross 3482 const index_t* start_p = &ST[offset_ST[i]];
859    
860     for (jptr=0; jptr<degree_ST[i]; ++jptr) {
861     const index_t j=ST[offset_ST[i]+jptr];
862     ST_flag[offset_ST[i]+jptr]=-1;
863     for (kptr=0; kptr<degree_ST[j]; ++kptr) {
864     const index_t k=ST[offset_ST[j]+kptr];
865     if (NULL != bsearch(&k, start_p, degree_ST[i], sizeof(index_t), Paso_comparIndex) ) { /* k in ST[i] ? */
866     if (ST_flag[offset_ST[j]+kptr] >0) {
867     if (j< my_n ) {
868     w[j]--;
869     }
870     ST_flag[offset_ST[j]+kptr]=-1;
871     }
872     }
873     }
874     }
875     }
876     }
877     }
878 lgao 3827
879 gross 3482 /* adjust status */
880     #pragma omp parallel for private(i)
881     for (i=0; i< my_n; ++i) {
882     if ( Status[i] == 0. ) {
883     Status[i] = -10; /* this is now a C point */
884 lgao 3827 } else if (Status[i] == 1. && w[i]<1.) {
885 gross 3482 Status[i] = -100; /* this is now a F point */
886     }
887     }
888    
889 lgao 3827 i = numUndefined;
890 gross 3482 numUndefined = Paso_Distribution_numPositives(Status, col_dist, 1 );
891 lgao 3827 if (numUndefined == i) {
892     Esys_setError(SYSTEM_ERROR, "Can NOT reduce numUndefined.");
893     return;
894     }
895    
896 gross 3482 iter++;
897 gross 3884 /* printf(" coarsening loop %d: num of undefined rows = %d \n",iter, numUndefined); */
898 lgao 3827
899 gross 3482 } /* end of while loop */
900    
901     /* map to output :*/
902     Paso_Coupler_fillOverlap(n, Status, w_coupler);
903     #pragma omp parallel for private(i)
904     for (i=0; i< n; ++i) {
905     if (Status[i] > -50.) {
906     split_marker[i]=PASO_AMG_IN_C;
907     } else {
908     split_marker[i]=PASO_AMG_IN_F;
909     }
910     }
911     /* clean up : */
912     Paso_Coupler_free(w_coupler);
913     TMPMEMFREE(random);
914     TMPMEMFREE(w);
915     TMPMEMFREE(Status);
916     TMPMEMFREE(ST_flag);
917    
918     return;
919 gross 3442 }
920 lgao 3827

  ViewVC Help
Powered by ViewVC 1.1.26