/[escript]/trunk/paso/src/AMG.c
ViewVC logotype

Annotation of /trunk/paso/src/AMG.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3489 - (hide annotations)
Wed Mar 30 00:46:04 2011 UTC (8 years, 5 months ago) by caltinay
File MIME type: text/plain
File size: 29983 byte(s)
Fixed a few warnings emitted by gcc-4.6.

1 gross 3193
2     /*******************************************************
3     *
4     * Copyright (c) 2003-2010 by University of Queensland
5     * Earth Systems Science Computational Center (ESSCC)
6     * http://www.uq.edu.au/esscc
7     *
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13    
14    
15     /**************************************************************/
16    
17 gross 3441 /* Paso: AMG preconditioner (local version) */
18 gross 3193
19     /**************************************************************/
20    
21 gross 3303 /* Author: artak@uq.edu.au, l.gross@uq.edu.au */
22 gross 3193
23     /**************************************************************/
24    
25 gross 3283 #define SHOW_TIMING FALSE
26    
27 gross 3193 #include "Paso.h"
28     #include "Preconditioner.h"
29     #include "Options.h"
30     #include "PasoUtil.h"
31     #include "UMFPACK.h"
32     #include "MKL.h"
33 gross 3458 #include<stdio.h>
34 gross 3193
35 gross 3458
36 gross 3193 /**************************************************************/
37    
38     /* free all memory used by AMG */
39    
40 gross 3441 void Paso_Preconditioner_AMG_free(Paso_Preconditioner_AMG * in) {
41 gross 3193 if (in!=NULL) {
42 gross 3441 Paso_Preconditioner_Smoother_free(in->Smoother);
43     Paso_SystemMatrix_free(in->P);
44     Paso_SystemMatrix_free(in->R);
45     Paso_SystemMatrix_free(in->A_C);
46     Paso_Preconditioner_AMG_free(in->AMG_C);
47 gross 3283 MEMFREE(in->r);
48     MEMFREE(in->x_C);
49     MEMFREE(in->b_C);
50 gross 3193
51 gross 3283 MEMFREE(in);
52 gross 3193 }
53     }
54    
55 gross 3441 index_t Paso_Preconditioner_AMG_getMaxLevel(const Paso_Preconditioner_AMG * in) {
56 gross 3402 if (in->AMG_C == NULL) {
57     return in->level;
58     } else {
59 gross 3441 return Paso_Preconditioner_AMG_getMaxLevel(in->AMG_C);
60 gross 3402 }
61     }
62 gross 3441 double Paso_Preconditioner_AMG_getCoarseLevelSparsity(const Paso_Preconditioner_AMG * in) {
63 gross 3403 if (in->AMG_C == NULL) {
64     if (in->A_C == NULL) {
65     return 1.;
66     } else {
67 gross 3442 return Paso_SystemMatrix_getSparsity(in->A_C);
68 gross 3403 }
69     } else {
70 gross 3441 return Paso_Preconditioner_AMG_getCoarseLevelSparsity(in->AMG_C);
71 gross 3403 }
72 gross 3402 }
73 gross 3441 dim_t Paso_Preconditioner_AMG_getNumCoarseUnknwons(const Paso_Preconditioner_AMG * in) {
74 gross 3402 if (in->AMG_C == NULL) {
75 gross 3403 if (in->A_C == NULL) {
76     return 0;
77     } else {
78 gross 3445 return Paso_SystemMatrix_getTotalNumRows(in->A_C);
79 gross 3403 }
80 gross 3402 } else {
81 gross 3441 return Paso_Preconditioner_AMG_getNumCoarseUnknwons(in->AMG_C);
82 gross 3402 }
83     }
84 gross 3283 /*****************************************************************
85 gross 3193
86 gross 3283 constructs AMG
87    
88     ******************************************************************/
89 gross 3441 Paso_Preconditioner_AMG* Paso_Preconditioner_AMG_alloc(Paso_SystemMatrix *A_p,dim_t level,Paso_Options* options) {
90 gross 3193
91 gross 3441 Paso_Preconditioner_AMG* out=NULL;
92 gross 3193 bool_t verbose=options->verbose;
93 gross 3445
94 gross 3482 const dim_t my_n=A_p->mainBlock->numRows;
95     const dim_t overlap_n=A_p->row_coupleBlock->numRows;
96    
97 gross 3445 const dim_t n = my_n + overlap_n;
98    
99 gross 3193 const dim_t n_block=A_p->row_block_size;
100 gross 3451 index_t* F_marker=NULL, *counter=NULL;
101 gross 3450 dim_t i;
102 gross 3283 double time0=0;
103     const double theta = options->coarsening_threshold;
104     const double tau = options->diagonal_dominance_threshold;
105 gross 3442 const double sparsity=Paso_SystemMatrix_getSparsity(A_p);
106     const dim_t total_n=Paso_SystemMatrix_getGlobalTotalNumRows(A_p);
107 plaub 3436
108 gross 3193
109     /*
110 gross 3283 is the input matrix A suitable for coarsening
111    
112 gross 3193 */
113 gross 3442 if ( (sparsity >= options->min_coarse_sparsity) ||
114     (total_n <= options->min_coarse_matrix_size) ||
115     (level > options->level_max) ) {
116 plaub 3436
117 gross 3445 if (verbose) {
118 plaub 3436 /*
119     print stopping condition:
120     - 'SPAR' = min_coarse_matrix_sparsity exceeded
121     - 'SIZE' = min_coarse_matrix_size exceeded
122     - 'LEVEL' = level_max exceeded
123     */
124 gross 3442 printf("Paso_Preconditioner: AMG: termination of coarsening by ");
125 plaub 3436
126 gross 3442 if (sparsity >= options->min_coarse_sparsity)
127 plaub 3436 printf("SPAR ");
128    
129 gross 3442 if (total_n <= options->min_coarse_matrix_size)
130 plaub 3436 printf("SIZE ");
131    
132     if (level > options->level_max)
133     printf("LEVEL ");
134    
135     printf("\n");
136    
137     printf("Paso_Preconditioner: AMG level %d (limit = %d) stopped. sparsity = %e (limit = %e), unknowns = %d (limit = %d)\n",
138 gross 3442 level, options->level_max, sparsity, options->min_coarse_sparsity, total_n, options->min_coarse_matrix_size);
139 plaub 3436
140 gross 3445 }
141 plaub 3436
142 gross 3445 return NULL;
143     } else {
144 gross 3451 /* Start Coarsening : */
145 gross 3482
146     /* this is the table for strong connections combining mainBlock, col_coupleBlock and row_coupleBlock */
147     const dim_t len_S=A_p->mainBlock->pattern->len + A_p->col_coupleBlock->pattern->len + A_p->row_coupleBlock->pattern->len;
148    
149     dim_t* degree_S=TMPMEMALLOC(n, dim_t);
150     index_t *offset_S=TMPMEMALLOC(n, index_t);
151 gross 3451 index_t *S=TMPMEMALLOC(len_S, index_t);
152 gross 3482 dim_t* degree_ST=TMPMEMALLOC(n, dim_t);
153     index_t *offset_ST=TMPMEMALLOC(n, index_t);
154     index_t *ST=TMPMEMALLOC(len_S, index_t);
155    
156    
157 gross 3440 F_marker=TMPMEMALLOC(n,index_t);
158 gross 3283 counter=TMPMEMALLOC(n,index_t);
159 gross 3445
160 gross 3482 if ( !( Esys_checkPtr(F_marker) || Esys_checkPtr(counter) || Esys_checkPtr(degree_S) || Esys_checkPtr(offset_S) || Esys_checkPtr(S)
161     || Esys_checkPtr(degree_ST) || Esys_checkPtr(offset_ST) || Esys_checkPtr(ST) ) ) {
162 gross 3458 /*
163     make sure that corresponding values in the row_coupleBlock and col_coupleBlock are identical
164     */
165 gross 3482 Paso_SystemMatrix_copyColCoupleBlock(A_p);
166     /* Paso_SystemMatrix_copyRemoteCoupleBlock(A_p, FALSE); */
167 gross 3458
168     /*
169 gross 3283 set splitting of unknows:
170 gross 3458
171 gross 3283 */
172     time0=Esys_timer();
173     if (n_block>1) {
174 gross 3445 Paso_Preconditioner_AMG_setStrongConnections_Block(A_p, degree_S, offset_S, S, theta,tau);
175 gross 3283 } else {
176 gross 3445 Paso_Preconditioner_AMG_setStrongConnections(A_p, degree_S, offset_S, S, theta,tau);
177 gross 3283 }
178 gross 3482 Paso_Preconditioner_AMG_transposeStrongConnections(n, degree_S, offset_S, S, n, degree_ST, offset_ST, ST);
179    
180 gross 3449 {
181     dim_t p;
182 gross 3482 for (i=0; i<n; ++i) {
183 gross 3449 printf("%d: ",i);
184     for (p=0; p<degree_S[i];++p) printf("%d ",S[offset_S[i]+p]);
185     printf("\n");
186     }
187     }
188 gross 3482 Paso_Preconditioner_AMG_CIJPCoarsening(n,my_n,F_marker,
189     degree_S, offset_S, S, degree_ST, offset_ST, ST,
190     A_p->col_coupler->connector,A_p->col_distribution);
191     Esys_setError(SYSTEM_ERROR, "AMG:DONE.");
192     return NULL;
193 gross 3449
194 gross 3442
195     /*MPI:
196 gross 3440 Paso_Preconditioner_AMG_RungeStuebenSearch(n, A_p->pattern->ptr, degree_S, S, F_marker, options->usePanel);
197 gross 3442 */
198    
199 gross 3440 /* in BoomerAMG interpolation is used FF connectiovity is required :*/
200 gross 3442 /*MPI:
201 gross 3440 if (options->interpolation_method == PASO_CLASSIC_INTERPOLATION_WITH_FF_COUPLING)
202     Paso_Preconditioner_AMG_enforceFFConnectivity(n, A_p->pattern->ptr, degree_S, S, F_marker);
203 gross 3442 */
204    
205 gross 3283 options->coarsening_selection_time=Esys_timer()-time0 + MAX(0, options->coarsening_selection_time);
206 gross 3445
207     #ifdef AAAAA
208 gross 3283 if (Esys_noError() ) {
209     #pragma omp parallel for private(i) schedule(static)
210 gross 3440 for (i = 0; i < n; ++i) F_marker[i]=(F_marker[i] == PASO_AMG_IN_F);
211 gross 3283
212     /*
213     count number of unkowns to be eliminated:
214     */
215 gross 3440 n_F=Paso_Util_cumsum_maskedTrue(n,counter, F_marker);
216 gross 3283 n_C=n-n_F;
217 gross 3402 if (verbose) printf("Paso_Preconditioner: AMG level %d: %d unknowns are flagged for elimination. %d left.\n",level,n_F,n-n_F);
218 gross 3283
219     if ( n_F == 0 ) { /* is a nasty case. a direct solver should be used, return NULL */
220     out = NULL;
221     } else {
222 gross 3441 out=MEMALLOC(1,Paso_Preconditioner_AMG);
223 gross 3323 if (! Esys_checkPtr(out)) {
224 gross 3283 out->level = level;
225     out->n = n;
226     out->n_F = n_F;
227     out->n_block = n_block;
228     out->A_C = NULL;
229     out->P = NULL;
230     out->R = NULL;
231     out->post_sweeps = options->post_sweeps;
232     out->pre_sweeps = options->pre_sweeps;
233     out->r = NULL;
234     out->x_C = NULL;
235     out->b_C = NULL;
236     out->AMG_C = NULL;
237 gross 3323 out->Smoother=NULL;
238     }
239     mask_C=TMPMEMALLOC(n,index_t);
240     rows_in_F=TMPMEMALLOC(n_F,index_t);
241     Esys_checkPtr(mask_C);
242     Esys_checkPtr(rows_in_F);
243     if ( Esys_noError() ) {
244    
245 gross 3442 out->Smoother = Paso_Preconditioner_Smoother_alloc(A_p, (options->smoother == PASO_JACOBI), verbose);
246 gross 3323
247 gross 3402 if (n_C != 0) {
248     /* if nothing is been removed we have a diagonal dominant matrix and we just run a few steps of the smoother */
249 gross 3283
250 gross 3315 /* allocate helpers :*/
251     out->x_C=MEMALLOC(n_block*n_C,double);
252     out->b_C=MEMALLOC(n_block*n_C,double);
253     out->r=MEMALLOC(n_block*n,double);
254    
255     Esys_checkPtr(out->r);
256     Esys_checkPtr(out->x_C);
257     Esys_checkPtr(out->b_C);
258    
259 gross 3323 if ( Esys_noError() ) {
260     /* creates index for F:*/
261     #pragma omp parallel private(i)
262     {
263     #pragma omp for schedule(static)
264     for (i = 0; i < n; ++i) {
265 gross 3440 if (F_marker[i]) rows_in_F[counter[i]]=i;
266 gross 3323 }
267     }
268     /* create mask of C nodes with value >-1 gives new id */
269 gross 3440 i=Paso_Util_cumsum_maskedFalse(n,counter, F_marker);
270 gross 3193
271 gross 3323 #pragma omp parallel for private(i) schedule(static)
272     for (i = 0; i < n; ++i) {
273 gross 3440 if (F_marker[i]) {
274 gross 3323 mask_C[i]=-1;
275     } else {
276     mask_C[i]=counter[i];;
277     }
278 gross 3283 }
279 gross 3323 /*
280 gross 3440 get Prolongation :
281 gross 3323 */
282     time0=Esys_timer();
283 gross 3442 /*MPI:
284 gross 3440 out->P=Paso_Preconditioner_AMG_getProlongation(A_p,A_p->pattern->ptr, degree_S,S,n_C,mask_C, options->interpolation_method);
285 gross 3442 */
286 gross 3323 if (SHOW_TIMING) printf("timing: level %d: getProlongation: %e\n",level, Esys_timer()-time0);
287 gross 3283 }
288 gross 3323 /*
289 gross 3440 construct Restriction operator as transposed of Prolongation operator:
290 gross 3283 */
291 gross 3315 if ( Esys_noError()) {
292     time0=Esys_timer();
293 gross 3442 /*MPI:
294 gross 3441 out->R=Paso_SystemMatrix_getTranspose(out->P);
295 gross 3442 */
296 gross 3441 if (SHOW_TIMING) printf("timing: level %d: Paso_SystemMatrix_getTranspose: %e\n",level,Esys_timer()-time0);
297 gross 3315 }
298 gross 3283 /*
299 gross 3312 construct coarse level matrix:
300 gross 3283 */
301 gross 3315 if ( Esys_noError()) {
302     time0=Esys_timer();
303 gross 3442 /*MPI:
304 gross 3441 Atemp=Paso_SystemMatrix_MatrixMatrix(A_p,out->P);
305     A_C=Paso_SystemMatrix_MatrixMatrix(out->R,Atemp);
306 gross 3482 Paso_Preconditioner_AMG_setStrongConnections
307 gross 3441 Paso_SystemMatrix_free(Atemp);
308 gross 3442 */
309    
310 gross 3315 if (SHOW_TIMING) printf("timing: level %d : construct coarse matrix: %e\n",level,Esys_timer()-time0);
311     }
312    
313 gross 3283
314     /*
315     constructe courser level:
316    
317     */
318     if ( Esys_noError()) {
319 gross 3441 out->AMG_C=Paso_Preconditioner_AMG_alloc(A_C,level+1,options);
320 gross 3315 }
321     if ( Esys_noError()) {
322 gross 3283 if ( out->AMG_C == NULL ) {
323     out->reordering = options->reordering;
324     out->refinements = options->coarse_matrix_refinements;
325     /* no coarse level matrix has been constructed. use direct solver */
326     #ifdef MKL
327 gross 3441 out->A_C=Paso_SystemMatrix_unroll(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_OFFSET1, A_C);
328     Paso_SystemMatrix_free(A_C);
329 gross 3283 out->A_C->solver_package = PASO_MKL;
330 gross 3402 if (verbose) printf("Paso_Preconditioner: AMG: use MKL direct solver on the coarsest level (number of unknowns = %d).\n",n_C*n_block);
331 gross 3283 #else
332     #ifdef UMFPACK
333 gross 3441 out->A_C=Paso_SystemMatrix_unroll(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_CSC, A_C);
334     Paso_SystemMatrix_free(A_C);
335 gross 3283 out->A_C->solver_package = PASO_UMFPACK;
336 gross 3402 if (verbose) printf("Paso_Preconditioner: AMG: use UMFPACK direct solver on the coarsest level (number of unknowns = %d).\n",n_C*n_block);
337 gross 3283 #else
338     out->A_C=A_C;
339 gross 3442 out->A_C->solver_p=Paso_Preconditioner_Smoother_alloc(out->A_C, (options->smoother == PASO_JACOBI), verbose);
340 gross 3283 out->A_C->solver_package = PASO_SMOOTHER;
341 gross 3402 if (verbose) printf("Paso_Preconditioner: AMG: use smoother on the coarsest level (number of unknowns = %d).\n",n_C*n_block);
342 gross 3283 #endif
343     #endif
344     } else {
345     /* finally we set some helpers for the solver step */
346     out->A_C=A_C;
347     }
348     }
349     }
350     }
351     TMPMEMFREE(mask_C);
352     TMPMEMFREE(rows_in_F);
353     }
354     }
355 gross 3445 #endif
356    
357 gross 3193 }
358     TMPMEMFREE(counter);
359 gross 3440 TMPMEMFREE(F_marker);
360     TMPMEMFREE(degree_S);
361 gross 3445 TMPMEMFREE(offset_S);
362 gross 3303 TMPMEMFREE(S);
363 gross 3482 TMPMEMFREE(degree_ST);
364     TMPMEMFREE(offset_ST);
365     TMPMEMFREE(ST);
366    
367 gross 3445 }
368 gross 3193
369 jfenwick 3259 if (Esys_noError()) {
370 gross 3193 return out;
371     } else {
372 gross 3441 Paso_Preconditioner_AMG_free(out);
373 gross 3193 return NULL;
374     }
375     }
376    
377    
378 gross 3441 void Paso_Preconditioner_AMG_solve(Paso_SystemMatrix* A, Paso_Preconditioner_AMG * amg, double * x, double * b) {
379 gross 3283 const dim_t n = amg->n * amg->n_block;
380     double time0=0;
381     const dim_t post_sweeps=amg->post_sweeps;
382     const dim_t pre_sweeps=amg->pre_sweeps;
383 gross 3193
384 gross 3283 /* presmoothing */
385     time0=Esys_timer();
386 gross 3442 Paso_Preconditioner_Smoother_solve(A, amg->Smoother, x, b, pre_sweeps, FALSE);
387 gross 3283 time0=Esys_timer()-time0;
388     if (SHOW_TIMING) printf("timing: level %d: Presmooting: %e\n",amg->level, time0);
389     /* end of presmoothing */
390    
391     if (amg->n_F < amg->n) { /* is there work on the coarse level? */
392     time0=Esys_timer();
393     Paso_Copy(n, amg->r, b); /* r <- b */
394 gross 3441 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(-1.,A,x,1.,amg->r); /*r=r-Ax*/
395 gross 3445 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(1.,amg->R,amg->r,0.,amg->b_C); /* b_c = R*r */
396 gross 3283 time0=Esys_timer()-time0;
397     /* coarse level solve */
398     if ( amg->AMG_C == NULL) {
399     time0=Esys_timer();
400     /* A_C is the coarsest level */
401 gross 3445 #ifdef FIXME
402 gross 3283 switch (amg->A_C->solver_package) {
403     case (PASO_MKL):
404     Paso_MKL(amg->A_C, amg->x_C,amg->b_C, amg->reordering, amg->refinements, SHOW_TIMING);
405     break;
406     case (PASO_UMFPACK):
407     Paso_UMFPACK(amg->A_C, amg->x_C,amg->b_C, amg->refinements, SHOW_TIMING);
408     break;
409     case (PASO_SMOOTHER):
410 gross 3442 Paso_Preconditioner_Smoother_solve(amg->A_C, amg->A_C->solver_p,amg->x_C,amg->b_C,pre_sweeps+post_sweeps, FALSE);
411 gross 3283 break;
412     }
413 gross 3445 #endif
414     Paso_Preconditioner_Smoother_solve(amg->A_C, amg->A_C->solver_p,amg->x_C,amg->b_C,pre_sweeps+post_sweeps, FALSE);
415 gross 3283 if (SHOW_TIMING) printf("timing: level %d: DIRECT SOLVER: %e\n",amg->level,Esys_timer()-time0);
416     } else {
417 gross 3441 Paso_Preconditioner_AMG_solve(amg->A_C, amg->AMG_C,amg->x_C,amg->b_C); /* x_C=AMG(b_C) */
418 gross 3283 }
419     time0=time0+Esys_timer();
420 gross 3445 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(1.,amg->P,amg->x_C,1.,x); /* x = x + P*x_c */
421 gross 3283
422     /*postsmoothing*/
423    
424     /*solve Ax=b with initial guess x */
425     time0=Esys_timer();
426 gross 3442 Paso_Preconditioner_Smoother_solve(A, amg->Smoother, x, b, post_sweeps, TRUE);
427 gross 3283 time0=Esys_timer()-time0;
428     if (SHOW_TIMING) printf("timing: level %d: Postsmoothing: %e\n",amg->level,time0);
429     /*end of postsmoothing*/
430    
431     }
432     return;
433     }
434 gross 3193
435 gross 3283 /* theta = threshold for strong connections */
436     /* tau = threshold for diagonal dominance */
437 gross 3193
438 gross 3303 /*S_i={j \in N_i; i strongly coupled to j}
439    
440     in the sense that |A_{ij}| >= theta * max_k |A_{ik}|
441     */
442    
443 gross 3441 void Paso_Preconditioner_AMG_setStrongConnections(Paso_SystemMatrix* A,
444 gross 3445 dim_t *degree_S, index_t *offset_S, index_t *S,
445 gross 3303 const double theta, const double tau)
446 gross 3283 {
447 gross 3482
448     const dim_t my_n=A->mainBlock->numRows;
449     const dim_t overlap_n=A->row_coupleBlock->numRows;
450    
451 gross 3445 index_t iptr, i;
452 gross 3482 double *threshold_p=NULL;
453 gross 3193
454 gross 3303
455 gross 3482 threshold_p = TMPMEMALLOC(2*my_n, double);
456    
457     #pragma omp parallel for private(i,iptr) schedule(static)
458     for (i=0;i<my_n;++i) {
459    
460 gross 3445 register double max_offdiagonal = 0.;
461     register double sum_row=0;
462     register double main_row=0;
463 gross 3451 register dim_t kdeg=0;
464 caltinay 3489 register const index_t koffset=A->mainBlock->pattern->ptr[i]+A->col_coupleBlock->pattern->ptr[i];
465 gross 3482
466     /* collect information for row i: */
467 gross 3283 #pragma ivdep
468 gross 3445 for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
469     register index_t j=A->mainBlock->pattern->index[iptr];
470     register double fnorm=ABS(A->mainBlock->val[iptr]);
471 gross 3283
472     if( j != i) {
473     max_offdiagonal = MAX(max_offdiagonal,fnorm);
474     sum_row+=fnorm;
475     } else {
476     main_row=fnorm;
477     }
478 gross 3445
479 gross 3283 }
480 gross 3482
481 gross 3445 #pragma ivdep
482     for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
483     register double fnorm=ABS(A->col_coupleBlock->val[iptr]);
484     max_offdiagonal = MAX(max_offdiagonal,fnorm);
485     sum_row+=fnorm;
486     }
487    
488 gross 3482 /* inspect row i: */
489 gross 3451 {
490     const double threshold = theta*max_offdiagonal;
491 gross 3482 threshold_p[2*i+1]=threshold;
492 gross 3451 if (tau*main_row < sum_row) { /* no diagonal domainance */
493 gross 3482 threshold_p[2*i]=1;
494 gross 3451 #pragma ivdep
495     for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
496     register index_t j=A->mainBlock->pattern->index[iptr];
497     if(ABS(A->mainBlock->val[iptr])>threshold && i!=j) {
498     S[koffset+kdeg] = j;
499     kdeg++;
500     }
501 gross 3283 }
502 gross 3451 #pragma ivdep
503     for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
504     register index_t j=A->col_coupleBlock->pattern->index[iptr];
505     if(ABS(A->col_coupleBlock->val[iptr])>threshold) {
506     S[koffset+kdeg] = j + my_n;
507     kdeg++;
508     }
509 gross 3445 }
510 gross 3482 } else {
511     threshold_p[2*i]=-1;
512 gross 3451 }
513 gross 3440 }
514 gross 3445 offset_S[i]=koffset;
515 gross 3440 degree_S[i]=kdeg;
516 gross 3283 }
517 gross 3482 /* now we need to distribute the threshold and the diagonal dominance indicator */
518     if (A->mpi_info->size > 1) {
519    
520     const index_t koffset_0=A->mainBlock->pattern->ptr[my_n]+A->col_coupleBlock->pattern->ptr[my_n]
521     -A->mainBlock->pattern->ptr[0]-A->col_coupleBlock->pattern->ptr[0];
522    
523     double *remote_threshold=NULL;
524    
525     Paso_Coupler* threshold_coupler=Paso_Coupler_alloc(A->row_coupler->connector ,2);
526     Paso_Coupler_startCollect(threshold_coupler,threshold_p);
527     Paso_Coupler_finishCollect(threshold_coupler);
528     remote_threshold=threshold_coupler->recv_buffer;
529    
530     #pragma omp parallel for private(i,iptr) schedule(static)
531     for (i=0; i<overlap_n; i++) {
532    
533     const double threshold = remote_threshold[2*i+1];
534     register dim_t kdeg=0;
535 caltinay 3489 register const index_t koffset=koffset_0+A->row_coupleBlock->pattern->ptr[i];
536 gross 3482 if (remote_threshold[2*i]>0) {
537     #pragma ivdep
538     for (iptr=A->row_coupleBlock->pattern->ptr[i];iptr<A->row_coupleBlock->pattern->ptr[i+1]; ++iptr) {
539     register index_t j=A->row_coupleBlock->pattern->index[iptr];
540     if(ABS(A->row_coupleBlock->val[iptr])>threshold) {
541     S[koffset+kdeg] = j ;
542     kdeg++;
543     }
544     }
545    
546     }
547     offset_S[i+my_n]=koffset;
548     degree_S[i+my_n]=kdeg;
549     }
550    
551     Paso_Coupler_free(threshold_coupler);
552     }
553     TMPMEMFREE(threshold_p);
554 gross 3283 }
555 gross 3193
556 gross 3283 /* theta = threshold for strong connections */
557     /* tau = threshold for diagonal dominance */
558 gross 3303 /*S_i={j \in N_i; i strongly coupled to j}
559 gross 3283
560 gross 3303 in the sense that |A_{ij}|_F >= theta * max_k |A_{ik}|_F
561     */
562 gross 3441 void Paso_Preconditioner_AMG_setStrongConnections_Block(Paso_SystemMatrix* A,
563 gross 3445 dim_t *degree_S, index_t *offset_S, index_t *S,
564 gross 3303 const double theta, const double tau)
565    
566 gross 3283 {
567 gross 3482 const dim_t n_block=A->block_size;
568     const dim_t my_n=A->mainBlock->numRows;
569     const dim_t overlap_n=A->row_coupleBlock->numRows;
570    
571 gross 3445 index_t iptr, i, bi;
572 gross 3482 double *threshold_p=NULL;
573 gross 3283
574    
575 gross 3482 threshold_p = TMPMEMALLOC(2*my_n, double);
576    
577     #pragma omp parallel private(i,iptr, bi)
578     {
579    
580     dim_t max_deg=0;
581     double *rtmp=NULL;
582 gross 3451
583 gross 3482 #pragma omp for schedule(static)
584     for (i=0;i<my_n;++i) max_deg=MAX(max_deg, A->mainBlock->pattern->ptr[i+1]-A->mainBlock->pattern->ptr[i]
585     +A->col_coupleBlock->pattern->ptr[i+1]-A->col_coupleBlock->pattern->ptr[i]);
586 gross 3193
587 gross 3482 rtmp=TMPMEMALLOC(max_deg, double);
588 gross 3283
589 gross 3482 #pragma omp for schedule(static)
590     for (i=0;i<my_n;++i) {
591     register double max_offdiagonal = 0.;
592     register double sum_row=0;
593     register double main_row=0;
594     register index_t rtmp_offset=-A->mainBlock->pattern->ptr[i];
595     register dim_t kdeg=0;
596 caltinay 3489 register const index_t koffset=A->mainBlock->pattern->ptr[i]+A->col_coupleBlock->pattern->ptr[i];
597 gross 3193
598 gross 3482 /* collect information for row i: */
599     for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
600     register index_t j=A->mainBlock->pattern->index[iptr];
601     register double fnorm=0;
602     #pragma ivdep
603     for(bi=0;bi<n_block;++bi) {
604     register double rtmp2= A->mainBlock->val[iptr*n_block+bi];
605     fnorm+=rtmp2*rtmp2;
606 gross 3283 }
607 gross 3482 fnorm=sqrt(fnorm);
608     rtmp[iptr+rtmp_offset]=fnorm;
609    
610     if( j != i) {
611 gross 3445 max_offdiagonal = MAX(max_offdiagonal,fnorm);
612     sum_row+=fnorm;
613 gross 3482 } else {
614     main_row=fnorm;
615 gross 3445 }
616 gross 3482
617     }
618    
619     rtmp_offset+=A->mainBlock->pattern->ptr[i+1]-A->col_coupleBlock->pattern->ptr[i];
620     for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
621     register double fnorm=0;
622     #pragma ivdep
623     for(bi=0;bi<n_block;++bi) {
624     register double rtmp2 = A->col_coupleBlock->val[iptr*n_block+bi];
625     fnorm+=rtmp2*rtmp2;
626 gross 3283 }
627 gross 3482 fnorm=sqrt(fnorm);
628    
629     rtmp[iptr+rtmp_offset]=fnorm;
630     max_offdiagonal = MAX(max_offdiagonal,fnorm);
631     sum_row+=fnorm;
632 gross 3283 }
633 gross 3482
634    
635     /* inspect row i: */
636     {
637     const double threshold = theta*max_offdiagonal;
638     rtmp_offset=-A->mainBlock->pattern->ptr[i];
639    
640     threshold_p[2*i+1]=threshold;
641     if (tau*main_row < sum_row) { /* no diagonal domainance */
642     threshold_p[2*i]=1;
643     rtmp_offset=-A->mainBlock->pattern->ptr[i];
644     #pragma ivdep
645     for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
646     register index_t j=A->mainBlock->pattern->index[iptr];
647     if(rtmp[iptr+rtmp_offset] > threshold && i!=j) {
648     S[koffset+kdeg] = j;
649     kdeg++;
650 gross 3351 }
651 gross 3283 }
652 gross 3482 rtmp_offset+=A->mainBlock->pattern->ptr[i+1]-A->col_coupleBlock->pattern->ptr[i];
653     #pragma ivdep
654     for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
655     register index_t j=A->col_coupleBlock->pattern->index[iptr];
656     if( rtmp[iptr+rtmp_offset] >threshold) {
657     S[koffset+kdeg] = j + my_n;
658     kdeg++;
659 gross 3351 }
660     }
661 gross 3482 } else {
662     threshold_p[2*i]=-1;
663     }
664     }
665     offset_S[i]=koffset;
666     degree_S[i]=kdeg;
667     }
668     TMPMEMFREE(rtmp);
669     }
670     /* now we need to distribute the threshold and the diagonal dominance indicator */
671     if (A->mpi_info->size > 1) {
672    
673     const index_t koffset_0=A->mainBlock->pattern->ptr[my_n]+A->col_coupleBlock->pattern->ptr[my_n]
674     -A->mainBlock->pattern->ptr[0]-A->col_coupleBlock->pattern->ptr[0];
675    
676     double *remote_threshold=NULL;
677    
678     Paso_Coupler* threshold_coupler=Paso_Coupler_alloc(A->row_coupler->connector ,2);
679     Paso_Coupler_startCollect(threshold_coupler,threshold_p);
680     Paso_Coupler_finishCollect(threshold_coupler);
681     remote_threshold=threshold_coupler->recv_buffer;
682    
683     #pragma omp parallel for private(i,iptr) schedule(static)
684     for (i=0; i<overlap_n; i++) {
685    
686     const double threshold2 = remote_threshold[2*i+1]*remote_threshold[2*i+1];
687     register dim_t kdeg=0;
688 caltinay 3489 register const index_t koffset=koffset_0+A->row_coupleBlock->pattern->ptr[i];
689 gross 3482 if (remote_threshold[2*i]>0) {
690     #pragma ivdep
691     for (iptr=A->row_coupleBlock->pattern->ptr[i];iptr<A->row_coupleBlock->pattern->ptr[i+1]; ++iptr) {
692     register index_t j=A->row_coupleBlock->pattern->index[iptr];
693     register double fnorm2=0;
694     #pragma ivdepremote_threshold[2*i]
695     for(bi=0;bi<n_block;++bi) {
696     register double rtmp2 = A->row_coupleBlock->val[iptr*n_block+bi];
697     fnorm2+=rtmp2*rtmp2;
698 gross 3351 }
699 gross 3482
700     if(fnorm2 > threshold2 ) {
701     S[koffset+kdeg] = j ;
702     kdeg++;
703 gross 3351 }
704 gross 3283 }
705 gross 3351
706 gross 3283 }
707 gross 3482 offset_S[i+my_n]=koffset;
708     degree_S[i+my_n]=kdeg;
709 gross 3283 }
710 gross 3482 Paso_Coupler_free(threshold_coupler);
711 gross 3283 }
712 gross 3482 TMPMEMFREE(threshold_p);
713 gross 3193 }
714 gross 3482 void Paso_Preconditioner_AMG_transposeStrongConnections(const dim_t n, const dim_t* degree_S, const index_t* offset_S, const index_t* S,
715     const dim_t nT, dim_t* degree_ST, index_t* offset_ST,index_t* ST)
716 gross 3440 {
717 gross 3482 index_t i, j;
718     dim_t p;
719     dim_t len=0;
720 gross 3485 #pragma omp parallel for private(i) schedule(static)
721 gross 3482 for (i=0; i<nT ;++i) {
722     degree_ST[i]=0;
723     }
724     for (i=0; i<n ;++i) {
725     for (p=0; p<degree_S[i]; ++p) degree_ST[ S[offset_S[i]+p] ]++;
726     }
727     for (i=0; i<nT ;++i) {
728     offset_ST[i]=len;
729     len+=degree_ST[i];
730     degree_ST[i]=0;
731     }
732     for (i=0; i<n ;++i) {
733     for (p=0; p<degree_S[i]; ++p) {
734     j=S[offset_S[i]+p];
735     ST[offset_ST[j]+degree_ST[j]]=i;
736     degree_ST[j]++;
737     }
738     }
739 gross 3440 }
740 gross 3442
741 gross 3482 void Paso_Preconditioner_AMG_CIJPCoarsening(const dim_t n, const dim_t my_n, index_t*split_marker,
742     const dim_t* degree_S, const index_t* offset_S, const index_t* S,
743     const dim_t* degree_ST, const index_t* offset_ST, const index_t* ST,
744     Paso_Connector* col_connector, Paso_Distribution* col_dist)
745 gross 3442 {
746 gross 3482 dim_t i, numUndefined, iter=0;
747     index_t iptr, jptr, kptr;
748     double *random=NULL, *w=NULL, *Status=NULL;
749     index_t * ST_flag=NULL;
750 gross 3445
751 gross 3482 Paso_Coupler* w_coupler=Paso_Coupler_alloc(col_connector ,1);
752    
753     w=TMPMEMALLOC(n, double);
754     Status=TMPMEMALLOC(n, double);
755     random = Paso_Distribution_createRandomVector(col_dist,1);
756     ST_flag=TMPMEMALLOC(offset_ST[n-1]+ degree_ST[n-1], index_t);
757    
758     #pragma omp parallel for private(i)
759     for (i=0; i< my_n; ++i) {
760     w[i]=degree_ST[i]+random[i];
761     if (degree_ST[i] < 1) {
762     Status[i]=-100; /* F point */
763     } else {
764     Status[i]=1; /* status undefined */
765     }
766     }
767     #pragma omp parallel for private(i, iptr)
768     for (i=0; i< n; ++i) {
769     for( iptr =0 ; iptr < degree_ST[i]; ++iptr) {
770     ST_flag[offset_ST[i]+iptr]=1;
771     }
772     }
773    
774 gross 3445
775 gross 3482 numUndefined = Paso_Distribution_numPositives(Status, col_dist, 1 );
776     printf(" coarsening loop start: num of undefined rows = %d \n",numUndefined);
777 gross 3449
778 gross 3482
779     iter=0;
780     while (numUndefined > 0) {
781     Paso_Coupler_fillOverlap(n, w, w_coupler);
782     {
783     int p;
784     for (p=0; p<my_n; ++p) {
785     printf(" %d : %f %f \n",p, w[p], Status[p]);
786     }
787     for (p=my_n; p<n; ++p) {
788     printf(" %d : %f \n",p, w[p]);
789     }
790    
791    
792     }
793    
794     /* calculate the maximum value of naigbours following active strong connections:
795     w2[i]=MAX(w[k]) with k in ST[i] or S[i] and (i,k) conenction is still active */
796     #pragma omp parallel for private(i, iptr)
797     for (i=0; i<my_n; ++i) {
798     if (Status[i]>0) { /* status is still undefined */
799 gross 3449
800 gross 3482 register bool_t inD=TRUE;
801     const double wi=w[i];
802    
803    
804     for( iptr =0 ; iptr < degree_S[i]; ++iptr) {
805    
806     const index_t k=S[offset_S[i]+iptr];
807     const index_t* start_p = &ST[offset_ST[k]];
808     const index_t* where_p=(index_t*)bsearch(&i, start_p, degree_ST[k], sizeof(index_t), Paso_comparIndex);
809    
810     if (ST_flag[(index_t)(where_p-start_p)]>0) {
811     printf("S: %d (%e) -> %d (%e)\n",i, wi, k, w[k]);
812     if (wi <= w[k] ) {
813     inD=FALSE;
814     break;
815     }
816     }
817    
818     }
819    
820     if (inD) {
821     for( iptr =0 ; iptr < degree_ST[i]; ++iptr) {
822     const index_t k=ST[offset_ST[i]+iptr];
823     if ( ST_flag[offset_ST[i]+iptr] > 0 ) {
824     printf("ST: %d (%e) -> %d (%e)\n",i, wi, k, w[k]);
825    
826     if (wi <= w[k] ) {
827     inD=FALSE;
828     break;
829     }
830     }
831     }
832     }
833     if (inD) {
834     Status[i]=0.; /* is in D */
835     printf("%d is in D\n",i);
836     }
837     }
838    
839 gross 3449 }
840    
841 gross 3482 Paso_Coupler_fillOverlap(n, Status, w_coupler);
842 gross 3442
843    
844 gross 3482 /* remove connection to D points :
845    
846     for each i in D:
847     for each j in S_i:
848     w[j]--
849     ST_tag[j,i]=-1
850     for each j in ST[i]:
851     ST_tag[i,j]=-1
852     for each k in ST[j]:
853     if k in ST[i]:
854     w[j]--;
855     ST_tag[j,k]=-1
856    
857     */
858     /* w is updated for local rows only */
859     {
860 gross 3485 #pragma omp parallel for private(i, jptr)
861 gross 3482 for (i=0; i< my_n; ++i) {
862 gross 3445
863 gross 3482 for (jptr=0; jptr<degree_ST[i]; ++jptr) {
864     const index_t j=ST[offset_ST[i]+jptr];
865     if ( (Status[j] == 0.) && (ST_flag[offset_ST[i]+jptr]>0) ) {
866     w[i]--;
867     printf("%d reduced by %d\n",i,j);
868     ST_flag[offset_ST[i]+jptr]=-1;
869     }
870     }
871    
872     }
873 gross 3485 #pragma omp parallel for private(i, jptr)
874 gross 3482 for (i=my_n; i< n; ++i) {
875     for (jptr=0; jptr<degree_ST[i]; ++jptr) {
876     const index_t j = ST[offset_ST[i]+jptr];
877     if ( Status[j] == 0. ) ST_flag[offset_ST[i]+jptr]=-1;
878     }
879     }
880 gross 3442
881 gross 3482
882     for (i=0; i< n; ++i) {
883     if ( Status[i] == 0. ) {
884 gross 3445
885 gross 3482 const index_t* start_p = &ST[offset_ST[i]];
886    
887     for (jptr=0; jptr<degree_ST[i]; ++jptr) {
888     const index_t j=ST[offset_ST[i]+jptr];
889     printf("check connection: %d %d\n",i,j);
890     ST_flag[offset_ST[i]+jptr]=-1;
891     for (kptr=0; kptr<degree_ST[j]; ++kptr) {
892     const index_t k=ST[offset_ST[j]+kptr];
893     printf("check connection: %d of %d\n",k,j);
894     if (NULL != bsearch(&k, start_p, degree_ST[i], sizeof(index_t), Paso_comparIndex) ) { /* k in ST[i] ? */
895     printf("found!\n");
896     if (ST_flag[offset_ST[j]+kptr] >0) {
897     if (j< my_n ) {
898     w[j]--;
899     printf("%d reduced by %d and %d \n",j, i,k);
900     }
901     ST_flag[offset_ST[j]+kptr]=-1;
902     }
903     }
904     }
905     }
906     }
907     }
908     }
909     /* adjust status */
910     #pragma omp parallel for private(i)
911     for (i=0; i< my_n; ++i) {
912     if ( Status[i] == 0. ) {
913     Status[i] = -10; /* this is now a C point */
914     } else if ( w[i]<1.) {
915     Status[i] = -100; /* this is now a F point */
916     }
917     }
918    
919    
920     numUndefined = Paso_Distribution_numPositives(Status, col_dist, 1 );
921     iter++;
922     printf(" coarsening loop %d: num of undefined rows = %d \n",iter, numUndefined);
923     } /* end of while loop */
924    
925    
926     /* map to output :*/
927     Paso_Coupler_fillOverlap(n, Status, w_coupler);
928     #pragma omp parallel for private(i)
929     for (i=0; i< n; ++i) {
930     if (Status[i] > -50.) {
931     split_marker[i]=PASO_AMG_IN_C;
932     } else {
933     split_marker[i]=PASO_AMG_IN_F;
934     }
935     }
936    
937     /* clean up : */
938     Paso_Coupler_free(w_coupler);
939     TMPMEMFREE(random);
940     TMPMEMFREE(w);
941     TMPMEMFREE(Status);
942     TMPMEMFREE(ST_flag);
943    
944     return;
945 gross 3442 }

  ViewVC Help
Powered by ViewVC 1.1.26