/[escript]/trunk/paso/src/AMG.c
ViewVC logotype

Contents of /trunk/paso/src/AMG.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3489 - (show annotations)
Wed Mar 30 00:46:04 2011 UTC (8 years, 6 months ago) by caltinay
File MIME type: text/plain
File size: 29983 byte(s)
Fixed a few warnings emitted by gcc-4.6.

1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2010 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14
15 /**************************************************************/
16
17 /* Paso: AMG preconditioner (local version) */
18
19 /**************************************************************/
20
21 /* Author: artak@uq.edu.au, l.gross@uq.edu.au */
22
23 /**************************************************************/
24
25 #define SHOW_TIMING FALSE
26
27 #include "Paso.h"
28 #include "Preconditioner.h"
29 #include "Options.h"
30 #include "PasoUtil.h"
31 #include "UMFPACK.h"
32 #include "MKL.h"
33 #include<stdio.h>
34
35
36 /**************************************************************/
37
38 /* free all memory used by AMG */
39
40 void Paso_Preconditioner_AMG_free(Paso_Preconditioner_AMG * in) {
41 if (in!=NULL) {
42 Paso_Preconditioner_Smoother_free(in->Smoother);
43 Paso_SystemMatrix_free(in->P);
44 Paso_SystemMatrix_free(in->R);
45 Paso_SystemMatrix_free(in->A_C);
46 Paso_Preconditioner_AMG_free(in->AMG_C);
47 MEMFREE(in->r);
48 MEMFREE(in->x_C);
49 MEMFREE(in->b_C);
50
51 MEMFREE(in);
52 }
53 }
54
55 index_t Paso_Preconditioner_AMG_getMaxLevel(const Paso_Preconditioner_AMG * in) {
56 if (in->AMG_C == NULL) {
57 return in->level;
58 } else {
59 return Paso_Preconditioner_AMG_getMaxLevel(in->AMG_C);
60 }
61 }
62 double Paso_Preconditioner_AMG_getCoarseLevelSparsity(const Paso_Preconditioner_AMG * in) {
63 if (in->AMG_C == NULL) {
64 if (in->A_C == NULL) {
65 return 1.;
66 } else {
67 return Paso_SystemMatrix_getSparsity(in->A_C);
68 }
69 } else {
70 return Paso_Preconditioner_AMG_getCoarseLevelSparsity(in->AMG_C);
71 }
72 }
73 dim_t Paso_Preconditioner_AMG_getNumCoarseUnknwons(const Paso_Preconditioner_AMG * in) {
74 if (in->AMG_C == NULL) {
75 if (in->A_C == NULL) {
76 return 0;
77 } else {
78 return Paso_SystemMatrix_getTotalNumRows(in->A_C);
79 }
80 } else {
81 return Paso_Preconditioner_AMG_getNumCoarseUnknwons(in->AMG_C);
82 }
83 }
84 /*****************************************************************
85
86 constructs AMG
87
88 ******************************************************************/
89 Paso_Preconditioner_AMG* Paso_Preconditioner_AMG_alloc(Paso_SystemMatrix *A_p,dim_t level,Paso_Options* options) {
90
91 Paso_Preconditioner_AMG* out=NULL;
92 bool_t verbose=options->verbose;
93
94 const dim_t my_n=A_p->mainBlock->numRows;
95 const dim_t overlap_n=A_p->row_coupleBlock->numRows;
96
97 const dim_t n = my_n + overlap_n;
98
99 const dim_t n_block=A_p->row_block_size;
100 index_t* F_marker=NULL, *counter=NULL;
101 dim_t i;
102 double time0=0;
103 const double theta = options->coarsening_threshold;
104 const double tau = options->diagonal_dominance_threshold;
105 const double sparsity=Paso_SystemMatrix_getSparsity(A_p);
106 const dim_t total_n=Paso_SystemMatrix_getGlobalTotalNumRows(A_p);
107
108
109 /*
110 is the input matrix A suitable for coarsening
111
112 */
113 if ( (sparsity >= options->min_coarse_sparsity) ||
114 (total_n <= options->min_coarse_matrix_size) ||
115 (level > options->level_max) ) {
116
117 if (verbose) {
118 /*
119 print stopping condition:
120 - 'SPAR' = min_coarse_matrix_sparsity exceeded
121 - 'SIZE' = min_coarse_matrix_size exceeded
122 - 'LEVEL' = level_max exceeded
123 */
124 printf("Paso_Preconditioner: AMG: termination of coarsening by ");
125
126 if (sparsity >= options->min_coarse_sparsity)
127 printf("SPAR ");
128
129 if (total_n <= options->min_coarse_matrix_size)
130 printf("SIZE ");
131
132 if (level > options->level_max)
133 printf("LEVEL ");
134
135 printf("\n");
136
137 printf("Paso_Preconditioner: AMG level %d (limit = %d) stopped. sparsity = %e (limit = %e), unknowns = %d (limit = %d)\n",
138 level, options->level_max, sparsity, options->min_coarse_sparsity, total_n, options->min_coarse_matrix_size);
139
140 }
141
142 return NULL;
143 } else {
144 /* Start Coarsening : */
145
146 /* this is the table for strong connections combining mainBlock, col_coupleBlock and row_coupleBlock */
147 const dim_t len_S=A_p->mainBlock->pattern->len + A_p->col_coupleBlock->pattern->len + A_p->row_coupleBlock->pattern->len;
148
149 dim_t* degree_S=TMPMEMALLOC(n, dim_t);
150 index_t *offset_S=TMPMEMALLOC(n, index_t);
151 index_t *S=TMPMEMALLOC(len_S, index_t);
152 dim_t* degree_ST=TMPMEMALLOC(n, dim_t);
153 index_t *offset_ST=TMPMEMALLOC(n, index_t);
154 index_t *ST=TMPMEMALLOC(len_S, index_t);
155
156
157 F_marker=TMPMEMALLOC(n,index_t);
158 counter=TMPMEMALLOC(n,index_t);
159
160 if ( !( Esys_checkPtr(F_marker) || Esys_checkPtr(counter) || Esys_checkPtr(degree_S) || Esys_checkPtr(offset_S) || Esys_checkPtr(S)
161 || Esys_checkPtr(degree_ST) || Esys_checkPtr(offset_ST) || Esys_checkPtr(ST) ) ) {
162 /*
163 make sure that corresponding values in the row_coupleBlock and col_coupleBlock are identical
164 */
165 Paso_SystemMatrix_copyColCoupleBlock(A_p);
166 /* Paso_SystemMatrix_copyRemoteCoupleBlock(A_p, FALSE); */
167
168 /*
169 set splitting of unknows:
170
171 */
172 time0=Esys_timer();
173 if (n_block>1) {
174 Paso_Preconditioner_AMG_setStrongConnections_Block(A_p, degree_S, offset_S, S, theta,tau);
175 } else {
176 Paso_Preconditioner_AMG_setStrongConnections(A_p, degree_S, offset_S, S, theta,tau);
177 }
178 Paso_Preconditioner_AMG_transposeStrongConnections(n, degree_S, offset_S, S, n, degree_ST, offset_ST, ST);
179
180 {
181 dim_t p;
182 for (i=0; i<n; ++i) {
183 printf("%d: ",i);
184 for (p=0; p<degree_S[i];++p) printf("%d ",S[offset_S[i]+p]);
185 printf("\n");
186 }
187 }
188 Paso_Preconditioner_AMG_CIJPCoarsening(n,my_n,F_marker,
189 degree_S, offset_S, S, degree_ST, offset_ST, ST,
190 A_p->col_coupler->connector,A_p->col_distribution);
191 Esys_setError(SYSTEM_ERROR, "AMG:DONE.");
192 return NULL;
193
194
195 /*MPI:
196 Paso_Preconditioner_AMG_RungeStuebenSearch(n, A_p->pattern->ptr, degree_S, S, F_marker, options->usePanel);
197 */
198
199 /* in BoomerAMG interpolation is used FF connectiovity is required :*/
200 /*MPI:
201 if (options->interpolation_method == PASO_CLASSIC_INTERPOLATION_WITH_FF_COUPLING)
202 Paso_Preconditioner_AMG_enforceFFConnectivity(n, A_p->pattern->ptr, degree_S, S, F_marker);
203 */
204
205 options->coarsening_selection_time=Esys_timer()-time0 + MAX(0, options->coarsening_selection_time);
206
207 #ifdef AAAAA
208 if (Esys_noError() ) {
209 #pragma omp parallel for private(i) schedule(static)
210 for (i = 0; i < n; ++i) F_marker[i]=(F_marker[i] == PASO_AMG_IN_F);
211
212 /*
213 count number of unkowns to be eliminated:
214 */
215 n_F=Paso_Util_cumsum_maskedTrue(n,counter, F_marker);
216 n_C=n-n_F;
217 if (verbose) printf("Paso_Preconditioner: AMG level %d: %d unknowns are flagged for elimination. %d left.\n",level,n_F,n-n_F);
218
219 if ( n_F == 0 ) { /* is a nasty case. a direct solver should be used, return NULL */
220 out = NULL;
221 } else {
222 out=MEMALLOC(1,Paso_Preconditioner_AMG);
223 if (! Esys_checkPtr(out)) {
224 out->level = level;
225 out->n = n;
226 out->n_F = n_F;
227 out->n_block = n_block;
228 out->A_C = NULL;
229 out->P = NULL;
230 out->R = NULL;
231 out->post_sweeps = options->post_sweeps;
232 out->pre_sweeps = options->pre_sweeps;
233 out->r = NULL;
234 out->x_C = NULL;
235 out->b_C = NULL;
236 out->AMG_C = NULL;
237 out->Smoother=NULL;
238 }
239 mask_C=TMPMEMALLOC(n,index_t);
240 rows_in_F=TMPMEMALLOC(n_F,index_t);
241 Esys_checkPtr(mask_C);
242 Esys_checkPtr(rows_in_F);
243 if ( Esys_noError() ) {
244
245 out->Smoother = Paso_Preconditioner_Smoother_alloc(A_p, (options->smoother == PASO_JACOBI), verbose);
246
247 if (n_C != 0) {
248 /* if nothing is been removed we have a diagonal dominant matrix and we just run a few steps of the smoother */
249
250 /* allocate helpers :*/
251 out->x_C=MEMALLOC(n_block*n_C,double);
252 out->b_C=MEMALLOC(n_block*n_C,double);
253 out->r=MEMALLOC(n_block*n,double);
254
255 Esys_checkPtr(out->r);
256 Esys_checkPtr(out->x_C);
257 Esys_checkPtr(out->b_C);
258
259 if ( Esys_noError() ) {
260 /* creates index for F:*/
261 #pragma omp parallel private(i)
262 {
263 #pragma omp for schedule(static)
264 for (i = 0; i < n; ++i) {
265 if (F_marker[i]) rows_in_F[counter[i]]=i;
266 }
267 }
268 /* create mask of C nodes with value >-1 gives new id */
269 i=Paso_Util_cumsum_maskedFalse(n,counter, F_marker);
270
271 #pragma omp parallel for private(i) schedule(static)
272 for (i = 0; i < n; ++i) {
273 if (F_marker[i]) {
274 mask_C[i]=-1;
275 } else {
276 mask_C[i]=counter[i];;
277 }
278 }
279 /*
280 get Prolongation :
281 */
282 time0=Esys_timer();
283 /*MPI:
284 out->P=Paso_Preconditioner_AMG_getProlongation(A_p,A_p->pattern->ptr, degree_S,S,n_C,mask_C, options->interpolation_method);
285 */
286 if (SHOW_TIMING) printf("timing: level %d: getProlongation: %e\n",level, Esys_timer()-time0);
287 }
288 /*
289 construct Restriction operator as transposed of Prolongation operator:
290 */
291 if ( Esys_noError()) {
292 time0=Esys_timer();
293 /*MPI:
294 out->R=Paso_SystemMatrix_getTranspose(out->P);
295 */
296 if (SHOW_TIMING) printf("timing: level %d: Paso_SystemMatrix_getTranspose: %e\n",level,Esys_timer()-time0);
297 }
298 /*
299 construct coarse level matrix:
300 */
301 if ( Esys_noError()) {
302 time0=Esys_timer();
303 /*MPI:
304 Atemp=Paso_SystemMatrix_MatrixMatrix(A_p,out->P);
305 A_C=Paso_SystemMatrix_MatrixMatrix(out->R,Atemp);
306 Paso_Preconditioner_AMG_setStrongConnections
307 Paso_SystemMatrix_free(Atemp);
308 */
309
310 if (SHOW_TIMING) printf("timing: level %d : construct coarse matrix: %e\n",level,Esys_timer()-time0);
311 }
312
313
314 /*
315 constructe courser level:
316
317 */
318 if ( Esys_noError()) {
319 out->AMG_C=Paso_Preconditioner_AMG_alloc(A_C,level+1,options);
320 }
321 if ( Esys_noError()) {
322 if ( out->AMG_C == NULL ) {
323 out->reordering = options->reordering;
324 out->refinements = options->coarse_matrix_refinements;
325 /* no coarse level matrix has been constructed. use direct solver */
326 #ifdef MKL
327 out->A_C=Paso_SystemMatrix_unroll(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_OFFSET1, A_C);
328 Paso_SystemMatrix_free(A_C);
329 out->A_C->solver_package = PASO_MKL;
330 if (verbose) printf("Paso_Preconditioner: AMG: use MKL direct solver on the coarsest level (number of unknowns = %d).\n",n_C*n_block);
331 #else
332 #ifdef UMFPACK
333 out->A_C=Paso_SystemMatrix_unroll(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_CSC, A_C);
334 Paso_SystemMatrix_free(A_C);
335 out->A_C->solver_package = PASO_UMFPACK;
336 if (verbose) printf("Paso_Preconditioner: AMG: use UMFPACK direct solver on the coarsest level (number of unknowns = %d).\n",n_C*n_block);
337 #else
338 out->A_C=A_C;
339 out->A_C->solver_p=Paso_Preconditioner_Smoother_alloc(out->A_C, (options->smoother == PASO_JACOBI), verbose);
340 out->A_C->solver_package = PASO_SMOOTHER;
341 if (verbose) printf("Paso_Preconditioner: AMG: use smoother on the coarsest level (number of unknowns = %d).\n",n_C*n_block);
342 #endif
343 #endif
344 } else {
345 /* finally we set some helpers for the solver step */
346 out->A_C=A_C;
347 }
348 }
349 }
350 }
351 TMPMEMFREE(mask_C);
352 TMPMEMFREE(rows_in_F);
353 }
354 }
355 #endif
356
357 }
358 TMPMEMFREE(counter);
359 TMPMEMFREE(F_marker);
360 TMPMEMFREE(degree_S);
361 TMPMEMFREE(offset_S);
362 TMPMEMFREE(S);
363 TMPMEMFREE(degree_ST);
364 TMPMEMFREE(offset_ST);
365 TMPMEMFREE(ST);
366
367 }
368
369 if (Esys_noError()) {
370 return out;
371 } else {
372 Paso_Preconditioner_AMG_free(out);
373 return NULL;
374 }
375 }
376
377
378 void Paso_Preconditioner_AMG_solve(Paso_SystemMatrix* A, Paso_Preconditioner_AMG * amg, double * x, double * b) {
379 const dim_t n = amg->n * amg->n_block;
380 double time0=0;
381 const dim_t post_sweeps=amg->post_sweeps;
382 const dim_t pre_sweeps=amg->pre_sweeps;
383
384 /* presmoothing */
385 time0=Esys_timer();
386 Paso_Preconditioner_Smoother_solve(A, amg->Smoother, x, b, pre_sweeps, FALSE);
387 time0=Esys_timer()-time0;
388 if (SHOW_TIMING) printf("timing: level %d: Presmooting: %e\n",amg->level, time0);
389 /* end of presmoothing */
390
391 if (amg->n_F < amg->n) { /* is there work on the coarse level? */
392 time0=Esys_timer();
393 Paso_Copy(n, amg->r, b); /* r <- b */
394 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(-1.,A,x,1.,amg->r); /*r=r-Ax*/
395 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(1.,amg->R,amg->r,0.,amg->b_C); /* b_c = R*r */
396 time0=Esys_timer()-time0;
397 /* coarse level solve */
398 if ( amg->AMG_C == NULL) {
399 time0=Esys_timer();
400 /* A_C is the coarsest level */
401 #ifdef FIXME
402 switch (amg->A_C->solver_package) {
403 case (PASO_MKL):
404 Paso_MKL(amg->A_C, amg->x_C,amg->b_C, amg->reordering, amg->refinements, SHOW_TIMING);
405 break;
406 case (PASO_UMFPACK):
407 Paso_UMFPACK(amg->A_C, amg->x_C,amg->b_C, amg->refinements, SHOW_TIMING);
408 break;
409 case (PASO_SMOOTHER):
410 Paso_Preconditioner_Smoother_solve(amg->A_C, amg->A_C->solver_p,amg->x_C,amg->b_C,pre_sweeps+post_sweeps, FALSE);
411 break;
412 }
413 #endif
414 Paso_Preconditioner_Smoother_solve(amg->A_C, amg->A_C->solver_p,amg->x_C,amg->b_C,pre_sweeps+post_sweeps, FALSE);
415 if (SHOW_TIMING) printf("timing: level %d: DIRECT SOLVER: %e\n",amg->level,Esys_timer()-time0);
416 } else {
417 Paso_Preconditioner_AMG_solve(amg->A_C, amg->AMG_C,amg->x_C,amg->b_C); /* x_C=AMG(b_C) */
418 }
419 time0=time0+Esys_timer();
420 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(1.,amg->P,amg->x_C,1.,x); /* x = x + P*x_c */
421
422 /*postsmoothing*/
423
424 /*solve Ax=b with initial guess x */
425 time0=Esys_timer();
426 Paso_Preconditioner_Smoother_solve(A, amg->Smoother, x, b, post_sweeps, TRUE);
427 time0=Esys_timer()-time0;
428 if (SHOW_TIMING) printf("timing: level %d: Postsmoothing: %e\n",amg->level,time0);
429 /*end of postsmoothing*/
430
431 }
432 return;
433 }
434
435 /* theta = threshold for strong connections */
436 /* tau = threshold for diagonal dominance */
437
438 /*S_i={j \in N_i; i strongly coupled to j}
439
440 in the sense that |A_{ij}| >= theta * max_k |A_{ik}|
441 */
442
443 void Paso_Preconditioner_AMG_setStrongConnections(Paso_SystemMatrix* A,
444 dim_t *degree_S, index_t *offset_S, index_t *S,
445 const double theta, const double tau)
446 {
447
448 const dim_t my_n=A->mainBlock->numRows;
449 const dim_t overlap_n=A->row_coupleBlock->numRows;
450
451 index_t iptr, i;
452 double *threshold_p=NULL;
453
454
455 threshold_p = TMPMEMALLOC(2*my_n, double);
456
457 #pragma omp parallel for private(i,iptr) schedule(static)
458 for (i=0;i<my_n;++i) {
459
460 register double max_offdiagonal = 0.;
461 register double sum_row=0;
462 register double main_row=0;
463 register dim_t kdeg=0;
464 register const index_t koffset=A->mainBlock->pattern->ptr[i]+A->col_coupleBlock->pattern->ptr[i];
465
466 /* collect information for row i: */
467 #pragma ivdep
468 for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
469 register index_t j=A->mainBlock->pattern->index[iptr];
470 register double fnorm=ABS(A->mainBlock->val[iptr]);
471
472 if( j != i) {
473 max_offdiagonal = MAX(max_offdiagonal,fnorm);
474 sum_row+=fnorm;
475 } else {
476 main_row=fnorm;
477 }
478
479 }
480
481 #pragma ivdep
482 for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
483 register double fnorm=ABS(A->col_coupleBlock->val[iptr]);
484 max_offdiagonal = MAX(max_offdiagonal,fnorm);
485 sum_row+=fnorm;
486 }
487
488 /* inspect row i: */
489 {
490 const double threshold = theta*max_offdiagonal;
491 threshold_p[2*i+1]=threshold;
492 if (tau*main_row < sum_row) { /* no diagonal domainance */
493 threshold_p[2*i]=1;
494 #pragma ivdep
495 for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
496 register index_t j=A->mainBlock->pattern->index[iptr];
497 if(ABS(A->mainBlock->val[iptr])>threshold && i!=j) {
498 S[koffset+kdeg] = j;
499 kdeg++;
500 }
501 }
502 #pragma ivdep
503 for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
504 register index_t j=A->col_coupleBlock->pattern->index[iptr];
505 if(ABS(A->col_coupleBlock->val[iptr])>threshold) {
506 S[koffset+kdeg] = j + my_n;
507 kdeg++;
508 }
509 }
510 } else {
511 threshold_p[2*i]=-1;
512 }
513 }
514 offset_S[i]=koffset;
515 degree_S[i]=kdeg;
516 }
517 /* now we need to distribute the threshold and the diagonal dominance indicator */
518 if (A->mpi_info->size > 1) {
519
520 const index_t koffset_0=A->mainBlock->pattern->ptr[my_n]+A->col_coupleBlock->pattern->ptr[my_n]
521 -A->mainBlock->pattern->ptr[0]-A->col_coupleBlock->pattern->ptr[0];
522
523 double *remote_threshold=NULL;
524
525 Paso_Coupler* threshold_coupler=Paso_Coupler_alloc(A->row_coupler->connector ,2);
526 Paso_Coupler_startCollect(threshold_coupler,threshold_p);
527 Paso_Coupler_finishCollect(threshold_coupler);
528 remote_threshold=threshold_coupler->recv_buffer;
529
530 #pragma omp parallel for private(i,iptr) schedule(static)
531 for (i=0; i<overlap_n; i++) {
532
533 const double threshold = remote_threshold[2*i+1];
534 register dim_t kdeg=0;
535 register const index_t koffset=koffset_0+A->row_coupleBlock->pattern->ptr[i];
536 if (remote_threshold[2*i]>0) {
537 #pragma ivdep
538 for (iptr=A->row_coupleBlock->pattern->ptr[i];iptr<A->row_coupleBlock->pattern->ptr[i+1]; ++iptr) {
539 register index_t j=A->row_coupleBlock->pattern->index[iptr];
540 if(ABS(A->row_coupleBlock->val[iptr])>threshold) {
541 S[koffset+kdeg] = j ;
542 kdeg++;
543 }
544 }
545
546 }
547 offset_S[i+my_n]=koffset;
548 degree_S[i+my_n]=kdeg;
549 }
550
551 Paso_Coupler_free(threshold_coupler);
552 }
553 TMPMEMFREE(threshold_p);
554 }
555
556 /* theta = threshold for strong connections */
557 /* tau = threshold for diagonal dominance */
558 /*S_i={j \in N_i; i strongly coupled to j}
559
560 in the sense that |A_{ij}|_F >= theta * max_k |A_{ik}|_F
561 */
562 void Paso_Preconditioner_AMG_setStrongConnections_Block(Paso_SystemMatrix* A,
563 dim_t *degree_S, index_t *offset_S, index_t *S,
564 const double theta, const double tau)
565
566 {
567 const dim_t n_block=A->block_size;
568 const dim_t my_n=A->mainBlock->numRows;
569 const dim_t overlap_n=A->row_coupleBlock->numRows;
570
571 index_t iptr, i, bi;
572 double *threshold_p=NULL;
573
574
575 threshold_p = TMPMEMALLOC(2*my_n, double);
576
577 #pragma omp parallel private(i,iptr, bi)
578 {
579
580 dim_t max_deg=0;
581 double *rtmp=NULL;
582
583 #pragma omp for schedule(static)
584 for (i=0;i<my_n;++i) max_deg=MAX(max_deg, A->mainBlock->pattern->ptr[i+1]-A->mainBlock->pattern->ptr[i]
585 +A->col_coupleBlock->pattern->ptr[i+1]-A->col_coupleBlock->pattern->ptr[i]);
586
587 rtmp=TMPMEMALLOC(max_deg, double);
588
589 #pragma omp for schedule(static)
590 for (i=0;i<my_n;++i) {
591 register double max_offdiagonal = 0.;
592 register double sum_row=0;
593 register double main_row=0;
594 register index_t rtmp_offset=-A->mainBlock->pattern->ptr[i];
595 register dim_t kdeg=0;
596 register const index_t koffset=A->mainBlock->pattern->ptr[i]+A->col_coupleBlock->pattern->ptr[i];
597
598 /* collect information for row i: */
599 for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
600 register index_t j=A->mainBlock->pattern->index[iptr];
601 register double fnorm=0;
602 #pragma ivdep
603 for(bi=0;bi<n_block;++bi) {
604 register double rtmp2= A->mainBlock->val[iptr*n_block+bi];
605 fnorm+=rtmp2*rtmp2;
606 }
607 fnorm=sqrt(fnorm);
608 rtmp[iptr+rtmp_offset]=fnorm;
609
610 if( j != i) {
611 max_offdiagonal = MAX(max_offdiagonal,fnorm);
612 sum_row+=fnorm;
613 } else {
614 main_row=fnorm;
615 }
616
617 }
618
619 rtmp_offset+=A->mainBlock->pattern->ptr[i+1]-A->col_coupleBlock->pattern->ptr[i];
620 for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
621 register double fnorm=0;
622 #pragma ivdep
623 for(bi=0;bi<n_block;++bi) {
624 register double rtmp2 = A->col_coupleBlock->val[iptr*n_block+bi];
625 fnorm+=rtmp2*rtmp2;
626 }
627 fnorm=sqrt(fnorm);
628
629 rtmp[iptr+rtmp_offset]=fnorm;
630 max_offdiagonal = MAX(max_offdiagonal,fnorm);
631 sum_row+=fnorm;
632 }
633
634
635 /* inspect row i: */
636 {
637 const double threshold = theta*max_offdiagonal;
638 rtmp_offset=-A->mainBlock->pattern->ptr[i];
639
640 threshold_p[2*i+1]=threshold;
641 if (tau*main_row < sum_row) { /* no diagonal domainance */
642 threshold_p[2*i]=1;
643 rtmp_offset=-A->mainBlock->pattern->ptr[i];
644 #pragma ivdep
645 for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
646 register index_t j=A->mainBlock->pattern->index[iptr];
647 if(rtmp[iptr+rtmp_offset] > threshold && i!=j) {
648 S[koffset+kdeg] = j;
649 kdeg++;
650 }
651 }
652 rtmp_offset+=A->mainBlock->pattern->ptr[i+1]-A->col_coupleBlock->pattern->ptr[i];
653 #pragma ivdep
654 for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
655 register index_t j=A->col_coupleBlock->pattern->index[iptr];
656 if( rtmp[iptr+rtmp_offset] >threshold) {
657 S[koffset+kdeg] = j + my_n;
658 kdeg++;
659 }
660 }
661 } else {
662 threshold_p[2*i]=-1;
663 }
664 }
665 offset_S[i]=koffset;
666 degree_S[i]=kdeg;
667 }
668 TMPMEMFREE(rtmp);
669 }
670 /* now we need to distribute the threshold and the diagonal dominance indicator */
671 if (A->mpi_info->size > 1) {
672
673 const index_t koffset_0=A->mainBlock->pattern->ptr[my_n]+A->col_coupleBlock->pattern->ptr[my_n]
674 -A->mainBlock->pattern->ptr[0]-A->col_coupleBlock->pattern->ptr[0];
675
676 double *remote_threshold=NULL;
677
678 Paso_Coupler* threshold_coupler=Paso_Coupler_alloc(A->row_coupler->connector ,2);
679 Paso_Coupler_startCollect(threshold_coupler,threshold_p);
680 Paso_Coupler_finishCollect(threshold_coupler);
681 remote_threshold=threshold_coupler->recv_buffer;
682
683 #pragma omp parallel for private(i,iptr) schedule(static)
684 for (i=0; i<overlap_n; i++) {
685
686 const double threshold2 = remote_threshold[2*i+1]*remote_threshold[2*i+1];
687 register dim_t kdeg=0;
688 register const index_t koffset=koffset_0+A->row_coupleBlock->pattern->ptr[i];
689 if (remote_threshold[2*i]>0) {
690 #pragma ivdep
691 for (iptr=A->row_coupleBlock->pattern->ptr[i];iptr<A->row_coupleBlock->pattern->ptr[i+1]; ++iptr) {
692 register index_t j=A->row_coupleBlock->pattern->index[iptr];
693 register double fnorm2=0;
694 #pragma ivdepremote_threshold[2*i]
695 for(bi=0;bi<n_block;++bi) {
696 register double rtmp2 = A->row_coupleBlock->val[iptr*n_block+bi];
697 fnorm2+=rtmp2*rtmp2;
698 }
699
700 if(fnorm2 > threshold2 ) {
701 S[koffset+kdeg] = j ;
702 kdeg++;
703 }
704 }
705
706 }
707 offset_S[i+my_n]=koffset;
708 degree_S[i+my_n]=kdeg;
709 }
710 Paso_Coupler_free(threshold_coupler);
711 }
712 TMPMEMFREE(threshold_p);
713 }
714 void Paso_Preconditioner_AMG_transposeStrongConnections(const dim_t n, const dim_t* degree_S, const index_t* offset_S, const index_t* S,
715 const dim_t nT, dim_t* degree_ST, index_t* offset_ST,index_t* ST)
716 {
717 index_t i, j;
718 dim_t p;
719 dim_t len=0;
720 #pragma omp parallel for private(i) schedule(static)
721 for (i=0; i<nT ;++i) {
722 degree_ST[i]=0;
723 }
724 for (i=0; i<n ;++i) {
725 for (p=0; p<degree_S[i]; ++p) degree_ST[ S[offset_S[i]+p] ]++;
726 }
727 for (i=0; i<nT ;++i) {
728 offset_ST[i]=len;
729 len+=degree_ST[i];
730 degree_ST[i]=0;
731 }
732 for (i=0; i<n ;++i) {
733 for (p=0; p<degree_S[i]; ++p) {
734 j=S[offset_S[i]+p];
735 ST[offset_ST[j]+degree_ST[j]]=i;
736 degree_ST[j]++;
737 }
738 }
739 }
740
741 void Paso_Preconditioner_AMG_CIJPCoarsening(const dim_t n, const dim_t my_n, index_t*split_marker,
742 const dim_t* degree_S, const index_t* offset_S, const index_t* S,
743 const dim_t* degree_ST, const index_t* offset_ST, const index_t* ST,
744 Paso_Connector* col_connector, Paso_Distribution* col_dist)
745 {
746 dim_t i, numUndefined, iter=0;
747 index_t iptr, jptr, kptr;
748 double *random=NULL, *w=NULL, *Status=NULL;
749 index_t * ST_flag=NULL;
750
751 Paso_Coupler* w_coupler=Paso_Coupler_alloc(col_connector ,1);
752
753 w=TMPMEMALLOC(n, double);
754 Status=TMPMEMALLOC(n, double);
755 random = Paso_Distribution_createRandomVector(col_dist,1);
756 ST_flag=TMPMEMALLOC(offset_ST[n-1]+ degree_ST[n-1], index_t);
757
758 #pragma omp parallel for private(i)
759 for (i=0; i< my_n; ++i) {
760 w[i]=degree_ST[i]+random[i];
761 if (degree_ST[i] < 1) {
762 Status[i]=-100; /* F point */
763 } else {
764 Status[i]=1; /* status undefined */
765 }
766 }
767 #pragma omp parallel for private(i, iptr)
768 for (i=0; i< n; ++i) {
769 for( iptr =0 ; iptr < degree_ST[i]; ++iptr) {
770 ST_flag[offset_ST[i]+iptr]=1;
771 }
772 }
773
774
775 numUndefined = Paso_Distribution_numPositives(Status, col_dist, 1 );
776 printf(" coarsening loop start: num of undefined rows = %d \n",numUndefined);
777
778
779 iter=0;
780 while (numUndefined > 0) {
781 Paso_Coupler_fillOverlap(n, w, w_coupler);
782 {
783 int p;
784 for (p=0; p<my_n; ++p) {
785 printf(" %d : %f %f \n",p, w[p], Status[p]);
786 }
787 for (p=my_n; p<n; ++p) {
788 printf(" %d : %f \n",p, w[p]);
789 }
790
791
792 }
793
794 /* calculate the maximum value of naigbours following active strong connections:
795 w2[i]=MAX(w[k]) with k in ST[i] or S[i] and (i,k) conenction is still active */
796 #pragma omp parallel for private(i, iptr)
797 for (i=0; i<my_n; ++i) {
798 if (Status[i]>0) { /* status is still undefined */
799
800 register bool_t inD=TRUE;
801 const double wi=w[i];
802
803
804 for( iptr =0 ; iptr < degree_S[i]; ++iptr) {
805
806 const index_t k=S[offset_S[i]+iptr];
807 const index_t* start_p = &ST[offset_ST[k]];
808 const index_t* where_p=(index_t*)bsearch(&i, start_p, degree_ST[k], sizeof(index_t), Paso_comparIndex);
809
810 if (ST_flag[(index_t)(where_p-start_p)]>0) {
811 printf("S: %d (%e) -> %d (%e)\n",i, wi, k, w[k]);
812 if (wi <= w[k] ) {
813 inD=FALSE;
814 break;
815 }
816 }
817
818 }
819
820 if (inD) {
821 for( iptr =0 ; iptr < degree_ST[i]; ++iptr) {
822 const index_t k=ST[offset_ST[i]+iptr];
823 if ( ST_flag[offset_ST[i]+iptr] > 0 ) {
824 printf("ST: %d (%e) -> %d (%e)\n",i, wi, k, w[k]);
825
826 if (wi <= w[k] ) {
827 inD=FALSE;
828 break;
829 }
830 }
831 }
832 }
833 if (inD) {
834 Status[i]=0.; /* is in D */
835 printf("%d is in D\n",i);
836 }
837 }
838
839 }
840
841 Paso_Coupler_fillOverlap(n, Status, w_coupler);
842
843
844 /* remove connection to D points :
845
846 for each i in D:
847 for each j in S_i:
848 w[j]--
849 ST_tag[j,i]=-1
850 for each j in ST[i]:
851 ST_tag[i,j]=-1
852 for each k in ST[j]:
853 if k in ST[i]:
854 w[j]--;
855 ST_tag[j,k]=-1
856
857 */
858 /* w is updated for local rows only */
859 {
860 #pragma omp parallel for private(i, jptr)
861 for (i=0; i< my_n; ++i) {
862
863 for (jptr=0; jptr<degree_ST[i]; ++jptr) {
864 const index_t j=ST[offset_ST[i]+jptr];
865 if ( (Status[j] == 0.) && (ST_flag[offset_ST[i]+jptr]>0) ) {
866 w[i]--;
867 printf("%d reduced by %d\n",i,j);
868 ST_flag[offset_ST[i]+jptr]=-1;
869 }
870 }
871
872 }
873 #pragma omp parallel for private(i, jptr)
874 for (i=my_n; i< n; ++i) {
875 for (jptr=0; jptr<degree_ST[i]; ++jptr) {
876 const index_t j = ST[offset_ST[i]+jptr];
877 if ( Status[j] == 0. ) ST_flag[offset_ST[i]+jptr]=-1;
878 }
879 }
880
881
882 for (i=0; i< n; ++i) {
883 if ( Status[i] == 0. ) {
884
885 const index_t* start_p = &ST[offset_ST[i]];
886
887 for (jptr=0; jptr<degree_ST[i]; ++jptr) {
888 const index_t j=ST[offset_ST[i]+jptr];
889 printf("check connection: %d %d\n",i,j);
890 ST_flag[offset_ST[i]+jptr]=-1;
891 for (kptr=0; kptr<degree_ST[j]; ++kptr) {
892 const index_t k=ST[offset_ST[j]+kptr];
893 printf("check connection: %d of %d\n",k,j);
894 if (NULL != bsearch(&k, start_p, degree_ST[i], sizeof(index_t), Paso_comparIndex) ) { /* k in ST[i] ? */
895 printf("found!\n");
896 if (ST_flag[offset_ST[j]+kptr] >0) {
897 if (j< my_n ) {
898 w[j]--;
899 printf("%d reduced by %d and %d \n",j, i,k);
900 }
901 ST_flag[offset_ST[j]+kptr]=-1;
902 }
903 }
904 }
905 }
906 }
907 }
908 }
909 /* adjust status */
910 #pragma omp parallel for private(i)
911 for (i=0; i< my_n; ++i) {
912 if ( Status[i] == 0. ) {
913 Status[i] = -10; /* this is now a C point */
914 } else if ( w[i]<1.) {
915 Status[i] = -100; /* this is now a F point */
916 }
917 }
918
919
920 numUndefined = Paso_Distribution_numPositives(Status, col_dist, 1 );
921 iter++;
922 printf(" coarsening loop %d: num of undefined rows = %d \n",iter, numUndefined);
923 } /* end of while loop */
924
925
926 /* map to output :*/
927 Paso_Coupler_fillOverlap(n, Status, w_coupler);
928 #pragma omp parallel for private(i)
929 for (i=0; i< n; ++i) {
930 if (Status[i] > -50.) {
931 split_marker[i]=PASO_AMG_IN_C;
932 } else {
933 split_marker[i]=PASO_AMG_IN_F;
934 }
935 }
936
937 /* clean up : */
938 Paso_Coupler_free(w_coupler);
939 TMPMEMFREE(random);
940 TMPMEMFREE(w);
941 TMPMEMFREE(Status);
942 TMPMEMFREE(ST_flag);
943
944 return;
945 }

  ViewVC Help
Powered by ViewVC 1.1.26