/[escript]/trunk/paso/src/AMG.c
ViewVC logotype

Contents of /trunk/paso/src/AMG.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3442 - (show annotations)
Tue Jan 18 01:47:36 2011 UTC (8 years, 8 months ago) by gross
File MIME type: text/plain
File size: 23614 byte(s)
 emeregncy
1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2010 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14
15 /**************************************************************/
16
17 /* Paso: AMG preconditioner (local version) */
18
19 /**************************************************************/
20
21 /* Author: artak@uq.edu.au, l.gross@uq.edu.au */
22
23 /**************************************************************/
24
25 #define SHOW_TIMING FALSE
26
27 #include "Paso.h"
28 #include "Preconditioner.h"
29 #include "Options.h"
30 #include "PasoUtil.h"
31 #include "UMFPACK.h"
32 #include "MKL.h"
33
34 /**************************************************************/
35
36 /* free all memory used by AMG */
37
38 void Paso_Preconditioner_AMG_free(Paso_Preconditioner_AMG * in) {
39 if (in!=NULL) {
40 Paso_Preconditioner_Smoother_free(in->Smoother);
41 Paso_SystemMatrix_free(in->P);
42 Paso_SystemMatrix_free(in->R);
43 Paso_SystemMatrix_free(in->A_C);
44 Paso_Preconditioner_AMG_free(in->AMG_C);
45 MEMFREE(in->r);
46 MEMFREE(in->x_C);
47 MEMFREE(in->b_C);
48
49 MEMFREE(in);
50 }
51 }
52
53 index_t Paso_Preconditioner_AMG_getMaxLevel(const Paso_Preconditioner_AMG * in) {
54 if (in->AMG_C == NULL) {
55 return in->level;
56 } else {
57 return Paso_Preconditioner_AMG_getMaxLevel(in->AMG_C);
58 }
59 }
60 double Paso_Preconditioner_AMG_getCoarseLevelSparsity(const Paso_Preconditioner_AMG * in) {
61 if (in->AMG_C == NULL) {
62 if (in->A_C == NULL) {
63 return 1.;
64 } else {
65 return Paso_SystemMatrix_getSparsity(in->A_C);
66 }
67 } else {
68 return Paso_Preconditioner_AMG_getCoarseLevelSparsity(in->AMG_C);
69 }
70 }
71 dim_t Paso_Preconditioner_AMG_getNumCoarseUnknwons(const Paso_Preconditioner_AMG * in) {
72 if (in->AMG_C == NULL) {
73 if (in->A_C == NULL) {
74 return 0;
75 } else {
76 Paso_SystemMatrix_getTotalNumRows(in->A_C);
77 }
78 } else {
79 return Paso_Preconditioner_AMG_getNumCoarseUnknwons(in->AMG_C);
80 }
81 }
82 /*****************************************************************
83
84 constructs AMG
85
86 ******************************************************************/
87 Paso_Preconditioner_AMG* Paso_Preconditioner_AMG_alloc(Paso_SystemMatrix *A_p,dim_t level,Paso_Options* options) {
88
89 Paso_Preconditioner_AMG* out=NULL;
90 bool_t verbose=options->verbose;
91
92 Paso_SystemMatrix *Atemp=NULL, *A_C=NULL;
93 const dim_t n=A_p->numRows;
94 const dim_t n_block=A_p->row_block_size;
95 index_t* F_marker=NULL, *counter=NULL, *mask_C=NULL, *rows_in_F=NULL, *S=NULL, *degree_S=NULL;
96 dim_t n_F=0, n_C=0, i;
97 double time0=0;
98 const double theta = options->coarsening_threshold;
99 const double tau = options->diagonal_dominance_threshold;
100 const double sparsity=Paso_SystemMatrix_getSparsity(A_p);
101 const dim_t total_n=Paso_SystemMatrix_getGlobalTotalNumRows(A_p);
102
103
104 /*
105 is the input matrix A suitable for coarsening
106
107 */
108 if ( (sparsity >= options->min_coarse_sparsity) ||
109 (total_n <= options->min_coarse_matrix_size) ||
110 (level > options->level_max) ) {
111
112 if (verbose) {
113 /*
114 print stopping condition:
115 - 'SPAR' = min_coarse_matrix_sparsity exceeded
116 - 'SIZE' = min_coarse_matrix_size exceeded
117 - 'LEVEL' = level_max exceeded
118 */
119 printf("Paso_Preconditioner: AMG: termination of coarsening by ");
120
121 if (sparsity >= options->min_coarse_sparsity)
122 printf("SPAR ");
123
124 if (total_n <= options->min_coarse_matrix_size)
125 printf("SIZE ");
126
127 if (level > options->level_max)
128 printf("LEVEL ");
129
130 printf("\n");
131
132 printf("Paso_Preconditioner: AMG level %d (limit = %d) stopped. sparsity = %e (limit = %e), unknowns = %d (limit = %d)\n",
133 level, options->level_max, sparsity, options->min_coarse_sparsity, total_n, options->min_coarse_matrix_size);
134
135 }
136
137 return NULL;
138 }
139 /* Start Coarsening : */
140
141 F_marker=TMPMEMALLOC(n,index_t);
142 counter=TMPMEMALLOC(n,index_t);
143 degree_S=TMPMEMALLOC(n, dim_t);
144 S=TMPMEMALLOC(A_p->pattern->len, index_t);
145 if ( !( Esys_checkPtr(F_marker) || Esys_checkPtr(counter) || Esys_checkPtr(degree_S) || Esys_checkPtr(S) ) ) {
146 /*
147 set splitting of unknows:
148
149 */
150 time0=Esys_timer();
151 if (n_block>1) {
152 Paso_Preconditioner_AMG_setStrongConnections_Block(A_p, degree_S, S, theta,tau);
153 } else {
154 Paso_Preconditioner_AMG_setStrongConnections(A_p, degree_S, S, theta,tau);
155 }
156
157 /*MPI:
158 Paso_Preconditioner_AMG_RungeStuebenSearch(n, A_p->pattern->ptr, degree_S, S, F_marker, options->usePanel);
159 */
160
161 /* in BoomerAMG interpolation is used FF connectiovity is required :*/
162 /*MPI:
163 if (options->interpolation_method == PASO_CLASSIC_INTERPOLATION_WITH_FF_COUPLING)
164 Paso_Preconditioner_AMG_enforceFFConnectivity(n, A_p->pattern->ptr, degree_S, S, F_marker);
165 */
166
167 options->coarsening_selection_time=Esys_timer()-time0 + MAX(0, options->coarsening_selection_time);
168
169 if (Esys_noError() ) {
170 #pragma omp parallel for private(i) schedule(static)
171 for (i = 0; i < n; ++i) F_marker[i]=(F_marker[i] == PASO_AMG_IN_F);
172
173 /*
174 count number of unkowns to be eliminated:
175 */
176 n_F=Paso_Util_cumsum_maskedTrue(n,counter, F_marker);
177 n_C=n-n_F;
178 if (verbose) printf("Paso_Preconditioner: AMG level %d: %d unknowns are flagged for elimination. %d left.\n",level,n_F,n-n_F);
179
180 if ( n_F == 0 ) { /* is a nasty case. a direct solver should be used, return NULL */
181 out = NULL;
182 } else {
183 out=MEMALLOC(1,Paso_Preconditioner_AMG);
184 if (! Esys_checkPtr(out)) {
185 out->level = level;
186 out->n = n;
187 out->n_F = n_F;
188 out->n_block = n_block;
189 out->A_C = NULL;
190 out->P = NULL;
191 out->R = NULL;
192 out->post_sweeps = options->post_sweeps;
193 out->pre_sweeps = options->pre_sweeps;
194 out->r = NULL;
195 out->x_C = NULL;
196 out->b_C = NULL;
197 out->AMG_C = NULL;
198 out->Smoother=NULL;
199 }
200 mask_C=TMPMEMALLOC(n,index_t);
201 rows_in_F=TMPMEMALLOC(n_F,index_t);
202 Esys_checkPtr(mask_C);
203 Esys_checkPtr(rows_in_F);
204 if ( Esys_noError() ) {
205
206 out->Smoother = Paso_Preconditioner_Smoother_alloc(A_p, (options->smoother == PASO_JACOBI), verbose);
207
208 if (n_C != 0) {
209 /* if nothing is been removed we have a diagonal dominant matrix and we just run a few steps of the smoother */
210
211 /* allocate helpers :*/
212 out->x_C=MEMALLOC(n_block*n_C,double);
213 out->b_C=MEMALLOC(n_block*n_C,double);
214 out->r=MEMALLOC(n_block*n,double);
215
216 Esys_checkPtr(out->r);
217 Esys_checkPtr(out->x_C);
218 Esys_checkPtr(out->b_C);
219
220 if ( Esys_noError() ) {
221 /* creates index for F:*/
222 #pragma omp parallel private(i)
223 {
224 #pragma omp for schedule(static)
225 for (i = 0; i < n; ++i) {
226 if (F_marker[i]) rows_in_F[counter[i]]=i;
227 }
228 }
229 /* create mask of C nodes with value >-1 gives new id */
230 i=Paso_Util_cumsum_maskedFalse(n,counter, F_marker);
231
232 #pragma omp parallel for private(i) schedule(static)
233 for (i = 0; i < n; ++i) {
234 if (F_marker[i]) {
235 mask_C[i]=-1;
236 } else {
237 mask_C[i]=counter[i];;
238 }
239 }
240 /*
241 get Prolongation :
242 */
243 time0=Esys_timer();
244 /*MPI:
245 out->P=Paso_Preconditioner_AMG_getProlongation(A_p,A_p->pattern->ptr, degree_S,S,n_C,mask_C, options->interpolation_method);
246 */
247 if (SHOW_TIMING) printf("timing: level %d: getProlongation: %e\n",level, Esys_timer()-time0);
248 }
249 /*
250 construct Restriction operator as transposed of Prolongation operator:
251 */
252 if ( Esys_noError()) {
253 time0=Esys_timer();
254 /*MPI:
255 out->R=Paso_SystemMatrix_getTranspose(out->P);
256 */
257 if (SHOW_TIMING) printf("timing: level %d: Paso_SystemMatrix_getTranspose: %e\n",level,Esys_timer()-time0);
258 }
259 /*
260 construct coarse level matrix:
261 */
262 if ( Esys_noError()) {
263 time0=Esys_timer();
264 /*MPI:
265 Atemp=Paso_SystemMatrix_MatrixMatrix(A_p,out->P);
266 A_C=Paso_SystemMatrix_MatrixMatrix(out->R,Atemp);
267
268 Paso_SystemMatrix_free(Atemp);
269 */
270
271 if (SHOW_TIMING) printf("timing: level %d : construct coarse matrix: %e\n",level,Esys_timer()-time0);
272 }
273
274
275 /*
276 constructe courser level:
277
278 */
279 if ( Esys_noError()) {
280 out->AMG_C=Paso_Preconditioner_AMG_alloc(A_C,level+1,options);
281 }
282 if ( Esys_noError()) {
283 if ( out->AMG_C == NULL ) {
284 out->reordering = options->reordering;
285 out->refinements = options->coarse_matrix_refinements;
286 /* no coarse level matrix has been constructed. use direct solver */
287 #ifdef MKL
288 out->A_C=Paso_SystemMatrix_unroll(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_OFFSET1, A_C);
289 Paso_SystemMatrix_free(A_C);
290 out->A_C->solver_package = PASO_MKL;
291 if (verbose) printf("Paso_Preconditioner: AMG: use MKL direct solver on the coarsest level (number of unknowns = %d).\n",n_C*n_block);
292 #else
293 #ifdef UMFPACK
294 out->A_C=Paso_SystemMatrix_unroll(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_CSC, A_C);
295 Paso_SystemMatrix_free(A_C);
296 out->A_C->solver_package = PASO_UMFPACK;
297 if (verbose) printf("Paso_Preconditioner: AMG: use UMFPACK direct solver on the coarsest level (number of unknowns = %d).\n",n_C*n_block);
298 #else
299 out->A_C=A_C;
300 out->A_C->solver_p=Paso_Preconditioner_Smoother_alloc(out->A_C, (options->smoother == PASO_JACOBI), verbose);
301 out->A_C->solver_package = PASO_SMOOTHER;
302 if (verbose) printf("Paso_Preconditioner: AMG: use smoother on the coarsest level (number of unknowns = %d).\n",n_C*n_block);
303 #endif
304 #endif
305 } else {
306 /* finally we set some helpers for the solver step */
307 out->A_C=A_C;
308 }
309 }
310 }
311 }
312 TMPMEMFREE(mask_C);
313 TMPMEMFREE(rows_in_F);
314 }
315 }
316 }
317 TMPMEMFREE(counter);
318 TMPMEMFREE(F_marker);
319 TMPMEMFREE(degree_S);
320 TMPMEMFREE(S);
321
322 if (Esys_noError()) {
323 return out;
324 } else {
325 Paso_Preconditioner_AMG_free(out);
326 return NULL;
327 }
328 }
329
330
331 void Paso_Preconditioner_AMG_solve(Paso_SystemMatrix* A, Paso_Preconditioner_AMG * amg, double * x, double * b) {
332 const dim_t n = amg->n * amg->n_block;
333 double time0=0;
334 const dim_t post_sweeps=amg->post_sweeps;
335 const dim_t pre_sweeps=amg->pre_sweeps;
336
337 /* presmoothing */
338 time0=Esys_timer();
339 Paso_Preconditioner_Smoother_solve(A, amg->Smoother, x, b, pre_sweeps, FALSE);
340 time0=Esys_timer()-time0;
341 if (SHOW_TIMING) printf("timing: level %d: Presmooting: %e\n",amg->level, time0);
342 /* end of presmoothing */
343
344 if (amg->n_F < amg->n) { /* is there work on the coarse level? */
345 time0=Esys_timer();
346 Paso_Copy(n, amg->r, b); /* r <- b */
347 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(-1.,A,x,1.,amg->r); /*r=r-Ax*/
348 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0_DIAG(1.,amg->R,amg->r,0.,amg->b_C); /* b_c = R*r */
349 time0=Esys_timer()-time0;
350 /* coarse level solve */
351 if ( amg->AMG_C == NULL) {
352 time0=Esys_timer();
353 /* A_C is the coarsest level */
354 switch (amg->A_C->solver_package) {
355 case (PASO_MKL):
356 Paso_MKL(amg->A_C, amg->x_C,amg->b_C, amg->reordering, amg->refinements, SHOW_TIMING);
357 break;
358 case (PASO_UMFPACK):
359 Paso_UMFPACK(amg->A_C, amg->x_C,amg->b_C, amg->refinements, SHOW_TIMING);
360 break;
361 case (PASO_SMOOTHER):
362 Paso_Preconditioner_Smoother_solve(amg->A_C, amg->A_C->solver_p,amg->x_C,amg->b_C,pre_sweeps+post_sweeps, FALSE);
363 break;
364 }
365 if (SHOW_TIMING) printf("timing: level %d: DIRECT SOLVER: %e\n",amg->level,Esys_timer()-time0);
366 } else {
367 Paso_Preconditioner_AMG_solve(amg->A_C, amg->AMG_C,amg->x_C,amg->b_C); /* x_C=AMG(b_C) */
368 }
369 time0=time0+Esys_timer();
370 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0_DIAG(1.,amg->P,amg->x_C,1.,x); /* x = x + P*x_c */
371
372 /*postsmoothing*/
373
374 /*solve Ax=b with initial guess x */
375 time0=Esys_timer();
376 Paso_Preconditioner_Smoother_solve(A, amg->Smoother, x, b, post_sweeps, TRUE);
377 time0=Esys_timer()-time0;
378 if (SHOW_TIMING) printf("timing: level %d: Postsmoothing: %e\n",amg->level,time0);
379 /*end of postsmoothing*/
380
381 }
382 return;
383 }
384
385 /* theta = threshold for strong connections */
386 /* tau = threshold for diagonal dominance */
387
388 /*S_i={j \in N_i; i strongly coupled to j}
389
390 in the sense that |A_{ij}| >= theta * max_k |A_{ik}|
391 */
392
393 void Paso_Preconditioner_AMG_setStrongConnections(Paso_SystemMatrix* A,
394 dim_t *degree_S, index_t *S,
395 const double theta, const double tau)
396 {
397 const dim_t n=A->numRows;
398 index_t iptr, i,j;
399 dim_t kdeg;
400 double max_offdiagonal, threshold, sum_row, main_row, fnorm;
401
402
403 #pragma omp parallel for private(i,iptr,max_offdiagonal, threshold,j, kdeg, sum_row, main_row, fnorm) schedule(static)
404 for (i=0;i<n;++i) {
405 max_offdiagonal = 0.;
406 sum_row=0;
407 main_row=0;
408 #pragma ivdep
409 for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {
410 j=A->pattern->index[iptr];
411 fnorm=ABS(A->val[iptr]);
412
413 if( j != i) {
414 max_offdiagonal = MAX(max_offdiagonal,fnorm);
415 sum_row+=fnorm;
416 } else {
417 main_row=fnorm;
418 }
419 }
420 threshold = theta*max_offdiagonal;
421 kdeg=0;
422
423 if (tau*main_row < sum_row) { /* no diagonal domainance */
424 #pragma ivdep
425 for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {
426 j=A->pattern->index[iptr];
427 if(ABS(A->val[iptr])>threshold && i!=j) {
428 S[A->pattern->ptr[i]+kdeg] = j;
429 kdeg++;
430 }
431 }
432 }
433 degree_S[i]=kdeg;
434 }
435
436 }
437
438 /* theta = threshold for strong connections */
439 /* tau = threshold for diagonal dominance */
440 /*S_i={j \in N_i; i strongly coupled to j}
441
442 in the sense that |A_{ij}|_F >= theta * max_k |A_{ik}|_F
443 */
444 void Paso_Preconditioner_AMG_setStrongConnections_Block(Paso_SystemMatrix* A,
445 dim_t *degree_S, index_t *S,
446 const double theta, const double tau)
447
448 {
449 const dim_t n_block=A->row_block_size;
450 const dim_t n=A->numRows;
451 index_t iptr, i,j, bi;
452 dim_t kdeg, max_deg;
453 register double max_offdiagonal, threshold, fnorm, sum_row, main_row;
454 double *rtmp;
455
456
457 #pragma omp parallel private(i,iptr,max_offdiagonal, kdeg, threshold,j, max_deg, fnorm, sum_row, main_row, rtmp)
458 {
459 max_deg=0;
460 #pragma omp for schedule(static)
461 for (i=0;i<n;++i) max_deg=MAX(max_deg, A->pattern->ptr[i+1]-A->pattern->ptr[i]);
462
463 rtmp=TMPMEMALLOC(max_deg, double);
464
465 #pragma omp for schedule(static)
466 for (i=0;i<n;++i) {
467
468 max_offdiagonal = 0.;
469 sum_row=0;
470 main_row=0;
471
472 for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {
473 j=A->pattern->index[iptr];
474 fnorm=0;
475 #pragma ivdep
476 for(bi=0;bi<n_block*n_block;++bi) fnorm+=A->val[iptr*n_block*n_block+bi]*A->val[iptr*n_block*n_block+bi];
477 fnorm=sqrt(fnorm);
478 rtmp[iptr-A->pattern->ptr[i]]=fnorm;
479 if( j != i) {
480 max_offdiagonal = MAX(max_offdiagonal,fnorm);
481 sum_row+=fnorm;
482 } else {
483 main_row=fnorm;
484 }
485 }
486 threshold = theta*max_offdiagonal;
487
488 kdeg=0;
489 if (tau*main_row < sum_row) { /* no diagonal domainance */
490 #pragma ivdep
491 for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {
492 j=A->pattern->index[iptr];
493 if(rtmp[iptr-A->pattern->ptr[i]] > threshold && i!=j) {
494 S[A->pattern->ptr[i]+kdeg] = j;
495 kdeg++;
496 }
497 }
498 }
499 degree_S[i]=kdeg;
500 }
501 TMPMEMFREE(rtmp);
502 } /* end of parallel region */
503
504 }
505
506 /* the runge stueben coarsening algorithm: */
507 void Paso_Preconditioner_AMG_RungeStuebenSearch(const dim_t n, const index_t* offset_S,
508 const dim_t* degree_S, const index_t* S,
509 index_t*split_marker, const bool_t usePanel)
510 {
511
512 index_t *lambda=NULL, *ST=NULL, *notInPanel=NULL, *panel=NULL, lambda_max, lambda_k;
513 dim_t i,k, p, q, *degree_ST=NULL, len_panel, len_panel_new;
514 register index_t j, itmp;
515
516 if (n<=0) return; /* make sure that the return of Paso_Util_arg_max is not pointing to nirvana */
517
518 lambda=TMPMEMALLOC(n, index_t); Esys_checkPtr(lambda);
519 degree_ST=TMPMEMALLOC(n, dim_t); Esys_checkPtr(degree_ST);
520 ST=TMPMEMALLOC(offset_S[n], index_t); Esys_checkPtr(ST);
521 if (usePanel) {
522 notInPanel=TMPMEMALLOC(n, bool_t); Esys_checkPtr(notInPanel);
523 panel=TMPMEMALLOC(n, index_t); Esys_checkPtr(panel);
524 }
525
526
527
528 if (Esys_noError() ) {
529 /* initialize split_marker and split_marker :*/
530 /* those unknows which are not influenced go into F, the rest is available for F or C */
531 #pragma omp parallel for private(i) schedule(static)
532 for (i=0;i<n;++i) {
533 degree_ST[i]=0;
534 if (degree_S[i]>0) {
535 lambda[i]=0;
536 split_marker[i]=PASO_AMG_UNDECIDED;
537 } else {
538 split_marker[i]=PASO_AMG_IN_F;
539 lambda[i]=-1;
540 }
541 }
542 /* create transpose :*/
543 for (i=0;i<n;++i) {
544 for (p=0; p<degree_S[i]; ++p) {
545 j=S[offset_S[i]+p];
546 ST[offset_S[j]+degree_ST[j]]=i;
547 degree_ST[j]++;
548 }
549 }
550 /* lambda[i] = |undecided k in ST[i]| + 2 * |F-unknown in ST[i]| */
551 #pragma omp parallel for private(i, j, itmp) schedule(static)
552 for (i=0;i<n;++i) {
553 if (split_marker[i]==PASO_AMG_UNDECIDED) {
554 itmp=lambda[i];
555 for (p=0; p<degree_ST[i]; ++p) {
556 j=ST[offset_S[i]+p];
557 if (split_marker[j]==PASO_AMG_UNDECIDED) {
558 itmp++;
559 } else { /* at this point there are no C points */
560 itmp+=2;
561 }
562 }
563 lambda[i]=itmp;
564 }
565 }
566 if (usePanel) {
567 #pragma omp parallel for private(i) schedule(static)
568 for (i=0;i<n;++i) notInPanel[i]=TRUE;
569 }
570 /* start search :*/
571 i=Paso_Util_arg_max(n,lambda);
572 while (lambda[i]>-1) { /* is there any undecided unknown? */
573
574 if (usePanel) {
575 len_panel=0;
576 do {
577 /* the unknown i is moved to C */
578 split_marker[i]=PASO_AMG_IN_C;
579 lambda[i]=-1; /* lambda from unavailable unknowns is set to -1 */
580
581 /* all undecided unknown strongly coupled to i are moved to F */
582 for (p=0; p<degree_ST[i]; ++p) {
583 j=ST[offset_S[i]+p];
584
585 if (split_marker[j]==PASO_AMG_UNDECIDED) {
586
587 split_marker[j]=PASO_AMG_IN_F;
588 lambda[j]=-1;
589
590 for (q=0; q<degree_ST[j]; ++q) {
591 k=ST[offset_S[j]+q];
592 if (split_marker[k]==PASO_AMG_UNDECIDED) {
593 lambda[k]++;
594 if (notInPanel[k]) {
595 notInPanel[k]=FALSE;
596 panel[len_panel]=k;
597 len_panel++;
598 }
599
600 } /* the unknown i is moved to C */
601 split_marker[i]=PASO_AMG_IN_C;
602 lambda[i]=-1; /* lambda from unavailable unknowns is set to -1 */
603 }
604
605 }
606 }
607 for (p=0; p<degree_S[i]; ++p) {
608 j=S[offset_S[i]+p];
609 if (split_marker[j]==PASO_AMG_UNDECIDED) {
610 lambda[j]--;
611 if (notInPanel[j]) {
612 notInPanel[j]=FALSE;
613 panel[len_panel]=j;
614 len_panel++;
615 }
616 }
617 }
618
619 /* consolidate panel */
620 /* remove lambda[q]=-1 */
621 lambda_max=-1;
622 i=-1;
623 len_panel_new=0;
624 for (q=0; q<len_panel; q++) {
625 k=panel[q];
626 lambda_k=lambda[k];
627 if (split_marker[k]==PASO_AMG_UNDECIDED) {
628 panel[len_panel_new]=k;
629 len_panel_new++;
630
631 if (lambda_max == lambda_k) {
632 if (k<i) i=k;
633 } else if (lambda_max < lambda_k) {
634 lambda_max =lambda_k;
635 i=k;
636 }
637 }
638 }
639 len_panel=len_panel_new;
640 } while (len_panel>0);
641 } else {
642 /* the unknown i is moved to C */
643 split_marker[i]=PASO_AMG_IN_C;
644 lambda[i]=-1; /* lambda from unavailable unknowns is set to -1 */
645
646 /* all undecided unknown strongly coupled to i are moved to F */
647 for (p=0; p<degree_ST[i]; ++p) {
648 j=ST[offset_S[i]+p];
649 if (split_marker[j]==PASO_AMG_UNDECIDED) {
650
651 split_marker[j]=PASO_AMG_IN_F;
652 lambda[j]=-1;
653
654 for (q=0; q<degree_ST[j]; ++q) {
655 k=ST[offset_S[j]+q];
656 if (split_marker[k]==PASO_AMG_UNDECIDED) lambda[k]++;
657 }
658
659 }
660 }
661 for (p=0; p<degree_S[i]; ++p) {
662 j=S[offset_S[i]+p];
663 if(split_marker[j]==PASO_AMG_UNDECIDED) lambda[j]--;
664 }
665
666 }
667 i=Paso_Util_arg_max(n,lambda);
668 }
669
670 }
671 TMPMEMFREE(lambda);
672 TMPMEMFREE(ST);
673 TMPMEMFREE(degree_ST);
674 TMPMEMFREE(panel);
675 TMPMEMFREE(notInPanel);
676 }
677 /* ensures that two F nodes are connected via a C node :*/
678 void Paso_Preconditioner_AMG_enforceFFConnectivity(const dim_t n, const index_t* offset_S,
679 const dim_t* degree_S, const index_t* S,
680 index_t*split_marker)
681 {
682 dim_t i, p, q;
683
684 /* now we make sure that two (strongly) connected F nodes are (strongly) connected via a C node. */
685 for (i=0;i<n;++i) {
686 if ( (split_marker[i]==PASO_AMG_IN_F) && (degree_S[i]>0) ) {
687 for (p=0; p<degree_S[i]; ++p) {
688 register index_t j=S[offset_S[i]+p];
689 if ( (split_marker[j]==PASO_AMG_IN_F) && (degree_S[j]>0) ) {
690 /* i and j are now two F nodes which are strongly connected */
691 /* is there a C node they share ? */
692 register index_t sharing=-1;
693 for (q=0; q<degree_S[i]; ++q) {
694 index_t k=S[offset_S[i]+q];
695 if (split_marker[k]==PASO_AMG_IN_C) {
696 register index_t* where_k=(index_t*)bsearch(&k, &(S[offset_S[j]]), degree_S[j], sizeof(index_t), Paso_comparIndex);
697 if (where_k != NULL) {
698 sharing=k;
699 break;
700 }
701 }
702 }
703 if (sharing<0) {
704 if (i<j) {
705 split_marker[j]=PASO_AMG_IN_C;
706 } else {
707 split_marker[i]=PASO_AMG_IN_C;
708 break; /* no point to look any further as i is now a C node */
709 }
710 }
711 }
712 }
713 }
714 }
715 }
716
717 void Paso_Preconditioner_AMG_CIJPCoarsening( )
718 {
719 const dim_t my_n;
720 const dim_t overlap_n;
721 const dim_t n= my_n + overlap_n;
722 /* set local lambda + overlap */
723 #pragma omp parallel for private(i)
724 for (i=0; i<n ++i) {
725 w[i]=degree_ST[i];
726 }
727 for (i=0; i<my_n; i++) {
728 w2[i]=random;
729 }
730
731
732 Paso_Coupler_add(w, 1., w2,
733 /* adjust w accross processors */
734
735 /* */
736 global_n_C=0;
737 global_n_F=..;
738
739 while (global_n_C + global_n_F < global_n) {
740
741
742 is_in_D[i]=FALSE;
743 /* test local connectivit*/
744 for i ..
745 for k in S_i:
746 w2[i]=MAX(w2[i],w[k]);
747 w2[k]=MAX(w2[k],w[i]);
748 /* adjust overlaps by MAX */
749 ...
750 /* points with w[i]>w2[i] become C nodes */
751
752
753
754
755
756
757
758 global_n_C=?
759 global_n_F=?
760
761 }
762
763 }

  ViewVC Help
Powered by ViewVC 1.1.26