/[escript]/trunk/paso/src/LocalAMG.c
ViewVC logotype

Contents of /trunk/paso/src/LocalAMG.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3801 - (show annotations)
Fri Feb 3 05:21:02 2012 UTC (7 years, 8 months ago) by caltinay
File MIME type: text/plain
File size: 24759 byte(s)
Fixed OpenMP bug in paso local AMG.

1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2010 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14
15 /**************************************************************/
16
17 /* Paso: AMG preconditioner (local version) */
18
19 /**************************************************************/
20
21 /* Author: artak@uq.edu.au, l.gross@uq.edu.au l.gao@uq.edu.au */
22
23 /**************************************************************/
24
25 #define SHOW_TIMING FALSE
26 #define USE_TRANSPOSE TRUE
27 #define SMALL_PANEL TRUE
28
29 #include "Paso.h"
30 #include "Preconditioner.h"
31 #include "Options.h"
32 #include "PasoUtil.h"
33 #include "UMFPACK.h"
34 #include "MKL.h"
35
36 /**************************************************************/
37
38 /* free all memory used by AMG */
39
40 void Paso_Preconditioner_LocalAMG_free(Paso_Preconditioner_LocalAMG * in) {
41 if (in!=NULL) {
42 Paso_Preconditioner_LocalSmoother_free(in->Smoother);
43 Paso_SparseMatrix_free(in->P);
44 Paso_SparseMatrix_free(in->R);
45 Paso_SparseMatrix_free(in->A_C);
46 Paso_Preconditioner_LocalAMG_free(in->AMG_C);
47 MEMFREE(in->r);
48 MEMFREE(in->x_C);
49 MEMFREE(in->b_C);
50
51
52 MEMFREE(in);
53 }
54 }
55
56 index_t Paso_Preconditioner_LocalAMG_getMaxLevel(const Paso_Preconditioner_LocalAMG * in) {
57 if (in->AMG_C == NULL) {
58 return in->level;
59 } else {
60 return Paso_Preconditioner_LocalAMG_getMaxLevel(in->AMG_C);
61 }
62 }
63
64 double Paso_Preconditioner_LocalAMG_getCoarseLevelSparsity(const Paso_Preconditioner_LocalAMG * in) {
65 if (in->AMG_C == NULL) {
66 if (in->A_C == NULL) {
67 return 1.;
68 } else {
69 return Paso_SparseMatrix_getSparsity(in->A_C);
70 }
71 } else {
72 return Paso_Preconditioner_LocalAMG_getCoarseLevelSparsity(in->AMG_C);
73 }
74 }
75
76 dim_t Paso_Preconditioner_LocalAMG_getNumCoarseUnknwons(const Paso_Preconditioner_LocalAMG * in) {
77 if (in->AMG_C == NULL) {
78 if (in->A_C == NULL) {
79 return 0;
80 } else {
81 return Paso_SparseMatrix_getTotalNumRows(in->A_C);
82 }
83 } else {
84 return Paso_Preconditioner_LocalAMG_getNumCoarseUnknwons(in->AMG_C);
85 }
86 }
87
88 /*****************************************************************
89
90 Constructs AMG
91
92 ******************************************************************/
93 Paso_Preconditioner_LocalAMG* Paso_Preconditioner_LocalAMG_alloc(Paso_SparseMatrix *A_p,dim_t level,Paso_Options* options) {
94
95 Paso_Preconditioner_LocalAMG* out=NULL;
96 bool_t verbose=options->verbose;
97
98 Paso_SparseMatrix *Atemp=NULL, *A_C=NULL;
99 const dim_t n=A_p->numRows;
100 const dim_t n_block=A_p->row_block_size;
101 index_t* F_marker=NULL, *counter=NULL, *mask_C=NULL, *rows_in_F=NULL, *S=NULL, *degree_S=NULL;
102 dim_t n_F=0, n_C=0, i;
103 double time0=0;
104 const double theta = options->coarsening_threshold;
105 const double tau = options->diagonal_dominance_threshold;
106 const double sparsity=Paso_SparseMatrix_getSparsity(A_p);
107 const dim_t total_n=Paso_SparseMatrix_getTotalNumRows(A_p);
108
109 /*
110 is the input matrix A suitable for coarsening
111
112 */
113 if ( (sparsity >= options->min_coarse_sparsity) ||
114 (total_n <= options->min_coarse_matrix_size) ||
115 (level > options->level_max) ) {
116
117 if (verbose) {
118 /*
119 print stopping condition:
120 - 'SPAR' = min_coarse_matrix_sparsity exceeded
121 - 'SIZE' = min_coarse_matrix_size exceeded
122 - 'LEVEL' = level_max exceeded
123 */
124 printf("Paso_Preconditioner: AMG: termination of coarsening by ");
125
126 if (sparsity >= options->min_coarse_sparsity)
127 printf("SPAR");
128
129 if (total_n <= options->min_coarse_matrix_size)
130 printf("SIZE");
131
132 if (level > options->level_max)
133 printf("LEVEL");
134
135 printf("\n");
136
137 printf("Paso_Preconditioner: AMG level %d (limit = %d) stopped. Sparsity = %e (limit = %e), unknowns = %d (limit = %d)\n",
138 level, options->level_max, sparsity, options->min_coarse_sparsity, total_n, options->min_coarse_matrix_size);
139
140 }
141
142 return NULL;
143 }
144 /* Start Coarsening : */
145
146 F_marker=TMPMEMALLOC(n,index_t);
147 counter=TMPMEMALLOC(n,index_t);
148 degree_S=TMPMEMALLOC(n, dim_t);
149 S=TMPMEMALLOC(A_p->pattern->len, index_t);
150 if ( !( Esys_checkPtr(F_marker) || Esys_checkPtr(counter) || Esys_checkPtr(degree_S) || Esys_checkPtr(S) ) ) {
151 /*
152 set splitting of unknowns:
153 */
154 time0=Esys_timer();
155 if (n_block>1) {
156 Paso_Preconditioner_LocalAMG_setStrongConnections_Block(A_p, degree_S, S, theta,tau);
157 } else {
158 Paso_Preconditioner_LocalAMG_setStrongConnections(A_p, degree_S, S, theta,tau);
159 }
160
161 Paso_Preconditioner_LocalAMG_RungeStuebenSearch(n, A_p->pattern->ptr, degree_S, S, F_marker, options->usePanel);
162
163 /* in BoomerAMG if interpolation is used FF connectivity is required: */
164 if (options->interpolation_method == PASO_CLASSIC_INTERPOLATION_WITH_FF_COUPLING)
165 Paso_Preconditioner_LocalAMG_enforceFFConnectivity(n, A_p->pattern->ptr, degree_S, S, F_marker);
166 options->coarsening_selection_time=Esys_timer()-time0 + MAX(0, options->coarsening_selection_time);
167
168 if (Esys_noError() ) {
169 #pragma omp parallel for private(i) schedule(static)
170 for (i = 0; i < n; ++i) F_marker[i]=(F_marker[i] == PASO_AMG_IN_F);
171
172 /*
173 count number of unknowns to be eliminated:
174 */
175 n_F=Paso_Util_cumsum_maskedTrue(n,counter, F_marker);
176 n_C=n-n_F;
177 if (verbose) printf("Paso_Preconditioner: AMG level %d: %d unknowns are flagged for elimination. %d left.\n",level,n_F,n-n_F);
178
179 if ( n_F == 0 ) { /* This is a nasty case. A direct solver should be used, return NULL */
180 out = NULL;
181 } else {
182 out=MEMALLOC(1,Paso_Preconditioner_LocalAMG);
183 if (! Esys_checkPtr(out)) {
184 out->level = level;
185 out->n = n;
186 out->n_F = n_F;
187 out->n_block = n_block;
188 out->A_C = NULL;
189 out->P = NULL;
190 out->R = NULL;
191 out->post_sweeps = options->post_sweeps;
192 out->pre_sweeps = options->pre_sweeps;
193 out->r = NULL;
194 out->x_C = NULL;
195 out->b_C = NULL;
196 out->AMG_C = NULL;
197 out->Smoother=NULL;
198 }
199 mask_C=TMPMEMALLOC(n,index_t);
200 rows_in_F=TMPMEMALLOC(n_F,index_t);
201 Esys_checkPtr(mask_C);
202 Esys_checkPtr(rows_in_F);
203 if ( Esys_noError() ) {
204
205 out->Smoother = Paso_Preconditioner_LocalSmoother_alloc(A_p, (options->smoother == PASO_JACOBI), verbose);
206
207 if (n_C != 0) {
208 /* if nothing has been removed we have a diagonal
209 * dominant matrix and we just run a few steps of
210 * the smoother */
211
212 /* allocate helpers :*/
213 out->x_C=MEMALLOC(n_block*n_C,double);
214 out->b_C=MEMALLOC(n_block*n_C,double);
215 out->r=MEMALLOC(n_block*n,double);
216
217 Esys_checkPtr(out->r);
218 Esys_checkPtr(out->x_C);
219 Esys_checkPtr(out->b_C);
220
221 if ( Esys_noError() ) {
222 /* creates index for F */
223 #pragma omp parallel private(i)
224 {
225 #pragma omp for schedule(static)
226 for (i = 0; i < n; ++i) {
227 if (F_marker[i]) rows_in_F[counter[i]]=i;
228 }
229 }
230 /* create mask of C nodes with value >-1 gives new id */
231 i=Paso_Util_cumsum_maskedFalse(n,counter, F_marker);
232
233 #pragma omp parallel for private(i) schedule(static)
234 for (i = 0; i < n; ++i) {
235 if (F_marker[i]) {
236 mask_C[i]=-1;
237 } else {
238 mask_C[i]=counter[i];;
239 }
240 }
241 /*
242 get Prolongation :
243 */
244 time0=Esys_timer();
245 out->P=Paso_Preconditioner_LocalAMG_getProlongation(A_p,A_p->pattern->ptr, degree_S,S,n_C,mask_C, options->interpolation_method);
246 if (SHOW_TIMING) printf("timing: level %d: getProlongation: %e\n",level, Esys_timer()-time0);
247 }
248 /*
249 construct Restriction operator as transposed of Prolongation operator:
250 */
251 if ( Esys_noError()) {
252 time0=Esys_timer();
253 out->R=Paso_SparseMatrix_getTranspose(out->P);
254 if (SHOW_TIMING) printf("timing: level %d: Paso_SparseMatrix_getTranspose: %e\n",level,Esys_timer()-time0);
255 }
256 /*
257 construct coarse level matrix:
258 */
259 if ( Esys_noError()) {
260 time0=Esys_timer();
261 if (USE_TRANSPOSE)
262 Atemp=Paso_SparseMatrix_MatrixMatrixTranspose(A_p,out->P,out->R);
263 else
264 Atemp=Paso_SparseMatrix_MatrixMatrix(A_p,out->P);
265 A_C=Paso_SparseMatrix_MatrixMatrix(out->R,Atemp);
266 Paso_SparseMatrix_free(Atemp);
267 if (SHOW_TIMING) printf("timing: level %d : construct coarse matrix: %e\n",level,Esys_timer()-time0);
268 }
269
270
271 /*
272 construct coarser level:
273
274 */
275 if ( Esys_noError()) {
276 out->AMG_C=Paso_Preconditioner_LocalAMG_alloc(A_C,level+1,options);
277 }
278 if ( Esys_noError()) {
279 if ( out->AMG_C == NULL ) {
280 out->reordering = options->reordering;
281 out->refinements = options->coarse_matrix_refinements;
282 /* no coarse level matrix has been constructed. Use direct solver */
283 #ifdef MKL
284 out->A_C=Paso_SparseMatrix_unroll(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_OFFSET1, A_C);
285 Paso_SparseMatrix_free(A_C);
286 out->A_C->solver_package = PASO_MKL;
287 if (verbose) printf("Paso_Preconditioner: AMG: use MKL direct solver on the coarsest level (number of unknowns = %d).\n",n_C*n_block);
288 #else
289 #ifdef UMFPACK
290 out->A_C=Paso_SparseMatrix_unroll(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_CSC, A_C);
291 Paso_SparseMatrix_free(A_C);
292 out->A_C->solver_package = PASO_UMFPACK;
293 if (verbose) printf("Paso_Preconditioner: AMG: use UMFPACK direct solver on the coarsest level (number of unknowns = %d).\n",n_C*n_block);
294 #else
295 out->A_C=A_C;
296 out->A_C->solver_p=Paso_Preconditioner_LocalSmoother_alloc(out->A_C, (options->smoother == PASO_JACOBI), verbose);
297 out->A_C->solver_package = PASO_SMOOTHER;
298 if (verbose) printf("Paso_Preconditioner: AMG: use smoother on the coarsest level (number of unknowns = %d).\n",n_C*n_block);
299 #endif
300 #endif
301 } else {
302 /* finally we set some helpers for the solver step */
303 out->A_C=A_C;
304 }
305 }
306 }
307 }
308 TMPMEMFREE(mask_C);
309 TMPMEMFREE(rows_in_F);
310 }
311 }
312 }
313 TMPMEMFREE(counter);
314 TMPMEMFREE(F_marker);
315 TMPMEMFREE(degree_S);
316 TMPMEMFREE(S);
317
318 if (Esys_noError()) {
319 return out;
320 } else {
321 Paso_Preconditioner_LocalAMG_free(out);
322 return NULL;
323 }
324 }
325
326
327 void Paso_Preconditioner_LocalAMG_solve(Paso_SparseMatrix* A, Paso_Preconditioner_LocalAMG * amg, double * x, double * b) {
328 const dim_t n = amg->n * amg->n_block;
329 double time0=0;
330 const dim_t post_sweeps=amg->post_sweeps;
331 const dim_t pre_sweeps=amg->pre_sweeps;
332
333 /* presmoothing */
334 time0=Esys_timer();
335 Paso_Preconditioner_LocalSmoother_solve(A, amg->Smoother, x, b, pre_sweeps, FALSE);
336 time0=Esys_timer()-time0;
337 if (SHOW_TIMING) printf("timing: level %d: Presmoothing: %e\n",amg->level, time0);
338 /* end of presmoothing */
339
340 if (amg->n_F < amg->n) { /* is there work on the coarse level? */
341 time0=Esys_timer();
342 Paso_Copy(n, amg->r, b); /* r <- b */
343 Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,A,x,1.,amg->r); /*r=r-Ax*/
344 Paso_SparseMatrix_MatrixVector_CSR_OFFSET0_DIAG(1.,amg->R,amg->r,0.,amg->b_C); /* b_c = R*r */
345 time0=Esys_timer()-time0;
346 /* coarse level solve */
347 if ( amg->AMG_C == NULL) {
348 time0=Esys_timer();
349 /* A_C is the coarsest level */
350 switch (amg->A_C->solver_package) {
351 case (PASO_MKL):
352 Paso_MKL(amg->A_C, amg->x_C,amg->b_C, amg->reordering, amg->refinements, SHOW_TIMING);
353 break;
354 case (PASO_UMFPACK):
355 Paso_UMFPACK(amg->A_C, amg->x_C,amg->b_C, amg->refinements, SHOW_TIMING);
356 break;
357 case (PASO_SMOOTHER):
358 Paso_Preconditioner_LocalSmoother_solve(amg->A_C, amg->A_C->solver_p,amg->x_C,amg->b_C,pre_sweeps+post_sweeps, FALSE);
359 break;
360 }
361 if (SHOW_TIMING) printf("timing: level %d: DIRECT SOLVER: %e\n",amg->level,Esys_timer()-time0);
362 } else {
363 Paso_Preconditioner_LocalAMG_solve(amg->A_C, amg->AMG_C,amg->x_C,amg->b_C); /* x_C=AMG(b_C) */
364 }
365 time0=time0+Esys_timer();
366 Paso_SparseMatrix_MatrixVector_CSR_OFFSET0_DIAG(1.,amg->P,amg->x_C,1.,x); /* x = x + P*x_c */
367
368 /*postsmoothing*/
369
370 /*solve Ax=b with initial guess x */
371 time0=Esys_timer();
372 Paso_Preconditioner_LocalSmoother_solve(A, amg->Smoother, x, b, post_sweeps, TRUE);
373 time0=Esys_timer()-time0;
374 if (SHOW_TIMING) printf("timing: level %d: Postsmoothing: %e\n",amg->level,time0);
375 /*end of postsmoothing*/
376
377 }
378 return;
379 }
380
381 /* theta = threshold for strong connections */
382 /* tau = threshold for diagonal dominance */
383
384 /*S_i={j \in N_i; i strongly coupled to j}
385
386 in the sense that |A_{ij}| >= theta * max_k |A_{ik}|
387 */
388
389 void Paso_Preconditioner_LocalAMG_setStrongConnections(Paso_SparseMatrix* A,
390 dim_t *degree_S, index_t *S,
391 const double theta, const double tau)
392 {
393 const dim_t n=A->numRows;
394 index_t iptr, i;
395
396
397 #pragma omp parallel for private(i,iptr) schedule(static)
398 for (i=0;i<n;++i) {
399 register double max_offdiagonal = 0.;
400 register double sum_row=0;
401 register double main_row=0;
402 #pragma ivdep
403 for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {
404 register index_t j=A->pattern->index[iptr];
405 register double fnorm=ABS(A->val[iptr]);
406
407 if( j != i) {
408 max_offdiagonal = MAX(max_offdiagonal,fnorm);
409 sum_row+=fnorm;
410 } else {
411 main_row=fnorm;
412 }
413 }
414 {
415 const double threshold = theta*max_offdiagonal;
416 register dim_t kdeg=0;
417 if (tau*main_row < sum_row) { /* no diagonal dominance */
418 #pragma ivdep
419 for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {
420 register index_t j=A->pattern->index[iptr];
421 if(ABS(A->val[iptr])>threshold && i!=j) {
422 S[A->pattern->ptr[i]+kdeg] = j;
423 kdeg++;
424 }
425 }
426 }
427 degree_S[i]=kdeg;
428 }
429 }
430
431 }
432
433 /* theta = threshold for strong connections */
434 /* tau = threshold for diagonal dominance */
435 /*S_i={j \in N_i; i strongly coupled to j}
436
437 in the sense that |A_{ij}|_F >= theta * max_k |A_{ik}|_F
438 */
439 void Paso_Preconditioner_LocalAMG_setStrongConnections_Block(Paso_SparseMatrix* A,
440 dim_t *degree_S, index_t *S,
441 const double theta, const double tau)
442
443 {
444 const dim_t n_block=A->row_block_size;
445 const dim_t n=A->numRows;
446 index_t iptr, i, bi;
447
448
449 #pragma omp parallel private(i, iptr, bi)
450 {
451 dim_t max_deg=0; /* this is local on each thread */
452 double* rtmp=NULL;
453
454 #pragma omp for schedule(static)
455 for (i=0;i<n;++i) max_deg=MAX(max_deg, A->pattern->ptr[i+1]-A->pattern->ptr[i]);
456
457 rtmp=TMPMEMALLOC(max_deg, double);
458
459 #pragma omp for schedule(static)
460 for (i=0;i<n;++i) {
461
462 register double max_offdiagonal = 0.;
463 register double sum_row=0;
464 register double main_row=0;
465
466 for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {
467 register index_t j=A->pattern->index[iptr];
468 register double fnorm=0;
469 #pragma ivdep
470 for(bi=0;bi<n_block*n_block;++bi) {
471 register double rtmp2 = A->val[iptr*n_block*n_block+bi];
472 fnorm+=rtmp2*rtmp2;
473 }
474 fnorm=sqrt(fnorm);
475
476 rtmp[iptr-A->pattern->ptr[i]]=fnorm;
477 if( j != i) {
478 max_offdiagonal = MAX(max_offdiagonal,fnorm);
479 sum_row+=fnorm;
480 } else {
481 main_row=fnorm;
482 }
483 }
484 {
485 const double threshold = theta*max_offdiagonal;
486 register dim_t kdeg=0;
487 if (tau*main_row < sum_row) { /* no diagonal dominance */
488 #pragma ivdep
489 for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {
490 register index_t j=A->pattern->index[iptr];
491 if(rtmp[iptr-A->pattern->ptr[i]] > threshold && i!=j) {
492 S[A->pattern->ptr[i]+kdeg] = j;
493 kdeg++;
494 }
495 }
496 }
497 degree_S[i]=kdeg;
498 }
499 }
500 TMPMEMFREE(rtmp);
501 } /* end of parallel region */
502
503 }
504
505 /* the Runge Stueben coarsening algorithm: */
506 void Paso_Preconditioner_LocalAMG_RungeStuebenSearch(const dim_t n, const index_t* offset_S,
507 const dim_t* degree_S, const index_t* S,
508 index_t*split_marker, const bool_t usePanel)
509 {
510
511 index_t *lambda=NULL, *ST=NULL, *notInPanel=NULL, *panel=NULL, lambda_max, lambda_k;
512 dim_t i,k, p, q, *degree_ST=NULL, len_panel, len_panel_new;
513 register index_t j, itmp;
514
515 if (n<=0) return; /* make sure that the return of Paso_Util_arg_max is not pointing to nirvana */
516
517 lambda=TMPMEMALLOC(n, index_t); Esys_checkPtr(lambda);
518 degree_ST=TMPMEMALLOC(n, dim_t); Esys_checkPtr(degree_ST);
519 ST=TMPMEMALLOC(offset_S[n], index_t); Esys_checkPtr(ST);
520 if (usePanel) {
521 if (!SMALL_PANEL) {
522 notInPanel=TMPMEMALLOC(n, bool_t); Esys_checkPtr(notInPanel);
523 }
524 panel=TMPMEMALLOC(n, index_t); Esys_checkPtr(panel);
525 }
526
527
528
529 if (Esys_noError() ) {
530 /* initialize split_marker: */
531 /* Those unknowns which are not influenced go into F, the rest is available for F or C */
532 #pragma omp parallel for private(i) schedule(static)
533 for (i=0;i<n;++i) {
534 degree_ST[i]=0;
535 if (degree_S[i]>0) {
536 lambda[i]=0;
537 split_marker[i]=PASO_AMG_UNDECIDED;
538 } else {
539 split_marker[i]=PASO_AMG_IN_F;
540 lambda[i]=-1;
541 }
542 }
543 /* create transpose :*/
544 for (i=0;i<n;++i) {
545 for (p=0; p<degree_S[i]; ++p) {
546 j=S[offset_S[i]+p];
547 ST[offset_S[j]+degree_ST[j]]=i;
548 degree_ST[j]++;
549 }
550 }
551
552 /* lambda[i] = |undecided k in ST[i]| + 2 * |F-unknown in ST[i]| */
553 #pragma omp parallel for private(i, j, p, itmp) schedule(static)
554 for (i=0;i<n;++i) {
555 if (split_marker[i]==PASO_AMG_UNDECIDED) {
556 itmp=lambda[i];
557 for (p=0; p<degree_ST[i]; ++p) {
558 j=ST[offset_S[i]+p];
559 if (split_marker[j]==PASO_AMG_UNDECIDED) {
560 itmp++;
561 } else { /* at this point there are no C points */
562 itmp+=2;
563 }
564 }
565 lambda[i]=itmp;
566 }
567 }
568 if (usePanel && !SMALL_PANEL) {
569 #pragma omp parallel for private(i) schedule(static)
570 for (i=0;i<n;++i) notInPanel[i]=TRUE;
571 }
572 /* start search :*/
573 i=Paso_Util_arg_max(n,lambda);
574 while (lambda[i]>-1) { /* is there any undecided unknown? */
575 if (SMALL_PANEL) {
576 do {
577 len_panel=0;
578 /* the unknown i is moved to C */
579 split_marker[i]=PASO_AMG_IN_C;
580 lambda[i]=-1; /* lambda from unavailable unknowns is set to -1 */
581
582 /* all undecided unknowns strongly coupled to i are moved to F */
583 for (p=0; p<degree_ST[i]; ++p) {
584 j=ST[offset_S[i]+p];
585
586 if (split_marker[j]==PASO_AMG_UNDECIDED) {
587
588 split_marker[j]=PASO_AMG_IN_F;
589 lambda[j]=-1;
590 for (q=0; q<degree_S[j]; ++q) {
591 k=S[offset_S[j]+q];
592 if (split_marker[k]==PASO_AMG_UNDECIDED) {
593 lambda[k]++;
594 panel[len_panel]=k;
595 len_panel++;
596
597 }
598 }
599
600 }
601 }
602 for (p=0; p<degree_S[i]; ++p) {
603 j=S[offset_S[i]+p];
604 if (split_marker[j]==PASO_AMG_UNDECIDED) {
605 lambda[j]--;
606 panel[len_panel]=j;
607 len_panel++;
608 }
609 }
610
611 lambda_max=-1;
612 for (q=0; q<len_panel; q++) {
613 k = panel[q];
614 j = lambda[k];
615 if (lambda_max < j) {
616 lambda_max = j;
617 i = k;
618 }
619 }
620 } while (len_panel>0);
621 } else if (usePanel) {
622 len_panel=0;
623 do {
624 /* the unknown i is moved to C */
625 split_marker[i]=PASO_AMG_IN_C;
626 lambda[i]=-1; /* lambda from unavailable unknowns is set to -1 */
627
628 /* all undecided unknowns strongly coupled to i are moved to F */
629 for (p=0; p<degree_ST[i]; ++p) {
630 j=ST[offset_S[i]+p];
631
632 if (split_marker[j]==PASO_AMG_UNDECIDED) {
633
634 split_marker[j]=PASO_AMG_IN_F;
635 lambda[j]=-1;
636
637 for (q=0; q<degree_S[j]; ++q) {
638 k=S[offset_S[j]+q];
639 if (split_marker[k]==PASO_AMG_UNDECIDED) {
640 lambda[k]++;
641 if (notInPanel[k]) {
642 notInPanel[k]=FALSE;
643 panel[len_panel]=k;
644 len_panel++;
645 }
646
647 } /* the unknown i is moved to C */
648 split_marker[i]=PASO_AMG_IN_C;
649 lambda[i]=-1; /* lambda from unavailable unknowns is set to -1 */
650 }
651
652 }
653 }
654 for (p=0; p<degree_S[i]; ++p) {
655 j=S[offset_S[i]+p];
656 if (split_marker[j]==PASO_AMG_UNDECIDED) {
657 lambda[j]--;
658 if (notInPanel[j]) {
659 notInPanel[j]=FALSE;
660 panel[len_panel]=j;
661 len_panel++;
662 }
663 }
664 }
665
666 /* consolidate panel */
667 /* remove lambda[q]=-1 */
668 lambda_max=-1;
669 i=-1;
670 len_panel_new=0;
671 for (q=0; q<len_panel; q++) {
672 k=panel[q];
673 lambda_k=lambda[k];
674 if (split_marker[k]==PASO_AMG_UNDECIDED) {
675 panel[len_panel_new]=k;
676 len_panel_new++;
677
678 if (lambda_max == lambda_k) {
679 if (k<i) i=k;
680 } else if (lambda_max < lambda_k) {
681 lambda_max =lambda_k;
682 i=k;
683 }
684 }
685 }
686 len_panel=len_panel_new;
687 } while (len_panel>0);
688 } else {
689 /* the unknown i is moved to C */
690 split_marker[i]=PASO_AMG_IN_C;
691 lambda[i]=-1; /* lambda from unavailable unknowns is set to -1 */
692
693 /* all undecided unknowns strongly coupled to i are moved to F */
694 for (p=0; p<degree_ST[i]; ++p) {
695 j=ST[offset_S[i]+p];
696 if (split_marker[j]==PASO_AMG_UNDECIDED) {
697
698 split_marker[j]=PASO_AMG_IN_F;
699 lambda[j]=-1;
700
701 for (q=0; q<degree_S[j]; ++q) {
702 k=S[offset_S[j]+q];
703 if (split_marker[k]==PASO_AMG_UNDECIDED) lambda[k]++;
704 }
705
706 }
707 }
708 for (p=0; p<degree_S[i]; ++p) {
709 j=S[offset_S[i]+p];
710 if(split_marker[j]==PASO_AMG_UNDECIDED) lambda[j]--;
711 }
712
713 }
714 i=Paso_Util_arg_max(n,lambda);
715 }
716
717 }
718 TMPMEMFREE(lambda);
719 TMPMEMFREE(ST);
720 TMPMEMFREE(degree_ST);
721 if (usePanel) {
722 TMPMEMFREE(panel);
723 if (!SMALL_PANEL) TMPMEMFREE(notInPanel);
724 }
725 }
726
727 /* ensures that two F nodes are connected via a C node :*/
728 void Paso_Preconditioner_LocalAMG_enforceFFConnectivity(const dim_t n, const index_t* offset_S,
729 const dim_t* degree_S, const index_t* S,
730 index_t*split_marker)
731 {
732 dim_t i, p, q;
733
734 /* now we make sure that two (strongly) connected F nodes are (strongly) connected via a C node. */
735 for (i=0;i<n;++i) {
736 if ( (split_marker[i]==PASO_AMG_IN_F) && (degree_S[i]>0) ) {
737 for (p=0; p<degree_S[i]; ++p) {
738 register index_t j=S[offset_S[i]+p];
739 if ( (split_marker[j]==PASO_AMG_IN_F) && (degree_S[j]>0) ) {
740 /* i and j are now two F nodes which are strongly connected */
741 /* is there a C node they share ? */
742 register index_t sharing=-1;
743 for (q=0; q<degree_S[i]; ++q) {
744 index_t k=S[offset_S[i]+q];
745 if (split_marker[k]==PASO_AMG_IN_C) {
746 register index_t* where_k=(index_t*)bsearch(&k, &(S[offset_S[j]]), degree_S[j], sizeof(index_t), Paso_comparIndex);
747 if (where_k != NULL) {
748 sharing=k;
749 break;
750 }
751 }
752 }
753 if (sharing<0) {
754 if (i<j) {
755 split_marker[j]=PASO_AMG_IN_C;
756 } else {
757 split_marker[i]=PASO_AMG_IN_C;
758 break; /* no point to look any further as i is now a C node */
759 }
760 }
761 }
762 }
763 }
764 }
765 }
766

  ViewVC Help
Powered by ViewVC 1.1.26