/[escript]/trunk/paso/src/AMG.c
ViewVC logotype

Contents of /trunk/paso/src/AMG.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3451 - (show annotations)
Sun Jan 23 23:11:01 2011 UTC (8 years, 7 months ago) by gross
File MIME type: text/plain
File size: 27450 byte(s)
compilation fixes
1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2010 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14
15 /**************************************************************/
16
17 /* Paso: AMG preconditioner (local version) */
18
19 /**************************************************************/
20
21 /* Author: artak@uq.edu.au, l.gross@uq.edu.au */
22
23 /**************************************************************/
24
25 #define SHOW_TIMING FALSE
26
27 #include "Paso.h"
28 #include "Preconditioner.h"
29 #include "Options.h"
30 #include "PasoUtil.h"
31 #include "UMFPACK.h"
32 #include "MKL.h"
33
34 /**************************************************************/
35
36 /* free all memory used by AMG */
37
38 void Paso_Preconditioner_AMG_free(Paso_Preconditioner_AMG * in) {
39 if (in!=NULL) {
40 Paso_Preconditioner_Smoother_free(in->Smoother);
41 Paso_SystemMatrix_free(in->P);
42 Paso_SystemMatrix_free(in->R);
43 Paso_SystemMatrix_free(in->A_C);
44 Paso_Preconditioner_AMG_free(in->AMG_C);
45 MEMFREE(in->r);
46 MEMFREE(in->x_C);
47 MEMFREE(in->b_C);
48
49 MEMFREE(in);
50 }
51 }
52
53 index_t Paso_Preconditioner_AMG_getMaxLevel(const Paso_Preconditioner_AMG * in) {
54 if (in->AMG_C == NULL) {
55 return in->level;
56 } else {
57 return Paso_Preconditioner_AMG_getMaxLevel(in->AMG_C);
58 }
59 }
60 double Paso_Preconditioner_AMG_getCoarseLevelSparsity(const Paso_Preconditioner_AMG * in) {
61 if (in->AMG_C == NULL) {
62 if (in->A_C == NULL) {
63 return 1.;
64 } else {
65 return Paso_SystemMatrix_getSparsity(in->A_C);
66 }
67 } else {
68 return Paso_Preconditioner_AMG_getCoarseLevelSparsity(in->AMG_C);
69 }
70 }
71 dim_t Paso_Preconditioner_AMG_getNumCoarseUnknwons(const Paso_Preconditioner_AMG * in) {
72 if (in->AMG_C == NULL) {
73 if (in->A_C == NULL) {
74 return 0;
75 } else {
76 return Paso_SystemMatrix_getTotalNumRows(in->A_C);
77 }
78 } else {
79 return Paso_Preconditioner_AMG_getNumCoarseUnknwons(in->AMG_C);
80 }
81 }
82 /*****************************************************************
83
84 constructs AMG
85
86 ******************************************************************/
87 Paso_Preconditioner_AMG* Paso_Preconditioner_AMG_alloc(Paso_SystemMatrix *A_p,dim_t level,Paso_Options* options) {
88
89 Paso_Preconditioner_AMG* out=NULL;
90 bool_t verbose=options->verbose;
91
92 const dim_t my_n=Paso_SystemMatrix_getNumRows(A_p);
93 const dim_t overlap_n=Paso_SystemMatrix_getColOverlap(A_p);
94 const dim_t n = my_n + overlap_n;
95
96 const dim_t n_block=A_p->row_block_size;
97 index_t* F_marker=NULL, *counter=NULL;
98 dim_t i;
99 double time0=0;
100 const double theta = options->coarsening_threshold;
101 const double tau = options->diagonal_dominance_threshold;
102 const double sparsity=Paso_SystemMatrix_getSparsity(A_p);
103 const dim_t total_n=Paso_SystemMatrix_getGlobalTotalNumRows(A_p);
104
105
106 /*
107 is the input matrix A suitable for coarsening
108
109 */
110 if ( (sparsity >= options->min_coarse_sparsity) ||
111 (total_n <= options->min_coarse_matrix_size) ||
112 (level > options->level_max) ) {
113
114 if (verbose) {
115 /*
116 print stopping condition:
117 - 'SPAR' = min_coarse_matrix_sparsity exceeded
118 - 'SIZE' = min_coarse_matrix_size exceeded
119 - 'LEVEL' = level_max exceeded
120 */
121 printf("Paso_Preconditioner: AMG: termination of coarsening by ");
122
123 if (sparsity >= options->min_coarse_sparsity)
124 printf("SPAR ");
125
126 if (total_n <= options->min_coarse_matrix_size)
127 printf("SIZE ");
128
129 if (level > options->level_max)
130 printf("LEVEL ");
131
132 printf("\n");
133
134 printf("Paso_Preconditioner: AMG level %d (limit = %d) stopped. sparsity = %e (limit = %e), unknowns = %d (limit = %d)\n",
135 level, options->level_max, sparsity, options->min_coarse_sparsity, total_n, options->min_coarse_matrix_size);
136
137 }
138
139 return NULL;
140 } else {
141 /* Start Coarsening : */
142 const dim_t len_S=A_p->mainBlock->pattern->len+A_p->col_coupleBlock->pattern->len;
143 dim_t* degree_S=TMPMEMALLOC(my_n, dim_t);
144 index_t *offset_S=TMPMEMALLOC(my_n, index_t);
145 index_t *S=TMPMEMALLOC(len_S, index_t);
146
147 F_marker=TMPMEMALLOC(n,index_t);
148 counter=TMPMEMALLOC(n,index_t);
149
150 if ( !( Esys_checkPtr(F_marker) || Esys_checkPtr(counter) || Esys_checkPtr(degree_S) || Esys_checkPtr(offset_S) || Esys_checkPtr(S) ) ) {
151 /*
152 set splitting of unknows:
153
154 */
155 time0=Esys_timer();
156 if (n_block>1) {
157 Paso_Preconditioner_AMG_setStrongConnections_Block(A_p, degree_S, offset_S, S, theta,tau);
158 } else {
159 Paso_Preconditioner_AMG_setStrongConnections(A_p, degree_S, offset_S, S, theta,tau);
160 }
161 {
162 dim_t p;
163 for (i=0; i< my_n; ++i) {
164 printf("%d: ",i);
165 for (p=0; p<degree_S[i];++p) printf("%d ",S[offset_S[i]+p]);
166 printf("\n");
167 }
168 }
169
170
171 /*MPI:
172 Paso_Preconditioner_AMG_RungeStuebenSearch(n, A_p->pattern->ptr, degree_S, S, F_marker, options->usePanel);
173 */
174
175 /* in BoomerAMG interpolation is used FF connectiovity is required :*/
176 /*MPI:
177 if (options->interpolation_method == PASO_CLASSIC_INTERPOLATION_WITH_FF_COUPLING)
178 Paso_Preconditioner_AMG_enforceFFConnectivity(n, A_p->pattern->ptr, degree_S, S, F_marker);
179 */
180
181 options->coarsening_selection_time=Esys_timer()-time0 + MAX(0, options->coarsening_selection_time);
182
183 #ifdef AAAAA
184 if (Esys_noError() ) {
185 #pragma omp parallel for private(i) schedule(static)
186 for (i = 0; i < n; ++i) F_marker[i]=(F_marker[i] == PASO_AMG_IN_F);
187
188 /*
189 count number of unkowns to be eliminated:
190 */
191 n_F=Paso_Util_cumsum_maskedTrue(n,counter, F_marker);
192 n_C=n-n_F;
193 if (verbose) printf("Paso_Preconditioner: AMG level %d: %d unknowns are flagged for elimination. %d left.\n",level,n_F,n-n_F);
194
195 if ( n_F == 0 ) { /* is a nasty case. a direct solver should be used, return NULL */
196 out = NULL;
197 } else {
198 out=MEMALLOC(1,Paso_Preconditioner_AMG);
199 if (! Esys_checkPtr(out)) {
200 out->level = level;
201 out->n = n;
202 out->n_F = n_F;
203 out->n_block = n_block;
204 out->A_C = NULL;
205 out->P = NULL;
206 out->R = NULL;
207 out->post_sweeps = options->post_sweeps;
208 out->pre_sweeps = options->pre_sweeps;
209 out->r = NULL;
210 out->x_C = NULL;
211 out->b_C = NULL;
212 out->AMG_C = NULL;
213 out->Smoother=NULL;
214 }
215 mask_C=TMPMEMALLOC(n,index_t);
216 rows_in_F=TMPMEMALLOC(n_F,index_t);
217 Esys_checkPtr(mask_C);
218 Esys_checkPtr(rows_in_F);
219 if ( Esys_noError() ) {
220
221 out->Smoother = Paso_Preconditioner_Smoother_alloc(A_p, (options->smoother == PASO_JACOBI), verbose);
222
223 if (n_C != 0) {
224 /* if nothing is been removed we have a diagonal dominant matrix and we just run a few steps of the smoother */
225
226 /* allocate helpers :*/
227 out->x_C=MEMALLOC(n_block*n_C,double);
228 out->b_C=MEMALLOC(n_block*n_C,double);
229 out->r=MEMALLOC(n_block*n,double);
230
231 Esys_checkPtr(out->r);
232 Esys_checkPtr(out->x_C);
233 Esys_checkPtr(out->b_C);
234
235 if ( Esys_noError() ) {
236 /* creates index for F:*/
237 #pragma omp parallel private(i)
238 {
239 #pragma omp for schedule(static)
240 for (i = 0; i < n; ++i) {
241 if (F_marker[i]) rows_in_F[counter[i]]=i;
242 }
243 }
244 /* create mask of C nodes with value >-1 gives new id */
245 i=Paso_Util_cumsum_maskedFalse(n,counter, F_marker);
246
247 #pragma omp parallel for private(i) schedule(static)
248 for (i = 0; i < n; ++i) {
249 if (F_marker[i]) {
250 mask_C[i]=-1;
251 } else {
252 mask_C[i]=counter[i];;
253 }
254 }
255 /*
256 get Prolongation :
257 */
258 time0=Esys_timer();
259 /*MPI:
260 out->P=Paso_Preconditioner_AMG_getProlongation(A_p,A_p->pattern->ptr, degree_S,S,n_C,mask_C, options->interpolation_method);
261 */
262 if (SHOW_TIMING) printf("timing: level %d: getProlongation: %e\n",level, Esys_timer()-time0);
263 }
264 /*
265 construct Restriction operator as transposed of Prolongation operator:
266 */
267 if ( Esys_noError()) {
268 time0=Esys_timer();
269 /*MPI:
270 out->R=Paso_SystemMatrix_getTranspose(out->P);
271 */
272 if (SHOW_TIMING) printf("timing: level %d: Paso_SystemMatrix_getTranspose: %e\n",level,Esys_timer()-time0);
273 }
274 /*
275 construct coarse level matrix:
276 */
277 if ( Esys_noError()) {
278 time0=Esys_timer();
279 /*MPI:
280 Atemp=Paso_SystemMatrix_MatrixMatrix(A_p,out->P);
281 A_C=Paso_SystemMatrix_MatrixMatrix(out->R,Atemp);
282
283 Paso_SystemMatrix_free(Atemp);
284 */
285
286 if (SHOW_TIMING) printf("timing: level %d : construct coarse matrix: %e\n",level,Esys_timer()-time0);
287 }
288
289
290 /*
291 constructe courser level:
292
293 */
294 if ( Esys_noError()) {
295 out->AMG_C=Paso_Preconditioner_AMG_alloc(A_C,level+1,options);
296 }
297 if ( Esys_noError()) {
298 if ( out->AMG_C == NULL ) {
299 out->reordering = options->reordering;
300 out->refinements = options->coarse_matrix_refinements;
301 /* no coarse level matrix has been constructed. use direct solver */
302 #ifdef MKL
303 out->A_C=Paso_SystemMatrix_unroll(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_OFFSET1, A_C);
304 Paso_SystemMatrix_free(A_C);
305 out->A_C->solver_package = PASO_MKL;
306 if (verbose) printf("Paso_Preconditioner: AMG: use MKL direct solver on the coarsest level (number of unknowns = %d).\n",n_C*n_block);
307 #else
308 #ifdef UMFPACK
309 out->A_C=Paso_SystemMatrix_unroll(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_CSC, A_C);
310 Paso_SystemMatrix_free(A_C);
311 out->A_C->solver_package = PASO_UMFPACK;
312 if (verbose) printf("Paso_Preconditioner: AMG: use UMFPACK direct solver on the coarsest level (number of unknowns = %d).\n",n_C*n_block);
313 #else
314 out->A_C=A_C;
315 out->A_C->solver_p=Paso_Preconditioner_Smoother_alloc(out->A_C, (options->smoother == PASO_JACOBI), verbose);
316 out->A_C->solver_package = PASO_SMOOTHER;
317 if (verbose) printf("Paso_Preconditioner: AMG: use smoother on the coarsest level (number of unknowns = %d).\n",n_C*n_block);
318 #endif
319 #endif
320 } else {
321 /* finally we set some helpers for the solver step */
322 out->A_C=A_C;
323 }
324 }
325 }
326 }
327 TMPMEMFREE(mask_C);
328 TMPMEMFREE(rows_in_F);
329 }
330 }
331 #endif
332
333 }
334 TMPMEMFREE(counter);
335 TMPMEMFREE(F_marker);
336 TMPMEMFREE(degree_S);
337 TMPMEMFREE(offset_S);
338 TMPMEMFREE(S);
339 }
340
341 if (Esys_noError()) {
342 return out;
343 } else {
344 Paso_Preconditioner_AMG_free(out);
345 return NULL;
346 }
347 }
348
349
350 void Paso_Preconditioner_AMG_solve(Paso_SystemMatrix* A, Paso_Preconditioner_AMG * amg, double * x, double * b) {
351 const dim_t n = amg->n * amg->n_block;
352 double time0=0;
353 const dim_t post_sweeps=amg->post_sweeps;
354 const dim_t pre_sweeps=amg->pre_sweeps;
355
356 /* presmoothing */
357 time0=Esys_timer();
358 Paso_Preconditioner_Smoother_solve(A, amg->Smoother, x, b, pre_sweeps, FALSE);
359 time0=Esys_timer()-time0;
360 if (SHOW_TIMING) printf("timing: level %d: Presmooting: %e\n",amg->level, time0);
361 /* end of presmoothing */
362
363 if (amg->n_F < amg->n) { /* is there work on the coarse level? */
364 time0=Esys_timer();
365 Paso_Copy(n, amg->r, b); /* r <- b */
366 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(-1.,A,x,1.,amg->r); /*r=r-Ax*/
367 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(1.,amg->R,amg->r,0.,amg->b_C); /* b_c = R*r */
368 time0=Esys_timer()-time0;
369 /* coarse level solve */
370 if ( amg->AMG_C == NULL) {
371 time0=Esys_timer();
372 /* A_C is the coarsest level */
373 #ifdef FIXME
374 switch (amg->A_C->solver_package) {
375 case (PASO_MKL):
376 Paso_MKL(amg->A_C, amg->x_C,amg->b_C, amg->reordering, amg->refinements, SHOW_TIMING);
377 break;
378 case (PASO_UMFPACK):
379 Paso_UMFPACK(amg->A_C, amg->x_C,amg->b_C, amg->refinements, SHOW_TIMING);
380 break;
381 case (PASO_SMOOTHER):
382 Paso_Preconditioner_Smoother_solve(amg->A_C, amg->A_C->solver_p,amg->x_C,amg->b_C,pre_sweeps+post_sweeps, FALSE);
383 break;
384 }
385 #endif
386 Paso_Preconditioner_Smoother_solve(amg->A_C, amg->A_C->solver_p,amg->x_C,amg->b_C,pre_sweeps+post_sweeps, FALSE);
387 if (SHOW_TIMING) printf("timing: level %d: DIRECT SOLVER: %e\n",amg->level,Esys_timer()-time0);
388 } else {
389 Paso_Preconditioner_AMG_solve(amg->A_C, amg->AMG_C,amg->x_C,amg->b_C); /* x_C=AMG(b_C) */
390 }
391 time0=time0+Esys_timer();
392 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(1.,amg->P,amg->x_C,1.,x); /* x = x + P*x_c */
393
394 /*postsmoothing*/
395
396 /*solve Ax=b with initial guess x */
397 time0=Esys_timer();
398 Paso_Preconditioner_Smoother_solve(A, amg->Smoother, x, b, post_sweeps, TRUE);
399 time0=Esys_timer()-time0;
400 if (SHOW_TIMING) printf("timing: level %d: Postsmoothing: %e\n",amg->level,time0);
401 /*end of postsmoothing*/
402
403 }
404 return;
405 }
406
407 /* theta = threshold for strong connections */
408 /* tau = threshold for diagonal dominance */
409
410 /*S_i={j \in N_i; i strongly coupled to j}
411
412 in the sense that |A_{ij}| >= theta * max_k |A_{ik}|
413 */
414
415 void Paso_Preconditioner_AMG_setStrongConnections(Paso_SystemMatrix* A,
416 dim_t *degree_S, index_t *offset_S, index_t *S,
417 const double theta, const double tau)
418 {
419 const dim_t my_n=Paso_SystemMatrix_getNumRows(A);
420 index_t iptr, i;
421
422
423 #pragma omp parallel for private(i,iptr) schedule(static)
424 for (i=0;i<my_n;++i) {
425 register double max_offdiagonal = 0.;
426 register double sum_row=0;
427 register double main_row=0;
428 register dim_t kdeg=0;
429 register index_t koffset=A->mainBlock->pattern->ptr[i]+A->col_coupleBlock->pattern->ptr[i];
430 #pragma ivdep
431 for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
432 register index_t j=A->mainBlock->pattern->index[iptr];
433 register double fnorm=ABS(A->mainBlock->val[iptr]);
434
435 if( j != i) {
436 max_offdiagonal = MAX(max_offdiagonal,fnorm);
437 sum_row+=fnorm;
438 } else {
439 main_row=fnorm;
440 }
441
442 }
443 #pragma ivdep
444 for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
445 register double fnorm=ABS(A->col_coupleBlock->val[iptr]);
446 max_offdiagonal = MAX(max_offdiagonal,fnorm);
447 sum_row+=fnorm;
448 }
449
450 {
451 const double threshold = theta*max_offdiagonal;
452 if (tau*main_row < sum_row) { /* no diagonal domainance */
453 #pragma ivdep
454 for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
455 register index_t j=A->mainBlock->pattern->index[iptr];
456 if(ABS(A->mainBlock->val[iptr])>threshold && i!=j) {
457 S[koffset+kdeg] = j;
458 kdeg++;
459 }
460 }
461 #pragma ivdep
462 for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
463 register index_t j=A->col_coupleBlock->pattern->index[iptr];
464 if(ABS(A->col_coupleBlock->val[iptr])>threshold) {
465 S[koffset+kdeg] = j + my_n;
466 kdeg++;
467 }
468 }
469 }
470 }
471 offset_S[i]=koffset;
472 degree_S[i]=kdeg;
473 }
474 }
475
476 /* theta = threshold for strong connections */
477 /* tau = threshold for diagonal dominance */
478 /*S_i={j \in N_i; i strongly coupled to j}
479
480 in the sense that |A_{ij}|_F >= theta * max_k |A_{ik}|_F
481 */
482 void Paso_Preconditioner_AMG_setStrongConnections_Block(Paso_SystemMatrix* A,
483 dim_t *degree_S, index_t *offset_S, index_t *S,
484 const double theta, const double tau)
485
486 {
487 const dim_t my_n=Paso_SystemMatrix_getNumRows(A);
488 index_t iptr, i, bi;
489 const dim_t n_block=A->row_block_size;
490
491
492 #pragma omp parallel private(i,iptr, bi)
493 {
494 dim_t max_deg=0;
495 double *rtmp=TMPMEMALLOC(max_deg, double);
496
497 #pragma omp for schedule(static)
498 for (i=0;i<my_n;++i) max_deg=MAX(max_deg, A->mainBlock->pattern->ptr[i+1]-A->mainBlock->pattern->ptr[i]
499 +A->col_coupleBlock->pattern->ptr[i+1]-A->col_coupleBlock->pattern->ptr[i]);
500
501
502 #pragma omp for schedule(static)
503 for (i=0;i<my_n;++i) {
504
505 register double max_offdiagonal = 0.;
506 register double sum_row=0;
507 register double main_row=0;
508 register index_t rtmp_offset=-A->mainBlock->pattern->ptr[i];
509 register dim_t kdeg=0;
510 register index_t koffset=A->mainBlock->pattern->ptr[i]+A->col_coupleBlock->pattern->ptr[i];
511
512 for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
513 register index_t j=A->mainBlock->pattern->index[iptr];
514 register double fnorm=0;
515 #pragma ivdep
516 for(bi=0;bi<n_block*n_block;++bi) {
517 register double rtmp2 = A->mainBlock->val[iptr*n_block*n_block+bi];
518 fnorm+=rtmp2*rtmp2;
519 }
520 fnorm=sqrt(fnorm);
521
522 rtmp[iptr+rtmp_offset]=fnorm;
523 if( j != i) {
524 max_offdiagonal = MAX(max_offdiagonal,fnorm);
525 sum_row+=fnorm;
526 } else {
527 main_row=fnorm;
528 }
529 }
530 rtmp_offset=A->mainBlock->pattern->ptr[i+1]-A->mainBlock->pattern->ptr[i]-A->col_coupleBlock->pattern->ptr[i];
531 for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
532 register double fnorm=0;
533 #pragma ivdep
534 for(bi=0;bi<n_block*n_block;++bi) {
535 register double rtmp2 = A->col_coupleBlock->val[iptr*n_block*n_block+bi];
536 fnorm+=rtmp2*rtmp2;
537 }
538 fnorm=sqrt(fnorm);
539
540 rtmp[iptr+rtmp_offset]=fnorm;
541 max_offdiagonal = MAX(max_offdiagonal,fnorm);
542 sum_row+=fnorm;
543 }
544 {
545 const double threshold = theta*max_offdiagonal;
546
547 if (tau*main_row < sum_row) { /* no diagonal domainance */
548 rtmp_offset=-A->mainBlock->pattern->ptr[i];
549 #pragma ivdep
550 for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
551 register index_t j=A->mainBlock->pattern->index[iptr];
552 if(rtmp[iptr+rtmp_offset] > threshold && i!=j) {
553 S[koffset+kdeg] = j;
554 kdeg++;
555 }
556 }
557 rtmp_offset=A->mainBlock->pattern->ptr[i+1]-A->mainBlock->pattern->ptr[i]-A->col_coupleBlock->pattern->ptr[i];
558 #pragma ivdep
559 for (iptr=A->col_coupleBlock->pattern->ptr[i]; iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
560 register index_t j=A->col_coupleBlock->pattern->index[iptr];
561 if(rtmp[iptr+rtmp_offset] > threshold) {
562 S[koffset+kdeg] = j + my_n;
563 kdeg++;
564 }
565 }
566 }
567 }
568 degree_S[i]=kdeg;
569 offset_S[i]=koffset;
570 }
571 TMPMEMFREE(rtmp);
572 } /* end of parallel region */
573
574 }
575
576 #ifdef AAAAA
577 /* the runge stueben coarsening algorithm: */
578 void Paso_Preconditioner_AMG_RungeStuebenSearch(const dim_t n, const index_t* offset_S,
579 const dim_t* degree_S, const index_t* S,
580 index_t*split_marker, const bool_t usePanel)
581 {
582
583 index_t *lambda=NULL, *ST=NULL, *notInPanel=NULL, *panel=NULL, lambda_max, lambda_k;
584 dim_t i,k, p, q, *degree_ST=NULL, len_panel, len_panel_new;
585 register index_t j, itmp;
586
587 if (n<=0) return; /* make sure that the return of Paso_Util_arg_max is not pointing to nirvana */
588
589 lambda=TMPMEMALLOC(n, index_t); Esys_checkPtr(lambda);
590 degree_ST=TMPMEMALLOC(n, dim_t); Esys_checkPtr(degree_ST);
591 ST=TMPMEMALLOC(offset_S[n], index_t); Esys_checkPtr(ST);
592 if (usePanel) {
593 notInPanel=TMPMEMALLOC(n, bool_t); Esys_checkPtr(notInPanel);
594 panel=TMPMEMALLOC(n, index_t); Esys_checkPtr(panel);
595 }
596
597
598
599 if (Esys_noError() ) {
600 /* initialize split_marker and split_marker :*/
601 /* those unknows which are not influenced go into F, the rest is available for F or C */
602 #pragma omp parallel for private(i) schedule(static)
603 for (i=0;i<n;++i) {
604 degree_ST[i]=0;
605 if (degree_S[i]>0) {
606 lambda[i]=0;
607 split_marker[i]=PASO_AMG_UNDECIDED;
608 } else {
609 split_marker[i]=PASO_AMG_IN_F;
610 lambda[i]=-1;
611 }
612 }
613 /* create transpose :*/
614 for (i=0;i<n;++i) {
615 for (p=0; p<degree_S[i]; ++p) {
616 j=S[offset_S[i]+p];
617 ST[offset_S[j]+degree_ST[j]]=i;
618 degree_ST[j]++;
619 }
620 }
621 /* lambda[i] = |undecided k in ST[i]| + 2 * |F-unknown in ST[i]| */
622 #pragma omp parallel for private(i, j, itmp) schedule(static)
623 for (i=0;i<n;++i) {
624 if (split_marker[i]==PASO_AMG_UNDECIDED) {
625 itmp=lambda[i];
626 for (p=0; p<degree_ST[i]; ++p) {
627 j=ST[offset_S[i]+p];
628 if (split_marker[j]==PASO_AMG_UNDECIDED) {
629 itmp++;
630 } else { /* at this point there are no C points */
631 itmp+=2;
632 }
633 }
634 lambda[i]=itmp;
635 }
636 }
637 if (usePanel) {
638 #pragma omp parallel for private(i) schedule(static)
639 for (i=0;i<n;++i) notInPanel[i]=TRUE;
640 }
641 /* start search :*/
642 i=Paso_Util_arg_max(n,lambda);
643 while (lambda[i]>-1) { /* is there any undecided unknown? */
644
645 if (usePanel) {
646 len_panel=0;
647 do {
648 /* the unknown i is moved to C */
649 split_marker[i]=PASO_AMG_IN_C;
650 lambda[i]=-1; /* lambda from unavailable unknowns is set to -1 */
651
652 /* all undecided unknown strongly coupled to i are moved to F */
653 for (p=0; p<degree_ST[i]; ++p) {
654 j=ST[offset_S[i]+p];
655
656 if (split_marker[j]==PASO_AMG_UNDECIDED) {
657
658 split_marker[j]=PASO_AMG_IN_F;
659 lambda[j]=-1;
660
661 for (q=0; q<degree_ST[j]; ++q) {
662 k=ST[offset_S[j]+q];
663 if (split_marker[k]==PASO_AMG_UNDECIDED) {
664 lambda[k]++;
665 if (notInPanel[k]) {
666 notInPanel[k]=FALSE;
667 panel[len_panel]=k;
668 len_panel++;
669 }
670
671 } /* the unknown i is moved to C */
672 split_marker[i]=PASO_AMG_IN_C;
673 lambda[i]=-1; /* lambda from unavailable unknowns is set to -1 */
674 }
675
676 }
677 }
678 for (p=0; p<degree_S[i]; ++p) {
679 j=S[offset_S[i]+p];
680 if (split_marker[j]==PASO_AMG_UNDECIDED) {
681 lambda[j]--;
682 if (notInPanel[j]) {
683 notInPanel[j]=FALSE;
684 panel[len_panel]=j;
685 len_panel++;
686 }
687 }
688 }
689
690 /* consolidate panel */
691 /* remove lambda[q]=-1 */
692 lambda_max=-1;
693 i=-1;
694 len_panel_new=0;
695 for (q=0; q<len_panel; q++) {
696 k=panel[q];
697 lambda_k=lambda[k];
698 if (split_marker[k]==PASO_AMG_UNDECIDED) {
699 panel[len_panel_new]=k;
700 len_panel_new++;
701
702 if (lambda_max == lambda_k) {
703 if (k<i) i=k;
704 } else if (lambda_max < lambda_k) {
705 lambda_max =lambda_k;
706 i=k;
707 }
708 }
709 }
710 len_panel=len_panel_new;
711 } while (len_panel>0);
712 } else {
713 /* the unknown i is moved to C */
714 split_marker[i]=PASO_AMG_IN_C;
715 lambda[i]=-1; /* lambda from unavailable unknowns is set to -1 */
716
717 /* all undecided unknown strongly coupled to i are moved to F */
718 for (p=0; p<degree_ST[i]; ++p) {
719 j=ST[offset_S[i]+p];
720 if (split_marker[j]==PASO_AMG_UNDECIDED) {
721
722 split_marker[j]=PASO_AMG_IN_F;
723 lambda[j]=-1;
724
725 for (q=0; q<degree_ST[j]; ++q) {
726 k=ST[offset_S[j]+q];
727 if (split_marker[k]==PASO_AMG_UNDECIDED) lambda[k]++;
728 }
729
730 }
731 }
732 for (p=0; p<degree_S[i]; ++p) {
733 j=S[offset_S[i]+p];
734 if(split_marker[j]==PASO_AMG_UNDECIDED) lambda[j]--;
735 }
736
737 }
738 i=Paso_Util_arg_max(n,lambda);
739 }
740
741 }
742 TMPMEMFREE(lambda);
743 TMPMEMFREE(ST);
744 TMPMEMFREE(degree_ST);
745 TMPMEMFREE(panel);
746 TMPMEMFREE(notInPanel);
747 }
748 /* ensures that two F nodes are connected via a C node :*/
749 void Paso_Preconditioner_AMG_enforceFFConnectivity(const dim_t n, const index_t* offset_S,
750 const dim_t* degree_S, const index_t* S,
751 index_t*split_marker)
752 {
753 dim_t i, p, q;
754
755 /* now we make sure that two (strongly) connected F nodes are (strongly) connected via a C node. */
756 for (i=0;i<n;++i) {
757 if ( (split_marker[i]==PASO_AMG_IN_F) && (degree_S[i]>0) ) {
758 for (p=0; p<degree_S[i]; ++p) {
759 register index_t j=S[offset_S[i]+p];
760 if ( (split_marker[j]==PASO_AMG_IN_F) && (degree_S[j]>0) ) {
761 /* i and j are now two F nodes which are strongly connected */
762 /* is there a C node they share ? */
763 register index_t sharing=-1;
764 for (q=0; q<degree_S[i]; ++q) {
765 index_t k=S[offset_S[i]+q];
766 if (split_marker[k]==PASO_AMG_IN_C) {
767 register index_t* where_k=(index_t*)bsearch(&k, &(S[offset_S[j]]), degree_S[j], sizeof(index_t), Paso_comparIndex);
768 if (where_k != NULL) {
769 sharing=k;
770 break;
771 }
772 }
773 }
774 if (sharing<0) {
775 if (i<j) {
776 split_marker[j]=PASO_AMG_IN_C;
777 } else {
778 split_marker[i]=PASO_AMG_IN_C;
779 break; /* no point to look any further as i is now a C node */
780 }
781 }
782 }
783 }
784 }
785 }
786 }
787 #endif
788
789 #ifdef DFG
790 void Paso_Preconditioner_AMG_CIJPCoarsening( )
791 {
792
793
794 const dim_t my_n;
795 const dim_t overlap_n;
796 const dim_t n= my_n + overlap_n;
797
798
799 /* initialize split_marker and split_marker :*/
800 /* those unknows which are not influenced go into F, the rest is available for F or C */
801 #pragma omp parallel for private(i) schedule(static)
802 for (i=0;i<n;++i) {
803 degree_ST[i]=0;
804 if (degree_S[i]>0) {
805 lambda[i]=0;
806 split_marker[i]=PASO_AMG_UNDECIDED;
807 } else {
808 split_marker[i]=PASO_AMG_IN_F;
809 lambda[i]=-1;
810 }
811 }
812
813 /* set local lambda + overlap */
814 #pragma omp parallel for private(i)
815 for (i=0; i<n ++i) {
816 w[i]=degree_ST[i];
817 }
818 for (i=0; i<my_n; i++) {
819 w2[i]=random;
820 }
821
822
823 /* add noise to w */
824 Paso_Coupler_add(n, w, 1., w2, col_coupler);
825
826 /* */
827 global_n_C=0;
828 global_n_F=..;
829
830 while (global_n_C + global_n_F < global_n) {
831
832
833 is_in_D[i]=FALSE;
834 /* test local connectivit*/
835 /* w2[i] = max(w[k] | k in S_i or k in S^T_i */
836 #pragma omp parallel for private(i)
837 for (i=0; i<n; ++i) w2[i]=0;
838
839 for (i=0; i<my_n; ++i) {
840 for( iPtr =0 ; iPtr < degree_S[i]; ++iPtr) {
841 k=S[offset_S[i]+iPtr];
842 w2[i]=MAX(w2[i],w[k]);
843 w2[k]=MAX(w2[k],w[i]);
844 }
845 }
846 /* adjust overlaps by MAX */
847 Paso_Coupler_max(n, w2, col_coupler);
848
849 /* points with w[i]>w2[i] become C nodes */
850 }
851
852 }
853 #endif

  ViewVC Help
Powered by ViewVC 1.1.26