/[escript]/trunk/paso/src/AMG.c
ViewVC logotype

Contents of /trunk/paso/src/AMG.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3458 - (show annotations)
Mon Jan 31 07:06:42 2011 UTC (8 years, 7 months ago) by gross
File MIME type: text/plain
File size: 27682 byte(s)
more on AMG under MPI
1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2010 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14
15 /**************************************************************/
16
17 /* Paso: AMG preconditioner (local version) */
18
19 /**************************************************************/
20
21 /* Author: artak@uq.edu.au, l.gross@uq.edu.au */
22
23 /**************************************************************/
24
25 #define SHOW_TIMING FALSE
26
27 #include "Paso.h"
28 #include "Preconditioner.h"
29 #include "Options.h"
30 #include "PasoUtil.h"
31 #include "UMFPACK.h"
32 #include "MKL.h"
33 #include<stdio.h>
34
35
36 /**************************************************************/
37
38 /* free all memory used by AMG */
39
40 void Paso_Preconditioner_AMG_free(Paso_Preconditioner_AMG * in) {
41 if (in!=NULL) {
42 Paso_Preconditioner_Smoother_free(in->Smoother);
43 Paso_SystemMatrix_free(in->P);
44 Paso_SystemMatrix_free(in->R);
45 Paso_SystemMatrix_free(in->A_C);
46 Paso_Preconditioner_AMG_free(in->AMG_C);
47 MEMFREE(in->r);
48 MEMFREE(in->x_C);
49 MEMFREE(in->b_C);
50
51 MEMFREE(in);
52 }
53 }
54
55 index_t Paso_Preconditioner_AMG_getMaxLevel(const Paso_Preconditioner_AMG * in) {
56 if (in->AMG_C == NULL) {
57 return in->level;
58 } else {
59 return Paso_Preconditioner_AMG_getMaxLevel(in->AMG_C);
60 }
61 }
62 double Paso_Preconditioner_AMG_getCoarseLevelSparsity(const Paso_Preconditioner_AMG * in) {
63 if (in->AMG_C == NULL) {
64 if (in->A_C == NULL) {
65 return 1.;
66 } else {
67 return Paso_SystemMatrix_getSparsity(in->A_C);
68 }
69 } else {
70 return Paso_Preconditioner_AMG_getCoarseLevelSparsity(in->AMG_C);
71 }
72 }
73 dim_t Paso_Preconditioner_AMG_getNumCoarseUnknwons(const Paso_Preconditioner_AMG * in) {
74 if (in->AMG_C == NULL) {
75 if (in->A_C == NULL) {
76 return 0;
77 } else {
78 return Paso_SystemMatrix_getTotalNumRows(in->A_C);
79 }
80 } else {
81 return Paso_Preconditioner_AMG_getNumCoarseUnknwons(in->AMG_C);
82 }
83 }
84 /*****************************************************************
85
86 constructs AMG
87
88 ******************************************************************/
89 Paso_Preconditioner_AMG* Paso_Preconditioner_AMG_alloc(Paso_SystemMatrix *A_p,dim_t level,Paso_Options* options) {
90
91 Paso_Preconditioner_AMG* out=NULL;
92 bool_t verbose=options->verbose;
93
94 const dim_t my_n=Paso_SystemMatrix_getNumRows(A_p);
95 const dim_t overlap_n=Paso_SystemMatrix_getColOverlap(A_p);
96 const dim_t n = my_n + overlap_n;
97
98 const dim_t n_block=A_p->row_block_size;
99 index_t* F_marker=NULL, *counter=NULL;
100 dim_t i;
101 double time0=0;
102 const double theta = options->coarsening_threshold;
103 const double tau = options->diagonal_dominance_threshold;
104 const double sparsity=Paso_SystemMatrix_getSparsity(A_p);
105 const dim_t total_n=Paso_SystemMatrix_getGlobalTotalNumRows(A_p);
106
107
108 /*
109 is the input matrix A suitable for coarsening
110
111 */
112 if ( (sparsity >= options->min_coarse_sparsity) ||
113 (total_n <= options->min_coarse_matrix_size) ||
114 (level > options->level_max) ) {
115
116 if (verbose) {
117 /*
118 print stopping condition:
119 - 'SPAR' = min_coarse_matrix_sparsity exceeded
120 - 'SIZE' = min_coarse_matrix_size exceeded
121 - 'LEVEL' = level_max exceeded
122 */
123 printf("Paso_Preconditioner: AMG: termination of coarsening by ");
124
125 if (sparsity >= options->min_coarse_sparsity)
126 printf("SPAR ");
127
128 if (total_n <= options->min_coarse_matrix_size)
129 printf("SIZE ");
130
131 if (level > options->level_max)
132 printf("LEVEL ");
133
134 printf("\n");
135
136 printf("Paso_Preconditioner: AMG level %d (limit = %d) stopped. sparsity = %e (limit = %e), unknowns = %d (limit = %d)\n",
137 level, options->level_max, sparsity, options->min_coarse_sparsity, total_n, options->min_coarse_matrix_size);
138
139 }
140
141 return NULL;
142 } else {
143 /* Start Coarsening : */
144 const dim_t len_S=A_p->mainBlock->pattern->len+A_p->col_coupleBlock->pattern->len;
145 dim_t* degree_S=TMPMEMALLOC(my_n, dim_t);
146 index_t *offset_S=TMPMEMALLOC(my_n, index_t);
147 index_t *S=TMPMEMALLOC(len_S, index_t);
148
149 F_marker=TMPMEMALLOC(n,index_t);
150 counter=TMPMEMALLOC(n,index_t);
151
152 if ( !( Esys_checkPtr(F_marker) || Esys_checkPtr(counter) || Esys_checkPtr(degree_S) || Esys_checkPtr(offset_S) || Esys_checkPtr(S) ) ) {
153 /*
154 make sure that corresponding values in the row_coupleBlock and col_coupleBlock are identical
155 */
156 Paso_SystemMatrix_copyColCoupleBlock(A_p);
157
158 /*
159 set splitting of unknows:
160
161 */
162 time0=Esys_timer();
163 if (n_block>1) {
164 Paso_Preconditioner_AMG_setStrongConnections_Block(A_p, degree_S, offset_S, S, theta,tau);
165 } else {
166 Paso_Preconditioner_AMG_setStrongConnections(A_p, degree_S, offset_S, S, theta,tau);
167 }
168 Esys_setError(SYSTEM_ERROR, "AMG:DONE.");
169 return NULL;
170 {
171 dim_t p;
172 for (i=0; i< my_n; ++i) {
173 printf("%d: ",i);
174 for (p=0; p<degree_S[i];++p) printf("%d ",S[offset_S[i]+p]);
175 printf("\n");
176 }
177 }
178
179
180 /*MPI:
181 Paso_Preconditioner_AMG_RungeStuebenSearch(n, A_p->pattern->ptr, degree_S, S, F_marker, options->usePanel);
182 */
183
184 /* in BoomerAMG interpolation is used FF connectiovity is required :*/
185 /*MPI:
186 if (options->interpolation_method == PASO_CLASSIC_INTERPOLATION_WITH_FF_COUPLING)
187 Paso_Preconditioner_AMG_enforceFFConnectivity(n, A_p->pattern->ptr, degree_S, S, F_marker);
188 */
189
190 options->coarsening_selection_time=Esys_timer()-time0 + MAX(0, options->coarsening_selection_time);
191
192 #ifdef AAAAA
193 if (Esys_noError() ) {
194 #pragma omp parallel for private(i) schedule(static)
195 for (i = 0; i < n; ++i) F_marker[i]=(F_marker[i] == PASO_AMG_IN_F);
196
197 /*
198 count number of unkowns to be eliminated:
199 */
200 n_F=Paso_Util_cumsum_maskedTrue(n,counter, F_marker);
201 n_C=n-n_F;
202 if (verbose) printf("Paso_Preconditioner: AMG level %d: %d unknowns are flagged for elimination. %d left.\n",level,n_F,n-n_F);
203
204 if ( n_F == 0 ) { /* is a nasty case. a direct solver should be used, return NULL */
205 out = NULL;
206 } else {
207 out=MEMALLOC(1,Paso_Preconditioner_AMG);
208 if (! Esys_checkPtr(out)) {
209 out->level = level;
210 out->n = n;
211 out->n_F = n_F;
212 out->n_block = n_block;
213 out->A_C = NULL;
214 out->P = NULL;
215 out->R = NULL;
216 out->post_sweeps = options->post_sweeps;
217 out->pre_sweeps = options->pre_sweeps;
218 out->r = NULL;
219 out->x_C = NULL;
220 out->b_C = NULL;
221 out->AMG_C = NULL;
222 out->Smoother=NULL;
223 }
224 mask_C=TMPMEMALLOC(n,index_t);
225 rows_in_F=TMPMEMALLOC(n_F,index_t);
226 Esys_checkPtr(mask_C);
227 Esys_checkPtr(rows_in_F);
228 if ( Esys_noError() ) {
229
230 out->Smoother = Paso_Preconditioner_Smoother_alloc(A_p, (options->smoother == PASO_JACOBI), verbose);
231
232 if (n_C != 0) {
233 /* if nothing is been removed we have a diagonal dominant matrix and we just run a few steps of the smoother */
234
235 /* allocate helpers :*/
236 out->x_C=MEMALLOC(n_block*n_C,double);
237 out->b_C=MEMALLOC(n_block*n_C,double);
238 out->r=MEMALLOC(n_block*n,double);
239
240 Esys_checkPtr(out->r);
241 Esys_checkPtr(out->x_C);
242 Esys_checkPtr(out->b_C);
243
244 if ( Esys_noError() ) {
245 /* creates index for F:*/
246 #pragma omp parallel private(i)
247 {
248 #pragma omp for schedule(static)
249 for (i = 0; i < n; ++i) {
250 if (F_marker[i]) rows_in_F[counter[i]]=i;
251 }
252 }
253 /* create mask of C nodes with value >-1 gives new id */
254 i=Paso_Util_cumsum_maskedFalse(n,counter, F_marker);
255
256 #pragma omp parallel for private(i) schedule(static)
257 for (i = 0; i < n; ++i) {
258 if (F_marker[i]) {
259 mask_C[i]=-1;
260 } else {
261 mask_C[i]=counter[i];;
262 }
263 }
264 /*
265 get Prolongation :
266 */
267 time0=Esys_timer();
268 /*MPI:
269 out->P=Paso_Preconditioner_AMG_getProlongation(A_p,A_p->pattern->ptr, degree_S,S,n_C,mask_C, options->interpolation_method);
270 */
271 if (SHOW_TIMING) printf("timing: level %d: getProlongation: %e\n",level, Esys_timer()-time0);
272 }
273 /*
274 construct Restriction operator as transposed of Prolongation operator:
275 */
276 if ( Esys_noError()) {
277 time0=Esys_timer();
278 /*MPI:
279 out->R=Paso_SystemMatrix_getTranspose(out->P);
280 */
281 if (SHOW_TIMING) printf("timing: level %d: Paso_SystemMatrix_getTranspose: %e\n",level,Esys_timer()-time0);
282 }
283 /*
284 construct coarse level matrix:
285 */
286 if ( Esys_noError()) {
287 time0=Esys_timer();
288 /*MPI:
289 Atemp=Paso_SystemMatrix_MatrixMatrix(A_p,out->P);
290 A_C=Paso_SystemMatrix_MatrixMatrix(out->R,Atemp);
291
292 Paso_SystemMatrix_free(Atemp);
293 */
294
295 if (SHOW_TIMING) printf("timing: level %d : construct coarse matrix: %e\n",level,Esys_timer()-time0);
296 }
297
298
299 /*
300 constructe courser level:
301
302 */
303 if ( Esys_noError()) {
304 out->AMG_C=Paso_Preconditioner_AMG_alloc(A_C,level+1,options);
305 }
306 if ( Esys_noError()) {
307 if ( out->AMG_C == NULL ) {
308 out->reordering = options->reordering;
309 out->refinements = options->coarse_matrix_refinements;
310 /* no coarse level matrix has been constructed. use direct solver */
311 #ifdef MKL
312 out->A_C=Paso_SystemMatrix_unroll(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_OFFSET1, A_C);
313 Paso_SystemMatrix_free(A_C);
314 out->A_C->solver_package = PASO_MKL;
315 if (verbose) printf("Paso_Preconditioner: AMG: use MKL direct solver on the coarsest level (number of unknowns = %d).\n",n_C*n_block);
316 #else
317 #ifdef UMFPACK
318 out->A_C=Paso_SystemMatrix_unroll(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_CSC, A_C);
319 Paso_SystemMatrix_free(A_C);
320 out->A_C->solver_package = PASO_UMFPACK;
321 if (verbose) printf("Paso_Preconditioner: AMG: use UMFPACK direct solver on the coarsest level (number of unknowns = %d).\n",n_C*n_block);
322 #else
323 out->A_C=A_C;
324 out->A_C->solver_p=Paso_Preconditioner_Smoother_alloc(out->A_C, (options->smoother == PASO_JACOBI), verbose);
325 out->A_C->solver_package = PASO_SMOOTHER;
326 if (verbose) printf("Paso_Preconditioner: AMG: use smoother on the coarsest level (number of unknowns = %d).\n",n_C*n_block);
327 #endif
328 #endif
329 } else {
330 /* finally we set some helpers for the solver step */
331 out->A_C=A_C;
332 }
333 }
334 }
335 }
336 TMPMEMFREE(mask_C);
337 TMPMEMFREE(rows_in_F);
338 }
339 }
340 #endif
341
342 }
343 TMPMEMFREE(counter);
344 TMPMEMFREE(F_marker);
345 TMPMEMFREE(degree_S);
346 TMPMEMFREE(offset_S);
347 TMPMEMFREE(S);
348 }
349
350 if (Esys_noError()) {
351 return out;
352 } else {
353 Paso_Preconditioner_AMG_free(out);
354 return NULL;
355 }
356 }
357
358
359 void Paso_Preconditioner_AMG_solve(Paso_SystemMatrix* A, Paso_Preconditioner_AMG * amg, double * x, double * b) {
360 const dim_t n = amg->n * amg->n_block;
361 double time0=0;
362 const dim_t post_sweeps=amg->post_sweeps;
363 const dim_t pre_sweeps=amg->pre_sweeps;
364
365 /* presmoothing */
366 time0=Esys_timer();
367 Paso_Preconditioner_Smoother_solve(A, amg->Smoother, x, b, pre_sweeps, FALSE);
368 time0=Esys_timer()-time0;
369 if (SHOW_TIMING) printf("timing: level %d: Presmooting: %e\n",amg->level, time0);
370 /* end of presmoothing */
371
372 if (amg->n_F < amg->n) { /* is there work on the coarse level? */
373 time0=Esys_timer();
374 Paso_Copy(n, amg->r, b); /* r <- b */
375 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(-1.,A,x,1.,amg->r); /*r=r-Ax*/
376 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(1.,amg->R,amg->r,0.,amg->b_C); /* b_c = R*r */
377 time0=Esys_timer()-time0;
378 /* coarse level solve */
379 if ( amg->AMG_C == NULL) {
380 time0=Esys_timer();
381 /* A_C is the coarsest level */
382 #ifdef FIXME
383 switch (amg->A_C->solver_package) {
384 case (PASO_MKL):
385 Paso_MKL(amg->A_C, amg->x_C,amg->b_C, amg->reordering, amg->refinements, SHOW_TIMING);
386 break;
387 case (PASO_UMFPACK):
388 Paso_UMFPACK(amg->A_C, amg->x_C,amg->b_C, amg->refinements, SHOW_TIMING);
389 break;
390 case (PASO_SMOOTHER):
391 Paso_Preconditioner_Smoother_solve(amg->A_C, amg->A_C->solver_p,amg->x_C,amg->b_C,pre_sweeps+post_sweeps, FALSE);
392 break;
393 }
394 #endif
395 Paso_Preconditioner_Smoother_solve(amg->A_C, amg->A_C->solver_p,amg->x_C,amg->b_C,pre_sweeps+post_sweeps, FALSE);
396 if (SHOW_TIMING) printf("timing: level %d: DIRECT SOLVER: %e\n",amg->level,Esys_timer()-time0);
397 } else {
398 Paso_Preconditioner_AMG_solve(amg->A_C, amg->AMG_C,amg->x_C,amg->b_C); /* x_C=AMG(b_C) */
399 }
400 time0=time0+Esys_timer();
401 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(1.,amg->P,amg->x_C,1.,x); /* x = x + P*x_c */
402
403 /*postsmoothing*/
404
405 /*solve Ax=b with initial guess x */
406 time0=Esys_timer();
407 Paso_Preconditioner_Smoother_solve(A, amg->Smoother, x, b, post_sweeps, TRUE);
408 time0=Esys_timer()-time0;
409 if (SHOW_TIMING) printf("timing: level %d: Postsmoothing: %e\n",amg->level,time0);
410 /*end of postsmoothing*/
411
412 }
413 return;
414 }
415
416 /* theta = threshold for strong connections */
417 /* tau = threshold for diagonal dominance */
418
419 /*S_i={j \in N_i; i strongly coupled to j}
420
421 in the sense that |A_{ij}| >= theta * max_k |A_{ik}|
422 */
423
424 void Paso_Preconditioner_AMG_setStrongConnections(Paso_SystemMatrix* A,
425 dim_t *degree_S, index_t *offset_S, index_t *S,
426 const double theta, const double tau)
427 {
428 const dim_t my_n=Paso_SystemMatrix_getNumRows(A);
429 index_t iptr, i;
430
431
432 #pragma omp parallel for private(i,iptr) schedule(static)
433 for (i=0;i<my_n;++i) {
434 register double max_offdiagonal = 0.;
435 register double sum_row=0;
436 register double main_row=0;
437 register dim_t kdeg=0;
438 register index_t koffset=A->mainBlock->pattern->ptr[i]+A->col_coupleBlock->pattern->ptr[i];
439 #pragma ivdep
440 for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
441 register index_t j=A->mainBlock->pattern->index[iptr];
442 register double fnorm=ABS(A->mainBlock->val[iptr]);
443
444 if( j != i) {
445 max_offdiagonal = MAX(max_offdiagonal,fnorm);
446 sum_row+=fnorm;
447 } else {
448 main_row=fnorm;
449 }
450
451 }
452 #pragma ivdep
453 for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
454 register double fnorm=ABS(A->col_coupleBlock->val[iptr]);
455 max_offdiagonal = MAX(max_offdiagonal,fnorm);
456 sum_row+=fnorm;
457 }
458
459 {
460 const double threshold = theta*max_offdiagonal;
461 if (tau*main_row < sum_row) { /* no diagonal domainance */
462 #pragma ivdep
463 for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
464 register index_t j=A->mainBlock->pattern->index[iptr];
465 if(ABS(A->mainBlock->val[iptr])>threshold && i!=j) {
466 S[koffset+kdeg] = j;
467 kdeg++;
468 }
469 }
470 #pragma ivdep
471 for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
472 register index_t j=A->col_coupleBlock->pattern->index[iptr];
473 if(ABS(A->col_coupleBlock->val[iptr])>threshold) {
474 S[koffset+kdeg] = j + my_n;
475 kdeg++;
476 }
477 }
478 }
479 }
480 offset_S[i]=koffset;
481 degree_S[i]=kdeg;
482 }
483 }
484
485 /* theta = threshold for strong connections */
486 /* tau = threshold for diagonal dominance */
487 /*S_i={j \in N_i; i strongly coupled to j}
488
489 in the sense that |A_{ij}|_F >= theta * max_k |A_{ik}|_F
490 */
491 void Paso_Preconditioner_AMG_setStrongConnections_Block(Paso_SystemMatrix* A,
492 dim_t *degree_S, index_t *offset_S, index_t *S,
493 const double theta, const double tau)
494
495 {
496 const dim_t my_n=Paso_SystemMatrix_getNumRows(A);
497 index_t iptr, i, bi;
498 const dim_t n_block=A->row_block_size;
499
500
501 #pragma omp parallel private(i,iptr, bi)
502 {
503 dim_t max_deg=0;
504 double *rtmp=TMPMEMALLOC(max_deg, double);
505
506 #pragma omp for schedule(static)
507 for (i=0;i<my_n;++i) max_deg=MAX(max_deg, A->mainBlock->pattern->ptr[i+1]-A->mainBlock->pattern->ptr[i]
508 +A->col_coupleBlock->pattern->ptr[i+1]-A->col_coupleBlock->pattern->ptr[i]);
509
510
511 #pragma omp for schedule(static)
512 for (i=0;i<my_n;++i) {
513
514 register double max_offdiagonal = 0.;
515 register double sum_row=0;
516 register double main_row=0;
517 register index_t rtmp_offset=-A->mainBlock->pattern->ptr[i];
518 register dim_t kdeg=0;
519 register index_t koffset=A->mainBlock->pattern->ptr[i]+A->col_coupleBlock->pattern->ptr[i];
520
521 for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
522 register index_t j=A->mainBlock->pattern->index[iptr];
523 register double fnorm=0;
524 #pragma ivdep
525 for(bi=0;bi<n_block*n_block;++bi) {
526 register double rtmp2 = A->mainBlock->val[iptr*n_block*n_block+bi];
527 fnorm+=rtmp2*rtmp2;
528 }
529 fnorm=sqrt(fnorm);
530
531 rtmp[iptr+rtmp_offset]=fnorm;
532 if( j != i) {
533 max_offdiagonal = MAX(max_offdiagonal,fnorm);
534 sum_row+=fnorm;
535 } else {
536 main_row=fnorm;
537 }
538 }
539 rtmp_offset=A->mainBlock->pattern->ptr[i+1]-A->mainBlock->pattern->ptr[i]-A->col_coupleBlock->pattern->ptr[i];
540 for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
541 register double fnorm=0;
542 #pragma ivdep
543 for(bi=0;bi<n_block*n_block;++bi) {
544 register double rtmp2 = A->col_coupleBlock->val[iptr*n_block*n_block+bi];
545 fnorm+=rtmp2*rtmp2;
546 }
547 fnorm=sqrt(fnorm);
548
549 rtmp[iptr+rtmp_offset]=fnorm;
550 max_offdiagonal = MAX(max_offdiagonal,fnorm);
551 sum_row+=fnorm;
552 }
553 {
554 const double threshold = theta*max_offdiagonal;
555
556 if (tau*main_row < sum_row) { /* no diagonal domainance */
557 rtmp_offset=-A->mainBlock->pattern->ptr[i];
558 #pragma ivdep
559 for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
560 register index_t j=A->mainBlock->pattern->index[iptr];
561 if(rtmp[iptr+rtmp_offset] > threshold && i!=j) {
562 S[koffset+kdeg] = j;
563 kdeg++;
564 }
565 }
566 rtmp_offset=A->mainBlock->pattern->ptr[i+1]-A->mainBlock->pattern->ptr[i]-A->col_coupleBlock->pattern->ptr[i];
567 #pragma ivdep
568 for (iptr=A->col_coupleBlock->pattern->ptr[i]; iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
569 register index_t j=A->col_coupleBlock->pattern->index[iptr];
570 if(rtmp[iptr+rtmp_offset] > threshold) {
571 S[koffset+kdeg] = j + my_n;
572 kdeg++;
573 }
574 }
575 }
576 }
577 degree_S[i]=kdeg;
578 offset_S[i]=koffset;
579 }
580 TMPMEMFREE(rtmp);
581 } /* end of parallel region */
582
583 }
584
585 #ifdef AAAAA
586 /* the runge stueben coarsening algorithm: */
587 void Paso_Preconditioner_AMG_RungeStuebenSearch(const dim_t n, const index_t* offset_S,
588 const dim_t* degree_S, const index_t* S,
589 index_t*split_marker, const bool_t usePanel)
590 {
591
592 index_t *lambda=NULL, *ST=NULL, *notInPanel=NULL, *panel=NULL, lambda_max, lambda_k;
593 dim_t i,k, p, q, *degree_ST=NULL, len_panel, len_panel_new;
594 register index_t j, itmp;
595
596 if (n<=0) return; /* make sure that the return of Paso_Util_arg_max is not pointing to nirvana */
597
598 lambda=TMPMEMALLOC(n, index_t); Esys_checkPtr(lambda);
599 degree_ST=TMPMEMALLOC(n, dim_t); Esys_checkPtr(degree_ST);
600 ST=TMPMEMALLOC(offset_S[n], index_t); Esys_checkPtr(ST);
601 if (usePanel) {
602 notInPanel=TMPMEMALLOC(n, bool_t); Esys_checkPtr(notInPanel);
603 panel=TMPMEMALLOC(n, index_t); Esys_checkPtr(panel);
604 }
605
606
607
608 if (Esys_noError() ) {
609 /* initialize split_marker and split_marker :*/
610 /* those unknows which are not influenced go into F, the rest is available for F or C */
611 #pragma omp parallel for private(i) schedule(static)
612 for (i=0;i<n;++i) {
613 degree_ST[i]=0;
614 if (degree_S[i]>0) {
615 lambda[i]=0;
616 split_marker[i]=PASO_AMG_UNDECIDED;
617 } else {
618 split_marker[i]=PASO_AMG_IN_F;
619 lambda[i]=-1;
620 }
621 }
622 /* create transpose :*/
623 for (i=0;i<n;++i) {
624 for (p=0; p<degree_S[i]; ++p) {
625 j=S[offset_S[i]+p];
626 ST[offset_S[j]+degree_ST[j]]=i;
627 degree_ST[j]++;
628 }
629 }
630 /* lambda[i] = |undecided k in ST[i]| + 2 * |F-unknown in ST[i]| */
631 #pragma omp parallel for private(i, j, itmp) schedule(static)
632 for (i=0;i<n;++i) {
633 if (split_marker[i]==PASO_AMG_UNDECIDED) {
634 itmp=lambda[i];
635 for (p=0; p<degree_ST[i]; ++p) {
636 j=ST[offset_S[i]+p];
637 if (split_marker[j]==PASO_AMG_UNDECIDED) {
638 itmp++;
639 } else { /* at this point there are no C points */
640 itmp+=2;
641 }
642 }
643 lambda[i]=itmp;
644 }
645 }
646 if (usePanel) {
647 #pragma omp parallel for private(i) schedule(static)
648 for (i=0;i<n;++i) notInPanel[i]=TRUE;
649 }
650 /* start search :*/
651 i=Paso_Util_arg_max(n,lambda);
652 while (lambda[i]>-1) { /* is there any undecided unknown? */
653
654 if (usePanel) {
655 len_panel=0;
656 do {
657 /* the unknown i is moved to C */
658 split_marker[i]=PASO_AMG_IN_C;
659 lambda[i]=-1; /* lambda from unavailable unknowns is set to -1 */
660
661 /* all undecided unknown strongly coupled to i are moved to F */
662 for (p=0; p<degree_ST[i]; ++p) {
663 j=ST[offset_S[i]+p];
664
665 if (split_marker[j]==PASO_AMG_UNDECIDED) {
666
667 split_marker[j]=PASO_AMG_IN_F;
668 lambda[j]=-1;
669
670 for (q=0; q<degree_ST[j]; ++q) {
671 k=ST[offset_S[j]+q];
672 if (split_marker[k]==PASO_AMG_UNDECIDED) {
673 lambda[k]++;
674 if (notInPanel[k]) {
675 notInPanel[k]=FALSE;
676 panel[len_panel]=k;
677 len_panel++;
678 }
679
680 } /* the unknown i is moved to C */
681 split_marker[i]=PASO_AMG_IN_C;
682 lambda[i]=-1; /* lambda from unavailable unknowns is set to -1 */
683 }
684
685 }
686 }
687 for (p=0; p<degree_S[i]; ++p) {
688 j=S[offset_S[i]+p];
689 if (split_marker[j]==PASO_AMG_UNDECIDED) {
690 lambda[j]--;
691 if (notInPanel[j]) {
692 notInPanel[j]=FALSE;
693 panel[len_panel]=j;
694 len_panel++;
695 }
696 }
697 }
698
699 /* consolidate panel */
700 /* remove lambda[q]=-1 */
701 lambda_max=-1;
702 i=-1;
703 len_panel_new=0;
704 for (q=0; q<len_panel; q++) {
705 k=panel[q];
706 lambda_k=lambda[k];
707 if (split_marker[k]==PASO_AMG_UNDECIDED) {
708 panel[len_panel_new]=k;
709 len_panel_new++;
710
711 if (lambda_max == lambda_k) {
712 if (k<i) i=k;
713 } else if (lambda_max < lambda_k) {
714 lambda_max =lambda_k;
715 i=k;
716 }
717 }
718 }
719 len_panel=len_panel_new;
720 } while (len_panel>0);
721 } else {
722 /* the unknown i is moved to C */
723 split_marker[i]=PASO_AMG_IN_C;
724 lambda[i]=-1; /* lambda from unavailable unknowns is set to -1 */
725
726 /* all undecided unknown strongly coupled to i are moved to F */
727 for (p=0; p<degree_ST[i]; ++p) {
728 j=ST[offset_S[i]+p];
729 if (split_marker[j]==PASO_AMG_UNDECIDED) {
730
731 split_marker[j]=PASO_AMG_IN_F;
732 lambda[j]=-1;
733
734 for (q=0; q<degree_ST[j]; ++q) {
735 k=ST[offset_S[j]+q];
736 if (split_marker[k]==PASO_AMG_UNDECIDED) lambda[k]++;
737 }
738
739 }
740 }
741 for (p=0; p<degree_S[i]; ++p) {
742 j=S[offset_S[i]+p];
743 if(split_marker[j]==PASO_AMG_UNDECIDED) lambda[j]--;
744 }
745
746 }
747 i=Paso_Util_arg_max(n,lambda);
748 }
749
750 }
751 TMPMEMFREE(lambda);
752 TMPMEMFREE(ST);
753 TMPMEMFREE(degree_ST);
754 TMPMEMFREE(panel);
755 TMPMEMFREE(notInPanel);
756 }
757 /* ensures that two F nodes are connected via a C node :*/
758 void Paso_Preconditioner_AMG_enforceFFConnectivity(const dim_t n, const index_t* offset_S,
759 const dim_t* degree_S, const index_t* S,
760 index_t*split_marker)
761 {
762 dim_t i, p, q;
763
764 /* now we make sure that two (strongly) connected F nodes are (strongly) connected via a C node. */
765 for (i=0;i<n;++i) {
766 if ( (split_marker[i]==PASO_AMG_IN_F) && (degree_S[i]>0) ) {
767 for (p=0; p<degree_S[i]; ++p) {
768 register index_t j=S[offset_S[i]+p];
769 if ( (split_marker[j]==PASO_AMG_IN_F) && (degree_S[j]>0) ) {
770 /* i and j are now two F nodes which are strongly connected */
771 /* is there a C node they share ? */
772 register index_t sharing=-1;
773 for (q=0; q<degree_S[i]; ++q) {
774 index_t k=S[offset_S[i]+q];
775 if (split_marker[k]==PASO_AMG_IN_C) {
776 register index_t* where_k=(index_t*)bsearch(&k, &(S[offset_S[j]]), degree_S[j], sizeof(index_t), Paso_comparIndex);
777 if (where_k != NULL) {
778 sharing=k;
779 break;
780 }
781 }
782 }
783 if (sharing<0) {
784 if (i<j) {
785 split_marker[j]=PASO_AMG_IN_C;
786 } else {
787 split_marker[i]=PASO_AMG_IN_C;
788 break; /* no point to look any further as i is now a C node */
789 }
790 }
791 }
792 }
793 }
794 }
795 }
796 #endif
797
798 #ifdef DFG
799 void Paso_Preconditioner_AMG_CIJPCoarsening( )
800 {
801
802
803 const dim_t my_n;
804 const dim_t overlap_n;
805 const dim_t n= my_n + overlap_n;
806
807
808 /* initialize split_marker and split_marker :*/
809 /* those unknows which are not influenced go into F, the rest is available for F or C */
810 #pragma omp parallel for private(i) schedule(static)
811 for (i=0;i<n;++i) {
812 degree_ST[i]=0;
813 if (degree_S[i]>0) {
814 lambda[i]=0;
815 split_marker[i]=PASO_AMG_UNDECIDED;
816 } else {
817 split_marker[i]=PASO_AMG_IN_F;
818 lambda[i]=-1;
819 }
820 }
821
822 /* set local lambda + overlap */
823 #pragma omp parallel for private(i)
824 for (i=0; i<n ++i) {
825 w[i]=degree_ST[i];
826 }
827 for (i=0; i<my_n; i++) {
828 w2[i]=random;
829 }
830
831
832 /* add noise to w */
833 Paso_Coupler_add(n, w, 1., w2, col_coupler);
834
835 /* */
836 global_n_C=0;
837 global_n_F=..;
838
839 while (global_n_C + global_n_F < global_n) {
840
841
842 is_in_D[i]=FALSE;
843 /* test local connectivit*/
844 /* w2[i] = max(w[k] | k in S_i or k in S^T_i */
845 #pragma omp parallel for private(i)
846 for (i=0; i<n; ++i) w2[i]=0;
847
848 for (i=0; i<my_n; ++i) {
849 for( iPtr =0 ; iPtr < degree_S[i]; ++iPtr) {
850 k=S[offset_S[i]+iPtr];
851 w2[i]=MAX(w2[i],w[k]);
852 w2[k]=MAX(w2[k],w[i]);
853 }
854 }
855 /* adjust overlaps by MAX */
856 Paso_Coupler_max(n, w2, col_coupler);
857
858 /* points with w[i]>w2[i] become C nodes */
859 }
860
861 }
862 #endif

  ViewVC Help
Powered by ViewVC 1.1.26