/[escript]/trunk/paso/src/AMG.c
ViewVC logotype

Contents of /trunk/paso/src/AMG.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3449 - (show annotations)
Fri Jan 21 05:48:15 2011 UTC (8 years, 10 months ago) by gross
File MIME type: text/plain
File size: 27513 byte(s)
compile problem in AMG fixed.
1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2010 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14
15 /**************************************************************/
16
17 /* Paso: AMG preconditioner (local version) */
18
19 /**************************************************************/
20
21 /* Author: artak@uq.edu.au, l.gross@uq.edu.au */
22
23 /**************************************************************/
24
25 #define SHOW_TIMING FALSE
26
27 #include "Paso.h"
28 #include "Preconditioner.h"
29 #include "Options.h"
30 #include "PasoUtil.h"
31 #include "UMFPACK.h"
32 #include "MKL.h"
33
34 /**************************************************************/
35
36 /* free all memory used by AMG */
37
38 void Paso_Preconditioner_AMG_free(Paso_Preconditioner_AMG * in) {
39 if (in!=NULL) {
40 Paso_Preconditioner_Smoother_free(in->Smoother);
41 Paso_SystemMatrix_free(in->P);
42 Paso_SystemMatrix_free(in->R);
43 Paso_SystemMatrix_free(in->A_C);
44 Paso_Preconditioner_AMG_free(in->AMG_C);
45 MEMFREE(in->r);
46 MEMFREE(in->x_C);
47 MEMFREE(in->b_C);
48
49 MEMFREE(in);
50 }
51 }
52
53 index_t Paso_Preconditioner_AMG_getMaxLevel(const Paso_Preconditioner_AMG * in) {
54 if (in->AMG_C == NULL) {
55 return in->level;
56 } else {
57 return Paso_Preconditioner_AMG_getMaxLevel(in->AMG_C);
58 }
59 }
60 double Paso_Preconditioner_AMG_getCoarseLevelSparsity(const Paso_Preconditioner_AMG * in) {
61 if (in->AMG_C == NULL) {
62 if (in->A_C == NULL) {
63 return 1.;
64 } else {
65 return Paso_SystemMatrix_getSparsity(in->A_C);
66 }
67 } else {
68 return Paso_Preconditioner_AMG_getCoarseLevelSparsity(in->AMG_C);
69 }
70 }
71 dim_t Paso_Preconditioner_AMG_getNumCoarseUnknwons(const Paso_Preconditioner_AMG * in) {
72 if (in->AMG_C == NULL) {
73 if (in->A_C == NULL) {
74 return 0;
75 } else {
76 return Paso_SystemMatrix_getTotalNumRows(in->A_C);
77 }
78 } else {
79 return Paso_Preconditioner_AMG_getNumCoarseUnknwons(in->AMG_C);
80 }
81 }
82 /*****************************************************************
83
84 constructs AMG
85
86 ******************************************************************/
87 Paso_Preconditioner_AMG* Paso_Preconditioner_AMG_alloc(Paso_SystemMatrix *A_p,dim_t level,Paso_Options* options) {
88
89 Paso_Preconditioner_AMG* out=NULL;
90 bool_t verbose=options->verbose;
91 Paso_SystemMatrix *Atemp=NULL, *A_C=NULL;
92
93 const dim_t my_n=Paso_SystemMatrix_getNumRows(A_p);
94 const dim_t overlap_n=Paso_SystemMatrix_getColOverlap(A_p);
95 const dim_t n = my_n + overlap_n;
96
97 const dim_t n_block=A_p->row_block_size;
98 index_t* F_marker=NULL, *counter=NULL, *mask_C=NULL, *rows_in_F=NULL;
99 dim_t n_F=0, n_C=0, i;
100 double time0=0;
101 const double theta = options->coarsening_threshold;
102 const double tau = options->diagonal_dominance_threshold;
103 const double sparsity=Paso_SystemMatrix_getSparsity(A_p);
104 const dim_t total_n=Paso_SystemMatrix_getGlobalTotalNumRows(A_p);
105
106
107 /*
108 is the input matrix A suitable for coarsening
109
110 */
111 if ( (sparsity >= options->min_coarse_sparsity) ||
112 (total_n <= options->min_coarse_matrix_size) ||
113 (level > options->level_max) ) {
114
115 if (verbose) {
116 /*
117 print stopping condition:
118 - 'SPAR' = min_coarse_matrix_sparsity exceeded
119 - 'SIZE' = min_coarse_matrix_size exceeded
120 - 'LEVEL' = level_max exceeded
121 */
122 printf("Paso_Preconditioner: AMG: termination of coarsening by ");
123
124 if (sparsity >= options->min_coarse_sparsity)
125 printf("SPAR ");
126
127 if (total_n <= options->min_coarse_matrix_size)
128 printf("SIZE ");
129
130 if (level > options->level_max)
131 printf("LEVEL ");
132
133 printf("\n");
134
135 printf("Paso_Preconditioner: AMG level %d (limit = %d) stopped. sparsity = %e (limit = %e), unknowns = %d (limit = %d)\n",
136 level, options->level_max, sparsity, options->min_coarse_sparsity, total_n, options->min_coarse_matrix_size);
137
138 }
139
140 return NULL;
141 } else {
142 /* Start Coarsening : */
143 const dim_t len_S=A_p->mainBlock->pattern->len+A_p->col_coupleBlock->pattern->len;
144
145 F_marker=TMPMEMALLOC(n,index_t);
146 counter=TMPMEMALLOC(n,index_t);
147
148 dim_t* degree_S=TMPMEMALLOC(my_n, dim_t);
149 index_t *offset_S=TMPMEMALLOC(my_n, index_t);
150 index_t *S=TMPMEMALLOC(len_S, index_t);
151 if ( !( Esys_checkPtr(F_marker) || Esys_checkPtr(counter) || Esys_checkPtr(degree_S) || Esys_checkPtr(offset_S) || Esys_checkPtr(S) ) ) {
152 /*
153 set splitting of unknows:
154
155 */
156 time0=Esys_timer();
157 if (n_block>1) {
158 Paso_Preconditioner_AMG_setStrongConnections_Block(A_p, degree_S, offset_S, S, theta,tau);
159 } else {
160 Paso_Preconditioner_AMG_setStrongConnections(A_p, degree_S, offset_S, S, theta,tau);
161 }
162 {
163 dim_t p;
164 for (i=0; i< my_n; ++i) {
165 printf("%d: ",i);
166 for (p=0; p<degree_S[i];++p) printf("%d ",S[offset_S[i]+p]);
167 printf("\n");
168 }
169 }
170
171
172 /*MPI:
173 Paso_Preconditioner_AMG_RungeStuebenSearch(n, A_p->pattern->ptr, degree_S, S, F_marker, options->usePanel);
174 */
175
176 /* in BoomerAMG interpolation is used FF connectiovity is required :*/
177 /*MPI:
178 if (options->interpolation_method == PASO_CLASSIC_INTERPOLATION_WITH_FF_COUPLING)
179 Paso_Preconditioner_AMG_enforceFFConnectivity(n, A_p->pattern->ptr, degree_S, S, F_marker);
180 */
181
182 options->coarsening_selection_time=Esys_timer()-time0 + MAX(0, options->coarsening_selection_time);
183
184 #ifdef AAAAA
185 if (Esys_noError() ) {
186 #pragma omp parallel for private(i) schedule(static)
187 for (i = 0; i < n; ++i) F_marker[i]=(F_marker[i] == PASO_AMG_IN_F);
188
189 /*
190 count number of unkowns to be eliminated:
191 */
192 n_F=Paso_Util_cumsum_maskedTrue(n,counter, F_marker);
193 n_C=n-n_F;
194 if (verbose) printf("Paso_Preconditioner: AMG level %d: %d unknowns are flagged for elimination. %d left.\n",level,n_F,n-n_F);
195
196 if ( n_F == 0 ) { /* is a nasty case. a direct solver should be used, return NULL */
197 out = NULL;
198 } else {
199 out=MEMALLOC(1,Paso_Preconditioner_AMG);
200 if (! Esys_checkPtr(out)) {
201 out->level = level;
202 out->n = n;
203 out->n_F = n_F;
204 out->n_block = n_block;
205 out->A_C = NULL;
206 out->P = NULL;
207 out->R = NULL;
208 out->post_sweeps = options->post_sweeps;
209 out->pre_sweeps = options->pre_sweeps;
210 out->r = NULL;
211 out->x_C = NULL;
212 out->b_C = NULL;
213 out->AMG_C = NULL;
214 out->Smoother=NULL;
215 }
216 mask_C=TMPMEMALLOC(n,index_t);
217 rows_in_F=TMPMEMALLOC(n_F,index_t);
218 Esys_checkPtr(mask_C);
219 Esys_checkPtr(rows_in_F);
220 if ( Esys_noError() ) {
221
222 out->Smoother = Paso_Preconditioner_Smoother_alloc(A_p, (options->smoother == PASO_JACOBI), verbose);
223
224 if (n_C != 0) {
225 /* if nothing is been removed we have a diagonal dominant matrix and we just run a few steps of the smoother */
226
227 /* allocate helpers :*/
228 out->x_C=MEMALLOC(n_block*n_C,double);
229 out->b_C=MEMALLOC(n_block*n_C,double);
230 out->r=MEMALLOC(n_block*n,double);
231
232 Esys_checkPtr(out->r);
233 Esys_checkPtr(out->x_C);
234 Esys_checkPtr(out->b_C);
235
236 if ( Esys_noError() ) {
237 /* creates index for F:*/
238 #pragma omp parallel private(i)
239 {
240 #pragma omp for schedule(static)
241 for (i = 0; i < n; ++i) {
242 if (F_marker[i]) rows_in_F[counter[i]]=i;
243 }
244 }
245 /* create mask of C nodes with value >-1 gives new id */
246 i=Paso_Util_cumsum_maskedFalse(n,counter, F_marker);
247
248 #pragma omp parallel for private(i) schedule(static)
249 for (i = 0; i < n; ++i) {
250 if (F_marker[i]) {
251 mask_C[i]=-1;
252 } else {
253 mask_C[i]=counter[i];;
254 }
255 }
256 /*
257 get Prolongation :
258 */
259 time0=Esys_timer();
260 /*MPI:
261 out->P=Paso_Preconditioner_AMG_getProlongation(A_p,A_p->pattern->ptr, degree_S,S,n_C,mask_C, options->interpolation_method);
262 */
263 if (SHOW_TIMING) printf("timing: level %d: getProlongation: %e\n",level, Esys_timer()-time0);
264 }
265 /*
266 construct Restriction operator as transposed of Prolongation operator:
267 */
268 if ( Esys_noError()) {
269 time0=Esys_timer();
270 /*MPI:
271 out->R=Paso_SystemMatrix_getTranspose(out->P);
272 */
273 if (SHOW_TIMING) printf("timing: level %d: Paso_SystemMatrix_getTranspose: %e\n",level,Esys_timer()-time0);
274 }
275 /*
276 construct coarse level matrix:
277 */
278 if ( Esys_noError()) {
279 time0=Esys_timer();
280 /*MPI:
281 Atemp=Paso_SystemMatrix_MatrixMatrix(A_p,out->P);
282 A_C=Paso_SystemMatrix_MatrixMatrix(out->R,Atemp);
283
284 Paso_SystemMatrix_free(Atemp);
285 */
286
287 if (SHOW_TIMING) printf("timing: level %d : construct coarse matrix: %e\n",level,Esys_timer()-time0);
288 }
289
290
291 /*
292 constructe courser level:
293
294 */
295 if ( Esys_noError()) {
296 out->AMG_C=Paso_Preconditioner_AMG_alloc(A_C,level+1,options);
297 }
298 if ( Esys_noError()) {
299 if ( out->AMG_C == NULL ) {
300 out->reordering = options->reordering;
301 out->refinements = options->coarse_matrix_refinements;
302 /* no coarse level matrix has been constructed. use direct solver */
303 #ifdef MKL
304 out->A_C=Paso_SystemMatrix_unroll(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_OFFSET1, A_C);
305 Paso_SystemMatrix_free(A_C);
306 out->A_C->solver_package = PASO_MKL;
307 if (verbose) printf("Paso_Preconditioner: AMG: use MKL direct solver on the coarsest level (number of unknowns = %d).\n",n_C*n_block);
308 #else
309 #ifdef UMFPACK
310 out->A_C=Paso_SystemMatrix_unroll(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_CSC, A_C);
311 Paso_SystemMatrix_free(A_C);
312 out->A_C->solver_package = PASO_UMFPACK;
313 if (verbose) printf("Paso_Preconditioner: AMG: use UMFPACK direct solver on the coarsest level (number of unknowns = %d).\n",n_C*n_block);
314 #else
315 out->A_C=A_C;
316 out->A_C->solver_p=Paso_Preconditioner_Smoother_alloc(out->A_C, (options->smoother == PASO_JACOBI), verbose);
317 out->A_C->solver_package = PASO_SMOOTHER;
318 if (verbose) printf("Paso_Preconditioner: AMG: use smoother on the coarsest level (number of unknowns = %d).\n",n_C*n_block);
319 #endif
320 #endif
321 } else {
322 /* finally we set some helpers for the solver step */
323 out->A_C=A_C;
324 }
325 }
326 }
327 }
328 TMPMEMFREE(mask_C);
329 TMPMEMFREE(rows_in_F);
330 }
331 }
332 #endif
333
334 }
335 TMPMEMFREE(counter);
336 TMPMEMFREE(F_marker);
337 TMPMEMFREE(degree_S);
338 TMPMEMFREE(offset_S);
339 TMPMEMFREE(S);
340 }
341
342 if (Esys_noError()) {
343 return out;
344 } else {
345 Paso_Preconditioner_AMG_free(out);
346 return NULL;
347 }
348 }
349
350
351 void Paso_Preconditioner_AMG_solve(Paso_SystemMatrix* A, Paso_Preconditioner_AMG * amg, double * x, double * b) {
352 const dim_t n = amg->n * amg->n_block;
353 double time0=0;
354 const dim_t post_sweeps=amg->post_sweeps;
355 const dim_t pre_sweeps=amg->pre_sweeps;
356
357 /* presmoothing */
358 time0=Esys_timer();
359 Paso_Preconditioner_Smoother_solve(A, amg->Smoother, x, b, pre_sweeps, FALSE);
360 time0=Esys_timer()-time0;
361 if (SHOW_TIMING) printf("timing: level %d: Presmooting: %e\n",amg->level, time0);
362 /* end of presmoothing */
363
364 if (amg->n_F < amg->n) { /* is there work on the coarse level? */
365 time0=Esys_timer();
366 Paso_Copy(n, amg->r, b); /* r <- b */
367 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(-1.,A,x,1.,amg->r); /*r=r-Ax*/
368 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(1.,amg->R,amg->r,0.,amg->b_C); /* b_c = R*r */
369 time0=Esys_timer()-time0;
370 /* coarse level solve */
371 if ( amg->AMG_C == NULL) {
372 time0=Esys_timer();
373 /* A_C is the coarsest level */
374 #ifdef FIXME
375 switch (amg->A_C->solver_package) {
376 case (PASO_MKL):
377 Paso_MKL(amg->A_C, amg->x_C,amg->b_C, amg->reordering, amg->refinements, SHOW_TIMING);
378 break;
379 case (PASO_UMFPACK):
380 Paso_UMFPACK(amg->A_C, amg->x_C,amg->b_C, amg->refinements, SHOW_TIMING);
381 break;
382 case (PASO_SMOOTHER):
383 Paso_Preconditioner_Smoother_solve(amg->A_C, amg->A_C->solver_p,amg->x_C,amg->b_C,pre_sweeps+post_sweeps, FALSE);
384 break;
385 }
386 #endif
387 Paso_Preconditioner_Smoother_solve(amg->A_C, amg->A_C->solver_p,amg->x_C,amg->b_C,pre_sweeps+post_sweeps, FALSE);
388 if (SHOW_TIMING) printf("timing: level %d: DIRECT SOLVER: %e\n",amg->level,Esys_timer()-time0);
389 } else {
390 Paso_Preconditioner_AMG_solve(amg->A_C, amg->AMG_C,amg->x_C,amg->b_C); /* x_C=AMG(b_C) */
391 }
392 time0=time0+Esys_timer();
393 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(1.,amg->P,amg->x_C,1.,x); /* x = x + P*x_c */
394
395 /*postsmoothing*/
396
397 /*solve Ax=b with initial guess x */
398 time0=Esys_timer();
399 Paso_Preconditioner_Smoother_solve(A, amg->Smoother, x, b, post_sweeps, TRUE);
400 time0=Esys_timer()-time0;
401 if (SHOW_TIMING) printf("timing: level %d: Postsmoothing: %e\n",amg->level,time0);
402 /*end of postsmoothing*/
403
404 }
405 return;
406 }
407
408 /* theta = threshold for strong connections */
409 /* tau = threshold for diagonal dominance */
410
411 /*S_i={j \in N_i; i strongly coupled to j}
412
413 in the sense that |A_{ij}| >= theta * max_k |A_{ik}|
414 */
415
416 void Paso_Preconditioner_AMG_setStrongConnections(Paso_SystemMatrix* A,
417 dim_t *degree_S, index_t *offset_S, index_t *S,
418 const double theta, const double tau)
419 {
420 const dim_t my_n=Paso_SystemMatrix_getNumRows(A);
421 index_t iptr, i;
422
423
424 #pragma omp parallel for private(i,iptr) schedule(static)
425 for (i=0;i<my_n;++i) {
426 register double max_offdiagonal = 0.;
427 register double sum_row=0;
428 register double main_row=0;
429 #pragma ivdep
430 for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
431 register index_t j=A->mainBlock->pattern->index[iptr];
432 register double fnorm=ABS(A->mainBlock->val[iptr]);
433
434 if( j != i) {
435 max_offdiagonal = MAX(max_offdiagonal,fnorm);
436 sum_row+=fnorm;
437 } else {
438 main_row=fnorm;
439 }
440
441 }
442 #pragma ivdep
443 for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
444 register index_t j=A->col_coupleBlock->pattern->index[iptr];
445 register double fnorm=ABS(A->col_coupleBlock->val[iptr]);
446 max_offdiagonal = MAX(max_offdiagonal,fnorm);
447 sum_row+=fnorm;
448 }
449
450
451 const double threshold = theta*max_offdiagonal;
452 register dim_t kdeg=0;
453 register index_t koffset=A->mainBlock->pattern->ptr[i]+A->col_coupleBlock->pattern->ptr[i];
454 if (tau*main_row < sum_row) { /* no diagonal domainance */
455 #pragma ivdep
456 for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
457 register index_t j=A->mainBlock->pattern->index[iptr];
458 if(ABS(A->mainBlock->val[iptr])>threshold && i!=j) {
459 S[koffset+kdeg] = j;
460 kdeg++;
461 }
462 }
463 #pragma ivdep
464 for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
465 register index_t j=A->col_coupleBlock->pattern->index[iptr];
466 if(ABS(A->col_coupleBlock->val[iptr])>threshold) {
467 S[koffset+kdeg] = j + my_n;
468 kdeg++;
469 }
470 }
471 }
472 offset_S[i]=koffset;
473 degree_S[i]=kdeg;
474 }
475 }
476
477 /* theta = threshold for strong connections */
478 /* tau = threshold for diagonal dominance */
479 /*S_i={j \in N_i; i strongly coupled to j}
480
481 in the sense that |A_{ij}|_F >= theta * max_k |A_{ik}|_F
482 */
483 void Paso_Preconditioner_AMG_setStrongConnections_Block(Paso_SystemMatrix* A,
484 dim_t *degree_S, index_t *offset_S, index_t *S,
485 const double theta, const double tau)
486
487 {
488 const dim_t my_n=Paso_SystemMatrix_getNumRows(A);
489 index_t iptr, i, bi;
490 const dim_t n_block=A->row_block_size;
491
492
493 #pragma omp parallel private(i,iptr, bi)
494 {
495 dim_t max_deg=0;
496 #pragma omp for schedule(static)
497 for (i=0;i<my_n;++i) max_deg=MAX(max_deg, A->mainBlock->pattern->ptr[i+1]-A->mainBlock->pattern->ptr[i]
498 +A->col_coupleBlock->pattern->ptr[i+1]-A->col_coupleBlock->pattern->ptr[i]);
499
500 double *rtmp=TMPMEMALLOC(max_deg, double);
501
502 #pragma omp for schedule(static)
503 for (i=0;i<my_n;++i) {
504
505 register double max_offdiagonal = 0.;
506 register double sum_row=0;
507 register double main_row=0;
508 register index_t rtmp_offset=-A->mainBlock->pattern->ptr[i];
509
510 for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
511 register index_t j=A->mainBlock->pattern->index[iptr];
512 register double fnorm=0;
513 #pragma ivdep
514 for(bi=0;bi<n_block*n_block;++bi) {
515 register double rtmp2 = A->mainBlock->val[iptr*n_block*n_block+bi];
516 fnorm+=rtmp2*rtmp2;
517 }
518 fnorm=sqrt(fnorm);
519
520 rtmp[iptr+rtmp_offset]=fnorm;
521 if( j != i) {
522 max_offdiagonal = MAX(max_offdiagonal,fnorm);
523 sum_row+=fnorm;
524 } else {
525 main_row=fnorm;
526 }
527 }
528 rtmp_offset=A->mainBlock->pattern->ptr[i+1]-A->mainBlock->pattern->ptr[i]-A->col_coupleBlock->pattern->ptr[i];
529 for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
530 register index_t j=A->col_coupleBlock->pattern->index[iptr];
531 register double fnorm=0;
532 #pragma ivdep
533 for(bi=0;bi<n_block*n_block;++bi) {
534 register double rtmp2 = A->col_coupleBlock->val[iptr*n_block*n_block+bi];
535 fnorm+=rtmp2*rtmp2;
536 }
537 fnorm=sqrt(fnorm);
538
539 rtmp[iptr+rtmp_offset]=fnorm;
540 max_offdiagonal = MAX(max_offdiagonal,fnorm);
541 sum_row+=fnorm;
542 }
543
544 const double threshold = theta*max_offdiagonal;
545 register dim_t kdeg=0;
546 register index_t koffset=A->mainBlock->pattern->ptr[i]+A->col_coupleBlock->pattern->ptr[i];
547
548 if (tau*main_row < sum_row) { /* no diagonal domainance */
549 rtmp_offset=-A->mainBlock->pattern->ptr[i];
550 #pragma ivdep
551 for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
552 register index_t j=A->mainBlock->pattern->index[iptr];
553 if(rtmp[iptr+rtmp_offset] > threshold && i!=j) {
554 S[koffset+kdeg] = j;
555 kdeg++;
556 }
557 }
558 rtmp_offset=A->mainBlock->pattern->ptr[i+1]-A->mainBlock->pattern->ptr[i]-A->col_coupleBlock->pattern->ptr[i];
559 #pragma ivdep
560 for (iptr=A->col_coupleBlock->pattern->ptr[i]; iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
561 register index_t j=A->col_coupleBlock->pattern->index[iptr];
562 if(rtmp[iptr+rtmp_offset] > threshold) {
563 S[koffset+kdeg] = j + my_n;
564 kdeg++;
565 }
566 }
567
568 }
569 degree_S[i]=kdeg;
570 offset_S[i]=koffset;
571 }
572 TMPMEMFREE(rtmp);
573 } /* end of parallel region */
574
575 }
576
577 #ifdef AAAAA
578 /* the runge stueben coarsening algorithm: */
579 void Paso_Preconditioner_AMG_RungeStuebenSearch(const dim_t n, const index_t* offset_S,
580 const dim_t* degree_S, const index_t* S,
581 index_t*split_marker, const bool_t usePanel)
582 {
583
584 index_t *lambda=NULL, *ST=NULL, *notInPanel=NULL, *panel=NULL, lambda_max, lambda_k;
585 dim_t i,k, p, q, *degree_ST=NULL, len_panel, len_panel_new;
586 register index_t j, itmp;
587
588 if (n<=0) return; /* make sure that the return of Paso_Util_arg_max is not pointing to nirvana */
589
590 lambda=TMPMEMALLOC(n, index_t); Esys_checkPtr(lambda);
591 degree_ST=TMPMEMALLOC(n, dim_t); Esys_checkPtr(degree_ST);
592 ST=TMPMEMALLOC(offset_S[n], index_t); Esys_checkPtr(ST);
593 if (usePanel) {
594 notInPanel=TMPMEMALLOC(n, bool_t); Esys_checkPtr(notInPanel);
595 panel=TMPMEMALLOC(n, index_t); Esys_checkPtr(panel);
596 }
597
598
599
600 if (Esys_noError() ) {
601 /* initialize split_marker and split_marker :*/
602 /* those unknows which are not influenced go into F, the rest is available for F or C */
603 #pragma omp parallel for private(i) schedule(static)
604 for (i=0;i<n;++i) {
605 degree_ST[i]=0;
606 if (degree_S[i]>0) {
607 lambda[i]=0;
608 split_marker[i]=PASO_AMG_UNDECIDED;
609 } else {
610 split_marker[i]=PASO_AMG_IN_F;
611 lambda[i]=-1;
612 }
613 }
614 /* create transpose :*/
615 for (i=0;i<n;++i) {
616 for (p=0; p<degree_S[i]; ++p) {
617 j=S[offset_S[i]+p];
618 ST[offset_S[j]+degree_ST[j]]=i;
619 degree_ST[j]++;
620 }
621 }
622 /* lambda[i] = |undecided k in ST[i]| + 2 * |F-unknown in ST[i]| */
623 #pragma omp parallel for private(i, j, itmp) schedule(static)
624 for (i=0;i<n;++i) {
625 if (split_marker[i]==PASO_AMG_UNDECIDED) {
626 itmp=lambda[i];
627 for (p=0; p<degree_ST[i]; ++p) {
628 j=ST[offset_S[i]+p];
629 if (split_marker[j]==PASO_AMG_UNDECIDED) {
630 itmp++;
631 } else { /* at this point there are no C points */
632 itmp+=2;
633 }
634 }
635 lambda[i]=itmp;
636 }
637 }
638 if (usePanel) {
639 #pragma omp parallel for private(i) schedule(static)
640 for (i=0;i<n;++i) notInPanel[i]=TRUE;
641 }
642 /* start search :*/
643 i=Paso_Util_arg_max(n,lambda);
644 while (lambda[i]>-1) { /* is there any undecided unknown? */
645
646 if (usePanel) {
647 len_panel=0;
648 do {
649 /* the unknown i is moved to C */
650 split_marker[i]=PASO_AMG_IN_C;
651 lambda[i]=-1; /* lambda from unavailable unknowns is set to -1 */
652
653 /* all undecided unknown strongly coupled to i are moved to F */
654 for (p=0; p<degree_ST[i]; ++p) {
655 j=ST[offset_S[i]+p];
656
657 if (split_marker[j]==PASO_AMG_UNDECIDED) {
658
659 split_marker[j]=PASO_AMG_IN_F;
660 lambda[j]=-1;
661
662 for (q=0; q<degree_ST[j]; ++q) {
663 k=ST[offset_S[j]+q];
664 if (split_marker[k]==PASO_AMG_UNDECIDED) {
665 lambda[k]++;
666 if (notInPanel[k]) {
667 notInPanel[k]=FALSE;
668 panel[len_panel]=k;
669 len_panel++;
670 }
671
672 } /* the unknown i is moved to C */
673 split_marker[i]=PASO_AMG_IN_C;
674 lambda[i]=-1; /* lambda from unavailable unknowns is set to -1 */
675 }
676
677 }
678 }
679 for (p=0; p<degree_S[i]; ++p) {
680 j=S[offset_S[i]+p];
681 if (split_marker[j]==PASO_AMG_UNDECIDED) {
682 lambda[j]--;
683 if (notInPanel[j]) {
684 notInPanel[j]=FALSE;
685 panel[len_panel]=j;
686 len_panel++;
687 }
688 }
689 }
690
691 /* consolidate panel */
692 /* remove lambda[q]=-1 */
693 lambda_max=-1;
694 i=-1;
695 len_panel_new=0;
696 for (q=0; q<len_panel; q++) {
697 k=panel[q];
698 lambda_k=lambda[k];
699 if (split_marker[k]==PASO_AMG_UNDECIDED) {
700 panel[len_panel_new]=k;
701 len_panel_new++;
702
703 if (lambda_max == lambda_k) {
704 if (k<i) i=k;
705 } else if (lambda_max < lambda_k) {
706 lambda_max =lambda_k;
707 i=k;
708 }
709 }
710 }
711 len_panel=len_panel_new;
712 } while (len_panel>0);
713 } else {
714 /* the unknown i is moved to C */
715 split_marker[i]=PASO_AMG_IN_C;
716 lambda[i]=-1; /* lambda from unavailable unknowns is set to -1 */
717
718 /* all undecided unknown strongly coupled to i are moved to F */
719 for (p=0; p<degree_ST[i]; ++p) {
720 j=ST[offset_S[i]+p];
721 if (split_marker[j]==PASO_AMG_UNDECIDED) {
722
723 split_marker[j]=PASO_AMG_IN_F;
724 lambda[j]=-1;
725
726 for (q=0; q<degree_ST[j]; ++q) {
727 k=ST[offset_S[j]+q];
728 if (split_marker[k]==PASO_AMG_UNDECIDED) lambda[k]++;
729 }
730
731 }
732 }
733 for (p=0; p<degree_S[i]; ++p) {
734 j=S[offset_S[i]+p];
735 if(split_marker[j]==PASO_AMG_UNDECIDED) lambda[j]--;
736 }
737
738 }
739 i=Paso_Util_arg_max(n,lambda);
740 }
741
742 }
743 TMPMEMFREE(lambda);
744 TMPMEMFREE(ST);
745 TMPMEMFREE(degree_ST);
746 TMPMEMFREE(panel);
747 TMPMEMFREE(notInPanel);
748 }
749 /* ensures that two F nodes are connected via a C node :*/
750 void Paso_Preconditioner_AMG_enforceFFConnectivity(const dim_t n, const index_t* offset_S,
751 const dim_t* degree_S, const index_t* S,
752 index_t*split_marker)
753 {
754 dim_t i, p, q;
755
756 /* now we make sure that two (strongly) connected F nodes are (strongly) connected via a C node. */
757 for (i=0;i<n;++i) {
758 if ( (split_marker[i]==PASO_AMG_IN_F) && (degree_S[i]>0) ) {
759 for (p=0; p<degree_S[i]; ++p) {
760 register index_t j=S[offset_S[i]+p];
761 if ( (split_marker[j]==PASO_AMG_IN_F) && (degree_S[j]>0) ) {
762 /* i and j are now two F nodes which are strongly connected */
763 /* is there a C node they share ? */
764 register index_t sharing=-1;
765 for (q=0; q<degree_S[i]; ++q) {
766 index_t k=S[offset_S[i]+q];
767 if (split_marker[k]==PASO_AMG_IN_C) {
768 register index_t* where_k=(index_t*)bsearch(&k, &(S[offset_S[j]]), degree_S[j], sizeof(index_t), Paso_comparIndex);
769 if (where_k != NULL) {
770 sharing=k;
771 break;
772 }
773 }
774 }
775 if (sharing<0) {
776 if (i<j) {
777 split_marker[j]=PASO_AMG_IN_C;
778 } else {
779 split_marker[i]=PASO_AMG_IN_C;
780 break; /* no point to look any further as i is now a C node */
781 }
782 }
783 }
784 }
785 }
786 }
787 }
788 #endif
789
790 #ifdef DFG
791 void Paso_Preconditioner_AMG_CIJPCoarsening( )
792 {
793
794
795 const dim_t my_n;
796 const dim_t overlap_n;
797 const dim_t n= my_n + overlap_n;
798
799
800 /* initialize split_marker and split_marker :*/
801 /* those unknows which are not influenced go into F, the rest is available for F or C */
802 #pragma omp parallel for private(i) schedule(static)
803 for (i=0;i<n;++i) {
804 degree_ST[i]=0;
805 if (degree_S[i]>0) {
806 lambda[i]=0;
807 split_marker[i]=PASO_AMG_UNDECIDED;
808 } else {
809 split_marker[i]=PASO_AMG_IN_F;
810 lambda[i]=-1;
811 }
812 }
813
814 /* set local lambda + overlap */
815 #pragma omp parallel for private(i)
816 for (i=0; i<n ++i) {
817 w[i]=degree_ST[i];
818 }
819 for (i=0; i<my_n; i++) {
820 w2[i]=random;
821 }
822
823
824 /* add noise to w */
825 Paso_Coupler_add(n, w, 1., w2, col_coupler);
826
827 /* */
828 global_n_C=0;
829 global_n_F=..;
830
831 while (global_n_C + global_n_F < global_n) {
832
833
834 is_in_D[i]=FALSE;
835 /* test local connectivit*/
836 /* w2[i] = max(w[k] | k in S_i or k in S^T_i */
837 #pragma omp parallel for private(i)
838 for (i=0; i<n; ++i) w2[i]=0;
839
840 for (i=0; i<my_n; ++i) {
841 for( iPtr =0 ; iPtr < degree_S[i]; ++iPtr) {
842 k=S[offset_S[i]+iPtr];
843 w2[i]=MAX(w2[i],w[k]);
844 w2[k]=MAX(w2[k],w[i]);
845 }
846 }
847 /* adjust overlaps by MAX */
848 Paso_Coupler_max(n, w2, col_coupler);
849
850 /* points with w[i]>w2[i] become C nodes */
851 }
852
853 }
854 #endif

  ViewVC Help
Powered by ViewVC 1.1.26