/[escript]/trunk/paso/src/AMG.c
ViewVC logotype

Annotation of /trunk/paso/src/AMG.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3442 - (hide annotations)
Tue Jan 18 01:47:36 2011 UTC (8 years, 10 months ago) by gross
File MIME type: text/plain
File size: 23614 byte(s)
 emeregncy
1 gross 3193
2     /*******************************************************
3     *
4     * Copyright (c) 2003-2010 by University of Queensland
5     * Earth Systems Science Computational Center (ESSCC)
6     * http://www.uq.edu.au/esscc
7     *
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13    
14    
15     /**************************************************************/
16    
17 gross 3441 /* Paso: AMG preconditioner (local version) */
18 gross 3193
19     /**************************************************************/
20    
21 gross 3303 /* Author: artak@uq.edu.au, l.gross@uq.edu.au */
22 gross 3193
23     /**************************************************************/
24    
25 gross 3283 #define SHOW_TIMING FALSE
26    
27 gross 3193 #include "Paso.h"
28     #include "Preconditioner.h"
29     #include "Options.h"
30     #include "PasoUtil.h"
31     #include "UMFPACK.h"
32     #include "MKL.h"
33    
34     /**************************************************************/
35    
36     /* free all memory used by AMG */
37    
38 gross 3441 void Paso_Preconditioner_AMG_free(Paso_Preconditioner_AMG * in) {
39 gross 3193 if (in!=NULL) {
40 gross 3441 Paso_Preconditioner_Smoother_free(in->Smoother);
41     Paso_SystemMatrix_free(in->P);
42     Paso_SystemMatrix_free(in->R);
43     Paso_SystemMatrix_free(in->A_C);
44     Paso_Preconditioner_AMG_free(in->AMG_C);
45 gross 3283 MEMFREE(in->r);
46     MEMFREE(in->x_C);
47     MEMFREE(in->b_C);
48 gross 3193
49 gross 3283 MEMFREE(in);
50 gross 3193 }
51     }
52    
53 gross 3441 index_t Paso_Preconditioner_AMG_getMaxLevel(const Paso_Preconditioner_AMG * in) {
54 gross 3402 if (in->AMG_C == NULL) {
55     return in->level;
56     } else {
57 gross 3441 return Paso_Preconditioner_AMG_getMaxLevel(in->AMG_C);
58 gross 3402 }
59     }
60 gross 3441 double Paso_Preconditioner_AMG_getCoarseLevelSparsity(const Paso_Preconditioner_AMG * in) {
61 gross 3403 if (in->AMG_C == NULL) {
62     if (in->A_C == NULL) {
63     return 1.;
64     } else {
65 gross 3442 return Paso_SystemMatrix_getSparsity(in->A_C);
66 gross 3403 }
67     } else {
68 gross 3441 return Paso_Preconditioner_AMG_getCoarseLevelSparsity(in->AMG_C);
69 gross 3403 }
70 gross 3402 }
71 gross 3441 dim_t Paso_Preconditioner_AMG_getNumCoarseUnknwons(const Paso_Preconditioner_AMG * in) {
72 gross 3402 if (in->AMG_C == NULL) {
73 gross 3403 if (in->A_C == NULL) {
74     return 0;
75     } else {
76 gross 3442 Paso_SystemMatrix_getTotalNumRows(in->A_C);
77 gross 3403 }
78 gross 3402 } else {
79 gross 3441 return Paso_Preconditioner_AMG_getNumCoarseUnknwons(in->AMG_C);
80 gross 3402 }
81     }
82 gross 3283 /*****************************************************************
83 gross 3193
84 gross 3283 constructs AMG
85    
86     ******************************************************************/
87 gross 3441 Paso_Preconditioner_AMG* Paso_Preconditioner_AMG_alloc(Paso_SystemMatrix *A_p,dim_t level,Paso_Options* options) {
88 gross 3193
89 gross 3441 Paso_Preconditioner_AMG* out=NULL;
90 gross 3193 bool_t verbose=options->verbose;
91    
92 gross 3441 Paso_SystemMatrix *Atemp=NULL, *A_C=NULL;
93 gross 3193 const dim_t n=A_p->numRows;
94     const dim_t n_block=A_p->row_block_size;
95 gross 3440 index_t* F_marker=NULL, *counter=NULL, *mask_C=NULL, *rows_in_F=NULL, *S=NULL, *degree_S=NULL;
96 gross 3283 dim_t n_F=0, n_C=0, i;
97     double time0=0;
98     const double theta = options->coarsening_threshold;
99     const double tau = options->diagonal_dominance_threshold;
100 gross 3442 const double sparsity=Paso_SystemMatrix_getSparsity(A_p);
101     const dim_t total_n=Paso_SystemMatrix_getGlobalTotalNumRows(A_p);
102 plaub 3436
103 gross 3193
104     /*
105 gross 3283 is the input matrix A suitable for coarsening
106    
107 gross 3193 */
108 gross 3442 if ( (sparsity >= options->min_coarse_sparsity) ||
109     (total_n <= options->min_coarse_matrix_size) ||
110     (level > options->level_max) ) {
111 plaub 3436
112     if (verbose) {
113     /*
114     print stopping condition:
115     - 'SPAR' = min_coarse_matrix_sparsity exceeded
116     - 'SIZE' = min_coarse_matrix_size exceeded
117     - 'LEVEL' = level_max exceeded
118     */
119 gross 3442 printf("Paso_Preconditioner: AMG: termination of coarsening by ");
120 plaub 3436
121 gross 3442 if (sparsity >= options->min_coarse_sparsity)
122 plaub 3436 printf("SPAR ");
123    
124 gross 3442 if (total_n <= options->min_coarse_matrix_size)
125 plaub 3436 printf("SIZE ");
126    
127     if (level > options->level_max)
128     printf("LEVEL ");
129    
130     printf("\n");
131    
132     printf("Paso_Preconditioner: AMG level %d (limit = %d) stopped. sparsity = %e (limit = %e), unknowns = %d (limit = %d)\n",
133 gross 3442 level, options->level_max, sparsity, options->min_coarse_sparsity, total_n, options->min_coarse_matrix_size);
134 plaub 3436
135     }
136    
137 gross 3193 return NULL;
138 gross 3283 }
139     /* Start Coarsening : */
140 gross 3193
141 gross 3440 F_marker=TMPMEMALLOC(n,index_t);
142 gross 3283 counter=TMPMEMALLOC(n,index_t);
143 gross 3440 degree_S=TMPMEMALLOC(n, dim_t);
144 gross 3303 S=TMPMEMALLOC(A_p->pattern->len, index_t);
145 gross 3440 if ( !( Esys_checkPtr(F_marker) || Esys_checkPtr(counter) || Esys_checkPtr(degree_S) || Esys_checkPtr(S) ) ) {
146 gross 3283 /*
147     set splitting of unknows:
148    
149     */
150     time0=Esys_timer();
151     if (n_block>1) {
152 gross 3440 Paso_Preconditioner_AMG_setStrongConnections_Block(A_p, degree_S, S, theta,tau);
153 gross 3283 } else {
154 gross 3440 Paso_Preconditioner_AMG_setStrongConnections(A_p, degree_S, S, theta,tau);
155 gross 3283 }
156 gross 3442
157     /*MPI:
158 gross 3440 Paso_Preconditioner_AMG_RungeStuebenSearch(n, A_p->pattern->ptr, degree_S, S, F_marker, options->usePanel);
159 gross 3442 */
160    
161 gross 3440 /* in BoomerAMG interpolation is used FF connectiovity is required :*/
162 gross 3442 /*MPI:
163 gross 3440 if (options->interpolation_method == PASO_CLASSIC_INTERPOLATION_WITH_FF_COUPLING)
164     Paso_Preconditioner_AMG_enforceFFConnectivity(n, A_p->pattern->ptr, degree_S, S, F_marker);
165 gross 3442 */
166    
167 gross 3283 options->coarsening_selection_time=Esys_timer()-time0 + MAX(0, options->coarsening_selection_time);
168    
169     if (Esys_noError() ) {
170     #pragma omp parallel for private(i) schedule(static)
171 gross 3440 for (i = 0; i < n; ++i) F_marker[i]=(F_marker[i] == PASO_AMG_IN_F);
172 gross 3283
173     /*
174     count number of unkowns to be eliminated:
175     */
176 gross 3440 n_F=Paso_Util_cumsum_maskedTrue(n,counter, F_marker);
177 gross 3283 n_C=n-n_F;
178 gross 3402 if (verbose) printf("Paso_Preconditioner: AMG level %d: %d unknowns are flagged for elimination. %d left.\n",level,n_F,n-n_F);
179 gross 3283
180     if ( n_F == 0 ) { /* is a nasty case. a direct solver should be used, return NULL */
181     out = NULL;
182     } else {
183 gross 3441 out=MEMALLOC(1,Paso_Preconditioner_AMG);
184 gross 3323 if (! Esys_checkPtr(out)) {
185 gross 3283 out->level = level;
186     out->n = n;
187     out->n_F = n_F;
188     out->n_block = n_block;
189     out->A_C = NULL;
190     out->P = NULL;
191     out->R = NULL;
192     out->post_sweeps = options->post_sweeps;
193     out->pre_sweeps = options->pre_sweeps;
194     out->r = NULL;
195     out->x_C = NULL;
196     out->b_C = NULL;
197     out->AMG_C = NULL;
198 gross 3323 out->Smoother=NULL;
199     }
200     mask_C=TMPMEMALLOC(n,index_t);
201     rows_in_F=TMPMEMALLOC(n_F,index_t);
202     Esys_checkPtr(mask_C);
203     Esys_checkPtr(rows_in_F);
204     if ( Esys_noError() ) {
205    
206 gross 3442 out->Smoother = Paso_Preconditioner_Smoother_alloc(A_p, (options->smoother == PASO_JACOBI), verbose);
207 gross 3323
208 gross 3402 if (n_C != 0) {
209     /* if nothing is been removed we have a diagonal dominant matrix and we just run a few steps of the smoother */
210 gross 3283
211 gross 3315 /* allocate helpers :*/
212     out->x_C=MEMALLOC(n_block*n_C,double);
213     out->b_C=MEMALLOC(n_block*n_C,double);
214     out->r=MEMALLOC(n_block*n,double);
215    
216     Esys_checkPtr(out->r);
217     Esys_checkPtr(out->x_C);
218     Esys_checkPtr(out->b_C);
219    
220 gross 3323 if ( Esys_noError() ) {
221     /* creates index for F:*/
222     #pragma omp parallel private(i)
223     {
224     #pragma omp for schedule(static)
225     for (i = 0; i < n; ++i) {
226 gross 3440 if (F_marker[i]) rows_in_F[counter[i]]=i;
227 gross 3323 }
228     }
229     /* create mask of C nodes with value >-1 gives new id */
230 gross 3440 i=Paso_Util_cumsum_maskedFalse(n,counter, F_marker);
231 gross 3193
232 gross 3323 #pragma omp parallel for private(i) schedule(static)
233     for (i = 0; i < n; ++i) {
234 gross 3440 if (F_marker[i]) {
235 gross 3323 mask_C[i]=-1;
236     } else {
237     mask_C[i]=counter[i];;
238     }
239 gross 3283 }
240 gross 3323 /*
241 gross 3440 get Prolongation :
242 gross 3323 */
243     time0=Esys_timer();
244 gross 3442 /*MPI:
245 gross 3440 out->P=Paso_Preconditioner_AMG_getProlongation(A_p,A_p->pattern->ptr, degree_S,S,n_C,mask_C, options->interpolation_method);
246 gross 3442 */
247 gross 3323 if (SHOW_TIMING) printf("timing: level %d: getProlongation: %e\n",level, Esys_timer()-time0);
248 gross 3283 }
249 gross 3323 /*
250 gross 3440 construct Restriction operator as transposed of Prolongation operator:
251 gross 3283 */
252 gross 3315 if ( Esys_noError()) {
253     time0=Esys_timer();
254 gross 3442 /*MPI:
255 gross 3441 out->R=Paso_SystemMatrix_getTranspose(out->P);
256 gross 3442 */
257 gross 3441 if (SHOW_TIMING) printf("timing: level %d: Paso_SystemMatrix_getTranspose: %e\n",level,Esys_timer()-time0);
258 gross 3315 }
259 gross 3283 /*
260 gross 3312 construct coarse level matrix:
261 gross 3283 */
262 gross 3315 if ( Esys_noError()) {
263     time0=Esys_timer();
264 gross 3442 /*MPI:
265 gross 3441 Atemp=Paso_SystemMatrix_MatrixMatrix(A_p,out->P);
266     A_C=Paso_SystemMatrix_MatrixMatrix(out->R,Atemp);
267 gross 3442
268 gross 3441 Paso_SystemMatrix_free(Atemp);
269 gross 3442 */
270    
271 gross 3315 if (SHOW_TIMING) printf("timing: level %d : construct coarse matrix: %e\n",level,Esys_timer()-time0);
272     }
273    
274 gross 3283
275     /*
276     constructe courser level:
277    
278     */
279     if ( Esys_noError()) {
280 gross 3441 out->AMG_C=Paso_Preconditioner_AMG_alloc(A_C,level+1,options);
281 gross 3315 }
282     if ( Esys_noError()) {
283 gross 3283 if ( out->AMG_C == NULL ) {
284     out->reordering = options->reordering;
285     out->refinements = options->coarse_matrix_refinements;
286     /* no coarse level matrix has been constructed. use direct solver */
287     #ifdef MKL
288 gross 3441 out->A_C=Paso_SystemMatrix_unroll(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_OFFSET1, A_C);
289     Paso_SystemMatrix_free(A_C);
290 gross 3283 out->A_C->solver_package = PASO_MKL;
291 gross 3402 if (verbose) printf("Paso_Preconditioner: AMG: use MKL direct solver on the coarsest level (number of unknowns = %d).\n",n_C*n_block);
292 gross 3283 #else
293     #ifdef UMFPACK
294 gross 3441 out->A_C=Paso_SystemMatrix_unroll(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_CSC, A_C);
295     Paso_SystemMatrix_free(A_C);
296 gross 3283 out->A_C->solver_package = PASO_UMFPACK;
297 gross 3402 if (verbose) printf("Paso_Preconditioner: AMG: use UMFPACK direct solver on the coarsest level (number of unknowns = %d).\n",n_C*n_block);
298 gross 3283 #else
299     out->A_C=A_C;
300 gross 3442 out->A_C->solver_p=Paso_Preconditioner_Smoother_alloc(out->A_C, (options->smoother == PASO_JACOBI), verbose);
301 gross 3283 out->A_C->solver_package = PASO_SMOOTHER;
302 gross 3402 if (verbose) printf("Paso_Preconditioner: AMG: use smoother on the coarsest level (number of unknowns = %d).\n",n_C*n_block);
303 gross 3283 #endif
304     #endif
305     } else {
306     /* finally we set some helpers for the solver step */
307     out->A_C=A_C;
308     }
309     }
310     }
311     }
312     TMPMEMFREE(mask_C);
313     TMPMEMFREE(rows_in_F);
314     }
315     }
316 gross 3193 }
317     TMPMEMFREE(counter);
318 gross 3440 TMPMEMFREE(F_marker);
319     TMPMEMFREE(degree_S);
320 gross 3303 TMPMEMFREE(S);
321 gross 3193
322 jfenwick 3259 if (Esys_noError()) {
323 gross 3193 return out;
324     } else {
325 gross 3441 Paso_Preconditioner_AMG_free(out);
326 gross 3193 return NULL;
327     }
328     }
329    
330    
331 gross 3441 void Paso_Preconditioner_AMG_solve(Paso_SystemMatrix* A, Paso_Preconditioner_AMG * amg, double * x, double * b) {
332 gross 3283 const dim_t n = amg->n * amg->n_block;
333     double time0=0;
334     const dim_t post_sweeps=amg->post_sweeps;
335     const dim_t pre_sweeps=amg->pre_sweeps;
336 gross 3193
337 gross 3283 /* presmoothing */
338     time0=Esys_timer();
339 gross 3442 Paso_Preconditioner_Smoother_solve(A, amg->Smoother, x, b, pre_sweeps, FALSE);
340 gross 3283 time0=Esys_timer()-time0;
341     if (SHOW_TIMING) printf("timing: level %d: Presmooting: %e\n",amg->level, time0);
342     /* end of presmoothing */
343    
344     if (amg->n_F < amg->n) { /* is there work on the coarse level? */
345     time0=Esys_timer();
346     Paso_Copy(n, amg->r, b); /* r <- b */
347 gross 3441 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(-1.,A,x,1.,amg->r); /*r=r-Ax*/
348     Paso_SystemMatrix_MatrixVector_CSR_OFFSET0_DIAG(1.,amg->R,amg->r,0.,amg->b_C); /* b_c = R*r */
349 gross 3283 time0=Esys_timer()-time0;
350     /* coarse level solve */
351     if ( amg->AMG_C == NULL) {
352     time0=Esys_timer();
353     /* A_C is the coarsest level */
354     switch (amg->A_C->solver_package) {
355     case (PASO_MKL):
356     Paso_MKL(amg->A_C, amg->x_C,amg->b_C, amg->reordering, amg->refinements, SHOW_TIMING);
357     break;
358     case (PASO_UMFPACK):
359     Paso_UMFPACK(amg->A_C, amg->x_C,amg->b_C, amg->refinements, SHOW_TIMING);
360     break;
361     case (PASO_SMOOTHER):
362 gross 3442 Paso_Preconditioner_Smoother_solve(amg->A_C, amg->A_C->solver_p,amg->x_C,amg->b_C,pre_sweeps+post_sweeps, FALSE);
363 gross 3283 break;
364     }
365     if (SHOW_TIMING) printf("timing: level %d: DIRECT SOLVER: %e\n",amg->level,Esys_timer()-time0);
366     } else {
367 gross 3441 Paso_Preconditioner_AMG_solve(amg->A_C, amg->AMG_C,amg->x_C,amg->b_C); /* x_C=AMG(b_C) */
368 gross 3283 }
369     time0=time0+Esys_timer();
370 gross 3441 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0_DIAG(1.,amg->P,amg->x_C,1.,x); /* x = x + P*x_c */
371 gross 3283
372     /*postsmoothing*/
373    
374     /*solve Ax=b with initial guess x */
375     time0=Esys_timer();
376 gross 3442 Paso_Preconditioner_Smoother_solve(A, amg->Smoother, x, b, post_sweeps, TRUE);
377 gross 3283 time0=Esys_timer()-time0;
378     if (SHOW_TIMING) printf("timing: level %d: Postsmoothing: %e\n",amg->level,time0);
379     /*end of postsmoothing*/
380    
381     }
382     return;
383     }
384 gross 3193
385 gross 3283 /* theta = threshold for strong connections */
386     /* tau = threshold for diagonal dominance */
387 gross 3193
388 gross 3303 /*S_i={j \in N_i; i strongly coupled to j}
389    
390     in the sense that |A_{ij}| >= theta * max_k |A_{ik}|
391     */
392    
393 gross 3441 void Paso_Preconditioner_AMG_setStrongConnections(Paso_SystemMatrix* A,
394 gross 3440 dim_t *degree_S, index_t *S,
395 gross 3303 const double theta, const double tau)
396 gross 3283 {
397     const dim_t n=A->numRows;
398 gross 3303 index_t iptr, i,j;
399 gross 3283 dim_t kdeg;
400     double max_offdiagonal, threshold, sum_row, main_row, fnorm;
401 gross 3193
402 gross 3303
403 gross 3283 #pragma omp parallel for private(i,iptr,max_offdiagonal, threshold,j, kdeg, sum_row, main_row, fnorm) schedule(static)
404 gross 3402 for (i=0;i<n;++i) {
405 gross 3283 max_offdiagonal = 0.;
406     sum_row=0;
407     main_row=0;
408     #pragma ivdep
409     for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {
410     j=A->pattern->index[iptr];
411     fnorm=ABS(A->val[iptr]);
412    
413     if( j != i) {
414     max_offdiagonal = MAX(max_offdiagonal,fnorm);
415     sum_row+=fnorm;
416     } else {
417     main_row=fnorm;
418     }
419     }
420     threshold = theta*max_offdiagonal;
421     kdeg=0;
422 gross 3402
423 gross 3283 if (tau*main_row < sum_row) { /* no diagonal domainance */
424     #pragma ivdep
425     for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {
426     j=A->pattern->index[iptr];
427     if(ABS(A->val[iptr])>threshold && i!=j) {
428     S[A->pattern->ptr[i]+kdeg] = j;
429     kdeg++;
430     }
431     }
432 gross 3440 }
433     degree_S[i]=kdeg;
434 gross 3283 }
435 gross 3303
436 gross 3283 }
437 gross 3193
438 gross 3283 /* theta = threshold for strong connections */
439     /* tau = threshold for diagonal dominance */
440 gross 3303 /*S_i={j \in N_i; i strongly coupled to j}
441 gross 3283
442 gross 3303 in the sense that |A_{ij}|_F >= theta * max_k |A_{ik}|_F
443     */
444 gross 3441 void Paso_Preconditioner_AMG_setStrongConnections_Block(Paso_SystemMatrix* A,
445 gross 3440 dim_t *degree_S, index_t *S,
446 gross 3303 const double theta, const double tau)
447    
448 gross 3283 {
449     const dim_t n_block=A->row_block_size;
450     const dim_t n=A->numRows;
451 gross 3303 index_t iptr, i,j, bi;
452 gross 3283 dim_t kdeg, max_deg;
453     register double max_offdiagonal, threshold, fnorm, sum_row, main_row;
454     double *rtmp;
455    
456    
457     #pragma omp parallel private(i,iptr,max_offdiagonal, kdeg, threshold,j, max_deg, fnorm, sum_row, main_row, rtmp)
458     {
459     max_deg=0;
460     #pragma omp for schedule(static)
461     for (i=0;i<n;++i) max_deg=MAX(max_deg, A->pattern->ptr[i+1]-A->pattern->ptr[i]);
462 gross 3193
463 gross 3283 rtmp=TMPMEMALLOC(max_deg, double);
464    
465     #pragma omp for schedule(static)
466     for (i=0;i<n;++i) {
467 gross 3193
468 gross 3283 max_offdiagonal = 0.;
469     sum_row=0;
470     main_row=0;
471 gross 3440
472 gross 3283 for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {
473     j=A->pattern->index[iptr];
474     fnorm=0;
475     #pragma ivdep
476     for(bi=0;bi<n_block*n_block;++bi) fnorm+=A->val[iptr*n_block*n_block+bi]*A->val[iptr*n_block*n_block+bi];
477     fnorm=sqrt(fnorm);
478 gross 3287 rtmp[iptr-A->pattern->ptr[i]]=fnorm;
479 gross 3283 if( j != i) {
480     max_offdiagonal = MAX(max_offdiagonal,fnorm);
481     sum_row+=fnorm;
482     } else {
483     main_row=fnorm;
484     }
485     }
486     threshold = theta*max_offdiagonal;
487 gross 3193
488 gross 3283 kdeg=0;
489     if (tau*main_row < sum_row) { /* no diagonal domainance */
490     #pragma ivdep
491     for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {
492     j=A->pattern->index[iptr];
493     if(rtmp[iptr-A->pattern->ptr[i]] > threshold && i!=j) {
494     S[A->pattern->ptr[i]+kdeg] = j;
495     kdeg++;
496     }
497     }
498     }
499 gross 3440 degree_S[i]=kdeg;
500 gross 3283 }
501     TMPMEMFREE(rtmp);
502     } /* end of parallel region */
503 gross 3303
504 gross 3283 }
505    
506     /* the runge stueben coarsening algorithm: */
507 gross 3440 void Paso_Preconditioner_AMG_RungeStuebenSearch(const dim_t n, const index_t* offset_S,
508     const dim_t* degree_S, const index_t* S,
509 gross 3352 index_t*split_marker, const bool_t usePanel)
510 gross 3283 {
511    
512 gross 3351 index_t *lambda=NULL, *ST=NULL, *notInPanel=NULL, *panel=NULL, lambda_max, lambda_k;
513 gross 3440 dim_t i,k, p, q, *degree_ST=NULL, len_panel, len_panel_new;
514 gross 3351 register index_t j, itmp;
515    
516 gross 3283 if (n<=0) return; /* make sure that the return of Paso_Util_arg_max is not pointing to nirvana */
517    
518 gross 3351 lambda=TMPMEMALLOC(n, index_t); Esys_checkPtr(lambda);
519 gross 3440 degree_ST=TMPMEMALLOC(n, dim_t); Esys_checkPtr(degree_ST);
520     ST=TMPMEMALLOC(offset_S[n], index_t); Esys_checkPtr(ST);
521 gross 3351 if (usePanel) {
522     notInPanel=TMPMEMALLOC(n, bool_t); Esys_checkPtr(notInPanel);
523     panel=TMPMEMALLOC(n, index_t); Esys_checkPtr(panel);
524     }
525 gross 3283
526 gross 3351
527    
528     if (Esys_noError() ) {
529 gross 3283 /* initialize split_marker and split_marker :*/
530     /* those unknows which are not influenced go into F, the rest is available for F or C */
531     #pragma omp parallel for private(i) schedule(static)
532     for (i=0;i<n;++i) {
533 gross 3440 degree_ST[i]=0;
534     if (degree_S[i]>0) {
535 gross 3283 lambda[i]=0;
536     split_marker[i]=PASO_AMG_UNDECIDED;
537     } else {
538     split_marker[i]=PASO_AMG_IN_F;
539     lambda[i]=-1;
540     }
541     }
542     /* create transpose :*/
543     for (i=0;i<n;++i) {
544 gross 3440 for (p=0; p<degree_S[i]; ++p) {
545     j=S[offset_S[i]+p];
546     ST[offset_S[j]+degree_ST[j]]=i;
547     degree_ST[j]++;
548 gross 3283 }
549     }
550     /* lambda[i] = |undecided k in ST[i]| + 2 * |F-unknown in ST[i]| */
551     #pragma omp parallel for private(i, j, itmp) schedule(static)
552     for (i=0;i<n;++i) {
553     if (split_marker[i]==PASO_AMG_UNDECIDED) {
554     itmp=lambda[i];
555 gross 3440 for (p=0; p<degree_ST[i]; ++p) {
556     j=ST[offset_S[i]+p];
557 gross 3283 if (split_marker[j]==PASO_AMG_UNDECIDED) {
558     itmp++;
559     } else { /* at this point there are no C points */
560     itmp+=2;
561     }
562     }
563     lambda[i]=itmp;
564     }
565     }
566 gross 3351 if (usePanel) {
567     #pragma omp parallel for private(i) schedule(static)
568     for (i=0;i<n;++i) notInPanel[i]=TRUE;
569     }
570 gross 3283 /* start search :*/
571     i=Paso_Util_arg_max(n,lambda);
572 gross 3351 while (lambda[i]>-1) { /* is there any undecided unknown? */
573    
574     if (usePanel) {
575     len_panel=0;
576     do {
577     /* the unknown i is moved to C */
578     split_marker[i]=PASO_AMG_IN_C;
579     lambda[i]=-1; /* lambda from unavailable unknowns is set to -1 */
580    
581     /* all undecided unknown strongly coupled to i are moved to F */
582 gross 3440 for (p=0; p<degree_ST[i]; ++p) {
583     j=ST[offset_S[i]+p];
584 gross 3351
585     if (split_marker[j]==PASO_AMG_UNDECIDED) {
586    
587     split_marker[j]=PASO_AMG_IN_F;
588     lambda[j]=-1;
589    
590 gross 3440 for (q=0; q<degree_ST[j]; ++q) {
591     k=ST[offset_S[j]+q];
592 gross 3351 if (split_marker[k]==PASO_AMG_UNDECIDED) {
593     lambda[k]++;
594     if (notInPanel[k]) {
595     notInPanel[k]=FALSE;
596     panel[len_panel]=k;
597     len_panel++;
598     }
599    
600     } /* the unknown i is moved to C */
601     split_marker[i]=PASO_AMG_IN_C;
602     lambda[i]=-1; /* lambda from unavailable unknowns is set to -1 */
603     }
604    
605     }
606 gross 3283 }
607 gross 3440 for (p=0; p<degree_S[i]; ++p) {
608     j=S[offset_S[i]+p];
609 gross 3351 if (split_marker[j]==PASO_AMG_UNDECIDED) {
610     lambda[j]--;
611     if (notInPanel[j]) {
612     notInPanel[j]=FALSE;
613     panel[len_panel]=j;
614     len_panel++;
615     }
616     }
617     }
618 gross 3193
619 gross 3351 /* consolidate panel */
620     /* remove lambda[q]=-1 */
621     lambda_max=-1;
622     i=-1;
623     len_panel_new=0;
624     for (q=0; q<len_panel; q++) {
625     k=panel[q];
626     lambda_k=lambda[k];
627     if (split_marker[k]==PASO_AMG_UNDECIDED) {
628     panel[len_panel_new]=k;
629     len_panel_new++;
630    
631     if (lambda_max == lambda_k) {
632     if (k<i) i=k;
633     } else if (lambda_max < lambda_k) {
634     lambda_max =lambda_k;
635     i=k;
636     }
637     }
638     }
639     len_panel=len_panel_new;
640     } while (len_panel>0);
641     } else {
642     /* the unknown i is moved to C */
643     split_marker[i]=PASO_AMG_IN_C;
644     lambda[i]=-1; /* lambda from unavailable unknowns is set to -1 */
645    
646     /* all undecided unknown strongly coupled to i are moved to F */
647 gross 3440 for (p=0; p<degree_ST[i]; ++p) {
648     j=ST[offset_S[i]+p];
649 gross 3351 if (split_marker[j]==PASO_AMG_UNDECIDED) {
650    
651     split_marker[j]=PASO_AMG_IN_F;
652     lambda[j]=-1;
653    
654 gross 3440 for (q=0; q<degree_ST[j]; ++q) {
655     k=ST[offset_S[j]+q];
656 gross 3351 if (split_marker[k]==PASO_AMG_UNDECIDED) lambda[k]++;
657     }
658    
659     }
660 gross 3283 }
661 gross 3440 for (p=0; p<degree_S[i]; ++p) {
662     j=S[offset_S[i]+p];
663 gross 3351 if(split_marker[j]==PASO_AMG_UNDECIDED) lambda[j]--;
664     }
665    
666 gross 3283 }
667     i=Paso_Util_arg_max(n,lambda);
668     }
669 gross 3440
670 gross 3283 }
671     TMPMEMFREE(lambda);
672     TMPMEMFREE(ST);
673 gross 3440 TMPMEMFREE(degree_ST);
674 gross 3351 TMPMEMFREE(panel);
675     TMPMEMFREE(notInPanel);
676 gross 3193 }
677 gross 3440 /* ensures that two F nodes are connected via a C node :*/
678     void Paso_Preconditioner_AMG_enforceFFConnectivity(const dim_t n, const index_t* offset_S,
679     const dim_t* degree_S, const index_t* S,
680     index_t*split_marker)
681     {
682     dim_t i, p, q;
683    
684     /* now we make sure that two (strongly) connected F nodes are (strongly) connected via a C node. */
685     for (i=0;i<n;++i) {
686     if ( (split_marker[i]==PASO_AMG_IN_F) && (degree_S[i]>0) ) {
687     for (p=0; p<degree_S[i]; ++p) {
688     register index_t j=S[offset_S[i]+p];
689     if ( (split_marker[j]==PASO_AMG_IN_F) && (degree_S[j]>0) ) {
690     /* i and j are now two F nodes which are strongly connected */
691     /* is there a C node they share ? */
692     register index_t sharing=-1;
693     for (q=0; q<degree_S[i]; ++q) {
694     index_t k=S[offset_S[i]+q];
695     if (split_marker[k]==PASO_AMG_IN_C) {
696     register index_t* where_k=(index_t*)bsearch(&k, &(S[offset_S[j]]), degree_S[j], sizeof(index_t), Paso_comparIndex);
697     if (where_k != NULL) {
698     sharing=k;
699     break;
700     }
701     }
702     }
703     if (sharing<0) {
704     if (i<j) {
705     split_marker[j]=PASO_AMG_IN_C;
706     } else {
707     split_marker[i]=PASO_AMG_IN_C;
708     break; /* no point to look any further as i is now a C node */
709     }
710     }
711     }
712     }
713     }
714     }
715     }
716 gross 3442
717     void Paso_Preconditioner_AMG_CIJPCoarsening( )
718     {
719     const dim_t my_n;
720     const dim_t overlap_n;
721     const dim_t n= my_n + overlap_n;
722     /* set local lambda + overlap */
723     #pragma omp parallel for private(i)
724     for (i=0; i<n ++i) {
725     w[i]=degree_ST[i];
726     }
727     for (i=0; i<my_n; i++) {
728     w2[i]=random;
729     }
730    
731    
732     Paso_Coupler_add(w, 1., w2,
733     /* adjust w accross processors */
734    
735     /* */
736     global_n_C=0;
737     global_n_F=..;
738    
739     while (global_n_C + global_n_F < global_n) {
740    
741    
742     is_in_D[i]=FALSE;
743     /* test local connectivit*/
744     for i ..
745     for k in S_i:
746     w2[i]=MAX(w2[i],w[k]);
747     w2[k]=MAX(w2[k],w[i]);
748     /* adjust overlaps by MAX */
749     ...
750     /* points with w[i]>w2[i] become C nodes */
751    
752    
753    
754    
755    
756    
757    
758     global_n_C=?
759     global_n_F=?
760    
761     }
762    
763     }

  ViewVC Help
Powered by ViewVC 1.1.26