/[escript]/trunk/paso/src/AMG.c
ViewVC logotype

Annotation of /trunk/paso/src/AMG.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3458 - (hide annotations)
Mon Jan 31 07:06:42 2011 UTC (8 years, 7 months ago) by gross
File MIME type: text/plain
File size: 27682 byte(s)
more on AMG under MPI
1 gross 3193
2     /*******************************************************
3     *
4     * Copyright (c) 2003-2010 by University of Queensland
5     * Earth Systems Science Computational Center (ESSCC)
6     * http://www.uq.edu.au/esscc
7     *
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13    
14    
15     /**************************************************************/
16    
17 gross 3441 /* Paso: AMG preconditioner (local version) */
18 gross 3193
19     /**************************************************************/
20    
21 gross 3303 /* Author: artak@uq.edu.au, l.gross@uq.edu.au */
22 gross 3193
23     /**************************************************************/
24    
25 gross 3283 #define SHOW_TIMING FALSE
26    
27 gross 3193 #include "Paso.h"
28     #include "Preconditioner.h"
29     #include "Options.h"
30     #include "PasoUtil.h"
31     #include "UMFPACK.h"
32     #include "MKL.h"
33 gross 3458 #include<stdio.h>
34 gross 3193
35 gross 3458
36 gross 3193 /**************************************************************/
37    
38     /* free all memory used by AMG */
39    
40 gross 3441 void Paso_Preconditioner_AMG_free(Paso_Preconditioner_AMG * in) {
41 gross 3193 if (in!=NULL) {
42 gross 3441 Paso_Preconditioner_Smoother_free(in->Smoother);
43     Paso_SystemMatrix_free(in->P);
44     Paso_SystemMatrix_free(in->R);
45     Paso_SystemMatrix_free(in->A_C);
46     Paso_Preconditioner_AMG_free(in->AMG_C);
47 gross 3283 MEMFREE(in->r);
48     MEMFREE(in->x_C);
49     MEMFREE(in->b_C);
50 gross 3193
51 gross 3283 MEMFREE(in);
52 gross 3193 }
53     }
54    
55 gross 3441 index_t Paso_Preconditioner_AMG_getMaxLevel(const Paso_Preconditioner_AMG * in) {
56 gross 3402 if (in->AMG_C == NULL) {
57     return in->level;
58     } else {
59 gross 3441 return Paso_Preconditioner_AMG_getMaxLevel(in->AMG_C);
60 gross 3402 }
61     }
62 gross 3441 double Paso_Preconditioner_AMG_getCoarseLevelSparsity(const Paso_Preconditioner_AMG * in) {
63 gross 3403 if (in->AMG_C == NULL) {
64     if (in->A_C == NULL) {
65     return 1.;
66     } else {
67 gross 3442 return Paso_SystemMatrix_getSparsity(in->A_C);
68 gross 3403 }
69     } else {
70 gross 3441 return Paso_Preconditioner_AMG_getCoarseLevelSparsity(in->AMG_C);
71 gross 3403 }
72 gross 3402 }
73 gross 3441 dim_t Paso_Preconditioner_AMG_getNumCoarseUnknwons(const Paso_Preconditioner_AMG * in) {
74 gross 3402 if (in->AMG_C == NULL) {
75 gross 3403 if (in->A_C == NULL) {
76     return 0;
77     } else {
78 gross 3445 return Paso_SystemMatrix_getTotalNumRows(in->A_C);
79 gross 3403 }
80 gross 3402 } else {
81 gross 3441 return Paso_Preconditioner_AMG_getNumCoarseUnknwons(in->AMG_C);
82 gross 3402 }
83     }
84 gross 3283 /*****************************************************************
85 gross 3193
86 gross 3283 constructs AMG
87    
88     ******************************************************************/
89 gross 3441 Paso_Preconditioner_AMG* Paso_Preconditioner_AMG_alloc(Paso_SystemMatrix *A_p,dim_t level,Paso_Options* options) {
90 gross 3193
91 gross 3441 Paso_Preconditioner_AMG* out=NULL;
92 gross 3193 bool_t verbose=options->verbose;
93 gross 3445
94     const dim_t my_n=Paso_SystemMatrix_getNumRows(A_p);
95     const dim_t overlap_n=Paso_SystemMatrix_getColOverlap(A_p);
96     const dim_t n = my_n + overlap_n;
97    
98 gross 3193 const dim_t n_block=A_p->row_block_size;
99 gross 3451 index_t* F_marker=NULL, *counter=NULL;
100 gross 3450 dim_t i;
101 gross 3283 double time0=0;
102     const double theta = options->coarsening_threshold;
103     const double tau = options->diagonal_dominance_threshold;
104 gross 3442 const double sparsity=Paso_SystemMatrix_getSparsity(A_p);
105     const dim_t total_n=Paso_SystemMatrix_getGlobalTotalNumRows(A_p);
106 plaub 3436
107 gross 3193
108     /*
109 gross 3283 is the input matrix A suitable for coarsening
110    
111 gross 3193 */
112 gross 3442 if ( (sparsity >= options->min_coarse_sparsity) ||
113     (total_n <= options->min_coarse_matrix_size) ||
114     (level > options->level_max) ) {
115 plaub 3436
116 gross 3445 if (verbose) {
117 plaub 3436 /*
118     print stopping condition:
119     - 'SPAR' = min_coarse_matrix_sparsity exceeded
120     - 'SIZE' = min_coarse_matrix_size exceeded
121     - 'LEVEL' = level_max exceeded
122     */
123 gross 3442 printf("Paso_Preconditioner: AMG: termination of coarsening by ");
124 plaub 3436
125 gross 3442 if (sparsity >= options->min_coarse_sparsity)
126 plaub 3436 printf("SPAR ");
127    
128 gross 3442 if (total_n <= options->min_coarse_matrix_size)
129 plaub 3436 printf("SIZE ");
130    
131     if (level > options->level_max)
132     printf("LEVEL ");
133    
134     printf("\n");
135    
136     printf("Paso_Preconditioner: AMG level %d (limit = %d) stopped. sparsity = %e (limit = %e), unknowns = %d (limit = %d)\n",
137 gross 3442 level, options->level_max, sparsity, options->min_coarse_sparsity, total_n, options->min_coarse_matrix_size);
138 plaub 3436
139 gross 3445 }
140 plaub 3436
141 gross 3445 return NULL;
142     } else {
143 gross 3451 /* Start Coarsening : */
144     const dim_t len_S=A_p->mainBlock->pattern->len+A_p->col_coupleBlock->pattern->len;
145     dim_t* degree_S=TMPMEMALLOC(my_n, dim_t);
146     index_t *offset_S=TMPMEMALLOC(my_n, index_t);
147     index_t *S=TMPMEMALLOC(len_S, index_t);
148 gross 3193
149 gross 3440 F_marker=TMPMEMALLOC(n,index_t);
150 gross 3283 counter=TMPMEMALLOC(n,index_t);
151 gross 3445
152     if ( !( Esys_checkPtr(F_marker) || Esys_checkPtr(counter) || Esys_checkPtr(degree_S) || Esys_checkPtr(offset_S) || Esys_checkPtr(S) ) ) {
153 gross 3458 /*
154     make sure that corresponding values in the row_coupleBlock and col_coupleBlock are identical
155     */
156     Paso_SystemMatrix_copyColCoupleBlock(A_p);
157    
158     /*
159 gross 3283 set splitting of unknows:
160 gross 3458
161 gross 3283 */
162     time0=Esys_timer();
163     if (n_block>1) {
164 gross 3445 Paso_Preconditioner_AMG_setStrongConnections_Block(A_p, degree_S, offset_S, S, theta,tau);
165 gross 3283 } else {
166 gross 3445 Paso_Preconditioner_AMG_setStrongConnections(A_p, degree_S, offset_S, S, theta,tau);
167 gross 3283 }
168 gross 3458 Esys_setError(SYSTEM_ERROR, "AMG:DONE.");
169     return NULL;
170 gross 3449 {
171     dim_t p;
172     for (i=0; i< my_n; ++i) {
173     printf("%d: ",i);
174     for (p=0; p<degree_S[i];++p) printf("%d ",S[offset_S[i]+p]);
175     printf("\n");
176     }
177     }
178    
179 gross 3442
180     /*MPI:
181 gross 3440 Paso_Preconditioner_AMG_RungeStuebenSearch(n, A_p->pattern->ptr, degree_S, S, F_marker, options->usePanel);
182 gross 3442 */
183    
184 gross 3440 /* in BoomerAMG interpolation is used FF connectiovity is required :*/
185 gross 3442 /*MPI:
186 gross 3440 if (options->interpolation_method == PASO_CLASSIC_INTERPOLATION_WITH_FF_COUPLING)
187     Paso_Preconditioner_AMG_enforceFFConnectivity(n, A_p->pattern->ptr, degree_S, S, F_marker);
188 gross 3442 */
189    
190 gross 3283 options->coarsening_selection_time=Esys_timer()-time0 + MAX(0, options->coarsening_selection_time);
191 gross 3445
192     #ifdef AAAAA
193 gross 3283 if (Esys_noError() ) {
194     #pragma omp parallel for private(i) schedule(static)
195 gross 3440 for (i = 0; i < n; ++i) F_marker[i]=(F_marker[i] == PASO_AMG_IN_F);
196 gross 3283
197     /*
198     count number of unkowns to be eliminated:
199     */
200 gross 3440 n_F=Paso_Util_cumsum_maskedTrue(n,counter, F_marker);
201 gross 3283 n_C=n-n_F;
202 gross 3402 if (verbose) printf("Paso_Preconditioner: AMG level %d: %d unknowns are flagged for elimination. %d left.\n",level,n_F,n-n_F);
203 gross 3283
204     if ( n_F == 0 ) { /* is a nasty case. a direct solver should be used, return NULL */
205     out = NULL;
206     } else {
207 gross 3441 out=MEMALLOC(1,Paso_Preconditioner_AMG);
208 gross 3323 if (! Esys_checkPtr(out)) {
209 gross 3283 out->level = level;
210     out->n = n;
211     out->n_F = n_F;
212     out->n_block = n_block;
213     out->A_C = NULL;
214     out->P = NULL;
215     out->R = NULL;
216     out->post_sweeps = options->post_sweeps;
217     out->pre_sweeps = options->pre_sweeps;
218     out->r = NULL;
219     out->x_C = NULL;
220     out->b_C = NULL;
221     out->AMG_C = NULL;
222 gross 3323 out->Smoother=NULL;
223     }
224     mask_C=TMPMEMALLOC(n,index_t);
225     rows_in_F=TMPMEMALLOC(n_F,index_t);
226     Esys_checkPtr(mask_C);
227     Esys_checkPtr(rows_in_F);
228     if ( Esys_noError() ) {
229    
230 gross 3442 out->Smoother = Paso_Preconditioner_Smoother_alloc(A_p, (options->smoother == PASO_JACOBI), verbose);
231 gross 3323
232 gross 3402 if (n_C != 0) {
233     /* if nothing is been removed we have a diagonal dominant matrix and we just run a few steps of the smoother */
234 gross 3283
235 gross 3315 /* allocate helpers :*/
236     out->x_C=MEMALLOC(n_block*n_C,double);
237     out->b_C=MEMALLOC(n_block*n_C,double);
238     out->r=MEMALLOC(n_block*n,double);
239    
240     Esys_checkPtr(out->r);
241     Esys_checkPtr(out->x_C);
242     Esys_checkPtr(out->b_C);
243    
244 gross 3323 if ( Esys_noError() ) {
245     /* creates index for F:*/
246     #pragma omp parallel private(i)
247     {
248     #pragma omp for schedule(static)
249     for (i = 0; i < n; ++i) {
250 gross 3440 if (F_marker[i]) rows_in_F[counter[i]]=i;
251 gross 3323 }
252     }
253     /* create mask of C nodes with value >-1 gives new id */
254 gross 3440 i=Paso_Util_cumsum_maskedFalse(n,counter, F_marker);
255 gross 3193
256 gross 3323 #pragma omp parallel for private(i) schedule(static)
257     for (i = 0; i < n; ++i) {
258 gross 3440 if (F_marker[i]) {
259 gross 3323 mask_C[i]=-1;
260     } else {
261     mask_C[i]=counter[i];;
262     }
263 gross 3283 }
264 gross 3323 /*
265 gross 3440 get Prolongation :
266 gross 3323 */
267     time0=Esys_timer();
268 gross 3442 /*MPI:
269 gross 3440 out->P=Paso_Preconditioner_AMG_getProlongation(A_p,A_p->pattern->ptr, degree_S,S,n_C,mask_C, options->interpolation_method);
270 gross 3442 */
271 gross 3323 if (SHOW_TIMING) printf("timing: level %d: getProlongation: %e\n",level, Esys_timer()-time0);
272 gross 3283 }
273 gross 3323 /*
274 gross 3440 construct Restriction operator as transposed of Prolongation operator:
275 gross 3283 */
276 gross 3315 if ( Esys_noError()) {
277     time0=Esys_timer();
278 gross 3442 /*MPI:
279 gross 3441 out->R=Paso_SystemMatrix_getTranspose(out->P);
280 gross 3442 */
281 gross 3441 if (SHOW_TIMING) printf("timing: level %d: Paso_SystemMatrix_getTranspose: %e\n",level,Esys_timer()-time0);
282 gross 3315 }
283 gross 3283 /*
284 gross 3312 construct coarse level matrix:
285 gross 3283 */
286 gross 3315 if ( Esys_noError()) {
287     time0=Esys_timer();
288 gross 3442 /*MPI:
289 gross 3441 Atemp=Paso_SystemMatrix_MatrixMatrix(A_p,out->P);
290     A_C=Paso_SystemMatrix_MatrixMatrix(out->R,Atemp);
291 gross 3442
292 gross 3441 Paso_SystemMatrix_free(Atemp);
293 gross 3442 */
294    
295 gross 3315 if (SHOW_TIMING) printf("timing: level %d : construct coarse matrix: %e\n",level,Esys_timer()-time0);
296     }
297    
298 gross 3283
299     /*
300     constructe courser level:
301    
302     */
303     if ( Esys_noError()) {
304 gross 3441 out->AMG_C=Paso_Preconditioner_AMG_alloc(A_C,level+1,options);
305 gross 3315 }
306     if ( Esys_noError()) {
307 gross 3283 if ( out->AMG_C == NULL ) {
308     out->reordering = options->reordering;
309     out->refinements = options->coarse_matrix_refinements;
310     /* no coarse level matrix has been constructed. use direct solver */
311     #ifdef MKL
312 gross 3441 out->A_C=Paso_SystemMatrix_unroll(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_OFFSET1, A_C);
313     Paso_SystemMatrix_free(A_C);
314 gross 3283 out->A_C->solver_package = PASO_MKL;
315 gross 3402 if (verbose) printf("Paso_Preconditioner: AMG: use MKL direct solver on the coarsest level (number of unknowns = %d).\n",n_C*n_block);
316 gross 3283 #else
317     #ifdef UMFPACK
318 gross 3441 out->A_C=Paso_SystemMatrix_unroll(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_CSC, A_C);
319     Paso_SystemMatrix_free(A_C);
320 gross 3283 out->A_C->solver_package = PASO_UMFPACK;
321 gross 3402 if (verbose) printf("Paso_Preconditioner: AMG: use UMFPACK direct solver on the coarsest level (number of unknowns = %d).\n",n_C*n_block);
322 gross 3283 #else
323     out->A_C=A_C;
324 gross 3442 out->A_C->solver_p=Paso_Preconditioner_Smoother_alloc(out->A_C, (options->smoother == PASO_JACOBI), verbose);
325 gross 3283 out->A_C->solver_package = PASO_SMOOTHER;
326 gross 3402 if (verbose) printf("Paso_Preconditioner: AMG: use smoother on the coarsest level (number of unknowns = %d).\n",n_C*n_block);
327 gross 3283 #endif
328     #endif
329     } else {
330     /* finally we set some helpers for the solver step */
331     out->A_C=A_C;
332     }
333     }
334     }
335     }
336     TMPMEMFREE(mask_C);
337     TMPMEMFREE(rows_in_F);
338     }
339     }
340 gross 3445 #endif
341    
342 gross 3193 }
343     TMPMEMFREE(counter);
344 gross 3440 TMPMEMFREE(F_marker);
345     TMPMEMFREE(degree_S);
346 gross 3445 TMPMEMFREE(offset_S);
347 gross 3303 TMPMEMFREE(S);
348 gross 3445 }
349 gross 3193
350 jfenwick 3259 if (Esys_noError()) {
351 gross 3193 return out;
352     } else {
353 gross 3441 Paso_Preconditioner_AMG_free(out);
354 gross 3193 return NULL;
355     }
356     }
357    
358    
359 gross 3441 void Paso_Preconditioner_AMG_solve(Paso_SystemMatrix* A, Paso_Preconditioner_AMG * amg, double * x, double * b) {
360 gross 3283 const dim_t n = amg->n * amg->n_block;
361     double time0=0;
362     const dim_t post_sweeps=amg->post_sweeps;
363     const dim_t pre_sweeps=amg->pre_sweeps;
364 gross 3193
365 gross 3283 /* presmoothing */
366     time0=Esys_timer();
367 gross 3442 Paso_Preconditioner_Smoother_solve(A, amg->Smoother, x, b, pre_sweeps, FALSE);
368 gross 3283 time0=Esys_timer()-time0;
369     if (SHOW_TIMING) printf("timing: level %d: Presmooting: %e\n",amg->level, time0);
370     /* end of presmoothing */
371    
372     if (amg->n_F < amg->n) { /* is there work on the coarse level? */
373     time0=Esys_timer();
374     Paso_Copy(n, amg->r, b); /* r <- b */
375 gross 3441 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(-1.,A,x,1.,amg->r); /*r=r-Ax*/
376 gross 3445 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(1.,amg->R,amg->r,0.,amg->b_C); /* b_c = R*r */
377 gross 3283 time0=Esys_timer()-time0;
378     /* coarse level solve */
379     if ( amg->AMG_C == NULL) {
380     time0=Esys_timer();
381     /* A_C is the coarsest level */
382 gross 3445 #ifdef FIXME
383 gross 3283 switch (amg->A_C->solver_package) {
384     case (PASO_MKL):
385     Paso_MKL(amg->A_C, amg->x_C,amg->b_C, amg->reordering, amg->refinements, SHOW_TIMING);
386     break;
387     case (PASO_UMFPACK):
388     Paso_UMFPACK(amg->A_C, amg->x_C,amg->b_C, amg->refinements, SHOW_TIMING);
389     break;
390     case (PASO_SMOOTHER):
391 gross 3442 Paso_Preconditioner_Smoother_solve(amg->A_C, amg->A_C->solver_p,amg->x_C,amg->b_C,pre_sweeps+post_sweeps, FALSE);
392 gross 3283 break;
393     }
394 gross 3445 #endif
395     Paso_Preconditioner_Smoother_solve(amg->A_C, amg->A_C->solver_p,amg->x_C,amg->b_C,pre_sweeps+post_sweeps, FALSE);
396 gross 3283 if (SHOW_TIMING) printf("timing: level %d: DIRECT SOLVER: %e\n",amg->level,Esys_timer()-time0);
397     } else {
398 gross 3441 Paso_Preconditioner_AMG_solve(amg->A_C, amg->AMG_C,amg->x_C,amg->b_C); /* x_C=AMG(b_C) */
399 gross 3283 }
400     time0=time0+Esys_timer();
401 gross 3445 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(1.,amg->P,amg->x_C,1.,x); /* x = x + P*x_c */
402 gross 3283
403     /*postsmoothing*/
404    
405     /*solve Ax=b with initial guess x */
406     time0=Esys_timer();
407 gross 3442 Paso_Preconditioner_Smoother_solve(A, amg->Smoother, x, b, post_sweeps, TRUE);
408 gross 3283 time0=Esys_timer()-time0;
409     if (SHOW_TIMING) printf("timing: level %d: Postsmoothing: %e\n",amg->level,time0);
410     /*end of postsmoothing*/
411    
412     }
413     return;
414     }
415 gross 3193
416 gross 3283 /* theta = threshold for strong connections */
417     /* tau = threshold for diagonal dominance */
418 gross 3193
419 gross 3303 /*S_i={j \in N_i; i strongly coupled to j}
420    
421     in the sense that |A_{ij}| >= theta * max_k |A_{ik}|
422     */
423    
424 gross 3441 void Paso_Preconditioner_AMG_setStrongConnections(Paso_SystemMatrix* A,
425 gross 3445 dim_t *degree_S, index_t *offset_S, index_t *S,
426 gross 3303 const double theta, const double tau)
427 gross 3283 {
428 gross 3445 const dim_t my_n=Paso_SystemMatrix_getNumRows(A);
429     index_t iptr, i;
430 gross 3193
431 gross 3303
432 gross 3445 #pragma omp parallel for private(i,iptr) schedule(static)
433     for (i=0;i<my_n;++i) {
434     register double max_offdiagonal = 0.;
435     register double sum_row=0;
436     register double main_row=0;
437 gross 3451 register dim_t kdeg=0;
438     register index_t koffset=A->mainBlock->pattern->ptr[i]+A->col_coupleBlock->pattern->ptr[i];
439 gross 3283 #pragma ivdep
440 gross 3445 for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
441     register index_t j=A->mainBlock->pattern->index[iptr];
442     register double fnorm=ABS(A->mainBlock->val[iptr]);
443 gross 3283
444     if( j != i) {
445     max_offdiagonal = MAX(max_offdiagonal,fnorm);
446     sum_row+=fnorm;
447     } else {
448     main_row=fnorm;
449     }
450 gross 3445
451 gross 3283 }
452 gross 3445 #pragma ivdep
453     for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
454     register double fnorm=ABS(A->col_coupleBlock->val[iptr]);
455     max_offdiagonal = MAX(max_offdiagonal,fnorm);
456     sum_row+=fnorm;
457     }
458    
459 gross 3451 {
460     const double threshold = theta*max_offdiagonal;
461     if (tau*main_row < sum_row) { /* no diagonal domainance */
462     #pragma ivdep
463     for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
464     register index_t j=A->mainBlock->pattern->index[iptr];
465     if(ABS(A->mainBlock->val[iptr])>threshold && i!=j) {
466     S[koffset+kdeg] = j;
467     kdeg++;
468     }
469 gross 3283 }
470 gross 3451 #pragma ivdep
471     for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
472     register index_t j=A->col_coupleBlock->pattern->index[iptr];
473     if(ABS(A->col_coupleBlock->val[iptr])>threshold) {
474     S[koffset+kdeg] = j + my_n;
475     kdeg++;
476     }
477 gross 3445 }
478 gross 3451 }
479 gross 3440 }
480 gross 3445 offset_S[i]=koffset;
481 gross 3440 degree_S[i]=kdeg;
482 gross 3283 }
483     }
484 gross 3193
485 gross 3283 /* theta = threshold for strong connections */
486     /* tau = threshold for diagonal dominance */
487 gross 3303 /*S_i={j \in N_i; i strongly coupled to j}
488 gross 3283
489 gross 3303 in the sense that |A_{ij}|_F >= theta * max_k |A_{ik}|_F
490     */
491 gross 3441 void Paso_Preconditioner_AMG_setStrongConnections_Block(Paso_SystemMatrix* A,
492 gross 3445 dim_t *degree_S, index_t *offset_S, index_t *S,
493 gross 3303 const double theta, const double tau)
494    
495 gross 3283 {
496 gross 3445 const dim_t my_n=Paso_SystemMatrix_getNumRows(A);
497     index_t iptr, i, bi;
498 gross 3283 const dim_t n_block=A->row_block_size;
499    
500    
501 gross 3445 #pragma omp parallel private(i,iptr, bi)
502 gross 3283 {
503 gross 3445 dim_t max_deg=0;
504 gross 3451 double *rtmp=TMPMEMALLOC(max_deg, double);
505    
506 gross 3283 #pragma omp for schedule(static)
507 gross 3445 for (i=0;i<my_n;++i) max_deg=MAX(max_deg, A->mainBlock->pattern->ptr[i+1]-A->mainBlock->pattern->ptr[i]
508     +A->col_coupleBlock->pattern->ptr[i+1]-A->col_coupleBlock->pattern->ptr[i]);
509 gross 3193
510 gross 3283
511     #pragma omp for schedule(static)
512 gross 3445 for (i=0;i<my_n;++i) {
513 gross 3193
514 gross 3445 register double max_offdiagonal = 0.;
515     register double sum_row=0;
516     register double main_row=0;
517     register index_t rtmp_offset=-A->mainBlock->pattern->ptr[i];
518 gross 3451 register dim_t kdeg=0;
519     register index_t koffset=A->mainBlock->pattern->ptr[i]+A->col_coupleBlock->pattern->ptr[i];
520 gross 3440
521 gross 3445 for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
522     register index_t j=A->mainBlock->pattern->index[iptr];
523     register double fnorm=0;
524 gross 3283 #pragma ivdep
525 gross 3445 for(bi=0;bi<n_block*n_block;++bi) {
526     register double rtmp2 = A->mainBlock->val[iptr*n_block*n_block+bi];
527     fnorm+=rtmp2*rtmp2;
528     }
529 gross 3283 fnorm=sqrt(fnorm);
530 gross 3445
531     rtmp[iptr+rtmp_offset]=fnorm;
532 gross 3283 if( j != i) {
533     max_offdiagonal = MAX(max_offdiagonal,fnorm);
534     sum_row+=fnorm;
535     } else {
536     main_row=fnorm;
537     }
538     }
539 gross 3445 rtmp_offset=A->mainBlock->pattern->ptr[i+1]-A->mainBlock->pattern->ptr[i]-A->col_coupleBlock->pattern->ptr[i];
540     for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
541     register double fnorm=0;
542     #pragma ivdep
543     for(bi=0;bi<n_block*n_block;++bi) {
544     register double rtmp2 = A->col_coupleBlock->val[iptr*n_block*n_block+bi];
545     fnorm+=rtmp2*rtmp2;
546     }
547     fnorm=sqrt(fnorm);
548    
549     rtmp[iptr+rtmp_offset]=fnorm;
550     max_offdiagonal = MAX(max_offdiagonal,fnorm);
551     sum_row+=fnorm;
552     }
553 gross 3451 {
554     const double threshold = theta*max_offdiagonal;
555 gross 3445
556 gross 3451 if (tau*main_row < sum_row) { /* no diagonal domainance */
557     rtmp_offset=-A->mainBlock->pattern->ptr[i];
558     #pragma ivdep
559     for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
560     register index_t j=A->mainBlock->pattern->index[iptr];
561     if(rtmp[iptr+rtmp_offset] > threshold && i!=j) {
562     S[koffset+kdeg] = j;
563     kdeg++;
564     }
565     }
566     rtmp_offset=A->mainBlock->pattern->ptr[i+1]-A->mainBlock->pattern->ptr[i]-A->col_coupleBlock->pattern->ptr[i];
567     #pragma ivdep
568     for (iptr=A->col_coupleBlock->pattern->ptr[i]; iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
569     register index_t j=A->col_coupleBlock->pattern->index[iptr];
570     if(rtmp[iptr+rtmp_offset] > threshold) {
571     S[koffset+kdeg] = j + my_n;
572     kdeg++;
573     }
574     }
575     }
576     }
577 gross 3440 degree_S[i]=kdeg;
578 gross 3445 offset_S[i]=koffset;
579 gross 3283 }
580     TMPMEMFREE(rtmp);
581     } /* end of parallel region */
582 gross 3303
583 gross 3283 }
584    
585 gross 3445 #ifdef AAAAA
586 gross 3283 /* the runge stueben coarsening algorithm: */
587 gross 3440 void Paso_Preconditioner_AMG_RungeStuebenSearch(const dim_t n, const index_t* offset_S,
588     const dim_t* degree_S, const index_t* S,
589 gross 3352 index_t*split_marker, const bool_t usePanel)
590 gross 3283 {
591    
592 gross 3351 index_t *lambda=NULL, *ST=NULL, *notInPanel=NULL, *panel=NULL, lambda_max, lambda_k;
593 gross 3440 dim_t i,k, p, q, *degree_ST=NULL, len_panel, len_panel_new;
594 gross 3351 register index_t j, itmp;
595    
596 gross 3283 if (n<=0) return; /* make sure that the return of Paso_Util_arg_max is not pointing to nirvana */
597    
598 gross 3351 lambda=TMPMEMALLOC(n, index_t); Esys_checkPtr(lambda);
599 gross 3440 degree_ST=TMPMEMALLOC(n, dim_t); Esys_checkPtr(degree_ST);
600     ST=TMPMEMALLOC(offset_S[n], index_t); Esys_checkPtr(ST);
601 gross 3351 if (usePanel) {
602     notInPanel=TMPMEMALLOC(n, bool_t); Esys_checkPtr(notInPanel);
603     panel=TMPMEMALLOC(n, index_t); Esys_checkPtr(panel);
604     }
605 gross 3283
606 gross 3351
607    
608     if (Esys_noError() ) {
609 gross 3283 /* initialize split_marker and split_marker :*/
610     /* those unknows which are not influenced go into F, the rest is available for F or C */
611     #pragma omp parallel for private(i) schedule(static)
612     for (i=0;i<n;++i) {
613 gross 3440 degree_ST[i]=0;
614     if (degree_S[i]>0) {
615 gross 3283 lambda[i]=0;
616     split_marker[i]=PASO_AMG_UNDECIDED;
617     } else {
618     split_marker[i]=PASO_AMG_IN_F;
619     lambda[i]=-1;
620     }
621     }
622     /* create transpose :*/
623     for (i=0;i<n;++i) {
624 gross 3440 for (p=0; p<degree_S[i]; ++p) {
625     j=S[offset_S[i]+p];
626     ST[offset_S[j]+degree_ST[j]]=i;
627     degree_ST[j]++;
628 gross 3283 }
629     }
630     /* lambda[i] = |undecided k in ST[i]| + 2 * |F-unknown in ST[i]| */
631     #pragma omp parallel for private(i, j, itmp) schedule(static)
632     for (i=0;i<n;++i) {
633     if (split_marker[i]==PASO_AMG_UNDECIDED) {
634     itmp=lambda[i];
635 gross 3440 for (p=0; p<degree_ST[i]; ++p) {
636     j=ST[offset_S[i]+p];
637 gross 3283 if (split_marker[j]==PASO_AMG_UNDECIDED) {
638     itmp++;
639     } else { /* at this point there are no C points */
640     itmp+=2;
641     }
642     }
643     lambda[i]=itmp;
644     }
645     }
646 gross 3351 if (usePanel) {
647     #pragma omp parallel for private(i) schedule(static)
648     for (i=0;i<n;++i) notInPanel[i]=TRUE;
649     }
650 gross 3283 /* start search :*/
651     i=Paso_Util_arg_max(n,lambda);
652 gross 3351 while (lambda[i]>-1) { /* is there any undecided unknown? */
653    
654     if (usePanel) {
655     len_panel=0;
656     do {
657     /* the unknown i is moved to C */
658     split_marker[i]=PASO_AMG_IN_C;
659     lambda[i]=-1; /* lambda from unavailable unknowns is set to -1 */
660    
661     /* all undecided unknown strongly coupled to i are moved to F */
662 gross 3440 for (p=0; p<degree_ST[i]; ++p) {
663     j=ST[offset_S[i]+p];
664 gross 3351
665     if (split_marker[j]==PASO_AMG_UNDECIDED) {
666    
667     split_marker[j]=PASO_AMG_IN_F;
668     lambda[j]=-1;
669    
670 gross 3440 for (q=0; q<degree_ST[j]; ++q) {
671     k=ST[offset_S[j]+q];
672 gross 3351 if (split_marker[k]==PASO_AMG_UNDECIDED) {
673     lambda[k]++;
674     if (notInPanel[k]) {
675     notInPanel[k]=FALSE;
676     panel[len_panel]=k;
677     len_panel++;
678     }
679    
680     } /* the unknown i is moved to C */
681     split_marker[i]=PASO_AMG_IN_C;
682     lambda[i]=-1; /* lambda from unavailable unknowns is set to -1 */
683     }
684    
685     }
686 gross 3283 }
687 gross 3440 for (p=0; p<degree_S[i]; ++p) {
688     j=S[offset_S[i]+p];
689 gross 3351 if (split_marker[j]==PASO_AMG_UNDECIDED) {
690     lambda[j]--;
691     if (notInPanel[j]) {
692     notInPanel[j]=FALSE;
693     panel[len_panel]=j;
694     len_panel++;
695     }
696     }
697     }
698 gross 3193
699 gross 3351 /* consolidate panel */
700     /* remove lambda[q]=-1 */
701     lambda_max=-1;
702     i=-1;
703     len_panel_new=0;
704     for (q=0; q<len_panel; q++) {
705     k=panel[q];
706     lambda_k=lambda[k];
707     if (split_marker[k]==PASO_AMG_UNDECIDED) {
708     panel[len_panel_new]=k;
709     len_panel_new++;
710    
711     if (lambda_max == lambda_k) {
712     if (k<i) i=k;
713     } else if (lambda_max < lambda_k) {
714     lambda_max =lambda_k;
715     i=k;
716     }
717     }
718     }
719     len_panel=len_panel_new;
720     } while (len_panel>0);
721     } else {
722     /* the unknown i is moved to C */
723     split_marker[i]=PASO_AMG_IN_C;
724     lambda[i]=-1; /* lambda from unavailable unknowns is set to -1 */
725    
726     /* all undecided unknown strongly coupled to i are moved to F */
727 gross 3440 for (p=0; p<degree_ST[i]; ++p) {
728     j=ST[offset_S[i]+p];
729 gross 3351 if (split_marker[j]==PASO_AMG_UNDECIDED) {
730    
731     split_marker[j]=PASO_AMG_IN_F;
732     lambda[j]=-1;
733    
734 gross 3440 for (q=0; q<degree_ST[j]; ++q) {
735     k=ST[offset_S[j]+q];
736 gross 3351 if (split_marker[k]==PASO_AMG_UNDECIDED) lambda[k]++;
737     }
738    
739     }
740 gross 3283 }
741 gross 3440 for (p=0; p<degree_S[i]; ++p) {
742     j=S[offset_S[i]+p];
743 gross 3351 if(split_marker[j]==PASO_AMG_UNDECIDED) lambda[j]--;
744     }
745    
746 gross 3283 }
747     i=Paso_Util_arg_max(n,lambda);
748     }
749 gross 3440
750 gross 3283 }
751     TMPMEMFREE(lambda);
752     TMPMEMFREE(ST);
753 gross 3440 TMPMEMFREE(degree_ST);
754 gross 3351 TMPMEMFREE(panel);
755     TMPMEMFREE(notInPanel);
756 gross 3193 }
757 gross 3440 /* ensures that two F nodes are connected via a C node :*/
758     void Paso_Preconditioner_AMG_enforceFFConnectivity(const dim_t n, const index_t* offset_S,
759     const dim_t* degree_S, const index_t* S,
760     index_t*split_marker)
761     {
762     dim_t i, p, q;
763    
764     /* now we make sure that two (strongly) connected F nodes are (strongly) connected via a C node. */
765     for (i=0;i<n;++i) {
766     if ( (split_marker[i]==PASO_AMG_IN_F) && (degree_S[i]>0) ) {
767     for (p=0; p<degree_S[i]; ++p) {
768     register index_t j=S[offset_S[i]+p];
769     if ( (split_marker[j]==PASO_AMG_IN_F) && (degree_S[j]>0) ) {
770     /* i and j are now two F nodes which are strongly connected */
771     /* is there a C node they share ? */
772     register index_t sharing=-1;
773     for (q=0; q<degree_S[i]; ++q) {
774     index_t k=S[offset_S[i]+q];
775     if (split_marker[k]==PASO_AMG_IN_C) {
776     register index_t* where_k=(index_t*)bsearch(&k, &(S[offset_S[j]]), degree_S[j], sizeof(index_t), Paso_comparIndex);
777     if (where_k != NULL) {
778     sharing=k;
779     break;
780     }
781     }
782     }
783     if (sharing<0) {
784     if (i<j) {
785     split_marker[j]=PASO_AMG_IN_C;
786     } else {
787     split_marker[i]=PASO_AMG_IN_C;
788     break; /* no point to look any further as i is now a C node */
789     }
790     }
791     }
792     }
793     }
794     }
795     }
796 gross 3445 #endif
797 gross 3442
798 gross 3445 #ifdef DFG
799 gross 3442 void Paso_Preconditioner_AMG_CIJPCoarsening( )
800     {
801 gross 3445
802    
803 gross 3442 const dim_t my_n;
804     const dim_t overlap_n;
805     const dim_t n= my_n + overlap_n;
806 gross 3449
807    
808     /* initialize split_marker and split_marker :*/
809     /* those unknows which are not influenced go into F, the rest is available for F or C */
810     #pragma omp parallel for private(i) schedule(static)
811     for (i=0;i<n;++i) {
812     degree_ST[i]=0;
813     if (degree_S[i]>0) {
814     lambda[i]=0;
815     split_marker[i]=PASO_AMG_UNDECIDED;
816     } else {
817     split_marker[i]=PASO_AMG_IN_F;
818     lambda[i]=-1;
819     }
820     }
821    
822 gross 3442 /* set local lambda + overlap */
823     #pragma omp parallel for private(i)
824     for (i=0; i<n ++i) {
825     w[i]=degree_ST[i];
826     }
827     for (i=0; i<my_n; i++) {
828     w2[i]=random;
829     }
830    
831    
832 gross 3445 /* add noise to w */
833     Paso_Coupler_add(n, w, 1., w2, col_coupler);
834    
835 gross 3442 /* */
836     global_n_C=0;
837     global_n_F=..;
838    
839     while (global_n_C + global_n_F < global_n) {
840    
841    
842     is_in_D[i]=FALSE;
843     /* test local connectivit*/
844 gross 3445 /* w2[i] = max(w[k] | k in S_i or k in S^T_i */
845     #pragma omp parallel for private(i)
846     for (i=0; i<n; ++i) w2[i]=0;
847    
848     for (i=0; i<my_n; ++i) {
849     for( iPtr =0 ; iPtr < degree_S[i]; ++iPtr) {
850     k=S[offset_S[i]+iPtr];
851 gross 3442 w2[i]=MAX(w2[i],w[k]);
852     w2[k]=MAX(w2[k],w[i]);
853 gross 3445 }
854     }
855 gross 3442 /* adjust overlaps by MAX */
856 gross 3445 Paso_Coupler_max(n, w2, col_coupler);
857    
858 gross 3442 /* points with w[i]>w2[i] become C nodes */
859     }
860    
861     }
862 gross 3445 #endif

  ViewVC Help
Powered by ViewVC 1.1.26