/[escript]/trunk/paso/src/AMG.c
ViewVC logotype

Annotation of /trunk/paso/src/AMG.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3449 - (hide annotations)
Fri Jan 21 05:48:15 2011 UTC (8 years, 9 months ago) by gross
File MIME type: text/plain
File size: 27513 byte(s)
compile problem in AMG fixed.
1 gross 3193
2     /*******************************************************
3     *
4     * Copyright (c) 2003-2010 by University of Queensland
5     * Earth Systems Science Computational Center (ESSCC)
6     * http://www.uq.edu.au/esscc
7     *
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13    
14    
15     /**************************************************************/
16    
17 gross 3441 /* Paso: AMG preconditioner (local version) */
18 gross 3193
19     /**************************************************************/
20    
21 gross 3303 /* Author: artak@uq.edu.au, l.gross@uq.edu.au */
22 gross 3193
23     /**************************************************************/
24    
25 gross 3283 #define SHOW_TIMING FALSE
26    
27 gross 3193 #include "Paso.h"
28     #include "Preconditioner.h"
29     #include "Options.h"
30     #include "PasoUtil.h"
31     #include "UMFPACK.h"
32     #include "MKL.h"
33    
34     /**************************************************************/
35    
36     /* free all memory used by AMG */
37    
38 gross 3441 void Paso_Preconditioner_AMG_free(Paso_Preconditioner_AMG * in) {
39 gross 3193 if (in!=NULL) {
40 gross 3441 Paso_Preconditioner_Smoother_free(in->Smoother);
41     Paso_SystemMatrix_free(in->P);
42     Paso_SystemMatrix_free(in->R);
43     Paso_SystemMatrix_free(in->A_C);
44     Paso_Preconditioner_AMG_free(in->AMG_C);
45 gross 3283 MEMFREE(in->r);
46     MEMFREE(in->x_C);
47     MEMFREE(in->b_C);
48 gross 3193
49 gross 3283 MEMFREE(in);
50 gross 3193 }
51     }
52    
53 gross 3441 index_t Paso_Preconditioner_AMG_getMaxLevel(const Paso_Preconditioner_AMG * in) {
54 gross 3402 if (in->AMG_C == NULL) {
55     return in->level;
56     } else {
57 gross 3441 return Paso_Preconditioner_AMG_getMaxLevel(in->AMG_C);
58 gross 3402 }
59     }
60 gross 3441 double Paso_Preconditioner_AMG_getCoarseLevelSparsity(const Paso_Preconditioner_AMG * in) {
61 gross 3403 if (in->AMG_C == NULL) {
62     if (in->A_C == NULL) {
63     return 1.;
64     } else {
65 gross 3442 return Paso_SystemMatrix_getSparsity(in->A_C);
66 gross 3403 }
67     } else {
68 gross 3441 return Paso_Preconditioner_AMG_getCoarseLevelSparsity(in->AMG_C);
69 gross 3403 }
70 gross 3402 }
71 gross 3441 dim_t Paso_Preconditioner_AMG_getNumCoarseUnknwons(const Paso_Preconditioner_AMG * in) {
72 gross 3402 if (in->AMG_C == NULL) {
73 gross 3403 if (in->A_C == NULL) {
74     return 0;
75     } else {
76 gross 3445 return Paso_SystemMatrix_getTotalNumRows(in->A_C);
77 gross 3403 }
78 gross 3402 } else {
79 gross 3441 return Paso_Preconditioner_AMG_getNumCoarseUnknwons(in->AMG_C);
80 gross 3402 }
81     }
82 gross 3283 /*****************************************************************
83 gross 3193
84 gross 3283 constructs AMG
85    
86     ******************************************************************/
87 gross 3441 Paso_Preconditioner_AMG* Paso_Preconditioner_AMG_alloc(Paso_SystemMatrix *A_p,dim_t level,Paso_Options* options) {
88 gross 3193
89 gross 3441 Paso_Preconditioner_AMG* out=NULL;
90 gross 3193 bool_t verbose=options->verbose;
91 gross 3441 Paso_SystemMatrix *Atemp=NULL, *A_C=NULL;
92 gross 3445
93     const dim_t my_n=Paso_SystemMatrix_getNumRows(A_p);
94     const dim_t overlap_n=Paso_SystemMatrix_getColOverlap(A_p);
95     const dim_t n = my_n + overlap_n;
96    
97 gross 3193 const dim_t n_block=A_p->row_block_size;
98 gross 3445 index_t* F_marker=NULL, *counter=NULL, *mask_C=NULL, *rows_in_F=NULL;
99 gross 3283 dim_t n_F=0, n_C=0, i;
100     double time0=0;
101     const double theta = options->coarsening_threshold;
102     const double tau = options->diagonal_dominance_threshold;
103 gross 3442 const double sparsity=Paso_SystemMatrix_getSparsity(A_p);
104     const dim_t total_n=Paso_SystemMatrix_getGlobalTotalNumRows(A_p);
105 plaub 3436
106 gross 3193
107     /*
108 gross 3283 is the input matrix A suitable for coarsening
109    
110 gross 3193 */
111 gross 3442 if ( (sparsity >= options->min_coarse_sparsity) ||
112     (total_n <= options->min_coarse_matrix_size) ||
113     (level > options->level_max) ) {
114 plaub 3436
115 gross 3445 if (verbose) {
116 plaub 3436 /*
117     print stopping condition:
118     - 'SPAR' = min_coarse_matrix_sparsity exceeded
119     - 'SIZE' = min_coarse_matrix_size exceeded
120     - 'LEVEL' = level_max exceeded
121     */
122 gross 3442 printf("Paso_Preconditioner: AMG: termination of coarsening by ");
123 plaub 3436
124 gross 3442 if (sparsity >= options->min_coarse_sparsity)
125 plaub 3436 printf("SPAR ");
126    
127 gross 3442 if (total_n <= options->min_coarse_matrix_size)
128 plaub 3436 printf("SIZE ");
129    
130     if (level > options->level_max)
131     printf("LEVEL ");
132    
133     printf("\n");
134    
135     printf("Paso_Preconditioner: AMG level %d (limit = %d) stopped. sparsity = %e (limit = %e), unknowns = %d (limit = %d)\n",
136 gross 3442 level, options->level_max, sparsity, options->min_coarse_sparsity, total_n, options->min_coarse_matrix_size);
137 plaub 3436
138 gross 3445 }
139 plaub 3436
140 gross 3445 return NULL;
141     } else {
142     /* Start Coarsening : */
143     const dim_t len_S=A_p->mainBlock->pattern->len+A_p->col_coupleBlock->pattern->len;
144 gross 3193
145 gross 3440 F_marker=TMPMEMALLOC(n,index_t);
146 gross 3283 counter=TMPMEMALLOC(n,index_t);
147 gross 3445
148     dim_t* degree_S=TMPMEMALLOC(my_n, dim_t);
149     index_t *offset_S=TMPMEMALLOC(my_n, index_t);
150     index_t *S=TMPMEMALLOC(len_S, index_t);
151     if ( !( Esys_checkPtr(F_marker) || Esys_checkPtr(counter) || Esys_checkPtr(degree_S) || Esys_checkPtr(offset_S) || Esys_checkPtr(S) ) ) {
152 gross 3283 /*
153     set splitting of unknows:
154    
155     */
156     time0=Esys_timer();
157     if (n_block>1) {
158 gross 3445 Paso_Preconditioner_AMG_setStrongConnections_Block(A_p, degree_S, offset_S, S, theta,tau);
159 gross 3283 } else {
160 gross 3445 Paso_Preconditioner_AMG_setStrongConnections(A_p, degree_S, offset_S, S, theta,tau);
161 gross 3283 }
162 gross 3449 {
163     dim_t p;
164     for (i=0; i< my_n; ++i) {
165     printf("%d: ",i);
166     for (p=0; p<degree_S[i];++p) printf("%d ",S[offset_S[i]+p]);
167     printf("\n");
168     }
169     }
170    
171 gross 3442
172     /*MPI:
173 gross 3440 Paso_Preconditioner_AMG_RungeStuebenSearch(n, A_p->pattern->ptr, degree_S, S, F_marker, options->usePanel);
174 gross 3442 */
175    
176 gross 3440 /* in BoomerAMG interpolation is used FF connectiovity is required :*/
177 gross 3442 /*MPI:
178 gross 3440 if (options->interpolation_method == PASO_CLASSIC_INTERPOLATION_WITH_FF_COUPLING)
179     Paso_Preconditioner_AMG_enforceFFConnectivity(n, A_p->pattern->ptr, degree_S, S, F_marker);
180 gross 3442 */
181    
182 gross 3283 options->coarsening_selection_time=Esys_timer()-time0 + MAX(0, options->coarsening_selection_time);
183 gross 3445
184     #ifdef AAAAA
185 gross 3283 if (Esys_noError() ) {
186     #pragma omp parallel for private(i) schedule(static)
187 gross 3440 for (i = 0; i < n; ++i) F_marker[i]=(F_marker[i] == PASO_AMG_IN_F);
188 gross 3283
189     /*
190     count number of unkowns to be eliminated:
191     */
192 gross 3440 n_F=Paso_Util_cumsum_maskedTrue(n,counter, F_marker);
193 gross 3283 n_C=n-n_F;
194 gross 3402 if (verbose) printf("Paso_Preconditioner: AMG level %d: %d unknowns are flagged for elimination. %d left.\n",level,n_F,n-n_F);
195 gross 3283
196     if ( n_F == 0 ) { /* is a nasty case. a direct solver should be used, return NULL */
197     out = NULL;
198     } else {
199 gross 3441 out=MEMALLOC(1,Paso_Preconditioner_AMG);
200 gross 3323 if (! Esys_checkPtr(out)) {
201 gross 3283 out->level = level;
202     out->n = n;
203     out->n_F = n_F;
204     out->n_block = n_block;
205     out->A_C = NULL;
206     out->P = NULL;
207     out->R = NULL;
208     out->post_sweeps = options->post_sweeps;
209     out->pre_sweeps = options->pre_sweeps;
210     out->r = NULL;
211     out->x_C = NULL;
212     out->b_C = NULL;
213     out->AMG_C = NULL;
214 gross 3323 out->Smoother=NULL;
215     }
216     mask_C=TMPMEMALLOC(n,index_t);
217     rows_in_F=TMPMEMALLOC(n_F,index_t);
218     Esys_checkPtr(mask_C);
219     Esys_checkPtr(rows_in_F);
220     if ( Esys_noError() ) {
221    
222 gross 3442 out->Smoother = Paso_Preconditioner_Smoother_alloc(A_p, (options->smoother == PASO_JACOBI), verbose);
223 gross 3323
224 gross 3402 if (n_C != 0) {
225     /* if nothing is been removed we have a diagonal dominant matrix and we just run a few steps of the smoother */
226 gross 3283
227 gross 3315 /* allocate helpers :*/
228     out->x_C=MEMALLOC(n_block*n_C,double);
229     out->b_C=MEMALLOC(n_block*n_C,double);
230     out->r=MEMALLOC(n_block*n,double);
231    
232     Esys_checkPtr(out->r);
233     Esys_checkPtr(out->x_C);
234     Esys_checkPtr(out->b_C);
235    
236 gross 3323 if ( Esys_noError() ) {
237     /* creates index for F:*/
238     #pragma omp parallel private(i)
239     {
240     #pragma omp for schedule(static)
241     for (i = 0; i < n; ++i) {
242 gross 3440 if (F_marker[i]) rows_in_F[counter[i]]=i;
243 gross 3323 }
244     }
245     /* create mask of C nodes with value >-1 gives new id */
246 gross 3440 i=Paso_Util_cumsum_maskedFalse(n,counter, F_marker);
247 gross 3193
248 gross 3323 #pragma omp parallel for private(i) schedule(static)
249     for (i = 0; i < n; ++i) {
250 gross 3440 if (F_marker[i]) {
251 gross 3323 mask_C[i]=-1;
252     } else {
253     mask_C[i]=counter[i];;
254     }
255 gross 3283 }
256 gross 3323 /*
257 gross 3440 get Prolongation :
258 gross 3323 */
259     time0=Esys_timer();
260 gross 3442 /*MPI:
261 gross 3440 out->P=Paso_Preconditioner_AMG_getProlongation(A_p,A_p->pattern->ptr, degree_S,S,n_C,mask_C, options->interpolation_method);
262 gross 3442 */
263 gross 3323 if (SHOW_TIMING) printf("timing: level %d: getProlongation: %e\n",level, Esys_timer()-time0);
264 gross 3283 }
265 gross 3323 /*
266 gross 3440 construct Restriction operator as transposed of Prolongation operator:
267 gross 3283 */
268 gross 3315 if ( Esys_noError()) {
269     time0=Esys_timer();
270 gross 3442 /*MPI:
271 gross 3441 out->R=Paso_SystemMatrix_getTranspose(out->P);
272 gross 3442 */
273 gross 3441 if (SHOW_TIMING) printf("timing: level %d: Paso_SystemMatrix_getTranspose: %e\n",level,Esys_timer()-time0);
274 gross 3315 }
275 gross 3283 /*
276 gross 3312 construct coarse level matrix:
277 gross 3283 */
278 gross 3315 if ( Esys_noError()) {
279     time0=Esys_timer();
280 gross 3442 /*MPI:
281 gross 3441 Atemp=Paso_SystemMatrix_MatrixMatrix(A_p,out->P);
282     A_C=Paso_SystemMatrix_MatrixMatrix(out->R,Atemp);
283 gross 3442
284 gross 3441 Paso_SystemMatrix_free(Atemp);
285 gross 3442 */
286    
287 gross 3315 if (SHOW_TIMING) printf("timing: level %d : construct coarse matrix: %e\n",level,Esys_timer()-time0);
288     }
289    
290 gross 3283
291     /*
292     constructe courser level:
293    
294     */
295     if ( Esys_noError()) {
296 gross 3441 out->AMG_C=Paso_Preconditioner_AMG_alloc(A_C,level+1,options);
297 gross 3315 }
298     if ( Esys_noError()) {
299 gross 3283 if ( out->AMG_C == NULL ) {
300     out->reordering = options->reordering;
301     out->refinements = options->coarse_matrix_refinements;
302     /* no coarse level matrix has been constructed. use direct solver */
303     #ifdef MKL
304 gross 3441 out->A_C=Paso_SystemMatrix_unroll(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_OFFSET1, A_C);
305     Paso_SystemMatrix_free(A_C);
306 gross 3283 out->A_C->solver_package = PASO_MKL;
307 gross 3402 if (verbose) printf("Paso_Preconditioner: AMG: use MKL direct solver on the coarsest level (number of unknowns = %d).\n",n_C*n_block);
308 gross 3283 #else
309     #ifdef UMFPACK
310 gross 3441 out->A_C=Paso_SystemMatrix_unroll(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_CSC, A_C);
311     Paso_SystemMatrix_free(A_C);
312 gross 3283 out->A_C->solver_package = PASO_UMFPACK;
313 gross 3402 if (verbose) printf("Paso_Preconditioner: AMG: use UMFPACK direct solver on the coarsest level (number of unknowns = %d).\n",n_C*n_block);
314 gross 3283 #else
315     out->A_C=A_C;
316 gross 3442 out->A_C->solver_p=Paso_Preconditioner_Smoother_alloc(out->A_C, (options->smoother == PASO_JACOBI), verbose);
317 gross 3283 out->A_C->solver_package = PASO_SMOOTHER;
318 gross 3402 if (verbose) printf("Paso_Preconditioner: AMG: use smoother on the coarsest level (number of unknowns = %d).\n",n_C*n_block);
319 gross 3283 #endif
320     #endif
321     } else {
322     /* finally we set some helpers for the solver step */
323     out->A_C=A_C;
324     }
325     }
326     }
327     }
328     TMPMEMFREE(mask_C);
329     TMPMEMFREE(rows_in_F);
330     }
331     }
332 gross 3445 #endif
333    
334 gross 3193 }
335     TMPMEMFREE(counter);
336 gross 3440 TMPMEMFREE(F_marker);
337     TMPMEMFREE(degree_S);
338 gross 3445 TMPMEMFREE(offset_S);
339 gross 3303 TMPMEMFREE(S);
340 gross 3445 }
341 gross 3193
342 jfenwick 3259 if (Esys_noError()) {
343 gross 3193 return out;
344     } else {
345 gross 3441 Paso_Preconditioner_AMG_free(out);
346 gross 3193 return NULL;
347     }
348     }
349    
350    
351 gross 3441 void Paso_Preconditioner_AMG_solve(Paso_SystemMatrix* A, Paso_Preconditioner_AMG * amg, double * x, double * b) {
352 gross 3283 const dim_t n = amg->n * amg->n_block;
353     double time0=0;
354     const dim_t post_sweeps=amg->post_sweeps;
355     const dim_t pre_sweeps=amg->pre_sweeps;
356 gross 3193
357 gross 3283 /* presmoothing */
358     time0=Esys_timer();
359 gross 3442 Paso_Preconditioner_Smoother_solve(A, amg->Smoother, x, b, pre_sweeps, FALSE);
360 gross 3283 time0=Esys_timer()-time0;
361     if (SHOW_TIMING) printf("timing: level %d: Presmooting: %e\n",amg->level, time0);
362     /* end of presmoothing */
363    
364     if (amg->n_F < amg->n) { /* is there work on the coarse level? */
365     time0=Esys_timer();
366     Paso_Copy(n, amg->r, b); /* r <- b */
367 gross 3441 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(-1.,A,x,1.,amg->r); /*r=r-Ax*/
368 gross 3445 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(1.,amg->R,amg->r,0.,amg->b_C); /* b_c = R*r */
369 gross 3283 time0=Esys_timer()-time0;
370     /* coarse level solve */
371     if ( amg->AMG_C == NULL) {
372     time0=Esys_timer();
373     /* A_C is the coarsest level */
374 gross 3445 #ifdef FIXME
375 gross 3283 switch (amg->A_C->solver_package) {
376     case (PASO_MKL):
377     Paso_MKL(amg->A_C, amg->x_C,amg->b_C, amg->reordering, amg->refinements, SHOW_TIMING);
378     break;
379     case (PASO_UMFPACK):
380     Paso_UMFPACK(amg->A_C, amg->x_C,amg->b_C, amg->refinements, SHOW_TIMING);
381     break;
382     case (PASO_SMOOTHER):
383 gross 3442 Paso_Preconditioner_Smoother_solve(amg->A_C, amg->A_C->solver_p,amg->x_C,amg->b_C,pre_sweeps+post_sweeps, FALSE);
384 gross 3283 break;
385     }
386 gross 3445 #endif
387     Paso_Preconditioner_Smoother_solve(amg->A_C, amg->A_C->solver_p,amg->x_C,amg->b_C,pre_sweeps+post_sweeps, FALSE);
388 gross 3283 if (SHOW_TIMING) printf("timing: level %d: DIRECT SOLVER: %e\n",amg->level,Esys_timer()-time0);
389     } else {
390 gross 3441 Paso_Preconditioner_AMG_solve(amg->A_C, amg->AMG_C,amg->x_C,amg->b_C); /* x_C=AMG(b_C) */
391 gross 3283 }
392     time0=time0+Esys_timer();
393 gross 3445 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(1.,amg->P,amg->x_C,1.,x); /* x = x + P*x_c */
394 gross 3283
395     /*postsmoothing*/
396    
397     /*solve Ax=b with initial guess x */
398     time0=Esys_timer();
399 gross 3442 Paso_Preconditioner_Smoother_solve(A, amg->Smoother, x, b, post_sweeps, TRUE);
400 gross 3283 time0=Esys_timer()-time0;
401     if (SHOW_TIMING) printf("timing: level %d: Postsmoothing: %e\n",amg->level,time0);
402     /*end of postsmoothing*/
403    
404     }
405     return;
406     }
407 gross 3193
408 gross 3283 /* theta = threshold for strong connections */
409     /* tau = threshold for diagonal dominance */
410 gross 3193
411 gross 3303 /*S_i={j \in N_i; i strongly coupled to j}
412    
413     in the sense that |A_{ij}| >= theta * max_k |A_{ik}|
414     */
415    
416 gross 3441 void Paso_Preconditioner_AMG_setStrongConnections(Paso_SystemMatrix* A,
417 gross 3445 dim_t *degree_S, index_t *offset_S, index_t *S,
418 gross 3303 const double theta, const double tau)
419 gross 3283 {
420 gross 3445 const dim_t my_n=Paso_SystemMatrix_getNumRows(A);
421     index_t iptr, i;
422 gross 3193
423 gross 3303
424 gross 3445 #pragma omp parallel for private(i,iptr) schedule(static)
425     for (i=0;i<my_n;++i) {
426     register double max_offdiagonal = 0.;
427     register double sum_row=0;
428     register double main_row=0;
429 gross 3283 #pragma ivdep
430 gross 3445 for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
431     register index_t j=A->mainBlock->pattern->index[iptr];
432     register double fnorm=ABS(A->mainBlock->val[iptr]);
433 gross 3283
434     if( j != i) {
435     max_offdiagonal = MAX(max_offdiagonal,fnorm);
436     sum_row+=fnorm;
437     } else {
438     main_row=fnorm;
439     }
440 gross 3445
441 gross 3283 }
442 gross 3445 #pragma ivdep
443     for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
444     register index_t j=A->col_coupleBlock->pattern->index[iptr];
445     register double fnorm=ABS(A->col_coupleBlock->val[iptr]);
446     max_offdiagonal = MAX(max_offdiagonal,fnorm);
447     sum_row+=fnorm;
448     }
449    
450    
451     const double threshold = theta*max_offdiagonal;
452     register dim_t kdeg=0;
453     register index_t koffset=A->mainBlock->pattern->ptr[i]+A->col_coupleBlock->pattern->ptr[i];
454 gross 3283 if (tau*main_row < sum_row) { /* no diagonal domainance */
455     #pragma ivdep
456 gross 3445 for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
457     register index_t j=A->mainBlock->pattern->index[iptr];
458     if(ABS(A->mainBlock->val[iptr])>threshold && i!=j) {
459     S[koffset+kdeg] = j;
460 gross 3283 kdeg++;
461     }
462     }
463 gross 3445 #pragma ivdep
464     for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
465     register index_t j=A->col_coupleBlock->pattern->index[iptr];
466     if(ABS(A->col_coupleBlock->val[iptr])>threshold) {
467     S[koffset+kdeg] = j + my_n;
468     kdeg++;
469     }
470     }
471 gross 3440 }
472 gross 3445 offset_S[i]=koffset;
473 gross 3440 degree_S[i]=kdeg;
474 gross 3283 }
475     }
476 gross 3193
477 gross 3283 /* theta = threshold for strong connections */
478     /* tau = threshold for diagonal dominance */
479 gross 3303 /*S_i={j \in N_i; i strongly coupled to j}
480 gross 3283
481 gross 3303 in the sense that |A_{ij}|_F >= theta * max_k |A_{ik}|_F
482     */
483 gross 3441 void Paso_Preconditioner_AMG_setStrongConnections_Block(Paso_SystemMatrix* A,
484 gross 3445 dim_t *degree_S, index_t *offset_S, index_t *S,
485 gross 3303 const double theta, const double tau)
486    
487 gross 3283 {
488 gross 3445 const dim_t my_n=Paso_SystemMatrix_getNumRows(A);
489     index_t iptr, i, bi;
490 gross 3283 const dim_t n_block=A->row_block_size;
491    
492    
493 gross 3445 #pragma omp parallel private(i,iptr, bi)
494 gross 3283 {
495 gross 3445 dim_t max_deg=0;
496 gross 3283 #pragma omp for schedule(static)
497 gross 3445 for (i=0;i<my_n;++i) max_deg=MAX(max_deg, A->mainBlock->pattern->ptr[i+1]-A->mainBlock->pattern->ptr[i]
498     +A->col_coupleBlock->pattern->ptr[i+1]-A->col_coupleBlock->pattern->ptr[i]);
499 gross 3193
500 gross 3445 double *rtmp=TMPMEMALLOC(max_deg, double);
501 gross 3283
502     #pragma omp for schedule(static)
503 gross 3445 for (i=0;i<my_n;++i) {
504 gross 3193
505 gross 3445 register double max_offdiagonal = 0.;
506     register double sum_row=0;
507     register double main_row=0;
508     register index_t rtmp_offset=-A->mainBlock->pattern->ptr[i];
509 gross 3440
510 gross 3445 for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
511     register index_t j=A->mainBlock->pattern->index[iptr];
512     register double fnorm=0;
513 gross 3283 #pragma ivdep
514 gross 3445 for(bi=0;bi<n_block*n_block;++bi) {
515     register double rtmp2 = A->mainBlock->val[iptr*n_block*n_block+bi];
516     fnorm+=rtmp2*rtmp2;
517     }
518 gross 3283 fnorm=sqrt(fnorm);
519 gross 3445
520     rtmp[iptr+rtmp_offset]=fnorm;
521 gross 3283 if( j != i) {
522     max_offdiagonal = MAX(max_offdiagonal,fnorm);
523     sum_row+=fnorm;
524     } else {
525     main_row=fnorm;
526     }
527     }
528 gross 3445 rtmp_offset=A->mainBlock->pattern->ptr[i+1]-A->mainBlock->pattern->ptr[i]-A->col_coupleBlock->pattern->ptr[i];
529     for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
530     register index_t j=A->col_coupleBlock->pattern->index[iptr];
531     register double fnorm=0;
532     #pragma ivdep
533     for(bi=0;bi<n_block*n_block;++bi) {
534     register double rtmp2 = A->col_coupleBlock->val[iptr*n_block*n_block+bi];
535     fnorm+=rtmp2*rtmp2;
536     }
537     fnorm=sqrt(fnorm);
538    
539     rtmp[iptr+rtmp_offset]=fnorm;
540     max_offdiagonal = MAX(max_offdiagonal,fnorm);
541     sum_row+=fnorm;
542     }
543    
544     const double threshold = theta*max_offdiagonal;
545     register dim_t kdeg=0;
546     register index_t koffset=A->mainBlock->pattern->ptr[i]+A->col_coupleBlock->pattern->ptr[i];
547    
548 gross 3283 if (tau*main_row < sum_row) { /* no diagonal domainance */
549 gross 3445 rtmp_offset=-A->mainBlock->pattern->ptr[i];
550 gross 3283 #pragma ivdep
551 gross 3445 for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
552     register index_t j=A->mainBlock->pattern->index[iptr];
553     if(rtmp[iptr+rtmp_offset] > threshold && i!=j) {
554     S[koffset+kdeg] = j;
555 gross 3283 kdeg++;
556     }
557     }
558 gross 3445 rtmp_offset=A->mainBlock->pattern->ptr[i+1]-A->mainBlock->pattern->ptr[i]-A->col_coupleBlock->pattern->ptr[i];
559     #pragma ivdep
560 gross 3446 for (iptr=A->col_coupleBlock->pattern->ptr[i]; iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
561 gross 3445 register index_t j=A->col_coupleBlock->pattern->index[iptr];
562     if(rtmp[iptr+rtmp_offset] > threshold) {
563     S[koffset+kdeg] = j + my_n;
564     kdeg++;
565     }
566     }
567    
568 gross 3283 }
569 gross 3440 degree_S[i]=kdeg;
570 gross 3445 offset_S[i]=koffset;
571 gross 3283 }
572     TMPMEMFREE(rtmp);
573     } /* end of parallel region */
574 gross 3303
575 gross 3283 }
576    
577 gross 3445 #ifdef AAAAA
578 gross 3283 /* the runge stueben coarsening algorithm: */
579 gross 3440 void Paso_Preconditioner_AMG_RungeStuebenSearch(const dim_t n, const index_t* offset_S,
580     const dim_t* degree_S, const index_t* S,
581 gross 3352 index_t*split_marker, const bool_t usePanel)
582 gross 3283 {
583    
584 gross 3351 index_t *lambda=NULL, *ST=NULL, *notInPanel=NULL, *panel=NULL, lambda_max, lambda_k;
585 gross 3440 dim_t i,k, p, q, *degree_ST=NULL, len_panel, len_panel_new;
586 gross 3351 register index_t j, itmp;
587    
588 gross 3283 if (n<=0) return; /* make sure that the return of Paso_Util_arg_max is not pointing to nirvana */
589    
590 gross 3351 lambda=TMPMEMALLOC(n, index_t); Esys_checkPtr(lambda);
591 gross 3440 degree_ST=TMPMEMALLOC(n, dim_t); Esys_checkPtr(degree_ST);
592     ST=TMPMEMALLOC(offset_S[n], index_t); Esys_checkPtr(ST);
593 gross 3351 if (usePanel) {
594     notInPanel=TMPMEMALLOC(n, bool_t); Esys_checkPtr(notInPanel);
595     panel=TMPMEMALLOC(n, index_t); Esys_checkPtr(panel);
596     }
597 gross 3283
598 gross 3351
599    
600     if (Esys_noError() ) {
601 gross 3283 /* initialize split_marker and split_marker :*/
602     /* those unknows which are not influenced go into F, the rest is available for F or C */
603     #pragma omp parallel for private(i) schedule(static)
604     for (i=0;i<n;++i) {
605 gross 3440 degree_ST[i]=0;
606     if (degree_S[i]>0) {
607 gross 3283 lambda[i]=0;
608     split_marker[i]=PASO_AMG_UNDECIDED;
609     } else {
610     split_marker[i]=PASO_AMG_IN_F;
611     lambda[i]=-1;
612     }
613     }
614     /* create transpose :*/
615     for (i=0;i<n;++i) {
616 gross 3440 for (p=0; p<degree_S[i]; ++p) {
617     j=S[offset_S[i]+p];
618     ST[offset_S[j]+degree_ST[j]]=i;
619     degree_ST[j]++;
620 gross 3283 }
621     }
622     /* lambda[i] = |undecided k in ST[i]| + 2 * |F-unknown in ST[i]| */
623     #pragma omp parallel for private(i, j, itmp) schedule(static)
624     for (i=0;i<n;++i) {
625     if (split_marker[i]==PASO_AMG_UNDECIDED) {
626     itmp=lambda[i];
627 gross 3440 for (p=0; p<degree_ST[i]; ++p) {
628     j=ST[offset_S[i]+p];
629 gross 3283 if (split_marker[j]==PASO_AMG_UNDECIDED) {
630     itmp++;
631     } else { /* at this point there are no C points */
632     itmp+=2;
633     }
634     }
635     lambda[i]=itmp;
636     }
637     }
638 gross 3351 if (usePanel) {
639     #pragma omp parallel for private(i) schedule(static)
640     for (i=0;i<n;++i) notInPanel[i]=TRUE;
641     }
642 gross 3283 /* start search :*/
643     i=Paso_Util_arg_max(n,lambda);
644 gross 3351 while (lambda[i]>-1) { /* is there any undecided unknown? */
645    
646     if (usePanel) {
647     len_panel=0;
648     do {
649     /* the unknown i is moved to C */
650     split_marker[i]=PASO_AMG_IN_C;
651     lambda[i]=-1; /* lambda from unavailable unknowns is set to -1 */
652    
653     /* all undecided unknown strongly coupled to i are moved to F */
654 gross 3440 for (p=0; p<degree_ST[i]; ++p) {
655     j=ST[offset_S[i]+p];
656 gross 3351
657     if (split_marker[j]==PASO_AMG_UNDECIDED) {
658    
659     split_marker[j]=PASO_AMG_IN_F;
660     lambda[j]=-1;
661    
662 gross 3440 for (q=0; q<degree_ST[j]; ++q) {
663     k=ST[offset_S[j]+q];
664 gross 3351 if (split_marker[k]==PASO_AMG_UNDECIDED) {
665     lambda[k]++;
666     if (notInPanel[k]) {
667     notInPanel[k]=FALSE;
668     panel[len_panel]=k;
669     len_panel++;
670     }
671    
672     } /* the unknown i is moved to C */
673     split_marker[i]=PASO_AMG_IN_C;
674     lambda[i]=-1; /* lambda from unavailable unknowns is set to -1 */
675     }
676    
677     }
678 gross 3283 }
679 gross 3440 for (p=0; p<degree_S[i]; ++p) {
680     j=S[offset_S[i]+p];
681 gross 3351 if (split_marker[j]==PASO_AMG_UNDECIDED) {
682     lambda[j]--;
683     if (notInPanel[j]) {
684     notInPanel[j]=FALSE;
685     panel[len_panel]=j;
686     len_panel++;
687     }
688     }
689     }
690 gross 3193
691 gross 3351 /* consolidate panel */
692     /* remove lambda[q]=-1 */
693     lambda_max=-1;
694     i=-1;
695     len_panel_new=0;
696     for (q=0; q<len_panel; q++) {
697     k=panel[q];
698     lambda_k=lambda[k];
699     if (split_marker[k]==PASO_AMG_UNDECIDED) {
700     panel[len_panel_new]=k;
701     len_panel_new++;
702    
703     if (lambda_max == lambda_k) {
704     if (k<i) i=k;
705     } else if (lambda_max < lambda_k) {
706     lambda_max =lambda_k;
707     i=k;
708     }
709     }
710     }
711     len_panel=len_panel_new;
712     } while (len_panel>0);
713     } else {
714     /* the unknown i is moved to C */
715     split_marker[i]=PASO_AMG_IN_C;
716     lambda[i]=-1; /* lambda from unavailable unknowns is set to -1 */
717    
718     /* all undecided unknown strongly coupled to i are moved to F */
719 gross 3440 for (p=0; p<degree_ST[i]; ++p) {
720     j=ST[offset_S[i]+p];
721 gross 3351 if (split_marker[j]==PASO_AMG_UNDECIDED) {
722    
723     split_marker[j]=PASO_AMG_IN_F;
724     lambda[j]=-1;
725    
726 gross 3440 for (q=0; q<degree_ST[j]; ++q) {
727     k=ST[offset_S[j]+q];
728 gross 3351 if (split_marker[k]==PASO_AMG_UNDECIDED) lambda[k]++;
729     }
730    
731     }
732 gross 3283 }
733 gross 3440 for (p=0; p<degree_S[i]; ++p) {
734     j=S[offset_S[i]+p];
735 gross 3351 if(split_marker[j]==PASO_AMG_UNDECIDED) lambda[j]--;
736     }
737    
738 gross 3283 }
739     i=Paso_Util_arg_max(n,lambda);
740     }
741 gross 3440
742 gross 3283 }
743     TMPMEMFREE(lambda);
744     TMPMEMFREE(ST);
745 gross 3440 TMPMEMFREE(degree_ST);
746 gross 3351 TMPMEMFREE(panel);
747     TMPMEMFREE(notInPanel);
748 gross 3193 }
749 gross 3440 /* ensures that two F nodes are connected via a C node :*/
750     void Paso_Preconditioner_AMG_enforceFFConnectivity(const dim_t n, const index_t* offset_S,
751     const dim_t* degree_S, const index_t* S,
752     index_t*split_marker)
753     {
754     dim_t i, p, q;
755    
756     /* now we make sure that two (strongly) connected F nodes are (strongly) connected via a C node. */
757     for (i=0;i<n;++i) {
758     if ( (split_marker[i]==PASO_AMG_IN_F) && (degree_S[i]>0) ) {
759     for (p=0; p<degree_S[i]; ++p) {
760     register index_t j=S[offset_S[i]+p];
761     if ( (split_marker[j]==PASO_AMG_IN_F) && (degree_S[j]>0) ) {
762     /* i and j are now two F nodes which are strongly connected */
763     /* is there a C node they share ? */
764     register index_t sharing=-1;
765     for (q=0; q<degree_S[i]; ++q) {
766     index_t k=S[offset_S[i]+q];
767     if (split_marker[k]==PASO_AMG_IN_C) {
768     register index_t* where_k=(index_t*)bsearch(&k, &(S[offset_S[j]]), degree_S[j], sizeof(index_t), Paso_comparIndex);
769     if (where_k != NULL) {
770     sharing=k;
771     break;
772     }
773     }
774     }
775     if (sharing<0) {
776     if (i<j) {
777     split_marker[j]=PASO_AMG_IN_C;
778     } else {
779     split_marker[i]=PASO_AMG_IN_C;
780     break; /* no point to look any further as i is now a C node */
781     }
782     }
783     }
784     }
785     }
786     }
787     }
788 gross 3445 #endif
789 gross 3442
790 gross 3445 #ifdef DFG
791 gross 3442 void Paso_Preconditioner_AMG_CIJPCoarsening( )
792     {
793 gross 3445
794    
795 gross 3442 const dim_t my_n;
796     const dim_t overlap_n;
797     const dim_t n= my_n + overlap_n;
798 gross 3449
799    
800     /* initialize split_marker and split_marker :*/
801     /* those unknows which are not influenced go into F, the rest is available for F or C */
802     #pragma omp parallel for private(i) schedule(static)
803     for (i=0;i<n;++i) {
804     degree_ST[i]=0;
805     if (degree_S[i]>0) {
806     lambda[i]=0;
807     split_marker[i]=PASO_AMG_UNDECIDED;
808     } else {
809     split_marker[i]=PASO_AMG_IN_F;
810     lambda[i]=-1;
811     }
812     }
813    
814 gross 3442 /* set local lambda + overlap */
815     #pragma omp parallel for private(i)
816     for (i=0; i<n ++i) {
817     w[i]=degree_ST[i];
818     }
819     for (i=0; i<my_n; i++) {
820     w2[i]=random;
821     }
822    
823    
824 gross 3445 /* add noise to w */
825     Paso_Coupler_add(n, w, 1., w2, col_coupler);
826    
827 gross 3442 /* */
828     global_n_C=0;
829     global_n_F=..;
830    
831     while (global_n_C + global_n_F < global_n) {
832    
833    
834     is_in_D[i]=FALSE;
835     /* test local connectivit*/
836 gross 3445 /* w2[i] = max(w[k] | k in S_i or k in S^T_i */
837     #pragma omp parallel for private(i)
838     for (i=0; i<n; ++i) w2[i]=0;
839    
840     for (i=0; i<my_n; ++i) {
841     for( iPtr =0 ; iPtr < degree_S[i]; ++iPtr) {
842     k=S[offset_S[i]+iPtr];
843 gross 3442 w2[i]=MAX(w2[i],w[k]);
844     w2[k]=MAX(w2[k],w[i]);
845 gross 3445 }
846     }
847 gross 3442 /* adjust overlaps by MAX */
848 gross 3445 Paso_Coupler_max(n, w2, col_coupler);
849    
850 gross 3442 /* points with w[i]>w2[i] become C nodes */
851     }
852    
853     }
854 gross 3445 #endif

  ViewVC Help
Powered by ViewVC 1.1.26