/[escript]/trunk/paso/src/AMG.c
ViewVC logotype

Annotation of /trunk/paso/src/AMG.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3436 - (hide annotations)
Mon Jan 10 02:06:07 2011 UTC (8 years, 9 months ago) by plaub
File MIME type: text/plain
File size: 20560 byte(s)
Added/modified AMG tests:

  - "self.failUnless(...)" became "self.assertTrue(...)"
    because fail* is soon to be deprecated,
  - fixed logical error in self.assertTrue argument,
  - added another test, 'testSquare'.

Added some print statements to AMG.c in verbose mode to
identify which of the stopping conditions were fulfilled.

Changed default AMG parameters for 'setNumPreSweeps' and 
'setNumPostSweeps' to 1 (huge speed improvment).

P.S Beware of bugs in the above code; I have only proved 
it correct, not tried it... Jokes!


1 gross 3193
2     /*******************************************************
3     *
4     * Copyright (c) 2003-2010 by University of Queensland
5     * Earth Systems Science Computational Center (ESSCC)
6     * http://www.uq.edu.au/esscc
7     *
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13    
14    
15     /**************************************************************/
16    
17     /* Paso: AMG preconditioner */
18    
19     /**************************************************************/
20    
21 gross 3303 /* Author: artak@uq.edu.au, l.gross@uq.edu.au */
22 gross 3193
23     /**************************************************************/
24    
25 gross 3283 #define SHOW_TIMING FALSE
26    
27 gross 3193 #include "Paso.h"
28     #include "Preconditioner.h"
29     #include "Options.h"
30     #include "PasoUtil.h"
31     #include "UMFPACK.h"
32     #include "MKL.h"
33    
34     /**************************************************************/
35    
36     /* free all memory used by AMG */
37    
38     void Paso_Preconditioner_LocalAMG_free(Paso_Preconditioner_LocalAMG * in) {
39     if (in!=NULL) {
40     Paso_Preconditioner_LocalSmoother_free(in->Smoother);
41 gross 3283 Paso_SparseMatrix_free(in->P);
42     Paso_SparseMatrix_free(in->R);
43     Paso_SparseMatrix_free(in->A_C);
44     Paso_Preconditioner_LocalAMG_free(in->AMG_C);
45     MEMFREE(in->r);
46     MEMFREE(in->x_C);
47     MEMFREE(in->b_C);
48 gross 3193
49 gross 3283
50     MEMFREE(in);
51 gross 3193 }
52     }
53    
54 gross 3402 index_t Paso_Preconditioner_LocalAMG_getMaxLevel(const Paso_Preconditioner_LocalAMG * in) {
55     if (in->AMG_C == NULL) {
56     return in->level;
57     } else {
58     return Paso_Preconditioner_LocalAMG_getMaxLevel(in->AMG_C);
59     }
60     }
61     double Paso_Preconditioner_LocalAMG_getCoarseLevelSparsity(const Paso_Preconditioner_LocalAMG * in) {
62 gross 3403 if (in->AMG_C == NULL) {
63     if (in->A_C == NULL) {
64     return 1.;
65     } else {
66     return DBLE(in->A_C->pattern->len)/DBLE(in->A_C->numRows)/DBLE(in->A_C->numRows);
67     }
68     } else {
69     return Paso_Preconditioner_LocalAMG_getCoarseLevelSparsity(in->AMG_C);
70     }
71 gross 3402 }
72     dim_t Paso_Preconditioner_LocalAMG_getNumCoarseUnknwons(const Paso_Preconditioner_LocalAMG * in) {
73     if (in->AMG_C == NULL) {
74 gross 3403 if (in->A_C == NULL) {
75     return 0;
76     } else {
77     return in->A_C->numRows;
78     }
79 gross 3402 } else {
80 gross 3403 return Paso_Preconditioner_LocalAMG_getNumCoarseUnknwons(in->AMG_C);
81 gross 3402 }
82     }
83 gross 3283 /*****************************************************************
84 gross 3193
85 gross 3283 constructs AMG
86    
87     ******************************************************************/
88     Paso_Preconditioner_LocalAMG* Paso_Preconditioner_LocalAMG_alloc(Paso_SparseMatrix *A_p,dim_t level,Paso_Options* options) {
89 gross 3193
90     Paso_Preconditioner_LocalAMG* out=NULL;
91     bool_t verbose=options->verbose;
92    
93 gross 3303 Paso_SparseMatrix *Atemp=NULL, *A_C=NULL;
94 gross 3193 const dim_t n=A_p->numRows;
95     const dim_t n_block=A_p->row_block_size;
96 gross 3303 index_t* split_marker=NULL, *counter=NULL, *mask_C=NULL, *rows_in_F=NULL, *S=NULL, *degree=NULL;
97 gross 3283 dim_t n_F=0, n_C=0, i;
98     double time0=0;
99     const double theta = options->coarsening_threshold;
100     const double tau = options->diagonal_dominance_threshold;
101 plaub 3436
102 gross 3193
103     /*
104 gross 3283 is the input matrix A suitable for coarsening
105    
106 gross 3193 */
107 gross 3283 if ( (A_p->pattern->len >= options->min_coarse_sparsity * n * n ) || (n <= options->min_coarse_matrix_size) || (level > options->level_max) ) {
108 plaub 3436
109     if (verbose) {
110     /*
111     print stopping condition:
112     - 'SPAR' = min_coarse_matrix_sparsity exceeded
113     - 'SIZE' = min_coarse_matrix_size exceeded
114     - 'LEVEL' = level_max exceeded
115     */
116     printf("Paso_Preconditioner: Stopping condition: ");
117    
118     if (A_p->pattern->len >= options->min_coarse_sparsity * n * n)
119     printf("SPAR ");
120    
121     if (n <= options->min_coarse_matrix_size)
122     printf("SIZE ");
123    
124     if (level > options->level_max)
125     printf("LEVEL ");
126    
127     printf("\n");
128    
129     printf("Paso_Preconditioner: AMG level %d (limit = %d) stopped. sparsity = %e (limit = %e), unknowns = %d (limit = %d)\n",
130     level, options->level_max, A_p->pattern->len/(1.*n * n), options->min_coarse_sparsity, n, options->min_coarse_matrix_size);
131    
132     }
133    
134 gross 3193 return NULL;
135 gross 3283 }
136     /* Start Coarsening : */
137 gross 3193
138 gross 3283 split_marker=TMPMEMALLOC(n,index_t);
139     counter=TMPMEMALLOC(n,index_t);
140 gross 3303 degree=TMPMEMALLOC(n, dim_t);
141     S=TMPMEMALLOC(A_p->pattern->len, index_t);
142     if ( !( Esys_checkPtr(split_marker) || Esys_checkPtr(counter) || Esys_checkPtr(degree) || Esys_checkPtr(S) ) ) {
143 gross 3283 /*
144     set splitting of unknows:
145    
146     */
147     time0=Esys_timer();
148     if (n_block>1) {
149 gross 3303 Paso_Preconditioner_AMG_setStrongConnections_Block(A_p, degree, S, theta,tau);
150 gross 3283 } else {
151 gross 3303 Paso_Preconditioner_AMG_setStrongConnections(A_p, degree, S, theta,tau);
152 gross 3283 }
153 gross 3352 Paso_Preconditioner_AMG_RungeStuebenSearch(n, A_p->pattern->ptr, degree, S, split_marker, options->usePanel);
154 gross 3283 options->coarsening_selection_time=Esys_timer()-time0 + MAX(0, options->coarsening_selection_time);
155    
156     if (Esys_noError() ) {
157     #pragma omp parallel for private(i) schedule(static)
158     for (i = 0; i < n; ++i) split_marker[i]= (split_marker[i] == PASO_AMG_IN_F);
159    
160     /*
161     count number of unkowns to be eliminated:
162     */
163     n_F=Paso_Util_cumsum_maskedTrue(n,counter, split_marker);
164     n_C=n-n_F;
165 gross 3402 if (verbose) printf("Paso_Preconditioner: AMG level %d: %d unknowns are flagged for elimination. %d left.\n",level,n_F,n-n_F);
166 gross 3283
167     if ( n_F == 0 ) { /* is a nasty case. a direct solver should be used, return NULL */
168     out = NULL;
169     } else {
170     out=MEMALLOC(1,Paso_Preconditioner_LocalAMG);
171 gross 3323 if (! Esys_checkPtr(out)) {
172 gross 3283 out->level = level;
173     out->n = n;
174     out->n_F = n_F;
175     out->n_block = n_block;
176     out->A_C = NULL;
177     out->P = NULL;
178     out->R = NULL;
179     out->post_sweeps = options->post_sweeps;
180     out->pre_sweeps = options->pre_sweeps;
181     out->r = NULL;
182     out->x_C = NULL;
183     out->b_C = NULL;
184     out->AMG_C = NULL;
185 gross 3323 out->Smoother=NULL;
186     }
187     mask_C=TMPMEMALLOC(n,index_t);
188     rows_in_F=TMPMEMALLOC(n_F,index_t);
189     Esys_checkPtr(mask_C);
190     Esys_checkPtr(rows_in_F);
191     if ( Esys_noError() ) {
192    
193 gross 3283 out->Smoother = Paso_Preconditioner_LocalSmoother_alloc(A_p, (options->smoother == PASO_JACOBI), verbose);
194 gross 3323
195 gross 3402 if (n_C != 0) {
196     /* if nothing is been removed we have a diagonal dominant matrix and we just run a few steps of the smoother */
197 gross 3283
198 gross 3315 /* allocate helpers :*/
199     out->x_C=MEMALLOC(n_block*n_C,double);
200     out->b_C=MEMALLOC(n_block*n_C,double);
201     out->r=MEMALLOC(n_block*n,double);
202    
203     Esys_checkPtr(out->r);
204     Esys_checkPtr(out->x_C);
205     Esys_checkPtr(out->b_C);
206    
207 gross 3323 if ( Esys_noError() ) {
208     /* creates index for F:*/
209     #pragma omp parallel private(i)
210     {
211     #pragma omp for schedule(static)
212     for (i = 0; i < n; ++i) {
213     if (split_marker[i]) rows_in_F[counter[i]]=i;
214     }
215     }
216     /* create mask of C nodes with value >-1 gives new id */
217     i=Paso_Util_cumsum_maskedFalse(n,counter, split_marker);
218 gross 3193
219 gross 3323 #pragma omp parallel for private(i) schedule(static)
220     for (i = 0; i < n; ++i) {
221     if (split_marker[i]) {
222     mask_C[i]=-1;
223     } else {
224     mask_C[i]=counter[i];;
225     }
226 gross 3283 }
227 gross 3323 /*
228     get Restriction :
229     */
230     time0=Esys_timer();
231     out->P=Paso_Preconditioner_AMG_getDirectProlongation(A_p,degree,S,n_C,mask_C);
232     if (SHOW_TIMING) printf("timing: level %d: getProlongation: %e\n",level, Esys_timer()-time0);
233 gross 3283 }
234 gross 3323 /*
235 gross 3283 construct Prolongation operator as transposed of restriction operator:
236     */
237 gross 3315 if ( Esys_noError()) {
238     time0=Esys_timer();
239     out->R=Paso_SparseMatrix_getTranspose(out->P);
240     if (SHOW_TIMING) printf("timing: level %d: Paso_SparseMatrix_getTranspose: %e\n",level,Esys_timer()-time0);
241     }
242 gross 3283 /*
243 gross 3312 construct coarse level matrix:
244 gross 3283 */
245 gross 3315 if ( Esys_noError()) {
246     time0=Esys_timer();
247     Atemp=Paso_SparseMatrix_MatrixMatrix(A_p,out->P);
248     A_C=Paso_SparseMatrix_MatrixMatrix(out->R,Atemp);
249     Paso_SparseMatrix_free(Atemp);
250     if (SHOW_TIMING) printf("timing: level %d : construct coarse matrix: %e\n",level,Esys_timer()-time0);
251     }
252    
253 gross 3283
254     /*
255     constructe courser level:
256    
257     */
258     if ( Esys_noError()) {
259 gross 3315 out->AMG_C=Paso_Preconditioner_LocalAMG_alloc(A_C,level+1,options);
260     }
261     if ( Esys_noError()) {
262 gross 3283 if ( out->AMG_C == NULL ) {
263     out->reordering = options->reordering;
264     out->refinements = options->coarse_matrix_refinements;
265     /* no coarse level matrix has been constructed. use direct solver */
266     #ifdef MKL
267 gross 3312 out->A_C=Paso_SparseMatrix_unroll(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_OFFSET1, A_C);
268 gross 3283 Paso_SparseMatrix_free(A_C);
269     out->A_C->solver_package = PASO_MKL;
270 gross 3402 if (verbose) printf("Paso_Preconditioner: AMG: use MKL direct solver on the coarsest level (number of unknowns = %d).\n",n_C*n_block);
271 gross 3283 #else
272     #ifdef UMFPACK
273 gross 3312 out->A_C=Paso_SparseMatrix_unroll(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_CSC, A_C);
274 gross 3283 Paso_SparseMatrix_free(A_C);
275     out->A_C->solver_package = PASO_UMFPACK;
276 gross 3402 if (verbose) printf("Paso_Preconditioner: AMG: use UMFPACK direct solver on the coarsest level (number of unknowns = %d).\n",n_C*n_block);
277 gross 3283 #else
278     out->A_C=A_C;
279     out->A_C->solver_p=Paso_Preconditioner_LocalSmoother_alloc(out->A_C, (options->smoother == PASO_JACOBI), verbose);
280     out->A_C->solver_package = PASO_SMOOTHER;
281 gross 3402 if (verbose) printf("Paso_Preconditioner: AMG: use smoother on the coarsest level (number of unknowns = %d).\n",n_C*n_block);
282 gross 3283 #endif
283     #endif
284     } else {
285     /* finally we set some helpers for the solver step */
286     out->A_C=A_C;
287     }
288     }
289     }
290     }
291     TMPMEMFREE(mask_C);
292     TMPMEMFREE(rows_in_F);
293     }
294     }
295 gross 3193 }
296     TMPMEMFREE(counter);
297 gross 3283 TMPMEMFREE(split_marker);
298 gross 3303 TMPMEMFREE(degree);
299     TMPMEMFREE(S);
300 gross 3193
301 jfenwick 3259 if (Esys_noError()) {
302 gross 3193 return out;
303     } else {
304     Paso_Preconditioner_LocalAMG_free(out);
305     return NULL;
306     }
307     }
308    
309    
310 gross 3283 void Paso_Preconditioner_LocalAMG_solve(Paso_SparseMatrix* A, Paso_Preconditioner_LocalAMG * amg, double * x, double * b) {
311     const dim_t n = amg->n * amg->n_block;
312     double time0=0;
313     const dim_t post_sweeps=amg->post_sweeps;
314     const dim_t pre_sweeps=amg->pre_sweeps;
315 gross 3193
316 gross 3283 /* presmoothing */
317     time0=Esys_timer();
318     Paso_Preconditioner_LocalSmoother_solve(A, amg->Smoother, x, b, pre_sweeps, FALSE);
319     time0=Esys_timer()-time0;
320     if (SHOW_TIMING) printf("timing: level %d: Presmooting: %e\n",amg->level, time0);
321     /* end of presmoothing */
322    
323     if (amg->n_F < amg->n) { /* is there work on the coarse level? */
324     time0=Esys_timer();
325     Paso_Copy(n, amg->r, b); /* r <- b */
326 gross 3312 Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,A,x,1.,amg->r); /*r=r-Ax*/
327     Paso_SparseMatrix_MatrixVector_CSR_OFFSET0_DIAG(1.,amg->R,amg->r,0.,amg->b_C); /* b_c = R*r */
328 gross 3283 time0=Esys_timer()-time0;
329     /* coarse level solve */
330     if ( amg->AMG_C == NULL) {
331     time0=Esys_timer();
332     /* A_C is the coarsest level */
333     switch (amg->A_C->solver_package) {
334     case (PASO_MKL):
335     Paso_MKL(amg->A_C, amg->x_C,amg->b_C, amg->reordering, amg->refinements, SHOW_TIMING);
336     break;
337     case (PASO_UMFPACK):
338     Paso_UMFPACK(amg->A_C, amg->x_C,amg->b_C, amg->refinements, SHOW_TIMING);
339     break;
340     case (PASO_SMOOTHER):
341 gross 3402 Paso_Preconditioner_LocalSmoother_solve(amg->A_C, amg->A_C->solver_p,amg->x_C,amg->b_C,pre_sweeps+post_sweeps, FALSE);
342 gross 3283 break;
343     }
344     if (SHOW_TIMING) printf("timing: level %d: DIRECT SOLVER: %e\n",amg->level,Esys_timer()-time0);
345     } else {
346     Paso_Preconditioner_LocalAMG_solve(amg->A_C, amg->AMG_C,amg->x_C,amg->b_C); /* x_C=AMG(b_C) */
347     }
348     time0=time0+Esys_timer();
349 gross 3303 Paso_SparseMatrix_MatrixVector_CSR_OFFSET0_DIAG(1.,amg->P,amg->x_C,1.,x); /* x = x + P*x_c */
350 gross 3283
351     /*postsmoothing*/
352    
353     /*solve Ax=b with initial guess x */
354     time0=Esys_timer();
355     Paso_Preconditioner_LocalSmoother_solve(A, amg->Smoother, x, b, post_sweeps, TRUE);
356     time0=Esys_timer()-time0;
357     if (SHOW_TIMING) printf("timing: level %d: Postsmoothing: %e\n",amg->level,time0);
358     /*end of postsmoothing*/
359    
360     }
361     return;
362     }
363 gross 3193
364 gross 3283 /* theta = threshold for strong connections */
365     /* tau = threshold for diagonal dominance */
366 gross 3193
367 gross 3303 /*S_i={j \in N_i; i strongly coupled to j}
368    
369     in the sense that |A_{ij}| >= theta * max_k |A_{ik}|
370     */
371    
372     void Paso_Preconditioner_AMG_setStrongConnections(Paso_SparseMatrix* A,
373     dim_t *degree, index_t *S,
374     const double theta, const double tau)
375 gross 3283 {
376     const dim_t n=A->numRows;
377 gross 3303 index_t iptr, i,j;
378 gross 3283 dim_t kdeg;
379     double max_offdiagonal, threshold, sum_row, main_row, fnorm;
380 gross 3193
381 gross 3303
382 gross 3283 #pragma omp parallel for private(i,iptr,max_offdiagonal, threshold,j, kdeg, sum_row, main_row, fnorm) schedule(static)
383 gross 3402 for (i=0;i<n;++i) {
384 gross 3283 max_offdiagonal = 0.;
385     sum_row=0;
386     main_row=0;
387     #pragma ivdep
388     for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {
389     j=A->pattern->index[iptr];
390     fnorm=ABS(A->val[iptr]);
391    
392     if( j != i) {
393     max_offdiagonal = MAX(max_offdiagonal,fnorm);
394     sum_row+=fnorm;
395     } else {
396     main_row=fnorm;
397     }
398     }
399     threshold = theta*max_offdiagonal;
400     kdeg=0;
401 gross 3402
402 gross 3283 if (tau*main_row < sum_row) { /* no diagonal domainance */
403     #pragma ivdep
404     for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {
405     j=A->pattern->index[iptr];
406     if(ABS(A->val[iptr])>threshold && i!=j) {
407     S[A->pattern->ptr[i]+kdeg] = j;
408     kdeg++;
409     }
410     }
411     }
412     degree[i]=kdeg;
413     }
414 gross 3303
415 gross 3283 }
416 gross 3193
417 gross 3283 /* theta = threshold for strong connections */
418     /* tau = threshold for diagonal dominance */
419 gross 3303 /*S_i={j \in N_i; i strongly coupled to j}
420 gross 3283
421 gross 3303 in the sense that |A_{ij}|_F >= theta * max_k |A_{ik}|_F
422     */
423     void Paso_Preconditioner_AMG_setStrongConnections_Block(Paso_SparseMatrix* A,
424     dim_t *degree, index_t *S,
425     const double theta, const double tau)
426    
427 gross 3283 {
428     const dim_t n_block=A->row_block_size;
429     const dim_t n=A->numRows;
430 gross 3303 index_t iptr, i,j, bi;
431 gross 3283 dim_t kdeg, max_deg;
432     register double max_offdiagonal, threshold, fnorm, sum_row, main_row;
433     double *rtmp;
434    
435    
436     #pragma omp parallel private(i,iptr,max_offdiagonal, kdeg, threshold,j, max_deg, fnorm, sum_row, main_row, rtmp)
437     {
438     max_deg=0;
439     #pragma omp for schedule(static)
440     for (i=0;i<n;++i) max_deg=MAX(max_deg, A->pattern->ptr[i+1]-A->pattern->ptr[i]);
441 gross 3193
442 gross 3283 rtmp=TMPMEMALLOC(max_deg, double);
443    
444     #pragma omp for schedule(static)
445     for (i=0;i<n;++i) {
446 gross 3193
447 gross 3283 max_offdiagonal = 0.;
448     sum_row=0;
449     main_row=0;
450     for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {
451     j=A->pattern->index[iptr];
452     fnorm=0;
453     #pragma ivdep
454     for(bi=0;bi<n_block*n_block;++bi) fnorm+=A->val[iptr*n_block*n_block+bi]*A->val[iptr*n_block*n_block+bi];
455     fnorm=sqrt(fnorm);
456 gross 3287 rtmp[iptr-A->pattern->ptr[i]]=fnorm;
457 gross 3283 if( j != i) {
458     max_offdiagonal = MAX(max_offdiagonal,fnorm);
459     sum_row+=fnorm;
460     } else {
461     main_row=fnorm;
462     }
463     }
464     threshold = theta*max_offdiagonal;
465 gross 3193
466 gross 3283 kdeg=0;
467     if (tau*main_row < sum_row) { /* no diagonal domainance */
468     #pragma ivdep
469     for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {
470     j=A->pattern->index[iptr];
471     if(rtmp[iptr-A->pattern->ptr[i]] > threshold && i!=j) {
472     S[A->pattern->ptr[i]+kdeg] = j;
473     kdeg++;
474     }
475     }
476     }
477     degree[i]=kdeg;
478     }
479     TMPMEMFREE(rtmp);
480     } /* end of parallel region */
481 gross 3303
482 gross 3283 }
483    
484     /* the runge stueben coarsening algorithm: */
485 gross 3303 void Paso_Preconditioner_AMG_RungeStuebenSearch(const dim_t n, const index_t* offset,
486 gross 3352 const dim_t* degree, const index_t* S,
487     index_t*split_marker, const bool_t usePanel)
488 gross 3283 {
489    
490 gross 3351 index_t *lambda=NULL, *ST=NULL, *notInPanel=NULL, *panel=NULL, lambda_max, lambda_k;
491     dim_t i,k, p, q, *degreeT=NULL, len_panel, len_panel_new;
492     register index_t j, itmp;
493    
494 gross 3283 if (n<=0) return; /* make sure that the return of Paso_Util_arg_max is not pointing to nirvana */
495    
496 gross 3351 lambda=TMPMEMALLOC(n, index_t); Esys_checkPtr(lambda);
497     degreeT=TMPMEMALLOC(n, dim_t); Esys_checkPtr(degreeT);
498     ST=TMPMEMALLOC(offset[n], index_t); Esys_checkPtr(ST);
499     if (usePanel) {
500     notInPanel=TMPMEMALLOC(n, bool_t); Esys_checkPtr(notInPanel);
501     panel=TMPMEMALLOC(n, index_t); Esys_checkPtr(panel);
502     }
503 gross 3283
504 gross 3351
505    
506     if (Esys_noError() ) {
507 gross 3283 /* initialize split_marker and split_marker :*/
508     /* those unknows which are not influenced go into F, the rest is available for F or C */
509     #pragma omp parallel for private(i) schedule(static)
510     for (i=0;i<n;++i) {
511     degreeT[i]=0;
512     if (degree[i]>0) {
513     lambda[i]=0;
514     split_marker[i]=PASO_AMG_UNDECIDED;
515     } else {
516     split_marker[i]=PASO_AMG_IN_F;
517     lambda[i]=-1;
518     }
519     }
520     /* create transpose :*/
521     for (i=0;i<n;++i) {
522     for (p=0; p<degree[i]; ++p) {
523     j=S[offset[i]+p];
524     ST[offset[j]+degreeT[j]]=i;
525     degreeT[j]++;
526     }
527     }
528     /* lambda[i] = |undecided k in ST[i]| + 2 * |F-unknown in ST[i]| */
529     #pragma omp parallel for private(i, j, itmp) schedule(static)
530     for (i=0;i<n;++i) {
531     if (split_marker[i]==PASO_AMG_UNDECIDED) {
532     itmp=lambda[i];
533     for (p=0; p<degreeT[i]; ++p) {
534     j=ST[offset[i]+p];
535     if (split_marker[j]==PASO_AMG_UNDECIDED) {
536     itmp++;
537     } else { /* at this point there are no C points */
538     itmp+=2;
539     }
540     }
541     lambda[i]=itmp;
542     }
543     }
544 gross 3351 if (usePanel) {
545     #pragma omp parallel for private(i) schedule(static)
546     for (i=0;i<n;++i) notInPanel[i]=TRUE;
547     }
548 gross 3283 /* start search :*/
549     i=Paso_Util_arg_max(n,lambda);
550 gross 3351 while (lambda[i]>-1) { /* is there any undecided unknown? */
551    
552     if (usePanel) {
553     len_panel=0;
554     do {
555     /* the unknown i is moved to C */
556     split_marker[i]=PASO_AMG_IN_C;
557     lambda[i]=-1; /* lambda from unavailable unknowns is set to -1 */
558    
559     /* all undecided unknown strongly coupled to i are moved to F */
560     for (p=0; p<degreeT[i]; ++p) {
561     j=ST[offset[i]+p];
562    
563     if (split_marker[j]==PASO_AMG_UNDECIDED) {
564    
565     split_marker[j]=PASO_AMG_IN_F;
566     lambda[j]=-1;
567    
568     for (q=0; q<degreeT[j]; ++q) {
569     k=ST[offset[j]+q];
570     if (split_marker[k]==PASO_AMG_UNDECIDED) {
571     lambda[k]++;
572     if (notInPanel[k]) {
573     notInPanel[k]=FALSE;
574     panel[len_panel]=k;
575     len_panel++;
576     }
577    
578     } /* the unknown i is moved to C */
579     split_marker[i]=PASO_AMG_IN_C;
580     lambda[i]=-1; /* lambda from unavailable unknowns is set to -1 */
581     }
582    
583     }
584 gross 3283 }
585 gross 3351 for (p=0; p<degree[i]; ++p) {
586     j=S[offset[i]+p];
587     if (split_marker[j]==PASO_AMG_UNDECIDED) {
588     lambda[j]--;
589     if (notInPanel[j]) {
590     notInPanel[j]=FALSE;
591     panel[len_panel]=j;
592     len_panel++;
593     }
594     }
595     }
596 gross 3193
597 gross 3351 /* consolidate panel */
598     /* remove lambda[q]=-1 */
599     lambda_max=-1;
600     i=-1;
601     len_panel_new=0;
602     for (q=0; q<len_panel; q++) {
603     k=panel[q];
604     lambda_k=lambda[k];
605     if (split_marker[k]==PASO_AMG_UNDECIDED) {
606     panel[len_panel_new]=k;
607     len_panel_new++;
608    
609     if (lambda_max == lambda_k) {
610     if (k<i) i=k;
611     } else if (lambda_max < lambda_k) {
612     lambda_max =lambda_k;
613     i=k;
614     }
615     }
616     }
617     len_panel=len_panel_new;
618     } while (len_panel>0);
619     } else {
620     /* the unknown i is moved to C */
621     split_marker[i]=PASO_AMG_IN_C;
622     lambda[i]=-1; /* lambda from unavailable unknowns is set to -1 */
623    
624     /* all undecided unknown strongly coupled to i are moved to F */
625     for (p=0; p<degreeT[i]; ++p) {
626     j=ST[offset[i]+p];
627     if (split_marker[j]==PASO_AMG_UNDECIDED) {
628    
629     split_marker[j]=PASO_AMG_IN_F;
630     lambda[j]=-1;
631    
632     for (q=0; q<degreeT[j]; ++q) {
633     k=ST[offset[j]+q];
634     if (split_marker[k]==PASO_AMG_UNDECIDED) lambda[k]++;
635     }
636    
637     }
638 gross 3283 }
639 gross 3351 for (p=0; p<degree[i]; ++p) {
640     j=S[offset[i]+p];
641     if(split_marker[j]==PASO_AMG_UNDECIDED) lambda[j]--;
642     }
643    
644 gross 3283 }
645     i=Paso_Util_arg_max(n,lambda);
646     }
647     }
648     TMPMEMFREE(lambda);
649     TMPMEMFREE(ST);
650     TMPMEMFREE(degreeT);
651 gross 3351 TMPMEMFREE(panel);
652     TMPMEMFREE(notInPanel);
653 gross 3193 }

  ViewVC Help
Powered by ViewVC 1.1.26