/[escript]/trunk/paso/src/AML.c
ViewVC logotype

Annotation of /trunk/paso/src/AML.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3284 - (hide annotations)
Mon Oct 18 23:48:04 2010 UTC (9 years ago) by gross
File MIME type: text/plain
File size: 30372 byte(s)
old AMLI code collected in a single file. It does not compile and should be merged with RecILU
1 gross 3284
2     /*******************************************************
3     *
4     * Copyright (c) 2003-2010 by University of Queensland
5     * Earth Systems Science Computational Center (ESSCC)
6     * http://www.uq.edu.au/esscc
7     *
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13    
14    
15     /**************************************************************/
16    
17     /* Paso: AML preconditioner:
18    
19     This is just a collection of older code. This does not compile.
20    
21     */
22    
23     /**************************************************************/
24    
25     /* Author: artak@uq.edu.au */
26    
27     /**************************************************************/
28    
29     #include "Paso.h"
30     #include "Preconditioner.h"
31     #include "Options.h"
32     #include "PasoUtil.h"
33     #include "UMFPACK.h"
34     #include "MKL.h"
35     #include "SystemMatrix.h"
36     #include "Coarsening.h"
37     #include "BlockOps.h"
38    
39     #ifndef INC_COARSE
40     #define INC_COARSE
41    
42     #include "SystemMatrix.h"
43    
44    
45     /* Remove:
46     #define PASO_COARSENING_IN_F TRUE
47     #define PASO_COARSENING_IN_C FALSE
48     */
49    
50     void Paso_Coarsening_Local(index_t* marker_F, Paso_SparseMatrix* A, Paso_Options* options);
51     void Paso_Coarsening_Local_Aggregation(Paso_SparseMatrix* A, index_t* marker_F, const double theta);
52     void Paso_Coarsening_Local_Aggregation_blk(Paso_SparseMatrix* A, index_t* marker_F, const double theta);
53     void Paso_Coarsening_Local_YS(Paso_SparseMatrix* A, index_t* marker_F, const double theta);
54     void Paso_Coarsening_Local_YS_blk(Paso_SparseMatrix* A, index_t* marker_F, const double theta);
55    
56     void Paso_Coarsening_Local_RS(Paso_SparseMatrix* A, index_t* marker_F, double theta);
57    
58     void Paso_Coarsening_Local_Partition(Paso_Pattern* pattern,index_t* marker_F);
59     void Paso_Coarsening_Local_greedy(Paso_Pattern* pattern, index_t* marker_F);
60    
61    
62    
63     /*=== REVISE ============*/
64     void Paso_Coarsening_Local_greedy_color(Paso_Pattern* pattern, index_t* marker_F);
65     void Paso_Coarsening_Local_greedy_diag(Paso_SparseMatrix* A, index_t* marker_F, double thershold);
66    
67     void Paso_Coarsening_Local_YS_plus(Paso_SparseMatrix* A, index_t* marker_F, double alpha, double taw, double delta);
68     void Paso_Coarsening_Local_Standard(Paso_SparseMatrix* A, index_t* marker_F, double theta);
69     void Paso_Coarsening_Local_greedy_RS(Paso_SparseMatrix* A, index_t* marker_F, double theta);
70     void Paso_Coarsening_Local_greedy_Agg(Paso_SparseMatrix* A, index_t* marker_F, double theta);
71     /*dim_t how_many(dim_t n,dim_t* S_i, int value1, dim_t* addedSet, int value2);*/
72     void Paso_Coarsening_Local_Standard_Block(Paso_SparseMatrix* A, index_t* marker_F, double theta);
73    
74     dim_t how_many(dim_t i,Paso_Pattern * S, bool_t transpose);
75     dim_t arg_max(dim_t n, dim_t* lambda, dim_t mask);
76     Paso_Pattern* Paso_Coarsening_Local_getTranspose(Paso_Pattern* P);
77    
78     void Paso_Coarsening_Local_getReport(dim_t n,index_t* marker_F);
79     void Paso_Coarsening_Local_Read(char *fileName,dim_t n,index_t* marker_F);
80     void Paso_Coarsening_Local_Write(char *fileName,dim_t n,index_t* marker_F);
81    
82    
83     #endif
84    
85    
86     /*************************************************************,amli->b_F*/
87    
88     /* free all memory used by AMLI */
89    
90     void Paso_Solver_AMLI_System_free(Paso_Solver_AMLI_System * in) {
91     dim_t i;
92     if (in!=NULL) {
93     for (i=0;i<in->block_size;++i) {
94     Paso_Solver_AMLI_free(in->amliblock[i]);
95     Paso_SparseMatrix_free(in->block[i]);
96     }
97     MEMFREE(in);
98     }
99     }
100    
101    
102     /* free all memory used by AMLI */
103    
104     void Paso_Solver_AMLI_free(Paso_Solver_AMLI * in) {
105     if (in!=NULL) {
106     Paso_Preconditioner_LocalSmoother_free(in->Smoother);
107    
108     MEMFREE(in->inv_A_FF);
109     MEMFREE(in->A_FF_pivot);
110     Paso_SparseMatrix_free(in->A_FC);
111     Paso_SparseMatrix_free(in->A_CF);
112     Paso_SparseMatrix_free(in->A);
113     if(in->coarsest_level==TRUE) {
114     #ifdef MKL
115     Paso_MKL_free1(in->AOffset1);
116     Paso_SparseMatrix_free(in->AOffset1);
117     #else
118     #ifdef UMFPACK
119     Paso_UMFPACK1_free((Paso_UMFPACK_Handler*)(in->solver));
120     #endif
121     #endif
122     }
123     MEMFREE(in->rows_in_F);
124     MEMFREE(in->rows_in_C);
125     MEMFREE(in->mask_F);
126     MEMFREE(in->mask_C);
127     MEMFREE(in->x_F);
128     MEMFREE(in->b_F);
129     MEMFREE(in->x_C);
130     MEMFREE(in->b_C);
131     in->solver=NULL;
132     Paso_Solver_AMLI_free(in->AMLI_of_Schur);
133     MEMFREE(in->b_C);
134     MEMFREE(in);
135     }
136     }
137    
138     /**************************************************************/
139    
140     /* constructs the block-block factorization of
141    
142     [ A_FF A_FC ]
143     A_p=
144     [ A_CF A_FF ]
145    
146     to
147    
148     [ I 0 ] [ A_FF 0 ] [ I invA_FF*A_FF ]
149     [ A_CF*invA_FF I ] [ 0 S ] [ 0 I ]
150    
151    
152     where S=A_FF-ACF*invA_FF*A_FC within the shape of S
153    
154     then AMLI is applied to S again until S becomes empty
155    
156     */
157     Paso_Solver_AMLI* Paso_Solver_getAMLI(Paso_SparseMatrix *A_p,dim_t level,Paso_Options* options) {
158     Paso_Solver_AMLI* out=NULL;
159     Paso_Pattern* temp1=NULL;
160     Paso_Pattern* temp2=NULL;
161     bool_t verbose=options->verbose;
162     dim_t n=A_p->numRows;
163     dim_t n_block=A_p->row_block_size;
164     index_t* mis_marker=NULL;
165     index_t* counter=NULL;
166     index_t iPtr,*index, *where_p, iPtr_s;
167     dim_t i,j;
168     Paso_SparseMatrix * schur=NULL;
169     Paso_SparseMatrix * schur_withFillIn=NULL;
170     double S=0;
171    
172    
173     /* char filename[8];
174     sprintf(filename,"AMLILevel%d",level);
175    
176     Paso_SparseMatrix_saveMM(A_p,filename);
177     */
178    
179     /*Make sure we have block sizes 1*/
180     if (A_p->col_block_size>1) {
181     Esys_setError(TYPE_ERROR,"Paso_Solver_getAMLI: AMLI requires column block size 1.");
182     return NULL;
183     }
184     if (A_p->row_block_size>1) {
185     Esys_setError(TYPE_ERROR,"Paso_Solver_getAMLI: AMLI requires row block size 1.");
186     return NULL;
187     }
188     out=MEMALLOC(1,Paso_Solver_AMLI);
189     /* identify independend set of rows/columns */
190     mis_marker=TMPMEMALLOC(n,index_t);
191     counter=TMPMEMALLOC(n,index_t);
192     if ( !( Esys_checkPtr(mis_marker) || Esys_checkPtr(counter) || Esys_checkPtr(out)) ) {
193     out->AMLI_of_Schur=NULL;
194     out->inv_A_FF=NULL;
195     out->A_FF_pivot=NULL;
196     out->A_FC=NULL;
197     out->A_CF=NULL;
198     out->rows_in_F=NULL;
199     out->rows_in_C=NULL;
200     out->mask_F=NULL;
201     out->mask_C=NULL;
202     out->x_F=NULL;
203     out->b_F=NULL;
204     out->x_C=NULL;
205     out->b_C=NULL;
206     out->A=Paso_SparseMatrix_getReference(A_p);
207     out->Smoother=NULL;
208     out->solver=NULL;
209     /*out->GS=Paso_Solver_getGS(A_p,verbose);*/
210     out->level=level;
211     out->n=n;
212     out->n_F=n+1;
213     out->n_block=n_block;
214     out->post_sweeps=options->post_sweeps;
215     out->pre_sweeps=options->pre_sweeps;
216    
217     if (level==0 || n<=options->min_coarse_matrix_size) {
218     out->coarsest_level=TRUE;
219     #ifdef MKL
220     out->AOffset1=Paso_SparseMatrix_alloc(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_OFFSET1, out->A->pattern,1,1, FALSE);
221     #pragma omp parallel for private(i) schedule(static)
222     for (i=0;i<out->A->len;++i) {
223     out->AOffset1->val[i]=out->A->val[i];
224     }
225     #else
226     #ifdef UMFPACK
227     #else
228     out->Smoother=Paso_Preconditioner_LocalSmoother_alloc(A_p,TRUE,verbose);
229     #endif
230     #endif
231     } else {
232     out->coarsest_level=FALSE;
233     out->Smoother=Paso_Preconditioner_LocalSmoother_alloc(A_p,TRUE,verbose);
234    
235     Paso_Coarsening_Local(mis_marker,A_p, options->coarsening_threshold, options->coarsening_method);
236    
237     #pragma omp parallel for private(i) schedule(static)
238     for (i = 0; i < n; ++i) {
239     mis_marker[i]=(mis_marker[i]== PASO_COARSENING_IN_F);
240     counter[i]=mis_marker[i];
241    
242     }
243    
244     out->n_F=Paso_Util_cumsum(n,counter);
245    
246     if (out->n_F==0) {
247     out->coarsest_level=TRUE;
248     level=0;
249     if (verbose) {
250     /*printf("AMLI coarsening eliminates all unknowns, switching to Jacobi preconditioner.\n");*/
251     printf("AMLI coarsening does not eliminate any of the unknowns, switching to Jacobi preconditioner.\n");
252     }
253     }
254     else if (out->n_F==n) {
255     out->coarsest_level=TRUE;
256     level=0;
257     if (verbose) {
258     /*printf("AMLI coarsening eliminates all unknowns, switching to Jacobi preconditioner.\n");*/
259     printf("AMLI coarsening eliminates all of the unknowns, switching to Jacobi preconditioner.\n");
260    
261     }
262     } else {
263    
264     if (Esys_noError()) {
265    
266     /*#pragma omp parallel for private(i) schedule(static)
267     for (i = 0; i < n; ++i) counter[i]=mis_marker[i];
268     out->n_F=Paso_Util_cumsum(n,counter);
269     */
270    
271     /*if(level==3) {
272     printf("##TOTAL: %d, ELIMINATED: %d\n",n,out->n_F);
273     for (i = 0; i < n; ++i) {
274     printf("##%d %d\n",i,mis_marker[i]);
275     }
276     }*/
277    
278     out->mask_F=MEMALLOC(n,index_t);
279     out->rows_in_F=MEMALLOC(out->n_F,index_t);
280     out->inv_A_FF=MEMALLOC(n_block*n_block*out->n_F,double);
281     out->A_FF_pivot=NULL; /* later use for block size>3 */
282     if (! (Esys_checkPtr(out->mask_F) || Esys_checkPtr(out->inv_A_FF) || Esys_checkPtr(out->rows_in_F) ) ) {
283     /* creates an index for F from mask */
284     #pragma omp parallel for private(i) schedule(static)
285     for (i = 0; i < out->n_F; ++i) out->rows_in_F[i]=-1;
286     #pragma omp parallel for private(i) schedule(static)
287     for (i = 0; i < n; ++i) {
288     if (mis_marker[i]) {
289     out->rows_in_F[counter[i]]=i;
290     out->mask_F[i]=counter[i];
291     } else {
292     out->mask_F[i]=-1;
293     }
294     }
295    
296     /* Compute row-sum for getting rs(A_FF)^-1*/
297     #pragma omp parallel for private(i,iPtr,j,S) schedule(static)
298     for (i = 0; i < out->n_F; ++i) {
299     S=0;
300     /*printf("[%d ]: [%d] -> ",i, out->rows_in_F[i]);*/
301     for (iPtr=A_p->pattern->ptr[out->rows_in_F[i]];iPtr<A_p->pattern->ptr[out->rows_in_F[i] + 1]; ++iPtr) {
302     j=A_p->pattern->index[iPtr];
303     /*if (j==out->rows_in_F[i]) printf("diagonal %e",A_p->val[iPtr]);*/
304     if (mis_marker[j])
305     S+=A_p->val[iPtr];
306     }
307     /*printf("-> %e \n",S);*/
308     out->inv_A_FF[i]=1./S;
309     }
310     }
311     }
312    
313     /*check whether coarsening process actually makes sense to continue.
314     if coarse matrix at least smaller by 30% then continue, otherwise we stop.*/
315     if ((out->n_F*100/n)<30) {
316     level=1;
317     }
318    
319     if ( Esys_noError()) {
320     /* if there are no nodes in the coarse level there is no more work to do */
321     out->n_C=n-out->n_F;
322    
323     /*if (out->n_F>500) */
324     out->rows_in_C=MEMALLOC(out->n_C,index_t);
325     out->mask_C=MEMALLOC(n,index_t);
326     if (! (Esys_checkPtr(out->mask_C) || Esys_checkPtr(out->rows_in_C) ) ) {
327     /* creates an index for C from mask */
328     #pragma omp parallel for private(i) schedule(static)
329     for (i = 0; i < n; ++i) counter[i]=! mis_marker[i];
330     Paso_Util_cumsum(n,counter);
331     #pragma omp parallel for private(i) schedule(static)
332     for (i = 0; i < out->n_C; ++i) out->rows_in_C[i]=-1;
333     #pragma omp parallel for private(i) schedule(static)
334     for (i = 0; i < n; ++i) {
335     if (! mis_marker[i]) {
336     out->rows_in_C[counter[i]]=i;
337     out->mask_C[i]=counter[i];
338     } else {
339     out->mask_C[i]=-1;
340     }
341     }
342     }
343     }
344     if ( Esys_noError()) {
345     /* get A_CF block: */
346     out->A_CF=Paso_SparseMatrix_getSubmatrix(A_p,out->n_C,out->n_F,out->rows_in_C,out->mask_F);
347     /* get A_FC block: */
348     out->A_FC=Paso_SparseMatrix_getSubmatrix(A_p,out->n_F,out->n_C,out->rows_in_F,out->mask_C);
349     /* get A_CC block: */
350     schur=Paso_SparseMatrix_getSubmatrix(A_p,out->n_C,out->n_C,out->rows_in_C,out->mask_C);
351    
352     }
353     if ( Esys_noError()) {
354     /*find the pattern of the schur complement with fill in*/
355     temp1=Paso_Pattern_multiply(PATTERN_FORMAT_DEFAULT,out->A_CF->pattern,out->A_FC->pattern);
356     temp2=Paso_Pattern_binop(PATTERN_FORMAT_DEFAULT, schur->pattern, temp1);
357     schur_withFillIn=Paso_SparseMatrix_alloc(A_p->type,temp2,1,1, TRUE);
358     Paso_Pattern_free(temp1);
359     Paso_Pattern_free(temp2);
360     }
361     if ( Esys_noError()) {
362     /* copy values over*/
363     #pragma omp parallel for private(i,iPtr,j,iPtr_s,index,where_p) schedule(static)
364     for (i = 0; i < schur_withFillIn->numRows; ++i) {
365     for (iPtr=schur_withFillIn->pattern->ptr[i];iPtr<schur_withFillIn->pattern->ptr[i + 1]; ++iPtr) {
366     j=schur_withFillIn->pattern->index[iPtr];
367     iPtr_s=schur->pattern->ptr[i];
368     index=&(schur->pattern->index[iPtr_s]);
369     where_p=(index_t*)bsearch(&j,
370     index,
371     schur->pattern->ptr[i + 1]-schur->pattern->ptr[i],
372     sizeof(index_t),
373     Paso_comparIndex);
374     if (where_p!=NULL) {
375     schur_withFillIn->val[iPtr]=schur->val[iPtr_s+(index_t)(where_p-index)];
376     }
377     }
378     }
379     Paso_Solver_updateIncompleteSchurComplement(schur_withFillIn,out->A_CF,out->inv_A_FF,out->A_FF_pivot,out->A_FC);
380     out->AMLI_of_Schur=Paso_Solver_getAMLI(schur_withFillIn,level-1,options);
381     }
382     /* allocate work arrays for AMLI application */
383     if (Esys_noError()) {
384     out->x_F=MEMALLOC(n_block*out->n_F,double);
385     out->b_F=MEMALLOC(n_block*out->n_F,double);
386     out->x_C=MEMALLOC(n_block*out->n_C,double);
387     out->b_C=MEMALLOC(n_block*out->n_C,double);
388    
389     if (! (Esys_checkPtr(out->x_F) || Esys_checkPtr(out->b_F) || Esys_checkPtr(out->x_C) || Esys_checkPtr(out->b_C) ) ) {
390     #pragma omp parallel for private(i) schedule(static)
391     for (i = 0; i < out->n_F; ++i) {
392     out->x_F[i]=0.;
393     out->b_F[i]=0.;
394     }
395     #pragma omp parallel for private(i) schedule(static)
396     for (i = 0; i < out->n_C; ++i) {
397     out->x_C[i]=0.;
398     out->b_C[i]=0.;
399     }
400     }
401     }
402     Paso_SparseMatrix_free(schur);
403     Paso_SparseMatrix_free(schur_withFillIn);
404     }
405     }
406     }
407     TMPMEMFREE(mis_marker);
408     TMPMEMFREE(counter);
409    
410     if (Esys_noError()) {
411     if (verbose && level>0 && !out->coarsest_level) {
412     printf("AMLI: level: %d: %d unknowns eliminated. %d left.\n",level, out->n_F,out->n_C);
413     }
414     return out;
415     } else {
416     Paso_Solver_AMLI_free(out);
417     return NULL;
418     }
419     }
420    
421     /**************************************************************/
422    
423     /* apply AMLI precondition b-> x
424    
425     in fact it solves
426    
427     [ I 0 ] [ A_FF 0 ] [ I invA_FF*A_FC ] [ x_F ] = [b_F]
428     [ A_CF*invA_FF I ] [ 0 S ] [ 0 I ] [ x_C ] = [b_C]
429    
430     in the form
431    
432     b->[b_F,b_C]
433     x_F=invA_FF*b_F
434     b_C=b_C-A_CF*x_F
435     x_C=AMLI(b_C)
436     b_F=b_F-A_FC*x_C
437     x_F=invA_FF*b_F
438     x<-[x_F,x_C]
439    
440     should be called within a parallel region
441     barrier synconization should be performed to make sure that the input vector available
442    
443     */
444    
445     void Paso_Solver_solveAMLI(Paso_Solver_AMLI * amli, double * x, double * b) {
446     dim_t i;
447     double time0=0;
448     double *r=NULL, *x0=NULL,*x_F_temp=NULL;
449     bool_t verbose=0;
450    
451     dim_t post_sweeps=amli->post_sweeps;
452     dim_t pre_sweeps=amli->pre_sweeps;
453    
454     #ifdef UMFPACK
455     Paso_UMFPACK_Handler * ptr=NULL;
456     #endif
457    
458     r=MEMALLOC(amli->n,double);
459     x0=MEMALLOC(amli->n,double);
460     x_F_temp=MEMALLOC(amli->n_F,double);
461    
462     if (amli->coarsest_level) {
463    
464     time0=Esys_timer();
465     /*If all unknown are eliminated then Jacobi is the best preconditioner*/
466     if (amli->n_F==0 || amli->n_F==amli->n) {
467     Paso_Preconditioner_LocalSmoother_solve(amli->A, amli->Smoother,x,b,1,FALSE);
468     }
469     else {
470     #ifdef MKL
471     Paso_MKL1(amli->AOffset1,x,b,verbose);
472     #else
473     #ifdef UMFPACK
474     ptr=(Paso_UMFPACK_Handler *)(amli->solver);
475     Paso_UMFPACK1(&ptr,amli->A,x,b,verbose);
476     amli->solver=(void*) ptr;
477     #else
478     Paso_Preconditioner_LocalSmoother_solve(amli->A,amli->Smoother,x,b,1,FALSE);
479     #endif
480     #endif
481     }
482     time0=Esys_timer()-time0;
483     if (verbose) fprintf(stderr,"timing: DIRECT SOLVER: %e\n",time0);
484    
485     } else {
486     /* presmoothing */
487     time0=Esys_timer();
488     Paso_Preconditioner_LocalSmoother_solve(amli->A,amli->Smoother,x,b,pre_sweeps,FALSE);
489     time0=Esys_timer()-time0;
490     if (verbose) fprintf(stderr,"timing: Presmooting: %e\n",time0);
491     /* end of presmoothing */
492    
493    
494     time0=Esys_timer();
495     #pragma omp parallel for private(i) schedule(static)
496     for (i=0;i<amli->n;++i) r[i]=b[i];
497    
498     /*r=b-Ax*/
499     Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,amli->A,x,1.,r);
500    
501     /* r->[b_F,b_C] */
502     #pragma omp parallel for private(i) schedule(static)
503     for (i=0;i<amli->n_F;++i) amli->b_F[i]=r[amli->rows_in_F[i]];
504    
505     #pragma omp parallel for private(i) schedule(static)
506     for (i=0;i<amli->n_C;++i) amli->b_C[i]=r[amli->rows_in_C[i]];
507    
508     /* x_F=invA_FF*b_F */
509     Paso_Copy(amli->n_F, amli->x_F,amli->b_F);
510     Paso_BlockOps_allMV(1,amli->n_F,amli->inv_A_FF,amli->A_FF_pivot,amli->x_F);
511    
512     /* b_C=b_C-A_CF*x_F */
513     Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,amli->A_CF,amli->x_F,1.,amli->b_C);
514    
515     time0=Esys_timer()-time0;
516     if (verbose) fprintf(stderr,"timing: Before next level: %e\n",time0);
517    
518     /* x_C=AMLI(b_C) */
519     Paso_Solver_solveAMLI(amli->AMLI_of_Schur,amli->x_C,amli->b_C);
520    
521     time0=Esys_timer();
522    
523     /* b_F=-A_FC*x_C */
524     Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,amli->A_FC,amli->x_C,0.,amli->b_F);
525     /* x_F_temp=invA_FF*b_F */
526     Paso_Copy(amli->n_F, x_F_temp,amli->b_F);
527     Paso_BlockOps_allMV(1,amli->n_F,amli->inv_A_FF,amli->A_FF_pivot,x_F_temp);
528    
529     #pragma omp parallel for private(i) schedule(static)
530     for (i=0;i<amli->n_F;++i) {
531     amli->x_F[i]+=x_F_temp[i];
532     }
533    
534     /* x<-[x_F,x_C] */
535     #pragma omp parallel for private(i) schedule(static)
536     for (i=0;i<amli->n;++i) {
537     if (amli->mask_C[i]>-1) {
538     x[i]+=amli->x_C[amli->mask_C[i]];
539     } else {
540     x[i]+=amli->x_F[amli->mask_F[i]];
541     }
542     }
543    
544     time0=Esys_timer()-time0;
545     if (verbose) fprintf(stderr,"timing: After next level: %e\n",time0);
546    
547     /*postsmoothing*/
548     time0=Esys_timer();
549     Paso_Preconditioner_LocalSmoother_solve(amli->A,amli->Smoother,x,b,post_sweeps,TRUE);
550     time0=Esys_timer()-time0;
551     if (verbose) fprintf(stderr,"timing: Postsmoothing: %e\n",time0);
552    
553     /*end of postsmoothing*/
554    
555     }
556     MEMFREE(r);
557     MEMFREE(x0);
558     MEMFREE(x_F_temp);
559     return;
560     }
561    
562     /*******************************************************
563     *
564     * Copyright (c) 2003-2010 by University of Queensland
565     * Earth Systems Science Computational Center (ESSCC)
566     * http://www.uq.edu.au/esscc
567     *
568     * Primary Business: Queensland, Australia
569     * Licensed under the Open Software License version 3.0
570     * http://www.opensource.org/licenses/osl-3.0.php
571     *
572     *******************************************************/
573    
574    
575     /*************************************************************
576    
577     Paso: Coarsening strategies (no MPI)
578    
579     the given matrix A is splitted in to the form
580    
581    
582     [ A_FF A_FC ]
583     A = [ ]
584     [ A_CF A_CC ]
585    
586     such that unknowns/equations in set C are weakly connected via A_CC and strongly connected
587     to at least one unknowns/equation in F via A_CF and A_FC. The unknowns/equations in C and F
588     are marked in the marker_F by FALSE and TRUE respectively.
589    
590     The weak/strong connection is controlled by coarsening_threshold.
591    
592     three strategies are implemented:
593    
594     a) YAIR_SHAPIRA (YS): |a_ij| >= theta * |a_ii|
595     b) Ruge-Stueben (RS): |a_ij| >= theta * max_(k<>i) |a_ik|
596     c) Aggregation : |a_ij|^2 >= theta**2 * |a_ii||a_jj|
597    
598     where theta = coarsening_threshold/maximum_pattern_degree
599    
600     Remark:
601    
602     - a strong connection in YAIR_SHAPIRA is a strong connection in Aggregation
603    
604     */
605     /**************************************************************
606    
607     Author: artak@uq.edu.au, l.gross@uq.edu.au
608    
609     **************************************************************/
610    
611     #include "Coarsening.h"
612     #include "PasoUtil.h"
613     #include <limits.h>
614    
615    
616    
617     /*Used in Paso_Coarsening_Local_RS_MI*/
618    
619     /*Computes how many connections unknown i have in S.
620     bool_t transpose - TRUE if we want to compute how many strong connection of i in S^T, FALSE otherwise.
621     Note that if we first transpose S and then call method on S^T then "transpose" should be set to FALSE.
622     */
623     dim_t how_many(dim_t i,Paso_Pattern * S, bool_t transpose) {
624     dim_t j,n;
625     dim_t total,ltotal;
626     index_t *index,*where_p;
627    
628     /*index_t iptr;*/
629     total=0;
630     ltotal=0;
631    
632     n=S->numOutput;
633    
634     if(transpose) {
635     #pragma omp parallel
636     {
637     ltotal=0;
638     #pragma omp for private(j,index,where_p,ltotal) schedule(static)
639     for (j=0;j<n;++j) {
640     index=&(S->index[S->ptr[j]]);
641     where_p=(index_t*)bsearch(&i,
642     index,
643     S->ptr[j + 1]-S->ptr[j],
644     sizeof(index_t),
645     Paso_comparIndex);
646     if (where_p!=NULL) {
647     ltotal++;
648     }
649     }
650     }
651     #pragma omp critical
652     {
653     total+=ltotal;
654     }
655    
656     }
657     else {
658     total=S->ptr[i+1]-S->ptr[i];
659     /*#pragma omp parallel for private(iptr) schedule(static)*/
660     /*for (iptr=S->ptr[i];iptr<S->ptr[i+1]; ++iptr) {
661     if(S->index[iptr]!=i && marker_F[S->index[iptr]]==IS_AVAILABLE)
662     total++;
663     }*/
664    
665     }
666    
667     if (total==0) total=IS_NOT_AVAILABLE;
668    
669     return total;
670     }
671    
672    
673    
674    
675    
676     Paso_Pattern* Paso_Coarsening_Local_getTranspose(Paso_Pattern* P){
677    
678     Paso_Pattern *outpattern=NULL;
679    
680     dim_t C=P->numInput;
681     dim_t F=P->numOutput-C;
682     dim_t n=C+F;
683     dim_t i,j;
684     index_t iptr;
685     Paso_IndexListArray* index_list = Paso_IndexListArray_alloc(C);
686    
687    
688     /*#pragma omp parallel for private(i,iptr,j) schedule(static)*/
689     for (i=0;i<n;++i) {
690     for (iptr=P->ptr[i];iptr<P->ptr[i+1]; ++iptr) {
691     j=P->index[iptr];
692     Paso_IndexListArray_insertIndex(index_list,i,j);
693     }
694     }
695    
696     outpattern=Paso_Pattern_fromIndexListArray(0, index_list,0,n,0);
697    
698     /* clean up */
699     Paso_IndexListArray_free(index_list);
700    
701     return outpattern;
702     }
703    
704    
705     /************** BLOCK COARSENENING *********************/
706    
707     void Paso_Coarsening_Local_Standard_Block(Paso_SparseMatrix* A, index_t* marker_F, double theta)
708     {
709     const dim_t n=A->numRows;
710    
711     dim_t i,j,k;
712     index_t iptr,jptr;
713     /*index_t *index,*where_p;*/
714     double threshold,max_offdiagonal;
715     dim_t *lambda; /*mesure of importance */
716     dim_t maxlambda=0;
717     index_t index_maxlambda=0;
718     double time0=0;
719     bool_t verbose=0;
720     dim_t n_block=A->row_block_size;
721    
722     double fnorm=0;
723     dim_t bi;
724    
725     Paso_Pattern *S=NULL;
726     Paso_Pattern *ST=NULL;
727     Paso_IndexListArray* index_list = Paso_IndexListArray_alloc(n);
728    
729     time0=Esys_timer();
730     k=0;
731     /*Paso_Coarsening_Local_getReport(n,marker_F);*/
732     /*printf("Blocks %d %d\n",n_block,A->len);*/
733    
734     /*S_i={j \in N_i; i strongly coupled to j}*/
735     #pragma omp parallel for private(i,bi,fnorm,iptr,max_offdiagonal,threshold,j) schedule(static)
736     for (i=0;i<n;++i) {
737     if(marker_F[i]==IS_AVAILABLE) {
738     max_offdiagonal = DBL_MIN;
739     for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {
740     j=A->pattern->index[iptr];
741     if( j != i){
742     fnorm=0;
743     for(bi=0;bi<n_block*n_block;++bi)
744     {
745     fnorm+=A->val[iptr*n_block*n_block+bi]*A->val[iptr*n_block*n_block+bi];
746     }
747     fnorm=sqrt(fnorm);
748     max_offdiagonal = MAX(max_offdiagonal,fnorm);
749     }
750     }
751     threshold = theta*max_offdiagonal;
752     for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {
753     j=A->pattern->index[iptr];
754     if( j != i){
755     fnorm=0;
756     for(bi=0;bi<n_block*n_block;++bi)
757     {
758     fnorm+=A->val[iptr*n_block*n_block+bi]*A->val[iptr*n_block*n_block+bi];
759     }
760     fnorm=sqrt(fnorm);
761     if(fnorm>=threshold) {
762     Paso_IndexListArray_insertIndex(index_list,i,j);
763     }
764     }
765     }
766     }
767     }
768    
769     S=Paso_Pattern_fromIndexListArray(0,index_list,0,A->pattern->numInput,0);
770     ST=Paso_Coarsening_Local_getTranspose(S);
771    
772     /*printf("Patterns len %d %d\n",S->len,ST->len);*/
773    
774     time0=Esys_timer()-time0;
775     if (verbose) fprintf(stdout,"timing: RS filtering and pattern creation: %e\n",time0);
776    
777     lambda=TMPMEMALLOC(n,dim_t);
778    
779     #pragma omp parallel for private(i) schedule(static)
780     for (i=0;i<n;++i) { lambda[i]=IS_NOT_AVAILABLE; }
781    
782     /*S_i={j \in N_i; i strongly coupled to j}*/
783    
784     /*
785     #pragma omp parallel for private(i,iptr,lk) schedule(static)
786     for (i=0;i<n;++i) {
787     lk=0;
788     for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {
789     if(ABS(A->val[iptr])>1.e-15 && A->pattern->index[iptr]!=i )
790     lk++;
791     }
792     #pragma omp critical
793     k+=lk;
794     if(k==0) {
795     marker_F[i]=TRUE;
796     }
797     }
798     */
799    
800    
801     k=0;
802     maxlambda=0;
803    
804     time0=Esys_timer();
805    
806     for (i=0;i<n;++i) {
807     if(marker_F[i]==IS_AVAILABLE) {
808     lambda[i]=how_many(k,ST,FALSE); /* if every row is available then k and i are the same.*/
809     /*lambda[i]=how_many(i,S,TRUE);*/
810     /*printf("lambda[%d]=%d, k=%d \n",i,lambda[i],k);*/
811     k++;
812     if(maxlambda<lambda[i]) {
813     maxlambda=lambda[i];
814     index_maxlambda=i;
815     }
816     }
817     }
818    
819     k=0;
820     time0=Esys_timer()-time0;
821     if (verbose) fprintf(stdout,"timing: Lambdas computations at the begining: %e\n",time0);
822    
823     time0=Esys_timer();
824    
825     /*Paso_Coarsening_Local_getReport(n,marker_F);*/
826    
827     while (Paso_Util_isAny(n,marker_F,IS_AVAILABLE)) {
828    
829     if(index_maxlambda<0) {
830     break;
831     }
832    
833     i=index_maxlambda;
834     if(marker_F[i]==IS_AVAILABLE) {
835     marker_F[index_maxlambda]=FALSE;
836     lambda[index_maxlambda]=IS_NOT_AVAILABLE;
837     for (iptr=ST->ptr[i];iptr<ST->ptr[i+1]; ++iptr) {
838     j=ST->index[iptr];
839     if(marker_F[j]==IS_AVAILABLE) {
840     marker_F[j]=TRUE;
841     lambda[j]=IS_NOT_AVAILABLE;
842     for (jptr=S->ptr[j];jptr<S->ptr[j+1]; ++jptr) {
843     k=S->index[jptr];
844     if(marker_F[k]==IS_AVAILABLE) {
845     lambda[k]++;
846     }
847     }
848     }
849     }
850     for (iptr=S->ptr[i];iptr<S->ptr[i+1]; ++iptr) {
851     j=S->index[iptr];
852     if(marker_F[j]==IS_AVAILABLE) {
853     lambda[j]--;
854     }
855     }
856     }
857    
858     /* Used when transpose of S is not available */
859     /*
860     for (i=0;i<n;++i) {
861     if(marker_F[i]==IS_AVAILABLE) {
862     if (i==index_maxlambda) {
863     marker_F[index_maxlambda]=FALSE;
864     lambda[index_maxlambda]=-1;
865     for (j=0;j<n;++j) {
866     if(marker_F[j]==IS_AVAILABLE) {
867     index=&(S->index[S->ptr[j]]);
868     where_p=(index_t*)bsearch(&i,
869     index,
870     S->ptr[j + 1]-S->ptr[j],
871     sizeof(index_t),
872     Paso_comparIndex);
873     if (where_p!=NULL) {
874     marker_F[j]=TRUE;
875     lambda[j]=-1;
876     for (iptr=S->ptr[j];iptr<S->ptr[j+1]; ++iptr) {
877     k=S->index[iptr];
878     if(marker_F[k]==IS_AVAILABLE) {
879     lambda[k]++;
880     }
881     }
882     }
883    
884     }
885     }
886    
887     }
888     }
889     }
890     */
891     index_maxlambda=arg_max(n,lambda, IS_NOT_AVAILABLE);
892     }
893    
894     time0=Esys_timer()-time0;
895     if (verbose) fprintf(stdout,"timing: Loop : %e\n",time0);
896    
897     /*Paso_Coarsening_Local_getReport(n,marker_F);*/
898    
899     #pragma omp parallel for private(i) schedule(static)
900     for (i=0;i<n;++i)
901     if(marker_F[i]==IS_AVAILABLE) {
902     marker_F[i]=TRUE;
903     }
904    
905     /*Paso_Coarsening_Local_getReport(n,marker_F);*/
906    
907     TMPMEMFREE(lambda);
908    
909     /* clean up */
910     Paso_IndexListArray_free(index_list);
911     Paso_Pattern_free(S);
912    
913     /* swap to TRUE/FALSE in marker_F */
914     #pragma omp parallel for private(i) schedule(static)
915     for (i=0;i<n;i++) marker_F[i]=(marker_F[i]==TRUE)? TRUE : FALSE;
916    
917     }
918    
919    
920    
921     #undef IS_AVAILABLE
922     #undef IS_NOT_AVAILABLE
923     #undef TRUE
924     #undef FALSE
925    
926     #undef IS_UNDECIDED
927     #undef IS_STRONG
928     #undef IS_WEAK
929    
930     #undef TRUEB
931     #undef TRUEB
932    

  ViewVC Help
Powered by ViewVC 1.1.26