22 |
|
|
23 |
/**************************************************************/ |
/**************************************************************/ |
24 |
|
|
25 |
|
#define SHOW_TIMING FALSE |
26 |
|
|
27 |
#include "Paso.h" |
#include "Paso.h" |
28 |
#include "Preconditioner.h" |
#include "Preconditioner.h" |
29 |
#include "Options.h" |
#include "Options.h" |
30 |
#include "PasoUtil.h" |
#include "PasoUtil.h" |
31 |
#include "UMFPACK.h" |
#include "UMFPACK.h" |
32 |
#include "MKL.h" |
#include "MKL.h" |
|
#include "Coarsening.h" |
|
33 |
|
|
34 |
/**************************************************************/ |
/**************************************************************/ |
35 |
|
|
38 |
void Paso_Preconditioner_LocalAMG_free(Paso_Preconditioner_LocalAMG * in) { |
void Paso_Preconditioner_LocalAMG_free(Paso_Preconditioner_LocalAMG * in) { |
39 |
if (in!=NULL) { |
if (in!=NULL) { |
40 |
Paso_Preconditioner_LocalSmoother_free(in->Smoother); |
Paso_Preconditioner_LocalSmoother_free(in->Smoother); |
41 |
|
Paso_SparseMatrix_free(in->P); |
42 |
|
Paso_SparseMatrix_free(in->R); |
43 |
|
Paso_SparseMatrix_free(in->A_C); |
44 |
|
Paso_Preconditioner_LocalAMG_free(in->AMG_C); |
45 |
|
MEMFREE(in->r); |
46 |
|
MEMFREE(in->x_C); |
47 |
|
MEMFREE(in->b_C); |
48 |
|
|
49 |
/*=========================*/ |
|
50 |
Paso_SparseMatrix_free(in->A_FC); |
MEMFREE(in); |
|
Paso_SparseMatrix_free(in->A_FF); |
|
|
Paso_SparseMatrix_free(in->W_FC); |
|
|
Paso_SparseMatrix_free(in->A_CF); |
|
|
Paso_SparseMatrix_free(in->P); |
|
|
Paso_SparseMatrix_free(in->R); |
|
|
Paso_SparseMatrix_free(in->A); |
|
|
if(in->coarsest_level==TRUE) { |
|
|
#ifdef MKL |
|
|
Paso_MKL_free1(in->AOffset1); |
|
|
Paso_SparseMatrix_free(in->AOffset1); |
|
|
Paso_SparseMatrix_free(in->AUnrolled); |
|
|
#else |
|
|
#ifdef UMFPACK |
|
|
Paso_UMFPACK1_free((Paso_UMFPACK_Handler*)(in->solver)); |
|
|
Paso_SparseMatrix_free(in->AUnrolled); |
|
|
#endif |
|
|
#endif |
|
|
} |
|
|
MEMFREE(in->rows_in_F); |
|
|
MEMFREE(in->rows_in_C); |
|
|
MEMFREE(in->mask_F); |
|
|
MEMFREE(in->mask_C); |
|
|
MEMFREE(in->x_F); |
|
|
MEMFREE(in->b_F); |
|
|
MEMFREE(in->x_C); |
|
|
MEMFREE(in->b_C); |
|
|
in->solver=NULL; |
|
|
Paso_Preconditioner_LocalAMG_free(in->AMG_of_Coarse); |
|
|
MEMFREE(in->b_C); |
|
|
MEMFREE(in); |
|
51 |
} |
} |
52 |
} |
} |
53 |
|
|
54 |
/**************************************************************/ |
/***************************************************************** |
|
|
|
|
/* constructs the block-block factorization of |
|
|
|
|
|
[ A_FF A_FC ] |
|
|
A_p= |
|
|
[ A_CF A_FF ] |
|
|
|
|
|
to |
|
|
|
|
|
[ I 0 ] [ A_FF 0 ] [ I invA_FF*A_FF ] |
|
|
[ A_CF*invA_FF I ] [ 0 S ] [ 0 I ] |
|
55 |
|
|
56 |
|
constructs AMG |
57 |
where S=A_FF-ACF*invA_FF*A_FC within the shape of S |
|
58 |
|
******************************************************************/ |
|
then AMG is applied to S again until S becomes empty |
|
|
|
|
|
*/ |
|
59 |
Paso_Preconditioner_LocalAMG* Paso_Preconditioner_LocalAMG_alloc(Paso_SparseMatrix *A_p,dim_t level,Paso_Options* options) { |
Paso_Preconditioner_LocalAMG* Paso_Preconditioner_LocalAMG_alloc(Paso_SparseMatrix *A_p,dim_t level,Paso_Options* options) { |
60 |
|
|
61 |
Paso_Preconditioner_LocalAMG* out=NULL; |
Paso_Preconditioner_LocalAMG* out=NULL; |
|
/* |
|
|
Paso_Pattern* temp1=NULL; |
|
|
Paso_Pattern* temp2=NULL; |
|
|
*/ |
|
62 |
bool_t verbose=options->verbose; |
bool_t verbose=options->verbose; |
|
bool_t timing=0; |
|
63 |
|
|
64 |
|
Paso_SparseMatrix* W_FC=NULL, *Atemp=NULL, *A_C=NULL; |
65 |
const dim_t n=A_p->numRows; |
const dim_t n=A_p->numRows; |
66 |
const dim_t n_block=A_p->row_block_size; |
const dim_t n_block=A_p->row_block_size; |
67 |
|
index_t* split_marker=NULL, *counter=NULL, *mask_C=NULL, *rows_in_F=NULL; |
68 |
|
dim_t n_F=0, n_C=0, i; |
|
index_t* mis_marker=NULL; |
|
|
index_t* counter=NULL; |
|
|
/*index_t iPtr,*index, *where_p;*/ |
|
|
dim_t i,j; |
|
|
Paso_SparseMatrix * A_c=NULL; |
|
69 |
double time0=0; |
double time0=0; |
70 |
Paso_SparseMatrix * Atemp=NULL; |
const double theta = options->coarsening_threshold; |
71 |
double sparsity=0; |
const double tau = options->diagonal_dominance_threshold; |
|
|
|
|
/* |
|
|
double *temp,*temp_1; |
|
|
double S; |
|
|
index_t iptr; |
|
|
*/ |
|
|
|
|
|
/*char filename[8];*/ |
|
72 |
|
|
|
/* |
|
|
sprintf(filename,"AMGLevel%d",level); |
|
73 |
|
|
74 |
Paso_SparseMatrix_saveMM(A_p,filename); |
/* |
75 |
|
is the input matrix A suitable for coarsening |
76 |
|
|
77 |
*/ |
*/ |
78 |
|
if ( (A_p->pattern->len >= options->min_coarse_sparsity * n * n ) || (n <= options->min_coarse_matrix_size) || (level > options->level_max) ) { |
79 |
/*Make sure we have block sizes 1*/ |
if (verbose) printf("Paso: AMG level %d (limit = %d) stopped. sparsity = %e (limit = %e), unknowns = %d (limit = %d)\n", |
80 |
/*if (A_p->col_block_size>1) { |
level, options->level_max, A_p->pattern->len/(1.*n * n), options->min_coarse_sparsity, n, options->min_coarse_matrix_size ); |
|
Paso_setError(TYPE_ERROR,"Paso_Solver_getAMG: AMG requires column block size 1."); |
|
81 |
return NULL; |
return NULL; |
82 |
} |
} |
83 |
if (A_p->row_block_size>1) { |
/* Start Coarsening : */ |
|
Paso_setError(TYPE_ERROR,"Paso_Solver_getAMG: AMG requires row block size 1."); |
|
|
return NULL; |
|
|
}*/ |
|
|
out=MEMALLOC(1,Paso_Preconditioner_LocalAMG); |
|
|
|
|
|
|
|
|
/* identify independend set of rows/columns */ |
|
|
mis_marker=TMPMEMALLOC(n,index_t); |
|
|
counter=TMPMEMALLOC(n,index_t); |
|
|
if ( !( Esys_checkPtr(mis_marker) || Esys_checkPtr(counter) || Esys_checkPtr(out)) ) { |
|
|
|
|
|
|
|
|
out->post_sweeps=options->post_sweeps; |
|
|
out->pre_sweeps=options->pre_sweeps; |
|
|
|
|
|
out->AMG_of_Coarse=NULL; |
|
|
out->A_FF=NULL; |
|
|
out->A_FC=NULL; |
|
|
out->A_CF=NULL; |
|
|
out->W_FC=NULL; |
|
|
out->P=NULL; |
|
|
out->R=NULL; |
|
|
out->rows_in_F=NULL; |
|
|
out->rows_in_C=NULL; |
|
|
out->mask_F=NULL; |
|
|
out->mask_C=NULL; |
|
|
out->x_F=NULL; |
|
|
out->b_F=NULL; |
|
|
out->x_C=NULL; |
|
|
out->b_C=NULL; |
|
|
out->A=Paso_SparseMatrix_getReference(A_p); |
|
|
out->AUnrolled=NULL; |
|
|
out->AOffset1=NULL; |
|
|
out->solver=NULL; |
|
|
out->Smoother=NULL; |
|
|
out->level=level; |
|
|
out->n=n; |
|
|
out->n_F=n+1; |
|
|
out->n_block=n_block; |
|
|
|
|
|
|
|
|
sparsity=(A_p->len*1.)/(1.*A_p->numRows*A_p->numCols); |
|
|
|
|
|
if (verbose) fprintf(stdout,"Stats: Sparsity of the Coarse Matrix with %d non-zeros (%d,%d) in level %d is %.6f\n",A_p->len,A_p->numRows,A_p->numCols,level,sparsity); |
|
|
|
|
|
|
|
|
if(sparsity>0.05) { |
|
|
level=0; |
|
|
} |
|
|
|
|
|
|
|
|
if (level==0 || n<=options->min_coarse_matrix_size) { |
|
|
out->coarsest_level=TRUE; |
|
|
#ifdef MKL |
|
|
out->AUnrolled=Paso_SparseMatrix_unroll(A_p); |
|
|
out->AOffset1=Paso_SparseMatrix_alloc(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_OFFSET1, out->AUnrolled->pattern,1,1, FALSE); |
|
|
#pragma omp parallel for private(i) schedule(static) |
|
|
for (i=0;i<out->A->len;++i) { |
|
|
out->AOffset1->val[i]=out->AUnrolled->val[i]; |
|
|
} |
|
|
#else |
|
|
#ifdef UMFPACK |
|
|
out->AUnrolled=Paso_SparseMatrix_unroll(A_p); |
|
|
/*Paso_SparseMatrix_saveMM(out->AUnrolled,"AUnroll.mat"); |
|
|
Paso_SparseMatrix_saveMM(A_p,"Aorg.mat"); |
|
|
*/ |
|
|
#else |
|
|
out->Smoother=Paso_Preconditioner_LocalSmoother_alloc(A_p, (options->smoother == PASO_JACOBI), verbose); |
|
|
#endif |
|
|
#endif |
|
|
|
|
|
} else { |
|
|
out->coarsest_level=FALSE; |
|
|
out->Smoother=Paso_Preconditioner_LocalSmoother_alloc(A_p, (options->smoother == PASO_JACOBI), verbose); |
|
|
|
|
|
/* Start Coarsening : */ |
|
|
time0=Esys_timer(); |
|
|
Paso_Coarsening_Local(mis_marker, A_p, options->coarsening_threshold, options->coarsening_method); |
|
|
time0=Esys_timer()-time0; |
|
|
if (timing) fprintf(stdout,"Level %d: timing: Coarsening: %e\n",level,time0); |
|
84 |
|
|
85 |
#pragma omp parallel for private(i) schedule(static) |
split_marker=TMPMEMALLOC(n,index_t); |
86 |
for (i = 0; i < n; ++i) { |
counter=TMPMEMALLOC(n,index_t); |
87 |
mis_marker[i]=(mis_marker[i]== PASO_COARSENING_IN_F); |
if ( !( Esys_checkPtr(split_marker) || Esys_checkPtr(counter) ) ) { |
88 |
counter[i]=mis_marker[i]; |
/* |
89 |
} |
set splitting of unknows: |
90 |
out->n_F=Paso_Util_cumsum(n,counter); |
|
91 |
|
*/ |
92 |
if (out->n_F==0) { |
time0=Esys_timer(); |
93 |
out->coarsest_level=TRUE; |
if (n_block>1) { |
94 |
level=0; |
Paso_Preconditioner_AMG_RSCoarsening_Block(A_p, split_marker, theta,tau); |
95 |
if (verbose) { |
} else { |
96 |
/*printf("AMG coarsening eliminates all unknowns, switching to Jacobi preconditioner.\n");*/ |
Paso_Preconditioner_AMG_RSCoarsening(A_p, split_marker, theta,tau); |
97 |
printf("AMG coarsening does not eliminate any of the unknowns, switching to Jacobi preconditioner.\n"); |
} |
98 |
} |
options->coarsening_selection_time=Esys_timer()-time0 + MAX(0, options->coarsening_selection_time); |
99 |
} |
|
100 |
else if (out->n_F==n) { |
if (Esys_noError() ) { |
101 |
out->coarsest_level=TRUE; |
#pragma omp parallel for private(i) schedule(static) |
102 |
level=0; |
for (i = 0; i < n; ++i) split_marker[i]= (split_marker[i] == PASO_AMG_IN_F); |
103 |
if (verbose) { |
|
104 |
/*printf("AMG coarsening eliminates all unknowns, switching to Jacobi preconditioner.\n");*/ |
/* |
105 |
printf("AMG coarsening eliminates all of the unknowns, switching to Jacobi preconditioner.\n"); |
count number of unkowns to be eliminated: |
106 |
|
*/ |
107 |
} |
n_F=Paso_Util_cumsum_maskedTrue(n,counter, split_marker); |
108 |
} else { |
n_C=n-n_F; |
109 |
|
if (verbose) printf("Paso AMG level %d: %d unknowns are flagged for elimination. %d left.\n",level,n_F,n-n_F); |
110 |
if (Esys_noError()) { |
|
111 |
|
if ( n_F == 0 ) { /* is a nasty case. a direct solver should be used, return NULL */ |
112 |
/*#pragma omp parallel for private(i) schedule(static) |
out = NULL; |
113 |
for (i = 0; i < n; ++i) counter[i]=mis_marker[i]; |
} else { |
114 |
out->n_F=Paso_Util_cumsum(n,counter); |
out=MEMALLOC(1,Paso_Preconditioner_LocalAMG); |
115 |
*/ |
mask_C=TMPMEMALLOC(n,index_t); |
116 |
|
rows_in_F=TMPMEMALLOC(n_F,index_t); |
117 |
out->mask_F=MEMALLOC(n,index_t); |
if ( !( Esys_checkPtr(mask_C) || Esys_checkPtr(rows_in_F) || Esys_checkPtr(out)) ) { |
118 |
out->rows_in_F=MEMALLOC(out->n_F,index_t); |
out->level = level; |
119 |
if (! (Esys_checkPtr(out->mask_F) || Esys_checkPtr(out->rows_in_F) ) ) { |
out->n = n; |
120 |
/* creates an index for F from mask */ |
out->n_F = n_F; |
121 |
#pragma omp parallel for private(i) schedule(static) |
out->n_block = n_block; |
122 |
for (i = 0; i < out->n_F; ++i) out->rows_in_F[i]=-1; |
out->A_C = NULL; |
123 |
#pragma omp parallel for private(i) schedule(static) |
out->P = NULL; |
124 |
for (i = 0; i < n; ++i) { |
out->R = NULL; |
125 |
if (mis_marker[i]) { |
out->post_sweeps = options->post_sweeps; |
126 |
out->rows_in_F[counter[i]]=i; |
out->pre_sweeps = options->pre_sweeps; |
127 |
out->mask_F[i]=counter[i]; |
out->r = NULL; |
128 |
} else { |
out->x_C = NULL; |
129 |
out->mask_F[i]=-1; |
out->b_C = NULL; |
130 |
} |
out->AMG_C = NULL; |
131 |
} |
out->Smoother = Paso_Preconditioner_LocalSmoother_alloc(A_p, (options->smoother == PASO_JACOBI), verbose); |
132 |
|
|
133 |
} |
if ( n_F < n ) { /* if nothing is been removed we have a diagonal dominant matrix and we just run a few steps of the smoother */ |
134 |
} |
|
135 |
|
/* creates index for F:*/ |
136 |
/* if(level==1) { |
#pragma omp parallel for private(i) schedule(static) |
137 |
printf("##TOTAL: %d, ELIMINATED: %d\n",n,out->n_F); |
for (i = 0; i < n; ++i) { |
138 |
for (i = 0; i < n; ++i) { |
if (split_marker[i]) rows_in_F[counter[i]]=i; |
139 |
printf("##%d %d\n",i,!mis_marker[i]); |
} |
140 |
} |
/* create mask of C nodes with value >-1 gives new id */ |
141 |
} |
i=Paso_Util_cumsum_maskedFalse(n,counter, split_marker); |
142 |
*/ |
|
143 |
|
#pragma omp parallel for private(i) schedule(static) |
144 |
/*check whether coarsening process actually makes sense to continue. |
for (i = 0; i < n; ++i) { |
145 |
if coarse matrix at least smaller by 30% then continue, otherwise we stop.*/ |
if (split_marker[i]) { |
146 |
if ((out->n_F*100/n)<30) { |
mask_C[i]=-1; |
147 |
level=1; |
} else { |
148 |
} |
mask_C[i]=counter[i];; |
149 |
|
} |
150 |
if ( Esys_noError()) { |
} |
151 |
out->n_C=n-out->n_F; |
/* |
152 |
out->rows_in_C=MEMALLOC(out->n_C,index_t); |
get Restriction : (can we do this in one go?) |
153 |
out->mask_C=MEMALLOC(n,index_t); |
*/ |
154 |
if (! (Esys_checkPtr(out->mask_C) || Esys_checkPtr(out->rows_in_C) ) ) { |
time0=Esys_timer(); |
155 |
/* creates an index for C from mask */ |
W_FC=Paso_SparseMatrix_getSubmatrix(A_p, n_F, n_C, rows_in_F, mask_C); |
156 |
#pragma omp parallel for private(i) schedule(static) |
if (SHOW_TIMING) printf("timing: level %d: get Weights: %e\n",level, Esys_timer()-time0); |
157 |
for (i = 0; i < n; ++i) counter[i]=! mis_marker[i]; |
|
158 |
Paso_Util_cumsum(n,counter); |
time0=Esys_timer(); |
159 |
#pragma omp parallel for private(i) schedule(static) |
Paso_SparseMatrix_updateWeights(A_p,W_FC,split_marker); |
160 |
for (i = 0; i < out->n_C; ++i) out->rows_in_C[i]=-1; |
if (SHOW_TIMING) printf("timing: level %d: updateWeights: %e\n",level, Esys_timer()-time0); |
161 |
#pragma omp parallel for private(i) schedule(static) |
|
162 |
for (i = 0; i < n; ++i) { |
time0=Esys_timer(); |
163 |
if (! mis_marker[i]) { |
out->P=Paso_SparseMatrix_getProlongation(W_FC,split_marker); |
164 |
out->rows_in_C[counter[i]]=i; |
if (SHOW_TIMING) printf("timing: level %d: getProlongation: %e\n",level, Esys_timer()-time0); |
165 |
out->mask_C[i]=counter[i]; |
|
166 |
} else { |
Paso_SparseMatrix_free(W_FC); |
167 |
out->mask_C[i]=-1; |
/* |
168 |
} |
construct Prolongation operator as transposed of restriction operator: |
169 |
} |
*/ |
170 |
} |
time0=Esys_timer(); |
171 |
} |
out->R=Paso_SparseMatrix_getTranspose(out->P); |
172 |
if ( Esys_noError()) { |
if (SHOW_TIMING) printf("timing: level %d: Paso_SparseMatrix_getTranspose: %e\n",level,Esys_timer()-time0); |
173 |
/* get A_FF block: */ |
/* |
174 |
/* |
constrPaso AMG level 2: 4 unknowns are flagged for elimination. |
175 |
out->A_FF=Paso_SparseMatrix_getSubmatrix(A_p,out->n_F,out->n_F,out->rows_in_F,out->mask_F); |
timing: Gauss-Seidel preparation: elemination : 8.096400e-05 |
176 |
out->A_CF=Paso_SparseMatrix_getSubmatrix(A_p,out->n_C,out->n_F,out->rows_in_C,out->mask_F); |
AMG: level 2: 4 unknowns eliminated. |
177 |
out->A_FC=Paso_SparseMatrix_getSubmatrix(A_p,out->n_F,out->n_C,out->rows_in_F,out->mask_C); |
uct coarse level matrix (can we do this in one call?) |
178 |
*/ |
*/ |
179 |
|
time0=Esys_timer(); |
180 |
/*Compute W_FC*/ |
Atemp=Paso_SparseMatrix_MatrixMatrix(A_p,out->P); |
181 |
/*initialy W_FC=A_FC*/ |
A_C=Paso_SparseMatrix_MatrixMatrix(out->R,Atemp); |
182 |
out->W_FC=Paso_SparseMatrix_getSubmatrix(A_p,out->n_F,out->n_C,out->rows_in_F,out->mask_C); |
Paso_SparseMatrix_free(Atemp); |
183 |
|
/*A_c=Paso_Solver_getCoarseMatrix(A_p,out->R,out->P);*/ |
184 |
/*sprintf(filename,"W_FCbefore_%d",level); |
if (SHOW_TIMING) printf("timing: level %d : getCoarseMatrix: %e\n",level,Esys_timer()-time0); |
185 |
Paso_SparseMatrix_saveMM(out->W_FC,filename); |
|
186 |
*/ |
/* allocate helpers :*/ |
187 |
/* for (i = 0; i < n; ++i) { |
out->x_C=MEMALLOC(n_block*n_C,double); |
188 |
printf("##mis_marker[%d]=%d\n",i,mis_marker[i]); |
out->b_C=MEMALLOC(n_block*n_C,double); |
189 |
} |
out->r=MEMALLOC(n_block*n,double); |
190 |
*/ |
|
191 |
time0=Esys_timer(); |
Esys_checkPtr(out->r); |
192 |
Paso_SparseMatrix_updateWeights(A_p,out->W_FC,mis_marker); |
Esys_checkPtr(out->Smoother); |
193 |
time0=Esys_timer()-time0; |
Esys_checkPtr(out->x_C); |
194 |
if (timing) fprintf(stdout,"timing: updateWeights: %e\n",time0); |
Esys_checkPtr(out->b_C); |
195 |
|
|
196 |
/* |
/* |
197 |
sprintf(filename,"W_FCafter_%d",level); |
constructe courser level: |
198 |
Paso_SparseMatrix_saveMM(out->W_FC,filename); |
|
199 |
*/ |
*/ |
200 |
|
out->AMG_C=Paso_Preconditioner_LocalAMG_alloc(A_C,level+1,options); |
201 |
/* get Prolongation and Restriction */ |
|
202 |
time0=Esys_timer(); |
if ( Esys_noError()) { |
203 |
out->P=Paso_SparseMatrix_getProlongation(out->W_FC,mis_marker); |
if ( out->AMG_C == NULL ) { |
204 |
time0=Esys_timer()-time0; |
out->reordering = options->reordering; |
205 |
if (timing) fprintf(stdout,"timing: getProlongation: %e\n",time0); |
out->refinements = options->coarse_matrix_refinements; |
206 |
/*out->P=Paso_SparseMatrix_loadMM_toCSR("P1.mtx");*/ |
/* no coarse level matrix has been constructed. use direct solver */ |
207 |
|
#ifdef MKL |
208 |
/* |
Atemp=Paso_SparseMatrix_unroll(A_C); |
209 |
sprintf(filename,"P_%d",level); |
Paso_SparseMatrix_free(A_C); |
210 |
Paso_SparseMatrix_saveMM(out->P,filename); |
out->A_C=Paso_SparseMatrix_alloc(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_OFFSET1, Atemp->pattern,1,1, FALSE); |
211 |
*/ |
#pragma omp parallel for private(i) schedule(static) |
212 |
|
for (i=0;i<out->A->len;++i) { |
213 |
time0=Esys_timer(); |
out->A_C->val[i]=Atemp->val[i]; |
214 |
out->R=Paso_SparseMatrix_getRestriction(out->P); |
} |
215 |
time0=Esys_timer()-time0; |
Paso_SparseMatrix_free(Atemp); |
216 |
if (timing) fprintf(stdout,"timing: getRestriction: %e\n",time0); |
out->A_C->solver_package = PASO_MKL; |
217 |
/*out->R=Paso_SparseMatrix_loadMM_toCSR("R1.mtx");*/ |
#else |
218 |
|
#ifdef UMFPACK |
219 |
/* |
out->A_C=Paso_SparseMatrix_unroll(A_C); |
220 |
sprintf(filename,"R_%d",level); |
Paso_SparseMatrix_free(A_C); |
221 |
Paso_SparseMatrix_saveMM(out->R,filename); |
out->A_C->solver_package = PASO_UMFPACK; |
222 |
*/ |
#else |
223 |
|
out->A_C=A_C; |
224 |
} |
out->A_C->solver_p=Paso_Preconditioner_LocalSmoother_alloc(out->A_C, (options->smoother == PASO_JACOBI), verbose); |
225 |
if ( Esys_noError()) { |
out->A_C->solver_package = PASO_SMOOTHER; |
226 |
|
#endif |
227 |
time0=Esys_timer(); |
#endif |
228 |
|
} else { |
229 |
Atemp=Paso_SparseMatrix_MatrixMatrix(A_p,out->P); |
/* finally we set some helpers for the solver step */ |
230 |
|
out->A_C=A_C; |
231 |
A_c=Paso_SparseMatrix_MatrixMatrix(out->R,Atemp); |
} |
232 |
|
} |
233 |
/*A_c=Paso_SparseMatrix_loadMM_toCSR("A_C1.mtx");*/ |
} |
234 |
|
} |
235 |
Paso_SparseMatrix_free(Atemp); |
TMPMEMFREE(mask_C); |
236 |
|
TMPMEMFREE(rows_in_F); |
237 |
/*A_c=Paso_Solver_getCoarseMatrix(A_p,out->R,out->P);*/ |
} |
238 |
time0=Esys_timer()-time0; |
} |
|
if (timing) fprintf(stdout,"timing: getCoarseMatrix: %e\n",time0); |
|
|
|
|
|
|
|
|
/*Paso_Solver_getCoarseMatrix(A_c, A_p,out->R,out->P);*/ |
|
|
/* |
|
|
sprintf(filename,"A_C_%d",level); |
|
|
Paso_SparseMatrix_saveMM(A_c,filename); |
|
|
*/ |
|
|
|
|
|
out->AMG_of_Coarse=Paso_Preconditioner_LocalAMG_alloc(A_c,level-1,options); |
|
|
} |
|
|
|
|
|
/* allocate work arrays for AMG application */ |
|
|
if (Esys_noError()) { |
|
|
/* |
|
|
out->x_F=MEMALLOC(n_block*out->n_F,double); |
|
|
out->b_F=MEMALLOC(n_block*out->n_F,double); |
|
|
*/ |
|
|
out->x_C=MEMALLOC(n_block*out->n_C,double); |
|
|
out->b_C=MEMALLOC(n_block*out->n_C,double); |
|
|
|
|
|
/*if (! (Esys_checkPtr(out->x_F) || Esys_checkPtr(out->b_F) || Esys_checkPtr(out->x_C) || Esys_checkPtr(out->b_C) ) ) {*/ |
|
|
if ( ! ( Esys_checkPtr(out->x_C) || Esys_checkPtr(out->b_C) ) ) { |
|
|
|
|
|
/* |
|
|
#pragma omp parallel for private(i) schedule(static) |
|
|
for (i = 0; i < out->n_F; ++i) { |
|
|
out->x_F[i]=0.; |
|
|
out->b_F[i]=0.; |
|
|
} |
|
|
*/ |
|
|
|
|
|
#pragma omp parallel for private(i,j) schedule(static) |
|
|
for (i = 0; i < out->n_C; ++i) { |
|
|
for(j=0;j<n_block;++j) { |
|
|
out->x_C[i*n_block+j]=0.; |
|
|
out->b_C[i*n_block+j]=0.; |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
Paso_SparseMatrix_free(A_c); |
|
|
} |
|
|
} |
|
239 |
} |
} |
|
TMPMEMFREE(mis_marker); |
|
240 |
TMPMEMFREE(counter); |
TMPMEMFREE(counter); |
241 |
|
TMPMEMFREE(split_marker); |
242 |
|
|
243 |
if (Esys_noError()) { |
if (Esys_noError()) { |
244 |
if (verbose && level>0 && !out->coarsest_level) { |
if (verbose) printf("AMG: level %d: %d unknowns eliminated.\n",level, n_F); |
|
printf("AMG: level: %d: %d unknowns eliminated. %d left.\n",level, out->n_F,out->n_C); |
|
|
} |
|
245 |
return out; |
return out; |
246 |
} else { |
} else { |
247 |
Paso_Preconditioner_LocalAMG_free(out); |
Paso_Preconditioner_LocalAMG_free(out); |
249 |
} |
} |
250 |
} |
} |
251 |
|
|
|
/**************************************************************/ |
|
|
|
|
|
/* apply AMG precondition b-> x |
|
252 |
|
|
253 |
in fact it solves |
void Paso_Preconditioner_LocalAMG_solve(Paso_SparseMatrix* A, Paso_Preconditioner_LocalAMG * amg, double * x, double * b) { |
254 |
|
const dim_t n = amg->n * amg->n_block; |
255 |
[ I 0 ] [ A_FF 0 ] [ I invA_FF*A_FC ] [ x_F ] = [b_F] |
double time0=0; |
256 |
[ A_CF*invA_FF I ] [ 0 S ] [ 0 I ] [ x_C ] = [b_C] |
const dim_t post_sweeps=amg->post_sweeps; |
257 |
|
const dim_t pre_sweeps=amg->pre_sweeps; |
|
in the form |
|
|
|
|
|
b->[b_F,b_C] |
|
|
x_F=invA_FF*b_F |
|
|
b_C=b_C-A_CF*x_F |
|
|
x_C=AMG(b_C) |
|
|
b_F=b_F-A_FC*x_C |
|
|
x_F=invA_FF*b_F |
|
|
x<-[x_F,x_C] |
|
258 |
|
|
259 |
should be called within a parallel region |
/* presmoothing */ |
260 |
barrier synconization should be performed to make sure that the input vector available |
time0=Esys_timer(); |
261 |
|
Paso_Preconditioner_LocalSmoother_solve(A, amg->Smoother, x, b, pre_sweeps, FALSE); |
262 |
|
time0=Esys_timer()-time0; |
263 |
|
if (SHOW_TIMING) printf("timing: level %d: Presmooting: %e\n",amg->level, time0); |
264 |
|
/* end of presmoothing */ |
265 |
|
|
266 |
|
if (amg->n_F < amg->n) { /* is there work on the coarse level? */ |
267 |
|
time0=Esys_timer(); |
268 |
|
Paso_Copy(n, amg->r, b); /* r <- b */ |
269 |
|
Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,A,x,1.,amg->r); /*r=r-Ax*/ |
270 |
|
Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(1.,amg->R,amg->r,0.,amg->b_C); /* b_c = R*r */ |
271 |
|
time0=Esys_timer()-time0; |
272 |
|
/* coarse level solve */ |
273 |
|
if ( amg->AMG_C == NULL) { |
274 |
|
time0=Esys_timer(); |
275 |
|
/* A_C is the coarsest level */ |
276 |
|
switch (amg->A_C->solver_package) { |
277 |
|
case (PASO_MKL): |
278 |
|
Paso_MKL(amg->A_C, amg->x_C,amg->b_C, amg->reordering, amg->refinements, SHOW_TIMING); |
279 |
|
break; |
280 |
|
case (PASO_UMFPACK): |
281 |
|
Paso_UMFPACK(amg->A_C, amg->x_C,amg->b_C, amg->refinements, SHOW_TIMING); |
282 |
|
break; |
283 |
|
case (PASO_SMOOTHER): |
284 |
|
Paso_Preconditioner_LocalSmoother_solve(amg->A_C, amg->Smoother,amg->x_C,amg->b_C,pre_sweeps, FALSE); |
285 |
|
break; |
286 |
|
} |
287 |
|
if (SHOW_TIMING) printf("timing: level %d: DIRECT SOLVER: %e\n",amg->level,Esys_timer()-time0); |
288 |
|
} else { |
289 |
|
Paso_Preconditioner_LocalAMG_solve(amg->A_C, amg->AMG_C,amg->x_C,amg->b_C); /* x_C=AMG(b_C) */ |
290 |
|
} |
291 |
|
time0=time0+Esys_timer(); |
292 |
|
Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(1.,amg->P,amg->x_C,1.,x); /* x = x + P*x_c */ |
293 |
|
|
294 |
|
/*postsmoothing*/ |
295 |
|
|
296 |
|
/*solve Ax=b with initial guess x */ |
297 |
|
time0=Esys_timer(); |
298 |
|
Paso_Preconditioner_LocalSmoother_solve(A, amg->Smoother, x, b, post_sweeps, TRUE); |
299 |
|
time0=Esys_timer()-time0; |
300 |
|
if (SHOW_TIMING) printf("timing: level %d: Postsmoothing: %e\n",amg->level,time0); |
301 |
|
/*end of postsmoothing*/ |
302 |
|
|
303 |
|
} |
304 |
|
return; |
305 |
|
} |
306 |
|
|
307 |
*/ |
/* theta = threshold for strong connections */ |
308 |
|
/* tau = threshold for diagonal dominance */ |
309 |
|
|
310 |
void Paso_Preconditioner_LocalAMG_solve(Paso_Preconditioner_LocalAMG * amg, double * x, double * b) { |
void Paso_Preconditioner_AMG_RSCoarsening(Paso_SparseMatrix* A, index_t* split_marker, const double theta, const double tau) |
311 |
dim_t i,j; |
{ |
312 |
double time0=0; |
const dim_t n=A->numRows; |
313 |
double *r=NULL, *x0=NULL; |
dim_t *degree=NULL; /* number of naighbours in strong connection graph */ |
314 |
bool_t timing=0; |
index_t *S=NULL, iptr, i,j; |
315 |
|
dim_t kdeg; |
316 |
dim_t post_sweeps=amg->post_sweeps; |
double max_offdiagonal, threshold, sum_row, main_row, fnorm; |
317 |
dim_t pre_sweeps=amg->pre_sweeps; |
|
318 |
|
degree=TMPMEMALLOC(n, dim_t); |
319 |
#ifdef UMFPACK |
S=TMPMEMALLOC(A->pattern->len, index_t); |
320 |
Paso_UMFPACK_Handler * ptr=NULL; |
|
321 |
#endif |
if ( !( Esys_checkPtr(degree) || Esys_checkPtr(S) ) ) { |
322 |
|
/*S_i={j \in N_i; i strongly coupled to j} |
323 |
|
|
324 |
|
in the sense that |A_{ij}| >= theta * max_k |A_{ik}| |
325 |
|
*/ |
326 |
|
#pragma omp parallel for private(i,iptr,max_offdiagonal, threshold,j, kdeg, sum_row, main_row, fnorm) schedule(static) |
327 |
|
for (i=0;i<n;++i) { |
328 |
|
|
329 |
r=MEMALLOC(amg->n*amg->n_block,double); |
max_offdiagonal = 0.; |
330 |
x0=MEMALLOC(amg->n*amg->n_block,double); |
sum_row=0; |
331 |
|
main_row=0; |
332 |
if (amg->coarsest_level) { |
#pragma ivdep |
333 |
|
for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) { |
334 |
|
j=A->pattern->index[iptr]; |
335 |
|
fnorm=ABS(A->val[iptr]); |
336 |
|
|
337 |
|
if( j != i) { |
338 |
|
max_offdiagonal = MAX(max_offdiagonal,fnorm); |
339 |
|
sum_row+=fnorm; |
340 |
|
} else { |
341 |
|
main_row=fnorm; |
342 |
|
} |
343 |
|
} |
344 |
|
threshold = theta*max_offdiagonal; |
345 |
|
kdeg=0; |
346 |
|
if (tau*main_row < sum_row) { /* no diagonal domainance */ |
347 |
|
#pragma ivdep |
348 |
|
for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) { |
349 |
|
j=A->pattern->index[iptr]; |
350 |
|
if(ABS(A->val[iptr])>threshold && i!=j) { |
351 |
|
S[A->pattern->ptr[i]+kdeg] = j; |
352 |
|
kdeg++; |
353 |
|
} |
354 |
|
} |
355 |
|
} |
356 |
|
degree[i]=kdeg; |
357 |
|
} |
358 |
|
|
359 |
time0=Esys_timer(); |
Paso_Preconditioner_AMG_RSCoarsening_search(n, A->pattern->ptr, degree, S, split_marker); |
360 |
/*If all unknown are eliminated then Jacobi is the best preconditioner*/ |
|
361 |
|
} |
362 |
|
TMPMEMFREE(degree); |
363 |
|
TMPMEMFREE(S); |
364 |
|
} |
365 |
|
|
366 |
if (amg->n_F==0 || amg->n_F==amg->n) { |
/* theta = threshold for strong connections */ |
367 |
Paso_Preconditioner_LocalSmoother_solve(amg->A, amg->Smoother,x,b,1, FALSE); |
/* tau = threshold for diagonal dominance */ |
368 |
} |
void Paso_Preconditioner_AMG_RSCoarsening_Block(Paso_SparseMatrix* A, index_t* split_marker, const double theta, const double tau) |
369 |
else { |
|
370 |
#ifdef MKL |
{ |
371 |
Paso_MKL1(amg->AOffset1,x,b,timing); |
const dim_t n_block=A->row_block_size; |
372 |
#else |
const dim_t n=A->numRows; |
373 |
#ifdef UMFPACK |
dim_t *degree=NULL; /* number of naighbours in strong connection graph */ |
374 |
ptr=(Paso_UMFPACK_Handler *)(amg->solver); |
index_t *S=NULL, iptr, i,j, bi; |
375 |
Paso_UMFPACK1(&ptr,amg->AUnrolled,x,b,timing); |
dim_t kdeg, max_deg; |
376 |
amg->solver=(void*) ptr; |
register double max_offdiagonal, threshold, fnorm, sum_row, main_row; |
377 |
#else |
double *rtmp; |
378 |
Paso_Preconditioner_LocalSmoother_solve(amg->A, amg->Smoother,x,b,1, FALSE); |
|
379 |
#endif |
|
380 |
#endif |
degree=TMPMEMALLOC(n, dim_t); |
381 |
} |
S=TMPMEMALLOC(A->pattern->len, index_t); |
382 |
|
|
383 |
|
if ( !( Esys_checkPtr(degree) || Esys_checkPtr(S) ) ) { |
384 |
|
/*S_i={j \in N_i; i strongly coupled to j} |
385 |
|
|
386 |
|
in the sense that |A_{ij}|_F >= theta * max_k |A_{ik}|_F |
387 |
|
*/ |
388 |
|
#pragma omp parallel private(i,iptr,max_offdiagonal, kdeg, threshold,j, max_deg, fnorm, sum_row, main_row, rtmp) |
389 |
|
{ |
390 |
|
max_deg=0; |
391 |
|
#pragma omp for schedule(static) |
392 |
|
for (i=0;i<n;++i) max_deg=MAX(max_deg, A->pattern->ptr[i+1]-A->pattern->ptr[i]); |
393 |
|
|
394 |
time0=Esys_timer()-time0; |
rtmp=TMPMEMALLOC(max_deg, double); |
395 |
if (timing) fprintf(stdout,"timing: DIRECT SOLVER: %e\n",time0); |
|
396 |
|
#pragma omp for schedule(static) |
397 |
} else { |
for (i=0;i<n;++i) { |
|
/* presmoothing */ |
|
|
time0=Esys_timer(); |
|
|
Paso_Preconditioner_LocalSmoother_solve(amg->A, amg->Smoother, x, b, pre_sweeps, FALSE); |
|
|
time0=Esys_timer()-time0; |
|
|
if (timing) fprintf(stdout,"timing: Presmooting: %e\n",time0); |
|
398 |
|
|
399 |
/* end of presmoothing */ |
max_offdiagonal = 0.; |
400 |
|
sum_row=0; |
401 |
time0=Esys_timer(); |
main_row=0; |
402 |
#pragma omp parallel for private(i,j) schedule(static) |
for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) { |
403 |
for (i=0;i<amg->n;++i) { |
j=A->pattern->index[iptr]; |
404 |
for (j=0;j<amg->n_block;++j) { |
fnorm=0; |
405 |
r[i*amg->n_block+j]=b[i*amg->n_block+j]; |
#pragma ivdep |
406 |
} |
for(bi=0;bi<n_block*n_block;++bi) fnorm+=A->val[iptr*n_block*n_block+bi]*A->val[iptr*n_block*n_block+bi]; |
407 |
} |
fnorm=sqrt(fnorm); |
408 |
|
|
409 |
/*r=b-Ax*/ |
if( j != i) { |
410 |
Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,amg->A,x,1.,r); |
rtmp[iptr-A->pattern->ptr[i]]=fnorm; |
411 |
|
max_offdiagonal = MAX(max_offdiagonal,fnorm); |
412 |
/* b_c = R*r */ |
sum_row+=fnorm; |
413 |
Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(1.,amg->R,r,0.,amg->b_C); |
} else { |
414 |
|
main_row=fnorm; |
415 |
time0=Esys_timer()-time0; |
} |
416 |
if (timing) fprintf(stdout,"timing: Before next level: %e\n",time0); |
} |
417 |
|
threshold = theta*max_offdiagonal; |
|
/* x_C=AMG(b_C) */ |
|
|
Paso_Preconditioner_LocalAMG_solve(amg->AMG_of_Coarse,amg->x_C,amg->b_C); |
|
|
|
|
|
time0=Esys_timer(); |
|
|
|
|
|
/* x_0 = P*x_c */ |
|
|
Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(1.,amg->P,amg->x_C,0.,x0); |
|
|
|
|
|
/* x=x+x0 */ |
|
|
#pragma omp parallel for private(i,j) schedule(static) |
|
|
for (i=0;i<amg->n;++i) { |
|
|
for (j=0;j<amg->n_block;++j) { |
|
|
x[i*amg->n_block+j]+=x0[i*amg->n_block+j]; |
|
|
} |
|
|
} |
|
|
|
|
|
/*postsmoothing*/ |
|
418 |
|
|
419 |
/*solve Ax=b with initial guess x */ |
kdeg=0; |
420 |
time0=Esys_timer(); |
if (tau*main_row < sum_row) { /* no diagonal domainance */ |
421 |
Paso_Preconditioner_LocalSmoother_solve(amg->A, amg->Smoother, x, b, post_sweeps, TRUE); |
#pragma ivdep |
422 |
time0=Esys_timer()-time0; |
for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) { |
423 |
if (timing) fprintf(stdout,"timing: Postsmoothing: %e\n",time0); |
j=A->pattern->index[iptr]; |
424 |
|
if(rtmp[iptr-A->pattern->ptr[i]] > threshold && i!=j) { |
425 |
/*end of postsmoothing*/ |
S[A->pattern->ptr[i]+kdeg] = j; |
426 |
|
kdeg++; |
427 |
} |
} |
428 |
MEMFREE(r); |
} |
429 |
MEMFREE(x0); |
} |
430 |
|
degree[i]=kdeg; |
431 |
|
} |
432 |
|
TMPMEMFREE(rtmp); |
433 |
|
} /* end of parallel region */ |
434 |
|
Paso_Preconditioner_AMG_RSCoarsening_search(n, A->pattern->ptr, degree, S, split_marker); |
435 |
|
} |
436 |
|
TMPMEMFREE(degree); |
437 |
|
TMPMEMFREE(S); |
438 |
|
} |
439 |
|
|
440 |
|
/* the runge stueben coarsening algorithm: */ |
441 |
|
void Paso_Preconditioner_AMG_RSCoarsening_search(const dim_t n, const index_t* offset, const dim_t* degree, const index_t* S, |
442 |
|
index_t*split_marker) |
443 |
|
{ |
444 |
|
index_t *lambda=NULL, j, *ST=NULL; |
445 |
|
dim_t i,k, p, q, *degreeT=NULL, itmp; |
446 |
|
|
447 |
|
if (n<=0) return; /* make sure that the return of Paso_Util_arg_max is not pointing to nirvana */ |
448 |
|
|
449 |
|
lambda=TMPMEMALLOC(n, index_t); |
450 |
|
degreeT=TMPMEMALLOC(n, dim_t); |
451 |
|
ST=TMPMEMALLOC(offset[n], index_t); |
452 |
|
|
453 |
|
if (! ( Esys_checkPtr(lambda) || Esys_checkPtr(degreeT) || Esys_checkPtr(ST) ) ) { |
454 |
|
/* initialize split_marker and split_marker :*/ |
455 |
|
/* those unknows which are not influenced go into F, the rest is available for F or C */ |
456 |
|
#pragma omp parallel for private(i) schedule(static) |
457 |
|
for (i=0;i<n;++i) { |
458 |
|
degreeT[i]=0; |
459 |
|
if (degree[i]>0) { |
460 |
|
lambda[i]=0; |
461 |
|
split_marker[i]=PASO_AMG_UNDECIDED; |
462 |
|
} else { |
463 |
|
split_marker[i]=PASO_AMG_IN_F; |
464 |
|
lambda[i]=-1; |
465 |
|
} |
466 |
|
} |
467 |
|
/* create transpose :*/ |
468 |
|
for (i=0;i<n;++i) { |
469 |
|
for (p=0; p<degree[i]; ++p) { |
470 |
|
j=S[offset[i]+p]; |
471 |
|
ST[offset[j]+degreeT[j]]=i; |
472 |
|
degreeT[j]++; |
473 |
|
} |
474 |
|
} |
475 |
|
/* lambda[i] = |undecided k in ST[i]| + 2 * |F-unknown in ST[i]| */ |
476 |
|
#pragma omp parallel for private(i, j, itmp) schedule(static) |
477 |
|
for (i=0;i<n;++i) { |
478 |
|
if (split_marker[i]==PASO_AMG_UNDECIDED) { |
479 |
|
itmp=lambda[i]; |
480 |
|
for (p=0; p<degreeT[i]; ++p) { |
481 |
|
j=ST[offset[i]+p]; |
482 |
|
if (split_marker[j]==PASO_AMG_UNDECIDED) { |
483 |
|
itmp++; |
484 |
|
} else { /* at this point there are no C points */ |
485 |
|
itmp+=2; |
486 |
|
} |
487 |
|
} |
488 |
|
lambda[i]=itmp; |
489 |
|
} |
490 |
|
} |
491 |
|
|
492 |
return; |
/* start search :*/ |
493 |
|
i=Paso_Util_arg_max(n,lambda); |
494 |
|
while (lambda[i]>-1) { /* is there any undecided unknowns? */ |
495 |
|
|
496 |
|
/* the unknown i is moved to C */ |
497 |
|
split_marker[i]=PASO_AMG_IN_C; |
498 |
|
lambda[i]=-1; /* lambda fro unavailable unknowns is set to -1 */ |
499 |
|
|
500 |
|
/* all undecided unknown strongly coupled to i are moved to F */ |
501 |
|
for (p=0; p<degreeT[i]; ++p) { |
502 |
|
j=ST[offset[i]+p]; |
503 |
|
|
504 |
|
if (split_marker[j]==PASO_AMG_UNDECIDED) { |
505 |
|
|
506 |
|
split_marker[j]=PASO_AMG_IN_F; |
507 |
|
lambda[j]=-1; |
508 |
|
|
509 |
|
for (q=0; q<degreeT[j]; ++q) { |
510 |
|
k=ST[offset[j]+q]; |
511 |
|
if (split_marker[k]==PASO_AMG_UNDECIDED) lambda[k]++; |
512 |
|
} |
513 |
|
|
514 |
|
} |
515 |
|
} |
516 |
|
for (p=0; p<degree[i]; ++p) { |
517 |
|
j=S[offset[i]+p]; |
518 |
|
if(split_marker[j]==PASO_AMG_UNDECIDED) lambda[j]--; |
519 |
|
} |
520 |
|
|
521 |
|
i=Paso_Util_arg_max(n,lambda); |
522 |
|
} |
523 |
|
} |
524 |
|
TMPMEMFREE(lambda); |
525 |
|
TMPMEMFREE(ST); |
526 |
|
TMPMEMFREE(degreeT); |
527 |
} |
} |