1 |
|
2 |
/******************************************************* |
3 |
* |
4 |
* Copyright (c) 2003-2010 by University of Queensland |
5 |
* Earth Systems Science Computational Center (ESSCC) |
6 |
* http://www.uq.edu.au/esscc |
7 |
* |
8 |
* Primary Business: Queensland, Australia |
9 |
* Licensed under the Open Software License version 3.0 |
10 |
* http://www.opensource.org/licenses/osl-3.0.php |
11 |
* |
12 |
*******************************************************/ |
13 |
|
14 |
|
15 |
/**************************************************************/ |
16 |
|
17 |
/* Paso: AMG preconditioner (local version) */ |
18 |
|
19 |
/**************************************************************/ |
20 |
|
21 |
/* Author: artak@uq.edu.au, l.gross@uq.edu.au */ |
22 |
|
23 |
/**************************************************************/ |
24 |
|
25 |
#define SHOW_TIMING FALSE |
26 |
|
27 |
#include "Paso.h" |
28 |
#include "Preconditioner.h" |
29 |
#include "Options.h" |
30 |
#include "PasoUtil.h" |
31 |
#include "UMFPACK.h" |
32 |
#include "MKL.h" |
33 |
|
34 |
/**************************************************************/ |
35 |
|
36 |
/* free all memory used by AMG */ |
37 |
|
38 |
void Paso_Preconditioner_LocalAMG_free(Paso_Preconditioner_LocalAMG * in) { |
39 |
if (in!=NULL) { |
40 |
Paso_Preconditioner_LocalSmoother_free(in->Smoother); |
41 |
Paso_SparseMatrix_free(in->P); |
42 |
Paso_SparseMatrix_free(in->R); |
43 |
Paso_SparseMatrix_free(in->A_C); |
44 |
Paso_Preconditioner_LocalAMG_free(in->AMG_C); |
45 |
MEMFREE(in->r); |
46 |
MEMFREE(in->x_C); |
47 |
MEMFREE(in->b_C); |
48 |
|
49 |
|
50 |
MEMFREE(in); |
51 |
} |
52 |
} |
53 |
|
54 |
index_t Paso_Preconditioner_LocalAMG_getMaxLevel(const Paso_Preconditioner_LocalAMG * in) { |
55 |
if (in->AMG_C == NULL) { |
56 |
return in->level; |
57 |
} else { |
58 |
return Paso_Preconditioner_LocalAMG_getMaxLevel(in->AMG_C); |
59 |
} |
60 |
} |
61 |
double Paso_Preconditioner_LocalAMG_getCoarseLevelSparsity(const Paso_Preconditioner_LocalAMG * in) { |
62 |
if (in->AMG_C == NULL) { |
63 |
if (in->A_C == NULL) { |
64 |
return 1.; |
65 |
} else { |
66 |
return Paso_SparseMatrix_getSparsity(in->A_C); |
67 |
} |
68 |
} else { |
69 |
return Paso_Preconditioner_LocalAMG_getCoarseLevelSparsity(in->AMG_C); |
70 |
} |
71 |
} |
72 |
|
73 |
dim_t Paso_Preconditioner_LocalAMG_getNumCoarseUnknwons(const Paso_Preconditioner_LocalAMG * in) { |
74 |
if (in->AMG_C == NULL) { |
75 |
if (in->A_C == NULL) { |
76 |
return 0; |
77 |
} else { |
78 |
return Paso_SparseMatrix_getTotalNumRows(in->A_C); |
79 |
} |
80 |
} else { |
81 |
return Paso_Preconditioner_LocalAMG_getNumCoarseUnknwons(in->AMG_C); |
82 |
} |
83 |
} |
84 |
/***************************************************************** |
85 |
|
86 |
constructs AMG |
87 |
|
88 |
******************************************************************/ |
89 |
Paso_Preconditioner_LocalAMG* Paso_Preconditioner_LocalAMG_alloc(Paso_SparseMatrix *A_p,dim_t level,Paso_Options* options) { |
90 |
|
91 |
Paso_Preconditioner_LocalAMG* out=NULL; |
92 |
bool_t verbose=options->verbose; |
93 |
|
94 |
Paso_SparseMatrix *Atemp=NULL, *A_C=NULL; |
95 |
const dim_t n=A_p->numRows; |
96 |
const dim_t n_block=A_p->row_block_size; |
97 |
index_t* F_marker=NULL, *counter=NULL, *mask_C=NULL, *rows_in_F=NULL, *S=NULL, *degree_S=NULL; |
98 |
dim_t n_F=0, n_C=0, i; |
99 |
double time0=0; |
100 |
const double theta = options->coarsening_threshold; |
101 |
const double tau = options->diagonal_dominance_threshold; |
102 |
const double sparsity=Paso_SparseMatrix_getSparsity(A_p); |
103 |
const dim_t total_n=Paso_SparseMatrix_getTotalNumRows(A_p); |
104 |
|
105 |
|
106 |
/* |
107 |
is the input matrix A suitable for coarsening |
108 |
|
109 |
*/ |
110 |
if ( (sparsity >= options->min_coarse_sparsity) || |
111 |
(total_n <= options->min_coarse_matrix_size) || |
112 |
(level > options->level_max) ) { |
113 |
|
114 |
if (verbose) { |
115 |
/* |
116 |
print stopping condition: |
117 |
- 'SPAR' = min_coarse_matrix_sparsity exceeded |
118 |
- 'SIZE' = min_coarse_matrix_size exceeded |
119 |
- 'LEVEL' = level_max exceeded |
120 |
*/ |
121 |
printf("Paso_Preconditioner: AMG: termination of coarsening by "); |
122 |
|
123 |
if (sparsity >= options->min_coarse_sparsity) |
124 |
printf("SPAR "); |
125 |
|
126 |
if (total_n <= options->min_coarse_matrix_size) |
127 |
printf("SIZE "); |
128 |
|
129 |
if (level > options->level_max) |
130 |
printf("LEVEL "); |
131 |
|
132 |
printf("\n"); |
133 |
|
134 |
printf("Paso_Preconditioner: AMG level %d (limit = %d) stopped. sparsity = %e (limit = %e), unknowns = %d (limit = %d)\n", |
135 |
level, options->level_max, sparsity, options->min_coarse_sparsity, total_n, options->min_coarse_matrix_size); |
136 |
|
137 |
} |
138 |
|
139 |
return NULL; |
140 |
} |
141 |
/* Start Coarsening : */ |
142 |
|
143 |
F_marker=TMPMEMALLOC(n,index_t); |
144 |
counter=TMPMEMALLOC(n,index_t); |
145 |
degree_S=TMPMEMALLOC(n, dim_t); |
146 |
S=TMPMEMALLOC(A_p->pattern->len, index_t); |
147 |
if ( !( Esys_checkPtr(F_marker) || Esys_checkPtr(counter) || Esys_checkPtr(degree_S) || Esys_checkPtr(S) ) ) { |
148 |
/* |
149 |
set splitting of unknows: |
150 |
|
151 |
*/ |
152 |
time0=Esys_timer(); |
153 |
if (n_block>1) { |
154 |
Paso_Preconditioner_LocalAMG_setStrongConnections_Block(A_p, degree_S, S, theta,tau); |
155 |
} else { |
156 |
Paso_Preconditioner_LocalAMG_setStrongConnections(A_p, degree_S, S, theta,tau); |
157 |
} |
158 |
/* |
159 |
{ |
160 |
dim_t p; |
161 |
for (i=0; i< n; ++i) { |
162 |
printf("%d: ",i); |
163 |
for (p=0; p<degree_S[i];++p) printf("%d ",S[A_p->pattern->ptr[i]+p]); |
164 |
printf("\n"); |
165 |
} |
166 |
} |
167 |
*/ |
168 |
|
169 |
Paso_Preconditioner_LocalAMG_RungeStuebenSearch(n, A_p->pattern->ptr, degree_S, S, F_marker, options->usePanel); |
170 |
|
171 |
/* in BoomerAMG interpolation is used FF connectiovity is required :*/ |
172 |
if (options->interpolation_method == PASO_CLASSIC_INTERPOLATION_WITH_FF_COUPLING) |
173 |
Paso_Preconditioner_LocalAMG_enforceFFConnectivity(n, A_p->pattern->ptr, degree_S, S, F_marker); |
174 |
options->coarsening_selection_time=Esys_timer()-time0 + MAX(0, options->coarsening_selection_time); |
175 |
|
176 |
if (Esys_noError() ) { |
177 |
#pragma omp parallel for private(i) schedule(static) |
178 |
for (i = 0; i < n; ++i) F_marker[i]=(F_marker[i] == PASO_AMG_IN_F); |
179 |
|
180 |
/* |
181 |
count number of unkowns to be eliminated: |
182 |
*/ |
183 |
n_F=Paso_Util_cumsum_maskedTrue(n,counter, F_marker); |
184 |
n_C=n-n_F; |
185 |
if (verbose) printf("Paso_Preconditioner: AMG level %d: %d unknowns are flagged for elimination. %d left.\n",level,n_F,n-n_F); |
186 |
|
187 |
if ( n_F == 0 ) { /* is a nasty case. a direct solver should be used, return NULL */ |
188 |
out = NULL; |
189 |
} else { |
190 |
out=MEMALLOC(1,Paso_Preconditioner_LocalAMG); |
191 |
if (! Esys_checkPtr(out)) { |
192 |
out->level = level; |
193 |
out->n = n; |
194 |
out->n_F = n_F; |
195 |
out->n_block = n_block; |
196 |
out->A_C = NULL; |
197 |
out->P = NULL; |
198 |
out->R = NULL; |
199 |
out->post_sweeps = options->post_sweeps; |
200 |
out->pre_sweeps = options->pre_sweeps; |
201 |
out->r = NULL; |
202 |
out->x_C = NULL; |
203 |
out->b_C = NULL; |
204 |
out->AMG_C = NULL; |
205 |
out->Smoother=NULL; |
206 |
} |
207 |
mask_C=TMPMEMALLOC(n,index_t); |
208 |
rows_in_F=TMPMEMALLOC(n_F,index_t); |
209 |
Esys_checkPtr(mask_C); |
210 |
Esys_checkPtr(rows_in_F); |
211 |
if ( Esys_noError() ) { |
212 |
|
213 |
out->Smoother = Paso_Preconditioner_LocalSmoother_alloc(A_p, (options->smoother == PASO_JACOBI), verbose); |
214 |
|
215 |
if (n_C != 0) { |
216 |
/* if nothing is been removed we have a diagonal dominant matrix and we just run a few steps of the smoother */ |
217 |
|
218 |
/* allocate helpers :*/ |
219 |
out->x_C=MEMALLOC(n_block*n_C,double); |
220 |
out->b_C=MEMALLOC(n_block*n_C,double); |
221 |
out->r=MEMALLOC(n_block*n,double); |
222 |
|
223 |
Esys_checkPtr(out->r); |
224 |
Esys_checkPtr(out->x_C); |
225 |
Esys_checkPtr(out->b_C); |
226 |
|
227 |
if ( Esys_noError() ) { |
228 |
/* creates index for F:*/ |
229 |
#pragma omp parallel private(i) |
230 |
{ |
231 |
#pragma omp for schedule(static) |
232 |
for (i = 0; i < n; ++i) { |
233 |
if (F_marker[i]) rows_in_F[counter[i]]=i; |
234 |
} |
235 |
} |
236 |
/* create mask of C nodes with value >-1 gives new id */ |
237 |
i=Paso_Util_cumsum_maskedFalse(n,counter, F_marker); |
238 |
|
239 |
#pragma omp parallel for private(i) schedule(static) |
240 |
for (i = 0; i < n; ++i) { |
241 |
if (F_marker[i]) { |
242 |
mask_C[i]=-1; |
243 |
} else { |
244 |
mask_C[i]=counter[i];; |
245 |
} |
246 |
} |
247 |
/* |
248 |
get Prolongation : |
249 |
*/ |
250 |
time0=Esys_timer(); |
251 |
out->P=Paso_Preconditioner_LocalAMG_getProlongation(A_p,A_p->pattern->ptr, degree_S,S,n_C,mask_C, options->interpolation_method); |
252 |
if (SHOW_TIMING) printf("timing: level %d: getProlongation: %e\n",level, Esys_timer()-time0); |
253 |
} |
254 |
/* |
255 |
construct Restriction operator as transposed of Prolongation operator: |
256 |
*/ |
257 |
if ( Esys_noError()) { |
258 |
time0=Esys_timer(); |
259 |
out->R=Paso_SparseMatrix_getTranspose(out->P); |
260 |
if (SHOW_TIMING) printf("timing: level %d: Paso_SparseMatrix_getTranspose: %e\n",level,Esys_timer()-time0); |
261 |
} |
262 |
/* |
263 |
construct coarse level matrix: |
264 |
*/ |
265 |
if ( Esys_noError()) { |
266 |
time0=Esys_timer(); |
267 |
Atemp=Paso_SparseMatrix_MatrixMatrix(A_p,out->P); |
268 |
A_C=Paso_SparseMatrix_MatrixMatrix(out->R,Atemp); |
269 |
Paso_SparseMatrix_free(Atemp); |
270 |
if (SHOW_TIMING) printf("timing: level %d : construct coarse matrix: %e\n",level,Esys_timer()-time0); |
271 |
} |
272 |
|
273 |
|
274 |
/* |
275 |
constructe courser level: |
276 |
|
277 |
*/ |
278 |
if ( Esys_noError()) { |
279 |
out->AMG_C=Paso_Preconditioner_LocalAMG_alloc(A_C,level+1,options); |
280 |
} |
281 |
if ( Esys_noError()) { |
282 |
if ( out->AMG_C == NULL ) { |
283 |
out->reordering = options->reordering; |
284 |
out->refinements = options->coarse_matrix_refinements; |
285 |
/* no coarse level matrix has been constructed. use direct solver */ |
286 |
#ifdef MKL |
287 |
out->A_C=Paso_SparseMatrix_unroll(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_OFFSET1, A_C); |
288 |
Paso_SparseMatrix_free(A_C); |
289 |
out->A_C->solver_package = PASO_MKL; |
290 |
if (verbose) printf("Paso_Preconditioner: AMG: use MKL direct solver on the coarsest level (number of unknowns = %d).\n",n_C*n_block); |
291 |
#else |
292 |
#ifdef UMFPACK |
293 |
out->A_C=Paso_SparseMatrix_unroll(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_CSC, A_C); |
294 |
Paso_SparseMatrix_free(A_C); |
295 |
out->A_C->solver_package = PASO_UMFPACK; |
296 |
if (verbose) printf("Paso_Preconditioner: AMG: use UMFPACK direct solver on the coarsest level (number of unknowns = %d).\n",n_C*n_block); |
297 |
#else |
298 |
out->A_C=A_C; |
299 |
out->A_C->solver_p=Paso_Preconditioner_LocalSmoother_alloc(out->A_C, (options->smoother == PASO_JACOBI), verbose); |
300 |
out->A_C->solver_package = PASO_SMOOTHER; |
301 |
if (verbose) printf("Paso_Preconditioner: AMG: use smoother on the coarsest level (number of unknowns = %d).\n",n_C*n_block); |
302 |
#endif |
303 |
#endif |
304 |
} else { |
305 |
/* finally we set some helpers for the solver step */ |
306 |
out->A_C=A_C; |
307 |
} |
308 |
} |
309 |
} |
310 |
} |
311 |
TMPMEMFREE(mask_C); |
312 |
TMPMEMFREE(rows_in_F); |
313 |
} |
314 |
} |
315 |
} |
316 |
TMPMEMFREE(counter); |
317 |
TMPMEMFREE(F_marker); |
318 |
TMPMEMFREE(degree_S); |
319 |
TMPMEMFREE(S); |
320 |
|
321 |
if (Esys_noError()) { |
322 |
return out; |
323 |
} else { |
324 |
Paso_Preconditioner_LocalAMG_free(out); |
325 |
return NULL; |
326 |
} |
327 |
} |
328 |
|
329 |
|
330 |
void Paso_Preconditioner_LocalAMG_solve(Paso_SparseMatrix* A, Paso_Preconditioner_LocalAMG * amg, double * x, double * b) { |
331 |
const dim_t n = amg->n * amg->n_block; |
332 |
double time0=0; |
333 |
const dim_t post_sweeps=amg->post_sweeps; |
334 |
const dim_t pre_sweeps=amg->pre_sweeps; |
335 |
|
336 |
/* presmoothing */ |
337 |
time0=Esys_timer(); |
338 |
Paso_Preconditioner_LocalSmoother_solve(A, amg->Smoother, x, b, pre_sweeps, FALSE); |
339 |
time0=Esys_timer()-time0; |
340 |
if (SHOW_TIMING) printf("timing: level %d: Presmooting: %e\n",amg->level, time0); |
341 |
/* end of presmoothing */ |
342 |
|
343 |
if (amg->n_F < amg->n) { /* is there work on the coarse level? */ |
344 |
time0=Esys_timer(); |
345 |
Paso_Copy(n, amg->r, b); /* r <- b */ |
346 |
Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,A,x,1.,amg->r); /*r=r-Ax*/ |
347 |
Paso_SparseMatrix_MatrixVector_CSR_OFFSET0_DIAG(1.,amg->R,amg->r,0.,amg->b_C); /* b_c = R*r */ |
348 |
time0=Esys_timer()-time0; |
349 |
/* coarse level solve */ |
350 |
if ( amg->AMG_C == NULL) { |
351 |
time0=Esys_timer(); |
352 |
/* A_C is the coarsest level */ |
353 |
switch (amg->A_C->solver_package) { |
354 |
case (PASO_MKL): |
355 |
Paso_MKL(amg->A_C, amg->x_C,amg->b_C, amg->reordering, amg->refinements, SHOW_TIMING); |
356 |
break; |
357 |
case (PASO_UMFPACK): |
358 |
Paso_UMFPACK(amg->A_C, amg->x_C,amg->b_C, amg->refinements, SHOW_TIMING); |
359 |
break; |
360 |
case (PASO_SMOOTHER): |
361 |
Paso_Preconditioner_LocalSmoother_solve(amg->A_C, amg->A_C->solver_p,amg->x_C,amg->b_C,pre_sweeps+post_sweeps, FALSE); |
362 |
break; |
363 |
} |
364 |
if (SHOW_TIMING) printf("timing: level %d: DIRECT SOLVER: %e\n",amg->level,Esys_timer()-time0); |
365 |
} else { |
366 |
Paso_Preconditioner_LocalAMG_solve(amg->A_C, amg->AMG_C,amg->x_C,amg->b_C); /* x_C=AMG(b_C) */ |
367 |
} |
368 |
time0=time0+Esys_timer(); |
369 |
Paso_SparseMatrix_MatrixVector_CSR_OFFSET0_DIAG(1.,amg->P,amg->x_C,1.,x); /* x = x + P*x_c */ |
370 |
|
371 |
/*postsmoothing*/ |
372 |
|
373 |
/*solve Ax=b with initial guess x */ |
374 |
time0=Esys_timer(); |
375 |
Paso_Preconditioner_LocalSmoother_solve(A, amg->Smoother, x, b, post_sweeps, TRUE); |
376 |
time0=Esys_timer()-time0; |
377 |
if (SHOW_TIMING) printf("timing: level %d: Postsmoothing: %e\n",amg->level,time0); |
378 |
/*end of postsmoothing*/ |
379 |
|
380 |
} |
381 |
return; |
382 |
} |
383 |
|
384 |
/* theta = threshold for strong connections */ |
385 |
/* tau = threshold for diagonal dominance */ |
386 |
|
387 |
/*S_i={j \in N_i; i strongly coupled to j} |
388 |
|
389 |
in the sense that |A_{ij}| >= theta * max_k |A_{ik}| |
390 |
*/ |
391 |
|
392 |
void Paso_Preconditioner_LocalAMG_setStrongConnections(Paso_SparseMatrix* A, |
393 |
dim_t *degree_S, index_t *S, |
394 |
const double theta, const double tau) |
395 |
{ |
396 |
const dim_t n=A->numRows; |
397 |
index_t iptr, i; |
398 |
|
399 |
|
400 |
#pragma omp parallel for private(i,iptr) schedule(static) |
401 |
for (i=0;i<n;++i) { |
402 |
register double max_offdiagonal = 0.; |
403 |
register double sum_row=0; |
404 |
register double main_row=0; |
405 |
#pragma ivdep |
406 |
for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) { |
407 |
register index_t j=A->pattern->index[iptr]; |
408 |
register double fnorm=ABS(A->val[iptr]); |
409 |
|
410 |
if( j != i) { |
411 |
max_offdiagonal = MAX(max_offdiagonal,fnorm); |
412 |
sum_row+=fnorm; |
413 |
} else { |
414 |
main_row=fnorm; |
415 |
} |
416 |
} |
417 |
const double threshold = theta*max_offdiagonal; |
418 |
register dim_t kdeg=0; |
419 |
|
420 |
if (tau*main_row < sum_row) { /* no diagonal domainance */ |
421 |
#pragma ivdep |
422 |
for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) { |
423 |
register index_t j=A->pattern->index[iptr]; |
424 |
if(ABS(A->val[iptr])>threshold && i!=j) { |
425 |
S[A->pattern->ptr[i]+kdeg] = j; |
426 |
kdeg++; |
427 |
} |
428 |
} |
429 |
} |
430 |
degree_S[i]=kdeg; |
431 |
} |
432 |
|
433 |
} |
434 |
|
435 |
/* theta = threshold for strong connections */ |
436 |
/* tau = threshold for diagonal dominance */ |
437 |
/*S_i={j \in N_i; i strongly coupled to j} |
438 |
|
439 |
in the sense that |A_{ij}|_F >= theta * max_k |A_{ik}|_F |
440 |
*/ |
441 |
void Paso_Preconditioner_LocalAMG_setStrongConnections_Block(Paso_SparseMatrix* A, |
442 |
dim_t *degree_S, index_t *S, |
443 |
const double theta, const double tau) |
444 |
|
445 |
{ |
446 |
const dim_t n_block=A->row_block_size; |
447 |
const dim_t n=A->numRows; |
448 |
index_t iptr, i, bi; |
449 |
|
450 |
|
451 |
#pragma omp parallel private(i, iptr, bi) |
452 |
{ |
453 |
dim_t max_deg=0; /* this is local on each thread */ |
454 |
#pragma omp for schedule(static) |
455 |
for (i=0;i<n;++i) max_deg=MAX(max_deg, A->pattern->ptr[i+1]-A->pattern->ptr[i]); |
456 |
|
457 |
double* rtmp=TMPMEMALLOC(max_deg, double); |
458 |
|
459 |
#pragma omp for schedule(static) |
460 |
for (i=0;i<n;++i) { |
461 |
|
462 |
register double max_offdiagonal = 0.; |
463 |
register double sum_row=0; |
464 |
register double main_row=0; |
465 |
|
466 |
for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) { |
467 |
register index_t j=A->pattern->index[iptr]; |
468 |
register double fnorm=0; |
469 |
#pragma ivdep |
470 |
for(bi=0;bi<n_block*n_block;++bi) { |
471 |
register double rtmp2 = A->val[iptr*n_block*n_block+bi]; |
472 |
fnorm+=rtmp2*rtmp2; |
473 |
} |
474 |
fnorm=sqrt(fnorm); |
475 |
|
476 |
rtmp[iptr-A->pattern->ptr[i]]=fnorm; |
477 |
if( j != i) { |
478 |
max_offdiagonal = MAX(max_offdiagonal,fnorm); |
479 |
sum_row+=fnorm; |
480 |
} else { |
481 |
main_row=fnorm; |
482 |
} |
483 |
} |
484 |
const double threshold = theta*max_offdiagonal; |
485 |
|
486 |
register dim_t kdeg=0; |
487 |
if (tau*main_row < sum_row) { /* no diagonal domainance */ |
488 |
#pragma ivdep |
489 |
for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) { |
490 |
register index_t j=A->pattern->index[iptr]; |
491 |
if(rtmp[iptr-A->pattern->ptr[i]] > threshold && i!=j) { |
492 |
S[A->pattern->ptr[i]+kdeg] = j; |
493 |
kdeg++; |
494 |
} |
495 |
} |
496 |
} |
497 |
degree_S[i]=kdeg; |
498 |
} |
499 |
TMPMEMFREE(rtmp); |
500 |
} /* end of parallel region */ |
501 |
|
502 |
} |
503 |
|
504 |
/* the runge stueben coarsening algorithm: */ |
505 |
void Paso_Preconditioner_LocalAMG_RungeStuebenSearch(const dim_t n, const index_t* offset_S, |
506 |
const dim_t* degree_S, const index_t* S, |
507 |
index_t*split_marker, const bool_t usePanel) |
508 |
{ |
509 |
|
510 |
index_t *lambda=NULL, *ST=NULL, *notInPanel=NULL, *panel=NULL, lambda_max, lambda_k; |
511 |
dim_t i,k, p, q, *degree_ST=NULL, len_panel, len_panel_new; |
512 |
register index_t j, itmp; |
513 |
|
514 |
if (n<=0) return; /* make sure that the return of Paso_Util_arg_max is not pointing to nirvana */ |
515 |
|
516 |
lambda=TMPMEMALLOC(n, index_t); Esys_checkPtr(lambda); |
517 |
degree_ST=TMPMEMALLOC(n, dim_t); Esys_checkPtr(degree_ST); |
518 |
ST=TMPMEMALLOC(offset_S[n], index_t); Esys_checkPtr(ST); |
519 |
if (usePanel) { |
520 |
notInPanel=TMPMEMALLOC(n, bool_t); Esys_checkPtr(notInPanel); |
521 |
panel=TMPMEMALLOC(n, index_t); Esys_checkPtr(panel); |
522 |
} |
523 |
|
524 |
|
525 |
|
526 |
if (Esys_noError() ) { |
527 |
/* initialize split_marker and split_marker :*/ |
528 |
/* those unknows which are not influenced go into F, the rest is available for F or C */ |
529 |
#pragma omp parallel for private(i) schedule(static) |
530 |
for (i=0;i<n;++i) { |
531 |
degree_ST[i]=0; |
532 |
if (degree_S[i]>0) { |
533 |
lambda[i]=0; |
534 |
split_marker[i]=PASO_AMG_UNDECIDED; |
535 |
} else { |
536 |
split_marker[i]=PASO_AMG_IN_F; |
537 |
lambda[i]=-1; |
538 |
} |
539 |
} |
540 |
/* create transpose :*/ |
541 |
for (i=0;i<n;++i) { |
542 |
for (p=0; p<degree_S[i]; ++p) { |
543 |
j=S[offset_S[i]+p]; |
544 |
ST[offset_S[j]+degree_ST[j]]=i; |
545 |
degree_ST[j]++; |
546 |
} |
547 |
} |
548 |
/* lambda[i] = |undecided k in ST[i]| + 2 * |F-unknown in ST[i]| */ |
549 |
#pragma omp parallel for private(i, j, itmp) schedule(static) |
550 |
for (i=0;i<n;++i) { |
551 |
if (split_marker[i]==PASO_AMG_UNDECIDED) { |
552 |
itmp=lambda[i]; |
553 |
for (p=0; p<degree_ST[i]; ++p) { |
554 |
j=ST[offset_S[i]+p]; |
555 |
if (split_marker[j]==PASO_AMG_UNDECIDED) { |
556 |
itmp++; |
557 |
} else { /* at this point there are no C points */ |
558 |
itmp+=2; |
559 |
} |
560 |
} |
561 |
lambda[i]=itmp; |
562 |
} |
563 |
} |
564 |
if (usePanel) { |
565 |
#pragma omp parallel for private(i) schedule(static) |
566 |
for (i=0;i<n;++i) notInPanel[i]=TRUE; |
567 |
} |
568 |
/* start search :*/ |
569 |
i=Paso_Util_arg_max(n,lambda); |
570 |
while (lambda[i]>-1) { /* is there any undecided unknown? */ |
571 |
|
572 |
if (usePanel) { |
573 |
len_panel=0; |
574 |
do { |
575 |
/* the unknown i is moved to C */ |
576 |
split_marker[i]=PASO_AMG_IN_C; |
577 |
lambda[i]=-1; /* lambda from unavailable unknowns is set to -1 */ |
578 |
|
579 |
/* all undecided unknown strongly coupled to i are moved to F */ |
580 |
for (p=0; p<degree_ST[i]; ++p) { |
581 |
j=ST[offset_S[i]+p]; |
582 |
|
583 |
if (split_marker[j]==PASO_AMG_UNDECIDED) { |
584 |
|
585 |
split_marker[j]=PASO_AMG_IN_F; |
586 |
lambda[j]=-1; |
587 |
|
588 |
for (q=0; q<degree_ST[j]; ++q) { |
589 |
k=ST[offset_S[j]+q]; |
590 |
if (split_marker[k]==PASO_AMG_UNDECIDED) { |
591 |
lambda[k]++; |
592 |
if (notInPanel[k]) { |
593 |
notInPanel[k]=FALSE; |
594 |
panel[len_panel]=k; |
595 |
len_panel++; |
596 |
} |
597 |
|
598 |
} /* the unknown i is moved to C */ |
599 |
split_marker[i]=PASO_AMG_IN_C; |
600 |
lambda[i]=-1; /* lambda from unavailable unknowns is set to -1 */ |
601 |
} |
602 |
|
603 |
} |
604 |
} |
605 |
for (p=0; p<degree_S[i]; ++p) { |
606 |
j=S[offset_S[i]+p]; |
607 |
if (split_marker[j]==PASO_AMG_UNDECIDED) { |
608 |
lambda[j]--; |
609 |
if (notInPanel[j]) { |
610 |
notInPanel[j]=FALSE; |
611 |
panel[len_panel]=j; |
612 |
len_panel++; |
613 |
} |
614 |
} |
615 |
} |
616 |
|
617 |
/* consolidate panel */ |
618 |
/* remove lambda[q]=-1 */ |
619 |
lambda_max=-1; |
620 |
i=-1; |
621 |
len_panel_new=0; |
622 |
for (q=0; q<len_panel; q++) { |
623 |
k=panel[q]; |
624 |
lambda_k=lambda[k]; |
625 |
if (split_marker[k]==PASO_AMG_UNDECIDED) { |
626 |
panel[len_panel_new]=k; |
627 |
len_panel_new++; |
628 |
|
629 |
if (lambda_max == lambda_k) { |
630 |
if (k<i) i=k; |
631 |
} else if (lambda_max < lambda_k) { |
632 |
lambda_max =lambda_k; |
633 |
i=k; |
634 |
} |
635 |
} |
636 |
} |
637 |
len_panel=len_panel_new; |
638 |
} while (len_panel>0); |
639 |
} else { |
640 |
/* the unknown i is moved to C */ |
641 |
split_marker[i]=PASO_AMG_IN_C; |
642 |
lambda[i]=-1; /* lambda from unavailable unknowns is set to -1 */ |
643 |
|
644 |
/* all undecided unknown strongly coupled to i are moved to F */ |
645 |
for (p=0; p<degree_ST[i]; ++p) { |
646 |
j=ST[offset_S[i]+p]; |
647 |
if (split_marker[j]==PASO_AMG_UNDECIDED) { |
648 |
|
649 |
split_marker[j]=PASO_AMG_IN_F; |
650 |
lambda[j]=-1; |
651 |
|
652 |
for (q=0; q<degree_ST[j]; ++q) { |
653 |
k=ST[offset_S[j]+q]; |
654 |
if (split_marker[k]==PASO_AMG_UNDECIDED) lambda[k]++; |
655 |
} |
656 |
|
657 |
} |
658 |
} |
659 |
for (p=0; p<degree_S[i]; ++p) { |
660 |
j=S[offset_S[i]+p]; |
661 |
if(split_marker[j]==PASO_AMG_UNDECIDED) lambda[j]--; |
662 |
} |
663 |
|
664 |
} |
665 |
i=Paso_Util_arg_max(n,lambda); |
666 |
} |
667 |
|
668 |
} |
669 |
TMPMEMFREE(lambda); |
670 |
TMPMEMFREE(ST); |
671 |
TMPMEMFREE(degree_ST); |
672 |
TMPMEMFREE(panel); |
673 |
TMPMEMFREE(notInPanel); |
674 |
} |
675 |
/* ensures that two F nodes are connected via a C node :*/ |
676 |
void Paso_Preconditioner_LocalAMG_enforceFFConnectivity(const dim_t n, const index_t* offset_S, |
677 |
const dim_t* degree_S, const index_t* S, |
678 |
index_t*split_marker) |
679 |
{ |
680 |
dim_t i, p, q; |
681 |
|
682 |
/* now we make sure that two (strongly) connected F nodes are (strongly) connected via a C node. */ |
683 |
for (i=0;i<n;++i) { |
684 |
if ( (split_marker[i]==PASO_AMG_IN_F) && (degree_S[i]>0) ) { |
685 |
for (p=0; p<degree_S[i]; ++p) { |
686 |
register index_t j=S[offset_S[i]+p]; |
687 |
if ( (split_marker[j]==PASO_AMG_IN_F) && (degree_S[j]>0) ) { |
688 |
/* i and j are now two F nodes which are strongly connected */ |
689 |
/* is there a C node they share ? */ |
690 |
register index_t sharing=-1; |
691 |
for (q=0; q<degree_S[i]; ++q) { |
692 |
index_t k=S[offset_S[i]+q]; |
693 |
if (split_marker[k]==PASO_AMG_IN_C) { |
694 |
register index_t* where_k=(index_t*)bsearch(&k, &(S[offset_S[j]]), degree_S[j], sizeof(index_t), Paso_comparIndex); |
695 |
if (where_k != NULL) { |
696 |
sharing=k; |
697 |
break; |
698 |
} |
699 |
} |
700 |
} |
701 |
if (sharing<0) { |
702 |
if (i<j) { |
703 |
split_marker[j]=PASO_AMG_IN_C; |
704 |
} else { |
705 |
split_marker[i]=PASO_AMG_IN_C; |
706 |
break; /* no point to look any further as i is now a C node */ |
707 |
} |
708 |
} |
709 |
} |
710 |
} |
711 |
} |
712 |
} |
713 |
} |