/[escript]/trunk/paso/src/AMG.c
ViewVC logotype

Contents of /trunk/paso/src/AMG.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3315 - (show annotations)
Wed Oct 27 01:20:27 2010 UTC (9 years ago) by gross
File MIME type: text/plain
File size: 16563 byte(s)
bug in sparse matrixmatrix fixed.
1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2010 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14
15 /**************************************************************/
16
17 /* Paso: AMG preconditioner */
18
19 /**************************************************************/
20
21 /* Author: artak@uq.edu.au, l.gross@uq.edu.au */
22
23 /**************************************************************/
24
25 #define SHOW_TIMING FALSE
26
27 #include "Paso.h"
28 #include "Preconditioner.h"
29 #include "Options.h"
30 #include "PasoUtil.h"
31 #include "UMFPACK.h"
32 #include "MKL.h"
33
34 /**************************************************************/
35
36 /* free all memory used by AMG */
37
38 void Paso_Preconditioner_LocalAMG_free(Paso_Preconditioner_LocalAMG * in) {
39 if (in!=NULL) {
40 Paso_Preconditioner_LocalSmoother_free(in->Smoother);
41 Paso_SparseMatrix_free(in->P);
42 Paso_SparseMatrix_free(in->R);
43 Paso_SparseMatrix_free(in->A_C);
44 Paso_Preconditioner_LocalAMG_free(in->AMG_C);
45 MEMFREE(in->r);
46 MEMFREE(in->x_C);
47 MEMFREE(in->b_C);
48
49
50 MEMFREE(in);
51 }
52 }
53
54 /*****************************************************************
55
56 constructs AMG
57
58 ******************************************************************/
59 Paso_Preconditioner_LocalAMG* Paso_Preconditioner_LocalAMG_alloc(Paso_SparseMatrix *A_p,dim_t level,Paso_Options* options) {
60
61 Paso_Preconditioner_LocalAMG* out=NULL;
62 bool_t verbose=options->verbose;
63
64 Paso_SparseMatrix *Atemp=NULL, *A_C=NULL;
65 const dim_t n=A_p->numRows;
66 const dim_t n_block=A_p->row_block_size;
67 index_t* split_marker=NULL, *counter=NULL, *mask_C=NULL, *rows_in_F=NULL, *S=NULL, *degree=NULL;
68 dim_t n_F=0, n_C=0, i;
69 double time0=0;
70 const double theta = options->coarsening_threshold;
71 const double tau = options->diagonal_dominance_threshold;
72
73
74 /*
75 is the input matrix A suitable for coarsening
76
77 */
78 if ( (A_p->pattern->len >= options->min_coarse_sparsity * n * n ) || (n <= options->min_coarse_matrix_size) || (level > options->level_max) ) {
79 if (verbose) printf("Paso: AMG level %d (limit = %d) stopped. sparsity = %e (limit = %e), unknowns = %d (limit = %d)\n",
80 level, options->level_max, A_p->pattern->len/(1.*n * n), options->min_coarse_sparsity, n, options->min_coarse_matrix_size );
81 return NULL;
82 }
83 /* Start Coarsening : */
84
85 split_marker=TMPMEMALLOC(n,index_t);
86 counter=TMPMEMALLOC(n,index_t);
87 degree=TMPMEMALLOC(n, dim_t);
88 S=TMPMEMALLOC(A_p->pattern->len, index_t);
89 if ( !( Esys_checkPtr(split_marker) || Esys_checkPtr(counter) || Esys_checkPtr(degree) || Esys_checkPtr(S) ) ) {
90 /*
91 set splitting of unknows:
92
93 */
94 time0=Esys_timer();
95 if (n_block>1) {
96 Paso_Preconditioner_AMG_setStrongConnections_Block(A_p, degree, S, theta,tau);
97 } else {
98 Paso_Preconditioner_AMG_setStrongConnections(A_p, degree, S, theta,tau);
99 }
100 Paso_Preconditioner_AMG_RungeStuebenSearch(n, A_p->pattern->ptr, degree, S, split_marker);
101 options->coarsening_selection_time=Esys_timer()-time0 + MAX(0, options->coarsening_selection_time);
102
103 if (Esys_noError() ) {
104 #pragma omp parallel for private(i) schedule(static)
105 for (i = 0; i < n; ++i) split_marker[i]= (split_marker[i] == PASO_AMG_IN_F);
106
107 /*
108 count number of unkowns to be eliminated:
109 */
110 n_F=Paso_Util_cumsum_maskedTrue(n,counter, split_marker);
111 n_C=n-n_F;
112 if (verbose) printf("Paso: AMG level %d: %d unknowns are flagged for elimination. %d left.\n",level,n_F,n-n_F);
113
114 if ( n_F == 0 ) { /* is a nasty case. a direct solver should be used, return NULL */
115 out = NULL;
116 } else {
117 out=MEMALLOC(1,Paso_Preconditioner_LocalAMG);
118 mask_C=TMPMEMALLOC(n,index_t);
119 rows_in_F=TMPMEMALLOC(n_F,index_t);
120 if ( !( Esys_checkPtr(mask_C) || Esys_checkPtr(rows_in_F) || Esys_checkPtr(out)) ) {
121 out->level = level;
122 out->n = n;
123 out->n_F = n_F;
124 out->n_block = n_block;
125 out->A_C = NULL;
126 out->P = NULL;
127 out->R = NULL;
128 out->post_sweeps = options->post_sweeps;
129 out->pre_sweeps = options->pre_sweeps;
130 out->r = NULL;
131 out->x_C = NULL;
132 out->b_C = NULL;
133 out->AMG_C = NULL;
134 out->Smoother = Paso_Preconditioner_LocalSmoother_alloc(A_p, (options->smoother == PASO_JACOBI), verbose);
135
136 if ( n_F < n ) { /* if nothing is been removed we have a diagonal dominant matrix and we just run a few steps of the smoother */
137
138 /* allocate helpers :*/
139 out->x_C=MEMALLOC(n_block*n_C,double);
140 out->b_C=MEMALLOC(n_block*n_C,double);
141 out->r=MEMALLOC(n_block*n,double);
142
143 Esys_checkPtr(out->r);
144 Esys_checkPtr(out->Smoother);
145 Esys_checkPtr(out->x_C);
146 Esys_checkPtr(out->b_C);
147
148 /* creates index for F:*/
149 #pragma omp parallel for private(i) schedule(static)
150 for (i = 0; i < n; ++i) {
151 if (split_marker[i]) rows_in_F[counter[i]]=i;
152 }
153 /* create mask of C nodes with value >-1 gives new id */
154 i=Paso_Util_cumsum_maskedFalse(n,counter, split_marker);
155
156 #pragma omp parallel for private(i) schedule(static)
157 for (i = 0; i < n; ++i) {
158 if (split_marker[i]) {
159 mask_C[i]=-1;
160 } else {
161 mask_C[i]=counter[i];;
162 }
163 }
164 /*
165 get Restriction :
166 */
167 time0=Esys_timer();
168 out->P=Paso_Preconditioner_AMG_getDirectProlongation(A_p,degree,S,n_C,mask_C);
169 if (SHOW_TIMING) printf("timing: level %d: getProlongation: %e\n",level, Esys_timer()-time0);
170 /*
171 construct Prolongation operator as transposed of restriction operator:
172 */
173 if ( Esys_noError()) {
174 time0=Esys_timer();
175 out->R=Paso_SparseMatrix_getTranspose(out->P);
176 if (SHOW_TIMING) printf("timing: level %d: Paso_SparseMatrix_getTranspose: %e\n",level,Esys_timer()-time0);
177 }
178 /*
179 construct coarse level matrix:
180 */
181 if ( Esys_noError()) {
182 time0=Esys_timer();
183 Atemp=Paso_SparseMatrix_MatrixMatrix(A_p,out->P);
184 A_C=Paso_SparseMatrix_MatrixMatrix(out->R,Atemp);
185 Paso_SparseMatrix_free(Atemp);
186 if (SHOW_TIMING) printf("timing: level %d : construct coarse matrix: %e\n",level,Esys_timer()-time0);
187 }
188
189
190 /*
191 constructe courser level:
192
193 */
194 if ( Esys_noError()) {
195 out->AMG_C=Paso_Preconditioner_LocalAMG_alloc(A_C,level+1,options);
196 }
197 if ( Esys_noError()) {
198 if ( out->AMG_C == NULL ) {
199 out->reordering = options->reordering;
200 out->refinements = options->coarse_matrix_refinements;
201 /* no coarse level matrix has been constructed. use direct solver */
202 #ifdef MKL
203 out->A_C=Paso_SparseMatrix_unroll(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_OFFSET1, A_C);
204 Paso_SparseMatrix_free(A_C);
205 out->A_C->solver_package = PASO_MKL;
206 if (verbose) printf("Paso: AMG: use MKL direct solver on the coarsest level (number of unknowns = %d).\n",n_C);
207 #else
208 #ifdef UMFPACK
209 out->A_C=Paso_SparseMatrix_unroll(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_CSC, A_C);
210 Paso_SparseMatrix_free(A_C);
211 out->A_C->solver_package = PASO_UMFPACK;
212 if (verbose) printf("Paso: AMG: use UMFPACK direct solver on the coarsest level (number of unknowns = %d).\n",n_C);
213 #else
214 out->A_C=A_C;
215 out->A_C->solver_p=Paso_Preconditioner_LocalSmoother_alloc(out->A_C, (options->smoother == PASO_JACOBI), verbose);
216 out->A_C->solver_package = PASO_SMOOTHER;
217 if (verbose) printf("Paso: AMG: use smoother on the coarsest level (number of unknowns = %d).\n",n_C);
218 #endif
219 #endif
220 } else {
221 /* finally we set some helpers for the solver step */
222 out->A_C=A_C;
223 }
224 }
225 }
226 }
227 TMPMEMFREE(mask_C);
228 TMPMEMFREE(rows_in_F);
229 }
230 }
231 }
232 TMPMEMFREE(counter);
233 TMPMEMFREE(split_marker);
234 TMPMEMFREE(degree);
235 TMPMEMFREE(S);
236
237 if (Esys_noError()) {
238 return out;
239 } else {
240 Paso_Preconditioner_LocalAMG_free(out);
241 return NULL;
242 }
243 }
244
245
246 void Paso_Preconditioner_LocalAMG_solve(Paso_SparseMatrix* A, Paso_Preconditioner_LocalAMG * amg, double * x, double * b) {
247 const dim_t n = amg->n * amg->n_block;
248 double time0=0;
249 const dim_t post_sweeps=amg->post_sweeps;
250 const dim_t pre_sweeps=amg->pre_sweeps;
251
252 /* presmoothing */
253 time0=Esys_timer();
254 Paso_Preconditioner_LocalSmoother_solve(A, amg->Smoother, x, b, pre_sweeps, FALSE);
255 time0=Esys_timer()-time0;
256 if (SHOW_TIMING) printf("timing: level %d: Presmooting: %e\n",amg->level, time0);
257 /* end of presmoothing */
258
259 if (amg->n_F < amg->n) { /* is there work on the coarse level? */
260 time0=Esys_timer();
261 Paso_Copy(n, amg->r, b); /* r <- b */
262 Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,A,x,1.,amg->r); /*r=r-Ax*/
263 Paso_SparseMatrix_MatrixVector_CSR_OFFSET0_DIAG(1.,amg->R,amg->r,0.,amg->b_C); /* b_c = R*r */
264 time0=Esys_timer()-time0;
265 /* coarse level solve */
266 if ( amg->AMG_C == NULL) {
267 time0=Esys_timer();
268 /* A_C is the coarsest level */
269 switch (amg->A_C->solver_package) {
270 case (PASO_MKL):
271 Paso_MKL(amg->A_C, amg->x_C,amg->b_C, amg->reordering, amg->refinements, SHOW_TIMING);
272 break;
273 case (PASO_UMFPACK):
274 Paso_UMFPACK(amg->A_C, amg->x_C,amg->b_C, amg->refinements, SHOW_TIMING);
275 break;
276 case (PASO_SMOOTHER):
277 Paso_Preconditioner_LocalSmoother_solve(amg->A_C, amg->Smoother,amg->x_C,amg->b_C,pre_sweeps, FALSE);
278 break;
279 }
280 if (SHOW_TIMING) printf("timing: level %d: DIRECT SOLVER: %e\n",amg->level,Esys_timer()-time0);
281 } else {
282 Paso_Preconditioner_LocalAMG_solve(amg->A_C, amg->AMG_C,amg->x_C,amg->b_C); /* x_C=AMG(b_C) */
283 }
284 time0=time0+Esys_timer();
285 Paso_SparseMatrix_MatrixVector_CSR_OFFSET0_DIAG(1.,amg->P,amg->x_C,1.,x); /* x = x + P*x_c */
286
287 /*postsmoothing*/
288
289 /*solve Ax=b with initial guess x */
290 time0=Esys_timer();
291 Paso_Preconditioner_LocalSmoother_solve(A, amg->Smoother, x, b, post_sweeps, TRUE);
292 time0=Esys_timer()-time0;
293 if (SHOW_TIMING) printf("timing: level %d: Postsmoothing: %e\n",amg->level,time0);
294 /*end of postsmoothing*/
295
296 }
297 return;
298 }
299
300 /* theta = threshold for strong connections */
301 /* tau = threshold for diagonal dominance */
302
303 /*S_i={j \in N_i; i strongly coupled to j}
304
305 in the sense that |A_{ij}| >= theta * max_k |A_{ik}|
306 */
307
308 void Paso_Preconditioner_AMG_setStrongConnections(Paso_SparseMatrix* A,
309 dim_t *degree, index_t *S,
310 const double theta, const double tau)
311 {
312 const dim_t n=A->numRows;
313 index_t iptr, i,j;
314 dim_t kdeg;
315 double max_offdiagonal, threshold, sum_row, main_row, fnorm;
316
317
318 #pragma omp parallel for private(i,iptr,max_offdiagonal, threshold,j, kdeg, sum_row, main_row, fnorm) schedule(static)
319 for (i=0;i<n;++i) {
320
321 max_offdiagonal = 0.;
322 sum_row=0;
323 main_row=0;
324 #pragma ivdep
325 for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {
326 j=A->pattern->index[iptr];
327 fnorm=ABS(A->val[iptr]);
328
329 if( j != i) {
330 max_offdiagonal = MAX(max_offdiagonal,fnorm);
331 sum_row+=fnorm;
332 } else {
333 main_row=fnorm;
334 }
335 }
336 threshold = theta*max_offdiagonal;
337 kdeg=0;
338 if (tau*main_row < sum_row) { /* no diagonal domainance */
339 #pragma ivdep
340 for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {
341 j=A->pattern->index[iptr];
342 if(ABS(A->val[iptr])>threshold && i!=j) {
343 S[A->pattern->ptr[i]+kdeg] = j;
344 kdeg++;
345 }
346 }
347 }
348 degree[i]=kdeg;
349 }
350
351 }
352
353 /* theta = threshold for strong connections */
354 /* tau = threshold for diagonal dominance */
355 /*S_i={j \in N_i; i strongly coupled to j}
356
357 in the sense that |A_{ij}|_F >= theta * max_k |A_{ik}|_F
358 */
359 void Paso_Preconditioner_AMG_setStrongConnections_Block(Paso_SparseMatrix* A,
360 dim_t *degree, index_t *S,
361 const double theta, const double tau)
362
363 {
364 const dim_t n_block=A->row_block_size;
365 const dim_t n=A->numRows;
366 index_t iptr, i,j, bi;
367 dim_t kdeg, max_deg;
368 register double max_offdiagonal, threshold, fnorm, sum_row, main_row;
369 double *rtmp;
370
371
372 #pragma omp parallel private(i,iptr,max_offdiagonal, kdeg, threshold,j, max_deg, fnorm, sum_row, main_row, rtmp)
373 {
374 max_deg=0;
375 #pragma omp for schedule(static)
376 for (i=0;i<n;++i) max_deg=MAX(max_deg, A->pattern->ptr[i+1]-A->pattern->ptr[i]);
377
378 rtmp=TMPMEMALLOC(max_deg, double);
379
380 #pragma omp for schedule(static)
381 for (i=0;i<n;++i) {
382
383 max_offdiagonal = 0.;
384 sum_row=0;
385 main_row=0;
386 for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {
387 j=A->pattern->index[iptr];
388 fnorm=0;
389 #pragma ivdep
390 for(bi=0;bi<n_block*n_block;++bi) fnorm+=A->val[iptr*n_block*n_block+bi]*A->val[iptr*n_block*n_block+bi];
391 fnorm=sqrt(fnorm);
392 rtmp[iptr-A->pattern->ptr[i]]=fnorm;
393 if( j != i) {
394 max_offdiagonal = MAX(max_offdiagonal,fnorm);
395 sum_row+=fnorm;
396 } else {
397 main_row=fnorm;
398 }
399 }
400 threshold = theta*max_offdiagonal;
401
402 kdeg=0;
403 if (tau*main_row < sum_row) { /* no diagonal domainance */
404 #pragma ivdep
405 for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {
406 j=A->pattern->index[iptr];
407 if(rtmp[iptr-A->pattern->ptr[i]] > threshold && i!=j) {
408 S[A->pattern->ptr[i]+kdeg] = j;
409 kdeg++;
410 }
411 }
412 }
413 degree[i]=kdeg;
414 }
415 TMPMEMFREE(rtmp);
416 } /* end of parallel region */
417
418 }
419
420 /* the runge stueben coarsening algorithm: */
421 void Paso_Preconditioner_AMG_RungeStuebenSearch(const dim_t n, const index_t* offset,
422 const dim_t* degree, const index_t* S,
423 index_t*split_marker)
424 {
425 index_t *lambda=NULL, j, *ST=NULL;
426 dim_t i,k, p, q, *degreeT=NULL, itmp;
427
428 if (n<=0) return; /* make sure that the return of Paso_Util_arg_max is not pointing to nirvana */
429
430 lambda=TMPMEMALLOC(n, index_t);
431 degreeT=TMPMEMALLOC(n, dim_t);
432 ST=TMPMEMALLOC(offset[n], index_t);
433
434 if (! ( Esys_checkPtr(lambda) || Esys_checkPtr(degreeT) || Esys_checkPtr(ST) ) ) {
435 /* initialize split_marker and split_marker :*/
436 /* those unknows which are not influenced go into F, the rest is available for F or C */
437 #pragma omp parallel for private(i) schedule(static)
438 for (i=0;i<n;++i) {
439 degreeT[i]=0;
440 if (degree[i]>0) {
441 lambda[i]=0;
442 split_marker[i]=PASO_AMG_UNDECIDED;
443 } else {
444 split_marker[i]=PASO_AMG_IN_F;
445 lambda[i]=-1;
446 }
447 }
448 /* create transpose :*/
449 for (i=0;i<n;++i) {
450 for (p=0; p<degree[i]; ++p) {
451 j=S[offset[i]+p];
452 ST[offset[j]+degreeT[j]]=i;
453 degreeT[j]++;
454 }
455 }
456 /* lambda[i] = |undecided k in ST[i]| + 2 * |F-unknown in ST[i]| */
457 #pragma omp parallel for private(i, j, itmp) schedule(static)
458 for (i=0;i<n;++i) {
459 if (split_marker[i]==PASO_AMG_UNDECIDED) {
460 itmp=lambda[i];
461 for (p=0; p<degreeT[i]; ++p) {
462 j=ST[offset[i]+p];
463 if (split_marker[j]==PASO_AMG_UNDECIDED) {
464 itmp++;
465 } else { /* at this point there are no C points */
466 itmp+=2;
467 }
468 }
469 lambda[i]=itmp;
470 }
471 }
472
473 /* start search :*/
474 i=Paso_Util_arg_max(n,lambda);
475 while (lambda[i]>-1) { /* is there any undecided unknowns? */
476
477 /* the unknown i is moved to C */
478 split_marker[i]=PASO_AMG_IN_C;
479 lambda[i]=-1; /* lambda fro unavailable unknowns is set to -1 */
480
481 /* all undecided unknown strongly coupled to i are moved to F */
482 for (p=0; p<degreeT[i]; ++p) {
483 j=ST[offset[i]+p];
484
485 if (split_marker[j]==PASO_AMG_UNDECIDED) {
486
487 split_marker[j]=PASO_AMG_IN_F;
488 lambda[j]=-1;
489
490 for (q=0; q<degreeT[j]; ++q) {
491 k=ST[offset[j]+q];
492 if (split_marker[k]==PASO_AMG_UNDECIDED) lambda[k]++;
493 }
494
495 }
496 }
497 for (p=0; p<degree[i]; ++p) {
498 j=S[offset[i]+p];
499 if(split_marker[j]==PASO_AMG_UNDECIDED) lambda[j]--;
500 }
501
502 i=Paso_Util_arg_max(n,lambda);
503 }
504 }
505 TMPMEMFREE(lambda);
506 TMPMEMFREE(ST);
507 TMPMEMFREE(degreeT);
508 }

  ViewVC Help
Powered by ViewVC 1.1.26