/[escript]/trunk/paso/src/AMG.c
ViewVC logotype

Contents of /trunk/paso/src/AMG.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3287 - (show annotations)
Tue Oct 19 00:42:22 2010 UTC (9 years ago) by gross
File MIME type: text/plain
File size: 17459 byte(s)
complaint from valgrind about an uninitialized value fixed (no impact on result)
1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2010 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14
15 /**************************************************************/
16
17 /* Paso: AMG preconditioner */
18
19 /**************************************************************/
20
21 /* Author: artak@uq.edu.au */
22
23 /**************************************************************/
24
25 #define SHOW_TIMING FALSE
26
27 #include "Paso.h"
28 #include "Preconditioner.h"
29 #include "Options.h"
30 #include "PasoUtil.h"
31 #include "UMFPACK.h"
32 #include "MKL.h"
33
34 /**************************************************************/
35
36 /* free all memory used by AMG */
37
38 void Paso_Preconditioner_LocalAMG_free(Paso_Preconditioner_LocalAMG * in) {
39 if (in!=NULL) {
40 Paso_Preconditioner_LocalSmoother_free(in->Smoother);
41 Paso_SparseMatrix_free(in->P);
42 Paso_SparseMatrix_free(in->R);
43 Paso_SparseMatrix_free(in->A_C);
44 Paso_Preconditioner_LocalAMG_free(in->AMG_C);
45 MEMFREE(in->r);
46 MEMFREE(in->x_C);
47 MEMFREE(in->b_C);
48
49
50 MEMFREE(in);
51 }
52 }
53
54 /*****************************************************************
55
56 constructs AMG
57
58 ******************************************************************/
59 Paso_Preconditioner_LocalAMG* Paso_Preconditioner_LocalAMG_alloc(Paso_SparseMatrix *A_p,dim_t level,Paso_Options* options) {
60
61 Paso_Preconditioner_LocalAMG* out=NULL;
62 bool_t verbose=options->verbose;
63
64 Paso_SparseMatrix* W_FC=NULL, *Atemp=NULL, *A_C=NULL;
65 const dim_t n=A_p->numRows;
66 const dim_t n_block=A_p->row_block_size;
67 index_t* split_marker=NULL, *counter=NULL, *mask_C=NULL, *rows_in_F=NULL;
68 dim_t n_F=0, n_C=0, i;
69 double time0=0;
70 const double theta = options->coarsening_threshold;
71 const double tau = options->diagonal_dominance_threshold;
72
73
74 /*
75 is the input matrix A suitable for coarsening
76
77 */
78 if ( (A_p->pattern->len >= options->min_coarse_sparsity * n * n ) || (n <= options->min_coarse_matrix_size) || (level > options->level_max) ) {
79 if (verbose) printf("Paso: AMG level %d (limit = %d) stopped. sparsity = %e (limit = %e), unknowns = %d (limit = %d)\n",
80 level, options->level_max, A_p->pattern->len/(1.*n * n), options->min_coarse_sparsity, n, options->min_coarse_matrix_size );
81 return NULL;
82 }
83 /* Start Coarsening : */
84
85 split_marker=TMPMEMALLOC(n,index_t);
86 counter=TMPMEMALLOC(n,index_t);
87 if ( !( Esys_checkPtr(split_marker) || Esys_checkPtr(counter) ) ) {
88 /*
89 set splitting of unknows:
90
91 */
92 time0=Esys_timer();
93 if (n_block>1) {
94 Paso_Preconditioner_AMG_RSCoarsening_Block(A_p, split_marker, theta,tau);
95 } else {
96 Paso_Preconditioner_AMG_RSCoarsening(A_p, split_marker, theta,tau);
97 }
98 options->coarsening_selection_time=Esys_timer()-time0 + MAX(0, options->coarsening_selection_time);
99
100 if (Esys_noError() ) {
101 #pragma omp parallel for private(i) schedule(static)
102 for (i = 0; i < n; ++i) split_marker[i]= (split_marker[i] == PASO_AMG_IN_F);
103
104 /*
105 count number of unkowns to be eliminated:
106 */
107 n_F=Paso_Util_cumsum_maskedTrue(n,counter, split_marker);
108 n_C=n-n_F;
109 if (verbose) printf("Paso AMG level %d: %d unknowns are flagged for elimination. %d left.\n",level,n_F,n-n_F);
110
111 if ( n_F == 0 ) { /* is a nasty case. a direct solver should be used, return NULL */
112 out = NULL;
113 } else {
114 out=MEMALLOC(1,Paso_Preconditioner_LocalAMG);
115 mask_C=TMPMEMALLOC(n,index_t);
116 rows_in_F=TMPMEMALLOC(n_F,index_t);
117 if ( !( Esys_checkPtr(mask_C) || Esys_checkPtr(rows_in_F) || Esys_checkPtr(out)) ) {
118 out->level = level;
119 out->n = n;
120 out->n_F = n_F;
121 out->n_block = n_block;
122 out->A_C = NULL;
123 out->P = NULL;
124 out->R = NULL;
125 out->post_sweeps = options->post_sweeps;
126 out->pre_sweeps = options->pre_sweeps;
127 out->r = NULL;
128 out->x_C = NULL;
129 out->b_C = NULL;
130 out->AMG_C = NULL;
131 out->Smoother = Paso_Preconditioner_LocalSmoother_alloc(A_p, (options->smoother == PASO_JACOBI), verbose);
132
133 if ( n_F < n ) { /* if nothing is been removed we have a diagonal dominant matrix and we just run a few steps of the smoother */
134
135 /* creates index for F:*/
136 #pragma omp parallel for private(i) schedule(static)
137 for (i = 0; i < n; ++i) {
138 if (split_marker[i]) rows_in_F[counter[i]]=i;
139 }
140 /* create mask of C nodes with value >-1 gives new id */
141 i=Paso_Util_cumsum_maskedFalse(n,counter, split_marker);
142
143 #pragma omp parallel for private(i) schedule(static)
144 for (i = 0; i < n; ++i) {
145 if (split_marker[i]) {
146 mask_C[i]=-1;
147 } else {
148 mask_C[i]=counter[i];;
149 }
150 }
151 /*
152 get Restriction : (can we do this in one go?)
153 */
154 time0=Esys_timer();
155 W_FC=Paso_SparseMatrix_getSubmatrix(A_p, n_F, n_C, rows_in_F, mask_C);
156 if (SHOW_TIMING) printf("timing: level %d: get Weights: %e\n",level, Esys_timer()-time0);
157
158 time0=Esys_timer();
159 Paso_SparseMatrix_updateWeights(A_p,W_FC,split_marker);
160 if (SHOW_TIMING) printf("timing: level %d: updateWeights: %e\n",level, Esys_timer()-time0);
161
162 time0=Esys_timer();
163 out->P=Paso_SparseMatrix_getProlongation(W_FC,split_marker);
164 if (SHOW_TIMING) printf("timing: level %d: getProlongation: %e\n",level, Esys_timer()-time0);
165
166 Paso_SparseMatrix_free(W_FC);
167 /*
168 construct Prolongation operator as transposed of restriction operator:
169 */
170 time0=Esys_timer();
171 out->R=Paso_SparseMatrix_getTranspose(out->P);
172 if (SHOW_TIMING) printf("timing: level %d: Paso_SparseMatrix_getTranspose: %e\n",level,Esys_timer()-time0);
173 /*
174 constrPaso AMG level 2: 4 unknowns are flagged for elimination.
175 timing: Gauss-Seidel preparation: elemination : 8.096400e-05
176 AMG: level 2: 4 unknowns eliminated.
177 uct coarse level matrix (can we do this in one call?)
178 */
179 time0=Esys_timer();
180 Atemp=Paso_SparseMatrix_MatrixMatrix(A_p,out->P);
181 A_C=Paso_SparseMatrix_MatrixMatrix(out->R,Atemp);
182 Paso_SparseMatrix_free(Atemp);
183 /*A_c=Paso_Solver_getCoarseMatrix(A_p,out->R,out->P);*/
184 if (SHOW_TIMING) printf("timing: level %d : getCoarseMatrix: %e\n",level,Esys_timer()-time0);
185
186 /* allocate helpers :*/
187 out->x_C=MEMALLOC(n_block*n_C,double);
188 out->b_C=MEMALLOC(n_block*n_C,double);
189 out->r=MEMALLOC(n_block*n,double);
190
191 Esys_checkPtr(out->r);
192 Esys_checkPtr(out->Smoother);
193 Esys_checkPtr(out->x_C);
194 Esys_checkPtr(out->b_C);
195
196 /*
197 constructe courser level:
198
199 */
200 out->AMG_C=Paso_Preconditioner_LocalAMG_alloc(A_C,level+1,options);
201
202 if ( Esys_noError()) {
203 if ( out->AMG_C == NULL ) {
204 out->reordering = options->reordering;
205 out->refinements = options->coarse_matrix_refinements;
206 /* no coarse level matrix has been constructed. use direct solver */
207 #ifdef MKL
208 Atemp=Paso_SparseMatrix_unroll(A_C);
209 Paso_SparseMatrix_free(A_C);
210 out->A_C=Paso_SparseMatrix_alloc(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_OFFSET1, Atemp->pattern,1,1, FALSE);
211 #pragma omp parallel for private(i) schedule(static)
212 for (i=0;i<out->A->len;++i) {
213 out->A_C->val[i]=Atemp->val[i];
214 }
215 Paso_SparseMatrix_free(Atemp);
216 out->A_C->solver_package = PASO_MKL;
217 #else
218 #ifdef UMFPACK
219 out->A_C=Paso_SparseMatrix_unroll(A_C);
220 Paso_SparseMatrix_free(A_C);
221 out->A_C->solver_package = PASO_UMFPACK;
222 #else
223 out->A_C=A_C;
224 out->A_C->solver_p=Paso_Preconditioner_LocalSmoother_alloc(out->A_C, (options->smoother == PASO_JACOBI), verbose);
225 out->A_C->solver_package = PASO_SMOOTHER;
226 #endif
227 #endif
228 } else {
229 /* finally we set some helpers for the solver step */
230 out->A_C=A_C;
231 }
232 }
233 }
234 }
235 TMPMEMFREE(mask_C);
236 TMPMEMFREE(rows_in_F);
237 }
238 }
239 }
240 TMPMEMFREE(counter);
241 TMPMEMFREE(split_marker);
242
243 if (Esys_noError()) {
244 if (verbose) printf("AMG: level %d: %d unknowns eliminated.\n",level, n_F);
245 return out;
246 } else {
247 Paso_Preconditioner_LocalAMG_free(out);
248 return NULL;
249 }
250 }
251
252
253 void Paso_Preconditioner_LocalAMG_solve(Paso_SparseMatrix* A, Paso_Preconditioner_LocalAMG * amg, double * x, double * b) {
254 const dim_t n = amg->n * amg->n_block;
255 double time0=0;
256 const dim_t post_sweeps=amg->post_sweeps;
257 const dim_t pre_sweeps=amg->pre_sweeps;
258
259 /* presmoothing */
260 time0=Esys_timer();
261 Paso_Preconditioner_LocalSmoother_solve(A, amg->Smoother, x, b, pre_sweeps, FALSE);
262 time0=Esys_timer()-time0;
263 if (SHOW_TIMING) printf("timing: level %d: Presmooting: %e\n",amg->level, time0);
264 /* end of presmoothing */
265
266 if (amg->n_F < amg->n) { /* is there work on the coarse level? */
267 time0=Esys_timer();
268 Paso_Copy(n, amg->r, b); /* r <- b */
269 Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,A,x,1.,amg->r); /*r=r-Ax*/
270 Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(1.,amg->R,amg->r,0.,amg->b_C); /* b_c = R*r */
271 time0=Esys_timer()-time0;
272 /* coarse level solve */
273 if ( amg->AMG_C == NULL) {
274 time0=Esys_timer();
275 /* A_C is the coarsest level */
276 switch (amg->A_C->solver_package) {
277 case (PASO_MKL):
278 Paso_MKL(amg->A_C, amg->x_C,amg->b_C, amg->reordering, amg->refinements, SHOW_TIMING);
279 break;
280 case (PASO_UMFPACK):
281 Paso_UMFPACK(amg->A_C, amg->x_C,amg->b_C, amg->refinements, SHOW_TIMING);
282 break;
283 case (PASO_SMOOTHER):
284 Paso_Preconditioner_LocalSmoother_solve(amg->A_C, amg->Smoother,amg->x_C,amg->b_C,pre_sweeps, FALSE);
285 break;
286 }
287 if (SHOW_TIMING) printf("timing: level %d: DIRECT SOLVER: %e\n",amg->level,Esys_timer()-time0);
288 } else {
289 Paso_Preconditioner_LocalAMG_solve(amg->A_C, amg->AMG_C,amg->x_C,amg->b_C); /* x_C=AMG(b_C) */
290 }
291 time0=time0+Esys_timer();
292 Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(1.,amg->P,amg->x_C,1.,x); /* x = x + P*x_c */
293
294 /*postsmoothing*/
295
296 /*solve Ax=b with initial guess x */
297 time0=Esys_timer();
298 Paso_Preconditioner_LocalSmoother_solve(A, amg->Smoother, x, b, post_sweeps, TRUE);
299 time0=Esys_timer()-time0;
300 if (SHOW_TIMING) printf("timing: level %d: Postsmoothing: %e\n",amg->level,time0);
301 /*end of postsmoothing*/
302
303 }
304 return;
305 }
306
307 /* theta = threshold for strong connections */
308 /* tau = threshold for diagonal dominance */
309
310 void Paso_Preconditioner_AMG_RSCoarsening(Paso_SparseMatrix* A, index_t* split_marker, const double theta, const double tau)
311 {
312 const dim_t n=A->numRows;
313 dim_t *degree=NULL; /* number of naighbours in strong connection graph */
314 index_t *S=NULL, iptr, i,j;
315 dim_t kdeg;
316 double max_offdiagonal, threshold, sum_row, main_row, fnorm;
317
318 degree=TMPMEMALLOC(n, dim_t);
319 S=TMPMEMALLOC(A->pattern->len, index_t);
320
321 if ( !( Esys_checkPtr(degree) || Esys_checkPtr(S) ) ) {
322 /*S_i={j \in N_i; i strongly coupled to j}
323
324 in the sense that |A_{ij}| >= theta * max_k |A_{ik}|
325 */
326 #pragma omp parallel for private(i,iptr,max_offdiagonal, threshold,j, kdeg, sum_row, main_row, fnorm) schedule(static)
327 for (i=0;i<n;++i) {
328
329 max_offdiagonal = 0.;
330 sum_row=0;
331 main_row=0;
332 #pragma ivdep
333 for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {
334 j=A->pattern->index[iptr];
335 fnorm=ABS(A->val[iptr]);
336
337 if( j != i) {
338 max_offdiagonal = MAX(max_offdiagonal,fnorm);
339 sum_row+=fnorm;
340 } else {
341 main_row=fnorm;
342 }
343 }
344 threshold = theta*max_offdiagonal;
345 kdeg=0;
346 if (tau*main_row < sum_row) { /* no diagonal domainance */
347 #pragma ivdep
348 for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {
349 j=A->pattern->index[iptr];
350 if(ABS(A->val[iptr])>threshold && i!=j) {
351 S[A->pattern->ptr[i]+kdeg] = j;
352 kdeg++;
353 }
354 }
355 }
356 degree[i]=kdeg;
357 }
358
359 Paso_Preconditioner_AMG_RSCoarsening_search(n, A->pattern->ptr, degree, S, split_marker);
360
361 }
362 TMPMEMFREE(degree);
363 TMPMEMFREE(S);
364 }
365
366 /* theta = threshold for strong connections */
367 /* tau = threshold for diagonal dominance */
368 void Paso_Preconditioner_AMG_RSCoarsening_Block(Paso_SparseMatrix* A, index_t* split_marker, const double theta, const double tau)
369
370 {
371 const dim_t n_block=A->row_block_size;
372 const dim_t n=A->numRows;
373 dim_t *degree=NULL; /* number of naighbours in strong connection graph */
374 index_t *S=NULL, iptr, i,j, bi;
375 dim_t kdeg, max_deg;
376 register double max_offdiagonal, threshold, fnorm, sum_row, main_row;
377 double *rtmp;
378
379
380 degree=TMPMEMALLOC(n, dim_t);
381 S=TMPMEMALLOC(A->pattern->len, index_t);
382
383 if ( !( Esys_checkPtr(degree) || Esys_checkPtr(S) ) ) {
384 /*S_i={j \in N_i; i strongly coupled to j}
385
386 in the sense that |A_{ij}|_F >= theta * max_k |A_{ik}|_F
387 */
388 #pragma omp parallel private(i,iptr,max_offdiagonal, kdeg, threshold,j, max_deg, fnorm, sum_row, main_row, rtmp)
389 {
390 max_deg=0;
391 #pragma omp for schedule(static)
392 for (i=0;i<n;++i) max_deg=MAX(max_deg, A->pattern->ptr[i+1]-A->pattern->ptr[i]);
393
394 rtmp=TMPMEMALLOC(max_deg, double);
395
396 #pragma omp for schedule(static)
397 for (i=0;i<n;++i) {
398
399 max_offdiagonal = 0.;
400 sum_row=0;
401 main_row=0;
402 for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {
403 j=A->pattern->index[iptr];
404 fnorm=0;
405 #pragma ivdep
406 for(bi=0;bi<n_block*n_block;++bi) fnorm+=A->val[iptr*n_block*n_block+bi]*A->val[iptr*n_block*n_block+bi];
407 fnorm=sqrt(fnorm);
408 rtmp[iptr-A->pattern->ptr[i]]=fnorm;
409 if( j != i) {
410 max_offdiagonal = MAX(max_offdiagonal,fnorm);
411 sum_row+=fnorm;
412 } else {
413 main_row=fnorm;
414 }
415 }
416 threshold = theta*max_offdiagonal;
417
418 kdeg=0;
419 if (tau*main_row < sum_row) { /* no diagonal domainance */
420 #pragma ivdep
421 for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {
422 j=A->pattern->index[iptr];
423 if(rtmp[iptr-A->pattern->ptr[i]] > threshold && i!=j) {
424 S[A->pattern->ptr[i]+kdeg] = j;
425 kdeg++;
426 }
427 }
428 }
429 degree[i]=kdeg;
430 }
431 TMPMEMFREE(rtmp);
432 } /* end of parallel region */
433 Paso_Preconditioner_AMG_RSCoarsening_search(n, A->pattern->ptr, degree, S, split_marker);
434 }
435 TMPMEMFREE(degree);
436 TMPMEMFREE(S);
437 }
438
439 /* the runge stueben coarsening algorithm: */
440 void Paso_Preconditioner_AMG_RSCoarsening_search(const dim_t n, const index_t* offset, const dim_t* degree, const index_t* S,
441 index_t*split_marker)
442 {
443 index_t *lambda=NULL, j, *ST=NULL;
444 dim_t i,k, p, q, *degreeT=NULL, itmp;
445
446 if (n<=0) return; /* make sure that the return of Paso_Util_arg_max is not pointing to nirvana */
447
448 lambda=TMPMEMALLOC(n, index_t);
449 degreeT=TMPMEMALLOC(n, dim_t);
450 ST=TMPMEMALLOC(offset[n], index_t);
451
452 if (! ( Esys_checkPtr(lambda) || Esys_checkPtr(degreeT) || Esys_checkPtr(ST) ) ) {
453 /* initialize split_marker and split_marker :*/
454 /* those unknows which are not influenced go into F, the rest is available for F or C */
455 #pragma omp parallel for private(i) schedule(static)
456 for (i=0;i<n;++i) {
457 degreeT[i]=0;
458 if (degree[i]>0) {
459 lambda[i]=0;
460 split_marker[i]=PASO_AMG_UNDECIDED;
461 } else {
462 split_marker[i]=PASO_AMG_IN_F;
463 lambda[i]=-1;
464 }
465 }
466 /* create transpose :*/
467 for (i=0;i<n;++i) {
468 for (p=0; p<degree[i]; ++p) {
469 j=S[offset[i]+p];
470 ST[offset[j]+degreeT[j]]=i;
471 degreeT[j]++;
472 }
473 }
474 /* lambda[i] = |undecided k in ST[i]| + 2 * |F-unknown in ST[i]| */
475 #pragma omp parallel for private(i, j, itmp) schedule(static)
476 for (i=0;i<n;++i) {
477 if (split_marker[i]==PASO_AMG_UNDECIDED) {
478 itmp=lambda[i];
479 for (p=0; p<degreeT[i]; ++p) {
480 j=ST[offset[i]+p];
481 if (split_marker[j]==PASO_AMG_UNDECIDED) {
482 itmp++;
483 } else { /* at this point there are no C points */
484 itmp+=2;
485 }
486 }
487 lambda[i]=itmp;
488 }
489 }
490
491 /* start search :*/
492 i=Paso_Util_arg_max(n,lambda);
493 while (lambda[i]>-1) { /* is there any undecided unknowns? */
494
495 /* the unknown i is moved to C */
496 split_marker[i]=PASO_AMG_IN_C;
497 lambda[i]=-1; /* lambda fro unavailable unknowns is set to -1 */
498
499 /* all undecided unknown strongly coupled to i are moved to F */
500 for (p=0; p<degreeT[i]; ++p) {
501 j=ST[offset[i]+p];
502
503 if (split_marker[j]==PASO_AMG_UNDECIDED) {
504
505 split_marker[j]=PASO_AMG_IN_F;
506 lambda[j]=-1;
507
508 for (q=0; q<degreeT[j]; ++q) {
509 k=ST[offset[j]+q];
510 if (split_marker[k]==PASO_AMG_UNDECIDED) lambda[k]++;
511 }
512
513 }
514 }
515 for (p=0; p<degree[i]; ++p) {
516 j=S[offset[i]+p];
517 if(split_marker[j]==PASO_AMG_UNDECIDED) lambda[j]--;
518 }
519
520 i=Paso_Util_arg_max(n,lambda);
521 }
522 }
523 TMPMEMFREE(lambda);
524 TMPMEMFREE(ST);
525 TMPMEMFREE(degreeT);
526 }

  ViewVC Help
Powered by ViewVC 1.1.26