/[escript]/trunk/paso/src/BOOMERAMG.cpp
ViewVC logotype

Contents of /trunk/paso/src/BOOMERAMG.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 6141 - (show annotations)
Wed Apr 6 03:51:30 2016 UTC (2 years, 8 months ago) by caltinay
File size: 11153 byte(s)
more namespacing of defines.

1 /*****************************************************************************
2 *
3 * Copyright (c) 2003-2016 by The University of Queensland
4 * http://www.uq.edu.au
5 *
6 * Primary Business: Queensland, Australia
7 * Licensed under the Apache License, version 2.0
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
11 * Development 2012-2013 by School of Earth Sciences
12 * Development from 2014 by Centre for Geoscience Computing (GeoComp)
13 *
14 *****************************************************************************/
15
16
17 /****************************************************************************/
18
19 /* Paso: interface to HYPRE, a software library of high performance
20 * preconditioners and solvers. We use the BoomerAMG part provided
21 * in this library
22 */
23
24 /****************************************************************************/
25
26 /* Author: Lin Gao, l.gao@uq.edu.au */
27
28 /****************************************************************************/
29
30 #include "Paso.h"
31 #include "BOOMERAMG.h"
32 #include "Options.h"
33 #include "Preconditioner.h"
34
35 namespace paso {
36
37 void Preconditioner_BoomerAMG_free(Preconditioner_BoomerAMG* in)
38 {
39 #ifdef ESYS_HAVE_BOOMERAMG
40 if (in != NULL) {
41 HYPRE_IJMatrixDestroy(in->A);
42 HYPRE_IJVectorDestroy(in->b);
43 HYPRE_IJVectorDestroy(in->x);
44 HYPRE_BoomerAMGDestroy(in->solver);
45 delete in;
46 }
47 #endif
48 }
49
50 // allocate necessary space for the BoomerAMG library
51 Preconditioner_BoomerAMG* Preconditioner_BoomerAMG_alloc(SystemMatrix_ptr A,
52 Options* options)
53 {
54 #ifdef ESYS_HAVE_BOOMERAMG
55 index_t ilower; /* first row in current processor, number is given by
56 the global indices. Can be 0- or 1-based indexing */
57 index_t iupper; /* last row in current processor, number is given by
58 the global indices. Row partitioning must be
59 contiguous, i.e., iupper for proc i must equal
60 ilower-1 for proc i+1 */
61 index_t jlower; /* jlower and jupper define a column partitioning */
62 index_t jupper;
63 index_t nrows;
64 dim_t offset;
65 index_t i;
66 index_t *ncols, *rows, *cols;
67 double *val;
68 Preconditioner_BoomerAMG* out=NULL;
69
70 if (A->type & MATRIX_FORMAT_OFFSET1 || A->mainBlock->col_block_size != 1 ||
71 A->mainBlock->row_block_size != 1 ||
72 ((A->type != MATRIX_FORMAT_DEFAULT) &&
73 (A->type != (MATRIX_FORMAT_DEFAULT+MATRIX_FORMAT_BLK1))
74 )) {
75 printf("Preconditioner_BoomerAMG: BoomerAMG requires CSR format with index offset 0 and block size 1.");
76 return NULL;
77 }
78
79 out = new Preconditioner_BoomerAMG;
80
81 /* set up inputs for BoomerAMG */
82 nrows = A->mainBlock->numRows;
83
84 ilower = A->row_distribution->getFirstComponent();
85 iupper = ilower + nrows - 1;
86
87 jlower = A->col_distribution->getFirstComponent();
88 jupper = ilower + A->mainBlock->numCols - 1;
89
90 rows = new index_t[nrows];
91 #pragma omp parallel for schedule(static) private(i)
92 for (i=0; i<nrows; i++) rows[i]=i+ilower;
93
94 /* merge MainBlock and CoupleBlock to get a complete column
95 entries for each row allocated to current processor */
96 A->mergeMainAndCouple(&ncols, &cols, &val);
97
98 /* ncols returned from mergeMainAndCouple() is a vector of
99 locations that start a row. we transfer it into a vector
100 of the number of columns in each row */
101 for (i=0; i<nrows; i++) ncols[i]=ncols[i+1]-ncols[i];
102
103 /* create and initialise an empty matrix object. _IJMatrixCreate()
104 is a collective call with each process passing its own row extents,
105 ilower and iupper */
106 HYPRE_IJMatrixCreate(A->mpi_info->comm, ilower, iupper, jlower, jupper, &(out->A));
107 HYPRE_IJMatrixSetObjectType(out->A, HYPRE_PARCSR);
108 HYPRE_IJMatrixInitialize(out->A);
109
110 /* set matrix coefficients. set matrix values for some number of
111 rows (nrows) and some number of columns in each row (ncols).
112 The actual row and column numbers of the matrix values to be
113 set are given by "rows" and "cols" */
114 HYPRE_IJMatrixSetValues(out->A, nrows, ncols, rows, cols, val);
115
116 /* finalize the matrix assembly and make the matrix "ready to
117 use". _IJMatrixAssemble() is a collective call */
118 HYPRE_IJMatrixAssemble(out->A);
119 HYPRE_IJMatrixGetObject(out->A, (void **) &out->parcsr_A);
120
121 /* Create the rhs and solution */
122 HYPRE_IJVectorCreate(A->mpi_info->comm, ilower, iupper,&(out->b));
123 HYPRE_IJVectorSetObjectType(out->b, HYPRE_PARCSR);
124 HYPRE_IJVectorInitialize(out->b);
125 HYPRE_IJVectorCreate(A->mpi_info->comm, ilower, iupper,&(out->x));
126 HYPRE_IJVectorSetObjectType(out->x, HYPRE_PARCSR);
127 HYPRE_IJVectorInitialize(out->x);
128
129 /* create solver */
130 HYPRE_BoomerAMGCreate(&(out->solver));
131
132 /* set some parameters */
133 /* define the smoother to be used (default is 3):
134 0 -- Jacobi
135 1 -- Gauss-Seidel, sequential (very slow!)
136 2 -- Gauss-Seidel, points in parallel, boundary sequential (slow!)
137 3 -- hybrid Gauss-Seidel or SOR, forward solve
138 4 -- hybrid Gauss-Seidel or SOR, backward solve
139 5 -- hybrid chaotic Gauss-Seidel (works only with OpenMP)
140 6 -- hybrid symmetric Gauss-Seidel or SSOR
141 9 -- Gaussian elimination (only on coarsest level) */
142 switch (options->smoother) {
143 case PASO_GS:
144 default:
145 HYPRE_BoomerAMGSetRelaxType(out->solver, 3);
146 break;
147 case PASO_JACOBI:
148 HYPRE_BoomerAMGSetRelaxType(out->solver, 0);
149 break;
150 }
151 /* parallel coarsening algorithm used (default is 6):
152 0 -- CLJP-coarsening (a parallel coarsening algorithm using
153 independent sets.
154 1 -- classical Ruge-Stueben coarsening on each processor, no
155 boundary treatment (not recommended!)
156 3 -- classical Ruge-Stueben coarsening on each processor, followed
157 by a third pass, which adds coarse points on the boundaries
158 6 -- Falgout coarsening (uses 1 first, followed by CLJP using the
159 interior coarse points generated by 1 as its first
160 independent set)
161 7 -- CLJP-coarsening (using a fixed random vector, for debugging
162 purposes only)
163 8 -- PMIS-coarsening (a parallel coarsening algorithm using
164 independent sets, generating lower complexities than CLJP,
165 might also lead to slower convergence)
166 9 -- PMIS-coarsening (using a fixed random vector, for debugging
167 purposes only)
168 10 -- HMIS-coarsening (uses one pass Ruge-Stueben on each processor
169 independently, followed by PMIS using the interior C-points
170 generated as its first independent set)
171 11 -- one-pass Ruge-Stueben coarsening on each processor, no
172 boundary treatment (not recommended!) */
173 switch (options->coarsening_method) {
174 case PASO_FALGOUT_COARSENING:
175 default:
176 HYPRE_BoomerAMGSetCoarsenType(out->solver, 6);
177 break;
178 case PASO_RUGE_STUEBEN_COARSENING:
179 HYPRE_BoomerAMGSetCoarsenType(out->solver, 3);
180 break;
181 case PASO_CIJP_COARSENING:
182 HYPRE_BoomerAMGSetCoarsenType(out->solver, 0);
183 break;
184 case PASO_CIJP_FIXED_RANDOM_COARSENING:
185 HYPRE_BoomerAMGSetCoarsenType(out->solver, 7);
186 break;
187 case PASO_PMIS_COARSENING:
188 HYPRE_BoomerAMGSetCoarsenType(out->solver, 8);
189 break;
190 case PASO_HMIS_COARSENING:
191 HYPRE_BoomerAMGSetCoarsenType(out->solver, 10);
192 break;
193 }
194
195 if (options->verbose)
196 HYPRE_BoomerAMGSetPrintLevel(out->solver, 3);
197 /* maximum number of levels. Default is 25 */
198 if (options->level_max > 0)
199 HYPRE_BoomerAMGSetMaxLevels(out->solver, options->level_max);
200 /* number of is sweeps */
201 if (options->sweeps > 0)
202 HYPRE_BoomerAMGSetNumSweeps(out->solver, options->sweeps);
203 /* 1 for V-cycle and 2 for W-cycle */
204 if (options->cycle_type == 1 || options->cycle_type == 2)
205 HYPRE_BoomerAMGSetCycleType(out->solver, options->cycle_type);
206 /* AMG coarsening strength threshold. Default is 0.25. For 2D Laplace
207 operations, 0.25 is a good value. For 3D Laplace operations, 0.5 or
208 0.6 is a better value. For elasticity problems, a large strength
209 threshold, such as 0.9, is often better */
210 if (options->coarsening_threshold > 0)
211 HYPRE_BoomerAMGSetStrongThreshold(out->solver, options->coarsening_threshold);
212 /* threshold for diagonal dominant rows. Default is 0.9 */
213 if (options->diagonal_dominance_threshold > 0)
214 HYPRE_BoomerAMGSetMaxRowSum(out->solver, options->diagonal_dominance_threshold);
215 /* conv. tolerance */
216 /* HYPRE_BoomerAMGSetTol(out->solver, 1e-7);*/
217
218 // free memory
219 delete[] rows;
220 delete[] ncols;
221 delete[] cols;
222 delete[] val;
223 return out;
224 #else
225 return NULL;
226 #endif
227 }
228
229 /// call the solver
230 void Preconditioner_BoomerAMG_solve(SystemMatrix_ptr A,
231 Preconditioner_BoomerAMG* amg,
232 double* out, double* in)
233 {
234 #ifdef ESYS_HAVE_BOOMERAMG
235 index_t ilower; /* first row in current processor, number is given by
236 the global indices. Can be 0- or 1-based indexing */
237 index_t iupper; /* last row in current processor, number is given by
238 the global indices. Row partitioning must be
239 contiguous, i.e., iupper for proc i must equal
240 ilower-1 for proc i+1 */
241 index_t i, nrows;
242 dim_t offset;
243 index_t *rows;
244
245 if (A->type & MATRIX_FORMAT_OFFSET1 ||
246 A->mainBlock->col_block_size != 1 ||
247 A->mainBlock->row_block_size != 1 ||
248 ((A->type != MATRIX_FORMAT_DEFAULT) &&
249 (A->type != (MATRIX_FORMAT_DEFAULT+MATRIX_FORMAT_BLK1))
250 )
251 ) {
252 printf("Preconditioner_BoomerAMG: BoomerAMG requires CSR format with index offset 0 and block size 1.");
253 return;
254 }
255
256 /* set up inputs for BoomerAMG */
257 nrows = A->mainBlock->numRows;
258 ilower = A->row_distribution->getFirstComponent();
259 iupper = ilower + nrows - 1;
260 rows = new index_t[nrows];
261 #pragma omp parallel for schedule(static) private(i)
262 for (i=0; i<nrows; i++) rows[i]=i+ilower;
263
264 /* set values for the rhs and solution */
265 HYPRE_IJVectorSetValues(amg->b, nrows, rows, in);
266 HYPRE_IJVectorAssemble(amg->b);
267 HYPRE_IJVectorGetObject(amg->b, (void **) &(amg->par_b));
268 HYPRE_IJVectorSetValues(amg->x, nrows, rows, out);
269 HYPRE_IJVectorAssemble(amg->x);
270 HYPRE_IJVectorGetObject(amg->x, (void **) &(amg->par_x));
271
272 /* setup and solve */
273 HYPRE_BoomerAMGSetup(amg->solver, amg->parcsr_A, amg->par_b, amg->par_x);
274 HYPRE_BoomerAMGSolve(amg->solver, amg->parcsr_A, amg->par_b, amg->par_x);
275 HYPRE_IJVectorGetValues(amg->x, nrows, rows, out);
276
277 delete[] rows;
278 #endif
279 }
280
281 } // namespace paso
282

Properties

Name Value
svn:mergeinfo /branches/4.0fordebian/paso/src/BOOMERAMG.cpp:5567-5588 /branches/amg_from_3530/paso/src/BOOMERAMG.cpp:3531-3826 /branches/lapack2681/paso/src/BOOMERAMG.cpp:2682-2741 /branches/pasowrap/paso/src/BOOMERAMG.cpp:3661-3674 /branches/py3_attempt2/paso/src/BOOMERAMG.cpp:3871-3891 /branches/restext/paso/src/BOOMERAMG.cpp:2610-2624 /branches/ripleygmg_from_3668/paso/src/BOOMERAMG.cpp:3669-3791 /branches/stage3.0/paso/src/BOOMERAMG.cpp:2569-2590 /branches/symbolic_from_3470/paso/src/BOOMERAMG.cpp:3471-3974 /branches/symbolic_from_3470/ripley/test/python/paso/src/BOOMERAMG.cpp:3517-3974 /branches/trilinos_from_5897/paso/src/BOOMERAMG.cpp:5898-6118 /release/3.0/paso/src/BOOMERAMG.cpp:2591-2601 /release/4.0/paso/src/BOOMERAMG.cpp:5380-5406 /trunk/paso/src/BOOMERAMG.cpp:4257-4344 /trunk/ripley/test/python/paso/src/BOOMERAMG.cpp:3480-3515

  ViewVC Help
Powered by ViewVC 1.1.26