/[escript]/trunk/paso/src/Solver_AMG.c
ViewVC logotype

Contents of /trunk/paso/src/Solver_AMG.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 2475 - (show annotations)
Wed Jun 17 01:48:46 2009 UTC (10 years, 3 months ago) by artak
File MIME type: text/plain
File size: 16262 byte(s)
AMG now takes into account new SolverOptions class, namely one can select coarsening algorithms from python. Not all options are included into AMG, this will be implemented later on.
1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2008 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14
15 /**************************************************************/
16
17 /* Paso: AMG preconditioner */
18
19 /**************************************************************/
20
21 /* Author: artak@access.edu.au */
22
23 /**************************************************************/
24
25 #include "Paso.h"
26 #include "Solver.h"
27 #include "Options.h"
28 #include "PasoUtil.h"
29 #include "UMFPACK.h"
30 #include "Pattern_coupling.h"
31
32 /**************************************************************/
33
34 /* free all memory used by AMG */
35
36 void Paso_Solver_AMG_free(Paso_Solver_AMG * in) {
37 if (in!=NULL) {
38 Paso_Solver_AMG_free(in->AMG_of_Schur);
39 Paso_Solver_Jacobi_free(in->GS);
40 MEMFREE(in->inv_A_FF);
41 MEMFREE(in->A_FF_pivot);
42 Paso_SparseMatrix_free(in->A_FC);
43 Paso_SparseMatrix_free(in->A_CF);
44 MEMFREE(in->rows_in_F);
45 MEMFREE(in->rows_in_C);
46 MEMFREE(in->mask_F);
47 MEMFREE(in->mask_C);
48 MEMFREE(in->x_F);
49 MEMFREE(in->b_F);
50 MEMFREE(in->x_C);
51 MEMFREE(in->b_C);
52 MEMFREE(in);
53 }
54 }
55
56 /**************************************************************/
57
58 /* constructs the block-block factorization of
59
60 [ A_FF A_FC ]
61 A_p=
62 [ A_CF A_FF ]
63
64 to
65
66 [ I 0 ] [ A_FF 0 ] [ I invA_FF*A_FF ]
67 [ A_CF*invA_FF I ] [ 0 S ] [ 0 I ]
68
69
70 where S=A_FF-ACF*invA_FF*A_FC within the shape of S
71
72 then AMG is applied to S again until S becomes empty
73
74 */
75 Paso_Solver_AMG* Paso_Solver_getAMG(Paso_SparseMatrix *A_p,bool_t verbose,dim_t level, double coarsening_threshold, dim_t coarsening_method) {
76 Paso_Solver_AMG* out=NULL;
77 dim_t n=A_p->numRows;
78 dim_t n_block=A_p->row_block_size;
79 index_t* mis_marker=NULL;
80 index_t* counter=NULL;
81 index_t iPtr,*index, *where_p, iPtr_s;
82 dim_t i,k,j;
83 Paso_SparseMatrix * schur=NULL;
84 Paso_SparseMatrix * schur_withFillIn=NULL;
85 double S=0;
86
87 /*Make sure we have block sizes 1*/
88 A_p->pattern->input_block_size=A_p->col_block_size;
89 A_p->pattern->output_block_size=A_p->row_block_size;
90 A_p->pattern->block_size=A_p->block_size;
91
92 /* identify independend set of rows/columns */
93 mis_marker=TMPMEMALLOC(n,index_t);
94 counter=TMPMEMALLOC(n,index_t);
95 out=MEMALLOC(1,Paso_Solver_AMG);
96 out->AMG_of_Schur=NULL;
97 out->inv_A_FF=NULL;
98 out->A_FF_pivot=NULL;
99 out->A_FC=NULL;
100 out->A_CF=NULL;
101 out->rows_in_F=NULL;
102 out->rows_in_C=NULL;
103 out->mask_F=NULL;
104 out->mask_C=NULL;
105 out->x_F=NULL;
106 out->b_F=NULL;
107 out->x_C=NULL;
108 out->b_C=NULL;
109 out->GS=NULL;
110 out->A=Paso_SparseMatrix_getReference(A_p);
111 /*out->GS=Paso_Solver_getGS(A_p,verbose);*/
112 out->GS=Paso_Solver_getJacobi(A_p);
113 /*out->GS->sweeps=2;*/
114 out->level=level;
115
116 if ( !(Paso_checkPtr(mis_marker) || Paso_checkPtr(out) || Paso_checkPtr(counter) || level==0 ) ) {
117 /* identify independend set of rows/columns */
118 #pragma omp parallel for private(i) schedule(static)
119 for (i=0;i<n;++i) mis_marker[i]=-1;
120
121 /*Paso_Pattern_RS(A_p,mis_marker,coarsening_threshold);*/
122 /*fprintf(stderr,"\n Coarsening method == %d, %d \n",coarsening_method,PASO_RUGE_STUEBEN_COARSENING);*/
123 if (coarsening_method == PASO_YAIR_SHAPIRA_COARSENING) {
124 Paso_Pattern_coup(A_p,mis_marker,coarsening_threshold);
125 }
126 else if (coarsening_method == PASO_RUGE_STUEBEN_COARSENING) {
127 Paso_Pattern_RS(A_p,mis_marker,coarsening_threshold);
128 }
129 else if (coarsening_method == PASO_AGGREGATION_COARSENING) {
130 Paso_Pattern_Aggregiation(A_p,mis_marker,coarsening_threshold);
131 }
132 else {
133 /*Default coarseneing*/
134 Paso_Pattern_RS(A_p,mis_marker,coarsening_threshold);
135 }
136
137 if (Paso_noError()) {
138 #pragma omp parallel for private(i) schedule(static)
139 for (i = 0; i < n; ++i) counter[i]=mis_marker[i];
140 out->n=n;
141 out->n_block=n_block;
142 out->n_F=Paso_Util_cumsum(n,counter);
143 out->mask_F=MEMALLOC(n,index_t);
144 out->rows_in_F=MEMALLOC(out->n_F,index_t);
145 out->inv_A_FF=MEMALLOC(n_block*n_block*out->n_F,double);
146 out->A_FF_pivot=NULL; /* later use for block size>3 */
147 if (! (Paso_checkPtr(out->mask_F) || Paso_checkPtr(out->inv_A_FF) || Paso_checkPtr(out->rows_in_F) ) ) {
148 /* creates an index for F from mask */
149 #pragma omp parallel for private(i) schedule(static)
150 for (i = 0; i < out->n_F; ++i) out->rows_in_F[i]=-1;
151 #pragma omp parallel for private(i) schedule(static)
152 for (i = 0; i < n; ++i) {
153 if (mis_marker[i]) {
154 out->rows_in_F[counter[i]]=i;
155 out->mask_F[i]=counter[i];
156 } else {
157 out->mask_F[i]=-1;
158 }
159 }
160
161 /* Compute row-sum for getting rs(A_FF)^-1*/
162 #pragma omp parallel for private(i,iPtr,j,S) schedule(static)
163 for (i = 0; i < out->n_F; ++i) {
164 S=0;
165 for (iPtr=A_p->pattern->ptr[out->rows_in_F[i]];iPtr<A_p->pattern->ptr[out->rows_in_F[i] + 1]; ++iPtr) {
166 j=A_p->pattern->index[iPtr];
167 if (mis_marker[j])
168 S+=A_p->val[iPtr];
169 }
170 out->inv_A_FF[i]=1./S;
171 }
172
173 if( Paso_noError()) {
174 /* if there are no nodes in the coarse level there is no more work to do */
175 out->n_C=n-out->n_F;
176 if (level>0 && out->n_C>500) {
177 /*if (out->n_F>500) {*/
178 out->rows_in_C=MEMALLOC(out->n_C,index_t);
179 out->mask_C=MEMALLOC(n,index_t);
180 if (! (Paso_checkPtr(out->mask_C) || Paso_checkPtr(out->rows_in_C) ) ) {
181 /* creates an index for C from mask */
182 #pragma omp parallel for private(i) schedule(static)
183 for (i = 0; i < n; ++i) counter[i]=! mis_marker[i];
184 Paso_Util_cumsum(n,counter);
185 #pragma omp parallel for private(i) schedule(static)
186 for (i = 0; i < out->n_C; ++i) out->rows_in_C[i]=-1;
187 #pragma omp parallel for private(i) schedule(static)
188 for (i = 0; i < n; ++i) {
189 if (! mis_marker[i]) {
190 out->rows_in_C[counter[i]]=i;
191 out->mask_C[i]=counter[i];
192 } else {
193 out->mask_C[i]=-1;
194 }
195 }
196 /* get A_CF block: */
197 out->A_CF=Paso_SparseMatrix_getSubmatrix(A_p,out->n_C,out->n_F,out->rows_in_C,out->mask_F);
198 if (Paso_noError()) {
199 /* get A_FC block: */
200 out->A_FC=Paso_SparseMatrix_getSubmatrix(A_p,out->n_F,out->n_C,out->rows_in_F,out->mask_C);
201 /* get A_CC block: */
202 if (Paso_noError()) {
203 schur=Paso_SparseMatrix_getSubmatrix(A_p,out->n_C,out->n_C,out->rows_in_C,out->mask_C);
204 /*find the pattern of the schur complement with fill in*/
205 schur_withFillIn=Paso_SparseMatrix_alloc(A_p->type,Paso_Pattern_binop(PATTERN_FORMAT_DEFAULT, schur->pattern, Paso_Pattern_multiply(PATTERN_FORMAT_DEFAULT,out->A_CF->pattern,out->A_FC->pattern)),1,1);
206
207 /* copy values over*/
208 #pragma omp parallel for private(i,iPtr,j,iPtr_s,index,where_p) schedule(static)
209 for (i = 0; i < schur_withFillIn->numRows; ++i) {
210 for (iPtr=schur_withFillIn->pattern->ptr[i];iPtr<schur_withFillIn->pattern->ptr[i + 1]; ++iPtr) {
211 j=schur_withFillIn->pattern->index[iPtr];
212 iPtr_s=schur->pattern->ptr[i];
213 /*schur_withFillIn->val[iPtr]=0.;*/
214 index=&(schur->pattern->index[iPtr_s]);
215 where_p=(index_t*)bsearch(&j,
216 index,
217 schur->pattern->ptr[i + 1]-schur->pattern->ptr[i],
218 sizeof(index_t),
219 Paso_comparIndex);
220 if (where_p!=NULL) {
221 schur_withFillIn->val[iPtr]=schur->val[iPtr_s+(index_t)(where_p-index)];
222 }
223 }
224 }
225
226 if (Paso_noError()) {
227 Paso_Solver_updateIncompleteSchurComplement(schur_withFillIn,out->A_CF,out->inv_A_FF,out->A_FF_pivot,out->A_FC);
228 out->AMG_of_Schur=Paso_Solver_getAMG(schur_withFillIn,verbose,level-1,coarsening_threshold,coarsening_method);
229 Paso_SparseMatrix_free(schur);
230 }
231 /* allocate work arrays for AMG application */
232 if (Paso_noError()) {
233 out->x_F=MEMALLOC(n_block*out->n_F,double);
234 out->b_F=MEMALLOC(n_block*out->n_F,double);
235 out->x_C=MEMALLOC(n_block*out->n_C,double);
236 out->b_C=MEMALLOC(n_block*out->n_C,double);
237
238 if (! (Paso_checkPtr(out->x_F) || Paso_checkPtr(out->b_F) || Paso_checkPtr(out->x_C) || Paso_checkPtr(out->b_C) ) ) {
239 #pragma omp parallel for private(i,k) schedule(static)
240 for (i = 0; i < out->n_F; ++i) {
241 for (k=0; k<n_block;++k) {
242 out->x_F[i*n_block+k]=0.;
243 out->b_F[i*n_block+k]=0.;
244 }
245 }
246 #pragma omp parallel for private(i,k) schedule(static)
247 for (i = 0; i < out->n_C; ++i) {
248 for (k=0; k<n_block;++k) {
249 out->x_C[i*n_block+k]=0.;
250 out->b_C[i*n_block+k]=0.;
251 }
252 }
253 }
254 }
255 }
256 }
257 }
258 }
259 }
260 }
261 }
262 }
263 TMPMEMFREE(mis_marker);
264 TMPMEMFREE(counter);
265 if (Paso_noError()) {
266 if (verbose && level>0) {
267 printf("AMG: %d unknowns eliminated. %d left.\n",out->n_F,out->n_C);
268 /*if (level>0 && out->n_F>500) {*/
269 /* if (out->n_F<500) {*/
270 /* printf("timing: AMG: MIS/reordering/elemination : %e/%e\n",time0,time1);
271 } else {
272 printf("timing: AMG: MIS: %e\n",time2);
273 }*/
274 }
275 return out;
276 } else {
277 Paso_Solver_AMG_free(out);
278 return NULL;
279 }
280 }
281
282 /**************************************************************/
283
284 /* apply AMG precondition b-> x
285
286 in fact it solves
287
288 [ I 0 ] [ A_FF 0 ] [ I invA_FF*A_FC ] [ x_F ] = [b_F]
289 [ A_CF*invA_FF I ] [ 0 S ] [ 0 I ] [ x_C ] = [b_C]
290
291 in the form
292
293 b->[b_F,b_C]
294 x_F=invA_FF*b_F
295 b_C=b_C-A_CF*x_F
296 x_C=AMG(b_C)
297 b_F=b_F-A_FC*x_C
298 x_F=invA_FF*b_F
299 x<-[x_F,x_C]
300
301 should be called within a parallel region
302 barrier synconization should be performed to make sure that the input vector available
303
304 */
305
306 void Paso_Solver_solveAMG(Paso_Solver_AMG * amg, double * x, double * b) {
307 dim_t i;
308 double *r=MEMALLOC(amg->n,double);
309 double *x0=MEMALLOC(amg->n,double);
310 /*double time0=0;*/
311
312 if (amg->level==0 || amg->n_C<=500) {
313 /*if (amg->n_F<=500) {*/
314 /*time0=Paso_timer();*/
315
316 /*Paso_Solver_solveJacobi(amg->GS,x,b);*/
317
318 /* #pragma omp parallel for private(i) schedule(static)
319 for (i=0;i<amg->n;++i) r[i]=b[i];
320 Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,amg->A,x,1.,r);
321 Paso_Solver_solveGS(amg->GS,x0,r);
322 #pragma omp parallel for private(i) schedule(static)
323 for (i=0;i<amg->n;++i) {
324 x[i]+=x0[i];
325 }
326 */
327 Paso_UMFPACK1(amg->A,x,b,0);
328
329 /*
330 time0=Paso_timer()-time0;
331 fprintf(stderr,"timing: DIRECT SOLVER: %e/\n",time0);
332 */
333 } else {
334 /* presmoothing */
335 Paso_Solver_solveJacobi(amg->GS,x,b);
336
337 /* #pragma omp parallel for private(i) schedule(static)
338 for (i=0;i<amg->n;++i) r[i]=b[i];
339
340 Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,amg->A,x,1.,r);
341 Paso_Solver_solveJacobi(amg->GS,x0,r);
342
343 #pragma omp parallel for private(i) schedule(static)
344 for (i=0;i<amg->n;++i) {
345 x[i]+=x0[i];
346 }
347 */
348 /* end of presmoothing */
349
350 #pragma omp parallel for private(i) schedule(static)
351 for (i=0;i<amg->n;++i) r[i]=b[i];
352
353 /*r=b-Ax*/
354 Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,amg->A,x,1.,r);
355
356 /* b->[b_F,b_C] */
357 #pragma omp parallel for private(i) schedule(static)
358 for (i=0;i<amg->n_F;++i) amg->b_F[i]=r[amg->rows_in_F[i]];
359
360 #pragma omp parallel for private(i) schedule(static)
361 for (i=0;i<amg->n_C;++i) amg->b_C[i]=r[amg->rows_in_C[i]];
362
363 /* x_F=invA_FF*b_F */
364 Paso_Solver_applyBlockDiagonalMatrix(1,amg->n_F,amg->inv_A_FF,amg->A_FF_pivot,amg->x_F,amg->b_F);
365
366 /* b_C=b_C-A_CF*x_F */
367 Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,amg->A_CF,amg->x_F,1.,amg->b_C);
368
369 /* x_C=AMG(b_C) */
370 Paso_Solver_solveAMG(amg->AMG_of_Schur,amg->x_C,amg->b_C);
371
372 /* b_F=b_F-A_FC*x_C */
373 Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,amg->A_FC,amg->x_C,1.,amg->b_F);
374 /* x_F=invA_FF*b_F */
375 Paso_Solver_applyBlockDiagonalMatrix(1,amg->n_F,amg->inv_A_FF,amg->A_FF_pivot,amg->x_F,amg->b_F);
376 /* x<-[x_F,x_C] */
377
378 #pragma omp parallel for private(i) schedule(static)
379 for (i=0;i<amg->n;++i) {
380 if (amg->mask_C[i]>-1) {
381 x[i]=amg->x_C[amg->mask_C[i]];
382 } else {
383 x[i]=amg->x_F[amg->mask_F[i]];
384 }
385 }
386
387 /*postsmoothing*/
388
389 #pragma omp parallel for private(i) schedule(static)
390 for (i=0;i<amg->n;++i) r[i]=b[i];
391
392 /*r=b-Ax */
393 Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,amg->A,x,1.,r);
394 Paso_Solver_solveJacobi(amg->GS,x0,r);
395
396 #pragma omp parallel for private(i) schedule(static)
397 for (i=0;i<amg->n;++i) x[i]+=x0[i];
398
399 /*
400 #pragma omp parallel for private(i) schedule(static)
401 for (i=0;i<amg->n;++i) r[i]=b[i];
402
403 Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,amg->A,x,1.,r);
404 Paso_Solver_solveJacobi(amg->GS,x0,r);
405
406 #pragma omp parallel for private(i) schedule(static)
407 for (i=0;i<amg->n;++i) {
408 x[i]+=x0[i];
409 }
410 */
411 /*end of postsmoothing*/
412
413 }
414 MEMFREE(r);
415 MEMFREE(x0);
416 return;
417 }
418
419 /*
420 * $Log$
421 *
422 */

  ViewVC Help
Powered by ViewVC 1.1.26