/[escript]/trunk/paso/src/Solver_AMG.c
ViewVC logotype

Contents of /trunk/paso/src/Solver_AMG.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1887 - (show annotations)
Wed Oct 15 03:26:25 2008 UTC (10 years, 9 months ago) by ksteube
File MIME type: text/plain
File size: 16383 byte(s)
Fixed two typos that stopped the test suite from running.

Also, gcc 4.3.2 issued several warnings not seen before:
ignoring the return value of fscanf and using the wrong format
with printf.

1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2008 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14
15 /**************************************************************/
16
17 /* Paso: AMG preconditioner with reordering */
18
19 /**************************************************************/
20
21 /* Author: artak@access.edu.au */
22
23 /**************************************************************/
24
25 #include "Paso.h"
26 #include "Solver.h"
27 #include "PasoUtil.h"
28
29 /**************************************************************/
30
31 /* free all memory used by AMG */
32
33 void Paso_Solver_AMG_free(Paso_Solver_AMG * in) {
34 if (in!=NULL) {
35 Paso_Solver_AMG_free(in->AMG_of_Schur);
36 MEMFREE(in->inv_A_FF);
37 MEMFREE(in->A_FF_pivot);
38 Paso_SparseMatrix_free(in->A_FC);
39 Paso_SparseMatrix_free(in->A_CF);
40 MEMFREE(in->rows_in_F);
41 MEMFREE(in->rows_in_C);
42 MEMFREE(in->mask_F);
43 MEMFREE(in->mask_C);
44 MEMFREE(in->x_F);
45 MEMFREE(in->b_F);
46 MEMFREE(in->x_C);
47 MEMFREE(in->b_C);
48 MEMFREE(in);
49 }
50 }
51
52 /**************************************************************/
53
54 /* constructs the block-block factorization of
55
56 [ A_FF A_FC ]
57 A_p=
58 [ A_CF A_FF ]
59
60 to
61
62 [ I 0 ] [ A_FF 0 ] [ I invA_FF*A_FF ]
63 [ A_CF*invA_FF I ] [ 0 S ] [ 0 I ]
64
65
66 where S=A_FF-ACF*invA_FF*A_FC within the shape of S
67
68 then AMG is applied to S again until S becomes empty
69
70 */
71 Paso_Solver_AMG* Paso_Solver_getAMG(Paso_SparseMatrix *A_p,bool_t verbose) {
72 Paso_Solver_AMG* out=NULL;
73 dim_t n=A_p->numRows;
74 dim_t n_block=A_p->row_block_size;
75 index_t* mis_marker=NULL;
76 index_t* counter=NULL;
77 double *rs=NULL;
78 index_t iPtr,*index, *where_p, iPtr_s;
79 dim_t i,k,j,j0;
80 Paso_SparseMatrix * schur=NULL;
81 Paso_SparseMatrix * schur_withFillIn=NULL;
82 double A11,A12,A13,A21,A22,A23,A31,A32,A33,D,time0,time1,time2;
83 schur_withFillIn=MEMALLOC(1,Paso_SparseMatrix);
84
85
86 /* identify independend set of rows/columns */
87 mis_marker=TMPMEMALLOC(n,index_t);
88 counter=TMPMEMALLOC(n,index_t);
89 rs=TMPMEMALLOC(n,double);
90 out=MEMALLOC(1,Paso_Solver_AMG);
91 out->AMG_of_Schur=NULL;
92 out->inv_A_FF=NULL;
93 out->A_FF_pivot=NULL;
94 out->A_FC=NULL;
95 out->A_CF=NULL;
96 out->rows_in_F=NULL;
97 out->rows_in_C=NULL;
98 out->mask_F=NULL;
99 out->mask_C=NULL;
100 out->x_F=NULL;
101 out->b_F=NULL;
102 out->x_C=NULL;
103 out->b_C=NULL;
104
105 /* fprintf(stderr,"START OF MATRIX \n\n");
106 for (i = 0; i < A_p->numRows; ++i) {
107 for (iPtr=A_p->pattern->ptr[i];iPtr<A_p->pattern->ptr[i + 1]; ++iPtr) {
108 j=A_p->pattern->index[iPtr];
109 fprintf(stderr,"A[%d,%d]=%.2f ",i,j,A_p->val[iPtr]);
110 }
111 fprintf(stderr,"\n");
112 }
113 fprintf(stderr,"END OF MATRIX \n\n");
114 */
115 if ( !(Paso_checkPtr(mis_marker) || Paso_checkPtr(out) || Paso_checkPtr(counter) ) ) {
116 /* identify independend set of rows/columns */
117 time0=Paso_timer();
118 #pragma omp parallel for private(i) schedule(static)
119 for (i=0;i<n;++i) mis_marker[i]=-1;
120 Paso_Pattern_coup(A_p,mis_marker);
121 time2=Paso_timer()-time0;
122 if (Paso_noError()) {
123 #pragma omp parallel for private(i) schedule(static)
124 for (i = 0; i < n; ++i) counter[i]=mis_marker[i];
125 out->n=n;
126 out->n_block=n_block;
127 out->n_F=Paso_Util_cumsum(n,counter);
128 out->mask_F=MEMALLOC(n,index_t);
129 out->rows_in_F=MEMALLOC(out->n_F,index_t);
130 out->inv_A_FF=MEMALLOC(n_block*n_block*out->n_F,double);
131 out->A_FF_pivot=NULL; /* later use for block size>3 */
132 if (! (Paso_checkPtr(out->mask_F) || Paso_checkPtr(out->inv_A_FF) || Paso_checkPtr(out->rows_in_F) ) ) {
133 #pragma omp parallel
134 {
135 /* creates an index for F from mask */
136 #pragma omp for private(i) schedule(static)
137 for (i = 0; i < out->n_F; ++i) out->rows_in_F[i]=-1;
138 #pragma omp for private(i) schedule(static)
139 for (i = 0; i < n; ++i) {
140 if (mis_marker[i]) {
141 out->rows_in_F[counter[i]]=i;
142 out->mask_F[i]=counter[i];
143 } else {
144 out->mask_F[i]=-1;
145 }
146 }
147 /* Compute row-sum for getting rs(A_FF)*/
148 #pragma omp for private(i,iPtr) schedule(static)
149 for (i = 0; i < out->n_F; ++i) {
150 rs[i]=0;
151 for (iPtr=A_p->pattern->ptr[out->rows_in_F[i]];iPtr<A_p->pattern->ptr[out->rows_in_F[i] + 1]; ++iPtr) {
152 rs[i]+=A_p->val[iPtr];
153 }
154 }
155
156 #pragma omp for private(i, where_p,iPtr,A11,A12,A13,A21,A22,A23,A31,A32,A33,D,index) schedule(static)
157 for (i = 0; i < out->n_F; i++) {
158 /* find main diagonal */
159 iPtr=A_p->pattern->ptr[out->rows_in_F[i]];
160 index=&(A_p->pattern->index[iPtr]);
161 where_p=(index_t*)bsearch(&out->rows_in_F[i],
162 index,
163 A_p->pattern->ptr[out->rows_in_F[i] + 1]-A_p->pattern->ptr[out->rows_in_F[i]],
164 sizeof(index_t),
165 Paso_comparIndex);
166 if (where_p==NULL) {
167 Paso_setError(VALUE_ERROR, "Paso_Solver_getAMG: main diagonal element missing.");
168 } else {
169 iPtr+=(index_t)(where_p-index);
170 /* get inverse of A_FF block: */
171 if (ABS(rs[i])>0.) {
172 out->inv_A_FF[i]=1./rs[i];
173 } else {
174 Paso_setError(ZERO_DIVISION_ERROR, "Paso_Solver_getAMG: Break-down in AMG decomposition: non-regular main diagonal block.");
175 }
176 }
177 }
178 } /* end parallel region */
179
180 if( Paso_noError()) {
181 /* if there are no nodes in the coarse level there is no more work to do */
182 out->n_C=n-out->n_F;
183 if (out->n_C>0) {
184 out->rows_in_C=MEMALLOC(out->n_C,index_t);
185 out->mask_C=MEMALLOC(n,index_t);
186 if (! (Paso_checkPtr(out->mask_C) || Paso_checkPtr(out->rows_in_C) ) ) {
187 /* creates an index for C from mask */
188 #pragma omp parallel for private(i) schedule(static)
189 for (i = 0; i < n; ++i) counter[i]=! mis_marker[i];
190 Paso_Util_cumsum(n,counter);
191 #pragma omp parallel
192 {
193 #pragma omp for private(i) schedule(static)
194 for (i = 0; i < out->n_C; ++i) out->rows_in_C[i]=-1;
195 #pragma omp for private(i) schedule(static)
196 for (i = 0; i < n; ++i) {
197 if (! mis_marker[i]) {
198 out->rows_in_C[counter[i]]=i;
199 out->mask_C[i]=counter[i];
200 } else {
201 out->mask_C[i]=-1;
202 }
203 }
204 } /* end parallel region */
205 /* get A_CF block: */
206 out->A_CF=Paso_SparseMatrix_getSubmatrix(A_p,out->n_C,out->n_F,out->rows_in_C,out->mask_F);
207 if (Paso_noError()) {
208 /* get A_FC block: */
209 out->A_FC=Paso_SparseMatrix_getSubmatrix(A_p,out->n_F,out->n_C,out->rows_in_F,out->mask_C);
210 /* get A_CC block: */
211 if (Paso_noError()) {
212 schur=Paso_SparseMatrix_getSubmatrix(A_p,out->n_C,out->n_C,out->rows_in_C,out->mask_C);
213
214 /*find the pattern of the schur complement with fill in*/
215 schur_withFillIn=Paso_SparseMatrix_alloc(A_p->type,Paso_Pattern_binop(PATTERN_FORMAT_DEFAULT, schur->pattern, Paso_Pattern_multiply(PATTERN_FORMAT_DEFAULT,out->A_CF->pattern,out->A_FC->pattern)),1,1);
216
217 /* copy values over*/
218 #pragma omp for private(i,iPtr,iPtr_s,j,j0) schedule(static)
219 for (i = 0; i < schur_withFillIn->numRows; ++i) {
220 for (iPtr=schur_withFillIn->pattern->ptr[i];iPtr<schur_withFillIn->pattern->ptr[i + 1]; ++iPtr) {
221 j=schur_withFillIn->pattern->index[iPtr];
222 schur_withFillIn->val[iPtr]=0.;
223 for (iPtr_s=schur->pattern->ptr[i];iPtr_s<schur->pattern->ptr[i + 1]; ++iPtr_s){
224 j0=schur->pattern->index[iPtr_s];
225 if (j==j0) {
226 schur_withFillIn->val[iPtr]=schur->val[iPtr_s];
227 break;
228 }
229 }
230 }
231 }
232
233 /* for (i = 0; i < schur_withFillIn->numRows; ++i) {
234 for (iPtr=schur_withFillIn->pattern->ptr[i];iPtr<schur_withFillIn->pattern->ptr[i + 1]; ++iPtr) {
235 j=schur_withFillIn->pattern->index[iPtr];
236 fprintf(stderr,"A_CC[%d,%d]=%.2f ",i,j,schur_withFillIn->val[iPtr]);
237 }
238 fprintf(stderr,"\n");
239 }*/
240 time0=Paso_timer()-time0;
241 if (Paso_noError()) {
242 time1=Paso_timer();
243 /* update A_CC block to get Schur complement and then apply AMG to it */
244 Paso_Solver_updateIncompleteSchurComplement(schur_withFillIn,out->A_CF,out->inv_A_FF,out->A_FF_pivot,out->A_FC);
245 time1=Paso_timer()-time1;
246 out->AMG_of_Schur=Paso_Solver_getAMG(schur_withFillIn,verbose);
247
248 Paso_SparseMatrix_free(schur);
249 Paso_SparseMatrix_free(schur_withFillIn);
250 }
251 /* allocate work arrays for AMG application */
252 if (Paso_noError()) {
253 out->x_F=MEMALLOC(n_block*out->n_F,double);
254 out->b_F=MEMALLOC(n_block*out->n_F,double);
255 out->x_C=MEMALLOC(n_block*out->n_C,double);
256 out->b_C=MEMALLOC(n_block*out->n_C,double);
257 if (! (Paso_checkPtr(out->x_F) || Paso_checkPtr(out->b_F) || Paso_checkPtr(out->x_C) || Paso_checkPtr(out->b_C) ) ) {
258 #pragma omp parallel
259 {
260 #pragma omp for private(i,k) schedule(static)
261 for (i = 0; i < out->n_F; ++i) {
262 for (k=0; k<n_block;++k) {
263 out->x_F[i*n_block+k]=0.;
264 out->b_F[i*n_block+k]=0.;
265 }
266 }
267 #pragma omp for private(i,k) schedule(static)
268 for (i = 0; i < out->n_C; ++i) {
269 for (k=0; k<n_block;++k) {
270 out->x_C[i*n_block+k]=0.;
271 out->b_C[i*n_block+k]=0.;
272 }
273 }
274 } /* end parallel region */
275 }
276 }
277 }
278 }
279 }
280 }
281 }
282 }
283 }
284 }
285 TMPMEMFREE(mis_marker);
286 TMPMEMFREE(counter);
287 TMPMEMFREE(rs);
288 if (Paso_noError()) {
289 if (verbose) {
290 printf("AMG: %d unknowns eliminated. %d left.\n",out->n_F,n-out->n_F);
291 if (out->n_C>0) {
292 printf("timing: AMG: MIS/reordering/elemination : %e/%e/%e\n",time2,time0,time1);
293 } else {
294 printf("timing: AMG: MIS: %e\n",time2);
295 }
296 }
297 return out;
298 } else {
299 Paso_Solver_AMG_free(out);
300 return NULL;
301 }
302 }
303
304 /**************************************************************/
305
306 /* apply AMG precondition b-> x
307
308 in fact it solves
309
310 [ I 0 ] [ A_FF 0 ] [ I invA_FF*A_FF ] [ x_F ] = [b_F]
311 [ A_CF*invA_FF I ] [ 0 S ] [ 0 I ] [ x_C ] = [b_C]
312
313 in the form
314
315 b->[b_F,b_C]
316 x_F=invA_FF*b_F
317 b_C=b_C-A_CF*x_F
318 x_C=AMG(b_C)
319 b_F=b_F-A_FC*x_C
320 x_F=invA_FF*b_F
321 x<-[x_F,x_C]
322
323 should be called within a parallel region
324 barrier synconization should be performed to make sure that the input vector available
325
326 */
327
328 void Paso_Solver_solveAMG(Paso_Solver_AMG * amg, double * x, double * b) {
329 dim_t i,k;
330 dim_t n_block=amg->n_block;
331
332 if (amg->n_C==0) {
333 /* x=invA_FF*b */
334 Paso_Solver_applyBlockDiagonalMatrix(n_block,amg->n_F,amg->inv_A_FF,amg->A_FF_pivot,x,b);
335 } else {
336 /* presmoothing on (Shure, x, b, r) */
337 /* b->[b_F,b_C] */
338 if (n_block==1) {
339 #pragma omp parallel for private(i) schedule(static)
340 for (i=0;i<amg->n_F;++i) amg->b_F[i]=b[amg->rows_in_F[i]];
341 #pragma omp parallel for private(i) schedule(static)
342 for (i=0;i<amg->n_C;++i) amg->b_C[i]=b[amg->rows_in_C[i]];
343 } else {
344 #pragma omp parallel for private(i,k) schedule(static)
345 for (i=0;i<amg->n_F;++i)
346 for (k=0;k<n_block;k++) amg->b_F[amg->n_block*i+k]=b[n_block*amg->rows_in_F[i]+k];
347 #pragma omp parallel for private(i,k) schedule(static)
348 for (i=0;i<amg->n_C;++i)
349 for (k=0;k<n_block;k++) amg->b_C[amg->n_block*i+k]=b[n_block*amg->rows_in_C[i]+k];
350 }
351 /* x_F=invA_FF*b_F */
352 Paso_Solver_applyBlockDiagonalMatrix(n_block,amg->n_F,amg->inv_A_FF,amg->A_FF_pivot,amg->x_F,amg->b_F);
353 /* b_C=b_C-A_CF*x_F */
354 Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,amg->A_CF,amg->x_F,1.,amg->b_C);
355 /* x_C=AMG(b_C) */
356 Paso_Solver_solveAMG(amg->AMG_of_Schur,amg->x_C,amg->b_C);
357 /* b_F=b_F-A_FC*x_C */
358 Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,amg->A_FC,amg->x_C,1.,amg->b_F);
359 /* x_F=invA_FF*b_F */
360 Paso_Solver_applyBlockDiagonalMatrix(n_block,amg->n_F,amg->inv_A_FF,amg->A_FF_pivot,amg->x_F,amg->b_F);
361 /* x<-[x_F,x_C] */
362 if (n_block==1) {
363 #pragma omp parallel for private(i) schedule(static)
364 for (i=0;i<amg->n;++i) {
365 if (amg->mask_C[i]>-1) {
366 x[i]=amg->x_C[amg->mask_C[i]];
367 } else {
368 x[i]=amg->x_F[amg->mask_F[i]];
369 }
370 }
371 } else {
372 #pragma omp parallel for private(i,k) schedule(static)
373 for (i=0;i<amg->n;++i) {
374 if (amg->mask_C[i]>-1) {
375 for (k=0;k<n_block;k++) x[n_block*i+k]=amg->x_C[n_block*amg->mask_C[i]+k];
376 } else {
377 for (k=0;k<n_block;k++) x[n_block*i+k]=amg->x_F[n_block*amg->mask_F[i]+k];
378 }
379 }
380 }
381 /* all done */
382 }
383 return;
384 }
385
386 /*
387 * $Log$
388 *
389 */

  ViewVC Help
Powered by ViewVC 1.1.26