/[escript]/trunk/paso/src/AML.c
ViewVC logotype

Contents of /trunk/paso/src/AML.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3284 - (show annotations)
Mon Oct 18 23:48:04 2010 UTC (9 years ago) by gross
File MIME type: text/plain
File size: 30372 byte(s)
old AMLI code collected in a single file. It does not compile and should be merged with RecILU
1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2010 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14
15 /**************************************************************/
16
17 /* Paso: AML preconditioner:
18
19 This is just a collection of older code. This does not compile.
20
21 */
22
23 /**************************************************************/
24
25 /* Author: artak@uq.edu.au */
26
27 /**************************************************************/
28
29 #include "Paso.h"
30 #include "Preconditioner.h"
31 #include "Options.h"
32 #include "PasoUtil.h"
33 #include "UMFPACK.h"
34 #include "MKL.h"
35 #include "SystemMatrix.h"
36 #include "Coarsening.h"
37 #include "BlockOps.h"
38
39 #ifndef INC_COARSE
40 #define INC_COARSE
41
42 #include "SystemMatrix.h"
43
44
45 /* Remove:
46 #define PASO_COARSENING_IN_F TRUE
47 #define PASO_COARSENING_IN_C FALSE
48 */
49
50 void Paso_Coarsening_Local(index_t* marker_F, Paso_SparseMatrix* A, Paso_Options* options);
51 void Paso_Coarsening_Local_Aggregation(Paso_SparseMatrix* A, index_t* marker_F, const double theta);
52 void Paso_Coarsening_Local_Aggregation_blk(Paso_SparseMatrix* A, index_t* marker_F, const double theta);
53 void Paso_Coarsening_Local_YS(Paso_SparseMatrix* A, index_t* marker_F, const double theta);
54 void Paso_Coarsening_Local_YS_blk(Paso_SparseMatrix* A, index_t* marker_F, const double theta);
55
56 void Paso_Coarsening_Local_RS(Paso_SparseMatrix* A, index_t* marker_F, double theta);
57
58 void Paso_Coarsening_Local_Partition(Paso_Pattern* pattern,index_t* marker_F);
59 void Paso_Coarsening_Local_greedy(Paso_Pattern* pattern, index_t* marker_F);
60
61
62
63 /*=== REVISE ============*/
64 void Paso_Coarsening_Local_greedy_color(Paso_Pattern* pattern, index_t* marker_F);
65 void Paso_Coarsening_Local_greedy_diag(Paso_SparseMatrix* A, index_t* marker_F, double thershold);
66
67 void Paso_Coarsening_Local_YS_plus(Paso_SparseMatrix* A, index_t* marker_F, double alpha, double taw, double delta);
68 void Paso_Coarsening_Local_Standard(Paso_SparseMatrix* A, index_t* marker_F, double theta);
69 void Paso_Coarsening_Local_greedy_RS(Paso_SparseMatrix* A, index_t* marker_F, double theta);
70 void Paso_Coarsening_Local_greedy_Agg(Paso_SparseMatrix* A, index_t* marker_F, double theta);
71 /*dim_t how_many(dim_t n,dim_t* S_i, int value1, dim_t* addedSet, int value2);*/
72 void Paso_Coarsening_Local_Standard_Block(Paso_SparseMatrix* A, index_t* marker_F, double theta);
73
74 dim_t how_many(dim_t i,Paso_Pattern * S, bool_t transpose);
75 dim_t arg_max(dim_t n, dim_t* lambda, dim_t mask);
76 Paso_Pattern* Paso_Coarsening_Local_getTranspose(Paso_Pattern* P);
77
78 void Paso_Coarsening_Local_getReport(dim_t n,index_t* marker_F);
79 void Paso_Coarsening_Local_Read(char *fileName,dim_t n,index_t* marker_F);
80 void Paso_Coarsening_Local_Write(char *fileName,dim_t n,index_t* marker_F);
81
82
83 #endif
84
85
86 /*************************************************************,amli->b_F*/
87
88 /* free all memory used by AMLI */
89
90 void Paso_Solver_AMLI_System_free(Paso_Solver_AMLI_System * in) {
91 dim_t i;
92 if (in!=NULL) {
93 for (i=0;i<in->block_size;++i) {
94 Paso_Solver_AMLI_free(in->amliblock[i]);
95 Paso_SparseMatrix_free(in->block[i]);
96 }
97 MEMFREE(in);
98 }
99 }
100
101
102 /* free all memory used by AMLI */
103
104 void Paso_Solver_AMLI_free(Paso_Solver_AMLI * in) {
105 if (in!=NULL) {
106 Paso_Preconditioner_LocalSmoother_free(in->Smoother);
107
108 MEMFREE(in->inv_A_FF);
109 MEMFREE(in->A_FF_pivot);
110 Paso_SparseMatrix_free(in->A_FC);
111 Paso_SparseMatrix_free(in->A_CF);
112 Paso_SparseMatrix_free(in->A);
113 if(in->coarsest_level==TRUE) {
114 #ifdef MKL
115 Paso_MKL_free1(in->AOffset1);
116 Paso_SparseMatrix_free(in->AOffset1);
117 #else
118 #ifdef UMFPACK
119 Paso_UMFPACK1_free((Paso_UMFPACK_Handler*)(in->solver));
120 #endif
121 #endif
122 }
123 MEMFREE(in->rows_in_F);
124 MEMFREE(in->rows_in_C);
125 MEMFREE(in->mask_F);
126 MEMFREE(in->mask_C);
127 MEMFREE(in->x_F);
128 MEMFREE(in->b_F);
129 MEMFREE(in->x_C);
130 MEMFREE(in->b_C);
131 in->solver=NULL;
132 Paso_Solver_AMLI_free(in->AMLI_of_Schur);
133 MEMFREE(in->b_C);
134 MEMFREE(in);
135 }
136 }
137
138 /**************************************************************/
139
140 /* constructs the block-block factorization of
141
142 [ A_FF A_FC ]
143 A_p=
144 [ A_CF A_FF ]
145
146 to
147
148 [ I 0 ] [ A_FF 0 ] [ I invA_FF*A_FF ]
149 [ A_CF*invA_FF I ] [ 0 S ] [ 0 I ]
150
151
152 where S=A_FF-ACF*invA_FF*A_FC within the shape of S
153
154 then AMLI is applied to S again until S becomes empty
155
156 */
157 Paso_Solver_AMLI* Paso_Solver_getAMLI(Paso_SparseMatrix *A_p,dim_t level,Paso_Options* options) {
158 Paso_Solver_AMLI* out=NULL;
159 Paso_Pattern* temp1=NULL;
160 Paso_Pattern* temp2=NULL;
161 bool_t verbose=options->verbose;
162 dim_t n=A_p->numRows;
163 dim_t n_block=A_p->row_block_size;
164 index_t* mis_marker=NULL;
165 index_t* counter=NULL;
166 index_t iPtr,*index, *where_p, iPtr_s;
167 dim_t i,j;
168 Paso_SparseMatrix * schur=NULL;
169 Paso_SparseMatrix * schur_withFillIn=NULL;
170 double S=0;
171
172
173 /* char filename[8];
174 sprintf(filename,"AMLILevel%d",level);
175
176 Paso_SparseMatrix_saveMM(A_p,filename);
177 */
178
179 /*Make sure we have block sizes 1*/
180 if (A_p->col_block_size>1) {
181 Esys_setError(TYPE_ERROR,"Paso_Solver_getAMLI: AMLI requires column block size 1.");
182 return NULL;
183 }
184 if (A_p->row_block_size>1) {
185 Esys_setError(TYPE_ERROR,"Paso_Solver_getAMLI: AMLI requires row block size 1.");
186 return NULL;
187 }
188 out=MEMALLOC(1,Paso_Solver_AMLI);
189 /* identify independend set of rows/columns */
190 mis_marker=TMPMEMALLOC(n,index_t);
191 counter=TMPMEMALLOC(n,index_t);
192 if ( !( Esys_checkPtr(mis_marker) || Esys_checkPtr(counter) || Esys_checkPtr(out)) ) {
193 out->AMLI_of_Schur=NULL;
194 out->inv_A_FF=NULL;
195 out->A_FF_pivot=NULL;
196 out->A_FC=NULL;
197 out->A_CF=NULL;
198 out->rows_in_F=NULL;
199 out->rows_in_C=NULL;
200 out->mask_F=NULL;
201 out->mask_C=NULL;
202 out->x_F=NULL;
203 out->b_F=NULL;
204 out->x_C=NULL;
205 out->b_C=NULL;
206 out->A=Paso_SparseMatrix_getReference(A_p);
207 out->Smoother=NULL;
208 out->solver=NULL;
209 /*out->GS=Paso_Solver_getGS(A_p,verbose);*/
210 out->level=level;
211 out->n=n;
212 out->n_F=n+1;
213 out->n_block=n_block;
214 out->post_sweeps=options->post_sweeps;
215 out->pre_sweeps=options->pre_sweeps;
216
217 if (level==0 || n<=options->min_coarse_matrix_size) {
218 out->coarsest_level=TRUE;
219 #ifdef MKL
220 out->AOffset1=Paso_SparseMatrix_alloc(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_OFFSET1, out->A->pattern,1,1, FALSE);
221 #pragma omp parallel for private(i) schedule(static)
222 for (i=0;i<out->A->len;++i) {
223 out->AOffset1->val[i]=out->A->val[i];
224 }
225 #else
226 #ifdef UMFPACK
227 #else
228 out->Smoother=Paso_Preconditioner_LocalSmoother_alloc(A_p,TRUE,verbose);
229 #endif
230 #endif
231 } else {
232 out->coarsest_level=FALSE;
233 out->Smoother=Paso_Preconditioner_LocalSmoother_alloc(A_p,TRUE,verbose);
234
235 Paso_Coarsening_Local(mis_marker,A_p, options->coarsening_threshold, options->coarsening_method);
236
237 #pragma omp parallel for private(i) schedule(static)
238 for (i = 0; i < n; ++i) {
239 mis_marker[i]=(mis_marker[i]== PASO_COARSENING_IN_F);
240 counter[i]=mis_marker[i];
241
242 }
243
244 out->n_F=Paso_Util_cumsum(n,counter);
245
246 if (out->n_F==0) {
247 out->coarsest_level=TRUE;
248 level=0;
249 if (verbose) {
250 /*printf("AMLI coarsening eliminates all unknowns, switching to Jacobi preconditioner.\n");*/
251 printf("AMLI coarsening does not eliminate any of the unknowns, switching to Jacobi preconditioner.\n");
252 }
253 }
254 else if (out->n_F==n) {
255 out->coarsest_level=TRUE;
256 level=0;
257 if (verbose) {
258 /*printf("AMLI coarsening eliminates all unknowns, switching to Jacobi preconditioner.\n");*/
259 printf("AMLI coarsening eliminates all of the unknowns, switching to Jacobi preconditioner.\n");
260
261 }
262 } else {
263
264 if (Esys_noError()) {
265
266 /*#pragma omp parallel for private(i) schedule(static)
267 for (i = 0; i < n; ++i) counter[i]=mis_marker[i];
268 out->n_F=Paso_Util_cumsum(n,counter);
269 */
270
271 /*if(level==3) {
272 printf("##TOTAL: %d, ELIMINATED: %d\n",n,out->n_F);
273 for (i = 0; i < n; ++i) {
274 printf("##%d %d\n",i,mis_marker[i]);
275 }
276 }*/
277
278 out->mask_F=MEMALLOC(n,index_t);
279 out->rows_in_F=MEMALLOC(out->n_F,index_t);
280 out->inv_A_FF=MEMALLOC(n_block*n_block*out->n_F,double);
281 out->A_FF_pivot=NULL; /* later use for block size>3 */
282 if (! (Esys_checkPtr(out->mask_F) || Esys_checkPtr(out->inv_A_FF) || Esys_checkPtr(out->rows_in_F) ) ) {
283 /* creates an index for F from mask */
284 #pragma omp parallel for private(i) schedule(static)
285 for (i = 0; i < out->n_F; ++i) out->rows_in_F[i]=-1;
286 #pragma omp parallel for private(i) schedule(static)
287 for (i = 0; i < n; ++i) {
288 if (mis_marker[i]) {
289 out->rows_in_F[counter[i]]=i;
290 out->mask_F[i]=counter[i];
291 } else {
292 out->mask_F[i]=-1;
293 }
294 }
295
296 /* Compute row-sum for getting rs(A_FF)^-1*/
297 #pragma omp parallel for private(i,iPtr,j,S) schedule(static)
298 for (i = 0; i < out->n_F; ++i) {
299 S=0;
300 /*printf("[%d ]: [%d] -> ",i, out->rows_in_F[i]);*/
301 for (iPtr=A_p->pattern->ptr[out->rows_in_F[i]];iPtr<A_p->pattern->ptr[out->rows_in_F[i] + 1]; ++iPtr) {
302 j=A_p->pattern->index[iPtr];
303 /*if (j==out->rows_in_F[i]) printf("diagonal %e",A_p->val[iPtr]);*/
304 if (mis_marker[j])
305 S+=A_p->val[iPtr];
306 }
307 /*printf("-> %e \n",S);*/
308 out->inv_A_FF[i]=1./S;
309 }
310 }
311 }
312
313 /*check whether coarsening process actually makes sense to continue.
314 if coarse matrix at least smaller by 30% then continue, otherwise we stop.*/
315 if ((out->n_F*100/n)<30) {
316 level=1;
317 }
318
319 if ( Esys_noError()) {
320 /* if there are no nodes in the coarse level there is no more work to do */
321 out->n_C=n-out->n_F;
322
323 /*if (out->n_F>500) */
324 out->rows_in_C=MEMALLOC(out->n_C,index_t);
325 out->mask_C=MEMALLOC(n,index_t);
326 if (! (Esys_checkPtr(out->mask_C) || Esys_checkPtr(out->rows_in_C) ) ) {
327 /* creates an index for C from mask */
328 #pragma omp parallel for private(i) schedule(static)
329 for (i = 0; i < n; ++i) counter[i]=! mis_marker[i];
330 Paso_Util_cumsum(n,counter);
331 #pragma omp parallel for private(i) schedule(static)
332 for (i = 0; i < out->n_C; ++i) out->rows_in_C[i]=-1;
333 #pragma omp parallel for private(i) schedule(static)
334 for (i = 0; i < n; ++i) {
335 if (! mis_marker[i]) {
336 out->rows_in_C[counter[i]]=i;
337 out->mask_C[i]=counter[i];
338 } else {
339 out->mask_C[i]=-1;
340 }
341 }
342 }
343 }
344 if ( Esys_noError()) {
345 /* get A_CF block: */
346 out->A_CF=Paso_SparseMatrix_getSubmatrix(A_p,out->n_C,out->n_F,out->rows_in_C,out->mask_F);
347 /* get A_FC block: */
348 out->A_FC=Paso_SparseMatrix_getSubmatrix(A_p,out->n_F,out->n_C,out->rows_in_F,out->mask_C);
349 /* get A_CC block: */
350 schur=Paso_SparseMatrix_getSubmatrix(A_p,out->n_C,out->n_C,out->rows_in_C,out->mask_C);
351
352 }
353 if ( Esys_noError()) {
354 /*find the pattern of the schur complement with fill in*/
355 temp1=Paso_Pattern_multiply(PATTERN_FORMAT_DEFAULT,out->A_CF->pattern,out->A_FC->pattern);
356 temp2=Paso_Pattern_binop(PATTERN_FORMAT_DEFAULT, schur->pattern, temp1);
357 schur_withFillIn=Paso_SparseMatrix_alloc(A_p->type,temp2,1,1, TRUE);
358 Paso_Pattern_free(temp1);
359 Paso_Pattern_free(temp2);
360 }
361 if ( Esys_noError()) {
362 /* copy values over*/
363 #pragma omp parallel for private(i,iPtr,j,iPtr_s,index,where_p) schedule(static)
364 for (i = 0; i < schur_withFillIn->numRows; ++i) {
365 for (iPtr=schur_withFillIn->pattern->ptr[i];iPtr<schur_withFillIn->pattern->ptr[i + 1]; ++iPtr) {
366 j=schur_withFillIn->pattern->index[iPtr];
367 iPtr_s=schur->pattern->ptr[i];
368 index=&(schur->pattern->index[iPtr_s]);
369 where_p=(index_t*)bsearch(&j,
370 index,
371 schur->pattern->ptr[i + 1]-schur->pattern->ptr[i],
372 sizeof(index_t),
373 Paso_comparIndex);
374 if (where_p!=NULL) {
375 schur_withFillIn->val[iPtr]=schur->val[iPtr_s+(index_t)(where_p-index)];
376 }
377 }
378 }
379 Paso_Solver_updateIncompleteSchurComplement(schur_withFillIn,out->A_CF,out->inv_A_FF,out->A_FF_pivot,out->A_FC);
380 out->AMLI_of_Schur=Paso_Solver_getAMLI(schur_withFillIn,level-1,options);
381 }
382 /* allocate work arrays for AMLI application */
383 if (Esys_noError()) {
384 out->x_F=MEMALLOC(n_block*out->n_F,double);
385 out->b_F=MEMALLOC(n_block*out->n_F,double);
386 out->x_C=MEMALLOC(n_block*out->n_C,double);
387 out->b_C=MEMALLOC(n_block*out->n_C,double);
388
389 if (! (Esys_checkPtr(out->x_F) || Esys_checkPtr(out->b_F) || Esys_checkPtr(out->x_C) || Esys_checkPtr(out->b_C) ) ) {
390 #pragma omp parallel for private(i) schedule(static)
391 for (i = 0; i < out->n_F; ++i) {
392 out->x_F[i]=0.;
393 out->b_F[i]=0.;
394 }
395 #pragma omp parallel for private(i) schedule(static)
396 for (i = 0; i < out->n_C; ++i) {
397 out->x_C[i]=0.;
398 out->b_C[i]=0.;
399 }
400 }
401 }
402 Paso_SparseMatrix_free(schur);
403 Paso_SparseMatrix_free(schur_withFillIn);
404 }
405 }
406 }
407 TMPMEMFREE(mis_marker);
408 TMPMEMFREE(counter);
409
410 if (Esys_noError()) {
411 if (verbose && level>0 && !out->coarsest_level) {
412 printf("AMLI: level: %d: %d unknowns eliminated. %d left.\n",level, out->n_F,out->n_C);
413 }
414 return out;
415 } else {
416 Paso_Solver_AMLI_free(out);
417 return NULL;
418 }
419 }
420
421 /**************************************************************/
422
423 /* apply AMLI precondition b-> x
424
425 in fact it solves
426
427 [ I 0 ] [ A_FF 0 ] [ I invA_FF*A_FC ] [ x_F ] = [b_F]
428 [ A_CF*invA_FF I ] [ 0 S ] [ 0 I ] [ x_C ] = [b_C]
429
430 in the form
431
432 b->[b_F,b_C]
433 x_F=invA_FF*b_F
434 b_C=b_C-A_CF*x_F
435 x_C=AMLI(b_C)
436 b_F=b_F-A_FC*x_C
437 x_F=invA_FF*b_F
438 x<-[x_F,x_C]
439
440 should be called within a parallel region
441 barrier synconization should be performed to make sure that the input vector available
442
443 */
444
445 void Paso_Solver_solveAMLI(Paso_Solver_AMLI * amli, double * x, double * b) {
446 dim_t i;
447 double time0=0;
448 double *r=NULL, *x0=NULL,*x_F_temp=NULL;
449 bool_t verbose=0;
450
451 dim_t post_sweeps=amli->post_sweeps;
452 dim_t pre_sweeps=amli->pre_sweeps;
453
454 #ifdef UMFPACK
455 Paso_UMFPACK_Handler * ptr=NULL;
456 #endif
457
458 r=MEMALLOC(amli->n,double);
459 x0=MEMALLOC(amli->n,double);
460 x_F_temp=MEMALLOC(amli->n_F,double);
461
462 if (amli->coarsest_level) {
463
464 time0=Esys_timer();
465 /*If all unknown are eliminated then Jacobi is the best preconditioner*/
466 if (amli->n_F==0 || amli->n_F==amli->n) {
467 Paso_Preconditioner_LocalSmoother_solve(amli->A, amli->Smoother,x,b,1,FALSE);
468 }
469 else {
470 #ifdef MKL
471 Paso_MKL1(amli->AOffset1,x,b,verbose);
472 #else
473 #ifdef UMFPACK
474 ptr=(Paso_UMFPACK_Handler *)(amli->solver);
475 Paso_UMFPACK1(&ptr,amli->A,x,b,verbose);
476 amli->solver=(void*) ptr;
477 #else
478 Paso_Preconditioner_LocalSmoother_solve(amli->A,amli->Smoother,x,b,1,FALSE);
479 #endif
480 #endif
481 }
482 time0=Esys_timer()-time0;
483 if (verbose) fprintf(stderr,"timing: DIRECT SOLVER: %e\n",time0);
484
485 } else {
486 /* presmoothing */
487 time0=Esys_timer();
488 Paso_Preconditioner_LocalSmoother_solve(amli->A,amli->Smoother,x,b,pre_sweeps,FALSE);
489 time0=Esys_timer()-time0;
490 if (verbose) fprintf(stderr,"timing: Presmooting: %e\n",time0);
491 /* end of presmoothing */
492
493
494 time0=Esys_timer();
495 #pragma omp parallel for private(i) schedule(static)
496 for (i=0;i<amli->n;++i) r[i]=b[i];
497
498 /*r=b-Ax*/
499 Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,amli->A,x,1.,r);
500
501 /* r->[b_F,b_C] */
502 #pragma omp parallel for private(i) schedule(static)
503 for (i=0;i<amli->n_F;++i) amli->b_F[i]=r[amli->rows_in_F[i]];
504
505 #pragma omp parallel for private(i) schedule(static)
506 for (i=0;i<amli->n_C;++i) amli->b_C[i]=r[amli->rows_in_C[i]];
507
508 /* x_F=invA_FF*b_F */
509 Paso_Copy(amli->n_F, amli->x_F,amli->b_F);
510 Paso_BlockOps_allMV(1,amli->n_F,amli->inv_A_FF,amli->A_FF_pivot,amli->x_F);
511
512 /* b_C=b_C-A_CF*x_F */
513 Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,amli->A_CF,amli->x_F,1.,amli->b_C);
514
515 time0=Esys_timer()-time0;
516 if (verbose) fprintf(stderr,"timing: Before next level: %e\n",time0);
517
518 /* x_C=AMLI(b_C) */
519 Paso_Solver_solveAMLI(amli->AMLI_of_Schur,amli->x_C,amli->b_C);
520
521 time0=Esys_timer();
522
523 /* b_F=-A_FC*x_C */
524 Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,amli->A_FC,amli->x_C,0.,amli->b_F);
525 /* x_F_temp=invA_FF*b_F */
526 Paso_Copy(amli->n_F, x_F_temp,amli->b_F);
527 Paso_BlockOps_allMV(1,amli->n_F,amli->inv_A_FF,amli->A_FF_pivot,x_F_temp);
528
529 #pragma omp parallel for private(i) schedule(static)
530 for (i=0;i<amli->n_F;++i) {
531 amli->x_F[i]+=x_F_temp[i];
532 }
533
534 /* x<-[x_F,x_C] */
535 #pragma omp parallel for private(i) schedule(static)
536 for (i=0;i<amli->n;++i) {
537 if (amli->mask_C[i]>-1) {
538 x[i]+=amli->x_C[amli->mask_C[i]];
539 } else {
540 x[i]+=amli->x_F[amli->mask_F[i]];
541 }
542 }
543
544 time0=Esys_timer()-time0;
545 if (verbose) fprintf(stderr,"timing: After next level: %e\n",time0);
546
547 /*postsmoothing*/
548 time0=Esys_timer();
549 Paso_Preconditioner_LocalSmoother_solve(amli->A,amli->Smoother,x,b,post_sweeps,TRUE);
550 time0=Esys_timer()-time0;
551 if (verbose) fprintf(stderr,"timing: Postsmoothing: %e\n",time0);
552
553 /*end of postsmoothing*/
554
555 }
556 MEMFREE(r);
557 MEMFREE(x0);
558 MEMFREE(x_F_temp);
559 return;
560 }
561
562 /*******************************************************
563 *
564 * Copyright (c) 2003-2010 by University of Queensland
565 * Earth Systems Science Computational Center (ESSCC)
566 * http://www.uq.edu.au/esscc
567 *
568 * Primary Business: Queensland, Australia
569 * Licensed under the Open Software License version 3.0
570 * http://www.opensource.org/licenses/osl-3.0.php
571 *
572 *******************************************************/
573
574
575 /*************************************************************
576
577 Paso: Coarsening strategies (no MPI)
578
579 the given matrix A is splitted in to the form
580
581
582 [ A_FF A_FC ]
583 A = [ ]
584 [ A_CF A_CC ]
585
586 such that unknowns/equations in set C are weakly connected via A_CC and strongly connected
587 to at least one unknowns/equation in F via A_CF and A_FC. The unknowns/equations in C and F
588 are marked in the marker_F by FALSE and TRUE respectively.
589
590 The weak/strong connection is controlled by coarsening_threshold.
591
592 three strategies are implemented:
593
594 a) YAIR_SHAPIRA (YS): |a_ij| >= theta * |a_ii|
595 b) Ruge-Stueben (RS): |a_ij| >= theta * max_(k<>i) |a_ik|
596 c) Aggregation : |a_ij|^2 >= theta**2 * |a_ii||a_jj|
597
598 where theta = coarsening_threshold/maximum_pattern_degree
599
600 Remark:
601
602 - a strong connection in YAIR_SHAPIRA is a strong connection in Aggregation
603
604 */
605 /**************************************************************
606
607 Author: artak@uq.edu.au, l.gross@uq.edu.au
608
609 **************************************************************/
610
611 #include "Coarsening.h"
612 #include "PasoUtil.h"
613 #include <limits.h>
614
615
616
617 /*Used in Paso_Coarsening_Local_RS_MI*/
618
619 /*Computes how many connections unknown i have in S.
620 bool_t transpose - TRUE if we want to compute how many strong connection of i in S^T, FALSE otherwise.
621 Note that if we first transpose S and then call method on S^T then "transpose" should be set to FALSE.
622 */
623 dim_t how_many(dim_t i,Paso_Pattern * S, bool_t transpose) {
624 dim_t j,n;
625 dim_t total,ltotal;
626 index_t *index,*where_p;
627
628 /*index_t iptr;*/
629 total=0;
630 ltotal=0;
631
632 n=S->numOutput;
633
634 if(transpose) {
635 #pragma omp parallel
636 {
637 ltotal=0;
638 #pragma omp for private(j,index,where_p,ltotal) schedule(static)
639 for (j=0;j<n;++j) {
640 index=&(S->index[S->ptr[j]]);
641 where_p=(index_t*)bsearch(&i,
642 index,
643 S->ptr[j + 1]-S->ptr[j],
644 sizeof(index_t),
645 Paso_comparIndex);
646 if (where_p!=NULL) {
647 ltotal++;
648 }
649 }
650 }
651 #pragma omp critical
652 {
653 total+=ltotal;
654 }
655
656 }
657 else {
658 total=S->ptr[i+1]-S->ptr[i];
659 /*#pragma omp parallel for private(iptr) schedule(static)*/
660 /*for (iptr=S->ptr[i];iptr<S->ptr[i+1]; ++iptr) {
661 if(S->index[iptr]!=i && marker_F[S->index[iptr]]==IS_AVAILABLE)
662 total++;
663 }*/
664
665 }
666
667 if (total==0) total=IS_NOT_AVAILABLE;
668
669 return total;
670 }
671
672
673
674
675
676 Paso_Pattern* Paso_Coarsening_Local_getTranspose(Paso_Pattern* P){
677
678 Paso_Pattern *outpattern=NULL;
679
680 dim_t C=P->numInput;
681 dim_t F=P->numOutput-C;
682 dim_t n=C+F;
683 dim_t i,j;
684 index_t iptr;
685 Paso_IndexListArray* index_list = Paso_IndexListArray_alloc(C);
686
687
688 /*#pragma omp parallel for private(i,iptr,j) schedule(static)*/
689 for (i=0;i<n;++i) {
690 for (iptr=P->ptr[i];iptr<P->ptr[i+1]; ++iptr) {
691 j=P->index[iptr];
692 Paso_IndexListArray_insertIndex(index_list,i,j);
693 }
694 }
695
696 outpattern=Paso_Pattern_fromIndexListArray(0, index_list,0,n,0);
697
698 /* clean up */
699 Paso_IndexListArray_free(index_list);
700
701 return outpattern;
702 }
703
704
705 /************** BLOCK COARSENENING *********************/
706
707 void Paso_Coarsening_Local_Standard_Block(Paso_SparseMatrix* A, index_t* marker_F, double theta)
708 {
709 const dim_t n=A->numRows;
710
711 dim_t i,j,k;
712 index_t iptr,jptr;
713 /*index_t *index,*where_p;*/
714 double threshold,max_offdiagonal;
715 dim_t *lambda; /*mesure of importance */
716 dim_t maxlambda=0;
717 index_t index_maxlambda=0;
718 double time0=0;
719 bool_t verbose=0;
720 dim_t n_block=A->row_block_size;
721
722 double fnorm=0;
723 dim_t bi;
724
725 Paso_Pattern *S=NULL;
726 Paso_Pattern *ST=NULL;
727 Paso_IndexListArray* index_list = Paso_IndexListArray_alloc(n);
728
729 time0=Esys_timer();
730 k=0;
731 /*Paso_Coarsening_Local_getReport(n,marker_F);*/
732 /*printf("Blocks %d %d\n",n_block,A->len);*/
733
734 /*S_i={j \in N_i; i strongly coupled to j}*/
735 #pragma omp parallel for private(i,bi,fnorm,iptr,max_offdiagonal,threshold,j) schedule(static)
736 for (i=0;i<n;++i) {
737 if(marker_F[i]==IS_AVAILABLE) {
738 max_offdiagonal = DBL_MIN;
739 for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {
740 j=A->pattern->index[iptr];
741 if( j != i){
742 fnorm=0;
743 for(bi=0;bi<n_block*n_block;++bi)
744 {
745 fnorm+=A->val[iptr*n_block*n_block+bi]*A->val[iptr*n_block*n_block+bi];
746 }
747 fnorm=sqrt(fnorm);
748 max_offdiagonal = MAX(max_offdiagonal,fnorm);
749 }
750 }
751 threshold = theta*max_offdiagonal;
752 for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {
753 j=A->pattern->index[iptr];
754 if( j != i){
755 fnorm=0;
756 for(bi=0;bi<n_block*n_block;++bi)
757 {
758 fnorm+=A->val[iptr*n_block*n_block+bi]*A->val[iptr*n_block*n_block+bi];
759 }
760 fnorm=sqrt(fnorm);
761 if(fnorm>=threshold) {
762 Paso_IndexListArray_insertIndex(index_list,i,j);
763 }
764 }
765 }
766 }
767 }
768
769 S=Paso_Pattern_fromIndexListArray(0,index_list,0,A->pattern->numInput,0);
770 ST=Paso_Coarsening_Local_getTranspose(S);
771
772 /*printf("Patterns len %d %d\n",S->len,ST->len);*/
773
774 time0=Esys_timer()-time0;
775 if (verbose) fprintf(stdout,"timing: RS filtering and pattern creation: %e\n",time0);
776
777 lambda=TMPMEMALLOC(n,dim_t);
778
779 #pragma omp parallel for private(i) schedule(static)
780 for (i=0;i<n;++i) { lambda[i]=IS_NOT_AVAILABLE; }
781
782 /*S_i={j \in N_i; i strongly coupled to j}*/
783
784 /*
785 #pragma omp parallel for private(i,iptr,lk) schedule(static)
786 for (i=0;i<n;++i) {
787 lk=0;
788 for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {
789 if(ABS(A->val[iptr])>1.e-15 && A->pattern->index[iptr]!=i )
790 lk++;
791 }
792 #pragma omp critical
793 k+=lk;
794 if(k==0) {
795 marker_F[i]=TRUE;
796 }
797 }
798 */
799
800
801 k=0;
802 maxlambda=0;
803
804 time0=Esys_timer();
805
806 for (i=0;i<n;++i) {
807 if(marker_F[i]==IS_AVAILABLE) {
808 lambda[i]=how_many(k,ST,FALSE); /* if every row is available then k and i are the same.*/
809 /*lambda[i]=how_many(i,S,TRUE);*/
810 /*printf("lambda[%d]=%d, k=%d \n",i,lambda[i],k);*/
811 k++;
812 if(maxlambda<lambda[i]) {
813 maxlambda=lambda[i];
814 index_maxlambda=i;
815 }
816 }
817 }
818
819 k=0;
820 time0=Esys_timer()-time0;
821 if (verbose) fprintf(stdout,"timing: Lambdas computations at the begining: %e\n",time0);
822
823 time0=Esys_timer();
824
825 /*Paso_Coarsening_Local_getReport(n,marker_F);*/
826
827 while (Paso_Util_isAny(n,marker_F,IS_AVAILABLE)) {
828
829 if(index_maxlambda<0) {
830 break;
831 }
832
833 i=index_maxlambda;
834 if(marker_F[i]==IS_AVAILABLE) {
835 marker_F[index_maxlambda]=FALSE;
836 lambda[index_maxlambda]=IS_NOT_AVAILABLE;
837 for (iptr=ST->ptr[i];iptr<ST->ptr[i+1]; ++iptr) {
838 j=ST->index[iptr];
839 if(marker_F[j]==IS_AVAILABLE) {
840 marker_F[j]=TRUE;
841 lambda[j]=IS_NOT_AVAILABLE;
842 for (jptr=S->ptr[j];jptr<S->ptr[j+1]; ++jptr) {
843 k=S->index[jptr];
844 if(marker_F[k]==IS_AVAILABLE) {
845 lambda[k]++;
846 }
847 }
848 }
849 }
850 for (iptr=S->ptr[i];iptr<S->ptr[i+1]; ++iptr) {
851 j=S->index[iptr];
852 if(marker_F[j]==IS_AVAILABLE) {
853 lambda[j]--;
854 }
855 }
856 }
857
858 /* Used when transpose of S is not available */
859 /*
860 for (i=0;i<n;++i) {
861 if(marker_F[i]==IS_AVAILABLE) {
862 if (i==index_maxlambda) {
863 marker_F[index_maxlambda]=FALSE;
864 lambda[index_maxlambda]=-1;
865 for (j=0;j<n;++j) {
866 if(marker_F[j]==IS_AVAILABLE) {
867 index=&(S->index[S->ptr[j]]);
868 where_p=(index_t*)bsearch(&i,
869 index,
870 S->ptr[j + 1]-S->ptr[j],
871 sizeof(index_t),
872 Paso_comparIndex);
873 if (where_p!=NULL) {
874 marker_F[j]=TRUE;
875 lambda[j]=-1;
876 for (iptr=S->ptr[j];iptr<S->ptr[j+1]; ++iptr) {
877 k=S->index[iptr];
878 if(marker_F[k]==IS_AVAILABLE) {
879 lambda[k]++;
880 }
881 }
882 }
883
884 }
885 }
886
887 }
888 }
889 }
890 */
891 index_maxlambda=arg_max(n,lambda, IS_NOT_AVAILABLE);
892 }
893
894 time0=Esys_timer()-time0;
895 if (verbose) fprintf(stdout,"timing: Loop : %e\n",time0);
896
897 /*Paso_Coarsening_Local_getReport(n,marker_F);*/
898
899 #pragma omp parallel for private(i) schedule(static)
900 for (i=0;i<n;++i)
901 if(marker_F[i]==IS_AVAILABLE) {
902 marker_F[i]=TRUE;
903 }
904
905 /*Paso_Coarsening_Local_getReport(n,marker_F);*/
906
907 TMPMEMFREE(lambda);
908
909 /* clean up */
910 Paso_IndexListArray_free(index_list);
911 Paso_Pattern_free(S);
912
913 /* swap to TRUE/FALSE in marker_F */
914 #pragma omp parallel for private(i) schedule(static)
915 for (i=0;i<n;i++) marker_F[i]=(marker_F[i]==TRUE)? TRUE : FALSE;
916
917 }
918
919
920
921 #undef IS_AVAILABLE
922 #undef IS_NOT_AVAILABLE
923 #undef TRUE
924 #undef FALSE
925
926 #undef IS_UNDECIDED
927 #undef IS_STRONG
928 #undef IS_WEAK
929
930 #undef TRUEB
931 #undef TRUEB
932

  ViewVC Help
Powered by ViewVC 1.1.26