/[escript]/trunk/paso/src/AML.cpp.old
ViewVC logotype

Contents of /trunk/paso/src/AML.cpp.old

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4869 - (show annotations)
Mon Apr 14 10:39:22 2014 UTC (5 years, 5 months ago) by caltinay
File size: 30416 byte(s)
all of paso now lives in its own namespace.

1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2014 by University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Open Software License version 3.0
9 * http://www.opensource.org/licenses/osl-3.0.php
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development 2012-2013 by School of Earth Sciences
13 * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 *
15 *****************************************************************************/
16
17
18 /****************************************************************************/
19
20 /* Paso: AML preconditioner:
21
22 This is just a collection of older code. This does not compile.
23 */
24
25 /****************************************************************************/
26
27 /* Author: artak@uq.edu.au */
28
29 /****************************************************************************/
30
31 #include "Paso.h"
32 #include "Preconditioner.h"
33 #include "Options.h"
34 #include "PasoUtil.h"
35 #include "UMFPACK.h"
36 #include "MKL.h"
37 #include "SystemMatrix.h"
38 #include "Coarsening.h"
39 #include "BlockOps.h"
40
41 #ifndef INC_COARSE
42 #define INC_COARSE
43
44 #include "SystemMatrix.h"
45
46
47 /* Remove:
48 #define PASO_COARSENING_IN_F TRUE
49 #define PASO_COARSENING_IN_C FALSE
50 */
51
52 void Coarsening_Local(index_t* marker_F, SparseMatrix* A, Options* options);
53 void Coarsening_Local_Aggregation(SparseMatrix* A, index_t* marker_F, const double theta);
54 void Coarsening_Local_Aggregation_blk(SparseMatrix* A, index_t* marker_F, const double theta);
55 void Coarsening_Local_YS(SparseMatrix* A, index_t* marker_F, const double theta);
56 void Coarsening_Local_YS_blk(SparseMatrix* A, index_t* marker_F, const double theta);
57
58 void Coarsening_Local_RS(SparseMatrix* A, index_t* marker_F, double theta);
59
60 void Coarsening_Local_Partition(Pattern* pattern,index_t* marker_F);
61 void Coarsening_Local_greedy(Pattern* pattern, index_t* marker_F);
62
63
64
65 /*=== REVISE ============*/
66 void Coarsening_Local_greedy_color(Pattern* pattern, index_t* marker_F);
67 void Coarsening_Local_greedy_diag(SparseMatrix* A, index_t* marker_F, double thershold);
68
69 void Coarsening_Local_YS_plus(SparseMatrix* A, index_t* marker_F, double alpha, double taw, double delta);
70 void Coarsening_Local_Standard(SparseMatrix* A, index_t* marker_F, double theta);
71 void Coarsening_Local_greedy_RS(SparseMatrix* A, index_t* marker_F, double theta);
72 void Coarsening_Local_greedy_Agg(SparseMatrix* A, index_t* marker_F, double theta);
73 /*dim_t how_many(dim_t n,dim_t* S_i, int value1, dim_t* addedSet, int value2);*/
74 void Coarsening_Local_Standard_Block(SparseMatrix* A, index_t* marker_F, double theta);
75
76 dim_t how_many(dim_t i,Pattern * S, bool_t transpose);
77 dim_t arg_max(dim_t n, dim_t* lambda, dim_t mask);
78 Pattern* Coarsening_Local_getTranspose(Pattern* P);
79
80 void Coarsening_Local_getReport(dim_t n,index_t* marker_F);
81 void Coarsening_Local_Read(char *fileName,dim_t n,index_t* marker_F);
82 void Coarsening_Local_Write(char *fileName,dim_t n,index_t* marker_F);
83
84
85 #endif
86
87
88 /***********************************************************************************,amli->b_F*/
89
90 /* free all memory used by AMLI */
91
92 void Solver_AMLI_System_free(Solver_AMLI_System * in) {
93 dim_t i;
94 if (in!=NULL) {
95 for (i=0;i<in->block_size;++i) {
96 Solver_AMLI_free(in->amliblock[i]);
97 SparseMatrix_free(in->block[i]);
98 }
99 delete in;
100 }
101 }
102
103
104 /* free all memory used by AMLI */
105
106 void Solver_AMLI_free(Solver_AMLI * in) {
107 if (in!=NULL) {
108 Preconditioner_LocalSmoother_free(in->Smoother);
109
110 delete[] in->inv_A_FF;
111 delete[] in->A_FF_pivot;
112 SparseMatrix_free(in->A_FC);
113 SparseMatrix_free(in->A_CF);
114 SparseMatrix_free(in->A);
115 if(in->coarsest_level==TRUE) {
116 #ifdef MKL
117 MKL_free1(in->AOffset1);
118 SparseMatrix_free(in->AOffset1);
119 #else
120 #ifdef UMFPACK
121 UMFPACK1_free((UMFPACK_Handler*)(in->solver));
122 #endif
123 #endif
124 }
125 delete[] in->rows_in_F;
126 delete[] in->rows_in_C;
127 delete[] in->mask_F;
128 delete[] in->mask_C;
129 delete[] in->x_F;
130 delete[] in->b_F;
131 delete[] in->x_C;
132 delete[] in->b_C;
133 in->solver=NULL;
134 Solver_AMLI_free(in->AMLI_of_Schur);
135 delete[] in->b_C;
136 delete in;
137 }
138 }
139
140 /************************************************************************************/
141
142 /* constructs the block-block factorization of
143
144 [ A_FF A_FC ]
145 A_p=
146 [ A_CF A_FF ]
147
148 to
149
150 [ I 0 ] [ A_FF 0 ] [ I invA_FF*A_FF ]
151 [ A_CF*invA_FF I ] [ 0 S ] [ 0 I ]
152
153
154 where S=A_FF-ACF*invA_FF*A_FC within the shape of S
155
156 then AMLI is applied to S again until S becomes empty
157
158 */
159 Solver_AMLI* Solver_getAMLI(SparseMatrix *A_p,dim_t level,Options* options) {
160 Solver_AMLI* out=NULL;
161 Pattern* temp1=NULL;
162 Pattern* temp2=NULL;
163 bool_t verbose=options->verbose;
164 dim_t n=A_p->numRows;
165 dim_t n_block=A_p->row_block_size;
166 index_t* mis_marker=NULL;
167 index_t* counter=NULL;
168 index_t iPtr,*index, *where_p, iPtr_s;
169 dim_t i,j;
170 SparseMatrix * schur=NULL;
171 SparseMatrix * schur_withFillIn=NULL;
172 double S=0;
173
174 /* char filename[8];
175 sprintf(filename,"AMLILevel%d",level);
176
177 SparseMatrix_saveMM(A_p,filename);
178 */
179
180 /*Make sure we have block sizes 1*/
181 if (A_p->col_block_size>1) {
182 Esys_setError(TYPE_ERROR,"Solver_getAMLI: AMLI requires column block size 1.");
183 return NULL;
184 }
185 if (A_p->row_block_size>1) {
186 Esys_setError(TYPE_ERROR,"Solver_getAMLI: AMLI requires row block size 1.");
187 return NULL;
188 }
189 out=new Solver_AMLI;
190 /* identify independent set of rows/columns */
191 mis_marker=new index_t[n];
192 counter=new index_t[n];
193 if ( !( Esys_checkPtr(mis_marker) || Esys_checkPtr(counter) || Esys_checkPtr(out)) ) {
194 out->AMLI_of_Schur=NULL;
195 out->inv_A_FF=NULL;
196 out->A_FF_pivot=NULL;
197 out->A_FC=NULL;
198 out->A_CF=NULL;
199 out->rows_in_F=NULL;
200 out->rows_in_C=NULL;
201 out->mask_F=NULL;
202 out->mask_C=NULL;
203 out->x_F=NULL;
204 out->b_F=NULL;
205 out->x_C=NULL;
206 out->b_C=NULL;
207 out->A=SparseMatrix_getReference(A_p);
208 out->Smoother=NULL;
209 out->solver=NULL;
210 /*out->GS=Solver_getGS(A_p,verbose);*/
211 out->level=level;
212 out->n=n;
213 out->n_F=n+1;
214 out->n_block=n_block;
215 out->post_sweeps=options->post_sweeps;
216 out->pre_sweeps=options->pre_sweeps;
217
218 if (level==0 || n<=options->min_coarse_matrix_size) {
219 out->coarsest_level=TRUE;
220 #ifdef MKL
221 out->AOffset1=SparseMatrix_alloc(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_OFFSET1, out->A->pattern,1,1, FALSE);
222 #pragma omp parallel for private(i) schedule(static)
223 for (i=0;i<out->A->len;++i) {
224 out->AOffset1->val[i]=out->A->val[i];
225 }
226 #else
227 #ifdef UMFPACK
228 #else
229 out->Smoother=Preconditioner_LocalSmoother_alloc(A_p,TRUE,verbose);
230 #endif
231 #endif
232 } else {
233 out->coarsest_level=FALSE;
234 out->Smoother=Preconditioner_LocalSmoother_alloc(A_p,TRUE,verbose);
235
236 Coarsening_Local(mis_marker,A_p, options->coarsening_threshold, options->coarsening_method);
237
238 #pragma omp parallel for private(i) schedule(static)
239 for (i = 0; i < n; ++i) {
240 mis_marker[i]=(mis_marker[i]== PASO_COARSENING_IN_F);
241 counter[i]=mis_marker[i];
242
243 }
244
245 out->n_F=Util_cumsum(n,counter);
246
247 if (out->n_F==0) {
248 out->coarsest_level=TRUE;
249 level=0;
250 if (verbose) {
251 /*printf("AMLI coarsening eliminates all unknowns, switching to Jacobi preconditioner.\n");*/
252 printf("AMLI coarsening does not eliminate any of the unknowns, switching to Jacobi preconditioner.\n");
253 }
254 }
255 else if (out->n_F==n) {
256 out->coarsest_level=TRUE;
257 level=0;
258 if (verbose) {
259 /*printf("AMLI coarsening eliminates all unknowns, switching to Jacobi preconditioner.\n");*/
260 printf("AMLI coarsening eliminates all of the unknowns, switching to Jacobi preconditioner.\n");
261
262 }
263 } else {
264 if (Esys_noError()) {
265 /*#pragma omp parallel for private(i) schedule(static)
266 for (i = 0; i < n; ++i) counter[i]=mis_marker[i];
267 out->n_F=Util_cumsum(n,counter);
268 */
269
270 /*if(level==3) {
271 printf("##TOTAL: %d, ELIMINATED: %d\n",n,out->n_F);
272 for (i = 0; i < n; ++i) {
273 printf("##%d %d\n",i,mis_marker[i]);
274 }
275 }*/
276
277 out->mask_F=new index_t[n];
278 out->rows_in_F=new index_t[out->n_F];
279 out->inv_A_FF=new double[n_block*n_block*out->n_F];
280 out->A_FF_pivot=NULL; /* later use for block size>3 */
281 if (! (Esys_checkPtr(out->mask_F) || Esys_checkPtr(out->inv_A_FF) || Esys_checkPtr(out->rows_in_F) ) ) {
282 /* creates an index for F from mask */
283 #pragma omp parallel for private(i) schedule(static)
284 for (i = 0; i < out->n_F; ++i) out->rows_in_F[i]=-1;
285 #pragma omp parallel for private(i) schedule(static)
286 for (i = 0; i < n; ++i) {
287 if (mis_marker[i]) {
288 out->rows_in_F[counter[i]]=i;
289 out->mask_F[i]=counter[i];
290 } else {
291 out->mask_F[i]=-1;
292 }
293 }
294
295 /* Compute row-sum for getting rs(A_FF)^-1*/
296 #pragma omp parallel for private(i,iPtr,j,S) schedule(static)
297 for (i = 0; i < out->n_F; ++i) {
298 S=0;
299 /*printf("[%d ]: [%d] -> ",i, out->rows_in_F[i]);*/
300 for (iPtr=A_p->pattern->ptr[out->rows_in_F[i]];iPtr<A_p->pattern->ptr[out->rows_in_F[i] + 1]; ++iPtr) {
301 j=A_p->pattern->index[iPtr];
302 /*if (j==out->rows_in_F[i]) printf("diagonal %e",A_p->val[iPtr]);*/
303 if (mis_marker[j])
304 S+=A_p->val[iPtr];
305 }
306 /*printf("-> %e \n",S);*/
307 out->inv_A_FF[i]=1./S;
308 }
309 }
310 }
311
312 /*check whether coarsening process actually makes sense to continue.
313 if coarse matrix at least smaller by 30% then continue, otherwise we stop.*/
314 if ((out->n_F*100/n)<30) {
315 level=1;
316 }
317
318 if ( Esys_noError()) {
319 /* if there are no nodes in the coarse level there is no more work to do */
320 out->n_C=n-out->n_F;
321
322 /*if (out->n_F>500) */
323 out->rows_in_C=new index_t[out->n_C];
324 out->mask_C=new index_t[n];
325 if (! (Esys_checkPtr(out->mask_C) || Esys_checkPtr(out->rows_in_C) ) ) {
326 /* creates an index for C from mask */
327 #pragma omp parallel for private(i) schedule(static)
328 for (i = 0; i < n; ++i) counter[i]=! mis_marker[i];
329 Util_cumsum(n,counter);
330 #pragma omp parallel for private(i) schedule(static)
331 for (i = 0; i < out->n_C; ++i) out->rows_in_C[i]=-1;
332 #pragma omp parallel for private(i) schedule(static)
333 for (i = 0; i < n; ++i) {
334 if (! mis_marker[i]) {
335 out->rows_in_C[counter[i]]=i;
336 out->mask_C[i]=counter[i];
337 } else {
338 out->mask_C[i]=-1;
339 }
340 }
341 }
342 }
343 if ( Esys_noError()) {
344 /* get A_CF block: */
345 out->A_CF=SparseMatrix_getSubmatrix(A_p,out->n_C,out->n_F,out->rows_in_C,out->mask_F);
346 /* get A_FC block: */
347 out->A_FC=SparseMatrix_getSubmatrix(A_p,out->n_F,out->n_C,out->rows_in_F,out->mask_C);
348 /* get A_CC block: */
349 schur=SparseMatrix_getSubmatrix(A_p,out->n_C,out->n_C,out->rows_in_C,out->mask_C);
350 }
351 if ( Esys_noError()) {
352 /*find the pattern of the schur complement with fill in*/
353 temp1=Pattern_multiply(PATTERN_FORMAT_DEFAULT,out->A_CF->pattern,out->A_FC->pattern);
354 temp2=Pattern_binop(PATTERN_FORMAT_DEFAULT, schur->pattern, temp1);
355 schur_withFillIn=SparseMatrix_alloc(A_p->type,temp2,1,1, TRUE);
356 Pattern_free(temp1);
357 Pattern_free(temp2);
358 }
359 if ( Esys_noError()) {
360 /* copy values over*/
361 #pragma omp parallel for private(i,iPtr,j,iPtr_s,index,where_p) schedule(static)
362 for (i = 0; i < schur_withFillIn->numRows; ++i) {
363 for (iPtr=schur_withFillIn->pattern->ptr[i];iPtr<schur_withFillIn->pattern->ptr[i + 1]; ++iPtr) {
364 j=schur_withFillIn->pattern->index[iPtr];
365 iPtr_s=schur->pattern->ptr[i];
366 index=&(schur->pattern->index[iPtr_s]);
367 where_p=(index_t*)bsearch(&j,
368 index,
369 schur->pattern->ptr[i + 1]-schur->pattern->ptr[i],
370 sizeof(index_t),
371 comparIndex);
372 if (where_p!=NULL) {
373 schur_withFillIn->val[iPtr]=schur->val[iPtr_s+(index_t)(where_p-index)];
374 }
375 }
376 }
377 Solver_updateIncompleteSchurComplement(schur_withFillIn,out->A_CF,out->inv_A_FF,out->A_FF_pivot,out->A_FC);
378 out->AMLI_of_Schur=Solver_getAMLI(schur_withFillIn,level-1,options);
379 }
380 /* allocate work arrays for AMLI application */
381 if (Esys_noError()) {
382 out->x_F=new double[n_block*out->n_F];
383 out->b_F=new double[n_block*out->n_F];
384 out->x_C=new double[n_block*out->n_C];
385 out->b_C=new double[n_block*out->n_C];
386
387 if (! (Esys_checkPtr(out->x_F) || Esys_checkPtr(out->b_F) || Esys_checkPtr(out->x_C) || Esys_checkPtr(out->b_C) ) ) {
388 #pragma omp parallel for private(i) schedule(static)
389 for (i = 0; i < out->n_F; ++i) {
390 out->x_F[i]=0.;
391 out->b_F[i]=0.;
392 }
393 #pragma omp parallel for private(i) schedule(static)
394 for (i = 0; i < out->n_C; ++i) {
395 out->x_C[i]=0.;
396 out->b_C[i]=0.;
397 }
398 }
399 }
400 SparseMatrix_free(schur);
401 SparseMatrix_free(schur_withFillIn);
402 }
403 }
404 }
405 delete[] mis_marker;
406 delete[] counter;
407
408 if (Esys_noError()) {
409 if (verbose && level>0 && !out->coarsest_level) {
410 printf("AMLI: level: %d: %d unknowns eliminated. %d left.\n",level, out->n_F,out->n_C);
411 }
412 return out;
413 } else {
414 Solver_AMLI_free(out);
415 return NULL;
416 }
417 }
418
419 /************************************************************************************/
420
421 /* apply AMLI precondition b-> x
422
423 in fact it solves
424
425 [ I 0 ] [ A_FF 0 ] [ I invA_FF*A_FC ] [ x_F ] = [b_F]
426 [ A_CF*invA_FF I ] [ 0 S ] [ 0 I ] [ x_C ] = [b_C]
427
428 in the form
429
430 b->[b_F,b_C]
431 x_F=invA_FF*b_F
432 b_C=b_C-A_CF*x_F
433 x_C=AMLI(b_C)
434 b_F=b_F-A_FC*x_C
435 x_F=invA_FF*b_F
436 x<-[x_F,x_C]
437
438 should be called within a parallel region
439 barrier synchronisation should be performed to make sure that the input vector available
440
441 */
442
443 void Solver_solveAMLI(Solver_AMLI * amli, double * x, double * b) {
444 dim_t i;
445 double time0=0;
446 double *r=NULL, *x0=NULL,*x_F_temp=NULL;
447 bool_t verbose=0;
448
449 dim_t post_sweeps=amli->post_sweeps;
450 dim_t pre_sweeps=amli->pre_sweeps;
451
452 #ifdef UMFPACK
453 UMFPACK_Handler * ptr=NULL;
454 #endif
455
456 r=new double[amli->n];
457 x0=new double[amli->n];
458 x_F_temp=new double[amli->n_F];
459
460 if (amli->coarsest_level) {
461
462 time0=Esys_timer();
463 /*If all unknown are eliminated then Jacobi is the best preconditioner*/
464 if (amli->n_F==0 || amli->n_F==amli->n) {
465 Preconditioner_LocalSmoother_solve(amli->A, amli->Smoother,x,b,1,FALSE);
466 }
467 else {
468 #ifdef MKL
469 MKL1(amli->AOffset1,x,b,verbose);
470 #else
471 #ifdef UMFPACK
472 ptr=(UMFPACK_Handler *)(amli->solver);
473 UMFPACK1(&ptr,amli->A,x,b,verbose);
474 amli->solver=(void*) ptr;
475 #else
476 Preconditioner_LocalSmoother_solve(amli->A,amli->Smoother,x,b,1,FALSE);
477 #endif
478 #endif
479 }
480 time0=Esys_timer()-time0;
481 if (verbose) fprintf(stderr,"timing: DIRECT SOLVER: %e\n",time0);
482
483 } else {
484 /* presmoothing */
485 time0=Esys_timer();
486 Preconditioner_LocalSmoother_solve(amli->A,amli->Smoother,x,b,pre_sweeps,FALSE);
487 time0=Esys_timer()-time0;
488 if (verbose) fprintf(stderr,"timing: Presmoothing: %e\n",time0);
489 /* end of presmoothing */
490
491 time0=Esys_timer();
492 #pragma omp parallel for private(i) schedule(static)
493 for (i=0;i<amli->n;++i) r[i]=b[i];
494
495 /*r=b-Ax*/
496 SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,amli->A,x,1.,r);
497
498 /* r->[b_F,b_C] */
499 #pragma omp parallel for private(i) schedule(static)
500 for (i=0;i<amli->n_F;++i) amli->b_F[i]=r[amli->rows_in_F[i]];
501
502 #pragma omp parallel for private(i) schedule(static)
503 for (i=0;i<amli->n_C;++i) amli->b_C[i]=r[amli->rows_in_C[i]];
504
505 /* x_F=invA_FF*b_F */
506 Copy(amli->n_F, amli->x_F,amli->b_F);
507 BlockOps_solveAll(1,amli->n_F,amli->inv_A_FF,amli->A_FF_pivot,amli->x_F);
508
509 /* b_C=b_C-A_CF*x_F */
510 SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,amli->A_CF,amli->x_F,1.,amli->b_C);
511
512 time0=Esys_timer()-time0;
513 if (verbose) fprintf(stderr,"timing: Before next level: %e\n",time0);
514
515 /* x_C=AMLI(b_C) */
516 Solver_solveAMLI(amli->AMLI_of_Schur,amli->x_C,amli->b_C);
517
518 time0=Esys_timer();
519
520 /* b_F=-A_FC*x_C */
521 SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,amli->A_FC,amli->x_C,0.,amli->b_F);
522 /* x_F_temp=invA_FF*b_F */
523 Copy(amli->n_F, x_F_temp,amli->b_F);
524 BlockOps_solveAll(1,amli->n_F,amli->inv_A_FF,amli->A_FF_pivot,x_F_temp);
525
526 #pragma omp parallel for private(i) schedule(static)
527 for (i=0;i<amli->n_F;++i) {
528 amli->x_F[i]+=x_F_temp[i];
529 }
530
531 /* x<-[x_F,x_C] */
532 #pragma omp parallel for private(i) schedule(static)
533 for (i=0;i<amli->n;++i) {
534 if (amli->mask_C[i]>-1) {
535 x[i]+=amli->x_C[amli->mask_C[i]];
536 } else {
537 x[i]+=amli->x_F[amli->mask_F[i]];
538 }
539 }
540
541 time0=Esys_timer()-time0;
542 if (verbose) fprintf(stderr,"timing: After next level: %e\n",time0);
543
544 /*postsmoothing*/
545 time0=Esys_timer();
546 Preconditioner_LocalSmoother_solve(amli->A,amli->Smoother,x,b,post_sweeps,TRUE);
547 time0=Esys_timer()-time0;
548 if (verbose) fprintf(stderr,"timing: Postsmoothing: %e\n",time0);
549
550 /*end of postsmoothing*/
551
552 }
553 delete[] r;
554 delete[] x0;
555 delete[] x_F_temp;
556 return;
557 }
558
559 /*****************************************************************************
560 *
561 * Copyright (c) 2003-2014 by University of Queensland
562 * http://www.uq.edu.au
563 *
564 * Primary Business: Queensland, Australia
565 * Licensed under the Open Software License version 3.0
566 * http://www.opensource.org/licenses/osl-3.0.php
567 *
568 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
569 * Development 2012-2013 by School of Earth Sciences
570 * Development from 2014 by Centre for Geoscience Computing (GeoComp)
571 *
572 *****************************************************************************/
573
574
575 /***********************************************************************************
576
577 Paso: Coarsening strategies (no MPI)
578
579 the given matrix A is split in to the form
580
581
582 [ A_FF A_FC ]
583 A = [ ]
584 [ A_CF A_CC ]
585
586 such that unknowns/equations in set C are weakly connected via A_CC and strongly connected
587 to at least one unknowns/equation in F via A_CF and A_FC. The unknowns/equations in C and F
588 are marked in the marker_F by FALSE and TRUE respectively.
589
590 The weak/strong connection is controlled by coarsening_threshold.
591
592 three strategies are implemented:
593
594 a) YAIR_SHAPIRA (YS): |a_ij| >= theta * |a_ii|
595 b) Ruge-Stueben (RS): |a_ij| >= theta * max_(k<>i) |a_ik|
596 c) Aggregation : |a_ij|^2 >= theta**2 * |a_ii||a_jj|
597
598 where theta = coarsening_threshold/maximum_pattern_degree
599
600 Remark:
601
602 - a strong connection in YAIR_SHAPIRA is a strong connection in Aggregation
603
604 */
605 /************************************************************************************
606
607 Author: artak@uq.edu.au, l.gross@uq.edu.au
608
609 ************************************************************************************/
610
611 #include "Coarsening.h"
612 #include "PasoUtil.h"
613 #include <limits.h>
614
615
616
617 /*Used in Coarsening_Local_RS_MI*/
618
619 /*Computes how many connections unknown i have in S.
620 bool_t transpose - TRUE if we want to compute how many strong connection of i in S^T, FALSE otherwise.
621 Note that if we first transpose S and then call method on S^T then "transpose" should be set to FALSE.
622 */
623 dim_t how_many(dim_t i,Pattern * S, bool_t transpose) {
624 dim_t j,n;
625 dim_t total,ltotal;
626 index_t *index,*where_p;
627
628 /*index_t iptr;*/
629 total=0;
630 ltotal=0;
631
632 n=S->numOutput;
633
634 if(transpose) {
635 #pragma omp parallel
636 {
637 ltotal=0;
638 #pragma omp for private(j,index,where_p,ltotal) schedule(static)
639 for (j=0;j<n;++j) {
640 index=&(S->index[S->ptr[j]]);
641 where_p=(index_t*)bsearch(&i,
642 index,
643 S->ptr[j + 1]-S->ptr[j],
644 sizeof(index_t),
645 comparIndex);
646 if (where_p!=NULL) {
647 ltotal++;
648 }
649 }
650 }
651 #pragma omp critical
652 {
653 total+=ltotal;
654 }
655
656 }
657 else {
658 total=S->ptr[i+1]-S->ptr[i];
659 /*#pragma omp parallel for private(iptr) schedule(static)*/
660 /*for (iptr=S->ptr[i];iptr<S->ptr[i+1]; ++iptr) {
661 if(S->index[iptr]!=i && marker_F[S->index[iptr]]==IS_AVAILABLE)
662 total++;
663 }*/
664
665 }
666
667 if (total==0) total=IS_NOT_AVAILABLE;
668
669 return total;
670 }
671
672
673
674
675
676 Pattern* Coarsening_Local_getTranspose(Pattern* P)
677 {
678 Pattern *outpattern=NULL;
679 dim_t C=P->numInput;
680 dim_t F=P->numOutput-C;
681 dim_t n=C+F;
682 dim_t i,j;
683 index_t iptr;
684 IndexListArray index_list(C);
685
686 /*#pragma omp parallel for private(i,iptr,j) schedule(static)*/
687 for (i=0; i<n; ++i) {
688 for (iptr=P->ptr[i];iptr<P->ptr[i+1]; ++iptr) {
689 j=P->index[iptr];
690 index_list[i].insertIndex(j);
691 }
692 }
693
694 outpattern=Pattern_fromIndexListArray(0, index_list, 0, n, 0);
695 return outpattern;
696 }
697
698
699 /************** BLOCK COARSENING *********************/
700
701 void Coarsening_Local_Standard_Block(SparseMatrix* A, index_t* marker_F, double theta)
702 {
703 const dim_t n=A->numRows;
704
705 dim_t i,j,k;
706 index_t iptr,jptr;
707 /*index_t *index,*where_p;*/
708 double threshold,max_offdiagonal;
709 dim_t *lambda; /*measure of importance */
710 dim_t maxlambda=0;
711 index_t index_maxlambda=0;
712 double time0=0;
713 bool_t verbose=0;
714 dim_t n_block=A->row_block_size;
715
716 double fnorm=0;
717 dim_t bi;
718
719 Pattern *S=NULL;
720 Pattern *ST=NULL;
721 IndexListArray index_list(n);
722
723 time0=Esys_timer();
724 k=0;
725 /*Coarsening_Local_getReport(n,marker_F);*/
726 /*printf("Blocks %d %d\n",n_block,A->len);*/
727
728 /*S_i={j \in N_i; i strongly coupled to j}*/
729 #pragma omp parallel for private(i,bi,fnorm,iptr,max_offdiagonal,threshold,j) schedule(static)
730 for (i=0;i<n;++i) {
731 if(marker_F[i]==IS_AVAILABLE) {
732 max_offdiagonal = DBL_MIN;
733 for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {
734 j=A->pattern->index[iptr];
735 if( j != i){
736 fnorm=0;
737 for(bi=0;bi<n_block*n_block;++bi)
738 {
739 fnorm+=A->val[iptr*n_block*n_block+bi]*A->val[iptr*n_block*n_block+bi];
740 }
741 fnorm=sqrt(fnorm);
742 max_offdiagonal = MAX(max_offdiagonal,fnorm);
743 }
744 }
745 threshold = theta*max_offdiagonal;
746 for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {
747 j=A->pattern->index[iptr];
748 if( j != i){
749 fnorm=0;
750 for(bi=0;bi<n_block*n_block;++bi)
751 {
752 fnorm+=A->val[iptr*n_block*n_block+bi]*A->val[iptr*n_block*n_block+bi];
753 }
754 fnorm=sqrt(fnorm);
755 if(fnorm>=threshold) {
756 index_list[i].insertIndex(j);
757 }
758 }
759 }
760 }
761 }
762
763 S=Pattern_fromIndexListArray(0, index_list, 0, A->pattern->numInput, 0);
764 ST=Coarsening_Local_getTranspose(S);
765
766 /*printf("Patterns len %d %d\n",S->len,ST->len);*/
767
768 time0=Esys_timer()-time0;
769 if (verbose) fprintf(stdout,"timing: RS filtering and pattern creation: %e\n",time0);
770
771 lambda=new dim_t[n];
772
773 #pragma omp parallel for private(i) schedule(static)
774 for (i=0;i<n;++i) { lambda[i]=IS_NOT_AVAILABLE; }
775
776 /*S_i={j \in N_i; i strongly coupled to j}*/
777
778 /*
779 #pragma omp parallel for private(i,iptr,lk) schedule(static)
780 for (i=0;i<n;++i) {
781 lk=0;
782 for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {
783 if(ABS(A->val[iptr])>1.e-15 && A->pattern->index[iptr]!=i )
784 lk++;
785 }
786 #pragma omp critical
787 k+=lk;
788 if(k==0) {
789 marker_F[i]=TRUE;
790 }
791 }
792 */
793
794 k=0;
795 maxlambda=0;
796 time0=Esys_timer();
797
798 for (i=0;i<n;++i) {
799 if(marker_F[i]==IS_AVAILABLE) {
800 lambda[i]=how_many(k,ST,FALSE); /* if every row is available then k and i are the same.*/
801 /*lambda[i]=how_many(i,S,TRUE);*/
802 /*printf("lambda[%d]=%d, k=%d \n",i,lambda[i],k);*/
803 k++;
804 if(maxlambda<lambda[i]) {
805 maxlambda=lambda[i];
806 index_maxlambda=i;
807 }
808 }
809 }
810
811 k=0;
812 time0=Esys_timer()-time0;
813 if (verbose) fprintf(stdout,"timing: Lambda computations at the beginning: %e\n",time0);
814
815 time0=Esys_timer();
816
817 /*Coarsening_Local_getReport(n,marker_F);*/
818
819 while (Util_isAny(n,marker_F,IS_AVAILABLE)) {
820 if(index_maxlambda<0) {
821 break;
822 }
823
824 i=index_maxlambda;
825 if(marker_F[i]==IS_AVAILABLE) {
826 marker_F[index_maxlambda]=FALSE;
827 lambda[index_maxlambda]=IS_NOT_AVAILABLE;
828 for (iptr=ST->ptr[i];iptr<ST->ptr[i+1]; ++iptr) {
829 j=ST->index[iptr];
830 if(marker_F[j]==IS_AVAILABLE) {
831 marker_F[j]=TRUE;
832 lambda[j]=IS_NOT_AVAILABLE;
833 for (jptr=S->ptr[j];jptr<S->ptr[j+1]; ++jptr) {
834 k=S->index[jptr];
835 if(marker_F[k]==IS_AVAILABLE) {
836 lambda[k]++;
837 }
838 }
839 }
840 }
841 for (iptr=S->ptr[i];iptr<S->ptr[i+1]; ++iptr) {
842 j=S->index[iptr];
843 if(marker_F[j]==IS_AVAILABLE) {
844 lambda[j]--;
845 }
846 }
847 }
848
849 /* Used when transpose of S is not available */
850 /*
851 for (i=0;i<n;++i) {
852 if(marker_F[i]==IS_AVAILABLE) {
853 if (i==index_maxlambda) {
854 marker_F[index_maxlambda]=FALSE;
855 lambda[index_maxlambda]=-1;
856 for (j=0;j<n;++j) {
857 if(marker_F[j]==IS_AVAILABLE) {
858 index=&(S->index[S->ptr[j]]);
859 where_p=(index_t*)bsearch(&i,
860 index,
861 S->ptr[j + 1]-S->ptr[j],
862 sizeof(index_t),
863 comparIndex);
864 if (where_p!=NULL) {
865 marker_F[j]=TRUE;
866 lambda[j]=-1;
867 for (iptr=S->ptr[j];iptr<S->ptr[j+1]; ++iptr) {
868 k=S->index[iptr];
869 if(marker_F[k]==IS_AVAILABLE) {
870 lambda[k]++;
871 }
872 }
873 }
874 }
875 }
876 }
877 }
878 }
879 */
880 index_maxlambda=arg_max(n,lambda, IS_NOT_AVAILABLE);
881 }
882
883 time0=Esys_timer()-time0;
884 if (verbose) fprintf(stdout,"timing: Loop : %e\n",time0);
885
886 /*Coarsening_Local_getReport(n,marker_F);*/
887 #pragma omp parallel for private(i) schedule(static)
888 for (i=0;i<n;++i)
889 if(marker_F[i]==IS_AVAILABLE) {
890 marker_F[i]=TRUE;
891 }
892
893 /*Coarsening_Local_getReport(n,marker_F);*/
894
895 delete[] lambda;
896
897 Pattern_free(S);
898
899 /* swap to TRUE/FALSE in marker_F */
900 #pragma omp parallel for private(i) schedule(static)
901 for (i=0;i<n;i++) marker_F[i]=(marker_F[i]==TRUE)? TRUE : FALSE;
902
903 }
904
905
906
907 #undef IS_AVAILABLE
908 #undef IS_NOT_AVAILABLE
909 #undef TRUE
910 #undef FALSE
911
912 #undef IS_UNDECIDED
913 #undef IS_STRONG
914 #undef IS_WEAK
915
916 #undef TRUEB
917 #undef TRUEB
918

Properties

Name Value
svn:mergeinfo /branches/amg_from_3530/paso/src/AML.cpp:3531-3826 /branches/lapack2681/paso/src/AML.cpp:2682-2741 /branches/pasowrap/paso/src/AML.cpp:3661-3674 /branches/py3_attempt2/paso/src/AML.cpp:3871-3891 /branches/restext/paso/src/AML.cpp:2610-2624 /branches/ripleygmg_from_3668/paso/src/AML.cpp:3669-3791 /branches/stage3.0/paso/src/AML.cpp:2569-2590 /branches/symbolic_from_3470/paso/src/AML.cpp:3471-3974 /branches/symbolic_from_3470/ripley/test/python/paso/src/AML.cpp:3517-3974 /release/3.0/paso/src/AML.cpp:2591-2601 /trunk/paso/src/AML.cpp:4257-4344 /trunk/ripley/test/python/paso/src/AML.cpp:3480-3515

  ViewVC Help
Powered by ViewVC 1.1.26