/[escript]/trunk/paso/src/AML.c
ViewVC logotype

Contents of /trunk/paso/src/AML.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4286 - (show annotations)
Thu Mar 7 04:28:11 2013 UTC (6 years, 5 months ago) by caltinay
File MIME type: text/plain
File size: 30805 byte(s)
Assorted spelling fixes.

1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2013 by University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Open Software License version 3.0
9 * http://www.opensource.org/licenses/osl-3.0.php
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development since 2012 by School of Earth Sciences
13 *
14 *****************************************************************************/
15
16
17 /************************************************************************************/
18
19 /* Paso: AML preconditioner:
20
21 This is just a collection of older code. This does not compile.
22
23 */
24
25 /************************************************************************************/
26
27 /* Author: artak@uq.edu.au */
28
29 /************************************************************************************/
30
31 #include "Paso.h"
32 #include "Preconditioner.h"
33 #include "Options.h"
34 #include "PasoUtil.h"
35 #include "UMFPACK.h"
36 #include "MKL.h"
37 #include "SystemMatrix.h"
38 #include "Coarsening.h"
39 #include "BlockOps.h"
40
41 #ifndef INC_COARSE
42 #define INC_COARSE
43
44 #include "SystemMatrix.h"
45
46
47 /* Remove:
48 #define PASO_COARSENING_IN_F TRUE
49 #define PASO_COARSENING_IN_C FALSE
50 */
51
52 void Paso_Coarsening_Local(index_t* marker_F, Paso_SparseMatrix* A, Paso_Options* options);
53 void Paso_Coarsening_Local_Aggregation(Paso_SparseMatrix* A, index_t* marker_F, const double theta);
54 void Paso_Coarsening_Local_Aggregation_blk(Paso_SparseMatrix* A, index_t* marker_F, const double theta);
55 void Paso_Coarsening_Local_YS(Paso_SparseMatrix* A, index_t* marker_F, const double theta);
56 void Paso_Coarsening_Local_YS_blk(Paso_SparseMatrix* A, index_t* marker_F, const double theta);
57
58 void Paso_Coarsening_Local_RS(Paso_SparseMatrix* A, index_t* marker_F, double theta);
59
60 void Paso_Coarsening_Local_Partition(Paso_Pattern* pattern,index_t* marker_F);
61 void Paso_Coarsening_Local_greedy(Paso_Pattern* pattern, index_t* marker_F);
62
63
64
65 /*=== REVISE ============*/
66 void Paso_Coarsening_Local_greedy_color(Paso_Pattern* pattern, index_t* marker_F);
67 void Paso_Coarsening_Local_greedy_diag(Paso_SparseMatrix* A, index_t* marker_F, double thershold);
68
69 void Paso_Coarsening_Local_YS_plus(Paso_SparseMatrix* A, index_t* marker_F, double alpha, double taw, double delta);
70 void Paso_Coarsening_Local_Standard(Paso_SparseMatrix* A, index_t* marker_F, double theta);
71 void Paso_Coarsening_Local_greedy_RS(Paso_SparseMatrix* A, index_t* marker_F, double theta);
72 void Paso_Coarsening_Local_greedy_Agg(Paso_SparseMatrix* A, index_t* marker_F, double theta);
73 /*dim_t how_many(dim_t n,dim_t* S_i, int value1, dim_t* addedSet, int value2);*/
74 void Paso_Coarsening_Local_Standard_Block(Paso_SparseMatrix* A, index_t* marker_F, double theta);
75
76 dim_t how_many(dim_t i,Paso_Pattern * S, bool_t transpose);
77 dim_t arg_max(dim_t n, dim_t* lambda, dim_t mask);
78 Paso_Pattern* Paso_Coarsening_Local_getTranspose(Paso_Pattern* P);
79
80 void Paso_Coarsening_Local_getReport(dim_t n,index_t* marker_F);
81 void Paso_Coarsening_Local_Read(char *fileName,dim_t n,index_t* marker_F);
82 void Paso_Coarsening_Local_Write(char *fileName,dim_t n,index_t* marker_F);
83
84
85 #endif
86
87
88 /***********************************************************************************,amli->b_F*/
89
90 /* free all memory used by AMLI */
91
92 void Paso_Solver_AMLI_System_free(Paso_Solver_AMLI_System * in) {
93 dim_t i;
94 if (in!=NULL) {
95 for (i=0;i<in->block_size;++i) {
96 Paso_Solver_AMLI_free(in->amliblock[i]);
97 Paso_SparseMatrix_free(in->block[i]);
98 }
99 MEMFREE(in);
100 }
101 }
102
103
104 /* free all memory used by AMLI */
105
106 void Paso_Solver_AMLI_free(Paso_Solver_AMLI * in) {
107 if (in!=NULL) {
108 Paso_Preconditioner_LocalSmoother_free(in->Smoother);
109
110 MEMFREE(in->inv_A_FF);
111 MEMFREE(in->A_FF_pivot);
112 Paso_SparseMatrix_free(in->A_FC);
113 Paso_SparseMatrix_free(in->A_CF);
114 Paso_SparseMatrix_free(in->A);
115 if(in->coarsest_level==TRUE) {
116 #ifdef MKL
117 Paso_MKL_free1(in->AOffset1);
118 Paso_SparseMatrix_free(in->AOffset1);
119 #else
120 #ifdef UMFPACK
121 Paso_UMFPACK1_free((Paso_UMFPACK_Handler*)(in->solver));
122 #endif
123 #endif
124 }
125 MEMFREE(in->rows_in_F);
126 MEMFREE(in->rows_in_C);
127 MEMFREE(in->mask_F);
128 MEMFREE(in->mask_C);
129 MEMFREE(in->x_F);
130 MEMFREE(in->b_F);
131 MEMFREE(in->x_C);
132 MEMFREE(in->b_C);
133 in->solver=NULL;
134 Paso_Solver_AMLI_free(in->AMLI_of_Schur);
135 MEMFREE(in->b_C);
136 MEMFREE(in);
137 }
138 }
139
140 /************************************************************************************/
141
142 /* constructs the block-block factorization of
143
144 [ A_FF A_FC ]
145 A_p=
146 [ A_CF A_FF ]
147
148 to
149
150 [ I 0 ] [ A_FF 0 ] [ I invA_FF*A_FF ]
151 [ A_CF*invA_FF I ] [ 0 S ] [ 0 I ]
152
153
154 where S=A_FF-ACF*invA_FF*A_FC within the shape of S
155
156 then AMLI is applied to S again until S becomes empty
157
158 */
159 Paso_Solver_AMLI* Paso_Solver_getAMLI(Paso_SparseMatrix *A_p,dim_t level,Paso_Options* options) {
160 Paso_Solver_AMLI* out=NULL;
161 Paso_Pattern* temp1=NULL;
162 Paso_Pattern* temp2=NULL;
163 bool_t verbose=options->verbose;
164 dim_t n=A_p->numRows;
165 dim_t n_block=A_p->row_block_size;
166 index_t* mis_marker=NULL;
167 index_t* counter=NULL;
168 index_t iPtr,*index, *where_p, iPtr_s;
169 dim_t i,j;
170 Paso_SparseMatrix * schur=NULL;
171 Paso_SparseMatrix * schur_withFillIn=NULL;
172 double S=0;
173
174
175 /* char filename[8];
176 sprintf(filename,"AMLILevel%d",level);
177
178 Paso_SparseMatrix_saveMM(A_p,filename);
179 */
180
181 /*Make sure we have block sizes 1*/
182 if (A_p->col_block_size>1) {
183 Esys_setError(TYPE_ERROR,"Paso_Solver_getAMLI: AMLI requires column block size 1.");
184 return NULL;
185 }
186 if (A_p->row_block_size>1) {
187 Esys_setError(TYPE_ERROR,"Paso_Solver_getAMLI: AMLI requires row block size 1.");
188 return NULL;
189 }
190 out=MEMALLOC(1,Paso_Solver_AMLI);
191 /* identify independent set of rows/columns */
192 mis_marker=TMPMEMALLOC(n,index_t);
193 counter=TMPMEMALLOC(n,index_t);
194 if ( !( Esys_checkPtr(mis_marker) || Esys_checkPtr(counter) || Esys_checkPtr(out)) ) {
195 out->AMLI_of_Schur=NULL;
196 out->inv_A_FF=NULL;
197 out->A_FF_pivot=NULL;
198 out->A_FC=NULL;
199 out->A_CF=NULL;
200 out->rows_in_F=NULL;
201 out->rows_in_C=NULL;
202 out->mask_F=NULL;
203 out->mask_C=NULL;
204 out->x_F=NULL;
205 out->b_F=NULL;
206 out->x_C=NULL;
207 out->b_C=NULL;
208 out->A=Paso_SparseMatrix_getReference(A_p);
209 out->Smoother=NULL;
210 out->solver=NULL;
211 /*out->GS=Paso_Solver_getGS(A_p,verbose);*/
212 out->level=level;
213 out->n=n;
214 out->n_F=n+1;
215 out->n_block=n_block;
216 out->post_sweeps=options->post_sweeps;
217 out->pre_sweeps=options->pre_sweeps;
218
219 if (level==0 || n<=options->min_coarse_matrix_size) {
220 out->coarsest_level=TRUE;
221 #ifdef MKL
222 out->AOffset1=Paso_SparseMatrix_alloc(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_OFFSET1, out->A->pattern,1,1, FALSE);
223 #pragma omp parallel for private(i) schedule(static)
224 for (i=0;i<out->A->len;++i) {
225 out->AOffset1->val[i]=out->A->val[i];
226 }
227 #else
228 #ifdef UMFPACK
229 #else
230 out->Smoother=Paso_Preconditioner_LocalSmoother_alloc(A_p,TRUE,verbose);
231 #endif
232 #endif
233 } else {
234 out->coarsest_level=FALSE;
235 out->Smoother=Paso_Preconditioner_LocalSmoother_alloc(A_p,TRUE,verbose);
236
237 Paso_Coarsening_Local(mis_marker,A_p, options->coarsening_threshold, options->coarsening_method);
238
239 #pragma omp parallel for private(i) schedule(static)
240 for (i = 0; i < n; ++i) {
241 mis_marker[i]=(mis_marker[i]== PASO_COARSENING_IN_F);
242 counter[i]=mis_marker[i];
243
244 }
245
246 out->n_F=Paso_Util_cumsum(n,counter);
247
248 if (out->n_F==0) {
249 out->coarsest_level=TRUE;
250 level=0;
251 if (verbose) {
252 /*printf("AMLI coarsening eliminates all unknowns, switching to Jacobi preconditioner.\n");*/
253 printf("AMLI coarsening does not eliminate any of the unknowns, switching to Jacobi preconditioner.\n");
254 }
255 }
256 else if (out->n_F==n) {
257 out->coarsest_level=TRUE;
258 level=0;
259 if (verbose) {
260 /*printf("AMLI coarsening eliminates all unknowns, switching to Jacobi preconditioner.\n");*/
261 printf("AMLI coarsening eliminates all of the unknowns, switching to Jacobi preconditioner.\n");
262
263 }
264 } else {
265
266 if (Esys_noError()) {
267
268 /*#pragma omp parallel for private(i) schedule(static)
269 for (i = 0; i < n; ++i) counter[i]=mis_marker[i];
270 out->n_F=Paso_Util_cumsum(n,counter);
271 */
272
273 /*if(level==3) {
274 printf("##TOTAL: %d, ELIMINATED: %d\n",n,out->n_F);
275 for (i = 0; i < n; ++i) {
276 printf("##%d %d\n",i,mis_marker[i]);
277 }
278 }*/
279
280 out->mask_F=MEMALLOC(n,index_t);
281 out->rows_in_F=MEMALLOC(out->n_F,index_t);
282 out->inv_A_FF=MEMALLOC(n_block*n_block*out->n_F,double);
283 out->A_FF_pivot=NULL; /* later use for block size>3 */
284 if (! (Esys_checkPtr(out->mask_F) || Esys_checkPtr(out->inv_A_FF) || Esys_checkPtr(out->rows_in_F) ) ) {
285 /* creates an index for F from mask */
286 #pragma omp parallel for private(i) schedule(static)
287 for (i = 0; i < out->n_F; ++i) out->rows_in_F[i]=-1;
288 #pragma omp parallel for private(i) schedule(static)
289 for (i = 0; i < n; ++i) {
290 if (mis_marker[i]) {
291 out->rows_in_F[counter[i]]=i;
292 out->mask_F[i]=counter[i];
293 } else {
294 out->mask_F[i]=-1;
295 }
296 }
297
298 /* Compute row-sum for getting rs(A_FF)^-1*/
299 #pragma omp parallel for private(i,iPtr,j,S) schedule(static)
300 for (i = 0; i < out->n_F; ++i) {
301 S=0;
302 /*printf("[%d ]: [%d] -> ",i, out->rows_in_F[i]);*/
303 for (iPtr=A_p->pattern->ptr[out->rows_in_F[i]];iPtr<A_p->pattern->ptr[out->rows_in_F[i] + 1]; ++iPtr) {
304 j=A_p->pattern->index[iPtr];
305 /*if (j==out->rows_in_F[i]) printf("diagonal %e",A_p->val[iPtr]);*/
306 if (mis_marker[j])
307 S+=A_p->val[iPtr];
308 }
309 /*printf("-> %e \n",S);*/
310 out->inv_A_FF[i]=1./S;
311 }
312 }
313 }
314
315 /*check whether coarsening process actually makes sense to continue.
316 if coarse matrix at least smaller by 30% then continue, otherwise we stop.*/
317 if ((out->n_F*100/n)<30) {
318 level=1;
319 }
320
321 if ( Esys_noError()) {
322 /* if there are no nodes in the coarse level there is no more work to do */
323 out->n_C=n-out->n_F;
324
325 /*if (out->n_F>500) */
326 out->rows_in_C=MEMALLOC(out->n_C,index_t);
327 out->mask_C=MEMALLOC(n,index_t);
328 if (! (Esys_checkPtr(out->mask_C) || Esys_checkPtr(out->rows_in_C) ) ) {
329 /* creates an index for C from mask */
330 #pragma omp parallel for private(i) schedule(static)
331 for (i = 0; i < n; ++i) counter[i]=! mis_marker[i];
332 Paso_Util_cumsum(n,counter);
333 #pragma omp parallel for private(i) schedule(static)
334 for (i = 0; i < out->n_C; ++i) out->rows_in_C[i]=-1;
335 #pragma omp parallel for private(i) schedule(static)
336 for (i = 0; i < n; ++i) {
337 if (! mis_marker[i]) {
338 out->rows_in_C[counter[i]]=i;
339 out->mask_C[i]=counter[i];
340 } else {
341 out->mask_C[i]=-1;
342 }
343 }
344 }
345 }
346 if ( Esys_noError()) {
347 /* get A_CF block: */
348 out->A_CF=Paso_SparseMatrix_getSubmatrix(A_p,out->n_C,out->n_F,out->rows_in_C,out->mask_F);
349 /* get A_FC block: */
350 out->A_FC=Paso_SparseMatrix_getSubmatrix(A_p,out->n_F,out->n_C,out->rows_in_F,out->mask_C);
351 /* get A_CC block: */
352 schur=Paso_SparseMatrix_getSubmatrix(A_p,out->n_C,out->n_C,out->rows_in_C,out->mask_C);
353
354 }
355 if ( Esys_noError()) {
356 /*find the pattern of the schur complement with fill in*/
357 temp1=Paso_Pattern_multiply(PATTERN_FORMAT_DEFAULT,out->A_CF->pattern,out->A_FC->pattern);
358 temp2=Paso_Pattern_binop(PATTERN_FORMAT_DEFAULT, schur->pattern, temp1);
359 schur_withFillIn=Paso_SparseMatrix_alloc(A_p->type,temp2,1,1, TRUE);
360 Paso_Pattern_free(temp1);
361 Paso_Pattern_free(temp2);
362 }
363 if ( Esys_noError()) {
364 /* copy values over*/
365 #pragma omp parallel for private(i,iPtr,j,iPtr_s,index,where_p) schedule(static)
366 for (i = 0; i < schur_withFillIn->numRows; ++i) {
367 for (iPtr=schur_withFillIn->pattern->ptr[i];iPtr<schur_withFillIn->pattern->ptr[i + 1]; ++iPtr) {
368 j=schur_withFillIn->pattern->index[iPtr];
369 iPtr_s=schur->pattern->ptr[i];
370 index=&(schur->pattern->index[iPtr_s]);
371 where_p=(index_t*)bsearch(&j,
372 index,
373 schur->pattern->ptr[i + 1]-schur->pattern->ptr[i],
374 sizeof(index_t),
375 Paso_comparIndex);
376 if (where_p!=NULL) {
377 schur_withFillIn->val[iPtr]=schur->val[iPtr_s+(index_t)(where_p-index)];
378 }
379 }
380 }
381 Paso_Solver_updateIncompleteSchurComplement(schur_withFillIn,out->A_CF,out->inv_A_FF,out->A_FF_pivot,out->A_FC);
382 out->AMLI_of_Schur=Paso_Solver_getAMLI(schur_withFillIn,level-1,options);
383 }
384 /* allocate work arrays for AMLI application */
385 if (Esys_noError()) {
386 out->x_F=MEMALLOC(n_block*out->n_F,double);
387 out->b_F=MEMALLOC(n_block*out->n_F,double);
388 out->x_C=MEMALLOC(n_block*out->n_C,double);
389 out->b_C=MEMALLOC(n_block*out->n_C,double);
390
391 if (! (Esys_checkPtr(out->x_F) || Esys_checkPtr(out->b_F) || Esys_checkPtr(out->x_C) || Esys_checkPtr(out->b_C) ) ) {
392 #pragma omp parallel for private(i) schedule(static)
393 for (i = 0; i < out->n_F; ++i) {
394 out->x_F[i]=0.;
395 out->b_F[i]=0.;
396 }
397 #pragma omp parallel for private(i) schedule(static)
398 for (i = 0; i < out->n_C; ++i) {
399 out->x_C[i]=0.;
400 out->b_C[i]=0.;
401 }
402 }
403 }
404 Paso_SparseMatrix_free(schur);
405 Paso_SparseMatrix_free(schur_withFillIn);
406 }
407 }
408 }
409 TMPMEMFREE(mis_marker);
410 TMPMEMFREE(counter);
411
412 if (Esys_noError()) {
413 if (verbose && level>0 && !out->coarsest_level) {
414 printf("AMLI: level: %d: %d unknowns eliminated. %d left.\n",level, out->n_F,out->n_C);
415 }
416 return out;
417 } else {
418 Paso_Solver_AMLI_free(out);
419 return NULL;
420 }
421 }
422
423 /************************************************************************************/
424
425 /* apply AMLI precondition b-> x
426
427 in fact it solves
428
429 [ I 0 ] [ A_FF 0 ] [ I invA_FF*A_FC ] [ x_F ] = [b_F]
430 [ A_CF*invA_FF I ] [ 0 S ] [ 0 I ] [ x_C ] = [b_C]
431
432 in the form
433
434 b->[b_F,b_C]
435 x_F=invA_FF*b_F
436 b_C=b_C-A_CF*x_F
437 x_C=AMLI(b_C)
438 b_F=b_F-A_FC*x_C
439 x_F=invA_FF*b_F
440 x<-[x_F,x_C]
441
442 should be called within a parallel region
443 barrier synchronization should be performed to make sure that the input vector available
444
445 */
446
447 void Paso_Solver_solveAMLI(Paso_Solver_AMLI * amli, double * x, double * b) {
448 dim_t i;
449 double time0=0;
450 double *r=NULL, *x0=NULL,*x_F_temp=NULL;
451 bool_t verbose=0;
452
453 dim_t post_sweeps=amli->post_sweeps;
454 dim_t pre_sweeps=amli->pre_sweeps;
455
456 #ifdef UMFPACK
457 Paso_UMFPACK_Handler * ptr=NULL;
458 #endif
459
460 r=MEMALLOC(amli->n,double);
461 x0=MEMALLOC(amli->n,double);
462 x_F_temp=MEMALLOC(amli->n_F,double);
463
464 if (amli->coarsest_level) {
465
466 time0=Esys_timer();
467 /*If all unknown are eliminated then Jacobi is the best preconditioner*/
468 if (amli->n_F==0 || amli->n_F==amli->n) {
469 Paso_Preconditioner_LocalSmoother_solve(amli->A, amli->Smoother,x,b,1,FALSE);
470 }
471 else {
472 #ifdef MKL
473 Paso_MKL1(amli->AOffset1,x,b,verbose);
474 #else
475 #ifdef UMFPACK
476 ptr=(Paso_UMFPACK_Handler *)(amli->solver);
477 Paso_UMFPACK1(&ptr,amli->A,x,b,verbose);
478 amli->solver=(void*) ptr;
479 #else
480 Paso_Preconditioner_LocalSmoother_solve(amli->A,amli->Smoother,x,b,1,FALSE);
481 #endif
482 #endif
483 }
484 time0=Esys_timer()-time0;
485 if (verbose) fprintf(stderr,"timing: DIRECT SOLVER: %e\n",time0);
486
487 } else {
488 /* presmoothing */
489 time0=Esys_timer();
490 Paso_Preconditioner_LocalSmoother_solve(amli->A,amli->Smoother,x,b,pre_sweeps,FALSE);
491 time0=Esys_timer()-time0;
492 if (verbose) fprintf(stderr,"timing: Presmoothing: %e\n",time0);
493 /* end of presmoothing */
494
495
496 time0=Esys_timer();
497 #pragma omp parallel for private(i) schedule(static)
498 for (i=0;i<amli->n;++i) r[i]=b[i];
499
500 /*r=b-Ax*/
501 Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,amli->A,x,1.,r);
502
503 /* r->[b_F,b_C] */
504 #pragma omp parallel for private(i) schedule(static)
505 for (i=0;i<amli->n_F;++i) amli->b_F[i]=r[amli->rows_in_F[i]];
506
507 #pragma omp parallel for private(i) schedule(static)
508 for (i=0;i<amli->n_C;++i) amli->b_C[i]=r[amli->rows_in_C[i]];
509
510 /* x_F=invA_FF*b_F */
511 Paso_Copy(amli->n_F, amli->x_F,amli->b_F);
512 Paso_BlockOps_solveAll(1,amli->n_F,amli->inv_A_FF,amli->A_FF_pivot,amli->x_F);
513
514 /* b_C=b_C-A_CF*x_F */
515 Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,amli->A_CF,amli->x_F,1.,amli->b_C);
516
517 time0=Esys_timer()-time0;
518 if (verbose) fprintf(stderr,"timing: Before next level: %e\n",time0);
519
520 /* x_C=AMLI(b_C) */
521 Paso_Solver_solveAMLI(amli->AMLI_of_Schur,amli->x_C,amli->b_C);
522
523 time0=Esys_timer();
524
525 /* b_F=-A_FC*x_C */
526 Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,amli->A_FC,amli->x_C,0.,amli->b_F);
527 /* x_F_temp=invA_FF*b_F */
528 Paso_Copy(amli->n_F, x_F_temp,amli->b_F);
529 Paso_BlockOps_solveAll(1,amli->n_F,amli->inv_A_FF,amli->A_FF_pivot,x_F_temp);
530
531 #pragma omp parallel for private(i) schedule(static)
532 for (i=0;i<amli->n_F;++i) {
533 amli->x_F[i]+=x_F_temp[i];
534 }
535
536 /* x<-[x_F,x_C] */
537 #pragma omp parallel for private(i) schedule(static)
538 for (i=0;i<amli->n;++i) {
539 if (amli->mask_C[i]>-1) {
540 x[i]+=amli->x_C[amli->mask_C[i]];
541 } else {
542 x[i]+=amli->x_F[amli->mask_F[i]];
543 }
544 }
545
546 time0=Esys_timer()-time0;
547 if (verbose) fprintf(stderr,"timing: After next level: %e\n",time0);
548
549 /*postsmoothing*/
550 time0=Esys_timer();
551 Paso_Preconditioner_LocalSmoother_solve(amli->A,amli->Smoother,x,b,post_sweeps,TRUE);
552 time0=Esys_timer()-time0;
553 if (verbose) fprintf(stderr,"timing: Postsmoothing: %e\n",time0);
554
555 /*end of postsmoothing*/
556
557 }
558 MEMFREE(r);
559 MEMFREE(x0);
560 MEMFREE(x_F_temp);
561 return;
562 }
563
564 /*****************************************************************************
565 *
566 * Copyright (c) 2003-2013 by University of Queensland
567 * http://www.uq.edu.au
568 *
569 * Primary Business: Queensland, Australia
570 * Licensed under the Open Software License version 3.0
571 * http://www.opensource.org/licenses/osl-3.0.php
572 *
573 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
574 * Development since 2012 by School of Earth Sciences
575 *
576 *****************************************************************************/
577
578
579 /***********************************************************************************
580
581 Paso: Coarsening strategies (no MPI)
582
583 the given matrix A is split into the form
584
585
586 [ A_FF A_FC ]
587 A = [ ]
588 [ A_CF A_CC ]
589
590 such that unknowns/equations in set C are weakly connected via A_CC and strongly connected
591 to at least one unknowns/equation in F via A_CF and A_FC. The unknowns/equations in C and F
592 are marked in the marker_F by FALSE and TRUE respectively.
593
594 The weak/strong connection is controlled by coarsening_threshold.
595
596 three strategies are implemented:
597
598 a) YAIR_SHAPIRA (YS): |a_ij| >= theta * |a_ii|
599 b) Ruge-Stueben (RS): |a_ij| >= theta * max_(k<>i) |a_ik|
600 c) Aggregation : |a_ij|^2 >= theta**2 * |a_ii||a_jj|
601
602 where theta = coarsening_threshold/maximum_pattern_degree
603
604 Remark:
605
606 - a strong connection in YAIR_SHAPIRA is a strong connection in Aggregation
607
608 */
609 /************************************************************************************
610
611 Author: artak@uq.edu.au, l.gross@uq.edu.au
612
613 ************************************************************************************/
614
615 #include "Coarsening.h"
616 #include "PasoUtil.h"
617 #include <limits.h>
618
619
620
621 /*Used in Paso_Coarsening_Local_RS_MI*/
622
623 /*Computes how many connections unknown i have in S.
624 bool_t transpose - TRUE if we want to compute how many strong connection of i in S^T, FALSE otherwise.
625 Note that if we first transpose S and then call method on S^T then "transpose" should be set to FALSE.
626 */
627 dim_t how_many(dim_t i,Paso_Pattern * S, bool_t transpose) {
628 dim_t j,n;
629 dim_t total,ltotal;
630 index_t *index,*where_p;
631
632 /*index_t iptr;*/
633 total=0;
634 ltotal=0;
635
636 n=S->numOutput;
637
638 if(transpose) {
639 #pragma omp parallel
640 {
641 ltotal=0;
642 #pragma omp for private(j,index,where_p,ltotal) schedule(static)
643 for (j=0;j<n;++j) {
644 index=&(S->index[S->ptr[j]]);
645 where_p=(index_t*)bsearch(&i,
646 index,
647 S->ptr[j + 1]-S->ptr[j],
648 sizeof(index_t),
649 Paso_comparIndex);
650 if (where_p!=NULL) {
651 ltotal++;
652 }
653 }
654 }
655 #pragma omp critical
656 {
657 total+=ltotal;
658 }
659
660 }
661 else {
662 total=S->ptr[i+1]-S->ptr[i];
663 /*#pragma omp parallel for private(iptr) schedule(static)*/
664 /*for (iptr=S->ptr[i];iptr<S->ptr[i+1]; ++iptr) {
665 if(S->index[iptr]!=i && marker_F[S->index[iptr]]==IS_AVAILABLE)
666 total++;
667 }*/
668
669 }
670
671 if (total==0) total=IS_NOT_AVAILABLE;
672
673 return total;
674 }
675
676
677
678
679
680 Paso_Pattern* Paso_Coarsening_Local_getTranspose(Paso_Pattern* P){
681
682 Paso_Pattern *outpattern=NULL;
683
684 dim_t C=P->numInput;
685 dim_t F=P->numOutput-C;
686 dim_t n=C+F;
687 dim_t i,j;
688 index_t iptr;
689 Paso_IndexListArray* index_list = Paso_IndexListArray_alloc(C);
690
691
692 /*#pragma omp parallel for private(i,iptr,j) schedule(static)*/
693 for (i=0;i<n;++i) {
694 for (iptr=P->ptr[i];iptr<P->ptr[i+1]; ++iptr) {
695 j=P->index[iptr];
696 Paso_IndexListArray_insertIndex(index_list,i,j);
697 }
698 }
699
700 outpattern=Paso_Pattern_fromIndexListArray(0, index_list,0,n,0);
701
702 /* clean up */
703 Paso_IndexListArray_free(index_list);
704
705 return outpattern;
706 }
707
708
709 /************** BLOCK COARSENING *********************/
710
711 void Paso_Coarsening_Local_Standard_Block(Paso_SparseMatrix* A, index_t* marker_F, double theta)
712 {
713 const dim_t n=A->numRows;
714
715 dim_t i,j,k;
716 index_t iptr,jptr;
717 /*index_t *index,*where_p;*/
718 double threshold,max_offdiagonal;
719 dim_t *lambda; /*measure of importance */
720 dim_t maxlambda=0;
721 index_t index_maxlambda=0;
722 double time0=0;
723 bool_t verbose=0;
724 dim_t n_block=A->row_block_size;
725
726 double fnorm=0;
727 dim_t bi;
728
729 Paso_Pattern *S=NULL;
730 Paso_Pattern *ST=NULL;
731 Paso_IndexListArray* index_list = Paso_IndexListArray_alloc(n);
732
733 time0=Esys_timer();
734 k=0;
735 /*Paso_Coarsening_Local_getReport(n,marker_F);*/
736 /*printf("Blocks %d %d\n",n_block,A->len);*/
737
738 /*S_i={j \in N_i; i strongly coupled to j}*/
739 #pragma omp parallel for private(i,bi,fnorm,iptr,max_offdiagonal,threshold,j) schedule(static)
740 for (i=0;i<n;++i) {
741 if(marker_F[i]==IS_AVAILABLE) {
742 max_offdiagonal = DBL_MIN;
743 for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {
744 j=A->pattern->index[iptr];
745 if( j != i){
746 fnorm=0;
747 for(bi=0;bi<n_block*n_block;++bi)
748 {
749 fnorm+=A->val[iptr*n_block*n_block+bi]*A->val[iptr*n_block*n_block+bi];
750 }
751 fnorm=sqrt(fnorm);
752 max_offdiagonal = MAX(max_offdiagonal,fnorm);
753 }
754 }
755 threshold = theta*max_offdiagonal;
756 for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {
757 j=A->pattern->index[iptr];
758 if( j != i){
759 fnorm=0;
760 for(bi=0;bi<n_block*n_block;++bi)
761 {
762 fnorm+=A->val[iptr*n_block*n_block+bi]*A->val[iptr*n_block*n_block+bi];
763 }
764 fnorm=sqrt(fnorm);
765 if(fnorm>=threshold) {
766 Paso_IndexListArray_insertIndex(index_list,i,j);
767 }
768 }
769 }
770 }
771 }
772
773 S=Paso_Pattern_fromIndexListArray(0,index_list,0,A->pattern->numInput,0);
774 ST=Paso_Coarsening_Local_getTranspose(S);
775
776 /*printf("Patterns len %d %d\n",S->len,ST->len);*/
777
778 time0=Esys_timer()-time0;
779 if (verbose) fprintf(stdout,"timing: RS filtering and pattern creation: %e\n",time0);
780
781 lambda=TMPMEMALLOC(n,dim_t);
782
783 #pragma omp parallel for private(i) schedule(static)
784 for (i=0;i<n;++i) { lambda[i]=IS_NOT_AVAILABLE; }
785
786 /*S_i={j \in N_i; i strongly coupled to j}*/
787
788 /*
789 #pragma omp parallel for private(i,iptr,lk) schedule(static)
790 for (i=0;i<n;++i) {
791 lk=0;
792 for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {
793 if(ABS(A->val[iptr])>1.e-15 && A->pattern->index[iptr]!=i )
794 lk++;
795 }
796 #pragma omp critical
797 k+=lk;
798 if(k==0) {
799 marker_F[i]=TRUE;
800 }
801 }
802 */
803
804
805 k=0;
806 maxlambda=0;
807
808 time0=Esys_timer();
809
810 for (i=0;i<n;++i) {
811 if(marker_F[i]==IS_AVAILABLE) {
812 lambda[i]=how_many(k,ST,FALSE); /* if every row is available then k and i are the same.*/
813 /*lambda[i]=how_many(i,S,TRUE);*/
814 /*printf("lambda[%d]=%d, k=%d \n",i,lambda[i],k);*/
815 k++;
816 if(maxlambda<lambda[i]) {
817 maxlambda=lambda[i];
818 index_maxlambda=i;
819 }
820 }
821 }
822
823 k=0;
824 time0=Esys_timer()-time0;
825 if (verbose) fprintf(stdout,"timing: Lambda computations at the beginning: %e\n",time0);
826
827 time0=Esys_timer();
828
829 /*Paso_Coarsening_Local_getReport(n,marker_F);*/
830
831 while (Paso_Util_isAny(n,marker_F,IS_AVAILABLE)) {
832
833 if(index_maxlambda<0) {
834 break;
835 }
836
837 i=index_maxlambda;
838 if(marker_F[i]==IS_AVAILABLE) {
839 marker_F[index_maxlambda]=FALSE;
840 lambda[index_maxlambda]=IS_NOT_AVAILABLE;
841 for (iptr=ST->ptr[i];iptr<ST->ptr[i+1]; ++iptr) {
842 j=ST->index[iptr];
843 if(marker_F[j]==IS_AVAILABLE) {
844 marker_F[j]=TRUE;
845 lambda[j]=IS_NOT_AVAILABLE;
846 for (jptr=S->ptr[j];jptr<S->ptr[j+1]; ++jptr) {
847 k=S->index[jptr];
848 if(marker_F[k]==IS_AVAILABLE) {
849 lambda[k]++;
850 }
851 }
852 }
853 }
854 for (iptr=S->ptr[i];iptr<S->ptr[i+1]; ++iptr) {
855 j=S->index[iptr];
856 if(marker_F[j]==IS_AVAILABLE) {
857 lambda[j]--;
858 }
859 }
860 }
861
862 /* Used when transpose of S is not available */
863 /*
864 for (i=0;i<n;++i) {
865 if(marker_F[i]==IS_AVAILABLE) {
866 if (i==index_maxlambda) {
867 marker_F[index_maxlambda]=FALSE;
868 lambda[index_maxlambda]=-1;
869 for (j=0;j<n;++j) {
870 if(marker_F[j]==IS_AVAILABLE) {
871 index=&(S->index[S->ptr[j]]);
872 where_p=(index_t*)bsearch(&i,
873 index,
874 S->ptr[j + 1]-S->ptr[j],
875 sizeof(index_t),
876 Paso_comparIndex);
877 if (where_p!=NULL) {
878 marker_F[j]=TRUE;
879 lambda[j]=-1;
880 for (iptr=S->ptr[j];iptr<S->ptr[j+1]; ++iptr) {
881 k=S->index[iptr];
882 if(marker_F[k]==IS_AVAILABLE) {
883 lambda[k]++;
884 }
885 }
886 }
887
888 }
889 }
890
891 }
892 }
893 }
894 */
895 index_maxlambda=arg_max(n,lambda, IS_NOT_AVAILABLE);
896 }
897
898 time0=Esys_timer()-time0;
899 if (verbose) fprintf(stdout,"timing: Loop : %e\n",time0);
900
901 /*Paso_Coarsening_Local_getReport(n,marker_F);*/
902
903 #pragma omp parallel for private(i) schedule(static)
904 for (i=0;i<n;++i)
905 if(marker_F[i]==IS_AVAILABLE) {
906 marker_F[i]=TRUE;
907 }
908
909 /*Paso_Coarsening_Local_getReport(n,marker_F);*/
910
911 TMPMEMFREE(lambda);
912
913 /* clean up */
914 Paso_IndexListArray_free(index_list);
915 Paso_Pattern_free(S);
916
917 /* swap to TRUE/FALSE in marker_F */
918 #pragma omp parallel for private(i) schedule(static)
919 for (i=0;i<n;i++) marker_F[i]=(marker_F[i]==TRUE)? TRUE : FALSE;
920
921 }
922
923
924
925 #undef IS_AVAILABLE
926 #undef IS_NOT_AVAILABLE
927 #undef TRUE
928 #undef FALSE
929
930 #undef IS_UNDECIDED
931 #undef IS_STRONG
932 #undef IS_WEAK
933
934 #undef TRUEB
935 #undef TRUEB
936

  ViewVC Help
Powered by ViewVC 1.1.26