/[escript]/trunk/paso/src/AMG.cpp
ViewVC logotype

Contents of /trunk/paso/src/AMG.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 5228 - (show annotations)
Mon Oct 27 23:31:46 2014 UTC (5 years, 7 months ago) by caltinay
File size: 32696 byte(s)
updated UMFPACK def in places...

1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2014 by University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Open Software License version 3.0
9 * http://www.opensource.org/licenses/osl-3.0.php
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development 2012-2013 by School of Earth Sciences
13 * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 *
15 *****************************************************************************/
16
17
18 /****************************************************************************/
19
20 /* Paso: AMG preconditioner (local version) */
21
22 /****************************************************************************/
23
24 /* Author: artak@uq.edu.au, l.gross@uq.edu.au */
25
26 /****************************************************************************/
27
28 #define SHOW_TIMING 0
29
30 #include "Paso.h"
31 #include "Preconditioner.h"
32 #include "MergedSolver.h"
33 #include "Options.h"
34 #include "PasoUtil.h"
35 #include "MKL.h"
36
37 namespace paso {
38
39 void Preconditioner_AMG_free(Preconditioner_AMG* in)
40 {
41 if (in!=NULL) {
42 Preconditioner_Smoother_free(in->Smoother);
43 Preconditioner_AMG_free(in->AMG_C);
44 delete[] in->r;
45 delete[] in->x_C;
46 delete[] in->b_C;
47 delete in->merged_solver;
48 delete in;
49 }
50 }
51
52 int Preconditioner_AMG_getMaxLevel(const Preconditioner_AMG* in)
53 {
54 if (in->AMG_C == NULL) {
55 return in->level;
56 } else {
57 return Preconditioner_AMG_getMaxLevel(in->AMG_C);
58 }
59 }
60
61 double Preconditioner_AMG_getCoarseLevelSparsity(const Preconditioner_AMG* in)
62 {
63 if (in->AMG_C == NULL) {
64 if (in->A_C == NULL) {
65 return 1.;
66 } else {
67 return in->A_C->getSparsity();
68 }
69 }
70 return Preconditioner_AMG_getCoarseLevelSparsity(in->AMG_C);
71 }
72
73 dim_t Preconditioner_AMG_getNumCoarseUnknowns(const Preconditioner_AMG* in)
74 {
75 if (in->AMG_C == NULL) {
76 if (in->A_C == NULL) {
77 return 0;
78 } else {
79 return in->A_C->getTotalNumRows();
80 }
81 }
82 return Preconditioner_AMG_getNumCoarseUnknowns(in->AMG_C);
83 }
84
85 /****************************************************************************
86
87 constructs AMG
88
89 *****************************************************************************/
90 Preconditioner_AMG* Preconditioner_AMG_alloc(SystemMatrix_ptr A, int level,
91 Options* options)
92 {
93 Preconditioner_AMG* out = NULL;
94 const bool verbose = options->verbose;
95 const dim_t my_n = A->mainBlock->numRows;
96 const dim_t overlap_n = A->row_coupleBlock->numRows;
97 const dim_t n = my_n + overlap_n;
98 const dim_t n_block = A->row_block_size;
99 const double sparsity = A->getSparsity();
100 const dim_t global_n = A->getGlobalNumRows();
101
102 // is the input matrix A suitable for coarsening?
103 if (sparsity >= options->min_coarse_sparsity ||
104 global_n <= options->min_coarse_matrix_size ||
105 level > options->level_max) {
106 if (verbose) {
107 /*
108 print stopping condition:
109 - 'SPAR' = min_coarse_matrix_sparsity exceeded
110 - 'SIZE' = min_coarse_matrix_size exceeded
111 - 'LEVEL' = level_max exceeded
112 */
113 std::cout << "Preconditioner: AMG: termination of coarsening by ";
114
115 if (sparsity >= options->min_coarse_sparsity)
116 std::cout << "SPAR";
117
118 if (global_n <= options->min_coarse_matrix_size)
119 std::cout << "SIZE";
120
121 if (level > options->level_max)
122 std::cout << "LEVEL";
123
124 std::cout << std::endl << "Preconditioner: AMG level " << level
125 << " (limit = " << options->level_max << ") stopped."
126 << " Sparsity = " << sparsity << " (limit = "
127 << options->min_coarse_sparsity << "), unknowns = " << global_n
128 << " (limit = " << options->min_coarse_matrix_size << ")"
129 << std::endl;
130 }
131 return out;
132 }
133
134 ///// Start Coarsening /////
135
136 double time0 = 0;
137 // this is the table for strong connections combining mainBlock,
138 // col_coupleBlock and row_coupleBlock
139 const dim_t len_S = A->mainBlock->pattern->len +
140 A->col_coupleBlock->pattern->len +
141 A->row_coupleBlock->pattern->len +
142 A->row_coupleBlock->numRows*A->col_coupleBlock->numCols;
143
144 dim_t* degree_S = new dim_t[n];
145 index_t* offset_S = new index_t[n];
146 index_t* S = new index_t[len_S];
147 dim_t* degree_ST = new dim_t[n];
148 index_t* offset_ST = new index_t[n];
149 index_t* ST = new index_t[len_S];
150 AMGBlockSelect* F_marker = new AMGBlockSelect[n];
151 index_t* counter = new index_t[n];
152 const double theta = options->coarsening_threshold;
153 const double tau = options->diagonal_dominance_threshold;
154
155 // make sure that corresponding values in the row_coupleBlock and
156 // col_coupleBlock are identical
157 A->copyColCoupleBlock();
158 A->copyRemoteCoupleBlock(false);
159
160 // set splitting of unknowns
161 time0 = Esys_timer();
162 if (n_block > 1) {
163 Preconditioner_AMG_setStrongConnections_Block(A, degree_S, offset_S,
164 S, theta, tau);
165 } else {
166 Preconditioner_AMG_setStrongConnections(A, degree_S, offset_S, S,
167 theta, tau);
168 }
169 Preconditioner_AMG_transposeStrongConnections(n, degree_S, offset_S, S,
170 n, degree_ST, offset_ST, ST);
171 //A->extendedRowsForST(degree_ST, offset_ST, ST);
172
173 Preconditioner_AMG_CIJPCoarsening(n, my_n, F_marker, degree_S, offset_S, S,
174 degree_ST, offset_ST, ST,
175 A->col_coupler->connector,
176 A->col_distribution);
177
178 // in BoomerAMG if interpolation is used FF connectivity is required
179 //if (options->interpolation_method == PASO_CLASSIC_INTERPOLATION_WITH_FF_COUPLING)
180 // Preconditioner_AMG_enforceFFConnectivity(n, A->pattern->ptr, degree_S, S, F_marker);
181
182 options->coarsening_selection_time = Esys_timer()-time0 +
183 std::max(0., options->coarsening_selection_time);
184 if (Esys_noError()) {
185 #pragma omp parallel for
186 for (dim_t i = 0; i < n; ++i)
187 F_marker[i] = (F_marker[i]==PASO_AMG_IN_F ? PASO_AMG_IN_C : PASO_AMG_IN_F);
188
189 // count number of unknowns to be eliminated:
190 dim_t my_n_F = util::cumsum_maskedTrue(my_n, counter, (int*)F_marker);
191 const dim_t n_F = util::cumsum_maskedTrue(n, counter, (int*)F_marker);
192 // collect my_n_F values on all processes, a direct solver should
193 // be used if any my_n_F value is 0
194 dim_t* F_set = new dim_t[A->mpi_info->size];
195 #ifdef ESYS_MPI
196 MPI_Allgather(&my_n_F, 1, MPI_INT, F_set, 1, MPI_INT, A->mpi_info->comm);
197 #endif
198 dim_t global_n_F = 0;
199 bool F_flag = true;
200 for (dim_t i=0; i<A->mpi_info->size; i++) {
201 global_n_F += F_set[i];
202 if (F_set[i] == 0)
203 F_flag = false;
204 }
205 delete[] F_set;
206
207 const dim_t global_n_C = global_n-global_n_F;
208 if (verbose)
209 std::cout << "Preconditioner: AMG (non-local) level " << level
210 << ": " << global_n_F << " unknowns are flagged for"
211 << " elimination. " << global_n_C << " left." << std::endl;
212
213 //if (n_F == 0) { nasty case. a direct solver should be used!
214 if (F_flag) {
215 out = new Preconditioner_AMG;
216 out->level = level;
217 out->post_sweeps = options->post_sweeps;
218 out->pre_sweeps = options->pre_sweeps;
219 out->r = NULL;
220 out->x_C = NULL;
221 out->b_C = NULL;
222 out->AMG_C = NULL;
223 out->Smoother = NULL;
224 out->merged_solver = NULL;
225 if (Esys_noError()) {
226 out->Smoother = Preconditioner_Smoother_alloc(A,
227 (options->smoother == PASO_JACOBI), 0, verbose);
228
229 if (global_n_C != 0) {
230 index_t* mask_C = new index_t[n];
231 index_t* rows_in_F = new index_t[n_F];
232 // create mask of C nodes with value >-1, gives new id
233 const dim_t n_C = util::cumsum_maskedFalse(n, mask_C,
234 (int*)F_marker);
235 const dim_t my_n_C = my_n-my_n_F;
236 // if nothing has been removed we have a diagonal dominant
237 // matrix and we just run a few steps of the smoother
238
239 out->x_C = new double[n_block*my_n_C];
240 out->b_C = new double[n_block*my_n_C];
241 out->r = new double[n_block*my_n];
242
243 if (Esys_noError()) {
244 // creates index for F
245 #pragma omp parallel for
246 for (dim_t i = 0; i < n; ++i) {
247 if (F_marker[i])
248 rows_in_F[counter[i]] = i;
249 }
250 // get Prolongation
251 time0 = Esys_timer();
252 out->P = Preconditioner_AMG_getProlongation(A,
253 offset_S, degree_S, S, n_C, mask_C,
254 options->interpolation_method);
255 }
256
257 // construct Restriction operator as transposed of
258 // Prolongation operator:
259 if (Esys_noError()) {
260 time0 = Esys_timer();
261 out->R = Preconditioner_AMG_getRestriction(out->P);
262 if (SHOW_TIMING)
263 std::cout << "timing: level " << level
264 << ": getTranspose: " << Esys_timer()-time0
265 << std::endl;
266 }
267 // construct coarse level matrix
268 SystemMatrix_ptr A_C;
269 if (Esys_noError()) {
270 time0 = Esys_timer();
271 A_C = Preconditioner_AMG_buildInterpolationOperator(A, out->P, out->R);
272 if (SHOW_TIMING)
273 std::cout << "timing: level " << level
274 << ": construct coarse matrix: "
275 << Esys_timer()-time0 << std::endl;
276
277 out->AMG_C = Preconditioner_AMG_alloc(A_C, level+1, options);
278 out->A_C = A_C;
279 if (out->AMG_C == NULL) {
280 // merge the system matrix into 1 rank when
281 // it's not suitable coarsening due to the
282 // total number of unknowns being too small
283 out->merged_solver = new MergedSolver(A_C, options);
284 }
285 }
286 delete[] mask_C;
287 delete[] rows_in_F;
288 }
289 }
290 }
291 }
292 delete[] counter;
293 delete[] F_marker;
294 delete[] degree_S;
295 delete[] offset_S;
296 delete[] S;
297 delete[] degree_ST;
298 delete[] offset_ST;
299 delete[] ST;
300
301 if (Esys_noError()) {
302 return out;
303 } else {
304 Preconditioner_AMG_free(out);
305 return NULL;
306 }
307 }
308
309
310 void Preconditioner_AMG_solve(SystemMatrix_ptr A,
311 Preconditioner_AMG* amg, double* x, double* b)
312 {
313 const dim_t n = A->mainBlock->numRows * A->mainBlock->row_block_size;
314 const dim_t post_sweeps=amg->post_sweeps;
315 const dim_t pre_sweeps=amg->pre_sweeps;
316
317 // presmoothing
318 double time0 = Esys_timer();
319 Preconditioner_Smoother_solve(A, amg->Smoother, x, b, pre_sweeps, false);
320 time0 = Esys_timer()-time0;
321 if (SHOW_TIMING)
322 std::cout << "timing: level " << amg->level << ": Presmoothing: "
323 << time0 << std::endl;
324 // end of presmoothing
325
326 time0=Esys_timer();
327 // r <- b
328 util::copy(n, amg->r, b);
329 // r = r-Ax
330 SystemMatrix_MatrixVector_CSR_OFFSET0(-1.,A,x,1.,amg->r);
331 // b_C = R*r
332 SystemMatrix_MatrixVector_CSR_OFFSET0(1., amg->R, amg->r, 0., amg->b_C);
333 time0 = Esys_timer()-time0;
334
335 // coarse level solve
336 if (amg->AMG_C == NULL) {
337 time0 = Esys_timer();
338 // A_C is the coarsest level
339 amg->merged_solver->solve(amg->x_C, amg->b_C);
340 if (SHOW_TIMING)
341 std::cout << "timing: level " << amg->level << ": DIRECT SOLVER: "
342 << Esys_timer()-time0 << std::endl;
343 } else {
344 // x_C = AMG(b_C)
345 Preconditioner_AMG_solve(amg->A_C, amg->AMG_C, amg->x_C, amg->b_C);
346 }
347
348 time0 = time0+Esys_timer();
349 // x = x + P*x_c
350 SystemMatrix_MatrixVector_CSR_OFFSET0(1., amg->P, amg->x_C, 1., x);
351
352 // postsmoothing:
353 // solve Ax=b with initial guess x
354 time0 = Esys_timer();
355 Preconditioner_Smoother_solve(A, amg->Smoother, x, b, post_sweeps, true);
356 time0 = Esys_timer()-time0;
357 if (SHOW_TIMING)
358 std::cout << "timing: level " << amg->level << ": Postsmoothing: "
359 << time0 << std::endl;
360 }
361
362 /// theta = threshold for strong connections
363 /// tau = threshold for diagonal dominance
364 ///
365 /// S_i={j \in N_i; i strongly coupled to j}
366 /// in the sense that |A_{ij}| >= theta * max_k |A_{ik}|
367 void Preconditioner_AMG_setStrongConnections(SystemMatrix_ptr A,
368 dim_t* degree_S,
369 index_t* offset_S, index_t* S,
370 double theta, double tau)
371 {
372 const dim_t my_n=A->mainBlock->numRows;
373 const dim_t overlap_n=A->row_coupleBlock->numRows;
374 index_t iptr, i;
375 double *threshold_p=NULL;
376 threshold_p = new double[2*my_n];
377
378 #pragma omp parallel for private(i,iptr) schedule(static)
379 for (i=0;i<my_n;++i) {
380 double max_offdiagonal = 0.;
381 register double sum_row=0;
382 register double main_row=0;
383 register dim_t kdeg=0;
384 register const index_t koffset=A->mainBlock->pattern->ptr[i]+A->col_coupleBlock->pattern->ptr[i];
385
386 // collect information for row i:
387 #pragma ivdep
388 for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
389 register index_t j=A->mainBlock->pattern->index[iptr];
390 const double fnorm=std::abs(A->mainBlock->val[iptr]);
391 if(j != i) {
392 max_offdiagonal = std::max(max_offdiagonal,fnorm);
393 sum_row+=fnorm;
394 } else {
395 main_row=fnorm;
396 }
397 }
398
399 #pragma ivdep
400 for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
401 const double fnorm = std::abs(A->col_coupleBlock->val[iptr]);
402
403 max_offdiagonal = std::max(max_offdiagonal,fnorm);
404 sum_row+=fnorm;
405 }
406
407 // inspect row i:
408 {
409 const double threshold = theta*max_offdiagonal;
410 threshold_p[2*i+1]=threshold;
411 if (tau*main_row < sum_row) { // no diagonal dominance
412 threshold_p[2*i]=1;
413 #pragma ivdep
414 for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
415 const index_t j=A->mainBlock->pattern->index[iptr];
416 if(std::abs(A->mainBlock->val[iptr])>threshold && i!=j) {
417 S[koffset+kdeg] = j;
418 kdeg++;
419 }
420 }
421 #pragma ivdep
422 for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
423 const index_t j=A->col_coupleBlock->pattern->index[iptr];
424 if(std::abs(A->col_coupleBlock->val[iptr])>threshold) {
425 S[koffset+kdeg] = j + my_n;
426 kdeg++;
427 }
428 }
429 } else {
430 threshold_p[2*i]=-1;
431 }
432 }
433 offset_S[i]=koffset;
434 degree_S[i]=kdeg;
435 }
436
437 // now we need to distribute the threshold and the diagonal dominance
438 // indicator
439 if (A->mpi_info->size > 1) {
440 const index_t koffset_0 =
441 A->mainBlock->pattern->ptr[my_n]+A->col_coupleBlock->pattern->ptr[my_n]
442 -A->mainBlock->pattern->ptr[0]-A->col_coupleBlock->pattern->ptr[0];
443
444 Coupler_ptr threshold_coupler(new Coupler(A->row_coupler->connector, 2));
445 threshold_coupler->startCollect(threshold_p);
446 double* remote_threshold = threshold_coupler->finishCollect();
447
448 #pragma omp parallel for private(i,iptr) schedule(static)
449 for (i=0; i<overlap_n; i++) {
450 const double threshold = remote_threshold[2*i+1];
451 register dim_t kdeg=0;
452 register const index_t koffset=koffset_0+A->row_coupleBlock->pattern->ptr[i]+A->remote_coupleBlock->pattern->ptr[i];
453 if (remote_threshold[2*i]>0) {
454 #pragma ivdep
455 for (iptr=A->row_coupleBlock->pattern->ptr[i];iptr<A->row_coupleBlock->pattern->ptr[i+1]; ++iptr) {
456 register index_t j=A->row_coupleBlock->pattern->index[iptr];
457 if(std::abs(A->row_coupleBlock->val[iptr])>threshold) {
458 S[koffset+kdeg] = j ;
459 kdeg++;
460 }
461 }
462
463 #pragma ivdep
464 for (iptr=A->remote_coupleBlock->pattern->ptr[i];iptr<A->remote_coupleBlock->pattern->ptr[i+1]; iptr++) {
465 register index_t j=A->remote_coupleBlock->pattern->index[iptr];
466 if(std::abs(A->remote_coupleBlock->val[iptr])>threshold && i!=j) {
467 S[koffset+kdeg] = j + my_n;
468 kdeg++;
469 }
470 }
471 }
472 offset_S[i+my_n]=koffset;
473 degree_S[i+my_n]=kdeg;
474 }
475 }
476 delete[] threshold_p;
477 }
478
479 // theta = threshold for strong connections
480 // tau = threshold for diagonal dominance
481 // S_i={j \in N_i; i strongly coupled to j}
482 // in the sense that |A_{ij}|_F >= theta * max_k |A_{ik}|_F
483 void Preconditioner_AMG_setStrongConnections_Block(SystemMatrix_ptr A,
484 dim_t* degree_S,
485 index_t* offset_S,
486 index_t* S,
487 double theta, double tau)
488 {
489 const dim_t block_size=A->block_size;
490 const dim_t my_n=A->mainBlock->numRows;
491 const dim_t overlap_n=A->row_coupleBlock->numRows;
492 double* threshold_p = new double[2*my_n];
493 index_t iptr, i, bi;
494
495 #pragma omp parallel private(i,iptr,bi)
496 {
497 dim_t max_deg=0;
498
499 #pragma omp for schedule(static)
500 for (i=0; i<my_n; ++i)
501 max_deg=std::max(max_deg, A->mainBlock->pattern->ptr[i+1]-A->mainBlock->pattern->ptr[i]
502 +A->col_coupleBlock->pattern->ptr[i+1]-A->col_coupleBlock->pattern->ptr[i]);
503
504 double* rtmp = new double[max_deg];
505
506 #pragma omp for schedule(static)
507 for (i=0;i<my_n;++i) {
508 double max_offdiagonal = 0.;
509 register double sum_row=0;
510 register double main_row=0;
511 register index_t rtmp_offset=-A->mainBlock->pattern->ptr[i];
512 register dim_t kdeg=0;
513 const index_t koffset=A->mainBlock->pattern->ptr[i]+A->col_coupleBlock->pattern->ptr[i];
514
515 /* collect information for row i: */
516 for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
517 const index_t j=A->mainBlock->pattern->index[iptr];
518 double fnorm=0;
519 #pragma ivdep
520 for(bi=0;bi<block_size;++bi) {
521 const double rtmp2 = A->mainBlock->val[iptr*block_size+bi];
522 fnorm+=rtmp2*rtmp2;
523 }
524 fnorm=sqrt(fnorm);
525 rtmp[iptr+rtmp_offset]=fnorm;
526
527 if( j != i) {
528 max_offdiagonal = std::max(max_offdiagonal,fnorm);
529 sum_row+=fnorm;
530 } else {
531 main_row=fnorm;
532 }
533 }
534 rtmp_offset+=A->mainBlock->pattern->ptr[i+1]-A->col_coupleBlock->pattern->ptr[i];
535 for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
536 double fnorm=0;
537 #pragma ivdep
538 for(bi=0;bi<block_size;++bi) {
539 const double rtmp2 = A->col_coupleBlock->val[iptr*block_size+bi];
540 fnorm+=rtmp2*rtmp2;
541 }
542 fnorm=sqrt(fnorm);
543
544 rtmp[iptr+rtmp_offset]=fnorm;
545 max_offdiagonal = std::max(max_offdiagonal,fnorm);
546 sum_row+=fnorm;
547 }
548
549 // inspect row i:
550 {
551 const double threshold = theta*max_offdiagonal;
552 rtmp_offset=-A->mainBlock->pattern->ptr[i];
553
554 threshold_p[2*i+1]=threshold;
555 if (tau*main_row < sum_row) { /* no diagonal dominance */
556 threshold_p[2*i]=1;
557 #pragma ivdep
558 for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
559 const index_t j=A->mainBlock->pattern->index[iptr];
560 if(rtmp[iptr+rtmp_offset] > threshold && i!=j) {
561 S[koffset+kdeg] = j;
562 kdeg++;
563 }
564 }
565 rtmp_offset+=A->mainBlock->pattern->ptr[i+1]-A->col_coupleBlock->pattern->ptr[i];
566 #pragma ivdep
567 for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
568 const index_t j=A->col_coupleBlock->pattern->index[iptr];
569 if( rtmp[iptr+rtmp_offset] >threshold) {
570 S[koffset+kdeg] = j + my_n;
571 kdeg++;
572 }
573 }
574 } else {
575 threshold_p[2*i]=-1;
576 }
577 }
578 offset_S[i]=koffset;
579 degree_S[i]=kdeg;
580 }
581 delete[] rtmp;
582 } // parallel section
583
584 // now we need to distribute the threshold and the diagonal dominance
585 // indicator
586 if (A->mpi_info->size > 1) {
587 const index_t koffset_0 =
588 A->mainBlock->pattern->ptr[my_n]+A->col_coupleBlock->pattern->ptr[my_n]
589 -A->mainBlock->pattern->ptr[0]-A->col_coupleBlock->pattern->ptr[0];
590
591 Coupler_ptr threshold_coupler(new Coupler(A->row_coupler->connector, 2));
592 threshold_coupler->startCollect(threshold_p);
593 double* remote_threshold = threshold_coupler->finishCollect();
594
595 #pragma omp parallel for private(i,iptr) schedule(static)
596 for (i=0; i<overlap_n; i++) {
597 const double threshold2 = remote_threshold[2*i+1]*remote_threshold[2*i+1];
598 register dim_t kdeg=0;
599 const index_t koffset = koffset_0+A->row_coupleBlock->pattern->ptr[i]+A->remote_coupleBlock->pattern->ptr[i];
600 if (remote_threshold[2*i]>0) {
601 #pragma ivdep
602 for (iptr=A->row_coupleBlock->pattern->ptr[i];iptr<A->row_coupleBlock->pattern->ptr[i+1]; ++iptr) {
603 const index_t j=A->row_coupleBlock->pattern->index[iptr];
604 double fnorm2=0;
605 #pragma ivdep
606 for(bi=0;bi<block_size;++bi) {
607 const double rtmp2 = A->row_coupleBlock->val[iptr*block_size+bi];
608 fnorm2+=rtmp2*rtmp2;
609 }
610
611 if(fnorm2 > threshold2 ) {
612 S[koffset+kdeg] = j ;
613 kdeg++;
614 }
615 }
616
617 #pragma ivdep
618 for (iptr=A->remote_coupleBlock->pattern->ptr[i];iptr<A->remote_coupleBlock->pattern->ptr[i+1]; ++iptr) {
619 const index_t j=A->remote_coupleBlock->pattern->index[iptr];
620 double fnorm2 = 0.;
621 #pragma ivdep
622 for (bi=0; bi<block_size; ++bi) {
623 const double v = A->remote_coupleBlock->val[iptr*block_size+bi];
624 fnorm2 += v*v;
625 }
626 if (fnorm2 > threshold2 && i != j) {
627 S[koffset+kdeg] = j + my_n;
628 kdeg++;
629 }
630 }
631 }
632 offset_S[i+my_n]=koffset;
633 degree_S[i+my_n]=kdeg;
634 }
635 }
636 delete[] threshold_p;
637 }
638
639 void Preconditioner_AMG_transposeStrongConnections(dim_t n,
640 const dim_t* degree_S, const index_t* offset_S, const index_t* S,
641 const dim_t nT, dim_t* degree_ST, index_t* offset_ST, index_t* ST)
642 {
643 #pragma omp parallel for
644 for (index_t i=0; i<nT; ++i) {
645 degree_ST[i]=0;
646 }
647 for (index_t i=0; i<n ;++i) {
648 for (dim_t p=0; p<degree_S[i]; ++p)
649 degree_ST[S[offset_S[i]+p]]++;
650 }
651 dim_t len=0;
652 for (index_t i=0; i<nT; ++i) {
653 offset_ST[i]=len;
654 len+=degree_ST[i];
655 degree_ST[i]=0;
656 }
657 for (index_t i=0; i<n; ++i) {
658 for (dim_t p=0; p<degree_S[i]; ++p) {
659 const index_t j = S[offset_S[i]+p];
660 ST[offset_ST[j]+degree_ST[j]]=i;
661 degree_ST[j]++;
662 }
663 }
664 }
665
666 void Preconditioner_AMG_CIJPCoarsening(dim_t n, dim_t my_n,
667 AMGBlockSelect* split_marker,
668 const dim_t* degree_S,
669 const index_t* offset_S,
670 const index_t* S,
671 const dim_t* degree_ST,
672 const index_t* offset_ST,
673 const index_t* ST,
674 const_Connector_ptr col_connector,
675 const_Distribution_ptr col_dist)
676 {
677 Coupler_ptr w_coupler(new Coupler(col_connector, 1));
678 double* w = new double[n];
679 double* Status = new double[n];
680 double* random = col_dist->createRandomVector(1);
681 index_t* ST_flag = new index_t[offset_ST[n-1] + degree_ST[n-1]];
682 dim_t i, numUndefined, iter=0;
683 index_t iptr, jptr, kptr;
684
685 #pragma omp parallel for private(i)
686 for (i=0; i<my_n; ++i) {
687 w[i]=degree_ST[i]+random[i];
688 if (degree_ST[i] < 1) {
689 Status[i]=-100; // F point
690 } else {
691 Status[i]=1; // status undefined
692 }
693 }
694
695 #pragma omp parallel for private(i, iptr)
696 for (i=0; i<n; ++i) {
697 for (iptr =0 ; iptr < degree_ST[i]; ++iptr) {
698 ST_flag[offset_ST[i]+iptr] = 1;
699 }
700 }
701
702 numUndefined = col_dist->numPositives(Status, 1);
703 //printf("coarsening loop start: num of undefined rows = %d \n",numUndefined);
704 iter=0;
705 while (numUndefined > 0) {
706 w_coupler->fillOverlap(n, w);
707
708 // calculate the maximum value of neighbours following active strong
709 // connections:
710 // w2[i]=MAX(w[k]) with k in ST[i] or S[i] and (i,k) connection
711 // is still active
712 #pragma omp parallel for private(i, iptr)
713 for (i=0; i<my_n; ++i) {
714 if (Status[i] > 0) { // status is still undefined
715 bool inD = true;
716 const double wi=w[i];
717
718 for (iptr=0; iptr < degree_S[i]; ++iptr) {
719 const index_t k=S[offset_S[i]+iptr];
720 const index_t* start_p = &ST[offset_ST[k]];
721 const index_t* where_p=(index_t*)bsearch(&i, start_p, degree_ST[k], sizeof(index_t), util::comparIndex);
722
723 if (ST_flag[offset_ST[k] + (index_t)(where_p-start_p)]>0) {
724 if (wi <= w[k]) {
725 inD = false;
726 break;
727 }
728 }
729 }
730
731 if (inD) {
732 for (iptr=0; iptr < degree_ST[i]; ++iptr) {
733 const index_t k=ST[offset_ST[i]+iptr];
734 if (ST_flag[offset_ST[i]+iptr] > 0) {
735 if (wi <= w[k]) {
736 inD = false;
737 break;
738 }
739 }
740 }
741 }
742 if (inD) {
743 Status[i]=0.; // is in D
744 }
745 }
746 }
747
748 w_coupler->fillOverlap(n, Status);
749
750 /* remove connection to D points :
751 for each i in D:
752 for each j in S_i:
753 w[j]--
754 ST_tag[j,i]=-1
755 for each j in ST[i]:
756 ST_tag[i,j]=-1
757 for each k in ST[j]:
758 if k in ST[i]:
759 w[j]--;
760 ST_tag[j,k]=-1
761 */
762 // w is updated for local rows only
763 {
764 #pragma omp parallel for private(i, jptr)
765 for (i=0; i<my_n; ++i) {
766 for (jptr=0; jptr<degree_ST[i]; ++jptr) {
767 const index_t j=ST[offset_ST[i]+jptr];
768 if (Status[j] == 0. && ST_flag[offset_ST[i]+jptr] > 0) {
769 w[i]--;
770 ST_flag[offset_ST[i]+jptr] = -1;
771 }
772 }
773 }
774 #pragma omp parallel for private(i, jptr)
775 for (i=my_n; i<n; ++i) {
776 for (jptr=0; jptr<degree_ST[i]; ++jptr) {
777 const index_t j = ST[offset_ST[i]+jptr];
778 if (Status[j] == 0.)
779 ST_flag[offset_ST[i]+jptr] = -1;
780 }
781 }
782
783 for (i=0; i< n; ++i) {
784 if (Status[i] == 0.) {
785 const index_t* start_p = &ST[offset_ST[i]];
786
787 for (jptr=0; jptr<degree_ST[i]; ++jptr) {
788 const index_t j=ST[offset_ST[i]+jptr];
789 ST_flag[offset_ST[i]+jptr]=-1;
790 for (kptr=0; kptr<degree_ST[j]; ++kptr) {
791 const index_t k=ST[offset_ST[j]+kptr];
792 // k in ST[i]?
793 if (bsearch(&k, start_p, degree_ST[i],
794 sizeof(index_t), util::comparIndex)) {
795 if (ST_flag[offset_ST[j]+kptr] > 0) {
796 if (j< my_n) {
797 w[j]--;
798 }
799 ST_flag[offset_ST[j]+kptr]=-1;
800 }
801 }
802 }
803 }
804 }
805 }
806 }
807
808 // adjust status
809 #pragma omp parallel for private(i)
810 for (i=0; i< my_n; ++i) {
811 if ( Status[i] == 0. ) {
812 Status[i] = -10; // this is now a C point
813 } else if (Status[i] == 1. && w[i]<1.) {
814 Status[i] = -100; // this is now a F point
815 }
816 }
817
818 i = numUndefined;
819 numUndefined = col_dist->numPositives(Status, 1);
820 if (numUndefined == i) {
821 Esys_setError(SYSTEM_ERROR, "Can NOT reduce numUndefined.");
822 return;
823 }
824
825 iter++;
826 //printf("coarsening loop %d: num of undefined rows = %d \n",iter, numUndefined);
827
828 } // end of while loop
829
830 // map to output
831 w_coupler->fillOverlap(n, Status);
832 #pragma omp parallel for private(i)
833 for (i=0; i< n; ++i) {
834 if (Status[i] > -50.) {
835 split_marker[i]=PASO_AMG_IN_C;
836 } else {
837 split_marker[i]=PASO_AMG_IN_F;
838 }
839 }
840 delete[] random;
841 delete[] w;
842 delete[] Status;
843 delete[] ST_flag;
844 }
845
846 } // namespace paso
847

Properties

Name Value
svn:mergeinfo /branches/amg_from_3530/paso/src/AMG.cpp:3531-3826 /branches/diaplayground/paso/src/AMG.cpp:4940-5147 /branches/lapack2681/paso/src/AMG.cpp:2682-2741 /branches/pasowrap/paso/src/AMG.cpp:3661-3674 /branches/py3_attempt2/paso/src/AMG.cpp:3871-3891 /branches/restext/paso/src/AMG.cpp:2610-2624 /branches/ripleygmg_from_3668/paso/src/AMG.cpp:3669-3791 /branches/stage3.0/paso/src/AMG.cpp:2569-2590 /branches/symbolic_from_3470/paso/src/AMG.cpp:3471-3974 /branches/symbolic_from_3470/ripley/test/python/paso/src/AMG.cpp:3517-3974 /release/3.0/paso/src/AMG.cpp:2591-2601 /trunk/paso/src/AMG.cpp:4257-4344 /trunk/ripley/test/python/paso/src/AMG.cpp:3480-3515

  ViewVC Help
Powered by ViewVC 1.1.26