/[escript]/trunk/paso/src/AMG.cpp
ViewVC logotype

Contents of /trunk/paso/src/AMG.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 5265 - (show annotations)
Mon Nov 17 04:59:25 2014 UTC (5 years, 4 months ago) by jfenwick
File size: 32716 byte(s)
Adding missing include which clang noticed.
1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2014 by University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Open Software License version 3.0
9 * http://www.opensource.org/licenses/osl-3.0.php
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development 2012-2013 by School of Earth Sciences
13 * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 *
15 *****************************************************************************/
16
17
18 /****************************************************************************/
19
20 /* Paso: AMG preconditioner (local version) */
21
22 /****************************************************************************/
23
24 /* Author: artak@uq.edu.au, l.gross@uq.edu.au */
25
26 /****************************************************************************/
27
28 #define SHOW_TIMING 0
29
30 #include <iostream>
31 #include "Paso.h"
32 #include "Preconditioner.h"
33 #include "MergedSolver.h"
34 #include "Options.h"
35 #include "PasoUtil.h"
36 #include "MKL.h"
37
38 namespace paso {
39
40 void Preconditioner_AMG_free(Preconditioner_AMG* in)
41 {
42 if (in!=NULL) {
43 Preconditioner_Smoother_free(in->Smoother);
44 Preconditioner_AMG_free(in->AMG_C);
45 delete[] in->r;
46 delete[] in->x_C;
47 delete[] in->b_C;
48 delete in->merged_solver;
49 delete in;
50 }
51 }
52
53 int Preconditioner_AMG_getMaxLevel(const Preconditioner_AMG* in)
54 {
55 if (in->AMG_C == NULL) {
56 return in->level;
57 } else {
58 return Preconditioner_AMG_getMaxLevel(in->AMG_C);
59 }
60 }
61
62 double Preconditioner_AMG_getCoarseLevelSparsity(const Preconditioner_AMG* in)
63 {
64 if (in->AMG_C == NULL) {
65 if (in->A_C == NULL) {
66 return 1.;
67 } else {
68 return in->A_C->getSparsity();
69 }
70 }
71 return Preconditioner_AMG_getCoarseLevelSparsity(in->AMG_C);
72 }
73
74 dim_t Preconditioner_AMG_getNumCoarseUnknowns(const Preconditioner_AMG* in)
75 {
76 if (in->AMG_C == NULL) {
77 if (in->A_C == NULL) {
78 return 0;
79 } else {
80 return in->A_C->getTotalNumRows();
81 }
82 }
83 return Preconditioner_AMG_getNumCoarseUnknowns(in->AMG_C);
84 }
85
86 /****************************************************************************
87
88 constructs AMG
89
90 *****************************************************************************/
91 Preconditioner_AMG* Preconditioner_AMG_alloc(SystemMatrix_ptr A, int level,
92 Options* options)
93 {
94 Preconditioner_AMG* out = NULL;
95 const bool verbose = options->verbose;
96 const dim_t my_n = A->mainBlock->numRows;
97 const dim_t overlap_n = A->row_coupleBlock->numRows;
98 const dim_t n = my_n + overlap_n;
99 const dim_t n_block = A->row_block_size;
100 const double sparsity = A->getSparsity();
101 const dim_t global_n = A->getGlobalNumRows();
102
103 // is the input matrix A suitable for coarsening?
104 if (sparsity >= options->min_coarse_sparsity ||
105 global_n <= options->min_coarse_matrix_size ||
106 level > options->level_max) {
107 if (verbose) {
108 /*
109 print stopping condition:
110 - 'SPAR' = min_coarse_matrix_sparsity exceeded
111 - 'SIZE' = min_coarse_matrix_size exceeded
112 - 'LEVEL' = level_max exceeded
113 */
114 std::cout << "Preconditioner: AMG: termination of coarsening by ";
115
116 if (sparsity >= options->min_coarse_sparsity)
117 std::cout << "SPAR";
118
119 if (global_n <= options->min_coarse_matrix_size)
120 std::cout << "SIZE";
121
122 if (level > options->level_max)
123 std::cout << "LEVEL";
124
125 std::cout << std::endl << "Preconditioner: AMG level " << level
126 << " (limit = " << options->level_max << ") stopped."
127 << " Sparsity = " << sparsity << " (limit = "
128 << options->min_coarse_sparsity << "), unknowns = " << global_n
129 << " (limit = " << options->min_coarse_matrix_size << ")"
130 << std::endl;
131 }
132 return out;
133 }
134
135 ///// Start Coarsening /////
136
137 double time0 = 0;
138 // this is the table for strong connections combining mainBlock,
139 // col_coupleBlock and row_coupleBlock
140 const dim_t len_S = A->mainBlock->pattern->len +
141 A->col_coupleBlock->pattern->len +
142 A->row_coupleBlock->pattern->len +
143 A->row_coupleBlock->numRows*A->col_coupleBlock->numCols;
144
145 dim_t* degree_S = new dim_t[n];
146 index_t* offset_S = new index_t[n];
147 index_t* S = new index_t[len_S];
148 dim_t* degree_ST = new dim_t[n];
149 index_t* offset_ST = new index_t[n];
150 index_t* ST = new index_t[len_S];
151 AMGBlockSelect* F_marker = new AMGBlockSelect[n];
152 index_t* counter = new index_t[n];
153 const double theta = options->coarsening_threshold;
154 const double tau = options->diagonal_dominance_threshold;
155
156 // make sure that corresponding values in the row_coupleBlock and
157 // col_coupleBlock are identical
158 A->copyColCoupleBlock();
159 A->copyRemoteCoupleBlock(false);
160
161 // set splitting of unknowns
162 time0 = Esys_timer();
163 if (n_block > 1) {
164 Preconditioner_AMG_setStrongConnections_Block(A, degree_S, offset_S,
165 S, theta, tau);
166 } else {
167 Preconditioner_AMG_setStrongConnections(A, degree_S, offset_S, S,
168 theta, tau);
169 }
170 Preconditioner_AMG_transposeStrongConnections(n, degree_S, offset_S, S,
171 n, degree_ST, offset_ST, ST);
172 //A->extendedRowsForST(degree_ST, offset_ST, ST);
173
174 Preconditioner_AMG_CIJPCoarsening(n, my_n, F_marker, degree_S, offset_S, S,
175 degree_ST, offset_ST, ST,
176 A->col_coupler->connector,
177 A->col_distribution);
178
179 // in BoomerAMG if interpolation is used FF connectivity is required
180 //if (options->interpolation_method == PASO_CLASSIC_INTERPOLATION_WITH_FF_COUPLING)
181 // Preconditioner_AMG_enforceFFConnectivity(n, A->pattern->ptr, degree_S, S, F_marker);
182
183 options->coarsening_selection_time = Esys_timer()-time0 +
184 std::max(0., options->coarsening_selection_time);
185 if (Esys_noError()) {
186 #pragma omp parallel for
187 for (dim_t i = 0; i < n; ++i)
188 F_marker[i] = (F_marker[i]==PASO_AMG_IN_F ? PASO_AMG_IN_C : PASO_AMG_IN_F);
189
190 // count number of unknowns to be eliminated:
191 dim_t my_n_F = util::cumsum_maskedTrue(my_n, counter, (int*)F_marker);
192 const dim_t n_F = util::cumsum_maskedTrue(n, counter, (int*)F_marker);
193 // collect my_n_F values on all processes, a direct solver should
194 // be used if any my_n_F value is 0
195 dim_t* F_set = new dim_t[A->mpi_info->size];
196 #ifdef ESYS_MPI
197 MPI_Allgather(&my_n_F, 1, MPI_INT, F_set, 1, MPI_INT, A->mpi_info->comm);
198 #endif
199 dim_t global_n_F = 0;
200 bool F_flag = true;
201 for (dim_t i=0; i<A->mpi_info->size; i++) {
202 global_n_F += F_set[i];
203 if (F_set[i] == 0)
204 F_flag = false;
205 }
206 delete[] F_set;
207
208 const dim_t global_n_C = global_n-global_n_F;
209 if (verbose)
210 std::cout << "Preconditioner: AMG (non-local) level " << level
211 << ": " << global_n_F << " unknowns are flagged for"
212 << " elimination. " << global_n_C << " left." << std::endl;
213
214 //if (n_F == 0) { nasty case. a direct solver should be used!
215 if (F_flag) {
216 out = new Preconditioner_AMG;
217 out->level = level;
218 out->post_sweeps = options->post_sweeps;
219 out->pre_sweeps = options->pre_sweeps;
220 out->r = NULL;
221 out->x_C = NULL;
222 out->b_C = NULL;
223 out->AMG_C = NULL;
224 out->Smoother = NULL;
225 out->merged_solver = NULL;
226 if (Esys_noError()) {
227 out->Smoother = Preconditioner_Smoother_alloc(A,
228 (options->smoother == PASO_JACOBI), 0, verbose);
229
230 if (global_n_C != 0) {
231 index_t* mask_C = new index_t[n];
232 index_t* rows_in_F = new index_t[n_F];
233 // create mask of C nodes with value >-1, gives new id
234 const dim_t n_C = util::cumsum_maskedFalse(n, mask_C,
235 (int*)F_marker);
236 const dim_t my_n_C = my_n-my_n_F;
237 // if nothing has been removed we have a diagonal dominant
238 // matrix and we just run a few steps of the smoother
239
240 out->x_C = new double[n_block*my_n_C];
241 out->b_C = new double[n_block*my_n_C];
242 out->r = new double[n_block*my_n];
243
244 if (Esys_noError()) {
245 // creates index for F
246 #pragma omp parallel for
247 for (dim_t i = 0; i < n; ++i) {
248 if (F_marker[i])
249 rows_in_F[counter[i]] = i;
250 }
251 // get Prolongation
252 time0 = Esys_timer();
253 out->P = Preconditioner_AMG_getProlongation(A,
254 offset_S, degree_S, S, n_C, mask_C,
255 options->interpolation_method);
256 }
257
258 // construct Restriction operator as transposed of
259 // Prolongation operator:
260 if (Esys_noError()) {
261 time0 = Esys_timer();
262 out->R = Preconditioner_AMG_getRestriction(out->P);
263 if (SHOW_TIMING)
264 std::cout << "timing: level " << level
265 << ": getTranspose: " << Esys_timer()-time0
266 << std::endl;
267 }
268 // construct coarse level matrix
269 SystemMatrix_ptr A_C;
270 if (Esys_noError()) {
271 time0 = Esys_timer();
272 A_C = Preconditioner_AMG_buildInterpolationOperator(A, out->P, out->R);
273 if (SHOW_TIMING)
274 std::cout << "timing: level " << level
275 << ": construct coarse matrix: "
276 << Esys_timer()-time0 << std::endl;
277
278 out->AMG_C = Preconditioner_AMG_alloc(A_C, level+1, options);
279 out->A_C = A_C;
280 if (out->AMG_C == NULL) {
281 // merge the system matrix into 1 rank when
282 // it's not suitable coarsening due to the
283 // total number of unknowns being too small
284 out->merged_solver = new MergedSolver(A_C, options);
285 }
286 }
287 delete[] mask_C;
288 delete[] rows_in_F;
289 }
290 }
291 }
292 }
293 delete[] counter;
294 delete[] F_marker;
295 delete[] degree_S;
296 delete[] offset_S;
297 delete[] S;
298 delete[] degree_ST;
299 delete[] offset_ST;
300 delete[] ST;
301
302 if (Esys_noError()) {
303 return out;
304 } else {
305 Preconditioner_AMG_free(out);
306 return NULL;
307 }
308 }
309
310
311 void Preconditioner_AMG_solve(SystemMatrix_ptr A,
312 Preconditioner_AMG* amg, double* x, double* b)
313 {
314 const dim_t n = A->mainBlock->numRows * A->mainBlock->row_block_size;
315 const dim_t post_sweeps=amg->post_sweeps;
316 const dim_t pre_sweeps=amg->pre_sweeps;
317
318 // presmoothing
319 double time0 = Esys_timer();
320 Preconditioner_Smoother_solve(A, amg->Smoother, x, b, pre_sweeps, false);
321 time0 = Esys_timer()-time0;
322 if (SHOW_TIMING)
323 std::cout << "timing: level " << amg->level << ": Presmoothing: "
324 << time0 << std::endl;
325 // end of presmoothing
326
327 time0=Esys_timer();
328 // r <- b
329 util::copy(n, amg->r, b);
330 // r = r-Ax
331 SystemMatrix_MatrixVector_CSR_OFFSET0(-1.,A,x,1.,amg->r);
332 // b_C = R*r
333 SystemMatrix_MatrixVector_CSR_OFFSET0(1., amg->R, amg->r, 0., amg->b_C);
334 time0 = Esys_timer()-time0;
335
336 // coarse level solve
337 if (amg->AMG_C == NULL) {
338 time0 = Esys_timer();
339 // A_C is the coarsest level
340 amg->merged_solver->solve(amg->x_C, amg->b_C);
341 if (SHOW_TIMING)
342 std::cout << "timing: level " << amg->level << ": DIRECT SOLVER: "
343 << Esys_timer()-time0 << std::endl;
344 } else {
345 // x_C = AMG(b_C)
346 Preconditioner_AMG_solve(amg->A_C, amg->AMG_C, amg->x_C, amg->b_C);
347 }
348
349 time0 = time0+Esys_timer();
350 // x = x + P*x_c
351 SystemMatrix_MatrixVector_CSR_OFFSET0(1., amg->P, amg->x_C, 1., x);
352
353 // postsmoothing:
354 // solve Ax=b with initial guess x
355 time0 = Esys_timer();
356 Preconditioner_Smoother_solve(A, amg->Smoother, x, b, post_sweeps, true);
357 time0 = Esys_timer()-time0;
358 if (SHOW_TIMING)
359 std::cout << "timing: level " << amg->level << ": Postsmoothing: "
360 << time0 << std::endl;
361 }
362
363 /// theta = threshold for strong connections
364 /// tau = threshold for diagonal dominance
365 ///
366 /// S_i={j \in N_i; i strongly coupled to j}
367 /// in the sense that |A_{ij}| >= theta * max_k |A_{ik}|
368 void Preconditioner_AMG_setStrongConnections(SystemMatrix_ptr A,
369 dim_t* degree_S,
370 index_t* offset_S, index_t* S,
371 double theta, double tau)
372 {
373 const dim_t my_n=A->mainBlock->numRows;
374 const dim_t overlap_n=A->row_coupleBlock->numRows;
375 index_t iptr, i;
376 double *threshold_p=NULL;
377 threshold_p = new double[2*my_n];
378
379 #pragma omp parallel for private(i,iptr) schedule(static)
380 for (i=0;i<my_n;++i) {
381 double max_offdiagonal = 0.;
382 register double sum_row=0;
383 register double main_row=0;
384 register dim_t kdeg=0;
385 register const index_t koffset=A->mainBlock->pattern->ptr[i]+A->col_coupleBlock->pattern->ptr[i];
386
387 // collect information for row i:
388 #pragma ivdep
389 for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
390 register index_t j=A->mainBlock->pattern->index[iptr];
391 const double fnorm=std::abs(A->mainBlock->val[iptr]);
392 if(j != i) {
393 max_offdiagonal = std::max(max_offdiagonal,fnorm);
394 sum_row+=fnorm;
395 } else {
396 main_row=fnorm;
397 }
398 }
399
400 #pragma ivdep
401 for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
402 const double fnorm = std::abs(A->col_coupleBlock->val[iptr]);
403
404 max_offdiagonal = std::max(max_offdiagonal,fnorm);
405 sum_row+=fnorm;
406 }
407
408 // inspect row i:
409 {
410 const double threshold = theta*max_offdiagonal;
411 threshold_p[2*i+1]=threshold;
412 if (tau*main_row < sum_row) { // no diagonal dominance
413 threshold_p[2*i]=1;
414 #pragma ivdep
415 for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
416 const index_t j=A->mainBlock->pattern->index[iptr];
417 if(std::abs(A->mainBlock->val[iptr])>threshold && i!=j) {
418 S[koffset+kdeg] = j;
419 kdeg++;
420 }
421 }
422 #pragma ivdep
423 for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
424 const index_t j=A->col_coupleBlock->pattern->index[iptr];
425 if(std::abs(A->col_coupleBlock->val[iptr])>threshold) {
426 S[koffset+kdeg] = j + my_n;
427 kdeg++;
428 }
429 }
430 } else {
431 threshold_p[2*i]=-1;
432 }
433 }
434 offset_S[i]=koffset;
435 degree_S[i]=kdeg;
436 }
437
438 // now we need to distribute the threshold and the diagonal dominance
439 // indicator
440 if (A->mpi_info->size > 1) {
441 const index_t koffset_0 =
442 A->mainBlock->pattern->ptr[my_n]+A->col_coupleBlock->pattern->ptr[my_n]
443 -A->mainBlock->pattern->ptr[0]-A->col_coupleBlock->pattern->ptr[0];
444
445 Coupler_ptr threshold_coupler(new Coupler(A->row_coupler->connector, 2));
446 threshold_coupler->startCollect(threshold_p);
447 double* remote_threshold = threshold_coupler->finishCollect();
448
449 #pragma omp parallel for private(i,iptr) schedule(static)
450 for (i=0; i<overlap_n; i++) {
451 const double threshold = remote_threshold[2*i+1];
452 register dim_t kdeg=0;
453 register const index_t koffset=koffset_0+A->row_coupleBlock->pattern->ptr[i]+A->remote_coupleBlock->pattern->ptr[i];
454 if (remote_threshold[2*i]>0) {
455 #pragma ivdep
456 for (iptr=A->row_coupleBlock->pattern->ptr[i];iptr<A->row_coupleBlock->pattern->ptr[i+1]; ++iptr) {
457 register index_t j=A->row_coupleBlock->pattern->index[iptr];
458 if(std::abs(A->row_coupleBlock->val[iptr])>threshold) {
459 S[koffset+kdeg] = j ;
460 kdeg++;
461 }
462 }
463
464 #pragma ivdep
465 for (iptr=A->remote_coupleBlock->pattern->ptr[i];iptr<A->remote_coupleBlock->pattern->ptr[i+1]; iptr++) {
466 register index_t j=A->remote_coupleBlock->pattern->index[iptr];
467 if(std::abs(A->remote_coupleBlock->val[iptr])>threshold && i!=j) {
468 S[koffset+kdeg] = j + my_n;
469 kdeg++;
470 }
471 }
472 }
473 offset_S[i+my_n]=koffset;
474 degree_S[i+my_n]=kdeg;
475 }
476 }
477 delete[] threshold_p;
478 }
479
480 // theta = threshold for strong connections
481 // tau = threshold for diagonal dominance
482 // S_i={j \in N_i; i strongly coupled to j}
483 // in the sense that |A_{ij}|_F >= theta * max_k |A_{ik}|_F
484 void Preconditioner_AMG_setStrongConnections_Block(SystemMatrix_ptr A,
485 dim_t* degree_S,
486 index_t* offset_S,
487 index_t* S,
488 double theta, double tau)
489 {
490 const dim_t block_size=A->block_size;
491 const dim_t my_n=A->mainBlock->numRows;
492 const dim_t overlap_n=A->row_coupleBlock->numRows;
493 double* threshold_p = new double[2*my_n];
494 index_t iptr, i, bi;
495
496 #pragma omp parallel private(i,iptr,bi)
497 {
498 dim_t max_deg=0;
499
500 #pragma omp for schedule(static)
501 for (i=0; i<my_n; ++i)
502 max_deg=std::max(max_deg, A->mainBlock->pattern->ptr[i+1]-A->mainBlock->pattern->ptr[i]
503 +A->col_coupleBlock->pattern->ptr[i+1]-A->col_coupleBlock->pattern->ptr[i]);
504
505 double* rtmp = new double[max_deg];
506
507 #pragma omp for schedule(static)
508 for (i=0;i<my_n;++i) {
509 double max_offdiagonal = 0.;
510 register double sum_row=0;
511 register double main_row=0;
512 register index_t rtmp_offset=-A->mainBlock->pattern->ptr[i];
513 register dim_t kdeg=0;
514 const index_t koffset=A->mainBlock->pattern->ptr[i]+A->col_coupleBlock->pattern->ptr[i];
515
516 /* collect information for row i: */
517 for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
518 const index_t j=A->mainBlock->pattern->index[iptr];
519 double fnorm=0;
520 #pragma ivdep
521 for(bi=0;bi<block_size;++bi) {
522 const double rtmp2 = A->mainBlock->val[iptr*block_size+bi];
523 fnorm+=rtmp2*rtmp2;
524 }
525 fnorm=sqrt(fnorm);
526 rtmp[iptr+rtmp_offset]=fnorm;
527
528 if( j != i) {
529 max_offdiagonal = std::max(max_offdiagonal,fnorm);
530 sum_row+=fnorm;
531 } else {
532 main_row=fnorm;
533 }
534 }
535 rtmp_offset+=A->mainBlock->pattern->ptr[i+1]-A->col_coupleBlock->pattern->ptr[i];
536 for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
537 double fnorm=0;
538 #pragma ivdep
539 for(bi=0;bi<block_size;++bi) {
540 const double rtmp2 = A->col_coupleBlock->val[iptr*block_size+bi];
541 fnorm+=rtmp2*rtmp2;
542 }
543 fnorm=sqrt(fnorm);
544
545 rtmp[iptr+rtmp_offset]=fnorm;
546 max_offdiagonal = std::max(max_offdiagonal,fnorm);
547 sum_row+=fnorm;
548 }
549
550 // inspect row i:
551 {
552 const double threshold = theta*max_offdiagonal;
553 rtmp_offset=-A->mainBlock->pattern->ptr[i];
554
555 threshold_p[2*i+1]=threshold;
556 if (tau*main_row < sum_row) { /* no diagonal dominance */
557 threshold_p[2*i]=1;
558 #pragma ivdep
559 for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
560 const index_t j=A->mainBlock->pattern->index[iptr];
561 if(rtmp[iptr+rtmp_offset] > threshold && i!=j) {
562 S[koffset+kdeg] = j;
563 kdeg++;
564 }
565 }
566 rtmp_offset+=A->mainBlock->pattern->ptr[i+1]-A->col_coupleBlock->pattern->ptr[i];
567 #pragma ivdep
568 for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
569 const index_t j=A->col_coupleBlock->pattern->index[iptr];
570 if( rtmp[iptr+rtmp_offset] >threshold) {
571 S[koffset+kdeg] = j + my_n;
572 kdeg++;
573 }
574 }
575 } else {
576 threshold_p[2*i]=-1;
577 }
578 }
579 offset_S[i]=koffset;
580 degree_S[i]=kdeg;
581 }
582 delete[] rtmp;
583 } // parallel section
584
585 // now we need to distribute the threshold and the diagonal dominance
586 // indicator
587 if (A->mpi_info->size > 1) {
588 const index_t koffset_0 =
589 A->mainBlock->pattern->ptr[my_n]+A->col_coupleBlock->pattern->ptr[my_n]
590 -A->mainBlock->pattern->ptr[0]-A->col_coupleBlock->pattern->ptr[0];
591
592 Coupler_ptr threshold_coupler(new Coupler(A->row_coupler->connector, 2));
593 threshold_coupler->startCollect(threshold_p);
594 double* remote_threshold = threshold_coupler->finishCollect();
595
596 #pragma omp parallel for private(i,iptr) schedule(static)
597 for (i=0; i<overlap_n; i++) {
598 const double threshold2 = remote_threshold[2*i+1]*remote_threshold[2*i+1];
599 register dim_t kdeg=0;
600 const index_t koffset = koffset_0+A->row_coupleBlock->pattern->ptr[i]+A->remote_coupleBlock->pattern->ptr[i];
601 if (remote_threshold[2*i]>0) {
602 #pragma ivdep
603 for (iptr=A->row_coupleBlock->pattern->ptr[i];iptr<A->row_coupleBlock->pattern->ptr[i+1]; ++iptr) {
604 const index_t j=A->row_coupleBlock->pattern->index[iptr];
605 double fnorm2=0;
606 #pragma ivdep
607 for(bi=0;bi<block_size;++bi) {
608 const double rtmp2 = A->row_coupleBlock->val[iptr*block_size+bi];
609 fnorm2+=rtmp2*rtmp2;
610 }
611
612 if(fnorm2 > threshold2 ) {
613 S[koffset+kdeg] = j ;
614 kdeg++;
615 }
616 }
617
618 #pragma ivdep
619 for (iptr=A->remote_coupleBlock->pattern->ptr[i];iptr<A->remote_coupleBlock->pattern->ptr[i+1]; ++iptr) {
620 const index_t j=A->remote_coupleBlock->pattern->index[iptr];
621 double fnorm2 = 0.;
622 #pragma ivdep
623 for (bi=0; bi<block_size; ++bi) {
624 const double v = A->remote_coupleBlock->val[iptr*block_size+bi];
625 fnorm2 += v*v;
626 }
627 if (fnorm2 > threshold2 && i != j) {
628 S[koffset+kdeg] = j + my_n;
629 kdeg++;
630 }
631 }
632 }
633 offset_S[i+my_n]=koffset;
634 degree_S[i+my_n]=kdeg;
635 }
636 }
637 delete[] threshold_p;
638 }
639
640 void Preconditioner_AMG_transposeStrongConnections(dim_t n,
641 const dim_t* degree_S, const index_t* offset_S, const index_t* S,
642 const dim_t nT, dim_t* degree_ST, index_t* offset_ST, index_t* ST)
643 {
644 #pragma omp parallel for
645 for (index_t i=0; i<nT; ++i) {
646 degree_ST[i]=0;
647 }
648 for (index_t i=0; i<n ;++i) {
649 for (dim_t p=0; p<degree_S[i]; ++p)
650 degree_ST[S[offset_S[i]+p]]++;
651 }
652 dim_t len=0;
653 for (index_t i=0; i<nT; ++i) {
654 offset_ST[i]=len;
655 len+=degree_ST[i];
656 degree_ST[i]=0;
657 }
658 for (index_t i=0; i<n; ++i) {
659 for (dim_t p=0; p<degree_S[i]; ++p) {
660 const index_t j = S[offset_S[i]+p];
661 ST[offset_ST[j]+degree_ST[j]]=i;
662 degree_ST[j]++;
663 }
664 }
665 }
666
667 void Preconditioner_AMG_CIJPCoarsening(dim_t n, dim_t my_n,
668 AMGBlockSelect* split_marker,
669 const dim_t* degree_S,
670 const index_t* offset_S,
671 const index_t* S,
672 const dim_t* degree_ST,
673 const index_t* offset_ST,
674 const index_t* ST,
675 const_Connector_ptr col_connector,
676 const_Distribution_ptr col_dist)
677 {
678 Coupler_ptr w_coupler(new Coupler(col_connector, 1));
679 double* w = new double[n];
680 double* Status = new double[n];
681 double* random = col_dist->createRandomVector(1);
682 index_t* ST_flag = new index_t[offset_ST[n-1] + degree_ST[n-1]];
683 dim_t i, numUndefined, iter=0;
684 index_t iptr, jptr, kptr;
685
686 #pragma omp parallel for private(i)
687 for (i=0; i<my_n; ++i) {
688 w[i]=degree_ST[i]+random[i];
689 if (degree_ST[i] < 1) {
690 Status[i]=-100; // F point
691 } else {
692 Status[i]=1; // status undefined
693 }
694 }
695
696 #pragma omp parallel for private(i, iptr)
697 for (i=0; i<n; ++i) {
698 for (iptr =0 ; iptr < degree_ST[i]; ++iptr) {
699 ST_flag[offset_ST[i]+iptr] = 1;
700 }
701 }
702
703 numUndefined = col_dist->numPositives(Status, 1);
704 //printf("coarsening loop start: num of undefined rows = %d \n",numUndefined);
705 iter=0;
706 while (numUndefined > 0) {
707 w_coupler->fillOverlap(n, w);
708
709 // calculate the maximum value of neighbours following active strong
710 // connections:
711 // w2[i]=MAX(w[k]) with k in ST[i] or S[i] and (i,k) connection
712 // is still active
713 #pragma omp parallel for private(i, iptr)
714 for (i=0; i<my_n; ++i) {
715 if (Status[i] > 0) { // status is still undefined
716 bool inD = true;
717 const double wi=w[i];
718
719 for (iptr=0; iptr < degree_S[i]; ++iptr) {
720 const index_t k=S[offset_S[i]+iptr];
721 const index_t* start_p = &ST[offset_ST[k]];
722 const index_t* where_p=(index_t*)bsearch(&i, start_p, degree_ST[k], sizeof(index_t), util::comparIndex);
723
724 if (ST_flag[offset_ST[k] + (index_t)(where_p-start_p)]>0) {
725 if (wi <= w[k]) {
726 inD = false;
727 break;
728 }
729 }
730 }
731
732 if (inD) {
733 for (iptr=0; iptr < degree_ST[i]; ++iptr) {
734 const index_t k=ST[offset_ST[i]+iptr];
735 if (ST_flag[offset_ST[i]+iptr] > 0) {
736 if (wi <= w[k]) {
737 inD = false;
738 break;
739 }
740 }
741 }
742 }
743 if (inD) {
744 Status[i]=0.; // is in D
745 }
746 }
747 }
748
749 w_coupler->fillOverlap(n, Status);
750
751 /* remove connection to D points :
752 for each i in D:
753 for each j in S_i:
754 w[j]--
755 ST_tag[j,i]=-1
756 for each j in ST[i]:
757 ST_tag[i,j]=-1
758 for each k in ST[j]:
759 if k in ST[i]:
760 w[j]--;
761 ST_tag[j,k]=-1
762 */
763 // w is updated for local rows only
764 {
765 #pragma omp parallel for private(i, jptr)
766 for (i=0; i<my_n; ++i) {
767 for (jptr=0; jptr<degree_ST[i]; ++jptr) {
768 const index_t j=ST[offset_ST[i]+jptr];
769 if (Status[j] == 0. && ST_flag[offset_ST[i]+jptr] > 0) {
770 w[i]--;
771 ST_flag[offset_ST[i]+jptr] = -1;
772 }
773 }
774 }
775 #pragma omp parallel for private(i, jptr)
776 for (i=my_n; i<n; ++i) {
777 for (jptr=0; jptr<degree_ST[i]; ++jptr) {
778 const index_t j = ST[offset_ST[i]+jptr];
779 if (Status[j] == 0.)
780 ST_flag[offset_ST[i]+jptr] = -1;
781 }
782 }
783
784 for (i=0; i< n; ++i) {
785 if (Status[i] == 0.) {
786 const index_t* start_p = &ST[offset_ST[i]];
787
788 for (jptr=0; jptr<degree_ST[i]; ++jptr) {
789 const index_t j=ST[offset_ST[i]+jptr];
790 ST_flag[offset_ST[i]+jptr]=-1;
791 for (kptr=0; kptr<degree_ST[j]; ++kptr) {
792 const index_t k=ST[offset_ST[j]+kptr];
793 // k in ST[i]?
794 if (bsearch(&k, start_p, degree_ST[i],
795 sizeof(index_t), util::comparIndex)) {
796 if (ST_flag[offset_ST[j]+kptr] > 0) {
797 if (j< my_n) {
798 w[j]--;
799 }
800 ST_flag[offset_ST[j]+kptr]=-1;
801 }
802 }
803 }
804 }
805 }
806 }
807 }
808
809 // adjust status
810 #pragma omp parallel for private(i)
811 for (i=0; i< my_n; ++i) {
812 if ( Status[i] == 0. ) {
813 Status[i] = -10; // this is now a C point
814 } else if (Status[i] == 1. && w[i]<1.) {
815 Status[i] = -100; // this is now a F point
816 }
817 }
818
819 i = numUndefined;
820 numUndefined = col_dist->numPositives(Status, 1);
821 if (numUndefined == i) {
822 Esys_setError(SYSTEM_ERROR, "Can NOT reduce numUndefined.");
823 return;
824 }
825
826 iter++;
827 //printf("coarsening loop %d: num of undefined rows = %d \n",iter, numUndefined);
828
829 } // end of while loop
830
831 // map to output
832 w_coupler->fillOverlap(n, Status);
833 #pragma omp parallel for private(i)
834 for (i=0; i< n; ++i) {
835 if (Status[i] > -50.) {
836 split_marker[i]=PASO_AMG_IN_C;
837 } else {
838 split_marker[i]=PASO_AMG_IN_F;
839 }
840 }
841 delete[] random;
842 delete[] w;
843 delete[] Status;
844 delete[] ST_flag;
845 }
846
847 } // namespace paso
848

Properties

Name Value
svn:mergeinfo /branches/amg_from_3530/paso/src/AMG.cpp:3531-3826 /branches/diaplayground/paso/src/AMG.cpp:4940-5147 /branches/lapack2681/paso/src/AMG.cpp:2682-2741 /branches/pasowrap/paso/src/AMG.cpp:3661-3674 /branches/py3_attempt2/paso/src/AMG.cpp:3871-3891 /branches/restext/paso/src/AMG.cpp:2610-2624 /branches/ripleygmg_from_3668/paso/src/AMG.cpp:3669-3791 /branches/stage3.0/paso/src/AMG.cpp:2569-2590 /branches/symbolic_from_3470/paso/src/AMG.cpp:3471-3974 /branches/symbolic_from_3470/ripley/test/python/paso/src/AMG.cpp:3517-3974 /release/3.0/paso/src/AMG.cpp:2591-2601 /trunk/paso/src/AMG.cpp:4257-4344 /trunk/ripley/test/python/paso/src/AMG.cpp:3480-3515

  ViewVC Help
Powered by ViewVC 1.1.26