/[escript]/trunk/paso/src/Smoother.cpp
ViewVC logotype

Contents of /trunk/paso/src/Smoother.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4873 - (show annotations)
Wed Apr 16 06:38:51 2014 UTC (5 years, 5 months ago) by caltinay
File size: 19123 byte(s)
whitespace only changes.

1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2014 by University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Open Software License version 3.0
9 * http://www.opensource.org/licenses/osl-3.0.php
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development 2012-2013 by School of Earth Sciences
13 * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 *
15 *****************************************************************************/
16
17
18 /****************************************************************************/
19
20 /* Paso: Gauss-Seidel */
21
22 /****************************************************************************/
23
24 /* Author: artak@uq.edu.au */
25
26 /****************************************************************************/
27
28 #include "Preconditioner.h"
29 #include "PasoUtil.h"
30 #include "BlockOps.h"
31
32 namespace paso {
33
34 void Preconditioner_Smoother_free(Preconditioner_Smoother* in)
35 {
36 if (in!=NULL) {
37 Preconditioner_LocalSmoother_free(in->localSmoother);
38 delete in;
39 }
40 }
41
42 void Preconditioner_LocalSmoother_free(Preconditioner_LocalSmoother* in)
43 {
44 if (in!=NULL) {
45 delete[] in->diag;
46 delete[] in->pivot;
47 delete[] in->buffer;
48 delete in;
49 }
50 }
51
52
53 /// constructs the symmetric Gauss-Seidel preconditioner
54 Preconditioner_Smoother* Preconditioner_Smoother_alloc(SystemMatrix_ptr A,
55 bool jacobi, bool is_local, bool verbose)
56 {
57 Preconditioner_Smoother* out=new Preconditioner_Smoother;
58 out->localSmoother=Preconditioner_LocalSmoother_alloc(A->mainBlock,
59 jacobi, verbose);
60 out->is_local=is_local;
61 if (Esys_MPIInfo_noError(A->mpi_info)) {
62 return out;
63 } else {
64 Preconditioner_Smoother_free(out);
65 return NULL;
66 }
67 }
68
69 Preconditioner_LocalSmoother* Preconditioner_LocalSmoother_alloc(
70 SparseMatrix_ptr A, bool jacobi, bool verbose)
71 {
72 const dim_t n=A->numRows;
73 const dim_t n_block=A->row_block_size;
74 const dim_t block_size=A->block_size;
75 double time0=Esys_timer();
76 Preconditioner_LocalSmoother* out=new Preconditioner_LocalSmoother;
77
78 out->diag=new double[((size_t) n) * ((size_t) block_size)];
79 out->pivot=new index_t[ ((size_t) n) * ((size_t) n_block)];
80 out->buffer=new double[((size_t) n) * ((size_t) n_block)];
81 out->Jacobi=jacobi;
82 A->invMain(out->diag, out->pivot);
83 time0=Esys_timer()-time0;
84
85 if (Esys_noError()) {
86 return out;
87 } else {
88 Preconditioner_LocalSmoother_free(out);
89 return NULL;
90 }
91 }
92
93 /*
94 performs a few sweeps of the form
95
96 S (x_{k} - x_{k-1}) = b - A x_{k-1}
97
98 where x_{0}=0 and S provides some approximation of A.
99
100 Under MPI S is built using A->mainBlock only.
101 If Smoother is local the defect b - A x_{k-1} is calculated using A->mainBlock only.
102
103 The MPI-versioned smoother works in the following way:
104 (1) initialize x
105 (2) calculate residual r_n = b - A * x_n
106 (3) store residual r_n in b_new and pass to Preconditioner_LocalSmoother_Sweep() as input
107 (4) /delta x_{n+1} is returned (stored in b_new) as the output of Preconditioner_LocalSmoother_Sweep()
108 (5) recover x_{n+1} by /delta x_{n+1} + x_n
109 (6) repeat steps 2-5 for until sweeps number reduce to 0
110 */
111 void Preconditioner_Smoother_solve(SystemMatrix_ptr A,
112 Preconditioner_Smoother* smoother, double* x, const double* b,
113 dim_t sweeps, bool x_is_initial)
114 {
115 const dim_t n = A->mainBlock->numRows * A->mainBlock->row_block_size;
116 double *b_new = smoother->localSmoother->buffer;
117 dim_t nsweeps=sweeps;
118 if (smoother->is_local) {
119 Preconditioner_LocalSmoother_solve(A->mainBlock,smoother->localSmoother,x,b,sweeps,x_is_initial);
120 } else {
121 if (! x_is_initial) {
122 util::copy(n, x, b);
123
124 Preconditioner_LocalSmoother_Sweep(A->mainBlock,smoother->localSmoother,x);
125 nsweeps--;
126 }
127 while (nsweeps > 0 ) {
128 util::copy(n, b_new, b);
129 SystemMatrix_MatrixVector_CSR_OFFSET0((-1.), A, x, 1., b_new); /* b_new = b - A*x */
130 Preconditioner_LocalSmoother_Sweep(A->mainBlock,smoother->localSmoother,b_new);
131 util::AXPY(n, x, 1., b_new);
132 nsweeps--;
133 }
134 }
135 }
136
137 err_t Preconditioner_Smoother_solve_byTolerance(SystemMatrix_ptr A,
138 Preconditioner_Smoother* smoother, double* x, const double* b,
139 double atol, dim_t* sweeps, bool x_is_initial)
140 {
141 const dim_t n = A->mainBlock->numRows * A->mainBlock->row_block_size;
142 double *b_new = smoother->localSmoother->buffer;
143 const dim_t max_sweeps=*sweeps;
144 dim_t s=0;
145 double norm_dx = atol * 2.;
146 err_t errorCode = PRECONDITIONER_NO_ERROR;
147
148 if (! x_is_initial) {
149 util::copy(n, x, b);
150 Preconditioner_LocalSmoother_Sweep(A->mainBlock,smoother->localSmoother,x);
151 norm_dx=util::lsup(n,x,A->mpi_info);
152 s++;
153 }
154 while (norm_dx > atol) {
155 util::copy(n, b_new, b);
156 SystemMatrix_MatrixVector((-1.), A, x, 1., b_new); /* b_new = b - A*x */
157 Preconditioner_LocalSmoother_Sweep(A->mainBlock,smoother->localSmoother,b_new);
158 norm_dx=util::lsup(n,b_new,A->mpi_info);
159 util::AXPY(n, x, 1., b_new);
160 if (s >= max_sweeps) {
161 errorCode = PRECONDITIONER_MAXITER_REACHED;
162 break;
163 }
164 s++;
165 }
166 *sweeps=s;
167 return errorCode;
168 }
169
170 void Preconditioner_LocalSmoother_solve(SparseMatrix_ptr A,
171 Preconditioner_LocalSmoother* smoother,
172 double* x, const double* b,
173 dim_t sweeps, bool x_is_initial)
174 {
175 const dim_t n = A->numRows * A->row_block_size;
176 double *b_new = smoother->buffer;
177 dim_t nsweeps=sweeps;
178
179 if (! x_is_initial) {
180 util::copy(n, x, b);
181 Preconditioner_LocalSmoother_Sweep(A, smoother, x);
182 nsweeps--;
183 }
184
185 while (nsweeps > 0 ) {
186 util::copy(n, b_new, b);
187
188 SparseMatrix_MatrixVector_CSR_OFFSET0((-1.), A, x, 1., b_new); /* b_new = b - A*x */
189 Preconditioner_LocalSmoother_Sweep(A, smoother, b_new);
190 util::AXPY(n, x, 1., b_new);
191 nsweeps--;
192 }
193 }
194
195 /*
196 Gauss-Seidel sweep to calculate /delta x_{n+1} from matrix A and
197 residual r_n. It has two steps: forward substitution and backward
198 substitution.
199 Forward substitution: (D+L) * /delta x_{n+1/2} = r_n
200 Backward substitution: (D+U) * /delta x_{n+1} = r_n - L * /delta x_{n+1/2}
201 = D * /delta x_{n+1/2}
202 where A = D + L + U
203 /delta x_{n+1/2} = x_{n+1/2} - x_n
204 /delta x_{n+1} = x_{n+1} - x_n
205
206 Input: Matrix A
207 residual r_n (in x)
208 Output: /delta x_{n+1} (in x)
209 */
210 void Preconditioner_LocalSmoother_Sweep(SparseMatrix_ptr A,
211 Preconditioner_LocalSmoother* smoother, double* x)
212 {
213 const dim_t nt=omp_get_max_threads();
214 if (smoother->Jacobi) {
215 BlockOps_solveAll(A->row_block_size,A->numRows,smoother->diag,smoother->pivot,x);
216 } else {
217 if (nt < 2) {
218 Preconditioner_LocalSmoother_Sweep_sequential(A,smoother,x);
219 } else {
220 Preconditioner_LocalSmoother_Sweep_colored(A,smoother,x);
221 }
222 }
223 }
224
225 /// inplace Gauss-Seidel sweep in sequential mode
226 void Preconditioner_LocalSmoother_Sweep_sequential(SparseMatrix_ptr A,
227 Preconditioner_LocalSmoother* smoother, double* x)
228 {
229 const dim_t n=A->numRows;
230 const dim_t n_block=A->row_block_size;
231 double *diag = smoother->diag;
232 index_t* pivot = smoother->pivot;
233 const dim_t block_len=A->block_size;
234 register dim_t i,k;
235 register index_t iptr_ik, mm;
236 register double rtmp;
237 int failed = 0;
238 const index_t* ptr_main = A->borrowMainDiagonalPointer();
239
240 // silence warning from var being unused by macros
241 (void)pivot;
242 (void)block_len;
243
244 // forward substitution
245 if (n_block==1) {
246 x[0]*=diag[0];
247 for (i = 1; i < n; ++i) {
248 mm=ptr_main[i];
249 /* x_i=x_i-a_ik*x_k (with k<i) */
250 rtmp=x[i];
251 for (iptr_ik=A->pattern->ptr[i];iptr_ik<mm; ++iptr_ik) {
252 k=A->pattern->index[iptr_ik];
253 rtmp-=A->val[iptr_ik]*x[k];
254 }
255 x[i]=rtmp*diag[i];
256 }
257 } else if (n_block==2) {
258 BlockOps_MViP_2(&diag[0], &x[0]);
259 for (i = 1; i < n; ++i) {
260 mm=ptr_main[i];
261 for (iptr_ik=A->pattern->ptr[i];iptr_ik<mm; ++iptr_ik) {
262 k=A->pattern->index[iptr_ik];
263 BlockOps_SMV_2(&x[2*i], &A->val[4*iptr_ik], &x[2*k]);
264 }
265 BlockOps_MViP_2(&diag[4*i], &x[2*i]);
266 }
267 } else if (n_block==3) {
268 BlockOps_MViP_3(&diag[0], &x[0]);
269 for (i = 1; i < n; ++i) {
270 mm=ptr_main[i];
271 for (iptr_ik=A->pattern->ptr[i];iptr_ik<mm; ++iptr_ik) {
272 k=A->pattern->index[iptr_ik];
273 BlockOps_SMV_3(&x[3*i], &A->val[9*iptr_ik], &x[3*k]);
274 }
275 BlockOps_MViP_3(&diag[9*i], &x[3*i]);
276 }
277 } else {
278 BlockOps_solve_N(n_block, &x[0], &diag[0], &pivot[0], &failed);
279 for (i = 1; i < n; ++i) {
280 mm=ptr_main[i];
281 for (iptr_ik=A->pattern->ptr[i];iptr_ik<mm; ++iptr_ik) {
282 k=A->pattern->index[iptr_ik];
283 BlockOps_SMV_N(n_block, &x[n_block*i], &A->val[block_len*iptr_ik], &x[n_block*k]);
284 }
285 BlockOps_solve_N(n_block, &x[n_block*i], &diag[block_len*i], &pivot[n_block*i], &failed);
286 }
287 }
288
289 // backward sweeps
290 if (n_block==1) {
291 for (i = n-2; i > -1; --i) {
292 mm=ptr_main[i];
293 rtmp=x[i]*A->val[mm];
294 for (iptr_ik=mm+1; iptr_ik < A->pattern->ptr[i+1]; ++iptr_ik) {
295 k=A->pattern->index[iptr_ik];
296 rtmp-=A->val[iptr_ik]*x[k];
297 }
298 x[i]=diag[i]*rtmp;
299 }
300 } else if (n_block==2) {
301 for (i = n-2; i > -1; --i) {
302 mm=ptr_main[i];
303 BlockOps_MViP_2(&A->val[4*mm], &x[2*i]);
304 for (iptr_ik=mm+1; iptr_ik < A->pattern->ptr[i+1]; ++iptr_ik) {
305 k=A->pattern->index[iptr_ik];
306 BlockOps_SMV_2(&x[2*i], &A->val[4*iptr_ik], &x[2*k]);
307 }
308 BlockOps_MViP_2(&diag[i*4], &x[2*i]);
309 }
310 } else if (n_block==3) {
311 for (i = n-2; i > -1; --i) {
312 mm=ptr_main[i];
313 BlockOps_MViP_3(&A->val[9*mm], &x[3*i]);
314 for (iptr_ik=mm+1; iptr_ik < A->pattern->ptr[i+1]; ++iptr_ik) {
315 k=A->pattern->index[iptr_ik];
316 BlockOps_SMV_3(&x[3*i], &A->val[9*iptr_ik], &x[3*k]);
317 }
318 BlockOps_MViP_3(&diag[i*9], &x[3*i]);
319 }
320 } else {
321 double *y=new double[n_block];
322 for (i = n-2; i > -1; --i) {
323 mm=ptr_main[i];
324 BlockOps_MV_N(n_block, &y[0], &A->val[block_len*mm], &x[n_block*i]);
325 for (iptr_ik=mm+1; iptr_ik < A->pattern->ptr[i+1]; ++iptr_ik) {
326 k=A->pattern->index[iptr_ik];
327 BlockOps_SMV_N(n_block, &y[0], &A->val[block_len*iptr_ik], &x[n_block*k]);
328 }
329 BlockOps_Cpy_N(n_block ,&x[n_block*i], &y[0]);
330 BlockOps_solve_N(n_block, &x[n_block*i], &diag[i*block_len], &pivot[i*n_block], &failed);
331 }
332 delete[] y;
333 }
334
335 if (failed > 0) {
336 Esys_setError(ZERO_DIVISION_ERROR, "Preconditioner_LocalSmoother_Sweep_sequential: non-regular main diagonal block.");
337 }
338 }
339
340 void Preconditioner_LocalSmoother_Sweep_colored(SparseMatrix_ptr A,
341 Preconditioner_LocalSmoother* smoother, double* x)
342 {
343 const dim_t n=A->numRows;
344 const dim_t n_block=A->row_block_size;
345 double *diag = smoother->diag;
346 index_t* pivot = smoother->pivot;
347 const dim_t block_len=A->block_size;
348 double *y;
349
350 register dim_t i,k;
351 register index_t color,iptr_ik, mm;
352 register double rtmp;
353 int failed = 0;
354
355 const index_t* coloring = A->pattern->borrowColoringPointer();
356 const dim_t num_colors = A->pattern->getNumColors();
357 const index_t* ptr_main = A->borrowMainDiagonalPointer();
358
359 (void)pivot; /* These vars are dropped by some macros*/
360 (void)block_len;
361
362 #pragma omp parallel private(mm, i,iptr_ik,k,rtmp, color, y)
363 {
364 if (n_block>3) {
365 y=new double[n_block];
366 } else {
367 y=NULL;
368 }
369 /* forward substitution */
370
371 /* color = 0 */
372 if (n_block==1) {
373 #pragma omp for schedule(static)
374 for (i = 0; i < n; ++i) {
375 if (coloring[i]== 0 ) x[i]*=diag[i];
376 }
377 } else if (n_block==2) {
378 #pragma omp for schedule(static)
379 for (i = 0; i < n; ++i) {
380 if (coloring[i]== 0 ) BlockOps_MViP_2(&diag[i*4], &x[2*i]);
381 }
382 } else if (n_block==3) {
383 #pragma omp for schedule(static)
384 for (i = 0; i < n; ++i) {
385 if (coloring[i]==0) BlockOps_MViP_3(&diag[i*9], &x[3*i]);
386 }
387 } else {
388 #pragma omp for schedule(static)
389 for (i = 0; i < n; ++i) {
390 if (coloring[i]==0) BlockOps_solve_N(n_block, &x[n_block*i], &diag[block_len*i], &pivot[n_block*i], &failed);
391 }
392 }
393
394 for (color=1;color<num_colors;++color) {
395 if (n_block==1) {
396 #pragma omp for schedule(static)
397 for (i = 0; i < n; ++i) {
398 if (coloring[i]==color) {
399 /* x_i=x_i-a_ik*x_k */
400 rtmp=x[i];
401 for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
402 k=A->pattern->index[iptr_ik];
403 if (coloring[k]<color) rtmp-=A->val[iptr_ik]*x[k];
404 }
405 x[i]=diag[i]*rtmp;
406 }
407 }
408 } else if (n_block==2) {
409 #pragma omp for schedule(static)
410 for (i = 0; i < n; ++i) {
411 if (coloring[i]==color) {
412 for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
413 k=A->pattern->index[iptr_ik];
414 if (coloring[k]<color) BlockOps_SMV_2(&x[2*i], &A->val[4*iptr_ik], &x[2*k]);
415 }
416 BlockOps_MViP_2(&diag[4*i], &x[2*i]);
417 }
418 }
419 } else if (n_block==3) {
420 #pragma omp for schedule(static)
421 for (i = 0; i < n; ++i) {
422 if (coloring[i]==color) {
423 for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
424 k=A->pattern->index[iptr_ik];
425 if (coloring[k]<color) BlockOps_SMV_3(&x[3*i], &A->val[9*iptr_ik], &x[3*k]);
426 }
427 BlockOps_MViP_3(&diag[9*i], &x[3*i]);
428 }
429 }
430 } else {
431 #pragma omp for schedule(static)
432 for (i = 0; i < n; ++i) {
433 if (coloring[i] == color) {
434 for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
435 k=A->pattern->index[iptr_ik];
436 if (coloring[k]<color) BlockOps_SMV_N(n_block, &x[n_block*i], &A->val[block_len*iptr_ik], &x[n_block*k]);
437 }
438 BlockOps_solve_N(n_block, &x[n_block*i], &diag[block_len*i], &pivot[n_block*i], &failed);
439 }
440 }
441 }
442 } // end of coloring loop
443
444 // backward substitution
445
446 // Note: color=(num_colors)-1 is not required
447 for (color=(num_colors)-2 ;color>-1;--color) {
448 if (n_block==1) {
449 #pragma omp for schedule(static)
450 for (i = 0; i < n; ++i) {
451 if (coloring[i]==color) {
452 mm=ptr_main[i];
453 rtmp=A->val[mm]*x[i];
454 for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
455 k=A->pattern->index[iptr_ik];
456 if (coloring[k]>color) rtmp-=A->val[iptr_ik]*x[k];
457 }
458 x[i]= rtmp*diag[i];
459 }
460 }
461 } else if (n_block==2) {
462 #pragma omp for schedule(static)
463 for (i = 0; i < n; ++i) {
464 if (coloring[i]==color) {
465 mm=ptr_main[i];
466 BlockOps_MViP_2(&A->val[4*mm], &x[2*i]);
467 for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
468 k=A->pattern->index[iptr_ik];
469 if (coloring[k]>color) BlockOps_SMV_2(&x[2*i], &A->val[4*iptr_ik], &x[2*k]);
470 }
471 BlockOps_MViP_2(&diag[4*i], &x[2*i]);
472 }
473 }
474 } else if (n_block==3) {
475 #pragma omp for schedule(static)
476 for (i = 0; i < n; ++i) {
477 if (coloring[i]==color) {
478 mm=ptr_main[i];
479 BlockOps_MViP_3(&A->val[9*mm], &x[3*i]);
480 for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
481 k=A->pattern->index[iptr_ik];
482 if (coloring[k]>color) BlockOps_SMV_3(&x[3*i], &A->val[9*iptr_ik], &x[3*k]);
483 }
484 BlockOps_MViP_3(&diag[9*i], &x[3*i]);
485 }
486 }
487 } else {
488 #pragma omp for schedule(static)
489 for (i = 0; i < n; ++i) {
490 if (coloring[i]==color) {
491 mm=ptr_main[i];
492 BlockOps_MV_N(n_block, &y[0], &A->val[block_len*mm], &x[n_block*i]);
493 for (iptr_ik=A->pattern->ptr[i];iptr_ik<A->pattern->ptr[i+1]; ++iptr_ik) {
494 k=A->pattern->index[iptr_ik];
495 if (coloring[k]>color) BlockOps_SMV_N(n_block, &y[0], &A->val[block_len*iptr_ik], &x[n_block*k]);
496 }
497 BlockOps_Cpy_N(n_block ,&x[n_block*i], &y[0]);
498 BlockOps_solve_N(n_block, &x[n_block*i], &diag[i*block_len], &pivot[i*n_block], &failed);
499 }
500 }
501 }
502 }
503 delete[] y;
504 }
505 if (failed > 0) {
506 Esys_setError(ZERO_DIVISION_ERROR, "Preconditioner_LocalSmoother_Sweep_colored: non-regular main diagonal block.");
507 }
508 }
509
510 } // namespace paso
511

Properties

Name Value
svn:mergeinfo /branches/amg_from_3530/paso/src/Smoother.cpp:3531-3826 /branches/lapack2681/paso/src/Smoother.cpp:2682-2741 /branches/pasowrap/paso/src/Smoother.cpp:3661-3674 /branches/py3_attempt2/paso/src/Smoother.cpp:3871-3891 /branches/restext/paso/src/Smoother.cpp:2610-2624 /branches/ripleygmg_from_3668/paso/src/Smoother.cpp:3669-3791 /branches/stage3.0/paso/src/Smoother.cpp:2569-2590 /branches/symbolic_from_3470/paso/src/Smoother.cpp:3471-3974 /branches/symbolic_from_3470/ripley/test/python/paso/src/Smoother.cpp:3517-3974 /release/3.0/paso/src/Smoother.cpp:2591-2601 /trunk/paso/src/Smoother.cpp:4257-4344 /trunk/ripley/test/python/paso/src/Smoother.cpp:3480-3515

  ViewVC Help
Powered by ViewVC 1.1.26