/[escript]/trunk/paso/src/Smoother.c
ViewVC logotype

Contents of /trunk/paso/src/Smoother.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3793 - (show annotations)
Wed Feb 1 07:39:43 2012 UTC (7 years, 6 months ago) by gross
File MIME type: text/plain
File size: 16790 byte(s)
new implementation of FCT solver with some modifications to the python interface
1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2010 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14
15 /**************************************************************/
16
17 /* Paso: Gauss-Seidel */
18
19 /**************************************************************/
20
21 /* Author: artak@uq.edu.au */
22
23 /**************************************************************/
24
25 #include "Paso.h"
26 #include "Preconditioner.h"
27 #include "PasoUtil.h"
28 #include "BlockOps.h"
29
30 #include <stdio.h>
31
32
33 /**************************************************************/
34
35 /* free all memory used by Smoother */
36
37 void Paso_Preconditioner_Smoother_free(Paso_Preconditioner_Smoother * in) {
38 if (in!=NULL) {
39 Paso_Preconditioner_LocalSmoother_free(in->localSmoother);
40 MEMFREE(in);
41 }
42 }
43 void Paso_Preconditioner_LocalSmoother_free(Paso_Preconditioner_LocalSmoother * in) {
44 if (in!=NULL) {
45 MEMFREE(in->diag);
46 MEMFREE(in->pivot);
47 MEMFREE(in->buffer);
48 MEMFREE(in);
49 }
50 }
51 /**************************************************************/
52
53 /* constructs the symmetric Gauss-Seidel preconditioner */
54
55 Paso_Preconditioner_Smoother* Paso_Preconditioner_Smoother_alloc(Paso_SystemMatrix * A_p, const bool_t jacobi, const bool_t is_local, const bool_t verbose)
56 {
57
58 /* allocations: */
59 Paso_Preconditioner_Smoother* out=MEMALLOC(1,Paso_Preconditioner_Smoother);
60 if (! Esys_checkPtr(out)) {
61 out->localSmoother=Paso_Preconditioner_LocalSmoother_alloc(A_p->mainBlock,jacobi,verbose);
62 out->is_local=is_local;
63 }
64 if (Esys_MPIInfo_noError(A_p->mpi_info)) {
65 return out;
66 } else {
67 Paso_Preconditioner_Smoother_free(out);
68 return NULL;
69 }
70 }
71 Paso_Preconditioner_LocalSmoother* Paso_Preconditioner_LocalSmoother_alloc(Paso_SparseMatrix * A_p, const bool_t jacobi, bool_t verbose)
72 {
73
74 const dim_t n=A_p->numRows;
75 const dim_t n_block=A_p->row_block_size;
76 const dim_t block_size=A_p->block_size;
77
78 double time0=Esys_timer();
79 /* allocations: */
80 Paso_Preconditioner_LocalSmoother* out=MEMALLOC(1,Paso_Preconditioner_LocalSmoother);
81 if (! Esys_checkPtr(out)) {
82
83 out->diag=MEMALLOC( ((size_t) n) * ((size_t) block_size),double);
84 out->pivot=MEMALLOC( ((size_t) n) * ((size_t) n_block), index_t);
85 out->buffer=MEMALLOC( ((size_t) n) * ((size_t) n_block), double);
86 out->Jacobi=jacobi;
87
88 if ( ! ( Esys_checkPtr(out->diag) || Esys_checkPtr(out->pivot) ) ) {
89 Paso_SparseMatrix_invMain(A_p, out->diag, out->pivot );
90 }
91
92 }
93 time0=Esys_timer()-time0;
94
95 if (Esys_noError()) {
96 return out;
97 } else {
98 Paso_Preconditioner_LocalSmoother_free(out);
99 return NULL;
100 }
101 }
102
103 /*
104
105 performs a few sweeps of the form
106
107 S (x_{k} - x_{k-1}) = b - A x_{k-1}
108
109 where x_{0}=0 and S provides some approximation of A.
110
111 Under MPI S is built using A_p->mainBlock only.
112 If Smoother is local the defect b - A x_{k-1} is calculated using A_p->mainBlock only.
113 */
114
115 void Paso_Preconditioner_Smoother_solve(Paso_SystemMatrix* A_p, Paso_Preconditioner_Smoother * smoother, double * x, const double * b,
116 const dim_t sweeps, const bool_t x_is_initial)
117 {
118 const dim_t n= (A_p->mainBlock->numRows) * (A_p->mainBlock->row_block_size);
119
120 double *b_new = smoother->localSmoother->buffer;
121 dim_t nsweeps=sweeps;
122 if (smoother->is_local) {
123 Paso_Preconditioner_LocalSmoother_solve(A_p->mainBlock,smoother->localSmoother,x,b,sweeps,x_is_initial);
124 } else {
125 if (! x_is_initial) {
126 Paso_Copy(n, x, b);
127 Paso_Preconditioner_LocalSmoother_Sweep(A_p->mainBlock,smoother->localSmoother,x);
128 nsweeps--;
129 }
130 while (nsweeps > 0 ) {
131 Paso_Copy(n, b_new, b);
132 Paso_SystemMatrix_MatrixVector((-1.), A_p, x, 1., b_new); /* b_new = b - A*x */
133 Paso_Preconditioner_LocalSmoother_Sweep(A_p->mainBlock,smoother->localSmoother,b_new);
134 Paso_AXPY(n, x, 1., b_new);
135 nsweeps--;
136 }
137
138 }
139 }
140
141 err_t Paso_Preconditioner_Smoother_solve_byTolerance(Paso_SystemMatrix* A_p, Paso_Preconditioner_Smoother * smoother,
142 double * x, const double * b,
143 const double rtol, dim_t *sweeps, const bool_t x_is_initial)
144 {
145 const dim_t n= (A_p->mainBlock->numRows) * (A_p->mainBlock->row_block_size);
146 double *b_new = smoother->localSmoother->buffer;
147 const dim_t max_sweeps=*sweeps;
148 dim_t s=0;
149 double norm_b, norm_r;
150 err_t errorCode = PRECONDITIONER_NO_ERROR;
151
152 norm_b=Paso_lsup(n,b,A_p->mpi_info);
153 if (! x_is_initial) {
154 Paso_Copy(n, x, b);
155 Paso_Preconditioner_LocalSmoother_Sweep(A_p->mainBlock,smoother->localSmoother,x);
156 s++;
157 }
158 while (1) {
159 Paso_Copy(n, b_new, b);
160 Paso_SystemMatrix_MatrixVector((-1.), A_p, x, 1., b_new); /* b_new = b - A*x */
161 norm_r=Paso_lsup(n,b,A_p->mpi_info);
162 if (norm_r <= rtol * norm_b ) break;
163 Paso_Preconditioner_LocalSmoother_Sweep(A_p->mainBlock,smoother->localSmoother,b_new);
164 Paso_AXPY(n, x, 1., b_new);
165 s++;
166 if (s > max_sweeps) {
167 errorCode = PRECONDITIONER_MAXITER_REACHED;
168 break;
169 }
170 }
171 *sweeps=s;
172 return errorCode;
173 }
174
175
176
177 void Paso_Preconditioner_LocalSmoother_solve(Paso_SparseMatrix* A_p, Paso_Preconditioner_LocalSmoother * smoother, double * x, const double * b,
178 const dim_t sweeps, const bool_t x_is_initial)
179 {
180 const dim_t n= (A_p->numRows) * (A_p->row_block_size);
181 double *b_new = smoother->buffer;
182 dim_t nsweeps=sweeps;
183
184 if (! x_is_initial) {
185 Paso_Copy(n, x, b);
186 Paso_Preconditioner_LocalSmoother_Sweep(A_p,smoother,x);
187 nsweeps--;
188 }
189
190 while (nsweeps > 0 ) {
191 Paso_Copy(n, b_new, b);
192
193 Paso_SparseMatrix_MatrixVector_CSR_OFFSET0((-1.), A_p, x, 1., b_new); /* b_new = b - A*x */
194 Paso_Preconditioner_LocalSmoother_Sweep(A_p,smoother,b_new);
195 Paso_AXPY(n, x, 1., b_new);
196 nsweeps--;
197 }
198 }
199
200 void Paso_Preconditioner_LocalSmoother_Sweep(Paso_SparseMatrix* A, Paso_Preconditioner_LocalSmoother * smoother, double * x)
201 {
202 const dim_t nt=omp_get_max_threads();
203 if (smoother->Jacobi) {
204 Paso_BlockOps_solveAll(A->row_block_size,A->numRows,smoother->diag,smoother->pivot,x);
205 } else {
206 if (nt < 2) {
207 Paso_Preconditioner_LocalSmoother_Sweep_sequential(A,smoother,x);
208 } else {
209 Paso_Preconditioner_LocalSmoother_Sweep_colored(A,smoother,x);
210 }
211 }
212 }
213
214 /* inplace Gauss-Seidel sweep in sequential mode: */
215
216 void Paso_Preconditioner_LocalSmoother_Sweep_sequential(Paso_SparseMatrix* A_p, Paso_Preconditioner_LocalSmoother * smoother, double * x)
217 {
218 const dim_t n=A_p->numRows;
219 const dim_t n_block=A_p->row_block_size;
220 double *diag = smoother->diag;
221 index_t* pivot = smoother->pivot;
222 const dim_t block_len=A_p->block_size;
223
224
225 register dim_t i,k;
226 register index_t iptr_ik, mm;
227 register double rtmp;
228 int failed = 0;
229
230 const index_t* ptr_main = Paso_SparseMatrix_borrowMainDiagonalPointer(A_p);
231
232 (void)pivot; /* silence warning from var being unused by macros */
233 (void)block_len;
234 /* forward substitution */
235
236 if (n_block==1) {
237 x[0]*=diag[0];
238 for (i = 1; i < n; ++i) {
239 mm=ptr_main[i];
240 /* x_i=x_i-a_ik*x_k (with k<i) */
241 rtmp=x[i];
242 for (iptr_ik=A_p->pattern->ptr[i];iptr_ik<mm; ++iptr_ik) {
243 k=A_p->pattern->index[iptr_ik];
244 rtmp-=A_p->val[iptr_ik]*x[k];
245 }
246 x[i]=rtmp*diag[i];
247 }
248 } else if (n_block==2) {
249 Paso_BlockOps_MViP_2(&diag[0], &x[0]);
250 for (i = 1; i < n; ++i) {
251 mm=ptr_main[i];
252 for (iptr_ik=A_p->pattern->ptr[i];iptr_ik<mm; ++iptr_ik) {
253 k=A_p->pattern->index[iptr_ik];
254 Paso_BlockOps_SMV_2(&x[2*i], &A_p->val[4*iptr_ik], &x[2*k]);
255 }
256 Paso_BlockOps_MViP_2(&diag[4*i], &x[2*i]);
257 }
258 } else if (n_block==3) {
259 Paso_BlockOps_MViP_3(&diag[0], &x[0]);
260 for (i = 1; i < n; ++i) {
261 mm=ptr_main[i];
262 /* x_i=x_i-a_ik*x_k */
263 for (iptr_ik=A_p->pattern->ptr[i];iptr_ik<mm; ++iptr_ik) {
264 k=A_p->pattern->index[iptr_ik];
265 Paso_BlockOps_SMV_3(&x[3*i], &A_p->val[9*iptr_ik], &x[3*k]);
266 }
267 Paso_BlockOps_MViP_3(&diag[9*i], &x[3*i]);
268 }
269 } else {
270 Paso_BlockOps_solve_N(n_block, &x[0], &diag[0], &pivot[0], &failed);
271 for (i = 1; i < n; ++i) {
272 mm=ptr_main[i];
273 /* x_i=x_i-a_ik*x_k */
274 for (iptr_ik=A_p->pattern->ptr[i];iptr_ik<mm; ++iptr_ik) {
275 k=A_p->pattern->index[iptr_ik];
276 Paso_BlockOps_SMV_N(n_block, &x[n_block*i], &A_p->val[block_len*iptr_ik], &x[n_block*k]);
277 }
278 Paso_BlockOps_solve_N(n_block, &x[n_block*i], &diag[block_len*i], &pivot[n_block*i], &failed);
279 }
280 }
281 /* backward substitution */
282
283 if (n_block==1) {
284 for (i = n-2; i > -1; --i) {
285 mm=ptr_main[i];
286 rtmp=x[i]*A_p->val[mm];
287 for (iptr_ik=mm+1; iptr_ik < A_p->pattern->ptr[i+1]; ++iptr_ik) {
288 k=A_p->pattern->index[iptr_ik];
289 rtmp-=A_p->val[iptr_ik]*x[k];
290 }
291 x[i]=diag[i]*rtmp;
292 }
293
294 } else if (n_block==2) {
295 for (i = n-2; i > -1; --i) {
296 mm=ptr_main[i];
297 Paso_BlockOps_MViP_2(&A_p->val[4*mm], &x[2*i]);
298 for (iptr_ik=mm+1; iptr_ik < A_p->pattern->ptr[i+1]; ++iptr_ik) {
299 k=A_p->pattern->index[iptr_ik];
300 Paso_BlockOps_SMV_2(&x[2*i], &A_p->val[4*iptr_ik], &x[2*k]);
301 }
302 Paso_BlockOps_MViP_2(&diag[i*4], &x[2*i]);
303
304 }
305 } else if (n_block==3) {
306 for (i = n-2; i > -1; --i) {
307 mm=ptr_main[i];
308 Paso_BlockOps_MViP_3(&A_p->val[9*mm], &x[3*i]);
309 for (iptr_ik=mm+1; iptr_ik < A_p->pattern->ptr[i+1]; ++iptr_ik) {
310 k=A_p->pattern->index[iptr_ik];
311 Paso_BlockOps_SMV_3(&x[3*i], &A_p->val[9*iptr_ik], &x[3*k]);
312 }
313 Paso_BlockOps_MViP_3(&diag[i*9], &x[3*i]);
314 }
315
316 } else {
317 double *y=TMPMEMALLOC(n_block, double);
318 for (i = n-2; i > -1; --i) {
319 mm=ptr_main[i];
320 Paso_BlockOps_MV_N(n_block, &y[0], &A_p->val[block_len*mm], &x[n_block*i]);
321 for (iptr_ik=mm+1; iptr_ik < A_p->pattern->ptr[i+1]; ++iptr_ik) {
322 k=A_p->pattern->index[iptr_ik];
323 Paso_BlockOps_SMV_N(n_block, &y[0], &A_p->val[block_len*iptr_ik], &x[n_block*k]);
324 }
325 Paso_BlockOps_Cpy_N(n_block ,&x[n_block*i], &y[0]);
326 Paso_BlockOps_solve_N(n_block, &x[n_block*i], &diag[i*block_len], &pivot[i*n_block], &failed);
327 }
328 TMPMEMFREE(y);
329 }
330
331 if (failed > 0) {
332 Esys_setError(ZERO_DIVISION_ERROR, "Paso_Preconditioner_LocalSmoother_Sweep_sequential: non-regular main diagonal block.");
333 }
334
335 return;
336 }
337
338 void Paso_Preconditioner_LocalSmoother_Sweep_colored(Paso_SparseMatrix* A_p, Paso_Preconditioner_LocalSmoother * smoother, double * x)
339 {
340 const dim_t n=A_p->numRows;
341 const dim_t n_block=A_p->row_block_size;
342 double *diag = smoother->diag;
343 index_t* pivot = smoother->pivot;
344 const dim_t block_len=A_p->block_size;
345 double *y;
346
347 register dim_t i,k;
348 register index_t color,iptr_ik, mm;
349 register double rtmp;
350 int failed = 0;
351
352 const index_t* coloring = Paso_Pattern_borrowColoringPointer(A_p->pattern);
353 const dim_t num_colors = Paso_Pattern_getNumColors(A_p->pattern);
354 const index_t* ptr_main = Paso_SparseMatrix_borrowMainDiagonalPointer(A_p);
355
356 (void)pivot; /* These vars are dropped by some macros*/
357 (void)block_len;
358
359 #pragma omp parallel private(mm, i,iptr_ik,k,rtmp, color, y)
360 {
361 if (n_block>3) {
362 y=TMPMEMALLOC(n_block, double);
363 } else {
364 y=NULL;
365 }
366 /* forward substitution */
367
368 /* color = 0 */
369 if (n_block==1) {
370 #pragma omp for schedule(static)
371 for (i = 0; i < n; ++i) {
372 if (coloring[i]== 0 ) x[i]*=diag[i];
373 }
374 } else if (n_block==2) {
375 #pragma omp for schedule(static)
376 for (i = 0; i < n; ++i) {
377 if (coloring[i]== 0 ) Paso_BlockOps_MViP_2(&diag[i*4], &x[2*i]);
378 }
379 } else if (n_block==3) {
380 #pragma omp for schedule(static)
381 for (i = 0; i < n; ++i) {
382 if (coloring[i]==0) Paso_BlockOps_MViP_3(&diag[i*9], &x[3*i]);
383 }
384 } else {
385 #pragma omp for schedule(static)
386 for (i = 0; i < n; ++i) {
387 if (coloring[i]==0) Paso_BlockOps_solve_N(n_block, &x[n_block*i], &diag[block_len*i], &pivot[n_block*i], &failed);
388 }
389 }
390
391
392 for (color=1;color<num_colors;++color) {
393
394 if (n_block==1) {
395 #pragma omp for schedule(static)
396 for (i = 0; i < n; ++i) {
397 if (coloring[i]==color) {
398 /* x_i=x_i-a_ik*x_k */
399 rtmp=x[i];
400 for (iptr_ik=A_p->pattern->ptr[i];iptr_ik<A_p->pattern->ptr[i+1]; ++iptr_ik) {
401 k=A_p->pattern->index[iptr_ik];
402 if (coloring[k]<color) rtmp-=A_p->val[iptr_ik]*x[k];
403 }
404 x[i]=diag[i]*rtmp;
405 }
406 }
407 } else if (n_block==2) {
408 #pragma omp for schedule(static)
409 for (i = 0; i < n; ++i) {
410 if (coloring[i]==color) {
411 for (iptr_ik=A_p->pattern->ptr[i];iptr_ik<A_p->pattern->ptr[i+1]; ++iptr_ik) {
412 k=A_p->pattern->index[iptr_ik];
413 if (coloring[k]<color) Paso_BlockOps_SMV_2(&x[2*i], &A_p->val[4*iptr_ik], &x[2*k]);
414 }
415 Paso_BlockOps_MViP_2(&diag[4*i], &x[2*i]);
416 }
417 }
418 } else if (n_block==3) {
419 #pragma omp for schedule(static)
420 for (i = 0; i < n; ++i) {
421 if (coloring[i]==color) {
422 for (iptr_ik=A_p->pattern->ptr[i];iptr_ik<A_p->pattern->ptr[i+1]; ++iptr_ik) {
423 k=A_p->pattern->index[iptr_ik];
424 if (coloring[k]<color) Paso_BlockOps_SMV_3(&x[3*i], &A_p->val[9*iptr_ik], &x[3*k]);
425 }
426 Paso_BlockOps_MViP_3(&diag[9*i], &x[3*i]);
427 }
428 }
429 } else {
430 #pragma omp for schedule(static)
431 for (i = 0; i < n; ++i) {
432 if (coloring[i] == color) {
433 for (iptr_ik=A_p->pattern->ptr[i];iptr_ik<A_p->pattern->ptr[i+1]; ++iptr_ik) {
434 k=A_p->pattern->index[iptr_ik];
435 if (coloring[k]<color) Paso_BlockOps_SMV_N(n_block, &x[n_block*i], &A_p->val[block_len*iptr_ik], &x[n_block*k]);
436 }
437 Paso_BlockOps_solve_N(n_block, &x[n_block*i], &diag[block_len*i], &pivot[n_block*i], &failed);
438 }
439 }
440 }
441 } /* end of coloring loop */
442
443 /* backward substitution */
444 for (color=(num_colors)-2 ;color>-1;--color) { /* Note: color=(num_colors)-1 is not required */
445 if (n_block==1) {
446 #pragma omp for schedule(static)
447 for (i = 0; i < n; ++i) {
448 if (coloring[i]==color) {
449 mm=ptr_main[i];
450 rtmp=A_p->val[mm]*x[i];
451 for (iptr_ik=A_p->pattern->ptr[i];iptr_ik<A_p->pattern->ptr[i+1]; ++iptr_ik) {
452 k=A_p->pattern->index[iptr_ik];
453 if (coloring[k]>color) rtmp-=A_p->val[iptr_ik]*x[k];
454 }
455 x[i]= rtmp*diag[i];
456 }
457 }
458 } else if (n_block==2) {
459 #pragma omp for schedule(static)
460 for (i = 0; i < n; ++i) {
461 if (coloring[i]==color) {
462 mm=ptr_main[i];
463 Paso_BlockOps_MViP_2(&A_p->val[4*mm], &x[2*i]);
464 for (iptr_ik=A_p->pattern->ptr[i];iptr_ik<A_p->pattern->ptr[i+1]; ++iptr_ik) {
465 k=A_p->pattern->index[iptr_ik];
466 if (coloring[k]>color) Paso_BlockOps_SMV_2(&x[2*i], &A_p->val[4*iptr_ik], &x[2*k]);
467 }
468 Paso_BlockOps_MViP_2(&diag[4*i], &x[2*i]);
469 }
470 }
471 } else if (n_block==3) {
472 #pragma omp for schedule(static)
473 for (i = 0; i < n; ++i) {
474 if (coloring[i]==color) {
475 mm=ptr_main[i];
476 Paso_BlockOps_MViP_3(&A_p->val[9*mm], &x[3*i]);
477 for (iptr_ik=A_p->pattern->ptr[i];iptr_ik<A_p->pattern->ptr[i+1]; ++iptr_ik) {
478 k=A_p->pattern->index[iptr_ik];
479 if (coloring[k]>color) Paso_BlockOps_SMV_3(&x[3*i], &A_p->val[9*iptr_ik], &x[3*k]);
480 }
481 Paso_BlockOps_MViP_3(&diag[9*i], &x[3*i]);
482 }
483 }
484 } else {
485 #pragma omp for schedule(static)
486 for (i = 0; i < n; ++i) {
487 if (coloring[i]==color) {
488 mm=ptr_main[i];
489 Paso_BlockOps_MV_N(n_block, &y[0], &A_p->val[block_len*mm], &x[n_block*i]);
490 for (iptr_ik=A_p->pattern->ptr[i];iptr_ik<A_p->pattern->ptr[i+1]; ++iptr_ik) {
491 k=A_p->pattern->index[iptr_ik];
492 if (coloring[k]>color) Paso_BlockOps_SMV_N(n_block, &y[0], &A_p->val[block_len*iptr_ik], &x[n_block*k]);
493 }
494 Paso_BlockOps_Cpy_N(n_block ,&x[n_block*i], &y[0]);
495 Paso_BlockOps_solve_N(n_block, &x[n_block*i], &diag[i*block_len], &pivot[i*n_block], &failed);
496 }
497 }
498 }
499 }
500 TMPMEMFREE(y);
501 }
502 if (failed > 0) {
503 Esys_setError(ZERO_DIVISION_ERROR, "Paso_Preconditioner_LocalSmoother_Sweep_colored: non-regular main diagonal block.");
504 }
505 return;
506 }

  ViewVC Help
Powered by ViewVC 1.1.26