/[escript]/trunk/paso/src/Smoother.c
ViewVC logotype

Contents of /trunk/paso/src/Smoother.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3559 - (show annotations)
Fri Aug 26 07:47:27 2011 UTC (8 years, 1 month ago) by jfenwick
File MIME type: text/plain
File size: 15608 byte(s)
Silence some warnings unused var warnings under gcc.


1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2010 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14
15 /**************************************************************/
16
17 /* Paso: Gauss-Seidel */
18
19 /**************************************************************/
20
21 /* Author: artak@uq.edu.au */
22
23 /**************************************************************/
24
25 #include "Paso.h"
26 #include "Preconditioner.h"
27 #include "PasoUtil.h"
28 #include "BlockOps.h"
29
30 #include <stdio.h>
31
32
33 /**************************************************************/
34
35 /* free all memory used by Smoother */
36
37 void Paso_Preconditioner_Smoother_free(Paso_Preconditioner_Smoother * in) {
38 if (in!=NULL) {
39 Paso_Preconditioner_LocalSmoother_free(in->localSmoother);
40 MEMFREE(in);
41 }
42 }
43 void Paso_Preconditioner_LocalSmoother_free(Paso_Preconditioner_LocalSmoother * in) {
44 if (in!=NULL) {
45 MEMFREE(in->diag);
46 MEMFREE(in->pivot);
47 MEMFREE(in->buffer);
48 MEMFREE(in);
49 }
50 }
51 /**************************************************************/
52
53 /* constructs the symmetric Gauss-Seidel preconditioner
54
55 */
56 Paso_Preconditioner_Smoother* Paso_Preconditioner_Smoother_alloc(Paso_SystemMatrix * A_p, const bool_t jacobi, const bool_t is_local, const bool_t verbose)
57 {
58
59 /* allocations: */
60 Paso_Preconditioner_Smoother* out=MEMALLOC(1,Paso_Preconditioner_Smoother);
61 if (! Esys_checkPtr(out)) {
62 out->localSmoother=Paso_Preconditioner_LocalSmoother_alloc(A_p->mainBlock,jacobi,verbose);
63 out->is_local=is_local;
64 }
65 if (Esys_MPIInfo_noError(A_p->mpi_info)) {
66 return out;
67 } else {
68 Paso_Preconditioner_Smoother_free(out);
69 return NULL;
70 }
71 }
72 Paso_Preconditioner_LocalSmoother* Paso_Preconditioner_LocalSmoother_alloc(Paso_SparseMatrix * A_p, const bool_t jacobi, bool_t verbose)
73 {
74
75 dim_t n=A_p->numRows;
76 dim_t n_block=A_p->row_block_size;
77 dim_t block_size=A_p->block_size;
78
79 double time0=Esys_timer();
80 /* allocations: */
81 Paso_Preconditioner_LocalSmoother* out=MEMALLOC(1,Paso_Preconditioner_LocalSmoother);
82 if (! Esys_checkPtr(out)) {
83
84 out->diag=MEMALLOC( ((size_t) n) * ((size_t) block_size),double);
85 out->pivot=MEMALLOC( ((size_t) n) * ((size_t) n_block), index_t);
86 out->buffer=MEMALLOC( ((size_t) n) * ((size_t) n_block), double);
87 out->Jacobi=jacobi;
88
89 if ( ! ( Esys_checkPtr(out->diag) || Esys_checkPtr(out->pivot) ) ) {
90 Paso_SparseMatrix_invMain(A_p, out->diag, out->pivot );
91 }
92
93 }
94 time0=Esys_timer()-time0;
95
96 if (Esys_noError()) {
97 return out;
98 } else {
99 Paso_Preconditioner_LocalSmoother_free(out);
100 return NULL;
101 }
102 }
103
104 /*
105
106 performs a few sweeps of the from
107
108 S (x_{k} - x_{k-1}) = b - A x_{k-1}
109
110 where x_{0}=0 and S provides some approximatioon of A.
111
112 Under MPI S is build on using A_p->mainBlock only.
113 if Smoother is local the defect b - A x_{k-1} is calculated using A_p->mainBlock only.
114
115 */
116
117 void Paso_Preconditioner_Smoother_solve(Paso_SystemMatrix* A_p, Paso_Preconditioner_Smoother * smoother, double * x, const double * b,
118 const dim_t sweeps, const bool_t x_is_initial)
119 {
120 const dim_t n= (A_p->mainBlock->numRows) * (A_p->mainBlock->row_block_size);
121
122 double *b_new = smoother->localSmoother->buffer;
123 dim_t nsweeps=sweeps;
124 if (smoother->is_local) {
125 Paso_Preconditioner_LocalSmoother_solve(A_p->mainBlock,smoother->localSmoother,x,b,sweeps,x_is_initial);
126 } else {
127 if (! x_is_initial) {
128 Paso_Copy(n, x, b);
129 Paso_Preconditioner_LocalSmoother_Sweep(A_p->mainBlock,smoother->localSmoother,x);
130 nsweeps--;
131 }
132 while (nsweeps > 0 ) {
133 Paso_Copy(n, b_new, b);
134 Paso_SystemMatrix_MatrixVector((-1.), A_p, x, 1., b_new); /* b_new = b - A*x */
135 Paso_Preconditioner_LocalSmoother_Sweep(A_p->mainBlock,smoother->localSmoother,b_new);
136 Paso_AXPY(n, x, 1., b_new);
137 nsweeps--;
138 }
139
140 }
141 }
142 void Paso_Preconditioner_LocalSmoother_solve(Paso_SparseMatrix* A_p, Paso_Preconditioner_LocalSmoother * smoother, double * x, const double * b,
143 const dim_t sweeps, const bool_t x_is_initial)
144 {
145 const dim_t n= (A_p->numRows) * (A_p->row_block_size);
146 double *b_new = smoother->buffer;
147 dim_t nsweeps=sweeps;
148
149 if (! x_is_initial) {
150 Paso_Copy(n, x, b);
151 Paso_Preconditioner_LocalSmoother_Sweep(A_p,smoother,x);
152 nsweeps--;
153 }
154
155 while (nsweeps > 0 ) {
156 Paso_Copy(n, b_new, b);
157
158 Paso_SparseMatrix_MatrixVector_CSR_OFFSET0((-1.), A_p, x, 1., b_new); /* b_new = b - A*x */
159 Paso_Preconditioner_LocalSmoother_Sweep(A_p,smoother,b_new);
160 Paso_AXPY(n, x, 1., b_new);
161 nsweeps--;
162 }
163 }
164
165 void Paso_Preconditioner_LocalSmoother_Sweep(Paso_SparseMatrix* A, Paso_Preconditioner_LocalSmoother * smoother, double * x)
166 {
167 const dim_t nt=omp_get_max_threads();
168 if (smoother->Jacobi) {
169 Paso_BlockOps_solveAll(A->row_block_size,A->numRows,smoother->diag,smoother->pivot,x);
170 } else {
171 if (nt < 2) {
172 Paso_Preconditioner_LocalSmoother_Sweep_sequential(A,smoother,x);
173 } else {
174 Paso_Preconditioner_LocalSmoother_Sweep_colored(A,smoother,x);
175 }
176 }
177 }
178
179 /* inplace Gauss-Seidel sweep in seqential mode: */
180
181 void Paso_Preconditioner_LocalSmoother_Sweep_sequential(Paso_SparseMatrix* A_p, Paso_Preconditioner_LocalSmoother * smoother, double * x)
182 {
183 const dim_t n=A_p->numRows;
184 const dim_t n_block=A_p->row_block_size;
185 double *diag = smoother->diag;
186 index_t* pivot = smoother->pivot;
187 const dim_t block_len=A_p->block_size;
188
189
190 register dim_t i,k;
191 register index_t iptr_ik, mm;
192 register double rtmp;
193 int failed = 0;
194
195 const index_t* ptr_main = Paso_SparseMatrix_borrowMainDiagonalPointer(A_p);
196
197 (void)pivot; /* silence warning from var being unused by macros */
198 (void)block_len;
199 /* forward substitution */
200
201 if (n_block==1) {
202 x[0]*=diag[0];
203 for (i = 1; i < n; ++i) {
204 mm=ptr_main[i];
205 /* x_i=x_i-a_ik*x_k (with k<i) */
206 rtmp=x[i];
207 for (iptr_ik=A_p->pattern->ptr[i];iptr_ik<mm; ++iptr_ik) {
208 k=A_p->pattern->index[iptr_ik];
209 rtmp-=A_p->val[iptr_ik]*x[k];
210 }
211 x[i]=rtmp*diag[i];
212 }
213 } else if (n_block==2) {
214 Paso_BlockOps_MViP_2(&diag[0], &x[0]);
215 for (i = 1; i < n; ++i) {
216 mm=ptr_main[i];
217 for (iptr_ik=A_p->pattern->ptr[i];iptr_ik<mm; ++iptr_ik) {
218 k=A_p->pattern->index[iptr_ik];
219 Paso_BlockOps_SMV_2(&x[2*i], &A_p->val[4*iptr_ik], &x[2*k]);
220 }
221 Paso_BlockOps_MViP_2(&diag[4*i], &x[2*i]);
222 }
223 } else if (n_block==3) {
224 Paso_BlockOps_MViP_3(&diag[0], &x[0]);
225 for (i = 1; i < n; ++i) {
226 mm=ptr_main[i];
227 /* x_i=x_i-a_ik*x_k */
228 for (iptr_ik=A_p->pattern->ptr[i];iptr_ik<mm; ++iptr_ik) {
229 k=A_p->pattern->index[iptr_ik];
230 Paso_BlockOps_SMV_3(&x[3*i], &A_p->val[9*iptr_ik], &x[3*k]);
231 }
232 Paso_BlockOps_MViP_3(&diag[9*i], &x[3*i]);
233 }
234 } else {
235 Paso_BlockOps_solve_N(n_block, &x[0], &diag[0], &pivot[0], &failed);
236 for (i = 1; i < n; ++i) {
237 mm=ptr_main[i];
238 /* x_i=x_i-a_ik*x_k */
239 for (iptr_ik=A_p->pattern->ptr[i];iptr_ik<mm; ++iptr_ik) {
240 k=A_p->pattern->index[iptr_ik];
241 Paso_BlockOps_SMV_N(n_block, &x[n_block*i], &A_p->val[block_len*iptr_ik], &x[n_block*k]);
242 }
243 Paso_BlockOps_solve_N(n_block, &x[n_block*i], &diag[block_len*i], &pivot[n_block*i], &failed);
244 }
245 }
246 /* backward substitution */
247
248 if (n_block==1) {
249 for (i = n-2; i > -1; --i) {
250 mm=ptr_main[i];
251 rtmp=x[i]*A_p->val[mm];
252 for (iptr_ik=mm+1; iptr_ik < A_p->pattern->ptr[i+1]; ++iptr_ik) {
253 k=A_p->pattern->index[iptr_ik];
254 rtmp-=A_p->val[iptr_ik]*x[k];
255 }
256 x[i]=diag[i]*rtmp;
257 }
258
259 } else if (n_block==2) {
260 for (i = n-2; i > -1; --i) {
261 mm=ptr_main[i];
262 Paso_BlockOps_MViP_2(&A_p->val[4*mm], &x[2*i]);
263 for (iptr_ik=mm+1; iptr_ik < A_p->pattern->ptr[i+1]; ++iptr_ik) {
264 k=A_p->pattern->index[iptr_ik];
265 Paso_BlockOps_SMV_2(&x[2*i], &A_p->val[4*iptr_ik], &x[2*k]);
266 }
267 Paso_BlockOps_MViP_2(&diag[i*4], &x[2*i]);
268
269 }
270 } else if (n_block==3) {
271 for (i = n-2; i > -1; --i) {
272 mm=ptr_main[i];
273 Paso_BlockOps_MViP_3(&A_p->val[9*mm], &x[3*i]);
274 for (iptr_ik=mm+1; iptr_ik < A_p->pattern->ptr[i+1]; ++iptr_ik) {
275 k=A_p->pattern->index[iptr_ik];
276 Paso_BlockOps_SMV_3(&x[3*i], &A_p->val[9*iptr_ik], &x[3*k]);
277 }
278 Paso_BlockOps_MViP_3(&diag[i*9], &x[3*i]);
279 }
280
281 } else {
282 double *y=TMPMEMALLOC(n_block, double);
283 for (i = n-2; i > -1; --i) {
284 mm=ptr_main[i];
285 Paso_BlockOps_MV_N(n_block, &y[0], &A_p->val[block_len*mm], &x[n_block*i]);
286 for (iptr_ik=mm+1; iptr_ik < A_p->pattern->ptr[i+1]; ++iptr_ik) {
287 k=A_p->pattern->index[iptr_ik];
288 Paso_BlockOps_SMV_N(n_block, &y[0], &A_p->val[block_len*iptr_ik], &x[n_block*k]);
289 }
290 Paso_BlockOps_Cpy_N(n_block ,&x[n_block*i], &y[0]);
291 Paso_BlockOps_solve_N(n_block, &x[n_block*i], &diag[i*block_len], &pivot[i*n_block], &failed);
292 }
293 TMPMEMFREE(y);
294 }
295
296 if (failed > 0) {
297 Esys_setError(ZERO_DIVISION_ERROR, "Paso_Preconditioner_LocalSmoother_Sweep_sequential: non-regular main diagonal block.");
298 }
299
300 return;
301 }
302
303 void Paso_Preconditioner_LocalSmoother_Sweep_colored(Paso_SparseMatrix* A_p, Paso_Preconditioner_LocalSmoother * smoother, double * x)
304 {
305 const dim_t n=A_p->numRows;
306 const dim_t n_block=A_p->row_block_size;
307 double *diag = smoother->diag;
308 index_t* pivot = smoother->pivot;
309 const dim_t block_len=A_p->block_size;
310 double *y;
311
312 register dim_t i,k;
313 register index_t color,iptr_ik, mm;
314 register double rtmp;
315 int failed = 0;
316
317 const index_t* coloring = Paso_Pattern_borrowColoringPointer(A_p->pattern);
318 const dim_t num_colors = Paso_Pattern_getNumColors(A_p->pattern);
319 const index_t* ptr_main = Paso_SparseMatrix_borrowMainDiagonalPointer(A_p);
320
321 (void)pivot; /* These vars are dropped by some macros*/
322 (void)block_len;
323
324 #pragma omp parallel private(mm, i,iptr_ik,k,rtmp, color, y)
325 {
326 if (n_block>3) {
327 y=TMPMEMALLOC(n_block, double);
328 } else {
329 y=NULL;
330 }
331 /* forward substitution */
332
333 /* color = 0 */
334 if (n_block==1) {
335 #pragma omp for schedule(static)
336 for (i = 0; i < n; ++i) {
337 if (coloring[i]== 0 ) x[i]*=diag[i];
338 }
339 } else if (n_block==2) {
340 #pragma omp for schedule(static)
341 for (i = 0; i < n; ++i) {
342 if (coloring[i]== 0 ) Paso_BlockOps_MViP_2(&diag[i*4], &x[2*i]);
343 }
344 } else if (n_block==3) {
345 #pragma omp for schedule(static)
346 for (i = 0; i < n; ++i) {
347 if (coloring[i]==0) Paso_BlockOps_MViP_3(&diag[i*9], &x[3*i]);
348 }
349 } else {
350 #pragma omp for schedule(static)
351 for (i = 0; i < n; ++i) {
352 if (coloring[i]==0) Paso_BlockOps_solve_N(n_block, &x[n_block*i], &diag[block_len*i], &pivot[n_block*i], &failed);
353 }
354 }
355
356
357 for (color=1;color<num_colors;++color) {
358
359 if (n_block==1) {
360 #pragma omp for schedule(static)
361 for (i = 0; i < n; ++i) {
362 if (coloring[i]==color) {
363 /* x_i=x_i-a_ik*x_k */
364 rtmp=x[i];
365 for (iptr_ik=A_p->pattern->ptr[i];iptr_ik<A_p->pattern->ptr[i+1]; ++iptr_ik) {
366 k=A_p->pattern->index[iptr_ik];
367 if (coloring[k]<color) rtmp-=A_p->val[iptr_ik]*x[k];
368 }
369 x[i]=diag[i]*rtmp;
370 }
371 }
372 } else if (n_block==2) {
373 #pragma omp for schedule(static)
374 for (i = 0; i < n; ++i) {
375 if (coloring[i]==color) {
376 for (iptr_ik=A_p->pattern->ptr[i];iptr_ik<A_p->pattern->ptr[i+1]; ++iptr_ik) {
377 k=A_p->pattern->index[iptr_ik];
378 if (coloring[k]<color) Paso_BlockOps_SMV_2(&x[2*i], &A_p->val[4*iptr_ik], &x[2*k]);
379 }
380 Paso_BlockOps_MViP_2(&diag[4*i], &x[2*i]);
381 }
382 }
383 } else if (n_block==3) {
384 #pragma omp for schedule(static)
385 for (i = 0; i < n; ++i) {
386 if (coloring[i]==color) {
387 for (iptr_ik=A_p->pattern->ptr[i];iptr_ik<A_p->pattern->ptr[i+1]; ++iptr_ik) {
388 k=A_p->pattern->index[iptr_ik];
389 if (coloring[k]<color) Paso_BlockOps_SMV_3(&x[3*i], &A_p->val[9*iptr_ik], &x[3*k]);
390 }
391 Paso_BlockOps_MViP_3(&diag[9*i], &x[3*i]);
392 }
393 }
394 } else {
395 #pragma omp for schedule(static)
396 for (i = 0; i < n; ++i) {
397 if (coloring[i] == color) {
398 for (iptr_ik=A_p->pattern->ptr[i];iptr_ik<A_p->pattern->ptr[i+1]; ++iptr_ik) {
399 k=A_p->pattern->index[iptr_ik];
400 if (coloring[k]<color) Paso_BlockOps_SMV_N(n_block, &x[n_block*i], &A_p->val[block_len*iptr_ik], &x[n_block*k]);
401 }
402 Paso_BlockOps_solve_N(n_block, &x[n_block*i], &diag[block_len*i], &pivot[n_block*i], &failed);
403 }
404 }
405 }
406 } /* end of coloring loop */
407
408 /* backward substitution */
409 for (color=(num_colors)-2 ;color>-1;--color) { /* Note: color=(num_colors)-1 is not required */
410 if (n_block==1) {
411 #pragma omp for schedule(static)
412 for (i = 0; i < n; ++i) {
413 if (coloring[i]==color) {
414 mm=ptr_main[i];
415 rtmp=A_p->val[mm]*x[i];
416 for (iptr_ik=A_p->pattern->ptr[i];iptr_ik<A_p->pattern->ptr[i+1]; ++iptr_ik) {
417 k=A_p->pattern->index[iptr_ik];
418 if (coloring[k]>color) rtmp-=A_p->val[iptr_ik]*x[k];
419 }
420 x[i]= rtmp*diag[i];
421 }
422 }
423 } else if (n_block==2) {
424 #pragma omp for schedule(static)
425 for (i = 0; i < n; ++i) {
426 if (coloring[i]==color) {
427 mm=ptr_main[i];
428 Paso_BlockOps_MViP_2(&A_p->val[4*mm], &x[2*i]);
429 for (iptr_ik=A_p->pattern->ptr[i];iptr_ik<A_p->pattern->ptr[i+1]; ++iptr_ik) {
430 k=A_p->pattern->index[iptr_ik];
431 if (coloring[k]>color) Paso_BlockOps_SMV_2(&x[2*i], &A_p->val[4*iptr_ik], &x[2*k]);
432 }
433 Paso_BlockOps_MViP_2(&diag[4*i], &x[2*i]);
434 }
435 }
436 } else if (n_block==3) {
437 #pragma omp for schedule(static)
438 for (i = 0; i < n; ++i) {
439 if (coloring[i]==color) {
440 mm=ptr_main[i];
441 Paso_BlockOps_MViP_3(&A_p->val[9*mm], &x[3*i]);
442 for (iptr_ik=A_p->pattern->ptr[i];iptr_ik<A_p->pattern->ptr[i+1]; ++iptr_ik) {
443 k=A_p->pattern->index[iptr_ik];
444 if (coloring[k]>color) Paso_BlockOps_SMV_3(&x[3*i], &A_p->val[9*iptr_ik], &x[3*k]);
445 }
446 Paso_BlockOps_MViP_3(&diag[9*i], &x[3*i]);
447 }
448 }
449 } else {
450 #pragma omp for schedule(static)
451 for (i = 0; i < n; ++i) {
452 if (coloring[i]==color) {
453 mm=ptr_main[i];
454 Paso_BlockOps_MV_N(n_block, &y[0], &A_p->val[block_len*mm], &x[n_block*i]);
455 for (iptr_ik=A_p->pattern->ptr[i];iptr_ik<A_p->pattern->ptr[i+1]; ++iptr_ik) {
456 k=A_p->pattern->index[iptr_ik];
457 if (coloring[k]>color) Paso_BlockOps_SMV_N(n_block, &y[0], &A_p->val[block_len*iptr_ik], &x[n_block*k]);
458 }
459 Paso_BlockOps_Cpy_N(n_block ,&x[n_block*i], &y[0]);
460 Paso_BlockOps_solve_N(n_block, &x[n_block*i], &diag[i*block_len], &pivot[i*n_block], &failed);
461 }
462 }
463 }
464 }
465 TMPMEMFREE(y);
466 }
467 if (failed > 0) {
468 Esys_setError(ZERO_DIVISION_ERROR, "Paso_Preconditioner_LocalSmoother_Sweep_colored: non-regular main diagonal block.");
469 }
470 return;
471 }

  ViewVC Help
Powered by ViewVC 1.1.26