/[escript]/trunk/paso/src/Smoother.c
ViewVC logotype

Contents of /trunk/paso/src/Smoother.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3283 - (show annotations)
Mon Oct 18 22:39:28 2010 UTC (8 years, 5 months ago) by gross
File MIME type: text/plain
File size: 14805 byte(s)
AMG reengineered, BUG is Smoother fixed.


1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2010 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14
15 /**************************************************************/
16
17 /* Paso: Gauss-Seidel */
18
19 /**************************************************************/
20
21 /* Author: artak@uq.edu.au */
22
23 /**************************************************************/
24
25 #include "Paso.h"
26 #include "Preconditioner.h"
27 #include "PasoUtil.h"
28 #include "BlockOps.h"
29
30 #include <stdio.h>
31
32
33 /**************************************************************/
34
35 /* free all memory used by Smoother */
36
37 void Paso_Preconditioner_Smoother_free(Paso_Preconditioner_Smoother * in) {
38 if (in!=NULL) {
39 Paso_Preconditioner_LocalSmoother_free(in->localSmoother);
40 MEMFREE(in);
41 }
42 }
43 void Paso_Preconditioner_LocalSmoother_free(Paso_Preconditioner_LocalSmoother * in) {
44 if (in!=NULL) {
45 MEMFREE(in->diag);
46 MEMFREE(in->pivot);
47 MEMFREE(in->buffer);
48 MEMFREE(in);
49 }
50 }
51 /**************************************************************/
52
53 /* constructs the symmetric Gauss-Seidel preconditioner
54
55 */
56 Paso_Preconditioner_Smoother* Paso_Preconditioner_Smoother_alloc(Paso_SystemMatrix * A_p, const bool_t jacobi, const bool_t is_local, const bool_t verbose)
57 {
58
59 /* allocations: */
60 Paso_Preconditioner_Smoother* out=MEMALLOC(1,Paso_Preconditioner_Smoother);
61 if (! Esys_checkPtr(out)) {
62 out->localSmoother=Paso_Preconditioner_LocalSmoother_alloc(A_p->mainBlock,jacobi,verbose);
63 out->is_local=is_local;
64 }
65 if (Esys_MPIInfo_noError(A_p->mpi_info)) {
66 return out;
67 } else {
68 Paso_Preconditioner_Smoother_free(out);
69 return NULL;
70 }
71 }
72 Paso_Preconditioner_LocalSmoother* Paso_Preconditioner_LocalSmoother_alloc(Paso_SparseMatrix * A_p, const bool_t jacobi, bool_t verbose)
73 {
74
75 dim_t n=A_p->numRows;
76 dim_t n_block=A_p->row_block_size;
77 dim_t block_size=A_p->block_size;
78
79 double time0=Esys_timer();
80 /* allocations: */
81 Paso_Preconditioner_LocalSmoother* out=MEMALLOC(1,Paso_Preconditioner_LocalSmoother);
82 if (! Esys_checkPtr(out)) {
83
84 out->diag=MEMALLOC( ((size_t) n) * ((size_t) block_size),double);
85 out->pivot=MEMALLOC( ((size_t) n) * ((size_t) n_block), index_t);
86 out->buffer=MEMALLOC( ((size_t) n) * ((size_t) n_block), double);
87 out->Jacobi=jacobi;
88
89 if ( ! ( Esys_checkPtr(out->diag) || Esys_checkPtr(out->pivot) ) ) {
90 Paso_SparseMatrix_invMain(A_p, out->diag, out->pivot );
91 }
92
93 }
94 time0=Esys_timer()-time0;
95
96 if (Esys_noError()) {
97 if (verbose) {
98 if (jacobi) {
99 printf("timing: Jacobi preparation: elemination : %e\n",time0);
100 } else {
101 printf("timing: Gauss-Seidel preparation: elemination : %e\n",time0);
102 }
103 }
104 return out;
105 } else {
106 Paso_Preconditioner_LocalSmoother_free(out);
107 return NULL;
108 }
109 }
110
111 /*
112
113 performs a few sweeps of the from
114
115 S (x_{k} - x_{k-1}) = b - A x_{k-1}
116
117 where x_{0}=0 and S provides some approximatioon of A.
118
119 Under MPI S is build on using A_p->mainBlock only.
120 if Smoother is local the defect b - A x_{k-1} is calculated using A_p->mainBlock only.
121
122 */
123
124 void Paso_Preconditioner_Smoother_solve(Paso_SystemMatrix* A_p, Paso_Preconditioner_Smoother * smoother, double * x, const double * b,
125 const dim_t sweeps, const bool_t x_is_initial)
126 {
127 const dim_t n= (A_p->mainBlock->numRows) * (A_p->mainBlock->row_block_size);
128
129 double *b_new = smoother->localSmoother->buffer;
130 dim_t nsweeps=sweeps;
131 if (smoother->is_local) {
132 Paso_Preconditioner_LocalSmoother_solve(A_p->mainBlock,smoother->localSmoother,x,b,sweeps,x_is_initial);
133 } else {
134 if (! x_is_initial) {
135 Paso_Copy(n, x, b);
136 Paso_Preconditioner_LocalSmoother_Sweep(A_p->mainBlock,smoother->localSmoother,x);
137 nsweeps--;
138 }
139 while (nsweeps > 0 ) {
140 Paso_Copy(n, b_new, b);
141 Paso_SystemMatrix_MatrixVector((-1.), A_p, x, 1., b_new); /* b_new = b - A*x */
142 Paso_Preconditioner_LocalSmoother_Sweep(A_p->mainBlock,smoother->localSmoother,b_new);
143 Paso_AXPY(n, x, 1., b_new);
144 nsweeps--;
145 }
146
147 }
148 }
149 void Paso_Preconditioner_LocalSmoother_solve(Paso_SparseMatrix* A_p, Paso_Preconditioner_LocalSmoother * smoother, double * x, const double * b,
150 const dim_t sweeps, const bool_t x_is_initial)
151 {
152 const dim_t n= (A_p->numRows) * (A_p->row_block_size);
153 double *b_new = smoother->buffer;
154 dim_t nsweeps=sweeps;
155
156 if (! x_is_initial) {
157 Paso_Copy(n, x, b);
158 Paso_Preconditioner_LocalSmoother_Sweep(A_p,smoother,x);
159 nsweeps--;
160 }
161
162 while (nsweeps > 0 ) {
163 Paso_Copy(n, b_new, b);
164
165 Paso_SparseMatrix_MatrixVector_CSR_OFFSET0((-1.), A_p, x, 1., b_new); /* b_new = b - A*x */
166 Paso_Preconditioner_LocalSmoother_Sweep(A_p,smoother,b_new);
167 Paso_AXPY(n, x, 1., b_new);
168 nsweeps--;
169 }
170 }
171
172 void Paso_Preconditioner_LocalSmoother_Sweep(Paso_SparseMatrix* A, Paso_Preconditioner_LocalSmoother * smoother, double * x)
173 {
174 const dim_t nt=omp_get_max_threads();
175 if (smoother->Jacobi) {
176 Paso_BlockOps_allMV(A->row_block_size,A->numRows,smoother->diag,smoother->pivot,x);
177 } else {
178 if (nt < 2) {
179 Paso_Preconditioner_LocalSmoother_Sweep_sequential(A,smoother,x);
180 } else {
181 Paso_Preconditioner_LocalSmoother_Sweep_colored(A,smoother,x);
182 }
183 }
184 }
185
186 /* inplace Gauss-Seidel sweep in seqential mode: */
187
188 void Paso_Preconditioner_LocalSmoother_Sweep_sequential(Paso_SparseMatrix* A_p, Paso_Preconditioner_LocalSmoother * smoother, double * x)
189 {
190 const dim_t n=A_p->numRows;
191 const dim_t n_block=A_p->row_block_size;
192 const double *diag = smoother->diag;
193 /* const index_t* pivot = smoother->pivot;
194 const dim_t block_size=A_p->block_size; use for block size >3*/
195
196 register dim_t i,k;
197 register index_t iptr_ik, mm;
198
199 const index_t* ptr_main = Paso_SparseMatrix_borrowMainDiagonalPointer(A_p);
200 /* forward substitution */
201
202 if (n_block==1) {
203 Paso_BlockOps_MV_1(&x[0], &diag[0], &x[0]);
204 for (i = 1; i < n; ++i) {
205 mm=ptr_main[i];
206 /* x_i=x_i-a_ik*x_k (with k<i) */
207 for (iptr_ik=A_p->pattern->ptr[i];iptr_ik<mm; ++iptr_ik) {
208 k=A_p->pattern->index[iptr_ik];
209 Paso_BlockOps_SMV_1(&x[i], &A_p->val[iptr_ik], &x[k]);
210 }
211 Paso_BlockOps_MV_1(&x[i], &diag[i], &x[i]);
212 }
213 } else if (n_block==2) {
214 Paso_BlockOps_MV_2(&x[0], &diag[0], &x[0]);
215 for (i = 1; i < n; ++i) {
216 mm=ptr_main[i];
217 for (iptr_ik=A_p->pattern->ptr[i];iptr_ik<mm; ++iptr_ik) {
218 k=A_p->pattern->index[iptr_ik];
219 Paso_BlockOps_SMV_2(&x[2*i], &A_p->val[4*iptr_ik], &x[2*k]);
220 }
221 Paso_BlockOps_MV_2(&x[2*i], &diag[4*i], &x[2*i]);
222 }
223 } else if (n_block==3) {
224 Paso_BlockOps_MV_3(&x[0], &diag[0], &x[0]);
225 for (i = 1; i < n; ++i) {
226 mm=ptr_main[i];
227 /* x_i=x_i-a_ik*x_k */
228 for (iptr_ik=A_p->pattern->ptr[i];iptr_ik<mm; ++iptr_ik) {
229 k=A_p->pattern->index[iptr_ik];
230 Paso_BlockOps_SMV_3(&x[3*i], &A_p->val[9*iptr_ik], &x[3*k]);
231 }
232 Paso_BlockOps_MV_3(&x[3*i], &diag[9*i], &x[3*i]);
233 }
234 } /* add block size >3 */
235
236 /* backward substitution */
237
238 if (n_block==1) {
239 for (i = n-2; i > -1; --i) {
240 mm=ptr_main[i];
241 Paso_BlockOps_MV_1(&x[i], &A_p->val[mm], &x[i]);
242 for (iptr_ik=mm+1; iptr_ik < A_p->pattern->ptr[i+1]; ++iptr_ik) {
243 k=A_p->pattern->index[iptr_ik];
244 Paso_BlockOps_SMV_1(&x[i], &A_p->val[iptr_ik], &x[k]);
245 }
246 Paso_BlockOps_MV_1(&x[i], &diag[i], &x[i]);
247 }
248
249 } else if (n_block==2) {
250 for (i = n-2; i > -1; --i) {
251 mm=ptr_main[i];
252 Paso_BlockOps_MV_2(&x[2*i], &A_p->val[4*mm], &x[2*i]);
253 for (iptr_ik=mm+1; iptr_ik < A_p->pattern->ptr[i+1]; ++iptr_ik) {
254 k=A_p->pattern->index[iptr_ik];
255 Paso_BlockOps_SMV_2(&x[2*i], &A_p->val[4*iptr_ik], &x[2*k]);
256 }
257 Paso_BlockOps_MV_2(&x[2*i], &diag[i*4], &x[2*i]);
258
259 }
260 } else if (n_block==3) {
261 for (i = n-2; i > -1; --i) {
262 mm=ptr_main[i];
263 Paso_BlockOps_MV_3(&x[3*i], &A_p->val[9*mm], &x[3*i]);
264
265 for (iptr_ik=mm+1; iptr_ik < A_p->pattern->ptr[i+1]; ++iptr_ik) {
266 k=A_p->pattern->index[iptr_ik];
267 Paso_BlockOps_SMV_3(&x[3*i], &A_p->val[9*iptr_ik], &x[3*k]);
268 }
269 Paso_BlockOps_MV_3(&x[3*i], &diag[i*9], &x[3*i]);
270 }
271
272 } /* add block size >3 */
273
274 return;
275 }
276
277 void Paso_Preconditioner_LocalSmoother_Sweep_colored(Paso_SparseMatrix* A_p, Paso_Preconditioner_LocalSmoother * smoother, double * x)
278 {
279 const dim_t n=A_p->numRows;
280 const dim_t n_block=A_p->row_block_size;
281 const double *diag = smoother->diag;
282 index_t* pivot = smoother->pivot;
283 const dim_t block_size=A_p->block_size;
284
285 register dim_t i,k;
286 register index_t color,iptr_ik, mm;
287
288 const index_t* coloring = Paso_Pattern_borrowColoringPointer(A_p->pattern);
289 const dim_t num_colors = Paso_Pattern_getNumColors(A_p->pattern);
290 const index_t* ptr_main = Paso_SparseMatrix_borrowMainDiagonalPointer(A_p);
291
292 /* forward substitution */
293
294
295 /* color = 0 */
296 if (n_block==1) {
297 #pragma omp parallel for schedule(static) private(i)
298 for (i = 0; i < n; ++i) {
299 if (coloring[i]== 0 ) Paso_BlockOps_MV_1(&x[i], &diag[i], &x[i]);
300 }
301 } else if (n_block==2) {
302 #pragma omp parallel for schedule(static) private(i)
303 for (i = 0; i < n; ++i) {
304 if (coloring[i]== 0 ) Paso_BlockOps_MV_2(&x[2*i], &diag[i*4], &x[2*i]);
305 }
306 } else if (n_block==3) {
307 #pragma omp parallel for schedule(static) private(i)
308 for (i = 0; i < n; ++i) {
309 if (coloring[i]==0) Paso_BlockOps_MV_3(&x[3*i], &diag[i*9], &x[3*i]);
310 }
311 } else {
312 #pragma omp parallel for schedule(static) private(i)
313 for (i = 0; i < n; ++i) {
314 if (coloring[i]==0) Paso_BlockOps_Solve_N(n_block, &x[n_block*i], &diag[block_size*i], &pivot[n_block*i], &x[n_block*i]);
315 }
316 }
317
318 for (color=1;color<num_colors;++color) {
319
320 if (n_block==1) {
321 #pragma omp parallel for schedule(static) private(i,iptr_ik,k)
322 for (i = 0; i < n; ++i) {
323 if (coloring[i]==color) {
324 /* x_i=x_i-a_ik*x_k */
325 for (iptr_ik=A_p->pattern->ptr[i];iptr_ik<A_p->pattern->ptr[i+1]; ++iptr_ik) {
326 k=A_p->pattern->index[iptr_ik];
327 if (coloring[k]<color) Paso_BlockOps_SMV_1(&x[i], &A_p->val[iptr_ik], &x[k]);
328 }
329 Paso_BlockOps_MV_1(&x[i], &diag[i], &x[i]);
330 }
331 }
332 } else if (n_block==2) {
333 #pragma omp parallel for schedule(static) private(i,iptr_ik,k)
334 for (i = 0; i < n; ++i) {
335 if (coloring[i]==color) {
336 for (iptr_ik=A_p->pattern->ptr[i];iptr_ik<A_p->pattern->ptr[i+1]; ++iptr_ik) {
337 k=A_p->pattern->index[iptr_ik];
338 if (coloring[k]<color) Paso_BlockOps_SMV_2(&x[2*i], &A_p->val[4*iptr_ik], &x[2*k]);
339 }
340 Paso_BlockOps_MV_2(&x[2*i], &diag[4*i], &x[2*i]);
341 }
342 }
343 } else if (n_block==3) {
344 #pragma omp parallel for schedule(static) private(i,iptr_ik,k)
345 for (i = 0; i < n; ++i) {
346 if (coloring[i]==color) {
347 for (iptr_ik=A_p->pattern->ptr[i];iptr_ik<A_p->pattern->ptr[i+1]; ++iptr_ik) {
348 k=A_p->pattern->index[iptr_ik];
349 if (coloring[k]<color) Paso_BlockOps_SMV_3(&x[3*i], &A_p->val[9*iptr_ik], &x[3*k]);
350 }
351 Paso_BlockOps_MV_3(&x[3*i], &diag[9*i], &x[3*i]);
352 }
353 }
354 } else {
355 #pragma omp parallel for schedule(static) private(i,iptr_ik,k)
356 for (i = 0; i < n; ++i) {
357 if (coloring[i] == color) {
358 for (iptr_ik=A_p->pattern->ptr[i];iptr_ik<A_p->pattern->ptr[i+1]; ++iptr_ik) {
359 k=A_p->pattern->index[iptr_ik];
360 if (coloring[k]<color) Paso_BlockOps_SMV_N(n_block, &x[n_block*i], &A_p->val[block_size*iptr_ik], &x[n_block*k]);
361 }
362 Paso_BlockOps_Solve_N(n_block, &x[n_block*i], &diag[block_size*i], &pivot[n_block*i], &x[n_block*i]);
363 }
364 }
365 }
366 } /* end of coloring loop */
367
368 /* backward substitution */
369 for (color=(num_colors)-2 ;color>-1;--color) { /* Note: color=(num_colors)-1 is not required */
370 if (n_block==1) {
371 #pragma omp parallel for schedule(static) private(mm, i,iptr_ik,k)
372 for (i = 0; i < n; ++i) {
373 if (coloring[i]==color) {
374 mm=ptr_main[i];
375 Paso_BlockOps_MV_1(&x[i], &A_p->val[mm], &x[i]);
376 for (iptr_ik=A_p->pattern->ptr[i];iptr_ik<A_p->pattern->ptr[i+1]; ++iptr_ik) {
377 k=A_p->pattern->index[iptr_ik];
378 if (coloring[k]>color) Paso_BlockOps_SMV_1(&x[i], &A_p->val[iptr_ik], &x[k]);
379 }
380 Paso_BlockOps_MV_1(&x[i], &diag[i], &x[i]);
381 }
382 }
383 } else if (n_block==2) {
384 #pragma omp parallel for schedule(static) private(mm, i,iptr_ik,k)
385 for (i = 0; i < n; ++i) {
386 if (coloring[i]==color) {
387 mm=ptr_main[i];
388 Paso_BlockOps_MV_2(&x[2*i], &A_p->val[4*mm], &x[2*i]);
389 for (iptr_ik=A_p->pattern->ptr[i];iptr_ik<A_p->pattern->ptr[i+1]; ++iptr_ik) {
390 k=A_p->pattern->index[iptr_ik];
391 if (coloring[k]>color) Paso_BlockOps_SMV_2(&x[2*i], &A_p->val[4*iptr_ik], &x[2*k]);
392 }
393 Paso_BlockOps_MV_2(&x[2*i], &diag[4*i], &x[2*i]);
394 }
395 }
396 } else if (n_block==3) {
397 #pragma omp parallel for schedule(static) private(mm, i,iptr_ik,k)
398 for (i = 0; i < n; ++i) {
399 if (coloring[i]==color) {
400 mm=ptr_main[i];
401 Paso_BlockOps_MV_3(&x[3*i], &A_p->val[9*mm], &x[3*i]);
402 for (iptr_ik=A_p->pattern->ptr[i];iptr_ik<A_p->pattern->ptr[i+1]; ++iptr_ik) {
403 k=A_p->pattern->index[iptr_ik];
404 if (coloring[k]>color) Paso_BlockOps_SMV_3(&x[3*i], &A_p->val[9*iptr_ik], &x[3*k]);
405 }
406 Paso_BlockOps_MV_3(&x[3*i], &diag[9*i], &x[3*i]);
407 }
408 }
409 } else {
410 #pragma omp parallel for schedule(static) private(mm, i,iptr_ik,k)
411 for (i = 0; i < n; ++i) {
412 if (coloring[i]==color) {
413 mm=ptr_main[i];
414 Paso_BlockOps_MV_N( n_block, &x[n_block*i], &A_p->val[block_size*mm], &x[n_block*i] );
415 for (iptr_ik=A_p->pattern->ptr[i];iptr_ik<A_p->pattern->ptr[i+1]; ++iptr_ik) {
416 k=A_p->pattern->index[iptr_ik];
417 if (coloring[k]>color) Paso_BlockOps_SMV_N(n_block, &x[n_block*i], &A_p->val[block_size*iptr_ik], &x[n_block*k]);
418 }
419 Paso_BlockOps_Solve_N(n_block, &x[n_block*i], &diag[block_size*i], &pivot[n_block*i], &x[n_block*i]);
420 }
421 }
422 }
423 }
424 return;
425 }

  ViewVC Help
Powered by ViewVC 1.1.26