/[escript]/trunk/paso/src/Smoother.c
ViewVC logotype

Annotation of /trunk/paso/src/Smoother.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3283 - (hide annotations)
Mon Oct 18 22:39:28 2010 UTC (9 years ago) by gross
File MIME type: text/plain
File size: 14805 byte(s)
AMG reengineered, BUG is Smoother fixed.


1 jfenwick 1851
2     /*******************************************************
3     *
4 jfenwick 2881 * Copyright (c) 2003-2010 by University of Queensland
5 jfenwick 1851 * Earth Systems Science Computational Center (ESSCC)
6     * http://www.uq.edu.au/esscc
7     *
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13    
14    
15     /**************************************************************/
16    
17 gross 3097 /* Paso: Gauss-Seidel */
18 jfenwick 1851
19     /**************************************************************/
20    
21     /* Author: artak@uq.edu.au */
22    
23     /**************************************************************/
24    
25     #include "Paso.h"
26 gross 3120 #include "Preconditioner.h"
27 jfenwick 1851 #include "PasoUtil.h"
28 gross 3097 #include "BlockOps.h"
29 jfenwick 1851
30     #include <stdio.h>
31    
32 gross 3094
33 jfenwick 1851 /**************************************************************/
34    
35 gross 3158 /* free all memory used by Smoother */
36 jfenwick 1851
37 gross 3158 void Paso_Preconditioner_Smoother_free(Paso_Preconditioner_Smoother * in) {
38 jfenwick 1851 if (in!=NULL) {
39 gross 3158 Paso_Preconditioner_LocalSmoother_free(in->localSmoother);
40 jfenwick 1851 MEMFREE(in);
41     }
42     }
43 gross 3158 void Paso_Preconditioner_LocalSmoother_free(Paso_Preconditioner_LocalSmoother * in) {
44 gross 3094 if (in!=NULL) {
45     MEMFREE(in->diag);
46     MEMFREE(in->pivot);
47 gross 3097 MEMFREE(in->buffer);
48 gross 3094 MEMFREE(in);
49     }
50     }
51 jfenwick 1851 /**************************************************************/
52    
53 gross 3094 /* constructs the symmetric Gauss-Seidel preconditioner
54 jfenwick 1851
55     */
56 gross 3158 Paso_Preconditioner_Smoother* Paso_Preconditioner_Smoother_alloc(Paso_SystemMatrix * A_p, const bool_t jacobi, const bool_t is_local, const bool_t verbose)
57 gross 3094 {
58    
59 jfenwick 1851 /* allocations: */
60 gross 3158 Paso_Preconditioner_Smoother* out=MEMALLOC(1,Paso_Preconditioner_Smoother);
61 jfenwick 3259 if (! Esys_checkPtr(out)) {
62 gross 3158 out->localSmoother=Paso_Preconditioner_LocalSmoother_alloc(A_p->mainBlock,jacobi,verbose);
63 gross 3094 out->is_local=is_local;
64 jfenwick 1851 }
65 jfenwick 3259 if (Esys_MPIInfo_noError(A_p->mpi_info)) {
66 jfenwick 1851 return out;
67 gross 3094 } else {
68 gross 3158 Paso_Preconditioner_Smoother_free(out);
69 jfenwick 1851 return NULL;
70     }
71     }
72 jfenwick 3259 Paso_Preconditioner_LocalSmoother* Paso_Preconditioner_LocalSmoother_alloc(Paso_SparseMatrix * A_p, const bool_t jacobi, bool_t verbose)
73 gross 3158 {
74 gross 3094
75     dim_t n=A_p->numRows;
76     dim_t n_block=A_p->row_block_size;
77     dim_t block_size=A_p->block_size;
78    
79 jfenwick 3259 double time0=Esys_timer();
80 gross 3094 /* allocations: */
81 gross 3158 Paso_Preconditioner_LocalSmoother* out=MEMALLOC(1,Paso_Preconditioner_LocalSmoother);
82 jfenwick 3259 if (! Esys_checkPtr(out)) {
83 gross 3094
84     out->diag=MEMALLOC( ((size_t) n) * ((size_t) block_size),double);
85     out->pivot=MEMALLOC( ((size_t) n) * ((size_t) n_block), index_t);
86 gross 3097 out->buffer=MEMALLOC( ((size_t) n) * ((size_t) n_block), double);
87 gross 3158 out->Jacobi=jacobi;
88 gross 3094
89 jfenwick 3259 if ( ! ( Esys_checkPtr(out->diag) || Esys_checkPtr(out->pivot) ) ) {
90 gross 3094 Paso_SparseMatrix_invMain(A_p, out->diag, out->pivot );
91     }
92    
93     }
94 jfenwick 3259 time0=Esys_timer()-time0;
95 gross 3094
96 jfenwick 3259 if (Esys_noError()) {
97 gross 3158 if (verbose) {
98     if (jacobi) {
99     printf("timing: Jacobi preparation: elemination : %e\n",time0);
100     } else {
101     printf("timing: Gauss-Seidel preparation: elemination : %e\n",time0);
102     }
103     }
104 gross 3094 return out;
105     } else {
106 gross 3158 Paso_Preconditioner_LocalSmoother_free(out);
107 gross 3094 return NULL;
108     }
109     }
110 jfenwick 1851
111 gross 3097 /*
112 jfenwick 1851
113 gross 3097 performs a few sweeps of the from
114 jfenwick 1851
115 gross 3097 S (x_{k} - x_{k-1}) = b - A x_{k-1}
116 jfenwick 1851
117 gross 3097 where x_{0}=0 and S provides some approximatioon of A.
118 jfenwick 1851
119 gross 3097 Under MPI S is build on using A_p->mainBlock only.
120 gross 3158 if Smoother is local the defect b - A x_{k-1} is calculated using A_p->mainBlock only.
121 gross 3094
122 jfenwick 1851 */
123    
124 gross 3159 void Paso_Preconditioner_Smoother_solve(Paso_SystemMatrix* A_p, Paso_Preconditioner_Smoother * smoother, double * x, const double * b,
125     const dim_t sweeps, const bool_t x_is_initial)
126 gross 3094 {
127 gross 3097 const dim_t n= (A_p->mainBlock->numRows) * (A_p->mainBlock->row_block_size);
128    
129 gross 3158 double *b_new = smoother->localSmoother->buffer;
130     dim_t nsweeps=sweeps;
131     if (smoother->is_local) {
132 gross 3159 Paso_Preconditioner_LocalSmoother_solve(A_p->mainBlock,smoother->localSmoother,x,b,sweeps,x_is_initial);
133 gross 3097 } else {
134 gross 3159 if (! x_is_initial) {
135     Paso_Copy(n, x, b);
136     Paso_Preconditioner_LocalSmoother_Sweep(A_p->mainBlock,smoother->localSmoother,x);
137     nsweeps--;
138     }
139     while (nsweeps > 0 ) {
140 gross 3158 Paso_Copy(n, b_new, b);
141 gross 3097 Paso_SystemMatrix_MatrixVector((-1.), A_p, x, 1., b_new); /* b_new = b - A*x */
142 gross 3159 Paso_Preconditioner_LocalSmoother_Sweep(A_p->mainBlock,smoother->localSmoother,b_new);
143 gross 3158 Paso_AXPY(n, x, 1., b_new);
144     nsweeps--;
145 gross 3097 }
146    
147 gross 3094 }
148 gross 3097 }
149 gross 3159 void Paso_Preconditioner_LocalSmoother_solve(Paso_SparseMatrix* A_p, Paso_Preconditioner_LocalSmoother * smoother, double * x, const double * b,
150     const dim_t sweeps, const bool_t x_is_initial)
151 gross 3097 {
152     const dim_t n= (A_p->numRows) * (A_p->row_block_size);
153 gross 3158 double *b_new = smoother->buffer;
154     dim_t nsweeps=sweeps;
155 gross 3094
156 gross 3159 if (! x_is_initial) {
157     Paso_Copy(n, x, b);
158     Paso_Preconditioner_LocalSmoother_Sweep(A_p,smoother,x);
159     nsweeps--;
160     }
161 gross 3105
162 gross 3159 while (nsweeps > 0 ) {
163 gross 3158 Paso_Copy(n, b_new, b);
164 gross 3283
165     Paso_SparseMatrix_MatrixVector_CSR_OFFSET0((-1.), A_p, x, 1., b_new); /* b_new = b - A*x */
166 gross 3158 Paso_Preconditioner_LocalSmoother_Sweep(A_p,smoother,b_new);
167 gross 3159 Paso_AXPY(n, x, 1., b_new);
168 gross 3158 nsweeps--;
169 gross 3094 }
170     }
171 jfenwick 1851
172 gross 3158 void Paso_Preconditioner_LocalSmoother_Sweep(Paso_SparseMatrix* A, Paso_Preconditioner_LocalSmoother * smoother, double * x)
173 gross 3094 {
174     const dim_t nt=omp_get_max_threads();
175 gross 3158 if (smoother->Jacobi) {
176     Paso_BlockOps_allMV(A->row_block_size,A->numRows,smoother->diag,smoother->pivot,x);
177 gross 3097 } else {
178 gross 3158 if (nt < 2) {
179     Paso_Preconditioner_LocalSmoother_Sweep_sequential(A,smoother,x);
180     } else {
181     Paso_Preconditioner_LocalSmoother_Sweep_colored(A,smoother,x);
182     }
183 gross 3094 }
184 jfenwick 1851 }
185    
186 gross 3097 /* inplace Gauss-Seidel sweep in seqential mode: */
187 gross 3094
188 gross 3158 void Paso_Preconditioner_LocalSmoother_Sweep_sequential(Paso_SparseMatrix* A_p, Paso_Preconditioner_LocalSmoother * smoother, double * x)
189 gross 3097 {
190     const dim_t n=A_p->numRows;
191     const dim_t n_block=A_p->row_block_size;
192 gross 3158 const double *diag = smoother->diag;
193     /* const index_t* pivot = smoother->pivot;
194 gross 3097 const dim_t block_size=A_p->block_size; use for block size >3*/
195 gross 3094
196 gross 3097 register dim_t i,k;
197     register index_t iptr_ik, mm;
198    
199     const index_t* ptr_main = Paso_SparseMatrix_borrowMainDiagonalPointer(A_p);
200     /* forward substitution */
201    
202     if (n_block==1) {
203 gross 3098 Paso_BlockOps_MV_1(&x[0], &diag[0], &x[0]);
204     for (i = 1; i < n; ++i) {
205 gross 3097 mm=ptr_main[i];
206 gross 3105 /* x_i=x_i-a_ik*x_k (with k<i) */
207 gross 3097 for (iptr_ik=A_p->pattern->ptr[i];iptr_ik<mm; ++iptr_ik) {
208 gross 3098 k=A_p->pattern->index[iptr_ik];
209 gross 3097 Paso_BlockOps_SMV_1(&x[i], &A_p->val[iptr_ik], &x[k]);
210     }
211 gross 3098 Paso_BlockOps_MV_1(&x[i], &diag[i], &x[i]);
212 gross 3097 }
213     } else if (n_block==2) {
214 gross 3098 Paso_BlockOps_MV_2(&x[0], &diag[0], &x[0]);
215     for (i = 1; i < n; ++i) {
216 gross 3097 mm=ptr_main[i];
217     for (iptr_ik=A_p->pattern->ptr[i];iptr_ik<mm; ++iptr_ik) {
218     k=A_p->pattern->index[iptr_ik];
219     Paso_BlockOps_SMV_2(&x[2*i], &A_p->val[4*iptr_ik], &x[2*k]);
220     }
221 gross 3098 Paso_BlockOps_MV_2(&x[2*i], &diag[4*i], &x[2*i]);
222 gross 3097 }
223     } else if (n_block==3) {
224 gross 3098 Paso_BlockOps_MV_3(&x[0], &diag[0], &x[0]);
225     for (i = 1; i < n; ++i) {
226 gross 3097 mm=ptr_main[i];
227     /* x_i=x_i-a_ik*x_k */
228     for (iptr_ik=A_p->pattern->ptr[i];iptr_ik<mm; ++iptr_ik) {
229     k=A_p->pattern->index[iptr_ik];
230     Paso_BlockOps_SMV_3(&x[3*i], &A_p->val[9*iptr_ik], &x[3*k]);
231     }
232 gross 3098 Paso_BlockOps_MV_3(&x[3*i], &diag[9*i], &x[3*i]);
233 gross 3097 }
234     } /* add block size >3 */
235 gross 3105
236 gross 3097 /* backward substitution */
237    
238     if (n_block==1) {
239 gross 3105 for (i = n-2; i > -1; --i) {
240 gross 3098 mm=ptr_main[i];
241     Paso_BlockOps_MV_1(&x[i], &A_p->val[mm], &x[i]);
242     for (iptr_ik=mm+1; iptr_ik < A_p->pattern->ptr[i+1]; ++iptr_ik) {
243     k=A_p->pattern->index[iptr_ik];
244     Paso_BlockOps_SMV_1(&x[i], &A_p->val[iptr_ik], &x[k]);
245     }
246     Paso_BlockOps_MV_1(&x[i], &diag[i], &x[i]);
247 gross 3097 }
248 gross 3098
249 gross 3097 } else if (n_block==2) {
250 gross 3105 for (i = n-2; i > -1; --i) {
251 gross 3098 mm=ptr_main[i];
252     Paso_BlockOps_MV_2(&x[2*i], &A_p->val[4*mm], &x[2*i]);
253     for (iptr_ik=mm+1; iptr_ik < A_p->pattern->ptr[i+1]; ++iptr_ik) {
254     k=A_p->pattern->index[iptr_ik];
255     Paso_BlockOps_SMV_2(&x[2*i], &A_p->val[4*iptr_ik], &x[2*k]);
256     }
257     Paso_BlockOps_MV_2(&x[2*i], &diag[i*4], &x[2*i]);
258 gross 3283
259 gross 3097 }
260     } else if (n_block==3) {
261 gross 3105 for (i = n-2; i > -1; --i) {
262 gross 3098 mm=ptr_main[i];
263     Paso_BlockOps_MV_3(&x[3*i], &A_p->val[9*mm], &x[3*i]);
264 gross 3097
265 gross 3098 for (iptr_ik=mm+1; iptr_ik < A_p->pattern->ptr[i+1]; ++iptr_ik) {
266     k=A_p->pattern->index[iptr_ik];
267     Paso_BlockOps_SMV_3(&x[3*i], &A_p->val[9*iptr_ik], &x[3*k]);
268     }
269     Paso_BlockOps_MV_3(&x[3*i], &diag[i*9], &x[3*i]);
270 gross 3097 }
271    
272     } /* add block size >3 */
273    
274     return;
275     }
276 gross 3094
277 gross 3158 void Paso_Preconditioner_LocalSmoother_Sweep_colored(Paso_SparseMatrix* A_p, Paso_Preconditioner_LocalSmoother * smoother, double * x)
278 gross 3094 {
279     const dim_t n=A_p->numRows;
280     const dim_t n_block=A_p->row_block_size;
281 gross 3158 const double *diag = smoother->diag;
282     index_t* pivot = smoother->pivot;
283 gross 3097 const dim_t block_size=A_p->block_size;
284 gross 3094
285     register dim_t i,k;
286     register index_t color,iptr_ik, mm;
287    
288     const index_t* coloring = Paso_Pattern_borrowColoringPointer(A_p->pattern);
289     const dim_t num_colors = Paso_Pattern_getNumColors(A_p->pattern);
290     const index_t* ptr_main = Paso_SparseMatrix_borrowMainDiagonalPointer(A_p);
291    
292     /* forward substitution */
293 gross 3105
294 gross 3094
295     /* color = 0 */
296     if (n_block==1) {
297 gross 3097 #pragma omp parallel for schedule(static) private(i)
298 gross 3094 for (i = 0; i < n; ++i) {
299 gross 3097 if (coloring[i]== 0 ) Paso_BlockOps_MV_1(&x[i], &diag[i], &x[i]);
300     }
301 gross 3094 } else if (n_block==2) {
302 gross 3097 #pragma omp parallel for schedule(static) private(i)
303 gross 3094 for (i = 0; i < n; ++i) {
304 gross 3097 if (coloring[i]== 0 ) Paso_BlockOps_MV_2(&x[2*i], &diag[i*4], &x[2*i]);
305 gross 3094 }
306 gross 3097 } else if (n_block==3) {
307     #pragma omp parallel for schedule(static) private(i)
308 gross 3094 for (i = 0; i < n; ++i) {
309 gross 3097 if (coloring[i]==0) Paso_BlockOps_MV_3(&x[3*i], &diag[i*9], &x[3*i]);
310 gross 3094 }
311 gross 3097 } else {
312     #pragma omp parallel for schedule(static) private(i)
313     for (i = 0; i < n; ++i) {
314     if (coloring[i]==0) Paso_BlockOps_Solve_N(n_block, &x[n_block*i], &diag[block_size*i], &pivot[n_block*i], &x[n_block*i]);
315     }
316     }
317 gross 3094
318     for (color=1;color<num_colors;++color) {
319 gross 3105
320 gross 3094 if (n_block==1) {
321 gross 3097 #pragma omp parallel for schedule(static) private(i,iptr_ik,k)
322 gross 3094 for (i = 0; i < n; ++i) {
323     if (coloring[i]==color) {
324     /* x_i=x_i-a_ik*x_k */
325     for (iptr_ik=A_p->pattern->ptr[i];iptr_ik<A_p->pattern->ptr[i+1]; ++iptr_ik) {
326 gross 3105 k=A_p->pattern->index[iptr_ik];
327 gross 3097 if (coloring[k]<color) Paso_BlockOps_SMV_1(&x[i], &A_p->val[iptr_ik], &x[k]);
328 gross 3094 }
329 gross 3097 Paso_BlockOps_MV_1(&x[i], &diag[i], &x[i]);
330 gross 3094 }
331     }
332     } else if (n_block==2) {
333 gross 3097 #pragma omp parallel for schedule(static) private(i,iptr_ik,k)
334 gross 3094 for (i = 0; i < n; ++i) {
335     if (coloring[i]==color) {
336     for (iptr_ik=A_p->pattern->ptr[i];iptr_ik<A_p->pattern->ptr[i+1]; ++iptr_ik) {
337     k=A_p->pattern->index[iptr_ik];
338 gross 3097 if (coloring[k]<color) Paso_BlockOps_SMV_2(&x[2*i], &A_p->val[4*iptr_ik], &x[2*k]);
339 gross 3094 }
340 gross 3097 Paso_BlockOps_MV_2(&x[2*i], &diag[4*i], &x[2*i]);
341 gross 3094 }
342     }
343     } else if (n_block==3) {
344 gross 3097 #pragma omp parallel for schedule(static) private(i,iptr_ik,k)
345 gross 3094 for (i = 0; i < n; ++i) {
346     if (coloring[i]==color) {
347     for (iptr_ik=A_p->pattern->ptr[i];iptr_ik<A_p->pattern->ptr[i+1]; ++iptr_ik) {
348     k=A_p->pattern->index[iptr_ik];
349 gross 3097 if (coloring[k]<color) Paso_BlockOps_SMV_3(&x[3*i], &A_p->val[9*iptr_ik], &x[3*k]);
350 gross 3094 }
351 gross 3097 Paso_BlockOps_MV_3(&x[3*i], &diag[9*i], &x[3*i]);
352 gross 3094 }
353     }
354 gross 3097 } else {
355     #pragma omp parallel for schedule(static) private(i,iptr_ik,k)
356     for (i = 0; i < n; ++i) {
357     if (coloring[i] == color) {
358     for (iptr_ik=A_p->pattern->ptr[i];iptr_ik<A_p->pattern->ptr[i+1]; ++iptr_ik) {
359     k=A_p->pattern->index[iptr_ik];
360     if (coloring[k]<color) Paso_BlockOps_SMV_N(n_block, &x[n_block*i], &A_p->val[block_size*iptr_ik], &x[n_block*k]);
361     }
362     Paso_BlockOps_Solve_N(n_block, &x[n_block*i], &diag[block_size*i], &pivot[n_block*i], &x[n_block*i]);
363     }
364     }
365     }
366 gross 3094 } /* end of coloring loop */
367    
368     /* backward substitution */
369     for (color=(num_colors)-2 ;color>-1;--color) { /* Note: color=(num_colors)-1 is not required */
370     if (n_block==1) {
371 gross 3097 #pragma omp parallel for schedule(static) private(mm, i,iptr_ik,k)
372 gross 3094 for (i = 0; i < n; ++i) {
373     if (coloring[i]==color) {
374     mm=ptr_main[i];
375 gross 3097 Paso_BlockOps_MV_1(&x[i], &A_p->val[mm], &x[i]);
376 gross 3094 for (iptr_ik=A_p->pattern->ptr[i];iptr_ik<A_p->pattern->ptr[i+1]; ++iptr_ik) {
377     k=A_p->pattern->index[iptr_ik];
378 gross 3097 if (coloring[k]>color) Paso_BlockOps_SMV_1(&x[i], &A_p->val[iptr_ik], &x[k]);
379 gross 3094 }
380 gross 3097 Paso_BlockOps_MV_1(&x[i], &diag[i], &x[i]);
381 gross 3094 }
382     }
383     } else if (n_block==2) {
384 gross 3097 #pragma omp parallel for schedule(static) private(mm, i,iptr_ik,k)
385 gross 3094 for (i = 0; i < n; ++i) {
386     if (coloring[i]==color) {
387     mm=ptr_main[i];
388 gross 3097 Paso_BlockOps_MV_2(&x[2*i], &A_p->val[4*mm], &x[2*i]);
389 gross 3094 for (iptr_ik=A_p->pattern->ptr[i];iptr_ik<A_p->pattern->ptr[i+1]; ++iptr_ik) {
390     k=A_p->pattern->index[iptr_ik];
391 gross 3097 if (coloring[k]>color) Paso_BlockOps_SMV_2(&x[2*i], &A_p->val[4*iptr_ik], &x[2*k]);
392 gross 3094 }
393 gross 3097 Paso_BlockOps_MV_2(&x[2*i], &diag[4*i], &x[2*i]);
394 gross 3094 }
395     }
396     } else if (n_block==3) {
397 gross 3097 #pragma omp parallel for schedule(static) private(mm, i,iptr_ik,k)
398 gross 3094 for (i = 0; i < n; ++i) {
399     if (coloring[i]==color) {
400     mm=ptr_main[i];
401 gross 3097 Paso_BlockOps_MV_3(&x[3*i], &A_p->val[9*mm], &x[3*i]);
402 gross 3094 for (iptr_ik=A_p->pattern->ptr[i];iptr_ik<A_p->pattern->ptr[i+1]; ++iptr_ik) {
403     k=A_p->pattern->index[iptr_ik];
404 gross 3097 if (coloring[k]>color) Paso_BlockOps_SMV_3(&x[3*i], &A_p->val[9*iptr_ik], &x[3*k]);
405 gross 3094 }
406 gross 3097 Paso_BlockOps_MV_3(&x[3*i], &diag[9*i], &x[3*i]);
407 gross 3094 }
408     }
409 gross 3097 } else {
410     #pragma omp parallel for schedule(static) private(mm, i,iptr_ik,k)
411     for (i = 0; i < n; ++i) {
412     if (coloring[i]==color) {
413     mm=ptr_main[i];
414     Paso_BlockOps_MV_N( n_block, &x[n_block*i], &A_p->val[block_size*mm], &x[n_block*i] );
415     for (iptr_ik=A_p->pattern->ptr[i];iptr_ik<A_p->pattern->ptr[i+1]; ++iptr_ik) {
416     k=A_p->pattern->index[iptr_ik];
417     if (coloring[k]>color) Paso_BlockOps_SMV_N(n_block, &x[n_block*i], &A_p->val[block_size*iptr_ik], &x[n_block*k]);
418     }
419     Paso_BlockOps_Solve_N(n_block, &x[n_block*i], &diag[block_size*i], &pivot[n_block*i], &x[n_block*i]);
420     }
421     }
422     }
423 gross 3094 }
424     return;
425 jfenwick 3259 }

  ViewVC Help
Powered by ViewVC 1.1.26