# Diff of /branches/symbolic_from_3470/paso/src/Smoother.c

revision 3867 by caltinay, Thu Feb 9 00:27:46 2012 UTC revision 3868 by caltinay, Thu Mar 15 06:07:08 2012 UTC
# Line 110  where x_{0}=0 and S provides some approx Line 110  where x_{0}=0 and S provides some approx
110
111  Under MPI S is built using A_p->mainBlock only.  Under MPI S is built using A_p->mainBlock only.
112  If Smoother is local the defect b - A x_{k-1} is calculated using A_p->mainBlock only.  If Smoother is local the defect b - A x_{k-1} is calculated using A_p->mainBlock only.
113
114    The MPI-versioned smoother works in the following way:
115     (1) initialize x
116     (2) calculate residual r_n = b - A * x_n
117     (3) store residual r_n in b_new and pass to Paso_Preconditioner_LocalSmoother_Sweep() as input
118     (4) /delta x_{n+1} is returned (stored in b_new) as the output of Paso_Preconditioner_LocalSmoother_Sweep()
119     (5) recover x_{n+1} by /delta x_{n+1} + x_n
120     (6) repeat steps 2-5 for until sweeps number reduce to 0
121  */  */
122
123  void Paso_Preconditioner_Smoother_solve(Paso_SystemMatrix* A_p, Paso_Preconditioner_Smoother * smoother, double * x, const double * b,  void Paso_Preconditioner_Smoother_solve(Paso_SystemMatrix* A_p, Paso_Preconditioner_Smoother * smoother, double * x, const double * b,
# Line 124  void Paso_Preconditioner_Smoother_solve( Line 132  void Paso_Preconditioner_Smoother_solve(
132     } else {     } else {
133        if (! x_is_initial) {        if (! x_is_initial) {
134       Paso_Copy(n, x, b);       Paso_Copy(n, x, b);
135
136       Paso_Preconditioner_LocalSmoother_Sweep(A_p->mainBlock,smoother->localSmoother,x);       Paso_Preconditioner_LocalSmoother_Sweep(A_p->mainBlock,smoother->localSmoother,x);
137       nsweeps--;       nsweeps--;
138        }        }
139        while (nsweeps > 0 ) {        while (nsweeps > 0 ) {
140       Paso_Copy(n, b_new, b);       Paso_Copy(n, b_new, b);
141           Paso_SystemMatrix_MatrixVector((-1.), A_p, x, 1., b_new); /* b_new = b - A*x */           Paso_SystemMatrix_MatrixVector_CSR_OFFSET0((-1.), A_p, x, 1., b_new); /* b_new = b - A*x */
142       Paso_Preconditioner_LocalSmoother_Sweep(A_p->mainBlock,smoother->localSmoother,b_new);         Paso_Preconditioner_LocalSmoother_Sweep(A_p->mainBlock,smoother->localSmoother,b_new);
143       Paso_AXPY(n, x, 1., b_new);       Paso_AXPY(n, x, 1., b_new);
144       nsweeps--;       nsweeps--;
# Line 138  void Paso_Preconditioner_Smoother_solve( Line 147  void Paso_Preconditioner_Smoother_solve(
147     }     }
148  }  }
149
150  err_t Paso_Preconditioner_Smoother_solve_byTolerance(Paso_SystemMatrix* A_p, Paso_Preconditioner_Smoother * smoother,  err_t Paso_Preconditioner_Smoother_solve_byTolerance(Paso_SystemMatrix* A_p, Paso_Preconditioner_Smoother * smoother,
151                              double * x, const double * b,                                                      double * x, const double * b,
152                              const double rtol, dim_t *sweeps, const bool_t x_is_initial)                                                      const double atol, dim_t *sweeps, const bool_t x_is_initial)
153  {  {
154     const dim_t n= (A_p->mainBlock->numRows) * (A_p->mainBlock->row_block_size);     const dim_t n= (A_p->mainBlock->numRows) * (A_p->mainBlock->row_block_size);
155     double *b_new = smoother->localSmoother->buffer;     double *b_new = smoother->localSmoother->buffer;
156     const dim_t max_sweeps=*sweeps;     const dim_t max_sweeps=*sweeps;
157     dim_t s=0;     dim_t s=0;
158     double norm_b, norm_r;     double norm_dx = atol * 2.;
159     err_t errorCode = PRECONDITIONER_NO_ERROR;     err_t errorCode = PRECONDITIONER_NO_ERROR;
160
norm_b=Paso_lsup(n,b,A_p->mpi_info);
161     if (! x_is_initial) {     if (! x_is_initial) {
162       Paso_Copy(n, x, b);           Paso_Copy(n, x, b);
163       Paso_Preconditioner_LocalSmoother_Sweep(A_p->mainBlock,smoother->localSmoother,x);           Paso_Preconditioner_LocalSmoother_Sweep(A_p->mainBlock,smoother->localSmoother,x);
164       s++;       norm_dx=Paso_lsup(n,x,A_p->mpi_info);
165             s++;
166     }     }
167     while (1) {     while (norm_dx > atol) {
168       Paso_Copy(n, b_new, b);           Paso_Copy(n, b_new, b);
169           Paso_SystemMatrix_MatrixVector((-1.), A_p, x, 1., b_new); /* b_new = b - A*x */           Paso_SystemMatrix_MatrixVector((-1.), A_p, x, 1., b_new); /* b_new = b - A*x */
170       norm_r=Paso_lsup(n,b_new,A_p->mpi_info);           Paso_Preconditioner_LocalSmoother_Sweep(A_p->mainBlock,smoother->localSmoother,b_new);
171       if (norm_r <= rtol * norm_b ) break;       norm_dx=Paso_lsup(n,b_new,A_p->mpi_info);
172       Paso_Preconditioner_LocalSmoother_Sweep(A_p->mainBlock,smoother->localSmoother,b_new);             Paso_AXPY(n, x, 1., b_new);
173       Paso_AXPY(n, x, 1., b_new);           if (s >= max_sweeps) {
174       if (s >= max_sweeps) {                errorCode = PRECONDITIONER_MAXITER_REACHED;
175            errorCode = PRECONDITIONER_MAXITER_REACHED;                break;
176            break;           }
177       }           s++;
s++;
178     }     }
179     *sweeps=s;     *sweeps=s;
180     return errorCode;     return errorCode;
181  }  }
182

183  void Paso_Preconditioner_LocalSmoother_solve(Paso_SparseMatrix* A_p, Paso_Preconditioner_LocalSmoother * smoother, double * x, const double * b,  void Paso_Preconditioner_LocalSmoother_solve(Paso_SparseMatrix* A_p, Paso_Preconditioner_LocalSmoother * smoother, double * x, const double * b,
184                           const dim_t sweeps, const bool_t x_is_initial)                           const dim_t sweeps, const bool_t x_is_initial)
185  {  {
# Line 197  void Paso_Preconditioner_LocalSmoother_s Line 203  void Paso_Preconditioner_LocalSmoother_s
203     }     }
204  }  }
205
206    /*
207      Gauss-Seidel sweep to calculate /delta x_{n+1} from matrix A and
208      residual r_n. It has two steps: forward substitution and backward
209      substitution.
210      Forward substitution: (D+L) * /delta x_{n+1/2} = r_n
211      Backward substitution: (D+U) * /delta x_{n+1} = r_n - L * /delta x_{n+1/2}
212                            = D * /delta x_{n+1/2}
213      where A = D + L + U
214        /delta x_{n+1/2} = x_{n+1/2} - x_n
215        /delta x_{n+1} = x_{n+1} - x_n
216
217      Input: Matrix A
218         residual r_n (in x)
219      Output: /delta x_{n+1} (in x)
220    */
221  void Paso_Preconditioner_LocalSmoother_Sweep(Paso_SparseMatrix* A, Paso_Preconditioner_LocalSmoother * smoother, double * x)  void Paso_Preconditioner_LocalSmoother_Sweep(Paso_SparseMatrix* A, Paso_Preconditioner_LocalSmoother * smoother, double * x)
222  {  {
223     const dim_t nt=omp_get_max_threads();     const dim_t nt=omp_get_max_threads();
# Line 226  void Paso_Preconditioner_LocalSmoother_S Line 247  void Paso_Preconditioner_LocalSmoother_S
247     register index_t iptr_ik, mm;     register index_t iptr_ik, mm;
248     register double rtmp;     register double rtmp;
249     int failed = 0;     int failed = 0;
250
251     const index_t* ptr_main = Paso_SparseMatrix_borrowMainDiagonalPointer(A_p);     const index_t* ptr_main = Paso_SparseMatrix_borrowMainDiagonalPointer(A_p);
252
253     (void)pivot;     /* silence warning from var being unused by macros */     (void)pivot;                /* silence warning from var being unused by macros */
254     (void)block_len;         (void)block_len;
255     /* forward substitution */     /* forward substitution */
256
257     if (n_block==1) {     if (n_block==1) {
258        x[0]*=diag[0];        x[0]*=diag[0];
259        for (i = 1; i < n; ++i) {        for (i = 1; i < n; ++i) {
260       mm=ptr_main[i];       mm=ptr_main[i];
261       /* x_i=x_i-a_ik*x_k  (with k<i) */       /* x_i=x_i-a_ik*x_k (with k<i) */
262       rtmp=x[i];       rtmp=x[i];
263       for (iptr_ik=A_p->pattern->ptr[i];iptr_ik<mm; ++iptr_ik) {       for (iptr_ik=A_p->pattern->ptr[i];iptr_ik<mm; ++iptr_ik) {
264          k=A_p->pattern->index[iptr_ik];          k=A_p->pattern->index[iptr_ik];
# Line 259  void Paso_Preconditioner_LocalSmoother_S Line 280  void Paso_Preconditioner_LocalSmoother_S
280        Paso_BlockOps_MViP_3(&diag[0], &x[0]);        Paso_BlockOps_MViP_3(&diag[0], &x[0]);
281        for (i = 1; i < n; ++i) {        for (i = 1; i < n; ++i) {
282       mm=ptr_main[i];       mm=ptr_main[i];
/* x_i=x_i-a_ik*x_k */
283       for (iptr_ik=A_p->pattern->ptr[i];iptr_ik<mm; ++iptr_ik) {       for (iptr_ik=A_p->pattern->ptr[i];iptr_ik<mm; ++iptr_ik) {
284          k=A_p->pattern->index[iptr_ik];          k=A_p->pattern->index[iptr_ik];
285          Paso_BlockOps_SMV_3(&x[3*i], &A_p->val[9*iptr_ik], &x[3*k]);          Paso_BlockOps_SMV_3(&x[3*i], &A_p->val[9*iptr_ik], &x[3*k]);
# Line 270  void Paso_Preconditioner_LocalSmoother_S Line 290  void Paso_Preconditioner_LocalSmoother_S
290        Paso_BlockOps_solve_N(n_block, &x[0], &diag[0], &pivot[0], &failed);        Paso_BlockOps_solve_N(n_block, &x[0], &diag[0], &pivot[0], &failed);
291        for (i = 1; i < n; ++i) {        for (i = 1; i < n; ++i) {
292       mm=ptr_main[i];       mm=ptr_main[i];
/* x_i=x_i-a_ik*x_k */
293       for (iptr_ik=A_p->pattern->ptr[i];iptr_ik<mm; ++iptr_ik) {       for (iptr_ik=A_p->pattern->ptr[i];iptr_ik<mm; ++iptr_ik) {
294          k=A_p->pattern->index[iptr_ik];          k=A_p->pattern->index[iptr_ik];
295          Paso_BlockOps_SMV_N(n_block, &x[n_block*i], &A_p->val[block_len*iptr_ik], &x[n_block*k]);          Paso_BlockOps_SMV_N(n_block, &x[n_block*i], &A_p->val[block_len*iptr_ik], &x[n_block*k]);
# Line 278  void Paso_Preconditioner_LocalSmoother_S Line 297  void Paso_Preconditioner_LocalSmoother_S
297       Paso_BlockOps_solve_N(n_block, &x[n_block*i], &diag[block_len*i], &pivot[n_block*i], &failed);       Paso_BlockOps_solve_N(n_block, &x[n_block*i], &diag[block_len*i], &pivot[n_block*i], &failed);
298        }        }
299     }     }
300     /* backward substitution */
301         /* backward sweeps */
302     if (n_block==1) {     if (n_block==1) {
303        for (i = n-2; i > -1; --i) {                for (i = n-2; i > -1; --i) {
304          mm=ptr_main[i];          mm=ptr_main[i];
# Line 295  void Paso_Preconditioner_LocalSmoother_S Line 314  void Paso_Preconditioner_LocalSmoother_S
314        for (i = n-2; i > -1; --i) {        for (i = n-2; i > -1; --i) {
315          mm=ptr_main[i];          mm=ptr_main[i];
316          Paso_BlockOps_MViP_2(&A_p->val[4*mm], &x[2*i]);          Paso_BlockOps_MViP_2(&A_p->val[4*mm], &x[2*i]);
317          for (iptr_ik=mm+1; iptr_ik < A_p->pattern->ptr[i+1]; ++iptr_ik) {              for (iptr_ik=mm+1; iptr_ik < A_p->pattern->ptr[i+1]; ++iptr_ik) {
318             k=A_p->pattern->index[iptr_ik];             k=A_p->pattern->index[iptr_ik];
319             Paso_BlockOps_SMV_2(&x[2*i], &A_p->val[4*iptr_ik], &x[2*k]);             Paso_BlockOps_SMV_2(&x[2*i], &A_p->val[4*iptr_ik], &x[2*k]);
320          }          }
# Line 306  void Paso_Preconditioner_LocalSmoother_S Line 325  void Paso_Preconditioner_LocalSmoother_S
325        for (i = n-2; i > -1; --i) {        for (i = n-2; i > -1; --i) {
326          mm=ptr_main[i];          mm=ptr_main[i];
327          Paso_BlockOps_MViP_3(&A_p->val[9*mm], &x[3*i]);          Paso_BlockOps_MViP_3(&A_p->val[9*mm], &x[3*i]);
328          for (iptr_ik=mm+1; iptr_ik < A_p->pattern->ptr[i+1]; ++iptr_ik) {              for (iptr_ik=mm+1; iptr_ik < A_p->pattern->ptr[i+1]; ++iptr_ik) {
329             k=A_p->pattern->index[iptr_ik];                 k=A_p->pattern->index[iptr_ik];
330             Paso_BlockOps_SMV_3(&x[3*i], &A_p->val[9*iptr_ik], &x[3*k]);             Paso_BlockOps_SMV_3(&x[3*i], &A_p->val[9*iptr_ik], &x[3*k]);
331          }          }
332          Paso_BlockOps_MViP_3(&diag[i*9], &x[3*i]);          Paso_BlockOps_MViP_3(&diag[i*9], &x[3*i]);
333        }        }

334     } else {     } else {
335        double *y=TMPMEMALLOC(n_block, double);        double *y=TMPMEMALLOC(n_block, double);
336        for (i = n-2; i > -1; --i) {        for (i = n-2; i > -1; --i) {
337       mm=ptr_main[i];       mm=ptr_main[i];
338       Paso_BlockOps_MV_N(n_block, &y[0], &A_p->val[block_len*mm], &x[n_block*i]);       Paso_BlockOps_MV_N(n_block, &y[0], &A_p->val[block_len*mm], &x[n_block*i]);
339       for (iptr_ik=mm+1; iptr_ik < A_p->pattern->ptr[i+1]; ++iptr_ik) {           for (iptr_ik=mm+1; iptr_ik < A_p->pattern->ptr[i+1]; ++iptr_ik) {
340          k=A_p->pattern->index[iptr_ik];              k=A_p->pattern->index[iptr_ik];
341          Paso_BlockOps_SMV_N(n_block, &y[0], &A_p->val[block_len*iptr_ik], &x[n_block*k]);          Paso_BlockOps_SMV_N(n_block, &y[0], &A_p->val[block_len*iptr_ik], &x[n_block*k]);
342       }       }

Legend:
 Removed from v.3867 changed lines Added in v.3868