/[escript]/trunk/paso/src/Smoother.c
ViewVC logotype

Diff of /trunk/paso/src/Smoother.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 3104 by gross, Fri Aug 20 08:08:27 2010 UTC revision 3105 by gross, Wed Aug 25 04:37:52 2010 UTC
# Line 129  void Paso_Solver_solveGS(Paso_SystemMatr Line 129  void Paso_Solver_solveGS(Paso_SystemMatr
129        for (i=0;i<n;++i) x[i]=b[i];        for (i=0;i<n;++i) x[i]=b[i];
130                
131        Paso_Solver_localGSSweep(A_p->mainBlock,gs->localGS,x);        Paso_Solver_localGSSweep(A_p->mainBlock,gs->localGS,x);
132        while (sweeps > 0 ) {        
133          while (sweeps > 1 ) {
134       #pragma omp parallel for private(i) schedule(static)       #pragma omp parallel for private(i) schedule(static)
135       for (i=0;i<n;++i) b_new[i]=b[i];       for (i=0;i<n;++i) b_new[i]=b[i];
136    
# Line 138  void Paso_Solver_solveGS(Paso_SystemMatr Line 139  void Paso_Solver_solveGS(Paso_SystemMatr
139       Paso_Solver_localGSSweep(A_p->mainBlock,gs->localGS,b_new);       Paso_Solver_localGSSweep(A_p->mainBlock,gs->localGS,b_new);
140            
141       #pragma omp parallel for private(i) schedule(static)       #pragma omp parallel for private(i) schedule(static)
142       for (i=0;i<n;++i) x[i]+=b_new[i];       for (i=0;i<n;++i) x[i]+=b_new[i];
       
143       sweeps--;       sweeps--;
144        }        }
145                
# Line 154  void Paso_Solver_solveLocalGS(Paso_Spars Line 154  void Paso_Solver_solveLocalGS(Paso_Spars
154        
155     #pragma omp parallel for private(i) schedule(static)     #pragma omp parallel for private(i) schedule(static)
156     for (i=0;i<n;++i) x[i]=b[i];     for (i=0;i<n;++i) x[i]=b[i];
157      
158     Paso_Solver_localGSSweep(A_p,gs,x);     Paso_Solver_localGSSweep(A_p,gs,x);
159        
160     while (sweeps > 0 ) {     while (sweeps > 1 ) {
161       #pragma omp parallel for private(i) schedule(static)       #pragma omp parallel for private(i) schedule(static)
162       for (i=0;i<n;++i) b_new[i]=b[i];       for (i=0;i<n;++i) b_new[i]=b[i];
163            
# Line 199  void Paso_Solver_localGSSweep_sequential Line 200  void Paso_Solver_localGSSweep_sequential
200     register index_t iptr_ik, mm;     register index_t iptr_ik, mm;
201        
202     const index_t* ptr_main = Paso_SparseMatrix_borrowMainDiagonalPointer(A_p);     const index_t* ptr_main = Paso_SparseMatrix_borrowMainDiagonalPointer(A_p);
   
203     /* forward substitution */     /* forward substitution */
204        
205     if (n_block==1) {     if (n_block==1) {
   
206        Paso_BlockOps_MV_1(&x[0], &diag[0], &x[0]);        Paso_BlockOps_MV_1(&x[0], &diag[0], &x[0]);
   
207        for (i = 1; i < n; ++i) {        for (i = 1; i < n; ++i) {
208       mm=ptr_main[i];       mm=ptr_main[i];
209       /* x_i=x_i-a_ik*x_k  (with k<i) */                           /* x_i=x_i-a_ik*x_k  (with k<i) */
210       for (iptr_ik=A_p->pattern->ptr[i];iptr_ik<mm; ++iptr_ik) {       for (iptr_ik=A_p->pattern->ptr[i];iptr_ik<mm; ++iptr_ik) {
211          k=A_p->pattern->index[iptr_ik];            k=A_p->pattern->index[iptr_ik];  
 /* printf("row %d : add %d\n",i,k); */  
212          Paso_BlockOps_SMV_1(&x[i], &A_p->val[iptr_ik], &x[k]);          Paso_BlockOps_SMV_1(&x[i], &A_p->val[iptr_ik], &x[k]);
213       }       }
214       Paso_BlockOps_MV_1(&x[i], &diag[i], &x[i]);       Paso_BlockOps_MV_1(&x[i], &diag[i], &x[i]);
 /* printf("row %d : multipy by \n",i); */  
215        }        }
       return;  
216     } else if (n_block==2) {     } else if (n_block==2) {
217        Paso_BlockOps_MV_2(&x[0], &diag[0], &x[0]);        Paso_BlockOps_MV_2(&x[0], &diag[0], &x[0]);
218        for (i = 1; i < n; ++i) {        for (i = 1; i < n; ++i) {
219       mm=ptr_main[i];       mm=ptr_main[i];
       
220       for (iptr_ik=A_p->pattern->ptr[i];iptr_ik<mm; ++iptr_ik) {       for (iptr_ik=A_p->pattern->ptr[i];iptr_ik<mm; ++iptr_ik) {
221          k=A_p->pattern->index[iptr_ik];                                    k=A_p->pattern->index[iptr_ik];                          
222          Paso_BlockOps_SMV_2(&x[2*i], &A_p->val[4*iptr_ik], &x[2*k]);          Paso_BlockOps_SMV_2(&x[2*i], &A_p->val[4*iptr_ik], &x[2*k]);
# Line 240  void Paso_Solver_localGSSweep_sequential Line 234  void Paso_Solver_localGSSweep_sequential
234       }       }
235       Paso_BlockOps_MV_3(&x[3*i], &diag[9*i], &x[3*i]);       Paso_BlockOps_MV_3(&x[3*i], &diag[9*i], &x[3*i]);
236        }        }
         
237     } /* add block size >3 */     } /* add block size >3 */
238      
     
     
239     /* backward substitution */     /* backward substitution */
240        
241     if (n_block==1) {     if (n_block==1) {
242          for (i = n-2; i > -1; --i) {        
       for (i = n-2; i > -1; ++i) {          
243          mm=ptr_main[i];          mm=ptr_main[i];
244          Paso_BlockOps_MV_1(&x[i], &A_p->val[mm], &x[i]);          Paso_BlockOps_MV_1(&x[i], &A_p->val[mm], &x[i]);
245          for (iptr_ik=mm+1; iptr_ik < A_p->pattern->ptr[i+1]; ++iptr_ik) {          for (iptr_ik=mm+1; iptr_ik < A_p->pattern->ptr[i+1]; ++iptr_ik) {
# Line 260  void Paso_Solver_localGSSweep_sequential Line 250  void Paso_Solver_localGSSweep_sequential
250        }        }
251                
252     } else if (n_block==2) {     } else if (n_block==2) {
253        for (i = n-2; i > -1; ++i) {        for (i = n-2; i > -1; --i) {
254          mm=ptr_main[i];          mm=ptr_main[i];
255          Paso_BlockOps_MV_2(&x[2*i], &A_p->val[4*mm], &x[2*i]);          Paso_BlockOps_MV_2(&x[2*i], &A_p->val[4*mm], &x[2*i]);
256          for (iptr_ik=mm+1; iptr_ik < A_p->pattern->ptr[i+1]; ++iptr_ik) {          for (iptr_ik=mm+1; iptr_ik < A_p->pattern->ptr[i+1]; ++iptr_ik) {
# Line 270  void Paso_Solver_localGSSweep_sequential Line 260  void Paso_Solver_localGSSweep_sequential
260          Paso_BlockOps_MV_2(&x[2*i], &diag[i*4], &x[2*i]);          Paso_BlockOps_MV_2(&x[2*i], &diag[i*4], &x[2*i]);
261        }        }
262     } else if (n_block==3) {     } else if (n_block==3) {
263        for (i = n-2; i > -1; ++i) {        for (i = n-2; i > -1; --i) {
264          mm=ptr_main[i];          mm=ptr_main[i];
265          Paso_BlockOps_MV_3(&x[3*i], &A_p->val[9*mm], &x[3*i]);          Paso_BlockOps_MV_3(&x[3*i], &A_p->val[9*mm], &x[3*i]);
266            
# Line 302  void Paso_Solver_localGSSweep_colored(Pa Line 292  void Paso_Solver_localGSSweep_colored(Pa
292     const index_t* ptr_main = Paso_SparseMatrix_borrowMainDiagonalPointer(A_p);     const index_t* ptr_main = Paso_SparseMatrix_borrowMainDiagonalPointer(A_p);
293        
294     /* forward substitution */     /* forward substitution */
295    
296        
297     /* color = 0 */     /* color = 0 */
298     if (n_block==1) {     if (n_block==1) {
# Line 327  void Paso_Solver_localGSSweep_colored(Pa Line 318  void Paso_Solver_localGSSweep_colored(Pa
318     }     }
319        
320     for (color=1;color<num_colors;++color) {     for (color=1;color<num_colors;++color) {
321    
322        if (n_block==1) {        if (n_block==1) {
323       #pragma omp parallel for schedule(static) private(i,iptr_ik,k)       #pragma omp parallel for schedule(static) private(i,iptr_ik,k)
324       for (i = 0; i < n; ++i) {       for (i = 0; i < n; ++i) {
325          if (coloring[i]==color) {          if (coloring[i]==color) {
326             /* x_i=x_i-a_ik*x_k */                                 /* x_i=x_i-a_ik*x_k */                    
327             for (iptr_ik=A_p->pattern->ptr[i];iptr_ik<A_p->pattern->ptr[i+1]; ++iptr_ik) {             for (iptr_ik=A_p->pattern->ptr[i];iptr_ik<A_p->pattern->ptr[i+1]; ++iptr_ik) {
328            k=A_p->pattern->index[iptr_ik];                                      k=A_p->pattern->index[iptr_ik];
329            if (coloring[k]<color) Paso_BlockOps_SMV_1(&x[i], &A_p->val[iptr_ik], &x[k]);            if (coloring[k]<color) Paso_BlockOps_SMV_1(&x[i], &A_p->val[iptr_ik], &x[k]);
330             }             }
331             Paso_BlockOps_MV_1(&x[i], &diag[i], &x[i]);             Paso_BlockOps_MV_1(&x[i], &diag[i], &x[i]);
# Line 349  void Paso_Solver_localGSSweep_colored(Pa Line 341  void Paso_Solver_localGSSweep_colored(Pa
341             }             }
342             Paso_BlockOps_MV_2(&x[2*i], &diag[4*i], &x[2*i]);             Paso_BlockOps_MV_2(&x[2*i], &diag[4*i], &x[2*i]);
343          }          }
           
344       }       }
345        } else if (n_block==3) {        } else if (n_block==3) {
346       #pragma omp parallel for schedule(static) private(i,iptr_ik,k)       #pragma omp parallel for schedule(static) private(i,iptr_ik,k)
# Line 376  void Paso_Solver_localGSSweep_colored(Pa Line 367  void Paso_Solver_localGSSweep_colored(Pa
367        }        }
368     } /* end of coloring loop */     } /* end of coloring loop */
369        
   
370     /* backward substitution */     /* backward substitution */
371     for (color=(num_colors)-2 ;color>-1;--color) { /* Note: color=(num_colors)-1 is not required */     for (color=(num_colors)-2 ;color>-1;--color) { /* Note: color=(num_colors)-1 is not required */
372        if (n_block==1) {        if (n_block==1) {

Legend:
Removed from v.3104  
changed lines
  Added in v.3105

  ViewVC Help
Powered by ViewVC 1.1.26