/[escript]/trunk/paso/src/Smoother.c
ViewVC logotype

Diff of /trunk/paso/src/Smoother.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 3097 by gross, Fri Aug 20 04:59:12 2010 UTC revision 3098 by gross, Fri Aug 20 08:08:27 2010 UTC
# Line 178  void Paso_Solver_localGSSweep(Paso_Spars Line 178  void Paso_Solver_localGSSweep(Paso_Spars
178     #else     #else
179     const dim_t nt = 1;     const dim_t nt = 1;
180     #endif     #endif
181     if (nt > 1) {     if (nt < 2) {
182        Paso_Solver_localGSSweep_sequential(A,gs,x);        Paso_Solver_localGSSweep_sequential(A,gs,x);
183     } else {     } else {
184        Paso_Solver_localGSSweep_colored(A,gs,x);        Paso_Solver_localGSSweep_colored(A,gs,x);
# Line 199  void Paso_Solver_localGSSweep_sequential Line 199  void Paso_Solver_localGSSweep_sequential
199     register index_t iptr_ik, mm;     register index_t iptr_ik, mm;
200        
201     const index_t* ptr_main = Paso_SparseMatrix_borrowMainDiagonalPointer(A_p);     const index_t* ptr_main = Paso_SparseMatrix_borrowMainDiagonalPointer(A_p);
202      
203     /* forward substitution */     /* forward substitution */
204        
205     if (n_block==1) {     if (n_block==1) {
206        for (i = 0; i < n; ++i) {  
207          Paso_BlockOps_MV_1(&x[0], &diag[0], &x[0]);
208    
209          for (i = 1; i < n; ++i) {
210       mm=ptr_main[i];       mm=ptr_main[i];
211       /* x_i=x_i-a_ik*x_k  (with k<i) */                           /* x_i=x_i-a_ik*x_k  (with k<i) */                    
212       for (iptr_ik=A_p->pattern->ptr[i];iptr_ik<mm; ++iptr_ik) {       for (iptr_ik=A_p->pattern->ptr[i];iptr_ik<mm; ++iptr_ik) {
213          k=A_p->pattern->index[iptr_ik];                                    k=A_p->pattern->index[iptr_ik];  
214    /* printf("row %d : add %d\n",i,k); */
215          Paso_BlockOps_SMV_1(&x[i], &A_p->val[iptr_ik], &x[k]);          Paso_BlockOps_SMV_1(&x[i], &A_p->val[iptr_ik], &x[k]);
216       }       }
217       Paso_BlockOps_MV_1(&x[i], &A_p->val[i], &x[i]);       Paso_BlockOps_MV_1(&x[i], &diag[i], &x[i]);
218    /* printf("row %d : multipy by \n",i); */
219        }        }
220          return;
221     } else if (n_block==2) {     } else if (n_block==2) {
222        for (i = 0; i < n; ++i) {        Paso_BlockOps_MV_2(&x[0], &diag[0], &x[0]);
223          for (i = 1; i < n; ++i) {
224       mm=ptr_main[i];       mm=ptr_main[i];
225            
226       for (iptr_ik=A_p->pattern->ptr[i];iptr_ik<mm; ++iptr_ik) {       for (iptr_ik=A_p->pattern->ptr[i];iptr_ik<mm; ++iptr_ik) {
227          k=A_p->pattern->index[iptr_ik];                                    k=A_p->pattern->index[iptr_ik];                          
228          Paso_BlockOps_SMV_2(&x[2*i], &A_p->val[4*iptr_ik], &x[2*k]);          Paso_BlockOps_SMV_2(&x[2*i], &A_p->val[4*iptr_ik], &x[2*k]);
229       }       }
230       Paso_BlockOps_MV_2(&x[2*i], &A_p->val[4*i], &x[2*i]);       Paso_BlockOps_MV_2(&x[2*i], &diag[4*i], &x[2*i]);
231        }        }
232     } else if (n_block==3) {     } else if (n_block==3) {
233        for (i = 0; i < n; ++i) {        Paso_BlockOps_MV_3(&x[0], &diag[0], &x[0]);
234          for (i = 1; i < n; ++i) {
235       mm=ptr_main[i];       mm=ptr_main[i];
236       /* x_i=x_i-a_ik*x_k */       /* x_i=x_i-a_ik*x_k */
237       for (iptr_ik=A_p->pattern->ptr[i];iptr_ik<mm; ++iptr_ik) {       for (iptr_ik=A_p->pattern->ptr[i];iptr_ik<mm; ++iptr_ik) {
238          k=A_p->pattern->index[iptr_ik];          k=A_p->pattern->index[iptr_ik];
239          Paso_BlockOps_SMV_3(&x[3*i], &A_p->val[9*iptr_ik], &x[3*k]);          Paso_BlockOps_SMV_3(&x[3*i], &A_p->val[9*iptr_ik], &x[3*k]);
240       }       }
241       Paso_BlockOps_MV_3(&x[3*i], &A_p->val[9*i], &x[3*i]);       Paso_BlockOps_MV_3(&x[3*i], &diag[9*i], &x[3*i]);
242        }        }
243                
244     } /* add block size >3 */     } /* add block size >3 */
# Line 240  void Paso_Solver_localGSSweep_sequential Line 248  void Paso_Solver_localGSSweep_sequential
248     /* backward substitution */     /* backward substitution */
249        
250     if (n_block==1) {     if (n_block==1) {
251    
252        for (i = n-2; i > -1; ++i) {                for (i = n-2; i > -1; ++i) {        
253       mm=ptr_main[i];          mm=ptr_main[i];
254       Paso_BlockOps_MV_1(&x[i], &A_p->val[mm], &x[i]);          Paso_BlockOps_MV_1(&x[i], &A_p->val[mm], &x[i]);
255       for (iptr_ik=mm+1; iptr_ik < A_p->pattern->ptr[i+1]; ++iptr_ik) {          for (iptr_ik=mm+1; iptr_ik < A_p->pattern->ptr[i+1]; ++iptr_ik) {
256          k=A_p->pattern->index[iptr_ik];               k=A_p->pattern->index[iptr_ik];  
257          Paso_BlockOps_SMV_1(&x[i], &A_p->val[iptr_ik], &x[k]);             Paso_BlockOps_SMV_1(&x[i], &A_p->val[iptr_ik], &x[k]);
258       }          }
259       Paso_BlockOps_MV_1(&x[i], &diag[i], &x[i]);          Paso_BlockOps_MV_1(&x[i], &diag[i], &x[i]);
260        }        }
261          
262     } else if (n_block==2) {     } else if (n_block==2) {
263        for (i = n-2; i > -1; ++i) {        for (i = n-2; i > -1; ++i) {
264       mm=ptr_main[i];          mm=ptr_main[i];
265       Paso_BlockOps_MV_3(&x[2*i], &A_p->val[4*mm], &x[2*i]);          Paso_BlockOps_MV_2(&x[2*i], &A_p->val[4*mm], &x[2*i]);
266       for (iptr_ik=mm+1; iptr_ik < A_p->pattern->ptr[i+1]; ++iptr_ik) {          for (iptr_ik=mm+1; iptr_ik < A_p->pattern->ptr[i+1]; ++iptr_ik) {
267          k=A_p->pattern->index[iptr_ik];             k=A_p->pattern->index[iptr_ik];
268          Paso_BlockOps_SMV_2(&x[2*i], &A_p->val[4*iptr_ik], &x[2*k]);             Paso_BlockOps_SMV_2(&x[2*i], &A_p->val[4*iptr_ik], &x[2*k]);
269       }          }
270       Paso_BlockOps_MV_2(&x[2*i], &diag[i*4], &x[2*i]);                Paso_BlockOps_MV_2(&x[2*i], &diag[i*4], &x[2*i]);
271        }        }
272     } else if (n_block==3) {     } else if (n_block==3) {
273        for (i = n-2; i > -1; ++i) {        for (i = n-2; i > -1; ++i) {
274            mm=ptr_main[i];
275            Paso_BlockOps_MV_3(&x[3*i], &A_p->val[9*mm], &x[3*i]);
276            
277       mm=ptr_main[i];          for (iptr_ik=mm+1; iptr_ik < A_p->pattern->ptr[i+1]; ++iptr_ik) {
278       Paso_BlockOps_MV_3(&x[3*i], &A_p->val[9*mm], &x[3*i]);             k=A_p->pattern->index[iptr_ik];    
279                   Paso_BlockOps_SMV_3(&x[3*i], &A_p->val[9*iptr_ik], &x[3*k]);
280       for (iptr_ik=mm+1; iptr_ik < A_p->pattern->ptr[i+1]; ++iptr_ik) {          }
281          k=A_p->pattern->index[iptr_ik];              Paso_BlockOps_MV_3(&x[3*i], &diag[i*9], &x[3*i]);
         Paso_BlockOps_SMV_3(&x[3*i], &A_p->val[9*iptr_ik], &x[3*k]);  
      }  
      Paso_BlockOps_MV_3(&x[3*i], &diag[i*9], &x[3*i]);  
282        }        }
283                
284     } /* add block size >3 */           } /* add block size >3 */      

Legend:
Removed from v.3097  
changed lines
  Added in v.3098

  ViewVC Help
Powered by ViewVC 1.1.26