/[escript]/trunk/paso/src/Smoother.c
ViewVC logotype

Diff of /trunk/paso/src/Smoother.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

trunk/paso/src/GS.c revision 3125 by gross, Tue Aug 31 05:37:30 2010 UTC trunk/paso/src/Smoother.c revision 3158 by gross, Mon Sep 6 06:09:11 2010 UTC
# Line 32  Line 32 
32    
33  /**************************************************************/  /**************************************************************/
34    
35  /* free all memory used by GS                                */  /* free all memory used by Smoother                                */
36    
37  void Paso_Preconditioner_GS_free(Paso_Preconditioner_GS * in) {  void Paso_Preconditioner_Smoother_free(Paso_Preconditioner_Smoother * in) {
38       if (in!=NULL) {       if (in!=NULL) {
39      Paso_Preconditioner_LocalGS_free(in->localGS);      Paso_Preconditioner_LocalSmoother_free(in->localSmoother);
40          MEMFREE(in);          MEMFREE(in);
41       }       }
42  }  }
43  void Paso_Preconditioner_LocalGS_free(Paso_Preconditioner_LocalGS * in) {  void Paso_Preconditioner_LocalSmoother_free(Paso_Preconditioner_LocalSmoother * in) {
44     if (in!=NULL) {     if (in!=NULL) {
45        MEMFREE(in->diag);        MEMFREE(in->diag);
46        MEMFREE(in->pivot);        MEMFREE(in->pivot);
# Line 53  void Paso_Preconditioner_LocalGS_free(Pa Line 53  void Paso_Preconditioner_LocalGS_free(Pa
53  /*   constructs the symmetric Gauss-Seidel preconditioner      /*   constructs the symmetric Gauss-Seidel preconditioner    
54    
55  */  */
56  Paso_Preconditioner_GS* Paso_Preconditioner_GS_alloc(Paso_SystemMatrix * A_p, dim_t sweeps, bool_t is_local, bool_t verbose)  Paso_Preconditioner_Smoother* Paso_Preconditioner_Smoother_alloc(Paso_SystemMatrix * A_p, const bool_t jacobi, const bool_t is_local, const bool_t verbose)
57  {  {
58        
59    /* allocations: */      /* allocations: */  
60    Paso_Preconditioner_GS* out=MEMALLOC(1,Paso_Preconditioner_GS);    Paso_Preconditioner_Smoother* out=MEMALLOC(1,Paso_Preconditioner_Smoother);
61    if (! Paso_checkPtr(out)) {    if (! Paso_checkPtr(out)) {
62       out->localGS=Paso_Preconditioner_LocalGS_alloc(A_p->mainBlock,sweeps,verbose);       out->localSmoother=Paso_Preconditioner_LocalSmoother_alloc(A_p->mainBlock,jacobi,verbose);
63       out->is_local=is_local;       out->is_local=is_local;
64    }    }
65    if (Paso_MPIInfo_noError(A_p->mpi_info)) {    if (Paso_MPIInfo_noError(A_p->mpi_info)) {
66       return out;       return out;
67    } else {    } else {
68       Paso_Preconditioner_GS_free(out);       Paso_Preconditioner_Smoother_free(out);
69       return NULL;       return NULL;
70    }    }
71  }  }
72  Paso_Preconditioner_LocalGS* Paso_Preconditioner_LocalGS_alloc(Paso_SparseMatrix * A_p, dim_t sweeps, bool_t verbose) {  Paso_Preconditioner_LocalSmoother* Paso_Preconditioner_LocalSmoother_alloc(Paso_SparseMatrix * A_p, const bool_t jacobi, bool_t verbose)
73    {
74        
75     dim_t n=A_p->numRows;     dim_t n=A_p->numRows;
76     dim_t n_block=A_p->row_block_size;     dim_t n_block=A_p->row_block_size;
# Line 77  Paso_Preconditioner_LocalGS* Paso_Precon Line 78  Paso_Preconditioner_LocalGS* Paso_Precon
78        
79     double time0=Paso_timer();     double time0=Paso_timer();
80     /* allocations: */       /* allocations: */  
81     Paso_Preconditioner_LocalGS* out=MEMALLOC(1,Paso_Preconditioner_LocalGS);     Paso_Preconditioner_LocalSmoother* out=MEMALLOC(1,Paso_Preconditioner_LocalSmoother);
82     if (! Paso_checkPtr(out)) {     if (! Paso_checkPtr(out)) {
83                
84        out->diag=MEMALLOC( ((size_t) n) * ((size_t) block_size),double);        out->diag=MEMALLOC( ((size_t) n) * ((size_t) block_size),double);
85        out->pivot=MEMALLOC( ((size_t) n) * ((size_t)  n_block), index_t);        out->pivot=MEMALLOC( ((size_t) n) * ((size_t)  n_block), index_t);
86        out->buffer=MEMALLOC( ((size_t) n) * ((size_t)  n_block), double);        out->buffer=MEMALLOC( ((size_t) n) * ((size_t)  n_block), double);
87        out->sweeps=sweeps;        out->Jacobi=jacobi;
88                
89        if ( ! ( Paso_checkPtr(out->diag) || Paso_checkPtr(out->pivot) ) ) {        if ( ! ( Paso_checkPtr(out->diag) || Paso_checkPtr(out->pivot) ) ) {
90       Paso_SparseMatrix_invMain(A_p, out->diag, out->pivot );       Paso_SparseMatrix_invMain(A_p, out->diag, out->pivot );
# Line 93  Paso_Preconditioner_LocalGS* Paso_Precon Line 94  Paso_Preconditioner_LocalGS* Paso_Precon
94     time0=Paso_timer()-time0;     time0=Paso_timer()-time0;
95        
96     if (Paso_noError()) {     if (Paso_noError()) {
97        if (verbose) printf("timing: Gauss-Seidel preparation: elemination : %e\n",time0);        if (verbose) {
98         if (jacobi) {
99           printf("timing: Jacobi preparation: elemination : %e\n",time0);
100         } else {  
101           printf("timing: Gauss-Seidel preparation: elemination : %e\n",time0);
102         }
103          }
104        return out;        return out;
105     } else {     } else {
106        Paso_Preconditioner_LocalGS_free(out);        Paso_Preconditioner_LocalSmoother_free(out);
107        return NULL;        return NULL;
108     }     }
109  }  }
# Line 110  S (x_{k} -  x_{k-1}) = b - A x_{k-1} Line 117  S (x_{k} -  x_{k-1}) = b - A x_{k-1}
117  where x_{0}=0 and S provides some approximatioon of A.  where x_{0}=0 and S provides some approximatioon of A.
118    
119  Under MPI S is build on using A_p->mainBlock only.  Under MPI S is build on using A_p->mainBlock only.
120  if GS is local the defect b - A x_{k-1} is calculated using A_p->mainBlock only.  if Smoother is local the defect b - A x_{k-1} is calculated using A_p->mainBlock only.
121    
122  */  */
123    
124  void Paso_Preconditioner_GS_solve(Paso_SystemMatrix* A_p, Paso_Preconditioner_GS * gs, double * x, const double * b)  void Paso_Preconditioner_Smoother_solve(Paso_SystemMatrix* A_p, Paso_Preconditioner_Smoother * smoother, double * x, const double * b, const dim_t sweeps)
125  {  {
    register dim_t i;  
126     const dim_t n= (A_p->mainBlock->numRows) * (A_p->mainBlock->row_block_size);     const dim_t n= (A_p->mainBlock->numRows) * (A_p->mainBlock->row_block_size);
127        
128     double *b_new = gs->localGS->buffer;     double *b_new = smoother->localSmoother->buffer;
129     dim_t sweeps=gs->localGS->sweeps;     dim_t nsweeps=sweeps;
130         if (smoother->is_local) {
131     if (gs->is_local) {        Paso_Preconditioner_LocalSmoother_solve(A_p->mainBlock,smoother->localSmoother,x,b,sweeps);
       Paso_Preconditioner_LocalGS_solve(A_p->mainBlock,gs->localGS,x,b);  
132     } else {     } else {
133        #pragma omp parallel for private(i) schedule(static)        Paso_Copy(n, x, b);
       for (i=0;i<n;++i) x[i]=b[i];  
134                
135        Paso_Preconditioner_LocalGS_Sweep(A_p->mainBlock,gs->localGS,x);        Paso_Preconditioner_LocalSmoother_Sweep(A_p->mainBlock,smoother->localSmoother,x);
136                
137        while (sweeps > 1 ) {        while (nsweeps > 1 ) {
138       #pragma omp parallel for private(i) schedule(static)       Paso_Copy(n, b_new, b);
      for (i=0;i<n;++i) b_new[i]=b[i];  
139    
140           Paso_SystemMatrix_MatrixVector((-1.), A_p, x, 1., b_new); /* b_new = b - A*x */           Paso_SystemMatrix_MatrixVector((-1.), A_p, x, 1., b_new); /* b_new = b - A*x */
141            
142       Paso_Preconditioner_LocalGS_Sweep(A_p->mainBlock,gs->localGS,b_new);       Paso_Preconditioner_LocalSmoother_Sweep(A_p->mainBlock,smoother->localSmoother,b_new);
143            
144       #pragma omp parallel for private(i) schedule(static)       Paso_AXPY(n, x, 1., b_new);
145       for (i=0;i<n;++i) x[i]+=b_new[i];       nsweeps--;
      sweeps--;  
146        }        }
147                
148     }     }
149  }  }
150  void Paso_Preconditioner_LocalGS_solve(Paso_SparseMatrix* A_p, Paso_Preconditioner_LocalGS * gs, double * x, const double * b)  void Paso_Preconditioner_LocalSmoother_solve(Paso_SparseMatrix* A_p, Paso_Preconditioner_LocalSmoother * smoother, double * x, const double * b, const dim_t sweeps)
151  {  {
    register dim_t i;  
152     const dim_t n= (A_p->numRows) * (A_p->row_block_size);     const dim_t n= (A_p->numRows) * (A_p->row_block_size);
153     double *b_new = gs->buffer;     double *b_new = smoother->buffer;
154     dim_t sweeps=gs->sweeps;     dim_t nsweeps=sweeps;
     
    #pragma omp parallel for private(i) schedule(static)  
    for (i=0;i<n;++i) x[i]=b[i];  
155        
156     Paso_Preconditioner_LocalGS_Sweep(A_p,gs,x);     Paso_Copy(n, x, b);
157       Paso_Preconditioner_LocalSmoother_Sweep(A_p,smoother,x);
158        
159     while (sweeps > 1 ) {     while (nsweeps > 1 ) {
160       #pragma omp parallel for private(i) schedule(static)  
161       for (i=0;i<n;++i) b_new[i]=b[i];       Paso_Copy(n, b_new, b);
162            
163       Paso_SparseMatrix_MatrixVector_CSC_OFFSET0((-1.), A_p, x, 1., b_new); /* b_new = b - A*x */       Paso_SparseMatrix_MatrixVector_CSC_OFFSET0((-1.), A_p, x, 1., b_new); /* b_new = b - A*x */
164          
165         Paso_Preconditioner_LocalSmoother_Sweep(A_p,smoother,b_new);
166    
167         Paso_AXPY(n, x, 1., b_new)
168            
169       Paso_Preconditioner_LocalGS_Sweep(A_p,gs,b_new);       nsweeps--;
       
      #pragma omp parallel for private(i) schedule(static)  
      for (i=0;i<n;++i) x[i]+=b_new[i];  
       
      sweeps--;  
170     }     }
171  }  }
172    
173  void Paso_Preconditioner_LocalGS_Sweep(Paso_SparseMatrix* A, Paso_Preconditioner_LocalGS * gs, double * x)  void Paso_Preconditioner_LocalSmoother_Sweep(Paso_SparseMatrix* A, Paso_Preconditioner_LocalSmoother * smoother, double * x)
174  {  {
175     #ifdef _OPENMP     #ifdef _OPENMP
176     const dim_t nt=omp_get_max_threads();     const dim_t nt=omp_get_max_threads();
177     #else     #else
178     const dim_t nt = 1;     const dim_t nt = 1;
179     #endif     #endif
180     if (nt < 2) {     if (smoother->Jacobi) {
181        Paso_Preconditioner_LocalGS_Sweep_sequential(A,gs,x);        Paso_BlockOps_allMV(A->row_block_size,A->numRows,smoother->diag,smoother->pivot,x);
182     } else {     } else {
183        Paso_Preconditioner_LocalGS_Sweep_colored(A,gs,x);        if (nt < 2) {
184         Paso_Preconditioner_LocalSmoother_Sweep_sequential(A,smoother,x);
185          } else {
186         Paso_Preconditioner_LocalSmoother_Sweep_colored(A,smoother,x);
187          }
188     }     }
189  }  }
190    
191  /* inplace Gauss-Seidel sweep in seqential mode: */  /* inplace Gauss-Seidel sweep in seqential mode: */
192    
193  void Paso_Preconditioner_LocalGS_Sweep_sequential(Paso_SparseMatrix* A_p, Paso_Preconditioner_LocalGS * gs, double * x)  void Paso_Preconditioner_LocalSmoother_Sweep_sequential(Paso_SparseMatrix* A_p, Paso_Preconditioner_LocalSmoother * smoother, double * x)
194  {  {
195     const dim_t n=A_p->numRows;     const dim_t n=A_p->numRows;
196     const dim_t n_block=A_p->row_block_size;     const dim_t n_block=A_p->row_block_size;
197     const double *diag = gs->diag;     const double *diag = smoother->diag;
198     /* const index_t* pivot = gs->pivot;     /* const index_t* pivot = smoother->pivot;
199     const dim_t block_size=A_p->block_size;  use for block size >3*/     const dim_t block_size=A_p->block_size;  use for block size >3*/
200        
201     register dim_t i,k;     register dim_t i,k;
# Line 276  void Paso_Preconditioner_LocalGS_Sweep_s Line 278  void Paso_Preconditioner_LocalGS_Sweep_s
278     return;     return;
279  }  }
280                
281  void Paso_Preconditioner_LocalGS_Sweep_colored(Paso_SparseMatrix* A_p, Paso_Preconditioner_LocalGS * gs, double * x)  void Paso_Preconditioner_LocalSmoother_Sweep_colored(Paso_SparseMatrix* A_p, Paso_Preconditioner_LocalSmoother * smoother, double * x)
282  {  {
283     const dim_t n=A_p->numRows;     const dim_t n=A_p->numRows;
284     const dim_t n_block=A_p->row_block_size;     const dim_t n_block=A_p->row_block_size;
285     const double *diag = gs->diag;     const double *diag = smoother->diag;
286     index_t* pivot = gs->pivot;     index_t* pivot = smoother->pivot;
287     const dim_t block_size=A_p->block_size;     const dim_t block_size=A_p->block_size;
288        
289     register dim_t i,k;     register dim_t i,k;

Legend:
Removed from v.3125  
changed lines
  Added in v.3158

  ViewVC Help
Powered by ViewVC 1.1.26