/[escript]/trunk/paso/src/SystemMatrix.c
ViewVC logotype

Diff of /trunk/paso/src/SystemMatrix.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1361 by gross, Fri Dec 14 09:26:51 2007 UTC revision 3391 by gross, Thu Dec 2 02:25:53 2010 UTC
# Line 1  Line 1 
1    
 /* $Id$ */  
   
2  /*******************************************************  /*******************************************************
3   *  *
4   *           Copyright 2003-2007 by ACceSS MNRF  * Copyright (c) 2003-2010 by University of Queensland
5   *       Copyright 2007 by University of Queensland  * Earth Systems Science Computational Center (ESSCC)
6   *  * http://www.uq.edu.au/esscc
7   *                http://esscc.uq.edu.au  *
8   *        Primary Business: Queensland, Australia  * Primary Business: Queensland, Australia
9   *  Licensed under the Open Software License version 3.0  * Licensed under the Open Software License version 3.0
10   *     http://www.opensource.org/licenses/osl-3.0.php  * http://www.opensource.org/licenses/osl-3.0.php
11   *  *
12   *******************************************************/  *******************************************************/
13    
14    
15  /**************************************************************/  /**************************************************************/
16    
# Line 19  Line 18 
18    
19  /**************************************************************/  /**************************************************************/
20    
21  /* Author: gross@access.edu.au */  /* Author: Lutz Gross, l.gross@uq.edu.au */
22    
23  /**************************************************************/  /**************************************************************/
24    
25  #include "SystemMatrix.h"  #include "SystemMatrix.h"
26    #include "Preconditioner.h"
27    
28  /**************************************************************/  /**************************************************************/
29    
30  /* allocates a SystemMatrix of type type using the given matrix pattern  /* allocates a SystemMatrix of type type using the given matrix pattern
31     if type is UNKOWN CSR is used.     Values are initialized by zero.
32     if CSC or CSC_BLK1 is used pattern has to give the CSC pattern.     if patternIsUnrolled and type & MATRIX_FORMAT_BLK1, it is assumed that the pattern is allready unrolled to match the requested block size
33     if CSR or CSR_BLK1 is used pattern has to give the CSR pattern.     and offsets otherwise unrolling and offset adjustment will be performed.
34     Values are initialized by zero.  */  */
35    
36  Paso_SystemMatrix* Paso_SystemMatrix_alloc(Paso_SystemMatrixType type,Paso_SystemMatrixPattern *pattern, int row_block_size, int col_block_size) {  Paso_SystemMatrix* Paso_SystemMatrix_alloc(Paso_SystemMatrixType type,Paso_SystemMatrixPattern *pattern, int row_block_size, int col_block_size,
37      const bool_t patternIsUnrolled) {
38    
   double time0;  
39    Paso_SystemMatrix*out=NULL;    Paso_SystemMatrix*out=NULL;
40    dim_t n_norm,i;    dim_t n_norm,i;
41    Paso_SystemMatrixType pattern_format_out;    Paso_SystemMatrixType pattern_format_out;
42      bool_t unroll=FALSE;
43    
44    Paso_resetError();    pattern_format_out= (type & MATRIX_FORMAT_OFFSET1)? MATRIX_FORMAT_OFFSET1:  MATRIX_FORMAT_DEFAULT;
45    time0=Paso_timer();    Esys_resetError();
46      if (patternIsUnrolled) {
47         if ( ! XNOR(type & MATRIX_FORMAT_OFFSET1, pattern->type & MATRIX_FORMAT_OFFSET1) ) {
48             Esys_setError(TYPE_ERROR,"Paso_SystemMatrix_alloc: requested offset and pattern offset does not match.");
49             return NULL;
50         }
51      }
52      /* do we need to apply unrolling ? */
53      unroll  
54            /* we don't like non-square blocks */
55        =   (row_block_size!=col_block_size)
56            /* or any block size bigger than 3 */
57        ||  (col_block_size>3)
58            /* or if lock size one requested and the block size is not 1 */
59        ||  ((type & MATRIX_FORMAT_BLK1) &&  (col_block_size>1) )
60            /* or the offsets are wrong */
61        ||  ((type & MATRIX_FORMAT_OFFSET1) != ( pattern->type & MATRIX_FORMAT_OFFSET1));
62      
63    out=MEMALLOC(1,Paso_SystemMatrix);    out=MEMALLOC(1,Paso_SystemMatrix);
64    if (! Paso_checkPtr(out)) {      if (! Esys_checkPtr(out)) {  
65       out->type=type;       out->type=type;
66       out->pattern=NULL;         out->pattern=NULL;  
67       out->row_distribution=NULL;       out->row_distribution=NULL;
68       out->col_distribution=NULL;       out->col_distribution=NULL;
69       out->mpi_info=Paso_MPIInfo_getReference(pattern->mpi_info);       out->mpi_info=Esys_MPIInfo_getReference(pattern->mpi_info);
70         out->row_coupler=NULL;
71         out->col_coupler=NULL;
72       out->mainBlock=NULL;       out->mainBlock=NULL;
73       out->coupleBlock=NULL;       out->row_coupleBlock=NULL;
74       out->normalizer_is_valid=FALSE;       out->col_coupleBlock=NULL;
75       out->normalizer=NULL;       out->is_balanced=FALSE;
76         out->balance_vector=NULL;
77       out->solver_package=PASO_PASO;         out->solver_package=PASO_PASO;  
78       out->solver=NULL;         out->solver_p=NULL;  
79       out->trilinos_data=NULL;       out->trilinos_data=NULL;
80       out->reference_counter=1;       out->reference_counter=1;
81         out->logical_row_block_size=row_block_size;
82         out->logical_col_block_size=col_block_size;
83    
84    
      pattern_format_out= (type & MATRIX_FORMAT_OFFSET1)? PATTERN_FORMAT_OFFSET1:  PATTERN_FORMAT_DEFAULT;  
      /* ====== compressed sparse columns === */  
85       if (type & MATRIX_FORMAT_CSC) {       if (type & MATRIX_FORMAT_CSC) {
86          if (type & MATRIX_FORMAT_SYM) {           if (unroll) {
87             Paso_setError(TYPE_ERROR,"Generation of matrix pattern for symmetric CSC is not implemented yet.");                 if (patternIsUnrolled) {
88          } else {                    out->pattern=Paso_SystemMatrixPattern_getReference(pattern);
89             if ((type & MATRIX_FORMAT_BLK1) || row_block_size!=col_block_size || col_block_size>3) {                 } else {
90                out->pattern=Paso_SystemMatrixPattern_unrollBlocks(pattern,pattern_format_out,col_block_size,row_block_size);                    out->pattern=Paso_SystemMatrixPattern_unrollBlocks(pattern,pattern_format_out,col_block_size,row_block_size);
91                out->row_block_size=1;                 }
92                out->col_block_size=1;                 out->row_block_size=1;
93             } else {                 out->col_block_size=1;
94             } else {
95                out->pattern=Paso_SystemMatrixPattern_unrollBlocks(pattern,pattern_format_out,1,1);                out->pattern=Paso_SystemMatrixPattern_unrollBlocks(pattern,pattern_format_out,1,1);
96                out->row_block_size=row_block_size;                out->row_block_size=row_block_size;
97                out->col_block_size=col_block_size;                out->col_block_size=col_block_size;
98             }           }
99          }           if (Esys_noError()) {
100          out->row_distribution=Paso_Distribution_getReference(out->pattern->input_distribution);             out->row_distribution=Paso_Distribution_getReference(out->pattern->input_distribution);
101          out->col_distribution=Paso_Distribution_getReference(out->pattern->output_distribution);             out->col_distribution=Paso_Distribution_getReference(out->pattern->output_distribution);
102             }
103       } else {       } else {
104       /* ====== compressed sparse row === */           if (unroll) {
105          if (type & MATRIX_FORMAT_SYM) {                if (patternIsUnrolled) {
106             Paso_setError(TYPE_ERROR,"Generation of matrix pattern for symmetric CSR is not implemented yet.");                    out->pattern=Paso_SystemMatrixPattern_getReference(pattern);
107          } else {                } else {
108             if ((type & MATRIX_FORMAT_BLK1) || row_block_size!=col_block_size || col_block_size>3)  {                    out->pattern=Paso_SystemMatrixPattern_unrollBlocks(pattern,pattern_format_out,row_block_size,col_block_size);
109                out->pattern=Paso_SystemMatrixPattern_unrollBlocks(pattern,pattern_format_out,row_block_size,col_block_size);                }
110                out->row_block_size=1;                out->row_block_size=1;
111                out->col_block_size=1;                out->col_block_size=1;
112             } else {           } else {
113                out->pattern=Paso_SystemMatrixPattern_unrollBlocks(pattern,pattern_format_out,1,1);                out->pattern=Paso_SystemMatrixPattern_unrollBlocks(pattern,pattern_format_out,1,1);
114                out->row_block_size=row_block_size;                out->row_block_size=row_block_size;
115                out->col_block_size=col_block_size;                out->col_block_size=col_block_size;
116             }           }
117          }           if (Esys_noError()) {
118          out->row_distribution=Paso_Distribution_getReference(out->pattern->output_distribution);                out->row_distribution=Paso_Distribution_getReference(out->pattern->output_distribution);
119          out->col_distribution=Paso_Distribution_getReference(out->pattern->input_distribution);                out->col_distribution=Paso_Distribution_getReference(out->pattern->input_distribution);
120             }
121       }       }
122       out->logical_row_block_size=row_block_size;       if (Esys_noError()) {
123       out->logical_col_block_size=col_block_size;          out->block_size=out->row_block_size*out->col_block_size;
124       out->logical_block_size=out->logical_row_block_size*out->logical_block_size;          out->col_coupler=Paso_Coupler_alloc(pattern->col_connector,out->col_block_size);
125       out->block_size=out->row_block_size*out->col_block_size;          out->row_coupler=Paso_Coupler_alloc(pattern->row_connector,out->row_block_size);
126       /* this should be bypassed if trilinos is used */          /* this should be bypassed if trilinos is used */
127       if (type & MATRIX_FORMAT_TRILINOS_CRS) {          if (type & MATRIX_FORMAT_TRILINOS_CRS) {
128          #ifdef TRILINOS             #ifdef TRILINOS
129          out->trilinos_data=Paso_TRILINOS_alloc();             out->trilinos_data=Paso_TRILINOS_alloc();
130          #endif             #endif
      } else {  
         out->solver_package=PASO_PASO;    
         out->mainBlock=Paso_SparseMatrix_alloc(type,out->pattern->mainPattern,row_block_size,col_block_size);  
         out->coupleBlock=Paso_SparseMatrix_alloc(type,out->pattern->couplePattern,row_block_size,col_block_size);  
         /* allocate memory for matrix entries */  
         if (type & MATRIX_FORMAT_CSC) {  
            n_norm = out->mainBlock->numCols * out->col_block_size;  
131          } else {          } else {
132             n_norm = out->mainBlock->numRows * out->row_block_size;             out->solver_package=PASO_PASO;  
133               out->mainBlock=Paso_SparseMatrix_alloc(type,out->pattern->mainPattern,row_block_size,col_block_size,TRUE);
134               out->col_coupleBlock=Paso_SparseMatrix_alloc(type,out->pattern->col_couplePattern,row_block_size,col_block_size,TRUE);
135               out->row_coupleBlock=Paso_SparseMatrix_alloc(type,out->pattern->row_couplePattern,row_block_size,col_block_size,TRUE);
136               if (Esys_noError()) {
137                  /* allocate memory for matrix entries */
138                  n_norm = MAX(out->mainBlock->numCols * out->col_block_size, out->mainBlock->numRows * out->row_block_size);
139              out->balance_vector=MEMALLOC(n_norm,double);
140              out->is_balanced=FALSE;
141              if (! Esys_checkPtr(out->balance_vector)) {
142                     #pragma omp parallel for private(i) schedule(static)
143                     for (i=0;i<n_norm;++i) out->balance_vector[i]=1.;
144                  }
145               }
146          }          }
147          out->normalizer=MEMALLOC(n_norm,double);       }
         out->normalizer_is_valid=FALSE;  
         if (! Paso_checkPtr(out->normalizer)) {  
            #pragma omp parallel for private(i) schedule(static)  
            for (i=0;i<n_norm;++i) out->normalizer[i]=0.;  
        }  
     }  
148    }    }
149    /* all done: */    /* all done: */
150    if (! Paso_noError()) {    if (! Esys_noError()) {
151      Paso_SystemMatrix_free(out);      Paso_SystemMatrix_free(out);
152      return NULL;      return NULL;
153    } else {    } else {
     #ifdef Paso_TRACE  
     printf("timing: system matrix %.4e sec\n",Paso_timer()-time0);  
     printf("Paso_SystemMatrix_alloc: system matrix has been allocated.\n");  
     #endif  
154      return out;      return out;
155    }    }
156  }  }
157    
158  /* returns a reference to Paso_SystemMatrix in */  /* returns a reference to Paso_SystemMatrix in */
159    
160  Paso_SystemMatrix* Paso_SystemMatrix_reference(Paso_SystemMatrix* in) {  Paso_SystemMatrix* Paso_SystemMatrix_getReference(Paso_SystemMatrix* in) {
161     if (in!=NULL) ++(in->reference_counter);     if (in!=NULL) ++(in->reference_counter);
162     return NULL;     return in;
163  }  }
164    
165  /* deallocates a SystemMatrix: */  /* deallocates a SystemMatrix: */
# Line 147  void Paso_SystemMatrix_free(Paso_SystemM Line 168  void Paso_SystemMatrix_free(Paso_SystemM
168    if (in!=NULL) {    if (in!=NULL) {
169       in->reference_counter--;       in->reference_counter--;
170       if (in->reference_counter<=0) {       if (in->reference_counter<=0) {
171        Paso_solve_free(in);
172          Paso_SystemMatrixPattern_free(in->pattern);          Paso_SystemMatrixPattern_free(in->pattern);
173          Paso_Distribution_free(in->row_distribution);          Paso_Distribution_free(in->row_distribution);
174          Paso_Distribution_free(in->col_distribution);          Paso_Distribution_free(in->col_distribution);
175          Paso_MPIInfo_free(in->mpi_info);          Esys_MPIInfo_free(in->mpi_info);
176            Paso_Coupler_free(in->row_coupler);
177            Paso_Coupler_free(in->col_coupler);
178          Paso_SparseMatrix_free(in->mainBlock);          Paso_SparseMatrix_free(in->mainBlock);
179          Paso_SparseMatrix_free(in->coupleBlock);          Paso_SparseMatrix_free(in->col_coupleBlock);
180          MEMFREE(in->normalizer);          Paso_SparseMatrix_free(in->row_coupleBlock);
181          Paso_solve_free(in);      MEMFREE(in->balance_vector);
182          #ifdef TRILINOS          #ifdef TRILINOS
183          Paso_TRILINOS_free(in->trilinos_data);          Paso_TRILINOS_free(in->trilinos_data);
184          #endif          #endif
# Line 165  void Paso_SystemMatrix_free(Paso_SystemM Line 189  void Paso_SystemMatrix_free(Paso_SystemM
189       }       }
190     }     }
191  }  }
192  void Paso_SystemMatrix_allocBuffer(Paso_SystemMatrix* A) {  void  Paso_SystemMatrix_startCollect(Paso_SystemMatrix* A,const double* in)
193      Paso_Coupler_allocBuffer(A->pattern->coupler,A->col_block_size);  {
194      Paso_SystemMatrix_startColCollect(A,in);
195    }
196    double* Paso_SystemMatrix_finishCollect(Paso_SystemMatrix* A)
197    {
198     return Paso_SystemMatrix_finishColCollect(A);
199    }
200    
201    void  Paso_SystemMatrix_startColCollect(Paso_SystemMatrix* A,const double* in)
202    {
203      Paso_Coupler_startCollect(A->col_coupler, in);
204  }  }
205  void Paso_SystemMatrix_freeBuffer(Paso_SystemMatrix* A) {  double* Paso_SystemMatrix_finishColCollect(Paso_SystemMatrix* A)
206      Paso_Coupler_freeBuffer(A->pattern->coupler);  {
207     Paso_Coupler_finishCollect(A->col_coupler);
208     return A->col_coupler->recv_buffer;
209  }  }
210  void  Paso_SystemMatrix_startCollect(Paso_SystemMatrix* A, double* in)  void  Paso_SystemMatrix_startRowCollect(Paso_SystemMatrix* A,const double* in)
211  {  {
212    Paso_Coupler_startCollect(A->pattern->coupler, in);    Paso_Coupler_startCollect(A->row_coupler, in);
213  }  }
214  double* Paso_SystemMatrix_finishCollect(Paso_SystemMatrix* A)  double* Paso_SystemMatrix_finishRowCollect(Paso_SystemMatrix* A)
215  {  {
216   Paso_Coupler_finishCollect(A->pattern->coupler);   Paso_Coupler_finishCollect(A->row_coupler);
217   return A->pattern->coupler->recv_buffer;   return A->row_coupler->recv_buffer;
218  }  }
219    
220  dim_t Paso_SystemMatrix_getTotalNumRows(Paso_SystemMatrix* A){  dim_t Paso_SystemMatrix_getTotalNumRows(const Paso_SystemMatrix* A){
221    return A->mainBlock->numRows * A->row_block_size;    return A->mainBlock->numRows * A->row_block_size;
222  }  }
223    
224  dim_t Paso_SystemMatrix_getTotalNumCols(Paso_SystemMatrix* A){  dim_t Paso_SystemMatrix_getTotalNumCols(const Paso_SystemMatrix* A){
225    return A->mainBlock->numCols * A->col_block_size;    return A->mainBlock->numCols * A->col_block_size;
226  }  }
227  dim_t Paso_SystemMatrix_getGlobalNumRows(Paso_SystemMatrix* A) {  dim_t Paso_SystemMatrix_getGlobalNumRows(Paso_SystemMatrix* A) {
# Line 207  dim_t Paso_SystemMatrix_getNumOutput(Pas Line 243  dim_t Paso_SystemMatrix_getNumOutput(Pas
243     return Paso_SystemMatrixPattern_getNumOutput(A->pattern);     return Paso_SystemMatrixPattern_getNumOutput(A->pattern);
244  }  }
245    
246    index_t* Paso_SystemMatrix_borrowMainDiagonalPointer(Paso_SystemMatrix * A_p)
247    {
248        index_t* out=NULL;
249        int fail=0;
250        out=Paso_SparseMatrix_borrowMainDiagonalPointer(A_p->mainBlock);
251        if (out==NULL) fail=1;
252        #ifdef ESYS_MPI
253        {
254             int fail_loc = fail;
255             MPI_Allreduce(&fail_loc, &fail, 1, MPI_INT, MPI_MAX, A_p->mpi_info->comm);
256        }
257        #endif
258        if (fail>0) Esys_setError(VALUE_ERROR, "Paso_SystemMatrix_borrowMainDiagonalPointer: no main diagonal");
259        return out;
260    }
261    
262    void Paso_SystemMatrix_makeZeroRowSums(Paso_SystemMatrix * A_p, double* left_over)
263    {
264       index_t ir, ib, irow;
265       register double rtmp1, rtmp2;
266       const dim_t n = Paso_SystemMatrixPattern_getNumOutput(A_p->pattern);
267       const dim_t nblk = A_p->block_size;
268       const dim_t blk = A_p->row_block_size;
269       const index_t* main_ptr=Paso_SystemMatrix_borrowMainDiagonalPointer(A_p);
270      
271      
272       Paso_SystemMatrix_rowSum(A_p, left_over); /* left_over now hold the row sum */
273    
274       #pragma omp parallel for private(ir,ib, rtmp1, rtmp2) schedule(static)
275       for (ir=0;ir< n;ir++) {
276           for (ib=0;ib<blk; ib++) {
277             irow=ib+blk*ir;
278             rtmp1=left_over[irow];
279             rtmp2=A_p->mainBlock->val[main_ptr[ir]*nblk+ib+blk*ib];
280             A_p->mainBlock->val[main_ptr[ir]*nblk+ib+blk*ib] = -rtmp1;
281             left_over[irow]=rtmp2+rtmp1;
282           }
283       }
284    }
285    void Paso_SystemMatrix_copyBlockFromMainDiagonal(Paso_SystemMatrix * A_p, double* out)
286    {
287        Paso_SparseMatrix_copyBlockFromMainDiagonal(A_p->mainBlock, out);
288        return;
289    }
290    void Paso_SystemMatrix_copyBlockToMainDiagonal(Paso_SystemMatrix * A_p, const double* in)
291    {
292        Paso_SparseMatrix_copyBlockToMainDiagonal(A_p->mainBlock, in);
293        return;
294    }
295    void Paso_SystemMatrix_copyFromMainDiagonal(Paso_SystemMatrix * A_p, double* out)
296    {
297        Paso_SparseMatrix_copyFromMainDiagonal(A_p->mainBlock, out);
298        return;
299    }
300    void Paso_SystemMatrix_copyToMainDiagonal(Paso_SystemMatrix * A_p, const double* in)
301    {
302        Paso_SparseMatrix_copyToMainDiagonal(A_p->mainBlock, in);
303        return;
304    }
305    
306    void Paso_SystemMatrix_setPreconditioner(Paso_SystemMatrix* A,Paso_Options* options) {
307       if (A->solver_p==NULL) {
308          A->solver_p=Paso_Preconditioner_alloc(A,options);
309       }
310    }
311    
312    /* applies the preconditioner */
313    /* has to be called within a parallel reqion */
314    /* barrier synchronization is performed before the evaluation to make sure that the input vector is available */
315    void Paso_SystemMatrix_solvePreconditioner(Paso_SystemMatrix* A,double* x,double* b){
316       Paso_Preconditioner* prec=(Paso_Preconditioner*) A->solver_p;
317       Paso_Preconditioner_solve(prec, A,x,b);
318    }
319    void Paso_SystemMatrix_freePreconditioner(Paso_SystemMatrix* A) {
320       Paso_Preconditioner* prec=NULL;
321       if (A!=NULL) {
322          prec=(Paso_Preconditioner*) A->solver_p;
323          Paso_Preconditioner_free(prec);
324          A->solver_p=NULL;
325       }
326    }

Legend:
Removed from v.1361  
changed lines
  Added in v.3391

  ViewVC Help
Powered by ViewVC 1.1.26