/[escript]/trunk/paso/src/SystemMatrix.c
ViewVC logotype

Diff of /trunk/paso/src/SystemMatrix.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 2548 by jfenwick, Mon Jul 20 06:20:06 2009 UTC revision 3005 by gross, Thu Apr 22 05:59:31 2010 UTC
# Line 1  Line 1 
1    
2  /*******************************************************  /*******************************************************
3  *  *
4  * Copyright (c) 2003-2009 by University of Queensland  * Copyright (c) 2003-2010 by University of Queensland
5  * Earth Systems Science Computational Center (ESSCC)  * Earth Systems Science Computational Center (ESSCC)
6  * http://www.uq.edu.au/esscc  * http://www.uq.edu.au/esscc
7  *  *
# Line 18  Line 18 
18    
19  /**************************************************************/  /**************************************************************/
20    
21  /* Author: gross@access.edu.au */  /* Author: Lutz Gross, l.gross@uq.edu.au */
22    
23  /**************************************************************/  /**************************************************************/
24    
# Line 27  Line 27 
27  /**************************************************************/  /**************************************************************/
28    
29  /* allocates a SystemMatrix of type type using the given matrix pattern  /* allocates a SystemMatrix of type type using the given matrix pattern
30     if type is UNKOWN CSR is used.     Values are initialized by zero.
31     if CSC or CSC_BLK1 is used pattern has to give the CSC pattern.     if patternIsUnrolled and type & MATRIX_FORMAT_BLK1, it is assumed that the pattern is allready unrolled to match the requested block size
32     if CSR or CSR_BLK1 is used pattern has to give the CSR pattern.     and offsets otherwise unrolling and offset adjustment will be performed.
33     Values are initialized by zero.  */  */
34    
35  Paso_SystemMatrix* Paso_SystemMatrix_alloc(Paso_SystemMatrixType type,Paso_SystemMatrixPattern *pattern, int row_block_size, int col_block_size) {  Paso_SystemMatrix* Paso_SystemMatrix_alloc(Paso_SystemMatrixType type,Paso_SystemMatrixPattern *pattern, int row_block_size, int col_block_size,
36      const bool_t patternIsUnrolled) {
37    
   double time0;  
38    Paso_SystemMatrix*out=NULL;    Paso_SystemMatrix*out=NULL;
39    dim_t n_norm,i;    dim_t n_norm,i;
40    Paso_SystemMatrixType pattern_format_out;    Paso_SystemMatrixType pattern_format_out;
41      bool_t unroll=FALSE;
42    
43      pattern_format_out= (type & MATRIX_FORMAT_OFFSET1)? PATTERN_FORMAT_OFFSET1:  PATTERN_FORMAT_DEFAULT;
44    Paso_resetError();    Paso_resetError();
45    time0=Paso_timer();    if (type & MATRIX_FORMAT_SYM) {
46          Paso_setError(TYPE_ERROR,"Paso_SystemMatrix_alloc: Symmetric matrix patterns are not supported.");
47          return NULL;
48      }
49      if (patternIsUnrolled) {
50         if ( ! XNOR(type & MATRIX_FORMAT_OFFSET1, pattern->type & PATTERN_FORMAT_OFFSET1) ) {
51             Paso_setError(TYPE_ERROR,"Paso_SystemMatrix_alloc: requested offset and pattern offset does not match.");
52             return NULL;
53         }
54      }
55      /* do we need to apply unrolling ? */
56      unroll  
57            /* we don't like non-square blocks */
58        =   (row_block_size!=col_block_size)
59            /* or any block size bigger than 3 */
60        ||  (col_block_size>3)
61            /* or if lock size one requested and the block size is not 1 */
62        ||  ((type & MATRIX_FORMAT_BLK1) &&  (col_block_size>1) )
63            /* or the offsets are wrong */
64        ||  ((type & MATRIX_FORMAT_OFFSET1) != ( pattern->type & PATTERN_FORMAT_OFFSET1));
65      
66    out=MEMALLOC(1,Paso_SystemMatrix);    out=MEMALLOC(1,Paso_SystemMatrix);
67    if (! Paso_checkPtr(out)) {      if (! Paso_checkPtr(out)) {  
68       out->type=type;       out->type=type;
# Line 59  Paso_SystemMatrix* Paso_SystemMatrix_all Line 81  Paso_SystemMatrix* Paso_SystemMatrix_all
81       out->solver=NULL;         out->solver=NULL;  
82       out->trilinos_data=NULL;       out->trilinos_data=NULL;
83       out->reference_counter=1;       out->reference_counter=1;
84         out->logical_row_block_size=row_block_size;
85         out->logical_col_block_size=col_block_size;
86    
87    
      pattern_format_out= (type & MATRIX_FORMAT_OFFSET1)? PATTERN_FORMAT_OFFSET1:  PATTERN_FORMAT_DEFAULT;  
      /* ====== compressed sparse columns === */  
88       if (type & MATRIX_FORMAT_CSC) {       if (type & MATRIX_FORMAT_CSC) {
89          if (type & MATRIX_FORMAT_SYM) {           if (unroll) {
90             Paso_setError(TYPE_ERROR,"Generation of matrix pattern for symmetric CSC is not implemented yet.");                 if (patternIsUnrolled) {
91          } else {                    out->pattern=Paso_SystemMatrixPattern_getReference(pattern);
92             if ((type & MATRIX_FORMAT_BLK1) || row_block_size!=col_block_size || col_block_size>3) {                 } else {
93                out->pattern=Paso_SystemMatrixPattern_unrollBlocks(pattern,pattern_format_out,col_block_size,row_block_size);                    out->pattern=Paso_SystemMatrixPattern_unrollBlocks(pattern,pattern_format_out,col_block_size,row_block_size);
94                out->row_block_size=1;                 }
95                out->col_block_size=1;                 out->row_block_size=1;
96             } else {                 out->col_block_size=1;
97             } else {
98                out->pattern=Paso_SystemMatrixPattern_unrollBlocks(pattern,pattern_format_out,1,1);                out->pattern=Paso_SystemMatrixPattern_unrollBlocks(pattern,pattern_format_out,1,1);
99                out->row_block_size=row_block_size;                out->row_block_size=row_block_size;
100                out->col_block_size=col_block_size;                out->col_block_size=col_block_size;
101             }           }
102          }           if (Paso_noError()) {
103          out->row_distribution=Paso_Distribution_getReference(out->pattern->input_distribution);             out->row_distribution=Paso_Distribution_getReference(out->pattern->input_distribution);
104          out->col_distribution=Paso_Distribution_getReference(out->pattern->output_distribution);             out->col_distribution=Paso_Distribution_getReference(out->pattern->output_distribution);
105             }
106       } else {       } else {
107       /* ====== compressed sparse row === */           if (unroll) {
108          if (type & MATRIX_FORMAT_SYM) {                if (patternIsUnrolled) {
109             Paso_setError(TYPE_ERROR,"Generation of matrix pattern for symmetric CSR is not implemented yet.");                    out->pattern=Paso_SystemMatrixPattern_getReference(pattern);
110          } else {                } else {
111             if ((type & MATRIX_FORMAT_BLK1) || row_block_size!=col_block_size || col_block_size>3)  {                    out->pattern=Paso_SystemMatrixPattern_unrollBlocks(pattern,pattern_format_out,row_block_size,col_block_size);
112                out->pattern=Paso_SystemMatrixPattern_unrollBlocks(pattern,pattern_format_out,row_block_size,col_block_size);                }
113                out->row_block_size=1;                out->row_block_size=1;
114                out->col_block_size=1;                out->col_block_size=1;
115             } else {           } else {
116                out->pattern=Paso_SystemMatrixPattern_unrollBlocks(pattern,pattern_format_out,1,1);                out->pattern=Paso_SystemMatrixPattern_unrollBlocks(pattern,pattern_format_out,1,1);
117                out->row_block_size=row_block_size;                out->row_block_size=row_block_size;
118                out->col_block_size=col_block_size;                out->col_block_size=col_block_size;
119             }           }
120          }           if (Paso_noError()) {
121          out->row_distribution=Paso_Distribution_getReference(out->pattern->output_distribution);                out->row_distribution=Paso_Distribution_getReference(out->pattern->output_distribution);
122          out->col_distribution=Paso_Distribution_getReference(out->pattern->input_distribution);                out->col_distribution=Paso_Distribution_getReference(out->pattern->input_distribution);
123             }
124       }       }
125       out->logical_row_block_size=row_block_size;       if (Paso_noError()) {
126       out->logical_col_block_size=col_block_size;          out->block_size=out->row_block_size*out->col_block_size;
127       out->logical_block_size=out->logical_row_block_size*out->logical_col_block_size;          out->col_coupler=Paso_Coupler_alloc(pattern->col_connector,out->col_block_size);
128       out->block_size=out->row_block_size*out->col_block_size;          out->row_coupler=Paso_Coupler_alloc(pattern->row_connector,out->row_block_size);
129       out->col_coupler=Paso_Coupler_alloc(pattern->col_connector,out->col_block_size);          /* this should be bypassed if trilinos is used */
130       out->row_coupler=Paso_Coupler_alloc(pattern->row_connector,out->row_block_size);          if (type & MATRIX_FORMAT_TRILINOS_CRS) {
131       /* this should be bypassed if trilinos is used */             #ifdef TRILINOS
132       if (type & MATRIX_FORMAT_TRILINOS_CRS) {             out->trilinos_data=Paso_TRILINOS_alloc();
133          #ifdef TRILINOS             #endif
         out->trilinos_data=Paso_TRILINOS_alloc();  
         #endif  
      } else {  
         out->solver_package=PASO_PASO;    
         out->mainBlock=Paso_SparseMatrix_alloc(type,out->pattern->mainPattern,row_block_size,col_block_size);  
         out->col_coupleBlock=Paso_SparseMatrix_alloc(type,out->pattern->col_couplePattern,row_block_size,col_block_size);  
         out->row_coupleBlock=Paso_SparseMatrix_alloc(type,out->pattern->row_couplePattern,row_block_size,col_block_size);  
         /* allocate memory for matrix entries */  
         if (type & MATRIX_FORMAT_CSC) {  
            n_norm = out->mainBlock->numCols * out->col_block_size;  
134          } else {          } else {
135             n_norm = out->mainBlock->numRows * out->row_block_size;             out->solver_package=PASO_PASO;  
136               out->mainBlock=Paso_SparseMatrix_alloc(type,out->pattern->mainPattern,row_block_size,col_block_size,TRUE);
137               out->col_coupleBlock=Paso_SparseMatrix_alloc(type,out->pattern->col_couplePattern,row_block_size,col_block_size,TRUE);
138               out->row_coupleBlock=Paso_SparseMatrix_alloc(type,out->pattern->row_couplePattern,row_block_size,col_block_size,TRUE);
139               if (Paso_noError()) {
140                  /* allocate memory for matrix entries */
141                  if (type & MATRIX_FORMAT_CSC) {
142                     n_norm = out->mainBlock->numCols * out->col_block_size;
143                  } else {
144                     n_norm = out->mainBlock->numRows * out->row_block_size;
145                  }
146                  out->normalizer=MEMALLOC(n_norm,double);
147                  out->normalizer_is_valid=FALSE;
148                  if (! Paso_checkPtr(out->normalizer)) {
149                     #pragma omp parallel for private(i) schedule(static)
150                     for (i=0;i<n_norm;++i) out->normalizer[i]=0.;
151                  }
152               }
153          }          }
154          out->normalizer=MEMALLOC(n_norm,double);       }
         out->normalizer_is_valid=FALSE;  
         if (! Paso_checkPtr(out->normalizer)) {  
            #pragma omp parallel for private(i) schedule(static)  
            for (i=0;i<n_norm;++i) out->normalizer[i]=0.;  
        }  
     }  
155    }    }
156    /* all done: */    /* all done: */
157    if (! Paso_noError()) {    if (! Paso_noError()) {
158      Paso_SystemMatrix_free(out);      Paso_SystemMatrix_free(out);
159      return NULL;      return NULL;
160    } else {    } else {
     #ifdef Paso_TRACE  
     printf("timing: system matrix %.4e sec\n",Paso_timer()-time0);  
     printf("Paso_SystemMatrix_alloc: system matrix has been allocated.\n");  
     #endif  
161      return out;      return out;
162    }    }
163  }  }
164    
165  /* returns a reference to Paso_SystemMatrix in */  /* returns a reference to Paso_SystemMatrix in */
166    
167  Paso_SystemMatrix* Paso_SystemMatrix_reference(Paso_SystemMatrix* in) {  Paso_SystemMatrix* Paso_SystemMatrix_getReference(Paso_SystemMatrix* in) {
168     if (in!=NULL) ++(in->reference_counter);     if (in!=NULL) ++(in->reference_counter);
169     return in;     return in;
170  }  }
# Line 228  dim_t Paso_SystemMatrix_getNumOutput(Pas Line 250  dim_t Paso_SystemMatrix_getNumOutput(Pas
250     return Paso_SystemMatrixPattern_getNumOutput(A->pattern);     return Paso_SystemMatrixPattern_getNumOutput(A->pattern);
251  }  }
252    
253    index_t* Paso_SystemMatrix_borrowMainDiagonalPointer(Paso_SystemMatrix * A_p)
254    {
255        return  Paso_SparseMatrix_borrowMainDiagonalPointer(A_p->mainBlock);
256    }
257    
258    void Paso_SystemMatrix_makeZeroRowSums(Paso_SystemMatrix * A_p, double* left_over)
259    {
260       index_t ir, ib, irow;
261       register double rtmp1, rtmp2;
262       const dim_t n = Paso_SystemMatrixPattern_getNumOutput(A_p->pattern);
263       const dim_t nblk = A_p->block_size;
264       const dim_t blk = A_p->row_block_size;
265       const index_t* main_ptr=Paso_SystemMatrix_borrowMainDiagonalPointer(A_p);
266      
267      
268       Paso_SystemMatrix_rowSum(A_p, left_over); /* left_over now hold the row sum */
269    
270       #pragma omp parallel for private(ir,ib, rtmp1, rtmp2) schedule(static)
271       for (ir=0;ir< n;ir++) {
272           for (ib=0;ib<blk; ib++) {
273             irow=ib+blk*ir;
274             rtmp1=left_over[irow];
275             rtmp2=A_p->mainBlock->val[main_ptr[ir]*nblk+ib+blk*ib];
276             A_p->mainBlock->val[main_ptr[ir]*nblk+ib+blk*ib] = -rtmp1;
277             left_over[irow]=rtmp2+rtmp1;
278           }
279       }
280    }
281    void Paso_SystemMatrix_copyBlockFromMainDiagonal(Paso_SystemMatrix * A_p, double* out)
282    {
283        Paso_SparseMatrix_copyBlockFromMainDiagonal(A_p->mainBlock, out);
284        return;
285    }
286    void Paso_SystemMatrix_copyBlockToMainDiagonal(Paso_SystemMatrix * A_p, const double* in)
287    {
288        Paso_SparseMatrix_copyBlockToMainDiagonal(A_p->mainBlock, in);
289        return;
290    }
291    void Paso_SystemMatrix_copyFromMainDiagonal(Paso_SystemMatrix * A_p, double* out)
292    {
293        Paso_SparseMatrix_copyFromMainDiagonal(A_p->mainBlock, out);
294        return;
295    }
296    void Paso_SystemMatrix_copyToMainDiagonal(Paso_SystemMatrix * A_p, const double* in)
297    {
298        Paso_SparseMatrix_copyToMainDiagonal(A_p->mainBlock, in);
299        return;
300    }

Legend:
Removed from v.2548  
changed lines
  Added in v.3005

  ViewVC Help
Powered by ViewVC 1.1.26