/[escript]/trunk/paso/src/SystemMatrix_MatrixVector.c
ViewVC logotype

Annotation of /trunk/paso/src/SystemMatrix_MatrixVector.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 415 - (hide annotations)
Wed Jan 4 05:37:33 2006 UTC (14 years, 6 months ago) by gross
File MIME type: text/plain
File size: 12610 byte(s)
a better way of representing the matrix format type is introduced. this is needed for the Paradiso and UMFPACK interface
1 jgs 150 /* $Id$ */
2    
3     /**************************************************************/
4    
5     /* Paso: matrix vector product with sparse matrix */
6    
7     /**************************************************************/
8    
9     /* Copyrights by ACcESS Australia 2003,2004,2005 */
10     /* Author: gross@access.edu.au */
11    
12     /**************************************************************/
13    
14     #include "Paso.h"
15     #include "SystemMatrix.h"
16    
17     /**************************************************************************/
18    
19     /* raw scaled vector update operation: out = alpha * A * in + beta * out */
20    
21     /* has to be called within a parallel region */
22     /* barrier synconization is performed to make sure that the input vector available */
23    
24     void Paso_SystemMatrix_MatrixVector(double alpha,
25     Paso_SystemMatrix* A,
26     double* in,
27     double beta,
28     double* out) {
29    
30 gross 415 if (A->type & MATRIX_FORMAT_CSC) {
31     if (A->type & MATRIX_FORMAT_OFFSET1) {
32     Paso_SystemMatrix_MatrixVector_CSC_OFFSET1(alpha,A,in,beta,out);
33     } else {
34     Paso_SystemMatrix_MatrixVector_CSC_OFFSET0(alpha,A,in,beta,out);
35     }
36     } else {
37     if (A->type & MATRIX_FORMAT_OFFSET1) {
38     Paso_SystemMatrix_MatrixVector_CSR_OFFSET1(alpha,A,in,beta,out);
39     } else {
40     Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(alpha,A,in,beta,out);
41     }
42     }
43     return;
44     }
45    
46     void Paso_SystemMatrix_MatrixVector_CSC_OFFSET0(double alpha,
47     Paso_SystemMatrix* A,
48     double* in,
49     double beta,
50     double* out) {
51    
52 jgs 150 register index_t ir,icol,iptr,icb,irb,irow,ic;
53     register double reg,reg1,reg2,reg3;
54     #pragma omp barrier
55    
56     if (ABS(beta)>0.) {
57     #pragma omp for private(irow) schedule(static)
58     for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
59     out[irow] *= beta;
60     } else {
61     #pragma omp for private(irow) schedule(static)
62     for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
63     out[irow] = 0;
64     }
65    
66     /* do the operation: */
67     if (ABS(alpha)>0) {
68     if (A ->col_block_size==1 && A->row_block_size ==1) {
69 gross 415 /* TODO: parallelize (good luck!) */
70     #pragma omp single
71     for (icol=0;icol< A->pattern->n_ptr;++icol) {
72     for (iptr=A->pattern->ptr[icol];iptr<A->pattern->ptr[icol+1]; ++iptr) {
73     out[A->pattern->index[iptr]]+= alpha * A->val[iptr] * in[icol];
74 jgs 150 }
75     }
76 gross 415 } else if (A ->col_block_size==2 && A->row_block_size ==2) {
77     /* TODO: parallelize */
78     #pragma omp single
79     for (ic=0;ic< A->pattern->n_ptr;ic++) {
80     for (iptr=A->pattern->ptr[ic];iptr<A->pattern->ptr[ic+1]; iptr++) {
81     ic=2*(A->pattern->index[iptr]);
82     out[ 2*ir] += alpha * ( A->val[iptr*4 ]*in[ic] + A->val[iptr*4+2]*in[1+ic] );
83     out[1+2*ir] += alpha * ( A->val[iptr*4+1]*in[ic] + A->val[iptr*4+3]*in[1+ic] );
84     }
85     }
86     } else if (A ->col_block_size==3 && A->row_block_size ==3) {
87     /* TODO: parallelize */
88     #pragma omp single
89     for (ic=0;ic< A->pattern->n_ptr;ic++) {
90     for (iptr=A->pattern->ptr[ic];iptr<A->pattern->ptr[ic+1]; iptr++) {
91     ir=3*(A->pattern->index[iptr]);
92     out[ 3*ir] += alpha * ( A->val[iptr*9 ]*in[ic] + A->val[iptr*9+3]*in[1+ic] + A->val[iptr*9+6]*in[2+ic] );
93     out[1+3*ir] += alpha * ( A->val[iptr*9+1]*in[ic] + A->val[iptr*9+4]*in[1+ic] + A->val[iptr*9+7]*in[2+ic] );
94     out[2+3*ir] += alpha * ( A->val[iptr*9+2]*in[ic] + A->val[iptr*9+5]*in[1+ic] + A->val[iptr*9+8]*in[2+ic] );
95     }
96     }
97     } else {
98     /* TODO: parallelize */
99     #pragma omp single
100     for (ic=0;ic< A->pattern->n_ptr;ic++) {
101     for (iptr=A->pattern->ptr[ic];iptr<A->pattern->ptr[ic+1]; iptr++) {
102     for (irb=0;irb< A->row_block_size;irb++) {
103     irow=irb+A->row_block_size*(A->pattern->index[iptr]);
104     for (icb=0;icb< A->col_block_size;icb++) {
105     icol=icb+A->col_block_size*ic;
106     out[irow] += alpha * A->val[iptr*A->block_size+irb+A->row_block_size*icb] * in[icol];
107     }
108     }
109     }
110     }
111     }
112     }
113     return;
114     }
115    
116     void Paso_SystemMatrix_MatrixVector_CSC_OFFSET1(double alpha,
117     Paso_SystemMatrix* A,
118     double* in,
119     double beta,
120     double* out) {
121    
122     register index_t ir,icol,iptr,icb,irb,irow,ic;
123     register double reg,reg1,reg2,reg3;
124     #pragma omp barrier
125    
126     if (ABS(beta)>0.) {
127     #pragma omp for private(irow) schedule(static)
128     for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
129     out[irow] *= beta;
130     } else {
131     #pragma omp for private(irow) schedule(static)
132     for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
133     out[irow] = 0;
134     }
135    
136     /* do the operation: */
137     if (ABS(alpha)>0) {
138     if (A ->col_block_size==1 && A->row_block_size ==1) {
139 jgs 150 /* TODO: parallelize (good luck!) */
140     #pragma omp single
141     for (icol=0;icol< A->pattern->n_ptr;++icol) {
142 gross 415 for (iptr=A->pattern->ptr[icol]-1;iptr<A->pattern->ptr[icol+1]-1; ++iptr) {
143     out[A->pattern->index[iptr]-1]+= alpha * A->val[iptr] * in[icol];
144 jgs 150 }
145     }
146     } else if (A ->col_block_size==2 && A->row_block_size ==2) {
147 gross 415 /* TODO: parallelize */
148     #pragma omp single
149     for (ic=0;ic< A->pattern->n_ptr;ic++) {
150     for (iptr=A->pattern->ptr[ic]-1;iptr<A->pattern->ptr[ic+1]-1; iptr++) {
151     ic=2*(A->pattern->index[iptr]-1);
152     out[ 2*ir] += alpha * ( A->val[iptr*4 ]*in[ic] + A->val[iptr*4+2]*in[1+ic] );
153     out[1+2*ir] += alpha * ( A->val[iptr*4+1]*in[ic] + A->val[iptr*4+3]*in[1+ic] );
154     }
155     }
156     } else if (A ->col_block_size==3 && A->row_block_size ==3) {
157     /* TODO: parallelize */
158     #pragma omp single
159     for (ic=0;ic< A->pattern->n_ptr;ic++) {
160     for (iptr=A->pattern->ptr[ic]-1;iptr<A->pattern->ptr[ic+1]-1; iptr++) {
161     ir=3*(A->pattern->index[iptr]-1);
162     out[ 3*ir] += alpha * ( A->val[iptr*9 ]*in[ic] + A->val[iptr*9+3]*in[1+ic] + A->val[iptr*9+6]*in[2+ic] );
163     out[1+3*ir] += alpha * ( A->val[iptr*9+1]*in[ic] + A->val[iptr*9+4]*in[1+ic] + A->val[iptr*9+7]*in[2+ic] );
164     out[2+3*ir] += alpha * ( A->val[iptr*9+2]*in[ic] + A->val[iptr*9+5]*in[1+ic] + A->val[iptr*9+8]*in[2+ic] );
165     }
166     }
167     } else {
168     /* TODO: parallelize */
169     #pragma omp single
170     for (ic=0;ic< A->pattern->n_ptr;ic++) {
171     for (iptr=A->pattern->ptr[ic]-1;iptr<A->pattern->ptr[ic+1]-1; iptr++) {
172     for (irb=0;irb< A->row_block_size;irb++) {
173     irow=irb+A->row_block_size*(A->pattern->index[iptr]-1);
174     for (icb=0;icb< A->col_block_size;icb++) {
175     icol=icb+A->col_block_size*ic;
176     out[irow] += alpha * A->val[iptr*A->block_size+irb+A->row_block_size*icb] * in[icol];
177     }
178     }
179     }
180     }
181     }
182     }
183     return;
184     }
185    
186     void Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(double alpha,
187     Paso_SystemMatrix* A,
188     double* in,
189     double beta,
190     double* out) {
191    
192     register index_t ir,icol,iptr,icb,irb,irow,ic;
193     register double reg,reg1,reg2,reg3;
194     #pragma omp barrier
195     if (ABS(beta)>0.) {
196     #pragma omp for private(irow) schedule(static)
197     for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
198     out[irow] *= beta;
199     } else {
200     #pragma omp for private(irow) schedule(static)
201     for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
202     out[irow] = 0;
203     }
204     /* do the operation: */
205     if (ABS(alpha)>0) {
206     if (A ->col_block_size==1 && A->row_block_size ==1) {
207     #pragma omp for private(irow,iptr,reg) schedule(static)
208     for (irow=0;irow< A->pattern->n_ptr;++irow) {
209     reg=0.;
210     for (iptr=(A->pattern->ptr[irow]);iptr<(A->pattern->ptr[irow+1]); ++iptr) {
211     reg += A->val[iptr] * in[A->pattern->index[iptr]];
212     }
213     out[irow] += alpha * reg;
214     }
215     } else if (A ->col_block_size==2 && A->row_block_size ==2) {
216 jgs 150 #pragma omp for private(ir,iptr,irb,icb,irow,icol,reg1,reg2) schedule(static)
217     for (ir=0;ir< A->pattern->n_ptr;ir++) {
218     reg1=0.;
219     reg2=0.;
220 gross 415 for (iptr=A->pattern->ptr[ir];iptr<A->pattern->ptr[ir+1]; iptr++) {
221     ic=2*(A->pattern->index[iptr]);
222 jgs 150 reg1 += A->val[iptr*4 ]*in[ic] + A->val[iptr*4+2]*in[1+ic];
223     reg2 += A->val[iptr*4+1]*in[ic] + A->val[iptr*4+3]*in[1+ic];
224     }
225     out[ 2*ir] += alpha * reg1;
226     out[1+2*ir] += alpha * reg2;
227     }
228     } else if (A ->col_block_size==3 && A->row_block_size ==3) {
229     #pragma omp for private(ir,iptr,irb,icb,irow,icol,reg1,reg2,reg3) schedule(static)
230     for (ir=0;ir< A->pattern->n_ptr;ir++) {
231     reg1=0.;
232     reg2=0.;
233     reg3=0.;
234 gross 415 for (iptr=A->pattern->ptr[ir];iptr<A->pattern->ptr[ir+1]; iptr++) {
235     ic=3*(A->pattern->index[iptr]);
236 jgs 150 reg1 += A->val[iptr*9 ]*in[ic] + A->val[iptr*9+3]*in[1+ic] + A->val[iptr*9+6]*in[2+ic];
237     reg2 += A->val[iptr*9+1]*in[ic] + A->val[iptr*9+4]*in[1+ic] + A->val[iptr*9+7]*in[2+ic];
238     reg3 += A->val[iptr*9+2]*in[ic] + A->val[iptr*9+5]*in[1+ic] + A->val[iptr*9+8]*in[2+ic];
239     }
240     out[ 3*ir] += alpha * reg1;
241     out[1+3*ir] += alpha * reg2;
242     out[2+3*ir] += alpha * reg3;
243     }
244     } else {
245     #pragma omp for private(ir,iptr,irb,icb,irow,icol,reg) schedule(static)
246     for (ir=0;ir< A->pattern->n_ptr;ir++) {
247 gross 415 for (iptr=A->pattern->ptr[ir];iptr<A->pattern->ptr[ir+1]; iptr++) {
248 jgs 150 for (irb=0;irb< A->row_block_size;irb++) {
249     irow=irb+A->row_block_size*ir;
250     reg=0.;
251     for (icb=0;icb< A->col_block_size;icb++) {
252 gross 415 icol=icb+A->col_block_size*(A->pattern->index[iptr]);
253 jgs 150 reg += A->val[iptr*A->block_size+irb+A->row_block_size*icb] * in[icol];
254     }
255     out[irow] += alpha * reg;
256     }
257     }
258     }
259 gross 415 }
260     }
261     return;
262     }
263    
264     void Paso_SystemMatrix_MatrixVector_CSR_OFFSET1(double alpha,
265     Paso_SystemMatrix* A,
266     double* in,
267     double beta,
268     double* out) {
269    
270     register index_t ir,icol,iptr,icb,irb,irow,ic;
271     register double reg,reg1,reg2,reg3;
272     #pragma omp barrier
273    
274     if (ABS(beta)>0.) {
275     #pragma omp for private(irow) schedule(static)
276     for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
277     out[irow] *= beta;
278     } else {
279     #pragma omp for private(irow) schedule(static)
280     for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
281     out[irow] = 0;
282     }
283     /* do the operation: */
284     if (ABS(alpha)>0) {
285     if (A ->col_block_size==1 && A->row_block_size ==1) {
286     #pragma omp for private(irow,iptr,reg) schedule(static)
287     for (irow=0;irow< A->pattern->n_ptr;++irow) {
288     reg=0.;
289     for (iptr=(A->pattern->ptr[irow])-1;iptr<(A->pattern->ptr[irow+1])-1; ++iptr) {
290     reg += A->val[iptr] * in[A->pattern->index[iptr]-1];
291     }
292     out[irow] += alpha * reg;
293     }
294     } else if (A ->col_block_size==2 && A->row_block_size ==2) {
295     #pragma omp for private(ir,iptr,irb,icb,irow,icol,reg1,reg2) schedule(static)
296     for (ir=0;ir< A->pattern->n_ptr;ir++) {
297     reg1=0.;
298     reg2=0.;
299     for (iptr=A->pattern->ptr[ir]-1;iptr<A->pattern->ptr[ir+1]-1; iptr++) {
300     ic=2*(A->pattern->index[iptr]-1);
301     reg1 += A->val[iptr*4 ]*in[ic] + A->val[iptr*4+2]*in[1+ic];
302     reg2 += A->val[iptr*4+1]*in[ic] + A->val[iptr*4+3]*in[1+ic];
303     }
304     out[ 2*ir] += alpha * reg1;
305     out[1+2*ir] += alpha * reg2;
306     }
307     } else if (A ->col_block_size==3 && A->row_block_size ==3) {
308     #pragma omp for private(ir,iptr,irb,icb,irow,icol,reg1,reg2,reg3) schedule(static)
309     for (ir=0;ir< A->pattern->n_ptr;ir++) {
310     reg1=0.;
311     reg2=0.;
312     reg3=0.;
313     for (iptr=A->pattern->ptr[ir]-1;iptr<A->pattern->ptr[ir+1]-1; iptr++) {
314     ic=3*(A->pattern->index[iptr]-1);
315     reg1 += A->val[iptr*9 ]*in[ic] + A->val[iptr*9+3]*in[1+ic] + A->val[iptr*9+6]*in[2+ic];
316     reg2 += A->val[iptr*9+1]*in[ic] + A->val[iptr*9+4]*in[1+ic] + A->val[iptr*9+7]*in[2+ic];
317     reg3 += A->val[iptr*9+2]*in[ic] + A->val[iptr*9+5]*in[1+ic] + A->val[iptr*9+8]*in[2+ic];
318     }
319     out[ 3*ir] += alpha * reg1;
320     out[1+3*ir] += alpha * reg2;
321     out[2+3*ir] += alpha * reg3;
322     }
323     } else {
324     #pragma omp for private(ir,iptr,irb,icb,irow,icol,reg) schedule(static)
325     for (ir=0;ir< A->pattern->n_ptr;ir++) {
326     for (iptr=A->pattern->ptr[ir]-1;iptr<A->pattern->ptr[ir+1]-1; iptr++) {
327 jgs 150 for (irb=0;irb< A->row_block_size;irb++) {
328 gross 415 irow=irb+A->row_block_size*ir;
329     reg=0.;
330 jgs 150 for (icb=0;icb< A->col_block_size;icb++) {
331 gross 415 icol=icb+A->col_block_size*(A->pattern->index[iptr]-1);
332     reg += A->val[iptr*A->block_size+irb+A->row_block_size*icb] * in[icol];
333 jgs 150 }
334 gross 415 out[irow] += alpha * reg;
335 jgs 150 }
336     }
337     }
338     }
339     }
340     return;
341     }
342     /*
343     * $Log$
344     * Revision 1.2 2005/09/15 03:44:39 jgs
345     * Merge of development branch dev-02 back to main trunk on 2005-09-15
346     *
347     * Revision 1.1.2.1 2005/09/05 06:29:47 gross
348     * These files have been extracted from finley to define a stand alone libray for iterative
349     * linear solvers on the ALTIX. main entry through Paso_solve. this version compiles but
350     * has not been tested yet.
351     *
352     *
353     */

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26