/[escript]/trunk/paso/src/SystemMatrix_MatrixVector.c
ViewVC logotype

Annotation of /trunk/paso/src/SystemMatrix_MatrixVector.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 631 - (hide annotations)
Thu Mar 23 04:27:32 2006 UTC (13 years, 11 months ago) by dhawcroft
File MIME type: text/plain
File size: 13777 byte(s)
Prepended all paso source files with new Copyright notice
1 jgs 150 /* $Id$ */
2    
3 dhawcroft 631
4     /*
5     ********************************************************************************
6     * Copyright © 2006 by ACcESS MNRF *
7     * *
8     * http://www.access.edu.au *
9     * Primary Business: Queensland, Australia *
10     * Licensed under the Open Software License version 3.0 *
11     * http://www.opensource.org/licenses/osl-3.0.php *
12     ********************************************************************************
13     */
14    
15 jgs 150 /**************************************************************/
16    
17     /* Paso: matrix vector product with sparse matrix */
18    
19     /**************************************************************/
20    
21     /* Copyrights by ACcESS Australia 2003,2004,2005 */
22     /* Author: gross@access.edu.au */
23    
24     /**************************************************************/
25    
26     #include "Paso.h"
27     #include "SystemMatrix.h"
28    
29     /**************************************************************************/
30    
31     /* raw scaled vector update operation: out = alpha * A * in + beta * out */
32    
33     /* has to be called within a parallel region */
34     /* barrier synconization is performed to make sure that the input vector available */
35    
36     void Paso_SystemMatrix_MatrixVector(double alpha,
37     Paso_SystemMatrix* A,
38     double* in,
39     double beta,
40     double* out) {
41    
42 gross 415 if (A->type & MATRIX_FORMAT_CSC) {
43     if (A->type & MATRIX_FORMAT_OFFSET1) {
44     Paso_SystemMatrix_MatrixVector_CSC_OFFSET1(alpha,A,in,beta,out);
45     } else {
46     Paso_SystemMatrix_MatrixVector_CSC_OFFSET0(alpha,A,in,beta,out);
47     }
48     } else {
49     if (A->type & MATRIX_FORMAT_OFFSET1) {
50     Paso_SystemMatrix_MatrixVector_CSR_OFFSET1(alpha,A,in,beta,out);
51     } else {
52     Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(alpha,A,in,beta,out);
53     }
54     }
55     return;
56     }
57    
58     void Paso_SystemMatrix_MatrixVector_CSC_OFFSET0(double alpha,
59     Paso_SystemMatrix* A,
60     double* in,
61     double beta,
62     double* out) {
63    
64 jgs 150 register index_t ir,icol,iptr,icb,irb,irow,ic;
65     register double reg,reg1,reg2,reg3;
66     #pragma omp barrier
67    
68     if (ABS(beta)>0.) {
69     #pragma omp for private(irow) schedule(static)
70     for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
71     out[irow] *= beta;
72     } else {
73     #pragma omp for private(irow) schedule(static)
74     for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
75     out[irow] = 0;
76     }
77    
78     /* do the operation: */
79     if (ABS(alpha)>0) {
80     if (A ->col_block_size==1 && A->row_block_size ==1) {
81 gross 415 /* TODO: parallelize (good luck!) */
82     #pragma omp single
83     for (icol=0;icol< A->pattern->n_ptr;++icol) {
84     for (iptr=A->pattern->ptr[icol];iptr<A->pattern->ptr[icol+1]; ++iptr) {
85     out[A->pattern->index[iptr]]+= alpha * A->val[iptr] * in[icol];
86 jgs 150 }
87     }
88 gross 415 } else if (A ->col_block_size==2 && A->row_block_size ==2) {
89     /* TODO: parallelize */
90     #pragma omp single
91     for (ic=0;ic< A->pattern->n_ptr;ic++) {
92     for (iptr=A->pattern->ptr[ic];iptr<A->pattern->ptr[ic+1]; iptr++) {
93     ic=2*(A->pattern->index[iptr]);
94     out[ 2*ir] += alpha * ( A->val[iptr*4 ]*in[ic] + A->val[iptr*4+2]*in[1+ic] );
95     out[1+2*ir] += alpha * ( A->val[iptr*4+1]*in[ic] + A->val[iptr*4+3]*in[1+ic] );
96     }
97     }
98     } else if (A ->col_block_size==3 && A->row_block_size ==3) {
99     /* TODO: parallelize */
100     #pragma omp single
101     for (ic=0;ic< A->pattern->n_ptr;ic++) {
102     for (iptr=A->pattern->ptr[ic];iptr<A->pattern->ptr[ic+1]; iptr++) {
103     ir=3*(A->pattern->index[iptr]);
104     out[ 3*ir] += alpha * ( A->val[iptr*9 ]*in[ic] + A->val[iptr*9+3]*in[1+ic] + A->val[iptr*9+6]*in[2+ic] );
105     out[1+3*ir] += alpha * ( A->val[iptr*9+1]*in[ic] + A->val[iptr*9+4]*in[1+ic] + A->val[iptr*9+7]*in[2+ic] );
106     out[2+3*ir] += alpha * ( A->val[iptr*9+2]*in[ic] + A->val[iptr*9+5]*in[1+ic] + A->val[iptr*9+8]*in[2+ic] );
107     }
108     }
109     } else {
110     /* TODO: parallelize */
111     #pragma omp single
112     for (ic=0;ic< A->pattern->n_ptr;ic++) {
113     for (iptr=A->pattern->ptr[ic];iptr<A->pattern->ptr[ic+1]; iptr++) {
114     for (irb=0;irb< A->row_block_size;irb++) {
115     irow=irb+A->row_block_size*(A->pattern->index[iptr]);
116     for (icb=0;icb< A->col_block_size;icb++) {
117     icol=icb+A->col_block_size*ic;
118     out[irow] += alpha * A->val[iptr*A->block_size+irb+A->row_block_size*icb] * in[icol];
119     }
120     }
121     }
122     }
123     }
124     }
125     return;
126     }
127    
128     void Paso_SystemMatrix_MatrixVector_CSC_OFFSET1(double alpha,
129     Paso_SystemMatrix* A,
130     double* in,
131     double beta,
132     double* out) {
133    
134     register index_t ir,icol,iptr,icb,irb,irow,ic;
135     register double reg,reg1,reg2,reg3;
136     #pragma omp barrier
137    
138     if (ABS(beta)>0.) {
139     #pragma omp for private(irow) schedule(static)
140     for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
141     out[irow] *= beta;
142     } else {
143     #pragma omp for private(irow) schedule(static)
144     for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
145     out[irow] = 0;
146     }
147    
148     /* do the operation: */
149     if (ABS(alpha)>0) {
150     if (A ->col_block_size==1 && A->row_block_size ==1) {
151 jgs 150 /* TODO: parallelize (good luck!) */
152     #pragma omp single
153     for (icol=0;icol< A->pattern->n_ptr;++icol) {
154 gross 415 for (iptr=A->pattern->ptr[icol]-1;iptr<A->pattern->ptr[icol+1]-1; ++iptr) {
155     out[A->pattern->index[iptr]-1]+= alpha * A->val[iptr] * in[icol];
156 jgs 150 }
157     }
158     } else if (A ->col_block_size==2 && A->row_block_size ==2) {
159 gross 415 /* TODO: parallelize */
160     #pragma omp single
161     for (ic=0;ic< A->pattern->n_ptr;ic++) {
162     for (iptr=A->pattern->ptr[ic]-1;iptr<A->pattern->ptr[ic+1]-1; iptr++) {
163     ic=2*(A->pattern->index[iptr]-1);
164     out[ 2*ir] += alpha * ( A->val[iptr*4 ]*in[ic] + A->val[iptr*4+2]*in[1+ic] );
165     out[1+2*ir] += alpha * ( A->val[iptr*4+1]*in[ic] + A->val[iptr*4+3]*in[1+ic] );
166     }
167     }
168     } else if (A ->col_block_size==3 && A->row_block_size ==3) {
169     /* TODO: parallelize */
170     #pragma omp single
171     for (ic=0;ic< A->pattern->n_ptr;ic++) {
172     for (iptr=A->pattern->ptr[ic]-1;iptr<A->pattern->ptr[ic+1]-1; iptr++) {
173     ir=3*(A->pattern->index[iptr]-1);
174     out[ 3*ir] += alpha * ( A->val[iptr*9 ]*in[ic] + A->val[iptr*9+3]*in[1+ic] + A->val[iptr*9+6]*in[2+ic] );
175     out[1+3*ir] += alpha * ( A->val[iptr*9+1]*in[ic] + A->val[iptr*9+4]*in[1+ic] + A->val[iptr*9+7]*in[2+ic] );
176     out[2+3*ir] += alpha * ( A->val[iptr*9+2]*in[ic] + A->val[iptr*9+5]*in[1+ic] + A->val[iptr*9+8]*in[2+ic] );
177     }
178     }
179     } else {
180     /* TODO: parallelize */
181     #pragma omp single
182     for (ic=0;ic< A->pattern->n_ptr;ic++) {
183     for (iptr=A->pattern->ptr[ic]-1;iptr<A->pattern->ptr[ic+1]-1; iptr++) {
184     for (irb=0;irb< A->row_block_size;irb++) {
185     irow=irb+A->row_block_size*(A->pattern->index[iptr]-1);
186     for (icb=0;icb< A->col_block_size;icb++) {
187     icol=icb+A->col_block_size*ic;
188     out[irow] += alpha * A->val[iptr*A->block_size+irb+A->row_block_size*icb] * in[icol];
189     }
190     }
191     }
192     }
193     }
194     }
195     return;
196     }
197    
198     void Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(double alpha,
199     Paso_SystemMatrix* A,
200     double* in,
201     double beta,
202     double* out) {
203    
204 gross 495 register index_t ir,icol,iptr,icb,irb,irow,ic,Aiptr;
205     register double reg,reg1,reg2,reg3,in1,in2,in3,A00,A10,A20,A01,A11,A21,A02,A12,A22;
206 gross 415 #pragma omp barrier
207     if (ABS(beta)>0.) {
208     #pragma omp for private(irow) schedule(static)
209     for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
210     out[irow] *= beta;
211     } else {
212     #pragma omp for private(irow) schedule(static)
213     for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
214     out[irow] = 0;
215     }
216     /* do the operation: */
217     if (ABS(alpha)>0) {
218     if (A ->col_block_size==1 && A->row_block_size ==1) {
219     #pragma omp for private(irow,iptr,reg) schedule(static)
220     for (irow=0;irow< A->pattern->n_ptr;++irow) {
221     reg=0.;
222     for (iptr=(A->pattern->ptr[irow]);iptr<(A->pattern->ptr[irow+1]); ++iptr) {
223     reg += A->val[iptr] * in[A->pattern->index[iptr]];
224     }
225     out[irow] += alpha * reg;
226     }
227     } else if (A ->col_block_size==2 && A->row_block_size ==2) {
228 gross 512 #pragma omp for private(ir,reg1,reg2,iptr,ic,Aiptr,in1,in2,A00,A10,A01,A11) schedule(static)
229 jgs 150 for (ir=0;ir< A->pattern->n_ptr;ir++) {
230     reg1=0.;
231     reg2=0.;
232 gross 415 for (iptr=A->pattern->ptr[ir];iptr<A->pattern->ptr[ir+1]; iptr++) {
233     ic=2*(A->pattern->index[iptr]);
234 gross 495 Aiptr=iptr*4;
235     in1=in[ic];
236     in2=in[1+ic];
237     A00=A->val[Aiptr ];
238     A10=A->val[Aiptr+1];
239     A01=A->val[Aiptr+2];
240     A11=A->val[Aiptr+3];
241     reg1 += A00*in1 + A01*in2;
242     reg2 += A10*in1 + A11*in2;
243 jgs 150 }
244     out[ 2*ir] += alpha * reg1;
245     out[1+2*ir] += alpha * reg2;
246     }
247     } else if (A ->col_block_size==3 && A->row_block_size ==3) {
248 gross 512 #pragma omp for private(ir,reg1,reg2,reg3,iptr,ic,Aiptr,in1,in2,in3,A00,A10,A20,A01,A11,A21,A02,A12,A22) schedule(static)
249 jgs 150 for (ir=0;ir< A->pattern->n_ptr;ir++) {
250     reg1=0.;
251     reg2=0.;
252     reg3=0.;
253 gross 415 for (iptr=A->pattern->ptr[ir];iptr<A->pattern->ptr[ir+1]; iptr++) {
254     ic=3*(A->pattern->index[iptr]);
255 gross 495 Aiptr=iptr*9;
256     in1=in[ic];
257     in2=in[1+ic];
258     in3=in[2+ic];
259     A00=A->val[Aiptr ];
260     A10=A->val[Aiptr+1];
261     A20=A->val[Aiptr+2];
262     A01=A->val[Aiptr+3];
263     A11=A->val[Aiptr+4];
264     A21=A->val[Aiptr+5];
265     A02=A->val[Aiptr+6];
266     A12=A->val[Aiptr+7];
267     A22=A->val[Aiptr+8];
268     reg1 += A00*in1 + A01*in2 + A02*in3;
269     reg2 += A10*in1 + A11*in2 + A12*in3;
270     reg3 += A20*in1 + A21*in2 + A22*in3;
271 jgs 150 }
272     out[ 3*ir] += alpha * reg1;
273     out[1+3*ir] += alpha * reg2;
274     out[2+3*ir] += alpha * reg3;
275     }
276     } else {
277     #pragma omp for private(ir,iptr,irb,icb,irow,icol,reg) schedule(static)
278     for (ir=0;ir< A->pattern->n_ptr;ir++) {
279 gross 415 for (iptr=A->pattern->ptr[ir];iptr<A->pattern->ptr[ir+1]; iptr++) {
280 jgs 150 for (irb=0;irb< A->row_block_size;irb++) {
281     irow=irb+A->row_block_size*ir;
282     reg=0.;
283     for (icb=0;icb< A->col_block_size;icb++) {
284 gross 415 icol=icb+A->col_block_size*(A->pattern->index[iptr]);
285 jgs 150 reg += A->val[iptr*A->block_size+irb+A->row_block_size*icb] * in[icol];
286     }
287     out[irow] += alpha * reg;
288     }
289     }
290     }
291 gross 415 }
292     }
293     return;
294     }
295    
296     void Paso_SystemMatrix_MatrixVector_CSR_OFFSET1(double alpha,
297     Paso_SystemMatrix* A,
298     double* in,
299     double beta,
300     double* out) {
301    
302     register index_t ir,icol,iptr,icb,irb,irow,ic;
303     register double reg,reg1,reg2,reg3;
304     #pragma omp barrier
305    
306     if (ABS(beta)>0.) {
307     #pragma omp for private(irow) schedule(static)
308     for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
309     out[irow] *= beta;
310     } else {
311     #pragma omp for private(irow) schedule(static)
312     for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
313     out[irow] = 0;
314     }
315     /* do the operation: */
316     if (ABS(alpha)>0) {
317     if (A ->col_block_size==1 && A->row_block_size ==1) {
318     #pragma omp for private(irow,iptr,reg) schedule(static)
319     for (irow=0;irow< A->pattern->n_ptr;++irow) {
320     reg=0.;
321     for (iptr=(A->pattern->ptr[irow])-1;iptr<(A->pattern->ptr[irow+1])-1; ++iptr) {
322     reg += A->val[iptr] * in[A->pattern->index[iptr]-1];
323     }
324     out[irow] += alpha * reg;
325     }
326     } else if (A ->col_block_size==2 && A->row_block_size ==2) {
327 gross 512 #pragma omp for private(ir,reg1,reg2,iptr,ic) schedule(static)
328 gross 415 for (ir=0;ir< A->pattern->n_ptr;ir++) {
329     reg1=0.;
330     reg2=0.;
331     for (iptr=A->pattern->ptr[ir]-1;iptr<A->pattern->ptr[ir+1]-1; iptr++) {
332     ic=2*(A->pattern->index[iptr]-1);
333     reg1 += A->val[iptr*4 ]*in[ic] + A->val[iptr*4+2]*in[1+ic];
334     reg2 += A->val[iptr*4+1]*in[ic] + A->val[iptr*4+3]*in[1+ic];
335     }
336     out[ 2*ir] += alpha * reg1;
337     out[1+2*ir] += alpha * reg2;
338     }
339     } else if (A ->col_block_size==3 && A->row_block_size ==3) {
340 gross 512 #pragma omp for private(ir,reg1,reg2,reg3,iptr,ic) schedule(static)
341 gross 415 for (ir=0;ir< A->pattern->n_ptr;ir++) {
342     reg1=0.;
343     reg2=0.;
344     reg3=0.;
345     for (iptr=A->pattern->ptr[ir]-1;iptr<A->pattern->ptr[ir+1]-1; iptr++) {
346     ic=3*(A->pattern->index[iptr]-1);
347     reg1 += A->val[iptr*9 ]*in[ic] + A->val[iptr*9+3]*in[1+ic] + A->val[iptr*9+6]*in[2+ic];
348     reg2 += A->val[iptr*9+1]*in[ic] + A->val[iptr*9+4]*in[1+ic] + A->val[iptr*9+7]*in[2+ic];
349     reg3 += A->val[iptr*9+2]*in[ic] + A->val[iptr*9+5]*in[1+ic] + A->val[iptr*9+8]*in[2+ic];
350     }
351     out[ 3*ir] += alpha * reg1;
352     out[1+3*ir] += alpha * reg2;
353     out[2+3*ir] += alpha * reg3;
354     }
355     } else {
356     #pragma omp for private(ir,iptr,irb,icb,irow,icol,reg) schedule(static)
357     for (ir=0;ir< A->pattern->n_ptr;ir++) {
358     for (iptr=A->pattern->ptr[ir]-1;iptr<A->pattern->ptr[ir+1]-1; iptr++) {
359 jgs 150 for (irb=0;irb< A->row_block_size;irb++) {
360 gross 415 irow=irb+A->row_block_size*ir;
361     reg=0.;
362 jgs 150 for (icb=0;icb< A->col_block_size;icb++) {
363 gross 415 icol=icb+A->col_block_size*(A->pattern->index[iptr]-1);
364     reg += A->val[iptr*A->block_size+irb+A->row_block_size*icb] * in[icol];
365 jgs 150 }
366 gross 415 out[irow] += alpha * reg;
367 jgs 150 }
368     }
369     }
370     }
371     }
372     return;
373     }
374     /*
375     * $Log$
376     * Revision 1.2 2005/09/15 03:44:39 jgs
377     * Merge of development branch dev-02 back to main trunk on 2005-09-15
378     *
379     * Revision 1.1.2.1 2005/09/05 06:29:47 gross
380     * These files have been extracted from finley to define a stand alone libray for iterative
381     * linear solvers on the ALTIX. main entry through Paso_solve. this version compiles but
382     * has not been tested yet.
383     *
384     *
385     */

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26