/[escript]/trunk/paso/src/SparseMatrix_MatrixVector.c
ViewVC logotype

Annotation of /trunk/paso/src/SparseMatrix_MatrixVector.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1384 - (hide annotations)
Fri Jan 11 02:29:38 2008 UTC (12 years, 9 months ago) by phornby
Original Path: temp_trunk_copy/paso/src/SparseMatrix_MatrixVector.c
File MIME type: text/plain
File size: 13685 byte(s)
Make a temp copy of the trunk before checking in the windows changes


1 ksteube 1315
2     /* $Id: SparseMatrix_MatrixVector.c 1306 2007-09-18 05:51:09Z ksteube $ */
3    
4     /*******************************************************
5     *
6     * Copyright 2003-2007 by ACceSS MNRF
7     * Copyright 2007 by University of Queensland
8     *
9     * http://esscc.uq.edu.au
10     * Primary Business: Queensland, Australia
11     * Licensed under the Open Software License version 3.0
12     * http://www.opensource.org/licenses/osl-3.0.php
13     *
14     *******************************************************/
15    
16     /**************************************************************/
17    
18     /* Paso: raw scaled vector update operation: out = alpha * A * in + beta * out */
19    
20     /**************************************************************/
21    
22     /* Author: gross@access.edu.au */
23    
24     /**************************************************************/
25    
26     #include "SparseMatrix.h"
27    
28     /* CSC format with offset 0*/
29     void Paso_SparseMatrix_MatrixVector_CSC_OFFSET0(double alpha,
30     Paso_SparseMatrix* A,
31     double* in,
32     double beta,
33     double* out) {
34    
35     register index_t ir,icol,iptr,icb,irb,irow,ic;
36     register double reg,reg1,reg2,reg3;
37     #pragma omp barrier
38    
39     if (ABS(beta)>0.) {
40     if (beta != 1.) {
41     #pragma omp for private(irow) schedule(static)
42     for (irow=0;irow < A->numRows * A->row_block_size;irow++)
43     out[irow] *= beta;
44     }
45     } else {
46     #pragma omp for private(irow) schedule(static)
47     for (irow=0;irow < A->numRows * A->row_block_size;irow++)
48     out[irow] = 0;
49     }
50     if (Paso_Pattern_isEmpty(A->pattern)) return;
51     /* do the operation: */
52     if (ABS(alpha)>0) {
53     if (A ->col_block_size==1 && A->row_block_size ==1) {
54     /* TODO: parallelize (good luck!) */
55     #pragma omp single
56     for (icol=0;icol< A->pattern->numOutput;++icol) {
57     #pragma ivdep
58     for (iptr=A->pattern->ptr[icol];iptr<A->pattern->ptr[icol+1]; ++iptr) {
59     out[A->pattern->index[iptr]]+= alpha * A->val[iptr] * in[icol];
60     }
61     }
62     } else if (A ->col_block_size==2 && A->row_block_size ==2) {
63     /* TODO: parallelize */
64     #pragma omp single
65     for (ic=0;ic< A->pattern->numOutput;ic++) {
66     #pragma ivdep
67     for (iptr=A->pattern->ptr[ic];iptr<A->pattern->ptr[ic+1]; iptr++) {
68     ic=2*(A->pattern->index[iptr]);
69     out[ 2*ir] += alpha * ( A->val[iptr*4 ]*in[ic] + A->val[iptr*4+2]*in[1+ic] );
70     out[1+2*ir] += alpha * ( A->val[iptr*4+1]*in[ic] + A->val[iptr*4+3]*in[1+ic] );
71     }
72     }
73     } else if (A ->col_block_size==3 && A->row_block_size ==3) {
74     /* TODO: parallelize */
75     #pragma omp single
76     for (ic=0;ic< A->pattern->numOutput;ic++) {
77     #pragma ivdep
78     for (iptr=A->pattern->ptr[ic];iptr<A->pattern->ptr[ic+1]; iptr++) {
79     ir=3*(A->pattern->index[iptr]);
80     out[ 3*ir] += alpha * ( A->val[iptr*9 ]*in[ic] + A->val[iptr*9+3]*in[1+ic] + A->val[iptr*9+6]*in[2+ic] );
81     out[1+3*ir] += alpha * ( A->val[iptr*9+1]*in[ic] + A->val[iptr*9+4]*in[1+ic] + A->val[iptr*9+7]*in[2+ic] );
82     out[2+3*ir] += alpha * ( A->val[iptr*9+2]*in[ic] + A->val[iptr*9+5]*in[1+ic] + A->val[iptr*9+8]*in[2+ic] );
83     }
84     }
85     } else {
86     /* TODO: parallelize */
87     #pragma omp single
88     for (ic=0;ic< A->pattern->numOutput;ic++) {
89     for (iptr=A->pattern->ptr[ic];iptr<A->pattern->ptr[ic+1]; iptr++) {
90     for (irb=0;irb< A->row_block_size;irb++) {
91     irow=irb+A->row_block_size*(A->pattern->index[iptr]);
92     #pragma ivdep
93     for (icb=0;icb< A->col_block_size;icb++) {
94     icol=icb+A->col_block_size*ic;
95     out[irow] += alpha * A->val[iptr*A->block_size+irb+A->row_block_size*icb] * in[icol];
96     }
97     }
98     }
99     }
100     }
101     }
102     return;
103     }
104    
105     /* CSC format with offset 1*/
106     void Paso_SparseMatrix_MatrixVector_CSC_OFFSET1(double alpha,
107     Paso_SparseMatrix* A,
108     double* in,
109     double beta,
110     double* out) {
111    
112     register index_t ir,icol,iptr,icb,irb,irow,ic;
113     register double reg,reg1,reg2,reg3;
114     #pragma omp barrier
115    
116     if (ABS(beta)>0.) {
117     if (beta != 1.) {
118     #pragma omp for private(irow) schedule(static)
119     for (irow=0;irow < A->numRows * A->row_block_size;irow++) {
120     out[irow] *= beta;
121     }
122     }
123     } else {
124     #pragma omp for private(irow) schedule(static)
125     for (irow=0;irow < A->numRows * A->row_block_size;irow++)
126     out[irow] = 0;
127     }
128    
129     /* do the operation: */
130     if (ABS(alpha)>0) {
131     if (A ->col_block_size==1 && A->row_block_size ==1) {
132     /* TODO: parallelize (good luck!) */
133     #pragma omp single
134     for (icol=0;icol< A->pattern->numOutput;++icol) {
135     #pragma ivdep
136     for (iptr=A->pattern->ptr[icol]-1;iptr<A->pattern->ptr[icol+1]-1; ++iptr) {
137     out[A->pattern->index[iptr]-1]+= alpha * A->val[iptr] * in[icol];
138     }
139     }
140     } else if (A ->col_block_size==2 && A->row_block_size ==2) {
141     /* TODO: parallelize */
142     #pragma omp single
143     for (ic=0;ic< A->pattern->numOutput;ic++) {
144     for (iptr=A->pattern->ptr[ic]-1;iptr<A->pattern->ptr[ic+1]-1; iptr++) {
145     ic=2*(A->pattern->index[iptr]-1);
146     out[ 2*ir] += alpha * ( A->val[iptr*4 ]*in[ic] + A->val[iptr*4+2]*in[1+ic] );
147     out[1+2*ir] += alpha * ( A->val[iptr*4+1]*in[ic] + A->val[iptr*4+3]*in[1+ic] );
148     }
149     }
150     } else if (A ->col_block_size==3 && A->row_block_size ==3) {
151     /* TODO: parallelize */
152     #pragma omp single
153     for (ic=0;ic< A->pattern->numOutput;ic++) {
154     #pragma ivdep
155     for (iptr=A->pattern->ptr[ic]-1;iptr<A->pattern->ptr[ic+1]-1; iptr++) {
156     ir=3*(A->pattern->index[iptr]-1);
157     out[ 3*ir] += alpha * ( A->val[iptr*9 ]*in[ic] + A->val[iptr*9+3]*in[1+ic] + A->val[iptr*9+6]*in[2+ic] );
158     out[1+3*ir] += alpha * ( A->val[iptr*9+1]*in[ic] + A->val[iptr*9+4]*in[1+ic] + A->val[iptr*9+7]*in[2+ic] );
159     out[2+3*ir] += alpha * ( A->val[iptr*9+2]*in[ic] + A->val[iptr*9+5]*in[1+ic] + A->val[iptr*9+8]*in[2+ic] );
160     }
161     }
162     } else {
163     /* TODO: parallelize */
164     #pragma omp single
165     for (ic=0;ic< A->pattern->numOutput;ic++) {
166     for (iptr=A->pattern->ptr[ic]-1;iptr<A->pattern->ptr[ic+1]-1; iptr++) {
167     for (irb=0;irb< A->row_block_size;irb++) {
168     irow=irb+A->row_block_size*(A->pattern->index[iptr]-1);
169     #pragma ivdep
170     for (icb=0;icb< A->col_block_size;icb++) {
171     icol=icb+A->col_block_size*ic;
172     out[irow] += alpha * A->val[iptr*A->block_size+irb+A->row_block_size*icb] * in[icol];
173     }
174     }
175     }
176     }
177     }
178     }
179     return;
180     }
181     /* CSR format with offset 0*/
182     void Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(double alpha,
183     Paso_SparseMatrix* A,
184     double* in,
185     double beta,
186     double* out)
187     {
188     register index_t ir,icol,iptr,icb,irb,irow,ic,Aiptr;
189     register double reg,reg1,reg2,reg3,in1,in2,in3,A00,A10,A20,A01,A11,A21,A02,A12,A22;
190     if (ABS(beta)>0.) {
191     if (beta != 1.) {
192     #pragma omp for private(irow) schedule(static)
193     for (irow=0;irow < A->numRows * A->row_block_size;irow++)
194     out[irow] *= beta;
195     }
196     } else {
197     #pragma omp for private(irow) schedule(static)
198     for (irow=0;irow < A->numRows * A->row_block_size;irow++)
199     out[irow] = 0;
200     }
201     if (ABS(alpha)>0) {
202     if (A ->col_block_size==1 && A->row_block_size ==1) {
203     #pragma omp for private(irow,iptr,reg) schedule(static)
204     for (irow=0;irow< A->pattern->numOutput;++irow) {
205     reg=0.;
206     #pragma ivdep
207     for (iptr=(A->pattern->ptr[irow]);iptr<(A->pattern->ptr[irow+1]); ++iptr) {
208     reg += A->val[iptr] * in[A->pattern->index[iptr]];
209     }
210     out[irow] += alpha * reg;
211     }
212     } else if (A ->col_block_size==2 && A->row_block_size ==2) {
213     #pragma omp for private(ir,reg1,reg2,iptr,ic,Aiptr,in1,in2,A00,A10,A01,A11) schedule(static)
214     for (ir=0;ir< A->pattern->numOutput;ir++) {
215     reg1=0.;
216     reg2=0.;
217     #pragma ivdep
218     for (iptr=A->pattern->ptr[ir];iptr<A->pattern->ptr[ir+1]; iptr++) {
219     ic=2*(A->pattern->index[iptr]);
220     Aiptr=iptr*4;
221     in1=in[ic];
222     in2=in[1+ic];
223     A00=A->val[Aiptr ];
224     A10=A->val[Aiptr+1];
225     A01=A->val[Aiptr+2];
226     A11=A->val[Aiptr+3];
227     reg1 += A00*in1 + A01*in2;
228     reg2 += A10*in1 + A11*in2;
229     }
230     out[ 2*ir] += alpha * reg1;
231     out[1+2*ir] += alpha * reg2;
232     }
233     } else if (A ->col_block_size==3 && A->row_block_size ==3) {
234     #pragma omp for private(ir,reg1,reg2,reg3,iptr,ic,Aiptr,in1,in2,in3,A00,A10,A20,A01,A11,A21,A02,A12,A22) schedule(static)
235     for (ir=0;ir< A->pattern->numOutput;ir++) {
236     reg1=0.;
237     reg2=0.;
238     reg3=0.;
239     #pragma ivdep
240     for (iptr=A->pattern->ptr[ir];iptr<A->pattern->ptr[ir+1]; iptr++) {
241     ic=3*(A->pattern->index[iptr]);
242     Aiptr=iptr*9;
243     in1=in[ic];
244     in2=in[1+ic];
245     in3=in[2+ic];
246     A00=A->val[Aiptr ];
247     A10=A->val[Aiptr+1];
248     A20=A->val[Aiptr+2];
249     A01=A->val[Aiptr+3];
250     A11=A->val[Aiptr+4];
251     A21=A->val[Aiptr+5];
252     A02=A->val[Aiptr+6];
253     A12=A->val[Aiptr+7];
254     A22=A->val[Aiptr+8];
255     reg1 += A00*in1 + A01*in2 + A02*in3;
256     reg2 += A10*in1 + A11*in2 + A12*in3;
257     reg3 += A20*in1 + A21*in2 + A22*in3;
258     }
259     out[ 3*ir] += alpha * reg1;
260     out[1+3*ir] += alpha * reg2;
261     out[2+3*ir] += alpha * reg3;
262     }
263     } else {
264     #pragma omp for private(ir,iptr,irb,icb,irow,icol,reg) schedule(static)
265     for (ir=0;ir< A->pattern->numOutput;ir++) {
266     for (iptr=A->pattern->ptr[ir];iptr<A->pattern->ptr[ir+1]; iptr++) {
267     for (irb=0;irb< A->row_block_size;irb++) {
268     irow=irb+A->row_block_size*ir;
269     reg=0.;
270     #pragma ivdep
271     for (icb=0;icb< A->col_block_size;icb++) {
272     icol=icb+A->col_block_size*(A->pattern->index[iptr]);
273     reg += A->val[iptr*A->block_size+irb+A->row_block_size*icb] * in[icol];
274     }
275     out[irow] += alpha * reg;
276     }
277     }
278     }
279     }
280     }
281     return;
282     }
283     /* CSR format with offset 1*/
284     void Paso_SparseMatrix_MatrixVector_CSR_OFFSET1(double alpha,
285     Paso_SparseMatrix* A,
286     double* in,
287     double beta,
288     double* out) {
289    
290     register index_t ir,icol,iptr,icb,irb,irow,ic;
291     register double reg,reg1,reg2,reg3;
292     #pragma omp barrier
293    
294     if (ABS(beta)>0.) {
295     if (beta != 1.) {
296     #pragma omp for private(irow) schedule(static)
297     for (irow=0;irow < A->numRows * A->row_block_size;irow++)
298     out[irow] *= beta;
299     }
300     } else {
301     #pragma omp for private(irow) schedule(static)
302     for (irow=0;irow < A->numRows * A->row_block_size;irow++)
303     out[irow] = 0;
304     }
305     /* do the operation: */
306     if (ABS(alpha)>0) {
307     if (A ->col_block_size==1 && A->row_block_size ==1) {
308     #pragma omp for private(irow,iptr,reg) schedule(static)
309     for (irow=0;irow< A->pattern->numOutput;++irow) {
310     reg=0.;
311     #pragma ivdep
312     for (iptr=(A->pattern->ptr[irow])-1;iptr<(A->pattern->ptr[irow+1])-1; ++iptr) {
313     reg += A->val[iptr] * in[A->pattern->index[iptr]-1];
314     }
315     out[irow] += alpha * reg;
316     }
317     } else if (A ->col_block_size==2 && A->row_block_size ==2) {
318     #pragma omp for private(ir,reg1,reg2,iptr,ic) schedule(static)
319     for (ir=0;ir< A->pattern->numOutput;ir++) {
320     reg1=0.;
321     reg2=0.;
322     #pragma ivdep
323     for (iptr=A->pattern->ptr[ir]-1;iptr<A->pattern->ptr[ir+1]-1; iptr++) {
324     ic=2*(A->pattern->index[iptr]-1);
325     reg1 += A->val[iptr*4 ]*in[ic] + A->val[iptr*4+2]*in[1+ic];
326     reg2 += A->val[iptr*4+1]*in[ic] + A->val[iptr*4+3]*in[1+ic];
327     }
328     out[ 2*ir] += alpha * reg1;
329     out[1+2*ir] += alpha * reg2;
330     }
331     } else if (A ->col_block_size==3 && A->row_block_size ==3) {
332     #pragma omp for private(ir,reg1,reg2,reg3,iptr,ic) schedule(static)
333     for (ir=0;ir< A->pattern->numOutput;ir++) {
334     reg1=0.;
335     reg2=0.;
336     reg3=0.;
337     #pragma ivdep
338     for (iptr=A->pattern->ptr[ir]-1;iptr<A->pattern->ptr[ir+1]-1; iptr++) {
339     ic=3*(A->pattern->index[iptr]-1);
340     reg1 += A->val[iptr*9 ]*in[ic] + A->val[iptr*9+3]*in[1+ic] + A->val[iptr*9+6]*in[2+ic];
341     reg2 += A->val[iptr*9+1]*in[ic] + A->val[iptr*9+4]*in[1+ic] + A->val[iptr*9+7]*in[2+ic];
342     reg3 += A->val[iptr*9+2]*in[ic] + A->val[iptr*9+5]*in[1+ic] + A->val[iptr*9+8]*in[2+ic];
343     }
344     out[ 3*ir] += alpha * reg1;
345     out[1+3*ir] += alpha * reg2;
346     out[2+3*ir] += alpha * reg3;
347     }
348     } else {
349     #pragma omp for private(ir,iptr,irb,icb,irow,icol,reg) schedule(static)
350     for (ir=0;ir< A->pattern->numOutput;ir++) {
351     for (iptr=A->pattern->ptr[ir]-1;iptr<A->pattern->ptr[ir+1]-1; iptr++) {
352     for (irb=0;irb< A->row_block_size;irb++) {
353     irow=irb+A->row_block_size*ir;
354     reg=0.;
355     #pragma ivdep
356     for (icb=0;icb< A->col_block_size;icb++) {
357     icol=icb+A->col_block_size*(A->pattern->index[iptr]-1);
358     reg += A->val[iptr*A->block_size+irb+A->row_block_size*icb] * in[icol];
359     }
360     out[irow] += alpha * reg;
361     }
362     }
363     }
364     }
365     }
366     return;
367     }

  ViewVC Help
Powered by ViewVC 1.1.26