/[escript]/trunk/paso/src/SparseMatrix_MatrixVector.c
ViewVC logotype

Contents of /trunk/paso/src/SparseMatrix_MatrixVector.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1511 - (show annotations)
Mon Apr 14 23:09:38 2008 UTC (11 years, 4 months ago) by gross
File MIME type: text/plain
File size: 13665 byte(s)
some fixes of compiler complains under windows
1
2 /* $Id: SparseMatrix_MatrixVector.c 1306 2007-09-18 05:51:09Z ksteube $ */
3
4 /*******************************************************
5 *
6 * Copyright 2003-2007 by ACceSS MNRF
7 * Copyright 2007 by University of Queensland
8 *
9 * http://esscc.uq.edu.au
10 * Primary Business: Queensland, Australia
11 * Licensed under the Open Software License version 3.0
12 * http://www.opensource.org/licenses/osl-3.0.php
13 *
14 *******************************************************/
15
16 /**************************************************************/
17
18 /* Paso: raw scaled vector update operation: out = alpha * A * in + beta * out */
19
20 /**************************************************************/
21
22 /* Author: gross@access.edu.au */
23
24 /**************************************************************/
25
26 #include "SparseMatrix.h"
27
28 /* CSC format with offset 0*/
29 void Paso_SparseMatrix_MatrixVector_CSC_OFFSET0(double alpha,
30 Paso_SparseMatrix* A,
31 double* in,
32 double beta,
33 double* out) {
34
35 register index_t ir,icol,iptr,icb,irb,irow,ic;
36 register double reg,reg1,reg2,reg3;
37 #pragma omp barrier
38
39 if (ABS(beta)>0.) {
40 if (beta != 1.) {
41 #pragma omp for private(irow) schedule(static)
42 for (irow=0;irow < A->numRows * A->row_block_size;irow++)
43 out[irow] *= beta;
44 }
45 } else {
46 #pragma omp for private(irow) schedule(static)
47 for (irow=0;irow < A->numRows * A->row_block_size;irow++)
48 out[irow] = 0;
49 }
50 if (Paso_Pattern_isEmpty(A->pattern)) return;
51 /* do the operation: */
52 if (ABS(alpha)>0) {
53 if (A ->col_block_size==1 && A->row_block_size ==1) {
54 /* TODO: parallelize (good luck!) */
55 #pragma omp single
56 for (icol=0;icol< A->pattern->numOutput;++icol) {
57 #pragma ivdep
58 for (iptr=A->pattern->ptr[icol];iptr<A->pattern->ptr[icol+1]; ++iptr) {
59 out[A->pattern->index[iptr]]+= alpha * A->val[iptr] * in[icol];
60 }
61 }
62 } else if (A ->col_block_size==2 && A->row_block_size ==2) {
63 /* TODO: parallelize */
64 #pragma omp single
65 for (ic=0;ic< A->pattern->numOutput;ic++) {
66 #pragma ivdep
67 for (iptr=A->pattern->ptr[ic];iptr<A->pattern->ptr[ic+1]; iptr++) {
68 ir=2*(A->pattern->index[iptr]);
69 out[ ir] += alpha * ( A->val[iptr*4 ]*in[ic] + A->val[iptr*4+2]*in[1+ic] );
70 out[1+ir] += alpha * ( A->val[iptr*4+1]*in[ic] + A->val[iptr*4+3]*in[1+ic] );
71 }
72 }
73 } else if (A ->col_block_size==3 && A->row_block_size ==3) {
74 /* TODO: parallelize */
75 #pragma omp single
76 for (ic=0;ic< A->pattern->numOutput;ic++) {
77 #pragma ivdep
78 for (iptr=A->pattern->ptr[ic];iptr<A->pattern->ptr[ic+1]; iptr++) {
79 ir=3*(A->pattern->index[iptr]);
80 out[ ir] += alpha * ( A->val[iptr*9 ]*in[ic] + A->val[iptr*9+3]*in[1+ic] + A->val[iptr*9+6]*in[2+ic] );
81 out[1+ir] += alpha * ( A->val[iptr*9+1]*in[ic] + A->val[iptr*9+4]*in[1+ic] + A->val[iptr*9+7]*in[2+ic] );
82 out[2+ir] += alpha * ( A->val[iptr*9+2]*in[ic] + A->val[iptr*9+5]*in[1+ic] + A->val[iptr*9+8]*in[2+ic] );
83 }
84 }
85 } else {
86 /* TODO: parallelize */
87 #pragma omp single
88 for (ic=0;ic< A->pattern->numOutput;ic++) {
89 for (iptr=A->pattern->ptr[ic];iptr<A->pattern->ptr[ic+1]; iptr++) {
90 for (irb=0;irb< A->row_block_size;irb++) {
91 irow=irb+A->row_block_size*(A->pattern->index[iptr]);
92 #pragma ivdep
93 for (icb=0;icb< A->col_block_size;icb++) {
94 icol=icb+A->col_block_size*ic;
95 out[irow] += alpha * A->val[iptr*A->block_size+irb+A->row_block_size*icb] * in[icol];
96 }
97 }
98 }
99 }
100 }
101 }
102 return;
103 }
104
105 /* CSC format with offset 1*/
106 void Paso_SparseMatrix_MatrixVector_CSC_OFFSET1(double alpha,
107 Paso_SparseMatrix* A,
108 double* in,
109 double beta,
110 double* out) {
111
112 register index_t ir,icol,iptr,icb,irb,irow,ic;
113 register double reg,reg1,reg2,reg3;
114 #pragma omp barrier
115
116 if (ABS(beta)>0.) {
117 if (beta != 1.) {
118 #pragma omp for private(irow) schedule(static)
119 for (irow=0;irow < A->numRows * A->row_block_size;irow++) {
120 out[irow] *= beta;
121 }
122 }
123 } else {
124 #pragma omp for private(irow) schedule(static)
125 for (irow=0;irow < A->numRows * A->row_block_size;irow++)
126 out[irow] = 0;
127 }
128
129 /* do the operation: */
130 if (ABS(alpha)>0) {
131 if (A ->col_block_size==1 && A->row_block_size ==1) {
132 /* TODO: parallelize (good luck!) */
133 #pragma omp single
134 for (icol=0;icol< A->pattern->numOutput;++icol) {
135 #pragma ivdep
136 for (iptr=A->pattern->ptr[icol]-1;iptr<A->pattern->ptr[icol+1]-1; ++iptr) {
137 out[A->pattern->index[iptr]-1]+= alpha * A->val[iptr] * in[icol];
138 }
139 }
140 } else if (A ->col_block_size==2 && A->row_block_size ==2) {
141 /* TODO: parallelize */
142 #pragma omp single
143 for (ic=0;ic< A->pattern->numOutput;ic++) {
144 for (iptr=A->pattern->ptr[ic]-1;iptr<A->pattern->ptr[ic+1]-1; iptr++) {
145 ir=2*(A->pattern->index[iptr]-1);
146 out[ ir] += alpha * ( A->val[iptr*4 ]*in[ic] + A->val[iptr*4+2]*in[1+ic] );
147 out[1+ir] += alpha * ( A->val[iptr*4+1]*in[ic] + A->val[iptr*4+3]*in[1+ic] );
148 }
149 }
150 } else if (A ->col_block_size==3 && A->row_block_size ==3) {
151 /* TODO: parallelize */
152 #pragma omp single
153 for (ic=0;ic< A->pattern->numOutput;ic++) {
154 #pragma ivdep
155 for (iptr=A->pattern->ptr[ic]-1;iptr<A->pattern->ptr[ic+1]-1; iptr++) {
156 ir=3*(A->pattern->index[iptr]-1);
157 out[ ir] += alpha * ( A->val[iptr*9 ]*in[ic] + A->val[iptr*9+3]*in[1+ic] + A->val[iptr*9+6]*in[2+ic] );
158 out[1+ir] += alpha * ( A->val[iptr*9+1]*in[ic] + A->val[iptr*9+4]*in[1+ic] + A->val[iptr*9+7]*in[2+ic] );
159 out[2+ir] += alpha * ( A->val[iptr*9+2]*in[ic] + A->val[iptr*9+5]*in[1+ic] + A->val[iptr*9+8]*in[2+ic] );
160 }
161 }
162 } else {
163 /* TODO: parallelize */
164 #pragma omp single
165 for (ic=0;ic< A->pattern->numOutput;ic++) {
166 for (iptr=A->pattern->ptr[ic]-1;iptr<A->pattern->ptr[ic+1]-1; iptr++) {
167 for (irb=0;irb< A->row_block_size;irb++) {
168 irow=irb+A->row_block_size*(A->pattern->index[iptr]-1);
169 #pragma ivdep
170 for (icb=0;icb< A->col_block_size;icb++) {
171 icol=icb+A->col_block_size*ic;
172 out[irow] += alpha * A->val[iptr*A->block_size+irb+A->row_block_size*icb] * in[icol];
173 }
174 }
175 }
176 }
177 }
178 }
179 return;
180 }
181 /* CSR format with offset 0*/
182 void Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(double alpha,
183 Paso_SparseMatrix* A,
184 double* in,
185 double beta,
186 double* out)
187 {
188 register index_t ir,icol,iptr,icb,irb,irow,ic,Aiptr;
189 register double reg,reg1,reg2,reg3,in1,in2,in3,A00,A10,A20,A01,A11,A21,A02,A12,A22;
190 if (ABS(beta)>0.) {
191 if (beta != 1.) {
192 #pragma omp for private(irow) schedule(static)
193 for (irow=0;irow < A->numRows * A->row_block_size;irow++)
194 out[irow] *= beta;
195 }
196 } else {
197 #pragma omp for private(irow) schedule(static)
198 for (irow=0;irow < A->numRows * A->row_block_size;irow++)
199 out[irow] = 0;
200 }
201 if (ABS(alpha)>0) {
202 if (A ->col_block_size==1 && A->row_block_size ==1) {
203 #pragma omp for private(irow,iptr,reg) schedule(static)
204 for (irow=0;irow< A->pattern->numOutput;++irow) {
205 reg=0.;
206 #pragma ivdep
207 for (iptr=(A->pattern->ptr[irow]);iptr<(A->pattern->ptr[irow+1]); ++iptr) {
208 reg += A->val[iptr] * in[A->pattern->index[iptr]];
209 }
210 out[irow] += alpha * reg;
211 }
212 } else if (A ->col_block_size==2 && A->row_block_size ==2) {
213 #pragma omp for private(ir,reg1,reg2,iptr,ic,Aiptr,in1,in2,A00,A10,A01,A11) schedule(static)
214 for (ir=0;ir< A->pattern->numOutput;ir++) {
215 reg1=0.;
216 reg2=0.;
217 #pragma ivdep
218 for (iptr=A->pattern->ptr[ir];iptr<A->pattern->ptr[ir+1]; iptr++) {
219 ic=2*(A->pattern->index[iptr]);
220 Aiptr=iptr*4;
221 in1=in[ic];
222 in2=in[1+ic];
223 A00=A->val[Aiptr ];
224 A10=A->val[Aiptr+1];
225 A01=A->val[Aiptr+2];
226 A11=A->val[Aiptr+3];
227 reg1 += A00*in1 + A01*in2;
228 reg2 += A10*in1 + A11*in2;
229 }
230 out[ 2*ir] += alpha * reg1;
231 out[1+2*ir] += alpha * reg2;
232 }
233 } else if (A ->col_block_size==3 && A->row_block_size ==3) {
234 #pragma omp for private(ir,reg1,reg2,reg3,iptr,ic,Aiptr,in1,in2,in3,A00,A10,A20,A01,A11,A21,A02,A12,A22) schedule(static)
235 for (ir=0;ir< A->pattern->numOutput;ir++) {
236 reg1=0.;
237 reg2=0.;
238 reg3=0.;
239 #pragma ivdep
240 for (iptr=A->pattern->ptr[ir];iptr<A->pattern->ptr[ir+1]; iptr++) {
241 ic=3*(A->pattern->index[iptr]);
242 Aiptr=iptr*9;
243 in1=in[ic];
244 in2=in[1+ic];
245 in3=in[2+ic];
246 A00=A->val[Aiptr ];
247 A10=A->val[Aiptr+1];
248 A20=A->val[Aiptr+2];
249 A01=A->val[Aiptr+3];
250 A11=A->val[Aiptr+4];
251 A21=A->val[Aiptr+5];
252 A02=A->val[Aiptr+6];
253 A12=A->val[Aiptr+7];
254 A22=A->val[Aiptr+8];
255 reg1 += A00*in1 + A01*in2 + A02*in3;
256 reg2 += A10*in1 + A11*in2 + A12*in3;
257 reg3 += A20*in1 + A21*in2 + A22*in3;
258 }
259 out[ 3*ir] += alpha * reg1;
260 out[1+3*ir] += alpha * reg2;
261 out[2+3*ir] += alpha * reg3;
262 }
263 } else {
264 #pragma omp for private(ir,iptr,irb,icb,irow,icol,reg) schedule(static)
265 for (ir=0;ir< A->pattern->numOutput;ir++) {
266 for (iptr=A->pattern->ptr[ir];iptr<A->pattern->ptr[ir+1]; iptr++) {
267 for (irb=0;irb< A->row_block_size;irb++) {
268 irow=irb+A->row_block_size*ir;
269 reg=0.;
270 #pragma ivdep
271 for (icb=0;icb< A->col_block_size;icb++) {
272 icol=icb+A->col_block_size*(A->pattern->index[iptr]);
273 reg += A->val[iptr*A->block_size+irb+A->row_block_size*icb] * in[icol];
274 }
275 out[irow] += alpha * reg;
276 }
277 }
278 }
279 }
280 }
281 return;
282 }
283 /* CSR format with offset 1*/
284 void Paso_SparseMatrix_MatrixVector_CSR_OFFSET1(double alpha,
285 Paso_SparseMatrix* A,
286 double* in,
287 double beta,
288 double* out) {
289
290 register index_t ir,icol,iptr,icb,irb,irow,ic;
291 register double reg,reg1,reg2,reg3;
292 #pragma omp barrier
293
294 if (ABS(beta)>0.) {
295 if (beta != 1.) {
296 #pragma omp for private(irow) schedule(static)
297 for (irow=0;irow < A->numRows * A->row_block_size;irow++)
298 out[irow] *= beta;
299 }
300 } else {
301 #pragma omp for private(irow) schedule(static)
302 for (irow=0;irow < A->numRows * A->row_block_size;irow++)
303 out[irow] = 0;
304 }
305 /* do the operation: */
306 if (ABS(alpha)>0) {
307 if (A ->col_block_size==1 && A->row_block_size ==1) {
308 #pragma omp for private(irow,iptr,reg) schedule(static)
309 for (irow=0;irow< A->pattern->numOutput;++irow) {
310 reg=0.;
311 #pragma ivdep
312 for (iptr=(A->pattern->ptr[irow])-1;iptr<(A->pattern->ptr[irow+1])-1; ++iptr) {
313 reg += A->val[iptr] * in[A->pattern->index[iptr]-1];
314 }
315 out[irow] += alpha * reg;
316 }
317 } else if (A ->col_block_size==2 && A->row_block_size ==2) {
318 #pragma omp for private(ir,reg1,reg2,iptr,ic) schedule(static)
319 for (ir=0;ir< A->pattern->numOutput;ir++) {
320 reg1=0.;
321 reg2=0.;
322 #pragma ivdep
323 for (iptr=A->pattern->ptr[ir]-1;iptr<A->pattern->ptr[ir+1]-1; iptr++) {
324 ic=2*(A->pattern->index[iptr]-1);
325 reg1 += A->val[iptr*4 ]*in[ic] + A->val[iptr*4+2]*in[1+ic];
326 reg2 += A->val[iptr*4+1]*in[ic] + A->val[iptr*4+3]*in[1+ic];
327 }
328 out[ 2*ir] += alpha * reg1;
329 out[1+2*ir] += alpha * reg2;
330 }
331 } else if (A ->col_block_size==3 && A->row_block_size ==3) {
332 #pragma omp for private(ir,reg1,reg2,reg3,iptr,ic) schedule(static)
333 for (ir=0;ir< A->pattern->numOutput;ir++) {
334 reg1=0.;
335 reg2=0.;
336 reg3=0.;
337 #pragma ivdep
338 for (iptr=A->pattern->ptr[ir]-1;iptr<A->pattern->ptr[ir+1]-1; iptr++) {
339 ic=3*(A->pattern->index[iptr]-1);
340 reg1 += A->val[iptr*9 ]*in[ic] + A->val[iptr*9+3]*in[1+ic] + A->val[iptr*9+6]*in[2+ic];
341 reg2 += A->val[iptr*9+1]*in[ic] + A->val[iptr*9+4]*in[1+ic] + A->val[iptr*9+7]*in[2+ic];
342 reg3 += A->val[iptr*9+2]*in[ic] + A->val[iptr*9+5]*in[1+ic] + A->val[iptr*9+8]*in[2+ic];
343 }
344 out[ 3*ir] += alpha * reg1;
345 out[1+3*ir] += alpha * reg2;
346 out[2+3*ir] += alpha * reg3;
347 }
348 } else {
349 #pragma omp for private(ir,iptr,irb,icb,irow,icol,reg) schedule(static)
350 for (ir=0;ir< A->pattern->numOutput;ir++) {
351 for (iptr=A->pattern->ptr[ir]-1;iptr<A->pattern->ptr[ir+1]-1; iptr++) {
352 for (irb=0;irb< A->row_block_size;irb++) {
353 irow=irb+A->row_block_size*ir;
354 reg=0.;
355 #pragma ivdep
356 for (icb=0;icb< A->col_block_size;icb++) {
357 icol=icb+A->col_block_size*(A->pattern->index[iptr]-1);
358 reg += A->val[iptr*A->block_size+irb+A->row_block_size*icb] * in[icol];
359 }
360 out[irow] += alpha * reg;
361 }
362 }
363 }
364 }
365 }
366 return;
367 }

  ViewVC Help
Powered by ViewVC 1.1.26