/[escript]/branches/windows_from_1456_trunk_1490_merged_in/paso/src/SparseMatrix_MatrixVector.c
ViewVC logotype

Contents of /branches/windows_from_1456_trunk_1490_merged_in/paso/src/SparseMatrix_MatrixVector.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1520 - (show annotations)
Tue Apr 22 05:32:08 2008 UTC (14 years, 2 months ago) by phornby
File MIME type: text/plain
File size: 13589 byte(s)
Catch up with Lutz' mods  of files I've modded before merging in recent trunk changes.
1
2 /* $Id: SparseMatrix_MatrixVector.c 1306 2007-09-18 05:51:09Z ksteube $ */
3
4 /*******************************************************
5 *
6 * Copyright 2003-2007 by ACceSS MNRF
7 * Copyright 2007 by University of Queensland
8 *
9 * http://esscc.uq.edu.au
10 * Primary Business: Queensland, Australia
11 * Licensed under the Open Software License version 3.0
12 * http://www.opensource.org/licenses/osl-3.0.php
13 *
14 *******************************************************/
15
16 /**************************************************************/
17
18 /* Paso: raw scaled vector update operation: out = alpha * A * in + beta * out */
19
20 /**************************************************************/
21
22 /* Author: gross@access.edu.au */
23
24 /**************************************************************/
25
26 #include "SparseMatrix.h"
27
28 /* CSC format with offset 0*/
29 void Paso_SparseMatrix_MatrixVector_CSC_OFFSET0(double alpha,
30 Paso_SparseMatrix* A,
31 double* in,
32 double beta,
33 double* out) {
34
35 register index_t ir,icol,iptr,icb,irb,irow,ic;
36 #pragma omp barrier
37
38 if (ABS(beta)>0.) {
39 if (beta != 1.) {
40 #pragma omp for private(irow) schedule(static)
41 for (irow=0;irow < A->numRows * A->row_block_size;irow++)
42 out[irow] *= beta;
43 }
44 } else {
45 #pragma omp for private(irow) schedule(static)
46 for (irow=0;irow < A->numRows * A->row_block_size;irow++)
47 out[irow] = 0;
48 }
49 if (Paso_Pattern_isEmpty(A->pattern)) return;
50 /* do the operation: */
51 if (ABS(alpha)>0) {
52 if (A ->col_block_size==1 && A->row_block_size ==1) {
53 /* TODO: parallelize (good luck!) */
54 #pragma omp single
55 for (icol=0;icol< A->pattern->numOutput;++icol) {
56 #pragma ivdep
57 for (iptr=A->pattern->ptr[icol];iptr<A->pattern->ptr[icol+1]; ++iptr) {
58 out[A->pattern->index[iptr]]+= alpha * A->val[iptr] * in[icol];
59 }
60 }
61 } else if (A ->col_block_size==2 && A->row_block_size ==2) {
62 /* TODO: parallelize */
63 #pragma omp single
64 for (ic=0;ic< A->pattern->numOutput;ic++) {
65 #pragma ivdep
66 for (iptr=A->pattern->ptr[ic];iptr<A->pattern->ptr[ic+1]; iptr++) {
67 ir=2*(A->pattern->index[iptr]);
68 out[ ir] += alpha * ( A->val[iptr*4 ]*in[ic] + A->val[iptr*4+2]*in[1+ic] );
69 out[1+ir] += alpha * ( A->val[iptr*4+1]*in[ic] + A->val[iptr*4+3]*in[1+ic] );
70 }
71 }
72 } else if (A ->col_block_size==3 && A->row_block_size ==3) {
73 /* TODO: parallelize */
74 #pragma omp single
75 for (ic=0;ic< A->pattern->numOutput;ic++) {
76 #pragma ivdep
77 for (iptr=A->pattern->ptr[ic];iptr<A->pattern->ptr[ic+1]; iptr++) {
78 ir=3*(A->pattern->index[iptr]);
79 out[ ir] += alpha * ( A->val[iptr*9 ]*in[ic] + A->val[iptr*9+3]*in[1+ic] + A->val[iptr*9+6]*in[2+ic] );
80 out[1+ir] += alpha * ( A->val[iptr*9+1]*in[ic] + A->val[iptr*9+4]*in[1+ic] + A->val[iptr*9+7]*in[2+ic] );
81 out[2+ir] += alpha * ( A->val[iptr*9+2]*in[ic] + A->val[iptr*9+5]*in[1+ic] + A->val[iptr*9+8]*in[2+ic] );
82 }
83 }
84 } else {
85 /* TODO: parallelize */
86 #pragma omp single
87 for (ic=0;ic< A->pattern->numOutput;ic++) {
88 for (iptr=A->pattern->ptr[ic];iptr<A->pattern->ptr[ic+1]; iptr++) {
89 for (irb=0;irb< A->row_block_size;irb++) {
90 irow=irb+A->row_block_size*(A->pattern->index[iptr]);
91 #pragma ivdep
92 for (icb=0;icb< A->col_block_size;icb++) {
93 icol=icb+A->col_block_size*ic;
94 out[irow] += alpha * A->val[iptr*A->block_size+irb+A->row_block_size*icb] * in[icol];
95 }
96 }
97 }
98 }
99 }
100 }
101 return;
102 }
103
104 /* CSC format with offset 1*/
105 void Paso_SparseMatrix_MatrixVector_CSC_OFFSET1(double alpha,
106 Paso_SparseMatrix* A,
107 double* in,
108 double beta,
109 double* out) {
110
111 register index_t ir,icol,iptr,icb,irb,irow,ic;
112 #pragma omp barrier
113
114 if (ABS(beta)>0.) {
115 if (beta != 1.) {
116 #pragma omp for private(irow) schedule(static)
117 for (irow=0;irow < A->numRows * A->row_block_size;irow++) {
118 out[irow] *= beta;
119 }
120 }
121 } else {
122 #pragma omp for private(irow) schedule(static)
123 for (irow=0;irow < A->numRows * A->row_block_size;irow++)
124 out[irow] = 0;
125 }
126
127 /* do the operation: */
128 if (ABS(alpha)>0) {
129 if (A ->col_block_size==1 && A->row_block_size ==1) {
130 /* TODO: parallelize (good luck!) */
131 #pragma omp single
132 for (icol=0;icol< A->pattern->numOutput;++icol) {
133 #pragma ivdep
134 for (iptr=A->pattern->ptr[icol]-1;iptr<A->pattern->ptr[icol+1]-1; ++iptr) {
135 out[A->pattern->index[iptr]-1]+= alpha * A->val[iptr] * in[icol];
136 }
137 }
138 } else if (A ->col_block_size==2 && A->row_block_size ==2) {
139 /* TODO: parallelize */
140 #pragma omp single
141 for (ic=0;ic< A->pattern->numOutput;ic++) {
142 for (iptr=A->pattern->ptr[ic]-1;iptr<A->pattern->ptr[ic+1]-1; iptr++) {
143 ir=2*(A->pattern->index[iptr]-1);
144 out[ ir] += alpha * ( A->val[iptr*4 ]*in[ic] + A->val[iptr*4+2]*in[1+ic] );
145 out[1+ir] += alpha * ( A->val[iptr*4+1]*in[ic] + A->val[iptr*4+3]*in[1+ic] );
146 }
147 }
148 } else if (A ->col_block_size==3 && A->row_block_size ==3) {
149 /* TODO: parallelize */
150 #pragma omp single
151 for (ic=0;ic< A->pattern->numOutput;ic++) {
152 #pragma ivdep
153 for (iptr=A->pattern->ptr[ic]-1;iptr<A->pattern->ptr[ic+1]-1; iptr++) {
154 ir=3*(A->pattern->index[iptr]-1);
155 out[ ir] += alpha * ( A->val[iptr*9 ]*in[ic] + A->val[iptr*9+3]*in[1+ic] + A->val[iptr*9+6]*in[2+ic] );
156 out[1+ir] += alpha * ( A->val[iptr*9+1]*in[ic] + A->val[iptr*9+4]*in[1+ic] + A->val[iptr*9+7]*in[2+ic] );
157 out[2+ir] += alpha * ( A->val[iptr*9+2]*in[ic] + A->val[iptr*9+5]*in[1+ic] + A->val[iptr*9+8]*in[2+ic] );
158 }
159 }
160 } else {
161 /* TODO: parallelize */
162 #pragma omp single
163 for (ic=0;ic< A->pattern->numOutput;ic++) {
164 for (iptr=A->pattern->ptr[ic]-1;iptr<A->pattern->ptr[ic+1]-1; iptr++) {
165 for (irb=0;irb< A->row_block_size;irb++) {
166 irow=irb+A->row_block_size*(A->pattern->index[iptr]-1);
167 #pragma ivdep
168 for (icb=0;icb< A->col_block_size;icb++) {
169 icol=icb+A->col_block_size*ic;
170 out[irow] += alpha * A->val[iptr*A->block_size+irb+A->row_block_size*icb] * in[icol];
171 }
172 }
173 }
174 }
175 }
176 }
177 return;
178 }
179 /* CSR format with offset 0*/
180 void Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(double alpha,
181 Paso_SparseMatrix* A,
182 double* in,
183 double beta,
184 double* out)
185 {
186 register index_t ir,icol,iptr,icb,irb,irow,ic,Aiptr;
187 register double reg,reg1,reg2,reg3,in1,in2,in3,A00,A10,A20,A01,A11,A21,A02,A12,A22;
188 if (ABS(beta)>0.) {
189 if (beta != 1.) {
190 #pragma omp for private(irow) schedule(static)
191 for (irow=0;irow < A->numRows * A->row_block_size;irow++)
192 out[irow] *= beta;
193 }
194 } else {
195 #pragma omp for private(irow) schedule(static)
196 for (irow=0;irow < A->numRows * A->row_block_size;irow++)
197 out[irow] = 0;
198 }
199 if (ABS(alpha)>0) {
200 if (A ->col_block_size==1 && A->row_block_size ==1) {
201 #pragma omp for private(irow,iptr,reg) schedule(static)
202 for (irow=0;irow< A->pattern->numOutput;++irow) {
203 reg=0.;
204 #pragma ivdep
205 for (iptr=(A->pattern->ptr[irow]);iptr<(A->pattern->ptr[irow+1]); ++iptr) {
206 reg += A->val[iptr] * in[A->pattern->index[iptr]];
207 }
208 out[irow] += alpha * reg;
209 }
210 } else if (A ->col_block_size==2 && A->row_block_size ==2) {
211 #pragma omp for private(ir,reg1,reg2,iptr,ic,Aiptr,in1,in2,A00,A10,A01,A11) schedule(static)
212 for (ir=0;ir< A->pattern->numOutput;ir++) {
213 reg1=0.;
214 reg2=0.;
215 #pragma ivdep
216 for (iptr=A->pattern->ptr[ir];iptr<A->pattern->ptr[ir+1]; iptr++) {
217 ic=2*(A->pattern->index[iptr]);
218 Aiptr=iptr*4;
219 in1=in[ic];
220 in2=in[1+ic];
221 A00=A->val[Aiptr ];
222 A10=A->val[Aiptr+1];
223 A01=A->val[Aiptr+2];
224 A11=A->val[Aiptr+3];
225 reg1 += A00*in1 + A01*in2;
226 reg2 += A10*in1 + A11*in2;
227 }
228 out[ 2*ir] += alpha * reg1;
229 out[1+2*ir] += alpha * reg2;
230 }
231 } else if (A ->col_block_size==3 && A->row_block_size ==3) {
232 #pragma omp for private(ir,reg1,reg2,reg3,iptr,ic,Aiptr,in1,in2,in3,A00,A10,A20,A01,A11,A21,A02,A12,A22) schedule(static)
233 for (ir=0;ir< A->pattern->numOutput;ir++) {
234 reg1=0.;
235 reg2=0.;
236 reg3=0.;
237 #pragma ivdep
238 for (iptr=A->pattern->ptr[ir];iptr<A->pattern->ptr[ir+1]; iptr++) {
239 ic=3*(A->pattern->index[iptr]);
240 Aiptr=iptr*9;
241 in1=in[ic];
242 in2=in[1+ic];
243 in3=in[2+ic];
244 A00=A->val[Aiptr ];
245 A10=A->val[Aiptr+1];
246 A20=A->val[Aiptr+2];
247 A01=A->val[Aiptr+3];
248 A11=A->val[Aiptr+4];
249 A21=A->val[Aiptr+5];
250 A02=A->val[Aiptr+6];
251 A12=A->val[Aiptr+7];
252 A22=A->val[Aiptr+8];
253 reg1 += A00*in1 + A01*in2 + A02*in3;
254 reg2 += A10*in1 + A11*in2 + A12*in3;
255 reg3 += A20*in1 + A21*in2 + A22*in3;
256 }
257 out[ 3*ir] += alpha * reg1;
258 out[1+3*ir] += alpha * reg2;
259 out[2+3*ir] += alpha * reg3;
260 }
261 } else {
262 #pragma omp for private(ir,iptr,irb,icb,irow,icol,reg) schedule(static)
263 for (ir=0;ir< A->pattern->numOutput;ir++) {
264 for (iptr=A->pattern->ptr[ir];iptr<A->pattern->ptr[ir+1]; iptr++) {
265 for (irb=0;irb< A->row_block_size;irb++) {
266 irow=irb+A->row_block_size*ir;
267 reg=0.;
268 #pragma ivdep
269 for (icb=0;icb< A->col_block_size;icb++) {
270 icol=icb+A->col_block_size*(A->pattern->index[iptr]);
271 reg += A->val[iptr*A->block_size+irb+A->row_block_size*icb] * in[icol];
272 }
273 out[irow] += alpha * reg;
274 }
275 }
276 }
277 }
278 }
279 return;
280 }
281 /* CSR format with offset 1*/
282 void Paso_SparseMatrix_MatrixVector_CSR_OFFSET1(double alpha,
283 Paso_SparseMatrix* A,
284 double* in,
285 double beta,
286 double* out) {
287
288 register index_t ir,icol,iptr,icb,irb,irow,ic;
289 register double reg,reg1,reg2,reg3;
290 #pragma omp barrier
291
292 if (ABS(beta)>0.) {
293 if (beta != 1.) {
294 #pragma omp for private(irow) schedule(static)
295 for (irow=0;irow < A->numRows * A->row_block_size;irow++)
296 out[irow] *= beta;
297 }
298 } else {
299 #pragma omp for private(irow) schedule(static)
300 for (irow=0;irow < A->numRows * A->row_block_size;irow++)
301 out[irow] = 0;
302 }
303 /* do the operation: */
304 if (ABS(alpha)>0) {
305 if (A ->col_block_size==1 && A->row_block_size ==1) {
306 #pragma omp for private(irow,iptr,reg) schedule(static)
307 for (irow=0;irow< A->pattern->numOutput;++irow) {
308 reg=0.;
309 #pragma ivdep
310 for (iptr=(A->pattern->ptr[irow])-1;iptr<(A->pattern->ptr[irow+1])-1; ++iptr) {
311 reg += A->val[iptr] * in[A->pattern->index[iptr]-1];
312 }
313 out[irow] += alpha * reg;
314 }
315 } else if (A ->col_block_size==2 && A->row_block_size ==2) {
316 #pragma omp for private(ir,reg1,reg2,iptr,ic) schedule(static)
317 for (ir=0;ir< A->pattern->numOutput;ir++) {
318 reg1=0.;
319 reg2=0.;
320 #pragma ivdep
321 for (iptr=A->pattern->ptr[ir]-1;iptr<A->pattern->ptr[ir+1]-1; iptr++) {
322 ic=2*(A->pattern->index[iptr]-1);
323 reg1 += A->val[iptr*4 ]*in[ic] + A->val[iptr*4+2]*in[1+ic];
324 reg2 += A->val[iptr*4+1]*in[ic] + A->val[iptr*4+3]*in[1+ic];
325 }
326 out[ 2*ir] += alpha * reg1;
327 out[1+2*ir] += alpha * reg2;
328 }
329 } else if (A ->col_block_size==3 && A->row_block_size ==3) {
330 #pragma omp for private(ir,reg1,reg2,reg3,iptr,ic) schedule(static)
331 for (ir=0;ir< A->pattern->numOutput;ir++) {
332 reg1=0.;
333 reg2=0.;
334 reg3=0.;
335 #pragma ivdep
336 for (iptr=A->pattern->ptr[ir]-1;iptr<A->pattern->ptr[ir+1]-1; iptr++) {
337 ic=3*(A->pattern->index[iptr]-1);
338 reg1 += A->val[iptr*9 ]*in[ic] + A->val[iptr*9+3]*in[1+ic] + A->val[iptr*9+6]*in[2+ic];
339 reg2 += A->val[iptr*9+1]*in[ic] + A->val[iptr*9+4]*in[1+ic] + A->val[iptr*9+7]*in[2+ic];
340 reg3 += A->val[iptr*9+2]*in[ic] + A->val[iptr*9+5]*in[1+ic] + A->val[iptr*9+8]*in[2+ic];
341 }
342 out[ 3*ir] += alpha * reg1;
343 out[1+3*ir] += alpha * reg2;
344 out[2+3*ir] += alpha * reg3;
345 }
346 } else {
347 #pragma omp for private(ir,iptr,irb,icb,irow,icol,reg) schedule(static)
348 for (ir=0;ir< A->pattern->numOutput;ir++) {
349 for (iptr=A->pattern->ptr[ir]-1;iptr<A->pattern->ptr[ir+1]-1; iptr++) {
350 for (irb=0;irb< A->row_block_size;irb++) {
351 irow=irb+A->row_block_size*ir;
352 reg=0.;
353 #pragma ivdep
354 for (icb=0;icb< A->col_block_size;icb++) {
355 icol=icb+A->col_block_size*(A->pattern->index[iptr]-1);
356 reg += A->val[iptr*A->block_size+irb+A->row_block_size*icb] * in[icol];
357 }
358 out[irow] += alpha * reg;
359 }
360 }
361 }
362 }
363 }
364 return;
365 }

  ViewVC Help
Powered by ViewVC 1.1.26