/[escript]/trunk/paso/src/SystemMatrix_MatrixVector.c
ViewVC logotype

Contents of /trunk/paso/src/SystemMatrix_MatrixVector.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 512 - (show annotations)
Fri Feb 10 07:04:14 2006 UTC (13 years, 6 months ago) by gross
File MIME type: text/plain
File size: 13131 byte(s)
bug in parallelization fixed
1 /* $Id$ */
2
3 /**************************************************************/
4
5 /* Paso: matrix vector product with sparse matrix */
6
7 /**************************************************************/
8
9 /* Copyrights by ACcESS Australia 2003,2004,2005 */
10 /* Author: gross@access.edu.au */
11
12 /**************************************************************/
13
14 #include "Paso.h"
15 #include "SystemMatrix.h"
16
17 /**************************************************************************/
18
19 /* raw scaled vector update operation: out = alpha * A * in + beta * out */
20
21 /* has to be called within a parallel region */
22 /* barrier synconization is performed to make sure that the input vector available */
23
24 void Paso_SystemMatrix_MatrixVector(double alpha,
25 Paso_SystemMatrix* A,
26 double* in,
27 double beta,
28 double* out) {
29
30 if (A->type & MATRIX_FORMAT_CSC) {
31 if (A->type & MATRIX_FORMAT_OFFSET1) {
32 Paso_SystemMatrix_MatrixVector_CSC_OFFSET1(alpha,A,in,beta,out);
33 } else {
34 Paso_SystemMatrix_MatrixVector_CSC_OFFSET0(alpha,A,in,beta,out);
35 }
36 } else {
37 if (A->type & MATRIX_FORMAT_OFFSET1) {
38 Paso_SystemMatrix_MatrixVector_CSR_OFFSET1(alpha,A,in,beta,out);
39 } else {
40 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(alpha,A,in,beta,out);
41 }
42 }
43 return;
44 }
45
46 void Paso_SystemMatrix_MatrixVector_CSC_OFFSET0(double alpha,
47 Paso_SystemMatrix* A,
48 double* in,
49 double beta,
50 double* out) {
51
52 register index_t ir,icol,iptr,icb,irb,irow,ic;
53 register double reg,reg1,reg2,reg3;
54 #pragma omp barrier
55
56 if (ABS(beta)>0.) {
57 #pragma omp for private(irow) schedule(static)
58 for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
59 out[irow] *= beta;
60 } else {
61 #pragma omp for private(irow) schedule(static)
62 for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
63 out[irow] = 0;
64 }
65
66 /* do the operation: */
67 if (ABS(alpha)>0) {
68 if (A ->col_block_size==1 && A->row_block_size ==1) {
69 /* TODO: parallelize (good luck!) */
70 #pragma omp single
71 for (icol=0;icol< A->pattern->n_ptr;++icol) {
72 for (iptr=A->pattern->ptr[icol];iptr<A->pattern->ptr[icol+1]; ++iptr) {
73 out[A->pattern->index[iptr]]+= alpha * A->val[iptr] * in[icol];
74 }
75 }
76 } else if (A ->col_block_size==2 && A->row_block_size ==2) {
77 /* TODO: parallelize */
78 #pragma omp single
79 for (ic=0;ic< A->pattern->n_ptr;ic++) {
80 for (iptr=A->pattern->ptr[ic];iptr<A->pattern->ptr[ic+1]; iptr++) {
81 ic=2*(A->pattern->index[iptr]);
82 out[ 2*ir] += alpha * ( A->val[iptr*4 ]*in[ic] + A->val[iptr*4+2]*in[1+ic] );
83 out[1+2*ir] += alpha * ( A->val[iptr*4+1]*in[ic] + A->val[iptr*4+3]*in[1+ic] );
84 }
85 }
86 } else if (A ->col_block_size==3 && A->row_block_size ==3) {
87 /* TODO: parallelize */
88 #pragma omp single
89 for (ic=0;ic< A->pattern->n_ptr;ic++) {
90 for (iptr=A->pattern->ptr[ic];iptr<A->pattern->ptr[ic+1]; iptr++) {
91 ir=3*(A->pattern->index[iptr]);
92 out[ 3*ir] += alpha * ( A->val[iptr*9 ]*in[ic] + A->val[iptr*9+3]*in[1+ic] + A->val[iptr*9+6]*in[2+ic] );
93 out[1+3*ir] += alpha * ( A->val[iptr*9+1]*in[ic] + A->val[iptr*9+4]*in[1+ic] + A->val[iptr*9+7]*in[2+ic] );
94 out[2+3*ir] += alpha * ( A->val[iptr*9+2]*in[ic] + A->val[iptr*9+5]*in[1+ic] + A->val[iptr*9+8]*in[2+ic] );
95 }
96 }
97 } else {
98 /* TODO: parallelize */
99 #pragma omp single
100 for (ic=0;ic< A->pattern->n_ptr;ic++) {
101 for (iptr=A->pattern->ptr[ic];iptr<A->pattern->ptr[ic+1]; iptr++) {
102 for (irb=0;irb< A->row_block_size;irb++) {
103 irow=irb+A->row_block_size*(A->pattern->index[iptr]);
104 for (icb=0;icb< A->col_block_size;icb++) {
105 icol=icb+A->col_block_size*ic;
106 out[irow] += alpha * A->val[iptr*A->block_size+irb+A->row_block_size*icb] * in[icol];
107 }
108 }
109 }
110 }
111 }
112 }
113 return;
114 }
115
116 void Paso_SystemMatrix_MatrixVector_CSC_OFFSET1(double alpha,
117 Paso_SystemMatrix* A,
118 double* in,
119 double beta,
120 double* out) {
121
122 register index_t ir,icol,iptr,icb,irb,irow,ic;
123 register double reg,reg1,reg2,reg3;
124 #pragma omp barrier
125
126 if (ABS(beta)>0.) {
127 #pragma omp for private(irow) schedule(static)
128 for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
129 out[irow] *= beta;
130 } else {
131 #pragma omp for private(irow) schedule(static)
132 for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
133 out[irow] = 0;
134 }
135
136 /* do the operation: */
137 if (ABS(alpha)>0) {
138 if (A ->col_block_size==1 && A->row_block_size ==1) {
139 /* TODO: parallelize (good luck!) */
140 #pragma omp single
141 for (icol=0;icol< A->pattern->n_ptr;++icol) {
142 for (iptr=A->pattern->ptr[icol]-1;iptr<A->pattern->ptr[icol+1]-1; ++iptr) {
143 out[A->pattern->index[iptr]-1]+= alpha * A->val[iptr] * in[icol];
144 }
145 }
146 } else if (A ->col_block_size==2 && A->row_block_size ==2) {
147 /* TODO: parallelize */
148 #pragma omp single
149 for (ic=0;ic< A->pattern->n_ptr;ic++) {
150 for (iptr=A->pattern->ptr[ic]-1;iptr<A->pattern->ptr[ic+1]-1; iptr++) {
151 ic=2*(A->pattern->index[iptr]-1);
152 out[ 2*ir] += alpha * ( A->val[iptr*4 ]*in[ic] + A->val[iptr*4+2]*in[1+ic] );
153 out[1+2*ir] += alpha * ( A->val[iptr*4+1]*in[ic] + A->val[iptr*4+3]*in[1+ic] );
154 }
155 }
156 } else if (A ->col_block_size==3 && A->row_block_size ==3) {
157 /* TODO: parallelize */
158 #pragma omp single
159 for (ic=0;ic< A->pattern->n_ptr;ic++) {
160 for (iptr=A->pattern->ptr[ic]-1;iptr<A->pattern->ptr[ic+1]-1; iptr++) {
161 ir=3*(A->pattern->index[iptr]-1);
162 out[ 3*ir] += alpha * ( A->val[iptr*9 ]*in[ic] + A->val[iptr*9+3]*in[1+ic] + A->val[iptr*9+6]*in[2+ic] );
163 out[1+3*ir] += alpha * ( A->val[iptr*9+1]*in[ic] + A->val[iptr*9+4]*in[1+ic] + A->val[iptr*9+7]*in[2+ic] );
164 out[2+3*ir] += alpha * ( A->val[iptr*9+2]*in[ic] + A->val[iptr*9+5]*in[1+ic] + A->val[iptr*9+8]*in[2+ic] );
165 }
166 }
167 } else {
168 /* TODO: parallelize */
169 #pragma omp single
170 for (ic=0;ic< A->pattern->n_ptr;ic++) {
171 for (iptr=A->pattern->ptr[ic]-1;iptr<A->pattern->ptr[ic+1]-1; iptr++) {
172 for (irb=0;irb< A->row_block_size;irb++) {
173 irow=irb+A->row_block_size*(A->pattern->index[iptr]-1);
174 for (icb=0;icb< A->col_block_size;icb++) {
175 icol=icb+A->col_block_size*ic;
176 out[irow] += alpha * A->val[iptr*A->block_size+irb+A->row_block_size*icb] * in[icol];
177 }
178 }
179 }
180 }
181 }
182 }
183 return;
184 }
185
186 void Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(double alpha,
187 Paso_SystemMatrix* A,
188 double* in,
189 double beta,
190 double* out) {
191
192 register index_t ir,icol,iptr,icb,irb,irow,ic,Aiptr;
193 register double reg,reg1,reg2,reg3,in1,in2,in3,A00,A10,A20,A01,A11,A21,A02,A12,A22;
194 #pragma omp barrier
195 if (ABS(beta)>0.) {
196 #pragma omp for private(irow) schedule(static)
197 for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
198 out[irow] *= beta;
199 } else {
200 #pragma omp for private(irow) schedule(static)
201 for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
202 out[irow] = 0;
203 }
204 /* do the operation: */
205 if (ABS(alpha)>0) {
206 if (A ->col_block_size==1 && A->row_block_size ==1) {
207 #pragma omp for private(irow,iptr,reg) schedule(static)
208 for (irow=0;irow< A->pattern->n_ptr;++irow) {
209 reg=0.;
210 for (iptr=(A->pattern->ptr[irow]);iptr<(A->pattern->ptr[irow+1]); ++iptr) {
211 reg += A->val[iptr] * in[A->pattern->index[iptr]];
212 }
213 out[irow] += alpha * reg;
214 }
215 } else if (A ->col_block_size==2 && A->row_block_size ==2) {
216 #pragma omp for private(ir,reg1,reg2,iptr,ic,Aiptr,in1,in2,A00,A10,A01,A11) schedule(static)
217 for (ir=0;ir< A->pattern->n_ptr;ir++) {
218 reg1=0.;
219 reg2=0.;
220 for (iptr=A->pattern->ptr[ir];iptr<A->pattern->ptr[ir+1]; iptr++) {
221 ic=2*(A->pattern->index[iptr]);
222 Aiptr=iptr*4;
223 in1=in[ic];
224 in2=in[1+ic];
225 A00=A->val[Aiptr ];
226 A10=A->val[Aiptr+1];
227 A01=A->val[Aiptr+2];
228 A11=A->val[Aiptr+3];
229 reg1 += A00*in1 + A01*in2;
230 reg2 += A10*in1 + A11*in2;
231 }
232 out[ 2*ir] += alpha * reg1;
233 out[1+2*ir] += alpha * reg2;
234 }
235 } else if (A ->col_block_size==3 && A->row_block_size ==3) {
236 #pragma omp for private(ir,reg1,reg2,reg3,iptr,ic,Aiptr,in1,in2,in3,A00,A10,A20,A01,A11,A21,A02,A12,A22) schedule(static)
237 for (ir=0;ir< A->pattern->n_ptr;ir++) {
238 reg1=0.;
239 reg2=0.;
240 reg3=0.;
241 for (iptr=A->pattern->ptr[ir];iptr<A->pattern->ptr[ir+1]; iptr++) {
242 ic=3*(A->pattern->index[iptr]);
243 Aiptr=iptr*9;
244 in1=in[ic];
245 in2=in[1+ic];
246 in3=in[2+ic];
247 A00=A->val[Aiptr ];
248 A10=A->val[Aiptr+1];
249 A20=A->val[Aiptr+2];
250 A01=A->val[Aiptr+3];
251 A11=A->val[Aiptr+4];
252 A21=A->val[Aiptr+5];
253 A02=A->val[Aiptr+6];
254 A12=A->val[Aiptr+7];
255 A22=A->val[Aiptr+8];
256 reg1 += A00*in1 + A01*in2 + A02*in3;
257 reg2 += A10*in1 + A11*in2 + A12*in3;
258 reg3 += A20*in1 + A21*in2 + A22*in3;
259 }
260 out[ 3*ir] += alpha * reg1;
261 out[1+3*ir] += alpha * reg2;
262 out[2+3*ir] += alpha * reg3;
263 }
264 } else {
265 #pragma omp for private(ir,iptr,irb,icb,irow,icol,reg) schedule(static)
266 for (ir=0;ir< A->pattern->n_ptr;ir++) {
267 for (iptr=A->pattern->ptr[ir];iptr<A->pattern->ptr[ir+1]; iptr++) {
268 for (irb=0;irb< A->row_block_size;irb++) {
269 irow=irb+A->row_block_size*ir;
270 reg=0.;
271 for (icb=0;icb< A->col_block_size;icb++) {
272 icol=icb+A->col_block_size*(A->pattern->index[iptr]);
273 reg += A->val[iptr*A->block_size+irb+A->row_block_size*icb] * in[icol];
274 }
275 out[irow] += alpha * reg;
276 }
277 }
278 }
279 }
280 }
281 return;
282 }
283
284 void Paso_SystemMatrix_MatrixVector_CSR_OFFSET1(double alpha,
285 Paso_SystemMatrix* A,
286 double* in,
287 double beta,
288 double* out) {
289
290 register index_t ir,icol,iptr,icb,irb,irow,ic;
291 register double reg,reg1,reg2,reg3;
292 #pragma omp barrier
293
294 if (ABS(beta)>0.) {
295 #pragma omp for private(irow) schedule(static)
296 for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
297 out[irow] *= beta;
298 } else {
299 #pragma omp for private(irow) schedule(static)
300 for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
301 out[irow] = 0;
302 }
303 /* do the operation: */
304 if (ABS(alpha)>0) {
305 if (A ->col_block_size==1 && A->row_block_size ==1) {
306 #pragma omp for private(irow,iptr,reg) schedule(static)
307 for (irow=0;irow< A->pattern->n_ptr;++irow) {
308 reg=0.;
309 for (iptr=(A->pattern->ptr[irow])-1;iptr<(A->pattern->ptr[irow+1])-1; ++iptr) {
310 reg += A->val[iptr] * in[A->pattern->index[iptr]-1];
311 }
312 out[irow] += alpha * reg;
313 }
314 } else if (A ->col_block_size==2 && A->row_block_size ==2) {
315 #pragma omp for private(ir,reg1,reg2,iptr,ic) schedule(static)
316 for (ir=0;ir< A->pattern->n_ptr;ir++) {
317 reg1=0.;
318 reg2=0.;
319 for (iptr=A->pattern->ptr[ir]-1;iptr<A->pattern->ptr[ir+1]-1; iptr++) {
320 ic=2*(A->pattern->index[iptr]-1);
321 reg1 += A->val[iptr*4 ]*in[ic] + A->val[iptr*4+2]*in[1+ic];
322 reg2 += A->val[iptr*4+1]*in[ic] + A->val[iptr*4+3]*in[1+ic];
323 }
324 out[ 2*ir] += alpha * reg1;
325 out[1+2*ir] += alpha * reg2;
326 }
327 } else if (A ->col_block_size==3 && A->row_block_size ==3) {
328 #pragma omp for private(ir,reg1,reg2,reg3,iptr,ic) schedule(static)
329 for (ir=0;ir< A->pattern->n_ptr;ir++) {
330 reg1=0.;
331 reg2=0.;
332 reg3=0.;
333 for (iptr=A->pattern->ptr[ir]-1;iptr<A->pattern->ptr[ir+1]-1; iptr++) {
334 ic=3*(A->pattern->index[iptr]-1);
335 reg1 += A->val[iptr*9 ]*in[ic] + A->val[iptr*9+3]*in[1+ic] + A->val[iptr*9+6]*in[2+ic];
336 reg2 += A->val[iptr*9+1]*in[ic] + A->val[iptr*9+4]*in[1+ic] + A->val[iptr*9+7]*in[2+ic];
337 reg3 += A->val[iptr*9+2]*in[ic] + A->val[iptr*9+5]*in[1+ic] + A->val[iptr*9+8]*in[2+ic];
338 }
339 out[ 3*ir] += alpha * reg1;
340 out[1+3*ir] += alpha * reg2;
341 out[2+3*ir] += alpha * reg3;
342 }
343 } else {
344 #pragma omp for private(ir,iptr,irb,icb,irow,icol,reg) schedule(static)
345 for (ir=0;ir< A->pattern->n_ptr;ir++) {
346 for (iptr=A->pattern->ptr[ir]-1;iptr<A->pattern->ptr[ir+1]-1; iptr++) {
347 for (irb=0;irb< A->row_block_size;irb++) {
348 irow=irb+A->row_block_size*ir;
349 reg=0.;
350 for (icb=0;icb< A->col_block_size;icb++) {
351 icol=icb+A->col_block_size*(A->pattern->index[iptr]-1);
352 reg += A->val[iptr*A->block_size+irb+A->row_block_size*icb] * in[icol];
353 }
354 out[irow] += alpha * reg;
355 }
356 }
357 }
358 }
359 }
360 return;
361 }
362 /*
363 * $Log$
364 * Revision 1.2 2005/09/15 03:44:39 jgs
365 * Merge of development branch dev-02 back to main trunk on 2005-09-15
366 *
367 * Revision 1.1.2.1 2005/09/05 06:29:47 gross
368 * These files have been extracted from finley to define a stand alone libray for iterative
369 * linear solvers on the ALTIX. main entry through Paso_solve. this version compiles but
370 * has not been tested yet.
371 *
372 *
373 */

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26