/[escript]/trunk/paso/src/SystemMatrix_MatrixVector.c
ViewVC logotype

Contents of /trunk/paso/src/SystemMatrix_MatrixVector.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 633 - (show annotations)
Thu Mar 23 05:37:00 2006 UTC (13 years, 7 months ago) by dhawcroft
File MIME type: text/plain
File size: 13777 byte(s)


1 /* $Id$ */
2
3
4 /*
5 ********************************************************************************
6 * Copyright 2006 by ACcESS MNRF *
7 * *
8 * http://www.access.edu.au *
9 * Primary Business: Queensland, Australia *
10 * Licensed under the Open Software License version 3.0 *
11 * http://www.opensource.org/licenses/osl-3.0.php *
12 ********************************************************************************
13 */
14
15 /**************************************************************/
16
17 /* Paso: matrix vector product with sparse matrix */
18
19 /**************************************************************/
20
21 /* Copyrights by ACcESS Australia 2003,2004,2005 */
22 /* Author: gross@access.edu.au */
23
24 /**************************************************************/
25
26 #include "Paso.h"
27 #include "SystemMatrix.h"
28
29 /**************************************************************************/
30
31 /* raw scaled vector update operation: out = alpha * A * in + beta * out */
32
33 /* has to be called within a parallel region */
34 /* barrier synconization is performed to make sure that the input vector available */
35
36 void Paso_SystemMatrix_MatrixVector(double alpha,
37 Paso_SystemMatrix* A,
38 double* in,
39 double beta,
40 double* out) {
41
42 if (A->type & MATRIX_FORMAT_CSC) {
43 if (A->type & MATRIX_FORMAT_OFFSET1) {
44 Paso_SystemMatrix_MatrixVector_CSC_OFFSET1(alpha,A,in,beta,out);
45 } else {
46 Paso_SystemMatrix_MatrixVector_CSC_OFFSET0(alpha,A,in,beta,out);
47 }
48 } else {
49 if (A->type & MATRIX_FORMAT_OFFSET1) {
50 Paso_SystemMatrix_MatrixVector_CSR_OFFSET1(alpha,A,in,beta,out);
51 } else {
52 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(alpha,A,in,beta,out);
53 }
54 }
55 return;
56 }
57
58 void Paso_SystemMatrix_MatrixVector_CSC_OFFSET0(double alpha,
59 Paso_SystemMatrix* A,
60 double* in,
61 double beta,
62 double* out) {
63
64 register index_t ir,icol,iptr,icb,irb,irow,ic;
65 register double reg,reg1,reg2,reg3;
66 #pragma omp barrier
67
68 if (ABS(beta)>0.) {
69 #pragma omp for private(irow) schedule(static)
70 for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
71 out[irow] *= beta;
72 } else {
73 #pragma omp for private(irow) schedule(static)
74 for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
75 out[irow] = 0;
76 }
77
78 /* do the operation: */
79 if (ABS(alpha)>0) {
80 if (A ->col_block_size==1 && A->row_block_size ==1) {
81 /* TODO: parallelize (good luck!) */
82 #pragma omp single
83 for (icol=0;icol< A->pattern->n_ptr;++icol) {
84 for (iptr=A->pattern->ptr[icol];iptr<A->pattern->ptr[icol+1]; ++iptr) {
85 out[A->pattern->index[iptr]]+= alpha * A->val[iptr] * in[icol];
86 }
87 }
88 } else if (A ->col_block_size==2 && A->row_block_size ==2) {
89 /* TODO: parallelize */
90 #pragma omp single
91 for (ic=0;ic< A->pattern->n_ptr;ic++) {
92 for (iptr=A->pattern->ptr[ic];iptr<A->pattern->ptr[ic+1]; iptr++) {
93 ic=2*(A->pattern->index[iptr]);
94 out[ 2*ir] += alpha * ( A->val[iptr*4 ]*in[ic] + A->val[iptr*4+2]*in[1+ic] );
95 out[1+2*ir] += alpha * ( A->val[iptr*4+1]*in[ic] + A->val[iptr*4+3]*in[1+ic] );
96 }
97 }
98 } else if (A ->col_block_size==3 && A->row_block_size ==3) {
99 /* TODO: parallelize */
100 #pragma omp single
101 for (ic=0;ic< A->pattern->n_ptr;ic++) {
102 for (iptr=A->pattern->ptr[ic];iptr<A->pattern->ptr[ic+1]; iptr++) {
103 ir=3*(A->pattern->index[iptr]);
104 out[ 3*ir] += alpha * ( A->val[iptr*9 ]*in[ic] + A->val[iptr*9+3]*in[1+ic] + A->val[iptr*9+6]*in[2+ic] );
105 out[1+3*ir] += alpha * ( A->val[iptr*9+1]*in[ic] + A->val[iptr*9+4]*in[1+ic] + A->val[iptr*9+7]*in[2+ic] );
106 out[2+3*ir] += alpha * ( A->val[iptr*9+2]*in[ic] + A->val[iptr*9+5]*in[1+ic] + A->val[iptr*9+8]*in[2+ic] );
107 }
108 }
109 } else {
110 /* TODO: parallelize */
111 #pragma omp single
112 for (ic=0;ic< A->pattern->n_ptr;ic++) {
113 for (iptr=A->pattern->ptr[ic];iptr<A->pattern->ptr[ic+1]; iptr++) {
114 for (irb=0;irb< A->row_block_size;irb++) {
115 irow=irb+A->row_block_size*(A->pattern->index[iptr]);
116 for (icb=0;icb< A->col_block_size;icb++) {
117 icol=icb+A->col_block_size*ic;
118 out[irow] += alpha * A->val[iptr*A->block_size+irb+A->row_block_size*icb] * in[icol];
119 }
120 }
121 }
122 }
123 }
124 }
125 return;
126 }
127
128 void Paso_SystemMatrix_MatrixVector_CSC_OFFSET1(double alpha,
129 Paso_SystemMatrix* A,
130 double* in,
131 double beta,
132 double* out) {
133
134 register index_t ir,icol,iptr,icb,irb,irow,ic;
135 register double reg,reg1,reg2,reg3;
136 #pragma omp barrier
137
138 if (ABS(beta)>0.) {
139 #pragma omp for private(irow) schedule(static)
140 for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
141 out[irow] *= beta;
142 } else {
143 #pragma omp for private(irow) schedule(static)
144 for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
145 out[irow] = 0;
146 }
147
148 /* do the operation: */
149 if (ABS(alpha)>0) {
150 if (A ->col_block_size==1 && A->row_block_size ==1) {
151 /* TODO: parallelize (good luck!) */
152 #pragma omp single
153 for (icol=0;icol< A->pattern->n_ptr;++icol) {
154 for (iptr=A->pattern->ptr[icol]-1;iptr<A->pattern->ptr[icol+1]-1; ++iptr) {
155 out[A->pattern->index[iptr]-1]+= alpha * A->val[iptr] * in[icol];
156 }
157 }
158 } else if (A ->col_block_size==2 && A->row_block_size ==2) {
159 /* TODO: parallelize */
160 #pragma omp single
161 for (ic=0;ic< A->pattern->n_ptr;ic++) {
162 for (iptr=A->pattern->ptr[ic]-1;iptr<A->pattern->ptr[ic+1]-1; iptr++) {
163 ic=2*(A->pattern->index[iptr]-1);
164 out[ 2*ir] += alpha * ( A->val[iptr*4 ]*in[ic] + A->val[iptr*4+2]*in[1+ic] );
165 out[1+2*ir] += alpha * ( A->val[iptr*4+1]*in[ic] + A->val[iptr*4+3]*in[1+ic] );
166 }
167 }
168 } else if (A ->col_block_size==3 && A->row_block_size ==3) {
169 /* TODO: parallelize */
170 #pragma omp single
171 for (ic=0;ic< A->pattern->n_ptr;ic++) {
172 for (iptr=A->pattern->ptr[ic]-1;iptr<A->pattern->ptr[ic+1]-1; iptr++) {
173 ir=3*(A->pattern->index[iptr]-1);
174 out[ 3*ir] += alpha * ( A->val[iptr*9 ]*in[ic] + A->val[iptr*9+3]*in[1+ic] + A->val[iptr*9+6]*in[2+ic] );
175 out[1+3*ir] += alpha * ( A->val[iptr*9+1]*in[ic] + A->val[iptr*9+4]*in[1+ic] + A->val[iptr*9+7]*in[2+ic] );
176 out[2+3*ir] += alpha * ( A->val[iptr*9+2]*in[ic] + A->val[iptr*9+5]*in[1+ic] + A->val[iptr*9+8]*in[2+ic] );
177 }
178 }
179 } else {
180 /* TODO: parallelize */
181 #pragma omp single
182 for (ic=0;ic< A->pattern->n_ptr;ic++) {
183 for (iptr=A->pattern->ptr[ic]-1;iptr<A->pattern->ptr[ic+1]-1; iptr++) {
184 for (irb=0;irb< A->row_block_size;irb++) {
185 irow=irb+A->row_block_size*(A->pattern->index[iptr]-1);
186 for (icb=0;icb< A->col_block_size;icb++) {
187 icol=icb+A->col_block_size*ic;
188 out[irow] += alpha * A->val[iptr*A->block_size+irb+A->row_block_size*icb] * in[icol];
189 }
190 }
191 }
192 }
193 }
194 }
195 return;
196 }
197
198 void Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(double alpha,
199 Paso_SystemMatrix* A,
200 double* in,
201 double beta,
202 double* out) {
203
204 register index_t ir,icol,iptr,icb,irb,irow,ic,Aiptr;
205 register double reg,reg1,reg2,reg3,in1,in2,in3,A00,A10,A20,A01,A11,A21,A02,A12,A22;
206 #pragma omp barrier
207 if (ABS(beta)>0.) {
208 #pragma omp for private(irow) schedule(static)
209 for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
210 out[irow] *= beta;
211 } else {
212 #pragma omp for private(irow) schedule(static)
213 for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
214 out[irow] = 0;
215 }
216 /* do the operation: */
217 if (ABS(alpha)>0) {
218 if (A ->col_block_size==1 && A->row_block_size ==1) {
219 #pragma omp for private(irow,iptr,reg) schedule(static)
220 for (irow=0;irow< A->pattern->n_ptr;++irow) {
221 reg=0.;
222 for (iptr=(A->pattern->ptr[irow]);iptr<(A->pattern->ptr[irow+1]); ++iptr) {
223 reg += A->val[iptr] * in[A->pattern->index[iptr]];
224 }
225 out[irow] += alpha * reg;
226 }
227 } else if (A ->col_block_size==2 && A->row_block_size ==2) {
228 #pragma omp for private(ir,reg1,reg2,iptr,ic,Aiptr,in1,in2,A00,A10,A01,A11) schedule(static)
229 for (ir=0;ir< A->pattern->n_ptr;ir++) {
230 reg1=0.;
231 reg2=0.;
232 for (iptr=A->pattern->ptr[ir];iptr<A->pattern->ptr[ir+1]; iptr++) {
233 ic=2*(A->pattern->index[iptr]);
234 Aiptr=iptr*4;
235 in1=in[ic];
236 in2=in[1+ic];
237 A00=A->val[Aiptr ];
238 A10=A->val[Aiptr+1];
239 A01=A->val[Aiptr+2];
240 A11=A->val[Aiptr+3];
241 reg1 += A00*in1 + A01*in2;
242 reg2 += A10*in1 + A11*in2;
243 }
244 out[ 2*ir] += alpha * reg1;
245 out[1+2*ir] += alpha * reg2;
246 }
247 } else if (A ->col_block_size==3 && A->row_block_size ==3) {
248 #pragma omp for private(ir,reg1,reg2,reg3,iptr,ic,Aiptr,in1,in2,in3,A00,A10,A20,A01,A11,A21,A02,A12,A22) schedule(static)
249 for (ir=0;ir< A->pattern->n_ptr;ir++) {
250 reg1=0.;
251 reg2=0.;
252 reg3=0.;
253 for (iptr=A->pattern->ptr[ir];iptr<A->pattern->ptr[ir+1]; iptr++) {
254 ic=3*(A->pattern->index[iptr]);
255 Aiptr=iptr*9;
256 in1=in[ic];
257 in2=in[1+ic];
258 in3=in[2+ic];
259 A00=A->val[Aiptr ];
260 A10=A->val[Aiptr+1];
261 A20=A->val[Aiptr+2];
262 A01=A->val[Aiptr+3];
263 A11=A->val[Aiptr+4];
264 A21=A->val[Aiptr+5];
265 A02=A->val[Aiptr+6];
266 A12=A->val[Aiptr+7];
267 A22=A->val[Aiptr+8];
268 reg1 += A00*in1 + A01*in2 + A02*in3;
269 reg2 += A10*in1 + A11*in2 + A12*in3;
270 reg3 += A20*in1 + A21*in2 + A22*in3;
271 }
272 out[ 3*ir] += alpha * reg1;
273 out[1+3*ir] += alpha * reg2;
274 out[2+3*ir] += alpha * reg3;
275 }
276 } else {
277 #pragma omp for private(ir,iptr,irb,icb,irow,icol,reg) schedule(static)
278 for (ir=0;ir< A->pattern->n_ptr;ir++) {
279 for (iptr=A->pattern->ptr[ir];iptr<A->pattern->ptr[ir+1]; iptr++) {
280 for (irb=0;irb< A->row_block_size;irb++) {
281 irow=irb+A->row_block_size*ir;
282 reg=0.;
283 for (icb=0;icb< A->col_block_size;icb++) {
284 icol=icb+A->col_block_size*(A->pattern->index[iptr]);
285 reg += A->val[iptr*A->block_size+irb+A->row_block_size*icb] * in[icol];
286 }
287 out[irow] += alpha * reg;
288 }
289 }
290 }
291 }
292 }
293 return;
294 }
295
296 void Paso_SystemMatrix_MatrixVector_CSR_OFFSET1(double alpha,
297 Paso_SystemMatrix* A,
298 double* in,
299 double beta,
300 double* out) {
301
302 register index_t ir,icol,iptr,icb,irb,irow,ic;
303 register double reg,reg1,reg2,reg3;
304 #pragma omp barrier
305
306 if (ABS(beta)>0.) {
307 #pragma omp for private(irow) schedule(static)
308 for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
309 out[irow] *= beta;
310 } else {
311 #pragma omp for private(irow) schedule(static)
312 for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
313 out[irow] = 0;
314 }
315 /* do the operation: */
316 if (ABS(alpha)>0) {
317 if (A ->col_block_size==1 && A->row_block_size ==1) {
318 #pragma omp for private(irow,iptr,reg) schedule(static)
319 for (irow=0;irow< A->pattern->n_ptr;++irow) {
320 reg=0.;
321 for (iptr=(A->pattern->ptr[irow])-1;iptr<(A->pattern->ptr[irow+1])-1; ++iptr) {
322 reg += A->val[iptr] * in[A->pattern->index[iptr]-1];
323 }
324 out[irow] += alpha * reg;
325 }
326 } else if (A ->col_block_size==2 && A->row_block_size ==2) {
327 #pragma omp for private(ir,reg1,reg2,iptr,ic) schedule(static)
328 for (ir=0;ir< A->pattern->n_ptr;ir++) {
329 reg1=0.;
330 reg2=0.;
331 for (iptr=A->pattern->ptr[ir]-1;iptr<A->pattern->ptr[ir+1]-1; iptr++) {
332 ic=2*(A->pattern->index[iptr]-1);
333 reg1 += A->val[iptr*4 ]*in[ic] + A->val[iptr*4+2]*in[1+ic];
334 reg2 += A->val[iptr*4+1]*in[ic] + A->val[iptr*4+3]*in[1+ic];
335 }
336 out[ 2*ir] += alpha * reg1;
337 out[1+2*ir] += alpha * reg2;
338 }
339 } else if (A ->col_block_size==3 && A->row_block_size ==3) {
340 #pragma omp for private(ir,reg1,reg2,reg3,iptr,ic) schedule(static)
341 for (ir=0;ir< A->pattern->n_ptr;ir++) {
342 reg1=0.;
343 reg2=0.;
344 reg3=0.;
345 for (iptr=A->pattern->ptr[ir]-1;iptr<A->pattern->ptr[ir+1]-1; iptr++) {
346 ic=3*(A->pattern->index[iptr]-1);
347 reg1 += A->val[iptr*9 ]*in[ic] + A->val[iptr*9+3]*in[1+ic] + A->val[iptr*9+6]*in[2+ic];
348 reg2 += A->val[iptr*9+1]*in[ic] + A->val[iptr*9+4]*in[1+ic] + A->val[iptr*9+7]*in[2+ic];
349 reg3 += A->val[iptr*9+2]*in[ic] + A->val[iptr*9+5]*in[1+ic] + A->val[iptr*9+8]*in[2+ic];
350 }
351 out[ 3*ir] += alpha * reg1;
352 out[1+3*ir] += alpha * reg2;
353 out[2+3*ir] += alpha * reg3;
354 }
355 } else {
356 #pragma omp for private(ir,iptr,irb,icb,irow,icol,reg) schedule(static)
357 for (ir=0;ir< A->pattern->n_ptr;ir++) {
358 for (iptr=A->pattern->ptr[ir]-1;iptr<A->pattern->ptr[ir+1]-1; iptr++) {
359 for (irb=0;irb< A->row_block_size;irb++) {
360 irow=irb+A->row_block_size*ir;
361 reg=0.;
362 for (icb=0;icb< A->col_block_size;icb++) {
363 icol=icb+A->col_block_size*(A->pattern->index[iptr]-1);
364 reg += A->val[iptr*A->block_size+irb+A->row_block_size*icb] * in[icol];
365 }
366 out[irow] += alpha * reg;
367 }
368 }
369 }
370 }
371 }
372 return;
373 }
374 /*
375 * $Log$
376 * Revision 1.2 2005/09/15 03:44:39 jgs
377 * Merge of development branch dev-02 back to main trunk on 2005-09-15
378 *
379 * Revision 1.1.2.1 2005/09/05 06:29:47 gross
380 * These files have been extracted from finley to define a stand alone libray for iterative
381 * linear solvers on the ALTIX. main entry through Paso_solve. this version compiles but
382 * has not been tested yet.
383 *
384 *
385 */

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26