/[escript]/trunk/paso/src/SystemMatrix_MatrixVector.c
ViewVC logotype

Contents of /trunk/paso/src/SystemMatrix_MatrixVector.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 415 - (show annotations)
Wed Jan 4 05:37:33 2006 UTC (13 years, 10 months ago) by gross
File MIME type: text/plain
File size: 12610 byte(s)
a better way of representing the matrix format type is introduced. this is needed for the Paradiso and UMFPACK interface
1 /* $Id$ */
2
3 /**************************************************************/
4
5 /* Paso: matrix vector product with sparse matrix */
6
7 /**************************************************************/
8
9 /* Copyrights by ACcESS Australia 2003,2004,2005 */
10 /* Author: gross@access.edu.au */
11
12 /**************************************************************/
13
14 #include "Paso.h"
15 #include "SystemMatrix.h"
16
17 /**************************************************************************/
18
19 /* raw scaled vector update operation: out = alpha * A * in + beta * out */
20
21 /* has to be called within a parallel region */
22 /* barrier synconization is performed to make sure that the input vector available */
23
24 void Paso_SystemMatrix_MatrixVector(double alpha,
25 Paso_SystemMatrix* A,
26 double* in,
27 double beta,
28 double* out) {
29
30 if (A->type & MATRIX_FORMAT_CSC) {
31 if (A->type & MATRIX_FORMAT_OFFSET1) {
32 Paso_SystemMatrix_MatrixVector_CSC_OFFSET1(alpha,A,in,beta,out);
33 } else {
34 Paso_SystemMatrix_MatrixVector_CSC_OFFSET0(alpha,A,in,beta,out);
35 }
36 } else {
37 if (A->type & MATRIX_FORMAT_OFFSET1) {
38 Paso_SystemMatrix_MatrixVector_CSR_OFFSET1(alpha,A,in,beta,out);
39 } else {
40 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(alpha,A,in,beta,out);
41 }
42 }
43 return;
44 }
45
46 void Paso_SystemMatrix_MatrixVector_CSC_OFFSET0(double alpha,
47 Paso_SystemMatrix* A,
48 double* in,
49 double beta,
50 double* out) {
51
52 register index_t ir,icol,iptr,icb,irb,irow,ic;
53 register double reg,reg1,reg2,reg3;
54 #pragma omp barrier
55
56 if (ABS(beta)>0.) {
57 #pragma omp for private(irow) schedule(static)
58 for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
59 out[irow] *= beta;
60 } else {
61 #pragma omp for private(irow) schedule(static)
62 for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
63 out[irow] = 0;
64 }
65
66 /* do the operation: */
67 if (ABS(alpha)>0) {
68 if (A ->col_block_size==1 && A->row_block_size ==1) {
69 /* TODO: parallelize (good luck!) */
70 #pragma omp single
71 for (icol=0;icol< A->pattern->n_ptr;++icol) {
72 for (iptr=A->pattern->ptr[icol];iptr<A->pattern->ptr[icol+1]; ++iptr) {
73 out[A->pattern->index[iptr]]+= alpha * A->val[iptr] * in[icol];
74 }
75 }
76 } else if (A ->col_block_size==2 && A->row_block_size ==2) {
77 /* TODO: parallelize */
78 #pragma omp single
79 for (ic=0;ic< A->pattern->n_ptr;ic++) {
80 for (iptr=A->pattern->ptr[ic];iptr<A->pattern->ptr[ic+1]; iptr++) {
81 ic=2*(A->pattern->index[iptr]);
82 out[ 2*ir] += alpha * ( A->val[iptr*4 ]*in[ic] + A->val[iptr*4+2]*in[1+ic] );
83 out[1+2*ir] += alpha * ( A->val[iptr*4+1]*in[ic] + A->val[iptr*4+3]*in[1+ic] );
84 }
85 }
86 } else if (A ->col_block_size==3 && A->row_block_size ==3) {
87 /* TODO: parallelize */
88 #pragma omp single
89 for (ic=0;ic< A->pattern->n_ptr;ic++) {
90 for (iptr=A->pattern->ptr[ic];iptr<A->pattern->ptr[ic+1]; iptr++) {
91 ir=3*(A->pattern->index[iptr]);
92 out[ 3*ir] += alpha * ( A->val[iptr*9 ]*in[ic] + A->val[iptr*9+3]*in[1+ic] + A->val[iptr*9+6]*in[2+ic] );
93 out[1+3*ir] += alpha * ( A->val[iptr*9+1]*in[ic] + A->val[iptr*9+4]*in[1+ic] + A->val[iptr*9+7]*in[2+ic] );
94 out[2+3*ir] += alpha * ( A->val[iptr*9+2]*in[ic] + A->val[iptr*9+5]*in[1+ic] + A->val[iptr*9+8]*in[2+ic] );
95 }
96 }
97 } else {
98 /* TODO: parallelize */
99 #pragma omp single
100 for (ic=0;ic< A->pattern->n_ptr;ic++) {
101 for (iptr=A->pattern->ptr[ic];iptr<A->pattern->ptr[ic+1]; iptr++) {
102 for (irb=0;irb< A->row_block_size;irb++) {
103 irow=irb+A->row_block_size*(A->pattern->index[iptr]);
104 for (icb=0;icb< A->col_block_size;icb++) {
105 icol=icb+A->col_block_size*ic;
106 out[irow] += alpha * A->val[iptr*A->block_size+irb+A->row_block_size*icb] * in[icol];
107 }
108 }
109 }
110 }
111 }
112 }
113 return;
114 }
115
116 void Paso_SystemMatrix_MatrixVector_CSC_OFFSET1(double alpha,
117 Paso_SystemMatrix* A,
118 double* in,
119 double beta,
120 double* out) {
121
122 register index_t ir,icol,iptr,icb,irb,irow,ic;
123 register double reg,reg1,reg2,reg3;
124 #pragma omp barrier
125
126 if (ABS(beta)>0.) {
127 #pragma omp for private(irow) schedule(static)
128 for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
129 out[irow] *= beta;
130 } else {
131 #pragma omp for private(irow) schedule(static)
132 for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
133 out[irow] = 0;
134 }
135
136 /* do the operation: */
137 if (ABS(alpha)>0) {
138 if (A ->col_block_size==1 && A->row_block_size ==1) {
139 /* TODO: parallelize (good luck!) */
140 #pragma omp single
141 for (icol=0;icol< A->pattern->n_ptr;++icol) {
142 for (iptr=A->pattern->ptr[icol]-1;iptr<A->pattern->ptr[icol+1]-1; ++iptr) {
143 out[A->pattern->index[iptr]-1]+= alpha * A->val[iptr] * in[icol];
144 }
145 }
146 } else if (A ->col_block_size==2 && A->row_block_size ==2) {
147 /* TODO: parallelize */
148 #pragma omp single
149 for (ic=0;ic< A->pattern->n_ptr;ic++) {
150 for (iptr=A->pattern->ptr[ic]-1;iptr<A->pattern->ptr[ic+1]-1; iptr++) {
151 ic=2*(A->pattern->index[iptr]-1);
152 out[ 2*ir] += alpha * ( A->val[iptr*4 ]*in[ic] + A->val[iptr*4+2]*in[1+ic] );
153 out[1+2*ir] += alpha * ( A->val[iptr*4+1]*in[ic] + A->val[iptr*4+3]*in[1+ic] );
154 }
155 }
156 } else if (A ->col_block_size==3 && A->row_block_size ==3) {
157 /* TODO: parallelize */
158 #pragma omp single
159 for (ic=0;ic< A->pattern->n_ptr;ic++) {
160 for (iptr=A->pattern->ptr[ic]-1;iptr<A->pattern->ptr[ic+1]-1; iptr++) {
161 ir=3*(A->pattern->index[iptr]-1);
162 out[ 3*ir] += alpha * ( A->val[iptr*9 ]*in[ic] + A->val[iptr*9+3]*in[1+ic] + A->val[iptr*9+6]*in[2+ic] );
163 out[1+3*ir] += alpha * ( A->val[iptr*9+1]*in[ic] + A->val[iptr*9+4]*in[1+ic] + A->val[iptr*9+7]*in[2+ic] );
164 out[2+3*ir] += alpha * ( A->val[iptr*9+2]*in[ic] + A->val[iptr*9+5]*in[1+ic] + A->val[iptr*9+8]*in[2+ic] );
165 }
166 }
167 } else {
168 /* TODO: parallelize */
169 #pragma omp single
170 for (ic=0;ic< A->pattern->n_ptr;ic++) {
171 for (iptr=A->pattern->ptr[ic]-1;iptr<A->pattern->ptr[ic+1]-1; iptr++) {
172 for (irb=0;irb< A->row_block_size;irb++) {
173 irow=irb+A->row_block_size*(A->pattern->index[iptr]-1);
174 for (icb=0;icb< A->col_block_size;icb++) {
175 icol=icb+A->col_block_size*ic;
176 out[irow] += alpha * A->val[iptr*A->block_size+irb+A->row_block_size*icb] * in[icol];
177 }
178 }
179 }
180 }
181 }
182 }
183 return;
184 }
185
186 void Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(double alpha,
187 Paso_SystemMatrix* A,
188 double* in,
189 double beta,
190 double* out) {
191
192 register index_t ir,icol,iptr,icb,irb,irow,ic;
193 register double reg,reg1,reg2,reg3;
194 #pragma omp barrier
195 if (ABS(beta)>0.) {
196 #pragma omp for private(irow) schedule(static)
197 for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
198 out[irow] *= beta;
199 } else {
200 #pragma omp for private(irow) schedule(static)
201 for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
202 out[irow] = 0;
203 }
204 /* do the operation: */
205 if (ABS(alpha)>0) {
206 if (A ->col_block_size==1 && A->row_block_size ==1) {
207 #pragma omp for private(irow,iptr,reg) schedule(static)
208 for (irow=0;irow< A->pattern->n_ptr;++irow) {
209 reg=0.;
210 for (iptr=(A->pattern->ptr[irow]);iptr<(A->pattern->ptr[irow+1]); ++iptr) {
211 reg += A->val[iptr] * in[A->pattern->index[iptr]];
212 }
213 out[irow] += alpha * reg;
214 }
215 } else if (A ->col_block_size==2 && A->row_block_size ==2) {
216 #pragma omp for private(ir,iptr,irb,icb,irow,icol,reg1,reg2) schedule(static)
217 for (ir=0;ir< A->pattern->n_ptr;ir++) {
218 reg1=0.;
219 reg2=0.;
220 for (iptr=A->pattern->ptr[ir];iptr<A->pattern->ptr[ir+1]; iptr++) {
221 ic=2*(A->pattern->index[iptr]);
222 reg1 += A->val[iptr*4 ]*in[ic] + A->val[iptr*4+2]*in[1+ic];
223 reg2 += A->val[iptr*4+1]*in[ic] + A->val[iptr*4+3]*in[1+ic];
224 }
225 out[ 2*ir] += alpha * reg1;
226 out[1+2*ir] += alpha * reg2;
227 }
228 } else if (A ->col_block_size==3 && A->row_block_size ==3) {
229 #pragma omp for private(ir,iptr,irb,icb,irow,icol,reg1,reg2,reg3) schedule(static)
230 for (ir=0;ir< A->pattern->n_ptr;ir++) {
231 reg1=0.;
232 reg2=0.;
233 reg3=0.;
234 for (iptr=A->pattern->ptr[ir];iptr<A->pattern->ptr[ir+1]; iptr++) {
235 ic=3*(A->pattern->index[iptr]);
236 reg1 += A->val[iptr*9 ]*in[ic] + A->val[iptr*9+3]*in[1+ic] + A->val[iptr*9+6]*in[2+ic];
237 reg2 += A->val[iptr*9+1]*in[ic] + A->val[iptr*9+4]*in[1+ic] + A->val[iptr*9+7]*in[2+ic];
238 reg3 += A->val[iptr*9+2]*in[ic] + A->val[iptr*9+5]*in[1+ic] + A->val[iptr*9+8]*in[2+ic];
239 }
240 out[ 3*ir] += alpha * reg1;
241 out[1+3*ir] += alpha * reg2;
242 out[2+3*ir] += alpha * reg3;
243 }
244 } else {
245 #pragma omp for private(ir,iptr,irb,icb,irow,icol,reg) schedule(static)
246 for (ir=0;ir< A->pattern->n_ptr;ir++) {
247 for (iptr=A->pattern->ptr[ir];iptr<A->pattern->ptr[ir+1]; iptr++) {
248 for (irb=0;irb< A->row_block_size;irb++) {
249 irow=irb+A->row_block_size*ir;
250 reg=0.;
251 for (icb=0;icb< A->col_block_size;icb++) {
252 icol=icb+A->col_block_size*(A->pattern->index[iptr]);
253 reg += A->val[iptr*A->block_size+irb+A->row_block_size*icb] * in[icol];
254 }
255 out[irow] += alpha * reg;
256 }
257 }
258 }
259 }
260 }
261 return;
262 }
263
264 void Paso_SystemMatrix_MatrixVector_CSR_OFFSET1(double alpha,
265 Paso_SystemMatrix* A,
266 double* in,
267 double beta,
268 double* out) {
269
270 register index_t ir,icol,iptr,icb,irb,irow,ic;
271 register double reg,reg1,reg2,reg3;
272 #pragma omp barrier
273
274 if (ABS(beta)>0.) {
275 #pragma omp for private(irow) schedule(static)
276 for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
277 out[irow] *= beta;
278 } else {
279 #pragma omp for private(irow) schedule(static)
280 for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
281 out[irow] = 0;
282 }
283 /* do the operation: */
284 if (ABS(alpha)>0) {
285 if (A ->col_block_size==1 && A->row_block_size ==1) {
286 #pragma omp for private(irow,iptr,reg) schedule(static)
287 for (irow=0;irow< A->pattern->n_ptr;++irow) {
288 reg=0.;
289 for (iptr=(A->pattern->ptr[irow])-1;iptr<(A->pattern->ptr[irow+1])-1; ++iptr) {
290 reg += A->val[iptr] * in[A->pattern->index[iptr]-1];
291 }
292 out[irow] += alpha * reg;
293 }
294 } else if (A ->col_block_size==2 && A->row_block_size ==2) {
295 #pragma omp for private(ir,iptr,irb,icb,irow,icol,reg1,reg2) schedule(static)
296 for (ir=0;ir< A->pattern->n_ptr;ir++) {
297 reg1=0.;
298 reg2=0.;
299 for (iptr=A->pattern->ptr[ir]-1;iptr<A->pattern->ptr[ir+1]-1; iptr++) {
300 ic=2*(A->pattern->index[iptr]-1);
301 reg1 += A->val[iptr*4 ]*in[ic] + A->val[iptr*4+2]*in[1+ic];
302 reg2 += A->val[iptr*4+1]*in[ic] + A->val[iptr*4+3]*in[1+ic];
303 }
304 out[ 2*ir] += alpha * reg1;
305 out[1+2*ir] += alpha * reg2;
306 }
307 } else if (A ->col_block_size==3 && A->row_block_size ==3) {
308 #pragma omp for private(ir,iptr,irb,icb,irow,icol,reg1,reg2,reg3) schedule(static)
309 for (ir=0;ir< A->pattern->n_ptr;ir++) {
310 reg1=0.;
311 reg2=0.;
312 reg3=0.;
313 for (iptr=A->pattern->ptr[ir]-1;iptr<A->pattern->ptr[ir+1]-1; iptr++) {
314 ic=3*(A->pattern->index[iptr]-1);
315 reg1 += A->val[iptr*9 ]*in[ic] + A->val[iptr*9+3]*in[1+ic] + A->val[iptr*9+6]*in[2+ic];
316 reg2 += A->val[iptr*9+1]*in[ic] + A->val[iptr*9+4]*in[1+ic] + A->val[iptr*9+7]*in[2+ic];
317 reg3 += A->val[iptr*9+2]*in[ic] + A->val[iptr*9+5]*in[1+ic] + A->val[iptr*9+8]*in[2+ic];
318 }
319 out[ 3*ir] += alpha * reg1;
320 out[1+3*ir] += alpha * reg2;
321 out[2+3*ir] += alpha * reg3;
322 }
323 } else {
324 #pragma omp for private(ir,iptr,irb,icb,irow,icol,reg) schedule(static)
325 for (ir=0;ir< A->pattern->n_ptr;ir++) {
326 for (iptr=A->pattern->ptr[ir]-1;iptr<A->pattern->ptr[ir+1]-1; iptr++) {
327 for (irb=0;irb< A->row_block_size;irb++) {
328 irow=irb+A->row_block_size*ir;
329 reg=0.;
330 for (icb=0;icb< A->col_block_size;icb++) {
331 icol=icb+A->col_block_size*(A->pattern->index[iptr]-1);
332 reg += A->val[iptr*A->block_size+irb+A->row_block_size*icb] * in[icol];
333 }
334 out[irow] += alpha * reg;
335 }
336 }
337 }
338 }
339 }
340 return;
341 }
342 /*
343 * $Log$
344 * Revision 1.2 2005/09/15 03:44:39 jgs
345 * Merge of development branch dev-02 back to main trunk on 2005-09-15
346 *
347 * Revision 1.1.2.1 2005/09/05 06:29:47 gross
348 * These files have been extracted from finley to define a stand alone libray for iterative
349 * linear solvers on the ALTIX. main entry through Paso_solve. this version compiles but
350 * has not been tested yet.
351 *
352 *
353 */

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26