/[escript]/temp_trunk_copy/paso/src/SystemMatrix_MatrixVector.c
ViewVC logotype

Contents of /temp_trunk_copy/paso/src/SystemMatrix_MatrixVector.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 971 - (show annotations)
Wed Feb 14 04:40:49 2007 UTC (13 years ago) by ksteube
Original Path: trunk/paso/src/SystemMatrix_MatrixVector.c
File MIME type: text/plain
File size: 13777 byte(s)
Had to undo commit to new MPI branch. The changes went into the original and
not the branch. The files committed here are exactly the same as revision 969.


1 /* $Id$ */
2
3
4 /*
5 ********************************************************************************
6 * Copyright 2006 by ACcESS MNRF *
7 * *
8 * http://www.access.edu.au *
9 * Primary Business: Queensland, Australia *
10 * Licensed under the Open Software License version 3.0 *
11 * http://www.opensource.org/licenses/osl-3.0.php *
12 ********************************************************************************
13 */
14
15 /**************************************************************/
16
17 /* Paso: matrix vector product with sparse matrix */
18
19 /**************************************************************/
20
21 /* Copyrights by ACcESS Australia 2003,2004,2005 */
22 /* Author: gross@access.edu.au */
23
24 /**************************************************************/
25
26 #include "Paso.h"
27 #include "SystemMatrix.h"
28
29 /**************************************************************************/
30
31 /* raw scaled vector update operation: out = alpha * A * in + beta * out */
32
33 /* has to be called within a parallel region */
34 /* barrier synconization is performed to make sure that the input vector available */
35
36 void Paso_SystemMatrix_MatrixVector(double alpha,
37 Paso_SystemMatrix* A,
38 double* in,
39 double beta,
40 double* out) {
41
42 if (A->type & MATRIX_FORMAT_CSC) {
43 if (A->type & MATRIX_FORMAT_OFFSET1) {
44 Paso_SystemMatrix_MatrixVector_CSC_OFFSET1(alpha,A,in,beta,out);
45 } else {
46 Paso_SystemMatrix_MatrixVector_CSC_OFFSET0(alpha,A,in,beta,out);
47 }
48 } else {
49 if (A->type & MATRIX_FORMAT_OFFSET1) {
50 Paso_SystemMatrix_MatrixVector_CSR_OFFSET1(alpha,A,in,beta,out);
51 } else {
52 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(alpha,A,in,beta,out);
53 }
54 }
55 return;
56 }
57
58 void Paso_SystemMatrix_MatrixVector_CSC_OFFSET0(double alpha,
59 Paso_SystemMatrix* A,
60 double* in,
61 double beta,
62 double* out) {
63
64 register index_t ir,icol,iptr,icb,irb,irow,ic;
65 register double reg,reg1,reg2,reg3;
66 #pragma omp barrier
67
68 if (ABS(beta)>0.) {
69 #pragma omp for private(irow) schedule(static)
70 for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
71 out[irow] *= beta;
72 } else {
73 #pragma omp for private(irow) schedule(static)
74 for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
75 out[irow] = 0;
76 }
77
78 /* do the operation: */
79 if (ABS(alpha)>0) {
80 if (A ->col_block_size==1 && A->row_block_size ==1) {
81 /* TODO: parallelize (good luck!) */
82 #pragma omp single
83 for (icol=0;icol< A->pattern->n_ptr;++icol) {
84 for (iptr=A->pattern->ptr[icol];iptr<A->pattern->ptr[icol+1]; ++iptr) {
85 out[A->pattern->index[iptr]]+= alpha * A->val[iptr] * in[icol];
86 }
87 }
88 } else if (A ->col_block_size==2 && A->row_block_size ==2) {
89 /* TODO: parallelize */
90 #pragma omp single
91 for (ic=0;ic< A->pattern->n_ptr;ic++) {
92 for (iptr=A->pattern->ptr[ic];iptr<A->pattern->ptr[ic+1]; iptr++) {
93 ic=2*(A->pattern->index[iptr]);
94 out[ 2*ir] += alpha * ( A->val[iptr*4 ]*in[ic] + A->val[iptr*4+2]*in[1+ic] );
95 out[1+2*ir] += alpha * ( A->val[iptr*4+1]*in[ic] + A->val[iptr*4+3]*in[1+ic] );
96 }
97 }
98 } else if (A ->col_block_size==3 && A->row_block_size ==3) {
99 /* TODO: parallelize */
100 #pragma omp single
101 for (ic=0;ic< A->pattern->n_ptr;ic++) {
102 for (iptr=A->pattern->ptr[ic];iptr<A->pattern->ptr[ic+1]; iptr++) {
103 ir=3*(A->pattern->index[iptr]);
104 out[ 3*ir] += alpha * ( A->val[iptr*9 ]*in[ic] + A->val[iptr*9+3]*in[1+ic] + A->val[iptr*9+6]*in[2+ic] );
105 out[1+3*ir] += alpha * ( A->val[iptr*9+1]*in[ic] + A->val[iptr*9+4]*in[1+ic] + A->val[iptr*9+7]*in[2+ic] );
106 out[2+3*ir] += alpha * ( A->val[iptr*9+2]*in[ic] + A->val[iptr*9+5]*in[1+ic] + A->val[iptr*9+8]*in[2+ic] );
107 }
108 }
109 } else {
110 /* TODO: parallelize */
111 #pragma omp single
112 for (ic=0;ic< A->pattern->n_ptr;ic++) {
113 for (iptr=A->pattern->ptr[ic];iptr<A->pattern->ptr[ic+1]; iptr++) {
114 for (irb=0;irb< A->row_block_size;irb++) {
115 irow=irb+A->row_block_size*(A->pattern->index[iptr]);
116 for (icb=0;icb< A->col_block_size;icb++) {
117 icol=icb+A->col_block_size*ic;
118 out[irow] += alpha * A->val[iptr*A->block_size+irb+A->row_block_size*icb] * in[icol];
119 }
120 }
121 }
122 }
123 }
124 }
125 return;
126 }
127
128 void Paso_SystemMatrix_MatrixVector_CSC_OFFSET1(double alpha,
129 Paso_SystemMatrix* A,
130 double* in,
131 double beta,
132 double* out) {
133
134 register index_t ir,icol,iptr,icb,irb,irow,ic;
135 register double reg,reg1,reg2,reg3;
136 #pragma omp barrier
137
138 if (ABS(beta)>0.) {
139 #pragma omp for private(irow) schedule(static)
140 for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
141 out[irow] *= beta;
142 } else {
143 #pragma omp for private(irow) schedule(static)
144 for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
145 out[irow] = 0;
146 }
147
148 /* do the operation: */
149 if (ABS(alpha)>0) {
150 if (A ->col_block_size==1 && A->row_block_size ==1) {
151 /* TODO: parallelize (good luck!) */
152 #pragma omp single
153 for (icol=0;icol< A->pattern->n_ptr;++icol) {
154 for (iptr=A->pattern->ptr[icol]-1;iptr<A->pattern->ptr[icol+1]-1; ++iptr) {
155 out[A->pattern->index[iptr]-1]+= alpha * A->val[iptr] * in[icol];
156 }
157 }
158 } else if (A ->col_block_size==2 && A->row_block_size ==2) {
159 /* TODO: parallelize */
160 #pragma omp single
161 for (ic=0;ic< A->pattern->n_ptr;ic++) {
162 for (iptr=A->pattern->ptr[ic]-1;iptr<A->pattern->ptr[ic+1]-1; iptr++) {
163 ic=2*(A->pattern->index[iptr]-1);
164 out[ 2*ir] += alpha * ( A->val[iptr*4 ]*in[ic] + A->val[iptr*4+2]*in[1+ic] );
165 out[1+2*ir] += alpha * ( A->val[iptr*4+1]*in[ic] + A->val[iptr*4+3]*in[1+ic] );
166 }
167 }
168 } else if (A ->col_block_size==3 && A->row_block_size ==3) {
169 /* TODO: parallelize */
170 #pragma omp single
171 for (ic=0;ic< A->pattern->n_ptr;ic++) {
172 for (iptr=A->pattern->ptr[ic]-1;iptr<A->pattern->ptr[ic+1]-1; iptr++) {
173 ir=3*(A->pattern->index[iptr]-1);
174 out[ 3*ir] += alpha * ( A->val[iptr*9 ]*in[ic] + A->val[iptr*9+3]*in[1+ic] + A->val[iptr*9+6]*in[2+ic] );
175 out[1+3*ir] += alpha * ( A->val[iptr*9+1]*in[ic] + A->val[iptr*9+4]*in[1+ic] + A->val[iptr*9+7]*in[2+ic] );
176 out[2+3*ir] += alpha * ( A->val[iptr*9+2]*in[ic] + A->val[iptr*9+5]*in[1+ic] + A->val[iptr*9+8]*in[2+ic] );
177 }
178 }
179 } else {
180 /* TODO: parallelize */
181 #pragma omp single
182 for (ic=0;ic< A->pattern->n_ptr;ic++) {
183 for (iptr=A->pattern->ptr[ic]-1;iptr<A->pattern->ptr[ic+1]-1; iptr++) {
184 for (irb=0;irb< A->row_block_size;irb++) {
185 irow=irb+A->row_block_size*(A->pattern->index[iptr]-1);
186 for (icb=0;icb< A->col_block_size;icb++) {
187 icol=icb+A->col_block_size*ic;
188 out[irow] += alpha * A->val[iptr*A->block_size+irb+A->row_block_size*icb] * in[icol];
189 }
190 }
191 }
192 }
193 }
194 }
195 return;
196 }
197
198 void Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(double alpha,
199 Paso_SystemMatrix* A,
200 double* in,
201 double beta,
202 double* out) {
203
204 register index_t ir,icol,iptr,icb,irb,irow,ic,Aiptr;
205 register double reg,reg1,reg2,reg3,in1,in2,in3,A00,A10,A20,A01,A11,A21,A02,A12,A22;
206 #pragma omp barrier
207 if (ABS(beta)>0.) {
208 #pragma omp for private(irow) schedule(static)
209 for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
210 out[irow] *= beta;
211 } else {
212 #pragma omp for private(irow) schedule(static)
213 for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
214 out[irow] = 0;
215 }
216 /* do the operation: */
217 if (ABS(alpha)>0) {
218 if (A ->col_block_size==1 && A->row_block_size ==1) {
219 #pragma omp for private(irow,iptr,reg) schedule(static)
220 for (irow=0;irow< A->pattern->n_ptr;++irow) {
221 reg=0.;
222 for (iptr=(A->pattern->ptr[irow]);iptr<(A->pattern->ptr[irow+1]); ++iptr) {
223 reg += A->val[iptr] * in[A->pattern->index[iptr]];
224 }
225 out[irow] += alpha * reg;
226 }
227 } else if (A ->col_block_size==2 && A->row_block_size ==2) {
228 #pragma omp for private(ir,reg1,reg2,iptr,ic,Aiptr,in1,in2,A00,A10,A01,A11) schedule(static)
229 for (ir=0;ir< A->pattern->n_ptr;ir++) {
230 reg1=0.;
231 reg2=0.;
232 for (iptr=A->pattern->ptr[ir];iptr<A->pattern->ptr[ir+1]; iptr++) {
233 ic=2*(A->pattern->index[iptr]);
234 Aiptr=iptr*4;
235 in1=in[ic];
236 in2=in[1+ic];
237 A00=A->val[Aiptr ];
238 A10=A->val[Aiptr+1];
239 A01=A->val[Aiptr+2];
240 A11=A->val[Aiptr+3];
241 reg1 += A00*in1 + A01*in2;
242 reg2 += A10*in1 + A11*in2;
243 }
244 out[ 2*ir] += alpha * reg1;
245 out[1+2*ir] += alpha * reg2;
246 }
247 } else if (A ->col_block_size==3 && A->row_block_size ==3) {
248 #pragma omp for private(ir,reg1,reg2,reg3,iptr,ic,Aiptr,in1,in2,in3,A00,A10,A20,A01,A11,A21,A02,A12,A22) schedule(static)
249 for (ir=0;ir< A->pattern->n_ptr;ir++) {
250 reg1=0.;
251 reg2=0.;
252 reg3=0.;
253 for (iptr=A->pattern->ptr[ir];iptr<A->pattern->ptr[ir+1]; iptr++) {
254 ic=3*(A->pattern->index[iptr]);
255 Aiptr=iptr*9;
256 in1=in[ic];
257 in2=in[1+ic];
258 in3=in[2+ic];
259 A00=A->val[Aiptr ];
260 A10=A->val[Aiptr+1];
261 A20=A->val[Aiptr+2];
262 A01=A->val[Aiptr+3];
263 A11=A->val[Aiptr+4];
264 A21=A->val[Aiptr+5];
265 A02=A->val[Aiptr+6];
266 A12=A->val[Aiptr+7];
267 A22=A->val[Aiptr+8];
268 reg1 += A00*in1 + A01*in2 + A02*in3;
269 reg2 += A10*in1 + A11*in2 + A12*in3;
270 reg3 += A20*in1 + A21*in2 + A22*in3;
271 }
272 out[ 3*ir] += alpha * reg1;
273 out[1+3*ir] += alpha * reg2;
274 out[2+3*ir] += alpha * reg3;
275 }
276 } else {
277 #pragma omp for private(ir,iptr,irb,icb,irow,icol,reg) schedule(static)
278 for (ir=0;ir< A->pattern->n_ptr;ir++) {
279 for (iptr=A->pattern->ptr[ir];iptr<A->pattern->ptr[ir+1]; iptr++) {
280 for (irb=0;irb< A->row_block_size;irb++) {
281 irow=irb+A->row_block_size*ir;
282 reg=0.;
283 for (icb=0;icb< A->col_block_size;icb++) {
284 icol=icb+A->col_block_size*(A->pattern->index[iptr]);
285 reg += A->val[iptr*A->block_size+irb+A->row_block_size*icb] * in[icol];
286 }
287 out[irow] += alpha * reg;
288 }
289 }
290 }
291 }
292 }
293 return;
294 }
295
296 void Paso_SystemMatrix_MatrixVector_CSR_OFFSET1(double alpha,
297 Paso_SystemMatrix* A,
298 double* in,
299 double beta,
300 double* out) {
301
302 register index_t ir,icol,iptr,icb,irb,irow,ic;
303 register double reg,reg1,reg2,reg3;
304 #pragma omp barrier
305
306 if (ABS(beta)>0.) {
307 #pragma omp for private(irow) schedule(static)
308 for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
309 out[irow] *= beta;
310 } else {
311 #pragma omp for private(irow) schedule(static)
312 for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
313 out[irow] = 0;
314 }
315 /* do the operation: */
316 if (ABS(alpha)>0) {
317 if (A ->col_block_size==1 && A->row_block_size ==1) {
318 #pragma omp for private(irow,iptr,reg) schedule(static)
319 for (irow=0;irow< A->pattern->n_ptr;++irow) {
320 reg=0.;
321 for (iptr=(A->pattern->ptr[irow])-1;iptr<(A->pattern->ptr[irow+1])-1; ++iptr) {
322 reg += A->val[iptr] * in[A->pattern->index[iptr]-1];
323 }
324 out[irow] += alpha * reg;
325 }
326 } else if (A ->col_block_size==2 && A->row_block_size ==2) {
327 #pragma omp for private(ir,reg1,reg2,iptr,ic) schedule(static)
328 for (ir=0;ir< A->pattern->n_ptr;ir++) {
329 reg1=0.;
330 reg2=0.;
331 for (iptr=A->pattern->ptr[ir]-1;iptr<A->pattern->ptr[ir+1]-1; iptr++) {
332 ic=2*(A->pattern->index[iptr]-1);
333 reg1 += A->val[iptr*4 ]*in[ic] + A->val[iptr*4+2]*in[1+ic];
334 reg2 += A->val[iptr*4+1]*in[ic] + A->val[iptr*4+3]*in[1+ic];
335 }
336 out[ 2*ir] += alpha * reg1;
337 out[1+2*ir] += alpha * reg2;
338 }
339 } else if (A ->col_block_size==3 && A->row_block_size ==3) {
340 #pragma omp for private(ir,reg1,reg2,reg3,iptr,ic) schedule(static)
341 for (ir=0;ir< A->pattern->n_ptr;ir++) {
342 reg1=0.;
343 reg2=0.;
344 reg3=0.;
345 for (iptr=A->pattern->ptr[ir]-1;iptr<A->pattern->ptr[ir+1]-1; iptr++) {
346 ic=3*(A->pattern->index[iptr]-1);
347 reg1 += A->val[iptr*9 ]*in[ic] + A->val[iptr*9+3]*in[1+ic] + A->val[iptr*9+6]*in[2+ic];
348 reg2 += A->val[iptr*9+1]*in[ic] + A->val[iptr*9+4]*in[1+ic] + A->val[iptr*9+7]*in[2+ic];
349 reg3 += A->val[iptr*9+2]*in[ic] + A->val[iptr*9+5]*in[1+ic] + A->val[iptr*9+8]*in[2+ic];
350 }
351 out[ 3*ir] += alpha * reg1;
352 out[1+3*ir] += alpha * reg2;
353 out[2+3*ir] += alpha * reg3;
354 }
355 } else {
356 #pragma omp for private(ir,iptr,irb,icb,irow,icol,reg) schedule(static)
357 for (ir=0;ir< A->pattern->n_ptr;ir++) {
358 for (iptr=A->pattern->ptr[ir]-1;iptr<A->pattern->ptr[ir+1]-1; iptr++) {
359 for (irb=0;irb< A->row_block_size;irb++) {
360 irow=irb+A->row_block_size*ir;
361 reg=0.;
362 for (icb=0;icb< A->col_block_size;icb++) {
363 icol=icb+A->col_block_size*(A->pattern->index[iptr]-1);
364 reg += A->val[iptr*A->block_size+irb+A->row_block_size*icb] * in[icol];
365 }
366 out[irow] += alpha * reg;
367 }
368 }
369 }
370 }
371 }
372 return;
373 }
374 /*
375 * $Log$
376 * Revision 1.2 2005/09/15 03:44:39 jgs
377 * Merge of development branch dev-02 back to main trunk on 2005-09-15
378 *
379 * Revision 1.1.2.1 2005/09/05 06:29:47 gross
380 * These files have been extracted from finley to define a stand alone libray for iterative
381 * linear solvers on the ALTIX. main entry through Paso_solve. this version compiles but
382 * has not been tested yet.
383 *
384 *
385 */

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26