/[escript]/trunk/paso/src/SystemMatrix_MatrixVector.c
ViewVC logotype

Contents of /trunk/paso/src/SystemMatrix_MatrixVector.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 969 - (show annotations)
Tue Feb 13 23:02:23 2007 UTC (13 years ago) by ksteube
File MIME type: text/plain
File size: 13944 byte(s)
Parallelization using MPI for solution of implicit problems.

Parallelization for explicit problems has already been accomplished in
the main SVN branch.

This is incomplete and is not ready for use.


1 /* $Id$ */
2
3
4 /*
5 ********************************************************************************
6 * Copyright 2006 by ACcESS MNRF *
7 * *
8 * http://www.access.edu.au *
9 * Primary Business: Queensland, Australia *
10 * Licensed under the Open Software License version 3.0 *
11 * http://www.opensource.org/licenses/osl-3.0.php *
12 ********************************************************************************
13 */
14
15 /**************************************************************/
16
17 /* Paso: matrix vector product with sparse matrix */
18
19 /**************************************************************/
20
21 /* Copyrights by ACcESS Australia 2003,2004,2005 */
22 /* Author: gross@access.edu.au */
23
24 /**************************************************************/
25
26 #include "Paso.h"
27 #include "SystemMatrix.h"
28
29 /**************************************************************************/
30
31 /* raw scaled vector update operation: out = alpha * A * in + beta * out */
32
33 /* has to be called within a parallel region */
34 /* barrier synconization is performed to make sure that the input vector available */
35
36 void Paso_SystemMatrix_MatrixVector(double alpha,
37 Paso_SystemMatrix* A,
38 double* in,
39 double beta,
40 double* out) {
41
42 if (A->type & MATRIX_FORMAT_CSC) {
43 if (A->type & MATRIX_FORMAT_OFFSET1) {
44 Paso_SystemMatrix_MatrixVector_CSC_OFFSET1(alpha,A,in,beta,out);
45 } else {
46 Paso_SystemMatrix_MatrixVector_CSC_OFFSET0(alpha,A,in,beta,out);
47 }
48 } else if (A->type & MATRIX_FORMAT_TRILINOS_CRS) {
49 fprintf(stderr, "Paso_SystemMatrix_MatrixVector: need to implement MATRIX_FORMAT_TRILINOS_CRS");
50 exit(1);
51 } else {
52 if (A->type & MATRIX_FORMAT_OFFSET1) {
53 Paso_SystemMatrix_MatrixVector_CSR_OFFSET1(alpha,A,in,beta,out);
54 } else {
55 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(alpha,A,in,beta,out);
56 }
57 }
58 return;
59 }
60
61 void Paso_SystemMatrix_MatrixVector_CSC_OFFSET0(double alpha,
62 Paso_SystemMatrix* A,
63 double* in,
64 double beta,
65 double* out) {
66
67 register index_t ir,icol,iptr,icb,irb,irow,ic;
68 register double reg,reg1,reg2,reg3;
69 #pragma omp barrier
70
71 if (ABS(beta)>0.) {
72 #pragma omp for private(irow) schedule(static)
73 for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
74 out[irow] *= beta;
75 } else {
76 #pragma omp for private(irow) schedule(static)
77 for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
78 out[irow] = 0;
79 }
80
81 /* do the operation: */
82 if (ABS(alpha)>0) {
83 if (A ->col_block_size==1 && A->row_block_size ==1) {
84 /* TODO: parallelize (good luck!) */
85 #pragma omp single
86 for (icol=0;icol< A->pattern->n_ptr;++icol) {
87 for (iptr=A->pattern->ptr[icol];iptr<A->pattern->ptr[icol+1]; ++iptr) {
88 out[A->pattern->index[iptr]]+= alpha * A->val[iptr] * in[icol];
89 }
90 }
91 } else if (A ->col_block_size==2 && A->row_block_size ==2) {
92 /* TODO: parallelize */
93 #pragma omp single
94 for (ic=0;ic< A->pattern->n_ptr;ic++) {
95 for (iptr=A->pattern->ptr[ic];iptr<A->pattern->ptr[ic+1]; iptr++) {
96 ic=2*(A->pattern->index[iptr]);
97 out[ 2*ir] += alpha * ( A->val[iptr*4 ]*in[ic] + A->val[iptr*4+2]*in[1+ic] );
98 out[1+2*ir] += alpha * ( A->val[iptr*4+1]*in[ic] + A->val[iptr*4+3]*in[1+ic] );
99 }
100 }
101 } else if (A ->col_block_size==3 && A->row_block_size ==3) {
102 /* TODO: parallelize */
103 #pragma omp single
104 for (ic=0;ic< A->pattern->n_ptr;ic++) {
105 for (iptr=A->pattern->ptr[ic];iptr<A->pattern->ptr[ic+1]; iptr++) {
106 ir=3*(A->pattern->index[iptr]);
107 out[ 3*ir] += alpha * ( A->val[iptr*9 ]*in[ic] + A->val[iptr*9+3]*in[1+ic] + A->val[iptr*9+6]*in[2+ic] );
108 out[1+3*ir] += alpha * ( A->val[iptr*9+1]*in[ic] + A->val[iptr*9+4]*in[1+ic] + A->val[iptr*9+7]*in[2+ic] );
109 out[2+3*ir] += alpha * ( A->val[iptr*9+2]*in[ic] + A->val[iptr*9+5]*in[1+ic] + A->val[iptr*9+8]*in[2+ic] );
110 }
111 }
112 } else {
113 /* TODO: parallelize */
114 #pragma omp single
115 for (ic=0;ic< A->pattern->n_ptr;ic++) {
116 for (iptr=A->pattern->ptr[ic];iptr<A->pattern->ptr[ic+1]; iptr++) {
117 for (irb=0;irb< A->row_block_size;irb++) {
118 irow=irb+A->row_block_size*(A->pattern->index[iptr]);
119 for (icb=0;icb< A->col_block_size;icb++) {
120 icol=icb+A->col_block_size*ic;
121 out[irow] += alpha * A->val[iptr*A->block_size+irb+A->row_block_size*icb] * in[icol];
122 }
123 }
124 }
125 }
126 }
127 }
128 return;
129 }
130
131 void Paso_SystemMatrix_MatrixVector_CSC_OFFSET1(double alpha,
132 Paso_SystemMatrix* A,
133 double* in,
134 double beta,
135 double* out) {
136
137 register index_t ir,icol,iptr,icb,irb,irow,ic;
138 register double reg,reg1,reg2,reg3;
139 #pragma omp barrier
140
141 if (ABS(beta)>0.) {
142 #pragma omp for private(irow) schedule(static)
143 for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
144 out[irow] *= beta;
145 } else {
146 #pragma omp for private(irow) schedule(static)
147 for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
148 out[irow] = 0;
149 }
150
151 /* do the operation: */
152 if (ABS(alpha)>0) {
153 if (A ->col_block_size==1 && A->row_block_size ==1) {
154 /* TODO: parallelize (good luck!) */
155 #pragma omp single
156 for (icol=0;icol< A->pattern->n_ptr;++icol) {
157 for (iptr=A->pattern->ptr[icol]-1;iptr<A->pattern->ptr[icol+1]-1; ++iptr) {
158 out[A->pattern->index[iptr]-1]+= alpha * A->val[iptr] * in[icol];
159 }
160 }
161 } else if (A ->col_block_size==2 && A->row_block_size ==2) {
162 /* TODO: parallelize */
163 #pragma omp single
164 for (ic=0;ic< A->pattern->n_ptr;ic++) {
165 for (iptr=A->pattern->ptr[ic]-1;iptr<A->pattern->ptr[ic+1]-1; iptr++) {
166 ic=2*(A->pattern->index[iptr]-1);
167 out[ 2*ir] += alpha * ( A->val[iptr*4 ]*in[ic] + A->val[iptr*4+2]*in[1+ic] );
168 out[1+2*ir] += alpha * ( A->val[iptr*4+1]*in[ic] + A->val[iptr*4+3]*in[1+ic] );
169 }
170 }
171 } else if (A ->col_block_size==3 && A->row_block_size ==3) {
172 /* TODO: parallelize */
173 #pragma omp single
174 for (ic=0;ic< A->pattern->n_ptr;ic++) {
175 for (iptr=A->pattern->ptr[ic]-1;iptr<A->pattern->ptr[ic+1]-1; iptr++) {
176 ir=3*(A->pattern->index[iptr]-1);
177 out[ 3*ir] += alpha * ( A->val[iptr*9 ]*in[ic] + A->val[iptr*9+3]*in[1+ic] + A->val[iptr*9+6]*in[2+ic] );
178 out[1+3*ir] += alpha * ( A->val[iptr*9+1]*in[ic] + A->val[iptr*9+4]*in[1+ic] + A->val[iptr*9+7]*in[2+ic] );
179 out[2+3*ir] += alpha * ( A->val[iptr*9+2]*in[ic] + A->val[iptr*9+5]*in[1+ic] + A->val[iptr*9+8]*in[2+ic] );
180 }
181 }
182 } else {
183 /* TODO: parallelize */
184 #pragma omp single
185 for (ic=0;ic< A->pattern->n_ptr;ic++) {
186 for (iptr=A->pattern->ptr[ic]-1;iptr<A->pattern->ptr[ic+1]-1; iptr++) {
187 for (irb=0;irb< A->row_block_size;irb++) {
188 irow=irb+A->row_block_size*(A->pattern->index[iptr]-1);
189 for (icb=0;icb< A->col_block_size;icb++) {
190 icol=icb+A->col_block_size*ic;
191 out[irow] += alpha * A->val[iptr*A->block_size+irb+A->row_block_size*icb] * in[icol];
192 }
193 }
194 }
195 }
196 }
197 }
198 return;
199 }
200
201 void Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(double alpha,
202 Paso_SystemMatrix* A,
203 double* in,
204 double beta,
205 double* out) {
206
207 register index_t ir,icol,iptr,icb,irb,irow,ic,Aiptr;
208 register double reg,reg1,reg2,reg3,in1,in2,in3,A00,A10,A20,A01,A11,A21,A02,A12,A22;
209 #pragma omp barrier
210 if (ABS(beta)>0.) {
211 #pragma omp for private(irow) schedule(static)
212 for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
213 out[irow] *= beta;
214 } else {
215 #pragma omp for private(irow) schedule(static)
216 for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
217 out[irow] = 0;
218 }
219 /* do the operation: */
220 if (ABS(alpha)>0) {
221 if (A ->col_block_size==1 && A->row_block_size ==1) {
222 #pragma omp for private(irow,iptr,reg) schedule(static)
223 for (irow=0;irow< A->pattern->n_ptr;++irow) {
224 reg=0.;
225 for (iptr=(A->pattern->ptr[irow]);iptr<(A->pattern->ptr[irow+1]); ++iptr) {
226 reg += A->val[iptr] * in[A->pattern->index[iptr]];
227 }
228 out[irow] += alpha * reg;
229 }
230 } else if (A ->col_block_size==2 && A->row_block_size ==2) {
231 #pragma omp for private(ir,reg1,reg2,iptr,ic,Aiptr,in1,in2,A00,A10,A01,A11) schedule(static)
232 for (ir=0;ir< A->pattern->n_ptr;ir++) {
233 reg1=0.;
234 reg2=0.;
235 for (iptr=A->pattern->ptr[ir];iptr<A->pattern->ptr[ir+1]; iptr++) {
236 ic=2*(A->pattern->index[iptr]);
237 Aiptr=iptr*4;
238 in1=in[ic];
239 in2=in[1+ic];
240 A00=A->val[Aiptr ];
241 A10=A->val[Aiptr+1];
242 A01=A->val[Aiptr+2];
243 A11=A->val[Aiptr+3];
244 reg1 += A00*in1 + A01*in2;
245 reg2 += A10*in1 + A11*in2;
246 }
247 out[ 2*ir] += alpha * reg1;
248 out[1+2*ir] += alpha * reg2;
249 }
250 } else if (A ->col_block_size==3 && A->row_block_size ==3) {
251 #pragma omp for private(ir,reg1,reg2,reg3,iptr,ic,Aiptr,in1,in2,in3,A00,A10,A20,A01,A11,A21,A02,A12,A22) schedule(static)
252 for (ir=0;ir< A->pattern->n_ptr;ir++) {
253 reg1=0.;
254 reg2=0.;
255 reg3=0.;
256 for (iptr=A->pattern->ptr[ir];iptr<A->pattern->ptr[ir+1]; iptr++) {
257 ic=3*(A->pattern->index[iptr]);
258 Aiptr=iptr*9;
259 in1=in[ic];
260 in2=in[1+ic];
261 in3=in[2+ic];
262 A00=A->val[Aiptr ];
263 A10=A->val[Aiptr+1];
264 A20=A->val[Aiptr+2];
265 A01=A->val[Aiptr+3];
266 A11=A->val[Aiptr+4];
267 A21=A->val[Aiptr+5];
268 A02=A->val[Aiptr+6];
269 A12=A->val[Aiptr+7];
270 A22=A->val[Aiptr+8];
271 reg1 += A00*in1 + A01*in2 + A02*in3;
272 reg2 += A10*in1 + A11*in2 + A12*in3;
273 reg3 += A20*in1 + A21*in2 + A22*in3;
274 }
275 out[ 3*ir] += alpha * reg1;
276 out[1+3*ir] += alpha * reg2;
277 out[2+3*ir] += alpha * reg3;
278 }
279 } else {
280 #pragma omp for private(ir,iptr,irb,icb,irow,icol,reg) schedule(static)
281 for (ir=0;ir< A->pattern->n_ptr;ir++) {
282 for (iptr=A->pattern->ptr[ir];iptr<A->pattern->ptr[ir+1]; iptr++) {
283 for (irb=0;irb< A->row_block_size;irb++) {
284 irow=irb+A->row_block_size*ir;
285 reg=0.;
286 for (icb=0;icb< A->col_block_size;icb++) {
287 icol=icb+A->col_block_size*(A->pattern->index[iptr]);
288 reg += A->val[iptr*A->block_size+irb+A->row_block_size*icb] * in[icol];
289 }
290 out[irow] += alpha * reg;
291 }
292 }
293 }
294 }
295 }
296 return;
297 }
298
299 void Paso_SystemMatrix_MatrixVector_CSR_OFFSET1(double alpha,
300 Paso_SystemMatrix* A,
301 double* in,
302 double beta,
303 double* out) {
304
305 register index_t ir,icol,iptr,icb,irb,irow,ic;
306 register double reg,reg1,reg2,reg3;
307 #pragma omp barrier
308
309 if (ABS(beta)>0.) {
310 #pragma omp for private(irow) schedule(static)
311 for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
312 out[irow] *= beta;
313 } else {
314 #pragma omp for private(irow) schedule(static)
315 for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
316 out[irow] = 0;
317 }
318 /* do the operation: */
319 if (ABS(alpha)>0) {
320 if (A ->col_block_size==1 && A->row_block_size ==1) {
321 #pragma omp for private(irow,iptr,reg) schedule(static)
322 for (irow=0;irow< A->pattern->n_ptr;++irow) {
323 reg=0.;
324 for (iptr=(A->pattern->ptr[irow])-1;iptr<(A->pattern->ptr[irow+1])-1; ++iptr) {
325 reg += A->val[iptr] * in[A->pattern->index[iptr]-1];
326 }
327 out[irow] += alpha * reg;
328 }
329 } else if (A ->col_block_size==2 && A->row_block_size ==2) {
330 #pragma omp for private(ir,reg1,reg2,iptr,ic) schedule(static)
331 for (ir=0;ir< A->pattern->n_ptr;ir++) {
332 reg1=0.;
333 reg2=0.;
334 for (iptr=A->pattern->ptr[ir]-1;iptr<A->pattern->ptr[ir+1]-1; iptr++) {
335 ic=2*(A->pattern->index[iptr]-1);
336 reg1 += A->val[iptr*4 ]*in[ic] + A->val[iptr*4+2]*in[1+ic];
337 reg2 += A->val[iptr*4+1]*in[ic] + A->val[iptr*4+3]*in[1+ic];
338 }
339 out[ 2*ir] += alpha * reg1;
340 out[1+2*ir] += alpha * reg2;
341 }
342 } else if (A ->col_block_size==3 && A->row_block_size ==3) {
343 #pragma omp for private(ir,reg1,reg2,reg3,iptr,ic) schedule(static)
344 for (ir=0;ir< A->pattern->n_ptr;ir++) {
345 reg1=0.;
346 reg2=0.;
347 reg3=0.;
348 for (iptr=A->pattern->ptr[ir]-1;iptr<A->pattern->ptr[ir+1]-1; iptr++) {
349 ic=3*(A->pattern->index[iptr]-1);
350 reg1 += A->val[iptr*9 ]*in[ic] + A->val[iptr*9+3]*in[1+ic] + A->val[iptr*9+6]*in[2+ic];
351 reg2 += A->val[iptr*9+1]*in[ic] + A->val[iptr*9+4]*in[1+ic] + A->val[iptr*9+7]*in[2+ic];
352 reg3 += A->val[iptr*9+2]*in[ic] + A->val[iptr*9+5]*in[1+ic] + A->val[iptr*9+8]*in[2+ic];
353 }
354 out[ 3*ir] += alpha * reg1;
355 out[1+3*ir] += alpha * reg2;
356 out[2+3*ir] += alpha * reg3;
357 }
358 } else {
359 #pragma omp for private(ir,iptr,irb,icb,irow,icol,reg) schedule(static)
360 for (ir=0;ir< A->pattern->n_ptr;ir++) {
361 for (iptr=A->pattern->ptr[ir]-1;iptr<A->pattern->ptr[ir+1]-1; iptr++) {
362 for (irb=0;irb< A->row_block_size;irb++) {
363 irow=irb+A->row_block_size*ir;
364 reg=0.;
365 for (icb=0;icb< A->col_block_size;icb++) {
366 icol=icb+A->col_block_size*(A->pattern->index[iptr]-1);
367 reg += A->val[iptr*A->block_size+irb+A->row_block_size*icb] * in[icol];
368 }
369 out[irow] += alpha * reg;
370 }
371 }
372 }
373 }
374 }
375 return;
376 }
377 /*
378 * $Log$
379 * Revision 1.2 2005/09/15 03:44:39 jgs
380 * Merge of development branch dev-02 back to main trunk on 2005-09-15
381 *
382 * Revision 1.1.2.1 2005/09/05 06:29:47 gross
383 * These files have been extracted from finley to define a stand alone libray for iterative
384 * linear solvers on the ALTIX. main entry through Paso_solve. this version compiles but
385 * has not been tested yet.
386 *
387 *
388 */

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26