/[escript]/trunk/paso/src/SystemMatrix_MatrixVector.c
ViewVC logotype

Annotation of /trunk/paso/src/SystemMatrix_MatrixVector.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 971 - (hide annotations)
Wed Feb 14 04:40:49 2007 UTC (12 years, 9 months ago) by ksteube
File MIME type: text/plain
File size: 13777 byte(s)
Had to undo commit to new MPI branch. The changes went into the original and
not the branch. The files committed here are exactly the same as revision 969.


1 jgs 150 /* $Id$ */
2    
3 dhawcroft 631
4     /*
5     ********************************************************************************
6 dhawcroft 633 * Copyright 2006 by ACcESS MNRF *
7 dhawcroft 631 * *
8     * http://www.access.edu.au *
9     * Primary Business: Queensland, Australia *
10     * Licensed under the Open Software License version 3.0 *
11     * http://www.opensource.org/licenses/osl-3.0.php *
12     ********************************************************************************
13     */
14    
15 jgs 150 /**************************************************************/
16    
17     /* Paso: matrix vector product with sparse matrix */
18    
19     /**************************************************************/
20    
21     /* Copyrights by ACcESS Australia 2003,2004,2005 */
22     /* Author: gross@access.edu.au */
23    
24     /**************************************************************/
25    
26     #include "Paso.h"
27     #include "SystemMatrix.h"
28    
29     /**************************************************************************/
30    
31     /* raw scaled vector update operation: out = alpha * A * in + beta * out */
32    
33     /* has to be called within a parallel region */
34     /* barrier synconization is performed to make sure that the input vector available */
35    
36     void Paso_SystemMatrix_MatrixVector(double alpha,
37     Paso_SystemMatrix* A,
38     double* in,
39     double beta,
40     double* out) {
41    
42 gross 415 if (A->type & MATRIX_FORMAT_CSC) {
43     if (A->type & MATRIX_FORMAT_OFFSET1) {
44     Paso_SystemMatrix_MatrixVector_CSC_OFFSET1(alpha,A,in,beta,out);
45     } else {
46     Paso_SystemMatrix_MatrixVector_CSC_OFFSET0(alpha,A,in,beta,out);
47     }
48     } else {
49     if (A->type & MATRIX_FORMAT_OFFSET1) {
50     Paso_SystemMatrix_MatrixVector_CSR_OFFSET1(alpha,A,in,beta,out);
51     } else {
52     Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(alpha,A,in,beta,out);
53     }
54     }
55     return;
56     }
57    
58     void Paso_SystemMatrix_MatrixVector_CSC_OFFSET0(double alpha,
59     Paso_SystemMatrix* A,
60     double* in,
61     double beta,
62     double* out) {
63    
64 jgs 150 register index_t ir,icol,iptr,icb,irb,irow,ic;
65     register double reg,reg1,reg2,reg3;
66     #pragma omp barrier
67    
68     if (ABS(beta)>0.) {
69     #pragma omp for private(irow) schedule(static)
70     for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
71     out[irow] *= beta;
72     } else {
73     #pragma omp for private(irow) schedule(static)
74     for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
75     out[irow] = 0;
76     }
77    
78     /* do the operation: */
79     if (ABS(alpha)>0) {
80     if (A ->col_block_size==1 && A->row_block_size ==1) {
81 gross 415 /* TODO: parallelize (good luck!) */
82     #pragma omp single
83     for (icol=0;icol< A->pattern->n_ptr;++icol) {
84     for (iptr=A->pattern->ptr[icol];iptr<A->pattern->ptr[icol+1]; ++iptr) {
85     out[A->pattern->index[iptr]]+= alpha * A->val[iptr] * in[icol];
86 jgs 150 }
87     }
88 gross 415 } else if (A ->col_block_size==2 && A->row_block_size ==2) {
89     /* TODO: parallelize */
90     #pragma omp single
91     for (ic=0;ic< A->pattern->n_ptr;ic++) {
92     for (iptr=A->pattern->ptr[ic];iptr<A->pattern->ptr[ic+1]; iptr++) {
93     ic=2*(A->pattern->index[iptr]);
94     out[ 2*ir] += alpha * ( A->val[iptr*4 ]*in[ic] + A->val[iptr*4+2]*in[1+ic] );
95     out[1+2*ir] += alpha * ( A->val[iptr*4+1]*in[ic] + A->val[iptr*4+3]*in[1+ic] );
96     }
97     }
98     } else if (A ->col_block_size==3 && A->row_block_size ==3) {
99     /* TODO: parallelize */
100     #pragma omp single
101     for (ic=0;ic< A->pattern->n_ptr;ic++) {
102     for (iptr=A->pattern->ptr[ic];iptr<A->pattern->ptr[ic+1]; iptr++) {
103     ir=3*(A->pattern->index[iptr]);
104     out[ 3*ir] += alpha * ( A->val[iptr*9 ]*in[ic] + A->val[iptr*9+3]*in[1+ic] + A->val[iptr*9+6]*in[2+ic] );
105     out[1+3*ir] += alpha * ( A->val[iptr*9+1]*in[ic] + A->val[iptr*9+4]*in[1+ic] + A->val[iptr*9+7]*in[2+ic] );
106     out[2+3*ir] += alpha * ( A->val[iptr*9+2]*in[ic] + A->val[iptr*9+5]*in[1+ic] + A->val[iptr*9+8]*in[2+ic] );
107     }
108     }
109     } else {
110     /* TODO: parallelize */
111     #pragma omp single
112     for (ic=0;ic< A->pattern->n_ptr;ic++) {
113     for (iptr=A->pattern->ptr[ic];iptr<A->pattern->ptr[ic+1]; iptr++) {
114     for (irb=0;irb< A->row_block_size;irb++) {
115     irow=irb+A->row_block_size*(A->pattern->index[iptr]);
116     for (icb=0;icb< A->col_block_size;icb++) {
117     icol=icb+A->col_block_size*ic;
118     out[irow] += alpha * A->val[iptr*A->block_size+irb+A->row_block_size*icb] * in[icol];
119     }
120     }
121     }
122     }
123     }
124     }
125     return;
126     }
127    
128     void Paso_SystemMatrix_MatrixVector_CSC_OFFSET1(double alpha,
129     Paso_SystemMatrix* A,
130     double* in,
131     double beta,
132     double* out) {
133    
134     register index_t ir,icol,iptr,icb,irb,irow,ic;
135     register double reg,reg1,reg2,reg3;
136     #pragma omp barrier
137    
138     if (ABS(beta)>0.) {
139     #pragma omp for private(irow) schedule(static)
140     for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
141     out[irow] *= beta;
142     } else {
143     #pragma omp for private(irow) schedule(static)
144     for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
145     out[irow] = 0;
146     }
147    
148     /* do the operation: */
149     if (ABS(alpha)>0) {
150     if (A ->col_block_size==1 && A->row_block_size ==1) {
151 jgs 150 /* TODO: parallelize (good luck!) */
152     #pragma omp single
153     for (icol=0;icol< A->pattern->n_ptr;++icol) {
154 gross 415 for (iptr=A->pattern->ptr[icol]-1;iptr<A->pattern->ptr[icol+1]-1; ++iptr) {
155     out[A->pattern->index[iptr]-1]+= alpha * A->val[iptr] * in[icol];
156 jgs 150 }
157     }
158     } else if (A ->col_block_size==2 && A->row_block_size ==2) {
159 gross 415 /* TODO: parallelize */
160     #pragma omp single
161     for (ic=0;ic< A->pattern->n_ptr;ic++) {
162     for (iptr=A->pattern->ptr[ic]-1;iptr<A->pattern->ptr[ic+1]-1; iptr++) {
163     ic=2*(A->pattern->index[iptr]-1);
164     out[ 2*ir] += alpha * ( A->val[iptr*4 ]*in[ic] + A->val[iptr*4+2]*in[1+ic] );
165     out[1+2*ir] += alpha * ( A->val[iptr*4+1]*in[ic] + A->val[iptr*4+3]*in[1+ic] );
166     }
167     }
168     } else if (A ->col_block_size==3 && A->row_block_size ==3) {
169     /* TODO: parallelize */
170     #pragma omp single
171     for (ic=0;ic< A->pattern->n_ptr;ic++) {
172     for (iptr=A->pattern->ptr[ic]-1;iptr<A->pattern->ptr[ic+1]-1; iptr++) {
173     ir=3*(A->pattern->index[iptr]-1);
174     out[ 3*ir] += alpha * ( A->val[iptr*9 ]*in[ic] + A->val[iptr*9+3]*in[1+ic] + A->val[iptr*9+6]*in[2+ic] );
175     out[1+3*ir] += alpha * ( A->val[iptr*9+1]*in[ic] + A->val[iptr*9+4]*in[1+ic] + A->val[iptr*9+7]*in[2+ic] );
176     out[2+3*ir] += alpha * ( A->val[iptr*9+2]*in[ic] + A->val[iptr*9+5]*in[1+ic] + A->val[iptr*9+8]*in[2+ic] );
177     }
178     }
179     } else {
180     /* TODO: parallelize */
181     #pragma omp single
182     for (ic=0;ic< A->pattern->n_ptr;ic++) {
183     for (iptr=A->pattern->ptr[ic]-1;iptr<A->pattern->ptr[ic+1]-1; iptr++) {
184     for (irb=0;irb< A->row_block_size;irb++) {
185     irow=irb+A->row_block_size*(A->pattern->index[iptr]-1);
186     for (icb=0;icb< A->col_block_size;icb++) {
187     icol=icb+A->col_block_size*ic;
188     out[irow] += alpha * A->val[iptr*A->block_size+irb+A->row_block_size*icb] * in[icol];
189     }
190     }
191     }
192     }
193     }
194     }
195     return;
196     }
197    
198     void Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(double alpha,
199     Paso_SystemMatrix* A,
200     double* in,
201     double beta,
202     double* out) {
203    
204 gross 495 register index_t ir,icol,iptr,icb,irb,irow,ic,Aiptr;
205     register double reg,reg1,reg2,reg3,in1,in2,in3,A00,A10,A20,A01,A11,A21,A02,A12,A22;
206 gross 415 #pragma omp barrier
207     if (ABS(beta)>0.) {
208     #pragma omp for private(irow) schedule(static)
209     for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
210     out[irow] *= beta;
211     } else {
212     #pragma omp for private(irow) schedule(static)
213     for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
214     out[irow] = 0;
215     }
216     /* do the operation: */
217     if (ABS(alpha)>0) {
218     if (A ->col_block_size==1 && A->row_block_size ==1) {
219     #pragma omp for private(irow,iptr,reg) schedule(static)
220     for (irow=0;irow< A->pattern->n_ptr;++irow) {
221     reg=0.;
222     for (iptr=(A->pattern->ptr[irow]);iptr<(A->pattern->ptr[irow+1]); ++iptr) {
223     reg += A->val[iptr] * in[A->pattern->index[iptr]];
224     }
225     out[irow] += alpha * reg;
226     }
227     } else if (A ->col_block_size==2 && A->row_block_size ==2) {
228 gross 512 #pragma omp for private(ir,reg1,reg2,iptr,ic,Aiptr,in1,in2,A00,A10,A01,A11) schedule(static)
229 jgs 150 for (ir=0;ir< A->pattern->n_ptr;ir++) {
230     reg1=0.;
231     reg2=0.;
232 gross 415 for (iptr=A->pattern->ptr[ir];iptr<A->pattern->ptr[ir+1]; iptr++) {
233     ic=2*(A->pattern->index[iptr]);
234 gross 495 Aiptr=iptr*4;
235     in1=in[ic];
236     in2=in[1+ic];
237     A00=A->val[Aiptr ];
238     A10=A->val[Aiptr+1];
239     A01=A->val[Aiptr+2];
240     A11=A->val[Aiptr+3];
241     reg1 += A00*in1 + A01*in2;
242     reg2 += A10*in1 + A11*in2;
243 jgs 150 }
244     out[ 2*ir] += alpha * reg1;
245     out[1+2*ir] += alpha * reg2;
246     }
247     } else if (A ->col_block_size==3 && A->row_block_size ==3) {
248 gross 512 #pragma omp for private(ir,reg1,reg2,reg3,iptr,ic,Aiptr,in1,in2,in3,A00,A10,A20,A01,A11,A21,A02,A12,A22) schedule(static)
249 jgs 150 for (ir=0;ir< A->pattern->n_ptr;ir++) {
250     reg1=0.;
251     reg2=0.;
252     reg3=0.;
253 gross 415 for (iptr=A->pattern->ptr[ir];iptr<A->pattern->ptr[ir+1]; iptr++) {
254     ic=3*(A->pattern->index[iptr]);
255 gross 495 Aiptr=iptr*9;
256     in1=in[ic];
257     in2=in[1+ic];
258     in3=in[2+ic];
259     A00=A->val[Aiptr ];
260     A10=A->val[Aiptr+1];
261     A20=A->val[Aiptr+2];
262     A01=A->val[Aiptr+3];
263     A11=A->val[Aiptr+4];
264     A21=A->val[Aiptr+5];
265     A02=A->val[Aiptr+6];
266     A12=A->val[Aiptr+7];
267     A22=A->val[Aiptr+8];
268     reg1 += A00*in1 + A01*in2 + A02*in3;
269     reg2 += A10*in1 + A11*in2 + A12*in3;
270     reg3 += A20*in1 + A21*in2 + A22*in3;
271 jgs 150 }
272     out[ 3*ir] += alpha * reg1;
273     out[1+3*ir] += alpha * reg2;
274     out[2+3*ir] += alpha * reg3;
275     }
276     } else {
277     #pragma omp for private(ir,iptr,irb,icb,irow,icol,reg) schedule(static)
278     for (ir=0;ir< A->pattern->n_ptr;ir++) {
279 gross 415 for (iptr=A->pattern->ptr[ir];iptr<A->pattern->ptr[ir+1]; iptr++) {
280 jgs 150 for (irb=0;irb< A->row_block_size;irb++) {
281     irow=irb+A->row_block_size*ir;
282     reg=0.;
283     for (icb=0;icb< A->col_block_size;icb++) {
284 gross 415 icol=icb+A->col_block_size*(A->pattern->index[iptr]);
285 jgs 150 reg += A->val[iptr*A->block_size+irb+A->row_block_size*icb] * in[icol];
286     }
287     out[irow] += alpha * reg;
288     }
289     }
290     }
291 gross 415 }
292     }
293     return;
294     }
295    
296     void Paso_SystemMatrix_MatrixVector_CSR_OFFSET1(double alpha,
297     Paso_SystemMatrix* A,
298     double* in,
299     double beta,
300     double* out) {
301    
302     register index_t ir,icol,iptr,icb,irb,irow,ic;
303     register double reg,reg1,reg2,reg3;
304     #pragma omp barrier
305    
306     if (ABS(beta)>0.) {
307     #pragma omp for private(irow) schedule(static)
308     for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
309     out[irow] *= beta;
310     } else {
311     #pragma omp for private(irow) schedule(static)
312     for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
313     out[irow] = 0;
314     }
315     /* do the operation: */
316     if (ABS(alpha)>0) {
317     if (A ->col_block_size==1 && A->row_block_size ==1) {
318     #pragma omp for private(irow,iptr,reg) schedule(static)
319     for (irow=0;irow< A->pattern->n_ptr;++irow) {
320     reg=0.;
321     for (iptr=(A->pattern->ptr[irow])-1;iptr<(A->pattern->ptr[irow+1])-1; ++iptr) {
322     reg += A->val[iptr] * in[A->pattern->index[iptr]-1];
323     }
324     out[irow] += alpha * reg;
325     }
326     } else if (A ->col_block_size==2 && A->row_block_size ==2) {
327 gross 512 #pragma omp for private(ir,reg1,reg2,iptr,ic) schedule(static)
328 gross 415 for (ir=0;ir< A->pattern->n_ptr;ir++) {
329     reg1=0.;
330     reg2=0.;
331     for (iptr=A->pattern->ptr[ir]-1;iptr<A->pattern->ptr[ir+1]-1; iptr++) {
332     ic=2*(A->pattern->index[iptr]-1);
333     reg1 += A->val[iptr*4 ]*in[ic] + A->val[iptr*4+2]*in[1+ic];
334     reg2 += A->val[iptr*4+1]*in[ic] + A->val[iptr*4+3]*in[1+ic];
335     }
336     out[ 2*ir] += alpha * reg1;
337     out[1+2*ir] += alpha * reg2;
338     }
339     } else if (A ->col_block_size==3 && A->row_block_size ==3) {
340 gross 512 #pragma omp for private(ir,reg1,reg2,reg3,iptr,ic) schedule(static)
341 gross 415 for (ir=0;ir< A->pattern->n_ptr;ir++) {
342     reg1=0.;
343     reg2=0.;
344     reg3=0.;
345     for (iptr=A->pattern->ptr[ir]-1;iptr<A->pattern->ptr[ir+1]-1; iptr++) {
346     ic=3*(A->pattern->index[iptr]-1);
347     reg1 += A->val[iptr*9 ]*in[ic] + A->val[iptr*9+3]*in[1+ic] + A->val[iptr*9+6]*in[2+ic];
348     reg2 += A->val[iptr*9+1]*in[ic] + A->val[iptr*9+4]*in[1+ic] + A->val[iptr*9+7]*in[2+ic];
349     reg3 += A->val[iptr*9+2]*in[ic] + A->val[iptr*9+5]*in[1+ic] + A->val[iptr*9+8]*in[2+ic];
350     }
351     out[ 3*ir] += alpha * reg1;
352     out[1+3*ir] += alpha * reg2;
353     out[2+3*ir] += alpha * reg3;
354     }
355     } else {
356     #pragma omp for private(ir,iptr,irb,icb,irow,icol,reg) schedule(static)
357     for (ir=0;ir< A->pattern->n_ptr;ir++) {
358     for (iptr=A->pattern->ptr[ir]-1;iptr<A->pattern->ptr[ir+1]-1; iptr++) {
359 jgs 150 for (irb=0;irb< A->row_block_size;irb++) {
360 gross 415 irow=irb+A->row_block_size*ir;
361     reg=0.;
362 jgs 150 for (icb=0;icb< A->col_block_size;icb++) {
363 gross 415 icol=icb+A->col_block_size*(A->pattern->index[iptr]-1);
364     reg += A->val[iptr*A->block_size+irb+A->row_block_size*icb] * in[icol];
365 jgs 150 }
366 gross 415 out[irow] += alpha * reg;
367 jgs 150 }
368     }
369     }
370     }
371     }
372     return;
373     }
374     /*
375     * $Log$
376     * Revision 1.2 2005/09/15 03:44:39 jgs
377     * Merge of development branch dev-02 back to main trunk on 2005-09-15
378     *
379     * Revision 1.1.2.1 2005/09/05 06:29:47 gross
380     * These files have been extracted from finley to define a stand alone libray for iterative
381     * linear solvers on the ALTIX. main entry through Paso_solve. this version compiles but
382     * has not been tested yet.
383     *
384     *
385     */

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26