/[escript]/temp_trunk_copy/paso/src/SystemMatrix_MatrixVector.c
ViewVC logotype

Annotation of /temp_trunk_copy/paso/src/SystemMatrix_MatrixVector.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 969 - (hide annotations)
Tue Feb 13 23:02:23 2007 UTC (13 years ago) by ksteube
Original Path: trunk/paso/src/SystemMatrix_MatrixVector.c
File MIME type: text/plain
File size: 13944 byte(s)
Parallelization using MPI for solution of implicit problems.

Parallelization for explicit problems has already been accomplished in
the main SVN branch.

This is incomplete and is not ready for use.


1 jgs 150 /* $Id$ */
2    
3 dhawcroft 631
4     /*
5     ********************************************************************************
6 dhawcroft 633 * Copyright 2006 by ACcESS MNRF *
7 dhawcroft 631 * *
8     * http://www.access.edu.au *
9     * Primary Business: Queensland, Australia *
10     * Licensed under the Open Software License version 3.0 *
11     * http://www.opensource.org/licenses/osl-3.0.php *
12     ********************************************************************************
13     */
14    
15 jgs 150 /**************************************************************/
16    
17     /* Paso: matrix vector product with sparse matrix */
18    
19     /**************************************************************/
20    
21     /* Copyrights by ACcESS Australia 2003,2004,2005 */
22     /* Author: gross@access.edu.au */
23    
24     /**************************************************************/
25    
26     #include "Paso.h"
27     #include "SystemMatrix.h"
28    
29     /**************************************************************************/
30    
31     /* raw scaled vector update operation: out = alpha * A * in + beta * out */
32    
33     /* has to be called within a parallel region */
34     /* barrier synconization is performed to make sure that the input vector available */
35    
36     void Paso_SystemMatrix_MatrixVector(double alpha,
37     Paso_SystemMatrix* A,
38     double* in,
39     double beta,
40     double* out) {
41    
42 gross 415 if (A->type & MATRIX_FORMAT_CSC) {
43     if (A->type & MATRIX_FORMAT_OFFSET1) {
44     Paso_SystemMatrix_MatrixVector_CSC_OFFSET1(alpha,A,in,beta,out);
45     } else {
46     Paso_SystemMatrix_MatrixVector_CSC_OFFSET0(alpha,A,in,beta,out);
47     }
48 ksteube 969 } else if (A->type & MATRIX_FORMAT_TRILINOS_CRS) {
49     fprintf(stderr, "Paso_SystemMatrix_MatrixVector: need to implement MATRIX_FORMAT_TRILINOS_CRS");
50     exit(1);
51 gross 415 } else {
52     if (A->type & MATRIX_FORMAT_OFFSET1) {
53     Paso_SystemMatrix_MatrixVector_CSR_OFFSET1(alpha,A,in,beta,out);
54     } else {
55     Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(alpha,A,in,beta,out);
56     }
57     }
58     return;
59     }
60    
61     void Paso_SystemMatrix_MatrixVector_CSC_OFFSET0(double alpha,
62     Paso_SystemMatrix* A,
63     double* in,
64     double beta,
65     double* out) {
66    
67 jgs 150 register index_t ir,icol,iptr,icb,irb,irow,ic;
68     register double reg,reg1,reg2,reg3;
69     #pragma omp barrier
70    
71     if (ABS(beta)>0.) {
72     #pragma omp for private(irow) schedule(static)
73     for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
74     out[irow] *= beta;
75     } else {
76     #pragma omp for private(irow) schedule(static)
77     for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
78     out[irow] = 0;
79     }
80    
81     /* do the operation: */
82     if (ABS(alpha)>0) {
83     if (A ->col_block_size==1 && A->row_block_size ==1) {
84 gross 415 /* TODO: parallelize (good luck!) */
85     #pragma omp single
86     for (icol=0;icol< A->pattern->n_ptr;++icol) {
87     for (iptr=A->pattern->ptr[icol];iptr<A->pattern->ptr[icol+1]; ++iptr) {
88     out[A->pattern->index[iptr]]+= alpha * A->val[iptr] * in[icol];
89 jgs 150 }
90     }
91 gross 415 } else if (A ->col_block_size==2 && A->row_block_size ==2) {
92     /* TODO: parallelize */
93     #pragma omp single
94     for (ic=0;ic< A->pattern->n_ptr;ic++) {
95     for (iptr=A->pattern->ptr[ic];iptr<A->pattern->ptr[ic+1]; iptr++) {
96     ic=2*(A->pattern->index[iptr]);
97     out[ 2*ir] += alpha * ( A->val[iptr*4 ]*in[ic] + A->val[iptr*4+2]*in[1+ic] );
98     out[1+2*ir] += alpha * ( A->val[iptr*4+1]*in[ic] + A->val[iptr*4+3]*in[1+ic] );
99     }
100     }
101     } else if (A ->col_block_size==3 && A->row_block_size ==3) {
102     /* TODO: parallelize */
103     #pragma omp single
104     for (ic=0;ic< A->pattern->n_ptr;ic++) {
105     for (iptr=A->pattern->ptr[ic];iptr<A->pattern->ptr[ic+1]; iptr++) {
106     ir=3*(A->pattern->index[iptr]);
107     out[ 3*ir] += alpha * ( A->val[iptr*9 ]*in[ic] + A->val[iptr*9+3]*in[1+ic] + A->val[iptr*9+6]*in[2+ic] );
108     out[1+3*ir] += alpha * ( A->val[iptr*9+1]*in[ic] + A->val[iptr*9+4]*in[1+ic] + A->val[iptr*9+7]*in[2+ic] );
109     out[2+3*ir] += alpha * ( A->val[iptr*9+2]*in[ic] + A->val[iptr*9+5]*in[1+ic] + A->val[iptr*9+8]*in[2+ic] );
110     }
111     }
112     } else {
113     /* TODO: parallelize */
114     #pragma omp single
115     for (ic=0;ic< A->pattern->n_ptr;ic++) {
116     for (iptr=A->pattern->ptr[ic];iptr<A->pattern->ptr[ic+1]; iptr++) {
117     for (irb=0;irb< A->row_block_size;irb++) {
118     irow=irb+A->row_block_size*(A->pattern->index[iptr]);
119     for (icb=0;icb< A->col_block_size;icb++) {
120     icol=icb+A->col_block_size*ic;
121     out[irow] += alpha * A->val[iptr*A->block_size+irb+A->row_block_size*icb] * in[icol];
122     }
123     }
124     }
125     }
126     }
127     }
128     return;
129     }
130    
131     void Paso_SystemMatrix_MatrixVector_CSC_OFFSET1(double alpha,
132     Paso_SystemMatrix* A,
133     double* in,
134     double beta,
135     double* out) {
136    
137     register index_t ir,icol,iptr,icb,irb,irow,ic;
138     register double reg,reg1,reg2,reg3;
139     #pragma omp barrier
140    
141     if (ABS(beta)>0.) {
142     #pragma omp for private(irow) schedule(static)
143     for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
144     out[irow] *= beta;
145     } else {
146     #pragma omp for private(irow) schedule(static)
147     for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
148     out[irow] = 0;
149     }
150    
151     /* do the operation: */
152     if (ABS(alpha)>0) {
153     if (A ->col_block_size==1 && A->row_block_size ==1) {
154 jgs 150 /* TODO: parallelize (good luck!) */
155     #pragma omp single
156     for (icol=0;icol< A->pattern->n_ptr;++icol) {
157 gross 415 for (iptr=A->pattern->ptr[icol]-1;iptr<A->pattern->ptr[icol+1]-1; ++iptr) {
158     out[A->pattern->index[iptr]-1]+= alpha * A->val[iptr] * in[icol];
159 jgs 150 }
160     }
161     } else if (A ->col_block_size==2 && A->row_block_size ==2) {
162 gross 415 /* TODO: parallelize */
163     #pragma omp single
164     for (ic=0;ic< A->pattern->n_ptr;ic++) {
165     for (iptr=A->pattern->ptr[ic]-1;iptr<A->pattern->ptr[ic+1]-1; iptr++) {
166     ic=2*(A->pattern->index[iptr]-1);
167     out[ 2*ir] += alpha * ( A->val[iptr*4 ]*in[ic] + A->val[iptr*4+2]*in[1+ic] );
168     out[1+2*ir] += alpha * ( A->val[iptr*4+1]*in[ic] + A->val[iptr*4+3]*in[1+ic] );
169     }
170     }
171     } else if (A ->col_block_size==3 && A->row_block_size ==3) {
172     /* TODO: parallelize */
173     #pragma omp single
174     for (ic=0;ic< A->pattern->n_ptr;ic++) {
175     for (iptr=A->pattern->ptr[ic]-1;iptr<A->pattern->ptr[ic+1]-1; iptr++) {
176     ir=3*(A->pattern->index[iptr]-1);
177     out[ 3*ir] += alpha * ( A->val[iptr*9 ]*in[ic] + A->val[iptr*9+3]*in[1+ic] + A->val[iptr*9+6]*in[2+ic] );
178     out[1+3*ir] += alpha * ( A->val[iptr*9+1]*in[ic] + A->val[iptr*9+4]*in[1+ic] + A->val[iptr*9+7]*in[2+ic] );
179     out[2+3*ir] += alpha * ( A->val[iptr*9+2]*in[ic] + A->val[iptr*9+5]*in[1+ic] + A->val[iptr*9+8]*in[2+ic] );
180     }
181     }
182     } else {
183     /* TODO: parallelize */
184     #pragma omp single
185     for (ic=0;ic< A->pattern->n_ptr;ic++) {
186     for (iptr=A->pattern->ptr[ic]-1;iptr<A->pattern->ptr[ic+1]-1; iptr++) {
187     for (irb=0;irb< A->row_block_size;irb++) {
188     irow=irb+A->row_block_size*(A->pattern->index[iptr]-1);
189     for (icb=0;icb< A->col_block_size;icb++) {
190     icol=icb+A->col_block_size*ic;
191     out[irow] += alpha * A->val[iptr*A->block_size+irb+A->row_block_size*icb] * in[icol];
192     }
193     }
194     }
195     }
196     }
197     }
198     return;
199     }
200    
201     void Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(double alpha,
202     Paso_SystemMatrix* A,
203     double* in,
204     double beta,
205     double* out) {
206    
207 gross 495 register index_t ir,icol,iptr,icb,irb,irow,ic,Aiptr;
208     register double reg,reg1,reg2,reg3,in1,in2,in3,A00,A10,A20,A01,A11,A21,A02,A12,A22;
209 gross 415 #pragma omp barrier
210     if (ABS(beta)>0.) {
211     #pragma omp for private(irow) schedule(static)
212     for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
213     out[irow] *= beta;
214     } else {
215     #pragma omp for private(irow) schedule(static)
216     for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
217     out[irow] = 0;
218     }
219     /* do the operation: */
220     if (ABS(alpha)>0) {
221     if (A ->col_block_size==1 && A->row_block_size ==1) {
222     #pragma omp for private(irow,iptr,reg) schedule(static)
223     for (irow=0;irow< A->pattern->n_ptr;++irow) {
224     reg=0.;
225     for (iptr=(A->pattern->ptr[irow]);iptr<(A->pattern->ptr[irow+1]); ++iptr) {
226     reg += A->val[iptr] * in[A->pattern->index[iptr]];
227     }
228     out[irow] += alpha * reg;
229     }
230     } else if (A ->col_block_size==2 && A->row_block_size ==2) {
231 gross 512 #pragma omp for private(ir,reg1,reg2,iptr,ic,Aiptr,in1,in2,A00,A10,A01,A11) schedule(static)
232 jgs 150 for (ir=0;ir< A->pattern->n_ptr;ir++) {
233     reg1=0.;
234     reg2=0.;
235 gross 415 for (iptr=A->pattern->ptr[ir];iptr<A->pattern->ptr[ir+1]; iptr++) {
236     ic=2*(A->pattern->index[iptr]);
237 gross 495 Aiptr=iptr*4;
238     in1=in[ic];
239     in2=in[1+ic];
240     A00=A->val[Aiptr ];
241     A10=A->val[Aiptr+1];
242     A01=A->val[Aiptr+2];
243     A11=A->val[Aiptr+3];
244     reg1 += A00*in1 + A01*in2;
245     reg2 += A10*in1 + A11*in2;
246 jgs 150 }
247     out[ 2*ir] += alpha * reg1;
248     out[1+2*ir] += alpha * reg2;
249     }
250     } else if (A ->col_block_size==3 && A->row_block_size ==3) {
251 gross 512 #pragma omp for private(ir,reg1,reg2,reg3,iptr,ic,Aiptr,in1,in2,in3,A00,A10,A20,A01,A11,A21,A02,A12,A22) schedule(static)
252 jgs 150 for (ir=0;ir< A->pattern->n_ptr;ir++) {
253     reg1=0.;
254     reg2=0.;
255     reg3=0.;
256 gross 415 for (iptr=A->pattern->ptr[ir];iptr<A->pattern->ptr[ir+1]; iptr++) {
257     ic=3*(A->pattern->index[iptr]);
258 gross 495 Aiptr=iptr*9;
259     in1=in[ic];
260     in2=in[1+ic];
261     in3=in[2+ic];
262     A00=A->val[Aiptr ];
263     A10=A->val[Aiptr+1];
264     A20=A->val[Aiptr+2];
265     A01=A->val[Aiptr+3];
266     A11=A->val[Aiptr+4];
267     A21=A->val[Aiptr+5];
268     A02=A->val[Aiptr+6];
269     A12=A->val[Aiptr+7];
270     A22=A->val[Aiptr+8];
271     reg1 += A00*in1 + A01*in2 + A02*in3;
272     reg2 += A10*in1 + A11*in2 + A12*in3;
273     reg3 += A20*in1 + A21*in2 + A22*in3;
274 jgs 150 }
275     out[ 3*ir] += alpha * reg1;
276     out[1+3*ir] += alpha * reg2;
277     out[2+3*ir] += alpha * reg3;
278     }
279     } else {
280     #pragma omp for private(ir,iptr,irb,icb,irow,icol,reg) schedule(static)
281     for (ir=0;ir< A->pattern->n_ptr;ir++) {
282 gross 415 for (iptr=A->pattern->ptr[ir];iptr<A->pattern->ptr[ir+1]; iptr++) {
283 jgs 150 for (irb=0;irb< A->row_block_size;irb++) {
284     irow=irb+A->row_block_size*ir;
285     reg=0.;
286     for (icb=0;icb< A->col_block_size;icb++) {
287 gross 415 icol=icb+A->col_block_size*(A->pattern->index[iptr]);
288 jgs 150 reg += A->val[iptr*A->block_size+irb+A->row_block_size*icb] * in[icol];
289     }
290     out[irow] += alpha * reg;
291     }
292     }
293     }
294 gross 415 }
295     }
296     return;
297     }
298    
299     void Paso_SystemMatrix_MatrixVector_CSR_OFFSET1(double alpha,
300     Paso_SystemMatrix* A,
301     double* in,
302     double beta,
303     double* out) {
304    
305     register index_t ir,icol,iptr,icb,irb,irow,ic;
306     register double reg,reg1,reg2,reg3;
307     #pragma omp barrier
308    
309     if (ABS(beta)>0.) {
310     #pragma omp for private(irow) schedule(static)
311     for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
312     out[irow] *= beta;
313     } else {
314     #pragma omp for private(irow) schedule(static)
315     for (irow=0;irow < A->num_rows * A->row_block_size;irow++)
316     out[irow] = 0;
317     }
318     /* do the operation: */
319     if (ABS(alpha)>0) {
320     if (A ->col_block_size==1 && A->row_block_size ==1) {
321     #pragma omp for private(irow,iptr,reg) schedule(static)
322     for (irow=0;irow< A->pattern->n_ptr;++irow) {
323     reg=0.;
324     for (iptr=(A->pattern->ptr[irow])-1;iptr<(A->pattern->ptr[irow+1])-1; ++iptr) {
325     reg += A->val[iptr] * in[A->pattern->index[iptr]-1];
326     }
327     out[irow] += alpha * reg;
328     }
329     } else if (A ->col_block_size==2 && A->row_block_size ==2) {
330 gross 512 #pragma omp for private(ir,reg1,reg2,iptr,ic) schedule(static)
331 gross 415 for (ir=0;ir< A->pattern->n_ptr;ir++) {
332     reg1=0.;
333     reg2=0.;
334     for (iptr=A->pattern->ptr[ir]-1;iptr<A->pattern->ptr[ir+1]-1; iptr++) {
335     ic=2*(A->pattern->index[iptr]-1);
336     reg1 += A->val[iptr*4 ]*in[ic] + A->val[iptr*4+2]*in[1+ic];
337     reg2 += A->val[iptr*4+1]*in[ic] + A->val[iptr*4+3]*in[1+ic];
338     }
339     out[ 2*ir] += alpha * reg1;
340     out[1+2*ir] += alpha * reg2;
341     }
342     } else if (A ->col_block_size==3 && A->row_block_size ==3) {
343 gross 512 #pragma omp for private(ir,reg1,reg2,reg3,iptr,ic) schedule(static)
344 gross 415 for (ir=0;ir< A->pattern->n_ptr;ir++) {
345     reg1=0.;
346     reg2=0.;
347     reg3=0.;
348     for (iptr=A->pattern->ptr[ir]-1;iptr<A->pattern->ptr[ir+1]-1; iptr++) {
349     ic=3*(A->pattern->index[iptr]-1);
350     reg1 += A->val[iptr*9 ]*in[ic] + A->val[iptr*9+3]*in[1+ic] + A->val[iptr*9+6]*in[2+ic];
351     reg2 += A->val[iptr*9+1]*in[ic] + A->val[iptr*9+4]*in[1+ic] + A->val[iptr*9+7]*in[2+ic];
352     reg3 += A->val[iptr*9+2]*in[ic] + A->val[iptr*9+5]*in[1+ic] + A->val[iptr*9+8]*in[2+ic];
353     }
354     out[ 3*ir] += alpha * reg1;
355     out[1+3*ir] += alpha * reg2;
356     out[2+3*ir] += alpha * reg3;
357     }
358     } else {
359     #pragma omp for private(ir,iptr,irb,icb,irow,icol,reg) schedule(static)
360     for (ir=0;ir< A->pattern->n_ptr;ir++) {
361     for (iptr=A->pattern->ptr[ir]-1;iptr<A->pattern->ptr[ir+1]-1; iptr++) {
362 jgs 150 for (irb=0;irb< A->row_block_size;irb++) {
363 gross 415 irow=irb+A->row_block_size*ir;
364     reg=0.;
365 jgs 150 for (icb=0;icb< A->col_block_size;icb++) {
366 gross 415 icol=icb+A->col_block_size*(A->pattern->index[iptr]-1);
367     reg += A->val[iptr*A->block_size+irb+A->row_block_size*icb] * in[icol];
368 jgs 150 }
369 gross 415 out[irow] += alpha * reg;
370 jgs 150 }
371     }
372     }
373     }
374     }
375     return;
376     }
377     /*
378     * $Log$
379     * Revision 1.2 2005/09/15 03:44:39 jgs
380     * Merge of development branch dev-02 back to main trunk on 2005-09-15
381     *
382     * Revision 1.1.2.1 2005/09/05 06:29:47 gross
383     * These files have been extracted from finley to define a stand alone libray for iterative
384     * linear solvers on the ALTIX. main entry through Paso_solve. this version compiles but
385     * has not been tested yet.
386     *
387     *
388     */

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26