/[escript]/trunk-mpi-branch/paso/src/SparseMatrix_MatrixVector.c
ViewVC logotype

Contents of /trunk-mpi-branch/paso/src/SparseMatrix_MatrixVector.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1259 - (show annotations)
Mon Aug 20 06:30:26 2007 UTC (11 years, 8 months ago) by gross
File MIME type: text/plain
File size: 13837 byte(s)
the solver seems to work now but simplesolve has still some problems
1 /* $Id$ */
2 /*
3 ********************************************************************************
4 * Copyright 2006, 2007 by ACcESS MNRF *
5 * *
6 * http://www.access.edu.au *
7 * Primary Business: Queensland, Australia *
8 * Licensed under the Open Software License version 3.0 *
9 * http://www.opensource.org/licenses/osl-3.0.php *
10 ********************************************************************************
11 */
12
13 /**************************************************************/
14
15 /* Paso: raw scaled vector update operation: out = alpha * A * in + beta * out */
16
17 /**************************************************************/
18
19 /* Author: gross@access.edu.au */
20
21 /**************************************************************/
22
23 #include "SparseMatrix.h"
24
25 /* CSC format with offset 0*/
26 void Paso_SparseMatrix_MatrixVector_CSC_OFFSET0(double alpha,
27 Paso_SparseMatrix* A,
28 double* in,
29 double beta,
30 double* out) {
31
32 register index_t ir,icol,iptr,icb,irb,irow,ic;
33 register double reg,reg1,reg2,reg3;
34 #pragma omp barrier
35
36 if (ABS(beta)>0.) {
37 if (beta != 1.) {
38 #pragma omp for private(irow) schedule(static)
39 for (irow=0;irow < A->numRows * A->row_block_size;irow++)
40 out[irow] *= beta;
41 }
42 } else {
43 #pragma omp for private(irow) schedule(static)
44 for (irow=0;irow < A->numRows * A->row_block_size;irow++)
45 out[irow] = 0;
46 }
47 if (Paso_Pattern_isEmpty(A->pattern)) return;
48 /* do the operation: */
49 if (ABS(alpha)>0) {
50 if (A ->col_block_size==1 && A->row_block_size ==1) {
51 /* TODO: parallelize (good luck!) */
52 #pragma omp single
53 for (icol=0;icol< A->pattern->numOutput;++icol) {
54 #pragma ivdep
55 for (iptr=A->pattern->ptr[icol];iptr<A->pattern->ptr[icol+1]; ++iptr) {
56 out[A->pattern->index[iptr]]+= alpha * A->val[iptr] * in[icol];
57 }
58 }
59 } else if (A ->col_block_size==2 && A->row_block_size ==2) {
60 /* TODO: parallelize */
61 #pragma omp single
62 for (ic=0;ic< A->pattern->numOutput;ic++) {
63 #pragma ivdep
64 for (iptr=A->pattern->ptr[ic];iptr<A->pattern->ptr[ic+1]; iptr++) {
65 ic=2*(A->pattern->index[iptr]);
66 out[ 2*ir] += alpha * ( A->val[iptr*4 ]*in[ic] + A->val[iptr*4+2]*in[1+ic] );
67 out[1+2*ir] += alpha * ( A->val[iptr*4+1]*in[ic] + A->val[iptr*4+3]*in[1+ic] );
68 }
69 }
70 } else if (A ->col_block_size==3 && A->row_block_size ==3) {
71 /* TODO: parallelize */
72 #pragma omp single
73 for (ic=0;ic< A->pattern->numOutput;ic++) {
74 #pragma ivdep
75 for (iptr=A->pattern->ptr[ic];iptr<A->pattern->ptr[ic+1]; iptr++) {
76 ir=3*(A->pattern->index[iptr]);
77 out[ 3*ir] += alpha * ( A->val[iptr*9 ]*in[ic] + A->val[iptr*9+3]*in[1+ic] + A->val[iptr*9+6]*in[2+ic] );
78 out[1+3*ir] += alpha * ( A->val[iptr*9+1]*in[ic] + A->val[iptr*9+4]*in[1+ic] + A->val[iptr*9+7]*in[2+ic] );
79 out[2+3*ir] += alpha * ( A->val[iptr*9+2]*in[ic] + A->val[iptr*9+5]*in[1+ic] + A->val[iptr*9+8]*in[2+ic] );
80 }
81 }
82 } else {
83 /* TODO: parallelize */
84 #pragma omp single
85 for (ic=0;ic< A->pattern->numOutput;ic++) {
86 for (iptr=A->pattern->ptr[ic];iptr<A->pattern->ptr[ic+1]; iptr++) {
87 for (irb=0;irb< A->row_block_size;irb++) {
88 irow=irb+A->row_block_size*(A->pattern->index[iptr]);
89 #pragma ivdep
90 for (icb=0;icb< A->col_block_size;icb++) {
91 icol=icb+A->col_block_size*ic;
92 out[irow] += alpha * A->val[iptr*A->block_size+irb+A->row_block_size*icb] * in[icol];
93 }
94 }
95 }
96 }
97 }
98 }
99 return;
100 }
101
102 /* CSC format with offset 1*/
103 void Paso_SparseMatrix_MatrixVector_CSC_OFFSET1(double alpha,
104 Paso_SparseMatrix* A,
105 double* in,
106 double beta,
107 double* out) {
108
109 register index_t ir,icol,iptr,icb,irb,irow,ic;
110 register double reg,reg1,reg2,reg3;
111 #pragma omp barrier
112
113 if (ABS(beta)>0.) {
114 if (beta != 1.) {
115 #pragma omp for private(irow) schedule(static)
116 for (irow=0;irow < A->numRows * A->row_block_size;irow++) {
117 out[irow] *= beta;
118 }
119 }
120 } else {
121 #pragma omp for private(irow) schedule(static)
122 for (irow=0;irow < A->numRows * A->row_block_size;irow++)
123 out[irow] = 0;
124 }
125
126 /* do the operation: */
127 if (ABS(alpha)>0) {
128 if (A ->col_block_size==1 && A->row_block_size ==1) {
129 /* TODO: parallelize (good luck!) */
130 #pragma omp single
131 for (icol=0;icol< A->pattern->numOutput;++icol) {
132 #pragma ivdep
133 for (iptr=A->pattern->ptr[icol]-1;iptr<A->pattern->ptr[icol+1]-1; ++iptr) {
134 out[A->pattern->index[iptr]-1]+= alpha * A->val[iptr] * in[icol];
135 }
136 }
137 } else if (A ->col_block_size==2 && A->row_block_size ==2) {
138 /* TODO: parallelize */
139 #pragma omp single
140 for (ic=0;ic< A->pattern->numOutput;ic++) {
141 for (iptr=A->pattern->ptr[ic]-1;iptr<A->pattern->ptr[ic+1]-1; iptr++) {
142 ic=2*(A->pattern->index[iptr]-1);
143 out[ 2*ir] += alpha * ( A->val[iptr*4 ]*in[ic] + A->val[iptr*4+2]*in[1+ic] );
144 out[1+2*ir] += alpha * ( A->val[iptr*4+1]*in[ic] + A->val[iptr*4+3]*in[1+ic] );
145 }
146 }
147 } else if (A ->col_block_size==3 && A->row_block_size ==3) {
148 /* TODO: parallelize */
149 #pragma omp single
150 for (ic=0;ic< A->pattern->numOutput;ic++) {
151 #pragma ivdep
152 for (iptr=A->pattern->ptr[ic]-1;iptr<A->pattern->ptr[ic+1]-1; iptr++) {
153 ir=3*(A->pattern->index[iptr]-1);
154 out[ 3*ir] += alpha * ( A->val[iptr*9 ]*in[ic] + A->val[iptr*9+3]*in[1+ic] + A->val[iptr*9+6]*in[2+ic] );
155 out[1+3*ir] += alpha * ( A->val[iptr*9+1]*in[ic] + A->val[iptr*9+4]*in[1+ic] + A->val[iptr*9+7]*in[2+ic] );
156 out[2+3*ir] += alpha * ( A->val[iptr*9+2]*in[ic] + A->val[iptr*9+5]*in[1+ic] + A->val[iptr*9+8]*in[2+ic] );
157 }
158 }
159 } else {
160 /* TODO: parallelize */
161 #pragma omp single
162 for (ic=0;ic< A->pattern->numOutput;ic++) {
163 for (iptr=A->pattern->ptr[ic]-1;iptr<A->pattern->ptr[ic+1]-1; iptr++) {
164 for (irb=0;irb< A->row_block_size;irb++) {
165 irow=irb+A->row_block_size*(A->pattern->index[iptr]-1);
166 #pragma ivdep
167 for (icb=0;icb< A->col_block_size;icb++) {
168 icol=icb+A->col_block_size*ic;
169 out[irow] += alpha * A->val[iptr*A->block_size+irb+A->row_block_size*icb] * in[icol];
170 }
171 }
172 }
173 }
174 }
175 }
176 return;
177 }
178 /* CSR format with offset 0*/
179 void Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(double alpha,
180 Paso_SparseMatrix* A,
181 double* in,
182 double beta,
183 double* out)
184 {
185 register index_t ir,icol,iptr,icb,irb,irow,ic,Aiptr;
186 register double reg,reg1,reg2,reg3,in1,in2,in3,A00,A10,A20,A01,A11,A21,A02,A12,A22;
187 if (ABS(beta)>0.) {
188 if (beta != 1.) {
189 #pragma omp for private(irow) schedule(static)
190 for (irow=0;irow < A->numRows * A->row_block_size;irow++)
191 out[irow] *= beta;
192 }
193 } else {
194 #pragma omp for private(irow) schedule(static)
195 for (irow=0;irow < A->numRows * A->row_block_size;irow++)
196 out[irow] = 0;
197 }
198 if (ABS(alpha)>0) {
199 if (A ->col_block_size==1 && A->row_block_size ==1) {
200 #pragma omp for private(irow,iptr,reg) schedule(static)
201 for (irow=0;irow< A->pattern->numOutput;++irow) {
202 reg=0.;
203 #pragma ivdep
204 for (iptr=(A->pattern->ptr[irow]);iptr<(A->pattern->ptr[irow+1]); ++iptr) {
205 reg += A->val[iptr] * in[A->pattern->index[iptr]];
206 }
207 out[irow] += alpha * reg;
208 }
209 } else if (A ->col_block_size==2 && A->row_block_size ==2) {
210 #pragma omp for private(ir,reg1,reg2,iptr,ic,Aiptr,in1,in2,A00,A10,A01,A11) schedule(static)
211 for (ir=0;ir< A->pattern->numOutput;ir++) {
212 reg1=0.;
213 reg2=0.;
214 #pragma ivdep
215 for (iptr=A->pattern->ptr[ir];iptr<A->pattern->ptr[ir+1]; iptr++) {
216 ic=2*(A->pattern->index[iptr]);
217 Aiptr=iptr*4;
218 in1=in[ic];
219 in2=in[1+ic];
220 A00=A->val[Aiptr ];
221 A10=A->val[Aiptr+1];
222 A01=A->val[Aiptr+2];
223 A11=A->val[Aiptr+3];
224 reg1 += A00*in1 + A01*in2;
225 reg2 += A10*in1 + A11*in2;
226 }
227 out[ 2*ir] += alpha * reg1;
228 out[1+2*ir] += alpha * reg2;
229 }
230 } else if (A ->col_block_size==3 && A->row_block_size ==3) {
231 #pragma omp for private(ir,reg1,reg2,reg3,iptr,ic,Aiptr,in1,in2,in3,A00,A10,A20,A01,A11,A21,A02,A12,A22) schedule(static)
232 for (ir=0;ir< A->pattern->numOutput;ir++) {
233 reg1=0.;
234 reg2=0.;
235 reg3=0.;
236 #pragma ivdep
237 for (iptr=A->pattern->ptr[ir];iptr<A->pattern->ptr[ir+1]; iptr++) {
238 ic=3*(A->pattern->index[iptr]);
239 Aiptr=iptr*9;
240 in1=in[ic];
241 in2=in[1+ic];
242 in3=in[2+ic];
243 A00=A->val[Aiptr ];
244 A10=A->val[Aiptr+1];
245 A20=A->val[Aiptr+2];
246 A01=A->val[Aiptr+3];
247 A11=A->val[Aiptr+4];
248 A21=A->val[Aiptr+5];
249 A02=A->val[Aiptr+6];
250 A12=A->val[Aiptr+7];
251 A22=A->val[Aiptr+8];
252 reg1 += A00*in1 + A01*in2 + A02*in3;
253 reg2 += A10*in1 + A11*in2 + A12*in3;
254 reg3 += A20*in1 + A21*in2 + A22*in3;
255 }
256 out[ 3*ir] += alpha * reg1;
257 out[1+3*ir] += alpha * reg2;
258 out[2+3*ir] += alpha * reg3;
259 }
260 } else {
261 #pragma omp for private(ir,iptr,irb,icb,irow,icol,reg) schedule(static)
262 for (ir=0;ir< A->pattern->numOutput;ir++) {
263 for (iptr=A->pattern->ptr[ir];iptr<A->pattern->ptr[ir+1]; iptr++) {
264 for (irb=0;irb< A->row_block_size;irb++) {
265 irow=irb+A->row_block_size*ir;
266 reg=0.;
267 #pragma ivdep
268 for (icb=0;icb< A->col_block_size;icb++) {
269 icol=icb+A->col_block_size*(A->pattern->index[iptr]);
270 reg += A->val[iptr*A->block_size+irb+A->row_block_size*icb] * in[icol];
271 }
272 out[irow] += alpha * reg;
273 }
274 }
275 }
276 }
277 }
278 return;
279 }
280 /* CSR format with offset 1*/
281 void Paso_SparseMatrix_MatrixVector_CSR_OFFSET1(double alpha,
282 Paso_SparseMatrix* A,
283 double* in,
284 double beta,
285 double* out) {
286
287 register index_t ir,icol,iptr,icb,irb,irow,ic;
288 register double reg,reg1,reg2,reg3;
289 #pragma omp barrier
290
291 if (ABS(beta)>0.) {
292 if (beta != 1.) {
293 #pragma omp for private(irow) schedule(static)
294 for (irow=0;irow < A->numRows * A->row_block_size;irow++)
295 out[irow] *= beta;
296 }
297 } else {
298 #pragma omp for private(irow) schedule(static)
299 for (irow=0;irow < A->numRows * A->row_block_size;irow++)
300 out[irow] = 0;
301 }
302 /* do the operation: */
303 if (ABS(alpha)>0) {
304 if (A ->col_block_size==1 && A->row_block_size ==1) {
305 #pragma omp for private(irow,iptr,reg) schedule(static)
306 for (irow=0;irow< A->pattern->numOutput;++irow) {
307 reg=0.;
308 #pragma ivdep
309 for (iptr=(A->pattern->ptr[irow])-1;iptr<(A->pattern->ptr[irow+1])-1; ++iptr) {
310 reg += A->val[iptr] * in[A->pattern->index[iptr]-1];
311 }
312 out[irow] += alpha * reg;
313 }
314 } else if (A ->col_block_size==2 && A->row_block_size ==2) {
315 #pragma omp for private(ir,reg1,reg2,iptr,ic) schedule(static)
316 for (ir=0;ir< A->pattern->numOutput;ir++) {
317 reg1=0.;
318 reg2=0.;
319 #pragma ivdep
320 for (iptr=A->pattern->ptr[ir]-1;iptr<A->pattern->ptr[ir+1]-1; iptr++) {
321 ic=2*(A->pattern->index[iptr]-1);
322 reg1 += A->val[iptr*4 ]*in[ic] + A->val[iptr*4+2]*in[1+ic];
323 reg2 += A->val[iptr*4+1]*in[ic] + A->val[iptr*4+3]*in[1+ic];
324 }
325 out[ 2*ir] += alpha * reg1;
326 out[1+2*ir] += alpha * reg2;
327 }
328 } else if (A ->col_block_size==3 && A->row_block_size ==3) {
329 #pragma omp for private(ir,reg1,reg2,reg3,iptr,ic) schedule(static)
330 for (ir=0;ir< A->pattern->numOutput;ir++) {
331 reg1=0.;
332 reg2=0.;
333 reg3=0.;
334 #pragma ivdep
335 for (iptr=A->pattern->ptr[ir]-1;iptr<A->pattern->ptr[ir+1]-1; iptr++) {
336 ic=3*(A->pattern->index[iptr]-1);
337 reg1 += A->val[iptr*9 ]*in[ic] + A->val[iptr*9+3]*in[1+ic] + A->val[iptr*9+6]*in[2+ic];
338 reg2 += A->val[iptr*9+1]*in[ic] + A->val[iptr*9+4]*in[1+ic] + A->val[iptr*9+7]*in[2+ic];
339 reg3 += A->val[iptr*9+2]*in[ic] + A->val[iptr*9+5]*in[1+ic] + A->val[iptr*9+8]*in[2+ic];
340 }
341 out[ 3*ir] += alpha * reg1;
342 out[1+3*ir] += alpha * reg2;
343 out[2+3*ir] += alpha * reg3;
344 }
345 } else {
346 #pragma omp for private(ir,iptr,irb,icb,irow,icol,reg) schedule(static)
347 for (ir=0;ir< A->pattern->numOutput;ir++) {
348 for (iptr=A->pattern->ptr[ir]-1;iptr<A->pattern->ptr[ir+1]-1; iptr++) {
349 for (irb=0;irb< A->row_block_size;irb++) {
350 irow=irb+A->row_block_size*ir;
351 reg=0.;
352 #pragma ivdep
353 for (icb=0;icb< A->col_block_size;icb++) {
354 icol=icb+A->col_block_size*(A->pattern->index[iptr]-1);
355 reg += A->val[iptr*A->block_size+irb+A->row_block_size*icb] * in[icol];
356 }
357 out[irow] += alpha * reg;
358 }
359 }
360 }
361 }
362 }
363 return;
364 }

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26