64 |
for (ic=0;ic< A->pattern->numOutput;ic++) { |
for (ic=0;ic< A->pattern->numOutput;ic++) { |
65 |
#pragma ivdep |
#pragma ivdep |
66 |
for (iptr=A->pattern->ptr[ic];iptr<A->pattern->ptr[ic+1]; iptr++) { |
for (iptr=A->pattern->ptr[ic];iptr<A->pattern->ptr[ic+1]; iptr++) { |
67 |
ic=2*(A->pattern->index[iptr]); |
ir=2*(A->pattern->index[iptr]); |
68 |
out[ 2*ir] += alpha * ( A->val[iptr*4 ]*in[ic] + A->val[iptr*4+2]*in[1+ic] ); |
out[ 2*ir] += alpha * ( A->val[iptr*4 ]*in[ic] + A->val[iptr*4+2]*in[1+ic] ); |
69 |
out[1+2*ir] += alpha * ( A->val[iptr*4+1]*in[ic] + A->val[iptr*4+3]*in[1+ic] ); |
out[1+2*ir] += alpha * ( A->val[iptr*4+1]*in[ic] + A->val[iptr*4+3]*in[1+ic] ); |
70 |
} |
} |
140 |
#pragma omp single |
#pragma omp single |
141 |
for (ic=0;ic< A->pattern->numOutput;ic++) { |
for (ic=0;ic< A->pattern->numOutput;ic++) { |
142 |
for (iptr=A->pattern->ptr[ic]-1;iptr<A->pattern->ptr[ic+1]-1; iptr++) { |
for (iptr=A->pattern->ptr[ic]-1;iptr<A->pattern->ptr[ic+1]-1; iptr++) { |
143 |
ic=2*(A->pattern->index[iptr]-1); |
ir=2*(A->pattern->index[iptr]-1); |
144 |
out[ 2*ir] += alpha * ( A->val[iptr*4 ]*in[ic] + A->val[iptr*4+2]*in[1+ic] ); |
out[ 2*ir] += alpha * ( A->val[iptr*4 ]*in[ic] + A->val[iptr*4+2]*in[1+ic] ); |
145 |
out[1+2*ir] += alpha * ( A->val[iptr*4+1]*in[ic] + A->val[iptr*4+3]*in[1+ic] ); |
out[1+2*ir] += alpha * ( A->val[iptr*4+1]*in[ic] + A->val[iptr*4+3]*in[1+ic] ); |
146 |
} |
} |