/[escript]/trunk/paso/src/SystemMatrix_MatrixVector.c
ViewVC logotype

Diff of /trunk/paso/src/SystemMatrix_MatrixVector.c

Parent Directory | Revision Log | View Patch

revision 971 by ksteube, Wed Feb 14 04:40:49 2007 UTC | revision 1098 by gross, Mon Apr 16 23:15:23 2007 UTC
# Line 81  void  Paso_SystemMatrix_MatrixVector_CSC Line 81  void  Paso_SystemMatrix_MatrixVector_CSC
81          /* TODO: parallelize (good luck!) */          /* TODO: parallelize (good luck!) */
82          #pragma omp single          #pragma omp single
83      for (icol=0;icol< A->pattern->n_ptr;++icol) {      for (icol=0;icol< A->pattern->n_ptr;++icol) {
84              #pragma ivdep
85        for (iptr=A->pattern->ptr[icol];iptr<A->pattern->ptr[icol+1]; ++iptr) {        for (iptr=A->pattern->ptr[icol];iptr<A->pattern->ptr[icol+1]; ++iptr) {
86          out[A->pattern->index[iptr]]+= alpha * A->val[iptr] * in[icol];          out[A->pattern->index[iptr]]+= alpha * A->val[iptr] * in[icol];
87        }        }
# Line 89  void  Paso_SystemMatrix_MatrixVector_CSC Line 90  void  Paso_SystemMatrix_MatrixVector_CSC
90          /* TODO: parallelize */          /* TODO: parallelize */
91          #pragma omp single          #pragma omp single
92      for (ic=0;ic< A->pattern->n_ptr;ic++) {      for (ic=0;ic< A->pattern->n_ptr;ic++) {
93              #pragma ivdep
94        for (iptr=A->pattern->ptr[ic];iptr<A->pattern->ptr[ic+1]; iptr++) {        for (iptr=A->pattern->ptr[ic];iptr<A->pattern->ptr[ic+1]; iptr++) {
95             ic=2*(A->pattern->index[iptr]);             ic=2*(A->pattern->index[iptr]);
96             out[  2*ir] += alpha * ( A->val[iptr*4  ]*in[ic] + A->val[iptr*4+2]*in[1+ic] );             out[  2*ir] += alpha * ( A->val[iptr*4  ]*in[ic] + A->val[iptr*4+2]*in[1+ic] );
# Line 99  void  Paso_SystemMatrix_MatrixVector_CSC Line 101  void  Paso_SystemMatrix_MatrixVector_CSC
101          /* TODO: parallelize */          /* TODO: parallelize */
102          #pragma omp single          #pragma omp single
103      for (ic=0;ic< A->pattern->n_ptr;ic++) {      for (ic=0;ic< A->pattern->n_ptr;ic++) {
104              #pragma ivdep
105        for (iptr=A->pattern->ptr[ic];iptr<A->pattern->ptr[ic+1]; iptr++) {        for (iptr=A->pattern->ptr[ic];iptr<A->pattern->ptr[ic+1]; iptr++) {
106            ir=3*(A->pattern->index[iptr]);            ir=3*(A->pattern->index[iptr]);
107                out[  3*ir] += alpha * ( A->val[iptr*9  ]*in[ic] + A->val[iptr*9+3]*in[1+ic] + A->val[iptr*9+6]*in[2+ic] );                out[  3*ir] += alpha * ( A->val[iptr*9  ]*in[ic] + A->val[iptr*9+3]*in[1+ic] + A->val[iptr*9+6]*in[2+ic] );
# Line 113  void  Paso_SystemMatrix_MatrixVector_CSC Line 116  void  Paso_SystemMatrix_MatrixVector_CSC
116        for (iptr=A->pattern->ptr[ic];iptr<A->pattern->ptr[ic+1]; iptr++) {        for (iptr=A->pattern->ptr[ic];iptr<A->pattern->ptr[ic+1]; iptr++) {
117          for (irb=0;irb< A->row_block_size;irb++) {          for (irb=0;irb< A->row_block_size;irb++) {
118            irow=irb+A->row_block_size*(A->pattern->index[iptr]);            irow=irb+A->row_block_size*(A->pattern->index[iptr]);
119                  #pragma ivdep
120            for (icb=0;icb< A->col_block_size;icb++) {            for (icb=0;icb< A->col_block_size;icb++) {
121          icol=icb+A->col_block_size*ic;          icol=icb+A->col_block_size*ic;
122          out[irow] += alpha * A->val[iptr*A->block_size+irb+A->row_block_size*icb] * in[icol];          out[irow] += alpha * A->val[iptr*A->block_size+irb+A->row_block_size*icb] * in[icol];
# Line 151  void  Paso_SystemMatrix_MatrixVector_CSC Line 155  void  Paso_SystemMatrix_MatrixVector_CSC
155          /* TODO: parallelize (good luck!) */          /* TODO: parallelize (good luck!) */
156          #pragma omp single          #pragma omp single
157      for (icol=0;icol< A->pattern->n_ptr;++icol) {      for (icol=0;icol< A->pattern->n_ptr;++icol) {
158              #pragma ivdep
159        for (iptr=A->pattern->ptr[icol]-1;iptr<A->pattern->ptr[icol+1]-1; ++iptr) {        for (iptr=A->pattern->ptr[icol]-1;iptr<A->pattern->ptr[icol+1]-1; ++iptr) {
160          out[A->pattern->index[iptr]-1]+= alpha * A->val[iptr] * in[icol];          out[A->pattern->index[iptr]-1]+= alpha * A->val[iptr] * in[icol];
161        }        }
# Line 169  void  Paso_SystemMatrix_MatrixVector_CSC Line 174  void  Paso_SystemMatrix_MatrixVector_CSC
174          /* TODO: parallelize */          /* TODO: parallelize */
175          #pragma omp single          #pragma omp single
176      for (ic=0;ic< A->pattern->n_ptr;ic++) {      for (ic=0;ic< A->pattern->n_ptr;ic++) {
177              #pragma ivdep
178        for (iptr=A->pattern->ptr[ic]-1;iptr<A->pattern->ptr[ic+1]-1; iptr++) {        for (iptr=A->pattern->ptr[ic]-1;iptr<A->pattern->ptr[ic+1]-1; iptr++) {
179            ir=3*(A->pattern->index[iptr]-1);            ir=3*(A->pattern->index[iptr]-1);
180                out[  3*ir] += alpha * ( A->val[iptr*9  ]*in[ic] + A->val[iptr*9+3]*in[1+ic] + A->val[iptr*9+6]*in[2+ic] );                out[  3*ir] += alpha * ( A->val[iptr*9  ]*in[ic] + A->val[iptr*9+3]*in[1+ic] + A->val[iptr*9+6]*in[2+ic] );
# Line 183  void  Paso_SystemMatrix_MatrixVector_CSC Line 189  void  Paso_SystemMatrix_MatrixVector_CSC
189        for (iptr=A->pattern->ptr[ic]-1;iptr<A->pattern->ptr[ic+1]-1; iptr++) {        for (iptr=A->pattern->ptr[ic]-1;iptr<A->pattern->ptr[ic+1]-1; iptr++) {
190          for (irb=0;irb< A->row_block_size;irb++) {          for (irb=0;irb< A->row_block_size;irb++) {
191            irow=irb+A->row_block_size*(A->pattern->index[iptr]-1);            irow=irb+A->row_block_size*(A->pattern->index[iptr]-1);
192                  #pragma ivdep
193            for (icb=0;icb< A->col_block_size;icb++) {            for (icb=0;icb< A->col_block_size;icb++) {
194          icol=icb+A->col_block_size*ic;          icol=icb+A->col_block_size*ic;
195          out[irow] += alpha * A->val[iptr*A->block_size+irb+A->row_block_size*icb] * in[icol];          out[irow] += alpha * A->val[iptr*A->block_size+irb+A->row_block_size*icb] * in[icol];
# Line 219  void  Paso_SystemMatrix_MatrixVector_CSR Line 226  void  Paso_SystemMatrix_MatrixVector_CSR
226          #pragma omp for private(irow,iptr,reg) schedule(static)          #pragma omp for private(irow,iptr,reg) schedule(static)
227      for (irow=0;irow< A->pattern->n_ptr;++irow) {      for (irow=0;irow< A->pattern->n_ptr;++irow) {
228            reg=0.;            reg=0.;
229              #pragma ivdep
230        for (iptr=(A->pattern->ptr[irow]);iptr<(A->pattern->ptr[irow+1]); ++iptr) {        for (iptr=(A->pattern->ptr[irow]);iptr<(A->pattern->ptr[irow+1]); ++iptr) {
231            reg += A->val[iptr] * in[A->pattern->index[iptr]];            reg += A->val[iptr] * in[A->pattern->index[iptr]];
232        }        }
# Line 250  void  Paso_SystemMatrix_MatrixVector_CSR Line 258  void  Paso_SystemMatrix_MatrixVector_CSR
258            reg1=0.;            reg1=0.;
259            reg2=0.;            reg2=0.;
260            reg3=0.;            reg3=0.;
261              #pragma ivdep
262        for (iptr=A->pattern->ptr[ir];iptr<A->pattern->ptr[ir+1]; iptr++) {        for (iptr=A->pattern->ptr[ir];iptr<A->pattern->ptr[ir+1]; iptr++) {
263             ic=3*(A->pattern->index[iptr]);             ic=3*(A->pattern->index[iptr]);
264                 Aiptr=iptr*9;                 Aiptr=iptr*9;
# Line 280  void  Paso_SystemMatrix_MatrixVector_CSR Line 289  void  Paso_SystemMatrix_MatrixVector_CSR
289          for (irb=0;irb< A->row_block_size;irb++) {          for (irb=0;irb< A->row_block_size;irb++) {
290            irow=irb+A->row_block_size*ir;            irow=irb+A->row_block_size*ir;
291                reg=0.;                reg=0.;
292                  #pragma ivdep
293            for (icb=0;icb< A->col_block_size;icb++) {            for (icb=0;icb< A->col_block_size;icb++) {
294          icol=icb+A->col_block_size*(A->pattern->index[iptr]);          icol=icb+A->col_block_size*(A->pattern->index[iptr]);
295          reg += A->val[iptr*A->block_size+irb+A->row_block_size*icb] * in[icol];          reg += A->val[iptr*A->block_size+irb+A->row_block_size*icb] * in[icol];
# Line 318  void  Paso_SystemMatrix_MatrixVector_CSR Line 328  void  Paso_SystemMatrix_MatrixVector_CSR
328          #pragma omp for private(irow,iptr,reg) schedule(static)          #pragma omp for private(irow,iptr,reg) schedule(static)
329      for (irow=0;irow< A->pattern->n_ptr;++irow) {      for (irow=0;irow< A->pattern->n_ptr;++irow) {
330            reg=0.;            reg=0.;
331              #pragma ivdep
332        for (iptr=(A->pattern->ptr[irow])-1;iptr<(A->pattern->ptr[irow+1])-1; ++iptr) {        for (iptr=(A->pattern->ptr[irow])-1;iptr<(A->pattern->ptr[irow+1])-1; ++iptr) {
333            reg += A->val[iptr] * in[A->pattern->index[iptr]-1];            reg += A->val[iptr] * in[A->pattern->index[iptr]-1];
334        }        }
# Line 328  void  Paso_SystemMatrix_MatrixVector_CSR Line 339  void  Paso_SystemMatrix_MatrixVector_CSR
339      for (ir=0;ir< A->pattern->n_ptr;ir++) {      for (ir=0;ir< A->pattern->n_ptr;ir++) {
340            reg1=0.;            reg1=0.;
341            reg2=0.;            reg2=0.;
342              #pragma ivdep
343        for (iptr=A->pattern->ptr[ir]-1;iptr<A->pattern->ptr[ir+1]-1; iptr++) {        for (iptr=A->pattern->ptr[ir]-1;iptr<A->pattern->ptr[ir+1]-1; iptr++) {
344             ic=2*(A->pattern->index[iptr]-1);             ic=2*(A->pattern->index[iptr]-1);
345             reg1 += A->val[iptr*4  ]*in[ic] + A->val[iptr*4+2]*in[1+ic];             reg1 += A->val[iptr*4  ]*in[ic] + A->val[iptr*4+2]*in[1+ic];
# Line 342  void  Paso_SystemMatrix_MatrixVector_CSR Line 354  void  Paso_SystemMatrix_MatrixVector_CSR
354            reg1=0.;            reg1=0.;
355            reg2=0.;            reg2=0.;
356            reg3=0.;            reg3=0.;
357              #pragma ivdep
358        for (iptr=A->pattern->ptr[ir]-1;iptr<A->pattern->ptr[ir+1]-1; iptr++) {        for (iptr=A->pattern->ptr[ir]-1;iptr<A->pattern->ptr[ir+1]-1; iptr++) {
359             ic=3*(A->pattern->index[iptr]-1);             ic=3*(A->pattern->index[iptr]-1);
360             reg1 += A->val[iptr*9  ]*in[ic] + A->val[iptr*9+3]*in[1+ic] + A->val[iptr*9+6]*in[2+ic];             reg1 += A->val[iptr*9  ]*in[ic] + A->val[iptr*9+3]*in[1+ic] + A->val[iptr*9+6]*in[2+ic];
# Line 359  void  Paso_SystemMatrix_MatrixVector_CSR Line 372  void  Paso_SystemMatrix_MatrixVector_CSR
372          for (irb=0;irb< A->row_block_size;irb++) {          for (irb=0;irb< A->row_block_size;irb++) {
373            irow=irb+A->row_block_size*ir;            irow=irb+A->row_block_size*ir;
374                reg=0.;                reg=0.;
375                  #pragma ivdep
376            for (icb=0;icb< A->col_block_size;icb++) {            for (icb=0;icb< A->col_block_size;icb++) {
377          icol=icb+A->col_block_size*(A->pattern->index[iptr]-1);          icol=icb+A->col_block_size*(A->pattern->index[iptr]-1);
378          reg += A->val[iptr*A->block_size+irb+A->row_block_size*icb] * in[icol];          reg += A->val[iptr*A->block_size+irb+A->row_block_size*icb] * in[icol];

Legend:
Removed from v.971
Changed lines
Added in v.1098

  ViewVC Help
Powered by ViewVC 1.1.26