/[escript]/trunk/paso/src/PasoUtil.c
ViewVC logotype

Diff of /trunk/paso/src/PasoUtil.c

Parent Directory | Revision Log | View Patch

revision 1797 by ksteube, Thu Aug 14 05:56:40 2008 UTC | revision 1798 by gross, Wed Sep 17 06:21:12 2008 UTC
# Line 183  void Paso_LinearCombination(const dim_t Line 183  void Paso_LinearCombination(const dim_t
183          rest=n-local_n*num_threads;          rest=n-local_n*num_threads;
184          n_start=local_n*i+MIN(i,rest);          n_start=local_n*i+MIN(i,rest);
185          n_end=local_n*(i+1)+MIN(i+1,rest);          n_end=local_n*(i+1)+MIN(i+1,rest);
186          if ((ABS(a)==0) && (ABS(b)==0)) {          if (((ABS(a)==0) && (ABS(b)==0)) || (y==NULL) || (x==NULL)) {
187              #pragma ivdep              #pragma ivdep
188              for (q=n_start;q<n_end;++q) {              for (q=n_start;q<n_end;++q) {
189                z[q]=0.;                z[q]=0.;
190              }              }
191          } else if ((ABS(a)==0) && (ABS(b)>0.)) {          } else if ( ((ABS(a)==0) && (ABS(b)>0.)) || (x==NULL) )  {
192              #pragma ivdep              #pragma ivdep
193              for (q=n_start;q<n_end;++q) {              for (q=n_start;q<n_end;++q) {
194                z[q]=b*y[q];                z[q]=b*y[q];
195              }              }
196          } else if ((ABS(a)>0) && (ABS(b)==0.)) {          } else if (((ABS(a)>0) && (ABS(b)==0.)) || (y==NULL) ) {
197              #pragma ivdep              #pragma ivdep
198              for (q=n_start;q<n_end;++q) {              for (q=n_start;q<n_end;++q) {
199                z[q]=a*x[q];                z[q]=a*x[q];
# Line 218  double Paso_InnerProduct(const dim_t n,c Line 218  double Paso_InnerProduct(const dim_t n,c
218     #else     #else
219         const int num_threads=1;         const int num_threads=1;
220     #endif     #endif
221     #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q) reduction(+:my_out)     #pragma omp parallel for private(i,local_out,local_n,rest,n_start,n_end,q) reduction(+:my_out)
222     for (i=0;i<num_threads;++i) {     for (i=0;i<num_threads;++i) {
223            local_out=0;
224          local_n=n/num_threads;          local_n=n/num_threads;
225          rest=n-local_n*num_threads;          rest=n-local_n*num_threads;
226          n_start=local_n*i+MIN(i,rest);          n_start=local_n*i+MIN(i,rest);
227          n_end=local_n*(i+1)+MIN(i+1,rest);          n_end=local_n*(i+1)+MIN(i+1,rest);
228            #pragma ivdep
229          for (q=n_start;q<n_end;++q) local_out+=x[q]*y[q];          for (q=n_start;q<n_end;++q) local_out+=x[q]*y[q];
230          my_out+=local_out;          my_out+=local_out;
231     }     }
# Line 248  double Paso_l2(const dim_t n, const doub Line 250  double Paso_l2(const dim_t n, const doub
250     #else     #else
251         const int num_threads=1;         const int num_threads=1;
252     #endif     #endif
253     #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q) reduction(+:my_out)     #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q, local_out) reduction(+:my_out)
254     for (i=0;i<num_threads;++i) {     for (i=0;i<num_threads;++i) {
255          local_n=n/num_threads;          local_n=n/num_threads;
256          rest=n-local_n*num_threads;          rest=n-local_n*num_threads;
257          n_start=local_n*i+MIN(i,rest);          n_start=local_n*i+MIN(i,rest);
258          n_end=local_n*(i+1)+MIN(i+1,rest);          n_end=local_n*(i+1)+MIN(i+1,rest);
259            local_out=0;
260          for (q=n_start;q<n_end;++q) local_out+=x[q]*x[q];          for (q=n_start;q<n_end;++q) local_out+=x[q]*x[q];
261          my_out+=local_out;          my_out+=local_out;
262     }     }

Legend:
Removed from v.1797  
changed lines
  Added in v.1798

  ViewVC Help
Powered by ViewVC 1.1.26