/[escript]/trunk/paso/src/PasoUtil.c
ViewVC logotype

Diff of /trunk/paso/src/PasoUtil.c

Parent Directory | Revision Log | View Patch

revision 3318 by caltinay, Thu Oct 28 01:05:36 2010 UTC | revision 3323 by gross, Thu Oct 28 09:53:46 2010 UTC
# Line 59  bool_t Paso_Util_isAny(dim_t N,index_t* Line 59  bool_t Paso_Util_isAny(dim_t N,index_t*
59  index_t Paso_Util_cumsum(dim_t N,index_t* array) {  index_t Paso_Util_cumsum(dim_t N,index_t* array) {
60     index_t out=0,tmp;     index_t out=0,tmp;
61     dim_t i;     dim_t i;
62  #ifdef _OPENMP     index_t *partial_sums=NULL,sum;
63     const int num_threads=omp_get_max_threads();     const int num_threads=omp_get_max_threads();
64     const int thread_num=omp_get_thread_num();     int thread_num;
65  #else    
    const int num_threads=1;  
    const int thread_num=0;  
 #endif  
66     if (num_threads>1) {     if (num_threads>1) {
       index_t *partial_sums=NULL,sum;  
67        partial_sums=TMPMEMALLOC(num_threads, index_t);        partial_sums=TMPMEMALLOC(num_threads, index_t);
68        #pragma omp parallel private(sum,i)        #pragma omp parallel private(sum,thread_num ,i,tmp)
69        {        {
70          sum=0;          sum=0;
71        thread_num=omp_get_thread_num();
72          #pragma omp for schedule(static)          #pragma omp for schedule(static)
73          for (i=0;i<N;++i) sum+=array[i];          for (i=0;i<N;++i) sum+=array[i];
74    
75          partial_sums[thread_num]=sum;          partial_sums[thread_num]=sum;
76        }      #pragma omp barrier
77        #pragma omp master
78        {          {
79            out=0;            out=0;
80            for (i=0;i<num_threads;++i) {            for (i=0;i<num_threads;++i) {
81               tmp=out;               tmp=out;
82               out+=partial_sums[i];               out+=partial_sums[i];
83               partial_sums[i]=tmp;               partial_sums[i]=tmp;
84             }             }
85        }          }
86                #pragma omp barrier
       #pragma omp parallel private(sum,tmp,i)  
       {  
87          sum=partial_sums[thread_num];          sum=partial_sums[thread_num];
88          #pragma omp for schedule(static)          #pragma omp for schedule(static)
89          for (i=0;i<N;++i) {          for (i=0;i<N;++i) {
# Line 112  index_t Paso_Util_cumsum_maskedTrue(dim_ Line 107  index_t Paso_Util_cumsum_maskedTrue(dim_
107     index_t out=0,tmp;     index_t out=0,tmp;
108     dim_t i;     dim_t i;
109     index_t *partial_sums=NULL,sum;     index_t *partial_sums=NULL,sum;
 #ifdef _OPENMP  
110     const int num_threads=omp_get_max_threads();     const int num_threads=omp_get_max_threads();
111     const int thread_num=omp_get_thread_num();     int thread_num;
 #else  
    const int num_threads=1;  
    const int thread_num=0;  
 #endif  
112    
113     if (num_threads>1) {     if (num_threads>1) {
114        partial_sums=TMPMEMALLOC(num_threads, index_t);        partial_sums=TMPMEMALLOC(num_threads, index_t);
115        #pragma omp parallel private(sum,i)        #pragma omp parallel private(sum,i,thread_num,tmp)
116        {        {
117           sum=0;           sum=0;
118         thread_num=omp_get_thread_num();
119           #pragma omp for schedule(static)           #pragma omp for schedule(static)
120           for (i=0;i<N;++i) {           for (i=0;i<N;++i) {
121              if (mask[i]) {              if (mask[i]) {
# Line 135  index_t Paso_Util_cumsum_maskedTrue(dim_ Line 126  index_t Paso_Util_cumsum_maskedTrue(dim_
126              }              }
127           }           }
128           partial_sums[thread_num]=sum;           partial_sums[thread_num]=sum;
129        }       #pragma omp barrier
130                   #pragma omp master
131        {       {
132           out=0;          out=0;
133           for (i=0;i<num_threads;++i) {          for (i=0;i<num_threads;++i) {
134              tmp=out;             tmp=out;
135              out+=partial_sums[i];             out+=partial_sums[i];
136              partial_sums[i]=tmp;             partial_sums[i]=tmp;
137           }          }
138        }       }
139               #pragma omp barrier
       #pragma omp parallel private(sum,tmp,i)  
       {  
140           sum=partial_sums[thread_num];           sum=partial_sums[thread_num];
141           #pragma omp for schedule(static)           #pragma omp for schedule(static)
142           for (i=0;i<N;++i) {           for (i=0;i<N;++i) {
# Line 175  index_t Paso_Util_cumsum_maskedTrue(dim_ Line 164  index_t Paso_Util_cumsum_maskedTrue(dim_
164  }  }
165    
166  index_t Paso_Util_cumsum_maskedFalse(dim_t N,index_t* array, bool_t* mask) {  index_t Paso_Util_cumsum_maskedFalse(dim_t N,index_t* array, bool_t* mask) {
167     index_t out=0,tmp;     index_t out=0,tmp=0;
168     dim_t i;     dim_t i;
169     index_t *partial_sums=NULL,sum;     index_t *partial_sums=NULL,sum;
 #ifdef _OPENMP  
170     const int num_threads=omp_get_max_threads();     const int num_threads=omp_get_max_threads();
171     const int thread_num=omp_get_thread_num();     int thread_num=0;
 #else  
    const int num_threads=1;  
    const int thread_num=0;  
 #endif  
172    
173     if (num_threads>1) {     if (num_threads>1) {
174        partial_sums=TMPMEMALLOC(num_threads,index_t);        partial_sums=TMPMEMALLOC(num_threads,index_t);
175        #pragma omp parallel private(sum,i)        #pragma omp parallel private(sum,i,thread_num,tmp)
176        {        {
177           sum=0;           sum=0;
178         thread_num=omp_get_thread_num();
179           #pragma omp for schedule(static)           #pragma omp for schedule(static)
180           for (i=0;i<N;++i) {           for (i=0;i<N;++i) {
181              if (! mask[i]) {              if (! mask[i]) {
# Line 201  index_t Paso_Util_cumsum_maskedFalse(dim Line 186  index_t Paso_Util_cumsum_maskedFalse(dim
186              }              }
187           }           }
188           partial_sums[thread_num]=sum;           partial_sums[thread_num]=sum;
189        }       #pragma omp barrier
190               #pragma omp master
191        {       {
192           out=0;          out=0;
193           for (i=0;i<num_threads;++i) {          for (i=0;i<num_threads;++i) {
194              tmp=out;             tmp=out;
195              out+=partial_sums[i];             out+=partial_sums[i];
196              partial_sums[i]=tmp;             partial_sums[i]=tmp;
197            }          }
198        }       }
199               #pragma omp barrier
       #pragma omp parallel private(sum,tmp,i)  
       {  
200           sum=partial_sums[thread_num];           sum=partial_sums[thread_num];
201           #pragma omp for schedule(static)           #pragma omp for schedule(static)
202           for (i=0;i<N;++i) {           for (i=0;i<N;++i) {
# Line 248  index_t Paso_Util_arg_max(dim_t n, dim_t Line 231  index_t Paso_Util_arg_max(dim_t n, dim_t
231     index_t argmax=-1;     index_t argmax=-1;
232     index_t lmax=-1;     index_t lmax=-1;
233     index_t li=-1;     index_t li=-1;
 #ifdef _OPENMP  
234     const int num_threads=omp_get_max_threads();     const int num_threads=omp_get_max_threads();
 #else  
    const int num_threads=1;  
 #endif  
235        
236     if (n>0) {     if (n>0) {
237        max=lambda[0];        max=lambda[0];
# Line 297  index_t Paso_Util_arg_max(dim_t n, dim_t Line 276  index_t Paso_Util_arg_max(dim_t n, dim_t
276  void Paso_zeroes(const dim_t n, double* x)  void Paso_zeroes(const dim_t n, double* x)
277  {  {
278     dim_t i,local_n,rest,n_start,n_end,q;     dim_t i,local_n,rest,n_start,n_end,q;
 #ifdef _OPENMP  
279     const int num_threads=omp_get_max_threads();     const int num_threads=omp_get_max_threads();
 #else  
    const int num_threads=1;  
 #endif  
280    
281     #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q)     #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q)
282     for (i=0;i<num_threads;++i) {     for (i=0;i<num_threads;++i) {
# Line 322  void Paso_zeroes(const dim_t n, double* Line 297  void Paso_zeroes(const dim_t n, double*
297  void Paso_Update(const dim_t n, const double a, double* x, const double b, const double* y)  void Paso_Update(const dim_t n, const double a, double* x, const double b, const double* y)
298  {  {
299     dim_t i,local_n,rest,n_start,n_end,q;     dim_t i,local_n,rest,n_start,n_end,q;
300     #ifdef _OPENMP     const int num_threads=omp_get_max_threads();
301         const int num_threads=omp_get_max_threads();  
    #else  
        const int num_threads=1;  
    #endif  
302    
303     #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q)     #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q)
304     for (i=0;i<num_threads;++i) {     for (i=0;i<num_threads;++i) {
# Line 378  void Paso_Copy(const dim_t n, double* ou Line 350  void Paso_Copy(const dim_t n, double* ou
350  void Paso_LinearCombination(const dim_t n, double*z, const double a,const double* x, const double b, const double* y)  void Paso_LinearCombination(const dim_t n, double*z, const double a,const double* x, const double b, const double* y)
351  {  {
352     dim_t i,local_n,rest,n_start,n_end,q;     dim_t i,local_n,rest,n_start,n_end,q;
353     #ifdef _OPENMP     const int num_threads=omp_get_max_threads();
354         const int num_threads=omp_get_max_threads();  
    #else  
        const int num_threads=1;  
    #endif  
355    
356     #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q)     #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q)
357     for (i=0;i<num_threads;++i) {     for (i=0;i<num_threads;++i) {
# Line 420  double Paso_InnerProduct(const dim_t n,c Line 389  double Paso_InnerProduct(const dim_t n,c
389  {  {
390     dim_t i,local_n,rest,n_start,n_end,q;     dim_t i,local_n,rest,n_start,n_end,q;
391     double my_out=0, local_out=0., out=0.;     double my_out=0, local_out=0., out=0.;
392     #ifdef _OPENMP     const int num_threads=omp_get_max_threads();
393         const int num_threads=omp_get_max_threads();  
    #else  
        const int num_threads=1;  
    #endif  
394     #pragma omp parallel for private(i,local_out,local_n,rest,n_start,n_end,q)     #pragma omp parallel for private(i,local_out,local_n,rest,n_start,n_end,q)
395     for (i=0;i<num_threads;++i) {     for (i=0;i<num_threads;++i) {
396          local_out=0;          local_out=0;
# Line 455  double Paso_lsup(const dim_t n, const do Line 421  double Paso_lsup(const dim_t n, const do
421  {  {
422     dim_t i,local_n,rest,n_start,n_end,q;     dim_t i,local_n,rest,n_start,n_end,q;
423     double my_out=0., local_out=0., out=0.;     double my_out=0., local_out=0., out=0.;
424     #ifdef _OPENMP     const int num_threads=omp_get_max_threads();
425         const int num_threads=omp_get_max_threads();  
    #else  
        const int num_threads=1;  
    #endif  
426     #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q, local_out)     #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q, local_out)
427     for (i=0;i<num_threads;++i) {     for (i=0;i<num_threads;++i) {
428          local_n=n/num_threads;          local_n=n/num_threads;
# Line 489  double Paso_l2(const dim_t n, const doub Line 452  double Paso_l2(const dim_t n, const doub
452  {  {
453     dim_t i,local_n,rest,n_start,n_end,q;     dim_t i,local_n,rest,n_start,n_end,q;
454     double my_out=0, local_out=0., out=0.;     double my_out=0, local_out=0., out=0.;
455     #ifdef _OPENMP     const int num_threads=omp_get_max_threads();
456         const int num_threads=omp_get_max_threads();  
    #else  
        const int num_threads=1;  
    #endif  
457     #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q, local_out)     #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q, local_out)
458     for (i=0;i<num_threads;++i) {     for (i=0;i<num_threads;++i) {
459          local_n=n/num_threads;          local_n=n/num_threads;

Legend:
Removed from v.3318  
changed lines
  Added in v.3323

  ViewVC Help
Powered by ViewVC 1.1.26