/[escript]/trunk/paso/src/PasoUtil.c
ViewVC logotype

Annotation of /trunk/paso/src/PasoUtil.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1705 - (hide annotations)
Thu Aug 14 05:56:40 2008 UTC (11 years, 2 months ago) by ksteube
File MIME type: text/plain
File size: 8086 byte(s)
Branch scons-dev is hereby closed.
Some parts of scons scripts have been re-written.

1 jgs 150 /* $Id$ */
2    
3 ksteube 1312 /*******************************************************
4     *
5     * Copyright 2003-2007 by ACceSS MNRF
6     * Copyright 2007 by University of Queensland
7     *
8     * http://esscc.uq.edu.au
9     * Primary Business: Queensland, Australia
10     * Licensed under the Open Software License version 3.0
11     * http://www.opensource.org/licenses/osl-3.0.php
12     *
13     *******************************************************/
14 dhawcroft 631
15 jgs 150 /**************************************************************/
16    
17     /* Some utility routines: */
18    
19     /**************************************************************/
20    
21     /* Copyrights by ACcESS Australia, 2003,2004,2005 */
22    
23     /**************************************************************/
24    
25     #include "Common.h"
26 jgs 482 #include "PasoUtil.h"
27 jgs 150 #ifdef _OPENMP
28     #include <omp.h>
29     #endif
30    
31     /* returns true if array contains value */
32     bool_t Paso_Util_isAny(dim_t N,index_t* array,index_t value) {
33     bool_t out=FALSE;
34     dim_t i;
35     #pragma omp parallel for private(i) schedule(static) reduction(||:out)
36 gross 1639 for (i=0;i<N;i++) {
37     out = out || (array[i]==value);
38     }
39 jgs 150 return out;
40     }
41    
42     /**************************************************************/
43    
44     /* calculates the cummultative sum in array and returns the total sum */
45    
46     /**************************************************************/
47     index_t Paso_Util_cumsum(dim_t N,index_t* array) {
48     index_t out=0,tmp;
49     dim_t i;
50     #ifdef _OPENMP
51 gross 1564 index_t *partial_sums=NULL,sum;
52     partial_sums=TMPMEMALLOC(omp_get_max_threads(),index_t);
53 jgs 150 #pragma omp parallel private(sum,i,tmp)
54     {
55     sum=0;
56     #pragma omp for schedule(static)
57     for (i=0;i<N;++i) sum+=array[i];
58 gross 1639
59 jgs 150 partial_sums[omp_get_thread_num()]=sum;
60     #pragma omp barrier
61 gross 1556 #pragma omp single
62 jgs 150 {
63     out=0;
64     for (i=0;i<omp_get_max_threads();++i) {
65     tmp=out;
66     out+=partial_sums[i];
67     partial_sums[i]=tmp;
68     }
69     }
70     sum=partial_sums[omp_get_thread_num()];
71     #pragma omp for schedule(static)
72     for (i=0;i<N;++i) {
73     tmp=sum;
74     sum+=array[i];
75     array[i]=tmp;
76     }
77     }
78 gross 1564 TMPMEMFREE(partial_sums);
79 jgs 150 #else
80     for (i=0;i<N;++i) {
81     tmp=out;
82     out+=array[i];
83     array[i]=tmp;
84     }
85     #endif
86     return out;
87     }
88 gross 1639 /*
89     * set x to zero:
90     */
91     void Paso_zeroes(const dim_t n, double* x)
92     {
93     dim_t i,local_n,rest,n_start,n_end,q;
94     #ifdef _OPENMP
95     const int num_threads=omp_get_max_threads();
96     #else
97     const int num_threads=1;
98     #endif
99 jgs 150
100 gross 1639 #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q)
101     for (i=0;i<num_threads;++i) {
102     local_n=n/num_threads;
103     rest=n-local_n*num_threads;
104     n_start=local_n*i+MIN(i,rest);
105     n_end=local_n*(i+1)+MIN(i+1,rest);
106     #pragma ivdep
107     for (q=n_start;q<n_end;++q) x[q]=0;
108     }
109    
110     }
111 jgs 150 /*
112 gross 1639 * performs an update of the form x=a*x+b*y where y and x are long vectors.
113     * if b=0, y is not used.
114     */
115     void Paso_Update(const dim_t n, const double a, double* x, const double b, const double* y)
116     {
117     dim_t i,local_n,rest,n_start,n_end,q;
118     #ifdef _OPENMP
119     const int num_threads=omp_get_max_threads();
120     #else
121     const int num_threads=1;
122     #endif
123    
124     #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q)
125     for (i=0;i<num_threads;++i) {
126     local_n=n/num_threads;
127     rest=n-local_n*num_threads;
128     n_start=local_n*i+MIN(i,rest);
129     n_end=local_n*(i+1)+MIN(i+1,rest);
130     if (a==1.) {
131     #pragma ivdep
132     for (q=n_start;q<n_end;++q) {
133     x[q]+=b*y[q];
134     }
135     } else if ((a==1.) && (b==1)) {
136     #pragma ivdep
137     for (q=n_start;q<n_end;++q) {
138     x[q]+=y[q];
139     }
140     } else if ((ABS(a)==0) && (ABS(b)==0)) {
141     #pragma ivdep
142     for (q=n_start;q<n_end;++q) {
143     x[q]=0.;
144     }
145     } else if ((ABS(a)==0) && (ABS(b)==1)) {
146     #pragma ivdep
147     for (q=n_start;q<n_end;++q) {
148     x[q]=y[q];
149     }
150     } else if (ABS(a)==0) {
151     #pragma ivdep
152     for (q=n_start;q<n_end;++q) {
153     x[q]=b*y[q];
154     }
155     } else {
156     #pragma ivdep
157     for (q=n_start;q<n_end;++q) {
158     x[q]=a*x[q]+b*y[q];
159     }
160     }
161     }
162     }
163     void Paso_Copy(const dim_t n, double* out, const double* in) {
164     Paso_LinearCombination(n,out, 1.,in,0., in);
165     }
166     /*
167     * performs an update of the form z=a*x+b*y where y and x are long vectors.
168     * if a=0, x is not used.
169     * if b=0, y is not used.
170     */
171     void Paso_LinearCombination(const dim_t n, double*z, const double a,const double* x, const double b, const double* y)
172     {
173     dim_t i,local_n,rest,n_start,n_end,q;
174     #ifdef _OPENMP
175     const int num_threads=omp_get_max_threads();
176     #else
177     const int num_threads=1;
178     #endif
179    
180     #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q)
181     for (i=0;i<num_threads;++i) {
182     local_n=n/num_threads;
183     rest=n-local_n*num_threads;
184     n_start=local_n*i+MIN(i,rest);
185     n_end=local_n*(i+1)+MIN(i+1,rest);
186     if ((ABS(a)==0) && (ABS(b)==0)) {
187     #pragma ivdep
188     for (q=n_start;q<n_end;++q) {
189     z[q]=0.;
190     }
191     } else if ((ABS(a)==0) && (ABS(b)>0.)) {
192     #pragma ivdep
193     for (q=n_start;q<n_end;++q) {
194     z[q]=b*y[q];
195     }
196     } else if ((ABS(a)>0) && (ABS(b)==0.)) {
197     #pragma ivdep
198     for (q=n_start;q<n_end;++q) {
199     z[q]=a*x[q];
200     }
201     } else {
202     #pragma ivdep
203     for (q=n_start;q<n_end;++q) {
204     z[q]=a*x[q]+b*y[q];
205     }
206     }
207     }
208     }
209    
210    
211    
212     double Paso_InnerProduct(const dim_t n,const double* x, const double* y, Paso_MPIInfo* mpiinfo)
213     {
214     dim_t i,local_n,rest,n_start,n_end,q;
215     double my_out=0, local_out=0., out=0.;
216     #ifdef _OPENMP
217     const int num_threads=omp_get_max_threads();
218     #else
219     const int num_threads=1;
220     #endif
221     #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q) reduction(+:my_out)
222     for (i=0;i<num_threads;++i) {
223     local_n=n/num_threads;
224     rest=n-local_n*num_threads;
225     n_start=local_n*i+MIN(i,rest);
226     n_end=local_n*(i+1)+MIN(i+1,rest);
227     for (q=n_start;q<n_end;++q) local_out+=x[q]*y[q];
228     my_out+=local_out;
229     }
230     #ifdef PASO_MPI
231     #pragma omp single
232     {
233     MPI_Allreduce(&my_out,&out, 1, MPI_DOUBLE, MPI_SUM, mpiinfo->comm);
234     }
235     #else
236     out=my_out;
237     #endif
238    
239     return out;
240    
241     }
242     double Paso_l2(const dim_t n, const double* x, Paso_MPIInfo* mpiinfo)
243     {
244     dim_t i,local_n,rest,n_start,n_end,q;
245     double my_out=0, local_out=0., out=0.;
246     #ifdef _OPENMP
247     const int num_threads=omp_get_max_threads();
248     #else
249     const int num_threads=1;
250     #endif
251     #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q) reduction(+:my_out)
252     for (i=0;i<num_threads;++i) {
253     local_n=n/num_threads;
254     rest=n-local_n*num_threads;
255     n_start=local_n*i+MIN(i,rest);
256     n_end=local_n*(i+1)+MIN(i+1,rest);
257     for (q=n_start;q<n_end;++q) local_out+=x[q]*x[q];
258     my_out+=local_out;
259     }
260     #ifdef PASO_MPI
261     #pragma omp single
262     {
263     MPI_Allreduce(&my_out,&out, 1, MPI_DOUBLE, MPI_SUM, mpiinfo->comm);
264     }
265     #else
266     out=my_out;
267     #endif
268    
269     return sqrt(out);
270    
271     }
272     /*
273     * Apply a sequence of n-1 Givens rotations (c,s) to v of length n which assumed to be small.
274 jgs 150 *
275     */
276 gross 1639 void ApplyGivensRotations(const dim_t n,double* v,const double* c,const double* s)
277     {
278     dim_t i;
279     register double w1,w2;
280     #pragma ivdep
281     for (i=0; i<n-1;++i) {
282     w1=c[i]*v[i]-s[i]*v[i+1];
283     w2=s[i]*v[i]+c[i]*v[i+1];
284     v[i]=w1;
285     v[i+1]=w2;
286     }
287     }
288 gross 1669
289     bool_t Paso_fileExists( const char* filename )
290     {
291     FILE* fp = NULL;
292     fp = fopen(filename,"r");
293     if( fp != NULL ) {
294 ksteube 1705 fclose(fp);
295 gross 1669 return TRUE;
296     } else {
297     return FALSE;
298     }
299     }

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26