Contents of /branches/arrexp_2137_win/paso/src/PasoUtil.c

Revision 2202 - Fri Jan 9 01:28:32 2009 UTC by jfenwick
Branching the array experiments from version 2137.
The idea is to make the changes required for the C++ code to compile
on Windows without bringing in the later Python changes.



/*******************************************************
*
* Copyright (c) 2003-2008 by University of Queensland
* Earth Systems Science Computational Center (ESSCC)
* http://www.uq.edu.au/esscc
*
* Primary Business: Queensland, Australia
* Licensed under the Open Software License version 3.0
* http://www.opensource.org/licenses/osl-3.0.php
*
*******************************************************/


/**************************************************************/

/* Some utility routines: */

/**************************************************************/

/* Copyrights by ACcESS Australia, 2003,2004,2005 */

/**************************************************************/

#include "Common.h"
#include "PasoUtil.h"
#include <math.h>   /* for sqrt() in Paso_l2 */
#ifdef _OPENMP
#include <omp.h>
#endif

/* returns true if array contains value */
bool_t Paso_Util_isAny(dim_t N, index_t* array, index_t value) {
    bool_t out=FALSE;
    dim_t i;
    #pragma omp parallel for private(i) schedule(static) reduction(||:out)
    for (i=0;i<N;i++) {
        out = out || (array[i]==value);
    }
    return out;
}

/**************************************************************/

/* calculates the cumulative sum in array and returns the total sum */

/**************************************************************/
index_t Paso_Util_cumsum(dim_t N, index_t* array) {
    index_t out=0,tmp;
    dim_t i;
    #ifdef _OPENMP
    index_t *partial_sums=NULL, sum;
    partial_sums=TMPMEMALLOC(omp_get_max_threads(),index_t);
    #pragma omp parallel private(sum,i,tmp)
    {
        /* each thread sums its static chunk of the array */
        sum=0;
        #pragma omp for schedule(static)
        for (i=0;i<N;++i) sum+=array[i];

        partial_sums[omp_get_thread_num()]=sum;
        #pragma omp barrier
        #pragma omp single
        {
            /* exclusive scan over the per-thread partial sums; loop over
               the actual team size so slots never written by a thread
               are never read */
            out=0;
            for (i=0;i<omp_get_num_threads();++i) {
                tmp=out;
                out+=partial_sums[i];
                partial_sums[i]=tmp;
            }
        }
        sum=partial_sums[omp_get_thread_num()];
        #pragma omp for schedule(static)
        for (i=0;i<N;++i) {
            tmp=sum;
            sum+=array[i];
            array[i]=tmp;
        }
    }
    TMPMEMFREE(partial_sums);
    #else
    for (i=0;i<N;++i) {
        tmp=out;
        out+=array[i];
        array[i]=tmp;
    }
    #endif
    return out;
}
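
/* Usage sketch (editor's addition, not part of the original file):
 * Paso_Util_cumsum overwrites the array with its exclusive prefix sums
 * and returns the grand total, the usual step when turning per-row counts
 * into CSR-style offsets. The example_ function name and the
 * PASO_UTIL_EXAMPLES guard are hypothetical. */
#ifdef PASO_UTIL_EXAMPLES
static void example_cumsum(void) {
    index_t counts[4];
    index_t total;
    counts[0]=2; counts[1]=0; counts[2]=3; counts[3]=1;
    total=Paso_Util_cumsum(4, counts);
    /* counts now holds {0,2,2,5} and total==6, so row r occupies
       entries starting at offset counts[r] */
}
#endif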
/*
 * set x to zero:
 */
void Paso_zeroes(const dim_t n, double* x)
{
    dim_t i,local_n,rest,n_start,n_end,q;
    #ifdef _OPENMP
    const int num_threads=omp_get_max_threads();
    #else
    const int num_threads=1;
    #endif

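    /* Editor's note (added comment): thread i handles the contiguous chunk
       [n_start,n_end); the first (n mod num_threads) threads receive one
       extra element. For example, n=10 with num_threads=3 gives local_n=3,
       rest=1 and the chunks [0,4), [4,7), [7,10). The same partitioning is
       reused by the other vector kernels below. */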
    #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q)
    for (i=0;i<num_threads;++i) {
        local_n=n/num_threads;
        rest=n-local_n*num_threads;
        n_start=local_n*i+MIN(i,rest);
        n_end=local_n*(i+1)+MIN(i+1,rest);
        #pragma ivdep
        for (q=n_start;q<n_end;++q) x[q]=0;
    }
}
/*
 * performs an update of the form x=a*x+b*y where y and x are long vectors.
 * if b=0, y is not used.
 */
void Paso_Update(const dim_t n, const double a, double* x, const double b, const double* y)
{
    dim_t i,local_n,rest,n_start,n_end,q;
    #ifdef _OPENMP
    const int num_threads=omp_get_max_threads();
    #else
    const int num_threads=1;
    #endif

    #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q)
    for (i=0;i<num_threads;++i) {
        local_n=n/num_threads;
        rest=n-local_n*num_threads;
        n_start=local_n*i+MIN(i,rest);
        n_end=local_n*(i+1)+MIN(i+1,rest);
        if ((a==1.) && (b==1.)) {
            /* tested first; otherwise the a==1. case below shadows it */
            #pragma ivdep
            for (q=n_start;q<n_end;++q) {
                x[q]+=y[q];
            }
        } else if (a==1.) {
            #pragma ivdep
            for (q=n_start;q<n_end;++q) {
                x[q]+=b*y[q];
            }
        } else if ((ABS(a)==0) && (ABS(b)==0)) {
            #pragma ivdep
            for (q=n_start;q<n_end;++q) {
                x[q]=0.;
            }
        } else if ((ABS(a)==0) && (b==1.)) {
            /* b==1. rather than ABS(b)==1, so b=-1 is not treated as a copy */
            #pragma ivdep
            for (q=n_start;q<n_end;++q) {
                x[q]=y[q];
            }
        } else if (ABS(a)==0) {
            #pragma ivdep
            for (q=n_start;q<n_end;++q) {
                x[q]=b*y[q];
            }
        } else {
            #pragma ivdep
            for (q=n_start;q<n_end;++q) {
                x[q]=a*x[q]+b*y[q];
            }
        }
    }
}
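
/* Usage sketch (editor's addition): a DAXPY-style update x <- x - omega*y,
 * as an iterative solver might issue it. The function and parameter names
 * are illustrative only. */
#ifdef PASO_UTIL_EXAMPLES
static void example_update(dim_t n, double* x, const double* y, double omega) {
    /* x = 1.0*x + (-omega)*y; the a==1. branch avoids a redundant scaling of x */
    Paso_Update(n, 1., x, -omega, y);
}
#endif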
void Paso_Copy(const dim_t n, double* out, const double* in) {
    Paso_LinearCombination(n, out, 1., in, 0., in);
}
/*
 * performs an update of the form z=a*x+b*y where y and x are long vectors.
 * if a=0, x is not used.
 * if b=0, y is not used.
 */
void Paso_LinearCombination(const dim_t n, double* z, const double a, const double* x, const double b, const double* y)
{
    dim_t i,local_n,rest,n_start,n_end,q;
    #ifdef _OPENMP
    const int num_threads=omp_get_max_threads();
    #else
    const int num_threads=1;
    #endif

    #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q)
    for (i=0;i<num_threads;++i) {
        local_n=n/num_threads;
        rest=n-local_n*num_threads;
        n_start=local_n*i+MIN(i,rest);
        n_end=local_n*(i+1)+MIN(i+1,rest);
        /* a NULL operand is treated like a zero coefficient; each operand is
           tested separately so that, e.g., x==NULL with b>0 still yields z=b*y */
        if ( ((ABS(a)==0) || (x==NULL)) && ((ABS(b)==0) || (y==NULL)) ) {
            #pragma ivdep
            for (q=n_start;q<n_end;++q) {
                z[q]=0.;
            }
        } else if ( (ABS(a)==0) || (x==NULL) ) {
            #pragma ivdep
            for (q=n_start;q<n_end;++q) {
                z[q]=b*y[q];
            }
        } else if ( (ABS(b)==0) || (y==NULL) ) {
            #pragma ivdep
            for (q=n_start;q<n_end;++q) {
                z[q]=a*x[q];
            }
        } else {
            #pragma ivdep
            for (q=n_start;q<n_end;++q) {
                z[q]=a*x[q]+b*y[q];
            }
        }
    }
}

double Paso_InnerProduct(const dim_t n, const double* x, const double* y, Paso_MPIInfo* mpiinfo)
{
    dim_t i,local_n,rest,n_start,n_end,q;
    double my_out=0, local_out=0., out=0.;
    #ifdef _OPENMP
    const int num_threads=omp_get_max_threads();
    #else
    const int num_threads=1;
    #endif
    #pragma omp parallel for private(i,local_out,local_n,rest,n_start,n_end,q) reduction(+:my_out)
    for (i=0;i<num_threads;++i) {
        local_out=0;
        local_n=n/num_threads;
        rest=n-local_n*num_threads;
        n_start=local_n*i+MIN(i,rest);
        n_end=local_n*(i+1)+MIN(i+1,rest);
        #pragma ivdep
        for (q=n_start;q<n_end;++q) local_out+=x[q]*y[q];
        my_out+=local_out;
    }
    #ifdef PASO_MPI
    MPI_Allreduce(&my_out, &out, 1, MPI_DOUBLE, MPI_SUM, mpiinfo->comm);
    #else
    out=my_out;
    #endif
    return out;
}
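
/* Usage sketch (editor's addition): a global dot product as a CG-style
 * iteration would use it; in an MPI build every rank must call this
 * collectively. The function name and the alpha formula shown are
 * illustrative, not part of this file's API. */
#ifdef PASO_UTIL_EXAMPLES
static double example_alpha(dim_t n, const double* r, const double* p,
                            const double* Ap, Paso_MPIInfo* mpiinfo) {
    /* step length alpha = <r,r>/<p,Ap> in conjugate gradient (illustrative;
       no guard against a zero denominator) */
    return Paso_InnerProduct(n, r, r, mpiinfo) / Paso_InnerProduct(n, p, Ap, mpiinfo);
}
#endif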
double Paso_lsup(const dim_t n, const double* x, Paso_MPIInfo* mpiinfo)
{
    dim_t i,local_n,rest,n_start,n_end,q;
    double my_out=SMALL_NEGATIVE_FLOAT, local_out=SMALL_NEGATIVE_FLOAT, out=SMALL_NEGATIVE_FLOAT;
    #ifdef _OPENMP
    const int num_threads=omp_get_max_threads();
    #else
    const int num_threads=1;
    #endif
    /* my_out is shared and combined in the critical section below; a
       reduction(+:my_out) clause here would sum the per-thread maxima
       instead of taking their maximum */
    #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q,local_out)
    for (i=0;i<num_threads;++i) {
        local_n=n/num_threads;
        rest=n-local_n*num_threads;
        n_start=local_n*i+MIN(i,rest);
        n_end=local_n*(i+1)+MIN(i+1,rest);
        local_out=0;
        for (q=n_start;q<n_end;++q) local_out=MAX(ABS(x[q]),local_out);
        #pragma omp critical
        my_out=MAX(my_out,local_out);
    }
    #ifdef PASO_MPI
    MPI_Allreduce(&my_out, &out, 1, MPI_DOUBLE, MPI_MAX, mpiinfo->comm);
    #else
    out=my_out;
    #endif
    return out;
}
double Paso_l2(const dim_t n, const double* x, Paso_MPIInfo* mpiinfo)
{
    dim_t i,local_n,rest,n_start,n_end,q;
    double my_out=0, local_out=0., out=0.;
    #ifdef _OPENMP
    const int num_threads=omp_get_max_threads();
    #else
    const int num_threads=1;
    #endif
    #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q,local_out) reduction(+:my_out)
    for (i=0;i<num_threads;++i) {
        local_n=n/num_threads;
        rest=n-local_n*num_threads;
        n_start=local_n*i+MIN(i,rest);
        n_end=local_n*(i+1)+MIN(i+1,rest);
        local_out=0;
        for (q=n_start;q<n_end;++q) local_out+=x[q]*x[q];
        my_out+=local_out;
    }
    #ifdef PASO_MPI
    MPI_Allreduce(&my_out, &out, 1, MPI_DOUBLE, MPI_SUM, mpiinfo->comm);
    #else
    out=my_out;
    #endif
    return sqrt(out);
}
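
/* Usage sketch (editor's addition): a convergence test built from the
 * reductions above. In a serial build the mpiinfo argument is never
 * dereferenced. The function name and the 1.e-8 relative tolerance are
 * hypothetical choices for illustration. */
#ifdef PASO_UTIL_EXAMPLES
static bool_t example_converged(dim_t n, const double* r, const double* b,
                                Paso_MPIInfo* mpiinfo) {
    const double r_norm=Paso_l2(n, r, mpiinfo);    /* ||r||_2   */
    const double b_sup=Paso_lsup(n, b, mpiinfo);   /* ||b||_inf */
    return (r_norm <= 1.e-8*b_sup) ? TRUE : FALSE;
}
#endif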
/*
 * Apply a sequence of n-1 Givens rotations (c,s) to v of length n, which is assumed to be small.
 */
void ApplyGivensRotations(const dim_t n, double* v, const double* c, const double* s)
{
    dim_t i;
    register double w1,w2;
    #pragma ivdep
    for (i=0; i<n-1; ++i) {
        w1=c[i]*v[i]-s[i]*v[i+1];
        w2=s[i]*v[i]+c[i]*v[i+1];
        v[i]=w1;
        v[i+1]=w2;
    }
}
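
/* Worked example (editor's addition): with n=2, c[0]=0.6, s[0]=0.8 and
 * v={1,0}, the rotation gives w1=0.6*1-0.8*0=0.6 and w2=0.8*1+0.6*0=0.8,
 * so v becomes {0.6,0.8}. This is the pattern a GMRES-type solver uses
 * when applying the stored rotations to a new Hessenberg column. */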

bool_t Paso_fileExists(const char* filename)
{
    FILE* fp = fopen(filename, "r");
    if (fp != NULL) {
        fclose(fp);
        return TRUE;
    } else {
        return FALSE;
    }
}
