/[escript]/trunk/paso/src/PasoUtil.c
ViewVC logotype

Contents of /trunk/paso/src/PasoUtil.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 2244 - (show annotations)
Wed Feb 4 06:27:34 2009 UTC (10 years, 7 months ago) by gross
File MIME type: text/plain
File size: 9201 byte(s)
fix for openmp
1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2008 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14
15 /**************************************************************/
16
17 /* Some utility routines: */
18
19 /**************************************************************/
20
21 /* Copyrights by ACcESS Australia, 2003,2004,2005 */
22
23 /**************************************************************/
24
25 #include "Common.h"
26 #include "PasoUtil.h"
27 #ifdef _OPENMP
28 #include <omp.h>
29 #endif
30
31 /* returns true if array contains value */
32 bool_t Paso_Util_isAny(dim_t N,index_t* array,index_t value) {
33 bool_t out=FALSE;
34 dim_t i;
35 #pragma omp parallel for private(i) schedule(static) reduction(||:out)
36 for (i=0;i<N;i++) {
37 out = out || (array[i]==value);
38 }
39 return out;
40 }
41
42 /**************************************************************/
43
44 /* calculates the cummultative sum in array and returns the total sum */
45
46 /**************************************************************/
47 index_t Paso_Util_cumsum(dim_t N,index_t* array) {
48 index_t out=0,tmp;
49 dim_t i;
50 #ifdef _OPENMP
51 index_t *partial_sums=NULL,sum;
52 partial_sums=TMPMEMALLOC(omp_get_max_threads(),index_t);
53 #pragma omp parallel private(sum,i)
54 {
55 sum=0;
56 #pragma omp for schedule(static)
57 for (i=0;i<N;++i) sum+=array[i];
58
59 partial_sums[omp_get_thread_num()]=sum;
60 }
61
62 {
63 out=0;
64 for (i=0;i<omp_get_max_threads();++i) {
65 tmp=out;
66 out+=partial_sums[i];
67 partial_sums[i]=tmp;
68 }
69 }
70
71 #pragma omp parallel private(sum,tmp,i)
72 {
73 sum=partial_sums[omp_get_thread_num()];
74 #pragma omp for schedule(static)
75 for (i=0;i<N;++i) {
76 tmp=sum;
77 sum+=array[i];
78 array[i]=tmp;
79 }
80 }
81 TMPMEMFREE(partial_sums);
82 #else
83 for (i=0;i<N;++i) {
84 tmp=out;
85 out+=array[i];
86 array[i]=tmp;
87 }
88 #endif
89 return out;
90 }
91 /*
92 * set x to zero:
93 */
94 void Paso_zeroes(const dim_t n, double* x)
95 {
96 dim_t i,local_n,rest,n_start,n_end,q;
97 #ifdef _OPENMP
98 const int num_threads=omp_get_max_threads();
99 #else
100 const int num_threads=1;
101 #endif
102
103 #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q)
104 for (i=0;i<num_threads;++i) {
105 local_n=n/num_threads;
106 rest=n-local_n*num_threads;
107 n_start=local_n*i+MIN(i,rest);
108 n_end=local_n*(i+1)+MIN(i+1,rest);
109 #pragma ivdep
110 for (q=n_start;q<n_end;++q) x[q]=0;
111 }
112
113 }
114 /*
115 * performs an update of the form x=a*x+b*y where y and x are long vectors.
116 * if b=0, y is not used.
117 */
118 void Paso_Update(const dim_t n, const double a, double* x, const double b, const double* y)
119 {
120 dim_t i,local_n,rest,n_start,n_end,q;
121 #ifdef _OPENMP
122 const int num_threads=omp_get_max_threads();
123 #else
124 const int num_threads=1;
125 #endif
126
127 #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q)
128 for (i=0;i<num_threads;++i) {
129 local_n=n/num_threads;
130 rest=n-local_n*num_threads;
131 n_start=local_n*i+MIN(i,rest);
132 n_end=local_n*(i+1)+MIN(i+1,rest);
133 if (a==1.) {
134 #pragma ivdep
135 for (q=n_start;q<n_end;++q) {
136 x[q]+=b*y[q];
137 }
138 } else if ((a==1.) && (b==1)) {
139 #pragma ivdep
140 for (q=n_start;q<n_end;++q) {
141 x[q]+=y[q];
142 }
143 } else if ((ABS(a)==0) && (ABS(b)==0)) {
144 #pragma ivdep
145 for (q=n_start;q<n_end;++q) {
146 x[q]=0.;
147 }
148 } else if ((ABS(a)==0) && (ABS(b)==1)) {
149 #pragma ivdep
150 for (q=n_start;q<n_end;++q) {
151 x[q]=y[q];
152 }
153 } else if (ABS(a)==0) {
154 #pragma ivdep
155 for (q=n_start;q<n_end;++q) {
156 x[q]=b*y[q];
157 }
158 } else {
159 #pragma ivdep
160 for (q=n_start;q<n_end;++q) {
161 x[q]=a*x[q]+b*y[q];
162 }
163 }
164 }
165 }
166 void Paso_Copy(const dim_t n, double* out, const double* in) {
167 Paso_LinearCombination(n,out, 1.,in,0., in);
168 }
169 /*
170 * performs an update of the form z=a*x+b*y where y and x are long vectors.
171 * if a=0, x is not used.
172 * if b=0, y is not used.
173 */
174 void Paso_LinearCombination(const dim_t n, double*z, const double a,const double* x, const double b, const double* y)
175 {
176 dim_t i,local_n,rest,n_start,n_end,q;
177 #ifdef _OPENMP
178 const int num_threads=omp_get_max_threads();
179 #else
180 const int num_threads=1;
181 #endif
182
183 #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q)
184 for (i=0;i<num_threads;++i) {
185 local_n=n/num_threads;
186 rest=n-local_n*num_threads;
187 n_start=local_n*i+MIN(i,rest);
188 n_end=local_n*(i+1)+MIN(i+1,rest);
189 if (((ABS(a)==0) && (ABS(b)==0)) || (y==NULL) || (x==NULL)) {
190 #pragma ivdep
191 for (q=n_start;q<n_end;++q) {
192 z[q]=0.;
193 }
194 } else if ( ((ABS(a)==0) && (ABS(b)>0.)) || (x==NULL) ) {
195 #pragma ivdep
196 for (q=n_start;q<n_end;++q) {
197 z[q]=b*y[q];
198 }
199 } else if (((ABS(a)>0) && (ABS(b)==0.)) || (y==NULL) ) {
200 #pragma ivdep
201 for (q=n_start;q<n_end;++q) {
202 z[q]=a*x[q];
203 }
204 } else {
205 #pragma ivdep
206 for (q=n_start;q<n_end;++q) {
207 z[q]=a*x[q]+b*y[q];
208 }
209 }
210 }
211 }
212
213
214
215 double Paso_InnerProduct(const dim_t n,const double* x, const double* y, Paso_MPIInfo* mpiinfo)
216 {
217 dim_t i,local_n,rest,n_start,n_end,q;
218 double my_out=0, local_out=0., out=0.;
219 #ifdef _OPENMP
220 const int num_threads=omp_get_max_threads();
221 #else
222 const int num_threads=1;
223 #endif
224 #pragma omp parallel for private(i,local_out,local_n,rest,n_start,n_end,q)
225 for (i=0;i<num_threads;++i) {
226 local_out=0;
227 local_n=n/num_threads;
228 rest=n-local_n*num_threads;
229 n_start=local_n*i+MIN(i,rest);
230 n_end=local_n*(i+1)+MIN(i+1,rest);
231 #pragma ivdep
232 for (q=n_start;q<n_end;++q) local_out+=x[q]*y[q];
233 #pragma omp critical
234 {
235 my_out+=local_out;
236 }
237 }
238 #ifdef PASO_MPI
239 #pragma omp single
240 {
241 MPI_Allreduce(&my_out,&out, 1, MPI_DOUBLE, MPI_SUM, mpiinfo->comm);
242 }
243 #else
244 out=my_out;
245 #endif
246
247 return out;
248
249 }
250 double Paso_lsup(const dim_t n, const double* x, Paso_MPIInfo* mpiinfo)
251 {
252 dim_t i,local_n,rest,n_start,n_end,q;
253 double my_out=0., local_out=0., out=0.;
254 #ifdef _OPENMP
255 const int num_threads=omp_get_max_threads();
256 #else
257 const int num_threads=1;
258 #endif
259 #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q, local_out)
260 for (i=0;i<num_threads;++i) {
261 local_n=n/num_threads;
262 rest=n-local_n*num_threads;
263 n_start=local_n*i+MIN(i,rest);
264 n_end=local_n*(i+1)+MIN(i+1,rest);
265 local_out=0;
266 for (q=n_start;q<n_end;++q) local_out=MAX(ABS(x[q]),local_out);
267 #pragma omp critical
268 {
269 my_out=MAX(my_out,local_out);
270 }
271 }
272 #ifdef PASO_MPI
273 #pragma omp single
274 {
275 MPI_Allreduce(&my_out,&out, 1, MPI_DOUBLE, MPI_MAX, mpiinfo->comm);
276 }
277 #else
278 out=my_out;
279 #endif
280
281 return out;
282
283 }
284 double Paso_l2(const dim_t n, const double* x, Paso_MPIInfo* mpiinfo)
285 {
286 dim_t i,local_n,rest,n_start,n_end,q;
287 double my_out=0, local_out=0., out=0.;
288 #ifdef _OPENMP
289 const int num_threads=omp_get_max_threads();
290 #else
291 const int num_threads=1;
292 #endif
293 #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q, local_out)
294 for (i=0;i<num_threads;++i) {
295 local_n=n/num_threads;
296 rest=n-local_n*num_threads;
297 n_start=local_n*i+MIN(i,rest);
298 n_end=local_n*(i+1)+MIN(i+1,rest);
299 local_out=0;
300 for (q=n_start;q<n_end;++q) local_out+=x[q]*x[q];
301 #pragma omp critical
302 {
303 my_out+=local_out;
304 }
305 }
306 #ifdef PASO_MPI
307 #pragma omp single
308 {
309 MPI_Allreduce(&my_out,&out, 1, MPI_DOUBLE, MPI_SUM, mpiinfo->comm);
310 }
311 #else
312 out=my_out;
313 #endif
314
315 return sqrt(out);
316
317 }
318 /*
319 * Apply a sequence of n-1 Givens rotations (c,s) to v of length n which assumed to be small.
320 *
321 */
322 void ApplyGivensRotations(const dim_t n,double* v,const double* c,const double* s)
323 {
324 dim_t i;
325 register double w1,w2;
326 #pragma ivdep
327 for (i=0; i<n-1;++i) {
328 w1=c[i]*v[i]-s[i]*v[i+1];
329 w2=s[i]*v[i]+c[i]*v[i+1];
330 v[i]=w1;
331 v[i+1]=w2;
332 }
333 }
334
335 bool_t Paso_fileExists( const char* filename )
336 {
337 FILE* fp = NULL;
338 fp = fopen(filename,"r");
339 if( fp != NULL ) {
340 fclose(fp);
341 return TRUE;
342 } else {
343 return FALSE;
344 }
345 }

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26