/[escript]/trunk/paso/src/PasoUtil.c
ViewVC logotype

Contents of /trunk/paso/src/PasoUtil.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3259 - (show annotations)
Mon Oct 11 01:48:14 2010 UTC (8 years, 11 months ago) by jfenwick
File MIME type: text/plain
File size: 9581 byte(s)
Merging dudley and scons updates from branches

1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2010 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14
15 /**************************************************************/
16
17 /* Some utility routines: */
18
19 /**************************************************************/
20
21 /* Copyrights by ACcESS Australia, 2003,2004,2005 */
22
23 /**************************************************************/
24
25 #include "Common.h"
26 #include "PasoUtil.h"
27 #ifdef _OPENMP
28 #include <omp.h>
29 #endif
30
31 /* returns true if array contains value */
32 bool_t Paso_Util_isAny(dim_t N,index_t* array,index_t value) {
33 bool_t out=FALSE;
34 register bool_t out2;
35 dim_t i;
36
37 #pragma omp parallel private(i, out2)
38 {
39 out2=FALSE;
40 #pragma omp for schedule(static)
41 for (i=0;i<N;i++) out2 = out2 || (array[i]==value);
42 #pragma omp critical
43 {
44 out = out || out2;
45 }
46 /* this how this should look like but gcc 4.4 seems to have a problem with this:
47 #pragma omp parallel for private(i) schedule(static) reduction(||:out)
48 for (i=0;i<N;i++) out = out || (array[i]==value);
49 */
50 }
51 return out;
52 }
53
54 /**************************************************************/
55
/* calculates the cumulative sum in array and returns the total sum */
57
58 /**************************************************************/
/* Replaces array[i] in place by the exclusive prefix sum
   array[0]+...+array[i-1] and returns the total sum of all
   original entries. */
index_t Paso_Util_cumsum(dim_t N,index_t* array) {
   index_t out=0,tmp;
   dim_t i;
   #ifdef _OPENMP
      index_t *partial_sums=NULL,sum;
      /* one slot per thread; NOTE(review): the allocation result is not
         checked -- a failed TMPMEMALLOC would be dereferenced below */
      partial_sums=TMPMEMALLOC(omp_get_max_threads(),index_t);
      /* pass 1: each thread sums its statically scheduled chunk of array */
      #pragma omp parallel private(sum,i)
      {
        sum=0;
        #pragma omp for schedule(static)
        for (i=0;i<N;++i) sum+=array[i];

        partial_sums[omp_get_thread_num()]=sum;
      }

      /* serial pass: turn the per-thread totals into per-thread starting
         offsets (exclusive prefix sum over partial_sums); out accumulates
         the grand total */
      {
        out=0;
        for (i=0;i<omp_get_max_threads();++i) {
           tmp=out;
           out+=partial_sums[i];
           partial_sums[i]=tmp;
        }
      }

      /* pass 2: the same schedule(static) assigns each thread the same
         chunk as in pass 1, so starting from its offset it can rewrite
         its chunk with the running exclusive sums */
      #pragma omp parallel private(sum,tmp,i)
      {
        sum=partial_sums[omp_get_thread_num()];
        #pragma omp for schedule(static)
        for (i=0;i<N;++i) {
          tmp=sum;
          sum+=array[i];
          array[i]=tmp;
        }
      }
      TMPMEMFREE(partial_sums);
   #else
      /* serial exclusive prefix sum */
      for (i=0;i<N;++i) {
         tmp=out;
         out+=array[i];
         array[i]=tmp;
      }
   #endif
   return out;
}
103 /*
104 * set x to zero:
105 */
106 void Paso_zeroes(const dim_t n, double* x)
107 {
108 dim_t i,local_n,rest,n_start,n_end,q;
109 #ifdef _OPENMP
110 const int num_threads=omp_get_max_threads();
111 #else
112 const int num_threads=1;
113 #endif
114
115 #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q)
116 for (i=0;i<num_threads;++i) {
117 local_n=n/num_threads;
118 rest=n-local_n*num_threads;
119 n_start=local_n*i+MIN(i,rest);
120 n_end=local_n*(i+1)+MIN(i+1,rest);
121 #pragma ivdep
122 for (q=n_start;q<n_end;++q) x[q]=0;
123 }
124
125 }
126
127 /*
128 * performs an update of the form x=a*x+b*y where y and x are long vectors.
129 * if b=0, y is not used.
130 */
131 void Paso_Update(const dim_t n, const double a, double* x, const double b, const double* y)
132 {
133 dim_t i,local_n,rest,n_start,n_end,q;
134 #ifdef _OPENMP
135 const int num_threads=omp_get_max_threads();
136 #else
137 const int num_threads=1;
138 #endif
139
140 #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q)
141 for (i=0;i<num_threads;++i) {
142 local_n=n/num_threads;
143 rest=n-local_n*num_threads;
144 n_start=local_n*i+MIN(i,rest);
145 n_end=local_n*(i+1)+MIN(i+1,rest);
146 if (a==1.) {
147 #pragma ivdep
148 for (q=n_start;q<n_end;++q) {
149 x[q]+=b*y[q];
150 }
151 } else if ((a==1.) && (b==1)) {
152 #pragma ivdep
153 for (q=n_start;q<n_end;++q) {
154 x[q]+=y[q];
155 }
156 } else if ((ABS(a)==0) && (ABS(b)==0)) {
157 #pragma ivdep
158 for (q=n_start;q<n_end;++q) {
159 x[q]=0.;
160 }
161 } else if ((ABS(a)==0) && (b==1)) {
162 #pragma ivdep
163 for (q=n_start;q<n_end;++q) {
164 x[q]=y[q];
165 }
166 } else if (ABS(a)==0) {
167 #pragma ivdep
168 for (q=n_start;q<n_end;++q) {
169 x[q]=b*y[q];
170 }
171 } else {
172 #pragma ivdep
173 for (q=n_start;q<n_end;++q) {
174 x[q]=a*x[q]+b*y[q];
175 }
176 }
177 }
178 }
179 void Paso_Copy(const dim_t n, double* out, const double* in) {
180 Paso_LinearCombination(n,out, 1.,in,0., in);
181 }
182 /*
183 * performs an update of the form z=a*x+b*y where y and x are long vectors.
184 * if a=0, x is not used.
185 * if b=0, y is not used.
186 */
187 void Paso_LinearCombination(const dim_t n, double*z, const double a,const double* x, const double b, const double* y)
188 {
189 dim_t i,local_n,rest,n_start,n_end,q;
190 #ifdef _OPENMP
191 const int num_threads=omp_get_max_threads();
192 #else
193 const int num_threads=1;
194 #endif
195
196 #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q)
197 for (i=0;i<num_threads;++i) {
198 local_n=n/num_threads;
199 rest=n-local_n*num_threads;
200 n_start=local_n*i+MIN(i,rest);
201 n_end=local_n*(i+1)+MIN(i+1,rest);
202 if (((ABS(a)==0) && (ABS(b)==0)) || (y==NULL) || (x==NULL)) {
203 #pragma ivdep
204 for (q=n_start;q<n_end;++q) {
205 z[q]=0.;
206 }
207 } else if ( ((ABS(a)==0) && (ABS(b)>0.)) || (x==NULL) ) {
208 #pragma ivdep
209 for (q=n_start;q<n_end;++q) {
210 z[q]=b*y[q];
211 }
212 } else if (((ABS(a)>0) && (ABS(b)==0.)) || (y==NULL) ) {
213 #pragma ivdep
214 for (q=n_start;q<n_end;++q) {
215 z[q]=a*x[q];
216 }
217 } else {
218 #pragma ivdep
219 for (q=n_start;q<n_end;++q) {
220 z[q]=a*x[q]+b*y[q];
221 }
222 }
223 }
224 }
225
226
227
228 double Paso_InnerProduct(const dim_t n,const double* x, const double* y, Esys_MPIInfo* mpiinfo)
229 {
230 dim_t i,local_n,rest,n_start,n_end,q;
231 double my_out=0, local_out=0., out=0.;
232 #ifdef _OPENMP
233 const int num_threads=omp_get_max_threads();
234 #else
235 const int num_threads=1;
236 #endif
237 #pragma omp parallel for private(i,local_out,local_n,rest,n_start,n_end,q)
238 for (i=0;i<num_threads;++i) {
239 local_out=0;
240 local_n=n/num_threads;
241 rest=n-local_n*num_threads;
242 n_start=local_n*i+MIN(i,rest);
243 n_end=local_n*(i+1)+MIN(i+1,rest);
244 #pragma ivdep
245 for (q=n_start;q<n_end;++q) local_out+=x[q]*y[q];
246 #pragma omp critical
247 {
248 my_out+=local_out;
249 }
250 }
251 #ifdef ESYS_MPI
252 #pragma omp single
253 {
254 MPI_Allreduce(&my_out,&out, 1, MPI_DOUBLE, MPI_SUM, mpiinfo->comm);
255 }
256 #else
257 out=my_out;
258 #endif
259
260 return out;
261
262 }
263 double Paso_lsup(const dim_t n, const double* x, Esys_MPIInfo* mpiinfo)
264 {
265 dim_t i,local_n,rest,n_start,n_end,q;
266 double my_out=0., local_out=0., out=0.;
267 #ifdef _OPENMP
268 const int num_threads=omp_get_max_threads();
269 #else
270 const int num_threads=1;
271 #endif
272 #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q, local_out)
273 for (i=0;i<num_threads;++i) {
274 local_n=n/num_threads;
275 rest=n-local_n*num_threads;
276 n_start=local_n*i+MIN(i,rest);
277 n_end=local_n*(i+1)+MIN(i+1,rest);
278 local_out=0;
279 for (q=n_start;q<n_end;++q) local_out=MAX(ABS(x[q]),local_out);
280 #pragma omp critical
281 {
282 my_out=MAX(my_out,local_out);
283 }
284 }
285 #ifdef ESYS_MPI
286 #pragma omp single
287 {
288 MPI_Allreduce(&my_out,&out, 1, MPI_DOUBLE, MPI_MAX, mpiinfo->comm);
289 }
290 #else
291 out=my_out;
292 #endif
293
294 return out;
295
296 }
297 double Paso_l2(const dim_t n, const double* x, Esys_MPIInfo* mpiinfo)
298 {
299 dim_t i,local_n,rest,n_start,n_end,q;
300 double my_out=0, local_out=0., out=0.;
301 #ifdef _OPENMP
302 const int num_threads=omp_get_max_threads();
303 #else
304 const int num_threads=1;
305 #endif
306 #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q, local_out)
307 for (i=0;i<num_threads;++i) {
308 local_n=n/num_threads;
309 rest=n-local_n*num_threads;
310 n_start=local_n*i+MIN(i,rest);
311 n_end=local_n*(i+1)+MIN(i+1,rest);
312 local_out=0;
313 for (q=n_start;q<n_end;++q) local_out+=x[q]*x[q];
314 #pragma omp critical
315 {
316 my_out+=local_out;
317 }
318 }
319 #ifdef ESYS_MPI
320 #pragma omp single
321 {
322 MPI_Allreduce(&my_out,&out, 1, MPI_DOUBLE, MPI_SUM, mpiinfo->comm);
323 }
324 #else
325 out=my_out;
326 #endif
327
328 return sqrt(out);
329
330 }
331 /*
332 * Apply a sequence of n-1 Givens rotations (c,s) to v of length n which assumed to be small.
333 *
334 */
335 void ApplyGivensRotations(const dim_t n,double* v,const double* c,const double* s)
336 {
337 dim_t i;
338 register double w1,w2;
339 #pragma ivdep
340 for (i=0; i<n-1;++i) {
341 w1=c[i]*v[i]-s[i]*v[i+1];
342 w2=s[i]*v[i]+c[i]*v[i+1];
343 v[i]=w1;
344 v[i+1]=w2;
345 }
346 }
347
348 bool_t Paso_fileExists( const char* filename )
349 {
350 FILE* fp = NULL;
351 fp = fopen(filename,"r");
352 if( fp != NULL ) {
353 fclose(fp);
354 return TRUE;
355 } else {
356 return FALSE;
357 }
358 }

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26