/[escript]/trunk/paso/src/PasoUtil.c
ViewVC logotype

Contents of /trunk/paso/src/PasoUtil.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3318 - (show annotations)
Thu Oct 28 01:05:36 2010 UTC (8 years, 4 months ago) by caltinay
File MIME type: text/plain
File size: 13670 byte(s)
This should fix building paso without OpenMP.

1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2010 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14
15 /**************************************************************/
16
17 /* Some utility routines: */
18
19 /**************************************************************/
20
21 /* Copyrights by ACcESS Australia, 2003,2004,2005 */
22
23 /**************************************************************/
24
25 #include "Common.h"
26 #include "PasoUtil.h"
27 #ifdef _OPENMP
28 #include <omp.h>
29 #endif
30
31 /* returns true if array contains value */
32 bool_t Paso_Util_isAny(dim_t N,index_t* array,index_t value) {
33 bool_t out=FALSE;
34 register bool_t out2;
35 dim_t i;
36
37 #pragma omp parallel private(i, out2)
38 {
39 out2=FALSE;
40 #pragma omp for schedule(static)
41 for (i=0;i<N;i++) out2 = out2 || (array[i]==value);
42 #pragma omp critical
43 {
44 out = out || out2;
45 }
46 /* this how this should look like but gcc 4.4 seems to have a problem with this:
47 #pragma omp parallel for private(i) schedule(static) reduction(||:out)
48 for (i=0;i<N;i++) out = out || (array[i]==value);
49 */
50 }
51 return out;
52 }
53
54 /**************************************************************/
55
/* calculates the cumulative sum in array and returns the total sum */
57
58 /**************************************************************/
59 index_t Paso_Util_cumsum(dim_t N,index_t* array) {
60 index_t out=0,tmp;
61 dim_t i;
62 #ifdef _OPENMP
63 const int num_threads=omp_get_max_threads();
64 const int thread_num=omp_get_thread_num();
65 #else
66 const int num_threads=1;
67 const int thread_num=0;
68 #endif
69 if (num_threads>1) {
70 index_t *partial_sums=NULL,sum;
71 partial_sums=TMPMEMALLOC(num_threads, index_t);
72 #pragma omp parallel private(sum,i)
73 {
74 sum=0;
75 #pragma omp for schedule(static)
76 for (i=0;i<N;++i) sum+=array[i];
77
78 partial_sums[thread_num]=sum;
79 }
80
81 {
82 out=0;
83 for (i=0;i<num_threads;++i) {
84 tmp=out;
85 out+=partial_sums[i];
86 partial_sums[i]=tmp;
87 }
88 }
89
90 #pragma omp parallel private(sum,tmp,i)
91 {
92 sum=partial_sums[thread_num];
93 #pragma omp for schedule(static)
94 for (i=0;i<N;++i) {
95 tmp=sum;
96 sum+=array[i];
97 array[i]=tmp;
98 }
99 }
100 TMPMEMFREE(partial_sums);
101 } else {
102 for (i=0;i<N;++i) {
103 tmp=out;
104 out+=array[i];
105 array[i]=tmp;
106 }
107 }
108 return out;
109 }
110
111 index_t Paso_Util_cumsum_maskedTrue(dim_t N,index_t* array, bool_t* mask) {
112 index_t out=0,tmp;
113 dim_t i;
114 index_t *partial_sums=NULL,sum;
115 #ifdef _OPENMP
116 const int num_threads=omp_get_max_threads();
117 const int thread_num=omp_get_thread_num();
118 #else
119 const int num_threads=1;
120 const int thread_num=0;
121 #endif
122
123 if (num_threads>1) {
124 partial_sums=TMPMEMALLOC(num_threads, index_t);
125 #pragma omp parallel private(sum,i)
126 {
127 sum=0;
128 #pragma omp for schedule(static)
129 for (i=0;i<N;++i) {
130 if (mask[i]) {
131 array[i] =1;
132 sum++;
133 } else {
134 array[i] =0;
135 }
136 }
137 partial_sums[thread_num]=sum;
138 }
139
140 {
141 out=0;
142 for (i=0;i<num_threads;++i) {
143 tmp=out;
144 out+=partial_sums[i];
145 partial_sums[i]=tmp;
146 }
147 }
148
149 #pragma omp parallel private(sum,tmp,i)
150 {
151 sum=partial_sums[thread_num];
152 #pragma omp for schedule(static)
153 for (i=0;i<N;++i) {
154 if (mask[i]) {
155 tmp=sum;
156 sum+=array[i];
157 array[i]=tmp;
158 } else {
159 array[i]=-1;
160 }
161 }
162 }
163 TMPMEMFREE(partial_sums);
164 } else { /* num_threads=1 */
165 for (i=0;i<N;++i) {
166 if (mask[i]) {
167 array[i]=out;
168 out++;
169 } else {
170 array[i]=-1;
171 }
172 }
173 }
174 return out;
175 }
176
177 index_t Paso_Util_cumsum_maskedFalse(dim_t N,index_t* array, bool_t* mask) {
178 index_t out=0,tmp;
179 dim_t i;
180 index_t *partial_sums=NULL,sum;
181 #ifdef _OPENMP
182 const int num_threads=omp_get_max_threads();
183 const int thread_num=omp_get_thread_num();
184 #else
185 const int num_threads=1;
186 const int thread_num=0;
187 #endif
188
189 if (num_threads>1) {
190 partial_sums=TMPMEMALLOC(num_threads,index_t);
191 #pragma omp parallel private(sum,i)
192 {
193 sum=0;
194 #pragma omp for schedule(static)
195 for (i=0;i<N;++i) {
196 if (! mask[i]) {
197 array[i] =1;
198 sum++;
199 } else {
200 array[i] =0;
201 }
202 }
203 partial_sums[thread_num]=sum;
204 }
205
206 {
207 out=0;
208 for (i=0;i<num_threads;++i) {
209 tmp=out;
210 out+=partial_sums[i];
211 partial_sums[i]=tmp;
212 }
213 }
214
215 #pragma omp parallel private(sum,tmp,i)
216 {
217 sum=partial_sums[thread_num];
218 #pragma omp for schedule(static)
219 for (i=0;i<N;++i) {
220 if (! mask[i]) {
221 tmp=sum;
222 sum+=array[i];
223 array[i]=tmp;
224 } else {
225 array[i]=-1;
226 }
227 }
228 }
229 TMPMEMFREE(partial_sums);
230 } else {
231 for (i=0;i<N;++i) {
232 if (! mask[i]) {
233 array[i]=out;
234 out++;
235 } else {
236 array[i]=-1;
237 }
238 }
239 }
240
241 return out;
242 }
243
/* returns the index of the largest entry in lambda */
245 index_t Paso_Util_arg_max(dim_t n, dim_t* lambda) {
246 dim_t i;
247 index_t max=-1;
248 index_t argmax=-1;
249 index_t lmax=-1;
250 index_t li=-1;
251 #ifdef _OPENMP
252 const int num_threads=omp_get_max_threads();
253 #else
254 const int num_threads=1;
255 #endif
256
257 if (n>0) {
258 max=lambda[0];
259 argmax=0;
260 if (num_threads>1) {
261 #pragma omp parallel private(i,lmax,li)
262 {
263 lmax=max;
264 li=argmax;
265 #pragma omp for schedule(static)
266 for (i=0;i<n;++i) {
267 if(lmax<lambda[i]){
268 lmax=lambda[i];
269 li=i;
270 }
271 }
272 #pragma omp critical
273 {
274 if (max < lmax) {
275 max=lmax;
276 argmax=li;
277 } else if (max==lmax && argmax>li) {
278 argmax=li;
279 }
280 }
281 }
282 } else {
283 for (i=0;i<n;++i) {
284 if(max<lambda[i]){
285 max=lambda[i];
286 argmax=i;
287 }
288 }
289 }
290 }
291 return argmax;
292 }
293
294 /*
295 * set x to zero:
296 */
297 void Paso_zeroes(const dim_t n, double* x)
298 {
299 dim_t i,local_n,rest,n_start,n_end,q;
300 #ifdef _OPENMP
301 const int num_threads=omp_get_max_threads();
302 #else
303 const int num_threads=1;
304 #endif
305
306 #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q)
307 for (i=0;i<num_threads;++i) {
308 local_n=n/num_threads;
309 rest=n-local_n*num_threads;
310 n_start=local_n*i+MIN(i,rest);
311 n_end=local_n*(i+1)+MIN(i+1,rest);
312 #pragma ivdep
313 for (q=n_start;q<n_end;++q) x[q]=0;
314 }
315
316 }
317
318 /*
319 * performs an update of the form x=a*x+b*y where y and x are long vectors.
320 * if b=0, y is not used.
321 */
322 void Paso_Update(const dim_t n, const double a, double* x, const double b, const double* y)
323 {
324 dim_t i,local_n,rest,n_start,n_end,q;
325 #ifdef _OPENMP
326 const int num_threads=omp_get_max_threads();
327 #else
328 const int num_threads=1;
329 #endif
330
331 #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q)
332 for (i=0;i<num_threads;++i) {
333 local_n=n/num_threads;
334 rest=n-local_n*num_threads;
335 n_start=local_n*i+MIN(i,rest);
336 n_end=local_n*(i+1)+MIN(i+1,rest);
337 if (a==1.) {
338 #pragma ivdep
339 for (q=n_start;q<n_end;++q) {
340 x[q]+=b*y[q];
341 }
342 } else if ((a==1.) && (b==1)) {
343 #pragma ivdep
344 for (q=n_start;q<n_end;++q) {
345 x[q]+=y[q];
346 }
347 } else if ((ABS(a)==0) && (ABS(b)==0)) {
348 #pragma ivdep
349 for (q=n_start;q<n_end;++q) {
350 x[q]=0.;
351 }
352 } else if ((ABS(a)==0) && (b==1)) {
353 #pragma ivdep
354 for (q=n_start;q<n_end;++q) {
355 x[q]=y[q];
356 }
357 } else if (ABS(a)==0) {
358 #pragma ivdep
359 for (q=n_start;q<n_end;++q) {
360 x[q]=b*y[q];
361 }
362 } else {
363 #pragma ivdep
364 for (q=n_start;q<n_end;++q) {
365 x[q]=a*x[q]+b*y[q];
366 }
367 }
368 }
369 }
370 void Paso_Copy(const dim_t n, double* out, const double* in) {
371 Paso_LinearCombination(n,out, 1.,in,0., in);
372 }
373 /*
374 * performs an update of the form z=a*x+b*y where y and x are long vectors.
375 * if a=0, x is not used.
376 * if b=0, y is not used.
377 */
378 void Paso_LinearCombination(const dim_t n, double*z, const double a,const double* x, const double b, const double* y)
379 {
380 dim_t i,local_n,rest,n_start,n_end,q;
381 #ifdef _OPENMP
382 const int num_threads=omp_get_max_threads();
383 #else
384 const int num_threads=1;
385 #endif
386
387 #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q)
388 for (i=0;i<num_threads;++i) {
389 local_n=n/num_threads;
390 rest=n-local_n*num_threads;
391 n_start=local_n*i+MIN(i,rest);
392 n_end=local_n*(i+1)+MIN(i+1,rest);
393 if (((ABS(a)==0) && (ABS(b)==0)) || (y==NULL) || (x==NULL)) {
394 #pragma ivdep
395 for (q=n_start;q<n_end;++q) {
396 z[q]=0.;
397 }
398 } else if ( ((ABS(a)==0) && (ABS(b)>0.)) || (x==NULL) ) {
399 #pragma ivdep
400 for (q=n_start;q<n_end;++q) {
401 z[q]=b*y[q];
402 }
403 } else if (((ABS(a)>0) && (ABS(b)==0.)) || (y==NULL) ) {
404 #pragma ivdep
405 for (q=n_start;q<n_end;++q) {
406 z[q]=a*x[q];
407 }
408 } else {
409 #pragma ivdep
410 for (q=n_start;q<n_end;++q) {
411 z[q]=a*x[q]+b*y[q];
412 }
413 }
414 }
415 }
416
417
418
419 double Paso_InnerProduct(const dim_t n,const double* x, const double* y, Esys_MPIInfo* mpiinfo)
420 {
421 dim_t i,local_n,rest,n_start,n_end,q;
422 double my_out=0, local_out=0., out=0.;
423 #ifdef _OPENMP
424 const int num_threads=omp_get_max_threads();
425 #else
426 const int num_threads=1;
427 #endif
428 #pragma omp parallel for private(i,local_out,local_n,rest,n_start,n_end,q)
429 for (i=0;i<num_threads;++i) {
430 local_out=0;
431 local_n=n/num_threads;
432 rest=n-local_n*num_threads;
433 n_start=local_n*i+MIN(i,rest);
434 n_end=local_n*(i+1)+MIN(i+1,rest);
435 #pragma ivdep
436 for (q=n_start;q<n_end;++q) local_out+=x[q]*y[q];
437 #pragma omp critical
438 {
439 my_out+=local_out;
440 }
441 }
442 #ifdef ESYS_MPI
443 #pragma omp single
444 {
445 MPI_Allreduce(&my_out,&out, 1, MPI_DOUBLE, MPI_SUM, mpiinfo->comm);
446 }
447 #else
448 out=my_out;
449 #endif
450
451 return out;
452
453 }
454 double Paso_lsup(const dim_t n, const double* x, Esys_MPIInfo* mpiinfo)
455 {
456 dim_t i,local_n,rest,n_start,n_end,q;
457 double my_out=0., local_out=0., out=0.;
458 #ifdef _OPENMP
459 const int num_threads=omp_get_max_threads();
460 #else
461 const int num_threads=1;
462 #endif
463 #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q, local_out)
464 for (i=0;i<num_threads;++i) {
465 local_n=n/num_threads;
466 rest=n-local_n*num_threads;
467 n_start=local_n*i+MIN(i,rest);
468 n_end=local_n*(i+1)+MIN(i+1,rest);
469 local_out=0;
470 for (q=n_start;q<n_end;++q) local_out=MAX(ABS(x[q]),local_out);
471 #pragma omp critical
472 {
473 my_out=MAX(my_out,local_out);
474 }
475 }
476 #ifdef ESYS_MPI
477 #pragma omp single
478 {
479 MPI_Allreduce(&my_out,&out, 1, MPI_DOUBLE, MPI_MAX, mpiinfo->comm);
480 }
481 #else
482 out=my_out;
483 #endif
484
485 return out;
486
487 }
488 double Paso_l2(const dim_t n, const double* x, Esys_MPIInfo* mpiinfo)
489 {
490 dim_t i,local_n,rest,n_start,n_end,q;
491 double my_out=0, local_out=0., out=0.;
492 #ifdef _OPENMP
493 const int num_threads=omp_get_max_threads();
494 #else
495 const int num_threads=1;
496 #endif
497 #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q, local_out)
498 for (i=0;i<num_threads;++i) {
499 local_n=n/num_threads;
500 rest=n-local_n*num_threads;
501 n_start=local_n*i+MIN(i,rest);
502 n_end=local_n*(i+1)+MIN(i+1,rest);
503 local_out=0;
504 for (q=n_start;q<n_end;++q) local_out+=x[q]*x[q];
505 #pragma omp critical
506 {
507 my_out+=local_out;
508 }
509 }
510 #ifdef ESYS_MPI
511 #pragma omp single
512 {
513 MPI_Allreduce(&my_out,&out, 1, MPI_DOUBLE, MPI_SUM, mpiinfo->comm);
514 }
515 #else
516 out=my_out;
517 #endif
518
519 return sqrt(out);
520
521 }
522 /*
523 * Apply a sequence of n-1 Givens rotations (c,s) to v of length n which assumed to be small.
524 *
525 */
526 void ApplyGivensRotations(const dim_t n,double* v,const double* c,const double* s)
527 {
528 dim_t i;
529 register double w1,w2;
530 #pragma ivdep
531 for (i=0; i<n-1;++i) {
532 w1=c[i]*v[i]-s[i]*v[i+1];
533 w2=s[i]*v[i]+c[i]*v[i+1];
534 v[i]=w1;
535 v[i+1]=w2;
536 }
537 }
538
539 bool_t Paso_fileExists( const char* filename )
540 {
541 FILE* fp = NULL;
542 fp = fopen(filename,"r");
543 if( fp != NULL ) {
544 fclose(fp);
545 return TRUE;
546 } else {
547 return FALSE;
548 }
549 }

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26