Contents of /branches/doubleplusgood/paso/src/PasoUtil.cpp
Revision 4324 - Wed Mar 20 00:55:44 2013 UTC by jfenwick
File size: 14018 byte(s)
Log message: continues

/*****************************************************************************
*
* Copyright (c) 2003-2013 by University of Queensland
* http://www.uq.edu.au
*
* Primary Business: Queensland, Australia
* Licensed under the Open Software License version 3.0
* http://www.opensource.org/licenses/osl-3.0.php
*
* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
* Development since 2012 by School of Earth Sciences
*
*****************************************************************************/


/************************************************************************************/

/* Some utility routines: */

/************************************************************************************/

/* Copyrights by ACcESS Australia, 2003,2004,2005 */

/************************************************************************************/

#include "Common.h"
#include "PasoUtil.h"
#include "Paso.h"

/* returns true if array contains value */
bool_t Paso_Util_isAny(dim_t N, index_t* array, index_t value) {
    bool_t out=FALSE;
    register bool_t out2;
    dim_t i;

    #pragma omp parallel private(i, out2)
    {
        out2=FALSE;
        #pragma omp for schedule(static)
        for (i=0;i<N;i++) out2 = out2 || (array[i]==value);
        #pragma omp critical
        {
            out = out || out2;
        }
        /* This is how it should look, but gcc 4.4 seems to have
         * a problem with it:
        #pragma omp parallel for private(i) schedule(static) reduction(||:out)
        for (i=0;i<N;i++) out = out || (array[i]==value);
        */
    }
    return out;
}
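
/* Illustrative usage (not part of the original source): with
 *   index_t idx[4] = {7, 3, 9, 1};
 * Paso_Util_isAny(4, idx, 9) returns TRUE and Paso_Util_isAny(4, idx, 5) returns FALSE. */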

/* returns the number of positive values in x */
dim_t Paso_Util_numPositives(const dim_t N, const double *x) {
    dim_t out=0;
    register dim_t out2;
    dim_t i;

    #pragma omp parallel private(i, out2)
    {
        out2=0;
        #pragma omp for schedule(static)
        for (i=0;i<N;i++) {
            if ( x[i] > 0 ) out2++;
        }
        #pragma omp critical
        {
            out = out + out2;
        }
    }
    return out;
}

/* returns the maximum value in array */
index_t Paso_Util_iMax(const dim_t N, const index_t* array) {
    index_t out=INDEX_T_MIN;
    register index_t out2;
    dim_t i;
    if (N>0) {
        #pragma omp parallel private(i, out2)
        {
            out2=INDEX_T_MIN;
            #pragma omp for schedule(static)
            for (i=0;i<N;i++) out2 = MAX(out2, array[i]);

            #pragma omp critical
            {
                out = MAX(out, out2);
            }
        }
    }
    return out;
}

/************************************************************************************/

/* calculates the cumulative (exclusive prefix) sum in array and returns the total sum */

/************************************************************************************/
index_t Paso_Util_cumsum(dim_t N, index_t* array) {
    index_t out=0,tmp;
    dim_t i;
    index_t *partial_sums=NULL,sum;
    const int num_threads=omp_get_max_threads();
    int thread_num;

    if (num_threads>1) {
        partial_sums=new index_t[num_threads];
        #pragma omp parallel private(sum,thread_num,i,tmp)
        {
            /* pass 1: each thread sums its statically scheduled share of the array */
            sum=0;
            thread_num=omp_get_thread_num();
            #pragma omp for schedule(static)
            for (i=0;i<N;++i) sum+=array[i];

            partial_sums[thread_num]=sum;
            #pragma omp barrier
            /* exclusive scan over the per-thread totals */
            #pragma omp master
            {
                out=0;
                for (i=0;i<num_threads;++i) {
                    tmp=out;
                    out+=partial_sums[i];
                    partial_sums[i]=tmp;
                }
            }
            #pragma omp barrier
            /* pass 2: local exclusive scan, offset by the total of the preceding threads */
            sum=partial_sums[thread_num];
            #pragma omp for schedule(static)
            for (i=0;i<N;++i) {
                tmp=sum;
                sum+=array[i];
                array[i]=tmp;
            }
        }
        delete[] partial_sums;
    } else {
        for (i=0;i<N;++i) {
            tmp=out;
            out+=array[i];
            array[i]=tmp;
        }
    }
    return out;
}
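
/* Illustrative example (not part of the original source): for index_t a[3] = {3,1,2},
 * Paso_Util_cumsum(3, a) rewrites a to {0,3,4} (an exclusive prefix sum) and returns 6. */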

index_t Paso_Util_cumsum_maskedTrue(dim_t N, index_t* array, bool_t* mask) {
    index_t out=0,tmp;
    dim_t i;
    index_t *partial_sums=NULL,sum;
    const int num_threads=omp_get_max_threads();
    int thread_num;

    if (num_threads>1) {
        partial_sums=new index_t[num_threads];
        #pragma omp parallel private(sum,i,thread_num,tmp)
        {
            sum=0;
            thread_num=omp_get_thread_num();
            #pragma omp for schedule(static)
            for (i=0;i<N;++i) {
                if (mask[i]) {
                    array[i]=1;
                    sum++;
                } else {
                    array[i]=0;
                }
            }
            partial_sums[thread_num]=sum;
            #pragma omp barrier
            #pragma omp master
            {
                out=0;
                for (i=0;i<num_threads;++i) {
                    tmp=out;
                    out+=partial_sums[i];
                    partial_sums[i]=tmp;
                }
            }
            #pragma omp barrier
            sum=partial_sums[thread_num];
            #pragma omp for schedule(static)
            for (i=0;i<N;++i) {
                if (mask[i]) {
                    tmp=sum;
                    sum+=array[i];
                    array[i]=tmp;
                } else {
                    array[i]=-1;
                }
            }
        }
        delete[] partial_sums;
    } else { /* num_threads=1 */
        for (i=0;i<N;++i) {
            if (mask[i]) {
                array[i]=out;
                out++;
            } else {
                array[i]=-1;
            }
        }
    }
    return out;
}
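
/* Illustrative example (not part of the original source): with mask = {TRUE, FALSE, TRUE, TRUE},
 * Paso_Util_cumsum_maskedTrue(4, array, mask) sets array to {0, -1, 1, 2} and returns 3,
 * i.e. masked-in entries receive consecutive indices and masked-out entries receive -1. */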

index_t Paso_Util_cumsum_maskedFalse(dim_t N, index_t* array, bool_t* mask) {
    index_t out=0,tmp=0;
    dim_t i;
    index_t *partial_sums=NULL,sum;
    const int num_threads=omp_get_max_threads();
    int thread_num=0;

    if (num_threads>1) {
        partial_sums=new index_t[num_threads];
        #pragma omp parallel private(sum,i,thread_num,tmp)
        {
            sum=0;
            thread_num=omp_get_thread_num();
            #pragma omp for schedule(static)
            for (i=0;i<N;++i) {
                if (! mask[i]) {
                    array[i]=1;
                    sum++;
                } else {
                    array[i]=0;
                }
            }
            partial_sums[thread_num]=sum;
            #pragma omp barrier
            #pragma omp master
            {
                out=0;
                for (i=0;i<num_threads;++i) {
                    tmp=out;
                    out+=partial_sums[i];
                    partial_sums[i]=tmp;
                }
            }
            #pragma omp barrier
            sum=partial_sums[thread_num];
            #pragma omp for schedule(static)
            for (i=0;i<N;++i) {
                if (! mask[i]) {
                    tmp=sum;
                    sum+=array[i];
                    array[i]=tmp;
                } else {
                    array[i]=-1;
                }
            }
        }
        delete[] partial_sums;
    } else {
        for (i=0;i<N;++i) {
            if (! mask[i]) {
                array[i]=out;
                out++;
            } else {
                array[i]=-1;
            }
        }
    }

    return out;
}
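
/* Illustrative note (not part of the original source): this is the counterpart of
 * Paso_Util_cumsum_maskedTrue with the mask inverted, so mask = {TRUE, FALSE, TRUE, TRUE}
 * yields array = {-1, 0, -1, -1} and a return value of 1. */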

/* returns the index of the largest entry in lambda (ties are resolved to the smallest index) */
index_t Paso_Util_arg_max(dim_t n, dim_t* lambda) {
    dim_t i;
    index_t max=-1;
    index_t argmax=-1;
    index_t lmax=-1;
    index_t li=-1;
    const int num_threads=omp_get_max_threads();

    if (n>0) {
        max=lambda[0];
        argmax=0;
        if (num_threads>1) {
            #pragma omp parallel private(i,lmax,li)
            {
                lmax=max;
                li=argmax;
                #pragma omp for schedule(static)
                for (i=0;i<n;++i) {
                    if (lmax<lambda[i]) {
                        lmax=lambda[i];
                        li=i;
                    }
                }
                #pragma omp critical
                {
                    if (max < lmax) {
                        max=lmax;
                        argmax=li;
                    } else if (max==lmax && argmax>li) {
                        argmax=li;
                    }
                }
            }
        } else {
            for (i=0;i<n;++i) {
                if (max<lambda[i]) {
                    max=lambda[i];
                    argmax=i;
                }
            }
        }
    }
    return argmax;
}

/*
 * set x to zero:
 */
void Paso_zeroes(const dim_t n, double* x)
{
    dim_t i,local_n,rest,n_start,n_end,q;
    const int num_threads=omp_get_max_threads();

    /* split [0,n) into num_threads contiguous chunks, distributing the
     * remainder of n/num_threads over the first chunks */
    #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q)
    for (i=0;i<num_threads;++i) {
        local_n=n/num_threads;
        rest=n-local_n*num_threads;
        n_start=local_n*i+MIN(i,rest);
        n_end=local_n*(i+1)+MIN(i+1,rest);
        #pragma ivdep
        for (q=n_start;q<n_end;++q) x[q]=0;
    }

}

/*
 * Performs an update of the form x=a*x+b*y where y and x are long vectors.
 * If b=0, y is not used.
 */
void Paso_Update(const dim_t n, const double a, double* x, const double b, const double* y)
{
    dim_t i,local_n,rest,n_start,n_end,q;
    const int num_threads=omp_get_max_threads();


    #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q)
    for (i=0;i<num_threads;++i) {
        local_n=n/num_threads;
        rest=n-local_n*num_threads;
        n_start=local_n*i+MIN(i,rest);
        n_end=local_n*(i+1)+MIN(i+1,rest);
        /* special cases are tested from the most to the least specific so that
         * none of the branches below is unreachable */
        if ((a==1.) && (b==1.)) {
            #pragma ivdep
            for (q=n_start;q<n_end;++q) {
                x[q]+=y[q];
            }
        } else if (a==1.) {
            #pragma ivdep
            for (q=n_start;q<n_end;++q) {
                x[q]+=b*y[q];
            }
        } else if ((ABS(a)==0) && (ABS(b)==0)) {
            #pragma ivdep
            for (q=n_start;q<n_end;++q) {
                x[q]=0.;
            }
        } else if ((ABS(a)==0) && (b==1.)) {
            #pragma ivdep
            for (q=n_start;q<n_end;++q) {
                x[q]=y[q];
            }
        } else if (ABS(a)==0) {
            #pragma ivdep
            for (q=n_start;q<n_end;++q) {
                x[q]=b*y[q];
            }
        } else {
            #pragma ivdep
            for (q=n_start;q<n_end;++q) {
                x[q]=a*x[q]+b*y[q];
            }
        }
    }
}
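/* Illustrative usage (not part of the original source):
 *   Paso_Update(n, 1., x, -1., y);    performs x <- x - y
 *   Paso_Update(n, 0.5, x, 2., y);    performs x <- 0.5*x + 2*y
 */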
void Paso_Copy(const dim_t n, double* out, const double* in) {
    Paso_LinearCombination(n, out, 1., in, 0., in);
}
/*
 * Computes z=a*x+b*y where y and x are long vectors.
 * If a=0, x is not used.
 * If b=0, y is not used.
 */
void Paso_LinearCombination(const dim_t n, double* z, const double a, const double* x, const double b, const double* y)
{
    dim_t i,local_n,rest,n_start,n_end,q;
    const int num_threads=omp_get_max_threads();


    #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q)
    for (i=0;i<num_threads;++i) {
        local_n=n/num_threads;
        rest=n-local_n*num_threads;
        n_start=local_n*i+MIN(i,rest);
        n_end=local_n*(i+1)+MIN(i+1,rest);
        if (((ABS(a)==0) && (ABS(b)==0)) || (y==NULL) || (x==NULL)) {
            #pragma ivdep
            for (q=n_start;q<n_end;++q) {
                z[q]=0.;
            }
        } else if ( ((ABS(a)==0) && (ABS(b)>0.)) || (x==NULL) ) {
            #pragma ivdep
            for (q=n_start;q<n_end;++q) {
                z[q]=b*y[q];
            }
        } else if (((ABS(a)>0) && (ABS(b)==0.)) || (y==NULL) ) {
            #pragma ivdep
            for (q=n_start;q<n_end;++q) {
                z[q]=a*x[q];
            }
        } else {
            #pragma ivdep
            for (q=n_start;q<n_end;++q) {
                z[q]=a*x[q]+b*y[q];
            }
        }
    }
}
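/* Illustrative usage (not part of the original source):
 *   Paso_LinearCombination(n, z, 2., x, -1., y);    sets z <- 2*x - y
 * Note that Paso_Copy above is implemented as the special case z <- 1*in + 0*in. */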


double Paso_InnerProduct(const dim_t n, const double* x, const double* y, Esys_MPIInfo* mpiinfo)
{
    dim_t i,local_n,rest,n_start,n_end,q;
    double my_out=0, local_out=0., out=0.;
    const int num_threads=omp_get_max_threads();

    #pragma omp parallel for private(i,local_out,local_n,rest,n_start,n_end,q)
    for (i=0;i<num_threads;++i) {
        local_out=0;
        local_n=n/num_threads;
        rest=n-local_n*num_threads;
        n_start=local_n*i+MIN(i,rest);
        n_end=local_n*(i+1)+MIN(i+1,rest);
        #pragma ivdep
        for (q=n_start;q<n_end;++q) local_out+=x[q]*y[q];
        #pragma omp critical
        {
            my_out+=local_out;
        }
    }
    #ifdef ESYS_MPI
    #pragma omp single
    {
        MPI_Allreduce(&my_out, &out, 1, MPI_DOUBLE, MPI_SUM, mpiinfo->comm);
    }
    #else
    out=my_out;
    #endif

    return out;

}
double Paso_lsup(const dim_t n, const double* x, Esys_MPIInfo* mpiinfo)
{
    dim_t i,local_n,rest,n_start,n_end,q;
    double my_out=0., local_out=0., out=0.;
    const int num_threads=omp_get_max_threads();

    #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q, local_out)
    for (i=0;i<num_threads;++i) {
        local_n=n/num_threads;
        rest=n-local_n*num_threads;
        n_start=local_n*i+MIN(i,rest);
        n_end=local_n*(i+1)+MIN(i+1,rest);
        local_out=0;
        for (q=n_start;q<n_end;++q) local_out=MAX(ABS(x[q]),local_out);
        #pragma omp critical
        {
            my_out=MAX(my_out,local_out);
        }
    }
    #ifdef ESYS_MPI
    #pragma omp single
    {
        MPI_Allreduce(&my_out, &out, 1, MPI_DOUBLE, MPI_MAX, mpiinfo->comm);
    }
    #else
    out=my_out;
    #endif

    return out;

}
double Paso_l2(const dim_t n, const double* x, Esys_MPIInfo* mpiinfo)
{
    dim_t i,local_n,rest,n_start,n_end,q;
    double my_out=0, local_out=0., out=0.;
    const int num_threads=omp_get_max_threads();

    #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q, local_out)
    for (i=0;i<num_threads;++i) {
        local_n=n/num_threads;
        rest=n-local_n*num_threads;
        n_start=local_n*i+MIN(i,rest);
        n_end=local_n*(i+1)+MIN(i+1,rest);
        local_out=0;
        for (q=n_start;q<n_end;++q) local_out+=x[q]*x[q];
        #pragma omp critical
        {
            my_out+=local_out;
        }
    }
    #ifdef ESYS_MPI
    #pragma omp single
    {
        MPI_Allreduce(&my_out, &out, 1, MPI_DOUBLE, MPI_SUM, mpiinfo->comm);
    }
    #else
    out=my_out;
    #endif

    return sqrt(out);

}
/*
 * Applies a sequence of n-1 Givens rotations (c,s) to v of length n which is
 * assumed to be small.
 */
void ApplyGivensRotations(const dim_t n, double* v, const double* c, const double* s)
{
    dim_t i;
    register double w1,w2;
    #pragma ivdep
    for (i=0; i<n-1;++i) {
        w1=c[i]*v[i]-s[i]*v[i+1];
        w2=s[i]*v[i]+c[i]*v[i+1];
        v[i]=w1;
        v[i+1]=w2;
    }
}
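
/* Illustrative note (not part of the original source): each step applies the plane rotation
 *   ( v[i]   )    ( c[i]  -s[i] ) ( v[i]   )
 *   ( v[i+1] ) <- ( s[i]   c[i] ) ( v[i+1] )
 * to the consecutive pair (v[i], v[i+1]), as commonly done when updating the small
 * right-hand-side vector in GMRES-type solvers. */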

bool_t Paso_fileExists( const char* filename )
{
    FILE* fp = NULL;
    fp = fopen(filename,"r");
    if( fp != NULL ) {
        fclose(fp);
        return TRUE;
    } else {
        return FALSE;
    }
}


Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision
