
Contents of /trunk/paso/src/PasoUtil.c



Revision 3285 - Tue Oct 19 00:28:00 2010 UTC, by gross
File MIME type: text/plain
File size: 12563 byte(s)
bug in initialization step fixed.

/*******************************************************
*
* Copyright (c) 2003-2010 by University of Queensland
* Earth Systems Science Computational Center (ESSCC)
* http://www.uq.edu.au/esscc
*
* Primary Business: Queensland, Australia
* Licensed under the Open Software License version 3.0
* http://www.opensource.org/licenses/osl-3.0.php
*
*******************************************************/


/**************************************************************/

/* Some utility routines: */

/**************************************************************/

/* Copyrights by ACcESS Australia, 2003,2004,2005 */

/**************************************************************/

#include "Common.h"
#include "PasoUtil.h"
#ifdef _OPENMP
#include <omp.h>
#endif

/* returns true if array contains value */
bool_t Paso_Util_isAny(dim_t N, index_t* array, index_t value) {
    bool_t out=FALSE;
    register bool_t out2;
    dim_t i;

    #pragma omp parallel private(i, out2)
    {
        out2=FALSE;
        #pragma omp for schedule(static)
        for (i=0;i<N;i++) out2 = out2 || (array[i]==value);
        #pragma omp critical
        {
            out = out || out2;
        }
        /* this is how it should look, but gcc 4.4 seems to have a problem with it:
        #pragma omp parallel for private(i) schedule(static) reduction(||:out)
        for (i=0;i<N;i++) out = out || (array[i]==value);
        */
    }
    return out;
}
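/*
 * Illustrative usage only (not part of the library), assuming index_t is an
 * integer type: for
 *
 *    index_t a[3] = {7, 3, 9};
 *
 * Paso_Util_isAny(3, a, 3) returns TRUE and Paso_Util_isAny(3, a, 4)
 * returns FALSE.
 */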

/**************************************************************/

/* overwrites array by its cumulative sum (exclusive prefix sum) and returns the total sum */

/**************************************************************/
index_t Paso_Util_cumsum(dim_t N,index_t* array) {
    index_t out=0,tmp;
    dim_t i;
    if (omp_get_max_threads()>1) {
        index_t *partial_sums=NULL,sum;
        partial_sums=TMPMEMALLOC(omp_get_max_threads(),index_t);
        #pragma omp parallel private(sum,i)
        {
            sum=0;
            #pragma omp for schedule(static)
            for (i=0;i<N;++i) sum+=array[i];

            partial_sums[omp_get_thread_num()]=sum;
        }

        {
            out=0;
            for (i=0;i<omp_get_max_threads();++i) {
                tmp=out;
                out+=partial_sums[i];
                partial_sums[i]=tmp;
            }
        }

        #pragma omp parallel private(sum,tmp,i)
        {
            sum=partial_sums[omp_get_thread_num()];
            #pragma omp for schedule(static)
            for (i=0;i<N;++i) {
                tmp=sum;
                sum+=array[i];
                array[i]=tmp;
            }
        }
        TMPMEMFREE(partial_sums);
    } else {
        for (i=0;i<N;++i) {
            tmp=out;
            out+=array[i];
            array[i]=tmp;
        }
    }
    return out;
}
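/*
 * Worked example (illustrative only): for N=4 and array={3,1,2,5},
 * Paso_Util_cumsum overwrites array with the exclusive prefix sums
 * {0,3,4,6} and returns the total 11. The parallel branch produces the
 * same result in two sweeps: each thread sums its chunk, the chunk totals
 * are converted into chunk offsets serially, and each thread then rewrites
 * its chunk starting from its offset.
 */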

/* variant of Paso_Util_cumsum: entries with mask[i] set are numbered consecutively
   starting from 0, all other entries are marked by -1; returns the number of
   TRUE entries */
index_t Paso_Util_cumsum_maskedTrue(dim_t N,index_t* array, bool_t* mask) {
    index_t out=0,tmp;
    dim_t i;
    index_t *partial_sums=NULL,sum;

    if (omp_get_max_threads()>1) {
        partial_sums=TMPMEMALLOC(omp_get_max_threads(),index_t);
        #pragma omp parallel private(sum,i)
        {
            sum=0;
            #pragma omp for schedule(static)
            for (i=0;i<N;++i) {
                if (mask[i]) {
                    array[i]=1;
                    sum++;
                } else {
                    array[i]=0;
                }
            }
            partial_sums[omp_get_thread_num()]=sum;
        }

        {
            out=0;
            for (i=0;i<omp_get_max_threads();++i) {
                tmp=out;
                out+=partial_sums[i];
                partial_sums[i]=tmp;
            }
        }

        #pragma omp parallel private(sum,tmp,i)
        {
            sum=partial_sums[omp_get_thread_num()];
            #pragma omp for schedule(static)
            for (i=0;i<N;++i) {
                if (mask[i]) {
                    tmp=sum;
                    sum+=array[i];
                    array[i]=tmp;
                } else {
                    array[i]=-1;
                }
            }
        }
        TMPMEMFREE(partial_sums);
    } else {
        for (i=0;i<N;++i) {
            if (mask[i]) {
                array[i]=out;
                out++;
            } else {
                array[i]=-1;
            }
        }
    }
    return out;
}
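/*
 * Example (illustrative only): for N=5 and mask={TRUE,FALSE,TRUE,TRUE,FALSE},
 * Paso_Util_cumsum_maskedTrue overwrites array with {0,-1,1,2,-1} and
 * returns 3, i.e. the masked entries are numbered consecutively and the
 * others are marked with -1.
 */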

/* same as Paso_Util_cumsum_maskedTrue but numbers the entries with mask[i] NOT set;
   entries with mask[i] set are marked by -1; returns the number of FALSE entries */
index_t Paso_Util_cumsum_maskedFalse(dim_t N,index_t* array, bool_t* mask) {
    index_t out=0,tmp;
    dim_t i;
    index_t *partial_sums=NULL,sum;

    if (omp_get_max_threads()>1) {
        partial_sums=TMPMEMALLOC(omp_get_max_threads(),index_t);
        #pragma omp parallel private(sum,i)
        {
            sum=0;
            #pragma omp for schedule(static)
            for (i=0;i<N;++i) {
                if (! mask[i]) {
                    array[i]=1;
                    sum++;
                } else {
                    array[i]=0;
                }
            }
            partial_sums[omp_get_thread_num()]=sum;
        }

        {
            out=0;
            for (i=0;i<omp_get_max_threads();++i) {
                tmp=out;
                out+=partial_sums[i];
                partial_sums[i]=tmp;
            }
        }

        #pragma omp parallel private(sum,tmp,i)
        {
            sum=partial_sums[omp_get_thread_num()];
            #pragma omp for schedule(static)
            for (i=0;i<N;++i) {
                if (! mask[i]) {
                    tmp=sum;
                    sum+=array[i];
                    array[i]=tmp;
                } else {
                    array[i]=-1;
                }
            }
        }
        TMPMEMFREE(partial_sums);
    } else {
        for (i=0;i<N;++i) {
            if (! mask[i]) {
                array[i]=out;
                out++;
            } else {
                array[i]=-1;
            }
        }
    }

    return out;
}

/* returns the index at which lambda takes its maximum value */
index_t Paso_Util_arg_max(dim_t n, dim_t* lambda) {
    dim_t i;
    index_t max=-1;
    index_t argmax=-1;
    index_t lmax=-1;
    index_t li=-1;

    if (n>0) {

        max=lambda[0];
        argmax=0;
        if (omp_get_max_threads()>1) {
            #pragma omp parallel private(i,lmax,li)
            {
                lmax=max;
                li=argmax;
                #pragma omp for schedule(static)
                for (i=0;i<n;++i) {
                    if (lmax<lambda[i]) {
                        lmax=lambda[i];
                        li=i;
                    }
                }
                #pragma omp critical
                {
                    if (max < lmax) {
                        max=lmax;
                        argmax=li;
                    } else if (max==lmax && argmax>li) {
                        argmax=li;
                    }
                }
            }
        } else {
            for (i=0;i<n;++i) {
                if (max<lambda[i]) {
                    max=lambda[i];
                    argmax=i;
                }
            }
        }
    }
    return argmax;
}
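/*
 * Example (illustrative only): for n=5 and lambda={2,7,4,7,1},
 * Paso_Util_arg_max returns 1, the first position at which the maximum
 * value 7 occurs; for n=0 it returns -1.
 */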

/*
 * set x to zero:
 */
void Paso_zeroes(const dim_t n, double* x)
{
    dim_t i,local_n,rest,n_start,n_end,q;
    const int num_threads=omp_get_max_threads();

    #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q)
    for (i=0;i<num_threads;++i) {
        local_n=n/num_threads;
        rest=n-local_n*num_threads;
        n_start=local_n*i+MIN(i,rest);
        n_end=local_n*(i+1)+MIN(i+1,rest);
        #pragma ivdep
        for (q=n_start;q<n_end;++q) x[q]=0;
    }

}
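/*
 * The block partitioning above is shared by all vector kernels below: each
 * of the num_threads threads handles one contiguous range, and the first
 * `rest` threads get one extra element. Worked example (illustrative only):
 * n=10 and num_threads=4 gives local_n=2, rest=2 and the ranges
 * [0,3), [3,6), [6,8), [8,10).
 */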

/*
 * performs an update of the form x=a*x+b*y where y and x are long vectors.
 * if b=0, y is not used.
 */
void Paso_Update(const dim_t n, const double a, double* x, const double b, const double* y)
{
    dim_t i,local_n,rest,n_start,n_end,q;
    #ifdef _OPENMP
    const int num_threads=omp_get_max_threads();
    #else
    const int num_threads=1;
    #endif

    #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q)
    for (i=0;i<num_threads;++i) {
        local_n=n/num_threads;
        rest=n-local_n*num_threads;
        n_start=local_n*i+MIN(i,rest);
        n_end=local_n*(i+1)+MIN(i+1,rest);
        /* the a==1 && b==1 case is tested before the generic a==1 case,
           otherwise the specialised branch would be unreachable */
        if ((a==1.) && (b==1)) {
            #pragma ivdep
            for (q=n_start;q<n_end;++q) {
                x[q]+=y[q];
            }
        } else if (a==1.) {
            #pragma ivdep
            for (q=n_start;q<n_end;++q) {
                x[q]+=b*y[q];
            }
        } else if ((ABS(a)==0) && (ABS(b)==0)) {
            #pragma ivdep
            for (q=n_start;q<n_end;++q) {
                x[q]=0.;
            }
        } else if ((ABS(a)==0) && (b==1)) {
            #pragma ivdep
            for (q=n_start;q<n_end;++q) {
                x[q]=y[q];
            }
        } else if (ABS(a)==0) {
            #pragma ivdep
            for (q=n_start;q<n_end;++q) {
                x[q]=b*y[q];
            }
        } else {
            #pragma ivdep
            for (q=n_start;q<n_end;++q) {
                x[q]=a*x[q]+b*y[q];
            }
        }
    }
}
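/*
 * Example (illustrative only): with x={1,2} and y={10,20},
 * Paso_Update(2, 2., x, 0.5, y) overwrites x with {2*1+0.5*10, 2*2+0.5*20}
 * = {7,14}; a call with a=1 and b=1 takes the x[q]+=y[q] fast path.
 */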
/* copies the vector in into out */
void Paso_Copy(const dim_t n, double* out, const double* in) {
    Paso_LinearCombination(n, out, 1., in, 0., in);
}
/*
 * computes z=a*x+b*y where y and x are long vectors.
 * if a=0, x is not used.
 * if b=0, y is not used.
 */
void Paso_LinearCombination(const dim_t n, double* z, const double a, const double* x, const double b, const double* y)
{
    dim_t i,local_n,rest,n_start,n_end,q;
    #ifdef _OPENMP
    const int num_threads=omp_get_max_threads();
    #else
    const int num_threads=1;
    #endif

    #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q)
    for (i=0;i<num_threads;++i) {
        local_n=n/num_threads;
        rest=n-local_n*num_threads;
        n_start=local_n*i+MIN(i,rest);
        n_end=local_n*(i+1)+MIN(i+1,rest);
        if (((ABS(a)==0) && (ABS(b)==0)) || (y==NULL) || (x==NULL)) {
            #pragma ivdep
            for (q=n_start;q<n_end;++q) {
                z[q]=0.;
            }
        } else if ( ((ABS(a)==0) && (ABS(b)>0.)) || (x==NULL) ) {
            #pragma ivdep
            for (q=n_start;q<n_end;++q) {
                z[q]=b*y[q];
            }
        } else if (((ABS(a)>0) && (ABS(b)==0.)) || (y==NULL) ) {
            #pragma ivdep
            for (q=n_start;q<n_end;++q) {
                z[q]=a*x[q];
            }
        } else {
            #pragma ivdep
            for (q=n_start;q<n_end;++q) {
                z[q]=a*x[q]+b*y[q];
            }
        }
    }
}
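/*
 * Example (illustrative only): with x={1,2} and y={10,20},
 * Paso_LinearCombination(2, z, 3., x, -1., y) sets z to {3*1-10, 3*2-20}
 * = {-7,-14}. Paso_Copy above relies on the b=0 branch, which writes
 * a*x=in into z=out without using the y term.
 */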

/* returns the inner product of the rank-local vector pieces x and y; with
   ESYS_MPI the rank-local results are summed over all ranks */
double Paso_InnerProduct(const dim_t n,const double* x, const double* y, Esys_MPIInfo* mpiinfo)
{
    dim_t i,local_n,rest,n_start,n_end,q;
    double my_out=0, local_out=0., out=0.;
    #ifdef _OPENMP
    const int num_threads=omp_get_max_threads();
    #else
    const int num_threads=1;
    #endif
    #pragma omp parallel for private(i,local_out,local_n,rest,n_start,n_end,q)
    for (i=0;i<num_threads;++i) {
        local_out=0;
        local_n=n/num_threads;
        rest=n-local_n*num_threads;
        n_start=local_n*i+MIN(i,rest);
        n_end=local_n*(i+1)+MIN(i+1,rest);
        #pragma ivdep
        for (q=n_start;q<n_end;++q) local_out+=x[q]*y[q];
        #pragma omp critical
        {
            my_out+=local_out;
        }
    }
    #ifdef ESYS_MPI
    #pragma omp single
    {
        MPI_Allreduce(&my_out,&out, 1, MPI_DOUBLE, MPI_SUM, mpiinfo->comm);
    }
    #else
    out=my_out;
    #endif

    return out;
}
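/*
 * Example (illustrative only): with the rank-local pieces x={1,2,3} and
 * y={4,5,6}, the thread-local partial sums add up to 1*4+2*5+3*6=32; with
 * ESYS_MPI enabled, the 32 of every rank is then summed by MPI_Allreduce,
 * so all ranks receive the same global inner product.
 */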
/* returns the (global) maximum norm of x */
double Paso_lsup(const dim_t n, const double* x, Esys_MPIInfo* mpiinfo)
{
    dim_t i,local_n,rest,n_start,n_end,q;
    double my_out=0., local_out=0., out=0.;
    #ifdef _OPENMP
    const int num_threads=omp_get_max_threads();
    #else
    const int num_threads=1;
    #endif
    #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q, local_out)
    for (i=0;i<num_threads;++i) {
        local_n=n/num_threads;
        rest=n-local_n*num_threads;
        n_start=local_n*i+MIN(i,rest);
        n_end=local_n*(i+1)+MIN(i+1,rest);
        local_out=0;
        for (q=n_start;q<n_end;++q) local_out=MAX(ABS(x[q]),local_out);
        #pragma omp critical
        {
            my_out=MAX(my_out,local_out);
        }
    }
    #ifdef ESYS_MPI
    #pragma omp single
    {
        MPI_Allreduce(&my_out,&out, 1, MPI_DOUBLE, MPI_MAX, mpiinfo->comm);
    }
    #else
    out=my_out;
    #endif

    return out;
}
/* returns the (global) Euclidean norm of x */
double Paso_l2(const dim_t n, const double* x, Esys_MPIInfo* mpiinfo)
{
    dim_t i,local_n,rest,n_start,n_end,q;
    double my_out=0, local_out=0., out=0.;
    #ifdef _OPENMP
    const int num_threads=omp_get_max_threads();
    #else
    const int num_threads=1;
    #endif
    #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q, local_out)
    for (i=0;i<num_threads;++i) {
        local_n=n/num_threads;
        rest=n-local_n*num_threads;
        n_start=local_n*i+MIN(i,rest);
        n_end=local_n*(i+1)+MIN(i+1,rest);
        local_out=0;
        for (q=n_start;q<n_end;++q) local_out+=x[q]*x[q];
        #pragma omp critical
        {
            my_out+=local_out;
        }
    }
    #ifdef ESYS_MPI
    #pragma omp single
    {
        MPI_Allreduce(&my_out,&out, 1, MPI_DOUBLE, MPI_SUM, mpiinfo->comm);
    }
    #else
    out=my_out;
    #endif

    return sqrt(out);
}
/*
 * Applies a sequence of n-1 Givens rotations (c,s) to v of length n, which is assumed to be small.
 */
void ApplyGivensRotations(const dim_t n,double* v,const double* c,const double* s)
{
    dim_t i;
    register double w1,w2;
    #pragma ivdep
    for (i=0; i<n-1;++i) {
        w1=c[i]*v[i]-s[i]*v[i+1];
        w2=s[i]*v[i]+c[i]*v[i+1];
        v[i]=w1;
        v[i+1]=w2;
    }
}
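/*
 * Example (illustrative only): a single rotation with c[0]=0 and s[0]=1
 * applied to v={1,0} gives w1=0*1-1*0=0 and w2=1*1+0*0=1, i.e. v becomes
 * {0,1}. Rotations of this kind are typically used in GMRES-type solvers
 * to update the small Hessenberg column by previously computed rotations.
 */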

/* returns TRUE if the file filename exists and can be opened for reading */
bool_t Paso_fileExists( const char* filename )
{
    FILE* fp = NULL;
    fp = fopen(filename,"r");
    if( fp != NULL ) {
        fclose(fp);
        return TRUE;
    } else {
        return FALSE;
    }
}

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision
