
Contents of /trunk/paso/src/PasoUtil.c



Revision 3327 - Fri Oct 29 02:08:37 2010 UTC by gross
File MIME type: text/plain
File size: 12923 byte(s)
missing omp function added.

/*******************************************************
 *
 * Copyright (c) 2003-2010 by University of Queensland
 * Earth Systems Science Computational Center (ESSCC)
 * http://www.uq.edu.au/esscc
 *
 * Primary Business: Queensland, Australia
 * Licensed under the Open Software License version 3.0
 * http://www.opensource.org/licenses/osl-3.0.php
 *
 *******************************************************/


/**************************************************************/

/* Some utility routines: */

/**************************************************************/

/* Copyrights by ACcESS Australia, 2003,2004,2005 */

/**************************************************************/

#include "Common.h"
#include "PasoUtil.h"
#include "Paso.h"


/* returns TRUE if array contains value */
bool_t Paso_Util_isAny(dim_t N, index_t* array, index_t value) {
   bool_t out=FALSE;
   register bool_t out2;
   dim_t i;

   #pragma omp parallel private(i, out2)
   {
      out2=FALSE;
      #pragma omp for schedule(static)
      for (i=0;i<N;i++) out2 = out2 || (array[i]==value);
      #pragma omp critical
      {
         out = out || out2;
      }
      /* this is how it should look, but gcc 4.4 seems to have a problem with it:
      #pragma omp parallel for private(i) schedule(static) reduction(||:out)
      for (i=0;i<N;i++) out = out || (array[i]==value);
      */
   }
   return out;
}


/**************************************************************/

/* calculates the cumulative sum of array (in place) and returns the total sum */

/**************************************************************/
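/* on return array[i] holds the sum of the original entries 0..i-1 (exclusive prefix sum);
   e.g. array = {3,1,2} becomes {0,3,4} and the returned total is 6 */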
index_t Paso_Util_cumsum(dim_t N, index_t* array) {
   index_t out=0, tmp;
   dim_t i;
   index_t *partial_sums=NULL, sum;
   const int num_threads=omp_get_max_threads();
   int thread_num;

   if (num_threads>1) {
      partial_sums=TMPMEMALLOC(num_threads, index_t);
      #pragma omp parallel private(sum,thread_num,i,tmp)
      {
         sum=0;
         thread_num=omp_get_thread_num();
         #pragma omp for schedule(static)
         for (i=0;i<N;++i) sum+=array[i];

         partial_sums[thread_num]=sum;
         #pragma omp barrier
         #pragma omp master
         {
            out=0;
            for (i=0;i<num_threads;++i) {
               tmp=out;
               out+=partial_sums[i];
               partial_sums[i]=tmp;
            }
         }
         #pragma omp barrier
         sum=partial_sums[thread_num];
         #pragma omp for schedule(static)
         for (i=0;i<N;++i) {
            tmp=sum;
            sum+=array[i];
            array[i]=tmp;
         }
      }
      TMPMEMFREE(partial_sums);
   } else {
      for (i=0;i<N;++i) {
         tmp=out;
         out+=array[i];
         array[i]=tmp;
      }
   }
   return out;
}

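/* like Paso_Util_cumsum but restricted by a mask: entries with mask[i]==TRUE receive
   consecutive numbers 0,1,2,..., entries with mask[i]==FALSE are set to -1.
   the return value is the number of TRUE entries. */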
index_t Paso_Util_cumsum_maskedTrue(dim_t N, index_t* array, bool_t* mask) {
   index_t out=0, tmp;
   dim_t i;
   index_t *partial_sums=NULL, sum;
   const int num_threads=omp_get_max_threads();
   int thread_num;

   if (num_threads>1) {
      partial_sums=TMPMEMALLOC(num_threads, index_t);
      #pragma omp parallel private(sum,i,thread_num,tmp)
      {
         sum=0;
         thread_num=omp_get_thread_num();
         #pragma omp for schedule(static)
         for (i=0;i<N;++i) {
            if (mask[i]) {
               array[i]=1;
               sum++;
            } else {
               array[i]=0;
            }
         }
         partial_sums[thread_num]=sum;
         #pragma omp barrier
         #pragma omp master
         {
            out=0;
            for (i=0;i<num_threads;++i) {
               tmp=out;
               out+=partial_sums[i];
               partial_sums[i]=tmp;
            }
         }
         #pragma omp barrier
         sum=partial_sums[thread_num];
         #pragma omp for schedule(static)
         for (i=0;i<N;++i) {
            if (mask[i]) {
               tmp=sum;
               sum+=array[i];
               array[i]=tmp;
            } else {
               array[i]=-1;
            }
         }
      }
      TMPMEMFREE(partial_sums);
   } else { /* num_threads=1 */
      for (i=0;i<N;++i) {
         if (mask[i]) {
            array[i]=out;
            out++;
         } else {
            array[i]=-1;
         }
      }
   }
   return out;
}

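/* like Paso_Util_cumsum_maskedTrue but with the mask inverted: entries with mask[i]==FALSE
   receive consecutive numbers 0,1,2,..., entries with mask[i]==TRUE are set to -1.
   the return value is the number of FALSE entries. */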
index_t Paso_Util_cumsum_maskedFalse(dim_t N, index_t* array, bool_t* mask) {
   index_t out=0, tmp=0;
   dim_t i;
   index_t *partial_sums=NULL, sum;
   const int num_threads=omp_get_max_threads();
   int thread_num=0;

   if (num_threads>1) {
      partial_sums=TMPMEMALLOC(num_threads, index_t);
      #pragma omp parallel private(sum,i,thread_num,tmp)
      {
         sum=0;
         thread_num=omp_get_thread_num();
         #pragma omp for schedule(static)
         for (i=0;i<N;++i) {
            if (! mask[i]) {
               array[i]=1;
               sum++;
            } else {
               array[i]=0;
            }
         }
         partial_sums[thread_num]=sum;
         #pragma omp barrier
         #pragma omp master
         {
            out=0;
            for (i=0;i<num_threads;++i) {
               tmp=out;
               out+=partial_sums[i];
               partial_sums[i]=tmp;
            }
         }
         #pragma omp barrier
         sum=partial_sums[thread_num];
         #pragma omp for schedule(static)
         for (i=0;i<N;++i) {
            if (! mask[i]) {
               tmp=sum;
               sum+=array[i];
               array[i]=tmp;
            } else {
               array[i]=-1;
            }
         }
      }
      TMPMEMFREE(partial_sums);
   } else {
      for (i=0;i<N;++i) {
         if (! mask[i]) {
            array[i]=out;
            out++;
         } else {
            array[i]=-1;
         }
      }
   }

   return out;
}

/* returns the index of the largest entry in lambda (ties are resolved towards the smaller index) */
index_t Paso_Util_arg_max(dim_t n, dim_t* lambda) {
   dim_t i;
   index_t max=-1;
   index_t argmax=-1;
   index_t lmax=-1;
   index_t li=-1;
   const int num_threads=omp_get_max_threads();

   if (n>0) {
      max=lambda[0];
      argmax=0;
      if (num_threads>1) {
         #pragma omp parallel private(i,lmax,li)
         {
            lmax=max;
            li=argmax;
            #pragma omp for schedule(static)
            for (i=0;i<n;++i) {
               if (lmax<lambda[i]) {
                  lmax=lambda[i];
                  li=i;
               }
            }
            #pragma omp critical
            {
               if (max < lmax) {
                  max=lmax;
                  argmax=li;
               } else if (max==lmax && argmax>li) {
                  argmax=li;
               }
            }
         }
      } else {
         for (i=0;i<n;++i) {
            if (max<lambda[i]) {
               max=lambda[i];
               argmax=i;
            }
         }
      }
   }
   return argmax;
}

/*
 * set x to zero:
 */
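/* the vector is split into num_threads contiguous blocks: thread i handles the
   indices [n_start, n_end) with local_n = n/num_threads elements, and the first
   (n mod num_threads) threads take one extra element */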
void Paso_zeroes(const dim_t n, double* x)
{
   dim_t i, local_n, rest, n_start, n_end, q;
   const int num_threads=omp_get_max_threads();

   #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q)
   for (i=0;i<num_threads;++i) {
      local_n=n/num_threads;
      rest=n-local_n*num_threads;
      n_start=local_n*i+MIN(i,rest);
      n_end=local_n*(i+1)+MIN(i+1,rest);
      #pragma ivdep
      for (q=n_start;q<n_end;++q) x[q]=0;
   }

}

/*
 * performs an update of the form x=a*x+b*y where y and x are long vectors.
 * if b=0, y is not used.
 */
void Paso_Update(const dim_t n, const double a, double* x, const double b, const double* y)
{
   dim_t i, local_n, rest, n_start, n_end, q;
   const int num_threads=omp_get_max_threads();

   #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q)
   for (i=0;i<num_threads;++i) {
      local_n=n/num_threads;
      rest=n-local_n*num_threads;
      n_start=local_n*i+MIN(i,rest);
      n_end=local_n*(i+1)+MIN(i+1,rest);
      if ((a==1.) && (b==1.)) {
         #pragma ivdep
         for (q=n_start;q<n_end;++q) {
            x[q]+=y[q];
         }
      } else if (a==1.) {
         #pragma ivdep
         for (q=n_start;q<n_end;++q) {
            x[q]+=b*y[q];
         }
      } else if ((ABS(a)==0) && (ABS(b)==0)) {
         #pragma ivdep
         for (q=n_start;q<n_end;++q) {
            x[q]=0.;
         }
      } else if ((ABS(a)==0) && (b==1.)) {
         #pragma ivdep
         for (q=n_start;q<n_end;++q) {
            x[q]=y[q];
         }
      } else if (ABS(a)==0) {
         #pragma ivdep
         for (q=n_start;q<n_end;++q) {
            x[q]=b*y[q];
         }
      } else {
         #pragma ivdep
         for (q=n_start;q<n_end;++q) {
            x[q]=a*x[q]+b*y[q];
         }
      }
   }
}
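/* copies in into out (out=in) */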
void Paso_Copy(const dim_t n, double* out, const double* in) {
   Paso_LinearCombination(n, out, 1., in, 0., in);
}
/*
 * performs an update of the form z=a*x+b*y where y and x are long vectors.
 * if a=0, x is not used.
 * if b=0, y is not used.
 */
void Paso_LinearCombination(const dim_t n, double* z, const double a, const double* x, const double b, const double* y)
{
   dim_t i, local_n, rest, n_start, n_end, q;
   const int num_threads=omp_get_max_threads();

   #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q)
   for (i=0;i<num_threads;++i) {
      local_n=n/num_threads;
      rest=n-local_n*num_threads;
      n_start=local_n*i+MIN(i,rest);
      n_end=local_n*(i+1)+MIN(i+1,rest);
      if (((ABS(a)==0) && (ABS(b)==0)) || (y==NULL) || (x==NULL)) {
         #pragma ivdep
         for (q=n_start;q<n_end;++q) {
            z[q]=0.;
         }
      } else if (((ABS(a)==0) && (ABS(b)>0.)) || (x==NULL)) {
         #pragma ivdep
         for (q=n_start;q<n_end;++q) {
            z[q]=b*y[q];
         }
      } else if (((ABS(a)>0) && (ABS(b)==0.)) || (y==NULL)) {
         #pragma ivdep
         for (q=n_start;q<n_end;++q) {
            z[q]=a*x[q];
         }
      } else {
         #pragma ivdep
         for (q=n_start;q<n_end;++q) {
            z[q]=a*x[q]+b*y[q];
         }
      }
   }
}

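/* returns the inner product of x and y; the local dot products are summed across
   all MPI ranks with MPI_Allreduce when ESYS_MPI is defined */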
double Paso_InnerProduct(const dim_t n, const double* x, const double* y, Esys_MPIInfo* mpiinfo)
{
   dim_t i, local_n, rest, n_start, n_end, q;
   double my_out=0, local_out=0., out=0.;
   const int num_threads=omp_get_max_threads();

   #pragma omp parallel for private(i,local_out,local_n,rest,n_start,n_end,q)
   for (i=0;i<num_threads;++i) {
      local_out=0;
      local_n=n/num_threads;
      rest=n-local_n*num_threads;
      n_start=local_n*i+MIN(i,rest);
      n_end=local_n*(i+1)+MIN(i+1,rest);
      #pragma ivdep
      for (q=n_start;q<n_end;++q) local_out+=x[q]*y[q];
      #pragma omp critical
      {
         my_out+=local_out;
      }
   }
   #ifdef ESYS_MPI
   #pragma omp single
   {
      MPI_Allreduce(&my_out, &out, 1, MPI_DOUBLE, MPI_SUM, mpiinfo->comm);
   }
   #else
   out=my_out;
   #endif

   return out;

}
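/* returns the maximum norm (largest absolute entry) of x,
   reduced over all MPI ranks when ESYS_MPI is defined */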
double Paso_lsup(const dim_t n, const double* x, Esys_MPIInfo* mpiinfo)
{
   dim_t i, local_n, rest, n_start, n_end, q;
   double my_out=0., local_out=0., out=0.;
   const int num_threads=omp_get_max_threads();

   #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q,local_out)
   for (i=0;i<num_threads;++i) {
      local_n=n/num_threads;
      rest=n-local_n*num_threads;
      n_start=local_n*i+MIN(i,rest);
      n_end=local_n*(i+1)+MIN(i+1,rest);
      local_out=0;
      for (q=n_start;q<n_end;++q) local_out=MAX(ABS(x[q]),local_out);
      #pragma omp critical
      {
         my_out=MAX(my_out,local_out);
      }
   }
   #ifdef ESYS_MPI
   #pragma omp single
   {
      MPI_Allreduce(&my_out, &out, 1, MPI_DOUBLE, MPI_MAX, mpiinfo->comm);
   }
   #else
   out=my_out;
   #endif

   return out;

}
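/* returns the Euclidean (l2) norm of x,
   reduced over all MPI ranks when ESYS_MPI is defined */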
double Paso_l2(const dim_t n, const double* x, Esys_MPIInfo* mpiinfo)
{
   dim_t i, local_n, rest, n_start, n_end, q;
   double my_out=0, local_out=0., out=0.;
   const int num_threads=omp_get_max_threads();

   #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q,local_out)
   for (i=0;i<num_threads;++i) {
      local_n=n/num_threads;
      rest=n-local_n*num_threads;
      n_start=local_n*i+MIN(i,rest);
      n_end=local_n*(i+1)+MIN(i+1,rest);
      local_out=0;
      for (q=n_start;q<n_end;++q) local_out+=x[q]*x[q];
      #pragma omp critical
      {
         my_out+=local_out;
      }
   }
   #ifdef ESYS_MPI
   #pragma omp single
   {
      MPI_Allreduce(&my_out, &out, 1, MPI_DOUBLE, MPI_SUM, mpiinfo->comm);
   }
   #else
   out=my_out;
   #endif

   return sqrt(out);

}
/*
 * Applies a sequence of n-1 Givens rotations (c,s) to v of length n, where n is assumed to be small.
 *
 */
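/* rotation i replaces the pair (v[i], v[i+1]) by
   ( c[i]*v[i] - s[i]*v[i+1] , s[i]*v[i] + c[i]*v[i+1] ) */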
void ApplyGivensRotations(const dim_t n, double* v, const double* c, const double* s)
{
   dim_t i;
   register double w1, w2;
   #pragma ivdep
   for (i=0; i<n-1; ++i) {
      w1=c[i]*v[i]-s[i]*v[i+1];
      w2=s[i]*v[i]+c[i]*v[i+1];
      v[i]=w1;
      v[i+1]=w2;
   }
}

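/* returns TRUE if the file can be opened for reading */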
bool_t Paso_fileExists( const char* filename )
{
   FILE* fp = NULL;
   fp = fopen(filename,"r");
   if( fp != NULL ) {
      fclose(fp);
      return TRUE;
   } else {
      return FALSE;
   }
}

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision
