
Contents of /trunk/paso/src/PasoUtil.c



Revision 3482 - Wed Mar 23 04:06:52 2011 UTC by gross
File MIME type: text/plain
File size: 13818 byte(s)
Log message: some work on AMG MPI

/*******************************************************
*
* Copyright (c) 2003-2010 by University of Queensland
* Earth Systems Science Computational Center (ESSCC)
* http://www.uq.edu.au/esscc
*
* Primary Business: Queensland, Australia
* Licensed under the Open Software License version 3.0
* http://www.opensource.org/licenses/osl-3.0.php
*
*******************************************************/


/**************************************************************/

/* Some utility routines: */

/**************************************************************/

/* Copyrights by ACcESS Australia, 2003,2004,2005 */

/**************************************************************/

#include "Common.h"
#include "PasoUtil.h"
#include "Paso.h"

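/* Note on the OpenMP usage below: several of these routines perform a manual
   reduction: each thread accumulates a partial result in a private variable
   and the partial results are merged inside an "omp critical" section. */
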
/* returns true if array contains value */
bool_t Paso_Util_isAny(dim_t N,index_t* array,index_t value) {
    bool_t out=FALSE;
    register bool_t out2;
    dim_t i;

    #pragma omp parallel private(i, out2)
    {
        out2=FALSE;
        #pragma omp for schedule(static)
        for (i=0;i<N;i++) out2 = out2 || (array[i]==value);
        #pragma omp critical
        {
            out = out || out2;
        }
        /* This is how it should look, but gcc 4.4 seems to have a problem with it:
        #pragma omp parallel for private(i) schedule(static) reduction(||:out)
        for (i=0;i<N;i++) out = out || (array[i]==value);
        */
    }
    return out;
}

/* returns the number of positive values in x */
dim_t Paso_Util_numPositives(const dim_t N, const double *x) {
    dim_t out=0;
    register dim_t out2;
    dim_t i;

    #pragma omp parallel private(i, out2)
    {
        out2=0;
        #pragma omp for schedule(static)
        for (i=0;i<N;i++) {
            if ( x[i] > 0 ) out2++;
        }
        #pragma omp critical
        {
            out = out + out2;
        }
    }
    return out;
}

/* returns the maximum value in array */
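/* Note: for N<=0 the sentinel value INDEX_T_MIN is returned. */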
index_t Paso_Util_iMax(const dim_t N,const index_t* array) {
    index_t out=INDEX_T_MIN;
    register index_t out2;
    dim_t i;
    if (N>0) {
        #pragma omp parallel private(i, out2)
        {
            out2=INDEX_T_MIN;
            #pragma omp for schedule(static)
            for (i=0;i<N;i++) out2 = MAX(out2, array[i]);

            #pragma omp critical
            {
                out = MAX(out, out2);
            }
        }
    }
    return out;
}

/**************************************************************/

/* calculates the cumulative sum in array and returns the total sum */

/**************************************************************/
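/* The parallel version works in two passes: each thread first sums its chunk
   of the array, the master thread then turns the per-thread totals into
   exclusive offsets, and finally each thread overwrites its chunk with the
   running (exclusive) sum starting from its offset.

   Illustrative example: for array = {3, 1, 4} the call
   Paso_Util_cumsum(3, array) overwrites array with {0, 3, 4}
   and returns the total 8. */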
index_t Paso_Util_cumsum(dim_t N,index_t* array) {
    index_t out=0,tmp;
    dim_t i;
    index_t *partial_sums=NULL,sum;
    const int num_threads=omp_get_max_threads();
    int thread_num;

    if (num_threads>1) {
        partial_sums=TMPMEMALLOC(num_threads, index_t);
        #pragma omp parallel private(sum,thread_num,i,tmp)
        {
            sum=0;
            thread_num=omp_get_thread_num();
            #pragma omp for schedule(static)
            for (i=0;i<N;++i) sum+=array[i];

            partial_sums[thread_num]=sum;
            #pragma omp barrier
            #pragma omp master
            {
                out=0;
                for (i=0;i<num_threads;++i) {
                    tmp=out;
                    out+=partial_sums[i];
                    partial_sums[i]=tmp;
                }
            }
            #pragma omp barrier
            sum=partial_sums[thread_num];
            #pragma omp for schedule(static)
            for (i=0;i<N;++i) {
                tmp=sum;
                sum+=array[i];
                array[i]=tmp;
            }
        }
        TMPMEMFREE(partial_sums);
    } else {
        for (i=0;i<N;++i) {
            tmp=out;
            out+=array[i];
            array[i]=tmp;
        }
    }
    return out;
}

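/* numbers the entries of array with mask[i]==TRUE consecutively (0,1,2,...),
   sets the remaining entries to -1 and returns the number of TRUE entries.
   Illustrative example: for mask = {TRUE, FALSE, TRUE} the call
   Paso_Util_cumsum_maskedTrue(3, array, mask) sets array to {0, -1, 1}
   and returns 2. */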
index_t Paso_Util_cumsum_maskedTrue(dim_t N,index_t* array, bool_t* mask) {
    index_t out=0,tmp;
    dim_t i;
    index_t *partial_sums=NULL,sum;
    const int num_threads=omp_get_max_threads();
    int thread_num;

    if (num_threads>1) {
        partial_sums=TMPMEMALLOC(num_threads, index_t);
        #pragma omp parallel private(sum,i,thread_num,tmp)
        {
            sum=0;
            thread_num=omp_get_thread_num();
            #pragma omp for schedule(static)
            for (i=0;i<N;++i) {
                if (mask[i]) {
                    array[i]=1;
                    sum++;
                } else {
                    array[i]=0;
                }
            }
            partial_sums[thread_num]=sum;
            #pragma omp barrier
            #pragma omp master
            {
                out=0;
                for (i=0;i<num_threads;++i) {
                    tmp=out;
                    out+=partial_sums[i];
                    partial_sums[i]=tmp;
                }
            }
            #pragma omp barrier
            sum=partial_sums[thread_num];
            #pragma omp for schedule(static)
            for (i=0;i<N;++i) {
                if (mask[i]) {
                    tmp=sum;
                    sum+=array[i];
                    array[i]=tmp;
                } else {
                    array[i]=-1;
                }
            }
        }
        TMPMEMFREE(partial_sums);
    } else { /* num_threads=1 */
        for (i=0;i<N;++i) {
            if (mask[i]) {
                array[i]=out;
                out++;
            } else {
                array[i]=-1;
            }
        }
    }
    return out;
}

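/* same as Paso_Util_cumsum_maskedTrue but numbers the entries with
   mask[i]==FALSE and sets the TRUE entries to -1. */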
index_t Paso_Util_cumsum_maskedFalse(dim_t N,index_t* array, bool_t* mask) {
    index_t out=0,tmp=0;
    dim_t i;
    index_t *partial_sums=NULL,sum;
    const int num_threads=omp_get_max_threads();
    int thread_num=0;

    if (num_threads>1) {
        partial_sums=TMPMEMALLOC(num_threads,index_t);
        #pragma omp parallel private(sum,i,thread_num,tmp)
        {
            sum=0;
            thread_num=omp_get_thread_num();
            #pragma omp for schedule(static)
            for (i=0;i<N;++i) {
                if (! mask[i]) {
                    array[i]=1;
                    sum++;
                } else {
                    array[i]=0;
                }
            }
            partial_sums[thread_num]=sum;
            #pragma omp barrier
            #pragma omp master
            {
                out=0;
                for (i=0;i<num_threads;++i) {
                    tmp=out;
                    out+=partial_sums[i];
                    partial_sums[i]=tmp;
                }
            }
            #pragma omp barrier
            sum=partial_sums[thread_num];
            #pragma omp for schedule(static)
            for (i=0;i<N;++i) {
                if (! mask[i]) {
                    tmp=sum;
                    sum+=array[i];
                    array[i]=tmp;
                } else {
                    array[i]=-1;
                }
            }
        }
        TMPMEMFREE(partial_sums);
    } else {
        for (i=0;i<N;++i) {
            if (! mask[i]) {
                array[i]=out;
                out++;
            } else {
                array[i]=-1;
            }
        }
    }

    return out;
}

/* returns the index at which lambda takes its maximum value */
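/* Ties are resolved in favour of the smallest index; -1 is returned when n
   is not positive. */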
index_t Paso_Util_arg_max(dim_t n, dim_t* lambda) {
    dim_t i;
    index_t max=-1;
    index_t argmax=-1;
    index_t lmax=-1;
    index_t li=-1;
    const int num_threads=omp_get_max_threads();

    if (n>0) {
        max=lambda[0];
        argmax=0;
        if (num_threads>1) {
            #pragma omp parallel private(i,lmax,li)
            {
                lmax=max;
                li=argmax;
                #pragma omp for schedule(static)
                for (i=0;i<n;++i) {
                    if (lmax<lambda[i]) {
                        lmax=lambda[i];
                        li=i;
                    }
                }
                #pragma omp critical
                {
                    if (max < lmax) {
                        max=lmax;
                        argmax=li;
                    } else if (max==lmax && argmax>li) {
                        argmax=li;
                    }
                }
            }
        } else {
            for (i=0;i<n;++i) {
                if (max<lambda[i]) {
                    max=lambda[i];
                    argmax=i;
                }
            }
        }
    }
    return argmax;
}

/*
 * set x to zero:
 */
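/* The loop below hands each thread a contiguous block [n_start,n_end):
   local_n=n/num_threads elements per thread, with the remainder
   rest=n%num_threads spread over the first threads (one extra element each).
   The same blocking scheme is used by the vector kernels further down. */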
void Paso_zeroes(const dim_t n, double* x)
{
    dim_t i,local_n,rest,n_start,n_end,q;
    const int num_threads=omp_get_max_threads();

    #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q)
    for (i=0;i<num_threads;++i) {
        local_n=n/num_threads;
        rest=n-local_n*num_threads;
        n_start=local_n*i+MIN(i,rest);
        n_end=local_n*(i+1)+MIN(i+1,rest);
        #pragma ivdep
        for (q=n_start;q<n_end;++q) x[q]=0;
    }

}

/*
 * performs an update of the form x=a*x+b*y where y and x are long vectors.
 * if b=0, y is not used.
 */
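/* The combinations a==1, a==0, b==0 and b==1 are handled in separate branches
   so that the inner loops avoid unnecessary multiplications. */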
void Paso_Update(const dim_t n, const double a, double* x, const double b, const double* y)
{
    dim_t i,local_n,rest,n_start,n_end,q;
    const int num_threads=omp_get_max_threads();


    #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q)
    for (i=0;i<num_threads;++i) {
        local_n=n/num_threads;
        rest=n-local_n*num_threads;
        n_start=local_n*i+MIN(i,rest);
        n_end=local_n*(i+1)+MIN(i+1,rest);
        if ((a==1.) && (b==1.)) {
            #pragma ivdep
            for (q=n_start;q<n_end;++q) {
                x[q]+=y[q];
            }
        } else if (a==1.) {
            #pragma ivdep
            for (q=n_start;q<n_end;++q) {
                x[q]+=b*y[q];
            }
        } else if ((ABS(a)==0) && (ABS(b)==0)) {
            #pragma ivdep
            for (q=n_start;q<n_end;++q) {
                x[q]=0.;
            }
        } else if ((ABS(a)==0) && (b==1.)) {
            #pragma ivdep
            for (q=n_start;q<n_end;++q) {
                x[q]=y[q];
            }
        } else if (ABS(a)==0) {
            #pragma ivdep
            for (q=n_start;q<n_end;++q) {
                x[q]=b*y[q];
            }
        } else {
            #pragma ivdep
            for (q=n_start;q<n_end;++q) {
                x[q]=a*x[q]+b*y[q];
            }
        }
    }
}
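
/* copies the vector in into out */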
void Paso_Copy(const dim_t n, double* out, const double* in) {
    Paso_LinearCombination(n,out, 1.,in,0., in);
}

/*
 * performs an update of the form z=a*x+b*y where y and x are long vectors.
 * if a=0, x is not used.
 * if b=0, y is not used.
 */
void Paso_LinearCombination(const dim_t n, double* z, const double a, const double* x, const double b, const double* y)
{
    dim_t i,local_n,rest,n_start,n_end,q;
    const int num_threads=omp_get_max_threads();


    #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q)
    for (i=0;i<num_threads;++i) {
        local_n=n/num_threads;
        rest=n-local_n*num_threads;
        n_start=local_n*i+MIN(i,rest);
        n_end=local_n*(i+1)+MIN(i+1,rest);
        if (((ABS(a)==0) && (ABS(b)==0)) || (y==NULL) || (x==NULL)) {
            #pragma ivdep
            for (q=n_start;q<n_end;++q) {
                z[q]=0.;
            }
        } else if ( ((ABS(a)==0) && (ABS(b)>0.)) || (x==NULL) ) {
            #pragma ivdep
            for (q=n_start;q<n_end;++q) {
                z[q]=b*y[q];
            }
        } else if (((ABS(a)>0) && (ABS(b)==0.)) || (y==NULL) ) {
            #pragma ivdep
            for (q=n_start;q<n_end;++q) {
                z[q]=a*x[q];
            }
        } else {
            #pragma ivdep
            for (q=n_start;q<n_end;++q) {
                z[q]=a*x[q]+b*y[q];
            }
        }
    }
}


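/* returns the (global) dot product of x and y; with MPI the local
   contributions are summed over all ranks of mpiinfo->comm. */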
double Paso_InnerProduct(const dim_t n,const double* x, const double* y, Esys_MPIInfo* mpiinfo)
{
    dim_t i,local_n,rest,n_start,n_end,q;
    double my_out=0, local_out=0., out=0.;
    const int num_threads=omp_get_max_threads();

    #pragma omp parallel for private(i,local_out,local_n,rest,n_start,n_end,q)
    for (i=0;i<num_threads;++i) {
        local_out=0;
        local_n=n/num_threads;
        rest=n-local_n*num_threads;
        n_start=local_n*i+MIN(i,rest);
        n_end=local_n*(i+1)+MIN(i+1,rest);
        #pragma ivdep
        for (q=n_start;q<n_end;++q) local_out+=x[q]*y[q];
        #pragma omp critical
        {
            my_out+=local_out;
        }
    }
#ifdef ESYS_MPI
    #pragma omp single
    {
        MPI_Allreduce(&my_out,&out, 1, MPI_DOUBLE, MPI_SUM, mpiinfo->comm);
    }
#else
    out=my_out;
#endif

    return out;

}
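
/* returns the (global) maximum norm of x, i.e. the largest absolute value. */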
double Paso_lsup(const dim_t n, const double* x, Esys_MPIInfo* mpiinfo)
{
    dim_t i,local_n,rest,n_start,n_end,q;
    double my_out=0., local_out=0., out=0.;
    const int num_threads=omp_get_max_threads();

    #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q, local_out)
    for (i=0;i<num_threads;++i) {
        local_n=n/num_threads;
        rest=n-local_n*num_threads;
        n_start=local_n*i+MIN(i,rest);
        n_end=local_n*(i+1)+MIN(i+1,rest);
        local_out=0;
        for (q=n_start;q<n_end;++q) local_out=MAX(ABS(x[q]),local_out);
        #pragma omp critical
        {
            my_out=MAX(my_out,local_out);
        }
    }
#ifdef ESYS_MPI
    #pragma omp single
    {
        MPI_Allreduce(&my_out,&out, 1, MPI_DOUBLE, MPI_MAX, mpiinfo->comm);
    }
#else
    out=my_out;
#endif

    return out;

}
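
/* returns the (global) Euclidean norm of x. */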
double Paso_l2(const dim_t n, const double* x, Esys_MPIInfo* mpiinfo)
{
    dim_t i,local_n,rest,n_start,n_end,q;
    double my_out=0, local_out=0., out=0.;
    const int num_threads=omp_get_max_threads();

    #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q, local_out)
    for (i=0;i<num_threads;++i) {
        local_n=n/num_threads;
        rest=n-local_n*num_threads;
        n_start=local_n*i+MIN(i,rest);
        n_end=local_n*(i+1)+MIN(i+1,rest);
        local_out=0;
        for (q=n_start;q<n_end;++q) local_out+=x[q]*x[q];
        #pragma omp critical
        {
            my_out+=local_out;
        }
    }
#ifdef ESYS_MPI
    #pragma omp single
    {
        MPI_Allreduce(&my_out,&out, 1, MPI_DOUBLE, MPI_SUM, mpiinfo->comm);
    }
#else
    out=my_out;
#endif

    return sqrt(out);

}
/*
 * Applies a sequence of n-1 Givens rotations (c,s) to v of length n, which is assumed to be small.
 *
 */
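/* The i-th rotation updates the pair (v[i], v[i+1]):
      v[i]   <- c[i]*v[i] - s[i]*v[i+1]
      v[i+1] <- s[i]*v[i] + c[i]*v[i+1]                                      */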
void ApplyGivensRotations(const dim_t n,double* v,const double* c,const double* s)
{
    dim_t i;
    register double w1,w2;
    #pragma ivdep
    for (i=0; i<n-1;++i) {
        w1=c[i]*v[i]-s[i]*v[i+1];
        w2=s[i]*v[i]+c[i]*v[i+1];
        v[i]=w1;
        v[i+1]=w2;
    }
}

bool_t Paso_fileExists( const char* filename )
{
    FILE* fp = NULL;
    fp = fopen(filename,"r");
    if( fp != NULL ) {
        fclose(fp);
        return TRUE;
    } else {
        return FALSE;
    }
}

