/[escript]/trunk/paso/src/PasoUtil.c
ViewVC logotype

Contents of /trunk/paso/src/PasoUtil.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3440 - (show annotations)
Fri Jan 14 00:04:53 2011 UTC (8 years, 7 months ago) by gross
File MIME type: text/plain
File size: 13404 byte(s)
a second AMG interpolation implemented
1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2010 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14
15 /**************************************************************/
16
17 /* Some utility routines: */
18
19 /**************************************************************/
20
21 /* Copyrights by ACcESS Australia, 2003,2004,2005 */
22
23 /**************************************************************/
24
25 #include "Common.h"
26 #include "PasoUtil.h"
27 #include "Paso.h"
28
29
30 /* returns true if array contains value */
31 bool_t Paso_Util_isAny(dim_t N,index_t* array,index_t value) {
32 bool_t out=FALSE;
33 register bool_t out2;
34 dim_t i;
35
36 #pragma omp parallel private(i, out2)
37 {
38 out2=FALSE;
39 #pragma omp for schedule(static)
40 for (i=0;i<N;i++) out2 = out2 || (array[i]==value);
41 #pragma omp critical
42 {
43 out = out || out2;
44 }
45 /* this how this should look like but gcc 4.4 seems to have a problem with this:
46 #pragma omp parallel for private(i) schedule(static) reduction(||:out)
47 for (i=0;i<N;i++) out = out || (array[i]==value);
48 */
49 }
50 return out;
51 }
52
53 /* returnsthe maximum value in array */
54 index_t Paso_Util_iMax(const dim_t N,const index_t* array) {
55 index_t out=INDEX_T_MIN;
56 register index_t out2;
57 dim_t i;
58 if (N>0) {
59 #pragma omp parallel private(i, out2)
60 {
61 out2=INDEX_T_MIN;
62 #pragma omp for schedule(static)
63 for (i=0;i<N;i++) out2 = MAX(out2, array[i]);
64
65 #pragma omp critical
66 {
67 out = MAX(out, out2);
68 }
69 }
70 }
71 return out;
72 }
73
74 /**************************************************************/
75
76 /* calculates the cummultative sum in array and returns the total sum */
77
78 /**************************************************************/
79 index_t Paso_Util_cumsum(dim_t N,index_t* array) {
80 index_t out=0,tmp;
81 dim_t i;
82 index_t *partial_sums=NULL,sum;
83 const int num_threads=omp_get_max_threads();
84 int thread_num;
85
86 if (num_threads>1) {
87 partial_sums=TMPMEMALLOC(num_threads, index_t);
88 #pragma omp parallel private(sum,thread_num ,i,tmp)
89 {
90 sum=0;
91 thread_num=omp_get_thread_num();
92 #pragma omp for schedule(static)
93 for (i=0;i<N;++i) sum+=array[i];
94
95 partial_sums[thread_num]=sum;
96 #pragma omp barrier
97 #pragma omp master
98 {
99 out=0;
100 for (i=0;i<num_threads;++i) {
101 tmp=out;
102 out+=partial_sums[i];
103 partial_sums[i]=tmp;
104 }
105 }
106 #pragma omp barrier
107 sum=partial_sums[thread_num];
108 #pragma omp for schedule(static)
109 for (i=0;i<N;++i) {
110 tmp=sum;
111 sum+=array[i];
112 array[i]=tmp;
113 }
114 }
115 TMPMEMFREE(partial_sums);
116 } else {
117 for (i=0;i<N;++i) {
118 tmp=out;
119 out+=array[i];
120 array[i]=tmp;
121 }
122 }
123 return out;
124 }
125
126 index_t Paso_Util_cumsum_maskedTrue(dim_t N,index_t* array, bool_t* mask) {
127 index_t out=0,tmp;
128 dim_t i;
129 index_t *partial_sums=NULL,sum;
130 const int num_threads=omp_get_max_threads();
131 int thread_num;
132
133 if (num_threads>1) {
134 partial_sums=TMPMEMALLOC(num_threads, index_t);
135 #pragma omp parallel private(sum,i,thread_num,tmp)
136 {
137 sum=0;
138 thread_num=omp_get_thread_num();
139 #pragma omp for schedule(static)
140 for (i=0;i<N;++i) {
141 if (mask[i]) {
142 array[i] =1;
143 sum++;
144 } else {
145 array[i] =0;
146 }
147 }
148 partial_sums[thread_num]=sum;
149 #pragma omp barrier
150 #pragma omp master
151 {
152 out=0;
153 for (i=0;i<num_threads;++i) {
154 tmp=out;
155 out+=partial_sums[i];
156 partial_sums[i]=tmp;
157 }
158 }
159 #pragma omp barrier
160 sum=partial_sums[thread_num];
161 #pragma omp for schedule(static)
162 for (i=0;i<N;++i) {
163 if (mask[i]) {
164 tmp=sum;
165 sum+=array[i];
166 array[i]=tmp;
167 } else {
168 array[i]=-1;
169 }
170 }
171 }
172 TMPMEMFREE(partial_sums);
173 } else { /* num_threads=1 */
174 for (i=0;i<N;++i) {
175 if (mask[i]) {
176 array[i]=out;
177 out++;
178 } else {
179 array[i]=-1;
180 }
181 }
182 }
183 return out;
184 }
185
186 index_t Paso_Util_cumsum_maskedFalse(dim_t N,index_t* array, bool_t* mask) {
187 index_t out=0,tmp=0;
188 dim_t i;
189 index_t *partial_sums=NULL,sum;
190 const int num_threads=omp_get_max_threads();
191 int thread_num=0;
192
193 if (num_threads>1) {
194 partial_sums=TMPMEMALLOC(num_threads,index_t);
195 #pragma omp parallel private(sum,i,thread_num,tmp)
196 {
197 sum=0;
198 thread_num=omp_get_thread_num();
199 #pragma omp for schedule(static)
200 for (i=0;i<N;++i) {
201 if (! mask[i]) {
202 array[i] =1;
203 sum++;
204 } else {
205 array[i] =0;
206 }
207 }
208 partial_sums[thread_num]=sum;
209 #pragma omp barrier
210 #pragma omp master
211 {
212 out=0;
213 for (i=0;i<num_threads;++i) {
214 tmp=out;
215 out+=partial_sums[i];
216 partial_sums[i]=tmp;
217 }
218 }
219 #pragma omp barrier
220 sum=partial_sums[thread_num];
221 #pragma omp for schedule(static)
222 for (i=0;i<N;++i) {
223 if (! mask[i]) {
224 tmp=sum;
225 sum+=array[i];
226 array[i]=tmp;
227 } else {
228 array[i]=-1;
229 }
230 }
231 }
232 TMPMEMFREE(partial_sums);
233 } else {
234 for (i=0;i<N;++i) {
235 if (! mask[i]) {
236 array[i]=out;
237 out++;
238 } else {
239 array[i]=-1;
240 }
241 }
242 }
243
244 return out;
245 }
246
247 /* return the index to the largest entry in lambda takes is maximum */
248 index_t Paso_Util_arg_max(dim_t n, dim_t* lambda) {
249 dim_t i;
250 index_t max=-1;
251 index_t argmax=-1;
252 index_t lmax=-1;
253 index_t li=-1;
254 const int num_threads=omp_get_max_threads();
255
256 if (n>0) {
257 max=lambda[0];
258 argmax=0;
259 if (num_threads>1) {
260 #pragma omp parallel private(i,lmax,li)
261 {
262 lmax=max;
263 li=argmax;
264 #pragma omp for schedule(static)
265 for (i=0;i<n;++i) {
266 if(lmax<lambda[i]){
267 lmax=lambda[i];
268 li=i;
269 }
270 }
271 #pragma omp critical
272 {
273 if (max < lmax) {
274 max=lmax;
275 argmax=li;
276 } else if (max==lmax && argmax>li) {
277 argmax=li;
278 }
279 }
280 }
281 } else {
282 for (i=0;i<n;++i) {
283 if(max<lambda[i]){
284 max=lambda[i];
285 argmax=i;
286 }
287 }
288 }
289 }
290 return argmax;
291 }
292
293 /*
294 * set x to zero:
295 */
296 void Paso_zeroes(const dim_t n, double* x)
297 {
298 dim_t i,local_n,rest,n_start,n_end,q;
299 const int num_threads=omp_get_max_threads();
300
301 #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q)
302 for (i=0;i<num_threads;++i) {
303 local_n=n/num_threads;
304 rest=n-local_n*num_threads;
305 n_start=local_n*i+MIN(i,rest);
306 n_end=local_n*(i+1)+MIN(i+1,rest);
307 #pragma ivdep
308 for (q=n_start;q<n_end;++q) x[q]=0;
309 }
310
311 }
312
313 /*
314 * performs an update of the form x=a*x+b*y where y and x are long vectors.
315 * if b=0, y is not used.
316 */
317 void Paso_Update(const dim_t n, const double a, double* x, const double b, const double* y)
318 {
319 dim_t i,local_n,rest,n_start,n_end,q;
320 const int num_threads=omp_get_max_threads();
321
322
323 #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q)
324 for (i=0;i<num_threads;++i) {
325 local_n=n/num_threads;
326 rest=n-local_n*num_threads;
327 n_start=local_n*i+MIN(i,rest);
328 n_end=local_n*(i+1)+MIN(i+1,rest);
329 if (a==1.) {
330 #pragma ivdep
331 for (q=n_start;q<n_end;++q) {
332 x[q]+=b*y[q];
333 }
334 } else if ((a==1.) && (b==1)) {
335 #pragma ivdep
336 for (q=n_start;q<n_end;++q) {
337 x[q]+=y[q];
338 }
339 } else if ((ABS(a)==0) && (ABS(b)==0)) {
340 #pragma ivdep
341 for (q=n_start;q<n_end;++q) {
342 x[q]=0.;
343 }
344 } else if ((ABS(a)==0) && (b==1)) {
345 #pragma ivdep
346 for (q=n_start;q<n_end;++q) {
347 x[q]=y[q];
348 }
349 } else if (ABS(a)==0) {
350 #pragma ivdep
351 for (q=n_start;q<n_end;++q) {
352 x[q]=b*y[q];
353 }
354 } else {
355 #pragma ivdep
356 for (q=n_start;q<n_end;++q) {
357 x[q]=a*x[q]+b*y[q];
358 }
359 }
360 }
361 }
362 void Paso_Copy(const dim_t n, double* out, const double* in) {
363 Paso_LinearCombination(n,out, 1.,in,0., in);
364 }
365 /*
366 * performs an update of the form z=a*x+b*y where y and x are long vectors.
367 * if a=0, x is not used.
368 * if b=0, y is not used.
369 */
370 void Paso_LinearCombination(const dim_t n, double*z, const double a,const double* x, const double b, const double* y)
371 {
372 dim_t i,local_n,rest,n_start,n_end,q;
373 const int num_threads=omp_get_max_threads();
374
375
376 #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q)
377 for (i=0;i<num_threads;++i) {
378 local_n=n/num_threads;
379 rest=n-local_n*num_threads;
380 n_start=local_n*i+MIN(i,rest);
381 n_end=local_n*(i+1)+MIN(i+1,rest);
382 if (((ABS(a)==0) && (ABS(b)==0)) || (y==NULL) || (x==NULL)) {
383 #pragma ivdep
384 for (q=n_start;q<n_end;++q) {
385 z[q]=0.;
386 }
387 } else if ( ((ABS(a)==0) && (ABS(b)>0.)) || (x==NULL) ) {
388 #pragma ivdep
389 for (q=n_start;q<n_end;++q) {
390 z[q]=b*y[q];
391 }
392 } else if (((ABS(a)>0) && (ABS(b)==0.)) || (y==NULL) ) {
393 #pragma ivdep
394 for (q=n_start;q<n_end;++q) {
395 z[q]=a*x[q];
396 }
397 } else {
398 #pragma ivdep
399 for (q=n_start;q<n_end;++q) {
400 z[q]=a*x[q]+b*y[q];
401 }
402 }
403 }
404 }
405
406
407
408 double Paso_InnerProduct(const dim_t n,const double* x, const double* y, Esys_MPIInfo* mpiinfo)
409 {
410 dim_t i,local_n,rest,n_start,n_end,q;
411 double my_out=0, local_out=0., out=0.;
412 const int num_threads=omp_get_max_threads();
413
414 #pragma omp parallel for private(i,local_out,local_n,rest,n_start,n_end,q)
415 for (i=0;i<num_threads;++i) {
416 local_out=0;
417 local_n=n/num_threads;
418 rest=n-local_n*num_threads;
419 n_start=local_n*i+MIN(i,rest);
420 n_end=local_n*(i+1)+MIN(i+1,rest);
421 #pragma ivdep
422 for (q=n_start;q<n_end;++q) local_out+=x[q]*y[q];
423 #pragma omp critical
424 {
425 my_out+=local_out;
426 }
427 }
428 #ifdef ESYS_MPI
429 #pragma omp single
430 {
431 MPI_Allreduce(&my_out,&out, 1, MPI_DOUBLE, MPI_SUM, mpiinfo->comm);
432 }
433 #else
434 out=my_out;
435 #endif
436
437 return out;
438
439 }
440 double Paso_lsup(const dim_t n, const double* x, Esys_MPIInfo* mpiinfo)
441 {
442 dim_t i,local_n,rest,n_start,n_end,q;
443 double my_out=0., local_out=0., out=0.;
444 const int num_threads=omp_get_max_threads();
445
446 #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q, local_out)
447 for (i=0;i<num_threads;++i) {
448 local_n=n/num_threads;
449 rest=n-local_n*num_threads;
450 n_start=local_n*i+MIN(i,rest);
451 n_end=local_n*(i+1)+MIN(i+1,rest);
452 local_out=0;
453 for (q=n_start;q<n_end;++q) local_out=MAX(ABS(x[q]),local_out);
454 #pragma omp critical
455 {
456 my_out=MAX(my_out,local_out);
457 }
458 }
459 #ifdef ESYS_MPI
460 #pragma omp single
461 {
462 MPI_Allreduce(&my_out,&out, 1, MPI_DOUBLE, MPI_MAX, mpiinfo->comm);
463 }
464 #else
465 out=my_out;
466 #endif
467
468 return out;
469
470 }
471 double Paso_l2(const dim_t n, const double* x, Esys_MPIInfo* mpiinfo)
472 {
473 dim_t i,local_n,rest,n_start,n_end,q;
474 double my_out=0, local_out=0., out=0.;
475 const int num_threads=omp_get_max_threads();
476
477 #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q, local_out)
478 for (i=0;i<num_threads;++i) {
479 local_n=n/num_threads;
480 rest=n-local_n*num_threads;
481 n_start=local_n*i+MIN(i,rest);
482 n_end=local_n*(i+1)+MIN(i+1,rest);
483 local_out=0;
484 for (q=n_start;q<n_end;++q) local_out+=x[q]*x[q];
485 #pragma omp critical
486 {
487 my_out+=local_out;
488 }
489 }
490 #ifdef ESYS_MPI
491 #pragma omp single
492 {
493 MPI_Allreduce(&my_out,&out, 1, MPI_DOUBLE, MPI_SUM, mpiinfo->comm);
494 }
495 #else
496 out=my_out;
497 #endif
498
499 return sqrt(out);
500
501 }
502 /*
503 * Apply a sequence of n-1 Givens rotations (c,s) to v of length n which assumed to be small.
504 *
505 */
506 void ApplyGivensRotations(const dim_t n,double* v,const double* c,const double* s)
507 {
508 dim_t i;
509 register double w1,w2;
510 #pragma ivdep
511 for (i=0; i<n-1;++i) {
512 w1=c[i]*v[i]-s[i]*v[i+1];
513 w2=s[i]*v[i]+c[i]*v[i+1];
514 v[i]=w1;
515 v[i+1]=w2;
516 }
517 }
518
519 bool_t Paso_fileExists( const char* filename )
520 {
521 FILE* fp = NULL;
522 fp = fopen(filename,"r");
523 if( fp != NULL ) {
524 fclose(fp);
525 return TRUE;
526 } else {
527 return FALSE;
528 }
529 }

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26