/[escript]/trunk/paso/src/PasoUtil.c

Contents of /trunk/paso/src/PasoUtil.c



Revision 3283
Mon Oct 18 22:39:28 2010 UTC by gross
File MIME type: text/plain
File size: 12669 byte(s)
AMG reengineered, BUG in Smoother fixed.


/*******************************************************
*
* Copyright (c) 2003-2010 by University of Queensland
* Earth Systems Science Computational Center (ESSCC)
* http://www.uq.edu.au/esscc
*
* Primary Business: Queensland, Australia
* Licensed under the Open Software License version 3.0
* http://www.opensource.org/licenses/osl-3.0.php
*
*******************************************************/


/**************************************************************/

/* Some utility routines: */

/**************************************************************/

/* Copyrights by ACcESS Australia, 2003,2004,2005 */

/**************************************************************/

#include "Common.h"
#include "PasoUtil.h"
#ifdef _OPENMP
#include <omp.h>
#endif

/* returns true if array contains value */
bool_t Paso_Util_isAny(dim_t N, index_t* array, index_t value) {
    bool_t out=FALSE;
    register bool_t out2;
    dim_t i;

    #pragma omp parallel private(i, out2)
    {
        out2=FALSE;
        #pragma omp for schedule(static)
        for (i=0;i<N;i++) out2 = out2 || (array[i]==value);
        #pragma omp critical
        {
            out = out || out2;
        }
        /* this is how it should look, but gcc 4.4 seems to have a problem with it:
        #pragma omp parallel for private(i) schedule(static) reduction(||:out)
        for (i=0;i<N;i++) out = out || (array[i]==value);
        */
    }
    return out;
}

/**************************************************************/

/* calculates the cumulative sum in array and returns the total sum */

/**************************************************************/
index_t Paso_Util_cumsum(dim_t N,index_t* array) {
    index_t out=0,tmp;
    dim_t i;
    if (omp_get_max_threads()>1) {
        index_t *partial_sums=NULL,sum;
        partial_sums=TMPMEMALLOC(omp_get_max_threads(),index_t);

        /* pass 1: each thread sums the entries of its chunk */
        #pragma omp parallel private(sum,i)
        {
            sum=0;
            #pragma omp for schedule(static)
            for (i=0;i<N;++i) sum+=array[i];

            partial_sums[omp_get_thread_num()]=sum;
        }

        /* serial exclusive scan over the per-thread partial sums */
        {
            out=0;
            for (i=0;i<omp_get_max_threads();++i) {
                tmp=out;
                out+=partial_sums[i];
                partial_sums[i]=tmp;
            }
        }

        /* pass 2: each thread writes the exclusive prefix sum of its chunk,
           offset by the total of all preceding chunks */
        #pragma omp parallel private(sum,tmp,i)
        {
            sum=partial_sums[omp_get_thread_num()];
            #pragma omp for schedule(static)
            for (i=0;i<N;++i) {
                tmp=sum;
                sum+=array[i];
                array[i]=tmp;
            }
        }
        TMPMEMFREE(partial_sums);
    } else {
        for (i=0;i<N;++i) {
            tmp=out;
            out+=array[i];
            array[i]=tmp;
        }
    }
    return out;
}
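
/*
 * Illustrative usage sketch (not part of the original file): Paso_Util_cumsum
 * overwrites array with its exclusive prefix sum and returns the grand total.
 * The helper below is a hypothetical example for clarity only.
 */
static void example_cumsum_usage(void)
{
    index_t a[4] = {3, 1, 4, 1};
    index_t total;

    total = Paso_Util_cumsum(4, a);
    /* now: a = {0, 3, 4, 8} and total = 9 */
    (void)total;
}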

/* like Paso_Util_cumsum, but only entries with mask[i]==TRUE are counted;
   entries with mask[i]==FALSE are set to -1 */
index_t Paso_Util_cumsum_maskedTrue(dim_t N,index_t* array, bool_t* mask) {
    index_t out=0,tmp;
    dim_t i;
    index_t *partial_sums=NULL,sum;

    if (omp_get_max_threads()>1) {
        partial_sums=TMPMEMALLOC(omp_get_max_threads(),index_t);
        #pragma omp parallel private(sum,i)
        {
            sum=0;
            #pragma omp for schedule(static)
            for (i=0;i<N;++i) {
                if (mask[i]) {
                    array[i] =1;
                    sum++;
                } else {
                    array[i] =0;
                }
            }
            partial_sums[omp_get_thread_num()]=sum;
        }

        {
            out=0;
            for (i=0;i<omp_get_max_threads();++i) {
                tmp=out;
                out+=partial_sums[i];
                partial_sums[i]=tmp;
            }
        }

        #pragma omp parallel private(sum,tmp,i)
        {
            sum=partial_sums[omp_get_thread_num()];
            #pragma omp for schedule(static)
            for (i=0;i<N;++i) {
                if (mask[i]) {
                    tmp=sum;
                    sum+=array[i];
                    array[i]=tmp;
                } else {
                    array[i]=-1;
                }
            }
        }
        TMPMEMFREE(partial_sums);
    } else {
        for (i=0;i<N;++i) {
            if (mask[i]) {
                array[i]=out;
                out++;
            } else {
                array[i]=-1;
            }
        }
    }
    return out;
}
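
/*
 * Illustrative sketch (not part of the original file): for entries where the
 * mask is TRUE, Paso_Util_cumsum_maskedTrue stores the number of preceding
 * TRUE entries; masked-out entries become -1. Hypothetical example only.
 */
static void example_cumsum_maskedTrue_usage(void)
{
    index_t a[5];
    bool_t mask[5] = {TRUE, FALSE, TRUE, TRUE, FALSE};
    index_t count;

    count = Paso_Util_cumsum_maskedTrue(5, a, mask);
    /* now: a = {0, -1, 1, 2, -1} and count = 3 */
    (void)count;
}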

/* like Paso_Util_cumsum_maskedTrue, but counts the entries with mask[i]==FALSE;
   entries with mask[i]==TRUE are set to -1 */
index_t Paso_Util_cumsum_maskedFalse(dim_t N,index_t* array, bool_t* mask) {
    index_t out=0,tmp;
    dim_t i;
    index_t *partial_sums=NULL,sum;

    if (omp_get_max_threads()>1) {
        partial_sums=TMPMEMALLOC(omp_get_max_threads(),index_t);
        #pragma omp parallel private(sum,i)
        {
            sum=0;
            #pragma omp for schedule(static)
            for (i=0;i<N;++i) {
                if (! mask[i]) {
                    array[i] =1;
                    sum++;
                } else {
                    array[i] =0;
                }
            }
            partial_sums[omp_get_thread_num()]=sum;
        }

        {
            out=0;
            for (i=0;i<omp_get_max_threads();++i) {
                tmp=out;
                out+=partial_sums[i];
                partial_sums[i]=tmp;
            }
        }

        #pragma omp parallel private(sum,tmp,i)
        {
            sum=partial_sums[omp_get_thread_num()];
            #pragma omp for schedule(static)
            for (i=0;i<N;++i) {
                if (! mask[i]) {
                    tmp=sum;
                    sum+=array[i];
                    array[i]=tmp;
                } else {
                    array[i]=-1;
                }
            }
        }
        TMPMEMFREE(partial_sums);
    } else {
        for (i=0;i<N;++i) {
            if (! mask[i]) {
                array[i]=out;
                out++;
            } else {
                array[i]=-1;
            }
        }
    }

    return out;
}

/* returns the index of the largest entry in lambda (the smallest index wins in case of a tie) */
index_t Paso_Util_arg_max(dim_t n, dim_t* lambda) {
    dim_t i;
    index_t max=-1;
    index_t argmax=-1;
    index_t lmax=-1;
    index_t li=-1;

    if (n>0) {

        max=lambda[0];
        argmax=0;
        if (omp_get_max_threads()>1) {
            #pragma omp parallel private(i,lmax,li)
            {
                lmax=max;
                li=argmax;
                #pragma omp for schedule(static)
                for (i=0;i<n;++i) {
                    if(lmax<lambda[i]){
                        lmax=lambda[i];
                        li=i;
                    }
                }
                #pragma omp critical
                {
                    if (max<=lmax) {
                        if(max==lmax && argmax>li)
                        {
                            argmax=li;
                        }
                        if (max < lmax)
                        {
                            max=lmax;
                            argmax=li;
                        }
                    }
                }
            }
        } else {
            for (i=0;i<n;++i) {
                if(max<lambda[i]){
                    max=lambda[i];
                    argmax=i;
                }
            }
        }
    }
    return argmax;
}
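
/*
 * Illustrative sketch (not part of the original file): Paso_Util_arg_max
 * returns the index of the first occurrence of the maximum, or -1 for n==0.
 * Hypothetical example only.
 */
static void example_arg_max_usage(void)
{
    dim_t lambda[5] = {2, 7, 1, 7, 3};
    index_t imax;

    /* imax becomes 1: lambda[1] and lambda[3] are both maximal, the smaller index wins */
    imax = Paso_Util_arg_max(5, lambda);
    (void)imax;
}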

/*
 * set x to zero:
 */
void Paso_zeroes(const dim_t n, double* x)
{
    dim_t i,local_n,rest,n_start,n_end,q;
#ifdef _OPENMP
    const int num_threads=omp_get_max_threads();
#else
    const int num_threads=1;
#endif

    #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q)
    for (i=0;i<num_threads;++i) {
        local_n=n/num_threads;
        rest=n-local_n*num_threads;
        n_start=local_n*i+MIN(i,rest);
        n_end=local_n*(i+1)+MIN(i+1,rest);
        #pragma ivdep
        for (q=n_start;q<n_end;++q) x[q]=0;
    }

}
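
/*
 * Illustrative sketch (not part of the original file): Paso_zeroes and the
 * vector kernels below split [0,n) into num_threads nearly equal chunks. The
 * hypothetical helper reproduces that partition; for n=10 and num_threads=3
 * it yields the ranges [0,4), [4,7) and [7,10).
 */
static void example_chunk_bounds(const dim_t n, const int num_threads, const int i,
                                 dim_t* n_start, dim_t* n_end)
{
    const dim_t local_n=n/num_threads;        /* minimum chunk size */
    const dim_t rest=n-local_n*num_threads;   /* left-over entries  */
    *n_start=local_n*i+MIN(i,rest);           /* the first 'rest' chunks ...  */
    *n_end=local_n*(i+1)+MIN(i+1,rest);       /* ... get one extra entry each */
}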

/*
 * performs an update of the form x=a*x+b*y where y and x are long vectors.
 * if b=0, y is not used.
 */
void Paso_Update(const dim_t n, const double a, double* x, const double b, const double* y)
{
    dim_t i,local_n,rest,n_start,n_end,q;
#ifdef _OPENMP
    const int num_threads=omp_get_max_threads();
#else
    const int num_threads=1;
#endif

    #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q)
    for (i=0;i<num_threads;++i) {
        local_n=n/num_threads;
        rest=n-local_n*num_threads;
        n_start=local_n*i+MIN(i,rest);
        n_end=local_n*(i+1)+MIN(i+1,rest);
        if ((a==1.) && (b==1.)) {
            #pragma ivdep
            for (q=n_start;q<n_end;++q) {
                x[q]+=y[q];
            }
        } else if (a==1.) {
            #pragma ivdep
            for (q=n_start;q<n_end;++q) {
                x[q]+=b*y[q];
            }
        } else if ((ABS(a)==0) && (ABS(b)==0)) {
            #pragma ivdep
            for (q=n_start;q<n_end;++q) {
                x[q]=0.;
            }
        } else if ((ABS(a)==0) && (b==1.)) {
            #pragma ivdep
            for (q=n_start;q<n_end;++q) {
                x[q]=y[q];
            }
        } else if (ABS(a)==0) {
            #pragma ivdep
            for (q=n_start;q<n_end;++q) {
                x[q]=b*y[q];
            }
        } else {
            #pragma ivdep
            for (q=n_start;q<n_end;++q) {
                x[q]=a*x[q]+b*y[q];
            }
        }
    }
}
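
/*
 * Illustrative sketch (not part of the original file): a typical solver-style
 * correction x <- x + omega*r expressed with Paso_Update. Hypothetical
 * example only.
 */
static void example_update_usage(const dim_t n, double* x, const double* r, const double omega)
{
    /* x = 1.0*x + omega*r */
    Paso_Update(n, 1., x, omega, r);
}
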
void Paso_Copy(const dim_t n, double* out, const double* in) {
    Paso_LinearCombination(n, out, 1., in, 0., in);
}
/*
 * performs an update of the form z=a*x+b*y where y and x are long vectors.
 * if a=0, x is not used.
 * if b=0, y is not used.
 */
void Paso_LinearCombination(const dim_t n, double* z, const double a, const double* x, const double b, const double* y)
{
    dim_t i,local_n,rest,n_start,n_end,q;
#ifdef _OPENMP
    const int num_threads=omp_get_max_threads();
#else
    const int num_threads=1;
#endif

    #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q)
    for (i=0;i<num_threads;++i) {
        local_n=n/num_threads;
        rest=n-local_n*num_threads;
        n_start=local_n*i+MIN(i,rest);
        n_end=local_n*(i+1)+MIN(i+1,rest);
        if (((ABS(a)==0) && (ABS(b)==0)) || (y==NULL) || (x==NULL)) {
            #pragma ivdep
            for (q=n_start;q<n_end;++q) {
                z[q]=0.;
            }
        } else if ( ((ABS(a)==0) && (ABS(b)>0.)) || (x==NULL) ) {
            #pragma ivdep
            for (q=n_start;q<n_end;++q) {
                z[q]=b*y[q];
            }
        } else if (((ABS(a)>0) && (ABS(b)==0.)) || (y==NULL) ) {
            #pragma ivdep
            for (q=n_start;q<n_end;++q) {
                z[q]=a*x[q];
            }
        } else {
            #pragma ivdep
            for (q=n_start;q<n_end;++q) {
                z[q]=a*x[q]+b*y[q];
            }
        }
    }
}

/* returns the inner product of x and y; when ESYS_MPI is set the local
   contributions are summed over mpiinfo->comm */
double Paso_InnerProduct(const dim_t n, const double* x, const double* y, Esys_MPIInfo* mpiinfo)
{
    dim_t i,local_n,rest,n_start,n_end,q;
    double my_out=0, local_out=0., out=0.;
#ifdef _OPENMP
    const int num_threads=omp_get_max_threads();
#else
    const int num_threads=1;
#endif
    #pragma omp parallel for private(i,local_out,local_n,rest,n_start,n_end,q)
    for (i=0;i<num_threads;++i) {
        local_out=0;
        local_n=n/num_threads;
        rest=n-local_n*num_threads;
        n_start=local_n*i+MIN(i,rest);
        n_end=local_n*(i+1)+MIN(i+1,rest);
        #pragma ivdep
        for (q=n_start;q<n_end;++q) local_out+=x[q]*y[q];
        #pragma omp critical
        {
            my_out+=local_out;
        }
    }
#ifdef ESYS_MPI
    #pragma omp single
    {
        MPI_Allreduce(&my_out,&out, 1, MPI_DOUBLE, MPI_SUM, mpiinfo->comm);
    }
#else
    out=my_out;
#endif

    return out;
}
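
/*
 * Illustrative sketch (not part of the original file): since Paso_InnerProduct
 * already returns the globally reduced dot product, a global Euclidean norm
 * can be written as below. Hypothetical example only; it is equivalent to
 * Paso_l2(n, x, mpiinfo).
 */
static double example_global_norm(const dim_t n, const double* x, Esys_MPIInfo* mpiinfo)
{
    return sqrt(Paso_InnerProduct(n, x, x, mpiinfo));
}
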
/* returns the maximum norm (l_sup) of x; when ESYS_MPI is set the maximum is
   taken over mpiinfo->comm */
double Paso_lsup(const dim_t n, const double* x, Esys_MPIInfo* mpiinfo)
{
    dim_t i,local_n,rest,n_start,n_end,q;
    double my_out=0., local_out=0., out=0.;
#ifdef _OPENMP
    const int num_threads=omp_get_max_threads();
#else
    const int num_threads=1;
#endif
    #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q, local_out)
    for (i=0;i<num_threads;++i) {
        local_n=n/num_threads;
        rest=n-local_n*num_threads;
        n_start=local_n*i+MIN(i,rest);
        n_end=local_n*(i+1)+MIN(i+1,rest);
        local_out=0;
        for (q=n_start;q<n_end;++q) local_out=MAX(ABS(x[q]),local_out);
        #pragma omp critical
        {
            my_out=MAX(my_out,local_out);
        }
    }
#ifdef ESYS_MPI
    #pragma omp single
    {
        MPI_Allreduce(&my_out,&out, 1, MPI_DOUBLE, MPI_MAX, mpiinfo->comm);
    }
#else
    out=my_out;
#endif

    return out;
}
/* returns the Euclidean norm (l2) of x; when ESYS_MPI is set the squared
   contributions are summed over mpiinfo->comm before the square root is taken */
double Paso_l2(const dim_t n, const double* x, Esys_MPIInfo* mpiinfo)
{
    dim_t i,local_n,rest,n_start,n_end,q;
    double my_out=0, local_out=0., out=0.;
#ifdef _OPENMP
    const int num_threads=omp_get_max_threads();
#else
    const int num_threads=1;
#endif
    #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q, local_out)
    for (i=0;i<num_threads;++i) {
        local_n=n/num_threads;
        rest=n-local_n*num_threads;
        n_start=local_n*i+MIN(i,rest);
        n_end=local_n*(i+1)+MIN(i+1,rest);
        local_out=0;
        for (q=n_start;q<n_end;++q) local_out+=x[q]*x[q];
        #pragma omp critical
        {
            my_out+=local_out;
        }
    }
#ifdef ESYS_MPI
    #pragma omp single
    {
        MPI_Allreduce(&my_out,&out, 1, MPI_DOUBLE, MPI_SUM, mpiinfo->comm);
    }
#else
    out=my_out;
#endif

    return sqrt(out);
}
/*
 * Applies a sequence of n-1 Givens rotations (c,s) to v of length n, which is assumed to be small.
 */
void ApplyGivensRotations(const dim_t n, double* v, const double* c, const double* s)
{
    dim_t i;
    register double w1,w2;
    #pragma ivdep
    for (i=0; i<n-1;++i) {
        w1=c[i]*v[i]-s[i]*v[i+1];
        w2=s[i]*v[i]+c[i]*v[i+1];
        v[i]=w1;
        v[i+1]=w2;
    }
}
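
/*
 * Illustrative note (not part of the original file): step i of
 * ApplyGivensRotations applies the plane rotation
 *
 *    [ v[i]   ]   [ c[i]  -s[i] ] [ v[i]   ]
 *    [ v[i+1] ] = [ s[i]   c[i] ] [ v[i+1] ]
 *
 * to consecutive entries of v, as used, for instance, when updating the
 * right-hand side of the small least-squares problem in GMRES-type solvers.
 */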

bool_t Paso_fileExists( const char* filename )
{
    FILE* fp = NULL;
    fp = fopen(filename,"r");
    if( fp != NULL ) {
        fclose(fp);
        return TRUE;
    } else {
        return FALSE;
    }
}

Properties

Name            Value
svn:eol-style   native
svn:keywords    Author Date Id Revision
