/[escript]/trunk/paso/src/PasoUtil.c
ViewVC logotype

Annotation of /trunk/paso/src/PasoUtil.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1798 - (hide annotations)
Wed Sep 17 06:21:12 2008 UTC (13 years ago) by gross
File MIME type: text/plain
File size: 8233 byte(s)
Fixes for the JacobeanFreeNewton scheme. Still needs to be tested under OPENMP but runs under MPI.


1 jgs 150 /* $Id$ */
2    
3 ksteube 1312 /*******************************************************
4     *
5     * Copyright 2003-2007 by ACceSS MNRF
6     * Copyright 2007 by University of Queensland
7     *
8     * http://esscc.uq.edu.au
9     * Primary Business: Queensland, Australia
10     * Licensed under the Open Software License version 3.0
11     * http://www.opensource.org/licenses/osl-3.0.php
12     *
13     *******************************************************/
14 dhawcroft 631
15 jgs 150 /**************************************************************/
16    
17     /* Some utility routines: */
18    
19     /**************************************************************/
20    
21     /* Copyrights by ACcESS Australia, 2003,2004,2005 */
22    
23     /**************************************************************/
24    
25     #include "Common.h"
26 jgs 482 #include "PasoUtil.h"
27 jgs 150 #ifdef _OPENMP
28     #include <omp.h>
29     #endif
30    
31     /* returns true if array contains value */
32     bool_t Paso_Util_isAny(dim_t N,index_t* array,index_t value) {
33     bool_t out=FALSE;
34     dim_t i;
35     #pragma omp parallel for private(i) schedule(static) reduction(||:out)
36 gross 1639 for (i=0;i<N;i++) {
37     out = out || (array[i]==value);
38     }
39 jgs 150 return out;
40     }
41    
42     /**************************************************************/
43    
44     /* calculates the cummultative sum in array and returns the total sum */
45    
46     /**************************************************************/
47     index_t Paso_Util_cumsum(dim_t N,index_t* array) {
48     index_t out=0,tmp;
49     dim_t i;
50     #ifdef _OPENMP
51 gross 1564 index_t *partial_sums=NULL,sum;
52     partial_sums=TMPMEMALLOC(omp_get_max_threads(),index_t);
53 jgs 150 #pragma omp parallel private(sum,i,tmp)
54     {
55     sum=0;
56     #pragma omp for schedule(static)
57     for (i=0;i<N;++i) sum+=array[i];
58 gross 1639
59 jgs 150 partial_sums[omp_get_thread_num()]=sum;
60     #pragma omp barrier
61 gross 1556 #pragma omp single
62 jgs 150 {
63     out=0;
64     for (i=0;i<omp_get_max_threads();++i) {
65     tmp=out;
66     out+=partial_sums[i];
67     partial_sums[i]=tmp;
68     }
69     }
70     sum=partial_sums[omp_get_thread_num()];
71     #pragma omp for schedule(static)
72     for (i=0;i<N;++i) {
73     tmp=sum;
74     sum+=array[i];
75     array[i]=tmp;
76     }
77     }
78 gross 1564 TMPMEMFREE(partial_sums);
79 jgs 150 #else
80     for (i=0;i<N;++i) {
81     tmp=out;
82     out+=array[i];
83     array[i]=tmp;
84     }
85     #endif
86     return out;
87     }
88 gross 1639 /*
89     * set x to zero:
90     */
91     void Paso_zeroes(const dim_t n, double* x)
92     {
93     dim_t i,local_n,rest,n_start,n_end,q;
94     #ifdef _OPENMP
95     const int num_threads=omp_get_max_threads();
96     #else
97     const int num_threads=1;
98     #endif
99 jgs 150
100 gross 1639 #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q)
101     for (i=0;i<num_threads;++i) {
102     local_n=n/num_threads;
103     rest=n-local_n*num_threads;
104     n_start=local_n*i+MIN(i,rest);
105     n_end=local_n*(i+1)+MIN(i+1,rest);
106     #pragma ivdep
107     for (q=n_start;q<n_end;++q) x[q]=0;
108     }
109    
110     }
111 jgs 150 /*
112 gross 1639 * performs an update of the form x=a*x+b*y where y and x are long vectors.
113     * if b=0, y is not used.
114     */
115     void Paso_Update(const dim_t n, const double a, double* x, const double b, const double* y)
116     {
117     dim_t i,local_n,rest,n_start,n_end,q;
118     #ifdef _OPENMP
119     const int num_threads=omp_get_max_threads();
120     #else
121     const int num_threads=1;
122     #endif
123    
124     #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q)
125     for (i=0;i<num_threads;++i) {
126     local_n=n/num_threads;
127     rest=n-local_n*num_threads;
128     n_start=local_n*i+MIN(i,rest);
129     n_end=local_n*(i+1)+MIN(i+1,rest);
130     if (a==1.) {
131     #pragma ivdep
132     for (q=n_start;q<n_end;++q) {
133     x[q]+=b*y[q];
134     }
135     } else if ((a==1.) && (b==1)) {
136     #pragma ivdep
137     for (q=n_start;q<n_end;++q) {
138     x[q]+=y[q];
139     }
140     } else if ((ABS(a)==0) && (ABS(b)==0)) {
141     #pragma ivdep
142     for (q=n_start;q<n_end;++q) {
143     x[q]=0.;
144     }
145     } else if ((ABS(a)==0) && (ABS(b)==1)) {
146     #pragma ivdep
147     for (q=n_start;q<n_end;++q) {
148     x[q]=y[q];
149     }
150     } else if (ABS(a)==0) {
151     #pragma ivdep
152     for (q=n_start;q<n_end;++q) {
153     x[q]=b*y[q];
154     }
155     } else {
156     #pragma ivdep
157     for (q=n_start;q<n_end;++q) {
158     x[q]=a*x[q]+b*y[q];
159     }
160     }
161     }
162     }
163     void Paso_Copy(const dim_t n, double* out, const double* in) {
164     Paso_LinearCombination(n,out, 1.,in,0., in);
165     }
166     /*
167     * performs an update of the form z=a*x+b*y where y and x are long vectors.
168     * if a=0, x is not used.
169     * if b=0, y is not used.
170     */
171     void Paso_LinearCombination(const dim_t n, double*z, const double a,const double* x, const double b, const double* y)
172     {
173     dim_t i,local_n,rest,n_start,n_end,q;
174     #ifdef _OPENMP
175     const int num_threads=omp_get_max_threads();
176     #else
177     const int num_threads=1;
178     #endif
179    
180     #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q)
181     for (i=0;i<num_threads;++i) {
182     local_n=n/num_threads;
183     rest=n-local_n*num_threads;
184     n_start=local_n*i+MIN(i,rest);
185     n_end=local_n*(i+1)+MIN(i+1,rest);
186 gross 1798 if (((ABS(a)==0) && (ABS(b)==0)) || (y==NULL) || (x==NULL)) {
187 gross 1639 #pragma ivdep
188     for (q=n_start;q<n_end;++q) {
189     z[q]=0.;
190     }
191 gross 1798 } else if ( ((ABS(a)==0) && (ABS(b)>0.)) || (x==NULL) ) {
192 gross 1639 #pragma ivdep
193     for (q=n_start;q<n_end;++q) {
194     z[q]=b*y[q];
195     }
196 gross 1798 } else if (((ABS(a)>0) && (ABS(b)==0.)) || (y==NULL) ) {
197 gross 1639 #pragma ivdep
198     for (q=n_start;q<n_end;++q) {
199     z[q]=a*x[q];
200     }
201     } else {
202     #pragma ivdep
203     for (q=n_start;q<n_end;++q) {
204     z[q]=a*x[q]+b*y[q];
205     }
206     }
207     }
208     }
209    
210    
211    
212     double Paso_InnerProduct(const dim_t n,const double* x, const double* y, Paso_MPIInfo* mpiinfo)
213     {
214     dim_t i,local_n,rest,n_start,n_end,q;
215     double my_out=0, local_out=0., out=0.;
216     #ifdef _OPENMP
217     const int num_threads=omp_get_max_threads();
218     #else
219     const int num_threads=1;
220     #endif
221 gross 1798 #pragma omp parallel for private(i,local_out,local_n,rest,n_start,n_end,q) reduction(+:my_out)
222 gross 1639 for (i=0;i<num_threads;++i) {
223 gross 1798 local_out=0;
224 gross 1639 local_n=n/num_threads;
225     rest=n-local_n*num_threads;
226     n_start=local_n*i+MIN(i,rest);
227     n_end=local_n*(i+1)+MIN(i+1,rest);
228 gross 1798 #pragma ivdep
229 gross 1639 for (q=n_start;q<n_end;++q) local_out+=x[q]*y[q];
230     my_out+=local_out;
231     }
232     #ifdef PASO_MPI
233     #pragma omp single
234     {
235     MPI_Allreduce(&my_out,&out, 1, MPI_DOUBLE, MPI_SUM, mpiinfo->comm);
236     }
237     #else
238     out=my_out;
239     #endif
240    
241     return out;
242    
243     }
244     double Paso_l2(const dim_t n, const double* x, Paso_MPIInfo* mpiinfo)
245     {
246     dim_t i,local_n,rest,n_start,n_end,q;
247     double my_out=0, local_out=0., out=0.;
248     #ifdef _OPENMP
249     const int num_threads=omp_get_max_threads();
250     #else
251     const int num_threads=1;
252     #endif
253 gross 1798 #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q, local_out) reduction(+:my_out)
254 gross 1639 for (i=0;i<num_threads;++i) {
255     local_n=n/num_threads;
256     rest=n-local_n*num_threads;
257     n_start=local_n*i+MIN(i,rest);
258     n_end=local_n*(i+1)+MIN(i+1,rest);
259 gross 1798 local_out=0;
260 gross 1639 for (q=n_start;q<n_end;++q) local_out+=x[q]*x[q];
261     my_out+=local_out;
262     }
263     #ifdef PASO_MPI
264     #pragma omp single
265     {
266     MPI_Allreduce(&my_out,&out, 1, MPI_DOUBLE, MPI_SUM, mpiinfo->comm);
267     }
268     #else
269     out=my_out;
270     #endif
271    
272     return sqrt(out);
273    
274     }
275     /*
276     * Apply a sequence of n-1 Givens rotations (c,s) to v of length n which assumed to be small.
277 jgs 150 *
278     */
279 gross 1639 void ApplyGivensRotations(const dim_t n,double* v,const double* c,const double* s)
280     {
281     dim_t i;
282     register double w1,w2;
283     #pragma ivdep
284     for (i=0; i<n-1;++i) {
285     w1=c[i]*v[i]-s[i]*v[i+1];
286     w2=s[i]*v[i]+c[i]*v[i+1];
287     v[i]=w1;
288     v[i+1]=w2;
289     }
290     }
291 gross 1669
292     bool_t Paso_fileExists( const char* filename )
293     {
294     FILE* fp = NULL;
295     fp = fopen(filename,"r");
296     if( fp != NULL ) {
297 ksteube 1705 fclose(fp);
298 gross 1669 return TRUE;
299     } else {
300     return FALSE;
301     }
302     }

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26