/[escript]/trunk/paso/src/PasoUtil.c
ViewVC logotype

Contents of /trunk/paso/src/PasoUtil.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1639 - (show annotations)
Mon Jul 14 08:55:25 2008 UTC (11 years, 3 months ago) by gross
File MIME type: text/plain
File size: 7878 byte(s)


1 /* $Id$ */
2
3 /*******************************************************
4 *
5 * Copyright 2003-2007 by ACceSS MNRF
6 * Copyright 2007 by University of Queensland
7 *
8 * http://esscc.uq.edu.au
9 * Primary Business: Queensland, Australia
10 * Licensed under the Open Software License version 3.0
11 * http://www.opensource.org/licenses/osl-3.0.php
12 *
13 *******************************************************/
14
15 /**************************************************************/
16
17 /* Some utility routines: */
18
19 /**************************************************************/
20
21 /* Copyrights by ACcESS Australia, 2003,2004,2005 */
22
23 /**************************************************************/
24
25 #include "Common.h"
26 #include "PasoUtil.h"
27 #ifdef _OPENMP
28 #include <omp.h>
29 #endif
30
31 /* returns true if array contains value */
32 bool_t Paso_Util_isAny(dim_t N,index_t* array,index_t value) {
33 bool_t out=FALSE;
34 dim_t i;
35 #pragma omp parallel for private(i) schedule(static) reduction(||:out)
36 for (i=0;i<N;i++) {
37 out = out || (array[i]==value);
38 }
39 return out;
40 }
41
42 /**************************************************************/
43
44 /* calculates the cummultative sum in array and returns the total sum */
45
46 /**************************************************************/
47 index_t Paso_Util_cumsum(dim_t N,index_t* array) {
48 index_t out=0,tmp;
49 dim_t i;
50 #ifdef _OPENMP
51 index_t *partial_sums=NULL,sum;
52 partial_sums=TMPMEMALLOC(omp_get_max_threads(),index_t);
53 #pragma omp parallel private(sum,i,tmp)
54 {
55 sum=0;
56 #pragma omp for schedule(static)
57 for (i=0;i<N;++i) sum+=array[i];
58
59 partial_sums[omp_get_thread_num()]=sum;
60 #pragma omp barrier
61 #pragma omp single
62 {
63 out=0;
64 for (i=0;i<omp_get_max_threads();++i) {
65 tmp=out;
66 out+=partial_sums[i];
67 partial_sums[i]=tmp;
68 }
69 }
70 sum=partial_sums[omp_get_thread_num()];
71 #pragma omp for schedule(static)
72 for (i=0;i<N;++i) {
73 tmp=sum;
74 sum+=array[i];
75 array[i]=tmp;
76 }
77 }
78 TMPMEMFREE(partial_sums);
79 #else
80 for (i=0;i<N;++i) {
81 tmp=out;
82 out+=array[i];
83 array[i]=tmp;
84 }
85 #endif
86 return out;
87 }
88 /*
89 * set x to zero:
90 */
91 void Paso_zeroes(const dim_t n, double* x)
92 {
93 dim_t i,local_n,rest,n_start,n_end,q;
94 #ifdef _OPENMP
95 const int num_threads=omp_get_max_threads();
96 #else
97 const int num_threads=1;
98 #endif
99
100 #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q)
101 for (i=0;i<num_threads;++i) {
102 local_n=n/num_threads;
103 rest=n-local_n*num_threads;
104 n_start=local_n*i+MIN(i,rest);
105 n_end=local_n*(i+1)+MIN(i+1,rest);
106 #pragma ivdep
107 for (q=n_start;q<n_end;++q) x[q]=0;
108 }
109
110 }
111 /*
112 * performs an update of the form x=a*x+b*y where y and x are long vectors.
113 * if b=0, y is not used.
114 */
115 void Paso_Update(const dim_t n, const double a, double* x, const double b, const double* y)
116 {
117 dim_t i,local_n,rest,n_start,n_end,q;
118 #ifdef _OPENMP
119 const int num_threads=omp_get_max_threads();
120 #else
121 const int num_threads=1;
122 #endif
123
124 #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q)
125 for (i=0;i<num_threads;++i) {
126 local_n=n/num_threads;
127 rest=n-local_n*num_threads;
128 n_start=local_n*i+MIN(i,rest);
129 n_end=local_n*(i+1)+MIN(i+1,rest);
130 if (a==1.) {
131 #pragma ivdep
132 for (q=n_start;q<n_end;++q) {
133 x[q]+=b*y[q];
134 }
135 } else if ((a==1.) && (b==1)) {
136 #pragma ivdep
137 for (q=n_start;q<n_end;++q) {
138 x[q]+=y[q];
139 }
140 } else if ((ABS(a)==0) && (ABS(b)==0)) {
141 #pragma ivdep
142 for (q=n_start;q<n_end;++q) {
143 x[q]=0.;
144 }
145 } else if ((ABS(a)==0) && (ABS(b)==1)) {
146 #pragma ivdep
147 for (q=n_start;q<n_end;++q) {
148 x[q]=y[q];
149 }
150 } else if (ABS(a)==0) {
151 #pragma ivdep
152 for (q=n_start;q<n_end;++q) {
153 x[q]=b*y[q];
154 }
155 } else {
156 #pragma ivdep
157 for (q=n_start;q<n_end;++q) {
158 x[q]=a*x[q]+b*y[q];
159 }
160 }
161 }
162 }
163 void Paso_Copy(const dim_t n, double* out, const double* in) {
164 Paso_LinearCombination(n,out, 1.,in,0., in);
165 }
166 /*
167 * performs an update of the form z=a*x+b*y where y and x are long vectors.
168 * if a=0, x is not used.
169 * if b=0, y is not used.
170 */
171 void Paso_LinearCombination(const dim_t n, double*z, const double a,const double* x, const double b, const double* y)
172 {
173 dim_t i,local_n,rest,n_start,n_end,q;
174 #ifdef _OPENMP
175 const int num_threads=omp_get_max_threads();
176 #else
177 const int num_threads=1;
178 #endif
179
180 #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q)
181 for (i=0;i<num_threads;++i) {
182 local_n=n/num_threads;
183 rest=n-local_n*num_threads;
184 n_start=local_n*i+MIN(i,rest);
185 n_end=local_n*(i+1)+MIN(i+1,rest);
186 if ((ABS(a)==0) && (ABS(b)==0)) {
187 #pragma ivdep
188 for (q=n_start;q<n_end;++q) {
189 z[q]=0.;
190 }
191 } else if ((ABS(a)==0) && (ABS(b)>0.)) {
192 #pragma ivdep
193 for (q=n_start;q<n_end;++q) {
194 z[q]=b*y[q];
195 }
196 } else if ((ABS(a)>0) && (ABS(b)==0.)) {
197 #pragma ivdep
198 for (q=n_start;q<n_end;++q) {
199 z[q]=a*x[q];
200 }
201 } else {
202 #pragma ivdep
203 for (q=n_start;q<n_end;++q) {
204 z[q]=a*x[q]+b*y[q];
205 }
206 }
207 }
208 }
209
210
211
212 double Paso_InnerProduct(const dim_t n,const double* x, const double* y, Paso_MPIInfo* mpiinfo)
213 {
214 dim_t i,local_n,rest,n_start,n_end,q;
215 double my_out=0, local_out=0., out=0.;
216 #ifdef _OPENMP
217 const int num_threads=omp_get_max_threads();
218 #else
219 const int num_threads=1;
220 #endif
221 #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q) reduction(+:my_out)
222 for (i=0;i<num_threads;++i) {
223 local_n=n/num_threads;
224 rest=n-local_n*num_threads;
225 n_start=local_n*i+MIN(i,rest);
226 n_end=local_n*(i+1)+MIN(i+1,rest);
227 for (q=n_start;q<n_end;++q) local_out+=x[q]*y[q];
228 my_out+=local_out;
229 }
230 #ifdef PASO_MPI
231 #pragma omp single
232 {
233 MPI_Allreduce(&my_out,&out, 1, MPI_DOUBLE, MPI_SUM, mpiinfo->comm);
234 }
235 #else
236 out=my_out;
237 #endif
238
239 return out;
240
241 }
242 double Paso_l2(const dim_t n, const double* x, Paso_MPIInfo* mpiinfo)
243 {
244 dim_t i,local_n,rest,n_start,n_end,q;
245 double my_out=0, local_out=0., out=0.;
246 #ifdef _OPENMP
247 const int num_threads=omp_get_max_threads();
248 #else
249 const int num_threads=1;
250 #endif
251 #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q) reduction(+:my_out)
252 for (i=0;i<num_threads;++i) {
253 local_n=n/num_threads;
254 rest=n-local_n*num_threads;
255 n_start=local_n*i+MIN(i,rest);
256 n_end=local_n*(i+1)+MIN(i+1,rest);
257 for (q=n_start;q<n_end;++q) local_out+=x[q]*x[q];
258 my_out+=local_out;
259 }
260 #ifdef PASO_MPI
261 #pragma omp single
262 {
263 MPI_Allreduce(&my_out,&out, 1, MPI_DOUBLE, MPI_SUM, mpiinfo->comm);
264 }
265 #else
266 out=my_out;
267 #endif
268
269 return sqrt(out);
270
271 }
272 /*
273 * Apply a sequence of n-1 Givens rotations (c,s) to v of length n which assumed to be small.
274 *
275 */
276 void ApplyGivensRotations(const dim_t n,double* v,const double* c,const double* s)
277 {
278 dim_t i;
279 register double w1,w2;
280 #pragma ivdep
281 for (i=0; i<n-1;++i) {
282 w1=c[i]*v[i]-s[i]*v[i+1];
283 w2=s[i]*v[i]+c[i]*v[i+1];
284 v[i]=w1;
285 v[i+1]=w2;
286 }
287 }

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26