/[escript]/trunk/paso/src/Util.c
ViewVC logotype

Contents of /trunk/paso/src/Util.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1556 - (show annotations)
Mon May 12 00:54:58 2008 UTC (11 years, 6 months ago) by gross
File MIME type: text/plain
File size: 5858 byte(s)
Modification to allow mixed mode execution. 
In order to keep the code portable accross platform all MPI calls within
parallel regions have been moved. 


1 /* $Id:$ */
2 /*******************************************************
3 *
4 * Copyright 2008 by University of Queensland
5 *
6 * http://esscc.uq.edu.au
7 * Primary Business: Queensland, Australia
8 * Licensed under the Open Software License version 3.0
9 * http://www.opensource.org/licenses/osl-3.0.php
10 *
11 *******************************************************/
12
13 #include "Util.h"
14
15 /*
16 * set x to zero:
17 */
18 void Paso_zeroes(const dim_t n, double* x)
19 {
20 dim_t i,local_n,rest,n_start,n_end,q;
21 #ifdef _OPENMP
22 const int num_threads=omp_get_num_threads();
23 #else
24 const int num_threads=1;
25 #endif
26
27 #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q)
28 for (i=0;i<num_threads;++i) {
29 local_n=n/num_threads;
30 rest=n-local_n*num_threads;
31 n_start=local_n*i+MIN(i,rest);
32 n_end=local_n*(i+1)+MIN(i+1,rest);
33 #pragma ivdep
34 for (q=n_start;q<n_end;++q) {
35 x[q]=0;
36 }
37 }
38
39 }
40 /*
41 * performs an update of the form x=a*x+b*y where y and x are long vectors.
42 * if b=0, y is not used.
43 */
44 void Paso_Update(const dim_t n, const double a, double* x, const double b, const double* y)
45 {
46 dim_t i,local_n,rest,n_start,n_end,q;
47 #ifdef _OPENMP
48 const int num_threads=omp_get_num_threads();
49 #else
50 const int num_threads=1;
51 #endif
52
53 #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q)
54 for (i=0;i<num_threads;++i) {
55 local_n=n/num_threads;
56 rest=n-local_n*num_threads;
57 n_start=local_n*i+MIN(i,rest);
58 n_end=local_n*(i+1)+MIN(i+1,rest);
59 if (a==1.) {
60 #pragma ivdep
61 for (q=n_start;q<n_end;++q) {
62 x[q]+=b*y[q];
63 }
64 } else if ((a==1.) && (b==1)) {
65 #pragma ivdep
66 for (q=n_start;q<n_end;++q) {
67 x[q]+=y[q];
68 }
69 } else if ((ABS(a)==0) && (ABS(b)==0)) {
70 #pragma ivdep
71 for (q=n_start;q<n_end;++q) {
72 x[q]=0.;
73 }
74 } else if ((ABS(a)==0) && (ABS(b)==1)) {
75 #pragma ivdep
76 for (q=n_start;q<n_end;++q) {
77 x[q]=y[q];
78 }
79 } else if (ABS(a)==0) {
80 #pragma ivdep
81 for (q=n_start;q<n_end;++q) {
82 x[q]=b*y[q];
83 }
84 } else {
85 #pragma ivdep
86 for (q=n_start;q<n_end;++q) {
87 x[q]=a*x[q]+b*y[q];
88 }
89 }
90 }
91 }
92 /*
93 * performs an update of the form z=a*x+b*y where y and x are long vectors.
94 * if a=0, x is not used.
95 * if b=0, y is not used.
96 */
97 void Paso_LinearCombination(const dim_t n, double*z, const double a,const double* x, const double b, const double* y)
98 {
99 dim_t i,local_n,rest,n_start,n_end,q;
100 #ifdef _OPENMP
101 const int num_threads=omp_get_num_threads();
102 #else
103 const int num_threads=1;
104 #endif
105
106 #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q)
107 for (i=0;i<num_threads;++i) {
108 local_n=n/num_threads;
109 rest=n-local_n*num_threads;
110 n_start=local_n*i+MIN(i,rest);
111 n_end=local_n*(i+1)+MIN(i+1,rest);
112 if ((ABS(a)==0) && (ABS(b)==0)) {
113 #pragma ivdep
114 for (q=n_start;q<n_end;++q) {
115 z[q]=0.;
116 }
117 } else if ((ABS(a)==0) && (ABS(b)>0.)) {
118 #pragma ivdep
119 for (q=n_start;q<n_end;++q) {
120 z[q]=b*y[q];
121 }
122 } else if ((ABS(a)>0) && (ABS(b)==0.)) {
123 #pragma ivdep
124 for (q=n_start;q<n_end;++q) {
125 z[q]=a*x[q];
126 }
127 } else {
128 #pragma ivdep
129 for (q=n_start;q<n_end;++q) {
130 z[q]=a*x[q]+b*y[q];
131 }
132 }
133 }
134 }
135
136
137
138 double Paso_InnerProduct(const dim_t n,const double* x, const double* y, Paso_MPIInfo* mpiinfo)
139 {
140 dim_t i,local_n,rest,n_start,n_end,q;
141 double my_out=0, local_out=0., out=0.;
142 #ifdef _OPENMP
143 const int num_threads=omp_get_num_threads();
144 #else
145 const int num_threads=1;
146 #endif
147 #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q) reduction(+:my_out)
148 for (i=0;i<num_threads;++i) {
149 local_n=n/num_threads;
150 rest=n-local_n*num_threads;
151 n_start=local_n*i+MIN(i,rest);
152 n_end=local_n*(i+1)+MIN(i+1,rest);
153 for (q=n_start;q<n_end;++q) local_out+=x[q]*y[q];
154 my_out+=local_out;
155 }
156 #ifdef PASO_MPI
157 #pragma omp single
158 {
159 MPI_Allreduce(&my_out,&out, 1, MPI_DOUBLE, MPI_SUM, mpiinfo->comm);
160 }
161 #else
162 out=my_out;
163 #endif
164
165 return out;
166
167 }
168 double Paso_l2(const dim_t n, const double* x, Paso_MPIInfo* mpiinfo)
169 {
170 dim_t i,local_n,rest,n_start,n_end,q;
171 double my_out=0, local_out=0., out=0.;
172 #ifdef _OPENMP
173 const int num_threads=omp_get_num_threads();
174 #else
175 const int num_threads=1;
176 #endif
177 #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q) reduction(+:my_out)
178 for (i=0;i<num_threads;++i) {
179 local_n=n/num_threads;
180 rest=n-local_n*num_threads;
181 n_start=local_n*i+MIN(i,rest);
182 n_end=local_n*(i+1)+MIN(i+1,rest);
183 for (q=n_start;q<n_end;++q) local_out+=x[q]*x[q];
184 my_out+=local_out;
185 }
186 #ifdef PASO_MPI
187 #pragma omp single
188 {
189 MPI_Allreduce(&my_out,&out, 1, MPI_DOUBLE, MPI_SUM, mpiinfo->comm);
190 }
191 #else
192 out=my_out;
193 #endif
194
195 return sqrt(out);
196
197 }
198 /*
199 * Apply a sequence of n-1 Givens rotations (c,s) to v of length n which assumed to be small.
200 *
201 */
202 void ApplyGivensRotations(const dim_t n,double* v,const double* c,const double* s)
203 {
204 dim_t i;
205 register double w1,w2;
206 #pragma ivdep
207 for (i=0; i<n-1;++i) {
208 w1=c[i]*v[i]-s[i]*v[i+1];
209 w2=s[i]*v[i]+c[i]*v[i+1];
210 v[i]=w1;
211 v[i+1]=w2;
212 }
213 }

  ViewVC Help
Powered by ViewVC 1.1.26