/[escript]/trunk/paso/src/Util.c
ViewVC logotype

Annotation of /trunk/paso/src/Util.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1476 - (hide annotations)
Mon Apr 7 23:38:50 2008 UTC (13 years, 3 months ago) by gross
File MIME type: text/plain
File size: 5860 byte(s)
Jacobian-free Newton method added to Paso
1 gross 1476 /* $Id:$ */
2     /*******************************************************
3     *
4     * Copyright 2008 by University of Queensland
5     *
6     * http://esscc.uq.edu.au
7     * Primary Business: Queensland, Australia
8     * Licensed under the Open Software License version 3.0
9     * http://www.opensource.org/licenses/osl-3.0.php
10     *
11     *******************************************************/
12    
13     #include "Util.h"
14    
15     /*
16     * set x to zero:
17     */
18     void Paso_zeroes(const dim_t n, double* x)
19     {
20     dim_t i,local_n,rest,n_start,n_end,q;
21     #ifdef _OPENMP
22     const int num_threads=omp_get_num_threads();
23     #else
24     const int num_threads=1;
25     #endif
26    
27     #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q)
28     for (i=0;i<num_threads;++i) {
29     local_n=n/num_threads;
30     rest=n-local_n*num_threads;
31     n_start=local_n*i+MIN(i,rest);
32     n_end=local_n*(i+1)+MIN(i+1,rest);
33     #pragma ivdep
34     for (q=n_start;q<n_end;++q) {
35     x[q]=0;
36     }
37     }
38    
39     }
40     /*
41     * performs an update of the form x=a*x+b*y where y and x are long vectors.
42     * if b=0, y is not used.
43     */
44     void Paso_Update(const dim_t n, const double a, double* x, const double b, const double* y)
45     {
46     dim_t i,local_n,rest,n_start,n_end,q;
47     #ifdef _OPENMP
48     const int num_threads=omp_get_num_threads();
49     #else
50     const int num_threads=1;
51     #endif
52    
53     #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q)
54     for (i=0;i<num_threads;++i) {
55     local_n=n/num_threads;
56     rest=n-local_n*num_threads;
57     n_start=local_n*i+MIN(i,rest);
58     n_end=local_n*(i+1)+MIN(i+1,rest);
59     if (a==1.) {
60     #pragma ivdep
61     for (q=n_start;q<n_end;++q) {
62     x[q]+=b*y[q];
63     }
64     } else if ((a==1.) && (b==1)) {
65     #pragma ivdep
66     for (q=n_start;q<n_end;++q) {
67     x[q]+=y[q];
68     }
69     } else if ((ABS(a)==0) && (ABS(b)==0)) {
70     #pragma ivdep
71     for (q=n_start;q<n_end;++q) {
72     x[q]=0.;
73     }
74     } else if ((ABS(a)==0) && (ABS(b)==1)) {
75     #pragma ivdep
76     for (q=n_start;q<n_end;++q) {
77     x[q]=y[q];
78     }
79     } else if (ABS(a)==0) {
80     #pragma ivdep
81     for (q=n_start;q<n_end;++q) {
82     x[q]=b*y[q];
83     }
84     } else {
85     #pragma ivdep
86     for (q=n_start;q<n_end;++q) {
87     x[q]=a*x[q]+b*y[q];
88     }
89     }
90     }
91     }
92     /*
93     * performs an update of the form z=a*x+b*y where y and x are long vectors.
94     * if a=0, x is not used.
95     * if b=0, y is not used.
96     */
97     void Paso_LinearCombination(const dim_t n, double*z, const double a,const double* x, const double b, const double* y)
98     {
99     dim_t i,local_n,rest,n_start,n_end,q;
100     #ifdef _OPENMP
101     const int num_threads=omp_get_num_threads();
102     #else
103     const int num_threads=1;
104     #endif
105    
106     #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q)
107     for (i=0;i<num_threads;++i) {
108     local_n=n/num_threads;
109     rest=n-local_n*num_threads;
110     n_start=local_n*i+MIN(i,rest);
111     n_end=local_n*(i+1)+MIN(i+1,rest);
112     if ((ABS(a)==0) && (ABS(b)==0)) {
113     #pragma ivdep
114     for (q=n_start;q<n_end;++q) {
115     z[q]=0.;
116     }
117     } else if ((ABS(a)==0) && (ABS(b)>0.)) {
118     #pragma ivdep
119     for (q=n_start;q<n_end;++q) {
120     z[q]=b*y[q];
121     }
122     } else if ((ABS(a)>0) && (ABS(b)==0.)) {
123     #pragma ivdep
124     for (q=n_start;q<n_end;++q) {
125     z[q]=a*x[q];
126     }
127     } else {
128     #pragma ivdep
129     for (q=n_start;q<n_end;++q) {
130     z[q]=a*x[q]+b*y[q];
131     }
132     }
133     }
134     }
135    
136    
137    
138     double Paso_InnerProduct(const dim_t n,const double* x, const double* y, Paso_MPIInfo* mpiinfo)
139     {
140     dim_t i,local_n,rest,n_start,n_end,q;
141     double my_out=0, local_out=0., out=0.;
142     #ifdef _OPENMP
143     const int num_threads=omp_get_num_threads();
144     #else
145     const int num_threads=1;
146     #endif
147     #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q) reduction(+:my_out);
148     for (i=0;i<num_threads;++i) {
149     local_n=n/num_threads;
150     rest=n-local_n*num_threads;
151     n_start=local_n*i+MIN(i,rest);
152     n_end=local_n*(i+1)+MIN(i+1,rest);
153     for (q=n_start;q<n_end;++q) local_out+=x[q]*y[q];
154     my_out+=local_out;
155     }
156     #ifdef PASO_MPI
157     #pragma omp master
158     {
159     MPI_Allreduce(&my_out,&out, 1, MPI_DOUBLE, MPI_SUM, mpiinfo->comm);
160     }
161     #else
162     out=my_out;
163     #endif
164    
165     return out;
166    
167     }
168     double Paso_l2(const dim_t n, const double* x, Paso_MPIInfo* mpiinfo)
169     {
170     dim_t i,local_n,rest,n_start,n_end,q;
171     double my_out=0, local_out=0., out=0.;
172     #ifdef _OPENMP
173     const int num_threads=omp_get_num_threads();
174     #else
175     const int num_threads=1;
176     #endif
177     #pragma omp parallel for private(i,local_n,rest,n_start,n_end,q) reduction(+:my_out);
178     for (i=0;i<num_threads;++i) {
179     local_n=n/num_threads;
180     rest=n-local_n*num_threads;
181     n_start=local_n*i+MIN(i,rest);
182     n_end=local_n*(i+1)+MIN(i+1,rest);
183     for (q=n_start;q<n_end;++q) local_out+=x[q]*x[q];
184     my_out+=local_out;
185     }
186     #ifdef PASO_MPI
187     #pragma omp master
188     {
189     MPI_Allreduce(&my_out,&out, 1, MPI_DOUBLE, MPI_SUM, mpiinfo->comm);
190     }
191     #else
192     out=my_out;
193     #endif
194    
195     return sqrt(out);
196    
197     }
198     /*
199     * Apply a sequence of n-1 Givens rotations (c,s) to v of length n which assumed to be small.
200     *
201     */
202     void ApplyGivensRotations(const dim_t n,double* v,const double* c,const double* s)
203     {
204     dim_t i;
205     register double w1,w2;
206     #pragma ivdep
207     for (i=0; i<n-1;++i) {
208     w1=c[i]*v[i]-s[i]*v[i+1];
209     w2=s[i]*v[i]+c[i]*v[i+1];
210     v[i]=w1;
211     v[i+1]=w2;
212     }
213     }

  ViewVC Help
Powered by ViewVC 1.1.26