/[escript]/trunk/escript/src/LocalOps.h
ViewVC logotype

Annotation of /trunk/escript/src/LocalOps.h

Parent Directory Parent Directory | Revision Log Revision Log


Revision 2019 - (hide annotations)
Mon Nov 10 13:49:00 2008 UTC (10 years, 9 months ago) by phornby
File MIME type: text/plain
File size: 16005 byte(s)
Yet another concerted effort to handle missing macro arguments
in a portable way.


1 gross 576
2 ksteube 1312 /*******************************************************
3 ksteube 1811 *
4     * Copyright (c) 2003-2008 by University of Queensland
5     * Earth Systems Science Computational Center (ESSCC)
6     * http://www.uq.edu.au/esscc
7     *
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13 ksteube 1312
14 ksteube 1811
15 gross 576 #if !defined escript_LocalOps_H
16     #define escript_LocalOps_H
17 phornby 2019 #if defined(_WIN32) && defined(__INTEL_COMPILER)
18 phornby 1020 # include <mathimf.h>
19 woo409 757 #else
20 phornby 1020 # include <math.h>
21 woo409 757 #endif
22 phornby 1020 #ifndef M_PI
23     # define M_PI 3.14159265358979323846 /* pi */
24     #endif
25 woo409 757
26 jfenwick 2005
27     /**
28     \file LocalOps.h
29     \brief Describes binary operations performed on double*.
30    
31     For operations on DataAbstract see BinaryOp.h.
32     For operations on DataVector see DataMaths.h.
33     */
34    
35 gross 576 namespace escript {
36    
37    
38     /**
39     \brief
40     solves a 1x1 eigenvalue A*V=ev*V problem
41    
42 matt 1327 \param A00 Input - A_00
43 gross 576 \param ev0 Output - eigenvalue
44     */
45     inline
46     void eigenvalues1(const double A00,double* ev0) {
47    
48 gross 580 *ev0=A00;
49 gross 576
50     }
51     /**
52     \brief
53     solves a 2x2 eigenvalue A*V=ev*V problem for symmetric A
54    
55 matt 1327 \param A00 Input - A_00
56     \param A01 Input - A_01
57 gross 576 \param A11 Input - A_11
58     \param ev0 Output - smallest eigenvalue
59     \param ev1 Output - largest eigenvalue
60     */
61     inline
62 gross 583 void eigenvalues2(const double A00,const double A01,const double A11,
63 gross 576 double* ev0, double* ev1) {
64 gross 580 const register double trA=(A00+A11)/2.;
65     const register double A_00=A00-trA;
66     const register double A_11=A11-trA;
67     const register double s=sqrt(A01*A01-A_00*A_11);
68     *ev0=trA-s;
69     *ev1=trA+s;
70 gross 576 }
71     /**
72     \brief
73     solves a 3x3 eigenvalue A*V=ev*V problem for symmetric A
74    
75 matt 1327 \param A00 Input - A_00
76     \param A01 Input - A_01
77     \param A02 Input - A_02
78     \param A11 Input - A_11
79     \param A12 Input - A_12
80     \param A22 Input - A_22
81 gross 576 \param ev0 Output - smallest eigenvalue
82     \param ev1 Output - eigenvalue
83     \param ev2 Output - largest eigenvalue
84     */
85     inline
86     void eigenvalues3(const double A00, const double A01, const double A02,
87     const double A11, const double A12,
88     const double A22,
89     double* ev0, double* ev1,double* ev2) {
90    
91 gross 580 const register double trA=(A00+A11+A22)/3.;
92     const register double A_00=A00-trA;
93     const register double A_11=A11-trA;
94     const register double A_22=A22-trA;
95     const register double A01_2=A01*A01;
96     const register double A02_2=A02*A02;
97     const register double A12_2=A12*A12;
98     const register double p=A02_2+A12_2+A01_2+(A_00*A_00+A_11*A_11+A_22*A_22)/2.;
99 gross 585 if (p<=0.) {
100     *ev2=trA;
101     *ev1=trA;
102     *ev0=trA;
103    
104     } else {
105     const register double q=(A02_2*A_11+A12_2*A_00+A01_2*A_22)-(A_00*A_11*A_22+2*A01*A12*A02);
106     const register double sq_p=sqrt(p/3.);
107     register double z=-q/(2*pow(sq_p,3));
108     if (z<-1.) {
109     z=-1.;
110     } else if (z>1.) {
111     z=1.;
112     }
113     const register double alpha_3=acos(z)/3.;
114     *ev2=trA+2.*sq_p*cos(alpha_3);
115     *ev1=trA-2.*sq_p*cos(alpha_3+M_PI/3.);
116     *ev0=trA-2.*sq_p*cos(alpha_3-M_PI/3.);
117 gross 580 }
118 gross 576 }
119 gross 583 /**
120     \brief
121     solves a 1x1 eigenvalue A*V=ev*V problem for symmetric A
122    
123 matt 1327 \param A00 Input - A_00
124 gross 583 \param ev0 Output - eigenvalue
125     \param V00 Output - eigenvector
126     \param tol Input - tolerance to identify to eigenvalues
127     */
128     inline
129     void eigenvalues_and_eigenvectors1(const double A00,double* ev0,double* V00,const double tol)
130     {
131     eigenvalues1(A00,ev0);
132     *V00=1.;
133     return;
134     }
135     /**
136     \brief
137     returns a non-zero vector in the kernel of [[A00,A01],[A01,A11]] assuming that the kernel dimension is at least 1.
138    
139     \param A00 Input - matrix component
140     \param A10 Input - matrix component
141     \param A01 Input - matrix component
142     \param A11 Input - matrix component
143     \param V0 Output - vector component
144     \param V1 Output - vector component
145     */
146     inline
147     void vectorInKernel2(const double A00,const double A10,const double A01,const double A11,
148     double* V0, double*V1)
149     {
150     register double absA00=fabs(A00);
151 gross 587 register double absA10=fabs(A10);
152 gross 583 register double absA01=fabs(A01);
153     register double absA11=fabs(A11);
154 gross 587 register double m=absA11>absA10 ? absA11 : absA10;
155     if (absA00>m || absA01>m) {
156     *V0=-A01;
157 gross 583 *V1=A00;
158     } else {
159     if (m<=0) {
160     *V0=1.;
161     *V1=0.;
162     } else {
163     *V0=A11;
164 gross 587 *V1=-A10;
165 gross 583 }
166     }
167     }
168     /**
169     \brief
170 matt 1327 returns a non-zero vector in the kernel of [[A00,A01,A02],[A10,A11,A12],[A20,A21,A22]]
171 gross 583 assuming that the kernel dimension is at least 1 and A00 is non zero.
172    
173     \param A00 Input - matrix component
174     \param A10 Input - matrix component
175     \param A20 Input - matrix component
176     \param A01 Input - matrix component
177     \param A11 Input - matrix component
178     \param A21 Input - matrix component
179     \param A02 Input - matrix component
180     \param A12 Input - matrix component
181     \param A22 Input - matrix component
182     \param V0 Output - vector component
183     \param V1 Output - vector component
184     \param V2 Output - vector component
185     */
186     inline
187     void vectorInKernel3__nonZeroA00(const double A00,const double A10,const double A20,
188     const double A01,const double A11,const double A21,
189     const double A02,const double A12,const double A22,
190     double* V0,double* V1,double* V2)
191     {
192     double TEMP0,TEMP1;
193     register const double I00=1./A00;
194     register const double IA10=I00*A10;
195     register const double IA20=I00*A20;
196 gross 588 vectorInKernel2(A11-IA10*A01,A12-IA10*A02,
197     A21-IA20*A01,A22-IA20*A02,&TEMP0,&TEMP1);
198 gross 583 *V0=-(A10*TEMP0+A20*TEMP1);
199     *V1=A00*TEMP0;
200     *V2=A00*TEMP1;
201     }
202 matt 1327
203 gross 583 /**
204     \brief
205 matt 1327 solves a 2x2 eigenvalue A*V=ev*V problem for symmetric A. Eigenvectors are
206     ordered by increasing value and eigen vectors are normalizeVector3d such that
207 gross 583 length is zero and first non-zero component is positive.
208    
209 matt 1327 \param A00 Input - A_00
210     \param A01 Input - A_01
211     \param A11 Input - A_11
212 gross 583 \param ev0 Output - smallest eigenvalue
213     \param ev1 Output - eigenvalue
214     \param V00 Output - eigenvector componenent coresponding to ev0
215     \param V10 Output - eigenvector componenent coresponding to ev0
216     \param V01 Output - eigenvector componenent coresponding to ev1
217     \param V11 Output - eigenvector componenent coresponding to ev1
218     \param tol Input - tolerance to identify to eigenvalues
219     */
220     inline
221     void eigenvalues_and_eigenvectors2(const double A00,const double A01,const double A11,
222     double* ev0, double* ev1,
223     double* V00, double* V10, double* V01, double* V11,
224 matt 1327 const double tol)
225 gross 583 {
226     double TEMP0,TEMP1;
227     eigenvalues2(A00,A01,A11,ev0,ev1);
228     const register double absev0=fabs(*ev0);
229     const register double absev1=fabs(*ev1);
230     register double max_ev=absev0>absev1 ? absev0 : absev1;
231     if (fabs((*ev0)-(*ev1))<tol*max_ev) {
232     *V00=1.;
233     *V10=0.;
234     *V01=0.;
235     *V11=1.;
236     } else {
237     vectorInKernel2(A00-(*ev0),A01,A01,A11-(*ev0),&TEMP0,&TEMP1);
238     const register double scale=1./sqrt(TEMP0*TEMP0+TEMP1*TEMP1);
239     if (TEMP0<0.) {
240     *V00=-TEMP0*scale;
241     *V10=-TEMP1*scale;
242     if (TEMP1<0.) {
243 matt 1327 *V01= *V10;
244 gross 583 *V11=-(*V00);
245     } else {
246     *V01=-(*V10);
247 gross 587 *V11= (*V10);
248 gross 583 }
249     } else if (TEMP0>0.) {
250     *V00=TEMP0*scale;
251     *V10=TEMP1*scale;
252     if (TEMP1<0.) {
253 matt 1327 *V01=-(*V10);
254 gross 583 *V11= (*V00);
255     } else {
256 matt 1327 *V01= (*V10);
257     *V11=-(*V00);
258 gross 583 }
259     } else {
260     *V00=0.;
261     *V10=1;
262     *V11=0.;
263     *V01=1.;
264 matt 1327 }
265 gross 583 }
266     }
267     /**
268     \brief
269     nomalizes a 3-d vector such that length is one and first non-zero component is positive.
270    
271 matt 1327 \param V0 - vector componenent
272 gross 583 \param V1 - vector componenent
273     \param V2 - vector componenent
274     */
275     inline
276     void normalizeVector3(double* V0,double* V1,double* V2)
277     {
278     register double s;
279     if (*V0>0) {
280     s=1./sqrt((*V0)*(*V0)+(*V1)*(*V1)+(*V2)*(*V2));
281     *V0*=s;
282     *V1*=s;
283     *V2*=s;
284     } else if (*V0<0) {
285     s=-1./sqrt((*V0)*(*V0)+(*V1)*(*V1)+(*V2)*(*V2));
286     *V0*=s;
287     *V1*=s;
288     *V2*=s;
289     } else {
290     if (*V1>0) {
291     s=1./sqrt((*V1)*(*V1)+(*V2)*(*V2));
292     *V1*=s;
293     *V2*=s;
294     } else if (*V1<0) {
295     s=-1./sqrt((*V1)*(*V1)+(*V2)*(*V2));
296     *V1*=s;
297     *V2*=s;
298     } else {
299     *V2=1.;
300     }
301     }
302     }
303     /**
304     \brief
305 matt 1327 solves a 2x2 eigenvalue A*V=ev*V problem for symmetric A. Eigenvectors are
306     ordered by increasing value and eigen vectors are normalizeVector3d such that
307 gross 583 length is zero and first non-zero component is positive.
308    
309 matt 1327 \param A00 Input - A_00
310     \param A01 Input - A_01
311     \param A11 Input - A_11
312 gross 583 \param ev0 Output - smallest eigenvalue
313     \param ev1 Output - eigenvalue
314     \param V00 Output - eigenvector componenent coresponding to ev0
315     \param V10 Output - eigenvector componenent coresponding to ev0
316     \param V01 Output - eigenvector componenent coresponding to ev1
317     \param V11 Output - eigenvector componenent coresponding to ev1
318     \param tol Input - tolerance to identify to eigenvalues
319     */
320     inline
321     void eigenvalues_and_eigenvectors3(const double A00, const double A01, const double A02,
322     const double A11, const double A12, const double A22,
323     double* ev0, double* ev1, double* ev2,
324 matt 1327 double* V00, double* V10, double* V20,
325     double* V01, double* V11, double* V21,
326     double* V02, double* V12, double* V22,
327 gross 583 const double tol)
328     {
329     register const double absA01=fabs(A01);
330     register const double absA02=fabs(A02);
331     register const double m=absA01>absA02 ? absA01 : absA02;
332     if (m<=0) {
333     double TEMP_V00,TEMP_V10,TEMP_V01,TEMP_V11,TEMP_EV0,TEMP_EV1;
334     eigenvalues_and_eigenvectors2(A11,A12,A22,
335     &TEMP_EV0,&TEMP_EV1,
336     &TEMP_V00,&TEMP_V10,&TEMP_V01,&TEMP_V11,tol);
337     if (A00<=TEMP_EV0) {
338     *V00=1.;
339     *V10=0.;
340     *V20=0.;
341     *V01=0.;
342     *V11=TEMP_V00;
343     *V21=TEMP_V10;
344     *V02=0.;
345     *V12=TEMP_V01;
346     *V22=TEMP_V11;
347     *ev0=A00;
348     *ev1=TEMP_EV0;
349     *ev2=TEMP_EV1;
350     } else if (A00>TEMP_EV1) {
351 gross 588 *V02=1.;
352 gross 583 *V12=0.;
353 gross 588 *V22=0.;
354     *V00=0.;
355     *V10=TEMP_V00;
356     *V20=TEMP_V10;
357     *V01=0.;
358     *V11=TEMP_V01;
359     *V21=TEMP_V11;
360 gross 583 *ev0=TEMP_EV0;
361     *ev1=TEMP_EV1;
362 gross 588 *ev2=A00;
363 gross 583 } else {
364 gross 588 *V01=1.;
365     *V11=0.;
366     *V21=0.;
367     *V00=0.;
368     *V10=TEMP_V00;
369 gross 583 *V20=TEMP_V10;
370 gross 588 *V02=0.;
371     *V12=TEMP_V01;
372 gross 583 *V22=TEMP_V11;
373     *ev0=TEMP_EV0;
374     *ev1=A00;
375     *ev2=TEMP_EV1;
376     }
377     } else {
378     eigenvalues3(A00,A01,A02,A11,A12,A22,ev0,ev1,ev2);
379     const register double absev0=fabs(*ev0);
380     const register double absev1=fabs(*ev1);
381     const register double absev2=fabs(*ev2);
382     register double max_ev=absev0>absev1 ? absev0 : absev1;
383     max_ev=max_ev>absev2 ? max_ev : absev2;
384     const register double d_01=fabs((*ev0)-(*ev1));
385     const register double d_12=fabs((*ev1)-(*ev2));
386     const register double max_d=d_01>d_12 ? d_01 : d_12;
387     if (max_d<=tol*max_ev) {
388     *V00=1.;
389     *V10=0;
390     *V20=0;
391     *V01=0;
392     *V11=1.;
393     *V21=0;
394     *V02=0;
395     *V12=0;
396     *V22=1.;
397     } else {
398     const register double S00=A00-(*ev0);
399     const register double absS00=fabs(S00);
400 jfenwick 1946 if (absS00>m) {
401 gross 583 vectorInKernel3__nonZeroA00(S00,A01,A02,A01,A11-(*ev0),A12,A02,A12,A22-(*ev0),V00,V10,V20);
402     } else if (absA02<m) {
403     vectorInKernel3__nonZeroA00(A01,A11-(*ev0),A12,S00,A01,A02,A02,A12,A22-(*ev0),V00,V10,V20);
404     } else {
405     vectorInKernel3__nonZeroA00(A02,A12,A22-(*ev0),S00,A01,A02,A01,A11-(*ev0),A12,V00,V10,V20);
406     }
407     normalizeVector3(V00,V10,V20);;
408     const register double T00=A00-(*ev2);
409     const register double absT00=fabs(T00);
410 jfenwick 1946 if (absT00>m) {
411 gross 583 vectorInKernel3__nonZeroA00(T00,A01,A02,A01,A11-(*ev2),A12,A02,A12,A22-(*ev2),V02,V12,V22);
412     } else if (absA02<m) {
413     vectorInKernel3__nonZeroA00(A01,A11-(*ev2),A12,T00,A01,A02,A02,A12,A22-(*ev2),V02,V12,V22);
414     } else {
415     vectorInKernel3__nonZeroA00(A02,A12,A22-(*ev2),T00,A01,A02,A01,A11-(*ev2),A12,V02,V12,V22);
416     }
417     const register double dot=(*V02)*(*V00)+(*V12)*(*V10)+(*V22)*(*V20);
418     *V02-=dot*(*V00);
419     *V12-=dot*(*V10);
420     *V22-=dot*(*V20);
421     normalizeVector3(V02,V12,V22);
422     *V01=(*V10)*(*V22)-(*V12)*(*V20);
423     *V11=(*V20)*(*V02)-(*V00)*(*V22);
424     *V21=(*V00)*(*V12)-(*V02)*(*V10);
425     normalizeVector3(V01,V11,V21);
426     }
427     }
428     }
429 ksteube 813
430     // General tensor product: arg_2(SL x SR) = arg_0(SL x SM) * arg_1(SM x SR)
431     // SM is the product of the last axis_offset entries in arg_0.getShape().
432     inline
433     void matrix_matrix_product(const int SL, const int SM, const int SR, const double* A, const double* B, double* C, int transpose)
434     {
435     if (transpose == 0) {
436     for (int i=0; i<SL; i++) {
437     for (int j=0; j<SR; j++) {
438     double sum = 0.0;
439     for (int l=0; l<SM; l++) {
440     sum += A[i+SL*l] * B[l+SM*j];
441     }
442     C[i+SL*j] = sum;
443     }
444     }
445     }
446     else if (transpose == 1) {
447     for (int i=0; i<SL; i++) {
448     for (int j=0; j<SR; j++) {
449     double sum = 0.0;
450     for (int l=0; l<SM; l++) {
451     sum += A[i*SM+l] * B[l+SM*j];
452     }
453     C[i+SL*j] = sum;
454     }
455     }
456     }
457     else if (transpose == 2) {
458     for (int i=0; i<SL; i++) {
459     for (int j=0; j<SR; j++) {
460     double sum = 0.0;
461     for (int l=0; l<SM; l++) {
462     sum += A[i+SL*l] * B[l*SR+j];
463     }
464     C[i+SL*j] = sum;
465     }
466     }
467     }
468     }
469    
470 matt 1334 template <typename UnaryFunction>
471     inline void tensor_unary_operation(const int size,
472     const double *arg1,
473     double * argRes,
474     UnaryFunction operation)
475     {
476     for (int i = 0; i < size; ++i) {
477     argRes[i] = operation(arg1[i]);
478     }
479     return;
480     }
481    
482 matt 1327 template <typename BinaryFunction>
483     inline void tensor_binary_operation(const int size,
484     const double *arg1,
485     const double *arg2,
486     double * argRes,
487     BinaryFunction operation)
488     {
489     for (int i = 0; i < size; ++i) {
490     argRes[i] = operation(arg1[i], arg2[i]);
491     }
492     return;
493     }
494    
495     template <typename BinaryFunction>
496     inline void tensor_binary_operation(const int size,
497     double arg1,
498     const double *arg2,
499     double *argRes,
500     BinaryFunction operation)
501     {
502     for (int i = 0; i < size; ++i) {
503     argRes[i] = operation(arg1, arg2[i]);
504     }
505     return;
506     }
507    
508     template <typename BinaryFunction>
509     inline void tensor_binary_operation(const int size,
510     const double *arg1,
511     double arg2,
512     double *argRes,
513     BinaryFunction operation)
514     {
515     for (int i = 0; i < size; ++i) {
516     argRes[i] = operation(arg1[i], arg2);
517     }
518     return;
519     }
520    
521 gross 576 } // end of namespace
522     #endif

  ViewVC Help
Powered by ViewVC 1.1.26