/[escript]/trunk/escript/src/LocalOps.h
ViewVC logotype

Annotation of /trunk/escript/src/LocalOps.h

Parent Directory Parent Directory | Revision Log Revision Log


Revision 2891 - (hide annotations)
Fri Jan 29 00:26:07 2010 UTC (10 years ago) by caltinay
File MIME type: text/plain
File size: 16832 byte(s)
Fix for ambiguous call to sqrt...

1 gross 576
2 ksteube 1312 /*******************************************************
3 ksteube 1811 *
4 jfenwick 2881 * Copyright (c) 2003-2010 by University of Queensland
5 ksteube 1811 * Earth Systems Science Computational Center (ESSCC)
6     * http://www.uq.edu.au/esscc
7     *
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13 ksteube 1312
14 ksteube 1811
15 gross 576 #if !defined escript_LocalOps_H
16     #define escript_LocalOps_H
17 phornby 2019 #if defined(_WIN32) && defined(__INTEL_COMPILER)
18 phornby 1020 # include <mathimf.h>
19 woo409 757 #else
20 phornby 1020 # include <math.h>
21 woo409 757 #endif
22 phornby 1020 #ifndef M_PI
23     # define M_PI 3.14159265358979323846 /* pi */
24     #endif
25 woo409 757
26 jfenwick 2005
27     /**
28     \file LocalOps.h
29     \brief Describes binary operations performed on double*.
30    
31     For operations on DataAbstract see BinaryOp.h.
32     For operations on DataVector see DataMaths.h.
33     */
34    
35 gross 576 namespace escript {
36    
37 jfenwick 2766 /**
38     \brief acts as a wrapper to isnan.
39     \warning if compiler does not support FP_NAN this function will always return false.
40     */
41     inline
42     bool nancheck(double d)
43     {
44 jfenwick 2884 // Q: so why not just test d!=d?
45     // A: Coz it doesn't always work [I've checked].
46     // One theory is that the optimizer skips the test.
47     #ifdef isnan
48 jfenwick 2766 return isnan(d);
49 jfenwick 2884 #elif defined _isnan
50     return _isnan(d);
51     #else
52     return false;
53 jfenwick 2766 #endif
54     }
55 gross 576
56     /**
57 jfenwick 2766 \brief returns a NaN.
58     \warning Should probably only used where you know you can test for NaNs
59     */
60     inline
61     double makeNaN()
62     {
63 jfenwick 2884 #ifdef nan
64 jfenwick 2777 return nan("");
65 jfenwick 2766 #else
66 caltinay 2891 return sqrt(-1.);
67 jfenwick 2766 #endif
68    
69     }
70    
71    
72     /**
73 gross 576 \brief
74     solves a 1x1 eigenvalue A*V=ev*V problem
75    
76 matt 1327 \param A00 Input - A_00
77 gross 576 \param ev0 Output - eigenvalue
78     */
79     inline
80     void eigenvalues1(const double A00,double* ev0) {
81    
82 gross 580 *ev0=A00;
83 gross 576
84     }
85     /**
86     \brief
87     solves a 2x2 eigenvalue A*V=ev*V problem for symmetric A
88    
89 matt 1327 \param A00 Input - A_00
90     \param A01 Input - A_01
91 gross 576 \param A11 Input - A_11
92     \param ev0 Output - smallest eigenvalue
93     \param ev1 Output - largest eigenvalue
94     */
95     inline
96 gross 583 void eigenvalues2(const double A00,const double A01,const double A11,
97 gross 576 double* ev0, double* ev1) {
98 gross 580 const register double trA=(A00+A11)/2.;
99     const register double A_00=A00-trA;
100     const register double A_11=A11-trA;
101     const register double s=sqrt(A01*A01-A_00*A_11);
102     *ev0=trA-s;
103     *ev1=trA+s;
104 gross 576 }
105     /**
106     \brief
107     solves a 3x3 eigenvalue A*V=ev*V problem for symmetric A
108    
109 matt 1327 \param A00 Input - A_00
110     \param A01 Input - A_01
111     \param A02 Input - A_02
112     \param A11 Input - A_11
113     \param A12 Input - A_12
114     \param A22 Input - A_22
115 gross 576 \param ev0 Output - smallest eigenvalue
116     \param ev1 Output - eigenvalue
117     \param ev2 Output - largest eigenvalue
118     */
119     inline
120     void eigenvalues3(const double A00, const double A01, const double A02,
121     const double A11, const double A12,
122     const double A22,
123     double* ev0, double* ev1,double* ev2) {
124    
125 gross 580 const register double trA=(A00+A11+A22)/3.;
126     const register double A_00=A00-trA;
127     const register double A_11=A11-trA;
128     const register double A_22=A22-trA;
129     const register double A01_2=A01*A01;
130     const register double A02_2=A02*A02;
131     const register double A12_2=A12*A12;
132     const register double p=A02_2+A12_2+A01_2+(A_00*A_00+A_11*A_11+A_22*A_22)/2.;
133 gross 585 if (p<=0.) {
134     *ev2=trA;
135     *ev1=trA;
136     *ev0=trA;
137    
138     } else {
139     const register double q=(A02_2*A_11+A12_2*A_00+A01_2*A_22)-(A_00*A_11*A_22+2*A01*A12*A02);
140     const register double sq_p=sqrt(p/3.);
141     register double z=-q/(2*pow(sq_p,3));
142     if (z<-1.) {
143     z=-1.;
144     } else if (z>1.) {
145     z=1.;
146     }
147     const register double alpha_3=acos(z)/3.;
148     *ev2=trA+2.*sq_p*cos(alpha_3);
149     *ev1=trA-2.*sq_p*cos(alpha_3+M_PI/3.);
150     *ev0=trA-2.*sq_p*cos(alpha_3-M_PI/3.);
151 gross 580 }
152 gross 576 }
153 gross 583 /**
154     \brief
155     solves a 1x1 eigenvalue A*V=ev*V problem for symmetric A
156    
157 matt 1327 \param A00 Input - A_00
158 gross 583 \param ev0 Output - eigenvalue
159     \param V00 Output - eigenvector
160     \param tol Input - tolerance to identify to eigenvalues
161     */
162     inline
163     void eigenvalues_and_eigenvectors1(const double A00,double* ev0,double* V00,const double tol)
164     {
165     eigenvalues1(A00,ev0);
166     *V00=1.;
167     return;
168     }
169     /**
170     \brief
171     returns a non-zero vector in the kernel of [[A00,A01],[A01,A11]] assuming that the kernel dimension is at least 1.
172    
173     \param A00 Input - matrix component
174     \param A10 Input - matrix component
175     \param A01 Input - matrix component
176     \param A11 Input - matrix component
177     \param V0 Output - vector component
178     \param V1 Output - vector component
179     */
180     inline
181     void vectorInKernel2(const double A00,const double A10,const double A01,const double A11,
182     double* V0, double*V1)
183     {
184     register double absA00=fabs(A00);
185 gross 587 register double absA10=fabs(A10);
186 gross 583 register double absA01=fabs(A01);
187     register double absA11=fabs(A11);
188 gross 587 register double m=absA11>absA10 ? absA11 : absA10;
189     if (absA00>m || absA01>m) {
190     *V0=-A01;
191 gross 583 *V1=A00;
192     } else {
193     if (m<=0) {
194     *V0=1.;
195     *V1=0.;
196     } else {
197     *V0=A11;
198 gross 587 *V1=-A10;
199 gross 583 }
200     }
201     }
202     /**
203     \brief
204 matt 1327 returns a non-zero vector in the kernel of [[A00,A01,A02],[A10,A11,A12],[A20,A21,A22]]
205 gross 583 assuming that the kernel dimension is at least 1 and A00 is non zero.
206    
207     \param A00 Input - matrix component
208     \param A10 Input - matrix component
209     \param A20 Input - matrix component
210     \param A01 Input - matrix component
211     \param A11 Input - matrix component
212     \param A21 Input - matrix component
213     \param A02 Input - matrix component
214     \param A12 Input - matrix component
215     \param A22 Input - matrix component
216     \param V0 Output - vector component
217     \param V1 Output - vector component
218     \param V2 Output - vector component
219     */
220     inline
221     void vectorInKernel3__nonZeroA00(const double A00,const double A10,const double A20,
222     const double A01,const double A11,const double A21,
223     const double A02,const double A12,const double A22,
224     double* V0,double* V1,double* V2)
225     {
226     double TEMP0,TEMP1;
227     register const double I00=1./A00;
228     register const double IA10=I00*A10;
229     register const double IA20=I00*A20;
230 gross 588 vectorInKernel2(A11-IA10*A01,A12-IA10*A02,
231     A21-IA20*A01,A22-IA20*A02,&TEMP0,&TEMP1);
232 gross 583 *V0=-(A10*TEMP0+A20*TEMP1);
233     *V1=A00*TEMP0;
234     *V2=A00*TEMP1;
235     }
236 matt 1327
237 gross 583 /**
238     \brief
239 matt 1327 solves a 2x2 eigenvalue A*V=ev*V problem for symmetric A. Eigenvectors are
240     ordered by increasing value and eigen vectors are normalizeVector3d such that
241 gross 583 length is zero and first non-zero component is positive.
242    
243 matt 1327 \param A00 Input - A_00
244     \param A01 Input - A_01
245     \param A11 Input - A_11
246 gross 583 \param ev0 Output - smallest eigenvalue
247     \param ev1 Output - eigenvalue
248     \param V00 Output - eigenvector componenent coresponding to ev0
249     \param V10 Output - eigenvector componenent coresponding to ev0
250     \param V01 Output - eigenvector componenent coresponding to ev1
251     \param V11 Output - eigenvector componenent coresponding to ev1
252     \param tol Input - tolerance to identify to eigenvalues
253     */
254     inline
255     void eigenvalues_and_eigenvectors2(const double A00,const double A01,const double A11,
256     double* ev0, double* ev1,
257     double* V00, double* V10, double* V01, double* V11,
258 matt 1327 const double tol)
259 gross 583 {
260     double TEMP0,TEMP1;
261     eigenvalues2(A00,A01,A11,ev0,ev1);
262     const register double absev0=fabs(*ev0);
263     const register double absev1=fabs(*ev1);
264     register double max_ev=absev0>absev1 ? absev0 : absev1;
265     if (fabs((*ev0)-(*ev1))<tol*max_ev) {
266     *V00=1.;
267     *V10=0.;
268     *V01=0.;
269     *V11=1.;
270     } else {
271     vectorInKernel2(A00-(*ev0),A01,A01,A11-(*ev0),&TEMP0,&TEMP1);
272     const register double scale=1./sqrt(TEMP0*TEMP0+TEMP1*TEMP1);
273     if (TEMP0<0.) {
274     *V00=-TEMP0*scale;
275     *V10=-TEMP1*scale;
276     if (TEMP1<0.) {
277 matt 1327 *V01= *V10;
278 gross 583 *V11=-(*V00);
279     } else {
280     *V01=-(*V10);
281 gross 587 *V11= (*V10);
282 gross 583 }
283     } else if (TEMP0>0.) {
284     *V00=TEMP0*scale;
285     *V10=TEMP1*scale;
286     if (TEMP1<0.) {
287 matt 1327 *V01=-(*V10);
288 gross 583 *V11= (*V00);
289     } else {
290 matt 1327 *V01= (*V10);
291     *V11=-(*V00);
292 gross 583 }
293     } else {
294     *V00=0.;
295     *V10=1;
296     *V11=0.;
297     *V01=1.;
298 matt 1327 }
299 gross 583 }
300     }
301     /**
302     \brief
303     nomalizes a 3-d vector such that length is one and first non-zero component is positive.
304    
305 matt 1327 \param V0 - vector componenent
306 gross 583 \param V1 - vector componenent
307     \param V2 - vector componenent
308     */
309     inline
310     void normalizeVector3(double* V0,double* V1,double* V2)
311     {
312     register double s;
313     if (*V0>0) {
314     s=1./sqrt((*V0)*(*V0)+(*V1)*(*V1)+(*V2)*(*V2));
315     *V0*=s;
316     *V1*=s;
317     *V2*=s;
318     } else if (*V0<0) {
319     s=-1./sqrt((*V0)*(*V0)+(*V1)*(*V1)+(*V2)*(*V2));
320     *V0*=s;
321     *V1*=s;
322     *V2*=s;
323     } else {
324     if (*V1>0) {
325     s=1./sqrt((*V1)*(*V1)+(*V2)*(*V2));
326     *V1*=s;
327     *V2*=s;
328     } else if (*V1<0) {
329     s=-1./sqrt((*V1)*(*V1)+(*V2)*(*V2));
330     *V1*=s;
331     *V2*=s;
332     } else {
333     *V2=1.;
334     }
335     }
336     }
337     /**
338     \brief
339 matt 1327 solves a 2x2 eigenvalue A*V=ev*V problem for symmetric A. Eigenvectors are
340     ordered by increasing value and eigen vectors are normalizeVector3d such that
341 gross 583 length is zero and first non-zero component is positive.
342    
343 matt 1327 \param A00 Input - A_00
344     \param A01 Input - A_01
345 jfenwick 2521 \param A02 Input - A_02
346 matt 1327 \param A11 Input - A_11
347 jfenwick 2521 \param A12 Input - A_12
348     \param A22 Input - A_22
349 gross 583 \param ev0 Output - smallest eigenvalue
350     \param ev1 Output - eigenvalue
351 jfenwick 2521 \param ev2 Output -
352 gross 583 \param V00 Output - eigenvector componenent coresponding to ev0
353     \param V10 Output - eigenvector componenent coresponding to ev0
354 jfenwick 2521 \param V20 Output -
355 gross 583 \param V01 Output - eigenvector componenent coresponding to ev1
356     \param V11 Output - eigenvector componenent coresponding to ev1
357 jfenwick 2521 \param V21 Output -
358     \param V02 Output -
359     \param V12 Output -
360     \param V22 Output -
361 gross 583 \param tol Input - tolerance to identify to eigenvalues
362     */
363     inline
364     void eigenvalues_and_eigenvectors3(const double A00, const double A01, const double A02,
365     const double A11, const double A12, const double A22,
366     double* ev0, double* ev1, double* ev2,
367 matt 1327 double* V00, double* V10, double* V20,
368     double* V01, double* V11, double* V21,
369     double* V02, double* V12, double* V22,
370 gross 583 const double tol)
371     {
372     register const double absA01=fabs(A01);
373     register const double absA02=fabs(A02);
374     register const double m=absA01>absA02 ? absA01 : absA02;
375     if (m<=0) {
376     double TEMP_V00,TEMP_V10,TEMP_V01,TEMP_V11,TEMP_EV0,TEMP_EV1;
377     eigenvalues_and_eigenvectors2(A11,A12,A22,
378     &TEMP_EV0,&TEMP_EV1,
379     &TEMP_V00,&TEMP_V10,&TEMP_V01,&TEMP_V11,tol);
380     if (A00<=TEMP_EV0) {
381     *V00=1.;
382     *V10=0.;
383     *V20=0.;
384     *V01=0.;
385     *V11=TEMP_V00;
386     *V21=TEMP_V10;
387     *V02=0.;
388     *V12=TEMP_V01;
389     *V22=TEMP_V11;
390     *ev0=A00;
391     *ev1=TEMP_EV0;
392     *ev2=TEMP_EV1;
393     } else if (A00>TEMP_EV1) {
394 gross 588 *V02=1.;
395 gross 583 *V12=0.;
396 gross 588 *V22=0.;
397     *V00=0.;
398     *V10=TEMP_V00;
399     *V20=TEMP_V10;
400     *V01=0.;
401     *V11=TEMP_V01;
402     *V21=TEMP_V11;
403 gross 583 *ev0=TEMP_EV0;
404     *ev1=TEMP_EV1;
405 gross 588 *ev2=A00;
406 gross 583 } else {
407 gross 588 *V01=1.;
408     *V11=0.;
409     *V21=0.;
410     *V00=0.;
411     *V10=TEMP_V00;
412 gross 583 *V20=TEMP_V10;
413 gross 588 *V02=0.;
414     *V12=TEMP_V01;
415 gross 583 *V22=TEMP_V11;
416     *ev0=TEMP_EV0;
417     *ev1=A00;
418     *ev2=TEMP_EV1;
419     }
420     } else {
421     eigenvalues3(A00,A01,A02,A11,A12,A22,ev0,ev1,ev2);
422     const register double absev0=fabs(*ev0);
423     const register double absev1=fabs(*ev1);
424     const register double absev2=fabs(*ev2);
425     register double max_ev=absev0>absev1 ? absev0 : absev1;
426     max_ev=max_ev>absev2 ? max_ev : absev2;
427     const register double d_01=fabs((*ev0)-(*ev1));
428     const register double d_12=fabs((*ev1)-(*ev2));
429     const register double max_d=d_01>d_12 ? d_01 : d_12;
430     if (max_d<=tol*max_ev) {
431     *V00=1.;
432     *V10=0;
433     *V20=0;
434     *V01=0;
435     *V11=1.;
436     *V21=0;
437     *V02=0;
438     *V12=0;
439     *V22=1.;
440     } else {
441     const register double S00=A00-(*ev0);
442     const register double absS00=fabs(S00);
443 jfenwick 1946 if (absS00>m) {
444 gross 583 vectorInKernel3__nonZeroA00(S00,A01,A02,A01,A11-(*ev0),A12,A02,A12,A22-(*ev0),V00,V10,V20);
445     } else if (absA02<m) {
446     vectorInKernel3__nonZeroA00(A01,A11-(*ev0),A12,S00,A01,A02,A02,A12,A22-(*ev0),V00,V10,V20);
447     } else {
448     vectorInKernel3__nonZeroA00(A02,A12,A22-(*ev0),S00,A01,A02,A01,A11-(*ev0),A12,V00,V10,V20);
449     }
450     normalizeVector3(V00,V10,V20);;
451     const register double T00=A00-(*ev2);
452     const register double absT00=fabs(T00);
453 jfenwick 1946 if (absT00>m) {
454 gross 583 vectorInKernel3__nonZeroA00(T00,A01,A02,A01,A11-(*ev2),A12,A02,A12,A22-(*ev2),V02,V12,V22);
455     } else if (absA02<m) {
456     vectorInKernel3__nonZeroA00(A01,A11-(*ev2),A12,T00,A01,A02,A02,A12,A22-(*ev2),V02,V12,V22);
457     } else {
458     vectorInKernel3__nonZeroA00(A02,A12,A22-(*ev2),T00,A01,A02,A01,A11-(*ev2),A12,V02,V12,V22);
459     }
460     const register double dot=(*V02)*(*V00)+(*V12)*(*V10)+(*V22)*(*V20);
461     *V02-=dot*(*V00);
462     *V12-=dot*(*V10);
463     *V22-=dot*(*V20);
464     normalizeVector3(V02,V12,V22);
465     *V01=(*V10)*(*V22)-(*V12)*(*V20);
466     *V11=(*V20)*(*V02)-(*V00)*(*V22);
467     *V21=(*V00)*(*V12)-(*V02)*(*V10);
468     normalizeVector3(V01,V11,V21);
469     }
470     }
471     }
472 ksteube 813
473     // General tensor product: arg_2(SL x SR) = arg_0(SL x SM) * arg_1(SM x SR)
474     // SM is the product of the last axis_offset entries in arg_0.getShape().
475     inline
476     void matrix_matrix_product(const int SL, const int SM, const int SR, const double* A, const double* B, double* C, int transpose)
477     {
478     if (transpose == 0) {
479     for (int i=0; i<SL; i++) {
480     for (int j=0; j<SR; j++) {
481     double sum = 0.0;
482     for (int l=0; l<SM; l++) {
483     sum += A[i+SL*l] * B[l+SM*j];
484     }
485     C[i+SL*j] = sum;
486     }
487     }
488     }
489     else if (transpose == 1) {
490     for (int i=0; i<SL; i++) {
491     for (int j=0; j<SR; j++) {
492     double sum = 0.0;
493     for (int l=0; l<SM; l++) {
494     sum += A[i*SM+l] * B[l+SM*j];
495     }
496     C[i+SL*j] = sum;
497     }
498     }
499     }
500     else if (transpose == 2) {
501     for (int i=0; i<SL; i++) {
502     for (int j=0; j<SR; j++) {
503     double sum = 0.0;
504     for (int l=0; l<SM; l++) {
505     sum += A[i+SL*l] * B[l*SR+j];
506     }
507     C[i+SL*j] = sum;
508     }
509     }
510     }
511     }
512    
513 matt 1334 template <typename UnaryFunction>
514     inline void tensor_unary_operation(const int size,
515     const double *arg1,
516     double * argRes,
517     UnaryFunction operation)
518     {
519     for (int i = 0; i < size; ++i) {
520     argRes[i] = operation(arg1[i]);
521     }
522     return;
523     }
524    
525 matt 1327 template <typename BinaryFunction>
526     inline void tensor_binary_operation(const int size,
527     const double *arg1,
528     const double *arg2,
529     double * argRes,
530     BinaryFunction operation)
531     {
532     for (int i = 0; i < size; ++i) {
533     argRes[i] = operation(arg1[i], arg2[i]);
534     }
535     return;
536     }
537    
538     template <typename BinaryFunction>
539     inline void tensor_binary_operation(const int size,
540     double arg1,
541     const double *arg2,
542     double *argRes,
543     BinaryFunction operation)
544     {
545     for (int i = 0; i < size; ++i) {
546     argRes[i] = operation(arg1, arg2[i]);
547     }
548     return;
549     }
550    
551     template <typename BinaryFunction>
552     inline void tensor_binary_operation(const int size,
553     const double *arg1,
554     double arg2,
555     double *argRes,
556     BinaryFunction operation)
557     {
558     for (int i = 0; i < size; ++i) {
559     argRes[i] = operation(arg1[i], arg2);
560     }
561     return;
562     }
563    
564 gross 576 } // end of namespace
565     #endif

  ViewVC Help
Powered by ViewVC 1.1.26