/[escript]/trunk/escript/src/LocalOps.h
ViewVC logotype

Annotation of /trunk/escript/src/LocalOps.h

Parent Directory Parent Directory | Revision Log Revision Log


Revision 2777 - (hide annotations)
Thu Nov 26 01:06:00 2009 UTC (9 years, 11 months ago) by jfenwick
File MIME type: text/plain
File size: 16788 byte(s)
Added the LAZY_STACK_PROF #define for Lazy.
If enabled lazy will print the (roughly) maximum stack used by any openmp 
thread over the course of this session.

1 gross 576
2 ksteube 1312 /*******************************************************
3 ksteube 1811 *
4 jfenwick 2548 * Copyright (c) 2003-2009 by University of Queensland
5 ksteube 1811 * Earth Systems Science Computational Center (ESSCC)
6     * http://www.uq.edu.au/esscc
7     *
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13 ksteube 1312
14 ksteube 1811
15 gross 576 #if !defined escript_LocalOps_H
16     #define escript_LocalOps_H
17 phornby 2019 #if defined(_WIN32) && defined(__INTEL_COMPILER)
18 phornby 1020 # include <mathimf.h>
19 woo409 757 #else
20 phornby 1020 # include <math.h>
21 woo409 757 #endif
22 phornby 1020 #ifndef M_PI
23     # define M_PI 3.14159265358979323846 /* pi */
24     #endif
25 woo409 757
26 jfenwick 2005
27     /**
28     \file LocalOps.h
29     \brief Describes binary operations performed on double*.
30    
31     For operations on DataAbstract see BinaryOp.h.
32     For operations on DataVector see DataMaths.h.
33     */
34    
35 gross 576 namespace escript {
36    
37 jfenwick 2766 /**
38     \brief acts as a wrapper to isnan.
39     \warning if compiler does not support FP_NAN this function will always return false.
40     */
41     inline
42     bool nancheck(double d)
43     {
44 jfenwick 2773 #ifndef isnan // Q: so why not just test d!=d?
45 jfenwick 2766 return false; // A: Coz it doesn't always work [I've checked].
46     #else // One theory is that the optimizer skips the test.
47     return isnan(d);
48     #endif
49     }
50 gross 576
51     /**
52 jfenwick 2766 \brief returns a NaN.
53     \warning Should probably only used where you know you can test for NaNs
54     */
55     inline
56     double makeNaN()
57     {
58 artak 2774 #ifdef isnan
59 jfenwick 2777 return nan("");
60 jfenwick 2766 #else
61     return sqrt(-1);
62     #endif
63    
64     }
65    
66    
67     /**
68 gross 576 \brief
69     solves a 1x1 eigenvalue A*V=ev*V problem
70    
71 matt 1327 \param A00 Input - A_00
72 gross 576 \param ev0 Output - eigenvalue
73     */
74     inline
75     void eigenvalues1(const double A00,double* ev0) {
76    
77 gross 580 *ev0=A00;
78 gross 576
79     }
80     /**
81     \brief
82     solves a 2x2 eigenvalue A*V=ev*V problem for symmetric A
83    
84 matt 1327 \param A00 Input - A_00
85     \param A01 Input - A_01
86 gross 576 \param A11 Input - A_11
87     \param ev0 Output - smallest eigenvalue
88     \param ev1 Output - largest eigenvalue
89     */
90     inline
91 gross 583 void eigenvalues2(const double A00,const double A01,const double A11,
92 gross 576 double* ev0, double* ev1) {
93 gross 580 const register double trA=(A00+A11)/2.;
94     const register double A_00=A00-trA;
95     const register double A_11=A11-trA;
96     const register double s=sqrt(A01*A01-A_00*A_11);
97     *ev0=trA-s;
98     *ev1=trA+s;
99 gross 576 }
100     /**
101     \brief
102     solves a 3x3 eigenvalue A*V=ev*V problem for symmetric A
103    
104 matt 1327 \param A00 Input - A_00
105     \param A01 Input - A_01
106     \param A02 Input - A_02
107     \param A11 Input - A_11
108     \param A12 Input - A_12
109     \param A22 Input - A_22
110 gross 576 \param ev0 Output - smallest eigenvalue
111     \param ev1 Output - eigenvalue
112     \param ev2 Output - largest eigenvalue
113     */
114     inline
115     void eigenvalues3(const double A00, const double A01, const double A02,
116     const double A11, const double A12,
117     const double A22,
118     double* ev0, double* ev1,double* ev2) {
119    
120 gross 580 const register double trA=(A00+A11+A22)/3.;
121     const register double A_00=A00-trA;
122     const register double A_11=A11-trA;
123     const register double A_22=A22-trA;
124     const register double A01_2=A01*A01;
125     const register double A02_2=A02*A02;
126     const register double A12_2=A12*A12;
127     const register double p=A02_2+A12_2+A01_2+(A_00*A_00+A_11*A_11+A_22*A_22)/2.;
128 gross 585 if (p<=0.) {
129     *ev2=trA;
130     *ev1=trA;
131     *ev0=trA;
132    
133     } else {
134     const register double q=(A02_2*A_11+A12_2*A_00+A01_2*A_22)-(A_00*A_11*A_22+2*A01*A12*A02);
135     const register double sq_p=sqrt(p/3.);
136     register double z=-q/(2*pow(sq_p,3));
137     if (z<-1.) {
138     z=-1.;
139     } else if (z>1.) {
140     z=1.;
141     }
142     const register double alpha_3=acos(z)/3.;
143     *ev2=trA+2.*sq_p*cos(alpha_3);
144     *ev1=trA-2.*sq_p*cos(alpha_3+M_PI/3.);
145     *ev0=trA-2.*sq_p*cos(alpha_3-M_PI/3.);
146 gross 580 }
147 gross 576 }
148 gross 583 /**
149     \brief
150     solves a 1x1 eigenvalue A*V=ev*V problem for symmetric A
151    
152 matt 1327 \param A00 Input - A_00
153 gross 583 \param ev0 Output - eigenvalue
154     \param V00 Output - eigenvector
155     \param tol Input - tolerance to identify to eigenvalues
156     */
157     inline
158     void eigenvalues_and_eigenvectors1(const double A00,double* ev0,double* V00,const double tol)
159     {
160     eigenvalues1(A00,ev0);
161     *V00=1.;
162     return;
163     }
164     /**
165     \brief
166     returns a non-zero vector in the kernel of [[A00,A01],[A01,A11]] assuming that the kernel dimension is at least 1.
167    
168     \param A00 Input - matrix component
169     \param A10 Input - matrix component
170     \param A01 Input - matrix component
171     \param A11 Input - matrix component
172     \param V0 Output - vector component
173     \param V1 Output - vector component
174     */
175     inline
176     void vectorInKernel2(const double A00,const double A10,const double A01,const double A11,
177     double* V0, double*V1)
178     {
179     register double absA00=fabs(A00);
180 gross 587 register double absA10=fabs(A10);
181 gross 583 register double absA01=fabs(A01);
182     register double absA11=fabs(A11);
183 gross 587 register double m=absA11>absA10 ? absA11 : absA10;
184     if (absA00>m || absA01>m) {
185     *V0=-A01;
186 gross 583 *V1=A00;
187     } else {
188     if (m<=0) {
189     *V0=1.;
190     *V1=0.;
191     } else {
192     *V0=A11;
193 gross 587 *V1=-A10;
194 gross 583 }
195     }
196     }
197     /**
198     \brief
199 matt 1327 returns a non-zero vector in the kernel of [[A00,A01,A02],[A10,A11,A12],[A20,A21,A22]]
200 gross 583 assuming that the kernel dimension is at least 1 and A00 is non zero.
201    
202     \param A00 Input - matrix component
203     \param A10 Input - matrix component
204     \param A20 Input - matrix component
205     \param A01 Input - matrix component
206     \param A11 Input - matrix component
207     \param A21 Input - matrix component
208     \param A02 Input - matrix component
209     \param A12 Input - matrix component
210     \param A22 Input - matrix component
211     \param V0 Output - vector component
212     \param V1 Output - vector component
213     \param V2 Output - vector component
214     */
215     inline
216     void vectorInKernel3__nonZeroA00(const double A00,const double A10,const double A20,
217     const double A01,const double A11,const double A21,
218     const double A02,const double A12,const double A22,
219     double* V0,double* V1,double* V2)
220     {
221     double TEMP0,TEMP1;
222     register const double I00=1./A00;
223     register const double IA10=I00*A10;
224     register const double IA20=I00*A20;
225 gross 588 vectorInKernel2(A11-IA10*A01,A12-IA10*A02,
226     A21-IA20*A01,A22-IA20*A02,&TEMP0,&TEMP1);
227 gross 583 *V0=-(A10*TEMP0+A20*TEMP1);
228     *V1=A00*TEMP0;
229     *V2=A00*TEMP1;
230     }
231 matt 1327
232 gross 583 /**
233     \brief
234 matt 1327 solves a 2x2 eigenvalue A*V=ev*V problem for symmetric A. Eigenvectors are
235     ordered by increasing value and eigen vectors are normalizeVector3d such that
236 gross 583 length is zero and first non-zero component is positive.
237    
238 matt 1327 \param A00 Input - A_00
239     \param A01 Input - A_01
240     \param A11 Input - A_11
241 gross 583 \param ev0 Output - smallest eigenvalue
242     \param ev1 Output - eigenvalue
243     \param V00 Output - eigenvector componenent coresponding to ev0
244     \param V10 Output - eigenvector componenent coresponding to ev0
245     \param V01 Output - eigenvector componenent coresponding to ev1
246     \param V11 Output - eigenvector componenent coresponding to ev1
247     \param tol Input - tolerance to identify to eigenvalues
248     */
249     inline
250     void eigenvalues_and_eigenvectors2(const double A00,const double A01,const double A11,
251     double* ev0, double* ev1,
252     double* V00, double* V10, double* V01, double* V11,
253 matt 1327 const double tol)
254 gross 583 {
255     double TEMP0,TEMP1;
256     eigenvalues2(A00,A01,A11,ev0,ev1);
257     const register double absev0=fabs(*ev0);
258     const register double absev1=fabs(*ev1);
259     register double max_ev=absev0>absev1 ? absev0 : absev1;
260     if (fabs((*ev0)-(*ev1))<tol*max_ev) {
261     *V00=1.;
262     *V10=0.;
263     *V01=0.;
264     *V11=1.;
265     } else {
266     vectorInKernel2(A00-(*ev0),A01,A01,A11-(*ev0),&TEMP0,&TEMP1);
267     const register double scale=1./sqrt(TEMP0*TEMP0+TEMP1*TEMP1);
268     if (TEMP0<0.) {
269     *V00=-TEMP0*scale;
270     *V10=-TEMP1*scale;
271     if (TEMP1<0.) {
272 matt 1327 *V01= *V10;
273 gross 583 *V11=-(*V00);
274     } else {
275     *V01=-(*V10);
276 gross 587 *V11= (*V10);
277 gross 583 }
278     } else if (TEMP0>0.) {
279     *V00=TEMP0*scale;
280     *V10=TEMP1*scale;
281     if (TEMP1<0.) {
282 matt 1327 *V01=-(*V10);
283 gross 583 *V11= (*V00);
284     } else {
285 matt 1327 *V01= (*V10);
286     *V11=-(*V00);
287 gross 583 }
288     } else {
289     *V00=0.;
290     *V10=1;
291     *V11=0.;
292     *V01=1.;
293 matt 1327 }
294 gross 583 }
295     }
296     /**
297     \brief
298     nomalizes a 3-d vector such that length is one and first non-zero component is positive.
299    
300 matt 1327 \param V0 - vector componenent
301 gross 583 \param V1 - vector componenent
302     \param V2 - vector componenent
303     */
304     inline
305     void normalizeVector3(double* V0,double* V1,double* V2)
306     {
307     register double s;
308     if (*V0>0) {
309     s=1./sqrt((*V0)*(*V0)+(*V1)*(*V1)+(*V2)*(*V2));
310     *V0*=s;
311     *V1*=s;
312     *V2*=s;
313     } else if (*V0<0) {
314     s=-1./sqrt((*V0)*(*V0)+(*V1)*(*V1)+(*V2)*(*V2));
315     *V0*=s;
316     *V1*=s;
317     *V2*=s;
318     } else {
319     if (*V1>0) {
320     s=1./sqrt((*V1)*(*V1)+(*V2)*(*V2));
321     *V1*=s;
322     *V2*=s;
323     } else if (*V1<0) {
324     s=-1./sqrt((*V1)*(*V1)+(*V2)*(*V2));
325     *V1*=s;
326     *V2*=s;
327     } else {
328     *V2=1.;
329     }
330     }
331     }
332     /**
333     \brief
334 matt 1327 solves a 2x2 eigenvalue A*V=ev*V problem for symmetric A. Eigenvectors are
335     ordered by increasing value and eigen vectors are normalizeVector3d such that
336 gross 583 length is zero and first non-zero component is positive.
337    
338 matt 1327 \param A00 Input - A_00
339     \param A01 Input - A_01
340 jfenwick 2521 \param A02 Input - A_02
341 matt 1327 \param A11 Input - A_11
342 jfenwick 2521 \param A12 Input - A_12
343     \param A22 Input - A_22
344 gross 583 \param ev0 Output - smallest eigenvalue
345     \param ev1 Output - eigenvalue
346 jfenwick 2521 \param ev2 Output -
347 gross 583 \param V00 Output - eigenvector componenent coresponding to ev0
348     \param V10 Output - eigenvector componenent coresponding to ev0
349 jfenwick 2521 \param V20 Output -
350 gross 583 \param V01 Output - eigenvector componenent coresponding to ev1
351     \param V11 Output - eigenvector componenent coresponding to ev1
352 jfenwick 2521 \param V21 Output -
353     \param V02 Output -
354     \param V12 Output -
355     \param V22 Output -
356 gross 583 \param tol Input - tolerance to identify to eigenvalues
357     */
358     inline
359     void eigenvalues_and_eigenvectors3(const double A00, const double A01, const double A02,
360     const double A11, const double A12, const double A22,
361     double* ev0, double* ev1, double* ev2,
362 matt 1327 double* V00, double* V10, double* V20,
363     double* V01, double* V11, double* V21,
364     double* V02, double* V12, double* V22,
365 gross 583 const double tol)
366     {
367     register const double absA01=fabs(A01);
368     register const double absA02=fabs(A02);
369     register const double m=absA01>absA02 ? absA01 : absA02;
370     if (m<=0) {
371     double TEMP_V00,TEMP_V10,TEMP_V01,TEMP_V11,TEMP_EV0,TEMP_EV1;
372     eigenvalues_and_eigenvectors2(A11,A12,A22,
373     &TEMP_EV0,&TEMP_EV1,
374     &TEMP_V00,&TEMP_V10,&TEMP_V01,&TEMP_V11,tol);
375     if (A00<=TEMP_EV0) {
376     *V00=1.;
377     *V10=0.;
378     *V20=0.;
379     *V01=0.;
380     *V11=TEMP_V00;
381     *V21=TEMP_V10;
382     *V02=0.;
383     *V12=TEMP_V01;
384     *V22=TEMP_V11;
385     *ev0=A00;
386     *ev1=TEMP_EV0;
387     *ev2=TEMP_EV1;
388     } else if (A00>TEMP_EV1) {
389 gross 588 *V02=1.;
390 gross 583 *V12=0.;
391 gross 588 *V22=0.;
392     *V00=0.;
393     *V10=TEMP_V00;
394     *V20=TEMP_V10;
395     *V01=0.;
396     *V11=TEMP_V01;
397     *V21=TEMP_V11;
398 gross 583 *ev0=TEMP_EV0;
399     *ev1=TEMP_EV1;
400 gross 588 *ev2=A00;
401 gross 583 } else {
402 gross 588 *V01=1.;
403     *V11=0.;
404     *V21=0.;
405     *V00=0.;
406     *V10=TEMP_V00;
407 gross 583 *V20=TEMP_V10;
408 gross 588 *V02=0.;
409     *V12=TEMP_V01;
410 gross 583 *V22=TEMP_V11;
411     *ev0=TEMP_EV0;
412     *ev1=A00;
413     *ev2=TEMP_EV1;
414     }
415     } else {
416     eigenvalues3(A00,A01,A02,A11,A12,A22,ev0,ev1,ev2);
417     const register double absev0=fabs(*ev0);
418     const register double absev1=fabs(*ev1);
419     const register double absev2=fabs(*ev2);
420     register double max_ev=absev0>absev1 ? absev0 : absev1;
421     max_ev=max_ev>absev2 ? max_ev : absev2;
422     const register double d_01=fabs((*ev0)-(*ev1));
423     const register double d_12=fabs((*ev1)-(*ev2));
424     const register double max_d=d_01>d_12 ? d_01 : d_12;
425     if (max_d<=tol*max_ev) {
426     *V00=1.;
427     *V10=0;
428     *V20=0;
429     *V01=0;
430     *V11=1.;
431     *V21=0;
432     *V02=0;
433     *V12=0;
434     *V22=1.;
435     } else {
436     const register double S00=A00-(*ev0);
437     const register double absS00=fabs(S00);
438 jfenwick 1946 if (absS00>m) {
439 gross 583 vectorInKernel3__nonZeroA00(S00,A01,A02,A01,A11-(*ev0),A12,A02,A12,A22-(*ev0),V00,V10,V20);
440     } else if (absA02<m) {
441     vectorInKernel3__nonZeroA00(A01,A11-(*ev0),A12,S00,A01,A02,A02,A12,A22-(*ev0),V00,V10,V20);
442     } else {
443     vectorInKernel3__nonZeroA00(A02,A12,A22-(*ev0),S00,A01,A02,A01,A11-(*ev0),A12,V00,V10,V20);
444     }
445     normalizeVector3(V00,V10,V20);;
446     const register double T00=A00-(*ev2);
447     const register double absT00=fabs(T00);
448 jfenwick 1946 if (absT00>m) {
449 gross 583 vectorInKernel3__nonZeroA00(T00,A01,A02,A01,A11-(*ev2),A12,A02,A12,A22-(*ev2),V02,V12,V22);
450     } else if (absA02<m) {
451     vectorInKernel3__nonZeroA00(A01,A11-(*ev2),A12,T00,A01,A02,A02,A12,A22-(*ev2),V02,V12,V22);
452     } else {
453     vectorInKernel3__nonZeroA00(A02,A12,A22-(*ev2),T00,A01,A02,A01,A11-(*ev2),A12,V02,V12,V22);
454     }
455     const register double dot=(*V02)*(*V00)+(*V12)*(*V10)+(*V22)*(*V20);
456     *V02-=dot*(*V00);
457     *V12-=dot*(*V10);
458     *V22-=dot*(*V20);
459     normalizeVector3(V02,V12,V22);
460     *V01=(*V10)*(*V22)-(*V12)*(*V20);
461     *V11=(*V20)*(*V02)-(*V00)*(*V22);
462     *V21=(*V00)*(*V12)-(*V02)*(*V10);
463     normalizeVector3(V01,V11,V21);
464     }
465     }
466     }
467 ksteube 813
468     // General tensor product: arg_2(SL x SR) = arg_0(SL x SM) * arg_1(SM x SR)
469     // SM is the product of the last axis_offset entries in arg_0.getShape().
470     inline
471     void matrix_matrix_product(const int SL, const int SM, const int SR, const double* A, const double* B, double* C, int transpose)
472     {
473     if (transpose == 0) {
474     for (int i=0; i<SL; i++) {
475     for (int j=0; j<SR; j++) {
476     double sum = 0.0;
477     for (int l=0; l<SM; l++) {
478     sum += A[i+SL*l] * B[l+SM*j];
479     }
480     C[i+SL*j] = sum;
481     }
482     }
483     }
484     else if (transpose == 1) {
485     for (int i=0; i<SL; i++) {
486     for (int j=0; j<SR; j++) {
487     double sum = 0.0;
488     for (int l=0; l<SM; l++) {
489     sum += A[i*SM+l] * B[l+SM*j];
490     }
491     C[i+SL*j] = sum;
492     }
493     }
494     }
495     else if (transpose == 2) {
496     for (int i=0; i<SL; i++) {
497     for (int j=0; j<SR; j++) {
498     double sum = 0.0;
499     for (int l=0; l<SM; l++) {
500     sum += A[i+SL*l] * B[l*SR+j];
501     }
502     C[i+SL*j] = sum;
503     }
504     }
505     }
506     }
507    
508 matt 1334 template <typename UnaryFunction>
509     inline void tensor_unary_operation(const int size,
510     const double *arg1,
511     double * argRes,
512     UnaryFunction operation)
513     {
514     for (int i = 0; i < size; ++i) {
515     argRes[i] = operation(arg1[i]);
516     }
517     return;
518     }
519    
520 matt 1327 template <typename BinaryFunction>
521     inline void tensor_binary_operation(const int size,
522     const double *arg1,
523     const double *arg2,
524     double * argRes,
525     BinaryFunction operation)
526     {
527     for (int i = 0; i < size; ++i) {
528     argRes[i] = operation(arg1[i], arg2[i]);
529     }
530     return;
531     }
532    
533     template <typename BinaryFunction>
534     inline void tensor_binary_operation(const int size,
535     double arg1,
536     const double *arg2,
537     double *argRes,
538     BinaryFunction operation)
539     {
540     for (int i = 0; i < size; ++i) {
541     argRes[i] = operation(arg1, arg2[i]);
542     }
543     return;
544     }
545    
546     template <typename BinaryFunction>
547     inline void tensor_binary_operation(const int size,
548     const double *arg1,
549     double arg2,
550     double *argRes,
551     BinaryFunction operation)
552     {
553     for (int i = 0; i < size; ++i) {
554     argRes[i] = operation(arg1[i], arg2);
555     }
556     return;
557     }
558    
559 gross 576 } // end of namespace
560     #endif

  ViewVC Help
Powered by ViewVC 1.1.26