/[escript]/trunk/escript/src/LocalOps.h
ViewVC logotype

Annotation of /trunk/escript/src/LocalOps.h

Parent Directory Parent Directory | Revision Log Revision Log


Revision 2005 - (hide annotations)
Mon Nov 10 01:21:39 2008 UTC (11 years, 3 months ago) by jfenwick
File MIME type: text/plain
File size: 15980 byte(s)
Bringing all changes across from schroedinger.
(Note this does not mean development is done, just that it will happen
on the trunk for now).
If anyone notices any problems please contact me.


1 gross 576
2 ksteube 1312 /*******************************************************
3 ksteube 1811 *
4     * Copyright (c) 2003-2008 by University of Queensland
5     * Earth Systems Science Computational Center (ESSCC)
6     * http://www.uq.edu.au/esscc
7     *
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13 ksteube 1312
14 ksteube 1811
15 gross 576 #if !defined escript_LocalOps_H
16     #define escript_LocalOps_H
17 woo409 779 #ifdef __INTEL_COMPILER
18 phornby 1020 # include <mathimf.h>
19 woo409 757 #else
20 phornby 1020 # include <math.h>
21 woo409 757 #endif
22 phornby 1020 #ifndef M_PI
23     # define M_PI 3.14159265358979323846 /* pi */
24     #endif
25 woo409 757
26 jfenwick 2005
27     /**
28     \file LocalOps.h
29     \brief Describes binary operations performed on double*.
30    
31     For operations on DataAbstract see BinaryOp.h.
32     For operations on DataVector see DataMaths.h.
33     */
34    
35 gross 576 namespace escript {
36    
37    
38     /**
39     \brief
40     solves a 1x1 eigenvalue A*V=ev*V problem
41    
42 matt 1327 \param A00 Input - A_00
43 gross 576 \param ev0 Output - eigenvalue
44     */
45     inline
46     void eigenvalues1(const double A00,double* ev0) {
47    
48 gross 580 *ev0=A00;
49 gross 576
50     }
51     /**
52     \brief
53     solves a 2x2 eigenvalue A*V=ev*V problem for symmetric A
54    
55 matt 1327 \param A00 Input - A_00
56     \param A01 Input - A_01
57 gross 576 \param A11 Input - A_11
58     \param ev0 Output - smallest eigenvalue
59     \param ev1 Output - largest eigenvalue
60     */
61     inline
62 gross 583 void eigenvalues2(const double A00,const double A01,const double A11,
63 gross 576 double* ev0, double* ev1) {
64 gross 580 const register double trA=(A00+A11)/2.;
65     const register double A_00=A00-trA;
66     const register double A_11=A11-trA;
67     const register double s=sqrt(A01*A01-A_00*A_11);
68     *ev0=trA-s;
69     *ev1=trA+s;
70 gross 576 }
71     /**
72     \brief
73     solves a 3x3 eigenvalue A*V=ev*V problem for symmetric A
74    
75 matt 1327 \param A00 Input - A_00
76     \param A01 Input - A_01
77     \param A02 Input - A_02
78     \param A11 Input - A_11
79     \param A12 Input - A_12
80     \param A22 Input - A_22
81 gross 576 \param ev0 Output - smallest eigenvalue
82     \param ev1 Output - eigenvalue
83     \param ev2 Output - largest eigenvalue
84     */
85     inline
86     void eigenvalues3(const double A00, const double A01, const double A02,
87     const double A11, const double A12,
88     const double A22,
89     double* ev0, double* ev1,double* ev2) {
90    
91 gross 580 const register double trA=(A00+A11+A22)/3.;
92     const register double A_00=A00-trA;
93     const register double A_11=A11-trA;
94     const register double A_22=A22-trA;
95     const register double A01_2=A01*A01;
96     const register double A02_2=A02*A02;
97     const register double A12_2=A12*A12;
98     const register double p=A02_2+A12_2+A01_2+(A_00*A_00+A_11*A_11+A_22*A_22)/2.;
99 gross 585 if (p<=0.) {
100     *ev2=trA;
101     *ev1=trA;
102     *ev0=trA;
103    
104     } else {
105     const register double q=(A02_2*A_11+A12_2*A_00+A01_2*A_22)-(A_00*A_11*A_22+2*A01*A12*A02);
106     const register double sq_p=sqrt(p/3.);
107     register double z=-q/(2*pow(sq_p,3));
108     if (z<-1.) {
109     z=-1.;
110     } else if (z>1.) {
111     z=1.;
112     }
113     const register double alpha_3=acos(z)/3.;
114     *ev2=trA+2.*sq_p*cos(alpha_3);
115     *ev1=trA-2.*sq_p*cos(alpha_3+M_PI/3.);
116     *ev0=trA-2.*sq_p*cos(alpha_3-M_PI/3.);
117 gross 580 }
118 gross 576 }
119 gross 583 /**
120     \brief
121     solves a 1x1 eigenvalue A*V=ev*V problem for symmetric A
122    
123 matt 1327 \param A00 Input - A_00
124 gross 583 \param ev0 Output - eigenvalue
125     \param V00 Output - eigenvector
126     \param tol Input - tolerance to identify to eigenvalues
127     */
128     inline
129     void eigenvalues_and_eigenvectors1(const double A00,double* ev0,double* V00,const double tol)
130     {
131     eigenvalues1(A00,ev0);
132     *V00=1.;
133     return;
134     }
135     /**
136     \brief
137     returns a non-zero vector in the kernel of [[A00,A01],[A01,A11]] assuming that the kernel dimension is at least 1.
138    
139     \param A00 Input - matrix component
140     \param A10 Input - matrix component
141     \param A01 Input - matrix component
142     \param A11 Input - matrix component
143     \param V0 Output - vector component
144     \param V1 Output - vector component
145     */
146     inline
147     void vectorInKernel2(const double A00,const double A10,const double A01,const double A11,
148     double* V0, double*V1)
149     {
150     register double absA00=fabs(A00);
151 gross 587 register double absA10=fabs(A10);
152 gross 583 register double absA01=fabs(A01);
153     register double absA11=fabs(A11);
154 gross 587 register double m=absA11>absA10 ? absA11 : absA10;
155     if (absA00>m || absA01>m) {
156     *V0=-A01;
157 gross 583 *V1=A00;
158     } else {
159     if (m<=0) {
160     *V0=1.;
161     *V1=0.;
162     } else {
163     *V0=A11;
164 gross 587 *V1=-A10;
165 gross 583 }
166     }
167     }
168     /**
169     \brief
170 matt 1327 returns a non-zero vector in the kernel of [[A00,A01,A02],[A10,A11,A12],[A20,A21,A22]]
171 gross 583 assuming that the kernel dimension is at least 1 and A00 is non zero.
172    
173     \param A00 Input - matrix component
174     \param A10 Input - matrix component
175     \param A20 Input - matrix component
176     \param A01 Input - matrix component
177     \param A11 Input - matrix component
178     \param A21 Input - matrix component
179     \param A02 Input - matrix component
180     \param A12 Input - matrix component
181     \param A22 Input - matrix component
182     \param V0 Output - vector component
183     \param V1 Output - vector component
184     \param V2 Output - vector component
185     */
186     inline
187     void vectorInKernel3__nonZeroA00(const double A00,const double A10,const double A20,
188     const double A01,const double A11,const double A21,
189     const double A02,const double A12,const double A22,
190     double* V0,double* V1,double* V2)
191     {
192     double TEMP0,TEMP1;
193     register const double I00=1./A00;
194     register const double IA10=I00*A10;
195     register const double IA20=I00*A20;
196 gross 588 vectorInKernel2(A11-IA10*A01,A12-IA10*A02,
197     A21-IA20*A01,A22-IA20*A02,&TEMP0,&TEMP1);
198 gross 583 *V0=-(A10*TEMP0+A20*TEMP1);
199     *V1=A00*TEMP0;
200     *V2=A00*TEMP1;
201     }
202 matt 1327
203 gross 583 /**
204     \brief
205 matt 1327 solves a 2x2 eigenvalue A*V=ev*V problem for symmetric A. Eigenvectors are
206     ordered by increasing value and eigen vectors are normalizeVector3d such that
207 gross 583 length is zero and first non-zero component is positive.
208    
209 matt 1327 \param A00 Input - A_00
210     \param A01 Input - A_01
211     \param A11 Input - A_11
212 gross 583 \param ev0 Output - smallest eigenvalue
213     \param ev1 Output - eigenvalue
214     \param V00 Output - eigenvector componenent coresponding to ev0
215     \param V10 Output - eigenvector componenent coresponding to ev0
216     \param V01 Output - eigenvector componenent coresponding to ev1
217     \param V11 Output - eigenvector componenent coresponding to ev1
218     \param tol Input - tolerance to identify to eigenvalues
219     */
220     inline
221     void eigenvalues_and_eigenvectors2(const double A00,const double A01,const double A11,
222     double* ev0, double* ev1,
223     double* V00, double* V10, double* V01, double* V11,
224 matt 1327 const double tol)
225 gross 583 {
226     double TEMP0,TEMP1;
227     eigenvalues2(A00,A01,A11,ev0,ev1);
228     const register double absev0=fabs(*ev0);
229     const register double absev1=fabs(*ev1);
230     register double max_ev=absev0>absev1 ? absev0 : absev1;
231     if (fabs((*ev0)-(*ev1))<tol*max_ev) {
232     *V00=1.;
233     *V10=0.;
234     *V01=0.;
235     *V11=1.;
236     } else {
237     vectorInKernel2(A00-(*ev0),A01,A01,A11-(*ev0),&TEMP0,&TEMP1);
238     const register double scale=1./sqrt(TEMP0*TEMP0+TEMP1*TEMP1);
239     if (TEMP0<0.) {
240     *V00=-TEMP0*scale;
241     *V10=-TEMP1*scale;
242     if (TEMP1<0.) {
243 matt 1327 *V01= *V10;
244 gross 583 *V11=-(*V00);
245     } else {
246     *V01=-(*V10);
247 gross 587 *V11= (*V10);
248 gross 583 }
249     } else if (TEMP0>0.) {
250     *V00=TEMP0*scale;
251     *V10=TEMP1*scale;
252     if (TEMP1<0.) {
253 matt 1327 *V01=-(*V10);
254 gross 583 *V11= (*V00);
255     } else {
256 matt 1327 *V01= (*V10);
257     *V11=-(*V00);
258 gross 583 }
259     } else {
260     *V00=0.;
261     *V10=1;
262     *V11=0.;
263     *V01=1.;
264 matt 1327 }
265 gross 583 }
266     }
267     /**
268     \brief
269     nomalizes a 3-d vector such that length is one and first non-zero component is positive.
270    
271 matt 1327 \param V0 - vector componenent
272 gross 583 \param V1 - vector componenent
273     \param V2 - vector componenent
274     */
275     inline
276     void normalizeVector3(double* V0,double* V1,double* V2)
277     {
278     register double s;
279     if (*V0>0) {
280     s=1./sqrt((*V0)*(*V0)+(*V1)*(*V1)+(*V2)*(*V2));
281     *V0*=s;
282     *V1*=s;
283     *V2*=s;
284     } else if (*V0<0) {
285     s=-1./sqrt((*V0)*(*V0)+(*V1)*(*V1)+(*V2)*(*V2));
286     *V0*=s;
287     *V1*=s;
288     *V2*=s;
289     } else {
290     if (*V1>0) {
291     s=1./sqrt((*V1)*(*V1)+(*V2)*(*V2));
292     *V1*=s;
293     *V2*=s;
294     } else if (*V1<0) {
295     s=-1./sqrt((*V1)*(*V1)+(*V2)*(*V2));
296     *V1*=s;
297     *V2*=s;
298     } else {
299     *V2=1.;
300     }
301     }
302     }
303     /**
304     \brief
305 matt 1327 solves a 2x2 eigenvalue A*V=ev*V problem for symmetric A. Eigenvectors are
306     ordered by increasing value and eigen vectors are normalizeVector3d such that
307 gross 583 length is zero and first non-zero component is positive.
308    
309 matt 1327 \param A00 Input - A_00
310     \param A01 Input - A_01
311     \param A11 Input - A_11
312 gross 583 \param ev0 Output - smallest eigenvalue
313     \param ev1 Output - eigenvalue
314     \param V00 Output - eigenvector componenent coresponding to ev0
315     \param V10 Output - eigenvector componenent coresponding to ev0
316     \param V01 Output - eigenvector componenent coresponding to ev1
317     \param V11 Output - eigenvector componenent coresponding to ev1
318     \param tol Input - tolerance to identify to eigenvalues
319     */
320     inline
321     void eigenvalues_and_eigenvectors3(const double A00, const double A01, const double A02,
322     const double A11, const double A12, const double A22,
323     double* ev0, double* ev1, double* ev2,
324 matt 1327 double* V00, double* V10, double* V20,
325     double* V01, double* V11, double* V21,
326     double* V02, double* V12, double* V22,
327 gross 583 const double tol)
328     {
329     register const double absA01=fabs(A01);
330     register const double absA02=fabs(A02);
331     register const double m=absA01>absA02 ? absA01 : absA02;
332     if (m<=0) {
333     double TEMP_V00,TEMP_V10,TEMP_V01,TEMP_V11,TEMP_EV0,TEMP_EV1;
334     eigenvalues_and_eigenvectors2(A11,A12,A22,
335     &TEMP_EV0,&TEMP_EV1,
336     &TEMP_V00,&TEMP_V10,&TEMP_V01,&TEMP_V11,tol);
337     if (A00<=TEMP_EV0) {
338     *V00=1.;
339     *V10=0.;
340     *V20=0.;
341     *V01=0.;
342     *V11=TEMP_V00;
343     *V21=TEMP_V10;
344     *V02=0.;
345     *V12=TEMP_V01;
346     *V22=TEMP_V11;
347     *ev0=A00;
348     *ev1=TEMP_EV0;
349     *ev2=TEMP_EV1;
350     } else if (A00>TEMP_EV1) {
351 gross 588 *V02=1.;
352 gross 583 *V12=0.;
353 gross 588 *V22=0.;
354     *V00=0.;
355     *V10=TEMP_V00;
356     *V20=TEMP_V10;
357     *V01=0.;
358     *V11=TEMP_V01;
359     *V21=TEMP_V11;
360 gross 583 *ev0=TEMP_EV0;
361     *ev1=TEMP_EV1;
362 gross 588 *ev2=A00;
363 gross 583 } else {
364 gross 588 *V01=1.;
365     *V11=0.;
366     *V21=0.;
367     *V00=0.;
368     *V10=TEMP_V00;
369 gross 583 *V20=TEMP_V10;
370 gross 588 *V02=0.;
371     *V12=TEMP_V01;
372 gross 583 *V22=TEMP_V11;
373     *ev0=TEMP_EV0;
374     *ev1=A00;
375     *ev2=TEMP_EV1;
376     }
377     } else {
378     eigenvalues3(A00,A01,A02,A11,A12,A22,ev0,ev1,ev2);
379     const register double absev0=fabs(*ev0);
380     const register double absev1=fabs(*ev1);
381     const register double absev2=fabs(*ev2);
382     register double max_ev=absev0>absev1 ? absev0 : absev1;
383     max_ev=max_ev>absev2 ? max_ev : absev2;
384     const register double d_01=fabs((*ev0)-(*ev1));
385     const register double d_12=fabs((*ev1)-(*ev2));
386     const register double max_d=d_01>d_12 ? d_01 : d_12;
387     if (max_d<=tol*max_ev) {
388     *V00=1.;
389     *V10=0;
390     *V20=0;
391     *V01=0;
392     *V11=1.;
393     *V21=0;
394     *V02=0;
395     *V12=0;
396     *V22=1.;
397     } else {
398     const register double S00=A00-(*ev0);
399     const register double absS00=fabs(S00);
400 jfenwick 1946 if (absS00>m) {
401 gross 583 vectorInKernel3__nonZeroA00(S00,A01,A02,A01,A11-(*ev0),A12,A02,A12,A22-(*ev0),V00,V10,V20);
402     } else if (absA02<m) {
403     vectorInKernel3__nonZeroA00(A01,A11-(*ev0),A12,S00,A01,A02,A02,A12,A22-(*ev0),V00,V10,V20);
404     } else {
405     vectorInKernel3__nonZeroA00(A02,A12,A22-(*ev0),S00,A01,A02,A01,A11-(*ev0),A12,V00,V10,V20);
406     }
407     normalizeVector3(V00,V10,V20);;
408     const register double T00=A00-(*ev2);
409     const register double absT00=fabs(T00);
410 jfenwick 1946 if (absT00>m) {
411 gross 583 vectorInKernel3__nonZeroA00(T00,A01,A02,A01,A11-(*ev2),A12,A02,A12,A22-(*ev2),V02,V12,V22);
412     } else if (absA02<m) {
413     vectorInKernel3__nonZeroA00(A01,A11-(*ev2),A12,T00,A01,A02,A02,A12,A22-(*ev2),V02,V12,V22);
414     } else {
415     vectorInKernel3__nonZeroA00(A02,A12,A22-(*ev2),T00,A01,A02,A01,A11-(*ev2),A12,V02,V12,V22);
416     }
417     const register double dot=(*V02)*(*V00)+(*V12)*(*V10)+(*V22)*(*V20);
418     *V02-=dot*(*V00);
419     *V12-=dot*(*V10);
420     *V22-=dot*(*V20);
421     normalizeVector3(V02,V12,V22);
422     *V01=(*V10)*(*V22)-(*V12)*(*V20);
423     *V11=(*V20)*(*V02)-(*V00)*(*V22);
424     *V21=(*V00)*(*V12)-(*V02)*(*V10);
425     normalizeVector3(V01,V11,V21);
426     }
427     }
428     }
429 ksteube 813
430     // General tensor product: arg_2(SL x SR) = arg_0(SL x SM) * arg_1(SM x SR)
431     // SM is the product of the last axis_offset entries in arg_0.getShape().
432     inline
433     void matrix_matrix_product(const int SL, const int SM, const int SR, const double* A, const double* B, double* C, int transpose)
434     {
435     if (transpose == 0) {
436     for (int i=0; i<SL; i++) {
437     for (int j=0; j<SR; j++) {
438     double sum = 0.0;
439     for (int l=0; l<SM; l++) {
440     sum += A[i+SL*l] * B[l+SM*j];
441     }
442     C[i+SL*j] = sum;
443     }
444     }
445     }
446     else if (transpose == 1) {
447     for (int i=0; i<SL; i++) {
448     for (int j=0; j<SR; j++) {
449     double sum = 0.0;
450     for (int l=0; l<SM; l++) {
451     sum += A[i*SM+l] * B[l+SM*j];
452     }
453     C[i+SL*j] = sum;
454     }
455     }
456     }
457     else if (transpose == 2) {
458     for (int i=0; i<SL; i++) {
459     for (int j=0; j<SR; j++) {
460     double sum = 0.0;
461     for (int l=0; l<SM; l++) {
462     sum += A[i+SL*l] * B[l*SR+j];
463     }
464     C[i+SL*j] = sum;
465     }
466     }
467     }
468     }
469    
470 matt 1334 template <typename UnaryFunction>
471     inline void tensor_unary_operation(const int size,
472     const double *arg1,
473     double * argRes,
474     UnaryFunction operation)
475     {
476     for (int i = 0; i < size; ++i) {
477     argRes[i] = operation(arg1[i]);
478     }
479     return;
480     }
481    
482 matt 1327 template <typename BinaryFunction>
483     inline void tensor_binary_operation(const int size,
484     const double *arg1,
485     const double *arg2,
486     double * argRes,
487     BinaryFunction operation)
488     {
489     for (int i = 0; i < size; ++i) {
490     argRes[i] = operation(arg1[i], arg2[i]);
491     }
492     return;
493     }
494    
495     template <typename BinaryFunction>
496     inline void tensor_binary_operation(const int size,
497     double arg1,
498     const double *arg2,
499     double *argRes,
500     BinaryFunction operation)
501     {
502     for (int i = 0; i < size; ++i) {
503     argRes[i] = operation(arg1, arg2[i]);
504     }
505     return;
506     }
507    
508     template <typename BinaryFunction>
509     inline void tensor_binary_operation(const int size,
510     const double *arg1,
511     double arg2,
512     double *argRes,
513     BinaryFunction operation)
514     {
515     for (int i = 0; i < size; ++i) {
516     argRes[i] = operation(arg1[i], arg2);
517     }
518     return;
519     }
520    
521 gross 576 } // end of namespace
522     #endif

  ViewVC Help
Powered by ViewVC 1.1.26