/[escript]/trunk/escript/src/LocalOps.h
ViewVC logotype

Annotation of /trunk/escript/src/LocalOps.h

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1388 - (hide annotations)
Fri Jan 11 07:45:58 2008 UTC (11 years, 10 months ago) by trankine
File MIME type: text/plain
File size: 15843 byte(s)
And get the *(&(*&(* name right
1 gross 576
2 ksteube 1312 /* $Id$ */
3    
4     /*******************************************************
5     *
6     * Copyright 2003-2007 by ACceSS MNRF
7     * Copyright 2007 by University of Queensland
8     *
9     * http://esscc.uq.edu.au
10     * Primary Business: Queensland, Australia
11     * Licensed under the Open Software License version 3.0
12     * http://www.opensource.org/licenses/osl-3.0.php
13     *
14     *******************************************************/
15    
16 gross 576 #if !defined escript_LocalOps_H
17     #define escript_LocalOps_H
18 woo409 779 #ifdef __INTEL_COMPILER
19 phornby 1020 # include <mathimf.h>
20 woo409 757 #else
21 phornby 1020 # include <math.h>
22 woo409 757 #endif
23 phornby 1020 #ifndef M_PI
24     # define M_PI 3.14159265358979323846 /* pi */
25     #endif
26 woo409 757
27 gross 576 namespace escript {
28    
29    
30     /**
31     \brief
32     solves a 1x1 eigenvalue A*V=ev*V problem
33    
34 matt 1327 \param A00 Input - A_00
35 gross 576 \param ev0 Output - eigenvalue
36     */
37     inline
38     void eigenvalues1(const double A00,double* ev0) {
39    
40 gross 580 *ev0=A00;
41 gross 576
42     }
43     /**
44     \brief
45     solves a 2x2 eigenvalue A*V=ev*V problem for symmetric A
46    
47 matt 1327 \param A00 Input - A_00
48     \param A01 Input - A_01
49 gross 576 \param A11 Input - A_11
50     \param ev0 Output - smallest eigenvalue
51     \param ev1 Output - largest eigenvalue
52     */
53     inline
54 gross 583 void eigenvalues2(const double A00,const double A01,const double A11,
55 gross 576 double* ev0, double* ev1) {
56 gross 580 const register double trA=(A00+A11)/2.;
57     const register double A_00=A00-trA;
58     const register double A_11=A11-trA;
59     const register double s=sqrt(A01*A01-A_00*A_11);
60     *ev0=trA-s;
61     *ev1=trA+s;
62 gross 576 }
63     /**
64     \brief
65     solves a 3x3 eigenvalue A*V=ev*V problem for symmetric A
66    
67 matt 1327 \param A00 Input - A_00
68     \param A01 Input - A_01
69     \param A02 Input - A_02
70     \param A11 Input - A_11
71     \param A12 Input - A_12
72     \param A22 Input - A_22
73 gross 576 \param ev0 Output - smallest eigenvalue
74     \param ev1 Output - eigenvalue
75     \param ev2 Output - largest eigenvalue
76     */
77     inline
78     void eigenvalues3(const double A00, const double A01, const double A02,
79     const double A11, const double A12,
80     const double A22,
81     double* ev0, double* ev1,double* ev2) {
82    
83 gross 580 const register double trA=(A00+A11+A22)/3.;
84     const register double A_00=A00-trA;
85     const register double A_11=A11-trA;
86     const register double A_22=A22-trA;
87     const register double A01_2=A01*A01;
88     const register double A02_2=A02*A02;
89     const register double A12_2=A12*A12;
90     const register double p=A02_2+A12_2+A01_2+(A_00*A_00+A_11*A_11+A_22*A_22)/2.;
91 gross 585 if (p<=0.) {
92     *ev2=trA;
93     *ev1=trA;
94     *ev0=trA;
95    
96     } else {
97     const register double q=(A02_2*A_11+A12_2*A_00+A01_2*A_22)-(A_00*A_11*A_22+2*A01*A12*A02);
98     const register double sq_p=sqrt(p/3.);
99     register double z=-q/(2*pow(sq_p,3));
100     if (z<-1.) {
101     z=-1.;
102     } else if (z>1.) {
103     z=1.;
104     }
105     const register double alpha_3=acos(z)/3.;
106     *ev2=trA+2.*sq_p*cos(alpha_3);
107     *ev1=trA-2.*sq_p*cos(alpha_3+M_PI/3.);
108     *ev0=trA-2.*sq_p*cos(alpha_3-M_PI/3.);
109 gross 580 }
110 gross 576 }
111 gross 583 /**
112     \brief
113     solves a 1x1 eigenvalue A*V=ev*V problem for symmetric A
114    
115 matt 1327 \param A00 Input - A_00
116 gross 583 \param ev0 Output - eigenvalue
117     \param V00 Output - eigenvector
118     \param tol Input - tolerance to identify to eigenvalues
119     */
120     inline
121     void eigenvalues_and_eigenvectors1(const double A00,double* ev0,double* V00,const double tol)
122     {
123     eigenvalues1(A00,ev0);
124     *V00=1.;
125     return;
126     }
127     /**
128     \brief
129     returns a non-zero vector in the kernel of [[A00,A01],[A01,A11]] assuming that the kernel dimension is at least 1.
130    
131     \param A00 Input - matrix component
132     \param A10 Input - matrix component
133     \param A01 Input - matrix component
134     \param A11 Input - matrix component
135     \param V0 Output - vector component
136     \param V1 Output - vector component
137     */
138     inline
139     void vectorInKernel2(const double A00,const double A10,const double A01,const double A11,
140     double* V0, double*V1)
141     {
142     register double absA00=fabs(A00);
143 gross 587 register double absA10=fabs(A10);
144 gross 583 register double absA01=fabs(A01);
145     register double absA11=fabs(A11);
146 gross 587 register double m=absA11>absA10 ? absA11 : absA10;
147     if (absA00>m || absA01>m) {
148     *V0=-A01;
149 gross 583 *V1=A00;
150     } else {
151     if (m<=0) {
152     *V0=1.;
153     *V1=0.;
154     } else {
155     *V0=A11;
156 gross 587 *V1=-A10;
157 gross 583 }
158     }
159     }
160     /**
161     \brief
162 matt 1327 returns a non-zero vector in the kernel of [[A00,A01,A02],[A10,A11,A12],[A20,A21,A22]]
163 gross 583 assuming that the kernel dimension is at least 1 and A00 is non zero.
164    
165     \param A00 Input - matrix component
166     \param A10 Input - matrix component
167     \param A20 Input - matrix component
168     \param A01 Input - matrix component
169     \param A11 Input - matrix component
170     \param A21 Input - matrix component
171     \param A02 Input - matrix component
172     \param A12 Input - matrix component
173     \param A22 Input - matrix component
174     \param V0 Output - vector component
175     \param V1 Output - vector component
176     \param V2 Output - vector component
177     */
178     inline
179     void vectorInKernel3__nonZeroA00(const double A00,const double A10,const double A20,
180     const double A01,const double A11,const double A21,
181     const double A02,const double A12,const double A22,
182     double* V0,double* V1,double* V2)
183     {
184     double TEMP0,TEMP1;
185     register const double I00=1./A00;
186     register const double IA10=I00*A10;
187     register const double IA20=I00*A20;
188 gross 588 vectorInKernel2(A11-IA10*A01,A12-IA10*A02,
189     A21-IA20*A01,A22-IA20*A02,&TEMP0,&TEMP1);
190 gross 583 *V0=-(A10*TEMP0+A20*TEMP1);
191     *V1=A00*TEMP0;
192     *V2=A00*TEMP1;
193     }
194 matt 1327
195 gross 583 /**
196     \brief
197 matt 1327 solves a 2x2 eigenvalue A*V=ev*V problem for symmetric A. Eigenvectors are
198     ordered by increasing value and eigen vectors are normalizeVector3d such that
199 gross 583 length is zero and first non-zero component is positive.
200    
201 matt 1327 \param A00 Input - A_00
202     \param A01 Input - A_01
203     \param A11 Input - A_11
204 gross 583 \param ev0 Output - smallest eigenvalue
205     \param ev1 Output - eigenvalue
206     \param V00 Output - eigenvector componenent coresponding to ev0
207     \param V10 Output - eigenvector componenent coresponding to ev0
208     \param V01 Output - eigenvector componenent coresponding to ev1
209     \param V11 Output - eigenvector componenent coresponding to ev1
210     \param tol Input - tolerance to identify to eigenvalues
211     */
212     inline
213     void eigenvalues_and_eigenvectors2(const double A00,const double A01,const double A11,
214     double* ev0, double* ev1,
215     double* V00, double* V10, double* V01, double* V11,
216 matt 1327 const double tol)
217 gross 583 {
218     double TEMP0,TEMP1;
219     eigenvalues2(A00,A01,A11,ev0,ev1);
220     const register double absev0=fabs(*ev0);
221     const register double absev1=fabs(*ev1);
222     register double max_ev=absev0>absev1 ? absev0 : absev1;
223     if (fabs((*ev0)-(*ev1))<tol*max_ev) {
224     *V00=1.;
225     *V10=0.;
226     *V01=0.;
227     *V11=1.;
228     } else {
229     vectorInKernel2(A00-(*ev0),A01,A01,A11-(*ev0),&TEMP0,&TEMP1);
230     const register double scale=1./sqrt(TEMP0*TEMP0+TEMP1*TEMP1);
231     if (TEMP0<0.) {
232     *V00=-TEMP0*scale;
233     *V10=-TEMP1*scale;
234     if (TEMP1<0.) {
235 matt 1327 *V01= *V10;
236 gross 583 *V11=-(*V00);
237     } else {
238     *V01=-(*V10);
239 gross 587 *V11= (*V10);
240 gross 583 }
241     } else if (TEMP0>0.) {
242     *V00=TEMP0*scale;
243     *V10=TEMP1*scale;
244     if (TEMP1<0.) {
245 matt 1327 *V01=-(*V10);
246 gross 583 *V11= (*V00);
247     } else {
248 matt 1327 *V01= (*V10);
249     *V11=-(*V00);
250 gross 583 }
251     } else {
252     *V00=0.;
253     *V10=1;
254     *V11=0.;
255     *V01=1.;
256 matt 1327 }
257 gross 583 }
258     }
259     /**
260     \brief
261     nomalizes a 3-d vector such that length is one and first non-zero component is positive.
262    
263 matt 1327 \param V0 - vector componenent
264 gross 583 \param V1 - vector componenent
265     \param V2 - vector componenent
266     */
267     inline
268     void normalizeVector3(double* V0,double* V1,double* V2)
269     {
270     register double s;
271     if (*V0>0) {
272     s=1./sqrt((*V0)*(*V0)+(*V1)*(*V1)+(*V2)*(*V2));
273     *V0*=s;
274     *V1*=s;
275     *V2*=s;
276     } else if (*V0<0) {
277     s=-1./sqrt((*V0)*(*V0)+(*V1)*(*V1)+(*V2)*(*V2));
278     *V0*=s;
279     *V1*=s;
280     *V2*=s;
281     } else {
282     if (*V1>0) {
283     s=1./sqrt((*V1)*(*V1)+(*V2)*(*V2));
284     *V1*=s;
285     *V2*=s;
286     } else if (*V1<0) {
287     s=-1./sqrt((*V1)*(*V1)+(*V2)*(*V2));
288     *V1*=s;
289     *V2*=s;
290     } else {
291     *V2=1.;
292     }
293     }
294     }
295     /**
296     \brief
297 matt 1327 solves a 2x2 eigenvalue A*V=ev*V problem for symmetric A. Eigenvectors are
298     ordered by increasing value and eigen vectors are normalizeVector3d such that
299 gross 583 length is zero and first non-zero component is positive.
300    
301 matt 1327 \param A00 Input - A_00
302     \param A01 Input - A_01
303     \param A11 Input - A_11
304 gross 583 \param ev0 Output - smallest eigenvalue
305     \param ev1 Output - eigenvalue
306     \param V00 Output - eigenvector componenent coresponding to ev0
307     \param V10 Output - eigenvector componenent coresponding to ev0
308     \param V01 Output - eigenvector componenent coresponding to ev1
309     \param V11 Output - eigenvector componenent coresponding to ev1
310     \param tol Input - tolerance to identify to eigenvalues
311     */
312     inline
313     void eigenvalues_and_eigenvectors3(const double A00, const double A01, const double A02,
314     const double A11, const double A12, const double A22,
315     double* ev0, double* ev1, double* ev2,
316 matt 1327 double* V00, double* V10, double* V20,
317     double* V01, double* V11, double* V21,
318     double* V02, double* V12, double* V22,
319 gross 583 const double tol)
320     {
321     register const double absA01=fabs(A01);
322     register const double absA02=fabs(A02);
323     register const double m=absA01>absA02 ? absA01 : absA02;
324     if (m<=0) {
325     double TEMP_V00,TEMP_V10,TEMP_V01,TEMP_V11,TEMP_EV0,TEMP_EV1;
326     eigenvalues_and_eigenvectors2(A11,A12,A22,
327     &TEMP_EV0,&TEMP_EV1,
328     &TEMP_V00,&TEMP_V10,&TEMP_V01,&TEMP_V11,tol);
329     if (A00<=TEMP_EV0) {
330     *V00=1.;
331     *V10=0.;
332     *V20=0.;
333     *V01=0.;
334     *V11=TEMP_V00;
335     *V21=TEMP_V10;
336     *V02=0.;
337     *V12=TEMP_V01;
338     *V22=TEMP_V11;
339     *ev0=A00;
340     *ev1=TEMP_EV0;
341     *ev2=TEMP_EV1;
342     } else if (A00>TEMP_EV1) {
343 gross 588 *V02=1.;
344 gross 583 *V12=0.;
345 gross 588 *V22=0.;
346     *V00=0.;
347     *V10=TEMP_V00;
348     *V20=TEMP_V10;
349     *V01=0.;
350     *V11=TEMP_V01;
351     *V21=TEMP_V11;
352 gross 583 *ev0=TEMP_EV0;
353     *ev1=TEMP_EV1;
354 gross 588 *ev2=A00;
355 gross 583 } else {
356 gross 588 *V01=1.;
357     *V11=0.;
358     *V21=0.;
359     *V00=0.;
360     *V10=TEMP_V00;
361 gross 583 *V20=TEMP_V10;
362 gross 588 *V02=0.;
363     *V12=TEMP_V01;
364 gross 583 *V22=TEMP_V11;
365     *ev0=TEMP_EV0;
366     *ev1=A00;
367     *ev2=TEMP_EV1;
368     }
369     } else {
370     eigenvalues3(A00,A01,A02,A11,A12,A22,ev0,ev1,ev2);
371     const register double absev0=fabs(*ev0);
372     const register double absev1=fabs(*ev1);
373     const register double absev2=fabs(*ev2);
374     register double max_ev=absev0>absev1 ? absev0 : absev1;
375     max_ev=max_ev>absev2 ? max_ev : absev2;
376     const register double d_01=fabs((*ev0)-(*ev1));
377     const register double d_12=fabs((*ev1)-(*ev2));
378     const register double max_d=d_01>d_12 ? d_01 : d_12;
379     if (max_d<=tol*max_ev) {
380     *V00=1.;
381     *V10=0;
382     *V20=0;
383     *V01=0;
384     *V11=1.;
385     *V21=0;
386     *V02=0;
387     *V12=0;
388     *V22=1.;
389     } else {
390     const register double S00=A00-(*ev0);
391     const register double absS00=fabs(S00);
392     if (fabs(S00)>m) {
393     vectorInKernel3__nonZeroA00(S00,A01,A02,A01,A11-(*ev0),A12,A02,A12,A22-(*ev0),V00,V10,V20);
394     } else if (absA02<m) {
395     vectorInKernel3__nonZeroA00(A01,A11-(*ev0),A12,S00,A01,A02,A02,A12,A22-(*ev0),V00,V10,V20);
396     } else {
397     vectorInKernel3__nonZeroA00(A02,A12,A22-(*ev0),S00,A01,A02,A01,A11-(*ev0),A12,V00,V10,V20);
398     }
399     normalizeVector3(V00,V10,V20);;
400     const register double T00=A00-(*ev2);
401     const register double absT00=fabs(T00);
402     if (fabs(T00)>m) {
403     vectorInKernel3__nonZeroA00(T00,A01,A02,A01,A11-(*ev2),A12,A02,A12,A22-(*ev2),V02,V12,V22);
404     } else if (absA02<m) {
405     vectorInKernel3__nonZeroA00(A01,A11-(*ev2),A12,T00,A01,A02,A02,A12,A22-(*ev2),V02,V12,V22);
406     } else {
407     vectorInKernel3__nonZeroA00(A02,A12,A22-(*ev2),T00,A01,A02,A01,A11-(*ev2),A12,V02,V12,V22);
408     }
409     const register double dot=(*V02)*(*V00)+(*V12)*(*V10)+(*V22)*(*V20);
410     *V02-=dot*(*V00);
411     *V12-=dot*(*V10);
412     *V22-=dot*(*V20);
413     normalizeVector3(V02,V12,V22);
414     *V01=(*V10)*(*V22)-(*V12)*(*V20);
415     *V11=(*V20)*(*V02)-(*V00)*(*V22);
416     *V21=(*V00)*(*V12)-(*V02)*(*V10);
417     normalizeVector3(V01,V11,V21);
418     }
419     }
420     }
421 ksteube 813
422     // General tensor product: arg_2(SL x SR) = arg_0(SL x SM) * arg_1(SM x SR)
423     // SM is the product of the last axis_offset entries in arg_0.getShape().
424     inline
425     void matrix_matrix_product(const int SL, const int SM, const int SR, const double* A, const double* B, double* C, int transpose)
426     {
427     if (transpose == 0) {
428     for (int i=0; i<SL; i++) {
429     for (int j=0; j<SR; j++) {
430     double sum = 0.0;
431     for (int l=0; l<SM; l++) {
432     sum += A[i+SL*l] * B[l+SM*j];
433     }
434     C[i+SL*j] = sum;
435     }
436     }
437     }
438     else if (transpose == 1) {
439     for (int i=0; i<SL; i++) {
440     for (int j=0; j<SR; j++) {
441     double sum = 0.0;
442     for (int l=0; l<SM; l++) {
443     sum += A[i*SM+l] * B[l+SM*j];
444     }
445     C[i+SL*j] = sum;
446     }
447     }
448     }
449     else if (transpose == 2) {
450     for (int i=0; i<SL; i++) {
451     for (int j=0; j<SR; j++) {
452     double sum = 0.0;
453     for (int l=0; l<SM; l++) {
454     sum += A[i+SL*l] * B[l*SR+j];
455     }
456     C[i+SL*j] = sum;
457     }
458     }
459     }
460     }
461    
462 matt 1334 template <typename UnaryFunction>
463     inline void tensor_unary_operation(const int size,
464     const double *arg1,
465     double * argRes,
466     UnaryFunction operation)
467     {
468     for (int i = 0; i < size; ++i) {
469     argRes[i] = operation(arg1[i]);
470     }
471     return;
472     }
473    
474 matt 1327 template <typename BinaryFunction>
475     inline void tensor_binary_operation(const int size,
476     const double *arg1,
477     const double *arg2,
478     double * argRes,
479     BinaryFunction operation)
480     {
481     for (int i = 0; i < size; ++i) {
482     argRes[i] = operation(arg1[i], arg2[i]);
483     }
484     return;
485     }
486    
487     template <typename BinaryFunction>
488     inline void tensor_binary_operation(const int size,
489     double arg1,
490     const double *arg2,
491     double *argRes,
492     BinaryFunction operation)
493     {
494     for (int i = 0; i < size; ++i) {
495     argRes[i] = operation(arg1, arg2[i]);
496     }
497     return;
498     }
499    
500     template <typename BinaryFunction>
501     inline void tensor_binary_operation(const int size,
502     const double *arg1,
503     double arg2,
504     double *argRes,
505     BinaryFunction operation)
506     {
507     for (int i = 0; i < size; ++i) {
508     argRes[i] = operation(arg1[i], arg2);
509     }
510     return;
511     }
512    
513 gross 576 } // end of namespace
514     #endif

  ViewVC Help
Powered by ViewVC 1.1.26