/[escript]/trunk/escript/src/LocalOps.h
ViewVC logotype

Contents of /trunk/escript/src/LocalOps.h

Parent Directory Parent Directory | Revision Log Revision Log


Revision 2005 - (show annotations)
Mon Nov 10 01:21:39 2008 UTC (11 years ago) by jfenwick
File MIME type: text/plain
File size: 15980 byte(s)
Bringing all changes across from schroedinger.
(Note this does not mean development is done, just that it will happen
on the trunk for now).
If anyone notices any problems please contact me.


1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2008 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14
15 #if !defined escript_LocalOps_H
16 #define escript_LocalOps_H
17 #ifdef __INTEL_COMPILER
18 # include <mathimf.h>
19 #else
20 # include <math.h>
21 #endif
22 #ifndef M_PI
23 # define M_PI 3.14159265358979323846 /* pi */
24 #endif
25
26
27 /**
28 \file LocalOps.h
29 \brief Describes binary operations performed on double*.
30
31 For operations on DataAbstract see BinaryOp.h.
32 For operations on DataVector see DataMaths.h.
33 */
34
35 namespace escript {
36
37
38 /**
39 \brief
40 solves a 1x1 eigenvalue A*V=ev*V problem
41
42 \param A00 Input - A_00
43 \param ev0 Output - eigenvalue
44 */
45 inline
46 void eigenvalues1(const double A00,double* ev0) {
47
48 *ev0=A00;
49
50 }
51 /**
52 \brief
53 solves a 2x2 eigenvalue A*V=ev*V problem for symmetric A
54
55 \param A00 Input - A_00
56 \param A01 Input - A_01
57 \param A11 Input - A_11
58 \param ev0 Output - smallest eigenvalue
59 \param ev1 Output - largest eigenvalue
60 */
61 inline
62 void eigenvalues2(const double A00,const double A01,const double A11,
63 double* ev0, double* ev1) {
64 const register double trA=(A00+A11)/2.;
65 const register double A_00=A00-trA;
66 const register double A_11=A11-trA;
67 const register double s=sqrt(A01*A01-A_00*A_11);
68 *ev0=trA-s;
69 *ev1=trA+s;
70 }
71 /**
72 \brief
73 solves a 3x3 eigenvalue A*V=ev*V problem for symmetric A
74
75 \param A00 Input - A_00
76 \param A01 Input - A_01
77 \param A02 Input - A_02
78 \param A11 Input - A_11
79 \param A12 Input - A_12
80 \param A22 Input - A_22
81 \param ev0 Output - smallest eigenvalue
82 \param ev1 Output - eigenvalue
83 \param ev2 Output - largest eigenvalue
84 */
85 inline
86 void eigenvalues3(const double A00, const double A01, const double A02,
87 const double A11, const double A12,
88 const double A22,
89 double* ev0, double* ev1,double* ev2) {
90
91 const register double trA=(A00+A11+A22)/3.;
92 const register double A_00=A00-trA;
93 const register double A_11=A11-trA;
94 const register double A_22=A22-trA;
95 const register double A01_2=A01*A01;
96 const register double A02_2=A02*A02;
97 const register double A12_2=A12*A12;
98 const register double p=A02_2+A12_2+A01_2+(A_00*A_00+A_11*A_11+A_22*A_22)/2.;
99 if (p<=0.) {
100 *ev2=trA;
101 *ev1=trA;
102 *ev0=trA;
103
104 } else {
105 const register double q=(A02_2*A_11+A12_2*A_00+A01_2*A_22)-(A_00*A_11*A_22+2*A01*A12*A02);
106 const register double sq_p=sqrt(p/3.);
107 register double z=-q/(2*pow(sq_p,3));
108 if (z<-1.) {
109 z=-1.;
110 } else if (z>1.) {
111 z=1.;
112 }
113 const register double alpha_3=acos(z)/3.;
114 *ev2=trA+2.*sq_p*cos(alpha_3);
115 *ev1=trA-2.*sq_p*cos(alpha_3+M_PI/3.);
116 *ev0=trA-2.*sq_p*cos(alpha_3-M_PI/3.);
117 }
118 }
119 /**
120 \brief
121 solves a 1x1 eigenvalue A*V=ev*V problem for symmetric A
122
123 \param A00 Input - A_00
124 \param ev0 Output - eigenvalue
125 \param V00 Output - eigenvector
126 \param tol Input - tolerance to identify to eigenvalues
127 */
128 inline
129 void eigenvalues_and_eigenvectors1(const double A00,double* ev0,double* V00,const double tol)
130 {
131 eigenvalues1(A00,ev0);
132 *V00=1.;
133 return;
134 }
135 /**
136 \brief
137 returns a non-zero vector in the kernel of [[A00,A01],[A01,A11]] assuming that the kernel dimension is at least 1.
138
139 \param A00 Input - matrix component
140 \param A10 Input - matrix component
141 \param A01 Input - matrix component
142 \param A11 Input - matrix component
143 \param V0 Output - vector component
144 \param V1 Output - vector component
145 */
146 inline
147 void vectorInKernel2(const double A00,const double A10,const double A01,const double A11,
148 double* V0, double*V1)
149 {
150 register double absA00=fabs(A00);
151 register double absA10=fabs(A10);
152 register double absA01=fabs(A01);
153 register double absA11=fabs(A11);
154 register double m=absA11>absA10 ? absA11 : absA10;
155 if (absA00>m || absA01>m) {
156 *V0=-A01;
157 *V1=A00;
158 } else {
159 if (m<=0) {
160 *V0=1.;
161 *V1=0.;
162 } else {
163 *V0=A11;
164 *V1=-A10;
165 }
166 }
167 }
168 /**
169 \brief
170 returns a non-zero vector in the kernel of [[A00,A01,A02],[A10,A11,A12],[A20,A21,A22]]
171 assuming that the kernel dimension is at least 1 and A00 is non zero.
172
173 \param A00 Input - matrix component
174 \param A10 Input - matrix component
175 \param A20 Input - matrix component
176 \param A01 Input - matrix component
177 \param A11 Input - matrix component
178 \param A21 Input - matrix component
179 \param A02 Input - matrix component
180 \param A12 Input - matrix component
181 \param A22 Input - matrix component
182 \param V0 Output - vector component
183 \param V1 Output - vector component
184 \param V2 Output - vector component
185 */
186 inline
187 void vectorInKernel3__nonZeroA00(const double A00,const double A10,const double A20,
188 const double A01,const double A11,const double A21,
189 const double A02,const double A12,const double A22,
190 double* V0,double* V1,double* V2)
191 {
192 double TEMP0,TEMP1;
193 register const double I00=1./A00;
194 register const double IA10=I00*A10;
195 register const double IA20=I00*A20;
196 vectorInKernel2(A11-IA10*A01,A12-IA10*A02,
197 A21-IA20*A01,A22-IA20*A02,&TEMP0,&TEMP1);
198 *V0=-(A10*TEMP0+A20*TEMP1);
199 *V1=A00*TEMP0;
200 *V2=A00*TEMP1;
201 }
202
203 /**
204 \brief
205 solves a 2x2 eigenvalue A*V=ev*V problem for symmetric A. Eigenvectors are
206 ordered by increasing value and eigen vectors are normalizeVector3d such that
207 length is zero and first non-zero component is positive.
208
209 \param A00 Input - A_00
210 \param A01 Input - A_01
211 \param A11 Input - A_11
212 \param ev0 Output - smallest eigenvalue
213 \param ev1 Output - eigenvalue
214 \param V00 Output - eigenvector componenent coresponding to ev0
215 \param V10 Output - eigenvector componenent coresponding to ev0
216 \param V01 Output - eigenvector componenent coresponding to ev1
217 \param V11 Output - eigenvector componenent coresponding to ev1
218 \param tol Input - tolerance to identify to eigenvalues
219 */
220 inline
221 void eigenvalues_and_eigenvectors2(const double A00,const double A01,const double A11,
222 double* ev0, double* ev1,
223 double* V00, double* V10, double* V01, double* V11,
224 const double tol)
225 {
226 double TEMP0,TEMP1;
227 eigenvalues2(A00,A01,A11,ev0,ev1);
228 const register double absev0=fabs(*ev0);
229 const register double absev1=fabs(*ev1);
230 register double max_ev=absev0>absev1 ? absev0 : absev1;
231 if (fabs((*ev0)-(*ev1))<tol*max_ev) {
232 *V00=1.;
233 *V10=0.;
234 *V01=0.;
235 *V11=1.;
236 } else {
237 vectorInKernel2(A00-(*ev0),A01,A01,A11-(*ev0),&TEMP0,&TEMP1);
238 const register double scale=1./sqrt(TEMP0*TEMP0+TEMP1*TEMP1);
239 if (TEMP0<0.) {
240 *V00=-TEMP0*scale;
241 *V10=-TEMP1*scale;
242 if (TEMP1<0.) {
243 *V01= *V10;
244 *V11=-(*V00);
245 } else {
246 *V01=-(*V10);
247 *V11= (*V10);
248 }
249 } else if (TEMP0>0.) {
250 *V00=TEMP0*scale;
251 *V10=TEMP1*scale;
252 if (TEMP1<0.) {
253 *V01=-(*V10);
254 *V11= (*V00);
255 } else {
256 *V01= (*V10);
257 *V11=-(*V00);
258 }
259 } else {
260 *V00=0.;
261 *V10=1;
262 *V11=0.;
263 *V01=1.;
264 }
265 }
266 }
267 /**
268 \brief
269 nomalizes a 3-d vector such that length is one and first non-zero component is positive.
270
271 \param V0 - vector componenent
272 \param V1 - vector componenent
273 \param V2 - vector componenent
274 */
275 inline
276 void normalizeVector3(double* V0,double* V1,double* V2)
277 {
278 register double s;
279 if (*V0>0) {
280 s=1./sqrt((*V0)*(*V0)+(*V1)*(*V1)+(*V2)*(*V2));
281 *V0*=s;
282 *V1*=s;
283 *V2*=s;
284 } else if (*V0<0) {
285 s=-1./sqrt((*V0)*(*V0)+(*V1)*(*V1)+(*V2)*(*V2));
286 *V0*=s;
287 *V1*=s;
288 *V2*=s;
289 } else {
290 if (*V1>0) {
291 s=1./sqrt((*V1)*(*V1)+(*V2)*(*V2));
292 *V1*=s;
293 *V2*=s;
294 } else if (*V1<0) {
295 s=-1./sqrt((*V1)*(*V1)+(*V2)*(*V2));
296 *V1*=s;
297 *V2*=s;
298 } else {
299 *V2=1.;
300 }
301 }
302 }
303 /**
304 \brief
305 solves a 2x2 eigenvalue A*V=ev*V problem for symmetric A. Eigenvectors are
306 ordered by increasing value and eigen vectors are normalizeVector3d such that
307 length is zero and first non-zero component is positive.
308
309 \param A00 Input - A_00
310 \param A01 Input - A_01
311 \param A11 Input - A_11
312 \param ev0 Output - smallest eigenvalue
313 \param ev1 Output - eigenvalue
314 \param V00 Output - eigenvector componenent coresponding to ev0
315 \param V10 Output - eigenvector componenent coresponding to ev0
316 \param V01 Output - eigenvector componenent coresponding to ev1
317 \param V11 Output - eigenvector componenent coresponding to ev1
318 \param tol Input - tolerance to identify to eigenvalues
319 */
320 inline
321 void eigenvalues_and_eigenvectors3(const double A00, const double A01, const double A02,
322 const double A11, const double A12, const double A22,
323 double* ev0, double* ev1, double* ev2,
324 double* V00, double* V10, double* V20,
325 double* V01, double* V11, double* V21,
326 double* V02, double* V12, double* V22,
327 const double tol)
328 {
329 register const double absA01=fabs(A01);
330 register const double absA02=fabs(A02);
331 register const double m=absA01>absA02 ? absA01 : absA02;
332 if (m<=0) {
333 double TEMP_V00,TEMP_V10,TEMP_V01,TEMP_V11,TEMP_EV0,TEMP_EV1;
334 eigenvalues_and_eigenvectors2(A11,A12,A22,
335 &TEMP_EV0,&TEMP_EV1,
336 &TEMP_V00,&TEMP_V10,&TEMP_V01,&TEMP_V11,tol);
337 if (A00<=TEMP_EV0) {
338 *V00=1.;
339 *V10=0.;
340 *V20=0.;
341 *V01=0.;
342 *V11=TEMP_V00;
343 *V21=TEMP_V10;
344 *V02=0.;
345 *V12=TEMP_V01;
346 *V22=TEMP_V11;
347 *ev0=A00;
348 *ev1=TEMP_EV0;
349 *ev2=TEMP_EV1;
350 } else if (A00>TEMP_EV1) {
351 *V02=1.;
352 *V12=0.;
353 *V22=0.;
354 *V00=0.;
355 *V10=TEMP_V00;
356 *V20=TEMP_V10;
357 *V01=0.;
358 *V11=TEMP_V01;
359 *V21=TEMP_V11;
360 *ev0=TEMP_EV0;
361 *ev1=TEMP_EV1;
362 *ev2=A00;
363 } else {
364 *V01=1.;
365 *V11=0.;
366 *V21=0.;
367 *V00=0.;
368 *V10=TEMP_V00;
369 *V20=TEMP_V10;
370 *V02=0.;
371 *V12=TEMP_V01;
372 *V22=TEMP_V11;
373 *ev0=TEMP_EV0;
374 *ev1=A00;
375 *ev2=TEMP_EV1;
376 }
377 } else {
378 eigenvalues3(A00,A01,A02,A11,A12,A22,ev0,ev1,ev2);
379 const register double absev0=fabs(*ev0);
380 const register double absev1=fabs(*ev1);
381 const register double absev2=fabs(*ev2);
382 register double max_ev=absev0>absev1 ? absev0 : absev1;
383 max_ev=max_ev>absev2 ? max_ev : absev2;
384 const register double d_01=fabs((*ev0)-(*ev1));
385 const register double d_12=fabs((*ev1)-(*ev2));
386 const register double max_d=d_01>d_12 ? d_01 : d_12;
387 if (max_d<=tol*max_ev) {
388 *V00=1.;
389 *V10=0;
390 *V20=0;
391 *V01=0;
392 *V11=1.;
393 *V21=0;
394 *V02=0;
395 *V12=0;
396 *V22=1.;
397 } else {
398 const register double S00=A00-(*ev0);
399 const register double absS00=fabs(S00);
400 if (absS00>m) {
401 vectorInKernel3__nonZeroA00(S00,A01,A02,A01,A11-(*ev0),A12,A02,A12,A22-(*ev0),V00,V10,V20);
402 } else if (absA02<m) {
403 vectorInKernel3__nonZeroA00(A01,A11-(*ev0),A12,S00,A01,A02,A02,A12,A22-(*ev0),V00,V10,V20);
404 } else {
405 vectorInKernel3__nonZeroA00(A02,A12,A22-(*ev0),S00,A01,A02,A01,A11-(*ev0),A12,V00,V10,V20);
406 }
407 normalizeVector3(V00,V10,V20);;
408 const register double T00=A00-(*ev2);
409 const register double absT00=fabs(T00);
410 if (absT00>m) {
411 vectorInKernel3__nonZeroA00(T00,A01,A02,A01,A11-(*ev2),A12,A02,A12,A22-(*ev2),V02,V12,V22);
412 } else if (absA02<m) {
413 vectorInKernel3__nonZeroA00(A01,A11-(*ev2),A12,T00,A01,A02,A02,A12,A22-(*ev2),V02,V12,V22);
414 } else {
415 vectorInKernel3__nonZeroA00(A02,A12,A22-(*ev2),T00,A01,A02,A01,A11-(*ev2),A12,V02,V12,V22);
416 }
417 const register double dot=(*V02)*(*V00)+(*V12)*(*V10)+(*V22)*(*V20);
418 *V02-=dot*(*V00);
419 *V12-=dot*(*V10);
420 *V22-=dot*(*V20);
421 normalizeVector3(V02,V12,V22);
422 *V01=(*V10)*(*V22)-(*V12)*(*V20);
423 *V11=(*V20)*(*V02)-(*V00)*(*V22);
424 *V21=(*V00)*(*V12)-(*V02)*(*V10);
425 normalizeVector3(V01,V11,V21);
426 }
427 }
428 }
429
430 // General tensor product: arg_2(SL x SR) = arg_0(SL x SM) * arg_1(SM x SR)
431 // SM is the product of the last axis_offset entries in arg_0.getShape().
432 inline
433 void matrix_matrix_product(const int SL, const int SM, const int SR, const double* A, const double* B, double* C, int transpose)
434 {
435 if (transpose == 0) {
436 for (int i=0; i<SL; i++) {
437 for (int j=0; j<SR; j++) {
438 double sum = 0.0;
439 for (int l=0; l<SM; l++) {
440 sum += A[i+SL*l] * B[l+SM*j];
441 }
442 C[i+SL*j] = sum;
443 }
444 }
445 }
446 else if (transpose == 1) {
447 for (int i=0; i<SL; i++) {
448 for (int j=0; j<SR; j++) {
449 double sum = 0.0;
450 for (int l=0; l<SM; l++) {
451 sum += A[i*SM+l] * B[l+SM*j];
452 }
453 C[i+SL*j] = sum;
454 }
455 }
456 }
457 else if (transpose == 2) {
458 for (int i=0; i<SL; i++) {
459 for (int j=0; j<SR; j++) {
460 double sum = 0.0;
461 for (int l=0; l<SM; l++) {
462 sum += A[i+SL*l] * B[l*SR+j];
463 }
464 C[i+SL*j] = sum;
465 }
466 }
467 }
468 }
469
470 template <typename UnaryFunction>
471 inline void tensor_unary_operation(const int size,
472 const double *arg1,
473 double * argRes,
474 UnaryFunction operation)
475 {
476 for (int i = 0; i < size; ++i) {
477 argRes[i] = operation(arg1[i]);
478 }
479 return;
480 }
481
482 template <typename BinaryFunction>
483 inline void tensor_binary_operation(const int size,
484 const double *arg1,
485 const double *arg2,
486 double * argRes,
487 BinaryFunction operation)
488 {
489 for (int i = 0; i < size; ++i) {
490 argRes[i] = operation(arg1[i], arg2[i]);
491 }
492 return;
493 }
494
495 template <typename BinaryFunction>
496 inline void tensor_binary_operation(const int size,
497 double arg1,
498 const double *arg2,
499 double *argRes,
500 BinaryFunction operation)
501 {
502 for (int i = 0; i < size; ++i) {
503 argRes[i] = operation(arg1, arg2[i]);
504 }
505 return;
506 }
507
508 template <typename BinaryFunction>
509 inline void tensor_binary_operation(const int size,
510 const double *arg1,
511 double arg2,
512 double *argRes,
513 BinaryFunction operation)
514 {
515 for (int i = 0; i < size; ++i) {
516 argRes[i] = operation(arg1[i], arg2);
517 }
518 return;
519 }
520
521 } // end of namespace
522 #endif

  ViewVC Help
Powered by ViewVC 1.1.26