/[escript]/trunk/escript/src/LocalOps.h
ViewVC logotype

Contents of /trunk/escript/src/LocalOps.h

Parent Directory Parent Directory | Revision Log Revision Log


Revision 2773 - (show annotations)
Wed Nov 25 04:02:01 2009 UTC (9 years, 11 months ago) by jfenwick
File MIME type: text/plain
File size: 16787 byte(s)
Hadn't realised isnan was a macro.
Checking for it rather than FP_NAN

1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2009 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14
15 #if !defined escript_LocalOps_H
16 #define escript_LocalOps_H
17 #if defined(_WIN32) && defined(__INTEL_COMPILER)
18 # include <mathimf.h>
19 #else
20 # include <math.h>
21 #endif
22 #ifndef M_PI
23 # define M_PI 3.14159265358979323846 /* pi */
24 #endif
25
26
27 /**
28 \file LocalOps.h
29 \brief Describes binary operations performed on double*.
30
31 For operations on DataAbstract see BinaryOp.h.
32 For operations on DataVector see DataMaths.h.
33 */
34
35 namespace escript {
36
37 /**
38 \brief acts as a wrapper to isnan.
39 \warning if compiler does not support FP_NAN this function will always return false.
40 */
41 inline
42 bool nancheck(double d)
43 {
44 #ifndef isnan // Q: so why not just test d!=d?
45 return false; // A: Coz it doesn't always work [I've checked].
46 #else // One theory is that the optimizer skips the test.
47 return isnan(d);
48 #endif
49 }
50
51 /**
52 \brief returns a NaN.
53 \warning Should probably only used where you know you can test for NaNs
54 */
55 inline
56 double makeNaN()
57 {
58 #ifndef isnan
59 return nan();
60 #else
61 return sqrt(-1);
62 #endif
63
64 }
65
66
67 /**
68 \brief
69 solves a 1x1 eigenvalue A*V=ev*V problem
70
71 \param A00 Input - A_00
72 \param ev0 Output - eigenvalue
73 */
74 inline
75 void eigenvalues1(const double A00,double* ev0) {
76
77 *ev0=A00;
78
79 }
80 /**
81 \brief
82 solves a 2x2 eigenvalue A*V=ev*V problem for symmetric A
83
84 \param A00 Input - A_00
85 \param A01 Input - A_01
86 \param A11 Input - A_11
87 \param ev0 Output - smallest eigenvalue
88 \param ev1 Output - largest eigenvalue
89 */
90 inline
91 void eigenvalues2(const double A00,const double A01,const double A11,
92 double* ev0, double* ev1) {
93 const register double trA=(A00+A11)/2.;
94 const register double A_00=A00-trA;
95 const register double A_11=A11-trA;
96 const register double s=sqrt(A01*A01-A_00*A_11);
97 *ev0=trA-s;
98 *ev1=trA+s;
99 }
100 /**
101 \brief
102 solves a 3x3 eigenvalue A*V=ev*V problem for symmetric A
103
104 \param A00 Input - A_00
105 \param A01 Input - A_01
106 \param A02 Input - A_02
107 \param A11 Input - A_11
108 \param A12 Input - A_12
109 \param A22 Input - A_22
110 \param ev0 Output - smallest eigenvalue
111 \param ev1 Output - eigenvalue
112 \param ev2 Output - largest eigenvalue
113 */
114 inline
115 void eigenvalues3(const double A00, const double A01, const double A02,
116 const double A11, const double A12,
117 const double A22,
118 double* ev0, double* ev1,double* ev2) {
119
120 const register double trA=(A00+A11+A22)/3.;
121 const register double A_00=A00-trA;
122 const register double A_11=A11-trA;
123 const register double A_22=A22-trA;
124 const register double A01_2=A01*A01;
125 const register double A02_2=A02*A02;
126 const register double A12_2=A12*A12;
127 const register double p=A02_2+A12_2+A01_2+(A_00*A_00+A_11*A_11+A_22*A_22)/2.;
128 if (p<=0.) {
129 *ev2=trA;
130 *ev1=trA;
131 *ev0=trA;
132
133 } else {
134 const register double q=(A02_2*A_11+A12_2*A_00+A01_2*A_22)-(A_00*A_11*A_22+2*A01*A12*A02);
135 const register double sq_p=sqrt(p/3.);
136 register double z=-q/(2*pow(sq_p,3));
137 if (z<-1.) {
138 z=-1.;
139 } else if (z>1.) {
140 z=1.;
141 }
142 const register double alpha_3=acos(z)/3.;
143 *ev2=trA+2.*sq_p*cos(alpha_3);
144 *ev1=trA-2.*sq_p*cos(alpha_3+M_PI/3.);
145 *ev0=trA-2.*sq_p*cos(alpha_3-M_PI/3.);
146 }
147 }
148 /**
149 \brief
150 solves a 1x1 eigenvalue A*V=ev*V problem for symmetric A
151
152 \param A00 Input - A_00
153 \param ev0 Output - eigenvalue
154 \param V00 Output - eigenvector
155 \param tol Input - tolerance to identify to eigenvalues
156 */
157 inline
158 void eigenvalues_and_eigenvectors1(const double A00,double* ev0,double* V00,const double tol)
159 {
160 eigenvalues1(A00,ev0);
161 *V00=1.;
162 return;
163 }
164 /**
165 \brief
166 returns a non-zero vector in the kernel of [[A00,A01],[A01,A11]] assuming that the kernel dimension is at least 1.
167
168 \param A00 Input - matrix component
169 \param A10 Input - matrix component
170 \param A01 Input - matrix component
171 \param A11 Input - matrix component
172 \param V0 Output - vector component
173 \param V1 Output - vector component
174 */
175 inline
176 void vectorInKernel2(const double A00,const double A10,const double A01,const double A11,
177 double* V0, double*V1)
178 {
179 register double absA00=fabs(A00);
180 register double absA10=fabs(A10);
181 register double absA01=fabs(A01);
182 register double absA11=fabs(A11);
183 register double m=absA11>absA10 ? absA11 : absA10;
184 if (absA00>m || absA01>m) {
185 *V0=-A01;
186 *V1=A00;
187 } else {
188 if (m<=0) {
189 *V0=1.;
190 *V1=0.;
191 } else {
192 *V0=A11;
193 *V1=-A10;
194 }
195 }
196 }
197 /**
198 \brief
199 returns a non-zero vector in the kernel of [[A00,A01,A02],[A10,A11,A12],[A20,A21,A22]]
200 assuming that the kernel dimension is at least 1 and A00 is non zero.
201
202 \param A00 Input - matrix component
203 \param A10 Input - matrix component
204 \param A20 Input - matrix component
205 \param A01 Input - matrix component
206 \param A11 Input - matrix component
207 \param A21 Input - matrix component
208 \param A02 Input - matrix component
209 \param A12 Input - matrix component
210 \param A22 Input - matrix component
211 \param V0 Output - vector component
212 \param V1 Output - vector component
213 \param V2 Output - vector component
214 */
215 inline
216 void vectorInKernel3__nonZeroA00(const double A00,const double A10,const double A20,
217 const double A01,const double A11,const double A21,
218 const double A02,const double A12,const double A22,
219 double* V0,double* V1,double* V2)
220 {
221 double TEMP0,TEMP1;
222 register const double I00=1./A00;
223 register const double IA10=I00*A10;
224 register const double IA20=I00*A20;
225 vectorInKernel2(A11-IA10*A01,A12-IA10*A02,
226 A21-IA20*A01,A22-IA20*A02,&TEMP0,&TEMP1);
227 *V0=-(A10*TEMP0+A20*TEMP1);
228 *V1=A00*TEMP0;
229 *V2=A00*TEMP1;
230 }
231
232 /**
233 \brief
234 solves a 2x2 eigenvalue A*V=ev*V problem for symmetric A. Eigenvectors are
235 ordered by increasing value and eigen vectors are normalizeVector3d such that
236 length is zero and first non-zero component is positive.
237
238 \param A00 Input - A_00
239 \param A01 Input - A_01
240 \param A11 Input - A_11
241 \param ev0 Output - smallest eigenvalue
242 \param ev1 Output - eigenvalue
243 \param V00 Output - eigenvector componenent coresponding to ev0
244 \param V10 Output - eigenvector componenent coresponding to ev0
245 \param V01 Output - eigenvector componenent coresponding to ev1
246 \param V11 Output - eigenvector componenent coresponding to ev1
247 \param tol Input - tolerance to identify to eigenvalues
248 */
249 inline
250 void eigenvalues_and_eigenvectors2(const double A00,const double A01,const double A11,
251 double* ev0, double* ev1,
252 double* V00, double* V10, double* V01, double* V11,
253 const double tol)
254 {
255 double TEMP0,TEMP1;
256 eigenvalues2(A00,A01,A11,ev0,ev1);
257 const register double absev0=fabs(*ev0);
258 const register double absev1=fabs(*ev1);
259 register double max_ev=absev0>absev1 ? absev0 : absev1;
260 if (fabs((*ev0)-(*ev1))<tol*max_ev) {
261 *V00=1.;
262 *V10=0.;
263 *V01=0.;
264 *V11=1.;
265 } else {
266 vectorInKernel2(A00-(*ev0),A01,A01,A11-(*ev0),&TEMP0,&TEMP1);
267 const register double scale=1./sqrt(TEMP0*TEMP0+TEMP1*TEMP1);
268 if (TEMP0<0.) {
269 *V00=-TEMP0*scale;
270 *V10=-TEMP1*scale;
271 if (TEMP1<0.) {
272 *V01= *V10;
273 *V11=-(*V00);
274 } else {
275 *V01=-(*V10);
276 *V11= (*V10);
277 }
278 } else if (TEMP0>0.) {
279 *V00=TEMP0*scale;
280 *V10=TEMP1*scale;
281 if (TEMP1<0.) {
282 *V01=-(*V10);
283 *V11= (*V00);
284 } else {
285 *V01= (*V10);
286 *V11=-(*V00);
287 }
288 } else {
289 *V00=0.;
290 *V10=1;
291 *V11=0.;
292 *V01=1.;
293 }
294 }
295 }
296 /**
297 \brief
298 nomalizes a 3-d vector such that length is one and first non-zero component is positive.
299
300 \param V0 - vector componenent
301 \param V1 - vector componenent
302 \param V2 - vector componenent
303 */
304 inline
305 void normalizeVector3(double* V0,double* V1,double* V2)
306 {
307 register double s;
308 if (*V0>0) {
309 s=1./sqrt((*V0)*(*V0)+(*V1)*(*V1)+(*V2)*(*V2));
310 *V0*=s;
311 *V1*=s;
312 *V2*=s;
313 } else if (*V0<0) {
314 s=-1./sqrt((*V0)*(*V0)+(*V1)*(*V1)+(*V2)*(*V2));
315 *V0*=s;
316 *V1*=s;
317 *V2*=s;
318 } else {
319 if (*V1>0) {
320 s=1./sqrt((*V1)*(*V1)+(*V2)*(*V2));
321 *V1*=s;
322 *V2*=s;
323 } else if (*V1<0) {
324 s=-1./sqrt((*V1)*(*V1)+(*V2)*(*V2));
325 *V1*=s;
326 *V2*=s;
327 } else {
328 *V2=1.;
329 }
330 }
331 }
332 /**
333 \brief
334 solves a 2x2 eigenvalue A*V=ev*V problem for symmetric A. Eigenvectors are
335 ordered by increasing value and eigen vectors are normalizeVector3d such that
336 length is zero and first non-zero component is positive.
337
338 \param A00 Input - A_00
339 \param A01 Input - A_01
340 \param A02 Input - A_02
341 \param A11 Input - A_11
342 \param A12 Input - A_12
343 \param A22 Input - A_22
344 \param ev0 Output - smallest eigenvalue
345 \param ev1 Output - eigenvalue
346 \param ev2 Output -
347 \param V00 Output - eigenvector componenent coresponding to ev0
348 \param V10 Output - eigenvector componenent coresponding to ev0
349 \param V20 Output -
350 \param V01 Output - eigenvector componenent coresponding to ev1
351 \param V11 Output - eigenvector componenent coresponding to ev1
352 \param V21 Output -
353 \param V02 Output -
354 \param V12 Output -
355 \param V22 Output -
356 \param tol Input - tolerance to identify to eigenvalues
357 */
358 inline
359 void eigenvalues_and_eigenvectors3(const double A00, const double A01, const double A02,
360 const double A11, const double A12, const double A22,
361 double* ev0, double* ev1, double* ev2,
362 double* V00, double* V10, double* V20,
363 double* V01, double* V11, double* V21,
364 double* V02, double* V12, double* V22,
365 const double tol)
366 {
367 register const double absA01=fabs(A01);
368 register const double absA02=fabs(A02);
369 register const double m=absA01>absA02 ? absA01 : absA02;
370 if (m<=0) {
371 double TEMP_V00,TEMP_V10,TEMP_V01,TEMP_V11,TEMP_EV0,TEMP_EV1;
372 eigenvalues_and_eigenvectors2(A11,A12,A22,
373 &TEMP_EV0,&TEMP_EV1,
374 &TEMP_V00,&TEMP_V10,&TEMP_V01,&TEMP_V11,tol);
375 if (A00<=TEMP_EV0) {
376 *V00=1.;
377 *V10=0.;
378 *V20=0.;
379 *V01=0.;
380 *V11=TEMP_V00;
381 *V21=TEMP_V10;
382 *V02=0.;
383 *V12=TEMP_V01;
384 *V22=TEMP_V11;
385 *ev0=A00;
386 *ev1=TEMP_EV0;
387 *ev2=TEMP_EV1;
388 } else if (A00>TEMP_EV1) {
389 *V02=1.;
390 *V12=0.;
391 *V22=0.;
392 *V00=0.;
393 *V10=TEMP_V00;
394 *V20=TEMP_V10;
395 *V01=0.;
396 *V11=TEMP_V01;
397 *V21=TEMP_V11;
398 *ev0=TEMP_EV0;
399 *ev1=TEMP_EV1;
400 *ev2=A00;
401 } else {
402 *V01=1.;
403 *V11=0.;
404 *V21=0.;
405 *V00=0.;
406 *V10=TEMP_V00;
407 *V20=TEMP_V10;
408 *V02=0.;
409 *V12=TEMP_V01;
410 *V22=TEMP_V11;
411 *ev0=TEMP_EV0;
412 *ev1=A00;
413 *ev2=TEMP_EV1;
414 }
415 } else {
416 eigenvalues3(A00,A01,A02,A11,A12,A22,ev0,ev1,ev2);
417 const register double absev0=fabs(*ev0);
418 const register double absev1=fabs(*ev1);
419 const register double absev2=fabs(*ev2);
420 register double max_ev=absev0>absev1 ? absev0 : absev1;
421 max_ev=max_ev>absev2 ? max_ev : absev2;
422 const register double d_01=fabs((*ev0)-(*ev1));
423 const register double d_12=fabs((*ev1)-(*ev2));
424 const register double max_d=d_01>d_12 ? d_01 : d_12;
425 if (max_d<=tol*max_ev) {
426 *V00=1.;
427 *V10=0;
428 *V20=0;
429 *V01=0;
430 *V11=1.;
431 *V21=0;
432 *V02=0;
433 *V12=0;
434 *V22=1.;
435 } else {
436 const register double S00=A00-(*ev0);
437 const register double absS00=fabs(S00);
438 if (absS00>m) {
439 vectorInKernel3__nonZeroA00(S00,A01,A02,A01,A11-(*ev0),A12,A02,A12,A22-(*ev0),V00,V10,V20);
440 } else if (absA02<m) {
441 vectorInKernel3__nonZeroA00(A01,A11-(*ev0),A12,S00,A01,A02,A02,A12,A22-(*ev0),V00,V10,V20);
442 } else {
443 vectorInKernel3__nonZeroA00(A02,A12,A22-(*ev0),S00,A01,A02,A01,A11-(*ev0),A12,V00,V10,V20);
444 }
445 normalizeVector3(V00,V10,V20);;
446 const register double T00=A00-(*ev2);
447 const register double absT00=fabs(T00);
448 if (absT00>m) {
449 vectorInKernel3__nonZeroA00(T00,A01,A02,A01,A11-(*ev2),A12,A02,A12,A22-(*ev2),V02,V12,V22);
450 } else if (absA02<m) {
451 vectorInKernel3__nonZeroA00(A01,A11-(*ev2),A12,T00,A01,A02,A02,A12,A22-(*ev2),V02,V12,V22);
452 } else {
453 vectorInKernel3__nonZeroA00(A02,A12,A22-(*ev2),T00,A01,A02,A01,A11-(*ev2),A12,V02,V12,V22);
454 }
455 const register double dot=(*V02)*(*V00)+(*V12)*(*V10)+(*V22)*(*V20);
456 *V02-=dot*(*V00);
457 *V12-=dot*(*V10);
458 *V22-=dot*(*V20);
459 normalizeVector3(V02,V12,V22);
460 *V01=(*V10)*(*V22)-(*V12)*(*V20);
461 *V11=(*V20)*(*V02)-(*V00)*(*V22);
462 *V21=(*V00)*(*V12)-(*V02)*(*V10);
463 normalizeVector3(V01,V11,V21);
464 }
465 }
466 }
467
468 // General tensor product: arg_2(SL x SR) = arg_0(SL x SM) * arg_1(SM x SR)
469 // SM is the product of the last axis_offset entries in arg_0.getShape().
470 inline
471 void matrix_matrix_product(const int SL, const int SM, const int SR, const double* A, const double* B, double* C, int transpose)
472 {
473 if (transpose == 0) {
474 for (int i=0; i<SL; i++) {
475 for (int j=0; j<SR; j++) {
476 double sum = 0.0;
477 for (int l=0; l<SM; l++) {
478 sum += A[i+SL*l] * B[l+SM*j];
479 }
480 C[i+SL*j] = sum;
481 }
482 }
483 }
484 else if (transpose == 1) {
485 for (int i=0; i<SL; i++) {
486 for (int j=0; j<SR; j++) {
487 double sum = 0.0;
488 for (int l=0; l<SM; l++) {
489 sum += A[i*SM+l] * B[l+SM*j];
490 }
491 C[i+SL*j] = sum;
492 }
493 }
494 }
495 else if (transpose == 2) {
496 for (int i=0; i<SL; i++) {
497 for (int j=0; j<SR; j++) {
498 double sum = 0.0;
499 for (int l=0; l<SM; l++) {
500 sum += A[i+SL*l] * B[l*SR+j];
501 }
502 C[i+SL*j] = sum;
503 }
504 }
505 }
506 }
507
508 template <typename UnaryFunction>
509 inline void tensor_unary_operation(const int size,
510 const double *arg1,
511 double * argRes,
512 UnaryFunction operation)
513 {
514 for (int i = 0; i < size; ++i) {
515 argRes[i] = operation(arg1[i]);
516 }
517 return;
518 }
519
520 template <typename BinaryFunction>
521 inline void tensor_binary_operation(const int size,
522 const double *arg1,
523 const double *arg2,
524 double * argRes,
525 BinaryFunction operation)
526 {
527 for (int i = 0; i < size; ++i) {
528 argRes[i] = operation(arg1[i], arg2[i]);
529 }
530 return;
531 }
532
533 template <typename BinaryFunction>
534 inline void tensor_binary_operation(const int size,
535 double arg1,
536 const double *arg2,
537 double *argRes,
538 BinaryFunction operation)
539 {
540 for (int i = 0; i < size; ++i) {
541 argRes[i] = operation(arg1, arg2[i]);
542 }
543 return;
544 }
545
546 template <typename BinaryFunction>
547 inline void tensor_binary_operation(const int size,
548 const double *arg1,
549 double arg2,
550 double *argRes,
551 BinaryFunction operation)
552 {
553 for (int i = 0; i < size; ++i) {
554 argRes[i] = operation(arg1[i], arg2);
555 }
556 return;
557 }
558
559 } // end of namespace
560 #endif

  ViewVC Help
Powered by ViewVC 1.1.26