/[escript]/trunk/escript/src/LocalOps.h
ViewVC logotype

Contents of /trunk/escript/src/LocalOps.h

Parent Directory Parent Directory | Revision Log Revision Log


Revision 2884 - (show annotations)
Thu Jan 28 05:00:59 2010 UTC (9 years, 6 months ago) by jfenwick
File MIME type: text/plain
File size: 16831 byte(s)
Updated various nan checks to consider the windows _isnan

The default compiler flags have changed as well.
+ intel will now take -std=c99 instead of -ansi
+ gcc has -ansi removed which means it defaults to gnu99

We could have set gcc to -std=c99 as well but that gives a 
warning on g++ which gets converted into an error by our
pedantic warning.

Rationale:
We need something more than ansi to get proper nan handling.
- We currently don't have any code which does not comply with ansi
  but the nan checks don't work.

Impact:
If we want our code to still be able to compile on older compilers
(at reduced functionality) we need to be careful not to introduce other
c99-isms.
If we don't care, then it's time for some celebratory // comments.


1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2010 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14
15 #if !defined escript_LocalOps_H
16 #define escript_LocalOps_H
17 #if defined(_WIN32) && defined(__INTEL_COMPILER)
18 # include <mathimf.h>
19 #else
20 # include <math.h>
21 #endif
22 #ifndef M_PI
23 # define M_PI 3.14159265358979323846 /* pi */
24 #endif
25
26
27 /**
28 \file LocalOps.h
29 \brief Describes binary operations performed on double*.
30
31 For operations on DataAbstract see BinaryOp.h.
32 For operations on DataVector see DataMaths.h.
33 */
34
35 namespace escript {
36
37 /**
38 \brief acts as a wrapper to isnan.
39 \warning if compiler does not support FP_NAN this function will always return false.
40 */
41 inline
42 bool nancheck(double d)
43 {
44 // Q: so why not just test d!=d?
45 // A: Coz it doesn't always work [I've checked].
46 // One theory is that the optimizer skips the test.
47 #ifdef isnan
48 return isnan(d);
49 #elif defined _isnan
50 return _isnan(d);
51 #else
52 return false;
53 #endif
54 }
55
56 /**
57 \brief returns a NaN.
58 \warning Should probably only used where you know you can test for NaNs
59 */
60 inline
61 double makeNaN()
62 {
63 #ifdef nan
64 return nan("");
65 #else
66 return sqrt(-1);
67 #endif
68
69 }
70
71
72 /**
73 \brief
74 solves a 1x1 eigenvalue A*V=ev*V problem
75
76 \param A00 Input - A_00
77 \param ev0 Output - eigenvalue
78 */
79 inline
80 void eigenvalues1(const double A00,double* ev0) {
81
82 *ev0=A00;
83
84 }
85 /**
86 \brief
87 solves a 2x2 eigenvalue A*V=ev*V problem for symmetric A
88
89 \param A00 Input - A_00
90 \param A01 Input - A_01
91 \param A11 Input - A_11
92 \param ev0 Output - smallest eigenvalue
93 \param ev1 Output - largest eigenvalue
94 */
95 inline
96 void eigenvalues2(const double A00,const double A01,const double A11,
97 double* ev0, double* ev1) {
98 const register double trA=(A00+A11)/2.;
99 const register double A_00=A00-trA;
100 const register double A_11=A11-trA;
101 const register double s=sqrt(A01*A01-A_00*A_11);
102 *ev0=trA-s;
103 *ev1=trA+s;
104 }
105 /**
106 \brief
107 solves a 3x3 eigenvalue A*V=ev*V problem for symmetric A
108
109 \param A00 Input - A_00
110 \param A01 Input - A_01
111 \param A02 Input - A_02
112 \param A11 Input - A_11
113 \param A12 Input - A_12
114 \param A22 Input - A_22
115 \param ev0 Output - smallest eigenvalue
116 \param ev1 Output - eigenvalue
117 \param ev2 Output - largest eigenvalue
118 */
119 inline
120 void eigenvalues3(const double A00, const double A01, const double A02,
121 const double A11, const double A12,
122 const double A22,
123 double* ev0, double* ev1,double* ev2) {
124
125 const register double trA=(A00+A11+A22)/3.;
126 const register double A_00=A00-trA;
127 const register double A_11=A11-trA;
128 const register double A_22=A22-trA;
129 const register double A01_2=A01*A01;
130 const register double A02_2=A02*A02;
131 const register double A12_2=A12*A12;
132 const register double p=A02_2+A12_2+A01_2+(A_00*A_00+A_11*A_11+A_22*A_22)/2.;
133 if (p<=0.) {
134 *ev2=trA;
135 *ev1=trA;
136 *ev0=trA;
137
138 } else {
139 const register double q=(A02_2*A_11+A12_2*A_00+A01_2*A_22)-(A_00*A_11*A_22+2*A01*A12*A02);
140 const register double sq_p=sqrt(p/3.);
141 register double z=-q/(2*pow(sq_p,3));
142 if (z<-1.) {
143 z=-1.;
144 } else if (z>1.) {
145 z=1.;
146 }
147 const register double alpha_3=acos(z)/3.;
148 *ev2=trA+2.*sq_p*cos(alpha_3);
149 *ev1=trA-2.*sq_p*cos(alpha_3+M_PI/3.);
150 *ev0=trA-2.*sq_p*cos(alpha_3-M_PI/3.);
151 }
152 }
153 /**
154 \brief
155 solves a 1x1 eigenvalue A*V=ev*V problem for symmetric A
156
157 \param A00 Input - A_00
158 \param ev0 Output - eigenvalue
159 \param V00 Output - eigenvector
160 \param tol Input - tolerance to identify to eigenvalues
161 */
162 inline
163 void eigenvalues_and_eigenvectors1(const double A00,double* ev0,double* V00,const double tol)
164 {
165 eigenvalues1(A00,ev0);
166 *V00=1.;
167 return;
168 }
169 /**
170 \brief
171 returns a non-zero vector in the kernel of [[A00,A01],[A01,A11]] assuming that the kernel dimension is at least 1.
172
173 \param A00 Input - matrix component
174 \param A10 Input - matrix component
175 \param A01 Input - matrix component
176 \param A11 Input - matrix component
177 \param V0 Output - vector component
178 \param V1 Output - vector component
179 */
180 inline
181 void vectorInKernel2(const double A00,const double A10,const double A01,const double A11,
182 double* V0, double*V1)
183 {
184 register double absA00=fabs(A00);
185 register double absA10=fabs(A10);
186 register double absA01=fabs(A01);
187 register double absA11=fabs(A11);
188 register double m=absA11>absA10 ? absA11 : absA10;
189 if (absA00>m || absA01>m) {
190 *V0=-A01;
191 *V1=A00;
192 } else {
193 if (m<=0) {
194 *V0=1.;
195 *V1=0.;
196 } else {
197 *V0=A11;
198 *V1=-A10;
199 }
200 }
201 }
202 /**
203 \brief
204 returns a non-zero vector in the kernel of [[A00,A01,A02],[A10,A11,A12],[A20,A21,A22]]
205 assuming that the kernel dimension is at least 1 and A00 is non zero.
206
207 \param A00 Input - matrix component
208 \param A10 Input - matrix component
209 \param A20 Input - matrix component
210 \param A01 Input - matrix component
211 \param A11 Input - matrix component
212 \param A21 Input - matrix component
213 \param A02 Input - matrix component
214 \param A12 Input - matrix component
215 \param A22 Input - matrix component
216 \param V0 Output - vector component
217 \param V1 Output - vector component
218 \param V2 Output - vector component
219 */
220 inline
221 void vectorInKernel3__nonZeroA00(const double A00,const double A10,const double A20,
222 const double A01,const double A11,const double A21,
223 const double A02,const double A12,const double A22,
224 double* V0,double* V1,double* V2)
225 {
226 double TEMP0,TEMP1;
227 register const double I00=1./A00;
228 register const double IA10=I00*A10;
229 register const double IA20=I00*A20;
230 vectorInKernel2(A11-IA10*A01,A12-IA10*A02,
231 A21-IA20*A01,A22-IA20*A02,&TEMP0,&TEMP1);
232 *V0=-(A10*TEMP0+A20*TEMP1);
233 *V1=A00*TEMP0;
234 *V2=A00*TEMP1;
235 }
236
237 /**
238 \brief
239 solves a 2x2 eigenvalue A*V=ev*V problem for symmetric A. Eigenvectors are
240 ordered by increasing value and eigen vectors are normalizeVector3d such that
241 length is zero and first non-zero component is positive.
242
243 \param A00 Input - A_00
244 \param A01 Input - A_01
245 \param A11 Input - A_11
246 \param ev0 Output - smallest eigenvalue
247 \param ev1 Output - eigenvalue
248 \param V00 Output - eigenvector componenent coresponding to ev0
249 \param V10 Output - eigenvector componenent coresponding to ev0
250 \param V01 Output - eigenvector componenent coresponding to ev1
251 \param V11 Output - eigenvector componenent coresponding to ev1
252 \param tol Input - tolerance to identify to eigenvalues
253 */
254 inline
255 void eigenvalues_and_eigenvectors2(const double A00,const double A01,const double A11,
256 double* ev0, double* ev1,
257 double* V00, double* V10, double* V01, double* V11,
258 const double tol)
259 {
260 double TEMP0,TEMP1;
261 eigenvalues2(A00,A01,A11,ev0,ev1);
262 const register double absev0=fabs(*ev0);
263 const register double absev1=fabs(*ev1);
264 register double max_ev=absev0>absev1 ? absev0 : absev1;
265 if (fabs((*ev0)-(*ev1))<tol*max_ev) {
266 *V00=1.;
267 *V10=0.;
268 *V01=0.;
269 *V11=1.;
270 } else {
271 vectorInKernel2(A00-(*ev0),A01,A01,A11-(*ev0),&TEMP0,&TEMP1);
272 const register double scale=1./sqrt(TEMP0*TEMP0+TEMP1*TEMP1);
273 if (TEMP0<0.) {
274 *V00=-TEMP0*scale;
275 *V10=-TEMP1*scale;
276 if (TEMP1<0.) {
277 *V01= *V10;
278 *V11=-(*V00);
279 } else {
280 *V01=-(*V10);
281 *V11= (*V10);
282 }
283 } else if (TEMP0>0.) {
284 *V00=TEMP0*scale;
285 *V10=TEMP1*scale;
286 if (TEMP1<0.) {
287 *V01=-(*V10);
288 *V11= (*V00);
289 } else {
290 *V01= (*V10);
291 *V11=-(*V00);
292 }
293 } else {
294 *V00=0.;
295 *V10=1;
296 *V11=0.;
297 *V01=1.;
298 }
299 }
300 }
301 /**
302 \brief
303 nomalizes a 3-d vector such that length is one and first non-zero component is positive.
304
305 \param V0 - vector componenent
306 \param V1 - vector componenent
307 \param V2 - vector componenent
308 */
309 inline
310 void normalizeVector3(double* V0,double* V1,double* V2)
311 {
312 register double s;
313 if (*V0>0) {
314 s=1./sqrt((*V0)*(*V0)+(*V1)*(*V1)+(*V2)*(*V2));
315 *V0*=s;
316 *V1*=s;
317 *V2*=s;
318 } else if (*V0<0) {
319 s=-1./sqrt((*V0)*(*V0)+(*V1)*(*V1)+(*V2)*(*V2));
320 *V0*=s;
321 *V1*=s;
322 *V2*=s;
323 } else {
324 if (*V1>0) {
325 s=1./sqrt((*V1)*(*V1)+(*V2)*(*V2));
326 *V1*=s;
327 *V2*=s;
328 } else if (*V1<0) {
329 s=-1./sqrt((*V1)*(*V1)+(*V2)*(*V2));
330 *V1*=s;
331 *V2*=s;
332 } else {
333 *V2=1.;
334 }
335 }
336 }
337 /**
338 \brief
339 solves a 2x2 eigenvalue A*V=ev*V problem for symmetric A. Eigenvectors are
340 ordered by increasing value and eigen vectors are normalizeVector3d such that
341 length is zero and first non-zero component is positive.
342
343 \param A00 Input - A_00
344 \param A01 Input - A_01
345 \param A02 Input - A_02
346 \param A11 Input - A_11
347 \param A12 Input - A_12
348 \param A22 Input - A_22
349 \param ev0 Output - smallest eigenvalue
350 \param ev1 Output - eigenvalue
351 \param ev2 Output -
352 \param V00 Output - eigenvector componenent coresponding to ev0
353 \param V10 Output - eigenvector componenent coresponding to ev0
354 \param V20 Output -
355 \param V01 Output - eigenvector componenent coresponding to ev1
356 \param V11 Output - eigenvector componenent coresponding to ev1
357 \param V21 Output -
358 \param V02 Output -
359 \param V12 Output -
360 \param V22 Output -
361 \param tol Input - tolerance to identify to eigenvalues
362 */
363 inline
364 void eigenvalues_and_eigenvectors3(const double A00, const double A01, const double A02,
365 const double A11, const double A12, const double A22,
366 double* ev0, double* ev1, double* ev2,
367 double* V00, double* V10, double* V20,
368 double* V01, double* V11, double* V21,
369 double* V02, double* V12, double* V22,
370 const double tol)
371 {
372 register const double absA01=fabs(A01);
373 register const double absA02=fabs(A02);
374 register const double m=absA01>absA02 ? absA01 : absA02;
375 if (m<=0) {
376 double TEMP_V00,TEMP_V10,TEMP_V01,TEMP_V11,TEMP_EV0,TEMP_EV1;
377 eigenvalues_and_eigenvectors2(A11,A12,A22,
378 &TEMP_EV0,&TEMP_EV1,
379 &TEMP_V00,&TEMP_V10,&TEMP_V01,&TEMP_V11,tol);
380 if (A00<=TEMP_EV0) {
381 *V00=1.;
382 *V10=0.;
383 *V20=0.;
384 *V01=0.;
385 *V11=TEMP_V00;
386 *V21=TEMP_V10;
387 *V02=0.;
388 *V12=TEMP_V01;
389 *V22=TEMP_V11;
390 *ev0=A00;
391 *ev1=TEMP_EV0;
392 *ev2=TEMP_EV1;
393 } else if (A00>TEMP_EV1) {
394 *V02=1.;
395 *V12=0.;
396 *V22=0.;
397 *V00=0.;
398 *V10=TEMP_V00;
399 *V20=TEMP_V10;
400 *V01=0.;
401 *V11=TEMP_V01;
402 *V21=TEMP_V11;
403 *ev0=TEMP_EV0;
404 *ev1=TEMP_EV1;
405 *ev2=A00;
406 } else {
407 *V01=1.;
408 *V11=0.;
409 *V21=0.;
410 *V00=0.;
411 *V10=TEMP_V00;
412 *V20=TEMP_V10;
413 *V02=0.;
414 *V12=TEMP_V01;
415 *V22=TEMP_V11;
416 *ev0=TEMP_EV0;
417 *ev1=A00;
418 *ev2=TEMP_EV1;
419 }
420 } else {
421 eigenvalues3(A00,A01,A02,A11,A12,A22,ev0,ev1,ev2);
422 const register double absev0=fabs(*ev0);
423 const register double absev1=fabs(*ev1);
424 const register double absev2=fabs(*ev2);
425 register double max_ev=absev0>absev1 ? absev0 : absev1;
426 max_ev=max_ev>absev2 ? max_ev : absev2;
427 const register double d_01=fabs((*ev0)-(*ev1));
428 const register double d_12=fabs((*ev1)-(*ev2));
429 const register double max_d=d_01>d_12 ? d_01 : d_12;
430 if (max_d<=tol*max_ev) {
431 *V00=1.;
432 *V10=0;
433 *V20=0;
434 *V01=0;
435 *V11=1.;
436 *V21=0;
437 *V02=0;
438 *V12=0;
439 *V22=1.;
440 } else {
441 const register double S00=A00-(*ev0);
442 const register double absS00=fabs(S00);
443 if (absS00>m) {
444 vectorInKernel3__nonZeroA00(S00,A01,A02,A01,A11-(*ev0),A12,A02,A12,A22-(*ev0),V00,V10,V20);
445 } else if (absA02<m) {
446 vectorInKernel3__nonZeroA00(A01,A11-(*ev0),A12,S00,A01,A02,A02,A12,A22-(*ev0),V00,V10,V20);
447 } else {
448 vectorInKernel3__nonZeroA00(A02,A12,A22-(*ev0),S00,A01,A02,A01,A11-(*ev0),A12,V00,V10,V20);
449 }
450 normalizeVector3(V00,V10,V20);;
451 const register double T00=A00-(*ev2);
452 const register double absT00=fabs(T00);
453 if (absT00>m) {
454 vectorInKernel3__nonZeroA00(T00,A01,A02,A01,A11-(*ev2),A12,A02,A12,A22-(*ev2),V02,V12,V22);
455 } else if (absA02<m) {
456 vectorInKernel3__nonZeroA00(A01,A11-(*ev2),A12,T00,A01,A02,A02,A12,A22-(*ev2),V02,V12,V22);
457 } else {
458 vectorInKernel3__nonZeroA00(A02,A12,A22-(*ev2),T00,A01,A02,A01,A11-(*ev2),A12,V02,V12,V22);
459 }
460 const register double dot=(*V02)*(*V00)+(*V12)*(*V10)+(*V22)*(*V20);
461 *V02-=dot*(*V00);
462 *V12-=dot*(*V10);
463 *V22-=dot*(*V20);
464 normalizeVector3(V02,V12,V22);
465 *V01=(*V10)*(*V22)-(*V12)*(*V20);
466 *V11=(*V20)*(*V02)-(*V00)*(*V22);
467 *V21=(*V00)*(*V12)-(*V02)*(*V10);
468 normalizeVector3(V01,V11,V21);
469 }
470 }
471 }
472
473 // General tensor product: arg_2(SL x SR) = arg_0(SL x SM) * arg_1(SM x SR)
474 // SM is the product of the last axis_offset entries in arg_0.getShape().
475 inline
476 void matrix_matrix_product(const int SL, const int SM, const int SR, const double* A, const double* B, double* C, int transpose)
477 {
478 if (transpose == 0) {
479 for (int i=0; i<SL; i++) {
480 for (int j=0; j<SR; j++) {
481 double sum = 0.0;
482 for (int l=0; l<SM; l++) {
483 sum += A[i+SL*l] * B[l+SM*j];
484 }
485 C[i+SL*j] = sum;
486 }
487 }
488 }
489 else if (transpose == 1) {
490 for (int i=0; i<SL; i++) {
491 for (int j=0; j<SR; j++) {
492 double sum = 0.0;
493 for (int l=0; l<SM; l++) {
494 sum += A[i*SM+l] * B[l+SM*j];
495 }
496 C[i+SL*j] = sum;
497 }
498 }
499 }
500 else if (transpose == 2) {
501 for (int i=0; i<SL; i++) {
502 for (int j=0; j<SR; j++) {
503 double sum = 0.0;
504 for (int l=0; l<SM; l++) {
505 sum += A[i+SL*l] * B[l*SR+j];
506 }
507 C[i+SL*j] = sum;
508 }
509 }
510 }
511 }
512
513 template <typename UnaryFunction>
514 inline void tensor_unary_operation(const int size,
515 const double *arg1,
516 double * argRes,
517 UnaryFunction operation)
518 {
519 for (int i = 0; i < size; ++i) {
520 argRes[i] = operation(arg1[i]);
521 }
522 return;
523 }
524
525 template <typename BinaryFunction>
526 inline void tensor_binary_operation(const int size,
527 const double *arg1,
528 const double *arg2,
529 double * argRes,
530 BinaryFunction operation)
531 {
532 for (int i = 0; i < size; ++i) {
533 argRes[i] = operation(arg1[i], arg2[i]);
534 }
535 return;
536 }
537
538 template <typename BinaryFunction>
539 inline void tensor_binary_operation(const int size,
540 double arg1,
541 const double *arg2,
542 double *argRes,
543 BinaryFunction operation)
544 {
545 for (int i = 0; i < size; ++i) {
546 argRes[i] = operation(arg1, arg2[i]);
547 }
548 return;
549 }
550
551 template <typename BinaryFunction>
552 inline void tensor_binary_operation(const int size,
553 const double *arg1,
554 double arg2,
555 double *argRes,
556 BinaryFunction operation)
557 {
558 for (int i = 0; i < size; ++i) {
559 argRes[i] = operation(arg1[i], arg2);
560 }
561 return;
562 }
563
564 } // end of namespace
565 #endif

  ViewVC Help
Powered by ViewVC 1.1.26