/[escript]/trunk/escript/src/LocalOps.h
ViewVC logotype

Contents of /trunk/escript/src/LocalOps.h

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4154 - (show annotations)
Tue Jan 22 09:30:23 2013 UTC (6 years, 9 months ago) by jfenwick
File MIME type: text/plain
File size: 16951 byte(s)
Round 1 of copyright fixes
1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2013 by University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Open Software License version 3.0
9 * http://www.opensource.org/licenses/osl-3.0.php
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development since 2012 by School of Earth Sciences
13 *
14 *****************************************************************************/
15
16
17 #if !defined escript_LocalOps_H
18 #define escript_LocalOps_H
19 #if defined(_WIN32) && defined(__INTEL_COMPILER)
20 # include <mathimf.h>
21 #else
22 # include <math.h>
23 #endif
24 #ifndef M_PI
25 # define M_PI 3.14159265358979323846 /* pi */
26 #endif
27
28
29 /**
30 \file LocalOps.h
31 \brief Describes binary operations performed on double*.
32
33 For operations on DataAbstract see BinaryOp.h.
34 For operations on DataVector see DataMaths.h.
35 */
36
37 namespace escript {
38
39 /**
40 \brief acts as a wrapper to isnan.
41 \warning if compiler does not support FP_NAN this function will always return false.
42 */
43 inline
44 bool nancheck(double d)
45 {
46 // Q: so why not just test d!=d?
47 // A: Coz it doesn't always work [I've checked].
48 // One theory is that the optimizer skips the test.
49 #ifdef isnan
50 return isnan(d);
51 #elif defined _isnan
52 return _isnan(d);
53 #else
54 return false;
55 #endif
56 }
57
58 /**
59 \brief returns a NaN.
60 \warning Should probably only used where you know you can test for NaNs
61 */
62 inline
63 double makeNaN()
64 {
65 #ifdef nan
66 return nan("");
67 #else
68 return sqrt(-1.);
69 #endif
70
71 }
72
73
74 /**
75 \brief
76 solves a 1x1 eigenvalue A*V=ev*V problem
77
78 \param A00 Input - A_00
79 \param ev0 Output - eigenvalue
80 */
81 inline
82 void eigenvalues1(const double A00,double* ev0) {
83
84 *ev0=A00;
85
86 }
87 /**
88 \brief
89 solves a 2x2 eigenvalue A*V=ev*V problem for symmetric A
90
91 \param A00 Input - A_00
92 \param A01 Input - A_01
93 \param A11 Input - A_11
94 \param ev0 Output - smallest eigenvalue
95 \param ev1 Output - largest eigenvalue
96 */
97 inline
98 void eigenvalues2(const double A00,const double A01,const double A11,
99 double* ev0, double* ev1) {
100 const register double trA=(A00+A11)/2.;
101 const register double A_00=A00-trA;
102 const register double A_11=A11-trA;
103 const register double s=sqrt(A01*A01-A_00*A_11);
104 *ev0=trA-s;
105 *ev1=trA+s;
106 }
107 /**
108 \brief
109 solves a 3x3 eigenvalue A*V=ev*V problem for symmetric A
110
111 \param A00 Input - A_00
112 \param A01 Input - A_01
113 \param A02 Input - A_02
114 \param A11 Input - A_11
115 \param A12 Input - A_12
116 \param A22 Input - A_22
117 \param ev0 Output - smallest eigenvalue
118 \param ev1 Output - eigenvalue
119 \param ev2 Output - largest eigenvalue
120 */
121 inline
122 void eigenvalues3(const double A00, const double A01, const double A02,
123 const double A11, const double A12,
124 const double A22,
125 double* ev0, double* ev1,double* ev2) {
126
127 const register double trA=(A00+A11+A22)/3.;
128 const register double A_00=A00-trA;
129 const register double A_11=A11-trA;
130 const register double A_22=A22-trA;
131 const register double A01_2=A01*A01;
132 const register double A02_2=A02*A02;
133 const register double A12_2=A12*A12;
134 const register double p=A02_2+A12_2+A01_2+(A_00*A_00+A_11*A_11+A_22*A_22)/2.;
135 if (p<=0.) {
136 *ev2=trA;
137 *ev1=trA;
138 *ev0=trA;
139
140 } else {
141 const register double q=(A02_2*A_11+A12_2*A_00+A01_2*A_22)-(A_00*A_11*A_22+2*A01*A12*A02);
142 const register double sq_p=sqrt(p/3.);
143 register double z=-q/(2*pow(sq_p,3));
144 if (z<-1.) {
145 z=-1.;
146 } else if (z>1.) {
147 z=1.;
148 }
149 const register double alpha_3=acos(z)/3.;
150 *ev2=trA+2.*sq_p*cos(alpha_3);
151 *ev1=trA-2.*sq_p*cos(alpha_3+M_PI/3.);
152 *ev0=trA-2.*sq_p*cos(alpha_3-M_PI/3.);
153 }
154 }
155 /**
156 \brief
157 solves a 1x1 eigenvalue A*V=ev*V problem for symmetric A
158
159 \param A00 Input - A_00
160 \param ev0 Output - eigenvalue
161 \param V00 Output - eigenvector
162 \param tol Input - tolerance to identify to eigenvalues
163 */
164 inline
165 void eigenvalues_and_eigenvectors1(const double A00,double* ev0,double* V00,const double tol)
166 {
167 eigenvalues1(A00,ev0);
168 *V00=1.;
169 return;
170 }
171 /**
172 \brief
173 returns a non-zero vector in the kernel of [[A00,A01],[A01,A11]] assuming that the kernel dimension is at least 1.
174
175 \param A00 Input - matrix component
176 \param A10 Input - matrix component
177 \param A01 Input - matrix component
178 \param A11 Input - matrix component
179 \param V0 Output - vector component
180 \param V1 Output - vector component
181 */
182 inline
183 void vectorInKernel2(const double A00,const double A10,const double A01,const double A11,
184 double* V0, double*V1)
185 {
186 register double absA00=fabs(A00);
187 register double absA10=fabs(A10);
188 register double absA01=fabs(A01);
189 register double absA11=fabs(A11);
190 register double m=absA11>absA10 ? absA11 : absA10;
191 if (absA00>m || absA01>m) {
192 *V0=-A01;
193 *V1=A00;
194 } else {
195 if (m<=0) {
196 *V0=1.;
197 *V1=0.;
198 } else {
199 *V0=A11;
200 *V1=-A10;
201 }
202 }
203 }
204 /**
205 \brief
206 returns a non-zero vector in the kernel of [[A00,A01,A02],[A10,A11,A12],[A20,A21,A22]]
207 assuming that the kernel dimension is at least 1 and A00 is non zero.
208
209 \param A00 Input - matrix component
210 \param A10 Input - matrix component
211 \param A20 Input - matrix component
212 \param A01 Input - matrix component
213 \param A11 Input - matrix component
214 \param A21 Input - matrix component
215 \param A02 Input - matrix component
216 \param A12 Input - matrix component
217 \param A22 Input - matrix component
218 \param V0 Output - vector component
219 \param V1 Output - vector component
220 \param V2 Output - vector component
221 */
222 inline
223 void vectorInKernel3__nonZeroA00(const double A00,const double A10,const double A20,
224 const double A01,const double A11,const double A21,
225 const double A02,const double A12,const double A22,
226 double* V0,double* V1,double* V2)
227 {
228 double TEMP0,TEMP1;
229 register const double I00=1./A00;
230 register const double IA10=I00*A10;
231 register const double IA20=I00*A20;
232 vectorInKernel2(A11-IA10*A01,A12-IA10*A02,
233 A21-IA20*A01,A22-IA20*A02,&TEMP0,&TEMP1);
234 *V0=-(A10*TEMP0+A20*TEMP1);
235 *V1=A00*TEMP0;
236 *V2=A00*TEMP1;
237 }
238
239 /**
240 \brief
241 solves a 2x2 eigenvalue A*V=ev*V problem for symmetric A. Eigenvectors are
242 ordered by increasing value and eigen vectors are normalizeVector3d such that
243 length is zero and first non-zero component is positive.
244
245 \param A00 Input - A_00
246 \param A01 Input - A_01
247 \param A11 Input - A_11
248 \param ev0 Output - smallest eigenvalue
249 \param ev1 Output - eigenvalue
250 \param V00 Output - eigenvector componenent coresponding to ev0
251 \param V10 Output - eigenvector componenent coresponding to ev0
252 \param V01 Output - eigenvector componenent coresponding to ev1
253 \param V11 Output - eigenvector componenent coresponding to ev1
254 \param tol Input - tolerance to identify to eigenvalues
255 */
256 inline
257 void eigenvalues_and_eigenvectors2(const double A00,const double A01,const double A11,
258 double* ev0, double* ev1,
259 double* V00, double* V10, double* V01, double* V11,
260 const double tol)
261 {
262 double TEMP0,TEMP1;
263 eigenvalues2(A00,A01,A11,ev0,ev1);
264 const register double absev0=fabs(*ev0);
265 const register double absev1=fabs(*ev1);
266 register double max_ev=absev0>absev1 ? absev0 : absev1;
267 if (fabs((*ev0)-(*ev1))<tol*max_ev) {
268 *V00=1.;
269 *V10=0.;
270 *V01=0.;
271 *V11=1.;
272 } else {
273 vectorInKernel2(A00-(*ev0),A01,A01,A11-(*ev0),&TEMP0,&TEMP1);
274 const register double scale=1./sqrt(TEMP0*TEMP0+TEMP1*TEMP1);
275 if (TEMP0<0.) {
276 *V00=-TEMP0*scale;
277 *V10=-TEMP1*scale;
278 if (TEMP1<0.) {
279 *V01= *V10;
280 *V11=-(*V00);
281 } else {
282 *V01=-(*V10);
283 *V11= (*V00);
284 }
285 } else if (TEMP0>0.) {
286 *V00=TEMP0*scale;
287 *V10=TEMP1*scale;
288 if (TEMP1<0.) {
289 *V01=-(*V10);
290 *V11= (*V00);
291 } else {
292 *V01= (*V10);
293 *V11=-(*V00);
294 }
295 } else {
296 *V00=0.;
297 *V10=1;
298 *V11=0.;
299 *V01=1.;
300 }
301 }
302 }
303 /**
304 \brief
305 nomalizes a 3-d vector such that length is one and first non-zero component is positive.
306
307 \param V0 - vector componenent
308 \param V1 - vector componenent
309 \param V2 - vector componenent
310 */
311 inline
312 void normalizeVector3(double* V0,double* V1,double* V2)
313 {
314 register double s;
315 if (*V0>0) {
316 s=1./sqrt((*V0)*(*V0)+(*V1)*(*V1)+(*V2)*(*V2));
317 *V0*=s;
318 *V1*=s;
319 *V2*=s;
320 } else if (*V0<0) {
321 s=-1./sqrt((*V0)*(*V0)+(*V1)*(*V1)+(*V2)*(*V2));
322 *V0*=s;
323 *V1*=s;
324 *V2*=s;
325 } else {
326 if (*V1>0) {
327 s=1./sqrt((*V1)*(*V1)+(*V2)*(*V2));
328 *V1*=s;
329 *V2*=s;
330 } else if (*V1<0) {
331 s=-1./sqrt((*V1)*(*V1)+(*V2)*(*V2));
332 *V1*=s;
333 *V2*=s;
334 } else {
335 *V2=1.;
336 }
337 }
338 }
339 /**
340 \brief
341 solves a 2x2 eigenvalue A*V=ev*V problem for symmetric A. Eigenvectors are
342 ordered by increasing value and eigen vectors are normalizeVector3d such that
343 length is zero and first non-zero component is positive.
344
345 \param A00 Input - A_00
346 \param A01 Input - A_01
347 \param A02 Input - A_02
348 \param A11 Input - A_11
349 \param A12 Input - A_12
350 \param A22 Input - A_22
351 \param ev0 Output - smallest eigenvalue
352 \param ev1 Output - eigenvalue
353 \param ev2 Output -
354 \param V00 Output - eigenvector componenent coresponding to ev0
355 \param V10 Output - eigenvector componenent coresponding to ev0
356 \param V20 Output -
357 \param V01 Output - eigenvector componenent coresponding to ev1
358 \param V11 Output - eigenvector componenent coresponding to ev1
359 \param V21 Output -
360 \param V02 Output -
361 \param V12 Output -
362 \param V22 Output -
363 \param tol Input - tolerance to identify to eigenvalues
364 */
365 inline
366 void eigenvalues_and_eigenvectors3(const double A00, const double A01, const double A02,
367 const double A11, const double A12, const double A22,
368 double* ev0, double* ev1, double* ev2,
369 double* V00, double* V10, double* V20,
370 double* V01, double* V11, double* V21,
371 double* V02, double* V12, double* V22,
372 const double tol)
373 {
374 register const double absA01=fabs(A01);
375 register const double absA02=fabs(A02);
376 register const double m=absA01>absA02 ? absA01 : absA02;
377 if (m<=0) {
378 double TEMP_V00,TEMP_V10,TEMP_V01,TEMP_V11,TEMP_EV0,TEMP_EV1;
379 eigenvalues_and_eigenvectors2(A11,A12,A22,
380 &TEMP_EV0,&TEMP_EV1,
381 &TEMP_V00,&TEMP_V10,&TEMP_V01,&TEMP_V11,tol);
382 if (A00<=TEMP_EV0) {
383 *V00=1.;
384 *V10=0.;
385 *V20=0.;
386 *V01=0.;
387 *V11=TEMP_V00;
388 *V21=TEMP_V10;
389 *V02=0.;
390 *V12=TEMP_V01;
391 *V22=TEMP_V11;
392 *ev0=A00;
393 *ev1=TEMP_EV0;
394 *ev2=TEMP_EV1;
395 } else if (A00>TEMP_EV1) {
396 *V02=1.;
397 *V12=0.;
398 *V22=0.;
399 *V00=0.;
400 *V10=TEMP_V00;
401 *V20=TEMP_V10;
402 *V01=0.;
403 *V11=TEMP_V01;
404 *V21=TEMP_V11;
405 *ev0=TEMP_EV0;
406 *ev1=TEMP_EV1;
407 *ev2=A00;
408 } else {
409 *V01=1.;
410 *V11=0.;
411 *V21=0.;
412 *V00=0.;
413 *V10=TEMP_V00;
414 *V20=TEMP_V10;
415 *V02=0.;
416 *V12=TEMP_V01;
417 *V22=TEMP_V11;
418 *ev0=TEMP_EV0;
419 *ev1=A00;
420 *ev2=TEMP_EV1;
421 }
422 } else {
423 eigenvalues3(A00,A01,A02,A11,A12,A22,ev0,ev1,ev2);
424 const register double absev0=fabs(*ev0);
425 const register double absev1=fabs(*ev1);
426 const register double absev2=fabs(*ev2);
427 register double max_ev=absev0>absev1 ? absev0 : absev1;
428 max_ev=max_ev>absev2 ? max_ev : absev2;
429 const register double d_01=fabs((*ev0)-(*ev1));
430 const register double d_12=fabs((*ev1)-(*ev2));
431 const register double max_d=d_01>d_12 ? d_01 : d_12;
432 if (max_d<=tol*max_ev) {
433 *V00=1.;
434 *V10=0;
435 *V20=0;
436 *V01=0;
437 *V11=1.;
438 *V21=0;
439 *V02=0;
440 *V12=0;
441 *V22=1.;
442 } else {
443 const register double S00=A00-(*ev0);
444 const register double absS00=fabs(S00);
445 if (absS00>m) {
446 vectorInKernel3__nonZeroA00(S00,A01,A02,A01,A11-(*ev0),A12,A02,A12,A22-(*ev0),V00,V10,V20);
447 } else if (absA02<m) {
448 vectorInKernel3__nonZeroA00(A01,A11-(*ev0),A12,S00,A01,A02,A02,A12,A22-(*ev0),V00,V10,V20);
449 } else {
450 vectorInKernel3__nonZeroA00(A02,A12,A22-(*ev0),S00,A01,A02,A01,A11-(*ev0),A12,V00,V10,V20);
451 }
452 normalizeVector3(V00,V10,V20);;
453 const register double T00=A00-(*ev2);
454 const register double absT00=fabs(T00);
455 if (absT00>m) {
456 vectorInKernel3__nonZeroA00(T00,A01,A02,A01,A11-(*ev2),A12,A02,A12,A22-(*ev2),V02,V12,V22);
457 } else if (absA02<m) {
458 vectorInKernel3__nonZeroA00(A01,A11-(*ev2),A12,T00,A01,A02,A02,A12,A22-(*ev2),V02,V12,V22);
459 } else {
460 vectorInKernel3__nonZeroA00(A02,A12,A22-(*ev2),T00,A01,A02,A01,A11-(*ev2),A12,V02,V12,V22);
461 }
462 const register double dot=(*V02)*(*V00)+(*V12)*(*V10)+(*V22)*(*V20);
463 *V02-=dot*(*V00);
464 *V12-=dot*(*V10);
465 *V22-=dot*(*V20);
466 normalizeVector3(V02,V12,V22);
467 *V01=(*V10)*(*V22)-(*V12)*(*V20);
468 *V11=(*V20)*(*V02)-(*V00)*(*V22);
469 *V21=(*V00)*(*V12)-(*V02)*(*V10);
470 normalizeVector3(V01,V11,V21);
471 }
472 }
473 }
474
475 // General tensor product: arg_2(SL x SR) = arg_0(SL x SM) * arg_1(SM x SR)
476 // SM is the product of the last axis_offset entries in arg_0.getShape().
477 inline
478 void matrix_matrix_product(const int SL, const int SM, const int SR, const double* A, const double* B, double* C, int transpose)
479 {
480 if (transpose == 0) {
481 for (int i=0; i<SL; i++) {
482 for (int j=0; j<SR; j++) {
483 double sum = 0.0;
484 for (int l=0; l<SM; l++) {
485 sum += A[i+SL*l] * B[l+SM*j];
486 }
487 C[i+SL*j] = sum;
488 }
489 }
490 }
491 else if (transpose == 1) {
492 for (int i=0; i<SL; i++) {
493 for (int j=0; j<SR; j++) {
494 double sum = 0.0;
495 for (int l=0; l<SM; l++) {
496 sum += A[i*SM+l] * B[l+SM*j];
497 }
498 C[i+SL*j] = sum;
499 }
500 }
501 }
502 else if (transpose == 2) {
503 for (int i=0; i<SL; i++) {
504 for (int j=0; j<SR; j++) {
505 double sum = 0.0;
506 for (int l=0; l<SM; l++) {
507 sum += A[i+SL*l] * B[l*SR+j];
508 }
509 C[i+SL*j] = sum;
510 }
511 }
512 }
513 }
514
515 template <typename UnaryFunction>
516 inline void tensor_unary_operation(const int size,
517 const double *arg1,
518 double * argRes,
519 UnaryFunction operation)
520 {
521 for (int i = 0; i < size; ++i) {
522 argRes[i] = operation(arg1[i]);
523 }
524 return;
525 }
526
527 template <typename BinaryFunction>
528 inline void tensor_binary_operation(const int size,
529 const double *arg1,
530 const double *arg2,
531 double * argRes,
532 BinaryFunction operation)
533 {
534 for (int i = 0; i < size; ++i) {
535 argRes[i] = operation(arg1[i], arg2[i]);
536 }
537 return;
538 }
539
540 template <typename BinaryFunction>
541 inline void tensor_binary_operation(const int size,
542 double arg1,
543 const double *arg2,
544 double *argRes,
545 BinaryFunction operation)
546 {
547 for (int i = 0; i < size; ++i) {
548 argRes[i] = operation(arg1, arg2[i]);
549 }
550 return;
551 }
552
553 template <typename BinaryFunction>
554 inline void tensor_binary_operation(const int size,
555 const double *arg1,
556 double arg2,
557 double *argRes,
558 BinaryFunction operation)
559 {
560 for (int i = 0; i < size; ++i) {
561 argRes[i] = operation(arg1[i], arg2);
562 }
563 return;
564 }
565
566 } // end of namespace
567 #endif

  ViewVC Help
Powered by ViewVC 1.1.26