/[escript]/branches/subworld2/escriptcore/src/LocalOps.h
ViewVC logotype

Contents of /branches/subworld2/escriptcore/src/LocalOps.h

Parent Directory Parent Directory | Revision Log Revision Log


Revision 5504 - (show annotations)
Wed Mar 4 22:58:13 2015 UTC (4 years, 1 month ago) by jfenwick
File MIME type: text/plain
File size: 16963 byte(s)
Again with a more up to date copy


1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2015 by University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Open Software License version 3.0
9 * http://www.opensource.org/licenses/osl-3.0.php
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development 2012-2013 by School of Earth Sciences
13 * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 *
15 *****************************************************************************/
16
17
18 #if !defined escript_LocalOps_H
19 #define escript_LocalOps_H
20 #include <cmath>
21 #ifndef M_PI
22 # define M_PI 3.14159265358979323846 /* pi */
23 #endif
24
25
26 /**
27 \file LocalOps.h
28 \brief Describes binary operations performed on double*.
29
30 For operations on DataAbstract see BinaryOp.h.
31 For operations on DataVector see DataMaths.h.
32 */
33
34 namespace escript {
35
36 /**
37 \brief acts as a wrapper to isnan.
38 \warning if compiler does not support FP_NAN this function will always return false.
39 */
40 inline
41 bool nancheck(double d)
42 {
43 using namespace std;
44 // Q: so why not just test d!=d?
45 // A: Coz it doesn't always work [I've checked].
46 // One theory is that the optimizer skips the test.
47 #if defined _isnan
48 return _isnan(d);
49 #else
50 return isnan(d); // isNan should be a function in C++ land
51 #endif
52 }
53
54 /**
55 \brief returns a NaN.
56 \warning Should probably only used where you know you can test for NaNs
57 */
58 inline
59 double makeNaN()
60 {
61 #ifdef nan
62 return nan("");
63 #else
64 return sqrt(-1.);
65 #endif
66
67 }
68
69
70 /**
71 \brief
72 solves a 1x1 eigenvalue A*V=ev*V problem
73
74 \param A00 Input - A_00
75 \param ev0 Output - eigenvalue
76 */
77 inline
78 void eigenvalues1(const double A00,double* ev0) {
79
80 *ev0=A00;
81
82 }
83 /**
84 \brief
85 solves a 2x2 eigenvalue A*V=ev*V problem for symmetric A
86
87 \param A00 Input - A_00
88 \param A01 Input - A_01
89 \param A11 Input - A_11
90 \param ev0 Output - smallest eigenvalue
91 \param ev1 Output - largest eigenvalue
92 */
93 inline
94 void eigenvalues2(const double A00,const double A01,const double A11,
95 double* ev0, double* ev1) {
96 const register double trA=(A00+A11)/2.;
97 const register double A_00=A00-trA;
98 const register double A_11=A11-trA;
99 const register double s=sqrt(A01*A01-A_00*A_11);
100 *ev0=trA-s;
101 *ev1=trA+s;
102 }
103 /**
104 \brief
105 solves a 3x3 eigenvalue A*V=ev*V problem for symmetric A
106
107 \param A00 Input - A_00
108 \param A01 Input - A_01
109 \param A02 Input - A_02
110 \param A11 Input - A_11
111 \param A12 Input - A_12
112 \param A22 Input - A_22
113 \param ev0 Output - smallest eigenvalue
114 \param ev1 Output - eigenvalue
115 \param ev2 Output - largest eigenvalue
116 */
117 inline
118 void eigenvalues3(const double A00, const double A01, const double A02,
119 const double A11, const double A12,
120 const double A22,
121 double* ev0, double* ev1,double* ev2) {
122
123 const register double trA=(A00+A11+A22)/3.;
124 const register double A_00=A00-trA;
125 const register double A_11=A11-trA;
126 const register double A_22=A22-trA;
127 const register double A01_2=A01*A01;
128 const register double A02_2=A02*A02;
129 const register double A12_2=A12*A12;
130 const register double p=A02_2+A12_2+A01_2+(A_00*A_00+A_11*A_11+A_22*A_22)/2.;
131 if (p<=0.) {
132 *ev2=trA;
133 *ev1=trA;
134 *ev0=trA;
135
136 } else {
137 const register double q=(A02_2*A_11+A12_2*A_00+A01_2*A_22)-(A_00*A_11*A_22+2*A01*A12*A02);
138 const register double sq_p=sqrt(p/3.);
139 register double z=-q/(2*pow(sq_p,3));
140 if (z<-1.) {
141 z=-1.;
142 } else if (z>1.) {
143 z=1.;
144 }
145 const register double alpha_3=acos(z)/3.;
146 *ev2=trA+2.*sq_p*cos(alpha_3);
147 *ev1=trA-2.*sq_p*cos(alpha_3+M_PI/3.);
148 *ev0=trA-2.*sq_p*cos(alpha_3-M_PI/3.);
149 }
150 }
151 /**
152 \brief
153 solves a 1x1 eigenvalue A*V=ev*V problem for symmetric A
154
155 \param A00 Input - A_00
156 \param ev0 Output - eigenvalue
157 \param V00 Output - eigenvector
158 \param tol Input - tolerance to identify to eigenvalues
159 */
160 inline
161 void eigenvalues_and_eigenvectors1(const double A00,double* ev0,double* V00,const double tol)
162 {
163 eigenvalues1(A00,ev0);
164 *V00=1.;
165 return;
166 }
167 /**
168 \brief
169 returns a non-zero vector in the kernel of [[A00,A01],[A01,A11]] assuming that the kernel dimension is at least 1.
170
171 \param A00 Input - matrix component
172 \param A10 Input - matrix component
173 \param A01 Input - matrix component
174 \param A11 Input - matrix component
175 \param V0 Output - vector component
176 \param V1 Output - vector component
177 */
178 inline
179 void vectorInKernel2(const double A00,const double A10,const double A01,const double A11,
180 double* V0, double*V1)
181 {
182 register double absA00=fabs(A00);
183 register double absA10=fabs(A10);
184 register double absA01=fabs(A01);
185 register double absA11=fabs(A11);
186 register double m=absA11>absA10 ? absA11 : absA10;
187 if (absA00>m || absA01>m) {
188 *V0=-A01;
189 *V1=A00;
190 } else {
191 if (m<=0) {
192 *V0=1.;
193 *V1=0.;
194 } else {
195 *V0=A11;
196 *V1=-A10;
197 }
198 }
199 }
200 /**
201 \brief
202 returns a non-zero vector in the kernel of [[A00,A01,A02],[A10,A11,A12],[A20,A21,A22]]
203 assuming that the kernel dimension is at least 1 and A00 is non zero.
204
205 \param A00 Input - matrix component
206 \param A10 Input - matrix component
207 \param A20 Input - matrix component
208 \param A01 Input - matrix component
209 \param A11 Input - matrix component
210 \param A21 Input - matrix component
211 \param A02 Input - matrix component
212 \param A12 Input - matrix component
213 \param A22 Input - matrix component
214 \param V0 Output - vector component
215 \param V1 Output - vector component
216 \param V2 Output - vector component
217 */
218 inline
219 void vectorInKernel3__nonZeroA00(const double A00,const double A10,const double A20,
220 const double A01,const double A11,const double A21,
221 const double A02,const double A12,const double A22,
222 double* V0,double* V1,double* V2)
223 {
224 double TEMP0,TEMP1;
225 register const double I00=1./A00;
226 register const double IA10=I00*A10;
227 register const double IA20=I00*A20;
228 vectorInKernel2(A11-IA10*A01,A12-IA10*A02,
229 A21-IA20*A01,A22-IA20*A02,&TEMP0,&TEMP1);
230 *V0=-(A10*TEMP0+A20*TEMP1);
231 *V1=A00*TEMP0;
232 *V2=A00*TEMP1;
233 }
234
235 /**
236 \brief
237 solves a 2x2 eigenvalue A*V=ev*V problem for symmetric A. Eigenvectors are
238 ordered by increasing value and eigen vectors are normalizeVector3d such that
239 length is zero and first non-zero component is positive.
240
241 \param A00 Input - A_00
242 \param A01 Input - A_01
243 \param A11 Input - A_11
244 \param ev0 Output - smallest eigenvalue
245 \param ev1 Output - eigenvalue
246 \param V00 Output - eigenvector componenent coresponding to ev0
247 \param V10 Output - eigenvector componenent coresponding to ev0
248 \param V01 Output - eigenvector componenent coresponding to ev1
249 \param V11 Output - eigenvector componenent coresponding to ev1
250 \param tol Input - tolerance to identify to eigenvalues
251 */
252 inline
253 void eigenvalues_and_eigenvectors2(const double A00,const double A01,const double A11,
254 double* ev0, double* ev1,
255 double* V00, double* V10, double* V01, double* V11,
256 const double tol)
257 {
258 double TEMP0,TEMP1;
259 eigenvalues2(A00,A01,A11,ev0,ev1);
260 const register double absev0=fabs(*ev0);
261 const register double absev1=fabs(*ev1);
262 register double max_ev=absev0>absev1 ? absev0 : absev1;
263 if (fabs((*ev0)-(*ev1))<tol*max_ev) {
264 *V00=1.;
265 *V10=0.;
266 *V01=0.;
267 *V11=1.;
268 } else {
269 vectorInKernel2(A00-(*ev0),A01,A01,A11-(*ev0),&TEMP0,&TEMP1);
270 const register double scale=1./sqrt(TEMP0*TEMP0+TEMP1*TEMP1);
271 if (TEMP0<0.) {
272 *V00=-TEMP0*scale;
273 *V10=-TEMP1*scale;
274 if (TEMP1<0.) {
275 *V01= *V10;
276 *V11=-(*V00);
277 } else {
278 *V01=-(*V10);
279 *V11= (*V00);
280 }
281 } else if (TEMP0>0.) {
282 *V00=TEMP0*scale;
283 *V10=TEMP1*scale;
284 if (TEMP1<0.) {
285 *V01=-(*V10);
286 *V11= (*V00);
287 } else {
288 *V01= (*V10);
289 *V11=-(*V00);
290 }
291 } else {
292 *V00=0.;
293 *V10=1;
294 *V11=0.;
295 *V01=1.;
296 }
297 }
298 }
299 /**
300 \brief
301 nomalizes a 3-d vector such that length is one and first non-zero component is positive.
302
303 \param V0 - vector componenent
304 \param V1 - vector componenent
305 \param V2 - vector componenent
306 */
307 inline
308 void normalizeVector3(double* V0,double* V1,double* V2)
309 {
310 register double s;
311 if (*V0>0) {
312 s=1./sqrt((*V0)*(*V0)+(*V1)*(*V1)+(*V2)*(*V2));
313 *V0*=s;
314 *V1*=s;
315 *V2*=s;
316 } else if (*V0<0) {
317 s=-1./sqrt((*V0)*(*V0)+(*V1)*(*V1)+(*V2)*(*V2));
318 *V0*=s;
319 *V1*=s;
320 *V2*=s;
321 } else {
322 if (*V1>0) {
323 s=1./sqrt((*V1)*(*V1)+(*V2)*(*V2));
324 *V1*=s;
325 *V2*=s;
326 } else if (*V1<0) {
327 s=-1./sqrt((*V1)*(*V1)+(*V2)*(*V2));
328 *V1*=s;
329 *V2*=s;
330 } else {
331 *V2=1.;
332 }
333 }
334 }
335 /**
336 \brief
337 solves a 2x2 eigenvalue A*V=ev*V problem for symmetric A. Eigenvectors are
338 ordered by increasing value and eigen vectors are normalizeVector3d such that
339 length is zero and first non-zero component is positive.
340
341 \param A00 Input - A_00
342 \param A01 Input - A_01
343 \param A02 Input - A_02
344 \param A11 Input - A_11
345 \param A12 Input - A_12
346 \param A22 Input - A_22
347 \param ev0 Output - smallest eigenvalue
348 \param ev1 Output - eigenvalue
349 \param ev2 Output -
350 \param V00 Output - eigenvector componenent coresponding to ev0
351 \param V10 Output - eigenvector componenent coresponding to ev0
352 \param V20 Output -
353 \param V01 Output - eigenvector componenent coresponding to ev1
354 \param V11 Output - eigenvector componenent coresponding to ev1
355 \param V21 Output -
356 \param V02 Output -
357 \param V12 Output -
358 \param V22 Output -
359 \param tol Input - tolerance to identify to eigenvalues
360 */
361 inline
362 void eigenvalues_and_eigenvectors3(const double A00, const double A01, const double A02,
363 const double A11, const double A12, const double A22,
364 double* ev0, double* ev1, double* ev2,
365 double* V00, double* V10, double* V20,
366 double* V01, double* V11, double* V21,
367 double* V02, double* V12, double* V22,
368 const double tol)
369 {
370 register const double absA01=fabs(A01);
371 register const double absA02=fabs(A02);
372 register const double m=absA01>absA02 ? absA01 : absA02;
373 if (m<=0) {
374 double TEMP_V00,TEMP_V10,TEMP_V01,TEMP_V11,TEMP_EV0,TEMP_EV1;
375 eigenvalues_and_eigenvectors2(A11,A12,A22,
376 &TEMP_EV0,&TEMP_EV1,
377 &TEMP_V00,&TEMP_V10,&TEMP_V01,&TEMP_V11,tol);
378 if (A00<=TEMP_EV0) {
379 *V00=1.;
380 *V10=0.;
381 *V20=0.;
382 *V01=0.;
383 *V11=TEMP_V00;
384 *V21=TEMP_V10;
385 *V02=0.;
386 *V12=TEMP_V01;
387 *V22=TEMP_V11;
388 *ev0=A00;
389 *ev1=TEMP_EV0;
390 *ev2=TEMP_EV1;
391 } else if (A00>TEMP_EV1) {
392 *V02=1.;
393 *V12=0.;
394 *V22=0.;
395 *V00=0.;
396 *V10=TEMP_V00;
397 *V20=TEMP_V10;
398 *V01=0.;
399 *V11=TEMP_V01;
400 *V21=TEMP_V11;
401 *ev0=TEMP_EV0;
402 *ev1=TEMP_EV1;
403 *ev2=A00;
404 } else {
405 *V01=1.;
406 *V11=0.;
407 *V21=0.;
408 *V00=0.;
409 *V10=TEMP_V00;
410 *V20=TEMP_V10;
411 *V02=0.;
412 *V12=TEMP_V01;
413 *V22=TEMP_V11;
414 *ev0=TEMP_EV0;
415 *ev1=A00;
416 *ev2=TEMP_EV1;
417 }
418 } else {
419 eigenvalues3(A00,A01,A02,A11,A12,A22,ev0,ev1,ev2);
420 const register double absev0=fabs(*ev0);
421 const register double absev1=fabs(*ev1);
422 const register double absev2=fabs(*ev2);
423 register double max_ev=absev0>absev1 ? absev0 : absev1;
424 max_ev=max_ev>absev2 ? max_ev : absev2;
425 const register double d_01=fabs((*ev0)-(*ev1));
426 const register double d_12=fabs((*ev1)-(*ev2));
427 const register double max_d=d_01>d_12 ? d_01 : d_12;
428 if (max_d<=tol*max_ev) {
429 *V00=1.;
430 *V10=0;
431 *V20=0;
432 *V01=0;
433 *V11=1.;
434 *V21=0;
435 *V02=0;
436 *V12=0;
437 *V22=1.;
438 } else {
439 const register double S00=A00-(*ev0);
440 const register double absS00=fabs(S00);
441 if (absS00>m) {
442 vectorInKernel3__nonZeroA00(S00,A01,A02,A01,A11-(*ev0),A12,A02,A12,A22-(*ev0),V00,V10,V20);
443 } else if (absA02<m) {
444 vectorInKernel3__nonZeroA00(A01,A11-(*ev0),A12,S00,A01,A02,A02,A12,A22-(*ev0),V00,V10,V20);
445 } else {
446 vectorInKernel3__nonZeroA00(A02,A12,A22-(*ev0),S00,A01,A02,A01,A11-(*ev0),A12,V00,V10,V20);
447 }
448 normalizeVector3(V00,V10,V20);;
449 const register double T00=A00-(*ev2);
450 const register double absT00=fabs(T00);
451 if (absT00>m) {
452 vectorInKernel3__nonZeroA00(T00,A01,A02,A01,A11-(*ev2),A12,A02,A12,A22-(*ev2),V02,V12,V22);
453 } else if (absA02<m) {
454 vectorInKernel3__nonZeroA00(A01,A11-(*ev2),A12,T00,A01,A02,A02,A12,A22-(*ev2),V02,V12,V22);
455 } else {
456 vectorInKernel3__nonZeroA00(A02,A12,A22-(*ev2),T00,A01,A02,A01,A11-(*ev2),A12,V02,V12,V22);
457 }
458 const register double dot=(*V02)*(*V00)+(*V12)*(*V10)+(*V22)*(*V20);
459 *V02-=dot*(*V00);
460 *V12-=dot*(*V10);
461 *V22-=dot*(*V20);
462 normalizeVector3(V02,V12,V22);
463 *V01=(*V10)*(*V22)-(*V12)*(*V20);
464 *V11=(*V20)*(*V02)-(*V00)*(*V22);
465 *V21=(*V00)*(*V12)-(*V02)*(*V10);
466 normalizeVector3(V01,V11,V21);
467 }
468 }
469 }
470
471 // General tensor product: arg_2(SL x SR) = arg_0(SL x SM) * arg_1(SM x SR)
472 // SM is the product of the last axis_offset entries in arg_0.getShape().
473 inline
474 void matrix_matrix_product(const int SL, const int SM, const int SR, const double* A, const double* B, double* C, int transpose)
475 {
476 if (transpose == 0) {
477 for (int i=0; i<SL; i++) {
478 for (int j=0; j<SR; j++) {
479 double sum = 0.0;
480 for (int l=0; l<SM; l++) {
481 sum += A[i+SL*l] * B[l+SM*j];
482 }
483 C[i+SL*j] = sum;
484 }
485 }
486 }
487 else if (transpose == 1) {
488 for (int i=0; i<SL; i++) {
489 for (int j=0; j<SR; j++) {
490 double sum = 0.0;
491 for (int l=0; l<SM; l++) {
492 sum += A[i*SM+l] * B[l+SM*j];
493 }
494 C[i+SL*j] = sum;
495 }
496 }
497 }
498 else if (transpose == 2) {
499 for (int i=0; i<SL; i++) {
500 for (int j=0; j<SR; j++) {
501 double sum = 0.0;
502 for (int l=0; l<SM; l++) {
503 sum += A[i+SL*l] * B[l*SR+j];
504 }
505 C[i+SL*j] = sum;
506 }
507 }
508 }
509 }
510
511 template <typename UnaryFunction>
512 inline void tensor_unary_operation(const int size,
513 const double *arg1,
514 double * argRes,
515 UnaryFunction operation)
516 {
517 for (int i = 0; i < size; ++i) {
518 argRes[i] = operation(arg1[i]);
519 }
520 return;
521 }
522
523 template <typename BinaryFunction>
524 inline void tensor_binary_operation(const int size,
525 const double *arg1,
526 const double *arg2,
527 double * argRes,
528 BinaryFunction operation)
529 {
530 for (int i = 0; i < size; ++i) {
531 argRes[i] = operation(arg1[i], arg2[i]);
532 }
533 return;
534 }
535
536 template <typename BinaryFunction>
537 inline void tensor_binary_operation(const int size,
538 double arg1,
539 const double *arg2,
540 double *argRes,
541 BinaryFunction operation)
542 {
543 for (int i = 0; i < size; ++i) {
544 argRes[i] = operation(arg1, arg2[i]);
545 }
546 return;
547 }
548
549 template <typename BinaryFunction>
550 inline void tensor_binary_operation(const int size,
551 const double *arg1,
552 double arg2,
553 double *argRes,
554 BinaryFunction operation)
555 {
556 for (int i = 0; i < size; ++i) {
557 argRes[i] = operation(arg1[i], arg2);
558 }
559 return;
560 }
561
562 } // end of namespace
563 #endif

  ViewVC Help
Powered by ViewVC 1.1.26