/[escript]/trunk/escript/src/LocalOps.h
ViewVC logotype

Contents of /trunk/escript/src/LocalOps.h

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1334 - (show annotations)
Thu Oct 25 05:08:54 2007 UTC (11 years, 8 months ago) by matt
File MIME type: text/plain
File size: 15843 byte(s)
Initial rewrite of escript unary operations. The rewritten operations are now single-pass.

1
2 /* $Id$ */
3
4 /*******************************************************
5 *
6 * Copyright 2003-2007 by ACceSS MNRF
7 * Copyright 2007 by University of Queensland
8 *
9 * http://esscc.uq.edu.au
10 * Primary Business: Queensland, Australia
11 * Licensed under the Open Software License version 3.0
12 * http://www.opensource.org/licenses/osl-3.0.php
13 *
14 *******************************************************/
15
16 #if !defined escript_LocalOps_H
17 #define escript_LocalOps_H
18 #ifdef __INTEL_COMPILER
19 # include <mathimf.h>
20 #else
21 # include <math.h>
22 #endif
23 #ifndef M_PI
24 # define M_PI 3.14159265358979323846 /* pi */
25 #endif
26
27 namespace escript {
28
29
30 /**
31 \brief
32 solves a 1x1 eigenvalue A*V=ev*V problem
33
34 \param A00 Input - A_00
35 \param ev0 Output - eigenvalue
36 */
37 inline
38 void eigenvalues1(const double A00,double* ev0) {
39
40 *ev0=A00;
41
42 }
43 /**
44 \brief
45 solves a 2x2 eigenvalue A*V=ev*V problem for symmetric A
46
47 \param A00 Input - A_00
48 \param A01 Input - A_01
49 \param A11 Input - A_11
50 \param ev0 Output - smallest eigenvalue
51 \param ev1 Output - largest eigenvalue
52 */
53 inline
54 void eigenvalues2(const double A00,const double A01,const double A11,
55 double* ev0, double* ev1) {
56 const register double trA=(A00+A11)/2.;
57 const register double A_00=A00-trA;
58 const register double A_11=A11-trA;
59 const register double s=sqrt(A01*A01-A_00*A_11);
60 *ev0=trA-s;
61 *ev1=trA+s;
62 }
63 /**
64 \brief
65 solves a 3x3 eigenvalue A*V=ev*V problem for symmetric A
66
67 \param A00 Input - A_00
68 \param A01 Input - A_01
69 \param A02 Input - A_02
70 \param A11 Input - A_11
71 \param A12 Input - A_12
72 \param A22 Input - A_22
73 \param ev0 Output - smallest eigenvalue
74 \param ev1 Output - eigenvalue
75 \param ev2 Output - largest eigenvalue
76 */
77 inline
78 void eigenvalues3(const double A00, const double A01, const double A02,
79 const double A11, const double A12,
80 const double A22,
81 double* ev0, double* ev1,double* ev2) {
82
83 const register double trA=(A00+A11+A22)/3.;
84 const register double A_00=A00-trA;
85 const register double A_11=A11-trA;
86 const register double A_22=A22-trA;
87 const register double A01_2=A01*A01;
88 const register double A02_2=A02*A02;
89 const register double A12_2=A12*A12;
90 const register double p=A02_2+A12_2+A01_2+(A_00*A_00+A_11*A_11+A_22*A_22)/2.;
91 if (p<=0.) {
92 *ev2=trA;
93 *ev1=trA;
94 *ev0=trA;
95
96 } else {
97 const register double q=(A02_2*A_11+A12_2*A_00+A01_2*A_22)-(A_00*A_11*A_22+2*A01*A12*A02);
98 const register double sq_p=sqrt(p/3.);
99 register double z=-q/(2*pow(sq_p,3));
100 if (z<-1.) {
101 z=-1.;
102 } else if (z>1.) {
103 z=1.;
104 }
105 const register double alpha_3=acos(z)/3.;
106 *ev2=trA+2.*sq_p*cos(alpha_3);
107 *ev1=trA-2.*sq_p*cos(alpha_3+M_PI/3.);
108 *ev0=trA-2.*sq_p*cos(alpha_3-M_PI/3.);
109 }
110 }
111 /**
112 \brief
113 solves a 1x1 eigenvalue A*V=ev*V problem for symmetric A
114
115 \param A00 Input - A_00
116 \param ev0 Output - eigenvalue
117 \param V00 Output - eigenvector
118 \param tol Input - tolerance to identify to eigenvalues
119 */
120 inline
121 void eigenvalues_and_eigenvectors1(const double A00,double* ev0,double* V00,const double tol)
122 {
123 eigenvalues1(A00,ev0);
124 *V00=1.;
125 return;
126 }
127 /**
128 \brief
129 returns a non-zero vector in the kernel of [[A00,A01],[A01,A11]] assuming that the kernel dimension is at least 1.
130
131 \param A00 Input - matrix component
132 \param A10 Input - matrix component
133 \param A01 Input - matrix component
134 \param A11 Input - matrix component
135 \param V0 Output - vector component
136 \param V1 Output - vector component
137 */
138 inline
139 void vectorInKernel2(const double A00,const double A10,const double A01,const double A11,
140 double* V0, double*V1)
141 {
142 register double absA00=fabs(A00);
143 register double absA10=fabs(A10);
144 register double absA01=fabs(A01);
145 register double absA11=fabs(A11);
146 register double m=absA11>absA10 ? absA11 : absA10;
147 if (absA00>m || absA01>m) {
148 *V0=-A01;
149 *V1=A00;
150 } else {
151 if (m<=0) {
152 *V0=1.;
153 *V1=0.;
154 } else {
155 *V0=A11;
156 *V1=-A10;
157 }
158 }
159 }
160 /**
161 \brief
162 returns a non-zero vector in the kernel of [[A00,A01,A02],[A10,A11,A12],[A20,A21,A22]]
163 assuming that the kernel dimension is at least 1 and A00 is non zero.
164
165 \param A00 Input - matrix component
166 \param A10 Input - matrix component
167 \param A20 Input - matrix component
168 \param A01 Input - matrix component
169 \param A11 Input - matrix component
170 \param A21 Input - matrix component
171 \param A02 Input - matrix component
172 \param A12 Input - matrix component
173 \param A22 Input - matrix component
174 \param V0 Output - vector component
175 \param V1 Output - vector component
176 \param V2 Output - vector component
177 */
178 inline
179 void vectorInKernel3__nonZeroA00(const double A00,const double A10,const double A20,
180 const double A01,const double A11,const double A21,
181 const double A02,const double A12,const double A22,
182 double* V0,double* V1,double* V2)
183 {
184 double TEMP0,TEMP1;
185 register const double I00=1./A00;
186 register const double IA10=I00*A10;
187 register const double IA20=I00*A20;
188 vectorInKernel2(A11-IA10*A01,A12-IA10*A02,
189 A21-IA20*A01,A22-IA20*A02,&TEMP0,&TEMP1);
190 *V0=-(A10*TEMP0+A20*TEMP1);
191 *V1=A00*TEMP0;
192 *V2=A00*TEMP1;
193 }
194
195 /**
196 \brief
197 solves a 2x2 eigenvalue A*V=ev*V problem for symmetric A. Eigenvectors are
198 ordered by increasing value and eigen vectors are normalizeVector3d such that
199 length is zero and first non-zero component is positive.
200
201 \param A00 Input - A_00
202 \param A01 Input - A_01
203 \param A11 Input - A_11
204 \param ev0 Output - smallest eigenvalue
205 \param ev1 Output - eigenvalue
206 \param V00 Output - eigenvector componenent coresponding to ev0
207 \param V10 Output - eigenvector componenent coresponding to ev0
208 \param V01 Output - eigenvector componenent coresponding to ev1
209 \param V11 Output - eigenvector componenent coresponding to ev1
210 \param tol Input - tolerance to identify to eigenvalues
211 */
212 inline
213 void eigenvalues_and_eigenvectors2(const double A00,const double A01,const double A11,
214 double* ev0, double* ev1,
215 double* V00, double* V10, double* V01, double* V11,
216 const double tol)
217 {
218 double TEMP0,TEMP1;
219 eigenvalues2(A00,A01,A11,ev0,ev1);
220 const register double absev0=fabs(*ev0);
221 const register double absev1=fabs(*ev1);
222 register double max_ev=absev0>absev1 ? absev0 : absev1;
223 if (fabs((*ev0)-(*ev1))<tol*max_ev) {
224 *V00=1.;
225 *V10=0.;
226 *V01=0.;
227 *V11=1.;
228 } else {
229 vectorInKernel2(A00-(*ev0),A01,A01,A11-(*ev0),&TEMP0,&TEMP1);
230 const register double scale=1./sqrt(TEMP0*TEMP0+TEMP1*TEMP1);
231 if (TEMP0<0.) {
232 *V00=-TEMP0*scale;
233 *V10=-TEMP1*scale;
234 if (TEMP1<0.) {
235 *V01= *V10;
236 *V11=-(*V00);
237 } else {
238 *V01=-(*V10);
239 *V11= (*V10);
240 }
241 } else if (TEMP0>0.) {
242 *V00=TEMP0*scale;
243 *V10=TEMP1*scale;
244 if (TEMP1<0.) {
245 *V01=-(*V10);
246 *V11= (*V00);
247 } else {
248 *V01= (*V10);
249 *V11=-(*V00);
250 }
251 } else {
252 *V00=0.;
253 *V10=1;
254 *V11=0.;
255 *V01=1.;
256 }
257 }
258 }
259 /**
260 \brief
261 nomalizes a 3-d vector such that length is one and first non-zero component is positive.
262
263 \param V0 - vector componenent
264 \param V1 - vector componenent
265 \param V2 - vector componenent
266 */
267 inline
268 void normalizeVector3(double* V0,double* V1,double* V2)
269 {
270 register double s;
271 if (*V0>0) {
272 s=1./sqrt((*V0)*(*V0)+(*V1)*(*V1)+(*V2)*(*V2));
273 *V0*=s;
274 *V1*=s;
275 *V2*=s;
276 } else if (*V0<0) {
277 s=-1./sqrt((*V0)*(*V0)+(*V1)*(*V1)+(*V2)*(*V2));
278 *V0*=s;
279 *V1*=s;
280 *V2*=s;
281 } else {
282 if (*V1>0) {
283 s=1./sqrt((*V1)*(*V1)+(*V2)*(*V2));
284 *V1*=s;
285 *V2*=s;
286 } else if (*V1<0) {
287 s=-1./sqrt((*V1)*(*V1)+(*V2)*(*V2));
288 *V1*=s;
289 *V2*=s;
290 } else {
291 *V2=1.;
292 }
293 }
294 }
295 /**
296 \brief
297 solves a 2x2 eigenvalue A*V=ev*V problem for symmetric A. Eigenvectors are
298 ordered by increasing value and eigen vectors are normalizeVector3d such that
299 length is zero and first non-zero component is positive.
300
301 \param A00 Input - A_00
302 \param A01 Input - A_01
303 \param A11 Input - A_11
304 \param ev0 Output - smallest eigenvalue
305 \param ev1 Output - eigenvalue
306 \param V00 Output - eigenvector componenent coresponding to ev0
307 \param V10 Output - eigenvector componenent coresponding to ev0
308 \param V01 Output - eigenvector componenent coresponding to ev1
309 \param V11 Output - eigenvector componenent coresponding to ev1
310 \param tol Input - tolerance to identify to eigenvalues
311 */
312 inline
313 void eigenvalues_and_eigenvectors3(const double A00, const double A01, const double A02,
314 const double A11, const double A12, const double A22,
315 double* ev0, double* ev1, double* ev2,
316 double* V00, double* V10, double* V20,
317 double* V01, double* V11, double* V21,
318 double* V02, double* V12, double* V22,
319 const double tol)
320 {
321 register const double absA01=fabs(A01);
322 register const double absA02=fabs(A02);
323 register const double m=absA01>absA02 ? absA01 : absA02;
324 if (m<=0) {
325 double TEMP_V00,TEMP_V10,TEMP_V01,TEMP_V11,TEMP_EV0,TEMP_EV1;
326 eigenvalues_and_eigenvectors2(A11,A12,A22,
327 &TEMP_EV0,&TEMP_EV1,
328 &TEMP_V00,&TEMP_V10,&TEMP_V01,&TEMP_V11,tol);
329 if (A00<=TEMP_EV0) {
330 *V00=1.;
331 *V10=0.;
332 *V20=0.;
333 *V01=0.;
334 *V11=TEMP_V00;
335 *V21=TEMP_V10;
336 *V02=0.;
337 *V12=TEMP_V01;
338 *V22=TEMP_V11;
339 *ev0=A00;
340 *ev1=TEMP_EV0;
341 *ev2=TEMP_EV1;
342 } else if (A00>TEMP_EV1) {
343 *V02=1.;
344 *V12=0.;
345 *V22=0.;
346 *V00=0.;
347 *V10=TEMP_V00;
348 *V20=TEMP_V10;
349 *V01=0.;
350 *V11=TEMP_V01;
351 *V21=TEMP_V11;
352 *ev0=TEMP_EV0;
353 *ev1=TEMP_EV1;
354 *ev2=A00;
355 } else {
356 *V01=1.;
357 *V11=0.;
358 *V21=0.;
359 *V00=0.;
360 *V10=TEMP_V00;
361 *V20=TEMP_V10;
362 *V02=0.;
363 *V12=TEMP_V01;
364 *V22=TEMP_V11;
365 *ev0=TEMP_EV0;
366 *ev1=A00;
367 *ev2=TEMP_EV1;
368 }
369 } else {
370 eigenvalues3(A00,A01,A02,A11,A12,A22,ev0,ev1,ev2);
371 const register double absev0=fabs(*ev0);
372 const register double absev1=fabs(*ev1);
373 const register double absev2=fabs(*ev2);
374 register double max_ev=absev0>absev1 ? absev0 : absev1;
375 max_ev=max_ev>absev2 ? max_ev : absev2;
376 const register double d_01=fabs((*ev0)-(*ev1));
377 const register double d_12=fabs((*ev1)-(*ev2));
378 const register double max_d=d_01>d_12 ? d_01 : d_12;
379 if (max_d<=tol*max_ev) {
380 *V00=1.;
381 *V10=0;
382 *V20=0;
383 *V01=0;
384 *V11=1.;
385 *V21=0;
386 *V02=0;
387 *V12=0;
388 *V22=1.;
389 } else {
390 const register double S00=A00-(*ev0);
391 const register double absS00=fabs(S00);
392 if (fabs(S00)>m) {
393 vectorInKernel3__nonZeroA00(S00,A01,A02,A01,A11-(*ev0),A12,A02,A12,A22-(*ev0),V00,V10,V20);
394 } else if (absA02<m) {
395 vectorInKernel3__nonZeroA00(A01,A11-(*ev0),A12,S00,A01,A02,A02,A12,A22-(*ev0),V00,V10,V20);
396 } else {
397 vectorInKernel3__nonZeroA00(A02,A12,A22-(*ev0),S00,A01,A02,A01,A11-(*ev0),A12,V00,V10,V20);
398 }
399 normalizeVector3(V00,V10,V20);;
400 const register double T00=A00-(*ev2);
401 const register double absT00=fabs(T00);
402 if (fabs(T00)>m) {
403 vectorInKernel3__nonZeroA00(T00,A01,A02,A01,A11-(*ev2),A12,A02,A12,A22-(*ev2),V02,V12,V22);
404 } else if (absA02<m) {
405 vectorInKernel3__nonZeroA00(A01,A11-(*ev2),A12,T00,A01,A02,A02,A12,A22-(*ev2),V02,V12,V22);
406 } else {
407 vectorInKernel3__nonZeroA00(A02,A12,A22-(*ev2),T00,A01,A02,A01,A11-(*ev2),A12,V02,V12,V22);
408 }
409 const register double dot=(*V02)*(*V00)+(*V12)*(*V10)+(*V22)*(*V20);
410 *V02-=dot*(*V00);
411 *V12-=dot*(*V10);
412 *V22-=dot*(*V20);
413 normalizeVector3(V02,V12,V22);
414 *V01=(*V10)*(*V22)-(*V12)*(*V20);
415 *V11=(*V20)*(*V02)-(*V00)*(*V22);
416 *V21=(*V00)*(*V12)-(*V02)*(*V10);
417 normalizeVector3(V01,V11,V21);
418 }
419 }
420 }
421
422 // General tensor product: arg_2(SL x SR) = arg_0(SL x SM) * arg_1(SM x SR)
423 // SM is the product of the last axis_offset entries in arg_0.getShape().
424 inline
425 void matrix_matrix_product(const int SL, const int SM, const int SR, const double* A, const double* B, double* C, int transpose)
426 {
427 if (transpose == 0) {
428 for (int i=0; i<SL; i++) {
429 for (int j=0; j<SR; j++) {
430 double sum = 0.0;
431 for (int l=0; l<SM; l++) {
432 sum += A[i+SL*l] * B[l+SM*j];
433 }
434 C[i+SL*j] = sum;
435 }
436 }
437 }
438 else if (transpose == 1) {
439 for (int i=0; i<SL; i++) {
440 for (int j=0; j<SR; j++) {
441 double sum = 0.0;
442 for (int l=0; l<SM; l++) {
443 sum += A[i*SM+l] * B[l+SM*j];
444 }
445 C[i+SL*j] = sum;
446 }
447 }
448 }
449 else if (transpose == 2) {
450 for (int i=0; i<SL; i++) {
451 for (int j=0; j<SR; j++) {
452 double sum = 0.0;
453 for (int l=0; l<SM; l++) {
454 sum += A[i+SL*l] * B[l*SR+j];
455 }
456 C[i+SL*j] = sum;
457 }
458 }
459 }
460 }
461
462 template <typename UnaryFunction>
463 inline void tensor_unary_operation(const int size,
464 const double *arg1,
465 double * argRes,
466 UnaryFunction operation)
467 {
468 for (int i = 0; i < size; ++i) {
469 argRes[i] = operation(arg1[i]);
470 }
471 return;
472 }
473
474 template <typename BinaryFunction>
475 inline void tensor_binary_operation(const int size,
476 const double *arg1,
477 const double *arg2,
478 double * argRes,
479 BinaryFunction operation)
480 {
481 for (int i = 0; i < size; ++i) {
482 argRes[i] = operation(arg1[i], arg2[i]);
483 }
484 return;
485 }
486
487 template <typename BinaryFunction>
488 inline void tensor_binary_operation(const int size,
489 double arg1,
490 const double *arg2,
491 double *argRes,
492 BinaryFunction operation)
493 {
494 for (int i = 0; i < size; ++i) {
495 argRes[i] = operation(arg1, arg2[i]);
496 }
497 return;
498 }
499
500 template <typename BinaryFunction>
501 inline void tensor_binary_operation(const int size,
502 const double *arg1,
503 double arg2,
504 double *argRes,
505 BinaryFunction operation)
506 {
507 for (int i = 0; i < size; ++i) {
508 argRes[i] = operation(arg1[i], arg2);
509 }
510 return;
511 }
512
513 } // end of namespace
514 #endif

  ViewVC Help
Powered by ViewVC 1.1.26