# Contents of /trunk/escript/src/LocalOps.h

Revision 757 - (show annotations)
Mon Jun 26 13:12:56 2006 UTC (13 years, 8 months ago) by woo409
File MIME type: text/plain
File size: 13819 byte(s)
```+ Merge of intelc_win32 branch (revision 741:755) with trunk. Tested on iVEC altix (run_tests and py_tests all pass)

```
 1 // \$Id\$ 2 /* 3 ************************************************************ 4 * Copyright 2006 by ACcESS MNRF * 5 * * 6 * http://www.access.edu.au * 7 * Primary Business: Queensland, Australia * 8 * Licensed under the Open Software License version 3.0 * 9 * http://www.opensource.org/licenses/osl-3.0.php * 10 * * 11 ************************************************************ 12 */ 13 14 #if !defined escript_LocalOps_H 15 #define escript_LocalOps_H 16 #ifdef _WIN32 && __INTEL_COMPILER 17 #include 18 #define M_PI 3.141592653589 19 #else 20 #include 21 #endif 22 23 namespace escript { 24 25 26 /** 27 \brief 28 solves a 1x1 eigenvalue A*V=ev*V problem 29 30 \param A00 Input - A_00 31 \param ev0 Output - eigenvalue 32 */ 33 inline 34 void eigenvalues1(const double A00,double* ev0) { 35 36 *ev0=A00; 37 38 } 39 /** 40 \brief 41 solves a 2x2 eigenvalue A*V=ev*V problem for symmetric A 42 43 \param A00 Input - A_00 44 \param A01 Input - A_01 45 \param A11 Input - A_11 46 \param ev0 Output - smallest eigenvalue 47 \param ev1 Output - largest eigenvalue 48 */ 49 inline 50 void eigenvalues2(const double A00,const double A01,const double A11, 51 double* ev0, double* ev1) { 52 const register double trA=(A00+A11)/2.; 53 const register double A_00=A00-trA; 54 const register double A_11=A11-trA; 55 const register double s=sqrt(A01*A01-A_00*A_11); 56 *ev0=trA-s; 57 *ev1=trA+s; 58 } 59 /** 60 \brief 61 solves a 3x3 eigenvalue A*V=ev*V problem for symmetric A 62 63 \param A00 Input - A_00 64 \param A01 Input - A_01 65 \param A02 Input - A_02 66 \param A11 Input - A_11 67 \param A12 Input - A_12 68 \param A22 Input - A_22 69 \param ev0 Output - smallest eigenvalue 70 \param ev1 Output - eigenvalue 71 \param ev2 Output - largest eigenvalue 72 */ 73 inline 74 void eigenvalues3(const double A00, const double A01, const double A02, 75 const double A11, const double A12, 76 const double A22, 77 double* ev0, double* ev1,double* ev2) { 78 79 const register double trA=(A00+A11+A22)/3.; 80 const register double A_00=A00-trA; 81 const register double A_11=A11-trA; 82 const register double A_22=A22-trA; 83 const register double A01_2=A01*A01; 84 const register double A02_2=A02*A02; 85 const register double A12_2=A12*A12; 86 const register double p=A02_2+A12_2+A01_2+(A_00*A_00+A_11*A_11+A_22*A_22)/2.; 87 if (p<=0.) { 88 *ev2=trA; 89 *ev1=trA; 90 *ev0=trA; 91 92 } else { 93 const register double q=(A02_2*A_11+A12_2*A_00+A01_2*A_22)-(A_00*A_11*A_22+2*A01*A12*A02); 94 const register double sq_p=sqrt(p/3.); 95 register double z=-q/(2*pow(sq_p,3)); 96 if (z<-1.) { 97 z=-1.; 98 } else if (z>1.) { 99 z=1.; 100 } 101 const register double alpha_3=acos(z)/3.; 102 *ev2=trA+2.*sq_p*cos(alpha_3); 103 *ev1=trA-2.*sq_p*cos(alpha_3+M_PI/3.); 104 *ev0=trA-2.*sq_p*cos(alpha_3-M_PI/3.); 105 } 106 } 107 /** 108 \brief 109 solves a 1x1 eigenvalue A*V=ev*V problem for symmetric A 110 111 \param A00 Input - A_00 112 \param ev0 Output - eigenvalue 113 \param V00 Output - eigenvector 114 \param tol Input - tolerance to identify to eigenvalues 115 */ 116 inline 117 void eigenvalues_and_eigenvectors1(const double A00,double* ev0,double* V00,const double tol) 118 { 119 eigenvalues1(A00,ev0); 120 *V00=1.; 121 return; 122 } 123 /** 124 \brief 125 returns a non-zero vector in the kernel of [[A00,A01],[A01,A11]] assuming that the kernel dimension is at least 1. 126 127 \param A00 Input - matrix component 128 \param A10 Input - matrix component 129 \param A01 Input - matrix component 130 \param A11 Input - matrix component 131 \param V0 Output - vector component 132 \param V1 Output - vector component 133 */ 134 inline 135 void vectorInKernel2(const double A00,const double A10,const double A01,const double A11, 136 double* V0, double*V1) 137 { 138 register double absA00=fabs(A00); 139 register double absA10=fabs(A10); 140 register double absA01=fabs(A01); 141 register double absA11=fabs(A11); 142 register double m=absA11>absA10 ? absA11 : absA10; 143 if (absA00>m || absA01>m) { 144 *V0=-A01; 145 *V1=A00; 146 } else { 147 if (m<=0) { 148 *V0=1.; 149 *V1=0.; 150 } else { 151 *V0=A11; 152 *V1=-A10; 153 } 154 } 155 } 156 /** 157 \brief 158 returns a non-zero vector in the kernel of [[A00,A01,A02],[A10,A11,A12],[A20,A21,A22]] 159 assuming that the kernel dimension is at least 1 and A00 is non zero. 160 161 \param A00 Input - matrix component 162 \param A10 Input - matrix component 163 \param A20 Input - matrix component 164 \param A01 Input - matrix component 165 \param A11 Input - matrix component 166 \param A21 Input - matrix component 167 \param A02 Input - matrix component 168 \param A12 Input - matrix component 169 \param A22 Input - matrix component 170 \param V0 Output - vector component 171 \param V1 Output - vector component 172 \param V2 Output - vector component 173 */ 174 inline 175 void vectorInKernel3__nonZeroA00(const double A00,const double A10,const double A20, 176 const double A01,const double A11,const double A21, 177 const double A02,const double A12,const double A22, 178 double* V0,double* V1,double* V2) 179 { 180 double TEMP0,TEMP1; 181 register const double I00=1./A00; 182 register const double IA10=I00*A10; 183 register const double IA20=I00*A20; 184 vectorInKernel2(A11-IA10*A01,A12-IA10*A02, 185 A21-IA20*A01,A22-IA20*A02,&TEMP0,&TEMP1); 186 *V0=-(A10*TEMP0+A20*TEMP1); 187 *V1=A00*TEMP0; 188 *V2=A00*TEMP1; 189 } 190 191 /** 192 \brief 193 solves a 2x2 eigenvalue A*V=ev*V problem for symmetric A. Eigenvectors are 194 ordered by increasing value and eigen vectors are normalizeVector3d such that 195 length is zero and first non-zero component is positive. 196 197 \param A00 Input - A_00 198 \param A01 Input - A_01 199 \param A11 Input - A_11 200 \param ev0 Output - smallest eigenvalue 201 \param ev1 Output - eigenvalue 202 \param V00 Output - eigenvector componenent coresponding to ev0 203 \param V10 Output - eigenvector componenent coresponding to ev0 204 \param V01 Output - eigenvector componenent coresponding to ev1 205 \param V11 Output - eigenvector componenent coresponding to ev1 206 \param tol Input - tolerance to identify to eigenvalues 207 */ 208 inline 209 void eigenvalues_and_eigenvectors2(const double A00,const double A01,const double A11, 210 double* ev0, double* ev1, 211 double* V00, double* V10, double* V01, double* V11, 212 const double tol) 213 { 214 double TEMP0,TEMP1; 215 eigenvalues2(A00,A01,A11,ev0,ev1); 216 const register double absev0=fabs(*ev0); 217 const register double absev1=fabs(*ev1); 218 register double max_ev=absev0>absev1 ? absev0 : absev1; 219 if (fabs((*ev0)-(*ev1))0.) { 238 *V00=TEMP0*scale; 239 *V10=TEMP1*scale; 240 if (TEMP1<0.) { 241 *V01=-(*V10); 242 *V11= (*V00); 243 } else { 244 *V01= (*V10); 245 *V11=-(*V00); 246 } 247 } else { 248 *V00=0.; 249 *V10=1; 250 *V11=0.; 251 *V01=1.; 252 } 253 } 254 } 255 /** 256 \brief 257 nomalizes a 3-d vector such that length is one and first non-zero component is positive. 258 259 \param V0 - vector componenent 260 \param V1 - vector componenent 261 \param V2 - vector componenent 262 */ 263 inline 264 void normalizeVector3(double* V0,double* V1,double* V2) 265 { 266 register double s; 267 if (*V0>0) { 268 s=1./sqrt((*V0)*(*V0)+(*V1)*(*V1)+(*V2)*(*V2)); 269 *V0*=s; 270 *V1*=s; 271 *V2*=s; 272 } else if (*V0<0) { 273 s=-1./sqrt((*V0)*(*V0)+(*V1)*(*V1)+(*V2)*(*V2)); 274 *V0*=s; 275 *V1*=s; 276 *V2*=s; 277 } else { 278 if (*V1>0) { 279 s=1./sqrt((*V1)*(*V1)+(*V2)*(*V2)); 280 *V1*=s; 281 *V2*=s; 282 } else if (*V1<0) { 283 s=-1./sqrt((*V1)*(*V1)+(*V2)*(*V2)); 284 *V1*=s; 285 *V2*=s; 286 } else { 287 *V2=1.; 288 } 289 } 290 } 291 /** 292 \brief 293 solves a 2x2 eigenvalue A*V=ev*V problem for symmetric A. Eigenvectors are 294 ordered by increasing value and eigen vectors are normalizeVector3d such that 295 length is zero and first non-zero component is positive. 296 297 \param A00 Input - A_00 298 \param A01 Input - A_01 299 \param A11 Input - A_11 300 \param ev0 Output - smallest eigenvalue 301 \param ev1 Output - eigenvalue 302 \param V00 Output - eigenvector componenent coresponding to ev0 303 \param V10 Output - eigenvector componenent coresponding to ev0 304 \param V01 Output - eigenvector componenent coresponding to ev1 305 \param V11 Output - eigenvector componenent coresponding to ev1 306 \param tol Input - tolerance to identify to eigenvalues 307 */ 308 inline 309 void eigenvalues_and_eigenvectors3(const double A00, const double A01, const double A02, 310 const double A11, const double A12, const double A22, 311 double* ev0, double* ev1, double* ev2, 312 double* V00, double* V10, double* V20, 313 double* V01, double* V11, double* V21, 314 double* V02, double* V12, double* V22, 315 const double tol) 316 { 317 register const double absA01=fabs(A01); 318 register const double absA02=fabs(A02); 319 register const double m=absA01>absA02 ? absA01 : absA02; 320 if (m<=0) { 321 double TEMP_V00,TEMP_V10,TEMP_V01,TEMP_V11,TEMP_EV0,TEMP_EV1; 322 eigenvalues_and_eigenvectors2(A11,A12,A22, 323 &TEMP_EV0,&TEMP_EV1, 324 &TEMP_V00,&TEMP_V10,&TEMP_V01,&TEMP_V11,tol); 325 if (A00<=TEMP_EV0) { 326 *V00=1.; 327 *V10=0.; 328 *V20=0.; 329 *V01=0.; 330 *V11=TEMP_V00; 331 *V21=TEMP_V10; 332 *V02=0.; 333 *V12=TEMP_V01; 334 *V22=TEMP_V11; 335 *ev0=A00; 336 *ev1=TEMP_EV0; 337 *ev2=TEMP_EV1; 338 } else if (A00>TEMP_EV1) { 339 *V02=1.; 340 *V12=0.; 341 *V22=0.; 342 *V00=0.; 343 *V10=TEMP_V00; 344 *V20=TEMP_V10; 345 *V01=0.; 346 *V11=TEMP_V01; 347 *V21=TEMP_V11; 348 *ev0=TEMP_EV0; 349 *ev1=TEMP_EV1; 350 *ev2=A00; 351 } else { 352 *V01=1.; 353 *V11=0.; 354 *V21=0.; 355 *V00=0.; 356 *V10=TEMP_V00; 357 *V20=TEMP_V10; 358 *V02=0.; 359 *V12=TEMP_V01; 360 *V22=TEMP_V11; 361 *ev0=TEMP_EV0; 362 *ev1=A00; 363 *ev2=TEMP_EV1; 364 } 365 } else { 366 eigenvalues3(A00,A01,A02,A11,A12,A22,ev0,ev1,ev2); 367 const register double absev0=fabs(*ev0); 368 const register double absev1=fabs(*ev1); 369 const register double absev2=fabs(*ev2); 370 register double max_ev=absev0>absev1 ? absev0 : absev1; 371 max_ev=max_ev>absev2 ? max_ev : absev2; 372 const register double d_01=fabs((*ev0)-(*ev1)); 373 const register double d_12=fabs((*ev1)-(*ev2)); 374 const register double max_d=d_01>d_12 ? d_01 : d_12; 375 if (max_d<=tol*max_ev) { 376 *V00=1.; 377 *V10=0; 378 *V20=0; 379 *V01=0; 380 *V11=1.; 381 *V21=0; 382 *V02=0; 383 *V12=0; 384 *V22=1.; 385 } else { 386 const register double S00=A00-(*ev0); 387 const register double absS00=fabs(S00); 388 if (fabs(S00)>m) { 389 vectorInKernel3__nonZeroA00(S00,A01,A02,A01,A11-(*ev0),A12,A02,A12,A22-(*ev0),V00,V10,V20); 390 } else if (absA02m) { 399 vectorInKernel3__nonZeroA00(T00,A01,A02,A01,A11-(*ev2),A12,A02,A12,A22-(*ev2),V02,V12,V22); 400 } else if (absA02