/[escript]/trunk/dudley/src/Assemble_jacobeans.c
ViewVC logotype

Contents of /trunk/dudley/src/Assemble_jacobeans.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3259 - (show annotations)
Mon Oct 11 01:48:14 2010 UTC (9 years ago) by jfenwick
File MIME type: text/plain
File size: 13126 byte(s)
Merging dudley and scons updates from branches

1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2010 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14 #include "Assemble.h"
15 #include "Util.h"
16 #ifdef _OPENMP
17 #include <omp.h>
18 #endif
19
20 /* Unless the loops in here get complicated again, this file should be compiled with loop unrolling */
21
22 /* input:
23
24 double* coordinates[DIM*(*)]
25 dim_t numQuad
26 double* QuadWeights[numQuad]
27 dim_t numShape
28 dim_t numElements
29 dim_t numNodes
30 index_t* nodes[numNodes*numElements] where NUMSIDES*numShape<=numNodes
31 double* DSDv[numShape*DIM*numQuad]
32 dim_t numTest
33 double* DTDv[LOCDIM*numTest*numQuad]
34 index_t* element_id[numElements]
35
36 output:
37
38 double* dTdX[DIM*numTest*NUMSIDES*numQuad*numElements]
39 double* volume[numQuad*numElements]
40
41 */
42
43 #include "ShapeTable.h"
44
45 #define SCALING(_nsub_,_dim_) pow(1./(double)(_nsub_),1./(double)(_dim_))
46
47 /**************************************************************/
48 /* */
49 /* Jacobean 2D with area element */
50 /* */
51 void Dudley_Assemble_jacobeans_2D(double *coordinates, dim_t numQuad, dim_t numElements, dim_t numNodes, index_t * nodes,
52 double *dTdX, double *absD, double *quadweight, index_t * element_id)
53 {
54 #define DIM 2
55 #define LOCDIM 2
56 register int e, q;
57 char error_msg[LenErrorMsg_MAX];
58 const dim_t numTest = 3; /* hoping this is used in constant folding */
59 *quadweight = (numQuad == 1) ? 1. / 2 : 1. / 6; /* numQuad is 1 or 3 */
60 #pragma omp parallel
61 {
62 register double dXdv00, dXdv10, dXdv01, dXdv11, dvdX00, dvdX10, dvdX01, dvdX11, D, invD;
63 #pragma omp for private(e,q, dXdv00,dXdv10,dXdv01,dXdv11,dvdX00,dvdX10,dvdX01,dvdX11, D,invD) schedule(static)
64 for (e = 0; e < numElements; e++)
65 {
66 #define COMPDXDV0(P) coordinates[INDEX2(P,nodes[INDEX2(0,e,numNodes)],DIM)]*(-1)+\
67 coordinates[INDEX2(P,nodes[INDEX2(1,e,numNodes)],DIM)]*1+\
68 coordinates[INDEX2(P,nodes[INDEX2(2,e,numNodes)],DIM)]*(0)
69
70 #define COMPDXDV1(P) coordinates[INDEX2(P,nodes[INDEX2(0,e,numNodes)],DIM)]*(-1)+\
71 coordinates[INDEX2(P,nodes[INDEX2(1,e,numNodes)],DIM)]*(0)+\
72 coordinates[INDEX2(P,nodes[INDEX2(2,e,numNodes)],DIM)]*(1)
73
74 dXdv00 = 0;
75 dXdv10 = 0;
76 dXdv01 = 0;
77 dXdv11 = 0;
78 dXdv00 = COMPDXDV0(0);
79 dXdv10 = COMPDXDV0(1);
80 dXdv01 = COMPDXDV1(0);
81 dXdv11 = COMPDXDV1(1);
82 D = dXdv00 * dXdv11 - dXdv01 * dXdv10;
83 absD[e] = ABS(D);
84 if (D == 0.)
85 {
86 sprintf(error_msg, "Dudley_Assemble_jacobeans_2D: element %d (id %d) has area zero.", e, element_id[e]);
87 Dudley_setError(ZERO_DIVISION_ERROR, error_msg);
88 }
89 else
90 {
91 invD = 1. / D;
92 dvdX00 = dXdv11 * invD;
93 dvdX10 = -dXdv10 * invD;
94 dvdX01 = -dXdv01 * invD;
95 dvdX11 = dXdv00 * invD;
96 if (numQuad == 1)
97 {
98 dTdX[INDEX4(0, 0, 0, e, numTest, DIM, numQuad)] = DTDV_2D[0][0] * dvdX00 + DTDV_2D[1][1] * dvdX10;
99 dTdX[INDEX4(1, 0, 0, e, numTest, DIM, numQuad)] = DTDV_2D[0][1] * dvdX00 + DTDV_2D[1][0] * dvdX10;
100 dTdX[INDEX4(2, 0, 0, e, numTest, DIM, numQuad)] = DTDV_2D[2][0] * dvdX00 + DTDV_2D[2][1] * dvdX10;
101
102 dTdX[INDEX4(0, 1, 0, e, numTest, DIM, numQuad)] = DTDV_2D[0][0] * dvdX01 + DTDV_2D[1][1] * dvdX11;
103 dTdX[INDEX4(1, 1, 0, e, numTest, DIM, numQuad)] = DTDV_2D[0][1] * dvdX01 + DTDV_2D[1][0] * dvdX11;
104 dTdX[INDEX4(2, 1, 0, e, numTest, DIM, numQuad)] = DTDV_2D[2][0] * dvdX01 + DTDV_2D[2][1] * dvdX11;
105
106 }
107 else /* numQuad==3 */
108 {
109 for (q = 0; q < numTest; ++q) /* relying on unroll loops to optimise this */
110 {
111 dTdX[INDEX4(0, 0, q, e, numTest, DIM, numQuad)] =
112 DTDV_2D[0][0] * dvdX00 + DTDV_2D[1][1] * dvdX10;
113 dTdX[INDEX4(1, 0, q, e, numTest, DIM, numQuad)] =
114 DTDV_2D[0][1] * dvdX00 + DTDV_2D[1][0] * dvdX10;
115 dTdX[INDEX4(2, 0, q, e, numTest, DIM, numQuad)] =
116 DTDV_2D[2][0] * dvdX00 + DTDV_2D[2][1] * dvdX10;
117
118 dTdX[INDEX4(0, 1, q, e, numTest, DIM, numQuad)] =
119 DTDV_2D[0][0] * dvdX01 + DTDV_2D[1][1] * dvdX11;
120 dTdX[INDEX4(1, 1, q, e, numTest, DIM, numQuad)] =
121 DTDV_2D[0][1] * dvdX01 + DTDV_2D[1][0] * dvdX11;
122 dTdX[INDEX4(2, 1, q, e, numTest, DIM, numQuad)] =
123 DTDV_2D[2][0] * dvdX01 + DTDV_2D[2][1] * dvdX11;
124
125 }
126 }
127 }
128 }
129 } /* end parallel */
130 #undef DIM
131 #undef LOCDIM
132 #undef DTDXSET
133 #undef COMPDXDV0
134 #undef COMPDXDV1
135 }
136
137 /**************************************************************/
138 /* */
139 /* Jacobean 1D manifold in 2D and 1D elements */
140 /* */
141 void Dudley_Assemble_jacobeans_2D_M1D_E1D(double *coordinates, dim_t numQuad,
142 dim_t numElements, dim_t numNodes, index_t * nodes,
143 double *dTdX, double *absD, double *quadweight, index_t * element_id)
144 {
145 #define DIM 2
146 #define LOCDIM 1
147 register int e;
148 char error_msg[LenErrorMsg_MAX];
149 const dim_t numTest = 2;
150 *quadweight = (numQuad == 1) ? 1.0 : 0.5;
151 /* numQuad is 1 or 2 */
152 #pragma omp parallel
153 {
154 register double dXdv00, dXdv10, dvdX00, dvdX01, D, invD;
155 #pragma omp for private(e,dXdv00,dXdv10,dvdX00,dvdX01,D,invD) schedule(static)
156 for (e = 0; e < numElements; e++)
157 {
158 dXdv00 = 0;
159 dXdv10 = 0;
160 dXdv00 +=
161 coordinates[INDEX2(0, nodes[INDEX2(0, e, numNodes)], DIM)] * (-1.) +
162 coordinates[INDEX2(0, nodes[INDEX2(1, e, numNodes)], DIM)];
163 dXdv00 +=
164 coordinates[INDEX2(1, nodes[INDEX2(0, e, numNodes)], DIM)] * (-1.) +
165 coordinates[INDEX2(1, nodes[INDEX2(1, e, numNodes)], DIM)];
166 D = dXdv00 * dXdv00 + dXdv10 * dXdv10;
167 if (D == 0.)
168 {
169 sprintf(error_msg, "Dudley_Assemble_jacobeans_2D_M1D_E1D: element %d (id %d) has length zero.", e,
170 element_id[e]);
171 Dudley_setError(ZERO_DIVISION_ERROR, error_msg);
172 }
173 else
174 {
175 invD = 1. / D;
176 dvdX00 = dXdv00 * invD;
177 dvdX01 = dXdv10 * invD;
178 /* The number of quad points is 1 or 2 */
179 dTdX[INDEX4(0, 0, 0, e, numTest, DIM, numQuad)] = -1 * dvdX00;
180 dTdX[INDEX4(0, 1, 0, e, numTest, DIM, numQuad)] = -1 * dvdX01;
181 dTdX[INDEX4(1, 0, 0, e, numTest, DIM, numQuad)] = -1 * dvdX00;
182 dTdX[INDEX4(1, 1, 0, e, numTest, DIM, numQuad)] = -1 * dvdX01;
183 absD[e] = sqrt(D);
184 if (numQuad == 2)
185 {
186 dTdX[INDEX4(0, 0, 1, e, numTest, DIM, numQuad)] = dvdX00;
187 dTdX[INDEX4(0, 1, 1, e, numTest, DIM, numQuad)] = dvdX01;
188 dTdX[INDEX4(1, 0, 1, e, numTest, DIM, numQuad)] = dvdX00;
189 dTdX[INDEX4(1, 1, 1, e, numTest, DIM, numQuad)] = dvdX01;
190 }
191 }
192 }
193 } /* end parallel */
194 #undef DIM
195 #undef LOCDIM
196 }
197
198 /**************************************************************/
199 /* */
200 /* Jacobean 3D */
201 /* */
202 void Dudley_Assemble_jacobeans_3D(double *coordinates, dim_t numQuad, dim_t numElements, dim_t numNodes, index_t * nodes,
203 double *dTdX, double *absD, double *quadweight, index_t * element_id)
204 {
205 #define DIM 3
206 #define LOCDIM 3
207 int e, q, s;
208 char error_msg[LenErrorMsg_MAX];
209 /* numQuad is 1 or 4 */
210 const dim_t numShape = 4, numTest = 4;
211 *quadweight = (numQuad == 1) ? 1. / 6 : 1. / 24;
212
213 #pragma omp parallel
214 {
215 register double dXdv00, dXdv10, dXdv20, dXdv01, dXdv11, dXdv21, dXdv02, dXdv12, dXdv22,
216 dvdX00, dvdX10, dvdX20, dvdX01, dvdX11, dvdX21, dvdX02, dvdX12, dvdX22, D, invD, X0_loc, X1_loc, X2_loc;
217 #pragma omp for private(e,q,s,dXdv00,dXdv10,dXdv20,dXdv01,dXdv11,dXdv21,dXdv02,dXdv12,dXdv22,dvdX00,dvdX10,dvdX20,dvdX01,dvdX11,dvdX21,dvdX02,dvdX12,dvdX22,D,invD,X0_loc,X1_loc,X2_loc) schedule(static)
218 for (e = 0; e < numElements; e++)
219 {
220 dXdv00 = 0;
221 dXdv10 = 0;
222 dXdv20 = 0;
223 dXdv01 = 0;
224 dXdv11 = 0;
225 dXdv21 = 0;
226 dXdv02 = 0;
227 dXdv12 = 0;
228 dXdv22 = 0;
229 for (s = 0; s < numShape; s++)
230 {
231 X0_loc = coordinates[INDEX2(0, nodes[INDEX2(s, e, numNodes)], DIM)];
232 X1_loc = coordinates[INDEX2(1, nodes[INDEX2(s, e, numNodes)], DIM)];
233 X2_loc = coordinates[INDEX2(2, nodes[INDEX2(s, e, numNodes)], DIM)];
234 dXdv00 += X0_loc * DTDV_3D[s][0];
235 dXdv10 += X1_loc * DTDV_3D[s][0];
236 dXdv20 += X2_loc * DTDV_3D[s][0];
237 dXdv01 += X0_loc * DTDV_3D[s][1];
238 dXdv11 += X1_loc * DTDV_3D[s][1];
239 dXdv21 += X2_loc * DTDV_3D[s][1];
240 dXdv02 += X0_loc * DTDV_3D[s][2];
241 dXdv12 += X1_loc * DTDV_3D[s][2];
242 dXdv22 += X2_loc * DTDV_3D[s][2];
243 }
244 D = dXdv00 * (dXdv11 * dXdv22 - dXdv12 * dXdv21) + dXdv01 * (dXdv20 * dXdv12 - dXdv10 * dXdv22) +
245 dXdv02 * (dXdv10 * dXdv21 - dXdv20 * dXdv11);
246 absD[e] = ABS(D);
247 if (D == 0.)
248 {
249 sprintf(error_msg, "Dudley_Assemble_jacobeans_3D: element %d (id %d) has volume zero.", e, element_id[e]);
250 Dudley_setError(ZERO_DIVISION_ERROR, error_msg);
251 }
252 else
253 {
254 invD = 1. / D;
255 dvdX00 = (dXdv11 * dXdv22 - dXdv12 * dXdv21) * invD;
256 dvdX10 = (dXdv20 * dXdv12 - dXdv10 * dXdv22) * invD;
257 dvdX20 = (dXdv10 * dXdv21 - dXdv20 * dXdv11) * invD;
258 dvdX01 = (dXdv02 * dXdv21 - dXdv01 * dXdv22) * invD;
259 dvdX11 = (dXdv00 * dXdv22 - dXdv20 * dXdv02) * invD;
260 dvdX21 = (dXdv01 * dXdv20 - dXdv00 * dXdv21) * invD;
261 dvdX02 = (dXdv01 * dXdv12 - dXdv02 * dXdv11) * invD;
262 dvdX12 = (dXdv02 * dXdv10 - dXdv00 * dXdv12) * invD;
263 dvdX22 = (dXdv00 * dXdv11 - dXdv01 * dXdv10) * invD;
264 for (q = 0; q < numQuad; q++)
265 {
266 for (s = 0; s < numTest; s++)
267 {
268 dTdX[INDEX4(s, 0, q, e, numTest, DIM, numQuad)] =
269 DTDV_3D[s][0] * dvdX00 + DTDV_3D[s][1] * dvdX10 + DTDV_3D[s][2] * dvdX20;
270 dTdX[INDEX4(s, 1, q, e, numTest, DIM, numQuad)] =
271 DTDV_3D[s][0] * dvdX01 + DTDV_3D[s][1] * dvdX11 + DTDV_3D[s][2] * dvdX21;
272 dTdX[INDEX4(s, 2, q, e, numTest, DIM, numQuad)] =
273 DTDV_3D[s][0] * dvdX02 + DTDV_3D[s][1] * dvdX12 + DTDV_3D[s][2] * dvdX22;
274 }
275 }
276 }
277 }
278 } /* end parallel */
279 #undef DIM
280 #undef LOCDIM
281 }
282
283 /**************************************************************/
284 /* */
285 /* Jacobean 2D manifold in 3D with 2D elements */
286 /* */
287 void Dudley_Assemble_jacobeans_3D_M2D_E2D(double *coordinates, dim_t numQuad, dim_t numElements, dim_t numNodes,
288 index_t * nodes, double *dTdX, double *absD, double *quadweight,
289 index_t * element_id)
290 {
291 #define DIM 3
292 #define LOCDIM 2
293 register int e, q, s;
294 char error_msg[LenErrorMsg_MAX];
295 const double DTDV[3][2] = { {-1., -1.}, {1., 0.}, {0., 1.} };
296 const dim_t numShape = 3, numTest = 3;
297 /* numQuad is 1 or 3 */
298 *quadweight = (numQuad == 1) ? 1. / 2 : 1. / 6;
299 #pragma omp parallel
300 {
301 register double dXdv00, dXdv10, dXdv20, dXdv01, dXdv11, dXdv21, m00, m01, m11,
302 dvdX00, dvdX01, dvdX02, dvdX10, dvdX11, dvdX12, D, invD, X0_loc, X1_loc, X2_loc;
303 #pragma omp for private(e,q,s,dXdv00,dXdv10,dXdv20,dXdv01,dXdv11,dXdv21,m00,m01,m11,dvdX00,dvdX01,dvdX02,dvdX10,dvdX11,dvdX12,D,invD, X0_loc, X1_loc, X2_loc) schedule(static)
304 for (e = 0; e < numElements; e++)
305 {
306 dXdv00 = 0;
307 dXdv10 = 0;
308 dXdv20 = 0;
309 dXdv01 = 0;
310 dXdv11 = 0;
311 dXdv21 = 0;
312 for (s = 0; s < numShape; s++)
313 {
314 X0_loc = coordinates[INDEX2(0, nodes[INDEX2(s, e, numNodes)], DIM)];
315 X1_loc = coordinates[INDEX2(1, nodes[INDEX2(s, e, numNodes)], DIM)];
316 X2_loc = coordinates[INDEX2(2, nodes[INDEX2(s, e, numNodes)], DIM)];
317 dXdv00 += X0_loc * DTDV[s][0];
318 dXdv10 += X1_loc * DTDV[s][0];
319 dXdv20 += X2_loc * DTDV[s][0];
320 dXdv01 += X0_loc * DTDV[s][1];
321 dXdv11 += X1_loc * DTDV[s][1];
322 dXdv21 += X2_loc * DTDV[s][1];
323 }
324 m00 = dXdv00 * dXdv00 + dXdv10 * dXdv10 + dXdv20 * dXdv20;
325 m01 = dXdv00 * dXdv01 + dXdv10 * dXdv11 + dXdv20 * dXdv21;
326 m11 = dXdv01 * dXdv01 + dXdv11 * dXdv11 + dXdv21 * dXdv21;
327 D = m00 * m11 - m01 * m01;
328 absD[e] = sqrt(D);
329 if (D == 0.)
330 {
331 sprintf(error_msg, "Dudley_Assemble_jacobeans_3D_M2D: element %d (id %d) has area zero.", e, element_id[e]);
332 Dudley_setError(ZERO_DIVISION_ERROR, error_msg);
333 }
334 else
335 {
336 invD = 1. / D;
337 dvdX00 = (m00 * dXdv00 - m01 * dXdv01) * invD;
338 dvdX01 = (m00 * dXdv10 - m01 * dXdv11) * invD;
339 dvdX02 = (m00 * dXdv20 - m01 * dXdv21) * invD;
340 dvdX10 = (-m01 * dXdv00 + m11 * dXdv01) * invD;
341 dvdX11 = (-m01 * dXdv10 + m11 * dXdv11) * invD;
342 dvdX12 = (-m01 * dXdv20 + m11 * dXdv21) * invD;
343 for (q = 0; q < numQuad; q++)
344 {
345 for (s = 0; s < numTest; s++)
346 {
347 dTdX[INDEX4(s, 0, q, e, numTest, DIM, numQuad)] = DTDV[s][0] * dvdX00 + DTDV[s][1] * dvdX10;
348 dTdX[INDEX4(s, 1, q, e, numTest, DIM, numQuad)] = DTDV[s][0] * dvdX01 + DTDV[s][1] * dvdX11;
349 dTdX[INDEX4(s, 2, q, e, numTest, DIM, numQuad)] = DTDV[s][0] * dvdX02 + DTDV[s][1] * dvdX12;
350 }
351 }
352 }
353 }
354 } /* end parallel section */
355 #undef DIM
356 #undef LOCDIM
357 }

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26