/[escript]/trunk/dudley/src/Assemble_jacobeans.c
ViewVC logotype

Contents of /trunk/dudley/src/Assemble_jacobeans.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4154 - (show annotations)
Tue Jan 22 09:30:23 2013 UTC (6 years, 6 months ago) by jfenwick
File MIME type: text/plain
File size: 13333 byte(s)
Round 1 of copyright fixes
1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2013 by University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Open Software License version 3.0
9 * http://www.opensource.org/licenses/osl-3.0.php
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development since 2012 by School of Earth Sciences
13 *
14 *****************************************************************************/
15
16 #include "Assemble.h"
17 #include "Util.h"
18 #ifdef _OPENMP
19 #include <omp.h>
20 #endif
21
22 /* Unless the loops in here get complicated again, this file should be compiled with loop unrolling */
23
24 /* input:
25
26 double* coordinates[DIM*(*)]
27 dim_t numQuad
28 double* QuadWeights[numQuad]
29 dim_t numShape
30 dim_t numElements
31 dim_t numNodes
32 index_t* nodes[numNodes*numElements] where NUMSIDES*numShape<=numNodes
33 double* DSDv[numShape*DIM*numQuad]
34 dim_t numTest
35 double* DTDv[LOCDIM*numTest*numQuad]
36 index_t* element_id[numElements]
37
38 output:
39
40 double* dTdX[DIM*numTest*NUMSIDES*numQuad*numElements]
41 double* volume[numQuad*numElements]
42
43 */
44
45 #include "ShapeTable.h"
46
47 #define SCALING(_nsub_,_dim_) pow(1./(double)(_nsub_),1./(double)(_dim_))
48
49 /************************************************************************************/
50 /* */
51 /* Jacobean 2D with area element */
52 /* */
53 void Dudley_Assemble_jacobeans_2D(double *coordinates, dim_t numQuad, dim_t numElements, dim_t numNodes, index_t * nodes,
54 double *dTdX, double *absD, double *quadweight, index_t * element_id)
55 {
56 #define DIM 2
57 #define LOCDIM 2
58 register int e, q;
59 char error_msg[LenErrorMsg_MAX];
60 const dim_t numTest = 3; /* hoping this is used in constant folding */
61 *quadweight = (numQuad == 1) ? 1. / 2 : 1. / 6; /* numQuad is 1 or 3 */
62 #pragma omp parallel
63 {
64 register double dXdv00, dXdv10, dXdv01, dXdv11, dvdX00, dvdX10, dvdX01, dvdX11, D, invD;
65 #pragma omp for private(e,q, dXdv00,dXdv10,dXdv01,dXdv11,dvdX00,dvdX10,dvdX01,dvdX11, D,invD) schedule(static)
66 for (e = 0; e < numElements; e++)
67 {
68 #define COMPDXDV0(P) coordinates[INDEX2(P,nodes[INDEX2(0,e,numNodes)],DIM)]*(-1)+\
69 coordinates[INDEX2(P,nodes[INDEX2(1,e,numNodes)],DIM)]*1+\
70 coordinates[INDEX2(P,nodes[INDEX2(2,e,numNodes)],DIM)]*(0)
71
72 #define COMPDXDV1(P) coordinates[INDEX2(P,nodes[INDEX2(0,e,numNodes)],DIM)]*(-1)+\
73 coordinates[INDEX2(P,nodes[INDEX2(1,e,numNodes)],DIM)]*(0)+\
74 coordinates[INDEX2(P,nodes[INDEX2(2,e,numNodes)],DIM)]*(1)
75
76 dXdv00 = 0;
77 dXdv10 = 0;
78 dXdv01 = 0;
79 dXdv11 = 0;
80 dXdv00 = COMPDXDV0(0);
81 dXdv10 = COMPDXDV0(1);
82 dXdv01 = COMPDXDV1(0);
83 dXdv11 = COMPDXDV1(1);
84 D = dXdv00 * dXdv11 - dXdv01 * dXdv10;
85 absD[e] = ABS(D);
86 if (D == 0.)
87 {
88 sprintf(error_msg, "Dudley_Assemble_jacobeans_2D: element %d (id %d) has area zero.", e, element_id[e]);
89 Dudley_setError(ZERO_DIVISION_ERROR, error_msg);
90 }
91 else
92 {
93 invD = 1. / D;
94 dvdX00 = dXdv11 * invD;
95 dvdX10 = -dXdv10 * invD;
96 dvdX01 = -dXdv01 * invD;
97 dvdX11 = dXdv00 * invD;
98 if (numQuad == 1)
99 {
100 dTdX[INDEX4(0, 0, 0, e, numTest, DIM, numQuad)] = DTDV_2D[0][0] * dvdX00 + DTDV_2D[1][1] * dvdX10;
101 dTdX[INDEX4(1, 0, 0, e, numTest, DIM, numQuad)] = DTDV_2D[0][1] * dvdX00 + DTDV_2D[1][0] * dvdX10;
102 dTdX[INDEX4(2, 0, 0, e, numTest, DIM, numQuad)] = DTDV_2D[2][0] * dvdX00 + DTDV_2D[2][1] * dvdX10;
103
104 dTdX[INDEX4(0, 1, 0, e, numTest, DIM, numQuad)] = DTDV_2D[0][0] * dvdX01 + DTDV_2D[1][1] * dvdX11;
105 dTdX[INDEX4(1, 1, 0, e, numTest, DIM, numQuad)] = DTDV_2D[0][1] * dvdX01 + DTDV_2D[1][0] * dvdX11;
106 dTdX[INDEX4(2, 1, 0, e, numTest, DIM, numQuad)] = DTDV_2D[2][0] * dvdX01 + DTDV_2D[2][1] * dvdX11;
107
108 }
109 else /* numQuad==3 */
110 {
111 for (q = 0; q < numTest; ++q) /* relying on unroll loops to optimise this */
112 {
113 dTdX[INDEX4(0, 0, q, e, numTest, DIM, numQuad)] =
114 DTDV_2D[0][0] * dvdX00 + DTDV_2D[1][1] * dvdX10;
115 dTdX[INDEX4(1, 0, q, e, numTest, DIM, numQuad)] =
116 DTDV_2D[0][1] * dvdX00 + DTDV_2D[1][0] * dvdX10;
117 dTdX[INDEX4(2, 0, q, e, numTest, DIM, numQuad)] =
118 DTDV_2D[2][0] * dvdX00 + DTDV_2D[2][1] * dvdX10;
119
120 dTdX[INDEX4(0, 1, q, e, numTest, DIM, numQuad)] =
121 DTDV_2D[0][0] * dvdX01 + DTDV_2D[1][1] * dvdX11;
122 dTdX[INDEX4(1, 1, q, e, numTest, DIM, numQuad)] =
123 DTDV_2D[0][1] * dvdX01 + DTDV_2D[1][0] * dvdX11;
124 dTdX[INDEX4(2, 1, q, e, numTest, DIM, numQuad)] =
125 DTDV_2D[2][0] * dvdX01 + DTDV_2D[2][1] * dvdX11;
126
127 }
128 }
129 }
130 }
131 } /* end parallel */
132 #undef DIM
133 #undef LOCDIM
134 #undef DTDXSET
135 #undef COMPDXDV0
136 #undef COMPDXDV1
137 }
138
139 /************************************************************************************/
140 /* */
141 /* Jacobean 1D manifold in 2D and 1D elements */
142 /* */
143 void Dudley_Assemble_jacobeans_2D_M1D_E1D(double *coordinates, dim_t numQuad,
144 dim_t numElements, dim_t numNodes, index_t * nodes,
145 double *dTdX, double *absD, double *quadweight, index_t * element_id)
146 {
147 #define DIM 2
148 #define LOCDIM 1
149 register int e;
150 char error_msg[LenErrorMsg_MAX];
151 const dim_t numTest = 2;
152 *quadweight = (numQuad == 1) ? 1.0 : 0.5;
153 /* numQuad is 1 or 2 */
154 #pragma omp parallel
155 {
156 register double dXdv00, dXdv10, dvdX00, dvdX01, D, invD;
157 #pragma omp for private(e,dXdv00,dXdv10,dvdX00,dvdX01,D,invD) schedule(static)
158 for (e = 0; e < numElements; e++)
159 {
160 dXdv00 = 0;
161 dXdv10 = 0;
162 dXdv00 +=
163 coordinates[INDEX2(0, nodes[INDEX2(0, e, numNodes)], DIM)] * (-1.) +
164 coordinates[INDEX2(0, nodes[INDEX2(1, e, numNodes)], DIM)];
165 dXdv00 +=
166 coordinates[INDEX2(1, nodes[INDEX2(0, e, numNodes)], DIM)] * (-1.) +
167 coordinates[INDEX2(1, nodes[INDEX2(1, e, numNodes)], DIM)];
168 D = dXdv00 * dXdv00 + dXdv10 * dXdv10;
169 if (D == 0.)
170 {
171 sprintf(error_msg, "Dudley_Assemble_jacobeans_2D_M1D_E1D: element %d (id %d) has length zero.", e,
172 element_id[e]);
173 Dudley_setError(ZERO_DIVISION_ERROR, error_msg);
174 }
175 else
176 {
177 invD = 1. / D;
178 dvdX00 = dXdv00 * invD;
179 dvdX01 = dXdv10 * invD;
180 /* The number of quad points is 1 or 2 */
181 dTdX[INDEX4(0, 0, 0, e, numTest, DIM, numQuad)] = -1 * dvdX00;
182 dTdX[INDEX4(0, 1, 0, e, numTest, DIM, numQuad)] = -1 * dvdX01;
183 dTdX[INDEX4(1, 0, 0, e, numTest, DIM, numQuad)] = -1 * dvdX00;
184 dTdX[INDEX4(1, 1, 0, e, numTest, DIM, numQuad)] = -1 * dvdX01;
185 absD[e] = sqrt(D);
186 if (numQuad == 2)
187 {
188 dTdX[INDEX4(0, 0, 1, e, numTest, DIM, numQuad)] = dvdX00;
189 dTdX[INDEX4(0, 1, 1, e, numTest, DIM, numQuad)] = dvdX01;
190 dTdX[INDEX4(1, 0, 1, e, numTest, DIM, numQuad)] = dvdX00;
191 dTdX[INDEX4(1, 1, 1, e, numTest, DIM, numQuad)] = dvdX01;
192 }
193 }
194 }
195 } /* end parallel */
196 #undef DIM
197 #undef LOCDIM
198 }
199
200 /************************************************************************************/
201 /* */
202 /* Jacobean 3D */
203 /* */
204 void Dudley_Assemble_jacobeans_3D(double *coordinates, dim_t numQuad, dim_t numElements, dim_t numNodes, index_t * nodes,
205 double *dTdX, double *absD, double *quadweight, index_t * element_id)
206 {
207 #define DIM 3
208 #define LOCDIM 3
209 int e, q, s;
210 char error_msg[LenErrorMsg_MAX];
211 /* numQuad is 1 or 4 */
212 const dim_t numShape = 4, numTest = 4;
213 *quadweight = (numQuad == 1) ? 1. / 6 : 1. / 24;
214
215 #pragma omp parallel
216 {
217 register double dXdv00, dXdv10, dXdv20, dXdv01, dXdv11, dXdv21, dXdv02, dXdv12, dXdv22,
218 dvdX00, dvdX10, dvdX20, dvdX01, dvdX11, dvdX21, dvdX02, dvdX12, dvdX22, D, invD, X0_loc, X1_loc, X2_loc;
219 #pragma omp for private(e,q,s,dXdv00,dXdv10,dXdv20,dXdv01,dXdv11,dXdv21,dXdv02,dXdv12,dXdv22,dvdX00,dvdX10,dvdX20,dvdX01,dvdX11,dvdX21,dvdX02,dvdX12,dvdX22,D,invD,X0_loc,X1_loc,X2_loc) schedule(static)
220 for (e = 0; e < numElements; e++)
221 {
222 dXdv00 = 0;
223 dXdv10 = 0;
224 dXdv20 = 0;
225 dXdv01 = 0;
226 dXdv11 = 0;
227 dXdv21 = 0;
228 dXdv02 = 0;
229 dXdv12 = 0;
230 dXdv22 = 0;
231 for (s = 0; s < numShape; s++)
232 {
233 X0_loc = coordinates[INDEX2(0, nodes[INDEX2(s, e, numNodes)], DIM)];
234 X1_loc = coordinates[INDEX2(1, nodes[INDEX2(s, e, numNodes)], DIM)];
235 X2_loc = coordinates[INDEX2(2, nodes[INDEX2(s, e, numNodes)], DIM)];
236 dXdv00 += X0_loc * DTDV_3D[s][0];
237 dXdv10 += X1_loc * DTDV_3D[s][0];
238 dXdv20 += X2_loc * DTDV_3D[s][0];
239 dXdv01 += X0_loc * DTDV_3D[s][1];
240 dXdv11 += X1_loc * DTDV_3D[s][1];
241 dXdv21 += X2_loc * DTDV_3D[s][1];
242 dXdv02 += X0_loc * DTDV_3D[s][2];
243 dXdv12 += X1_loc * DTDV_3D[s][2];
244 dXdv22 += X2_loc * DTDV_3D[s][2];
245 }
246 D = dXdv00 * (dXdv11 * dXdv22 - dXdv12 * dXdv21) + dXdv01 * (dXdv20 * dXdv12 - dXdv10 * dXdv22) +
247 dXdv02 * (dXdv10 * dXdv21 - dXdv20 * dXdv11);
248 absD[e] = ABS(D);
249 if (D == 0.)
250 {
251 sprintf(error_msg, "Dudley_Assemble_jacobeans_3D: element %d (id %d) has volume zero.", e, element_id[e]);
252 Dudley_setError(ZERO_DIVISION_ERROR, error_msg);
253 }
254 else
255 {
256 invD = 1. / D;
257 dvdX00 = (dXdv11 * dXdv22 - dXdv12 * dXdv21) * invD;
258 dvdX10 = (dXdv20 * dXdv12 - dXdv10 * dXdv22) * invD;
259 dvdX20 = (dXdv10 * dXdv21 - dXdv20 * dXdv11) * invD;
260 dvdX01 = (dXdv02 * dXdv21 - dXdv01 * dXdv22) * invD;
261 dvdX11 = (dXdv00 * dXdv22 - dXdv20 * dXdv02) * invD;
262 dvdX21 = (dXdv01 * dXdv20 - dXdv00 * dXdv21) * invD;
263 dvdX02 = (dXdv01 * dXdv12 - dXdv02 * dXdv11) * invD;
264 dvdX12 = (dXdv02 * dXdv10 - dXdv00 * dXdv12) * invD;
265 dvdX22 = (dXdv00 * dXdv11 - dXdv01 * dXdv10) * invD;
266 for (q = 0; q < numQuad; q++)
267 {
268 for (s = 0; s < numTest; s++)
269 {
270 dTdX[INDEX4(s, 0, q, e, numTest, DIM, numQuad)] =
271 DTDV_3D[s][0] * dvdX00 + DTDV_3D[s][1] * dvdX10 + DTDV_3D[s][2] * dvdX20;
272 dTdX[INDEX4(s, 1, q, e, numTest, DIM, numQuad)] =
273 DTDV_3D[s][0] * dvdX01 + DTDV_3D[s][1] * dvdX11 + DTDV_3D[s][2] * dvdX21;
274 dTdX[INDEX4(s, 2, q, e, numTest, DIM, numQuad)] =
275 DTDV_3D[s][0] * dvdX02 + DTDV_3D[s][1] * dvdX12 + DTDV_3D[s][2] * dvdX22;
276 }
277 }
278 }
279 }
280 } /* end parallel */
281 #undef DIM
282 #undef LOCDIM
283 }
284
285 /************************************************************************************/
286 /* */
287 /* Jacobean 2D manifold in 3D with 2D elements */
288 /* */
289 void Dudley_Assemble_jacobeans_3D_M2D_E2D(double *coordinates, dim_t numQuad, dim_t numElements, dim_t numNodes,
290 index_t * nodes, double *dTdX, double *absD, double *quadweight,
291 index_t * element_id)
292 {
293 #define DIM 3
294 #define LOCDIM 2
295 register int e, q, s;
296 char error_msg[LenErrorMsg_MAX];
297 const double DTDV[3][2] = { {-1., -1.}, {1., 0.}, {0., 1.} };
298 const dim_t numShape = 3, numTest = 3;
299 /* numQuad is 1 or 3 */
300 *quadweight = (numQuad == 1) ? 1. / 2 : 1. / 6;
301 #pragma omp parallel
302 {
303 register double dXdv00, dXdv10, dXdv20, dXdv01, dXdv11, dXdv21, m00, m01, m11,
304 dvdX00, dvdX01, dvdX02, dvdX10, dvdX11, dvdX12, D, invD, X0_loc, X1_loc, X2_loc;
305 #pragma omp for private(e,q,s,dXdv00,dXdv10,dXdv20,dXdv01,dXdv11,dXdv21,m00,m01,m11,dvdX00,dvdX01,dvdX02,dvdX10,dvdX11,dvdX12,D,invD, X0_loc, X1_loc, X2_loc) schedule(static)
306 for (e = 0; e < numElements; e++)
307 {
308 dXdv00 = 0;
309 dXdv10 = 0;
310 dXdv20 = 0;
311 dXdv01 = 0;
312 dXdv11 = 0;
313 dXdv21 = 0;
314 for (s = 0; s < numShape; s++)
315 {
316 X0_loc = coordinates[INDEX2(0, nodes[INDEX2(s, e, numNodes)], DIM)];
317 X1_loc = coordinates[INDEX2(1, nodes[INDEX2(s, e, numNodes)], DIM)];
318 X2_loc = coordinates[INDEX2(2, nodes[INDEX2(s, e, numNodes)], DIM)];
319 dXdv00 += X0_loc * DTDV[s][0];
320 dXdv10 += X1_loc * DTDV[s][0];
321 dXdv20 += X2_loc * DTDV[s][0];
322 dXdv01 += X0_loc * DTDV[s][1];
323 dXdv11 += X1_loc * DTDV[s][1];
324 dXdv21 += X2_loc * DTDV[s][1];
325 }
326 m00 = dXdv00 * dXdv00 + dXdv10 * dXdv10 + dXdv20 * dXdv20;
327 m01 = dXdv00 * dXdv01 + dXdv10 * dXdv11 + dXdv20 * dXdv21;
328 m11 = dXdv01 * dXdv01 + dXdv11 * dXdv11 + dXdv21 * dXdv21;
329 D = m00 * m11 - m01 * m01;
330 absD[e] = sqrt(D);
331 if (D == 0.)
332 {
333 sprintf(error_msg, "Dudley_Assemble_jacobeans_3D_M2D: element %d (id %d) has area zero.", e, element_id[e]);
334 Dudley_setError(ZERO_DIVISION_ERROR, error_msg);
335 }
336 else
337 {
338 invD = 1. / D;
339 dvdX00 = (m00 * dXdv00 - m01 * dXdv01) * invD;
340 dvdX01 = (m00 * dXdv10 - m01 * dXdv11) * invD;
341 dvdX02 = (m00 * dXdv20 - m01 * dXdv21) * invD;
342 dvdX10 = (-m01 * dXdv00 + m11 * dXdv01) * invD;
343 dvdX11 = (-m01 * dXdv10 + m11 * dXdv11) * invD;
344 dvdX12 = (-m01 * dXdv20 + m11 * dXdv21) * invD;
345 for (q = 0; q < numQuad; q++)
346 {
347 for (s = 0; s < numTest; s++)
348 {
349 dTdX[INDEX4(s, 0, q, e, numTest, DIM, numQuad)] = DTDV[s][0] * dvdX00 + DTDV[s][1] * dvdX10;
350 dTdX[INDEX4(s, 1, q, e, numTest, DIM, numQuad)] = DTDV[s][0] * dvdX01 + DTDV[s][1] * dvdX11;
351 dTdX[INDEX4(s, 2, q, e, numTest, DIM, numQuad)] = DTDV[s][0] * dvdX02 + DTDV[s][1] * dvdX12;
352 }
353 }
354 }
355 }
356 } /* end parallel section */
357 #undef DIM
358 #undef LOCDIM
359 }

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26