/[escript]/branches/domexper/dudley/src/Assemble_jacobeans.c
ViewVC logotype

Contents of /branches/domexper/dudley/src/Assemble_jacobeans.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3189 - (show annotations)
Thu Sep 16 05:20:34 2010 UTC (8 years, 9 months ago) by jfenwick
File MIME type: text/plain
File size: 11894 byte(s)
Adding support for selective compiling with loop unroll

1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2010 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14
15 #include "Assemble.h"
16 #include "Util.h"
17 #ifdef _OPENMP
18 #include <omp.h>
19 #endif
20
21 // Unless the loops in here get complicated again, this file should be compiled with loop unrolling
22
23
24 /* input:
25
26 double* coordinates[DIM*(*)]
27 dim_t numQuad
28 double* QuadWeights[numQuad]
29 dim_t numShape
30 dim_t numElements
31 dim_t numNodes
32 index_t* nodes[numNodes*numElements] where NUMSIDES*numShape<=numNodes
33 double* DSDv[numShape*DIM*numQuad]
34 dim_t numTest
35 double* DTDv[LOCDIM*numTest*numQuad]
36 index_t* element_id[numElements]
37
38 output:
39
40 double* dTdX[DIM*numTest*NUMSIDES*numQuad*numElements]
41 double* volume[numQuad*numElements]
42
43 */
44
45 #include "ShapeTable.h"
46
47 #define SCALING(_nsub_,_dim_) pow(1./(double)(_nsub_),1./(double)(_dim_))
48
49 /**************************************************************/
50 /* */
51 /* Jacobean 2D with area element */
52 /* */
53 void Assemble_jacobeans_2D(double* coordinates, dim_t numQuad, dim_t numElements, dim_t numNodes, index_t* nodes,
54 double* dTdX, double* absD, double* quadweight, index_t* element_id)
55 {
56 #define DIM 2
57 #define LOCDIM 2
58 register int e,q,s;
59 char error_msg[LenErrorMsg_MAX];
60 *quadweight=(numQuad==1)?1./2:1./6; /* numQuad is 1 or 3 */
61 const dim_t numTest=3; // hoping this is used in constant folding
62 #pragma omp parallel
63 {
64 register double dXdv00,dXdv10,dXdv01,dXdv11,
65 dvdX00,dvdX10,dvdX01,dvdX11, D,invD;
66 #pragma omp for private(e,q,s,dXdv00,dXdv10,dXdv01,dXdv11,dvdX00,dvdX10,dvdX01,dvdX11, D,invD,X0_loc, X1_loc) schedule(static)
67 for(e=0;e<numElements;e++)
68 {
69 #define COMPDXDV0(P) coordinates[INDEX2(P,nodes[INDEX2(0,e,numNodes)],DIM)]*(-1)+\
70 coordinates[INDEX2(P,nodes[INDEX2(1,e,numNodes)],DIM)]*1+\
71 coordinates[INDEX2(P,nodes[INDEX2(2,e,numNodes)],DIM)]*(0)
72
73 #define COMPDXDV1(P) coordinates[INDEX2(P,nodes[INDEX2(0,e,numNodes)],DIM)]*(-1)+\
74 coordinates[INDEX2(P,nodes[INDEX2(1,e,numNodes)],DIM)]*(0)+\
75 coordinates[INDEX2(P,nodes[INDEX2(2,e,numNodes)],DIM)]*(1)
76
77 dXdv00=0;
78 dXdv10=0;
79 dXdv01=0;
80 dXdv11=0;
81 dXdv00=COMPDXDV0(0);
82 dXdv10=COMPDXDV0(1);
83 dXdv01=COMPDXDV1(0);
84 dXdv11=COMPDXDV1(1);
85 D = dXdv00*dXdv11 - dXdv01*dXdv10;
86 absD[e]=ABS(D);
87 if (D==0.)
88 {
89 sprintf(error_msg,"Assemble_jacobeans_2D: element %d (id %d) has area zero.",e,element_id[e]);
90 Dudley_setError(ZERO_DIVISION_ERROR,error_msg);
91 }
92 else
93 {
94 invD=1./D;
95 dvdX00= dXdv11*invD;
96 dvdX10=-dXdv10*invD;
97 dvdX01=-dXdv01*invD;
98 dvdX11= dXdv00*invD;
99 if (numQuad==1)
100 {
101 for (s=0;s<3; s++)
102 {
103 #define DTDXSET(P,Q) dTdX[INDEX4(s,P,Q,e,numTest,DIM,numQuad)] = DTDV_2D[s][0]*dvdX0##P+DTDV_2D[s][1]*dvdX1##P
104
105 DTDXSET(0,0);
106 DTDXSET(1,0);
107 }
108 // volume[INDEX2(0,e,1)]=ABS(D)*(*quadweight);
109 }
110 else // numQuad==3
111 {
112 for (q=0;q<numTest;++q) // relying on unroll loops to optimise this
113 {
114 for (s=0;s<3; s++)
115 {
116 DTDXSET(0,q);
117 DTDXSET(1,q);
118 }
119 // volume[INDEX2(q,e,3)]=ABS(D)*(*quadweight);
120 }
121 }
122 }
123 }
124 } // end parallel
125 #undef DIM
126 #undef LOCDIM
127 #undef DTDXSET
128 #undef COMPDXDV0
129 #undef COMPDXDV1
130 }
131 /**************************************************************/
132 /* */
133 /* Jacobean 1D manifold in 2D and 1D elements */
134 /* */
135 void Assemble_jacobeans_2D_M1D_E1D(double* coordinates, dim_t numQuad,
136 dim_t numElements, dim_t numNodes, index_t* nodes,
137 double* dTdX, double* absD, double* quadweight, index_t* element_id)
138 {
139 #define DIM 2
140 #define LOCDIM 1
141 register int e;
142 char error_msg[LenErrorMsg_MAX];
143 const dim_t numTest=2;
144 *quadweight=(numQuad==1)?1.0:0.5;
145 // numQuad is 1 or 2
146 #pragma omp parallel
147 {
148 register double dXdv00,dXdv10,dvdX00,dvdX01,D,invD;
149 #pragma omp for private(e,q,s,dXdv00,dXdv10,dvdX00,dvdX01,D,invD,X0_loc, X1_loc) schedule(static)
150 for(e=0;e<numElements;e++)
151 {
152 dXdv00=0;
153 dXdv10=0;
154 dXdv00+=coordinates[INDEX2(0,nodes[INDEX2(0,e,numNodes)],DIM)]*(-1.)+coordinates[INDEX2(0,nodes[INDEX2(1,e,numNodes)],DIM)];
155 dXdv00+=coordinates[INDEX2(1,nodes[INDEX2(0,e,numNodes)],DIM)]*(-1.)+coordinates[INDEX2(1,nodes[INDEX2(1,e,numNodes)],DIM)];
156 D=dXdv00*dXdv00+dXdv10*dXdv10;
157 if (D==0.)
158 {
159 sprintf(error_msg,"Assemble_jacobeans_2D_M1D_E1D: element %d (id %d) has length zero.",e,element_id[e]);
160 Dudley_setError(ZERO_DIVISION_ERROR,error_msg);
161 } else {
162 invD=1./D;
163 dvdX00=dXdv00*invD;
164 dvdX01=dXdv10*invD;
165 // The number of quad points is 1 or 2
166 dTdX[INDEX4(0,0,0,e,numTest,DIM,numQuad)]=-1*dvdX00;
167 dTdX[INDEX4(0,1,0,e,numTest,DIM,numQuad)]=-1*dvdX01;
168 dTdX[INDEX4(1,0,0,e,numTest,DIM,numQuad)]=-1*dvdX00;
169 dTdX[INDEX4(1,1,0,e,numTest,DIM,numQuad)]=-1*dvdX01;
170 // volume[INDEX2(0,e,numQuad)]=sqrt(D)*(*quadweight);
171 absD[e]=sqrt(D);
172 if (numQuad==2)
173 {
174 dTdX[INDEX4(0,0,1,e,numTest,DIM,numQuad)]=dvdX00;
175 dTdX[INDEX4(0,1,1,e,numTest,DIM,numQuad)]=dvdX01;
176 dTdX[INDEX4(1,0,1,e,numTest,DIM,numQuad)]=dvdX00;
177 dTdX[INDEX4(1,1,1,e,numTest,DIM,numQuad)]=dvdX01;
178 // volume[INDEX2(1,e,numQuad)]=sqrt(D)*(*quadweight);
179 }
180 }
181 }
182 } // end parallel
183 #undef DIM
184 #undef LOCDIM
185 }
186
187 /**************************************************************/
188 /* */
189 /* Jacobean 3D */
190 /* */
191 void Assemble_jacobeans_3D(double* coordinates, dim_t numQuad, dim_t numElements, dim_t numNodes, index_t* nodes,
192 double* dTdX, double* absD, double* quadweight, index_t* element_id)
193 {
194 #define DIM 3
195 #define LOCDIM 3
196 int e,q,s;
197 char error_msg[LenErrorMsg_MAX];
198 // numQuad is 1 or 4
199 const dim_t numShape=4, numTest=4;
200 *quadweight=(numQuad==1)?1./6:1./24;
201
202 #pragma omp parallel
203 {
204 register double dXdv00,dXdv10,dXdv20,dXdv01,dXdv11,dXdv21,dXdv02,dXdv12,dXdv22,
205 dvdX00,dvdX10,dvdX20,dvdX01,dvdX11,dvdX21,dvdX02,dvdX12,dvdX22, D,invD,
206 X0_loc,X1_loc,X2_loc;
207 #pragma omp for private(e,q,s,dXdv00,dXdv10,dXdv20,dXdv01,dXdv11,dXdv21,dXdv02,dXdv12,dXdv22,dvdX00,dvdX10,dvdX20,dvdX01,dvdX11,dvdX21,dvdX02,dvdX12,dvdX22,D,invD,X0_loc,X1_loc,X2_loc) schedule(static)
208 for(e=0;e<numElements;e++)
209 {
210 dXdv00=0;
211 dXdv10=0;
212 dXdv20=0;
213 dXdv01=0;
214 dXdv11=0;
215 dXdv21=0;
216 dXdv02=0;
217 dXdv12=0;
218 dXdv22=0;
219 for (s=0;s<numShape; s++)
220 {
221 X0_loc=coordinates[INDEX2(0,nodes[INDEX2(s,e,numNodes)],DIM)];
222 X1_loc=coordinates[INDEX2(1,nodes[INDEX2(s,e,numNodes)],DIM)];
223 X2_loc=coordinates[INDEX2(2,nodes[INDEX2(s,e,numNodes)],DIM)];
224 dXdv00+=X0_loc*DTDV_3D[s][0];
225 dXdv10+=X1_loc*DTDV_3D[s][0];
226 dXdv20+=X2_loc*DTDV_3D[s][0];
227 dXdv01+=X0_loc*DTDV_3D[s][1];
228 dXdv11+=X1_loc*DTDV_3D[s][1];
229 dXdv21+=X2_loc*DTDV_3D[s][1];
230 dXdv02+=X0_loc*DTDV_3D[s][2];
231 dXdv12+=X1_loc*DTDV_3D[s][2];
232 dXdv22+=X2_loc*DTDV_3D[s][2];
233 }
234 D = dXdv00*(dXdv11*dXdv22-dXdv12*dXdv21)+ dXdv01*(dXdv20*dXdv12-dXdv10*dXdv22)+dXdv02*(dXdv10*dXdv21-dXdv20*dXdv11);
235 absD[e]=ABS(D);
236 if (D==0.)
237 {
238 sprintf(error_msg,"Assemble_jacobeans_3D: element %d (id %d) has volume zero.",e,element_id[e]);
239 Dudley_setError(ZERO_DIVISION_ERROR,error_msg);
240 } else {
241 invD=1./D;
242 dvdX00=(dXdv11*dXdv22-dXdv12*dXdv21)*invD;
243 dvdX10=(dXdv20*dXdv12-dXdv10*dXdv22)*invD;
244 dvdX20=(dXdv10*dXdv21-dXdv20*dXdv11)*invD;
245 dvdX01=(dXdv02*dXdv21-dXdv01*dXdv22)*invD;
246 dvdX11=(dXdv00*dXdv22-dXdv20*dXdv02)*invD;
247 dvdX21=(dXdv01*dXdv20-dXdv00*dXdv21)*invD;
248 dvdX02=(dXdv01*dXdv12-dXdv02*dXdv11)*invD;
249 dvdX12=(dXdv02*dXdv10-dXdv00*dXdv12)*invD;
250 dvdX22=(dXdv00*dXdv11-dXdv01*dXdv10)*invD;
251 for (q=0;q<numQuad;q++)
252 {
253 for (s=0;s<numTest; s++)
254 {
255 dTdX[INDEX4(s,0,q,e,numTest,DIM,numQuad)]=DTDV_3D[s][0]*dvdX00+DTDV_3D[s][1]*dvdX10+DTDV_3D[s][2]*dvdX20;
256 dTdX[INDEX4(s,1,q,e,numTest,DIM,numQuad)]=DTDV_3D[s][0]*dvdX01+DTDV_3D[s][1]*dvdX11+DTDV_3D[s][2]*dvdX21;
257 dTdX[INDEX4(s,2,q,e,numTest,DIM,numQuad)]=DTDV_3D[s][0]*dvdX02+DTDV_3D[s][1]*dvdX12+DTDV_3D[s][2]*dvdX22;
258 }
259 // volume[INDEX2(q,e,numQuad)]=ABS(D)*(*quadweight);
260 }
261 }
262 }
263 } // end parallel
264 #undef DIM
265 #undef LOCDIM
266 }
267
268 /**************************************************************/
269 /* */
270 /* Jacobean 2D manifold in 3D with 2D elements */
271 /* */
272 void Assemble_jacobeans_3D_M2D_E2D(double* coordinates, dim_t numQuad, dim_t numElements, dim_t numNodes, index_t* nodes,
273 double* dTdX, double* absD, double* quadweight, index_t* element_id)
274 {
275 #define DIM 3
276 #define LOCDIM 2
277 register int e,q,s;
278 char error_msg[LenErrorMsg_MAX];
279 // numQuad is 1 or 3
280 *quadweight=(numQuad==1)?1./2:1./6;
281 const double DTDV[3][2]={{-1.,-1.},{1.,0.},{0.,1.}};
282 const dim_t numShape=3, numTest=3;
283 #pragma omp parallel
284 {
285 register double dXdv00,dXdv10,dXdv20,dXdv01,dXdv11,dXdv21,m00,m01,m11,
286 dvdX00,dvdX01,dvdX02,dvdX10,dvdX11,dvdX12,D,invD,
287 X0_loc, X1_loc, X2_loc;
288 #pragma omp for private(e,q,s,dXdv00,dXdv10,dXdv20,dXdv01,dXdv11,dXdv21,m00,m01,m11,dvdX00,dvdX01,dvdX02,dvdX10,dvdX11,dvdX12,D,invD, X0_loc, X1_loc, X2_loc) schedule(static)
289 for(e=0;e<numElements;e++)
290 {
291 dXdv00=0;
292 dXdv10=0;
293 dXdv20=0;
294 dXdv01=0;
295 dXdv11=0;
296 dXdv21=0;
297 for (s=0;s<numShape; s++)
298 {
299 X0_loc=coordinates[INDEX2(0,nodes[INDEX2(s,e,numNodes)],DIM)];
300 X1_loc=coordinates[INDEX2(1,nodes[INDEX2(s,e,numNodes)],DIM)];
301 X2_loc=coordinates[INDEX2(2,nodes[INDEX2(s,e,numNodes)],DIM)];
302 dXdv00+=X0_loc*DTDV[s][0];
303 dXdv10+=X1_loc*DTDV[s][0];
304 dXdv20+=X2_loc*DTDV[s][0];
305 dXdv01+=X0_loc*DTDV[s][1];
306 dXdv11+=X1_loc*DTDV[s][1];
307 dXdv21+=X2_loc*DTDV[s][1];
308 }
309 m00=dXdv00*dXdv00+dXdv10*dXdv10+dXdv20*dXdv20;
310 m01=dXdv00*dXdv01+dXdv10*dXdv11+dXdv20*dXdv21;
311 m11=dXdv01*dXdv01+dXdv11*dXdv11+dXdv21*dXdv21;
312 D=m00*m11-m01*m01;
313 absD[e]=sqrt(D);
314 if (D==0.)
315 {
316 sprintf(error_msg,"Assemble_jacobeans_3D_M2D: element %d (id %d) has area zero.",e,element_id[e]);
317 Dudley_setError(ZERO_DIVISION_ERROR,error_msg);
318 } else {
319 invD=1./D;
320 dvdX00=( m00*dXdv00-m01*dXdv01)*invD;
321 dvdX01=( m00*dXdv10-m01*dXdv11)*invD;
322 dvdX02=( m00*dXdv20-m01*dXdv21)*invD;
323 dvdX10=(-m01*dXdv00+m11*dXdv01)*invD;
324 dvdX11=(-m01*dXdv10+m11*dXdv11)*invD;
325 dvdX12=(-m01*dXdv20+m11*dXdv21)*invD;
326 for (q=0;q<numQuad;q++)
327 {
328 for (s=0;s<numTest; s++)
329 {
330 dTdX[INDEX4(s,0,q,e,numTest,DIM,numQuad)]=DTDV[s][0]*dvdX00+DTDV[s][1]*dvdX10;
331 dTdX[INDEX4(s,1,q,e,numTest,DIM,numQuad)]=DTDV[s][0]*dvdX01+DTDV[s][1]*dvdX11;
332 dTdX[INDEX4(s,2,q,e,numTest,DIM,numQuad)]=DTDV[s][0]*dvdX02+DTDV[s][1]*dvdX12;
333 }
334 // volume[INDEX2(q,e,numQuad)]=sqrt(D)*(*quadweight);
335 }
336 }
337 }
338 } // end parallel section
339 #undef DIM
340 #undef LOCDIM
341 }

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26