Parent Directory
|
Revision Log
|
Patch
revision 3691 by caltinay, Wed Nov 23 23:07:37 2011 UTC | revision 3760 by caltinay, Mon Jan 9 05:21:18 2012 UTC | |
---|---|---|
# | Line 13 | Line 13 |
13 | ||
14 | #include <ripley/Rectangle.h> | #include <ripley/Rectangle.h> |
15 | extern "C" { | extern "C" { |
16 | #include "paso/SystemMatrixPattern.h" | #include "paso/SystemMatrix.h" |
17 | } | } |
18 | ||
19 | #if USE_SILO | #if USE_SILO |
# | Line 43 Rectangle::Rectangle(int n0, int n1, dou | Line 43 Rectangle::Rectangle(int n0, int n1, dou |
43 | if (m_NX*m_NY != m_mpiInfo->size) | if (m_NX*m_NY != m_mpiInfo->size) |
44 | throw RipleyException("Invalid number of spatial subdivisions"); | throw RipleyException("Invalid number of spatial subdivisions"); |
45 | ||
46 | if (n0%m_NX > 0 || n1%m_NY > 0) | if ((n0+1)%m_NX > 0 || (n1+1)%m_NY > 0) |
47 | throw RipleyException("Number of elements must be separable into number of ranks in each dimension"); | throw RipleyException("Number of elements+1 must be separable into number of ranks in each dimension"); |
48 | ||
49 | // local number of elements | if ((m_NX > 1 && (n0+1)/m_NX<2) || (m_NY > 1 && (n1+1)/m_NY<2)) |
50 | m_NE0 = n0/m_NX; | throw RipleyException("Too few elements for the number of ranks"); |
51 | m_NE1 = n1/m_NY; | |
52 | // local number of nodes (not necessarily owned) | // local number of elements (including overlap) |
53 | m_NE0 = (m_NX>1 ? (n0+1)/m_NX : n0); | |
54 | if (m_mpiInfo->rank%m_NX>0 && m_mpiInfo->rank%m_NX<m_NX-1) | |
55 | m_NE0++; | |
56 | m_NE1 = (m_NY>1 ? (n1+1)/m_NY : n1); | |
57 | if (m_mpiInfo->rank/m_NX>0 && m_mpiInfo->rank/m_NX<m_NY-1) | |
58 | m_NE1++; | |
59 | ||
60 | // local number of nodes | |
61 | m_N0 = m_NE0+1; | m_N0 = m_NE0+1; |
62 | m_N1 = m_NE1+1; | m_N1 = m_NE1+1; |
63 | ||
64 | // bottom-left node is at (offset0,offset1) in global mesh | // bottom-left node is at (offset0,offset1) in global mesh |
65 | m_offset0 = m_NE0*(m_mpiInfo->rank%m_NX); | m_offset0 = (n0+1)/m_NX*(m_mpiInfo->rank%m_NX); |
66 | m_offset1 = m_NE1*(m_mpiInfo->rank/m_NX); | if (m_offset0 > 0) |
67 | m_offset0--; | |
68 | m_offset1 = (n1+1)/m_NY*(m_mpiInfo->rank/m_NX); | |
69 | if (m_offset1 > 0) | |
70 | m_offset1--; | |
71 | ||
72 | populateSampleIds(); | populateSampleIds(); |
73 | createPattern(); | |
74 | } | } |
75 | ||
76 | Rectangle::~Rectangle() | Rectangle::~Rectangle() |
# | Line 69 string Rectangle::getDescription() const | Line 84 string Rectangle::getDescription() const |
84 | ||
85 | bool Rectangle::operator==(const AbstractDomain& other) const | bool Rectangle::operator==(const AbstractDomain& other) const |
86 | { | { |
87 | if (dynamic_cast<const Rectangle*>(&other)) | const Rectangle* o=dynamic_cast<const Rectangle*>(&other); |
88 | return this==&other; | if (o) { |
89 | return (RipleyDomain::operator==(other) && | |
90 | m_gNE0==o->m_gNE0 && m_gNE1==o->m_gNE1 | |
91 | && m_l0==o->m_l0 && m_l1==o->m_l1 | |
92 | && m_NX==o->m_NX && m_NY==o->m_NY); | |
93 | } | |
94 | ||
95 | return false; | return false; |
96 | } | } |
# | Line 138 void Rectangle::dump(const string& fileN | Line 158 void Rectangle::dump(const string& fileN |
158 | boost::scoped_ptr<double> x(new double[m_N0]); | boost::scoped_ptr<double> x(new double[m_N0]); |
159 | boost::scoped_ptr<double> y(new double[m_N1]); | boost::scoped_ptr<double> y(new double[m_N1]); |
160 | double* coords[2] = { x.get(), y.get() }; | double* coords[2] = { x.get(), y.get() }; |
161 | pair<double,double> xdx = getFirstCoordAndSpacing(0); | |
162 | pair<double,double> ydy = getFirstCoordAndSpacing(1); | |
163 | #pragma omp parallel | #pragma omp parallel |
164 | { | { |
165 | #pragma omp for | #pragma omp for nowait |
166 | for (dim_t i0 = 0; i0 < m_N0; i0++) { | for (dim_t i0 = 0; i0 < m_N0; i0++) { |
167 | coords[0][i0]=(m_l0*(i0+m_offset0))/m_gNE0; | coords[0][i0]=xdx.first+i0*xdx.second; |
168 | } | } |
169 | #pragma omp for | #pragma omp for nowait |
170 | for (dim_t i1 = 0; i1 < m_N1; i1++) { | for (dim_t i1 = 0; i1 < m_N1; i1++) { |
171 | coords[1][i1]=(m_l1*(i1+m_offset1))/m_gNE1; | coords[1][i1]=ydy.first+i1*ydy.second; |
172 | } | } |
173 | } | } |
174 | int dims[] = { m_N0, m_N1 }; | IndexVector dims = getNumNodesPerDim(); |
175 | DBPutQuadmesh(dbfile, "mesh", NULL, coords, dims, 2, DB_DOUBLE, | |
176 | // write mesh | |
177 | DBPutQuadmesh(dbfile, "mesh", NULL, coords, &dims[0], 2, DB_DOUBLE, | |
178 | DB_COLLINEAR, NULL); | DB_COLLINEAR, NULL); |
179 | ||
180 | // write node ids | |
181 | DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], &dims[0], 2, | |
182 | NULL, 0, DB_INT, DB_NODECENT, NULL); | |
183 | ||
184 | // write element ids | |
185 | dims = getNumElementsPerDim(); | |
186 | DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0], | |
187 | &dims[0], 2, NULL, 0, DB_INT, DB_ZONECENT, NULL); | |
188 | ||
189 | // rank 0 writes multimesh and multivar | |
190 | if (m_mpiInfo->rank == 0) { | if (m_mpiInfo->rank == 0) { |
191 | vector<string> tempstrings; | vector<string> tempstrings; |
192 | vector<char*> meshnames; | vector<char*> names; |
193 | for (dim_t i=0; i<m_mpiInfo->size; i++) { | for (dim_t i=0; i<m_mpiInfo->size; i++) { |
194 | stringstream path; | stringstream path; |
195 | path << "/block" << setw(4) << setfill('0') << right << i << "/mesh"; | path << "/block" << setw(4) << setfill('0') << right << i << "/mesh"; |
196 | tempstrings.push_back(path.str()); | tempstrings.push_back(path.str()); |
197 | meshnames.push_back((char*)tempstrings.back().c_str()); | names.push_back((char*)tempstrings.back().c_str()); |
198 | } | } |
199 | vector<int> meshtypes(m_mpiInfo->size, DB_QUAD_RECT); | vector<int> types(m_mpiInfo->size, DB_QUAD_RECT); |
200 | DBSetDir(dbfile, "/"); | DBSetDir(dbfile, "/"); |
201 | DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &meshnames[0], | DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0], |
202 | &meshtypes[0], NULL); | &types[0], NULL); |
203 | tempstrings.clear(); | |
204 | names.clear(); | |
205 | for (dim_t i=0; i<m_mpiInfo->size; i++) { | |
206 | stringstream path; | |
207 | path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId"; | |
208 | tempstrings.push_back(path.str()); | |
209 | names.push_back((char*)tempstrings.back().c_str()); | |
210 | } | |
211 | types.assign(m_mpiInfo->size, DB_QUADVAR); | |
212 | DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0], | |
213 | &types[0], NULL); | |
214 | tempstrings.clear(); | |
215 | names.clear(); | |
216 | for (dim_t i=0; i<m_mpiInfo->size; i++) { | |
217 | stringstream path; | |
218 | path << "/block" << setw(4) << setfill('0') << right << i << "/elementId"; | |
219 | tempstrings.push_back(path.str()); | |
220 | names.push_back((char*)tempstrings.back().c_str()); | |
221 | } | |
222 | DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0], | |
223 | &types[0], NULL); | |
224 | } | } |
225 | ||
226 | if (m_mpiInfo->size > 1) { | if (m_mpiInfo->size > 1) { |
# | Line 182 void Rectangle::dump(const string& fileN | Line 237 void Rectangle::dump(const string& fileN |
237 | #endif | #endif |
238 | } | } |
239 | ||
240 | const int* Rectangle::borrowSampleReferenceIDs(int functionSpaceType) const | const int* Rectangle::borrowSampleReferenceIDs(int fsType) const |
241 | { | { |
242 | switch (functionSpaceType) { | switch (fsType) { |
243 | case Nodes: | case Nodes: |
244 | case ReducedNodes: //FIXME: reduced | |
245 | return &m_nodeId[0]; | return &m_nodeId[0]; |
246 | case DegreesOfFreedom: | |
247 | case ReducedDegreesOfFreedom: //FIXME: reduced | |
248 | return &m_dofId[0]; | |
249 | case Elements: | case Elements: |
250 | case ReducedElements: | |
251 | return &m_elementId[0]; | return &m_elementId[0]; |
252 | case FaceElements: | case FaceElements: |
253 | case ReducedFaceElements: | |
254 | return &m_faceId[0]; | return &m_faceId[0]; |
255 | default: | default: |
256 | break; | break; |
# | Line 197 const int* Rectangle::borrowSampleRefere | Line 258 const int* Rectangle::borrowSampleRefere |
258 | ||
259 | stringstream msg; | stringstream msg; |
260 | msg << "borrowSampleReferenceIDs() not implemented for function space type " | msg << "borrowSampleReferenceIDs() not implemented for function space type " |
261 | << functionSpaceType; | << functionSpaceTypeAsString(fsType); |
262 | throw RipleyException(msg.str()); | throw RipleyException(msg.str()); |
263 | } | } |
264 | ||
265 | bool Rectangle::ownSample(int fsCode, index_t id) const | bool Rectangle::ownSample(int fsType, index_t id) const |
266 | { | { |
267 | #ifdef ESYS_MPI | if (getMPISize()==1) |
268 | if (fsCode == Nodes) { | return true; |
269 | const index_t myFirst=getNumNodes()*m_mpiInfo->rank; | |
270 | const index_t myLast=getNumNodes()*(m_mpiInfo->rank+1)-1; | switch (fsType) { |
271 | return (m_nodeId[id]>=myFirst && m_nodeId[id]<=myLast); | case Nodes: |
272 | } else | case ReducedNodes: //FIXME: reduced |
273 | throw RipleyException("ownSample() only implemented for Nodes"); | return (m_dofMap[id] < getNumDOF()); |
274 | #else | case DegreesOfFreedom: |
275 | return true; | case ReducedDegreesOfFreedom: |
276 | #endif | return true; |
277 | case Elements: | |
278 | case ReducedElements: | |
279 | // check ownership of element's bottom left node | |
280 | return (m_dofMap[id%m_NE0+m_N0*(id/m_NE0)] < getNumDOF()); | |
281 | case FaceElements: | |
282 | case ReducedFaceElements: | |
283 | { | |
284 | // check ownership of face element's first node | |
285 | const IndexVector faces = getNumFacesPerBoundary(); | |
286 | dim_t n=0; | |
287 | for (size_t i=0; i<faces.size(); i++) { | |
288 | n+=faces[i]; | |
289 | if (id<n) { | |
290 | index_t k; | |
291 | if (i==1) | |
292 | k=m_N0-1; | |
293 | else if (i==3) | |
294 | k=m_N0*(m_N1-1); | |
295 | else | |
296 | k=0; | |
297 | // determine whether to move right or up | |
298 | const index_t delta=(i/2==0 ? m_N0 : 1); | |
299 | return (m_dofMap[k+(id-n+faces[i])*delta] < getNumDOF()); | |
300 | } | |
301 | } | |
302 | return false; | |
303 | } | |
304 | default: | |
305 | break; | |
306 | } | |
307 | ||
308 | stringstream msg; | |
309 | msg << "ownSample() not implemented for " | |
310 | << functionSpaceTypeAsString(fsType); | |
311 | throw RipleyException(msg.str()); | |
312 | } | |
313 | ||
314 | void Rectangle::setToGradient(escript::Data& out, const escript::Data& cIn) const | |
315 | { | |
316 | escript::Data& in = *const_cast<escript::Data*>(&cIn); | |
317 | const dim_t numComp = in.getDataPointSize(); | |
318 | const double h0 = m_l0/m_gNE0; | |
319 | const double h1 = m_l1/m_gNE1; | |
320 | const double cx0 = -1./h0; | |
321 | const double cx1 = -.78867513459481288225/h0; | |
322 | const double cx2 = -.5/h0; | |
323 | const double cx3 = -.21132486540518711775/h0; | |
324 | const double cx4 = .21132486540518711775/h0; | |
325 | const double cx5 = .5/h0; | |
326 | const double cx6 = .78867513459481288225/h0; | |
327 | const double cx7 = 1./h0; | |
328 | const double cy0 = -1./h1; | |
329 | const double cy1 = -.78867513459481288225/h1; | |
330 | const double cy2 = -.5/h1; | |
331 | const double cy3 = -.21132486540518711775/h1; | |
332 | const double cy4 = .21132486540518711775/h1; | |
333 | const double cy5 = .5/h1; | |
334 | const double cy6 = .78867513459481288225/h1; | |
335 | const double cy7 = 1./h1; | |
336 | ||
337 | if (out.getFunctionSpace().getTypeCode() == Elements) { | |
338 | out.requireWrite(); | |
339 | /*** GENERATOR SNIP_GRAD_ELEMENTS TOP */ | |
340 | #pragma omp parallel for | |
341 | for (index_t k1=0; k1 < m_NE1; ++k1) { | |
342 | for (index_t k0=0; k0 < m_NE0; ++k0) { | |
343 | const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0)); | |
344 | const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0)); | |
345 | const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0)); | |
346 | const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0)); | |
347 | double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0)); | |
348 | for (index_t i=0; i < numComp; ++i) { | |
349 | o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx1 + f_01[i]*cx3 + f_10[i]*cx6 + f_11[i]*cx4; | |
350 | o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy1 + f_01[i]*cy6 + f_10[i]*cy3 + f_11[i]*cy4; | |
351 | o[INDEX3(i,0,1,numComp,2)] = f_00[i]*cx1 + f_01[i]*cx3 + f_10[i]*cx6 + f_11[i]*cx4; | |
352 | o[INDEX3(i,1,1,numComp,2)] = f_00[i]*cy3 + f_01[i]*cy4 + f_10[i]*cy1 + f_11[i]*cy6; | |
353 | o[INDEX3(i,0,2,numComp,2)] = f_00[i]*cx3 + f_01[i]*cx1 + f_10[i]*cx4 + f_11[i]*cx6; | |
354 | o[INDEX3(i,1,2,numComp,2)] = f_00[i]*cy1 + f_01[i]*cy6 + f_10[i]*cy3 + f_11[i]*cy4; | |
355 | o[INDEX3(i,0,3,numComp,2)] = f_00[i]*cx3 + f_01[i]*cx1 + f_10[i]*cx4 + f_11[i]*cx6; | |
356 | o[INDEX3(i,1,3,numComp,2)] = f_00[i]*cy3 + f_01[i]*cy4 + f_10[i]*cy1 + f_11[i]*cy6; | |
357 | } /* end of component loop i */ | |
358 | } /* end of k0 loop */ | |
359 | } /* end of k1 loop */ | |
360 | /* GENERATOR SNIP_GRAD_ELEMENTS BOTTOM */ | |
361 | } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) { | |
362 | out.requireWrite(); | |
363 | /*** GENERATOR SNIP_GRAD_REDUCED_ELEMENTS TOP */ | |
364 | #pragma omp parallel for | |
365 | for (index_t k1=0; k1 < m_NE1; ++k1) { | |
366 | for (index_t k0=0; k0 < m_NE0; ++k0) { | |
367 | const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0)); | |
368 | const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0)); | |
369 | const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0)); | |
370 | const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0)); | |
371 | double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0)); | |
372 | for (index_t i=0; i < numComp; ++i) { | |
373 | o[INDEX3(i,0,0,numComp,2)] = cx5*(f_10[i] + f_11[i]) + cx2*(f_00[i] + f_01[i]); | |
374 | o[INDEX3(i,1,0,numComp,2)] = cy2*(f_00[i] + f_10[i]) + cy5*(f_01[i] + f_11[i]); | |
375 | } /* end of component loop i */ | |
376 | } /* end of k0 loop */ | |
377 | } /* end of k1 loop */ | |
378 | /* GENERATOR SNIP_GRAD_REDUCED_ELEMENTS BOTTOM */ | |
379 | } else if (out.getFunctionSpace().getTypeCode() == FaceElements) { | |
380 | out.requireWrite(); | |
381 | #pragma omp parallel | |
382 | { | |
383 | /*** GENERATOR SNIP_GRAD_FACES TOP */ | |
384 | if (m_faceOffset[0] > -1) { | |
385 | #pragma omp for nowait | |
386 | for (index_t k1=0; k1 < m_NE1; ++k1) { | |
387 | const register double* f_10 = in.getSampleDataRO(INDEX2(1,k1, m_N0)); | |
388 | const register double* f_11 = in.getSampleDataRO(INDEX2(1,k1+1, m_N0)); | |
389 | const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0)); | |
390 | const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0)); | |
391 | double* o = out.getSampleDataRW(m_faceOffset[0]+k1); | |
392 | for (index_t i=0; i < numComp; ++i) { | |
393 | o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx1 + f_01[i]*cx3 + f_10[i]*cx6 + f_11[i]*cx4; | |
394 | o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy0 + f_01[i]*cy7; | |
395 | o[INDEX3(i,0,1,numComp,2)] = f_00[i]*cx3 + f_01[i]*cx1 + f_10[i]*cx4 + f_11[i]*cx6; | |
396 | o[INDEX3(i,1,1,numComp,2)] = f_00[i]*cy0 + f_01[i]*cy7; | |
397 | } /* end of component loop i */ | |
398 | } /* end of k1 loop */ | |
399 | } /* end of face 0 */ | |
400 | if (m_faceOffset[1] > -1) { | |
401 | #pragma omp for nowait | |
402 | for (index_t k1=0; k1 < m_NE1; ++k1) { | |
403 | const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0)); | |
404 | const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0)); | |
405 | const register double* f_01 = in.getSampleDataRO(INDEX2(m_N0-2,k1+1, m_N0)); | |
406 | const register double* f_00 = in.getSampleDataRO(INDEX2(m_N0-2,k1, m_N0)); | |
407 | double* o = out.getSampleDataRW(m_faceOffset[1]+k1); | |
408 | for (index_t i=0; i < numComp; ++i) { | |
409 | o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx1 + f_01[i]*cx3 + f_10[i]*cx6 + f_11[i]*cx4; | |
410 | o[INDEX3(i,1,0,numComp,2)] = f_10[i]*cy0 + f_11[i]*cy7; | |
411 | o[INDEX3(i,0,1,numComp,2)] = f_00[i]*cx3 + f_01[i]*cx1 + f_10[i]*cx4 + f_11[i]*cx6; | |
412 | o[INDEX3(i,1,1,numComp,2)] = f_10[i]*cy0 + f_11[i]*cy7; | |
413 | } /* end of component loop i */ | |
414 | } /* end of k1 loop */ | |
415 | } /* end of face 1 */ | |
416 | if (m_faceOffset[2] > -1) { | |
417 | #pragma omp for nowait | |
418 | for (index_t k0=0; k0 < m_NE0; ++k0) { | |
419 | const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0)); | |
420 | const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0)); | |
421 | const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,1, m_N0)); | |
422 | const register double* f_01 = in.getSampleDataRO(INDEX2(k0,1, m_N0)); | |
423 | double* o = out.getSampleDataRW(m_faceOffset[2]+k0); | |
424 | for (index_t i=0; i < numComp; ++i) { | |
425 | o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx0 + f_10[i]*cx7; | |
426 | o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy1 + f_01[i]*cy6 + f_10[i]*cy3 + f_11[i]*cy4; | |
427 | o[INDEX3(i,0,1,numComp,2)] = f_00[i]*cx0 + f_10[i]*cx7; | |
428 | o[INDEX3(i,1,1,numComp,2)] = f_00[i]*cy3 + f_01[i]*cy4 + f_10[i]*cy1 + f_11[i]*cy6; | |
429 | } /* end of component loop i */ | |
430 | } /* end of k0 loop */ | |
431 | } /* end of face 2 */ | |
432 | if (m_faceOffset[3] > -1) { | |
433 | #pragma omp for nowait | |
434 | for (index_t k0=0; k0 < m_NE0; ++k0) { | |
435 | const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0)); | |
436 | const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0)); | |
437 | const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,m_N1-2, m_N0)); | |
438 | const register double* f_00 = in.getSampleDataRO(INDEX2(k0,m_N1-2, m_N0)); | |
439 | double* o = out.getSampleDataRW(m_faceOffset[3]+k0); | |
440 | for (index_t i=0; i < numComp; ++i) { | |
441 | o[INDEX3(i,0,0,numComp,2)] = f_01[i]*cx0 + f_11[i]*cx7; | |
442 | o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy1 + f_01[i]*cy6 + f_10[i]*cy3 + f_11[i]*cy4; | |
443 | o[INDEX3(i,0,1,numComp,2)] = f_01[i]*cx0 + f_11[i]*cx7; | |
444 | o[INDEX3(i,1,1,numComp,2)] = f_00[i]*cy3 + f_01[i]*cy4 + f_10[i]*cy1 + f_11[i]*cy6; | |
445 | } /* end of component loop i */ | |
446 | } /* end of k0 loop */ | |
447 | } /* end of face 3 */ | |
448 | /* GENERATOR SNIP_GRAD_FACES BOTTOM */ | |
449 | } // end of parallel section | |
450 | } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) { | |
451 | out.requireWrite(); | |
452 | #pragma omp parallel | |
453 | { | |
454 | /*** GENERATOR SNIP_GRAD_REDUCED_FACES TOP */ | |
455 | if (m_faceOffset[0] > -1) { | |
456 | #pragma omp for nowait | |
457 | for (index_t k1=0; k1 < m_NE1; ++k1) { | |
458 | const register double* f_10 = in.getSampleDataRO(INDEX2(1,k1, m_N0)); | |
459 | const register double* f_11 = in.getSampleDataRO(INDEX2(1,k1+1, m_N0)); | |
460 | const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0)); | |
461 | const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0)); | |
462 | double* o = out.getSampleDataRW(m_faceOffset[0]+k1); | |
463 | for (index_t i=0; i < numComp; ++i) { | |
464 | o[INDEX3(i,0,0,numComp,2)] = cx5*(f_10[i] + f_11[i]) + cx2*(f_00[i] + f_01[i]); | |
465 | o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy0 + f_01[i]*cy7; | |
466 | } /* end of component loop i */ | |
467 | } /* end of k1 loop */ | |
468 | } /* end of face 0 */ | |
469 | if (m_faceOffset[1] > -1) { | |
470 | #pragma omp for nowait | |
471 | for (index_t k1=0; k1 < m_NE1; ++k1) { | |
472 | const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0)); | |
473 | const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0)); | |
474 | const register double* f_01 = in.getSampleDataRO(INDEX2(m_N0-2,k1+1, m_N0)); | |
475 | const register double* f_00 = in.getSampleDataRO(INDEX2(m_N0-2,k1, m_N0)); | |
476 | double* o = out.getSampleDataRW(m_faceOffset[1]+k1); | |
477 | for (index_t i=0; i < numComp; ++i) { | |
478 | o[INDEX3(i,0,0,numComp,2)] = cx5*(f_10[i] + f_11[i]) + cx2*(f_00[i] + f_01[i]); | |
479 | o[INDEX3(i,1,0,numComp,2)] = f_10[i]*cy0 + f_11[i]*cy7; | |
480 | } /* end of component loop i */ | |
481 | } /* end of k1 loop */ | |
482 | } /* end of face 1 */ | |
483 | if (m_faceOffset[2] > -1) { | |
484 | #pragma omp for nowait | |
485 | for (index_t k0=0; k0 < m_NE0; ++k0) { | |
486 | const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0)); | |
487 | const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0)); | |
488 | const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,1, m_N0)); | |
489 | const register double* f_01 = in.getSampleDataRO(INDEX2(k0,1, m_N0)); | |
490 | double* o = out.getSampleDataRW(m_faceOffset[2]+k0); | |
491 | for (index_t i=0; i < numComp; ++i) { | |
492 | o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx0 + f_10[i]*cx7; | |
493 | o[INDEX3(i,1,0,numComp,2)] = cy2*(f_00[i] + f_10[i]) + cy5*(f_01[i] + f_11[i]); | |
494 | } /* end of component loop i */ | |
495 | } /* end of k0 loop */ | |
496 | } /* end of face 2 */ | |
497 | if (m_faceOffset[3] > -1) { | |
498 | #pragma omp for nowait | |
499 | for (index_t k0=0; k0 < m_NE0; ++k0) { | |
500 | const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0)); | |
501 | const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0)); | |
502 | const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,m_N1-2, m_N0)); | |
503 | const register double* f_00 = in.getSampleDataRO(INDEX2(k0,m_N1-2, m_N0)); | |
504 | double* o = out.getSampleDataRW(m_faceOffset[3]+k0); | |
505 | for (index_t i=0; i < numComp; ++i) { | |
506 | o[INDEX3(i,0,0,numComp,2)] = f_01[i]*cx0 + f_11[i]*cx7; | |
507 | o[INDEX3(i,1,0,numComp,2)] = cy5*(f_01[i] + f_11[i]) + cy2*(f_00[i] + f_10[i]); | |
508 | } /* end of component loop i */ | |
509 | } /* end of k0 loop */ | |
510 | } /* end of face 3 */ | |
511 | /* GENERATOR SNIP_GRAD_REDUCED_FACES BOTTOM */ | |
512 | } // end of parallel section | |
513 | } else { | |
514 | stringstream msg; | |
515 | msg << "setToGradient() not implemented for " | |
516 | << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode()); | |
517 | throw RipleyException(msg.str()); | |
518 | } | |
519 | } | |
520 | ||
521 | void Rectangle::setToIntegrals(vector<double>& integrals, const escript::Data& arg) const | |
522 | { | |
523 | escript::Data& in = *const_cast<escript::Data*>(&arg); | |
524 | const dim_t numComp = in.getDataPointSize(); | |
525 | const double h0 = m_l0/m_gNE0; | |
526 | const double h1 = m_l1/m_gNE1; | |
527 | if (arg.getFunctionSpace().getTypeCode() == Elements) { | |
528 | const double w_0 = h0*h1/4.; | |
529 | #pragma omp parallel | |
530 | { | |
531 | vector<double> int_local(numComp, 0); | |
532 | #pragma omp for nowait | |
533 | for (index_t k1 = 0; k1 < m_NE1; ++k1) { | |
534 | for (index_t k0 = 0; k0 < m_NE0; ++k0) { | |
535 | const double* f = in.getSampleDataRO(INDEX2(k0, k1, m_NE0)); | |
536 | for (index_t i=0; i < numComp; ++i) { | |
537 | const register double f_0 = f[INDEX2(i,0,numComp)]; | |
538 | const register double f_1 = f[INDEX2(i,1,numComp)]; | |
539 | const register double f_2 = f[INDEX2(i,2,numComp)]; | |
540 | const register double f_3 = f[INDEX2(i,3,numComp)]; | |
541 | int_local[i]+=(f_0+f_1+f_2+f_3)*w_0; | |
542 | } /* end of component loop i */ | |
543 | } /* end of k0 loop */ | |
544 | } /* end of k1 loop */ | |
545 | ||
546 | #pragma omp critical | |
547 | for (index_t i=0; i<numComp; i++) | |
548 | integrals[i]+=int_local[i]; | |
549 | } // end of parallel section | |
550 | } else if (arg.getFunctionSpace().getTypeCode() == ReducedElements) { | |
551 | const double w_0 = h0*h1; | |
552 | #pragma omp parallel | |
553 | { | |
554 | vector<double> int_local(numComp, 0); | |
555 | #pragma omp for nowait | |
556 | for (index_t k1 = 0; k1 < m_NE1; ++k1) { | |
557 | for (index_t k0 = 0; k0 < m_NE0; ++k0) { | |
558 | const double* f = in.getSampleDataRO(INDEX2(k0, k1, m_NE0)); | |
559 | for (index_t i=0; i < numComp; ++i) { | |
560 | int_local[i]+=f[i]*w_0; | |
561 | } /* end of component loop i */ | |
562 | } /* end of k0 loop */ | |
563 | } /* end of k1 loop */ | |
564 | ||
565 | #pragma omp critical | |
566 | for (index_t i=0; i<numComp; i++) | |
567 | integrals[i]+=int_local[i]; | |
568 | } // end of parallel section | |
569 | } else if (arg.getFunctionSpace().getTypeCode() == FaceElements) { | |
570 | const double w_0 = h0/2.; | |
571 | const double w_1 = h1/2.; | |
572 | #pragma omp parallel | |
573 | { | |
574 | vector<double> int_local(numComp, 0); | |
575 | if (m_faceOffset[0] > -1) { | |
576 | #pragma omp for nowait | |
577 | for (index_t k1 = 0; k1 < m_NE1; ++k1) { | |
578 | const double* f = in.getSampleDataRO(m_faceOffset[0]+k1); | |
579 | for (index_t i=0; i < numComp; ++i) { | |
580 | const register double f_0 = f[INDEX2(i,0,numComp)]; | |
581 | const register double f_1 = f[INDEX2(i,1,numComp)]; | |
582 | int_local[i]+=(f_0+f_1)*w_1; | |
583 | } /* end of component loop i */ | |
584 | } /* end of k1 loop */ | |
585 | } | |
586 | ||
587 | if (m_faceOffset[1] > -1) { | |
588 | #pragma omp for nowait | |
589 | for (index_t k1 = 0; k1 < m_NE1; ++k1) { | |
590 | const double* f = in.getSampleDataRO(m_faceOffset[1]+k1); | |
591 | for (index_t i=0; i < numComp; ++i) { | |
592 | const register double f_0 = f[INDEX2(i,0,numComp)]; | |
593 | const register double f_1 = f[INDEX2(i,1,numComp)]; | |
594 | int_local[i]+=(f_0+f_1)*w_1; | |
595 | } /* end of component loop i */ | |
596 | } /* end of k1 loop */ | |
597 | } | |
598 | ||
599 | if (m_faceOffset[2] > -1) { | |
600 | #pragma omp for nowait | |
601 | for (index_t k0 = 0; k0 < m_NE0; ++k0) { | |
602 | const double* f = in.getSampleDataRO(m_faceOffset[2]+k0); | |
603 | for (index_t i=0; i < numComp; ++i) { | |
604 | const register double f_0 = f[INDEX2(i,0,numComp)]; | |
605 | const register double f_1 = f[INDEX2(i,1,numComp)]; | |
606 | int_local[i]+=(f_0+f_1)*w_0; | |
607 | } /* end of component loop i */ | |
608 | } /* end of k0 loop */ | |
609 | } | |
610 | ||
611 | if (m_faceOffset[3] > -1) { | |
612 | #pragma omp for nowait | |
613 | for (index_t k0 = 0; k0 < m_NE0; ++k0) { | |
614 | const double* f = in.getSampleDataRO(m_faceOffset[3]+k0); | |
615 | for (index_t i=0; i < numComp; ++i) { | |
616 | const register double f_0 = f[INDEX2(i,0,numComp)]; | |
617 | const register double f_1 = f[INDEX2(i,1,numComp)]; | |
618 | int_local[i]+=(f_0+f_1)*w_0; | |
619 | } /* end of component loop i */ | |
620 | } /* end of k0 loop */ | |
621 | } | |
622 | ||
623 | #pragma omp critical | |
624 | for (index_t i=0; i<numComp; i++) | |
625 | integrals[i]+=int_local[i]; | |
626 | } // end of parallel section | |
627 | } else if (arg.getFunctionSpace().getTypeCode() == ReducedFaceElements) { | |
628 | #pragma omp parallel | |
629 | { | |
630 | vector<double> int_local(numComp, 0); | |
631 | if (m_faceOffset[0] > -1) { | |
632 | #pragma omp for nowait | |
633 | for (index_t k1 = 0; k1 < m_NE1; ++k1) { | |
634 | const double* f = in.getSampleDataRO(m_faceOffset[0]+k1); | |
635 | for (index_t i=0; i < numComp; ++i) { | |
636 | int_local[i]+=f[i]*h1; | |
637 | } /* end of component loop i */ | |
638 | } /* end of k1 loop */ | |
639 | } | |
640 | ||
641 | if (m_faceOffset[1] > -1) { | |
642 | #pragma omp for nowait | |
643 | for (index_t k1 = 0; k1 < m_NE1; ++k1) { | |
644 | const double* f = in.getSampleDataRO(m_faceOffset[1]+k1); | |
645 | for (index_t i=0; i < numComp; ++i) { | |
646 | int_local[i]+=f[i]*h1; | |
647 | } /* end of component loop i */ | |
648 | } /* end of k1 loop */ | |
649 | } | |
650 | ||
651 | if (m_faceOffset[2] > -1) { | |
652 | #pragma omp for nowait | |
653 | for (index_t k0 = 0; k0 < m_NE0; ++k0) { | |
654 | const double* f = in.getSampleDataRO(m_faceOffset[2]+k0); | |
655 | for (index_t i=0; i < numComp; ++i) { | |
656 | int_local[i]+=f[i]*h0; | |
657 | } /* end of component loop i */ | |
658 | } /* end of k0 loop */ | |
659 | } | |
660 | ||
661 | if (m_faceOffset[3] > -1) { | |
662 | #pragma omp for nowait | |
663 | for (index_t k0 = 0; k0 < m_NE0; ++k0) { | |
664 | const double* f = in.getSampleDataRO(m_faceOffset[3]+k0); | |
665 | for (index_t i=0; i < numComp; ++i) { | |
666 | int_local[i]+=f[i]*h0; | |
667 | } /* end of component loop i */ | |
668 | } /* end of k0 loop */ | |
669 | } | |
670 | ||
671 | #pragma omp critical | |
672 | for (index_t i=0; i<numComp; i++) | |
673 | integrals[i]+=int_local[i]; | |
674 | } // end of parallel section | |
675 | } else { | |
676 | stringstream msg; | |
677 | msg << "setToIntegrals() not implemented for " | |
678 | << functionSpaceTypeAsString(arg.getFunctionSpace().getTypeCode()); | |
679 | throw RipleyException(msg.str()); | |
680 | } | |
681 | } | |
682 | ||
683 | void Rectangle::setToNormal(escript::Data& out) const | |
684 | { | |
685 | if (out.getFunctionSpace().getTypeCode() == FaceElements) { | |
686 | out.requireWrite(); | |
687 | #pragma omp parallel | |
688 | { | |
689 | if (m_faceOffset[0] > -1) { | |
690 | #pragma omp for nowait | |
691 | for (index_t k1 = 0; k1 < m_NE1; ++k1) { | |
692 | double* o = out.getSampleDataRW(m_faceOffset[0]+k1); | |
693 | // set vector at two quadrature points | |
694 | *o++ = -1.; | |
695 | *o++ = 0.; | |
696 | *o++ = -1.; | |
697 | *o = 0.; | |
698 | } | |
699 | } | |
700 | ||
701 | if (m_faceOffset[1] > -1) { | |
702 | #pragma omp for nowait | |
703 | for (index_t k1 = 0; k1 < m_NE1; ++k1) { | |
704 | double* o = out.getSampleDataRW(m_faceOffset[1]+k1); | |
705 | // set vector at two quadrature points | |
706 | *o++ = 1.; | |
707 | *o++ = 0.; | |
708 | *o++ = 1.; | |
709 | *o = 0.; | |
710 | } | |
711 | } | |
712 | ||
713 | if (m_faceOffset[2] > -1) { | |
714 | #pragma omp for nowait | |
715 | for (index_t k0 = 0; k0 < m_NE0; ++k0) { | |
716 | double* o = out.getSampleDataRW(m_faceOffset[2]+k0); | |
717 | // set vector at two quadrature points | |
718 | *o++ = 0.; | |
719 | *o++ = -1.; | |
720 | *o++ = 0.; | |
721 | *o = -1.; | |
722 | } | |
723 | } | |
724 | ||
725 | if (m_faceOffset[3] > -1) { | |
726 | #pragma omp for nowait | |
727 | for (index_t k0 = 0; k0 < m_NE0; ++k0) { | |
728 | double* o = out.getSampleDataRW(m_faceOffset[3]+k0); | |
729 | // set vector at two quadrature points | |
730 | *o++ = 0.; | |
731 | *o++ = 1.; | |
732 | *o++ = 0.; | |
733 | *o = 1.; | |
734 | } | |
735 | } | |
736 | } // end of parallel section | |
737 | } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) { | |
738 | out.requireWrite(); | |
739 | #pragma omp parallel | |
740 | { | |
741 | if (m_faceOffset[0] > -1) { | |
742 | #pragma omp for nowait | |
743 | for (index_t k1 = 0; k1 < m_NE1; ++k1) { | |
744 | double* o = out.getSampleDataRW(m_faceOffset[0]+k1); | |
745 | *o++ = -1.; | |
746 | *o = 0.; | |
747 | } | |
748 | } | |
749 | ||
750 | if (m_faceOffset[1] > -1) { | |
751 | #pragma omp for nowait | |
752 | for (index_t k1 = 0; k1 < m_NE1; ++k1) { | |
753 | double* o = out.getSampleDataRW(m_faceOffset[1]+k1); | |
754 | *o++ = 1.; | |
755 | *o = 0.; | |
756 | } | |
757 | } | |
758 | ||
759 | if (m_faceOffset[2] > -1) { | |
760 | #pragma omp for nowait | |
761 | for (index_t k0 = 0; k0 < m_NE0; ++k0) { | |
762 | double* o = out.getSampleDataRW(m_faceOffset[2]+k0); | |
763 | *o++ = 0.; | |
764 | *o = -1.; | |
765 | } | |
766 | } | |
767 | ||
768 | if (m_faceOffset[3] > -1) { | |
769 | #pragma omp for nowait | |
770 | for (index_t k0 = 0; k0 < m_NE0; ++k0) { | |
771 | double* o = out.getSampleDataRW(m_faceOffset[3]+k0); | |
772 | *o++ = 0.; | |
773 | *o = 1.; | |
774 | } | |
775 | } | |
776 | } // end of parallel section | |
777 | ||
778 | } else { | |
779 | stringstream msg; | |
780 | msg << "setToNormal() not implemented for " | |
781 | << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode()); | |
782 | throw RipleyException(msg.str()); | |
783 | } | |
784 | } | } |
785 | ||
786 | void Rectangle::interpolateOnDomain(escript::Data& target, | void Rectangle::setToSize(escript::Data& out) const |
const escript::Data& in) const | ||
787 | { | { |
788 | const Rectangle& inDomain=dynamic_cast<const Rectangle&>(*(in.getFunctionSpace().getDomain())); | if (out.getFunctionSpace().getTypeCode() == Elements |
789 | const Rectangle& targetDomain=dynamic_cast<const Rectangle&>(*(target.getFunctionSpace().getDomain())); | || out.getFunctionSpace().getTypeCode() == ReducedElements) { |
790 | if (inDomain != *this) | out.requireWrite(); |
791 | throw RipleyException("Illegal domain of interpolant"); | const dim_t numQuad=out.getNumDataPointsPerSample(); |
792 | if (targetDomain != *this) | const double hSize=getFirstCoordAndSpacing(0).second; |
793 | throw RipleyException("Illegal domain of interpolation target"); | const double vSize=getFirstCoordAndSpacing(1).second; |
794 | const double size=min(hSize,vSize); | |
795 | #pragma omp parallel for | |
796 | for (index_t k = 0; k < getNumElements(); ++k) { | |
797 | double* o = out.getSampleDataRW(k); | |
798 | fill(o, o+numQuad, size); | |
799 | } | |
800 | } else if (out.getFunctionSpace().getTypeCode() == FaceElements | |
801 | || out.getFunctionSpace().getTypeCode() == ReducedFaceElements) { | |
802 | out.requireWrite(); | |
803 | const dim_t numQuad=out.getNumDataPointsPerSample(); | |
804 | const double hSize=getFirstCoordAndSpacing(0).second; | |
805 | const double vSize=getFirstCoordAndSpacing(1).second; | |
806 | #pragma omp parallel | |
807 | { | |
808 | if (m_faceOffset[0] > -1) { | |
809 | #pragma omp for nowait | |
810 | for (index_t k1 = 0; k1 < m_NE1; ++k1) { | |
811 | double* o = out.getSampleDataRW(m_faceOffset[0]+k1); | |
812 | fill(o, o+numQuad, vSize); | |
813 | } | |
814 | } | |
815 | ||
816 | if (m_faceOffset[1] > -1) { | |
817 | #pragma omp for nowait | |
818 | for (index_t k1 = 0; k1 < m_NE1; ++k1) { | |
819 | double* o = out.getSampleDataRW(m_faceOffset[1]+k1); | |
820 | fill(o, o+numQuad, vSize); | |
821 | } | |
822 | } | |
823 | ||
824 | if (m_faceOffset[2] > -1) { | |
825 | #pragma omp for nowait | |
826 | for (index_t k0 = 0; k0 < m_NE0; ++k0) { | |
827 | double* o = out.getSampleDataRW(m_faceOffset[2]+k0); | |
828 | fill(o, o+numQuad, hSize); | |
829 | } | |
830 | } | |
831 | ||
832 | if (m_faceOffset[3] > -1) { | |
833 | #pragma omp for nowait | |
834 | for (index_t k0 = 0; k0 < m_NE0; ++k0) { | |
835 | double* o = out.getSampleDataRW(m_faceOffset[3]+k0); | |
836 | fill(o, o+numQuad, hSize); | |
837 | } | |
838 | } | |
839 | } // end of parallel section | |
840 | ||
841 | throw RipleyException("interpolateOnDomain() not implemented"); | } else { |
842 | stringstream msg; | |
843 | msg << "setToSize() not implemented for " | |
844 | << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode()); | |
845 | throw RipleyException(msg.str()); | |
846 | } | |
847 | } | } |
848 | ||
849 | Paso_SystemMatrixPattern* Rectangle::getPattern(bool reducedRowOrder, | Paso_SystemMatrixPattern* Rectangle::getPattern(bool reducedRowOrder, |
# | Line 234 Paso_SystemMatrixPattern* Rectangle::get | Line 852 Paso_SystemMatrixPattern* Rectangle::get |
852 | if (reducedRowOrder || reducedColOrder) | if (reducedRowOrder || reducedColOrder) |
853 | throw RipleyException("getPattern() not implemented for reduced order"); | throw RipleyException("getPattern() not implemented for reduced order"); |
854 | ||
855 | /* | return m_pattern; |
// create distribution | ||
IndexVector dist; | ||
for (index_t i=0; i<m_mpiInfo->size+1; i++) | ||
dist.push_back(i*getNumNodes()); | ||
Paso_Distribution* distribution = Paso_Distribution_alloc(m_mpiInfo, | ||
&dist[0], 1, 0); | ||
// connectors | ||
dim_t numNeighbours = ...; | ||
RankVector neighbour(numNeighbours); | ||
IndexVector offsetInShared(numNeighbours+1); | ||
IndexVector shared(offsetInShared[numNeighbours]); | ||
Paso_SharedComponents *snd_shcomp = Paso_SharedComponents_alloc( | ||
getNumNodes(), numNeighbours, neighbour, shared, offsetInShared, | ||
1, 0, m_mpiInfo); | ||
Paso_SharedComponents *rcv_shcomp = Paso_SharedComponents_alloc( | ||
getNumNodes(), numNeighbours, neighbour, shared, offsetInShared, | ||
1, 0, m_mpiInfo); | ||
Paso_Connector* connector = Paso_Connector_alloc(snd_shcomp, rcv_shcomp); | ||
// create patterns | ||
Paso_Pattern *mainPattern = Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT, | ||
M, N, ptr, index); | ||
Paso_Pattern *colCouplePattern = Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT, | ||
M, N, ...); | ||
Paso_Pattern *rowCouplePattern = Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT, | ||
...); | ||
Paso_SystemMatrixPattern* pattern = Paso_SystemMatrixPattern_alloc( | ||
MATRIX_FORMAT_DEFAULT, distribution, distribution, | ||
mainPattern, colCouplePattern, rowCouplePattern, | ||
connector, connector); | ||
Paso_Pattern_free(mainPattern); | ||
Paso_Pattern_free(colCouplePattern); | ||
Paso_Pattern_free(rowCouplePattern); | ||
Paso_Distribution_free(distribution); | ||
Paso_SharedComponents_free(snd_shcomp); | ||
Paso_SharedComponents_free(rcv_shcomp); | ||
*/ | ||
throw RipleyException("getPattern() not implemented"); | ||
856 | } | } |
857 | ||
858 | void Rectangle::Print_Mesh_Info(const bool full) const | void Rectangle::Print_Mesh_Info(const bool full) const |
# | Line 285 void Rectangle::Print_Mesh_Info(const bo | Line 862 void Rectangle::Print_Mesh_Info(const bo |
862 | cout << " Id Coordinates" << endl; | cout << " Id Coordinates" << endl; |
863 | cout.precision(15); | cout.precision(15); |
864 | cout.setf(ios::scientific, ios::floatfield); | cout.setf(ios::scientific, ios::floatfield); |
865 | pair<double,double> xdx = getFirstCoordAndSpacing(0); | |
866 | pair<double,double> ydy = getFirstCoordAndSpacing(1); | |
867 | for (index_t i=0; i < getNumNodes(); i++) { | for (index_t i=0; i < getNumNodes(); i++) { |
868 | cout << " " << setw(5) << m_nodeId[i] | cout << " " << setw(5) << m_nodeId[i] |
869 | << " " << (m_l0*(i%m_N0+m_offset0))/m_gNE0 | << " " << xdx.first+(i%m_N0)*xdx.second |
870 | << " " << (m_l1*(i/m_N0+m_offset1))/m_gNE1 << endl; | << " " << ydy.first+(i/m_N0)*ydy.second << endl; |
871 | } | } |
872 | } | } |
873 | } | } |
874 | ||
875 | //protected | IndexVector Rectangle::getNumNodesPerDim() const |
dim_t Rectangle::getNumFaceElements() const | ||
876 | { | { |
877 | dim_t n=0; | IndexVector ret; |
878 | ret.push_back(m_N0); | |
879 | ret.push_back(m_N1); | |
880 | return ret; | |
881 | } | |
882 | ||
883 | IndexVector Rectangle::getNumElementsPerDim() const | |
884 | { | |
885 | IndexVector ret; | |
886 | ret.push_back(m_NE0); | |
887 | ret.push_back(m_NE1); | |
888 | return ret; | |
889 | } | |
890 | ||
891 | IndexVector Rectangle::getNumFacesPerBoundary() const | |
892 | { | |
893 | IndexVector ret(4, 0); | |
894 | //left | //left |
895 | if (m_offset0==0) | if (m_offset0==0) |
896 | n+=m_NE1; | ret[0]=m_NE1; |
897 | //right | //right |
898 | if (m_mpiInfo->rank%m_NX==m_NX-1) | if (m_mpiInfo->rank%m_NX==m_NX-1) |
899 | n+=m_NE1; | ret[1]=m_NE1; |
900 | //bottom | //bottom |
901 | if (m_offset1==0) | if (m_offset1==0) |
902 | n+=m_NE0; | ret[2]=m_NE0; |
903 | //top | //top |
904 | if (m_mpiInfo->rank/m_NX==m_NY-1) | if (m_mpiInfo->rank/m_NX==m_NY-1) |
905 | n+=m_NE0; | ret[3]=m_NE0; |
906 | return ret; | |
907 | } | |
908 | ||
909 | pair<double,double> Rectangle::getFirstCoordAndSpacing(dim_t dim) const | |
910 | { | |
911 | if (dim==0) { | |
912 | return pair<double,double>((m_l0*m_offset0)/m_gNE0, m_l0/m_gNE0); | |
913 | } else if (dim==1) { | |
914 | return pair<double,double>((m_l1*m_offset1)/m_gNE1, m_l1/m_gNE1); | |
915 | } | |
916 | throw RipleyException("getFirstCoordAndSpacing(): invalid argument"); | |
917 | } | |
918 | ||
919 | //protected | |
920 | dim_t Rectangle::getNumDOF() const | |
921 | { | |
922 | return (m_gNE0+1)/m_NX*(m_gNE1+1)/m_NY; | |
923 | } | |
924 | ||
925 | //protected | |
926 | dim_t Rectangle::getNumFaceElements() const | |
927 | { | |
928 | const IndexVector faces = getNumFacesPerBoundary(); | |
929 | dim_t n=0; | |
930 | for (size_t i=0; i<faces.size(); i++) | |
931 | n+=faces[i]; | |
932 | return n; | return n; |
933 | } | } |
934 | ||
# | Line 323 void Rectangle::assembleCoordinates(escr | Line 942 void Rectangle::assembleCoordinates(escr |
942 | if (!numSamplesEqual(&x, 1, getNumNodes())) | if (!numSamplesEqual(&x, 1, getNumNodes())) |
943 | throw RipleyException("setToX: Illegal number of samples in Data object"); | throw RipleyException("setToX: Illegal number of samples in Data object"); |
944 | ||
945 | pair<double,double> xdx = getFirstCoordAndSpacing(0); | |
946 | pair<double,double> ydy = getFirstCoordAndSpacing(1); | |
947 | arg.requireWrite(); | arg.requireWrite(); |
948 | #pragma omp parallel for | #pragma omp parallel for |
949 | for (dim_t i1 = 0; i1 < m_N1; i1++) { | for (dim_t i1 = 0; i1 < m_N1; i1++) { |
950 | for (dim_t i0 = 0; i0 < m_N0; i0++) { | for (dim_t i0 = 0; i0 < m_N0; i0++) { |
951 | double* point = arg.getSampleDataRW(i0+m_N0*i1); | double* point = arg.getSampleDataRW(i0+m_N0*i1); |
952 | point[0] = (m_l0*(i0+m_offset0))/m_gNE0; | point[0] = xdx.first+i0*xdx.second; |
953 | point[1] = (m_l1*(i1+m_offset1))/m_gNE1; | point[1] = ydy.first+i1*ydy.second; |
954 | } | |
955 | } | |
956 | } | |
957 | ||
958 | //protected | |
959 | dim_t Rectangle::insertNeighbourNodes(IndexVector& index, index_t node) const | |
960 | { | |
961 | const dim_t nDOF0 = (m_gNE0+1)/m_NX; | |
962 | const dim_t nDOF1 = (m_gNE1+1)/m_NY; | |
963 | const int x=node%nDOF0; | |
964 | const int y=node/nDOF0; | |
965 | dim_t num=0; | |
966 | // loop through potential neighbours and add to index if positions are | |
967 | // within bounds | |
968 | for (int i1=-1; i1<2; i1++) { | |
969 | for (int i0=-1; i0<2; i0++) { | |
970 | // skip node itself | |
971 | if (i0==0 && i1==0) | |
972 | continue; | |
973 | // location of neighbour node | |
974 | const int nx=x+i0; | |
975 | const int ny=y+i1; | |
976 | if (nx>=0 && ny>=0 && nx<nDOF0 && ny<nDOF1) { | |
977 | index.push_back(ny*nDOF0+nx); | |
978 | num++; | |
979 | } | |
980 | } | |
981 | } | |
982 | ||
983 | return num; | |
984 | } | |
985 | ||
986 | //protected | |
987 | void Rectangle::nodesToDOF(escript::Data& out, escript::Data& in) const | |
988 | { | |
989 | const dim_t numComp = in.getDataPointSize(); | |
990 | out.requireWrite(); | |
991 | ||
992 | const index_t left = (m_offset0==0 ? 0 : 1); | |
993 | const index_t bottom = (m_offset1==0 ? 0 : 1); | |
994 | const dim_t nDOF0 = (m_gNE0+1)/m_NX; | |
995 | const dim_t nDOF1 = (m_gNE1+1)/m_NY; | |
996 | #pragma omp parallel for | |
997 | for (index_t i=0; i<nDOF1; i++) { | |
998 | for (index_t j=0; j<nDOF0; j++) { | |
999 | const index_t n=j+left+(i+bottom)*m_N0; | |
1000 | const double* src=in.getSampleDataRO(n); | |
1001 | copy(src, src+numComp, out.getSampleDataRW(j+i*nDOF0)); | |
1002 | } | } |
1003 | } | } |
1004 | } | } |
1005 | ||
1006 | //protected | |
1007 | void Rectangle::dofToNodes(escript::Data& out, escript::Data& in) const | |
1008 | { | |
1009 | const dim_t numComp = in.getDataPointSize(); | |
1010 | Paso_Coupler* coupler = Paso_Coupler_alloc(m_connector, numComp); | |
1011 | in.requireWrite(); | |
1012 | Paso_Coupler_startCollect(coupler, in.getSampleDataRW(0)); | |
1013 | ||
1014 | const dim_t numDOF = getNumDOF(); | |
1015 | out.requireWrite(); | |
1016 | const double* buffer = Paso_Coupler_finishCollect(coupler); | |
1017 | ||
1018 | #pragma omp parallel for | |
1019 | for (index_t i=0; i<getNumNodes(); i++) { | |
1020 | const double* src=(m_dofMap[i]<numDOF ? | |
1021 | in.getSampleDataRO(m_dofMap[i]) | |
1022 | : &buffer[(m_dofMap[i]-numDOF)*numComp]); | |
1023 | copy(src, src+numComp, out.getSampleDataRW(i)); | |
1024 | } | |
1025 | } | |
1026 | ||
1027 | //private | //private |
1028 | void Rectangle::populateSampleIds() | void Rectangle::populateSampleIds() |
1029 | { | { |
1030 | const index_t firstId = getNumNodes()*m_mpiInfo->rank; | // identifiers are ordered from left to right, bottom to top globablly. |
1031 | const index_t diff0 = m_N0*(m_N1-1)+1; | |
1032 | const index_t diff1 = m_N0*((m_NX-1)*m_N1+1); | // build node distribution vector first. |
1033 | // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes | |
1034 | m_nodeDistribution.assign(m_mpiInfo->size+1, 0); | |
1035 | const dim_t numDOF=getNumDOF(); | |
1036 | for (dim_t k=1; k<m_mpiInfo->size; k++) { | |
1037 | m_nodeDistribution[k]=k*numDOF; | |
1038 | } | |
1039 | m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal(); | |
1040 | m_nodeId.resize(getNumNodes()); | m_nodeId.resize(getNumNodes()); |
1041 | m_dofId.resize(numDOF); | |
1042 | m_elementId.resize(getNumElements()); | |
1043 | m_faceId.resize(getNumFaceElements()); | |
1044 | ||
1045 | #pragma omp parallel | |
1046 | { | |
1047 | // nodes | |
1048 | #pragma omp for nowait | |
1049 | for (dim_t i1=0; i1<m_N1; i1++) { | |
1050 | for (dim_t i0=0; i0<m_N0; i0++) { | |
1051 | m_nodeId[i0+i1*m_N0] = (m_offset1+i1)*(m_gNE0+1)+m_offset0+i0; | |
1052 | } | |
1053 | } | |
1054 | ||
1055 | // degrees of freedom | |
1056 | #pragma omp for nowait | |
1057 | for (dim_t k=0; k<numDOF; k++) | |
1058 | m_dofId[k] = m_nodeDistribution[m_mpiInfo->rank]+k; | |
1059 | ||
1060 | // elements | |
1061 | #pragma omp for nowait | |
1062 | for (dim_t i1=0; i1<m_NE1; i1++) { | |
1063 | for (dim_t i0=0; i0<m_NE0; i0++) { | |
1064 | m_elementId[i0+i1*m_NE0]=(m_offset1+i1)*m_gNE0+m_offset0+i0; | |
1065 | } | |
1066 | } | |
1067 | ||
1068 | // face elements | |
1069 | #pragma omp for | |
1070 | for (dim_t k=0; k<getNumFaceElements(); k++) | |
1071 | m_faceId[k]=k; | |
1072 | } // end parallel section | |
1073 | ||
1074 | m_nodeTags.assign(getNumNodes(), 0); | |
1075 | updateTagsInUse(Nodes); | |
1076 | ||
1077 | m_elementTags.assign(getNumElements(), 0); | |
1078 | updateTagsInUse(Elements); | |
1079 | ||
1080 | // generate face offset vector and set face tags | |
1081 | const IndexVector facesPerEdge = getNumFacesPerBoundary(); | |
1082 | const index_t LEFT=1, RIGHT=2, BOTTOM=10, TOP=20; | |
1083 | const index_t faceTag[] = { LEFT, RIGHT, BOTTOM, TOP }; | |
1084 | m_faceOffset.assign(facesPerEdge.size(), -1); | |
1085 | m_faceTags.clear(); | |
1086 | index_t offset=0; | |
1087 | for (size_t i=0; i<facesPerEdge.size(); i++) { | |
1088 | if (facesPerEdge[i]>0) { | |
1089 | m_faceOffset[i]=offset; | |
1090 | offset+=facesPerEdge[i]; | |
1091 | m_faceTags.insert(m_faceTags.end(), facesPerEdge[i], faceTag[i]); | |
1092 | } | |
1093 | } | |
1094 | setTagMap("left", LEFT); | |
1095 | setTagMap("right", RIGHT); | |
1096 | setTagMap("bottom", BOTTOM); | |
1097 | setTagMap("top", TOP); | |
1098 | updateTagsInUse(FaceElements); | |
1099 | } | |
1100 | ||
1101 | //private | |
1102 | void Rectangle::createPattern() | |
1103 | { | |
1104 | const dim_t nDOF0 = (m_gNE0+1)/m_NX; | |
1105 | const dim_t nDOF1 = (m_gNE1+1)/m_NY; | |
1106 | const index_t left = (m_offset0==0 ? 0 : 1); | |
1107 | const index_t bottom = (m_offset1==0 ? 0 : 1); | |
1108 | ||
1109 | // populate node->DOF mapping with own degrees of freedom. | |
1110 | // The rest is assigned in the loop further down | |
1111 | m_dofMap.assign(getNumNodes(), 0); | |
1112 | #pragma omp parallel for | |
1113 | for (index_t i=bottom; i<m_N1; i++) { | |
1114 | for (index_t j=left; j<m_N0; j++) { | |
1115 | m_dofMap[i*m_N0+j]=(i-bottom)*nDOF0+j-left; | |
1116 | } | |
1117 | } | |
1118 | ||
1119 | // build list of shared components and neighbours by looping through | |
1120 | // all potential neighbouring ranks and checking if positions are | |
1121 | // within bounds | |
1122 | const dim_t numDOF=nDOF0*nDOF1; | |
1123 | vector<IndexVector> colIndices(numDOF); // for the couple blocks | |
1124 | RankVector neighbour; | |
1125 | IndexVector offsetInShared(1,0); | |
1126 | IndexVector sendShared, recvShared; | |
1127 | int numShared=0; | |
1128 | const int x=m_mpiInfo->rank%m_NX; | |
1129 | const int y=m_mpiInfo->rank/m_NX; | |
1130 | for (int i1=-1; i1<2; i1++) { | |
1131 | for (int i0=-1; i0<2; i0++) { | |
1132 | // skip this rank | |
1133 | if (i0==0 && i1==0) | |
1134 | continue; | |
1135 | // location of neighbour rank | |
1136 | const int nx=x+i0; | |
1137 | const int ny=y+i1; | |
1138 | if (nx>=0 && ny>=0 && nx<m_NX && ny<m_NY) { | |
1139 | neighbour.push_back(ny*m_NX+nx); | |
1140 | if (i0==0) { | |
1141 | // sharing top or bottom edge | |
1142 | const int firstDOF=(i1==-1 ? 0 : numDOF-nDOF0); | |
1143 | const int firstNode=(i1==-1 ? left : m_N0*(m_N1-1)+left); | |
1144 | offsetInShared.push_back(offsetInShared.back()+nDOF0); | |
1145 | for (dim_t i=0; i<nDOF0; i++, numShared++) { | |
1146 | sendShared.push_back(firstDOF+i); | |
1147 | recvShared.push_back(numDOF+numShared); | |
1148 | if (i>0) | |
1149 | colIndices[firstDOF+i-1].push_back(numShared); | |
1150 | colIndices[firstDOF+i].push_back(numShared); | |
1151 | if (i<nDOF0-1) | |
1152 | colIndices[firstDOF+i+1].push_back(numShared); | |
1153 | m_dofMap[firstNode+i]=numDOF+numShared; | |
1154 | } | |
1155 | } else if (i1==0) { | |
1156 | // sharing left or right edge | |
1157 | const int firstDOF=(i0==-1 ? 0 : nDOF0-1); | |
1158 | const int firstNode=(i0==-1 ? bottom*m_N0 : (bottom+1)*m_N0-1); | |
1159 | offsetInShared.push_back(offsetInShared.back()+nDOF1); | |
1160 | for (dim_t i=0; i<nDOF1; i++, numShared++) { | |
1161 | sendShared.push_back(firstDOF+i*nDOF0); | |
1162 | recvShared.push_back(numDOF+numShared); | |
1163 | if (i>0) | |
1164 | colIndices[firstDOF+(i-1)*nDOF0].push_back(numShared); | |
1165 | colIndices[firstDOF+i*nDOF0].push_back(numShared); | |
1166 | if (i<nDOF1-1) | |
1167 | colIndices[firstDOF+(i+1)*nDOF0].push_back(numShared); | |
1168 | m_dofMap[firstNode+i*m_N0]=numDOF+numShared; | |
1169 | } | |
1170 | } else { | |
1171 | // sharing a node | |
1172 | const int dof=(i0+1)/2*(nDOF0-1)+(i1+1)/2*(numDOF-nDOF0); | |
1173 | const int node=(i0+1)/2*(m_N0-1)+(i1+1)/2*m_N0*(m_N1-1); | |
1174 | offsetInShared.push_back(offsetInShared.back()+1); | |
1175 | sendShared.push_back(dof); | |
1176 | recvShared.push_back(numDOF+numShared); | |
1177 | colIndices[dof].push_back(numShared); | |
1178 | m_dofMap[node]=numDOF+numShared; | |
1179 | ++numShared; | |
1180 | } | |
1181 | } | |
1182 | } | |
1183 | } | |
1184 | ||
1185 | // create connector | |
1186 | Paso_SharedComponents *snd_shcomp = Paso_SharedComponents_alloc( | |
1187 | numDOF, neighbour.size(), &neighbour[0], &sendShared[0], | |
1188 | &offsetInShared[0], 1, 0, m_mpiInfo); | |
1189 | Paso_SharedComponents *rcv_shcomp = Paso_SharedComponents_alloc( | |
1190 | numDOF, neighbour.size(), &neighbour[0], &recvShared[0], | |
1191 | &offsetInShared[0], 1, 0, m_mpiInfo); | |
1192 | m_connector = Paso_Connector_alloc(snd_shcomp, rcv_shcomp); | |
1193 | Paso_SharedComponents_free(snd_shcomp); | |
1194 | Paso_SharedComponents_free(rcv_shcomp); | |
1195 | ||
1196 | // create main and couple blocks | |
1197 | Paso_Pattern *mainPattern = createMainPattern(); | |
1198 | Paso_Pattern *colPattern, *rowPattern; | |
1199 | createCouplePatterns(colIndices, numShared, &colPattern, &rowPattern); | |
1200 | ||
1201 | // allocate paso distribution | |
1202 | Paso_Distribution* distribution = Paso_Distribution_alloc(m_mpiInfo, | |
1203 | const_cast<index_t*>(&m_nodeDistribution[0]), 1, 0); | |
1204 | ||
1205 | // finally create the system matrix | |
1206 | m_pattern = Paso_SystemMatrixPattern_alloc(MATRIX_FORMAT_DEFAULT, | |
1207 | distribution, distribution, mainPattern, colPattern, rowPattern, | |
1208 | m_connector, m_connector); | |
1209 | ||
1210 | Paso_Distribution_free(distribution); | |
1211 | ||
1212 | // useful debug output | |
1213 | /* | |
1214 | cout << "--- rcv_shcomp ---" << endl; | |
1215 | cout << "numDOF=" << numDOF << ", numNeighbors=" << neighbour.size() << endl; | |
1216 | for (size_t i=0; i<neighbour.size(); i++) { | |
1217 | cout << "neighbor[" << i << "]=" << neighbour[i] | |
1218 | << " offsetInShared[" << i+1 << "]=" << offsetInShared[i+1] << endl; | |
1219 | } | |
1220 | for (size_t i=0; i<recvShared.size(); i++) { | |
1221 | cout << "shared[" << i << "]=" << recvShared[i] << endl; | |
1222 | } | |
1223 | cout << "--- snd_shcomp ---" << endl; | |
1224 | for (size_t i=0; i<sendShared.size(); i++) { | |
1225 | cout << "shared[" << i << "]=" << sendShared[i] << endl; | |
1226 | } | |
1227 | cout << "--- dofMap ---" << endl; | |
1228 | for (size_t i=0; i<m_dofMap.size(); i++) { | |
1229 | cout << "m_dofMap[" << i << "]=" << m_dofMap[i] << endl; | |
1230 | } | |
1231 | cout << "--- colIndices ---" << endl; | |
1232 | for (size_t i=0; i<colIndices.size(); i++) { | |
1233 | cout << "colIndices[" << i << "].size()=" << colIndices[i].size() << endl; | |
1234 | } | |
1235 | */ | |
1236 | ||
1237 | /* | |
1238 | cout << "--- main_pattern ---" << endl; | |
1239 | cout << "M=" << mainPattern->numOutput << ", N=" << mainPattern->numInput << endl; | |
1240 | for (size_t i=0; i<mainPattern->numOutput+1; i++) { | |
1241 | cout << "ptr[" << i << "]=" << mainPattern->ptr[i] << endl; | |
1242 | } | |
1243 | for (size_t i=0; i<mainPattern->ptr[mainPattern->numOutput]; i++) { | |
1244 | cout << "index[" << i << "]=" << mainPattern->index[i] << endl; | |
1245 | } | |
1246 | */ | |
1247 | ||
1248 | /* | |
1249 | cout << "--- colCouple_pattern ---" << endl; | |
1250 | cout << "M=" << colPattern->numOutput << ", N=" << colPattern->numInput << endl; | |
1251 | for (size_t i=0; i<colPattern->numOutput+1; i++) { | |
1252 | cout << "ptr[" << i << "]=" << colPattern->ptr[i] << endl; | |
1253 | } | |
1254 | for (size_t i=0; i<colPattern->ptr[colPattern->numOutput]; i++) { | |
1255 | cout << "index[" << i << "]=" << colPattern->index[i] << endl; | |
1256 | } | |
1257 | */ | |
1258 | ||
1259 | /* | |
1260 | cout << "--- rowCouple_pattern ---" << endl; | |
1261 | cout << "M=" << rowPattern->numOutput << ", N=" << rowPattern->numInput << endl; | |
1262 | for (size_t i=0; i<rowPattern->numOutput+1; i++) { | |
1263 | cout << "ptr[" << i << "]=" << rowPattern->ptr[i] << endl; | |
1264 | } | |
1265 | for (size_t i=0; i<rowPattern->ptr[rowPattern->numOutput]; i++) { | |
1266 | cout << "index[" << i << "]=" << rowPattern->index[i] << endl; | |
1267 | } | |
1268 | */ | |
1269 | ||
1270 | Paso_Pattern_free(mainPattern); | |
1271 | Paso_Pattern_free(colPattern); | |
1272 | Paso_Pattern_free(rowPattern); | |
1273 | } | |
1274 | ||
1275 | //protected | |
1276 | void Rectangle::interpolateNodesOnElements(escript::Data& out, | |
1277 | escript::Data& in, bool reduced) const | |
1278 | { | |
1279 | const dim_t numComp = in.getDataPointSize(); | |
1280 | if (reduced) { | |
1281 | out.requireWrite(); | |
1282 | /*** GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS TOP */ | |
1283 | const double c0 = .25; | |
1284 | #pragma omp parallel for | |
1285 | for (index_t k1=0; k1 < m_NE1; ++k1) { | |
1286 | for (index_t k0=0; k0 < m_NE0; ++k0) { | |
1287 | const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0)); | |
1288 | const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0)); | |
1289 | const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0)); | |
1290 | const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0)); | |
1291 | double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0)); | |
1292 | for (index_t i=0; i < numComp; ++i) { | |
1293 | o[INDEX2(i,numComp,0)] = c0*(f_00[i] + f_01[i] + f_10[i] + f_11[i]); | |
1294 | } /* end of component loop i */ | |
1295 | } /* end of k0 loop */ | |
1296 | } /* end of k1 loop */ | |
1297 | /* GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS BOTTOM */ | |
1298 | } else { | |
1299 | out.requireWrite(); | |
1300 | /*** GENERATOR SNIP_INTERPOLATE_ELEMENTS TOP */ | |
1301 | const double c0 = .16666666666666666667; | |
1302 | const double c1 = .044658198738520451079; | |
1303 | const double c2 = .62200846792814621559; | |
1304 | #pragma omp parallel for | #pragma omp parallel for |
1305 | for (dim_t k=0; k<getNumNodes(); k++) { | for (index_t k1=0; k1 < m_NE1; ++k1) { |
1306 | index_t id = firstId+k; | for (index_t k0=0; k0 < m_NE0; ++k0) { |
1307 | if (m_offset0 > 0 && k%m_N0==0) | const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0)); |
1308 | id -= diff0; | const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0)); |
1309 | if (m_offset1 > 0 && k/m_N0==0) | const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0)); |
1310 | id -= diff1; | const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0)); |
1311 | m_nodeId[k]=id; | double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0)); |
1312 | for (index_t i=0; i < numComp; ++i) { | |
1313 | o[INDEX2(i,numComp,0)] = f_00[i]*c2 + f_11[i]*c1 + c0*(f_01[i] + f_10[i]); | |
1314 | o[INDEX2(i,numComp,1)] = f_01[i]*c1 + f_10[i]*c2 + c0*(f_00[i] + f_11[i]); | |
1315 | o[INDEX2(i,numComp,2)] = f_01[i]*c2 + f_10[i]*c1 + c0*(f_00[i] + f_11[i]); | |
1316 | o[INDEX2(i,numComp,3)] = f_00[i]*c1 + f_11[i]*c2 + c0*(f_01[i] + f_10[i]); | |
1317 | } /* end of component loop i */ | |
1318 | } /* end of k0 loop */ | |
1319 | } /* end of k1 loop */ | |
1320 | /* GENERATOR SNIP_INTERPOLATE_ELEMENTS BOTTOM */ | |
1321 | } | } |
1322 | } | } |
1323 | ||
1324 | //protected | |
1325 | void Rectangle::interpolateNodesOnFaces(escript::Data& out, escript::Data& in, | |
1326 | bool reduced) const | |
1327 | { | |
1328 | const dim_t numComp = in.getDataPointSize(); | |
1329 | if (reduced) { | |
1330 | out.requireWrite(); | |
1331 | const double c0 = .5; | |
1332 | #pragma omp parallel | |
1333 | { | |
1334 | /*** GENERATOR SNIP_INTERPOLATE_REDUCED_FACES TOP */ | |
1335 | if (m_faceOffset[0] > -1) { | |
1336 | #pragma omp for nowait | |
1337 | for (index_t k1=0; k1 < m_NE1; ++k1) { | |
1338 | const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0)); | |
1339 | const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0)); | |
1340 | double* o = out.getSampleDataRW(m_faceOffset[0]+k1); | |
1341 | for (index_t i=0; i < numComp; ++i) { | |
1342 | o[INDEX2(i,numComp,0)] = c0*(f_00[i] + f_01[i]); | |
1343 | } /* end of component loop i */ | |
1344 | } /* end of k1 loop */ | |
1345 | } /* end of face 0 */ | |
1346 | if (m_faceOffset[1] > -1) { | |
1347 | #pragma omp for nowait | |
1348 | for (index_t k1=0; k1 < m_NE1; ++k1) { | |
1349 | const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0)); | |
1350 | const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0)); | |
1351 | double* o = out.getSampleDataRW(m_faceOffset[1]+k1); | |
1352 | for (index_t i=0; i < numComp; ++i) { | |
1353 | o[INDEX2(i,numComp,0)] = c0*(f_10[i] + f_11[i]); | |
1354 | } /* end of component loop i */ | |
1355 | } /* end of k1 loop */ | |
1356 | } /* end of face 1 */ | |
1357 | if (m_faceOffset[2] > -1) { | |
1358 | #pragma omp for nowait | |
1359 | for (index_t k0=0; k0 < m_NE0; ++k0) { | |
1360 | const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0)); | |
1361 | const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0)); | |
1362 | double* o = out.getSampleDataRW(m_faceOffset[2]+k0); | |
1363 | for (index_t i=0; i < numComp; ++i) { | |
1364 | o[INDEX2(i,numComp,0)] = c0*(f_00[i] + f_10[i]); | |
1365 | } /* end of component loop i */ | |
1366 | } /* end of k0 loop */ | |
1367 | } /* end of face 2 */ | |
1368 | if (m_faceOffset[3] > -1) { | |
1369 | #pragma omp for nowait | |
1370 | for (index_t k0=0; k0 < m_NE0; ++k0) { | |
1371 | const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0)); | |
1372 | const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0)); | |
1373 | double* o = out.getSampleDataRW(m_faceOffset[3]+k0); | |
1374 | for (index_t i=0; i < numComp; ++i) { | |
1375 | o[INDEX2(i,numComp,0)] = c0*(f_01[i] + f_11[i]); | |
1376 | } /* end of component loop i */ | |
1377 | } /* end of k0 loop */ | |
1378 | } /* end of face 3 */ | |
1379 | /* GENERATOR SNIP_INTERPOLATE_REDUCED_FACES BOTTOM */ | |
1380 | } // end of parallel section | |
1381 | } else { | |
1382 | out.requireWrite(); | |
1383 | const double c0 = 0.21132486540518711775; | |
1384 | const double c1 = 0.78867513459481288225; | |
1385 | #pragma omp parallel | |
1386 | { | |
1387 | /*** GENERATOR SNIP_INTERPOLATE_FACES TOP */ | |
1388 | if (m_faceOffset[0] > -1) { | |
1389 | #pragma omp for nowait | |
1390 | for (index_t k1=0; k1 < m_NE1; ++k1) { | |
1391 | const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0)); | |
1392 | const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0)); | |
1393 | double* o = out.getSampleDataRW(m_faceOffset[0]+k1); | |
1394 | for (index_t i=0; i < numComp; ++i) { | |
1395 | o[INDEX2(i,numComp,0)] = f_00[i]*c1 + f_01[i]*c0; | |
1396 | o[INDEX2(i,numComp,1)] = f_00[i]*c0 + f_01[i]*c1; | |
1397 | } /* end of component loop i */ | |
1398 | } /* end of k1 loop */ | |
1399 | } /* end of face 0 */ | |
1400 | if (m_faceOffset[1] > -1) { | |
1401 | #pragma omp for nowait | |
1402 | for (index_t k1=0; k1 < m_NE1; ++k1) { | |
1403 | const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0)); | |
1404 | const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0)); | |
1405 | double* o = out.getSampleDataRW(m_faceOffset[1]+k1); | |
1406 | for (index_t i=0; i < numComp; ++i) { | |
1407 | o[INDEX2(i,numComp,0)] = f_10[i]*c1 + f_11[i]*c0; | |
1408 | o[INDEX2(i,numComp,1)] = f_10[i]*c0 + f_11[i]*c1; | |
1409 | } /* end of component loop i */ | |
1410 | } /* end of k1 loop */ | |
1411 | } /* end of face 1 */ | |
1412 | if (m_faceOffset[2] > -1) { | |
1413 | #pragma omp for nowait | |
1414 | for (index_t k0=0; k0 < m_NE0; ++k0) { | |
1415 | const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0)); | |
1416 | const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0)); | |
1417 | double* o = out.getSampleDataRW(m_faceOffset[2]+k0); | |
1418 | for (index_t i=0; i < numComp; ++i) { | |
1419 | o[INDEX2(i,numComp,0)] = f_00[i]*c1 + f_10[i]*c0; | |
1420 | o[INDEX2(i,numComp,1)] = f_00[i]*c0 + f_10[i]*c1; | |
1421 | } /* end of component loop i */ | |
1422 | } /* end of k0 loop */ | |
1423 | } /* end of face 2 */ | |
1424 | if (m_faceOffset[3] > -1) { | |
1425 | #pragma omp for nowait | |
1426 | for (index_t k0=0; k0 < m_NE0; ++k0) { | |
1427 | const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0)); | |
1428 | const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0)); | |
1429 | double* o = out.getSampleDataRW(m_faceOffset[3]+k0); | |
1430 | for (index_t i=0; i < numComp; ++i) { | |
1431 | o[INDEX2(i,numComp,0)] = f_01[i]*c1 + f_11[i]*c0; | |
1432 | o[INDEX2(i,numComp,1)] = f_01[i]*c0 + f_11[i]*c1; | |
1433 | } /* end of component loop i */ | |
1434 | } /* end of k0 loop */ | |
1435 | } /* end of face 3 */ | |
1436 | /* GENERATOR SNIP_INTERPOLATE_FACES BOTTOM */ | |
1437 | } // end of parallel section | |
1438 | } | |
1439 | } | |
1440 | ||
1441 | //protected | |
1442 | void Rectangle::assemblePDESingle(Paso_SystemMatrix* mat, | |
1443 | escript::Data& rhs, const escript::Data& A, const escript::Data& B, | |
1444 | const escript::Data& C, const escript::Data& D, | |
1445 | const escript::Data& X, const escript::Data& Y, | |
1446 | const escript::Data& d, const escript::Data& y) const | |
1447 | { | |
1448 | const double h0 = m_l0/m_gNE0; | |
1449 | const double h1 = m_l1/m_gNE1; | |
1450 | /*** GENERATOR SNIP_PDE_SINGLE_PRE TOP */ | |
1451 | const double w0 = -0.1555021169820365539*h1/h0; | |
1452 | const double w1 = 0.041666666666666666667; | |
1453 | const double w10 = -0.041666666666666666667*h0/h1; | |
1454 | const double w11 = 0.1555021169820365539*h1/h0; | |
1455 | const double w12 = 0.1555021169820365539*h0/h1; | |
1456 | const double w13 = 0.01116454968463011277*h0/h1; | |
1457 | const double w14 = 0.01116454968463011277*h1/h0; | |
1458 | const double w15 = 0.041666666666666666667*h1/h0; | |
1459 | const double w16 = -0.01116454968463011277*h0/h1; | |
1460 | const double w17 = -0.1555021169820365539*h0/h1; | |
1461 | const double w18 = -0.33333333333333333333*h1/h0; | |
1462 | const double w19 = 0.25000000000000000000; | |
1463 | const double w2 = -0.15550211698203655390; | |
1464 | const double w20 = -0.25000000000000000000; | |
1465 | const double w21 = 0.16666666666666666667*h0/h1; | |
1466 | const double w22 = -0.16666666666666666667*h1/h0; | |
1467 | const double w23 = -0.16666666666666666667*h0/h1; | |
1468 | const double w24 = 0.33333333333333333333*h1/h0; | |
1469 | const double w25 = 0.33333333333333333333*h0/h1; | |
1470 | const double w26 = 0.16666666666666666667*h1/h0; | |
1471 | const double w27 = -0.33333333333333333333*h0/h1; | |
1472 | const double w28 = -0.032861463941450536761*h1; | |
1473 | const double w29 = -0.032861463941450536761*h0; | |
1474 | const double w3 = 0.041666666666666666667*h0/h1; | |
1475 | const double w30 = -0.12264065304058601714*h1; | |
1476 | const double w31 = -0.0023593469594139828636*h1; | |
1477 | const double w32 = -0.008805202725216129906*h0; | |
1478 | const double w33 = -0.008805202725216129906*h1; | |
1479 | const double w34 = 0.032861463941450536761*h1; | |
1480 | const double w35 = 0.008805202725216129906*h1; | |
1481 | const double w36 = 0.008805202725216129906*h0; | |
1482 | const double w37 = 0.0023593469594139828636*h1; | |
1483 | const double w38 = 0.12264065304058601714*h1; | |
1484 | const double w39 = 0.032861463941450536761*h0; | |
1485 | const double w4 = 0.15550211698203655390; | |
1486 | const double w40 = -0.12264065304058601714*h0; | |
1487 | const double w41 = -0.0023593469594139828636*h0; | |
1488 | const double w42 = 0.0023593469594139828636*h0; | |
1489 | const double w43 = 0.12264065304058601714*h0; | |
1490 | const double w44 = -0.16666666666666666667*h1; | |
1491 | const double w45 = -0.083333333333333333333*h0; | |
1492 | const double w46 = 0.083333333333333333333*h1; | |
1493 | const double w47 = 0.16666666666666666667*h1; | |
1494 | const double w48 = 0.083333333333333333333*h0; | |
1495 | const double w49 = -0.16666666666666666667*h0; | |
1496 | const double w5 = -0.041666666666666666667; | |
1497 | const double w50 = 0.16666666666666666667*h0; | |
1498 | const double w51 = -0.083333333333333333333*h1; | |
1499 | const double w52 = 0.025917019497006092316*h0*h1; | |
1500 | const double w53 = 0.0018607582807716854616*h0*h1; | |
1501 | const double w54 = 0.0069444444444444444444*h0*h1; | |
1502 | const double w55 = 0.09672363354357992482*h0*h1; | |
1503 | const double w56 = 0.00049858867864229740201*h0*h1; | |
1504 | const double w57 = 0.055555555555555555556*h0*h1; | |
1505 | const double w58 = 0.027777777777777777778*h0*h1; | |
1506 | const double w59 = 0.11111111111111111111*h0*h1; | |
1507 | const double w6 = -0.01116454968463011277*h1/h0; | |
1508 | const double w60 = -0.19716878364870322056*h1; | |
1509 | const double w61 = -0.19716878364870322056*h0; | |
1510 | const double w62 = -0.052831216351296779436*h0; | |
1511 | const double w63 = -0.052831216351296779436*h1; | |
1512 | const double w64 = 0.19716878364870322056*h1; | |
1513 | const double w65 = 0.052831216351296779436*h1; | |
1514 | const double w66 = 0.19716878364870322056*h0; | |
1515 | const double w67 = 0.052831216351296779436*h0; | |
1516 | const double w68 = -0.5*h1; | |
1517 | const double w69 = -0.5*h0; | |
1518 | const double w7 = 0.011164549684630112770; | |
1519 | const double w70 = 0.5*h1; | |
1520 | const double w71 = 0.5*h0; | |
1521 | const double w72 = 0.1555021169820365539*h0*h1; | |
1522 | const double w73 = 0.041666666666666666667*h0*h1; | |
1523 | const double w74 = 0.01116454968463011277*h0*h1; | |
1524 | const double w75 = 0.25*h0*h1; | |
1525 | const double w8 = -0.011164549684630112770; | |
1526 | const double w9 = -0.041666666666666666667*h1/h0; | |
1527 | /* GENERATOR SNIP_PDE_SINGLE_PRE BOTTOM */ | |
1528 | ||
1529 | rhs.requireWrite(); | |
1530 | #pragma omp parallel | |
1531 | { | |
1532 | for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring | |
1533 | #pragma omp for | |
1534 | for (index_t k1=k1_0; k1<m_NE1; k1+=2) { | |
1535 | for (index_t k0=0; k0<m_NE0; ++k0) { | |
1536 | bool add_EM_S=false; | |
1537 | bool add_EM_F=false; | |
1538 | vector<double> EM_S(4*4, 0); | |
1539 | vector<double> EM_F(4, 0); | |
1540 | const index_t e = k0 + m_NE0*k1; | |
1541 | /*** GENERATOR SNIP_PDE_SINGLE TOP */ | |
1542 | /////////////// | |
1543 | // process A // | |
1544 | /////////////// | |
1545 | if (!A.isEmpty()) { | |
1546 | add_EM_S=true; | |
1547 | const double* A_p=const_cast<escript::Data*>(&A)->getSampleDataRO(e); | |
1548 | if (A.actsExpanded()) { | |
1549 | const register double A_00_0 = A_p[INDEX3(0,0,0,2,2)]; | |
1550 | const register double A_01_0 = A_p[INDEX3(0,1,0,2,2)]; | |
1551 | const register double A_10_0 = A_p[INDEX3(1,0,0,2,2)]; | |
1552 | const register double A_11_0 = A_p[INDEX3(1,1,0,2,2)]; | |
1553 | const register double A_00_1 = A_p[INDEX3(0,0,1,2,2)]; | |
1554 | const register double A_01_1 = A_p[INDEX3(0,1,1,2,2)]; | |
1555 | const register double A_10_1 = A_p[INDEX3(1,0,1,2,2)]; | |
1556 | const register double A_11_1 = A_p[INDEX3(1,1,1,2,2)]; | |
1557 | const register double A_00_2 = A_p[INDEX3(0,0,2,2,2)]; | |
1558 | const register double A_01_2 = A_p[INDEX3(0,1,2,2,2)]; | |
1559 | const register double A_10_2 = A_p[INDEX3(1,0,2,2,2)]; | |
1560 | const register double A_11_2 = A_p[INDEX3(1,1,2,2,2)]; | |
1561 | const register double A_00_3 = A_p[INDEX3(0,0,3,2,2)]; | |
1562 | const register double A_01_3 = A_p[INDEX3(0,1,3,2,2)]; | |
1563 | const register double A_10_3 = A_p[INDEX3(1,0,3,2,2)]; | |
1564 | const register double A_11_3 = A_p[INDEX3(1,1,3,2,2)]; | |
1565 | const register double tmp4_0 = A_10_1 + A_10_2; | |
1566 | const register double tmp12_0 = A_11_0 + A_11_2; | |
1567 | const register double tmp2_0 = A_11_0 + A_11_1 + A_11_2 + A_11_3; | |
1568 | const register double tmp10_0 = A_01_3 + A_10_3; | |
1569 | const register double tmp14_0 = A_01_0 + A_01_3 + A_10_0 + A_10_3; | |
1570 | const register double tmp0_0 = A_01_0 + A_01_3; | |
1571 | const register double tmp13_0 = A_01_2 + A_10_1; | |
1572 | const register double tmp3_0 = A_00_2 + A_00_3; | |
1573 | const register double tmp11_0 = A_11_1 + A_11_3; | |
1574 | const register double tmp18_0 = A_01_1 + A_10_1; | |
1575 | const register double tmp1_0 = A_00_0 + A_00_1; | |
1576 | const register double tmp15_0 = A_01_1 + A_10_2; | |
1577 | const register double tmp5_0 = A_00_0 + A_00_1 + A_00_2 + A_00_3; | |
1578 | const register double tmp16_0 = A_10_0 + A_10_3; | |
1579 | const register double tmp6_0 = A_01_3 + A_10_0; | |
1580 | const register double tmp17_0 = A_01_1 + A_01_2; | |
1581 | const register double tmp9_0 = A_01_0 + A_10_0; | |
1582 | const register double tmp7_0 = A_01_0 + A_10_3; | |
1583 | const register double tmp8_0 = A_01_1 + A_01_2 + A_10_1 + A_10_2; | |
1584 | const register double tmp19_0 = A_01_2 + A_10_2; | |
1585 | const register double tmp14_1 = A_10_0*w8; | |
1586 | const register double tmp23_1 = tmp3_0*w14; | |
1587 | const register double tmp35_1 = A_01_0*w8; | |
1588 | const register double tmp54_1 = tmp13_0*w8; | |
1589 | const register double tmp20_1 = tmp9_0*w4; | |
1590 | const register double tmp25_1 = tmp12_0*w12; | |
1591 | const register double tmp2_1 = A_01_1*w4; | |
1592 | const register double tmp44_1 = tmp7_0*w7; | |
1593 | const register double tmp26_1 = tmp10_0*w4; | |
1594 | const register double tmp52_1 = tmp18_0*w8; | |
1595 | const register double tmp48_1 = A_10_1*w7; | |
1596 | const register double tmp46_1 = A_01_3*w8; | |
1597 | const register double tmp50_1 = A_01_0*w2; | |
1598 | const register double tmp8_1 = tmp4_0*w5; | |
1599 | const register double tmp56_1 = tmp19_0*w8; | |
1600 | const register double tmp9_1 = tmp2_0*w10; | |
1601 | const register double tmp19_1 = A_10_3*w2; | |
1602 | const register double tmp47_1 = A_10_2*w4; | |
1603 | const register double tmp16_1 = tmp3_0*w0; | |
1604 | const register double tmp18_1 = tmp1_0*w6; | |
1605 | const register double tmp31_1 = tmp11_0*w12; | |
1606 | const register double tmp55_1 = tmp15_0*w2; | |
1607 | const register double tmp39_1 = A_10_2*w7; | |
1608 | const register double tmp11_1 = tmp6_0*w7; | |
1609 | const register double tmp40_1 = tmp11_0*w17; | |
1610 | const register double tmp34_1 = tmp15_0*w8; | |
1611 | const register double tmp33_1 = tmp14_0*w5; | |
1612 | const register double tmp24_1 = tmp11_0*w13; | |
1613 | const register double tmp3_1 = tmp1_0*w0; | |
1614 | const register double tmp5_1 = tmp2_0*w3; | |
1615 | const register double tmp43_1 = tmp17_0*w5; | |
1616 | const register double tmp15_1 = A_01_2*w4; | |
1617 | const register double tmp53_1 = tmp19_0*w2; | |
1618 | const register double tmp27_1 = tmp3_0*w11; | |
1619 | const register double tmp32_1 = tmp13_0*w2; | |
1620 | const register double tmp10_1 = tmp5_0*w9; | |
1621 | const register double tmp37_1 = A_10_1*w4; | |
1622 | const register double tmp38_1 = tmp5_0*w15; | |
1623 | const register double tmp17_1 = A_01_1*w7; | |
1624 | const register double tmp12_1 = tmp7_0*w4; | |
1625 | const register double tmp22_1 = tmp10_0*w7; | |
1626 | const register double tmp57_1 = tmp18_0*w2; | |
1627 | const register double tmp28_1 = tmp9_0*w7; | |
1628 | const register double tmp29_1 = tmp1_0*w14; | |
1629 | const register double tmp51_1 = tmp11_0*w16; | |
1630 | const register double tmp42_1 = tmp12_0*w16; | |
1631 | const register double tmp49_1 = tmp12_0*w17; | |
1632 | const register double tmp21_1 = tmp1_0*w11; | |
1633 | const register double tmp1_1 = tmp0_0*w1; | |
1634 | const register double tmp45_1 = tmp6_0*w4; | |
1635 | const register double tmp7_1 = A_10_0*w2; | |
1636 | const register double tmp6_1 = tmp3_0*w6; | |
1637 | const register double tmp13_1 = tmp8_0*w1; | |
1638 | const register double tmp36_1 = tmp16_0*w1; | |
1639 | const register double tmp41_1 = A_01_3*w2; | |
1640 | const register double tmp30_1 = tmp12_0*w13; | |
1641 | const register double tmp4_1 = A_01_2*w7; | |
1642 | const register double tmp0_1 = A_10_3*w8; | |
1643 | EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1 + tmp4_1 + tmp5_1 + tmp6_1 + tmp7_1 + tmp8_1; | |
1644 | EM_S[INDEX2(1,2,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1 + tmp9_1; | |
1645 | EM_S[INDEX2(3,2,4)]+=tmp14_1 + tmp15_1 + tmp16_1 + tmp17_1 + tmp18_1 + tmp19_1 + tmp1_1 + tmp5_1 + tmp8_1; | |
1646 | EM_S[INDEX2(0,0,4)]+=tmp13_1 + tmp20_1 + tmp21_1 + tmp22_1 + tmp23_1 + tmp24_1 + tmp25_1; | |
1647 | EM_S[INDEX2(3,3,4)]+=tmp13_1 + tmp26_1 + tmp27_1 + tmp28_1 + tmp29_1 + tmp30_1 + tmp31_1; | |
1648 | EM_S[INDEX2(3,0,4)]+=tmp10_1 + tmp32_1 + tmp33_1 + tmp34_1 + tmp9_1; | |
1649 | EM_S[INDEX2(3,1,4)]+=tmp35_1 + tmp36_1 + tmp37_1 + tmp38_1 + tmp39_1 + tmp40_1 + tmp41_1 + tmp42_1 + tmp43_1; | |
1650 | EM_S[INDEX2(2,1,4)]+=tmp10_1 + tmp13_1 + tmp44_1 + tmp45_1 + tmp9_1; | |
1651 | EM_S[INDEX2(0,2,4)]+=tmp36_1 + tmp38_1 + tmp43_1 + tmp46_1 + tmp47_1 + tmp48_1 + tmp49_1 + tmp50_1 + tmp51_1; | |
1652 | EM_S[INDEX2(2,0,4)]+=tmp0_1 + tmp15_1 + tmp17_1 + tmp1_1 + tmp38_1 + tmp49_1 + tmp51_1 + tmp7_1 + tmp8_1; | |
1653 | EM_S[INDEX2(1,3,4)]+=tmp14_1 + tmp19_1 + tmp1_1 + tmp2_1 + tmp38_1 + tmp40_1 + tmp42_1 + tmp4_1 + tmp8_1; | |
1654 | EM_S[INDEX2(2,3,4)]+=tmp16_1 + tmp18_1 + tmp35_1 + tmp36_1 + tmp41_1 + tmp43_1 + tmp47_1 + tmp48_1 + tmp5_1; | |
1655 | EM_S[INDEX2(2,2,4)]+=tmp24_1 + tmp25_1 + tmp27_1 + tmp29_1 + tmp33_1 + tmp52_1 + tmp53_1; | |
1656 | EM_S[INDEX2(1,0,4)]+=tmp36_1 + tmp37_1 + tmp39_1 + tmp3_1 + tmp43_1 + tmp46_1 + tmp50_1 + tmp5_1 + tmp6_1; | |
1657 | EM_S[INDEX2(0,3,4)]+=tmp10_1 + tmp33_1 + tmp54_1 + tmp55_1 + tmp9_1; | |
1658 | EM_S[INDEX2(1,1,4)]+=tmp21_1 + tmp23_1 + tmp30_1 + tmp31_1 + tmp33_1 + tmp56_1 + tmp57_1; | |
1659 | } else { /* constant data */ | |
1660 | const register double A_00 = A_p[INDEX2(0,0,2)]; | |
1661 | const register double A_01 = A_p[INDEX2(0,1,2)]; | |
1662 | const register double A_10 = A_p[INDEX2(1,0,2)]; | |
1663 | const register double A_11 = A_p[INDEX2(1,1,2)]; | |
1664 | const register double tmp0_0 = A_01 + A_10; | |
1665 | const register double tmp0_1 = A_00*w18; | |
1666 | const register double tmp10_1 = A_01*w20; | |
1667 | const register double tmp12_1 = A_00*w26; | |
1668 | const register double tmp4_1 = A_00*w22; | |
1669 | const register double tmp8_1 = A_00*w24; | |
1670 | const register double tmp13_1 = A_10*w19; | |
1671 | const register double tmp9_1 = tmp0_0*w20; | |
1672 | const register double tmp3_1 = A_11*w21; | |
1673 | const register double tmp11_1 = A_11*w27; | |
1674 | const register double tmp1_1 = A_01*w19; | |
1675 | const register double tmp6_1 = A_11*w23; | |
1676 | const register double tmp7_1 = A_11*w25; | |
1677 | const register double tmp2_1 = A_10*w20; | |
1678 | const register double tmp5_1 = tmp0_0*w19; | |
1679 | EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1; | |
1680 | EM_S[INDEX2(1,2,4)]+=tmp4_1 + tmp5_1 + tmp6_1; | |
1681 | EM_S[INDEX2(3,2,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1; | |
1682 | EM_S[INDEX2(0,0,4)]+=tmp5_1 + tmp7_1 + tmp8_1; | |
1683 | EM_S[INDEX2(3,3,4)]+=tmp5_1 + tmp7_1 + tmp8_1; | |
1684 | EM_S[INDEX2(3,0,4)]+=tmp4_1 + tmp6_1 + tmp9_1; | |
1685 | EM_S[INDEX2(3,1,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1; | |
1686 | EM_S[INDEX2(2,1,4)]+=tmp4_1 + tmp5_1 + tmp6_1; | |
1687 | EM_S[INDEX2(0,2,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1; | |
1688 | EM_S[INDEX2(2,0,4)]+=tmp11_1 + tmp12_1 + tmp1_1 + tmp2_1; | |
1689 | EM_S[INDEX2(1,3,4)]+=tmp11_1 + tmp12_1 + tmp1_1 + tmp2_1; | |
1690 | EM_S[INDEX2(2,3,4)]+=tmp0_1 + tmp10_1 + tmp13_1 + tmp3_1; | |
1691 | EM_S[INDEX2(2,2,4)]+=tmp7_1 + tmp8_1 + tmp9_1; | |
1692 | EM_S[INDEX2(1,0,4)]+=tmp0_1 + tmp10_1 + tmp13_1 + tmp3_1; | |
1693 | EM_S[INDEX2(0,3,4)]+=tmp4_1 + tmp6_1 + tmp9_1; | |
1694 | EM_S[INDEX2(1,1,4)]+=tmp7_1 + tmp8_1 + tmp9_1; | |
1695 | } | |
1696 | } | |
1697 | /////////////// | |
1698 | // process B // | |
1699 | /////////////// | |
1700 | if (!B.isEmpty()) { | |
1701 | add_EM_S=true; | |
1702 | const double* B_p=const_cast<escript::Data*>(&B)->getSampleDataRO(e); | |
1703 | if (B.actsExpanded()) { | |
1704 | const register double B_0_0 = B_p[INDEX2(0,0,2)]; | |
1705 | const register double B_1_0 = B_p[INDEX2(1,0,2)]; | |
1706 | const register double B_0_1 = B_p[INDEX2(0,1,2)]; | |
1707 | const register double B_1_1 = B_p[INDEX2(1,1,2)]; | |
1708 | const register double B_0_2 = B_p[INDEX2(0,2,2)]; | |
1709 | const register double B_1_2 = B_p[INDEX2(1,2,2)]; | |
1710 | const register double B_0_3 = B_p[INDEX2(0,3,2)]; | |
1711 | const register double B_1_3 = B_p[INDEX2(1,3,2)]; | |
1712 | const register double tmp3_0 = B_0_0 + B_0_2; | |
1713 | const register double tmp1_0 = B_1_2 + B_1_3; | |
1714 | const register double tmp2_0 = B_0_1 + B_0_3; | |
1715 | const register double tmp0_0 = B_1_0 + B_1_1; | |
1716 | const register double tmp63_1 = B_1_1*w42; | |
1717 | const register double tmp79_1 = B_1_1*w40; | |
1718 | const register double tmp37_1 = tmp3_0*w35; | |
1719 | const register double tmp8_1 = tmp0_0*w32; | |
1720 | const register double tmp71_1 = B_0_1*w34; | |
1721 | const register double tmp19_1 = B_0_3*w31; | |
1722 | const register double tmp15_1 = B_0_3*w34; | |
1723 | const register double tmp9_1 = tmp3_0*w34; | |
1724 | const register double tmp35_1 = B_1_0*w36; | |
1725 | const register double tmp66_1 = B_0_3*w28; | |
1726 | const register double tmp28_1 = B_1_0*w42; | |
1727 | const register double tmp22_1 = B_1_0*w40; | |
1728 | const register double tmp16_1 = B_1_2*w29; | |
1729 | const register double tmp6_1 = tmp2_0*w35; | |
1730 | const register double tmp55_1 = B_1_3*w40; | |
1731 | const register double tmp50_1 = B_1_3*w42; | |
1732 | const register double tmp7_1 = tmp1_0*w29; | |
1733 | const register double tmp1_1 = tmp1_0*w32; | |
1734 | const register double tmp57_1 = B_0_3*w30; | |
1735 | const register double tmp18_1 = B_1_1*w32; | |
1736 | const register double tmp53_1 = B_1_0*w41; | |
1737 | const register double tmp61_1 = B_1_3*w36; | |
1738 | const register double tmp27_1 = B_0_3*w38; | |
1739 | const register double tmp64_1 = B_0_2*w30; | |
1740 | const register double tmp76_1 = B_0_1*w38; | |
1741 | const register double tmp39_1 = tmp2_0*w34; | |
1742 | const register double tmp62_1 = B_0_1*w31; | |
1743 | const register double tmp56_1 = B_0_0*w31; | |
1744 | const register double tmp49_1 = B_1_1*w36; | |
1745 | const register double tmp2_1 = B_0_2*w31; | |
1746 | const register double tmp23_1 = B_0_2*w33; | |
1747 | const register double tmp38_1 = B_1_1*w43; | |
1748 | const register double tmp74_1 = B_1_2*w41; | |
1749 | const register double tmp43_1 = B_1_1*w41; | |
1750 | const register double tmp58_1 = B_0_2*w28; | |
1751 | const register double tmp67_1 = B_0_0*w33; | |
1752 | const register double tmp33_1 = tmp0_0*w39; | |
1753 | const register double tmp4_1 = B_0_0*w28; | |
1754 | const register double tmp20_1 = B_0_0*w30; | |
1755 | const register double tmp13_1 = B_0_2*w38; | |
1756 | const register double tmp65_1 = B_1_2*w43; | |
1757 | const register double tmp0_1 = tmp0_0*w29; | |
1758 | const register double tmp41_1 = tmp3_0*w33; | |
1759 | const register double tmp73_1 = B_0_2*w37; | |
1760 | const register double tmp69_1 = B_0_0*w38; | |
1761 | const register double tmp48_1 = B_1_2*w39; | |
1762 | const register double tmp59_1 = B_0_1*w33; | |
1763 | const register double tmp17_1 = B_1_3*w41; | |
1764 | const register double tmp5_1 = B_0_3*w33; | |
1765 | const register double tmp3_1 = B_0_1*w30; | |
1766 | const register double tmp21_1 = B_0_1*w28; | |
1767 | const register double tmp42_1 = B_1_0*w29; | |
1768 | const register double tmp54_1 = B_1_2*w32; | |
1769 | const register double tmp60_1 = B_1_0*w39; | |
1770 | const register double tmp32_1 = tmp1_0*w36; | |
1771 | const register double tmp10_1 = B_0_1*w37; | |
1772 | const register double tmp14_1 = B_0_0*w35; | |
1773 | const register double tmp29_1 = B_0_1*w35; | |
1774 | const register double tmp26_1 = B_1_2*w36; | |
1775 | const register double tmp30_1 = B_1_3*w43; | |
1776 | const register double tmp70_1 = B_0_2*w35; | |
1777 | const register double tmp34_1 = B_1_3*w39; | |
1778 | const register double tmp51_1 = B_1_0*w43; | |
1779 | const register double tmp31_1 = B_0_2*w34; | |
1780 | const register double tmp45_1 = tmp3_0*w28; | |
1781 | const register double tmp11_1 = tmp1_0*w39; | |
1782 | const register double tmp52_1 = B_1_1*w29; | |
1783 | const register double tmp44_1 = B_1_3*w32; | |
1784 | const register double tmp25_1 = B_1_1*w39; | |
1785 | const register double tmp47_1 = tmp2_0*w33; | |
1786 | const register double tmp72_1 = B_1_3*w29; | |
1787 | const register double tmp40_1 = tmp2_0*w28; | |
1788 | const register double tmp46_1 = B_1_2*w40; | |
1789 | const register double tmp36_1 = B_1_2*w42; | |
1790 | const register double tmp24_1 = B_0_0*w37; | |
1791 | const register double tmp77_1 = B_0_3*w35; | |
1792 | const register double tmp68_1 = B_0_3*w37; | |
1793 | const register double tmp78_1 = B_0_0*w34; | |
1794 | const register double tmp12_1 = tmp0_0*w36; | |
1795 | const register double tmp75_1 = B_1_0*w32; | |
1796 | EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1 + tmp4_1 + tmp5_1; | |
1797 | EM_S[INDEX2(1,2,4)]+=tmp6_1 + tmp7_1 + tmp8_1 + tmp9_1; | |
1798 | EM_S[INDEX2(3,2,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1 + tmp14_1 + tmp15_1; | |
1799 | EM_S[INDEX2(0,0,4)]+=tmp16_1 + tmp17_1 + tmp18_1 + tmp19_1 + tmp20_1 + tmp21_1 + tmp22_1 + tmp23_1; | |
1800 | EM_S[INDEX2(3,3,4)]+=tmp24_1 + tmp25_1 + tmp26_1 + tmp27_1 + tmp28_1 + tmp29_1 + tmp30_1 + tmp31_1; | |
1801 | EM_S[INDEX2(3,0,4)]+=tmp32_1 + tmp33_1 + tmp6_1 + tmp9_1; | |
1802 | EM_S[INDEX2(3,1,4)]+=tmp34_1 + tmp35_1 + tmp36_1 + tmp37_1 + tmp38_1 + tmp39_1; | |
1803 | EM_S[INDEX2(2,1,4)]+=tmp32_1 + tmp33_1 + tmp40_1 + tmp41_1; | |
1804 | EM_S[INDEX2(0,2,4)]+=tmp42_1 + tmp43_1 + tmp44_1 + tmp45_1 + tmp46_1 + tmp47_1; | |
1805 | EM_S[INDEX2(2,0,4)]+=tmp45_1 + tmp47_1 + tmp48_1 + tmp49_1 + tmp50_1 + tmp51_1; | |
1806 | EM_S[INDEX2(1,3,4)]+=tmp37_1 + tmp39_1 + tmp52_1 + tmp53_1 + tmp54_1 + tmp55_1; | |
1807 | EM_S[INDEX2(2,3,4)]+=tmp11_1 + tmp12_1 + tmp56_1 + tmp57_1 + tmp58_1 + tmp59_1; | |
1808 | EM_S[INDEX2(2,2,4)]+=tmp60_1 + tmp61_1 + tmp62_1 + tmp63_1 + tmp64_1 + tmp65_1 + tmp66_1 + tmp67_1; | |
1809 | EM_S[INDEX2(1,0,4)]+=tmp0_1 + tmp1_1 + tmp68_1 + tmp69_1 + tmp70_1 + tmp71_1; | |
1810 | EM_S[INDEX2(0,3,4)]+=tmp40_1 + tmp41_1 + tmp7_1 + tmp8_1; | |
1811 | EM_S[INDEX2(1,1,4)]+=tmp72_1 + tmp73_1 + tmp74_1 + tmp75_1 + tmp76_1 + tmp77_1 + tmp78_1 + tmp79_1; | |
1812 | } else { /* constant data */ | |
1813 | const register double B_0 = B_p[0]; | |
1814 | const register double B_1 = B_p[1]; | |
1815 | const register double tmp6_1 = B_1*w50; | |
1816 | const register double tmp1_1 = B_1*w45; | |
1817 | const register double tmp5_1 = B_1*w49; | |
1818 | const register double tmp4_1 = B_1*w48; | |
1819 | const register double tmp0_1 = B_0*w44; | |
1820 | const register double tmp2_1 = B_0*w46; | |
1821 | const register double tmp7_1 = B_0*w51; | |
1822 | const register double tmp3_1 = B_0*w47; | |
1823 | EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1; | |
1824 | EM_S[INDEX2(1,2,4)]+=tmp1_1 + tmp2_1; | |
1825 | EM_S[INDEX2(3,2,4)]+=tmp3_1 + tmp4_1; | |
1826 | EM_S[INDEX2(0,0,4)]+=tmp0_1 + tmp5_1; | |
1827 | EM_S[INDEX2(3,3,4)]+=tmp3_1 + tmp6_1; | |
1828 | EM_S[INDEX2(3,0,4)]+=tmp2_1 + tmp4_1; | |
1829 | EM_S[INDEX2(3,1,4)]+=tmp2_1 + tmp6_1; | |
1830 | EM_S[INDEX2(2,1,4)]+=tmp4_1 + tmp7_1; | |
1831 | EM_S[INDEX2(0,2,4)]+=tmp5_1 + tmp7_1; | |
1832 | EM_S[INDEX2(2,0,4)]+=tmp6_1 + tmp7_1; | |
1833 | EM_S[INDEX2(1,3,4)]+=tmp2_1 + tmp5_1; | |
1834 | EM_S[INDEX2(2,3,4)]+=tmp0_1 + tmp4_1; | |
1835 | EM_S[INDEX2(2,2,4)]+=tmp0_1 + tmp6_1; | |
1836 | EM_S[INDEX2(1,0,4)]+=tmp1_1 + tmp3_1; | |
1837 | EM_S[INDEX2(0,3,4)]+=tmp1_1 + tmp7_1; | |
1838 | EM_S[INDEX2(1,1,4)]+=tmp3_1 + tmp5_1; | |
1839 | } | |
1840 | } | |
1841 | /////////////// | |
1842 | // process C // | |
1843 | /////////////// | |
1844 | if (!C.isEmpty()) { | |
1845 | add_EM_S=true; | |
1846 | const double* C_p=const_cast<escript::Data*>(&C)->getSampleDataRO(e); | |
1847 | if (C.actsExpanded()) { | |
1848 | const register double C_0_0 = C_p[INDEX2(0,0,2)]; | |
1849 | const register double C_1_0 = C_p[INDEX2(1,0,2)]; | |
1850 | const register double C_0_1 = C_p[INDEX2(0,1,2)]; | |
1851 | const register double C_1_1 = C_p[INDEX2(1,1,2)]; | |
1852 | const register double C_0_2 = C_p[INDEX2(0,2,2)]; | |
1853 | const register double C_1_2 = C_p[INDEX2(1,2,2)]; | |
1854 | const register double C_0_3 = C_p[INDEX2(0,3,2)]; | |
1855 | const register double C_1_3 = C_p[INDEX2(1,3,2)]; | |
1856 | const register double tmp2_0 = C_0_1 + C_0_3; | |
1857 | const register double tmp1_0 = C_1_2 + C_1_3; | |
1858 | const register double tmp3_0 = C_0_0 + C_0_2; | |
1859 | const register double tmp0_0 = C_1_0 + C_1_1; | |
1860 | const register double tmp64_1 = C_0_2*w30; | |
1861 | const register double tmp14_1 = C_0_2*w28; | |
1862 | const register double tmp19_1 = C_0_3*w31; | |
1863 | const register double tmp22_1 = C_1_0*w40; | |
1864 | const register double tmp37_1 = tmp3_0*w35; | |
1865 | const register double tmp29_1 = C_0_1*w35; | |
1866 | const register double tmp73_1 = C_0_2*w37; | |
1867 | const register double tmp74_1 = C_1_2*w41; | |
1868 | const register double tmp52_1 = C_1_3*w39; | |
1869 | const register double tmp25_1 = C_1_1*w39; | |
1870 | const register double tmp62_1 = C_0_1*w31; | |
1871 | const register double tmp79_1 = C_1_1*w40; | |
1872 | const register double tmp43_1 = C_1_1*w36; | |
1873 | const register double tmp27_1 = C_0_3*w38; | |
1874 | const register double tmp28_1 = C_1_0*w42; | |
1875 | const register double tmp63_1 = C_1_1*w42; | |
1876 | const register double tmp59_1 = C_0_3*w34; | |
1877 | const register double tmp72_1 = C_1_3*w29; | |
1878 | const register double tmp40_1 = tmp2_0*w35; | |
1879 | const register double tmp13_1 = C_0_3*w30; | |
1880 | const register double tmp51_1 = C_1_2*w40; | |
1881 | const register double tmp54_1 = C_1_2*w42; | |
1882 | const register double tmp12_1 = C_0_0*w31; | |
1883 | const register double tmp2_1 = tmp1_0*w32; | |
1884 | const register double tmp68_1 = C_0_2*w31; | |
1885 | const register double tmp75_1 = C_1_0*w32; | |
1886 | const register double tmp49_1 = C_1_1*w41; | |
1887 | const register double tmp4_1 = C_0_2*w35; | |
1888 | const register double tmp66_1 = C_0_3*w28; | |
1889 | const register double tmp56_1 = C_0_1*w37; | |
1890 | const register double tmp5_1 = C_0_1*w34; | |
1891 | const register double tmp38_1 = tmp2_0*w34; | |
1892 | const register double tmp76_1 = C_0_1*w38; | |
1893 | const register double tmp21_1 = C_0_1*w28; | |
1894 | const register double tmp69_1 = C_0_1*w30; | |
1895 | const register double tmp53_1 = C_1_0*w36; | |
1896 | const register double tmp42_1 = C_1_2*w39; | |
1897 | const register double tmp32_1 = tmp1_0*w29; | |
1898 | const register double tmp45_1 = C_1_0*w43; | |
1899 | const register double tmp33_1 = tmp0_0*w32; | |
1900 | const register double tmp35_1 = C_1_0*w41; | |
1901 | const register double tmp26_1 = C_1_2*w36; | |
1902 | const register double tmp67_1 = C_0_0*w33; | |
1903 | const register double tmp31_1 = C_0_2*w34; | |
1904 | const register double tmp20_1 = C_0_0*w30; | |
1905 | const register double tmp70_1 = C_0_0*w28; | |
1906 | const register double tmp8_1 = tmp0_0*w39; | |
1907 | const register double tmp30_1 = C_1_3*w43; | |
1908 | const register double tmp0_1 = tmp0_0*w29; | |
1909 | const register double tmp17_1 = C_1_3*w41; | |
1910 | const register double tmp58_1 = C_0_0*w35; | |
1911 | const register double tmp9_1 = tmp3_0*w33; | |
1912 | const register double tmp61_1 = C_1_3*w36; | |
1913 | const register double tmp41_1 = tmp3_0*w34; | |
1914 | const register double tmp50_1 = C_1_3*w32; | |
1915 | const register double tmp18_1 = C_1_1*w32; | |
1916 | const register double tmp6_1 = tmp1_0*w36; | |
1917 | const register double tmp3_1 = C_0_0*w38; | |
1918 | const register double tmp34_1 = C_1_1*w29; | |
1919 | const register double tmp77_1 = C_0_3*w35; | |
1920 | const register double tmp65_1 = C_1_2*w43; | |
1921 | const register double tmp71_1 = C_0_3*w33; | |
1922 | const register double tmp55_1 = C_1_1*w43; | |
1923 | const register double tmp46_1 = tmp3_0*w28; | |
1924 | const register double tmp24_1 = C_0_0*w37; | |
1925 | const register double tmp10_1 = tmp1_0*w39; | |
1926 | const register double tmp48_1 = C_1_0*w29; | |
1927 | const register double tmp15_1 = C_0_1*w33; | |
1928 | const register double tmp36_1 = C_1_2*w32; | |
1929 | const register double tmp60_1 = C_1_0*w39; | |
1930 | const register double tmp47_1 = tmp2_0*w33; | |
1931 | const register double tmp16_1 = C_1_2*w29; | |
1932 | const register double tmp1_1 = C_0_3*w37; | |
1933 | const register double tmp7_1 = tmp2_0*w28; | |
1934 | const register double tmp39_1 = C_1_3*w40; | |
1935 | const register double tmp44_1 = C_1_3*w42; | |
1936 | const register double tmp57_1 = C_0_2*w38; | |
1937 | const register double tmp78_1 = C_0_0*w34; | |
1938 | const register double tmp11_1 = tmp0_0*w36; | |
1939 | const register double tmp23_1 = C_0_2*w33; | |
1940 | EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1 + tmp4_1 + tmp5_1; | |
1941 | EM_S[INDEX2(1,2,4)]+=tmp6_1 + tmp7_1 + tmp8_1 + tmp9_1; | |
1942 | EM_S[INDEX2(3,2,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1 + tmp14_1 + tmp15_1; | |
1943 | EM_S[INDEX2(0,0,4)]+=tmp16_1 + tmp17_1 + tmp18_1 + tmp19_1 + tmp20_1 + tmp21_1 + tmp22_1 + tmp23_1; | |
1944 | EM_S[INDEX2(3,3,4)]+=tmp24_1 + tmp25_1 + tmp26_1 + tmp27_1 + tmp28_1 + tmp29_1 + tmp30_1 + tmp31_1; | |
1945 | EM_S[INDEX2(3,0,4)]+=tmp32_1 + tmp33_1 + tmp7_1 + tmp9_1; | |
1946 | EM_S[INDEX2(3,1,4)]+=tmp34_1 + tmp35_1 + tmp36_1 + tmp37_1 + tmp38_1 + tmp39_1; | |
1947 | EM_S[INDEX2(2,1,4)]+=tmp32_1 + tmp33_1 + tmp40_1 + tmp41_1; | |
1948 | EM_S[INDEX2(0,2,4)]+=tmp42_1 + tmp43_1 + tmp44_1 + tmp45_1 + tmp46_1 + tmp47_1; | |
1949 | EM_S[INDEX2(2,0,4)]+=tmp46_1 + tmp47_1 + tmp48_1 + tmp49_1 + tmp50_1 + tmp51_1; | |
1950 | EM_S[INDEX2(1,3,4)]+=tmp37_1 + tmp38_1 + tmp52_1 + tmp53_1 + tmp54_1 + tmp55_1; | |
1951 | EM_S[INDEX2(2,3,4)]+=tmp10_1 + tmp11_1 + tmp56_1 + tmp57_1 + tmp58_1 + tmp59_1; | |
1952 | EM_S[INDEX2(2,2,4)]+=tmp60_1 + tmp61_1 + tmp62_1 + tmp63_1 + tmp64_1 + tmp65_1 + tmp66_1 + tmp67_1; | |
1953 | EM_S[INDEX2(1,0,4)]+=tmp0_1 + tmp2_1 + tmp68_1 + tmp69_1 + tmp70_1 + tmp71_1; | |
1954 | EM_S[INDEX2(0,3,4)]+=tmp40_1 + tmp41_1 + tmp6_1 + tmp8_1; | |
1955 | EM_S[INDEX2(1,1,4)]+=tmp72_1 + tmp73_1 + tmp74_1 + tmp75_1 + tmp76_1 + tmp77_1 + tmp78_1 + tmp79_1; | |
1956 | } else { /* constant data */ | |
1957 | const register double C_0 = C_p[0]; | |
1958 | const register double C_1 = C_p[1]; | |
1959 | const register double tmp1_1 = C_1*w45; | |
1960 | const register double tmp3_1 = C_0*w51; | |
1961 | const register double tmp4_1 = C_0*w44; | |
1962 | const register double tmp7_1 = C_0*w46; | |
1963 | const register double tmp5_1 = C_1*w49; | |
1964 | const register double tmp2_1 = C_1*w48; | |
1965 | const register double tmp0_1 = C_0*w47; | |
1966 | const register double tmp6_1 = C_1*w50; | |
1967 | EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1; | |
1968 | EM_S[INDEX2(1,2,4)]+=tmp2_1 + tmp3_1; | |
1969 | EM_S[INDEX2(3,2,4)]+=tmp2_1 + tmp4_1; | |
1970 | EM_S[INDEX2(0,0,4)]+=tmp4_1 + tmp5_1; | |
1971 | EM_S[INDEX2(3,3,4)]+=tmp0_1 + tmp6_1; | |
1972 | EM_S[INDEX2(3,0,4)]+=tmp1_1 + tmp3_1; | |
1973 | EM_S[INDEX2(3,1,4)]+=tmp5_1 + tmp7_1; | |
1974 | EM_S[INDEX2(2,1,4)]+=tmp1_1 + tmp7_1; | |
1975 | EM_S[INDEX2(0,2,4)]+=tmp3_1 + tmp6_1; | |
1976 | EM_S[INDEX2(2,0,4)]+=tmp3_1 + tmp5_1; | |
1977 | EM_S[INDEX2(1,3,4)]+=tmp6_1 + tmp7_1; | |
1978 | EM_S[INDEX2(2,3,4)]+=tmp0_1 + tmp2_1; | |
1979 | EM_S[INDEX2(2,2,4)]+=tmp4_1 + tmp6_1; | |
1980 | EM_S[INDEX2(1,0,4)]+=tmp1_1 + tmp4_1; | |
1981 | EM_S[INDEX2(0,3,4)]+=tmp2_1 + tmp7_1; | |
1982 | EM_S[INDEX2(1,1,4)]+=tmp0_1 + tmp5_1; | |
1983 | } | |
1984 | } | |
1985 | /////////////// | |
1986 | // process D // | |
1987 | /////////////// | |
1988 | if (!D.isEmpty()) { | |
1989 | add_EM_S=true; | |
1990 | const double* D_p=const_cast<escript::Data*>(&D)->getSampleDataRO(e); | |
1991 | if (D.actsExpanded()) { | |
1992 | const register double D_0 = D_p[0]; | |
1993 | const register double D_1 = D_p[1]; | |
1994 | const register double D_2 = D_p[2]; | |
1995 | const register double D_3 = D_p[3]; | |
1996 | const register double tmp4_0 = D_1 + D_3; | |
1997 | const register double tmp2_0 = D_0 + D_1 + D_2 + D_3; | |
1998 | const register double tmp5_0 = D_0 + D_2; | |
1999 | const register double tmp0_0 = D_0 + D_1; | |
2000 | const register double tmp6_0 = D_0 + D_3; | |
2001 | const register double tmp1_0 = D_2 + D_3; | |
2002 | const register double tmp3_0 = D_1 + D_2; | |
2003 | const register double tmp16_1 = D_1*w56; | |
2004 | const register double tmp14_1 = tmp6_0*w54; | |
2005 | const register double tmp8_1 = D_3*w55; | |
2006 | const register double tmp2_1 = tmp2_0*w54; | |
2007 | const register double tmp12_1 = tmp5_0*w52; | |
2008 | const register double tmp4_1 = tmp0_0*w53; | |
2009 | const register double tmp3_1 = tmp1_0*w52; | |
2010 | const register double tmp13_1 = tmp4_0*w53; | |
2011 | const register double tmp10_1 = tmp4_0*w52; | |
2012 | const register double tmp1_1 = tmp1_0*w53; | |
2013 | const register double tmp7_1 = D_3*w56; | |
2014 | const register double tmp0_1 = tmp0_0*w52; | |
2015 | const register double tmp11_1 = tmp5_0*w53; | |
2016 | const register double tmp9_1 = D_0*w56; | |
2017 | const register double tmp5_1 = tmp3_0*w54; | |
2018 | const register double tmp18_1 = D_2*w56; | |
2019 | const register double tmp17_1 = D_1*w55; | |
2020 | const register double tmp6_1 = D_0*w55; | |
2021 | const register double tmp15_1 = D_2*w55; | |
2022 | EM_S[INDEX2(0,1,4)]+=tmp0_1 + tmp1_1; | |
2023 | EM_S[INDEX2(1,2,4)]+=tmp2_1; | |
2024 | EM_S[INDEX2(3,2,4)]+=tmp3_1 + tmp4_1; | |
2025 | EM_S[INDEX2(0,0,4)]+=tmp5_1 + tmp6_1 + tmp7_1; | |
2026 | EM_S[INDEX2(3,3,4)]+=tmp5_1 + tmp8_1 + tmp9_1; | |
2027 | EM_S[INDEX2(3,0,4)]+=tmp2_1; | |
2028 | EM_S[INDEX2(3,1,4)]+=tmp10_1 + tmp11_1; | |
2029 | EM_S[INDEX2(2,1,4)]+=tmp2_1; | |
2030 | EM_S[INDEX2(0,2,4)]+=tmp12_1 + tmp13_1; | |
2031 | EM_S[INDEX2(2,0,4)]+=tmp12_1 + tmp13_1; | |
2032 | EM_S[INDEX2(1,3,4)]+=tmp10_1 + tmp11_1; | |
2033 | EM_S[INDEX2(2,3,4)]+=tmp3_1 + tmp4_1; | |
2034 | EM_S[INDEX2(2,2,4)]+=tmp14_1 + tmp15_1 + tmp16_1; | |
2035 | EM_S[INDEX2(1,0,4)]+=tmp0_1 + tmp1_1; | |
2036 | EM_S[INDEX2(0,3,4)]+=tmp2_1; | |
2037 | EM_S[INDEX2(1,1,4)]+=tmp14_1 + tmp17_1 + tmp18_1; | |
2038 | } else { /* constant data */ | |
2039 | const register double D_0 = D_p[0]; | |
2040 | const register double tmp2_1 = D_0*w59; | |
2041 | const register double tmp1_1 = D_0*w58; | |
2042 | const register double tmp0_1 = D_0*w57; | |
2043 | EM_S[INDEX2(0,1,4)]+=tmp0_1; | |
2044 | EM_S[INDEX2(1,2,4)]+=tmp1_1; | |
2045 | EM_S[INDEX2(3,2,4)]+=tmp0_1; | |
2046 | EM_S[INDEX2(0,0,4)]+=tmp2_1; | |
2047 | EM_S[INDEX2(3,3,4)]+=tmp2_1; | |
2048 | EM_S[INDEX2(3,0,4)]+=tmp1_1; | |
2049 | EM_S[INDEX2(3,1,4)]+=tmp0_1; | |
2050 | EM_S[INDEX2(2,1,4)]+=tmp1_1; | |
2051 | EM_S[INDEX2(0,2,4)]+=tmp0_1; | |
2052 | EM_S[INDEX2(2,0,4)]+=tmp0_1; | |
2053 | EM_S[INDEX2(1,3,4)]+=tmp0_1; | |
2054 | EM_S[INDEX2(2,3,4)]+=tmp0_1; | |
2055 | EM_S[INDEX2(2,2,4)]+=tmp2_1; | |
2056 | EM_S[INDEX2(1,0,4)]+=tmp0_1; | |
2057 | EM_S[INDEX2(0,3,4)]+=tmp1_1; | |
2058 | EM_S[INDEX2(1,1,4)]+=tmp2_1; | |
2059 | } | |
2060 | } | |
2061 | /////////////// | |
2062 | // process X // | |
2063 | /////////////// | |
2064 | if (!X.isEmpty()) { | |
2065 | add_EM_F=true; | |
2066 | const double* X_p=const_cast<escript::Data*>(&X)->getSampleDataRO(e); | |
2067 | if (X.actsExpanded()) { | |
2068 | const register double X_0_0 = X_p[INDEX2(0,0,2)]; | |
2069 | const register double X_1_0 = X_p[INDEX2(1,0,2)]; | |
2070 | const register double X_0_1 = X_p[INDEX2(0,1,2)]; | |
2071 | const register double X_1_1 = X_p[INDEX2(1,1,2)]; | |
2072 | const register double X_0_2 = X_p[INDEX2(0,2,2)]; | |
2073 | const register double X_1_2 = X_p[INDEX2(1,2,2)]; | |
2074 | const register double X_0_3 = X_p[INDEX2(0,3,2)]; | |
2075 | const register double X_1_3 = X_p[INDEX2(1,3,2)]; | |
2076 | const register double tmp1_0 = X_1_1 + X_1_3; | |
2077 | const register double tmp3_0 = X_0_0 + X_0_1; | |
2078 | const register double tmp2_0 = X_1_0 + X_1_2; | |
2079 | const register double tmp0_0 = X_0_2 + X_0_3; | |
2080 | const register double tmp8_1 = tmp2_0*w66; | |
2081 | const register double tmp5_1 = tmp3_0*w64; | |
2082 | const register double tmp14_1 = tmp0_0*w64; | |
2083 | const register double tmp3_1 = tmp3_0*w60; | |
2084 | const register double tmp9_1 = tmp3_0*w63; | |
2085 | const register double tmp13_1 = tmp3_0*w65; | |
2086 | const register double tmp12_1 = tmp1_0*w66; | |
2087 | const register double tmp10_1 = tmp0_0*w60; | |
2088 | const register double tmp2_1 = tmp2_0*w61; | |
2089 | const register double tmp6_1 = tmp2_0*w62; | |
2090 | const register double tmp4_1 = tmp0_0*w65; | |
2091 | const register double tmp11_1 = tmp1_0*w67; | |
2092 | const register double tmp1_1 = tmp1_0*w62; | |
2093 | const register double tmp7_1 = tmp1_0*w61; | |
2094 | const register double tmp0_1 = tmp0_0*w63; | |
2095 | const register double tmp15_1 = tmp2_0*w67; | |
2096 | EM_F[0]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1; | |
2097 | EM_F[1]+=tmp4_1 + tmp5_1 + tmp6_1 + tmp7_1; | |
2098 | EM_F[2]+=tmp10_1 + tmp11_1 + tmp8_1 + tmp9_1; | |
2099 | EM_F[3]+=tmp12_1 + tmp13_1 + tmp14_1 + tmp15_1; | |
2100 | } else { /* constant data */ | |
2101 | const register double X_0 = X_p[0]; | |
2102 | const register double X_1 = X_p[1]; | |
2103 | const register double tmp3_1 = X_1*w71; | |
2104 | const register double tmp2_1 = X_0*w70; | |
2105 | const register double tmp1_1 = X_0*w68; | |
2106 | const register double tmp0_1 = X_1*w69; | |
2107 | EM_F[0]+=tmp0_1 + tmp1_1; | |
2108 | EM_F[1]+=tmp0_1 + tmp2_1; | |
2109 | EM_F[2]+=tmp1_1 + tmp3_1; | |
2110 | EM_F[3]+=tmp2_1 + tmp3_1; | |
2111 | } | |
2112 | } | |
2113 | /////////////// | |
2114 | // process Y // | |
2115 | /////////////// | |
2116 | if (!Y.isEmpty()) { | |
2117 | add_EM_F=true; | |
2118 | const double* Y_p=const_cast<escript::Data*>(&Y)->getSampleDataRO(e); | |
2119 | if (Y.actsExpanded()) { | |
2120 | const register double Y_0 = Y_p[0]; | |
2121 | const register double Y_1 = Y_p[1]; | |
2122 | const register double Y_2 = Y_p[2]; | |
2123 | const register double Y_3 = Y_p[3]; | |
2124 | const register double tmp0_0 = Y_1 + Y_2; | |
2125 | const register double tmp1_0 = Y_0 + Y_3; | |
2126 | const register double tmp9_1 = Y_0*w74; | |
2127 | const register double tmp4_1 = tmp1_0*w73; | |
2128 | const register double tmp5_1 = Y_2*w74; | |
2129 | const register double tmp7_1 = Y_1*w74; | |
2130 | const register double tmp6_1 = Y_2*w72; | |
2131 | const register double tmp2_1 = Y_3*w74; | |
2132 | const register double tmp8_1 = Y_3*w72; | |
2133 | const register double tmp3_1 = Y_1*w72; | |
2134 | const register double tmp0_1 = Y_0*w72; | |
2135 | const register double tmp1_1 = tmp0_0*w73; | |
2136 | EM_F[0]+=tmp0_1 + tmp1_1 + tmp2_1; | |
2137 | EM_F[1]+=tmp3_1 + tmp4_1 + tmp5_1; | |
2138 | EM_F[2]+=tmp4_1 + tmp6_1 + tmp7_1; | |
2139 | EM_F[3]+=tmp1_1 + tmp8_1 + tmp9_1; | |
2140 | } else { /* constant data */ | |
2141 | const register double Y_0 = Y_p[0]; | |
2142 | const register double tmp0_1 = Y_0*w75; | |
2143 | EM_F[0]+=tmp0_1; | |
2144 | EM_F[1]+=tmp0_1; | |
2145 | EM_F[2]+=tmp0_1; | |
2146 | EM_F[3]+=tmp0_1; | |
2147 | } | |
2148 | } | |
2149 | /* GENERATOR SNIP_PDE_SINGLE BOTTOM */ | |
2150 | ||
2151 | // add to matrix (if add_EM_S) and RHS (if add_EM_F) | |
2152 | const index_t firstNode=m_N0*k1+k0; | |
2153 | IndexVector rowIndex; | |
2154 | rowIndex.push_back(m_dofMap[firstNode]); | |
2155 | rowIndex.push_back(m_dofMap[firstNode+1]); | |
2156 | rowIndex.push_back(m_dofMap[firstNode+m_N0]); | |
2157 | rowIndex.push_back(m_dofMap[firstNode+m_N0+1]); | |
2158 | if (add_EM_F) { | |
2159 | //cout << "-- AddtoRHS -- " << endl; | |
2160 | double *F_p=rhs.getSampleDataRW(0); | |
2161 | for (index_t i=0; i<rowIndex.size(); i++) { | |
2162 | if (rowIndex[i]<getNumDOF()) { | |
2163 | F_p[rowIndex[i]]+=EM_F[i]; | |
2164 | //cout << "F[" << rowIndex[i] << "]=" << F_p[rowIndex[i]] << endl; | |
2165 | } | |
2166 | } | |
2167 | //cout << "---"<<endl; | |
2168 | } | |
2169 | if (add_EM_S) { | |
2170 | //cout << "-- AddtoSystem -- " << endl; | |
2171 | addToSystemMatrix(mat, rowIndex, 1, rowIndex, 1, EM_S); | |
2172 | } | |
2173 | ||
2174 | } // end k0 loop | |
2175 | } // end k1 loop | |
2176 | } // end of coloring | |
2177 | } // end of parallel region | |
2178 | } | |
2179 | ||
2180 | //protected | |
2181 | void Rectangle::assemblePDESystem(Paso_SystemMatrix* mat, | |
2182 | escript::Data& rhs, const escript::Data& A, const escript::Data& B, | |
2183 | const escript::Data& C, const escript::Data& D, | |
2184 | const escript::Data& X, const escript::Data& Y, | |
2185 | const escript::Data& d, const escript::Data& y) const | |
2186 | { | |
2187 | const double h0 = m_l0/m_gNE0; | |
2188 | const double h1 = m_l1/m_gNE1; | |
2189 | dim_t numEq, numComp; | |
2190 | if (!mat) | |
2191 | numEq=numComp=(rhs.isEmpty() ? 1 : rhs.getDataPointSize()); | |
2192 | else { | |
2193 | numEq=mat->logical_row_block_size; | |
2194 | numComp=mat->logical_col_block_size; | |
2195 | } | |
2196 | ||
2197 | /* GENERATOR SNIP_PDE_SYSTEM_PRE TOP */ | |
2198 | const double w0 = -0.1555021169820365539*h1/h0; | |
2199 | const double w1 = 0.041666666666666666667; | |
2200 | const double w10 = -0.041666666666666666667*h0/h1; | |
2201 | const double w11 = 0.1555021169820365539*h1/h0; | |
2202 | const double w12 = 0.1555021169820365539*h0/h1; | |
2203 | const double w13 = 0.01116454968463011277*h0/h1; | |
2204 | const double w14 = 0.01116454968463011277*h1/h0; | |
2205 | const double w15 = 0.041666666666666666667*h1/h0; | |
2206 | const double w16 = -0.01116454968463011277*h0/h1; | |
2207 | const double w17 = -0.1555021169820365539*h0/h1; | |
2208 | const double w18 = -0.33333333333333333333*h1/h0; | |
2209 | const double w19 = 0.25000000000000000000; | |
2210 | const double w2 = -0.15550211698203655390; | |
2211 | const double w20 = -0.25000000000000000000; | |
2212 | const double w21 = 0.16666666666666666667*h0/h1; | |
2213 | const double w22 = -0.16666666666666666667*h1/h0; | |
2214 | const double w23 = -0.16666666666666666667*h0/h1; | |
2215 | const double w24 = 0.33333333333333333333*h1/h0; | |
2216 | const double w25 = 0.33333333333333333333*h0/h1; | |
2217 | const double w26 = 0.16666666666666666667*h1/h0; | |
2218 | const double w27 = -0.33333333333333333333*h0/h1; | |
2219 | const double w28 = -0.032861463941450536761*h1; | |
2220 | const double w29 = -0.032861463941450536761*h0; | |
2221 | const double w3 = 0.041666666666666666667*h0/h1; | |
2222 | const double w30 = -0.12264065304058601714*h1; | |
2223 | const double w31 = -0.0023593469594139828636*h1; | |
2224 | const double w32 = -0.008805202725216129906*h0; | |
2225 | const double w33 = -0.008805202725216129906*h1; | |
2226 | const double w34 = 0.032861463941450536761*h1; | |
2227 | const double w35 = 0.008805202725216129906*h1; | |
2228 | const double w36 = 0.008805202725216129906*h0; | |
2229 | const double w37 = 0.0023593469594139828636*h1; | |
2230 | const double w38 = 0.12264065304058601714*h1; | |
2231 | const double w39 = 0.032861463941450536761*h0; | |
2232 | const double w4 = 0.15550211698203655390; | |
2233 | const double w40 = -0.12264065304058601714*h0; | |
2234 | const double w41 = -0.0023593469594139828636*h0; | |
2235 | const double w42 = 0.0023593469594139828636*h0; | |
2236 | const double w43 = 0.12264065304058601714*h0; | |
2237 | const double w44 = -0.16666666666666666667*h1; | |
2238 | const double w45 = -0.083333333333333333333*h0; | |
2239 | const double w46 = 0.083333333333333333333*h1; | |
2240 | const double w47 = 0.16666666666666666667*h1; | |
2241 | const double w48 = 0.083333333333333333333*h0; | |
2242 | const double w49 = -0.16666666666666666667*h0; | |
2243 | const double w5 = -0.041666666666666666667; | |
2244 | const double w50 = 0.16666666666666666667*h0; | |
2245 | const double w51 = -0.083333333333333333333*h1; | |
2246 | const double w52 = 0.025917019497006092316*h0*h1; | |
2247 | const double w53 = 0.0018607582807716854616*h0*h1; | |
2248 | const double w54 = 0.0069444444444444444444*h0*h1; | |
2249 | const double w55 = 0.09672363354357992482*h0*h1; | |
2250 | const double w56 = 0.00049858867864229740201*h0*h1; | |
2251 | const double w57 = 0.055555555555555555556*h0*h1; | |
2252 | const double w58 = 0.027777777777777777778*h0*h1; | |
2253 | const double w59 = 0.11111111111111111111*h0*h1; | |
2254 | const double w6 = -0.01116454968463011277*h1/h0; | |
2255 | const double w60 = -0.19716878364870322056*h1; | |
2256 | const double w61 = -0.19716878364870322056*h0; | |
2257 | const double w62 = -0.052831216351296779436*h0; | |
2258 | const double w63 = -0.052831216351296779436*h1; | |
2259 | const double w64 = 0.19716878364870322056*h1; | |
2260 | const double w65 = 0.052831216351296779436*h1; | |
2261 | const double w66 = 0.19716878364870322056*h0; | |
2262 | const double w67 = 0.052831216351296779436*h0; | |
2263 | const double w68 = -0.5*h1; | |
2264 | const double w69 = -0.5*h0; | |
2265 | const double w7 = 0.011164549684630112770; | |
2266 | const double w70 = 0.5*h1; | |
2267 | const double w71 = 0.5*h0; | |
2268 | const double w72 = 0.1555021169820365539*h0*h1; | |
2269 | const double w73 = 0.041666666666666666667*h0*h1; | |
2270 | const double w74 = 0.01116454968463011277*h0*h1; | |
2271 | const double w75 = 0.25*h0*h1; | |
2272 | const double w8 = -0.011164549684630112770; | |
2273 | const double w9 = -0.041666666666666666667*h1/h0; | |
2274 | /* GENERATOR SNIP_PDE_SYSTEM_PRE BOTTOM */ | |
2275 | ||
2276 | rhs.requireWrite(); | |
2277 | #pragma omp parallel | |
2278 | { | |
2279 | for (index_t k1_0=0; k1_0<2; k1_0++) { // colouring | |
2280 | #pragma omp for | |
2281 | for (index_t k1=k1_0; k1<m_NE1; k1+=2) { | |
2282 | for (index_t k0=0; k0<m_NE0; ++k0) { | |
2283 | bool add_EM_S=false; | |
2284 | bool add_EM_F=false; | |
2285 | vector<double> EM_S(4*4*numEq*numComp, 0); | |
2286 | vector<double> EM_F(4*numEq, 0); | |
2287 | const index_t e = k0 + m_NE0*k1; | |
2288 | /* GENERATOR SNIP_PDE_SYSTEM TOP */ | |
2289 | /////////////// | |
2290 | // process A // | |
2291 | /////////////// | |
2292 | if (!A.isEmpty()) { | |
2293 | add_EM_S=true; | |
2294 | const double* A_p=const_cast<escript::Data*>(&A)->getSampleDataRO(e); | |
2295 | if (A.actsExpanded()) { | |
2296 | for (index_t k=0; k<numEq; k++) { | |
2297 | for (index_t m=0; m<numComp; m++) { | |
2298 | const register double A_00_0 = A_p[INDEX5(k,0,m,0,0, numEq,2,numComp,2)]; | |
2299 | const register double A_01_0 = A_p[INDEX5(k,0,m,1,0, numEq,2,numComp,2)]; | |
2300 | const register double A_10_0 = A_p[INDEX5(k,1,m,0,0, numEq,2,numComp,2)]; | |
2301 | const register double A_11_0 = A_p[INDEX5(k,1,m,1,0, numEq,2,numComp,2)]; | |
2302 | const register double A_00_1 = A_p[INDEX5(k,0,m,0,1, numEq,2,numComp,2)]; | |
2303 | const register double A_01_1 = A_p[INDEX5(k,0,m,1,1, numEq,2,numComp,2)]; | |
2304 | const register double A_10_1 = A_p[INDEX5(k,1,m,0,1, numEq,2,numComp,2)]; | |
2305 | const register double A_11_1 = A_p[INDEX5(k,1,m,1,1, numEq,2,numComp,2)]; | |
2306 | const register double A_00_2 = A_p[INDEX5(k,0,m,0,2, numEq,2,numComp,2)]; | |
2307 | const register double A_01_2 = A_p[INDEX5(k,0,m,1,2, numEq,2,numComp,2)]; | |
2308 | const register double A_10_2 = A_p[INDEX5(k,1,m,0,2, numEq,2,numComp,2)]; | |
2309 | const register double A_11_2 = A_p[INDEX5(k,1,m,1,2, numEq,2,numComp,2)]; | |
2310 | const register double A_00_3 = A_p[INDEX5(k,0,m,0,3, numEq,2,numComp,2)]; | |
2311 | const register double A_01_3 = A_p[INDEX5(k,0,m,1,3, numEq,2,numComp,2)]; | |
2312 | const register double A_10_3 = A_p[INDEX5(k,1,m,0,3, numEq,2,numComp,2)]; | |
2313 | const register double A_11_3 = A_p[INDEX5(k,1,m,1,3, numEq,2,numComp,2)]; | |
2314 | const register double tmp4_0 = A_10_1 + A_10_2; | |
2315 | const register double tmp12_0 = A_11_0 + A_11_2; | |
2316 | const register double tmp2_0 = A_11_0 + A_11_1 + A_11_2 + A_11_3; | |
2317 | const register double tmp10_0 = A_01_3 + A_10_3; | |
2318 | const register double tmp14_0 = A_01_0 + A_01_3 + A_10_0 + A_10_3; | |
2319 | const register double tmp0_0 = A_01_0 + A_01_3; | |
2320 | const register double tmp13_0 = A_01_2 + A_10_1; | |
2321 | const register double tmp3_0 = A_00_2 + A_00_3; | |
2322 | const register double tmp11_0 = A_11_1 + A_11_3; | |
2323 | const register double tmp18_0 = A_01_1 + A_10_1; | |
2324 | const register double tmp1_0 = A_00_0 + A_00_1; | |
2325 | const register double tmp15_0 = A_01_1 + A_10_2; | |
2326 | const register double tmp5_0 = A_00_0 + A_00_1 + A_00_2 + A_00_3; | |
2327 | const register double tmp16_0 = A_10_0 + A_10_3; | |
2328 | const register double tmp6_0 = A_01_3 + A_10_0; | |
2329 | const register double tmp17_0 = A_01_1 + A_01_2; | |
2330 | const register double tmp9_0 = A_01_0 + A_10_0; | |
2331 | const register double tmp7_0 = A_01_0 + A_10_3; | |
2332 | const register double tmp8_0 = A_01_1 + A_01_2 + A_10_1 + A_10_2; | |
2333 | const register double tmp19_0 = A_01_2 + A_10_2; | |
2334 | const register double tmp14_1 = A_10_0*w8; | |
2335 | const register double tmp23_1 = tmp3_0*w14; | |
2336 | const register double tmp35_1 = A_01_0*w8; | |
2337 | const register double tmp54_1 = tmp13_0*w8; | |
2338 | const register double tmp20_1 = tmp9_0*w4; | |
2339 | const register double tmp25_1 = tmp12_0*w12; | |
2340 | const register double tmp2_1 = A_01_1*w4; | |
2341 | const register double tmp44_1 = tmp7_0*w7; | |
2342 | const register double tmp26_1 = tmp10_0*w4; | |
2343 | const register double tmp52_1 = tmp18_0*w8; | |
2344 | const register double tmp48_1 = A_10_1*w7; | |
2345 | const register double tmp46_1 = A_01_3*w8; | |
2346 | const register double tmp50_1 = A_01_0*w2; | |
2347 | const register double tmp8_1 = tmp4_0*w5; | |
2348 | const register double tmp56_1 = tmp19_0*w8; | |
2349 | const register double tmp9_1 = tmp2_0*w10; | |
2350 | const register double tmp19_1 = A_10_3*w2; | |
2351 | const register double tmp47_1 = A_10_2*w4; | |
2352 | const register double tmp16_1 = tmp3_0*w0; | |
2353 | const register double tmp18_1 = tmp1_0*w6; | |
2354 | const register double tmp31_1 = tmp11_0*w12; | |
2355 | const register double tmp55_1 = tmp15_0*w2; | |
2356 | const register double tmp39_1 = A_10_2*w7; | |
2357 | const register double tmp11_1 = tmp6_0*w7; | |
2358 | const register double tmp40_1 = tmp11_0*w17; | |
2359 | const register double tmp34_1 = tmp15_0*w8; | |
2360 | const register double tmp33_1 = tmp14_0*w5; | |
2361 | const register double tmp24_1 = tmp11_0*w13; | |
2362 | const register double tmp3_1 = tmp1_0*w0; | |
2363 | const register double tmp5_1 = tmp2_0*w3; | |
2364 | const register double tmp43_1 = tmp17_0*w5; | |
2365 | const register double tmp15_1 = A_01_2*w4; | |
2366 | const register double tmp53_1 = tmp19_0*w2; | |
2367 | const register double tmp27_1 = tmp3_0*w11; | |
2368 | const register double tmp32_1 = tmp13_0*w2; | |
2369 | const register double tmp10_1 = tmp5_0*w9; | |
2370 | const register double tmp37_1 = A_10_1*w4; | |
2371 | const register double tmp38_1 = tmp5_0*w15; | |
2372 | const register double tmp17_1 = A_01_1*w7; | |
2373 | const register double tmp12_1 = tmp7_0*w4; | |
2374 | const register double tmp22_1 = tmp10_0*w7; | |
2375 | const register double tmp57_1 = tmp18_0*w2; | |
2376 | const register double tmp28_1 = tmp9_0*w7; | |
2377 | const register double tmp29_1 = tmp1_0*w14; | |
2378 | const register double tmp51_1 = tmp11_0*w16; | |
2379 | const register double tmp42_1 = tmp12_0*w16; | |
2380 | const register double tmp49_1 = tmp12_0*w17; | |
2381 | const register double tmp21_1 = tmp1_0*w11; | |
2382 | const register double tmp1_1 = tmp0_0*w1; | |
2383 | const register double tmp45_1 = tmp6_0*w4; | |
2384 | const register double tmp7_1 = A_10_0*w2; | |
2385 | const register double tmp6_1 = tmp3_0*w6; | |
2386 | const register double tmp13_1 = tmp8_0*w1; | |
2387 | const register double tmp36_1 = tmp16_0*w1; | |
2388 | const register double tmp41_1 = A_01_3*w2; | |
2389 | const register double tmp30_1 = tmp12_0*w13; | |
2390 | const register double tmp4_1 = A_01_2*w7; | |
2391 | const register double tmp0_1 = A_10_3*w8; | |
2392 | EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1 + tmp4_1 + tmp5_1 + tmp6_1 + tmp7_1 + tmp8_1; | |
2393 | EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1 + tmp9_1; | |
2394 | EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=tmp14_1 + tmp15_1 + tmp16_1 + tmp17_1 + tmp18_1 + tmp19_1 + tmp1_1 + tmp5_1 + tmp8_1; | |
2395 | EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=tmp13_1 + tmp20_1 + tmp21_1 + tmp22_1 + tmp23_1 + tmp24_1 + tmp25_1; | |
2396 | EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=tmp13_1 + tmp26_1 + tmp27_1 + tmp28_1 + tmp29_1 + tmp30_1 + tmp31_1; | |
2397 | EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=tmp10_1 + tmp32_1 + tmp33_1 + tmp34_1 + tmp9_1; | |
2398 | EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=tmp35_1 + tmp36_1 + tmp37_1 + tmp38_1 + tmp39_1 + tmp40_1 + tmp41_1 + tmp42_1 + tmp43_1; | |
2399 | EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=tmp10_1 + tmp13_1 + tmp44_1 + tmp45_1 + tmp9_1; | |
2400 | EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=tmp36_1 + tmp38_1 + tmp43_1 + tmp46_1 + tmp47_1 + tmp48_1 + tmp49_1 + tmp50_1 + tmp51_1; | |
2401 | EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=tmp0_1 + tmp15_1 + tmp17_1 + tmp1_1 + tmp38_1 + tmp49_1 + tmp51_1 + tmp7_1 + tmp8_1; | |
2402 | EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=tmp14_1 + tmp19_1 + tmp1_1 + tmp2_1 + tmp38_1 + tmp40_1 + tmp42_1 + tmp4_1 + tmp8_1; | |
2403 | EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=tmp16_1 + tmp18_1 + tmp35_1 + tmp36_1 + tmp41_1 + tmp43_1 + tmp47_1 + tmp48_1 + tmp5_1; | |
2404 | EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=tmp24_1 + tmp25_1 + tmp27_1 + tmp29_1 + tmp33_1 + tmp52_1 + tmp53_1; | |
2405 | EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=tmp36_1 + tmp37_1 + tmp39_1 + tmp3_1 + tmp43_1 + tmp46_1 + tmp50_1 + tmp5_1 + tmp6_1; | |
2406 | EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=tmp10_1 + tmp33_1 + tmp54_1 + tmp55_1 + tmp9_1; | |
2407 | EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=tmp21_1 + tmp23_1 + tmp30_1 + tmp31_1 + tmp33_1 + tmp56_1 + tmp57_1; | |
2408 | } | |
2409 | } | |
2410 | } else { /* constant data */ | |
2411 | for (index_t k=0; k<numEq; k++) { | |
2412 | for (index_t m=0; m<numComp; m++) { | |
2413 | const register double A_00 = A_p[INDEX4(k,0,m,0, numEq,2, numComp)]; | |
2414 | const register double A_01 = A_p[INDEX4(k,0,m,1, numEq,2, numComp)]; | |
2415 | const register double A_10 = A_p[INDEX4(k,1,m,0, numEq,2, numComp)]; | |
2416 | const register double A_11 = A_p[INDEX4(k,1,m,1, numEq,2, numComp)]; | |
2417 | const register double tmp0_0 = A_01 + A_10; | |
2418 | const register double tmp0_1 = A_00*w18; | |
2419 | const register double tmp10_1 = A_01*w20; | |
2420 | const register double tmp12_1 = A_00*w26; | |
2421 | const register double tmp4_1 = A_00*w22; | |
2422 | const register double tmp8_1 = A_00*w24; | |
2423 | const register double tmp13_1 = A_10*w19; | |
2424 | const register double tmp9_1 = tmp0_0*w20; | |
2425 | const register double tmp3_1 = A_11*w21; | |
2426 | const register double tmp11_1 = A_11*w27; | |
2427 | const register double tmp1_1 = A_01*w19; | |
2428 | const register double tmp6_1 = A_11*w23; | |
2429 | const register double tmp7_1 = A_11*w25; | |
2430 | const register double tmp2_1 = A_10*w20; | |
2431 | const register double tmp5_1 = tmp0_0*w19; | |
2432 | EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1; | |
2433 | EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=tmp4_1 + tmp5_1 + tmp6_1; | |
2434 | EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1; | |
2435 | EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=tmp5_1 + tmp7_1 + tmp8_1; | |
2436 | EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=tmp5_1 + tmp7_1 + tmp8_1; | |
2437 | EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=tmp4_1 + tmp6_1 + tmp9_1; | |
2438 | EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1; | |
2439 | EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=tmp4_1 + tmp5_1 + tmp6_1; | |
2440 | EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1; | |
2441 | EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=tmp11_1 + tmp12_1 + tmp1_1 + tmp2_1; | |
2442 | EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=tmp11_1 + tmp12_1 + tmp1_1 + tmp2_1; | |
2443 | EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=tmp0_1 + tmp10_1 + tmp13_1 + tmp3_1; | |
2444 | EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=tmp7_1 + tmp8_1 + tmp9_1; | |
2445 | EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=tmp0_1 + tmp10_1 + tmp13_1 + tmp3_1; | |
2446 | EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=tmp4_1 + tmp6_1 + tmp9_1; | |
2447 | EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=tmp7_1 + tmp8_1 + tmp9_1; | |
2448 | } | |
2449 | } | |
2450 | } | |
2451 | } | |
2452 | /////////////// | |
2453 | // process B // | |
2454 | /////////////// | |
2455 | if (!B.isEmpty()) { | |
2456 | add_EM_S=true; | |
2457 | const double* B_p=const_cast<escript::Data*>(&B)->getSampleDataRO(e); | |
2458 | if (B.actsExpanded()) { | |
2459 | for (index_t k=0; k<numEq; k++) { | |
2460 | for (index_t m=0; m<numComp; m++) { | |
2461 | const register double B_0_0 = B_p[INDEX4(k,0,m,0, numEq,2,numComp)]; | |
2462 | const register double B_1_0 = B_p[INDEX4(k,1,m,0, numEq,2,numComp)]; | |
2463 | const register double B_0_1 = B_p[INDEX4(k,0,m,1, numEq,2,numComp)]; | |
2464 | const register double B_1_1 = B_p[INDEX4(k,1,m,1, numEq,2,numComp)]; | |
2465 | const register double B_0_2 = B_p[INDEX4(k,0,m,2, numEq,2,numComp)]; | |
2466 | const register double B_1_2 = B_p[INDEX4(k,1,m,2, numEq,2,numComp)]; | |
2467 | const register double B_0_3 = B_p[INDEX4(k,0,m,3, numEq,2,numComp)]; | |
2468 | const register double B_1_3 = B_p[INDEX4(k,1,m,3, numEq,2,numComp)]; | |
2469 | const register double tmp3_0 = B_0_0 + B_0_2; | |
2470 | const register double tmp1_0 = B_1_2 + B_1_3; | |
2471 | const register double tmp2_0 = B_0_1 + B_0_3; | |
2472 | const register double tmp0_0 = B_1_0 + B_1_1; | |
2473 | const register double tmp63_1 = B_1_1*w42; | |
2474 | const register double tmp79_1 = B_1_1*w40; | |
2475 | const register double tmp37_1 = tmp3_0*w35; | |
2476 | const register double tmp8_1 = tmp0_0*w32; | |
2477 | const register double tmp71_1 = B_0_1*w34; | |
2478 | const register double tmp19_1 = B_0_3*w31; | |
2479 | const register double tmp15_1 = B_0_3*w34; | |
2480 | const register double tmp9_1 = tmp3_0*w34; | |
2481 | const register double tmp35_1 = B_1_0*w36; | |
2482 | const register double tmp66_1 = B_0_3*w28; | |
2483 | const register double tmp28_1 = B_1_0*w42; | |
2484 | const register double tmp22_1 = B_1_0*w40; | |
2485 | const register double tmp16_1 = B_1_2*w29; | |
2486 | const register double tmp6_1 = tmp2_0*w35; | |
2487 | const register double tmp55_1 = B_1_3*w40; | |
2488 | const register double tmp50_1 = B_1_3*w42; | |
2489 | const register double tmp7_1 = tmp1_0*w29; | |
2490 | const register double tmp1_1 = tmp1_0*w32; | |
2491 | const register double tmp57_1 = B_0_3*w30; | |
2492 | const register double tmp18_1 = B_1_1*w32; | |
2493 | const register double tmp53_1 = B_1_0*w41; | |
2494 | const register double tmp61_1 = B_1_3*w36; | |
2495 | const register double tmp27_1 = B_0_3*w38; | |
2496 | const register double tmp64_1 = B_0_2*w30; | |
2497 | const register double tmp76_1 = B_0_1*w38; | |
2498 | const register double tmp39_1 = tmp2_0*w34; | |
2499 | const register double tmp62_1 = B_0_1*w31; | |
2500 | const register double tmp56_1 = B_0_0*w31; | |
2501 | const register double tmp49_1 = B_1_1*w36; | |
2502 | const register double tmp2_1 = B_0_2*w31; | |
2503 | const register double tmp23_1 = B_0_2*w33; | |
2504 | const register double tmp38_1 = B_1_1*w43; | |
2505 | const register double tmp74_1 = B_1_2*w41; | |
2506 | const register double tmp43_1 = B_1_1*w41; | |
2507 | const register double tmp58_1 = B_0_2*w28; | |
2508 | const register double tmp67_1 = B_0_0*w33; | |
2509 | const register double tmp33_1 = tmp0_0*w39; | |
2510 | const register double tmp4_1 = B_0_0*w28; | |
2511 | const register double tmp20_1 = B_0_0*w30; | |
2512 | const register double tmp13_1 = B_0_2*w38; | |
2513 | const register double tmp65_1 = B_1_2*w43; | |
2514 | const register double tmp0_1 = tmp0_0*w29; | |
2515 | const register double tmp41_1 = tmp3_0*w33; | |
2516 | const register double tmp73_1 = B_0_2*w37; | |
2517 | const register double tmp69_1 = B_0_0*w38; | |
2518 | const register double tmp48_1 = B_1_2*w39; | |
2519 | const register double tmp59_1 = B_0_1*w33; | |
2520 | const register double tmp17_1 = B_1_3*w41; | |
2521 | const register double tmp5_1 = B_0_3*w33; | |
2522 | const register double tmp3_1 = B_0_1*w30; | |
2523 | const register double tmp21_1 = B_0_1*w28; | |
2524 | const register double tmp42_1 = B_1_0*w29; | |
2525 | const register double tmp54_1 = B_1_2*w32; | |
2526 | const register double tmp60_1 = B_1_0*w39; | |
2527 | const register double tmp32_1 = tmp1_0*w36; | |
2528 | const register double tmp10_1 = B_0_1*w37; | |
2529 | const register double tmp14_1 = B_0_0*w35; | |
2530 | const register double tmp29_1 = B_0_1*w35; | |
2531 | const register double tmp26_1 = B_1_2*w36; | |
2532 | const register double tmp30_1 = B_1_3*w43; | |
2533 | const register double tmp70_1 = B_0_2*w35; | |
2534 | const register double tmp34_1 = B_1_3*w39; | |
2535 | const register double tmp51_1 = B_1_0*w43; | |
2536 | const register double tmp31_1 = B_0_2*w34; | |
2537 | const register double tmp45_1 = tmp3_0*w28; | |
2538 | const register double tmp11_1 = tmp1_0*w39; | |
2539 | const register double tmp52_1 = B_1_1*w29; | |
2540 | const register double tmp44_1 = B_1_3*w32; | |
2541 | const register double tmp25_1 = B_1_1*w39; | |
2542 | const register double tmp47_1 = tmp2_0*w33; | |
2543 | const register double tmp72_1 = B_1_3*w29; | |
2544 | const register double tmp40_1 = tmp2_0*w28; | |
2545 | const register double tmp46_1 = B_1_2*w40; | |
2546 | const register double tmp36_1 = B_1_2*w42; | |
2547 | const register double tmp24_1 = B_0_0*w37; | |
2548 | const register double tmp77_1 = B_0_3*w35; | |
2549 | const register double tmp68_1 = B_0_3*w37; | |
2550 | const register double tmp78_1 = B_0_0*w34; | |
2551 | const register double tmp12_1 = tmp0_0*w36; | |
2552 | const register double tmp75_1 = B_1_0*w32; | |
2553 | EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1 + tmp4_1 + tmp5_1; | |
2554 | EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=tmp6_1 + tmp7_1 + tmp8_1 + tmp9_1; | |
2555 | EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1 + tmp14_1 + tmp15_1; | |
2556 | EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=tmp16_1 + tmp17_1 + tmp18_1 + tmp19_1 + tmp20_1 + tmp21_1 + tmp22_1 + tmp23_1; | |
2557 | EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=tmp24_1 + tmp25_1 + tmp26_1 + tmp27_1 + tmp28_1 + tmp29_1 + tmp30_1 + tmp31_1; | |
2558 | EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=tmp32_1 + tmp33_1 + tmp6_1 + tmp9_1; | |
2559 | EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=tmp34_1 + tmp35_1 + tmp36_1 + tmp37_1 + tmp38_1 + tmp39_1; | |
2560 | EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=tmp32_1 + tmp33_1 + tmp40_1 + tmp41_1; | |
2561 | EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=tmp42_1 + tmp43_1 + tmp44_1 + tmp45_1 + tmp46_1 + tmp47_1; | |
2562 | EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=tmp45_1 + tmp47_1 + tmp48_1 + tmp49_1 + tmp50_1 + tmp51_1; | |
2563 | EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=tmp37_1 + tmp39_1 + tmp52_1 + tmp53_1 + tmp54_1 + tmp55_1; | |
2564 | EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=tmp11_1 + tmp12_1 + tmp56_1 + tmp57_1 + tmp58_1 + tmp59_1; | |
2565 | EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=tmp60_1 + tmp61_1 + tmp62_1 + tmp63_1 + tmp64_1 + tmp65_1 + tmp66_1 + tmp67_1; | |
2566 | EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=tmp0_1 + tmp1_1 + tmp68_1 + tmp69_1 + tmp70_1 + tmp71_1; | |
2567 | EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=tmp40_1 + tmp41_1 + tmp7_1 + tmp8_1; | |
2568 | EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=tmp72_1 + tmp73_1 + tmp74_1 + tmp75_1 + tmp76_1 + tmp77_1 + tmp78_1 + tmp79_1; | |
2569 | } | |
2570 | } | |
2571 | } else { /* constant data */ | |
2572 | for (index_t k=0; k<numEq; k++) { | |
2573 | for (index_t m=0; m<numComp; m++) { | |
2574 | const register double B_0 = B_p[INDEX3(k,0,m, numEq, 2)]; | |
2575 | const register double B_1 = B_p[INDEX3(k,1,m, numEq, 2)]; | |
2576 | const register double tmp6_1 = B_1*w50; | |
2577 | const register double tmp1_1 = B_1*w45; | |
2578 | const register double tmp5_1 = B_1*w49; | |
2579 | const register double tmp4_1 = B_1*w48; | |
2580 | const register double tmp0_1 = B_0*w44; | |
2581 | const register double tmp2_1 = B_0*w46; | |
2582 | const register double tmp7_1 = B_0*w51; | |
2583 | const register double tmp3_1 = B_0*w47; | |
2584 | EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=tmp0_1 + tmp1_1; | |
2585 | EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=tmp1_1 + tmp2_1; | |
2586 | EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=tmp3_1 + tmp4_1; | |
2587 | EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=tmp0_1 + tmp5_1; | |
2588 | EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=tmp3_1 + tmp6_1; | |
2589 | EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=tmp2_1 + tmp4_1; | |
2590 | EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=tmp2_1 + tmp6_1; | |
2591 | EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=tmp4_1 + tmp7_1; | |
2592 | EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=tmp5_1 + tmp7_1; | |
2593 | EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=tmp6_1 + tmp7_1; | |
2594 | EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=tmp2_1 + tmp5_1; | |
2595 | EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=tmp0_1 + tmp4_1; | |
2596 | EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=tmp0_1 + tmp6_1; | |
2597 | EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=tmp1_1 + tmp3_1; | |
2598 | EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=tmp1_1 + tmp7_1; | |
2599 | EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=tmp3_1 + tmp5_1; | |
2600 | } | |
2601 | } | |
2602 | } | |
2603 | } | |
2604 | /////////////// | |
2605 | // process C // | |
2606 | /////////////// | |
2607 | if (!C.isEmpty()) { | |
2608 | add_EM_S=true; | |
2609 | const double* C_p=const_cast<escript::Data*>(&C)->getSampleDataRO(e); | |
2610 | if (C.actsExpanded()) { | |
2611 | for (index_t k=0; k<numEq; k++) { | |
2612 | for (index_t m=0; m<numComp; m++) { | |
2613 | const register double C_0_0 = C_p[INDEX4(k,m,0, 0, numEq,numComp,2)]; | |
2614 | const register double C_1_0 = C_p[INDEX4(k,m,1, 0, numEq,numComp,2)]; | |
2615 | const register double C_0_1 = C_p[INDEX4(k,m,0, 1, numEq,numComp,2)]; | |
2616 | const register double C_1_1 = C_p[INDEX4(k,m,1, 1, numEq,numComp,2)]; | |
2617 | const register double C_0_2 = C_p[INDEX4(k,m,0, 2, numEq,numComp,2)]; | |
2618 | const register double C_1_2 = C_p[INDEX4(k,m,1, 2, numEq,numComp,2)]; | |
2619 | const register double C_0_3 = C_p[INDEX4(k,m,0, 3, numEq,numComp,2)]; | |
2620 | const register double C_1_3 = C_p[INDEX4(k,m,1, 3, numEq,numComp,2)]; | |
2621 | const register double tmp2_0 = C_0_1 + C_0_3; | |
2622 | const register double tmp1_0 = C_1_2 + C_1_3; | |
2623 | const register double tmp3_0 = C_0_0 + C_0_2; | |
2624 | const register double tmp0_0 = C_1_0 + C_1_1; | |
2625 | const register double tmp64_1 = C_0_2*w30; | |
2626 | const register double tmp14_1 = C_0_2*w28; | |
2627 | const register double tmp19_1 = C_0_3*w31; | |
2628 | const register double tmp22_1 = C_1_0*w40; | |
2629 | const register double tmp37_1 = tmp3_0*w35; | |
2630 | const register double tmp29_1 = C_0_1*w35; | |
2631 | const register double tmp73_1 = C_0_2*w37; | |
2632 | const register double tmp74_1 = C_1_2*w41; | |
2633 | const register double tmp52_1 = C_1_3*w39; | |
2634 | const register double tmp25_1 = C_1_1*w39; | |
2635 | const register double tmp62_1 = C_0_1*w31; | |
2636 | const register double tmp79_1 = C_1_1*w40; | |
2637 | const register double tmp43_1 = C_1_1*w36; | |
2638 | const register double tmp27_1 = C_0_3*w38; | |
2639 | const register double tmp28_1 = C_1_0*w42; | |
2640 | const register double tmp63_1 = C_1_1*w42; | |
2641 | const register double tmp59_1 = C_0_3*w34; | |
2642 | const register double tmp72_1 = C_1_3*w29; | |
2643 | const register double tmp40_1 = tmp2_0*w35; | |
2644 | const register double tmp13_1 = C_0_3*w30; | |
2645 | const register double tmp51_1 = C_1_2*w40; | |
2646 | const register double tmp54_1 = C_1_2*w42; | |
2647 | const register double tmp12_1 = C_0_0*w31; | |
2648 | const register double tmp2_1 = tmp1_0*w32; | |
2649 | const register double tmp68_1 = C_0_2*w31; | |
2650 | const register double tmp75_1 = C_1_0*w32; | |
2651 | const register double tmp49_1 = C_1_1*w41; | |
2652 | const register double tmp4_1 = C_0_2*w35; | |
2653 | const register double tmp66_1 = C_0_3*w28; | |
2654 | const register double tmp56_1 = C_0_1*w37; | |
2655 | const register double tmp5_1 = C_0_1*w34; | |
2656 | const register double tmp38_1 = tmp2_0*w34; | |
2657 | const register double tmp76_1 = C_0_1*w38; | |
2658 | const register double tmp21_1 = C_0_1*w28; | |
2659 | const register double tmp69_1 = C_0_1*w30; | |
2660 | const register double tmp53_1 = C_1_0*w36; | |
2661 | const register double tmp42_1 = C_1_2*w39; | |
2662 | const register double tmp32_1 = tmp1_0*w29; | |
2663 | const register double tmp45_1 = C_1_0*w43; | |
2664 | const register double tmp33_1 = tmp0_0*w32; | |
2665 | const register double tmp35_1 = C_1_0*w41; | |
2666 | const register double tmp26_1 = C_1_2*w36; | |
2667 | const register double tmp67_1 = C_0_0*w33; | |
2668 | const register double tmp31_1 = C_0_2*w34; | |
2669 | const register double tmp20_1 = C_0_0*w30; | |
2670 | const register double tmp70_1 = C_0_0*w28; | |
2671 | const register double tmp8_1 = tmp0_0*w39; | |
2672 | const register double tmp30_1 = C_1_3*w43; | |
2673 | const register double tmp0_1 = tmp0_0*w29; | |
2674 | const register double tmp17_1 = C_1_3*w41; | |
2675 | const register double tmp58_1 = C_0_0*w35; | |
2676 | const register double tmp9_1 = tmp3_0*w33; | |
2677 | const register double tmp61_1 = C_1_3*w36; | |
2678 | const register double tmp41_1 = tmp3_0*w34; | |
2679 | const register double tmp50_1 = C_1_3*w32; | |
2680 | const register double tmp18_1 = C_1_1*w32; | |
2681 | const register double tmp6_1 = tmp1_0*w36; | |
2682 | const register double tmp3_1 = C_0_0*w38; | |
2683 | const register double tmp34_1 = C_1_1*w29; | |
2684 | const register double tmp77_1 = C_0_3*w35; | |
2685 | const register double tmp65_1 = C_1_2*w43; | |
2686 | const register double tmp71_1 = C_0_3*w33; | |
2687 | const register double tmp55_1 = C_1_1*w43; | |
2688 | const register double tmp46_1 = tmp3_0*w28; | |
2689 | const register double tmp24_1 = C_0_0*w37; | |
2690 | const register double tmp10_1 = tmp1_0*w39; | |
2691 | const register double tmp48_1 = C_1_0*w29; | |
2692 | const register double tmp15_1 = C_0_1*w33; | |
2693 | const register double tmp36_1 = C_1_2*w32; | |
2694 | const register double tmp60_1 = C_1_0*w39; | |
2695 | const register double tmp47_1 = tmp2_0*w33; | |
2696 | const register double tmp16_1 = C_1_2*w29; | |
2697 | const register double tmp1_1 = C_0_3*w37; | |
2698 | const register double tmp7_1 = tmp2_0*w28; | |
2699 | const register double tmp39_1 = C_1_3*w40; | |
2700 | const register double tmp44_1 = C_1_3*w42; | |
2701 | const register double tmp57_1 = C_0_2*w38; | |
2702 | const register double tmp78_1 = C_0_0*w34; | |
2703 | const register double tmp11_1 = tmp0_0*w36; | |
2704 | const register double tmp23_1 = C_0_2*w33; | |
2705 | EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=tmp0_1 + tmp1_1 + tmp2_1 + tmp3_1 + tmp4_1 + tmp5_1; | |
2706 | EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=tmp6_1 + tmp7_1 + tmp8_1 + tmp9_1; | |
2707 | EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=tmp10_1 + tmp11_1 + tmp12_1 + tmp13_1 + tmp14_1 + tmp15_1; | |
2708 | EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=tmp16_1 + tmp17_1 + tmp18_1 + tmp19_1 + tmp20_1 + tmp21_1 + tmp22_1 + tmp23_1; | |
2709 | EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=tmp24_1 + tmp25_1 + tmp26_1 + tmp27_1 + tmp28_1 + tmp29_1 + tmp30_1 + tmp31_1; | |
2710 | EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=tmp32_1 + tmp33_1 + tmp7_1 + tmp9_1; | |
2711 | EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=tmp34_1 + tmp35_1 + tmp36_1 + tmp37_1 + tmp38_1 + tmp39_1; | |
2712 | EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=tmp32_1 + tmp33_1 + tmp40_1 + tmp41_1; | |
2713 | EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=tmp42_1 + tmp43_1 + tmp44_1 + tmp45_1 + tmp46_1 + tmp47_1; | |
2714 | EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=tmp46_1 + tmp47_1 + tmp48_1 + tmp49_1 + tmp50_1 + tmp51_1; | |
2715 | EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=tmp37_1 + tmp38_1 + tmp52_1 + tmp53_1 + tmp54_1 + tmp55_1; | |
2716 | EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=tmp10_1 + tmp11_1 + tmp56_1 + tmp57_1 + tmp58_1 + tmp59_1; | |
2717 | EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=tmp60_1 + tmp61_1 + tmp62_1 + tmp63_1 + tmp64_1 + tmp65_1 + tmp66_1 + tmp67_1; | |
2718 | EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=tmp0_1 + tmp2_1 + tmp68_1 + tmp69_1 + tmp70_1 + tmp71_1; | |
2719 | EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=tmp40_1 + tmp41_1 + tmp6_1 + tmp8_1; | |
2720 | EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=tmp72_1 + tmp73_1 + tmp74_1 + tmp75_1 + tmp76_1 + tmp77_1 + tmp78_1 + tmp79_1; | |
2721 | } | |
2722 | } | |
2723 | } else { /* constant data */ | |
2724 | for (index_t k=0; k<numEq; k++) { | |
2725 | for (index_t m=0; m<numComp; m++) { | |
2726 | const register double C_0 = C_p[INDEX3(k, m, 0, numEq, numComp)]; | |
2727 | const register double C_1 = C_p[INDEX3(k, m, 1, numEq, numComp)]; | |
2728 | const register double tmp1_1 = C_1*w45; | |
2729 | const register double tmp3_1 = C_0*w51; | |
2730 | const register double tmp4_1 = C_0*w44; | |
2731 | const register double tmp7_1 = C_0*w46; | |
2732 | const register double tmp5_1 = C_1*w49; | |
2733 | const register double tmp2_1 = C_1*w48; | |
2734 | const register double tmp0_1 = C_0*w47; | |
2735 | const register double tmp6_1 = C_1*w50; | |
2736 | EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=tmp0_1 + tmp1_1; | |
2737 | EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=tmp2_1 + tmp3_1; | |
2738 | EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=tmp2_1 + tmp4_1; | |
2739 | EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=tmp4_1 + tmp5_1; | |
2740 | EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=tmp0_1 + tmp6_1; | |
2741 | EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=tmp1_1 + tmp3_1; | |
2742 | EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=tmp5_1 + tmp7_1; | |
2743 | EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=tmp1_1 + tmp7_1; | |
2744 | EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=tmp3_1 + tmp6_1; | |
2745 | EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=tmp3_1 + tmp5_1; | |
2746 | EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=tmp6_1 + tmp7_1; | |
2747 | EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=tmp0_1 + tmp2_1; | |
2748 | EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=tmp4_1 + tmp6_1; | |
2749 | EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=tmp1_1 + tmp4_1; | |
2750 | EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=tmp2_1 + tmp7_1; | |
2751 | EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=tmp0_1 + tmp5_1; | |
2752 | } | |
2753 | } | |
2754 | } | |
2755 | } | |
2756 | /////////////// | |
2757 | // process D // | |
2758 | /////////////// | |
2759 | if (!D.isEmpty()) { | |
2760 | add_EM_S=true; | |
2761 | const double* D_p=const_cast<escript::Data*>(&D)->getSampleDataRO(e); | |
2762 | if (D.actsExpanded()) { | |
2763 | for (index_t k=0; k<numEq; k++) { | |
2764 | for (index_t m=0; m<numComp; m++) { | |
2765 | const register double D_0 = D_p[INDEX3(k, m, 0, numEq, numComp)]; | |
2766 | const register double D_1 = D_p[INDEX3(k, m, 1, numEq, numComp)]; | |
2767 | const register double D_2 = D_p[INDEX3(k, m, 2, numEq, numComp)]; | |
2768 | const register double D_3 = D_p[INDEX3(k, m, 3, numEq, numComp)]; | |
2769 | const register double tmp4_0 = D_1 + D_3; | |
2770 | const register double tmp2_0 = D_0 + D_1 + D_2 + D_3; | |
2771 | const register double tmp5_0 = D_0 + D_2; | |
2772 | const register double tmp0_0 = D_0 + D_1; | |
2773 | const register double tmp6_0 = D_0 + D_3; | |
2774 | const register double tmp1_0 = D_2 + D_3; | |
2775 | const register double tmp3_0 = D_1 + D_2; | |
2776 | const register double tmp16_1 = D_1*w56; | |
2777 | const register double tmp14_1 = tmp6_0*w54; | |
2778 | const register double tmp8_1 = D_3*w55; | |
2779 | const register double tmp2_1 = tmp2_0*w54; | |
2780 | const register double tmp12_1 = tmp5_0*w52; | |
2781 | const register double tmp4_1 = tmp0_0*w53; | |
2782 | const register double tmp3_1 = tmp1_0*w52; | |
2783 | const register double tmp13_1 = tmp4_0*w53; | |
2784 | const register double tmp10_1 = tmp4_0*w52; | |
2785 | const register double tmp1_1 = tmp1_0*w53; | |
2786 | const register double tmp7_1 = D_3*w56; | |
2787 | const register double tmp0_1 = tmp0_0*w52; | |
2788 | const register double tmp11_1 = tmp5_0*w53; | |
2789 | const register double tmp9_1 = D_0*w56; | |
2790 | const register double tmp5_1 = tmp3_0*w54; | |
2791 | const register double tmp18_1 = D_2*w56; | |
2792 | const register double tmp17_1 = D_1*w55; | |
2793 | const register double tmp6_1 = D_0*w55; | |
2794 | const register double tmp15_1 = D_2*w55; | |
2795 | EM_S[INDEX4(k,m,0,1,numEq,numComp,4)]+=tmp0_1 + tmp1_1; | |
2796 | EM_S[INDEX4(k,m,1,2,numEq,numComp,4)]+=tmp2_1; | |
2797 | EM_S[INDEX4(k,m,3,2,numEq,numComp,4)]+=tmp3_1 + tmp4_1; | |
2798 | EM_S[INDEX4(k,m,0,0,numEq,numComp,4)]+=tmp5_1 + tmp6_1 + tmp7_1; | |
2799 | EM_S[INDEX4(k,m,3,3,numEq,numComp,4)]+=tmp5_1 + tmp8_1 + tmp9_1; | |
2800 | EM_S[INDEX4(k,m,3,0,numEq,numComp,4)]+=tmp2_1; | |
2801 | EM_S[INDEX4(k,m,3,1,numEq,numComp,4)]+=tmp10_1 + tmp11_1; | |
2802 | EM_S[INDEX4(k,m,2,1,numEq,numComp,4)]+=tmp2_1; | |
2803 | EM_S[INDEX4(k,m,0,2,numEq,numComp,4)]+=tmp12_1 + tmp13_1; | |
2804 | EM_S[INDEX4(k,m,2,0,numEq,numComp,4)]+=tmp12_1 + tmp13_1; | |
2805 | EM_S[INDEX4(k,m,1,3,numEq,numComp,4)]+=tmp10_1 + tmp11_1; | |
2806 | EM_S[INDEX4(k,m,2,3,numEq,numComp,4)]+=tmp3_1 + tmp4_1; | |
2807 | EM_S[INDEX4(k,m,2,2,numEq,numComp,4)]+=tmp14_1 + tmp15_1 + tmp16_1; | |
2808 | EM_S[INDEX4(k,m,1,0,numEq,numComp,4)]+=tmp0_1 + tmp1_1; | |
2809 | EM_S[INDEX4(k,m,0,3,numEq,numComp,4)]+=tmp2_1; | |
2810 | EM_S[INDEX4(k,m,1,1,numEq,numComp,4)]+=tmp14_1 + tmp17_1 + tmp18_1; | |
2811 | } | |
2812 | } | |
2813 | } else { /* constant data */ | |
2814 | for (index_t k=0; k<numEq; k++) { | |
2815 | for (index_t m=0; m<numComp; m++) { | |
2816 | const register double D_0 = D_p[INDEX2(k, m, numEq)]; | |
2817 | const register double tmp2_1 = D_0*w59; | |
2818 | const register double tmp1_1 = D_0*w58; | |
2819 | const register double tmp0_1 = D_0*w57; | |