/[escript]/branches/ripleygmg_from_3668/ripley/src/Rectangle.cpp
ViewVC logotype

Contents of /branches/ripleygmg_from_3668/ripley/src/Rectangle.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3707 - (show annotations)
Mon Dec 5 05:48:25 2011 UTC (8 years ago) by caltinay
File size: 42671 byte(s)
Fixes to the face element code.

1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2011 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14 #include <ripley/Rectangle.h>
15 extern "C" {
16 #include "paso/SystemMatrixPattern.h"
17 }
18
19 #if USE_SILO
20 #include <silo.h>
21 #ifdef ESYS_MPI
22 #include <pmpio.h>
23 #endif
24 #endif
25
26 #include <iomanip>
27
28 using namespace std;
29
30 namespace ripley {
31
32 Rectangle::Rectangle(int n0, int n1, double l0, double l1, int d0, int d1) :
33 RipleyDomain(2),
34 m_gNE0(n0),
35 m_gNE1(n1),
36 m_l0(l0),
37 m_l1(l1),
38 m_NX(d0),
39 m_NY(d1)
40 {
41 // ensure number of subdivisions is valid and nodes can be distributed
42 // among number of ranks
43 if (m_NX*m_NY != m_mpiInfo->size)
44 throw RipleyException("Invalid number of spatial subdivisions");
45
46 if (n0%m_NX > 0 || n1%m_NY > 0)
47 throw RipleyException("Number of elements must be separable into number of ranks in each dimension");
48
49 // local number of elements
50 m_NE0 = n0/m_NX;
51 m_NE1 = n1/m_NY;
52 // local number of nodes (not necessarily owned)
53 m_N0 = m_NE0+1;
54 m_N1 = m_NE1+1;
55 // bottom-left node is at (offset0,offset1) in global mesh
56 m_offset0 = m_NE0*(m_mpiInfo->rank%m_NX);
57 m_offset1 = m_NE1*(m_mpiInfo->rank/m_NX);
58 populateSampleIds();
59 }
60
61 Rectangle::~Rectangle()
62 {
63 }
64
65 string Rectangle::getDescription() const
66 {
67 return "ripley::Rectangle";
68 }
69
70 bool Rectangle::operator==(const AbstractDomain& other) const
71 {
72 if (dynamic_cast<const Rectangle*>(&other))
73 return this==&other;
74
75 return false;
76 }
77
78 void Rectangle::dump(const string& fileName) const
79 {
80 #if USE_SILO
81 string fn(fileName);
82 if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
83 fn+=".silo";
84 }
85
86 const int NUM_SILO_FILES = 1;
87 const char* blockDirFmt = "/block%04d";
88 int driver=DB_HDF5;
89 string siloPath;
90 DBfile* dbfile = NULL;
91
92 #ifdef ESYS_MPI
93 PMPIO_baton_t* baton = NULL;
94 #endif
95
96 if (m_mpiInfo->size > 1) {
97 #ifdef ESYS_MPI
98 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
99 0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
100 PMPIO_DefaultClose, (void*)&driver);
101 // try the fallback driver in case of error
102 if (!baton && driver != DB_PDB) {
103 driver = DB_PDB;
104 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
105 0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
106 PMPIO_DefaultClose, (void*)&driver);
107 }
108 if (baton) {
109 char str[64];
110 snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
111 siloPath = str;
112 dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
113 }
114 #endif
115 } else {
116 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
117 getDescription().c_str(), driver);
118 // try the fallback driver in case of error
119 if (!dbfile && driver != DB_PDB) {
120 driver = DB_PDB;
121 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
122 getDescription().c_str(), driver);
123 }
124 }
125
126 if (!dbfile)
127 throw RipleyException("dump: Could not create Silo file");
128
129 /*
130 if (driver==DB_HDF5) {
131 // gzip level 1 already provides good compression with minimal
132 // performance penalty. Some tests showed that gzip levels >3 performed
133 // rather badly on escript data both in terms of time and space
134 DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
135 }
136 */
137
138 boost::scoped_ptr<double> x(new double[m_N0]);
139 boost::scoped_ptr<double> y(new double[m_N1]);
140 double* coords[2] = { x.get(), y.get() };
141 pair<double,double> xdx = getFirstCoordAndSpacing(0);
142 pair<double,double> ydy = getFirstCoordAndSpacing(1);
143 #pragma omp parallel
144 {
145 #pragma omp for
146 for (dim_t i0 = 0; i0 < m_N0; i0++) {
147 coords[0][i0]=xdx.first+i0*xdx.second;
148 }
149 #pragma omp for
150 for (dim_t i1 = 0; i1 < m_N1; i1++) {
151 coords[1][i1]=ydy.first+i1*ydy.second;
152 }
153 }
154 IndexVector dims = getNumNodesPerDim();
155
156 // write mesh
157 DBPutQuadmesh(dbfile, "mesh", NULL, coords, &dims[0], 2, DB_DOUBLE,
158 DB_COLLINEAR, NULL);
159
160 // write node ids
161 DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], &dims[0], 2,
162 NULL, 0, DB_INT, DB_NODECENT, NULL);
163
164 // write element ids
165 dims = getNumElementsPerDim();
166 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
167 &dims[0], 2, NULL, 0, DB_INT, DB_ZONECENT, NULL);
168
169 // rank 0 writes multimesh and multivar
170 if (m_mpiInfo->rank == 0) {
171 vector<string> tempstrings;
172 vector<char*> names;
173 for (dim_t i=0; i<m_mpiInfo->size; i++) {
174 stringstream path;
175 path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
176 tempstrings.push_back(path.str());
177 names.push_back((char*)tempstrings.back().c_str());
178 }
179 vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
180 DBSetDir(dbfile, "/");
181 DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
182 &types[0], NULL);
183 tempstrings.clear();
184 names.clear();
185 for (dim_t i=0; i<m_mpiInfo->size; i++) {
186 stringstream path;
187 path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
188 tempstrings.push_back(path.str());
189 names.push_back((char*)tempstrings.back().c_str());
190 }
191 types.assign(m_mpiInfo->size, DB_QUADVAR);
192 DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
193 &types[0], NULL);
194 tempstrings.clear();
195 names.clear();
196 for (dim_t i=0; i<m_mpiInfo->size; i++) {
197 stringstream path;
198 path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
199 tempstrings.push_back(path.str());
200 names.push_back((char*)tempstrings.back().c_str());
201 }
202 DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
203 &types[0], NULL);
204 }
205
206 if (m_mpiInfo->size > 1) {
207 #ifdef ESYS_MPI
208 PMPIO_HandOffBaton(baton, dbfile);
209 PMPIO_Finish(baton);
210 #endif
211 } else {
212 DBClose(dbfile);
213 }
214
215 #else // USE_SILO
216 throw RipleyException("dump(): no Silo support");
217 #endif
218 }
219
220 const int* Rectangle::borrowSampleReferenceIDs(int fsType) const
221 {
222 switch (fsType) {
223 case Nodes:
224 return &m_nodeId[0];
225 case Elements:
226 return &m_elementId[0];
227 case FaceElements:
228 return &m_faceId[0];
229 default:
230 break;
231 }
232
233 stringstream msg;
234 msg << "borrowSampleReferenceIDs() not implemented for function space type "
235 << functionSpaceTypeAsString(fsType);
236 throw RipleyException(msg.str());
237 }
238
239 bool Rectangle::ownSample(int fsCode, index_t id) const
240 {
241 #ifdef ESYS_MPI
242 if (fsCode != ReducedNodes) {
243 const index_t myFirst=m_nodeDistribution[m_mpiInfo->rank];
244 const index_t myLast=m_nodeDistribution[m_mpiInfo->rank+1]-1;
245 return (m_nodeId[id]>=myFirst && m_nodeId[id]<=myLast);
246 } else {
247 stringstream msg;
248 msg << "ownSample() not implemented for "
249 << functionSpaceTypeAsString(fsCode);
250 throw RipleyException(msg.str());
251 }
252 #else
253 return true;
254 #endif
255 }
256
257 void Rectangle::setToGradient(escript::Data& out, const escript::Data& cIn) const
258 {
259 escript::Data& in = *const_cast<escript::Data*>(&cIn);
260 const dim_t numComp = in.getDataPointSize();
261 const double h0 = m_l0/m_gNE0;
262 const double h1 = m_l1/m_gNE1;
263 if (out.getFunctionSpace().getTypeCode() == Elements) {
264 /* GENERATOR SNIP_GRAD_ELEMENTS TOP */
265 const double tmp0_13 = 0.78867513459481288225/h1;
266 const double tmp0_0 = 0.78867513459481288225/h0;
267 const double tmp0_4 = 0.21132486540518711775/h0;
268 const double tmp0_10 = 0.78867513459481288225/h1;
269 const double tmp0_1 = 0.21132486540518711775/h0;
270 const double tmp0_8 = -0.21132486540518711775/h1;
271 const double tmp0_14 = 0.21132486540518711775/h1;
272 const double tmp0_5 = 0.78867513459481288225/h0;
273 const double tmp0_11 = -0.78867513459481288225/h1;
274 const double tmp0_2 = -0.21132486540518711775/h0;
275 const double tmp0_9 = 0.21132486540518711775/h1;
276 const double tmp0_15 = -0.21132486540518711775/h1;
277 const double tmp0_6 = -0.78867513459481288225/h0;
278 const double tmp0_3 = -0.78867513459481288225/h0;
279 const double tmp0_12 = -0.78867513459481288225/h1;
280 const double tmp0_7 = -0.21132486540518711775/h0;
281 #pragma omp parallel for
282 for (index_t k1=0; k1 < m_NE1; ++k1) {
283 for (index_t k0=0; k0 < m_NE0; ++k0) {
284 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0));
285 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0));
286 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0));
287 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0));
288 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0));
289 for (index_t i=0; i < numComp; ++i) {
290 o[INDEX3(i,0,0,numComp,2)] = f_00[i]*tmp0_3 + f_01[i]*tmp0_2 + f_10[i]*tmp0_0 + f_11[i]*tmp0_1;
291 o[INDEX3(i,1,0,numComp,2)] = f_00[i]*tmp0_11 + f_01[i]*tmp0_10 + f_10[i]*tmp0_8 + f_11[i]*tmp0_9;
292 o[INDEX3(i,0,1,numComp,2)] = f_00[i]*tmp0_3 + f_01[i]*tmp0_2 + f_10[i]*tmp0_0 + f_11[i]*tmp0_1;
293 o[INDEX3(i,1,1,numComp,2)] = f_00[i]*tmp0_15 + f_01[i]*tmp0_14 + f_10[i]*tmp0_12 + f_11[i]*tmp0_13;
294 o[INDEX3(i,0,2,numComp,2)] = f_00[i]*tmp0_7 + f_01[i]*tmp0_6 + f_10[i]*tmp0_4 + f_11[i]*tmp0_5;
295 o[INDEX3(i,1,2,numComp,2)] = f_00[i]*tmp0_11 + f_01[i]*tmp0_10 + f_10[i]*tmp0_8 + f_11[i]*tmp0_9;
296 o[INDEX3(i,0,3,numComp,2)] = f_00[i]*tmp0_7 + f_01[i]*tmp0_6 + f_10[i]*tmp0_4 + f_11[i]*tmp0_5;
297 o[INDEX3(i,1,3,numComp,2)] = f_00[i]*tmp0_15 + f_01[i]*tmp0_14 + f_10[i]*tmp0_12 + f_11[i]*tmp0_13;
298 } /* end of component loop i */
299 } /* end of k0 loop */
300 } /* end of k1 loop */
301 /* GENERATOR SNIP_GRAD_ELEMENTS BOTTOM */
302 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
303 /* GENERATOR SNIP_GRAD_FACES TOP */
304 if (m_faceOffset[0] > -1) {
305 const double tmp0_0 = 0.78867513459481288225/h0;
306 const double tmp0_4 = 0.21132486540518711775/h0;
307 const double tmp0_1 = 0.21132486540518711775/h0;
308 const double tmp0_8 = 1.0/h1;
309 const double tmp0_5 = 0.78867513459481288225/h0;
310 const double tmp0_2 = -0.21132486540518711775/h0;
311 const double tmp0_9 = -1/h1;
312 const double tmp0_6 = -0.78867513459481288225/h0;
313 const double tmp0_3 = -0.78867513459481288225/h0;
314 const double tmp0_7 = -0.21132486540518711775/h0;
315 #pragma omp parallel for
316 for (index_t k1=0; k1 < m_NE1; ++k1) {
317 const register double* f_10 = in.getSampleDataRO(INDEX2(1,k1, m_N0));
318 const register double* f_11 = in.getSampleDataRO(INDEX2(1,k1+1, m_N0));
319 const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0));
320 const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0));
321 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
322 for (index_t i=0; i < numComp; ++i) {
323 o[INDEX3(i,0,0,numComp,2)] = f_00[i]*tmp0_3 + f_01[i]*tmp0_2 + f_10[i]*tmp0_0 + f_11[i]*tmp0_1;
324 o[INDEX3(i,1,0,numComp,2)] = f_00[i]*tmp0_9 + f_01[i]*tmp0_8;
325 o[INDEX3(i,0,1,numComp,2)] = f_00[i]*tmp0_7 + f_01[i]*tmp0_6 + f_10[i]*tmp0_4 + f_11[i]*tmp0_5;
326 o[INDEX3(i,1,1,numComp,2)] = f_00[i]*tmp0_9 + f_01[i]*tmp0_8;
327 } /* end of component loop i */
328 } /* end of k1 loop */
329 } /* end of face 0 */
330 if (m_faceOffset[1] > -1) {
331 const double tmp0_0 = 0.78867513459481288225/h0;
332 const double tmp0_4 = 0.21132486540518711775/h0;
333 const double tmp0_1 = 0.21132486540518711775/h0;
334 const double tmp0_8 = -1/h1;
335 const double tmp0_5 = 0.78867513459481288225/h0;
336 const double tmp0_2 = -0.21132486540518711775/h0;
337 const double tmp0_9 = 1.0/h1;
338 const double tmp0_6 = -0.78867513459481288225/h0;
339 const double tmp0_3 = -0.78867513459481288225/h0;
340 const double tmp0_7 = -0.21132486540518711775/h0;
341 #pragma omp parallel for
342 for (index_t k1=0; k1 < m_NE1; ++k1) {
343 const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0));
344 const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0));
345 const register double* f_01 = in.getSampleDataRO(INDEX2(m_N0-2,k1+1, m_N0));
346 const register double* f_00 = in.getSampleDataRO(INDEX2(m_N0-2,k1, m_N0));
347 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
348 for (index_t i=0; i < numComp; ++i) {
349 o[INDEX3(i,0,0,numComp,2)] = f_00[i]*tmp0_3 + f_01[i]*tmp0_2 + f_10[i]*tmp0_0 + f_11[i]*tmp0_1;
350 o[INDEX3(i,1,0,numComp,2)] = f_10[i]*tmp0_8 + f_11[i]*tmp0_9;
351 o[INDEX3(i,0,1,numComp,2)] = f_00[i]*tmp0_7 + f_01[i]*tmp0_6 + f_10[i]*tmp0_4 + f_11[i]*tmp0_5;
352 o[INDEX3(i,1,1,numComp,2)] = f_10[i]*tmp0_8 + f_11[i]*tmp0_9;
353 } /* end of component loop i */
354 } /* end of k1 loop */
355 } /* end of face 1 */
356 if (m_faceOffset[2] > -1) {
357 const double tmp0_0 = -1/h0;
358 const double tmp0_4 = 0.21132486540518711775/h1;
359 const double tmp0_1 = 1.0/h0;
360 const double tmp0_8 = 0.78867513459481288225/h1;
361 const double tmp0_5 = 0.78867513459481288225/h1;
362 const double tmp0_2 = -0.78867513459481288225/h1;
363 const double tmp0_9 = 0.21132486540518711775/h1;
364 const double tmp0_6 = -0.21132486540518711775/h1;
365 const double tmp0_3 = -0.21132486540518711775/h1;
366 const double tmp0_7 = -0.78867513459481288225/h1;
367 #pragma omp parallel for
368 for (index_t k0=0; k0 < m_NE0; ++k0) {
369 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0));
370 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0));
371 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,1, m_N0));
372 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,1, m_N0));
373 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
374 for (index_t i=0; i < numComp; ++i) {
375 o[INDEX3(i,0,0,numComp,2)] = f_00[i]*tmp0_0 + f_10[i]*tmp0_1;
376 o[INDEX3(i,1,0,numComp,2)] = f_00[i]*tmp0_2 + f_01[i]*tmp0_5 + f_10[i]*tmp0_3 + f_11[i]*tmp0_4;
377 o[INDEX3(i,0,1,numComp,2)] = f_00[i]*tmp0_0 + f_10[i]*tmp0_1;
378 o[INDEX3(i,1,1,numComp,2)] = f_00[i]*tmp0_6 + f_01[i]*tmp0_9 + f_10[i]*tmp0_7 + f_11[i]*tmp0_8;
379 } /* end of component loop i */
380 } /* end of k0 loop */
381 } /* end of face 2 */
382 if (m_faceOffset[3] > -1) {
383 const double tmp0_0 = 1.0/h0;
384 const double tmp0_4 = -0.21132486540518711775/h1;
385 const double tmp0_1 = -1/h0;
386 const double tmp0_8 = -0.78867513459481288225/h1;
387 const double tmp0_5 = -0.78867513459481288225/h1;
388 const double tmp0_2 = 0.21132486540518711775/h1;
389 const double tmp0_9 = -0.21132486540518711775/h1;
390 const double tmp0_6 = 0.78867513459481288225/h1;
391 const double tmp0_3 = 0.78867513459481288225/h1;
392 const double tmp0_7 = 0.21132486540518711775/h1;
393 #pragma omp parallel for
394 for (index_t k0=0; k0 < m_NE0; ++k0) {
395 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0));
396 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0));
397 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,m_N1-2, m_N0));
398 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,m_N1-2, m_N0));
399 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
400 for (index_t i=0; i < numComp; ++i) {
401 o[INDEX3(i,0,0,numComp,2)] = f_01[i]*tmp0_1 + f_11[i]*tmp0_0;
402 o[INDEX3(i,1,0,numComp,2)] = f_00[i]*tmp0_5 + f_01[i]*tmp0_3 + f_10[i]*tmp0_4 + f_11[i]*tmp0_2;
403 o[INDEX3(i,0,1,numComp,2)] = f_01[i]*tmp0_1 + f_11[i]*tmp0_0;
404 o[INDEX3(i,1,1,numComp,2)] = f_00[i]*tmp0_9 + f_01[i]*tmp0_7 + f_10[i]*tmp0_8 + f_11[i]*tmp0_6;
405 } /* end of component loop i */
406 } /* end of k0 loop */
407 } /* end of face 3 */
408 /* GENERATOR SNIP_GRAD_FACES BOTTOM */
409 } else {
410 stringstream msg;
411 msg << "setToGradient() not implemented for "
412 << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
413 throw RipleyException(msg.str());
414 }
415 }
416
417 void Rectangle::addPDEToSystem(escript::AbstractSystemMatrix& mat,
418 escript::Data& rhs, const escript::Data& A, const escript::Data& B,
419 const escript::Data& C, const escript::Data& D,
420 const escript::Data& X, const escript::Data& Y,
421 const escript::Data& d, const escript::Data& y,
422 const escript::Data& d_contact, const escript::Data& y_contact,
423 const escript::Data& d_dirac, const escript::Data& y_dirac) const
424 {
425 throw RipleyException("addPDEToSystem() not implemented");
426 }
427
428 Paso_SystemMatrixPattern* Rectangle::getPattern(bool reducedRowOrder,
429 bool reducedColOrder) const
430 {
431 if (reducedRowOrder || reducedColOrder)
432 throw RipleyException("getPattern() not implemented for reduced order");
433
434 // connector
435 RankVector neighbour;
436 IndexVector offsetInShared(1,0);
437 IndexVector sendShared, recvShared;
438 const IndexVector faces=getNumFacesPerBoundary();
439 const index_t left = (m_offset0==0 ? 0 : 1);
440 const index_t bottom = (m_offset1==0 ? 0 : 1);
441 // corner node from bottom-left
442 if (faces[0] == 0 && faces[2] == 0) {
443 neighbour.push_back(m_mpiInfo->rank-m_NX-1);
444 offsetInShared.push_back(offsetInShared.back()+1);
445 sendShared.push_back(m_nodeId[m_N0+left]);
446 recvShared.push_back(m_nodeId[0]);
447 }
448 // bottom edge
449 if (faces[2] == 0) {
450 neighbour.push_back(m_mpiInfo->rank-m_NX);
451 offsetInShared.push_back(offsetInShared.back()+m_N0-left);
452 for (dim_t i=left; i<m_N0; i++) {
453 // easy case, we know the neighbour id's
454 sendShared.push_back(m_nodeId[m_N0+i]);
455 recvShared.push_back(m_nodeId[i]);
456 }
457 }
458 // corner node from bottom-right
459 if (faces[1] == 0 && faces[2] == 0) {
460 neighbour.push_back(m_mpiInfo->rank-m_NX+1);
461 const index_t N0=(neighbour.back()%m_NX == 0 ? m_N0 : m_N0-1);
462 const index_t N1=(neighbour.back()/m_NX == 0 ? m_N1 : m_N1-1);
463 const index_t first=m_nodeDistribution[neighbour.back()];
464 offsetInShared.push_back(offsetInShared.back()+1);
465 sendShared.push_back(m_nodeId[(bottom+1)*m_N0-1]);
466 recvShared.push_back(first+N0*(N1-1));
467 }
468 // left edge
469 if (faces[0] == 0) {
470 neighbour.push_back(m_mpiInfo->rank-1);
471 offsetInShared.push_back(offsetInShared.back()+m_N1-bottom);
472 for (dim_t i=bottom; i<m_N1; i++) {
473 // easy case, we know the neighbour id's
474 sendShared.push_back(m_nodeId[i*m_N0+1]);
475 recvShared.push_back(m_nodeId[i*m_N0]);
476 }
477 }
478 // right edge
479 if (faces[1] == 0) {
480 neighbour.push_back(m_mpiInfo->rank+1);
481 const index_t rightN0=(neighbour.back()%m_NX == 0 ? m_N0 : m_N0-1);
482 const index_t first=m_nodeDistribution[neighbour.back()];
483 offsetInShared.push_back(offsetInShared.back()+m_N1-bottom);
484 for (dim_t i=bottom; i<m_N1; i++) {
485 sendShared.push_back(m_nodeId[(i+1)*m_N0-1]);
486 recvShared.push_back(first+rightN0*(i-bottom));
487 }
488 }
489 // corner node from top-left
490 if (faces[0] == 0 && faces[3] == 0) {
491 neighbour.push_back(m_mpiInfo->rank+m_NX-1);
492 const index_t N0=(neighbour.back()%m_NX == 0 ? m_N0 : m_N0-1);
493 const index_t first=m_nodeDistribution[neighbour.back()];
494 offsetInShared.push_back(offsetInShared.back()+1);
495 sendShared.push_back(m_nodeId[m_N0*(m_N1-1)+left]);
496 recvShared.push_back(first+N0-1);
497 }
498 // top edge
499 if (faces[3] == 0) {
500 neighbour.push_back(m_mpiInfo->rank+m_NX);
501 const index_t first=m_nodeDistribution[neighbour.back()];
502 offsetInShared.push_back(offsetInShared.back()+m_N0-left);
503 for (dim_t i=left; i<m_N0; i++) {
504 sendShared.push_back(m_nodeId[m_N0*(m_N1-1)+i]);
505 recvShared.push_back(first+i-left);
506 }
507 }
508 // corner node from top-right
509 if (faces[1] == 0 && faces[3] == 0) {
510 neighbour.push_back(m_mpiInfo->rank+m_NX+1);
511 const index_t first=m_nodeDistribution[neighbour.back()];
512 offsetInShared.push_back(offsetInShared.back()+1);
513 sendShared.push_back(m_nodeId[m_N0*m_N1-1]);
514 recvShared.push_back(first);
515 }
516 const int numDOF=m_nodeDistribution[m_mpiInfo->rank+1]-m_nodeDistribution[m_mpiInfo->rank];
517 /*
518 cout << "--- rcv_shcomp ---" << endl;
519 cout << "numDOF=" << numDOF << ", numNeighbors=" << neighbour.size() << endl;
520 for (size_t i=0; i<neighbour.size(); i++) {
521 cout << "neighbor[" << i << "]=" << neighbour[i]
522 << " offsetInShared[" << i+1 << "]=" << offsetInShared[i+1] << endl;
523 }
524 for (size_t i=0; i<recvShared.size(); i++) {
525 cout << "shared[" << i << "]=" << recvShared[i] << endl;
526 }
527 cout << "--- snd_shcomp ---" << endl;
528 for (size_t i=0; i<sendShared.size(); i++) {
529 cout << "shared[" << i << "]=" << sendShared[i] << endl;
530 }
531 */
532
533 Paso_SharedComponents *snd_shcomp = Paso_SharedComponents_alloc(
534 numDOF, neighbour.size(), &neighbour[0], &sendShared[0],
535 &offsetInShared[0], 1, 0, m_mpiInfo);
536 Paso_SharedComponents *rcv_shcomp = Paso_SharedComponents_alloc(
537 numDOF, neighbour.size(), &neighbour[0], &recvShared[0],
538 &offsetInShared[0], 1, 0, m_mpiInfo);
539 Paso_Connector* connector = Paso_Connector_alloc(snd_shcomp, rcv_shcomp);
540 Paso_SharedComponents_free(snd_shcomp);
541 Paso_SharedComponents_free(rcv_shcomp);
542
543 // create patterns
544 dim_t M, N;
545 IndexVector ptr(1,0);
546 IndexVector index;
547
548 // main pattern
549 for (index_t i=0; i<numDOF; i++) {
550 // always add the node itself
551 index.push_back(i);
552 int num=insertNeighbours(index, i);
553 ptr.push_back(ptr.back()+num+1);
554 }
555 M=N=ptr.size()-1;
556 // paso will manage the memory
557 index_t* indexC = MEMALLOC(index.size(),index_t);
558 index_t* ptrC = MEMALLOC(ptr.size(), index_t);
559 copy(index.begin(), index.end(), indexC);
560 copy(ptr.begin(), ptr.end(), ptrC);
561 Paso_Pattern *mainPattern = Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT,
562 M, N, ptrC, indexC);
563
564 /*
565 cout << "--- main_pattern ---" << endl;
566 cout << "M=" << M << ", N=" << N << endl;
567 for (size_t i=0; i<ptr.size(); i++) {
568 cout << "ptr[" << i << "]=" << ptr[i] << endl;
569 }
570 for (size_t i=0; i<index.size(); i++) {
571 cout << "index[" << i << "]=" << index[i] << endl;
572 }
573 */
574
575 ptr.clear();
576 index.clear();
577
578 // column & row couple patterns
579 Paso_Pattern *colCouplePattern, *rowCouplePattern;
580 generateCouplePatterns(&colCouplePattern, &rowCouplePattern);
581
582 // allocate paso distribution
583 Paso_Distribution* distribution = Paso_Distribution_alloc(m_mpiInfo,
584 const_cast<index_t*>(&m_nodeDistribution[0]), 1, 0);
585
586 Paso_SystemMatrixPattern* pattern = Paso_SystemMatrixPattern_alloc(
587 MATRIX_FORMAT_DEFAULT, distribution, distribution,
588 mainPattern, colCouplePattern, rowCouplePattern,
589 connector, connector);
590 Paso_Pattern_free(mainPattern);
591 Paso_Pattern_free(colCouplePattern);
592 Paso_Pattern_free(rowCouplePattern);
593 Paso_Distribution_free(distribution);
594 return pattern;
595 }
596
597 void Rectangle::Print_Mesh_Info(const bool full) const
598 {
599 RipleyDomain::Print_Mesh_Info(full);
600 if (full) {
601 cout << " Id Coordinates" << endl;
602 cout.precision(15);
603 cout.setf(ios::scientific, ios::floatfield);
604 pair<double,double> xdx = getFirstCoordAndSpacing(0);
605 pair<double,double> ydy = getFirstCoordAndSpacing(1);
606 for (index_t i=0; i < getNumNodes(); i++) {
607 cout << " " << setw(5) << m_nodeId[i]
608 << " " << xdx.first+(i%m_N0)*xdx.second
609 << " " << ydy.first+(i/m_N0)*ydy.second << endl;
610 }
611 }
612 }
613
614 IndexVector Rectangle::getNumNodesPerDim() const
615 {
616 IndexVector ret;
617 ret.push_back(m_N0);
618 ret.push_back(m_N1);
619 return ret;
620 }
621
622 IndexVector Rectangle::getNumElementsPerDim() const
623 {
624 IndexVector ret;
625 ret.push_back(m_NE0);
626 ret.push_back(m_NE1);
627 return ret;
628 }
629
630 IndexVector Rectangle::getNumFacesPerBoundary() const
631 {
632 IndexVector ret(4, 0);
633 //left
634 if (m_offset0==0)
635 ret[0]=m_NE1;
636 //right
637 if (m_mpiInfo->rank%m_NX==m_NX-1)
638 ret[1]=m_NE1;
639 //bottom
640 if (m_offset1==0)
641 ret[2]=m_NE0;
642 //top
643 if (m_mpiInfo->rank/m_NX==m_NY-1)
644 ret[3]=m_NE0;
645 return ret;
646 }
647
648 pair<double,double> Rectangle::getFirstCoordAndSpacing(dim_t dim) const
649 {
650 if (dim==0) {
651 return pair<double,double>((m_l0*m_offset0)/m_gNE0, m_l0/m_gNE0);
652 } else if (dim==1) {
653 return pair<double,double>((m_l1*m_offset1)/m_gNE1, m_l1/m_gNE1);
654 }
655 throw RipleyException("getFirstCoordAndSpacing(): invalid argument");
656 }
657
658 //protected
659 dim_t Rectangle::getNumFaceElements() const
660 {
661 const IndexVector faces = getNumFacesPerBoundary();
662 dim_t n=0;
663 for (size_t i=0; i<faces.size(); i++)
664 n+=faces[i];
665 return n;
666 }
667
668 //protected
669 void Rectangle::assembleCoordinates(escript::Data& arg) const
670 {
671 escriptDataC x = arg.getDataC();
672 int numDim = m_numDim;
673 if (!isDataPointShapeEqual(&x, 1, &numDim))
674 throw RipleyException("setToX: Invalid Data object shape");
675 if (!numSamplesEqual(&x, 1, getNumNodes()))
676 throw RipleyException("setToX: Illegal number of samples in Data object");
677
678 pair<double,double> xdx = getFirstCoordAndSpacing(0);
679 pair<double,double> ydy = getFirstCoordAndSpacing(1);
680 arg.requireWrite();
681 #pragma omp parallel for
682 for (dim_t i1 = 0; i1 < m_N1; i1++) {
683 for (dim_t i0 = 0; i0 < m_N0; i0++) {
684 double* point = arg.getSampleDataRW(i0+m_N0*i1);
685 point[0] = xdx.first+i0*xdx.second;
686 point[1] = ydy.first+i1*ydy.second;
687 }
688 }
689 }
690
691 //private
692 void Rectangle::populateSampleIds()
693 {
694 // identifiers are ordered from left to right, bottom to top on each rank,
695 // except for the shared nodes which are owned by the rank below / to the
696 // left of the current rank
697
698 // build node distribution vector first.
699 // m_nodeDistribution[i] is the first node id on rank i, that is
700 // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes
701 m_nodeDistribution.assign(m_mpiInfo->size+1, 0);
702 m_nodeDistribution[1]=getNumNodes();
703 for (dim_t k=1; k<m_mpiInfo->size-1; k++) {
704 const index_t x=k%m_NX;
705 const index_t y=k/m_NX;
706 index_t numNodes=getNumNodes();
707 if (x>0)
708 numNodes-=m_N1;
709 if (y>0)
710 numNodes-=m_N0;
711 if (x>0 && y>0)
712 numNodes++; // subtracted corner twice -> fix that
713 m_nodeDistribution[k+1]=m_nodeDistribution[k]+numNodes;
714 }
715 m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal();
716
717 m_nodeId.resize(getNumNodes());
718
719 // the bottom row and left column are not owned by this rank so the
720 // identifiers need to be computed accordingly
721 const index_t left = (m_offset0==0 ? 0 : 1);
722 const index_t bottom = (m_offset1==0 ? 0 : 1);
723 if (left>0) {
724 const int neighbour=m_mpiInfo->rank-1;
725 const index_t leftN0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
726 #pragma omp parallel for
727 for (dim_t i1=bottom; i1<m_N1; i1++) {
728 m_nodeId[i1*m_N0]=m_nodeDistribution[neighbour]
729 + (i1-bottom+1)*leftN0-1;
730 }
731 }
732 if (bottom>0) {
733 const int neighbour=m_mpiInfo->rank-m_NX;
734 const index_t bottomN0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
735 const index_t bottomN1=(neighbour/m_NX == 0 ? m_N1 : m_N1-1);
736 #pragma omp parallel for
737 for (dim_t i0=left; i0<m_N0; i0++) {
738 m_nodeId[i0]=m_nodeDistribution[neighbour]
739 + (bottomN1-1)*bottomN0 + i0 - left;
740 }
741 }
742 if (left>0 && bottom>0) {
743 const int neighbour=m_mpiInfo->rank-m_NX-1;
744 const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
745 const index_t N1=(neighbour/m_NX == 0 ? m_N1 : m_N1-1);
746 m_nodeId[0]=m_nodeDistribution[neighbour]+N0*N1-1;
747 }
748
749 // the rest of the id's are contiguous
750 const index_t firstId=m_nodeDistribution[m_mpiInfo->rank];
751 #pragma omp parallel for
752 for (dim_t i1=bottom; i1<m_N1; i1++) {
753 for (dim_t i0=left; i0<m_N0; i0++) {
754 m_nodeId[i0+i1*m_N0] = firstId+i0-left+(i1-bottom)*(m_N0-left);
755 }
756 }
757
758 // element id's
759 m_elementId.resize(getNumElements());
760 #pragma omp parallel for
761 for (dim_t k=0; k<getNumElements(); k++) {
762 m_elementId[k]=k;
763 }
764
765 // face element id's
766 m_faceId.resize(getNumFaceElements());
767 #pragma omp parallel for
768 for (dim_t k=0; k<getNumFaceElements(); k++) {
769 m_faceId[k]=k;
770 }
771
772 // generate face offset vector
773 const IndexVector facesPerEdge = getNumFacesPerBoundary();
774 m_faceOffset.assign(facesPerEdge.size(), -1);
775 index_t offset=0;
776 for (size_t i=0; i<facesPerEdge.size(); i++) {
777 if (facesPerEdge[i]>0) {
778 m_faceOffset[i]=offset;
779 offset+=facesPerEdge[i];
780 }
781 }
782 }
783
784 //private
785 int Rectangle::insertNeighbours(IndexVector& index, index_t node) const
786 {
787 const dim_t myN0 = (m_offset0==0 ? m_N0 : m_N0-1);
788 const dim_t myN1 = (m_offset1==0 ? m_N1 : m_N1-1);
789 const int x=node%myN0;
790 const int y=node/myN0;
791 int num=0;
792 if (y>0) {
793 if (x>0) {
794 // bottom-left
795 index.push_back(node-myN0-1);
796 num++;
797 }
798 // bottom
799 index.push_back(node-myN0);
800 num++;
801 if (x<myN0-1) {
802 // bottom-right
803 index.push_back(node-myN0+1);
804 num++;
805 }
806 }
807 if (x<myN0-1) {
808 // right
809 index.push_back(node+1);
810 num++;
811 if (y<myN1-1) {
812 // top-right
813 index.push_back(node+myN0+1);
814 num++;
815 }
816 }
817 if (y<myN1-1) {
818 // top
819 index.push_back(node+myN0);
820 num++;
821 if (x>0) {
822 // top-left
823 index.push_back(node+myN0-1);
824 num++;
825 }
826 }
827 if (x>0) {
828 // left
829 index.push_back(node-1);
830 num++;
831 }
832
833 return num;
834 }
835
836 //private
837 void Rectangle::generateCouplePatterns(Paso_Pattern** colPattern, Paso_Pattern** rowPattern) const
838 {
839 IndexVector ptr(1,0);
840 IndexVector index;
841 const dim_t myN0 = (m_offset0==0 ? m_N0 : m_N0-1);
842 const dim_t myN1 = (m_offset1==0 ? m_N1 : m_N1-1);
843 const IndexVector faces=getNumFacesPerBoundary();
844
845 // bottom edge
846 dim_t n=0;
847 if (faces[0] == 0) {
848 index.push_back(2*(myN0+myN1+1));
849 index.push_back(2*(myN0+myN1+1)+1);
850 n+=2;
851 if (faces[2] == 0) {
852 index.push_back(0);
853 index.push_back(1);
854 index.push_back(2);
855 n+=3;
856 }
857 } else if (faces[2] == 0) {
858 index.push_back(1);
859 index.push_back(2);
860 n+=2;
861 }
862 // n=neighbours of bottom-left corner node
863 ptr.push_back(ptr.back()+n);
864 n=0;
865 if (faces[2] == 0) {
866 for (dim_t i=1; i<myN0-1; i++) {
867 index.push_back(i);
868 index.push_back(i+1);
869 index.push_back(i+2);
870 ptr.push_back(ptr.back()+3);
871 }
872 index.push_back(myN0-1);
873 index.push_back(myN0);
874 n+=2;
875 if (faces[1] == 0) {
876 index.push_back(myN0+1);
877 index.push_back(myN0+2);
878 index.push_back(myN0+3);
879 n+=3;
880 }
881 } else {
882 for (dim_t i=1; i<myN0-1; i++) {
883 ptr.push_back(ptr.back());
884 }
885 if (faces[1] == 0) {
886 index.push_back(myN0+2);
887 index.push_back(myN0+3);
888 n+=2;
889 }
890 }
891 // n=neighbours of bottom-right corner node
892 ptr.push_back(ptr.back()+n);
893
894 // 2nd row to 2nd last row
895 for (dim_t i=1; i<myN1-1; i++) {
896 // left edge
897 if (faces[0] == 0) {
898 index.push_back(2*(myN0+myN1+2)-i);
899 index.push_back(2*(myN0+myN1+2)-i-1);
900 index.push_back(2*(myN0+myN1+2)-i-2);
901 ptr.push_back(ptr.back()+3);
902 } else {
903 ptr.push_back(ptr.back());
904 }
905 for (dim_t j=1; j<myN0-1; j++) {
906 ptr.push_back(ptr.back());
907 }
908 // right edge
909 if (faces[1] == 0) {
910 index.push_back(myN0+i+1);
911 index.push_back(myN0+i+2);
912 index.push_back(myN0+i+3);
913 ptr.push_back(ptr.back()+3);
914 } else {
915 ptr.push_back(ptr.back());
916 }
917 }
918
919 // top edge
920 n=0;
921 if (faces[0] == 0) {
922 index.push_back(2*myN0+myN1+5);
923 index.push_back(2*myN0+myN1+4);
924 n+=2;
925 if (faces[3] == 0) {
926 index.push_back(2*myN0+myN1+3);
927 index.push_back(2*myN0+myN1+2);
928 index.push_back(2*myN0+myN1+1);
929 n+=3;
930 }
931 } else if (faces[3] == 0) {
932 index.push_back(2*myN0+myN1+2);
933 index.push_back(2*myN0+myN1+1);
934 n+=2;
935 }
936 // n=neighbours of top-left corner node
937 ptr.push_back(ptr.back()+n);
938 n=0;
939 if (faces[3] == 0) {
940 for (dim_t i=1; i<myN0-1; i++) {
941 index.push_back(2*myN0+myN1+i+1);
942 index.push_back(2*myN0+myN1+i);
943 index.push_back(2*myN0+myN1+i-1);
944 ptr.push_back(ptr.back()+3);
945 }
946 index.push_back(myN0+myN1+4);
947 index.push_back(myN0+myN1+3);
948 n+=2;
949 if (faces[1] == 0) {
950 index.push_back(myN0+myN1+2);
951 index.push_back(myN0+myN1+1);
952 index.push_back(myN0+myN1);
953 n+=3;
954 }
955 } else {
956 for (dim_t i=1; i<myN0-1; i++) {
957 ptr.push_back(ptr.back());
958 }
959 if (faces[1] == 0) {
960 index.push_back(myN0+myN1+1);
961 index.push_back(myN0+myN1);
962 n+=2;
963 }
964 }
965 // n=neighbours of top-right corner node
966 ptr.push_back(ptr.back()+n);
967
968 dim_t M=ptr.size()-1;
969 map<index_t,index_t> idMap;
970 dim_t N=0;
971 for (IndexVector::iterator it=index.begin(); it!=index.end(); it++) {
972 if (idMap.find(*it)==idMap.end()) {
973 idMap[*it]=N;
974 *it=N++;
975 } else {
976 *it=idMap[*it];
977 }
978 }
979
980 /*
981 cout << "--- colCouple_pattern ---" << endl;
982 cout << "M=" << M << ", N=" << N << endl;
983 for (size_t i=0; i<ptr.size(); i++) {
984 cout << "ptr[" << i << "]=" << ptr[i] << endl;
985 }
986 for (size_t i=0; i<index.size(); i++) {
987 cout << "index[" << i << "]=" << index[i] << endl;
988 }
989 */
990
991 // now build the row couple pattern
992 IndexVector ptr2(1,0);
993 IndexVector index2;
994 for (dim_t id=0; id<N; id++) {
995 n=0;
996 for (dim_t i=0; i<M; i++) {
997 for (dim_t j=ptr[i]; j<ptr[i+1]; j++) {
998 if (index[j]==id) {
999 index2.push_back(i);
1000 n++;
1001 break;
1002 }
1003 }
1004 }
1005 ptr2.push_back(ptr2.back()+n);
1006 }
1007
1008 /*
1009 cout << "--- rowCouple_pattern ---" << endl;
1010 cout << "M=" << N << ", N=" << M << endl;
1011 for (size_t i=0; i<ptr2.size(); i++) {
1012 cout << "ptr[" << i << "]=" << ptr2[i] << endl;
1013 }
1014 for (size_t i=0; i<index2.size(); i++) {
1015 cout << "index[" << i << "]=" << index2[i] << endl;
1016 }
1017 */
1018
1019 // paso will manage the memory
1020 index_t* indexC = MEMALLOC(index.size(), index_t);
1021 index_t* ptrC = MEMALLOC(ptr.size(), index_t);
1022 copy(index.begin(), index.end(), indexC);
1023 copy(ptr.begin(), ptr.end(), ptrC);
1024 *colPattern=Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT, M, N, ptrC, indexC);
1025
1026 // paso will manage the memory
1027 indexC = MEMALLOC(index2.size(), index_t);
1028 ptrC = MEMALLOC(ptr2.size(), index_t);
1029 copy(index2.begin(), index2.end(), indexC);
1030 copy(ptr2.begin(), ptr2.end(), ptrC);
1031 *rowPattern=Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT, N, M, ptrC, indexC);
1032 }
1033
1034 //protected
1035 void Rectangle::interpolateNodesOnElements(escript::Data& out, escript::Data& in) const
1036 {
1037 const dim_t numComp = in.getDataPointSize();
1038 /* GENERATOR SNIP_INTERPOLATE_ELEMENTS TOP */
1039 const double tmp0_2 = 0.62200846792814621559;
1040 const double tmp0_1 = 0.044658198738520451079;
1041 const double tmp0_0 = 0.16666666666666666667;
1042 #pragma omp parallel for
1043 for (index_t k1=0; k1 < m_NE1; ++k1) {
1044 for (index_t k0=0; k0 < m_NE0; ++k0) {
1045 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0));
1046 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0));
1047 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0));
1048 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0));
1049 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0));
1050 for (index_t i=0; i < numComp; ++i) {
1051 o[INDEX2(i,numComp,0)] = f_00[i]*tmp0_2 + f_11[i]*tmp0_1 + tmp0_0*(f_01[i] + f_10[i]);
1052 o[INDEX2(i,numComp,1)] = f_01[i]*tmp0_1 + f_10[i]*tmp0_2 + tmp0_0*(f_00[i] + f_11[i]);
1053 o[INDEX2(i,numComp,2)] = f_01[i]*tmp0_2 + f_10[i]*tmp0_1 + tmp0_0*(f_00[i] + f_11[i]);
1054 o[INDEX2(i,numComp,3)] = f_00[i]*tmp0_1 + f_11[i]*tmp0_2 + tmp0_0*(f_01[i] + f_10[i]);
1055 } /* end of component loop i */
1056 } /* end of k0 loop */
1057 } /* end of k1 loop */
1058 /* GENERATOR SNIP_INTERPOLATE_ELEMENTS BOTTOM */
1059 }
1060
1061 //protected
1062 void Rectangle::interpolateNodesOnFaces(escript::Data& out, escript::Data& in) const
1063 {
1064 const dim_t numComp = in.getDataPointSize();
1065 /* GENERATOR SNIP_INTERPOLATE_FACES TOP */
1066 if (m_faceOffset[0] > -1) {
1067 const double tmp0_1 = 0.78867513459481288225;
1068 const double tmp0_0 = 0.21132486540518711775;
1069 #pragma omp parallel for
1070 for (index_t k1=0; k1 < m_NE1; ++k1) {
1071 const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0));
1072 const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0));
1073 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
1074 for (index_t i=0; i < numComp; ++i) {
1075 o[INDEX2(i,numComp,0)] = f_00[i]*tmp0_1 + f_01[i]*tmp0_0;
1076 o[INDEX2(i,numComp,1)] = f_00[i]*tmp0_0 + f_01[i]*tmp0_1;
1077 } /* end of component loop i */
1078 } /* end of k1 loop */
1079 } /* end of face 0 */
1080 if (m_faceOffset[1] > -1) {
1081 const double tmp0_1 = 0.21132486540518711775;
1082 const double tmp0_0 = 0.78867513459481288225;
1083 #pragma omp parallel for
1084 for (index_t k1=0; k1 < m_NE1; ++k1) {
1085 const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0));
1086 const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0));
1087 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
1088 for (index_t i=0; i < numComp; ++i) {
1089 o[INDEX2(i,numComp,0)] = f_10[i]*tmp0_0 + f_11[i]*tmp0_1;
1090 o[INDEX2(i,numComp,1)] = f_10[i]*tmp0_1 + f_11[i]*tmp0_0;
1091 } /* end of component loop i */
1092 } /* end of k1 loop */
1093 } /* end of face 1 */
1094 if (m_faceOffset[2] > -1) {
1095 const double tmp0_1 = 0.78867513459481288225;
1096 const double tmp0_0 = 0.21132486540518711775;
1097 #pragma omp parallel for
1098 for (index_t k0=0; k0 < m_NE0; ++k0) {
1099 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0));
1100 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0));
1101 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1102 for (index_t i=0; i < numComp; ++i) {
1103 o[INDEX2(i,numComp,0)] = f_00[i]*tmp0_1 + f_10[i]*tmp0_0;
1104 o[INDEX2(i,numComp,1)] = f_00[i]*tmp0_0 + f_10[i]*tmp0_1;
1105 } /* end of component loop i */
1106 } /* end of k0 loop */
1107 } /* end of face 2 */
1108 if (m_faceOffset[3] > -1) {
1109 const double tmp0_1 = 0.78867513459481288225;
1110 const double tmp0_0 = 0.21132486540518711775;
1111 #pragma omp parallel for
1112 for (index_t k0=0; k0 < m_NE0; ++k0) {
1113 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0));
1114 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0));
1115 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1116 for (index_t i=0; i < numComp; ++i) {
1117 o[INDEX2(i,numComp,0)] = f_01[i]*tmp0_1 + f_11[i]*tmp0_0;
1118 o[INDEX2(i,numComp,1)] = f_01[i]*tmp0_0 + f_11[i]*tmp0_1;
1119 } /* end of component loop i */
1120 } /* end of k0 loop */
1121 } /* end of face 3 */
1122 /* GENERATOR SNIP_INTERPOLATE_FACES BOTTOM */
1123 }
1124
1125 } // end of namespace ripley
1126

  ViewVC Help
Powered by ViewVC 1.1.26