/[escript]/branches/ripleygmg_from_3668/ripley/src/Rectangle.cpp
ViewVC logotype

Contents of /branches/ripleygmg_from_3668/ripley/src/Rectangle.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3722 - (show annotations)
Wed Dec 7 05:53:22 2011 UTC (8 years ago) by caltinay
File size: 64100 byte(s)
All "util on ripley" tests pass now :-)
-added support for node/element/face tags
-implemented setToNormal()
-added a bunch of omp nowait

1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2011 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14 #include <ripley/Rectangle.h>
15 extern "C" {
16 #include "paso/SystemMatrixPattern.h"
17 }
18
19 #if USE_SILO
20 #include <silo.h>
21 #ifdef ESYS_MPI
22 #include <pmpio.h>
23 #endif
24 #endif
25
26 #include <iomanip>
27
28 using namespace std;
29
30 namespace ripley {
31
32 Rectangle::Rectangle(int n0, int n1, double l0, double l1, int d0, int d1) :
33 RipleyDomain(2),
34 m_gNE0(n0),
35 m_gNE1(n1),
36 m_l0(l0),
37 m_l1(l1),
38 m_NX(d0),
39 m_NY(d1)
40 {
41 // ensure number of subdivisions is valid and nodes can be distributed
42 // among number of ranks
43 if (m_NX*m_NY != m_mpiInfo->size)
44 throw RipleyException("Invalid number of spatial subdivisions");
45
46 if (n0%m_NX > 0 || n1%m_NY > 0)
47 throw RipleyException("Number of elements must be separable into number of ranks in each dimension");
48
49 // local number of elements
50 m_NE0 = n0/m_NX;
51 m_NE1 = n1/m_NY;
52 // local number of nodes (not necessarily owned)
53 m_N0 = m_NE0+1;
54 m_N1 = m_NE1+1;
55 // bottom-left node is at (offset0,offset1) in global mesh
56 m_offset0 = m_NE0*(m_mpiInfo->rank%m_NX);
57 m_offset1 = m_NE1*(m_mpiInfo->rank/m_NX);
58 populateSampleIds();
59 }
60
61 Rectangle::~Rectangle()
62 {
63 }
64
65 string Rectangle::getDescription() const
66 {
67 return "ripley::Rectangle";
68 }
69
70 bool Rectangle::operator==(const AbstractDomain& other) const
71 {
72 if (dynamic_cast<const Rectangle*>(&other))
73 return this==&other;
74
75 return false;
76 }
77
78 void Rectangle::dump(const string& fileName) const
79 {
80 #if USE_SILO
81 string fn(fileName);
82 if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
83 fn+=".silo";
84 }
85
86 const int NUM_SILO_FILES = 1;
87 const char* blockDirFmt = "/block%04d";
88 int driver=DB_HDF5;
89 string siloPath;
90 DBfile* dbfile = NULL;
91
92 #ifdef ESYS_MPI
93 PMPIO_baton_t* baton = NULL;
94 #endif
95
96 if (m_mpiInfo->size > 1) {
97 #ifdef ESYS_MPI
98 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
99 0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
100 PMPIO_DefaultClose, (void*)&driver);
101 // try the fallback driver in case of error
102 if (!baton && driver != DB_PDB) {
103 driver = DB_PDB;
104 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
105 0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
106 PMPIO_DefaultClose, (void*)&driver);
107 }
108 if (baton) {
109 char str[64];
110 snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
111 siloPath = str;
112 dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
113 }
114 #endif
115 } else {
116 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
117 getDescription().c_str(), driver);
118 // try the fallback driver in case of error
119 if (!dbfile && driver != DB_PDB) {
120 driver = DB_PDB;
121 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
122 getDescription().c_str(), driver);
123 }
124 }
125
126 if (!dbfile)
127 throw RipleyException("dump: Could not create Silo file");
128
129 /*
130 if (driver==DB_HDF5) {
131 // gzip level 1 already provides good compression with minimal
132 // performance penalty. Some tests showed that gzip levels >3 performed
133 // rather badly on escript data both in terms of time and space
134 DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
135 }
136 */
137
138 boost::scoped_ptr<double> x(new double[m_N0]);
139 boost::scoped_ptr<double> y(new double[m_N1]);
140 double* coords[2] = { x.get(), y.get() };
141 pair<double,double> xdx = getFirstCoordAndSpacing(0);
142 pair<double,double> ydy = getFirstCoordAndSpacing(1);
143 #pragma omp parallel
144 {
145 #pragma omp for nowait
146 for (dim_t i0 = 0; i0 < m_N0; i0++) {
147 coords[0][i0]=xdx.first+i0*xdx.second;
148 }
149 #pragma omp for nowait
150 for (dim_t i1 = 0; i1 < m_N1; i1++) {
151 coords[1][i1]=ydy.first+i1*ydy.second;
152 }
153 }
154 IndexVector dims = getNumNodesPerDim();
155
156 // write mesh
157 DBPutQuadmesh(dbfile, "mesh", NULL, coords, &dims[0], 2, DB_DOUBLE,
158 DB_COLLINEAR, NULL);
159
160 // write node ids
161 DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], &dims[0], 2,
162 NULL, 0, DB_INT, DB_NODECENT, NULL);
163
164 // write element ids
165 dims = getNumElementsPerDim();
166 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
167 &dims[0], 2, NULL, 0, DB_INT, DB_ZONECENT, NULL);
168
169 // rank 0 writes multimesh and multivar
170 if (m_mpiInfo->rank == 0) {
171 vector<string> tempstrings;
172 vector<char*> names;
173 for (dim_t i=0; i<m_mpiInfo->size; i++) {
174 stringstream path;
175 path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
176 tempstrings.push_back(path.str());
177 names.push_back((char*)tempstrings.back().c_str());
178 }
179 vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
180 DBSetDir(dbfile, "/");
181 DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
182 &types[0], NULL);
183 tempstrings.clear();
184 names.clear();
185 for (dim_t i=0; i<m_mpiInfo->size; i++) {
186 stringstream path;
187 path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
188 tempstrings.push_back(path.str());
189 names.push_back((char*)tempstrings.back().c_str());
190 }
191 types.assign(m_mpiInfo->size, DB_QUADVAR);
192 DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
193 &types[0], NULL);
194 tempstrings.clear();
195 names.clear();
196 for (dim_t i=0; i<m_mpiInfo->size; i++) {
197 stringstream path;
198 path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
199 tempstrings.push_back(path.str());
200 names.push_back((char*)tempstrings.back().c_str());
201 }
202 DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
203 &types[0], NULL);
204 }
205
206 if (m_mpiInfo->size > 1) {
207 #ifdef ESYS_MPI
208 PMPIO_HandOffBaton(baton, dbfile);
209 PMPIO_Finish(baton);
210 #endif
211 } else {
212 DBClose(dbfile);
213 }
214
215 #else // USE_SILO
216 throw RipleyException("dump(): no Silo support");
217 #endif
218 }
219
220 const int* Rectangle::borrowSampleReferenceIDs(int fsType) const
221 {
222 switch (fsType) {
223 case Nodes:
224 return &m_nodeId[0];
225 case Elements:
226 return &m_elementId[0];
227 case FaceElements:
228 return &m_faceId[0];
229 default:
230 break;
231 }
232
233 stringstream msg;
234 msg << "borrowSampleReferenceIDs() not implemented for function space type "
235 << functionSpaceTypeAsString(fsType);
236 throw RipleyException(msg.str());
237 }
238
239 bool Rectangle::ownSample(int fsCode, index_t id) const
240 {
241 #ifdef ESYS_MPI
242 if (fsCode != ReducedNodes) {
243 const index_t myFirst=m_nodeDistribution[m_mpiInfo->rank];
244 const index_t myLast=m_nodeDistribution[m_mpiInfo->rank+1]-1;
245 return (m_nodeId[id]>=myFirst && m_nodeId[id]<=myLast);
246 } else {
247 stringstream msg;
248 msg << "ownSample() not implemented for "
249 << functionSpaceTypeAsString(fsCode);
250 throw RipleyException(msg.str());
251 }
252 #else
253 return true;
254 #endif
255 }
256
257 void Rectangle::setToGradient(escript::Data& out, const escript::Data& cIn) const
258 {
259 escript::Data& in = *const_cast<escript::Data*>(&cIn);
260 const dim_t numComp = in.getDataPointSize();
261 const double h0 = m_l0/m_gNE0;
262 const double h1 = m_l1/m_gNE1;
263 if (out.getFunctionSpace().getTypeCode() == Elements) {
264 /* GENERATOR SNIP_GRAD_ELEMENTS TOP */
265 const double tmp0_13 = 0.78867513459481288225/h1;
266 const double tmp0_0 = 0.78867513459481288225/h0;
267 const double tmp0_4 = 0.21132486540518711775/h0;
268 const double tmp0_10 = 0.78867513459481288225/h1;
269 const double tmp0_1 = 0.21132486540518711775/h0;
270 const double tmp0_8 = -0.21132486540518711775/h1;
271 const double tmp0_14 = 0.21132486540518711775/h1;
272 const double tmp0_5 = 0.78867513459481288225/h0;
273 const double tmp0_11 = -0.78867513459481288225/h1;
274 const double tmp0_2 = -0.21132486540518711775/h0;
275 const double tmp0_9 = 0.21132486540518711775/h1;
276 const double tmp0_15 = -0.21132486540518711775/h1;
277 const double tmp0_6 = -0.78867513459481288225/h0;
278 const double tmp0_3 = -0.78867513459481288225/h0;
279 const double tmp0_12 = -0.78867513459481288225/h1;
280 const double tmp0_7 = -0.21132486540518711775/h0;
281 #pragma omp parallel for
282 for (index_t k1=0; k1 < m_NE1; ++k1) {
283 for (index_t k0=0; k0 < m_NE0; ++k0) {
284 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0));
285 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0));
286 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0));
287 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0));
288 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0));
289 for (index_t i=0; i < numComp; ++i) {
290 o[INDEX3(i,0,0,numComp,2)] = f_00[i]*tmp0_3 + f_01[i]*tmp0_2 + f_10[i]*tmp0_0 + f_11[i]*tmp0_1;
291 o[INDEX3(i,1,0,numComp,2)] = f_00[i]*tmp0_11 + f_01[i]*tmp0_10 + f_10[i]*tmp0_8 + f_11[i]*tmp0_9;
292 o[INDEX3(i,0,1,numComp,2)] = f_00[i]*tmp0_3 + f_01[i]*tmp0_2 + f_10[i]*tmp0_0 + f_11[i]*tmp0_1;
293 o[INDEX3(i,1,1,numComp,2)] = f_00[i]*tmp0_15 + f_01[i]*tmp0_14 + f_10[i]*tmp0_12 + f_11[i]*tmp0_13;
294 o[INDEX3(i,0,2,numComp,2)] = f_00[i]*tmp0_7 + f_01[i]*tmp0_6 + f_10[i]*tmp0_4 + f_11[i]*tmp0_5;
295 o[INDEX3(i,1,2,numComp,2)] = f_00[i]*tmp0_11 + f_01[i]*tmp0_10 + f_10[i]*tmp0_8 + f_11[i]*tmp0_9;
296 o[INDEX3(i,0,3,numComp,2)] = f_00[i]*tmp0_7 + f_01[i]*tmp0_6 + f_10[i]*tmp0_4 + f_11[i]*tmp0_5;
297 o[INDEX3(i,1,3,numComp,2)] = f_00[i]*tmp0_15 + f_01[i]*tmp0_14 + f_10[i]*tmp0_12 + f_11[i]*tmp0_13;
298 } /* end of component loop i */
299 } /* end of k0 loop */
300 } /* end of k1 loop */
301 /* GENERATOR SNIP_GRAD_ELEMENTS BOTTOM */
302 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
303 /* GENERATOR SNIP_GRAD_REDUCED_ELEMENTS TOP */
304 const double tmp0_3 = 0.5/h1;
305 const double tmp0_2 = -0.5/h1;
306 const double tmp0_1 = -0.5/h0;
307 const double tmp0_0 = 0.5/h0;
308 #pragma omp parallel for
309 for (index_t k1=0; k1 < m_NE1; ++k1) {
310 for (index_t k0=0; k0 < m_NE0; ++k0) {
311 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0));
312 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0));
313 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0));
314 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0));
315 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0));
316 for (index_t i=0; i < numComp; ++i) {
317 o[INDEX3(i,0,0,numComp,2)] = tmp0_0*(f_10[i] + f_11[i]) + tmp0_1*(f_00[i] + f_01[i]);
318 o[INDEX3(i,1,0,numComp,2)] = tmp0_2*(f_00[i] + f_10[i]) + tmp0_3*(f_01[i] + f_11[i]);
319 } /* end of component loop i */
320 } /* end of k0 loop */
321 } /* end of k1 loop */
322 /* GENERATOR SNIP_GRAD_REDUCED_ELEMENTS BOTTOM */
323 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
324 #pragma omp parallel
325 {
326 /* GENERATOR SNIP_GRAD_FACES TOP */
327 if (m_faceOffset[0] > -1) {
328 const double tmp0_0 = 0.78867513459481288225/h0;
329 const double tmp0_4 = 0.21132486540518711775/h0;
330 const double tmp0_1 = 0.21132486540518711775/h0;
331 const double tmp0_8 = 1.0/h1;
332 const double tmp0_5 = 0.78867513459481288225/h0;
333 const double tmp0_2 = -0.21132486540518711775/h0;
334 const double tmp0_9 = -1/h1;
335 const double tmp0_6 = -0.78867513459481288225/h0;
336 const double tmp0_3 = -0.78867513459481288225/h0;
337 const double tmp0_7 = -0.21132486540518711775/h0;
338 #pragma omp for nowait
339 for (index_t k1=0; k1 < m_NE1; ++k1) {
340 const register double* f_10 = in.getSampleDataRO(INDEX2(1,k1, m_N0));
341 const register double* f_11 = in.getSampleDataRO(INDEX2(1,k1+1, m_N0));
342 const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0));
343 const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0));
344 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
345 for (index_t i=0; i < numComp; ++i) {
346 o[INDEX3(i,0,0,numComp,2)] = f_00[i]*tmp0_3 + f_01[i]*tmp0_2 + f_10[i]*tmp0_0 + f_11[i]*tmp0_1;
347 o[INDEX3(i,1,0,numComp,2)] = f_00[i]*tmp0_9 + f_01[i]*tmp0_8;
348 o[INDEX3(i,0,1,numComp,2)] = f_00[i]*tmp0_7 + f_01[i]*tmp0_6 + f_10[i]*tmp0_4 + f_11[i]*tmp0_5;
349 o[INDEX3(i,1,1,numComp,2)] = f_00[i]*tmp0_9 + f_01[i]*tmp0_8;
350 } /* end of component loop i */
351 } /* end of k1 loop */
352 } /* end of face 0 */
353 if (m_faceOffset[1] > -1) {
354 const double tmp0_0 = 0.78867513459481288225/h0;
355 const double tmp0_4 = 0.21132486540518711775/h0;
356 const double tmp0_1 = 0.21132486540518711775/h0;
357 const double tmp0_8 = -1/h1;
358 const double tmp0_5 = 0.78867513459481288225/h0;
359 const double tmp0_2 = -0.21132486540518711775/h0;
360 const double tmp0_9 = 1.0/h1;
361 const double tmp0_6 = -0.78867513459481288225/h0;
362 const double tmp0_3 = -0.78867513459481288225/h0;
363 const double tmp0_7 = -0.21132486540518711775/h0;
364 #pragma omp for nowait
365 for (index_t k1=0; k1 < m_NE1; ++k1) {
366 const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0));
367 const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0));
368 const register double* f_01 = in.getSampleDataRO(INDEX2(m_N0-2,k1+1, m_N0));
369 const register double* f_00 = in.getSampleDataRO(INDEX2(m_N0-2,k1, m_N0));
370 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
371 for (index_t i=0; i < numComp; ++i) {
372 o[INDEX3(i,0,0,numComp,2)] = f_00[i]*tmp0_3 + f_01[i]*tmp0_2 + f_10[i]*tmp0_0 + f_11[i]*tmp0_1;
373 o[INDEX3(i,1,0,numComp,2)] = f_10[i]*tmp0_8 + f_11[i]*tmp0_9;
374 o[INDEX3(i,0,1,numComp,2)] = f_00[i]*tmp0_7 + f_01[i]*tmp0_6 + f_10[i]*tmp0_4 + f_11[i]*tmp0_5;
375 o[INDEX3(i,1,1,numComp,2)] = f_10[i]*tmp0_8 + f_11[i]*tmp0_9;
376 } /* end of component loop i */
377 } /* end of k1 loop */
378 } /* end of face 1 */
379 if (m_faceOffset[2] > -1) {
380 const double tmp0_0 = -1/h0;
381 const double tmp0_4 = 0.21132486540518711775/h1;
382 const double tmp0_1 = 1.0/h0;
383 const double tmp0_8 = 0.78867513459481288225/h1;
384 const double tmp0_5 = 0.78867513459481288225/h1;
385 const double tmp0_2 = -0.78867513459481288225/h1;
386 const double tmp0_9 = 0.21132486540518711775/h1;
387 const double tmp0_6 = -0.21132486540518711775/h1;
388 const double tmp0_3 = -0.21132486540518711775/h1;
389 const double tmp0_7 = -0.78867513459481288225/h1;
390 #pragma omp for nowait
391 for (index_t k0=0; k0 < m_NE0; ++k0) {
392 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0));
393 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0));
394 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,1, m_N0));
395 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,1, m_N0));
396 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
397 for (index_t i=0; i < numComp; ++i) {
398 o[INDEX3(i,0,0,numComp,2)] = f_00[i]*tmp0_0 + f_10[i]*tmp0_1;
399 o[INDEX3(i,1,0,numComp,2)] = f_00[i]*tmp0_2 + f_01[i]*tmp0_5 + f_10[i]*tmp0_3 + f_11[i]*tmp0_4;
400 o[INDEX3(i,0,1,numComp,2)] = f_00[i]*tmp0_0 + f_10[i]*tmp0_1;
401 o[INDEX3(i,1,1,numComp,2)] = f_00[i]*tmp0_6 + f_01[i]*tmp0_9 + f_10[i]*tmp0_7 + f_11[i]*tmp0_8;
402 } /* end of component loop i */
403 } /* end of k0 loop */
404 } /* end of face 2 */
405 if (m_faceOffset[3] > -1) {
406 const double tmp0_0 = 1.0/h0;
407 const double tmp0_4 = -0.21132486540518711775/h1;
408 const double tmp0_1 = -1/h0;
409 const double tmp0_8 = -0.78867513459481288225/h1;
410 const double tmp0_5 = -0.78867513459481288225/h1;
411 const double tmp0_2 = 0.21132486540518711775/h1;
412 const double tmp0_9 = -0.21132486540518711775/h1;
413 const double tmp0_6 = 0.78867513459481288225/h1;
414 const double tmp0_3 = 0.78867513459481288225/h1;
415 const double tmp0_7 = 0.21132486540518711775/h1;
416 #pragma omp for nowait
417 for (index_t k0=0; k0 < m_NE0; ++k0) {
418 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0));
419 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0));
420 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,m_N1-2, m_N0));
421 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,m_N1-2, m_N0));
422 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
423 for (index_t i=0; i < numComp; ++i) {
424 o[INDEX3(i,0,0,numComp,2)] = f_01[i]*tmp0_1 + f_11[i]*tmp0_0;
425 o[INDEX3(i,1,0,numComp,2)] = f_00[i]*tmp0_5 + f_01[i]*tmp0_3 + f_10[i]*tmp0_4 + f_11[i]*tmp0_2;
426 o[INDEX3(i,0,1,numComp,2)] = f_01[i]*tmp0_1 + f_11[i]*tmp0_0;
427 o[INDEX3(i,1,1,numComp,2)] = f_00[i]*tmp0_9 + f_01[i]*tmp0_7 + f_10[i]*tmp0_8 + f_11[i]*tmp0_6;
428 } /* end of component loop i */
429 } /* end of k0 loop */
430 } /* end of face 3 */
431 /* GENERATOR SNIP_GRAD_FACES BOTTOM */
432 } // end of parallel section
433 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
434 #pragma omp parallel
435 {
436 /* GENERATOR SNIP_GRAD_REDUCED_FACES TOP */
437 if (m_faceOffset[0] > -1) {
438 const double tmp0_3 = -1/h1;
439 const double tmp0_2 = 1.0/h1;
440 const double tmp0_1 = -0.5/h0;
441 const double tmp0_0 = 0.5/h0;
442 #pragma omp for nowait
443 for (index_t k1=0; k1 < m_NE1; ++k1) {
444 const register double* f_10 = in.getSampleDataRO(INDEX2(1,k1, m_N0));
445 const register double* f_11 = in.getSampleDataRO(INDEX2(1,k1+1, m_N0));
446 const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0));
447 const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0));
448 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
449 for (index_t i=0; i < numComp; ++i) {
450 o[INDEX3(i,0,0,numComp,2)] = tmp0_0*(f_10[i] + f_11[i]) + tmp0_1*(f_00[i] + f_01[i]);
451 o[INDEX3(i,1,0,numComp,2)] = f_00[i]*tmp0_3 + f_01[i]*tmp0_2;
452 } /* end of component loop i */
453 } /* end of k1 loop */
454 } /* end of face 0 */
455 if (m_faceOffset[1] > -1) {
456 const double tmp0_3 = 1.0/h1;
457 const double tmp0_2 = -1/h1;
458 const double tmp0_1 = -0.5/h0;
459 const double tmp0_0 = 0.5/h0;
460 #pragma omp for nowait
461 for (index_t k1=0; k1 < m_NE1; ++k1) {
462 const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0));
463 const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0));
464 const register double* f_01 = in.getSampleDataRO(INDEX2(m_N0-2,k1+1, m_N0));
465 const register double* f_00 = in.getSampleDataRO(INDEX2(m_N0-2,k1, m_N0));
466 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
467 for (index_t i=0; i < numComp; ++i) {
468 o[INDEX3(i,0,0,numComp,2)] = tmp0_0*(f_10[i] + f_11[i]) + tmp0_1*(f_00[i] + f_01[i]);
469 o[INDEX3(i,1,0,numComp,2)] = f_10[i]*tmp0_2 + f_11[i]*tmp0_3;
470 } /* end of component loop i */
471 } /* end of k1 loop */
472 } /* end of face 1 */
473 if (m_faceOffset[2] > -1) {
474 const double tmp0_3 = 0.5/h1;
475 const double tmp0_2 = -0.5/h1;
476 const double tmp0_1 = 1.0/h0;
477 const double tmp0_0 = -1/h0;
478 #pragma omp for nowait
479 for (index_t k0=0; k0 < m_NE0; ++k0) {
480 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0));
481 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0));
482 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,1, m_N0));
483 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,1, m_N0));
484 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
485 for (index_t i=0; i < numComp; ++i) {
486 o[INDEX3(i,0,0,numComp,2)] = f_00[i]*tmp0_0 + f_10[i]*tmp0_1;
487 o[INDEX3(i,1,0,numComp,2)] = tmp0_2*(f_00[i] + f_10[i]) + tmp0_3*(f_01[i] + f_11[i]);
488 } /* end of component loop i */
489 } /* end of k0 loop */
490 } /* end of face 2 */
491 if (m_faceOffset[3] > -1) {
492 const double tmp0_3 = -0.5/h1;
493 const double tmp0_2 = 0.5/h1;
494 const double tmp0_1 = -1/h0;
495 const double tmp0_0 = 1.0/h0;
496 #pragma omp for nowait
497 for (index_t k0=0; k0 < m_NE0; ++k0) {
498 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0));
499 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0));
500 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,m_N1-2, m_N0));
501 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,m_N1-2, m_N0));
502 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
503 for (index_t i=0; i < numComp; ++i) {
504 o[INDEX3(i,0,0,numComp,2)] = f_01[i]*tmp0_1 + f_11[i]*tmp0_0;
505 o[INDEX3(i,1,0,numComp,2)] = tmp0_2*(f_01[i] + f_11[i]) + tmp0_3*(f_00[i] + f_10[i]);
506 } /* end of component loop i */
507 } /* end of k0 loop */
508 } /* end of face 3 */
509 /* GENERATOR SNIP_GRAD_REDUCED_FACES BOTTOM */
510 } // end of parallel section
511 } else {
512 stringstream msg;
513 msg << "setToGradient() not implemented for "
514 << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
515 throw RipleyException(msg.str());
516 }
517 }
518
519 void Rectangle::setToIntegrals(vector<double>& integrals, const escript::Data& arg) const
520 {
521 escript::Data& in = *const_cast<escript::Data*>(&arg);
522 const dim_t numComp = in.getDataPointSize();
523 const double h0 = m_l0/m_gNE0;
524 const double h1 = m_l1/m_gNE1;
525 if (arg.getFunctionSpace().getTypeCode() == Elements) {
526 const double w_0 = h0*h1/4.;
527 #pragma omp parallel
528 {
529 vector<double> int_local(numComp, 0);
530 #pragma omp for nowait
531 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
532 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
533 const double* f = in.getSampleDataRO(INDEX2(k0, k1, m_NE0));
534 for (index_t i=0; i < numComp; ++i) {
535 const register double f_0 = f[INDEX2(i,0,numComp)];
536 const register double f_1 = f[INDEX2(i,1,numComp)];
537 const register double f_2 = f[INDEX2(i,2,numComp)];
538 const register double f_3 = f[INDEX2(i,3,numComp)];
539 int_local[i]+=(f_0+f_1+f_2+f_3)*w_0;
540 } /* end of component loop i */
541 } /* end of k0 loop */
542 } /* end of k1 loop */
543
544 #pragma omp critical
545 for (index_t i=0; i<numComp; i++)
546 integrals[i]+=int_local[i];
547 } // end of parallel section
548 } else if (arg.getFunctionSpace().getTypeCode() == ReducedElements) {
549 const double w_0 = h0*h1;
550 #pragma omp parallel
551 {
552 vector<double> int_local(numComp, 0);
553 #pragma omp for nowait
554 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
555 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
556 const double* f = in.getSampleDataRO(INDEX2(k0, k1, m_NE0));
557 for (index_t i=0; i < numComp; ++i) {
558 int_local[i]+=f[i]*w_0;
559 } /* end of component loop i */
560 } /* end of k0 loop */
561 } /* end of k1 loop */
562
563 #pragma omp critical
564 for (index_t i=0; i<numComp; i++)
565 integrals[i]+=int_local[i];
566 } // end of parallel section
567 } else if (arg.getFunctionSpace().getTypeCode() == FaceElements) {
568 const double w_0 = h0/2.;
569 const double w_1 = h1/2.;
570 #pragma omp parallel
571 {
572 vector<double> int_local(numComp, 0);
573 if (m_faceOffset[0] > -1) {
574 #pragma omp for nowait
575 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
576 const double* f = in.getSampleDataRO(m_faceOffset[0]+k1);
577 for (index_t i=0; i < numComp; ++i) {
578 const register double f_0 = f[INDEX2(i,0,numComp)];
579 const register double f_1 = f[INDEX2(i,1,numComp)];
580 int_local[i]+=(f_0+f_1)*w_1;
581 } /* end of component loop i */
582 } /* end of k1 loop */
583 }
584
585 if (m_faceOffset[1] > -1) {
586 #pragma omp for nowait
587 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
588 const double* f = in.getSampleDataRO(m_faceOffset[1]+k1);
589 for (index_t i=0; i < numComp; ++i) {
590 const register double f_0 = f[INDEX2(i,0,numComp)];
591 const register double f_1 = f[INDEX2(i,1,numComp)];
592 int_local[i]+=(f_0+f_1)*w_1;
593 } /* end of component loop i */
594 } /* end of k1 loop */
595 }
596
597 if (m_faceOffset[2] > -1) {
598 #pragma omp for nowait
599 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
600 const double* f = in.getSampleDataRO(m_faceOffset[2]+k0);
601 for (index_t i=0; i < numComp; ++i) {
602 const register double f_0 = f[INDEX2(i,0,numComp)];
603 const register double f_1 = f[INDEX2(i,1,numComp)];
604 int_local[i]+=(f_0+f_1)*w_0;
605 } /* end of component loop i */
606 } /* end of k0 loop */
607 }
608
609 if (m_faceOffset[3] > -1) {
610 #pragma omp for nowait
611 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
612 const double* f = in.getSampleDataRO(m_faceOffset[3]+k0);
613 for (index_t i=0; i < numComp; ++i) {
614 const register double f_0 = f[INDEX2(i,0,numComp)];
615 const register double f_1 = f[INDEX2(i,1,numComp)];
616 int_local[i]+=(f_0+f_1)*w_0;
617 } /* end of component loop i */
618 } /* end of k0 loop */
619 }
620
621 #pragma omp critical
622 for (index_t i=0; i<numComp; i++)
623 integrals[i]+=int_local[i];
624 } // end of parallel section
625 } else if (arg.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
626 #pragma omp parallel
627 {
628 vector<double> int_local(numComp, 0);
629 if (m_faceOffset[0] > -1) {
630 #pragma omp for nowait
631 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
632 const double* f = in.getSampleDataRO(m_faceOffset[0]+k1);
633 for (index_t i=0; i < numComp; ++i) {
634 int_local[i]+=f[i]*h1;
635 } /* end of component loop i */
636 } /* end of k1 loop */
637 }
638
639 if (m_faceOffset[1] > -1) {
640 #pragma omp for nowait
641 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
642 const double* f = in.getSampleDataRO(m_faceOffset[1]+k1);
643 for (index_t i=0; i < numComp; ++i) {
644 int_local[i]+=f[i]*h1;
645 } /* end of component loop i */
646 } /* end of k1 loop */
647 }
648
649 if (m_faceOffset[2] > -1) {
650 #pragma omp for nowait
651 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
652 const double* f = in.getSampleDataRO(m_faceOffset[2]+k0);
653 for (index_t i=0; i < numComp; ++i) {
654 int_local[i]+=f[i]*h0;
655 } /* end of component loop i */
656 } /* end of k0 loop */
657 }
658
659 if (m_faceOffset[3] > -1) {
660 #pragma omp for nowait
661 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
662 const double* f = in.getSampleDataRO(m_faceOffset[3]+k0);
663 for (index_t i=0; i < numComp; ++i) {
664 int_local[i]+=f[i]*h0;
665 } /* end of component loop i */
666 } /* end of k0 loop */
667 }
668
669 #pragma omp critical
670 for (index_t i=0; i<numComp; i++)
671 integrals[i]+=int_local[i];
672 } // end of parallel section
673 } else {
674 stringstream msg;
675 msg << "setToIntegrals() not implemented for "
676 << functionSpaceTypeAsString(arg.getFunctionSpace().getTypeCode());
677 throw RipleyException(msg.str());
678 }
679 }
680
681 void Rectangle::setToNormal(escript::Data& out) const
682 {
683 if (out.getFunctionSpace().getTypeCode() == FaceElements) {
684 #pragma omp parallel
685 {
686 if (m_faceOffset[0] > -1) {
687 #pragma omp for nowait
688 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
689 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
690 // set vector at two quadrature points
691 *o++ = -1.;
692 *o++ = 0.;
693 *o++ = -1.;
694 *o = 0.;
695 }
696 }
697
698 if (m_faceOffset[1] > -1) {
699 #pragma omp for nowait
700 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
701 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
702 // set vector at two quadrature points
703 *o++ = 1.;
704 *o++ = 0.;
705 *o++ = 1.;
706 *o = 0.;
707 }
708 }
709
710 if (m_faceOffset[2] > -1) {
711 #pragma omp for nowait
712 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
713 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
714 // set vector at two quadrature points
715 *o++ = 0.;
716 *o++ = -1.;
717 *o++ = 0.;
718 *o = -1.;
719 }
720 }
721
722 if (m_faceOffset[3] > -1) {
723 #pragma omp for nowait
724 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
725 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
726 // set vector at two quadrature points
727 *o++ = 0.;
728 *o++ = 1.;
729 *o++ = 0.;
730 *o = 1.;
731 }
732 }
733 } // end of parallel section
734 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
735 #pragma omp parallel
736 {
737 if (m_faceOffset[0] > -1) {
738 #pragma omp for nowait
739 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
740 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
741 *o++ = -1.;
742 *o = 0.;
743 }
744 }
745
746 if (m_faceOffset[1] > -1) {
747 #pragma omp for nowait
748 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
749 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
750 *o++ = 1.;
751 *o = 0.;
752 }
753 }
754
755 if (m_faceOffset[2] > -1) {
756 #pragma omp for nowait
757 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
758 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
759 *o++ = 0.;
760 *o = -1.;
761 }
762 }
763
764 if (m_faceOffset[3] > -1) {
765 #pragma omp for nowait
766 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
767 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
768 *o++ = 0.;
769 *o = 1.;
770 }
771 }
772 } // end of parallel section
773
774 } else {
775 stringstream msg;
776 msg << "setToNormal() not implemented for "
777 << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
778 throw RipleyException(msg.str());
779 }
780 }
781
782 void Rectangle::addPDEToSystem(escript::AbstractSystemMatrix& mat,
783 escript::Data& rhs, const escript::Data& A, const escript::Data& B,
784 const escript::Data& C, const escript::Data& D,
785 const escript::Data& X, const escript::Data& Y,
786 const escript::Data& d, const escript::Data& y,
787 const escript::Data& d_contact, const escript::Data& y_contact,
788 const escript::Data& d_dirac, const escript::Data& y_dirac) const
789 {
790 throw RipleyException("addPDEToSystem() not implemented");
791 }
792
793 Paso_SystemMatrixPattern* Rectangle::getPattern(bool reducedRowOrder,
794 bool reducedColOrder) const
795 {
796 if (reducedRowOrder || reducedColOrder)
797 throw RipleyException("getPattern() not implemented for reduced order");
798
799 // connector
800 RankVector neighbour;
801 IndexVector offsetInShared(1,0);
802 IndexVector sendShared, recvShared;
803 const IndexVector faces=getNumFacesPerBoundary();
804 const index_t left = (m_offset0==0 ? 0 : 1);
805 const index_t bottom = (m_offset1==0 ? 0 : 1);
806 // corner node from bottom-left
807 if (faces[0] == 0 && faces[2] == 0) {
808 neighbour.push_back(m_mpiInfo->rank-m_NX-1);
809 offsetInShared.push_back(offsetInShared.back()+1);
810 sendShared.push_back(m_nodeId[m_N0+left]);
811 recvShared.push_back(m_nodeId[0]);
812 }
813 // bottom edge
814 if (faces[2] == 0) {
815 neighbour.push_back(m_mpiInfo->rank-m_NX);
816 offsetInShared.push_back(offsetInShared.back()+m_N0-left);
817 for (dim_t i=left; i<m_N0; i++) {
818 // easy case, we know the neighbour id's
819 sendShared.push_back(m_nodeId[m_N0+i]);
820 recvShared.push_back(m_nodeId[i]);
821 }
822 }
823 // corner node from bottom-right
824 if (faces[1] == 0 && faces[2] == 0) {
825 neighbour.push_back(m_mpiInfo->rank-m_NX+1);
826 const index_t N0=(neighbour.back()%m_NX == 0 ? m_N0 : m_N0-1);
827 const index_t N1=(neighbour.back()/m_NX == 0 ? m_N1 : m_N1-1);
828 const index_t first=m_nodeDistribution[neighbour.back()];
829 offsetInShared.push_back(offsetInShared.back()+1);
830 sendShared.push_back(m_nodeId[(bottom+1)*m_N0-1]);
831 recvShared.push_back(first+N0*(N1-1));
832 }
833 // left edge
834 if (faces[0] == 0) {
835 neighbour.push_back(m_mpiInfo->rank-1);
836 offsetInShared.push_back(offsetInShared.back()+m_N1-bottom);
837 for (dim_t i=bottom; i<m_N1; i++) {
838 // easy case, we know the neighbour id's
839 sendShared.push_back(m_nodeId[i*m_N0+1]);
840 recvShared.push_back(m_nodeId[i*m_N0]);
841 }
842 }
843 // right edge
844 if (faces[1] == 0) {
845 neighbour.push_back(m_mpiInfo->rank+1);
846 const index_t rightN0=(neighbour.back()%m_NX == 0 ? m_N0 : m_N0-1);
847 const index_t first=m_nodeDistribution[neighbour.back()];
848 offsetInShared.push_back(offsetInShared.back()+m_N1-bottom);
849 for (dim_t i=bottom; i<m_N1; i++) {
850 sendShared.push_back(m_nodeId[(i+1)*m_N0-1]);
851 recvShared.push_back(first+rightN0*(i-bottom));
852 }
853 }
854 // corner node from top-left
855 if (faces[0] == 0 && faces[3] == 0) {
856 neighbour.push_back(m_mpiInfo->rank+m_NX-1);
857 const index_t N0=(neighbour.back()%m_NX == 0 ? m_N0 : m_N0-1);
858 const index_t first=m_nodeDistribution[neighbour.back()];
859 offsetInShared.push_back(offsetInShared.back()+1);
860 sendShared.push_back(m_nodeId[m_N0*(m_N1-1)+left]);
861 recvShared.push_back(first+N0-1);
862 }
863 // top edge
864 if (faces[3] == 0) {
865 neighbour.push_back(m_mpiInfo->rank+m_NX);
866 const index_t first=m_nodeDistribution[neighbour.back()];
867 offsetInShared.push_back(offsetInShared.back()+m_N0-left);
868 for (dim_t i=left; i<m_N0; i++) {
869 sendShared.push_back(m_nodeId[m_N0*(m_N1-1)+i]);
870 recvShared.push_back(first+i-left);
871 }
872 }
873 // corner node from top-right
874 if (faces[1] == 0 && faces[3] == 0) {
875 neighbour.push_back(m_mpiInfo->rank+m_NX+1);
876 const index_t first=m_nodeDistribution[neighbour.back()];
877 offsetInShared.push_back(offsetInShared.back()+1);
878 sendShared.push_back(m_nodeId[m_N0*m_N1-1]);
879 recvShared.push_back(first);
880 }
881 const int numDOF=m_nodeDistribution[m_mpiInfo->rank+1]-m_nodeDistribution[m_mpiInfo->rank];
882 /*
883 cout << "--- rcv_shcomp ---" << endl;
884 cout << "numDOF=" << numDOF << ", numNeighbors=" << neighbour.size() << endl;
885 for (size_t i=0; i<neighbour.size(); i++) {
886 cout << "neighbor[" << i << "]=" << neighbour[i]
887 << " offsetInShared[" << i+1 << "]=" << offsetInShared[i+1] << endl;
888 }
889 for (size_t i=0; i<recvShared.size(); i++) {
890 cout << "shared[" << i << "]=" << recvShared[i] << endl;
891 }
892 cout << "--- snd_shcomp ---" << endl;
893 for (size_t i=0; i<sendShared.size(); i++) {
894 cout << "shared[" << i << "]=" << sendShared[i] << endl;
895 }
896 */
897
898 Paso_SharedComponents *snd_shcomp = Paso_SharedComponents_alloc(
899 numDOF, neighbour.size(), &neighbour[0], &sendShared[0],
900 &offsetInShared[0], 1, 0, m_mpiInfo);
901 Paso_SharedComponents *rcv_shcomp = Paso_SharedComponents_alloc(
902 numDOF, neighbour.size(), &neighbour[0], &recvShared[0],
903 &offsetInShared[0], 1, 0, m_mpiInfo);
904 Paso_Connector* connector = Paso_Connector_alloc(snd_shcomp, rcv_shcomp);
905 Paso_SharedComponents_free(snd_shcomp);
906 Paso_SharedComponents_free(rcv_shcomp);
907
908 // create patterns
909 dim_t M, N;
910 IndexVector ptr(1,0);
911 IndexVector index;
912
913 // main pattern
914 for (index_t i=0; i<numDOF; i++) {
915 // always add the node itself
916 index.push_back(i);
917 int num=insertNeighbours(index, i);
918 ptr.push_back(ptr.back()+num+1);
919 }
920 M=N=ptr.size()-1;
921 // paso will manage the memory
922 index_t* indexC = MEMALLOC(index.size(),index_t);
923 index_t* ptrC = MEMALLOC(ptr.size(), index_t);
924 copy(index.begin(), index.end(), indexC);
925 copy(ptr.begin(), ptr.end(), ptrC);
926 Paso_Pattern *mainPattern = Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT,
927 M, N, ptrC, indexC);
928
929 /*
930 cout << "--- main_pattern ---" << endl;
931 cout << "M=" << M << ", N=" << N << endl;
932 for (size_t i=0; i<ptr.size(); i++) {
933 cout << "ptr[" << i << "]=" << ptr[i] << endl;
934 }
935 for (size_t i=0; i<index.size(); i++) {
936 cout << "index[" << i << "]=" << index[i] << endl;
937 }
938 */
939
940 ptr.clear();
941 index.clear();
942
943 // column & row couple patterns
944 Paso_Pattern *colCouplePattern, *rowCouplePattern;
945 generateCouplePatterns(&colCouplePattern, &rowCouplePattern);
946
947 // allocate paso distribution
948 Paso_Distribution* distribution = Paso_Distribution_alloc(m_mpiInfo,
949 const_cast<index_t*>(&m_nodeDistribution[0]), 1, 0);
950
951 Paso_SystemMatrixPattern* pattern = Paso_SystemMatrixPattern_alloc(
952 MATRIX_FORMAT_DEFAULT, distribution, distribution,
953 mainPattern, colCouplePattern, rowCouplePattern,
954 connector, connector);
955 Paso_Pattern_free(mainPattern);
956 Paso_Pattern_free(colCouplePattern);
957 Paso_Pattern_free(rowCouplePattern);
958 Paso_Distribution_free(distribution);
959 return pattern;
960 }
961
962 void Rectangle::Print_Mesh_Info(const bool full) const
963 {
964 RipleyDomain::Print_Mesh_Info(full);
965 if (full) {
966 cout << " Id Coordinates" << endl;
967 cout.precision(15);
968 cout.setf(ios::scientific, ios::floatfield);
969 pair<double,double> xdx = getFirstCoordAndSpacing(0);
970 pair<double,double> ydy = getFirstCoordAndSpacing(1);
971 for (index_t i=0; i < getNumNodes(); i++) {
972 cout << " " << setw(5) << m_nodeId[i]
973 << " " << xdx.first+(i%m_N0)*xdx.second
974 << " " << ydy.first+(i/m_N0)*ydy.second << endl;
975 }
976 }
977 }
978
979 IndexVector Rectangle::getNumNodesPerDim() const
980 {
981 IndexVector ret;
982 ret.push_back(m_N0);
983 ret.push_back(m_N1);
984 return ret;
985 }
986
987 IndexVector Rectangle::getNumElementsPerDim() const
988 {
989 IndexVector ret;
990 ret.push_back(m_NE0);
991 ret.push_back(m_NE1);
992 return ret;
993 }
994
995 IndexVector Rectangle::getNumFacesPerBoundary() const
996 {
997 IndexVector ret(4, 0);
998 //left
999 if (m_offset0==0)
1000 ret[0]=m_NE1;
1001 //right
1002 if (m_mpiInfo->rank%m_NX==m_NX-1)
1003 ret[1]=m_NE1;
1004 //bottom
1005 if (m_offset1==0)
1006 ret[2]=m_NE0;
1007 //top
1008 if (m_mpiInfo->rank/m_NX==m_NY-1)
1009 ret[3]=m_NE0;
1010 return ret;
1011 }
1012
1013 pair<double,double> Rectangle::getFirstCoordAndSpacing(dim_t dim) const
1014 {
1015 if (dim==0) {
1016 return pair<double,double>((m_l0*m_offset0)/m_gNE0, m_l0/m_gNE0);
1017 } else if (dim==1) {
1018 return pair<double,double>((m_l1*m_offset1)/m_gNE1, m_l1/m_gNE1);
1019 }
1020 throw RipleyException("getFirstCoordAndSpacing(): invalid argument");
1021 }
1022
1023 //protected
1024 dim_t Rectangle::getNumFaceElements() const
1025 {
1026 const IndexVector faces = getNumFacesPerBoundary();
1027 dim_t n=0;
1028 for (size_t i=0; i<faces.size(); i++)
1029 n+=faces[i];
1030 return n;
1031 }
1032
1033 //protected
1034 void Rectangle::assembleCoordinates(escript::Data& arg) const
1035 {
1036 escriptDataC x = arg.getDataC();
1037 int numDim = m_numDim;
1038 if (!isDataPointShapeEqual(&x, 1, &numDim))
1039 throw RipleyException("setToX: Invalid Data object shape");
1040 if (!numSamplesEqual(&x, 1, getNumNodes()))
1041 throw RipleyException("setToX: Illegal number of samples in Data object");
1042
1043 pair<double,double> xdx = getFirstCoordAndSpacing(0);
1044 pair<double,double> ydy = getFirstCoordAndSpacing(1);
1045 arg.requireWrite();
1046 #pragma omp parallel for
1047 for (dim_t i1 = 0; i1 < m_N1; i1++) {
1048 for (dim_t i0 = 0; i0 < m_N0; i0++) {
1049 double* point = arg.getSampleDataRW(i0+m_N0*i1);
1050 point[0] = xdx.first+i0*xdx.second;
1051 point[1] = ydy.first+i1*ydy.second;
1052 }
1053 }
1054 }
1055
1056 //private
1057 void Rectangle::populateSampleIds()
1058 {
1059 // identifiers are ordered from left to right, bottom to top on each rank,
1060 // except for the shared nodes which are owned by the rank below / to the
1061 // left of the current rank
1062
1063 // build node distribution vector first.
1064 // m_nodeDistribution[i] is the first node id on rank i, that is
1065 // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes
1066 m_nodeDistribution.assign(m_mpiInfo->size+1, 0);
1067 m_nodeDistribution[1]=getNumNodes();
1068 for (dim_t k=1; k<m_mpiInfo->size-1; k++) {
1069 const index_t x=k%m_NX;
1070 const index_t y=k/m_NX;
1071 index_t numNodes=getNumNodes();
1072 if (x>0)
1073 numNodes-=m_N1;
1074 if (y>0)
1075 numNodes-=m_N0;
1076 if (x>0 && y>0)
1077 numNodes++; // subtracted corner twice -> fix that
1078 m_nodeDistribution[k+1]=m_nodeDistribution[k]+numNodes;
1079 }
1080 m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal();
1081
1082 m_nodeId.resize(getNumNodes());
1083
1084 // the bottom row and left column are not owned by this rank so the
1085 // identifiers need to be computed accordingly
1086 const index_t left = (m_offset0==0 ? 0 : 1);
1087 const index_t bottom = (m_offset1==0 ? 0 : 1);
1088 if (left>0) {
1089 const int neighbour=m_mpiInfo->rank-1;
1090 const index_t leftN0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1091 #pragma omp parallel for
1092 for (dim_t i1=bottom; i1<m_N1; i1++) {
1093 m_nodeId[i1*m_N0]=m_nodeDistribution[neighbour]
1094 + (i1-bottom+1)*leftN0-1;
1095 }
1096 }
1097 if (bottom>0) {
1098 const int neighbour=m_mpiInfo->rank-m_NX;
1099 const index_t bottomN0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1100 const index_t bottomN1=(neighbour/m_NX == 0 ? m_N1 : m_N1-1);
1101 #pragma omp parallel for
1102 for (dim_t i0=left; i0<m_N0; i0++) {
1103 m_nodeId[i0]=m_nodeDistribution[neighbour]
1104 + (bottomN1-1)*bottomN0 + i0 - left;
1105 }
1106 }
1107 if (left>0 && bottom>0) {
1108 const int neighbour=m_mpiInfo->rank-m_NX-1;
1109 const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1110 const index_t N1=(neighbour/m_NX == 0 ? m_N1 : m_N1-1);
1111 m_nodeId[0]=m_nodeDistribution[neighbour]+N0*N1-1;
1112 }
1113
1114 // the rest of the id's are contiguous
1115 const index_t firstId=m_nodeDistribution[m_mpiInfo->rank];
1116 #pragma omp parallel for
1117 for (dim_t i1=bottom; i1<m_N1; i1++) {
1118 for (dim_t i0=left; i0<m_N0; i0++) {
1119 m_nodeId[i0+i1*m_N0] = firstId+i0-left+(i1-bottom)*(m_N0-left);
1120 }
1121 }
1122
1123 // element id's
1124 m_elementId.resize(getNumElements());
1125 #pragma omp parallel for
1126 for (dim_t k=0; k<getNumElements(); k++) {
1127 m_elementId[k]=k;
1128 }
1129
1130 // face element id's
1131 m_faceId.resize(getNumFaceElements());
1132 #pragma omp parallel for
1133 for (dim_t k=0; k<getNumFaceElements(); k++) {
1134 m_faceId[k]=k;
1135 }
1136
1137 // generate face offset vector and set face tags
1138 const IndexVector facesPerEdge = getNumFacesPerBoundary();
1139 const index_t LEFT=1, RIGHT=2, BOTTOM=10, TOP=20;
1140 const index_t faceTag[] = { LEFT, RIGHT, BOTTOM, TOP };
1141 m_faceOffset.assign(facesPerEdge.size(), -1);
1142 m_faceTags.clear();
1143 index_t offset=0;
1144 for (size_t i=0; i<facesPerEdge.size(); i++) {
1145 if (facesPerEdge[i]>0) {
1146 m_faceOffset[i]=offset;
1147 offset+=facesPerEdge[i];
1148 m_faceTags.insert(m_faceTags.end(), facesPerEdge[i], faceTag[i]);
1149 }
1150 }
1151 setTagMap("left", LEFT);
1152 setTagMap("right", RIGHT);
1153 setTagMap("bottom", BOTTOM);
1154 setTagMap("top", TOP);
1155 updateTagsInUse(FaceElements);
1156 }
1157
1158 //private
1159 int Rectangle::insertNeighbours(IndexVector& index, index_t node) const
1160 {
1161 const dim_t myN0 = (m_offset0==0 ? m_N0 : m_N0-1);
1162 const dim_t myN1 = (m_offset1==0 ? m_N1 : m_N1-1);
1163 const int x=node%myN0;
1164 const int y=node/myN0;
1165 int num=0;
1166 if (y>0) {
1167 if (x>0) {
1168 // bottom-left
1169 index.push_back(node-myN0-1);
1170 num++;
1171 }
1172 // bottom
1173 index.push_back(node-myN0);
1174 num++;
1175 if (x<myN0-1) {
1176 // bottom-right
1177 index.push_back(node-myN0+1);
1178 num++;
1179 }
1180 }
1181 if (x<myN0-1) {
1182 // right
1183 index.push_back(node+1);
1184 num++;
1185 if (y<myN1-1) {
1186 // top-right
1187 index.push_back(node+myN0+1);
1188 num++;
1189 }
1190 }
1191 if (y<myN1-1) {
1192 // top
1193 index.push_back(node+myN0);
1194 num++;
1195 if (x>0) {
1196 // top-left
1197 index.push_back(node+myN0-1);
1198 num++;
1199 }
1200 }
1201 if (x>0) {
1202 // left
1203 index.push_back(node-1);
1204 num++;
1205 }
1206
1207 return num;
1208 }
1209
1210 //private
1211 void Rectangle::generateCouplePatterns(Paso_Pattern** colPattern, Paso_Pattern** rowPattern) const
1212 {
1213 IndexVector ptr(1,0);
1214 IndexVector index;
1215 const dim_t myN0 = (m_offset0==0 ? m_N0 : m_N0-1);
1216 const dim_t myN1 = (m_offset1==0 ? m_N1 : m_N1-1);
1217 const IndexVector faces=getNumFacesPerBoundary();
1218
1219 // bottom edge
1220 dim_t n=0;
1221 if (faces[0] == 0) {
1222 index.push_back(2*(myN0+myN1+1));
1223 index.push_back(2*(myN0+myN1+1)+1);
1224 n+=2;
1225 if (faces[2] == 0) {
1226 index.push_back(0);
1227 index.push_back(1);
1228 index.push_back(2);
1229 n+=3;
1230 }
1231 } else if (faces[2] == 0) {
1232 index.push_back(1);
1233 index.push_back(2);
1234 n+=2;
1235 }
1236 // n=neighbours of bottom-left corner node
1237 ptr.push_back(ptr.back()+n);
1238 n=0;
1239 if (faces[2] == 0) {
1240 for (dim_t i=1; i<myN0-1; i++) {
1241 index.push_back(i);
1242 index.push_back(i+1);
1243 index.push_back(i+2);
1244 ptr.push_back(ptr.back()+3);
1245 }
1246 index.push_back(myN0-1);
1247 index.push_back(myN0);
1248 n+=2;
1249 if (faces[1] == 0) {
1250 index.push_back(myN0+1);
1251 index.push_back(myN0+2);
1252 index.push_back(myN0+3);
1253 n+=3;
1254 }
1255 } else {
1256 for (dim_t i=1; i<myN0-1; i++) {
1257 ptr.push_back(ptr.back());
1258 }
1259 if (faces[1] == 0) {
1260 index.push_back(myN0+2);
1261 index.push_back(myN0+3);
1262 n+=2;
1263 }
1264 }
1265 // n=neighbours of bottom-right corner node
1266 ptr.push_back(ptr.back()+n);
1267
1268 // 2nd row to 2nd last row
1269 for (dim_t i=1; i<myN1-1; i++) {
1270 // left edge
1271 if (faces[0] == 0) {
1272 index.push_back(2*(myN0+myN1+2)-i);
1273 index.push_back(2*(myN0+myN1+2)-i-1);
1274 index.push_back(2*(myN0+myN1+2)-i-2);
1275 ptr.push_back(ptr.back()+3);
1276 } else {
1277 ptr.push_back(ptr.back());
1278 }
1279 for (dim_t j=1; j<myN0-1; j++) {
1280 ptr.push_back(ptr.back());
1281 }
1282 // right edge
1283 if (faces[1] == 0) {
1284 index.push_back(myN0+i+1);
1285 index.push_back(myN0+i+2);
1286 index.push_back(myN0+i+3);
1287 ptr.push_back(ptr.back()+3);
1288 } else {
1289 ptr.push_back(ptr.back());
1290 }
1291 }
1292
1293 // top edge
1294 n=0;
1295 if (faces[0] == 0) {
1296 index.push_back(2*myN0+myN1+5);
1297 index.push_back(2*myN0+myN1+4);
1298 n+=2;
1299 if (faces[3] == 0) {
1300 index.push_back(2*myN0+myN1+3);
1301 index.push_back(2*myN0+myN1+2);
1302 index.push_back(2*myN0+myN1+1);
1303 n+=3;
1304 }
1305 } else if (faces[3] == 0) {
1306 index.push_back(2*myN0+myN1+2);
1307 index.push_back(2*myN0+myN1+1);
1308 n+=2;
1309 }
1310 // n=neighbours of top-left corner node
1311 ptr.push_back(ptr.back()+n);
1312 n=0;
1313 if (faces[3] == 0) {
1314 for (dim_t i=1; i<myN0-1; i++) {
1315 index.push_back(2*myN0+myN1+i+1);
1316 index.push_back(2*myN0+myN1+i);
1317 index.push_back(2*myN0+myN1+i-1);
1318 ptr.push_back(ptr.back()+3);
1319 }
1320 index.push_back(myN0+myN1+4);
1321 index.push_back(myN0+myN1+3);
1322 n+=2;
1323 if (faces[1] == 0) {
1324 index.push_back(myN0+myN1+2);
1325 index.push_back(myN0+myN1+1);
1326 index.push_back(myN0+myN1);
1327 n+=3;
1328 }
1329 } else {
1330 for (dim_t i=1; i<myN0-1; i++) {
1331 ptr.push_back(ptr.back());
1332 }
1333 if (faces[1] == 0) {
1334 index.push_back(myN0+myN1+1);
1335 index.push_back(myN0+myN1);
1336 n+=2;
1337 }
1338 }
1339 // n=neighbours of top-right corner node
1340 ptr.push_back(ptr.back()+n);
1341
1342 dim_t M=ptr.size()-1;
1343 map<index_t,index_t> idMap;
1344 dim_t N=0;
1345 for (IndexVector::iterator it=index.begin(); it!=index.end(); it++) {
1346 if (idMap.find(*it)==idMap.end()) {
1347 idMap[*it]=N;
1348 *it=N++;
1349 } else {
1350 *it=idMap[*it];
1351 }
1352 }
1353
1354 /*
1355 cout << "--- colCouple_pattern ---" << endl;
1356 cout << "M=" << M << ", N=" << N << endl;
1357 for (size_t i=0; i<ptr.size(); i++) {
1358 cout << "ptr[" << i << "]=" << ptr[i] << endl;
1359 }
1360 for (size_t i=0; i<index.size(); i++) {
1361 cout << "index[" << i << "]=" << index[i] << endl;
1362 }
1363 */
1364
1365 // now build the row couple pattern
1366 IndexVector ptr2(1,0);
1367 IndexVector index2;
1368 for (dim_t id=0; id<N; id++) {
1369 n=0;
1370 for (dim_t i=0; i<M; i++) {
1371 for (dim_t j=ptr[i]; j<ptr[i+1]; j++) {
1372 if (index[j]==id) {
1373 index2.push_back(i);
1374 n++;
1375 break;
1376 }
1377 }
1378 }
1379 ptr2.push_back(ptr2.back()+n);
1380 }
1381
1382 /*
1383 cout << "--- rowCouple_pattern ---" << endl;
1384 cout << "M=" << N << ", N=" << M << endl;
1385 for (size_t i=0; i<ptr2.size(); i++) {
1386 cout << "ptr[" << i << "]=" << ptr2[i] << endl;
1387 }
1388 for (size_t i=0; i<index2.size(); i++) {
1389 cout << "index[" << i << "]=" << index2[i] << endl;
1390 }
1391 */
1392
1393 // paso will manage the memory
1394 index_t* indexC = MEMALLOC(index.size(), index_t);
1395 index_t* ptrC = MEMALLOC(ptr.size(), index_t);
1396 copy(index.begin(), index.end(), indexC);
1397 copy(ptr.begin(), ptr.end(), ptrC);
1398 *colPattern=Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT, M, N, ptrC, indexC);
1399
1400 // paso will manage the memory
1401 indexC = MEMALLOC(index2.size(), index_t);
1402 ptrC = MEMALLOC(ptr2.size(), index_t);
1403 copy(index2.begin(), index2.end(), indexC);
1404 copy(ptr2.begin(), ptr2.end(), ptrC);
1405 *rowPattern=Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT, N, M, ptrC, indexC);
1406 }
1407
1408 //protected
1409 void Rectangle::interpolateNodesOnElements(escript::Data& out,
1410 escript::Data& in, bool reduced) const
1411 {
1412 const dim_t numComp = in.getDataPointSize();
1413 if (reduced) {
1414 /* GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS TOP */
1415 const double tmp0_0 = 0.25000000000000000000;
1416 #pragma omp parallel for
1417 for (index_t k1=0; k1 < m_NE1; ++k1) {
1418 for (index_t k0=0; k0 < m_NE0; ++k0) {
1419 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0));
1420 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0));
1421 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0));
1422 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0));
1423 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0));
1424 for (index_t i=0; i < numComp; ++i) {
1425 o[INDEX2(i,numComp,0)] = tmp0_0*(f_00[i] + f_01[i] + f_10[i] + f_11[i]);
1426 } /* end of component loop i */
1427 } /* end of k0 loop */
1428 } /* end of k1 loop */
1429 /* GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS BOTTOM */
1430 } else {
1431 /* GENERATOR SNIP_INTERPOLATE_ELEMENTS TOP */
1432 const double tmp0_2 = 0.62200846792814621559;
1433 const double tmp0_1 = 0.044658198738520451079;
1434 const double tmp0_0 = 0.16666666666666666667;
1435 #pragma omp parallel for
1436 for (index_t k1=0; k1 < m_NE1; ++k1) {
1437 for (index_t k0=0; k0 < m_NE0; ++k0) {
1438 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0));
1439 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0));
1440 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0));
1441 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0));
1442 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0));
1443 for (index_t i=0; i < numComp; ++i) {
1444 o[INDEX2(i,numComp,0)] = f_00[i]*tmp0_2 + f_11[i]*tmp0_1 + tmp0_0*(f_01[i] + f_10[i]);
1445 o[INDEX2(i,numComp,1)] = f_01[i]*tmp0_1 + f_10[i]*tmp0_2 + tmp0_0*(f_00[i] + f_11[i]);
1446 o[INDEX2(i,numComp,2)] = f_01[i]*tmp0_2 + f_10[i]*tmp0_1 + tmp0_0*(f_00[i] + f_11[i]);
1447 o[INDEX2(i,numComp,3)] = f_00[i]*tmp0_1 + f_11[i]*tmp0_2 + tmp0_0*(f_01[i] + f_10[i]);
1448 } /* end of component loop i */
1449 } /* end of k0 loop */
1450 } /* end of k1 loop */
1451 /* GENERATOR SNIP_INTERPOLATE_ELEMENTS BOTTOM */
1452 }
1453 }
1454
1455 //protected
1456 void Rectangle::interpolateNodesOnFaces(escript::Data& out, escript::Data& in,
1457 bool reduced) const
1458 {
1459 const dim_t numComp = in.getDataPointSize();
1460 if (reduced) {
1461 /* GENERATOR SNIP_INTERPOLATE_REDUCED_FACES TOP */
1462 if (m_faceOffset[0] > -1) {
1463 const double tmp0_0 = 0.50000000000000000000;
1464 #pragma omp parallel for
1465 for (index_t k1=0; k1 < m_NE1; ++k1) {
1466 const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0));
1467 const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0));
1468 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
1469 for (index_t i=0; i < numComp; ++i) {
1470 o[INDEX2(i,numComp,0)] = tmp0_0*(f_00[i] + f_01[i]);
1471 } /* end of component loop i */
1472 } /* end of k1 loop */
1473 } /* end of face 0 */
1474 if (m_faceOffset[1] > -1) {
1475 const double tmp0_0 = 0.50000000000000000000;
1476 #pragma omp parallel for
1477 for (index_t k1=0; k1 < m_NE1; ++k1) {
1478 const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0));
1479 const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0));
1480 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
1481 for (index_t i=0; i < numComp; ++i) {
1482 o[INDEX2(i,numComp,0)] = tmp0_0*(f_10[i] + f_11[i]);
1483 } /* end of component loop i */
1484 } /* end of k1 loop */
1485 } /* end of face 1 */
1486 if (m_faceOffset[2] > -1) {
1487 const double tmp0_0 = 0.50000000000000000000;
1488 #pragma omp parallel for
1489 for (index_t k0=0; k0 < m_NE0; ++k0) {
1490 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0));
1491 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0));
1492 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1493 for (index_t i=0; i < numComp; ++i) {
1494 o[INDEX2(i,numComp,0)] = tmp0_0*(f_00[i] + f_10[i]);
1495 } /* end of component loop i */
1496 } /* end of k0 loop */
1497 } /* end of face 2 */
1498 if (m_faceOffset[3] > -1) {
1499 const double tmp0_0 = 0.50000000000000000000;
1500 #pragma omp parallel for
1501 for (index_t k0=0; k0 < m_NE0; ++k0) {
1502 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0));
1503 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0));
1504 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1505 for (index_t i=0; i < numComp; ++i) {
1506 o[INDEX2(i,numComp,0)] = tmp0_0*(f_01[i] + f_11[i]);
1507 } /* end of component loop i */
1508 } /* end of k0 loop */
1509 } /* end of face 3 */
1510 /* GENERATOR SNIP_INTERPOLATE_REDUCED_FACES BOTTOM */
1511 } else {
1512 /* GENERATOR SNIP_INTERPOLATE_FACES TOP */
1513 if (m_faceOffset[0] > -1) {
1514 const double tmp0_1 = 0.78867513459481288225;
1515 const double tmp0_0 = 0.21132486540518711775;
1516 #pragma omp parallel for
1517 for (index_t k1=0; k1 < m_NE1; ++k1) {
1518 const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0));
1519 const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0));
1520 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
1521 for (index_t i=0; i < numComp; ++i) {
1522 o[INDEX2(i,numComp,0)] = f_00[i]*tmp0_1 + f_01[i]*tmp0_0;
1523 o[INDEX2(i,numComp,1)] = f_00[i]*tmp0_0 + f_01[i]*tmp0_1;
1524 } /* end of component loop i */
1525 } /* end of k1 loop */
1526 } /* end of face 0 */
1527 if (m_faceOffset[1] > -1) {
1528 const double tmp0_1 = 0.21132486540518711775;
1529 const double tmp0_0 = 0.78867513459481288225;
1530 #pragma omp parallel for
1531 for (index_t k1=0; k1 < m_NE1; ++k1) {
1532 const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0));
1533 const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0));
1534 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
1535 for (index_t i=0; i < numComp; ++i) {
1536 o[INDEX2(i,numComp,0)] = f_10[i]*tmp0_0 + f_11[i]*tmp0_1;
1537 o[INDEX2(i,numComp,1)] = f_10[i]*tmp0_1 + f_11[i]*tmp0_0;
1538 } /* end of component loop i */
1539 } /* end of k1 loop */
1540 } /* end of face 1 */
1541 if (m_faceOffset[2] > -1) {
1542 const double tmp0_1 = 0.78867513459481288225;
1543 const double tmp0_0 = 0.21132486540518711775;
1544 #pragma omp parallel for
1545 for (index_t k0=0; k0 < m_NE0; ++k0) {
1546 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0));
1547 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0));
1548 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1549 for (index_t i=0; i < numComp; ++i) {
1550 o[INDEX2(i,numComp,0)] = f_00[i]*tmp0_1 + f_10[i]*tmp0_0;
1551 o[INDEX2(i,numComp,1)] = f_00[i]*tmp0_0 + f_10[i]*tmp0_1;
1552 } /* end of component loop i */
1553 } /* end of k0 loop */
1554 } /* end of face 2 */
1555 if (m_faceOffset[3] > -1) {
1556 const double tmp0_1 = 0.78867513459481288225;
1557 const double tmp0_0 = 0.21132486540518711775;
1558 #pragma omp parallel for
1559 for (index_t k0=0; k0 < m_NE0; ++k0) {
1560 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0));
1561 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0));
1562 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1563 for (index_t i=0; i < numComp; ++i) {
1564 o[INDEX2(i,numComp,0)] = f_01[i]*tmp0_1 + f_11[i]*tmp0_0;
1565 o[INDEX2(i,numComp,1)] = f_01[i]*tmp0_0 + f_11[i]*tmp0_1;
1566 } /* end of component loop i */
1567 } /* end of k0 loop */
1568 } /* end of face 3 */
1569 /* GENERATOR SNIP_INTERPOLATE_FACES BOTTOM */
1570 }
1571 }
1572
1573 } // end of namespace ripley
1574

  ViewVC Help
Powered by ViewVC 1.1.26