/[escript]/trunk/ripley/src/Rectangle.cpp
ViewVC logotype

Contents of /trunk/ripley/src/Rectangle.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3735 - (show annotations)
Fri Dec 9 05:03:08 2011 UTC (7 years, 10 months ago) by caltinay
Original Path: branches/ripleygmg_from_3668/ripley/src/Rectangle.cpp
File size: 60197 byte(s)
Set initial node/element/face tags to 0 and updated tests.

1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2011 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14 #include <ripley/Rectangle.h>
15 extern "C" {
16 #include "paso/SystemMatrixPattern.h"
17 }
18
19 #if USE_SILO
20 #include <silo.h>
21 #ifdef ESYS_MPI
22 #include <pmpio.h>
23 #endif
24 #endif
25
26 #include <iomanip>
27
28 using namespace std;
29
30 namespace ripley {
31
32 Rectangle::Rectangle(int n0, int n1, double l0, double l1, int d0, int d1) :
33 RipleyDomain(2),
34 m_gNE0(n0),
35 m_gNE1(n1),
36 m_l0(l0),
37 m_l1(l1),
38 m_NX(d0),
39 m_NY(d1)
40 {
41 // ensure number of subdivisions is valid and nodes can be distributed
42 // among number of ranks
43 if (m_NX*m_NY != m_mpiInfo->size)
44 throw RipleyException("Invalid number of spatial subdivisions");
45
46 if (n0%m_NX > 0 || n1%m_NY > 0)
47 throw RipleyException("Number of elements must be separable into number of ranks in each dimension");
48
49 // local number of elements
50 m_NE0 = n0/m_NX;
51 m_NE1 = n1/m_NY;
52 // local number of nodes (not necessarily owned)
53 m_N0 = m_NE0+1;
54 m_N1 = m_NE1+1;
55 // bottom-left node is at (offset0,offset1) in global mesh
56 m_offset0 = m_NE0*(m_mpiInfo->rank%m_NX);
57 m_offset1 = m_NE1*(m_mpiInfo->rank/m_NX);
58 populateSampleIds();
59 }
60
61 Rectangle::~Rectangle()
62 {
63 }
64
65 string Rectangle::getDescription() const
66 {
67 return "ripley::Rectangle";
68 }
69
70 bool Rectangle::operator==(const AbstractDomain& other) const
71 {
72 if (dynamic_cast<const Rectangle*>(&other))
73 return this==&other;
74
75 return false;
76 }
77
78 void Rectangle::dump(const string& fileName) const
79 {
80 #if USE_SILO
81 string fn(fileName);
82 if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
83 fn+=".silo";
84 }
85
86 const int NUM_SILO_FILES = 1;
87 const char* blockDirFmt = "/block%04d";
88 int driver=DB_HDF5;
89 string siloPath;
90 DBfile* dbfile = NULL;
91
92 #ifdef ESYS_MPI
93 PMPIO_baton_t* baton = NULL;
94 #endif
95
96 if (m_mpiInfo->size > 1) {
97 #ifdef ESYS_MPI
98 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
99 0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
100 PMPIO_DefaultClose, (void*)&driver);
101 // try the fallback driver in case of error
102 if (!baton && driver != DB_PDB) {
103 driver = DB_PDB;
104 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
105 0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
106 PMPIO_DefaultClose, (void*)&driver);
107 }
108 if (baton) {
109 char str[64];
110 snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
111 siloPath = str;
112 dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
113 }
114 #endif
115 } else {
116 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
117 getDescription().c_str(), driver);
118 // try the fallback driver in case of error
119 if (!dbfile && driver != DB_PDB) {
120 driver = DB_PDB;
121 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
122 getDescription().c_str(), driver);
123 }
124 }
125
126 if (!dbfile)
127 throw RipleyException("dump: Could not create Silo file");
128
129 /*
130 if (driver==DB_HDF5) {
131 // gzip level 1 already provides good compression with minimal
132 // performance penalty. Some tests showed that gzip levels >3 performed
133 // rather badly on escript data both in terms of time and space
134 DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
135 }
136 */
137
138 boost::scoped_ptr<double> x(new double[m_N0]);
139 boost::scoped_ptr<double> y(new double[m_N1]);
140 double* coords[2] = { x.get(), y.get() };
141 pair<double,double> xdx = getFirstCoordAndSpacing(0);
142 pair<double,double> ydy = getFirstCoordAndSpacing(1);
143 #pragma omp parallel
144 {
145 #pragma omp for nowait
146 for (dim_t i0 = 0; i0 < m_N0; i0++) {
147 coords[0][i0]=xdx.first+i0*xdx.second;
148 }
149 #pragma omp for nowait
150 for (dim_t i1 = 0; i1 < m_N1; i1++) {
151 coords[1][i1]=ydy.first+i1*ydy.second;
152 }
153 }
154 IndexVector dims = getNumNodesPerDim();
155
156 // write mesh
157 DBPutQuadmesh(dbfile, "mesh", NULL, coords, &dims[0], 2, DB_DOUBLE,
158 DB_COLLINEAR, NULL);
159
160 // write node ids
161 DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], &dims[0], 2,
162 NULL, 0, DB_INT, DB_NODECENT, NULL);
163
164 // write element ids
165 dims = getNumElementsPerDim();
166 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
167 &dims[0], 2, NULL, 0, DB_INT, DB_ZONECENT, NULL);
168
169 // rank 0 writes multimesh and multivar
170 if (m_mpiInfo->rank == 0) {
171 vector<string> tempstrings;
172 vector<char*> names;
173 for (dim_t i=0; i<m_mpiInfo->size; i++) {
174 stringstream path;
175 path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
176 tempstrings.push_back(path.str());
177 names.push_back((char*)tempstrings.back().c_str());
178 }
179 vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
180 DBSetDir(dbfile, "/");
181 DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
182 &types[0], NULL);
183 tempstrings.clear();
184 names.clear();
185 for (dim_t i=0; i<m_mpiInfo->size; i++) {
186 stringstream path;
187 path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
188 tempstrings.push_back(path.str());
189 names.push_back((char*)tempstrings.back().c_str());
190 }
191 types.assign(m_mpiInfo->size, DB_QUADVAR);
192 DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
193 &types[0], NULL);
194 tempstrings.clear();
195 names.clear();
196 for (dim_t i=0; i<m_mpiInfo->size; i++) {
197 stringstream path;
198 path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
199 tempstrings.push_back(path.str());
200 names.push_back((char*)tempstrings.back().c_str());
201 }
202 DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
203 &types[0], NULL);
204 }
205
206 if (m_mpiInfo->size > 1) {
207 #ifdef ESYS_MPI
208 PMPIO_HandOffBaton(baton, dbfile);
209 PMPIO_Finish(baton);
210 #endif
211 } else {
212 DBClose(dbfile);
213 }
214
215 #else // USE_SILO
216 throw RipleyException("dump(): no Silo support");
217 #endif
218 }
219
220 const int* Rectangle::borrowSampleReferenceIDs(int fsType) const
221 {
222 switch (fsType) {
223 case Nodes:
224 case DegreesOfFreedom: //FIXME
225 case ReducedDegreesOfFreedom: //FIXME
226 return &m_nodeId[0];
227 case Elements:
228 case ReducedElements:
229 return &m_elementId[0];
230 case FaceElements:
231 case ReducedFaceElements:
232 return &m_faceId[0];
233 default:
234 break;
235 }
236
237 stringstream msg;
238 msg << "borrowSampleReferenceIDs() not implemented for function space type "
239 << functionSpaceTypeAsString(fsType);
240 throw RipleyException(msg.str());
241 }
242
243 bool Rectangle::ownSample(int fsCode, index_t id) const
244 {
245 #ifdef ESYS_MPI
246 if (fsCode != ReducedNodes) {
247 const index_t myFirst=m_nodeDistribution[m_mpiInfo->rank];
248 const index_t myLast=m_nodeDistribution[m_mpiInfo->rank+1]-1;
249 return (m_nodeId[id]>=myFirst && m_nodeId[id]<=myLast);
250 } else {
251 stringstream msg;
252 msg << "ownSample() not implemented for "
253 << functionSpaceTypeAsString(fsCode);
254 throw RipleyException(msg.str());
255 }
256 #else
257 return true;
258 #endif
259 }
260
261 void Rectangle::setToGradient(escript::Data& out, const escript::Data& cIn) const
262 {
263 escript::Data& in = *const_cast<escript::Data*>(&cIn);
264 const dim_t numComp = in.getDataPointSize();
265 const double h0 = m_l0/m_gNE0;
266 const double h1 = m_l1/m_gNE1;
267 const double cx0 = -1./h0;
268 const double cx1 = -.78867513459481288225/h0;
269 const double cx2 = -.5/h0;
270 const double cx3 = -.21132486540518711775/h0;
271 const double cx4 = .21132486540518711775/h0;
272 const double cx5 = .5/h0;
273 const double cx6 = .78867513459481288225/h0;
274 const double cx7 = 1./h0;
275 const double cy0 = -1./h1;
276 const double cy1 = -.78867513459481288225/h1;
277 const double cy2 = -.5/h1;
278 const double cy3 = -.21132486540518711775/h1;
279 const double cy4 = .21132486540518711775/h1;
280 const double cy5 = .5/h1;
281 const double cy6 = .78867513459481288225/h1;
282 const double cy7 = 1./h1;
283
284 if (out.getFunctionSpace().getTypeCode() == Elements) {
285 /*** GENERATOR SNIP_GRAD_ELEMENTS TOP */
286 #pragma omp parallel for
287 for (index_t k1=0; k1 < m_NE1; ++k1) {
288 for (index_t k0=0; k0 < m_NE0; ++k0) {
289 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0));
290 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0));
291 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0));
292 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0));
293 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0));
294 for (index_t i=0; i < numComp; ++i) {
295 o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx1 + f_01[i]*cx3 + f_10[i]*cx6 + f_11[i]*cx4;
296 o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy1 + f_01[i]*cy6 + f_10[i]*cy3 + f_11[i]*cy4;
297 o[INDEX3(i,0,1,numComp,2)] = f_00[i]*cx1 + f_01[i]*cx3 + f_10[i]*cx6 + f_11[i]*cx4;
298 o[INDEX3(i,1,1,numComp,2)] = f_00[i]*cy3 + f_01[i]*cy4 + f_10[i]*cy1 + f_11[i]*cy6;
299 o[INDEX3(i,0,2,numComp,2)] = f_00[i]*cx3 + f_01[i]*cx1 + f_10[i]*cx4 + f_11[i]*cx6;
300 o[INDEX3(i,1,2,numComp,2)] = f_00[i]*cy1 + f_01[i]*cy6 + f_10[i]*cy3 + f_11[i]*cy4;
301 o[INDEX3(i,0,3,numComp,2)] = f_00[i]*cx3 + f_01[i]*cx1 + f_10[i]*cx4 + f_11[i]*cx6;
302 o[INDEX3(i,1,3,numComp,2)] = f_00[i]*cy3 + f_01[i]*cy4 + f_10[i]*cy1 + f_11[i]*cy6;
303 } /* end of component loop i */
304 } /* end of k0 loop */
305 } /* end of k1 loop */
306 /* GENERATOR SNIP_GRAD_ELEMENTS BOTTOM */
307 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
308 /*** GENERATOR SNIP_GRAD_REDUCED_ELEMENTS TOP */
309 #pragma omp parallel for
310 for (index_t k1=0; k1 < m_NE1; ++k1) {
311 for (index_t k0=0; k0 < m_NE0; ++k0) {
312 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0));
313 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0));
314 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0));
315 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0));
316 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0));
317 for (index_t i=0; i < numComp; ++i) {
318 o[INDEX3(i,0,0,numComp,2)] = cx5*(f_10[i] + f_11[i]) + cx2*(f_00[i] + f_01[i]);
319 o[INDEX3(i,1,0,numComp,2)] = cy2*(f_00[i] + f_10[i]) + cy5*(f_01[i] + f_11[i]);
320 } /* end of component loop i */
321 } /* end of k0 loop */
322 } /* end of k1 loop */
323 /* GENERATOR SNIP_GRAD_REDUCED_ELEMENTS BOTTOM */
324 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
325 #pragma omp parallel
326 {
327 /*** GENERATOR SNIP_GRAD_FACES TOP */
328 if (m_faceOffset[0] > -1) {
329 #pragma omp for nowait
330 for (index_t k1=0; k1 < m_NE1; ++k1) {
331 const register double* f_10 = in.getSampleDataRO(INDEX2(1,k1, m_N0));
332 const register double* f_11 = in.getSampleDataRO(INDEX2(1,k1+1, m_N0));
333 const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0));
334 const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0));
335 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
336 for (index_t i=0; i < numComp; ++i) {
337 o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx1 + f_01[i]*cx3 + f_10[i]*cx6 + f_11[i]*cx4;
338 o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy0 + f_01[i]*cy7;
339 o[INDEX3(i,0,1,numComp,2)] = f_00[i]*cx3 + f_01[i]*cx1 + f_10[i]*cx4 + f_11[i]*cx6;
340 o[INDEX3(i,1,1,numComp,2)] = f_00[i]*cy0 + f_01[i]*cy7;
341 } /* end of component loop i */
342 } /* end of k1 loop */
343 } /* end of face 0 */
344 if (m_faceOffset[1] > -1) {
345 #pragma omp for nowait
346 for (index_t k1=0; k1 < m_NE1; ++k1) {
347 const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0));
348 const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0));
349 const register double* f_01 = in.getSampleDataRO(INDEX2(m_N0-2,k1+1, m_N0));
350 const register double* f_00 = in.getSampleDataRO(INDEX2(m_N0-2,k1, m_N0));
351 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
352 for (index_t i=0; i < numComp; ++i) {
353 o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx1 + f_01[i]*cx3 + f_10[i]*cx6 + f_11[i]*cx4;
354 o[INDEX3(i,1,0,numComp,2)] = f_10[i]*cy0 + f_11[i]*cy7;
355 o[INDEX3(i,0,1,numComp,2)] = f_00[i]*cx3 + f_01[i]*cx1 + f_10[i]*cx4 + f_11[i]*cx6;
356 o[INDEX3(i,1,1,numComp,2)] = f_10[i]*cy0 + f_11[i]*cy7;
357 } /* end of component loop i */
358 } /* end of k1 loop */
359 } /* end of face 1 */
360 if (m_faceOffset[2] > -1) {
361 #pragma omp for nowait
362 for (index_t k0=0; k0 < m_NE0; ++k0) {
363 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0));
364 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0));
365 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,1, m_N0));
366 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,1, m_N0));
367 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
368 for (index_t i=0; i < numComp; ++i) {
369 o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx0 + f_10[i]*cx7;
370 o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy1 + f_01[i]*cy6 + f_10[i]*cy3 + f_11[i]*cy4;
371 o[INDEX3(i,0,1,numComp,2)] = f_00[i]*cx0 + f_10[i]*cx7;
372 o[INDEX3(i,1,1,numComp,2)] = f_00[i]*cy3 + f_01[i]*cy4 + f_10[i]*cy1 + f_11[i]*cy6;
373 } /* end of component loop i */
374 } /* end of k0 loop */
375 } /* end of face 2 */
376 if (m_faceOffset[3] > -1) {
377 #pragma omp for nowait
378 for (index_t k0=0; k0 < m_NE0; ++k0) {
379 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0));
380 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0));
381 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,m_N1-2, m_N0));
382 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,m_N1-2, m_N0));
383 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
384 for (index_t i=0; i < numComp; ++i) {
385 o[INDEX3(i,0,0,numComp,2)] = f_01[i]*cx0 + f_11[i]*cx7;
386 o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy1 + f_01[i]*cy6 + f_10[i]*cy3 + f_11[i]*cy4;
387 o[INDEX3(i,0,1,numComp,2)] = f_01[i]*cx0 + f_11[i]*cx7;
388 o[INDEX3(i,1,1,numComp,2)] = f_00[i]*cy3 + f_01[i]*cy4 + f_10[i]*cy1 + f_11[i]*cy6;
389 } /* end of component loop i */
390 } /* end of k0 loop */
391 } /* end of face 3 */
392 /* GENERATOR SNIP_GRAD_FACES BOTTOM */
393 } // end of parallel section
394 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
395 #pragma omp parallel
396 {
397 /*** GENERATOR SNIP_GRAD_REDUCED_FACES TOP */
398 if (m_faceOffset[0] > -1) {
399 #pragma omp for nowait
400 for (index_t k1=0; k1 < m_NE1; ++k1) {
401 const register double* f_10 = in.getSampleDataRO(INDEX2(1,k1, m_N0));
402 const register double* f_11 = in.getSampleDataRO(INDEX2(1,k1+1, m_N0));
403 const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0));
404 const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0));
405 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
406 for (index_t i=0; i < numComp; ++i) {
407 o[INDEX3(i,0,0,numComp,2)] = cx5*(f_10[i] + f_11[i]) + cx2*(f_00[i] + f_01[i]);
408 o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy0 + f_01[i]*cy7;
409 } /* end of component loop i */
410 } /* end of k1 loop */
411 } /* end of face 0 */
412 if (m_faceOffset[1] > -1) {
413 #pragma omp for nowait
414 for (index_t k1=0; k1 < m_NE1; ++k1) {
415 const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0));
416 const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0));
417 const register double* f_01 = in.getSampleDataRO(INDEX2(m_N0-2,k1+1, m_N0));
418 const register double* f_00 = in.getSampleDataRO(INDEX2(m_N0-2,k1, m_N0));
419 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
420 for (index_t i=0; i < numComp; ++i) {
421 o[INDEX3(i,0,0,numComp,2)] = cx5*(f_10[i] + f_11[i]) + cx2*(f_00[i] + f_01[i]);
422 o[INDEX3(i,1,0,numComp,2)] = f_10[i]*cy0 + f_11[i]*cy7;
423 } /* end of component loop i */
424 } /* end of k1 loop */
425 } /* end of face 1 */
426 if (m_faceOffset[2] > -1) {
427 #pragma omp for nowait
428 for (index_t k0=0; k0 < m_NE0; ++k0) {
429 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0));
430 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0));
431 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,1, m_N0));
432 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,1, m_N0));
433 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
434 for (index_t i=0; i < numComp; ++i) {
435 o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx0 + f_10[i]*cx7;
436 o[INDEX3(i,1,0,numComp,2)] = cy2*(f_00[i] + f_10[i]) + cy5*(f_01[i] + f_11[i]);
437 } /* end of component loop i */
438 } /* end of k0 loop */
439 } /* end of face 2 */
440 if (m_faceOffset[3] > -1) {
441 #pragma omp for nowait
442 for (index_t k0=0; k0 < m_NE0; ++k0) {
443 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0));
444 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0));
445 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,m_N1-2, m_N0));
446 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,m_N1-2, m_N0));
447 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
448 for (index_t i=0; i < numComp; ++i) {
449 o[INDEX3(i,0,0,numComp,2)] = f_01[i]*cx0 + f_11[i]*cx7;
450 o[INDEX3(i,1,0,numComp,2)] = cy5*(f_01[i] + f_11[i]) + cy2*(f_00[i] + f_10[i]);
451 } /* end of component loop i */
452 } /* end of k0 loop */
453 } /* end of face 3 */
454 /* GENERATOR SNIP_GRAD_REDUCED_FACES BOTTOM */
455 } // end of parallel section
456 } else {
457 stringstream msg;
458 msg << "setToGradient() not implemented for "
459 << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
460 throw RipleyException(msg.str());
461 }
462 }
463
464 void Rectangle::setToIntegrals(vector<double>& integrals, const escript::Data& arg) const
465 {
466 escript::Data& in = *const_cast<escript::Data*>(&arg);
467 const dim_t numComp = in.getDataPointSize();
468 const double h0 = m_l0/m_gNE0;
469 const double h1 = m_l1/m_gNE1;
470 if (arg.getFunctionSpace().getTypeCode() == Elements) {
471 const double w_0 = h0*h1/4.;
472 #pragma omp parallel
473 {
474 vector<double> int_local(numComp, 0);
475 #pragma omp for nowait
476 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
477 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
478 const double* f = in.getSampleDataRO(INDEX2(k0, k1, m_NE0));
479 for (index_t i=0; i < numComp; ++i) {
480 const register double f_0 = f[INDEX2(i,0,numComp)];
481 const register double f_1 = f[INDEX2(i,1,numComp)];
482 const register double f_2 = f[INDEX2(i,2,numComp)];
483 const register double f_3 = f[INDEX2(i,3,numComp)];
484 int_local[i]+=(f_0+f_1+f_2+f_3)*w_0;
485 } /* end of component loop i */
486 } /* end of k0 loop */
487 } /* end of k1 loop */
488
489 #pragma omp critical
490 for (index_t i=0; i<numComp; i++)
491 integrals[i]+=int_local[i];
492 } // end of parallel section
493 } else if (arg.getFunctionSpace().getTypeCode() == ReducedElements) {
494 const double w_0 = h0*h1;
495 #pragma omp parallel
496 {
497 vector<double> int_local(numComp, 0);
498 #pragma omp for nowait
499 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
500 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
501 const double* f = in.getSampleDataRO(INDEX2(k0, k1, m_NE0));
502 for (index_t i=0; i < numComp; ++i) {
503 int_local[i]+=f[i]*w_0;
504 } /* end of component loop i */
505 } /* end of k0 loop */
506 } /* end of k1 loop */
507
508 #pragma omp critical
509 for (index_t i=0; i<numComp; i++)
510 integrals[i]+=int_local[i];
511 } // end of parallel section
512 } else if (arg.getFunctionSpace().getTypeCode() == FaceElements) {
513 const double w_0 = h0/2.;
514 const double w_1 = h1/2.;
515 #pragma omp parallel
516 {
517 vector<double> int_local(numComp, 0);
518 if (m_faceOffset[0] > -1) {
519 #pragma omp for nowait
520 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
521 const double* f = in.getSampleDataRO(m_faceOffset[0]+k1);
522 for (index_t i=0; i < numComp; ++i) {
523 const register double f_0 = f[INDEX2(i,0,numComp)];
524 const register double f_1 = f[INDEX2(i,1,numComp)];
525 int_local[i]+=(f_0+f_1)*w_1;
526 } /* end of component loop i */
527 } /* end of k1 loop */
528 }
529
530 if (m_faceOffset[1] > -1) {
531 #pragma omp for nowait
532 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
533 const double* f = in.getSampleDataRO(m_faceOffset[1]+k1);
534 for (index_t i=0; i < numComp; ++i) {
535 const register double f_0 = f[INDEX2(i,0,numComp)];
536 const register double f_1 = f[INDEX2(i,1,numComp)];
537 int_local[i]+=(f_0+f_1)*w_1;
538 } /* end of component loop i */
539 } /* end of k1 loop */
540 }
541
542 if (m_faceOffset[2] > -1) {
543 #pragma omp for nowait
544 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
545 const double* f = in.getSampleDataRO(m_faceOffset[2]+k0);
546 for (index_t i=0; i < numComp; ++i) {
547 const register double f_0 = f[INDEX2(i,0,numComp)];
548 const register double f_1 = f[INDEX2(i,1,numComp)];
549 int_local[i]+=(f_0+f_1)*w_0;
550 } /* end of component loop i */
551 } /* end of k0 loop */
552 }
553
554 if (m_faceOffset[3] > -1) {
555 #pragma omp for nowait
556 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
557 const double* f = in.getSampleDataRO(m_faceOffset[3]+k0);
558 for (index_t i=0; i < numComp; ++i) {
559 const register double f_0 = f[INDEX2(i,0,numComp)];
560 const register double f_1 = f[INDEX2(i,1,numComp)];
561 int_local[i]+=(f_0+f_1)*w_0;
562 } /* end of component loop i */
563 } /* end of k0 loop */
564 }
565
566 #pragma omp critical
567 for (index_t i=0; i<numComp; i++)
568 integrals[i]+=int_local[i];
569 } // end of parallel section
570 } else if (arg.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
571 #pragma omp parallel
572 {
573 vector<double> int_local(numComp, 0);
574 if (m_faceOffset[0] > -1) {
575 #pragma omp for nowait
576 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
577 const double* f = in.getSampleDataRO(m_faceOffset[0]+k1);
578 for (index_t i=0; i < numComp; ++i) {
579 int_local[i]+=f[i]*h1;
580 } /* end of component loop i */
581 } /* end of k1 loop */
582 }
583
584 if (m_faceOffset[1] > -1) {
585 #pragma omp for nowait
586 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
587 const double* f = in.getSampleDataRO(m_faceOffset[1]+k1);
588 for (index_t i=0; i < numComp; ++i) {
589 int_local[i]+=f[i]*h1;
590 } /* end of component loop i */
591 } /* end of k1 loop */
592 }
593
594 if (m_faceOffset[2] > -1) {
595 #pragma omp for nowait
596 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
597 const double* f = in.getSampleDataRO(m_faceOffset[2]+k0);
598 for (index_t i=0; i < numComp; ++i) {
599 int_local[i]+=f[i]*h0;
600 } /* end of component loop i */
601 } /* end of k0 loop */
602 }
603
604 if (m_faceOffset[3] > -1) {
605 #pragma omp for nowait
606 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
607 const double* f = in.getSampleDataRO(m_faceOffset[3]+k0);
608 for (index_t i=0; i < numComp; ++i) {
609 int_local[i]+=f[i]*h0;
610 } /* end of component loop i */
611 } /* end of k0 loop */
612 }
613
614 #pragma omp critical
615 for (index_t i=0; i<numComp; i++)
616 integrals[i]+=int_local[i];
617 } // end of parallel section
618 } else {
619 stringstream msg;
620 msg << "setToIntegrals() not implemented for "
621 << functionSpaceTypeAsString(arg.getFunctionSpace().getTypeCode());
622 throw RipleyException(msg.str());
623 }
624 }
625
626 void Rectangle::setToNormal(escript::Data& out) const
627 {
628 if (out.getFunctionSpace().getTypeCode() == FaceElements) {
629 #pragma omp parallel
630 {
631 if (m_faceOffset[0] > -1) {
632 #pragma omp for nowait
633 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
634 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
635 // set vector at two quadrature points
636 *o++ = -1.;
637 *o++ = 0.;
638 *o++ = -1.;
639 *o = 0.;
640 }
641 }
642
643 if (m_faceOffset[1] > -1) {
644 #pragma omp for nowait
645 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
646 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
647 // set vector at two quadrature points
648 *o++ = 1.;
649 *o++ = 0.;
650 *o++ = 1.;
651 *o = 0.;
652 }
653 }
654
655 if (m_faceOffset[2] > -1) {
656 #pragma omp for nowait
657 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
658 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
659 // set vector at two quadrature points
660 *o++ = 0.;
661 *o++ = -1.;
662 *o++ = 0.;
663 *o = -1.;
664 }
665 }
666
667 if (m_faceOffset[3] > -1) {
668 #pragma omp for nowait
669 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
670 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
671 // set vector at two quadrature points
672 *o++ = 0.;
673 *o++ = 1.;
674 *o++ = 0.;
675 *o = 1.;
676 }
677 }
678 } // end of parallel section
679 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
680 #pragma omp parallel
681 {
682 if (m_faceOffset[0] > -1) {
683 #pragma omp for nowait
684 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
685 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
686 *o++ = -1.;
687 *o = 0.;
688 }
689 }
690
691 if (m_faceOffset[1] > -1) {
692 #pragma omp for nowait
693 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
694 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
695 *o++ = 1.;
696 *o = 0.;
697 }
698 }
699
700 if (m_faceOffset[2] > -1) {
701 #pragma omp for nowait
702 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
703 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
704 *o++ = 0.;
705 *o = -1.;
706 }
707 }
708
709 if (m_faceOffset[3] > -1) {
710 #pragma omp for nowait
711 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
712 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
713 *o++ = 0.;
714 *o = 1.;
715 }
716 }
717 } // end of parallel section
718
719 } else {
720 stringstream msg;
721 msg << "setToNormal() not implemented for "
722 << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
723 throw RipleyException(msg.str());
724 }
725 }
726
727 void Rectangle::addPDEToSystem(escript::AbstractSystemMatrix& mat,
728 escript::Data& rhs, const escript::Data& A, const escript::Data& B,
729 const escript::Data& C, const escript::Data& D,
730 const escript::Data& X, const escript::Data& Y,
731 const escript::Data& d, const escript::Data& y,
732 const escript::Data& d_contact, const escript::Data& y_contact,
733 const escript::Data& d_dirac, const escript::Data& y_dirac) const
734 {
735 throw RipleyException("addPDEToSystem() not implemented");
736 }
737
738 Paso_SystemMatrixPattern* Rectangle::getPattern(bool reducedRowOrder,
739 bool reducedColOrder) const
740 {
741 if (reducedRowOrder || reducedColOrder)
742 throw RipleyException("getPattern() not implemented for reduced order");
743
744 // connector
745 RankVector neighbour;
746 IndexVector offsetInShared(1,0);
747 IndexVector sendShared, recvShared;
748 const IndexVector faces=getNumFacesPerBoundary();
749 const index_t left = (m_offset0==0 ? 0 : 1);
750 const index_t bottom = (m_offset1==0 ? 0 : 1);
751 // corner node from bottom-left
752 if (faces[0] == 0 && faces[2] == 0) {
753 neighbour.push_back(m_mpiInfo->rank-m_NX-1);
754 offsetInShared.push_back(offsetInShared.back()+1);
755 sendShared.push_back(m_nodeId[m_N0+left]);
756 recvShared.push_back(m_nodeId[0]);
757 }
758 // bottom edge
759 if (faces[2] == 0) {
760 neighbour.push_back(m_mpiInfo->rank-m_NX);
761 offsetInShared.push_back(offsetInShared.back()+m_N0-left);
762 for (dim_t i=left; i<m_N0; i++) {
763 // easy case, we know the neighbour id's
764 sendShared.push_back(m_nodeId[m_N0+i]);
765 recvShared.push_back(m_nodeId[i]);
766 }
767 }
768 // corner node from bottom-right
769 if (faces[1] == 0 && faces[2] == 0) {
770 neighbour.push_back(m_mpiInfo->rank-m_NX+1);
771 const index_t N0=(neighbour.back()%m_NX == 0 ? m_N0 : m_N0-1);
772 const index_t N1=(neighbour.back()/m_NX == 0 ? m_N1 : m_N1-1);
773 const index_t first=m_nodeDistribution[neighbour.back()];
774 offsetInShared.push_back(offsetInShared.back()+1);
775 sendShared.push_back(m_nodeId[(bottom+1)*m_N0-1]);
776 recvShared.push_back(first+N0*(N1-1));
777 }
778 // left edge
779 if (faces[0] == 0) {
780 neighbour.push_back(m_mpiInfo->rank-1);
781 offsetInShared.push_back(offsetInShared.back()+m_N1-bottom);
782 for (dim_t i=bottom; i<m_N1; i++) {
783 // easy case, we know the neighbour id's
784 sendShared.push_back(m_nodeId[i*m_N0+1]);
785 recvShared.push_back(m_nodeId[i*m_N0]);
786 }
787 }
788 // right edge
789 if (faces[1] == 0) {
790 neighbour.push_back(m_mpiInfo->rank+1);
791 const index_t rightN0=(neighbour.back()%m_NX == 0 ? m_N0 : m_N0-1);
792 const index_t first=m_nodeDistribution[neighbour.back()];
793 offsetInShared.push_back(offsetInShared.back()+m_N1-bottom);
794 for (dim_t i=bottom; i<m_N1; i++) {
795 sendShared.push_back(m_nodeId[(i+1)*m_N0-1]);
796 recvShared.push_back(first+rightN0*(i-bottom));
797 }
798 }
799 // corner node from top-left
800 if (faces[0] == 0 && faces[3] == 0) {
801 neighbour.push_back(m_mpiInfo->rank+m_NX-1);
802 const index_t N0=(neighbour.back()%m_NX == 0 ? m_N0 : m_N0-1);
803 const index_t first=m_nodeDistribution[neighbour.back()];
804 offsetInShared.push_back(offsetInShared.back()+1);
805 sendShared.push_back(m_nodeId[m_N0*(m_N1-1)+left]);
806 recvShared.push_back(first+N0-1);
807 }
808 // top edge
809 if (faces[3] == 0) {
810 neighbour.push_back(m_mpiInfo->rank+m_NX);
811 const index_t first=m_nodeDistribution[neighbour.back()];
812 offsetInShared.push_back(offsetInShared.back()+m_N0-left);
813 for (dim_t i=left; i<m_N0; i++) {
814 sendShared.push_back(m_nodeId[m_N0*(m_N1-1)+i]);
815 recvShared.push_back(first+i-left);
816 }
817 }
818 // corner node from top-right
819 if (faces[1] == 0 && faces[3] == 0) {
820 neighbour.push_back(m_mpiInfo->rank+m_NX+1);
821 const index_t first=m_nodeDistribution[neighbour.back()];
822 offsetInShared.push_back(offsetInShared.back()+1);
823 sendShared.push_back(m_nodeId[m_N0*m_N1-1]);
824 recvShared.push_back(first);
825 }
826 const int numDOF=m_nodeDistribution[m_mpiInfo->rank+1]-m_nodeDistribution[m_mpiInfo->rank];
827 /*
828 cout << "--- rcv_shcomp ---" << endl;
829 cout << "numDOF=" << numDOF << ", numNeighbors=" << neighbour.size() << endl;
830 for (size_t i=0; i<neighbour.size(); i++) {
831 cout << "neighbor[" << i << "]=" << neighbour[i]
832 << " offsetInShared[" << i+1 << "]=" << offsetInShared[i+1] << endl;
833 }
834 for (size_t i=0; i<recvShared.size(); i++) {
835 cout << "shared[" << i << "]=" << recvShared[i] << endl;
836 }
837 cout << "--- snd_shcomp ---" << endl;
838 for (size_t i=0; i<sendShared.size(); i++) {
839 cout << "shared[" << i << "]=" << sendShared[i] << endl;
840 }
841 */
842
843 Paso_SharedComponents *snd_shcomp = Paso_SharedComponents_alloc(
844 numDOF, neighbour.size(), &neighbour[0], &sendShared[0],
845 &offsetInShared[0], 1, 0, m_mpiInfo);
846 Paso_SharedComponents *rcv_shcomp = Paso_SharedComponents_alloc(
847 numDOF, neighbour.size(), &neighbour[0], &recvShared[0],
848 &offsetInShared[0], 1, 0, m_mpiInfo);
849 Paso_Connector* connector = Paso_Connector_alloc(snd_shcomp, rcv_shcomp);
850 Paso_SharedComponents_free(snd_shcomp);
851 Paso_SharedComponents_free(rcv_shcomp);
852
853 // create patterns
854 dim_t M, N;
855 IndexVector ptr(1,0);
856 IndexVector index;
857
858 // main pattern
859 for (index_t i=0; i<numDOF; i++) {
860 // always add the node itself
861 index.push_back(i);
862 int num=insertNeighbours(index, i);
863 ptr.push_back(ptr.back()+num+1);
864 }
865 M=N=ptr.size()-1;
866 // paso will manage the memory
867 index_t* indexC = MEMALLOC(index.size(),index_t);
868 index_t* ptrC = MEMALLOC(ptr.size(), index_t);
869 copy(index.begin(), index.end(), indexC);
870 copy(ptr.begin(), ptr.end(), ptrC);
871 Paso_Pattern *mainPattern = Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT,
872 M, N, ptrC, indexC);
873
874 /*
875 cout << "--- main_pattern ---" << endl;
876 cout << "M=" << M << ", N=" << N << endl;
877 for (size_t i=0; i<ptr.size(); i++) {
878 cout << "ptr[" << i << "]=" << ptr[i] << endl;
879 }
880 for (size_t i=0; i<index.size(); i++) {
881 cout << "index[" << i << "]=" << index[i] << endl;
882 }
883 */
884
885 ptr.clear();
886 index.clear();
887
888 // column & row couple patterns
889 Paso_Pattern *colCouplePattern, *rowCouplePattern;
890 generateCouplePatterns(&colCouplePattern, &rowCouplePattern);
891
892 // allocate paso distribution
893 Paso_Distribution* distribution = Paso_Distribution_alloc(m_mpiInfo,
894 const_cast<index_t*>(&m_nodeDistribution[0]), 1, 0);
895
896 Paso_SystemMatrixPattern* pattern = Paso_SystemMatrixPattern_alloc(
897 MATRIX_FORMAT_DEFAULT, distribution, distribution,
898 mainPattern, colCouplePattern, rowCouplePattern,
899 connector, connector);
900 Paso_Pattern_free(mainPattern);
901 Paso_Pattern_free(colCouplePattern);
902 Paso_Pattern_free(rowCouplePattern);
903 Paso_Distribution_free(distribution);
904 return pattern;
905 }
906
907 void Rectangle::Print_Mesh_Info(const bool full) const
908 {
909 RipleyDomain::Print_Mesh_Info(full);
910 if (full) {
911 cout << " Id Coordinates" << endl;
912 cout.precision(15);
913 cout.setf(ios::scientific, ios::floatfield);
914 pair<double,double> xdx = getFirstCoordAndSpacing(0);
915 pair<double,double> ydy = getFirstCoordAndSpacing(1);
916 for (index_t i=0; i < getNumNodes(); i++) {
917 cout << " " << setw(5) << m_nodeId[i]
918 << " " << xdx.first+(i%m_N0)*xdx.second
919 << " " << ydy.first+(i/m_N0)*ydy.second << endl;
920 }
921 }
922 }
923
924 IndexVector Rectangle::getNumNodesPerDim() const
925 {
926 IndexVector ret;
927 ret.push_back(m_N0);
928 ret.push_back(m_N1);
929 return ret;
930 }
931
932 IndexVector Rectangle::getNumElementsPerDim() const
933 {
934 IndexVector ret;
935 ret.push_back(m_NE0);
936 ret.push_back(m_NE1);
937 return ret;
938 }
939
940 IndexVector Rectangle::getNumFacesPerBoundary() const
941 {
942 IndexVector ret(4, 0);
943 //left
944 if (m_offset0==0)
945 ret[0]=m_NE1;
946 //right
947 if (m_mpiInfo->rank%m_NX==m_NX-1)
948 ret[1]=m_NE1;
949 //bottom
950 if (m_offset1==0)
951 ret[2]=m_NE0;
952 //top
953 if (m_mpiInfo->rank/m_NX==m_NY-1)
954 ret[3]=m_NE0;
955 return ret;
956 }
957
958 pair<double,double> Rectangle::getFirstCoordAndSpacing(dim_t dim) const
959 {
960 if (dim==0) {
961 return pair<double,double>((m_l0*m_offset0)/m_gNE0, m_l0/m_gNE0);
962 } else if (dim==1) {
963 return pair<double,double>((m_l1*m_offset1)/m_gNE1, m_l1/m_gNE1);
964 }
965 throw RipleyException("getFirstCoordAndSpacing(): invalid argument");
966 }
967
968 //protected
969 dim_t Rectangle::getNumFaceElements() const
970 {
971 const IndexVector faces = getNumFacesPerBoundary();
972 dim_t n=0;
973 for (size_t i=0; i<faces.size(); i++)
974 n+=faces[i];
975 return n;
976 }
977
978 //protected
979 void Rectangle::assembleCoordinates(escript::Data& arg) const
980 {
981 escriptDataC x = arg.getDataC();
982 int numDim = m_numDim;
983 if (!isDataPointShapeEqual(&x, 1, &numDim))
984 throw RipleyException("setToX: Invalid Data object shape");
985 if (!numSamplesEqual(&x, 1, getNumNodes()))
986 throw RipleyException("setToX: Illegal number of samples in Data object");
987
988 pair<double,double> xdx = getFirstCoordAndSpacing(0);
989 pair<double,double> ydy = getFirstCoordAndSpacing(1);
990 arg.requireWrite();
991 #pragma omp parallel for
992 for (dim_t i1 = 0; i1 < m_N1; i1++) {
993 for (dim_t i0 = 0; i0 < m_N0; i0++) {
994 double* point = arg.getSampleDataRW(i0+m_N0*i1);
995 point[0] = xdx.first+i0*xdx.second;
996 point[1] = ydy.first+i1*ydy.second;
997 }
998 }
999 }
1000
1001 //private
1002 void Rectangle::populateSampleIds()
1003 {
1004 // identifiers are ordered from left to right, bottom to top on each rank,
1005 // except for the shared nodes which are owned by the rank below / to the
1006 // left of the current rank
1007
1008 // build node distribution vector first.
1009 // m_nodeDistribution[i] is the first node id on rank i, that is
1010 // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes
1011 m_nodeDistribution.assign(m_mpiInfo->size+1, 0);
1012 m_nodeDistribution[1]=getNumNodes();
1013 for (dim_t k=1; k<m_mpiInfo->size-1; k++) {
1014 const index_t x=k%m_NX;
1015 const index_t y=k/m_NX;
1016 index_t numNodes=getNumNodes();
1017 if (x>0)
1018 numNodes-=m_N1;
1019 if (y>0)
1020 numNodes-=m_N0;
1021 if (x>0 && y>0)
1022 numNodes++; // subtracted corner twice -> fix that
1023 m_nodeDistribution[k+1]=m_nodeDistribution[k]+numNodes;
1024 }
1025 m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal();
1026
1027 m_nodeId.resize(getNumNodes());
1028
1029 // the bottom row and left column are not owned by this rank so the
1030 // identifiers need to be computed accordingly
1031 const index_t left = (m_offset0==0 ? 0 : 1);
1032 const index_t bottom = (m_offset1==0 ? 0 : 1);
1033 if (left>0) {
1034 const int neighbour=m_mpiInfo->rank-1;
1035 const index_t leftN0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1036 #pragma omp parallel for
1037 for (dim_t i1=bottom; i1<m_N1; i1++) {
1038 m_nodeId[i1*m_N0]=m_nodeDistribution[neighbour]
1039 + (i1-bottom+1)*leftN0-1;
1040 }
1041 }
1042 if (bottom>0) {
1043 const int neighbour=m_mpiInfo->rank-m_NX;
1044 const index_t bottomN0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1045 const index_t bottomN1=(neighbour/m_NX == 0 ? m_N1 : m_N1-1);
1046 #pragma omp parallel for
1047 for (dim_t i0=left; i0<m_N0; i0++) {
1048 m_nodeId[i0]=m_nodeDistribution[neighbour]
1049 + (bottomN1-1)*bottomN0 + i0 - left;
1050 }
1051 }
1052 if (left>0 && bottom>0) {
1053 const int neighbour=m_mpiInfo->rank-m_NX-1;
1054 const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1055 const index_t N1=(neighbour/m_NX == 0 ? m_N1 : m_N1-1);
1056 m_nodeId[0]=m_nodeDistribution[neighbour]+N0*N1-1;
1057 }
1058
1059 // the rest of the id's are contiguous
1060 const index_t firstId=m_nodeDistribution[m_mpiInfo->rank];
1061 #pragma omp parallel for
1062 for (dim_t i1=bottom; i1<m_N1; i1++) {
1063 for (dim_t i0=left; i0<m_N0; i0++) {
1064 m_nodeId[i0+i1*m_N0] = firstId+i0-left+(i1-bottom)*(m_N0-left);
1065 }
1066 }
1067 m_nodeTags.assign(getNumNodes(), 0);
1068 updateTagsInUse(Nodes);
1069
1070 // elements
1071 m_elementId.resize(getNumElements());
1072 #pragma omp parallel for
1073 for (dim_t k=0; k<getNumElements(); k++) {
1074 m_elementId[k]=k;
1075 }
1076 m_elementTags.assign(getNumElements(), 0);
1077 updateTagsInUse(Elements);
1078
1079 // face element id's
1080 m_faceId.resize(getNumFaceElements());
1081 #pragma omp parallel for
1082 for (dim_t k=0; k<getNumFaceElements(); k++) {
1083 m_faceId[k]=k;
1084 }
1085
1086 // generate face offset vector and set face tags
1087 const IndexVector facesPerEdge = getNumFacesPerBoundary();
1088 const index_t LEFT=1, RIGHT=2, BOTTOM=10, TOP=20;
1089 const index_t faceTag[] = { LEFT, RIGHT, BOTTOM, TOP };
1090 m_faceOffset.assign(facesPerEdge.size(), -1);
1091 m_faceTags.clear();
1092 index_t offset=0;
1093 for (size_t i=0; i<facesPerEdge.size(); i++) {
1094 if (facesPerEdge[i]>0) {
1095 m_faceOffset[i]=offset;
1096 offset+=facesPerEdge[i];
1097 m_faceTags.insert(m_faceTags.end(), facesPerEdge[i], faceTag[i]);
1098 }
1099 }
1100 setTagMap("left", LEFT);
1101 setTagMap("right", RIGHT);
1102 setTagMap("bottom", BOTTOM);
1103 setTagMap("top", TOP);
1104 updateTagsInUse(FaceElements);
1105 }
1106
1107 //private
1108 int Rectangle::insertNeighbours(IndexVector& index, index_t node) const
1109 {
1110 const dim_t myN0 = (m_offset0==0 ? m_N0 : m_N0-1);
1111 const dim_t myN1 = (m_offset1==0 ? m_N1 : m_N1-1);
1112 const int x=node%myN0;
1113 const int y=node/myN0;
1114 int num=0;
1115 if (y>0) {
1116 if (x>0) {
1117 // bottom-left
1118 index.push_back(node-myN0-1);
1119 num++;
1120 }
1121 // bottom
1122 index.push_back(node-myN0);
1123 num++;
1124 if (x<myN0-1) {
1125 // bottom-right
1126 index.push_back(node-myN0+1);
1127 num++;
1128 }
1129 }
1130 if (x<myN0-1) {
1131 // right
1132 index.push_back(node+1);
1133 num++;
1134 if (y<myN1-1) {
1135 // top-right
1136 index.push_back(node+myN0+1);
1137 num++;
1138 }
1139 }
1140 if (y<myN1-1) {
1141 // top
1142 index.push_back(node+myN0);
1143 num++;
1144 if (x>0) {
1145 // top-left
1146 index.push_back(node+myN0-1);
1147 num++;
1148 }
1149 }
1150 if (x>0) {
1151 // left
1152 index.push_back(node-1);
1153 num++;
1154 }
1155
1156 return num;
1157 }
1158
1159 //private
1160 void Rectangle::generateCouplePatterns(Paso_Pattern** colPattern, Paso_Pattern** rowPattern) const
1161 {
1162 IndexVector ptr(1,0);
1163 IndexVector index;
1164 const dim_t myN0 = (m_offset0==0 ? m_N0 : m_N0-1);
1165 const dim_t myN1 = (m_offset1==0 ? m_N1 : m_N1-1);
1166 const IndexVector faces=getNumFacesPerBoundary();
1167
1168 // bottom edge
1169 dim_t n=0;
1170 if (faces[0] == 0) {
1171 index.push_back(2*(myN0+myN1+1));
1172 index.push_back(2*(myN0+myN1+1)+1);
1173 n+=2;
1174 if (faces[2] == 0) {
1175 index.push_back(0);
1176 index.push_back(1);
1177 index.push_back(2);
1178 n+=3;
1179 }
1180 } else if (faces[2] == 0) {
1181 index.push_back(1);
1182 index.push_back(2);
1183 n+=2;
1184 }
1185 // n=neighbours of bottom-left corner node
1186 ptr.push_back(ptr.back()+n);
1187 n=0;
1188 if (faces[2] == 0) {
1189 for (dim_t i=1; i<myN0-1; i++) {
1190 index.push_back(i);
1191 index.push_back(i+1);
1192 index.push_back(i+2);
1193 ptr.push_back(ptr.back()+3);
1194 }
1195 index.push_back(myN0-1);
1196 index.push_back(myN0);
1197 n+=2;
1198 if (faces[1] == 0) {
1199 index.push_back(myN0+1);
1200 index.push_back(myN0+2);
1201 index.push_back(myN0+3);
1202 n+=3;
1203 }
1204 } else {
1205 for (dim_t i=1; i<myN0-1; i++) {
1206 ptr.push_back(ptr.back());
1207 }
1208 if (faces[1] == 0) {
1209 index.push_back(myN0+2);
1210 index.push_back(myN0+3);
1211 n+=2;
1212 }
1213 }
1214 // n=neighbours of bottom-right corner node
1215 ptr.push_back(ptr.back()+n);
1216
1217 // 2nd row to 2nd last row
1218 for (dim_t i=1; i<myN1-1; i++) {
1219 // left edge
1220 if (faces[0] == 0) {
1221 index.push_back(2*(myN0+myN1+2)-i);
1222 index.push_back(2*(myN0+myN1+2)-i-1);
1223 index.push_back(2*(myN0+myN1+2)-i-2);
1224 ptr.push_back(ptr.back()+3);
1225 } else {
1226 ptr.push_back(ptr.back());
1227 }
1228 for (dim_t j=1; j<myN0-1; j++) {
1229 ptr.push_back(ptr.back());
1230 }
1231 // right edge
1232 if (faces[1] == 0) {
1233 index.push_back(myN0+i+1);
1234 index.push_back(myN0+i+2);
1235 index.push_back(myN0+i+3);
1236 ptr.push_back(ptr.back()+3);
1237 } else {
1238 ptr.push_back(ptr.back());
1239 }
1240 }
1241
1242 // top edge
1243 n=0;
1244 if (faces[0] == 0) {
1245 index.push_back(2*myN0+myN1+5);
1246 index.push_back(2*myN0+myN1+4);
1247 n+=2;
1248 if (faces[3] == 0) {
1249 index.push_back(2*myN0+myN1+3);
1250 index.push_back(2*myN0+myN1+2);
1251 index.push_back(2*myN0+myN1+1);
1252 n+=3;
1253 }
1254 } else if (faces[3] == 0) {
1255 index.push_back(2*myN0+myN1+2);
1256 index.push_back(2*myN0+myN1+1);
1257 n+=2;
1258 }
1259 // n=neighbours of top-left corner node
1260 ptr.push_back(ptr.back()+n);
1261 n=0;
1262 if (faces[3] == 0) {
1263 for (dim_t i=1; i<myN0-1; i++) {
1264 index.push_back(2*myN0+myN1+i+1);
1265 index.push_back(2*myN0+myN1+i);
1266 index.push_back(2*myN0+myN1+i-1);
1267 ptr.push_back(ptr.back()+3);
1268 }
1269 index.push_back(myN0+myN1+4);
1270 index.push_back(myN0+myN1+3);
1271 n+=2;
1272 if (faces[1] == 0) {
1273 index.push_back(myN0+myN1+2);
1274 index.push_back(myN0+myN1+1);
1275 index.push_back(myN0+myN1);
1276 n+=3;
1277 }
1278 } else {
1279 for (dim_t i=1; i<myN0-1; i++) {
1280 ptr.push_back(ptr.back());
1281 }
1282 if (faces[1] == 0) {
1283 index.push_back(myN0+myN1+1);
1284 index.push_back(myN0+myN1);
1285 n+=2;
1286 }
1287 }
1288 // n=neighbours of top-right corner node
1289 ptr.push_back(ptr.back()+n);
1290
1291 dim_t M=ptr.size()-1;
1292 map<index_t,index_t> idMap;
1293 dim_t N=0;
1294 for (IndexVector::iterator it=index.begin(); it!=index.end(); it++) {
1295 if (idMap.find(*it)==idMap.end()) {
1296 idMap[*it]=N;
1297 *it=N++;
1298 } else {
1299 *it=idMap[*it];
1300 }
1301 }
1302
1303 /*
1304 cout << "--- colCouple_pattern ---" << endl;
1305 cout << "M=" << M << ", N=" << N << endl;
1306 for (size_t i=0; i<ptr.size(); i++) {
1307 cout << "ptr[" << i << "]=" << ptr[i] << endl;
1308 }
1309 for (size_t i=0; i<index.size(); i++) {
1310 cout << "index[" << i << "]=" << index[i] << endl;
1311 }
1312 */
1313
1314 // now build the row couple pattern
1315 IndexVector ptr2(1,0);
1316 IndexVector index2;
1317 for (dim_t id=0; id<N; id++) {
1318 n=0;
1319 for (dim_t i=0; i<M; i++) {
1320 for (dim_t j=ptr[i]; j<ptr[i+1]; j++) {
1321 if (index[j]==id) {
1322 index2.push_back(i);
1323 n++;
1324 break;
1325 }
1326 }
1327 }
1328 ptr2.push_back(ptr2.back()+n);
1329 }
1330
1331 /*
1332 cout << "--- rowCouple_pattern ---" << endl;
1333 cout << "M=" << N << ", N=" << M << endl;
1334 for (size_t i=0; i<ptr2.size(); i++) {
1335 cout << "ptr[" << i << "]=" << ptr2[i] << endl;
1336 }
1337 for (size_t i=0; i<index2.size(); i++) {
1338 cout << "index[" << i << "]=" << index2[i] << endl;
1339 }
1340 */
1341
1342 // paso will manage the memory
1343 index_t* indexC = MEMALLOC(index.size(), index_t);
1344 index_t* ptrC = MEMALLOC(ptr.size(), index_t);
1345 copy(index.begin(), index.end(), indexC);
1346 copy(ptr.begin(), ptr.end(), ptrC);
1347 *colPattern=Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT, M, N, ptrC, indexC);
1348
1349 // paso will manage the memory
1350 indexC = MEMALLOC(index2.size(), index_t);
1351 ptrC = MEMALLOC(ptr2.size(), index_t);
1352 copy(index2.begin(), index2.end(), indexC);
1353 copy(ptr2.begin(), ptr2.end(), ptrC);
1354 *rowPattern=Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT, N, M, ptrC, indexC);
1355 }
1356
1357 //protected
1358 void Rectangle::interpolateNodesOnElements(escript::Data& out,
1359 escript::Data& in, bool reduced) const
1360 {
1361 const dim_t numComp = in.getDataPointSize();
1362 if (reduced) {
1363 /*** GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS TOP */
1364 const double c0 = .25;
1365 #pragma omp parallel for
1366 for (index_t k1=0; k1 < m_NE1; ++k1) {
1367 for (index_t k0=0; k0 < m_NE0; ++k0) {
1368 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0));
1369 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0));
1370 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0));
1371 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0));
1372 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0));
1373 for (index_t i=0; i < numComp; ++i) {
1374 o[INDEX2(i,numComp,0)] = c0*(f_00[i] + f_01[i] + f_10[i] + f_11[i]);
1375 } /* end of component loop i */
1376 } /* end of k0 loop */
1377 } /* end of k1 loop */
1378 /* GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS BOTTOM */
1379 } else {
1380 /*** GENERATOR SNIP_INTERPOLATE_ELEMENTS TOP */
1381 const double c0 = .16666666666666666667;
1382 const double c1 = .044658198738520451079;
1383 const double c2 = .62200846792814621559;
1384 #pragma omp parallel for
1385 for (index_t k1=0; k1 < m_NE1; ++k1) {
1386 for (index_t k0=0; k0 < m_NE0; ++k0) {
1387 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0));
1388 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0));
1389 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0));
1390 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0));
1391 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0));
1392 for (index_t i=0; i < numComp; ++i) {
1393 o[INDEX2(i,numComp,0)] = f_00[i]*c2 + f_11[i]*c1 + c0*(f_01[i] + f_10[i]);
1394 o[INDEX2(i,numComp,1)] = f_01[i]*c1 + f_10[i]*c2 + c0*(f_00[i] + f_11[i]);
1395 o[INDEX2(i,numComp,2)] = f_01[i]*c2 + f_10[i]*c1 + c0*(f_00[i] + f_11[i]);
1396 o[INDEX2(i,numComp,3)] = f_00[i]*c1 + f_11[i]*c2 + c0*(f_01[i] + f_10[i]);
1397 } /* end of component loop i */
1398 } /* end of k0 loop */
1399 } /* end of k1 loop */
1400 /* GENERATOR SNIP_INTERPOLATE_ELEMENTS BOTTOM */
1401 }
1402 }
1403
1404 //protected
1405 void Rectangle::interpolateNodesOnFaces(escript::Data& out, escript::Data& in,
1406 bool reduced) const
1407 {
1408 const dim_t numComp = in.getDataPointSize();
1409 if (reduced) {
1410 const double c0 = .5;
1411 #pragma omp parallel
1412 {
1413 /*** GENERATOR SNIP_INTERPOLATE_REDUCED_FACES TOP */
1414 if (m_faceOffset[0] > -1) {
1415 #pragma omp for nowait
1416 for (index_t k1=0; k1 < m_NE1; ++k1) {
1417 const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0));
1418 const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0));
1419 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
1420 for (index_t i=0; i < numComp; ++i) {
1421 o[INDEX2(i,numComp,0)] = c0*(f_00[i] + f_01[i]);
1422 } /* end of component loop i */
1423 } /* end of k1 loop */
1424 } /* end of face 0 */
1425 if (m_faceOffset[1] > -1) {
1426 #pragma omp for nowait
1427 for (index_t k1=0; k1 < m_NE1; ++k1) {
1428 const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0));
1429 const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0));
1430 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
1431 for (index_t i=0; i < numComp; ++i) {
1432 o[INDEX2(i,numComp,0)] = c0*(f_10[i] + f_11[i]);
1433 } /* end of component loop i */
1434 } /* end of k1 loop */
1435 } /* end of face 1 */
1436 if (m_faceOffset[2] > -1) {
1437 #pragma omp for nowait
1438 for (index_t k0=0; k0 < m_NE0; ++k0) {
1439 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0));
1440 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0));
1441 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1442 for (index_t i=0; i < numComp; ++i) {
1443 o[INDEX2(i,numComp,0)] = c0*(f_00[i] + f_10[i]);
1444 } /* end of component loop i */
1445 } /* end of k0 loop */
1446 } /* end of face 2 */
1447 if (m_faceOffset[3] > -1) {
1448 #pragma omp for nowait
1449 for (index_t k0=0; k0 < m_NE0; ++k0) {
1450 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0));
1451 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0));
1452 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1453 for (index_t i=0; i < numComp; ++i) {
1454 o[INDEX2(i,numComp,0)] = c0*(f_01[i] + f_11[i]);
1455 } /* end of component loop i */
1456 } /* end of k0 loop */
1457 } /* end of face 3 */
1458 /* GENERATOR SNIP_INTERPOLATE_REDUCED_FACES BOTTOM */
1459 } // end of parallel section
1460 } else {
1461 const double c0 = 0.21132486540518711775;
1462 const double c1 = 0.78867513459481288225;
1463 #pragma omp parallel
1464 {
1465 /*** GENERATOR SNIP_INTERPOLATE_FACES TOP */
1466 if (m_faceOffset[0] > -1) {
1467 #pragma omp for nowait
1468 for (index_t k1=0; k1 < m_NE1; ++k1) {
1469 const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0));
1470 const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0));
1471 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
1472 for (index_t i=0; i < numComp; ++i) {
1473 o[INDEX2(i,numComp,0)] = f_00[i]*c1 + f_01[i]*c0;
1474 o[INDEX2(i,numComp,1)] = f_00[i]*c0 + f_01[i]*c1;
1475 } /* end of component loop i */
1476 } /* end of k1 loop */
1477 } /* end of face 0 */
1478 if (m_faceOffset[1] > -1) {
1479 #pragma omp for nowait
1480 for (index_t k1=0; k1 < m_NE1; ++k1) {
1481 const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0));
1482 const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0));
1483 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
1484 for (index_t i=0; i < numComp; ++i) {
1485 o[INDEX2(i,numComp,0)] = f_10[i]*c1 + f_11[i]*c0;
1486 o[INDEX2(i,numComp,1)] = f_10[i]*c0 + f_11[i]*c1;
1487 } /* end of component loop i */
1488 } /* end of k1 loop */
1489 } /* end of face 1 */
1490 if (m_faceOffset[2] > -1) {
1491 #pragma omp for nowait
1492 for (index_t k0=0; k0 < m_NE0; ++k0) {
1493 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0));
1494 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0));
1495 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1496 for (index_t i=0; i < numComp; ++i) {
1497 o[INDEX2(i,numComp,0)] = f_00[i]*c1 + f_10[i]*c0;
1498 o[INDEX2(i,numComp,1)] = f_00[i]*c0 + f_10[i]*c1;
1499 } /* end of component loop i */
1500 } /* end of k0 loop */
1501 } /* end of face 2 */
1502 if (m_faceOffset[3] > -1) {
1503 #pragma omp for nowait
1504 for (index_t k0=0; k0 < m_NE0; ++k0) {
1505 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0));
1506 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0));
1507 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1508 for (index_t i=0; i < numComp; ++i) {
1509 o[INDEX2(i,numComp,0)] = f_01[i]*c1 + f_11[i]*c0;
1510 o[INDEX2(i,numComp,1)] = f_01[i]*c0 + f_11[i]*c1;
1511 } /* end of component loop i */
1512 } /* end of k0 loop */
1513 } /* end of face 3 */
1514 /* GENERATOR SNIP_INTERPOLATE_FACES BOTTOM */
1515 } // end of parallel section
1516 }
1517 }
1518
1519 } // end of namespace ripley
1520

  ViewVC Help
Powered by ViewVC 1.1.26