/[escript]/trunk/ripley/src/Rectangle.cpp
ViewVC logotype

Contents of /trunk/ripley/src/Rectangle.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3746 - (show annotations)
Thu Dec 15 00:02:22 2011 UTC (7 years, 10 months ago) by caltinay
Original Path: branches/ripleygmg_from_3668/ripley/src/Rectangle.cpp
File size: 60325 byte(s)
In Ripley Solution==ContinuousFunction and ReducedSolution==ReducedCF now.
Removed a test in escript that relied on failure when trying to tag Data on
Solution/ReducedSolution.

1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2011 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14 #include <ripley/Rectangle.h>
15 extern "C" {
16 #include "paso/SystemMatrixPattern.h"
17 }
18
19 #if USE_SILO
20 #include <silo.h>
21 #ifdef ESYS_MPI
22 #include <pmpio.h>
23 #endif
24 #endif
25
26 #include <iomanip>
27
28 using namespace std;
29
30 namespace ripley {
31
32 Rectangle::Rectangle(int n0, int n1, double l0, double l1, int d0, int d1) :
33 RipleyDomain(2),
34 m_gNE0(n0),
35 m_gNE1(n1),
36 m_l0(l0),
37 m_l1(l1),
38 m_NX(d0),
39 m_NY(d1)
40 {
41 // ensure number of subdivisions is valid and nodes can be distributed
42 // among number of ranks
43 if (m_NX*m_NY != m_mpiInfo->size)
44 throw RipleyException("Invalid number of spatial subdivisions");
45
46 if (n0%m_NX > 0 || n1%m_NY > 0)
47 throw RipleyException("Number of elements must be separable into number of ranks in each dimension");
48
49 // local number of elements
50 m_NE0 = n0/m_NX;
51 m_NE1 = n1/m_NY;
52 // local number of nodes (not necessarily owned)
53 m_N0 = m_NE0+1;
54 m_N1 = m_NE1+1;
55 // bottom-left node is at (offset0,offset1) in global mesh
56 m_offset0 = m_NE0*(m_mpiInfo->rank%m_NX);
57 m_offset1 = m_NE1*(m_mpiInfo->rank/m_NX);
58 populateSampleIds();
59 }
60
61 Rectangle::~Rectangle()
62 {
63 }
64
65 string Rectangle::getDescription() const
66 {
67 return "ripley::Rectangle";
68 }
69
70 bool Rectangle::operator==(const AbstractDomain& other) const
71 {
72 const Rectangle* o=dynamic_cast<const Rectangle*>(&other);
73 if (o) {
74 return (RipleyDomain::operator==(other) &&
75 m_gNE0==o->m_gNE0 && m_gNE1==o->m_gNE1
76 && m_l0==o->m_l0 && m_l1==o->m_l1
77 && m_NX==o->m_NX && m_NY==o->m_NY);
78 }
79
80 return false;
81 }
82
83 void Rectangle::dump(const string& fileName) const
84 {
85 #if USE_SILO
86 string fn(fileName);
87 if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
88 fn+=".silo";
89 }
90
91 const int NUM_SILO_FILES = 1;
92 const char* blockDirFmt = "/block%04d";
93 int driver=DB_HDF5;
94 string siloPath;
95 DBfile* dbfile = NULL;
96
97 #ifdef ESYS_MPI
98 PMPIO_baton_t* baton = NULL;
99 #endif
100
101 if (m_mpiInfo->size > 1) {
102 #ifdef ESYS_MPI
103 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
104 0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
105 PMPIO_DefaultClose, (void*)&driver);
106 // try the fallback driver in case of error
107 if (!baton && driver != DB_PDB) {
108 driver = DB_PDB;
109 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
110 0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
111 PMPIO_DefaultClose, (void*)&driver);
112 }
113 if (baton) {
114 char str[64];
115 snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
116 siloPath = str;
117 dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
118 }
119 #endif
120 } else {
121 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
122 getDescription().c_str(), driver);
123 // try the fallback driver in case of error
124 if (!dbfile && driver != DB_PDB) {
125 driver = DB_PDB;
126 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
127 getDescription().c_str(), driver);
128 }
129 }
130
131 if (!dbfile)
132 throw RipleyException("dump: Could not create Silo file");
133
134 /*
135 if (driver==DB_HDF5) {
136 // gzip level 1 already provides good compression with minimal
137 // performance penalty. Some tests showed that gzip levels >3 performed
138 // rather badly on escript data both in terms of time and space
139 DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
140 }
141 */
142
143 boost::scoped_ptr<double> x(new double[m_N0]);
144 boost::scoped_ptr<double> y(new double[m_N1]);
145 double* coords[2] = { x.get(), y.get() };
146 pair<double,double> xdx = getFirstCoordAndSpacing(0);
147 pair<double,double> ydy = getFirstCoordAndSpacing(1);
148 #pragma omp parallel
149 {
150 #pragma omp for nowait
151 for (dim_t i0 = 0; i0 < m_N0; i0++) {
152 coords[0][i0]=xdx.first+i0*xdx.second;
153 }
154 #pragma omp for nowait
155 for (dim_t i1 = 0; i1 < m_N1; i1++) {
156 coords[1][i1]=ydy.first+i1*ydy.second;
157 }
158 }
159 IndexVector dims = getNumNodesPerDim();
160
161 // write mesh
162 DBPutQuadmesh(dbfile, "mesh", NULL, coords, &dims[0], 2, DB_DOUBLE,
163 DB_COLLINEAR, NULL);
164
165 // write node ids
166 DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], &dims[0], 2,
167 NULL, 0, DB_INT, DB_NODECENT, NULL);
168
169 // write element ids
170 dims = getNumElementsPerDim();
171 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
172 &dims[0], 2, NULL, 0, DB_INT, DB_ZONECENT, NULL);
173
174 // rank 0 writes multimesh and multivar
175 if (m_mpiInfo->rank == 0) {
176 vector<string> tempstrings;
177 vector<char*> names;
178 for (dim_t i=0; i<m_mpiInfo->size; i++) {
179 stringstream path;
180 path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
181 tempstrings.push_back(path.str());
182 names.push_back((char*)tempstrings.back().c_str());
183 }
184 vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
185 DBSetDir(dbfile, "/");
186 DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
187 &types[0], NULL);
188 tempstrings.clear();
189 names.clear();
190 for (dim_t i=0; i<m_mpiInfo->size; i++) {
191 stringstream path;
192 path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
193 tempstrings.push_back(path.str());
194 names.push_back((char*)tempstrings.back().c_str());
195 }
196 types.assign(m_mpiInfo->size, DB_QUADVAR);
197 DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
198 &types[0], NULL);
199 tempstrings.clear();
200 names.clear();
201 for (dim_t i=0; i<m_mpiInfo->size; i++) {
202 stringstream path;
203 path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
204 tempstrings.push_back(path.str());
205 names.push_back((char*)tempstrings.back().c_str());
206 }
207 DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
208 &types[0], NULL);
209 }
210
211 if (m_mpiInfo->size > 1) {
212 #ifdef ESYS_MPI
213 PMPIO_HandOffBaton(baton, dbfile);
214 PMPIO_Finish(baton);
215 #endif
216 } else {
217 DBClose(dbfile);
218 }
219
220 #else // USE_SILO
221 throw RipleyException("dump(): no Silo support");
222 #endif
223 }
224
225 const int* Rectangle::borrowSampleReferenceIDs(int fsType) const
226 {
227 switch (fsType) {
228 case Nodes:
229 return &m_nodeId[0];
230 case Elements:
231 case ReducedElements:
232 return &m_elementId[0];
233 case FaceElements:
234 case ReducedFaceElements:
235 return &m_faceId[0];
236 default:
237 break;
238 }
239
240 stringstream msg;
241 msg << "borrowSampleReferenceIDs() not implemented for function space type "
242 << functionSpaceTypeAsString(fsType);
243 throw RipleyException(msg.str());
244 }
245
246 bool Rectangle::ownSample(int fsCode, index_t id) const
247 {
248 #ifdef ESYS_MPI
249 if (fsCode != ReducedNodes) {
250 const index_t myFirst=m_nodeDistribution[m_mpiInfo->rank];
251 const index_t myLast=m_nodeDistribution[m_mpiInfo->rank+1]-1;
252 return (m_nodeId[id]>=myFirst && m_nodeId[id]<=myLast);
253 } else {
254 stringstream msg;
255 msg << "ownSample() not implemented for "
256 << functionSpaceTypeAsString(fsCode);
257 throw RipleyException(msg.str());
258 }
259 #else
260 return true;
261 #endif
262 }
263
264 void Rectangle::setToGradient(escript::Data& out, const escript::Data& cIn) const
265 {
266 escript::Data& in = *const_cast<escript::Data*>(&cIn);
267 const dim_t numComp = in.getDataPointSize();
268 const double h0 = m_l0/m_gNE0;
269 const double h1 = m_l1/m_gNE1;
270 const double cx0 = -1./h0;
271 const double cx1 = -.78867513459481288225/h0;
272 const double cx2 = -.5/h0;
273 const double cx3 = -.21132486540518711775/h0;
274 const double cx4 = .21132486540518711775/h0;
275 const double cx5 = .5/h0;
276 const double cx6 = .78867513459481288225/h0;
277 const double cx7 = 1./h0;
278 const double cy0 = -1./h1;
279 const double cy1 = -.78867513459481288225/h1;
280 const double cy2 = -.5/h1;
281 const double cy3 = -.21132486540518711775/h1;
282 const double cy4 = .21132486540518711775/h1;
283 const double cy5 = .5/h1;
284 const double cy6 = .78867513459481288225/h1;
285 const double cy7 = 1./h1;
286
287 if (out.getFunctionSpace().getTypeCode() == Elements) {
288 /*** GENERATOR SNIP_GRAD_ELEMENTS TOP */
289 #pragma omp parallel for
290 for (index_t k1=0; k1 < m_NE1; ++k1) {
291 for (index_t k0=0; k0 < m_NE0; ++k0) {
292 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0));
293 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0));
294 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0));
295 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0));
296 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0));
297 for (index_t i=0; i < numComp; ++i) {
298 o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx1 + f_01[i]*cx3 + f_10[i]*cx6 + f_11[i]*cx4;
299 o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy1 + f_01[i]*cy6 + f_10[i]*cy3 + f_11[i]*cy4;
300 o[INDEX3(i,0,1,numComp,2)] = f_00[i]*cx1 + f_01[i]*cx3 + f_10[i]*cx6 + f_11[i]*cx4;
301 o[INDEX3(i,1,1,numComp,2)] = f_00[i]*cy3 + f_01[i]*cy4 + f_10[i]*cy1 + f_11[i]*cy6;
302 o[INDEX3(i,0,2,numComp,2)] = f_00[i]*cx3 + f_01[i]*cx1 + f_10[i]*cx4 + f_11[i]*cx6;
303 o[INDEX3(i,1,2,numComp,2)] = f_00[i]*cy1 + f_01[i]*cy6 + f_10[i]*cy3 + f_11[i]*cy4;
304 o[INDEX3(i,0,3,numComp,2)] = f_00[i]*cx3 + f_01[i]*cx1 + f_10[i]*cx4 + f_11[i]*cx6;
305 o[INDEX3(i,1,3,numComp,2)] = f_00[i]*cy3 + f_01[i]*cy4 + f_10[i]*cy1 + f_11[i]*cy6;
306 } /* end of component loop i */
307 } /* end of k0 loop */
308 } /* end of k1 loop */
309 /* GENERATOR SNIP_GRAD_ELEMENTS BOTTOM */
310 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
311 /*** GENERATOR SNIP_GRAD_REDUCED_ELEMENTS TOP */
312 #pragma omp parallel for
313 for (index_t k1=0; k1 < m_NE1; ++k1) {
314 for (index_t k0=0; k0 < m_NE0; ++k0) {
315 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0));
316 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0));
317 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0));
318 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0));
319 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0));
320 for (index_t i=0; i < numComp; ++i) {
321 o[INDEX3(i,0,0,numComp,2)] = cx5*(f_10[i] + f_11[i]) + cx2*(f_00[i] + f_01[i]);
322 o[INDEX3(i,1,0,numComp,2)] = cy2*(f_00[i] + f_10[i]) + cy5*(f_01[i] + f_11[i]);
323 } /* end of component loop i */
324 } /* end of k0 loop */
325 } /* end of k1 loop */
326 /* GENERATOR SNIP_GRAD_REDUCED_ELEMENTS BOTTOM */
327 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
328 #pragma omp parallel
329 {
330 /*** GENERATOR SNIP_GRAD_FACES TOP */
331 if (m_faceOffset[0] > -1) {
332 #pragma omp for nowait
333 for (index_t k1=0; k1 < m_NE1; ++k1) {
334 const register double* f_10 = in.getSampleDataRO(INDEX2(1,k1, m_N0));
335 const register double* f_11 = in.getSampleDataRO(INDEX2(1,k1+1, m_N0));
336 const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0));
337 const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0));
338 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
339 for (index_t i=0; i < numComp; ++i) {
340 o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx1 + f_01[i]*cx3 + f_10[i]*cx6 + f_11[i]*cx4;
341 o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy0 + f_01[i]*cy7;
342 o[INDEX3(i,0,1,numComp,2)] = f_00[i]*cx3 + f_01[i]*cx1 + f_10[i]*cx4 + f_11[i]*cx6;
343 o[INDEX3(i,1,1,numComp,2)] = f_00[i]*cy0 + f_01[i]*cy7;
344 } /* end of component loop i */
345 } /* end of k1 loop */
346 } /* end of face 0 */
347 if (m_faceOffset[1] > -1) {
348 #pragma omp for nowait
349 for (index_t k1=0; k1 < m_NE1; ++k1) {
350 const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0));
351 const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0));
352 const register double* f_01 = in.getSampleDataRO(INDEX2(m_N0-2,k1+1, m_N0));
353 const register double* f_00 = in.getSampleDataRO(INDEX2(m_N0-2,k1, m_N0));
354 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
355 for (index_t i=0; i < numComp; ++i) {
356 o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx1 + f_01[i]*cx3 + f_10[i]*cx6 + f_11[i]*cx4;
357 o[INDEX3(i,1,0,numComp,2)] = f_10[i]*cy0 + f_11[i]*cy7;
358 o[INDEX3(i,0,1,numComp,2)] = f_00[i]*cx3 + f_01[i]*cx1 + f_10[i]*cx4 + f_11[i]*cx6;
359 o[INDEX3(i,1,1,numComp,2)] = f_10[i]*cy0 + f_11[i]*cy7;
360 } /* end of component loop i */
361 } /* end of k1 loop */
362 } /* end of face 1 */
363 if (m_faceOffset[2] > -1) {
364 #pragma omp for nowait
365 for (index_t k0=0; k0 < m_NE0; ++k0) {
366 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0));
367 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0));
368 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,1, m_N0));
369 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,1, m_N0));
370 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
371 for (index_t i=0; i < numComp; ++i) {
372 o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx0 + f_10[i]*cx7;
373 o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy1 + f_01[i]*cy6 + f_10[i]*cy3 + f_11[i]*cy4;
374 o[INDEX3(i,0,1,numComp,2)] = f_00[i]*cx0 + f_10[i]*cx7;
375 o[INDEX3(i,1,1,numComp,2)] = f_00[i]*cy3 + f_01[i]*cy4 + f_10[i]*cy1 + f_11[i]*cy6;
376 } /* end of component loop i */
377 } /* end of k0 loop */
378 } /* end of face 2 */
379 if (m_faceOffset[3] > -1) {
380 #pragma omp for nowait
381 for (index_t k0=0; k0 < m_NE0; ++k0) {
382 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0));
383 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0));
384 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,m_N1-2, m_N0));
385 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,m_N1-2, m_N0));
386 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
387 for (index_t i=0; i < numComp; ++i) {
388 o[INDEX3(i,0,0,numComp,2)] = f_01[i]*cx0 + f_11[i]*cx7;
389 o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy1 + f_01[i]*cy6 + f_10[i]*cy3 + f_11[i]*cy4;
390 o[INDEX3(i,0,1,numComp,2)] = f_01[i]*cx0 + f_11[i]*cx7;
391 o[INDEX3(i,1,1,numComp,2)] = f_00[i]*cy3 + f_01[i]*cy4 + f_10[i]*cy1 + f_11[i]*cy6;
392 } /* end of component loop i */
393 } /* end of k0 loop */
394 } /* end of face 3 */
395 /* GENERATOR SNIP_GRAD_FACES BOTTOM */
396 } // end of parallel section
397 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
398 #pragma omp parallel
399 {
400 /*** GENERATOR SNIP_GRAD_REDUCED_FACES TOP */
401 if (m_faceOffset[0] > -1) {
402 #pragma omp for nowait
403 for (index_t k1=0; k1 < m_NE1; ++k1) {
404 const register double* f_10 = in.getSampleDataRO(INDEX2(1,k1, m_N0));
405 const register double* f_11 = in.getSampleDataRO(INDEX2(1,k1+1, m_N0));
406 const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0));
407 const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0));
408 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
409 for (index_t i=0; i < numComp; ++i) {
410 o[INDEX3(i,0,0,numComp,2)] = cx5*(f_10[i] + f_11[i]) + cx2*(f_00[i] + f_01[i]);
411 o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy0 + f_01[i]*cy7;
412 } /* end of component loop i */
413 } /* end of k1 loop */
414 } /* end of face 0 */
415 if (m_faceOffset[1] > -1) {
416 #pragma omp for nowait
417 for (index_t k1=0; k1 < m_NE1; ++k1) {
418 const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0));
419 const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0));
420 const register double* f_01 = in.getSampleDataRO(INDEX2(m_N0-2,k1+1, m_N0));
421 const register double* f_00 = in.getSampleDataRO(INDEX2(m_N0-2,k1, m_N0));
422 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
423 for (index_t i=0; i < numComp; ++i) {
424 o[INDEX3(i,0,0,numComp,2)] = cx5*(f_10[i] + f_11[i]) + cx2*(f_00[i] + f_01[i]);
425 o[INDEX3(i,1,0,numComp,2)] = f_10[i]*cy0 + f_11[i]*cy7;
426 } /* end of component loop i */
427 } /* end of k1 loop */
428 } /* end of face 1 */
429 if (m_faceOffset[2] > -1) {
430 #pragma omp for nowait
431 for (index_t k0=0; k0 < m_NE0; ++k0) {
432 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0));
433 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0));
434 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,1, m_N0));
435 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,1, m_N0));
436 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
437 for (index_t i=0; i < numComp; ++i) {
438 o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx0 + f_10[i]*cx7;
439 o[INDEX3(i,1,0,numComp,2)] = cy2*(f_00[i] + f_10[i]) + cy5*(f_01[i] + f_11[i]);
440 } /* end of component loop i */
441 } /* end of k0 loop */
442 } /* end of face 2 */
443 if (m_faceOffset[3] > -1) {
444 #pragma omp for nowait
445 for (index_t k0=0; k0 < m_NE0; ++k0) {
446 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0));
447 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0));
448 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,m_N1-2, m_N0));
449 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,m_N1-2, m_N0));
450 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
451 for (index_t i=0; i < numComp; ++i) {
452 o[INDEX3(i,0,0,numComp,2)] = f_01[i]*cx0 + f_11[i]*cx7;
453 o[INDEX3(i,1,0,numComp,2)] = cy5*(f_01[i] + f_11[i]) + cy2*(f_00[i] + f_10[i]);
454 } /* end of component loop i */
455 } /* end of k0 loop */
456 } /* end of face 3 */
457 /* GENERATOR SNIP_GRAD_REDUCED_FACES BOTTOM */
458 } // end of parallel section
459 } else {
460 stringstream msg;
461 msg << "setToGradient() not implemented for "
462 << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
463 throw RipleyException(msg.str());
464 }
465 }
466
467 void Rectangle::setToIntegrals(vector<double>& integrals, const escript::Data& arg) const
468 {
469 escript::Data& in = *const_cast<escript::Data*>(&arg);
470 const dim_t numComp = in.getDataPointSize();
471 const double h0 = m_l0/m_gNE0;
472 const double h1 = m_l1/m_gNE1;
473 if (arg.getFunctionSpace().getTypeCode() == Elements) {
474 const double w_0 = h0*h1/4.;
475 #pragma omp parallel
476 {
477 vector<double> int_local(numComp, 0);
478 #pragma omp for nowait
479 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
480 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
481 const double* f = in.getSampleDataRO(INDEX2(k0, k1, m_NE0));
482 for (index_t i=0; i < numComp; ++i) {
483 const register double f_0 = f[INDEX2(i,0,numComp)];
484 const register double f_1 = f[INDEX2(i,1,numComp)];
485 const register double f_2 = f[INDEX2(i,2,numComp)];
486 const register double f_3 = f[INDEX2(i,3,numComp)];
487 int_local[i]+=(f_0+f_1+f_2+f_3)*w_0;
488 } /* end of component loop i */
489 } /* end of k0 loop */
490 } /* end of k1 loop */
491
492 #pragma omp critical
493 for (index_t i=0; i<numComp; i++)
494 integrals[i]+=int_local[i];
495 } // end of parallel section
496 } else if (arg.getFunctionSpace().getTypeCode() == ReducedElements) {
497 const double w_0 = h0*h1;
498 #pragma omp parallel
499 {
500 vector<double> int_local(numComp, 0);
501 #pragma omp for nowait
502 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
503 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
504 const double* f = in.getSampleDataRO(INDEX2(k0, k1, m_NE0));
505 for (index_t i=0; i < numComp; ++i) {
506 int_local[i]+=f[i]*w_0;
507 } /* end of component loop i */
508 } /* end of k0 loop */
509 } /* end of k1 loop */
510
511 #pragma omp critical
512 for (index_t i=0; i<numComp; i++)
513 integrals[i]+=int_local[i];
514 } // end of parallel section
515 } else if (arg.getFunctionSpace().getTypeCode() == FaceElements) {
516 const double w_0 = h0/2.;
517 const double w_1 = h1/2.;
518 #pragma omp parallel
519 {
520 vector<double> int_local(numComp, 0);
521 if (m_faceOffset[0] > -1) {
522 #pragma omp for nowait
523 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
524 const double* f = in.getSampleDataRO(m_faceOffset[0]+k1);
525 for (index_t i=0; i < numComp; ++i) {
526 const register double f_0 = f[INDEX2(i,0,numComp)];
527 const register double f_1 = f[INDEX2(i,1,numComp)];
528 int_local[i]+=(f_0+f_1)*w_1;
529 } /* end of component loop i */
530 } /* end of k1 loop */
531 }
532
533 if (m_faceOffset[1] > -1) {
534 #pragma omp for nowait
535 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
536 const double* f = in.getSampleDataRO(m_faceOffset[1]+k1);
537 for (index_t i=0; i < numComp; ++i) {
538 const register double f_0 = f[INDEX2(i,0,numComp)];
539 const register double f_1 = f[INDEX2(i,1,numComp)];
540 int_local[i]+=(f_0+f_1)*w_1;
541 } /* end of component loop i */
542 } /* end of k1 loop */
543 }
544
545 if (m_faceOffset[2] > -1) {
546 #pragma omp for nowait
547 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
548 const double* f = in.getSampleDataRO(m_faceOffset[2]+k0);
549 for (index_t i=0; i < numComp; ++i) {
550 const register double f_0 = f[INDEX2(i,0,numComp)];
551 const register double f_1 = f[INDEX2(i,1,numComp)];
552 int_local[i]+=(f_0+f_1)*w_0;
553 } /* end of component loop i */
554 } /* end of k0 loop */
555 }
556
557 if (m_faceOffset[3] > -1) {
558 #pragma omp for nowait
559 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
560 const double* f = in.getSampleDataRO(m_faceOffset[3]+k0);
561 for (index_t i=0; i < numComp; ++i) {
562 const register double f_0 = f[INDEX2(i,0,numComp)];
563 const register double f_1 = f[INDEX2(i,1,numComp)];
564 int_local[i]+=(f_0+f_1)*w_0;
565 } /* end of component loop i */
566 } /* end of k0 loop */
567 }
568
569 #pragma omp critical
570 for (index_t i=0; i<numComp; i++)
571 integrals[i]+=int_local[i];
572 } // end of parallel section
573 } else if (arg.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
574 #pragma omp parallel
575 {
576 vector<double> int_local(numComp, 0);
577 if (m_faceOffset[0] > -1) {
578 #pragma omp for nowait
579 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
580 const double* f = in.getSampleDataRO(m_faceOffset[0]+k1);
581 for (index_t i=0; i < numComp; ++i) {
582 int_local[i]+=f[i]*h1;
583 } /* end of component loop i */
584 } /* end of k1 loop */
585 }
586
587 if (m_faceOffset[1] > -1) {
588 #pragma omp for nowait
589 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
590 const double* f = in.getSampleDataRO(m_faceOffset[1]+k1);
591 for (index_t i=0; i < numComp; ++i) {
592 int_local[i]+=f[i]*h1;
593 } /* end of component loop i */
594 } /* end of k1 loop */
595 }
596
597 if (m_faceOffset[2] > -1) {
598 #pragma omp for nowait
599 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
600 const double* f = in.getSampleDataRO(m_faceOffset[2]+k0);
601 for (index_t i=0; i < numComp; ++i) {
602 int_local[i]+=f[i]*h0;
603 } /* end of component loop i */
604 } /* end of k0 loop */
605 }
606
607 if (m_faceOffset[3] > -1) {
608 #pragma omp for nowait
609 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
610 const double* f = in.getSampleDataRO(m_faceOffset[3]+k0);
611 for (index_t i=0; i < numComp; ++i) {
612 int_local[i]+=f[i]*h0;
613 } /* end of component loop i */
614 } /* end of k0 loop */
615 }
616
617 #pragma omp critical
618 for (index_t i=0; i<numComp; i++)
619 integrals[i]+=int_local[i];
620 } // end of parallel section
621 } else {
622 stringstream msg;
623 msg << "setToIntegrals() not implemented for "
624 << functionSpaceTypeAsString(arg.getFunctionSpace().getTypeCode());
625 throw RipleyException(msg.str());
626 }
627 }
628
629 void Rectangle::setToNormal(escript::Data& out) const
630 {
631 if (out.getFunctionSpace().getTypeCode() == FaceElements) {
632 #pragma omp parallel
633 {
634 if (m_faceOffset[0] > -1) {
635 #pragma omp for nowait
636 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
637 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
638 // set vector at two quadrature points
639 *o++ = -1.;
640 *o++ = 0.;
641 *o++ = -1.;
642 *o = 0.;
643 }
644 }
645
646 if (m_faceOffset[1] > -1) {
647 #pragma omp for nowait
648 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
649 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
650 // set vector at two quadrature points
651 *o++ = 1.;
652 *o++ = 0.;
653 *o++ = 1.;
654 *o = 0.;
655 }
656 }
657
658 if (m_faceOffset[2] > -1) {
659 #pragma omp for nowait
660 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
661 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
662 // set vector at two quadrature points
663 *o++ = 0.;
664 *o++ = -1.;
665 *o++ = 0.;
666 *o = -1.;
667 }
668 }
669
670 if (m_faceOffset[3] > -1) {
671 #pragma omp for nowait
672 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
673 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
674 // set vector at two quadrature points
675 *o++ = 0.;
676 *o++ = 1.;
677 *o++ = 0.;
678 *o = 1.;
679 }
680 }
681 } // end of parallel section
682 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
683 #pragma omp parallel
684 {
685 if (m_faceOffset[0] > -1) {
686 #pragma omp for nowait
687 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
688 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
689 *o++ = -1.;
690 *o = 0.;
691 }
692 }
693
694 if (m_faceOffset[1] > -1) {
695 #pragma omp for nowait
696 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
697 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
698 *o++ = 1.;
699 *o = 0.;
700 }
701 }
702
703 if (m_faceOffset[2] > -1) {
704 #pragma omp for nowait
705 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
706 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
707 *o++ = 0.;
708 *o = -1.;
709 }
710 }
711
712 if (m_faceOffset[3] > -1) {
713 #pragma omp for nowait
714 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
715 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
716 *o++ = 0.;
717 *o = 1.;
718 }
719 }
720 } // end of parallel section
721
722 } else {
723 stringstream msg;
724 msg << "setToNormal() not implemented for "
725 << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
726 throw RipleyException(msg.str());
727 }
728 }
729
730 void Rectangle::addPDEToSystem(escript::AbstractSystemMatrix& mat,
731 escript::Data& rhs, const escript::Data& A, const escript::Data& B,
732 const escript::Data& C, const escript::Data& D,
733 const escript::Data& X, const escript::Data& Y,
734 const escript::Data& d, const escript::Data& y,
735 const escript::Data& d_contact, const escript::Data& y_contact,
736 const escript::Data& d_dirac, const escript::Data& y_dirac) const
737 {
738 throw RipleyException("addPDEToSystem() not implemented");
739 }
740
741 Paso_SystemMatrixPattern* Rectangle::getPattern(bool reducedRowOrder,
742 bool reducedColOrder) const
743 {
744 if (reducedRowOrder || reducedColOrder)
745 throw RipleyException("getPattern() not implemented for reduced order");
746
747 // connector
748 RankVector neighbour;
749 IndexVector offsetInShared(1,0);
750 IndexVector sendShared, recvShared;
751 const IndexVector faces=getNumFacesPerBoundary();
752 const index_t left = (m_offset0==0 ? 0 : 1);
753 const index_t bottom = (m_offset1==0 ? 0 : 1);
754 // corner node from bottom-left
755 if (faces[0] == 0 && faces[2] == 0) {
756 neighbour.push_back(m_mpiInfo->rank-m_NX-1);
757 offsetInShared.push_back(offsetInShared.back()+1);
758 sendShared.push_back(m_nodeId[m_N0+left]);
759 recvShared.push_back(m_nodeId[0]);
760 }
761 // bottom edge
762 if (faces[2] == 0) {
763 neighbour.push_back(m_mpiInfo->rank-m_NX);
764 offsetInShared.push_back(offsetInShared.back()+m_N0-left);
765 for (dim_t i=left; i<m_N0; i++) {
766 // easy case, we know the neighbour id's
767 sendShared.push_back(m_nodeId[m_N0+i]);
768 recvShared.push_back(m_nodeId[i]);
769 }
770 }
771 // corner node from bottom-right
772 if (faces[1] == 0 && faces[2] == 0) {
773 neighbour.push_back(m_mpiInfo->rank-m_NX+1);
774 const index_t N0=(neighbour.back()%m_NX == 0 ? m_N0 : m_N0-1);
775 const index_t N1=(neighbour.back()/m_NX == 0 ? m_N1 : m_N1-1);
776 const index_t first=m_nodeDistribution[neighbour.back()];
777 offsetInShared.push_back(offsetInShared.back()+1);
778 sendShared.push_back(m_nodeId[(bottom+1)*m_N0-1]);
779 recvShared.push_back(first+N0*(N1-1));
780 }
781 // left edge
782 if (faces[0] == 0) {
783 neighbour.push_back(m_mpiInfo->rank-1);
784 offsetInShared.push_back(offsetInShared.back()+m_N1-bottom);
785 for (dim_t i=bottom; i<m_N1; i++) {
786 // easy case, we know the neighbour id's
787 sendShared.push_back(m_nodeId[i*m_N0+1]);
788 recvShared.push_back(m_nodeId[i*m_N0]);
789 }
790 }
791 // right edge
792 if (faces[1] == 0) {
793 neighbour.push_back(m_mpiInfo->rank+1);
794 const index_t rightN0=(neighbour.back()%m_NX == 0 ? m_N0 : m_N0-1);
795 const index_t first=m_nodeDistribution[neighbour.back()];
796 offsetInShared.push_back(offsetInShared.back()+m_N1-bottom);
797 for (dim_t i=bottom; i<m_N1; i++) {
798 sendShared.push_back(m_nodeId[(i+1)*m_N0-1]);
799 recvShared.push_back(first+rightN0*(i-bottom));
800 }
801 }
802 // corner node from top-left
803 if (faces[0] == 0 && faces[3] == 0) {
804 neighbour.push_back(m_mpiInfo->rank+m_NX-1);
805 const index_t N0=(neighbour.back()%m_NX == 0 ? m_N0 : m_N0-1);
806 const index_t first=m_nodeDistribution[neighbour.back()];
807 offsetInShared.push_back(offsetInShared.back()+1);
808 sendShared.push_back(m_nodeId[m_N0*(m_N1-1)+left]);
809 recvShared.push_back(first+N0-1);
810 }
811 // top edge
812 if (faces[3] == 0) {
813 neighbour.push_back(m_mpiInfo->rank+m_NX);
814 const index_t first=m_nodeDistribution[neighbour.back()];
815 offsetInShared.push_back(offsetInShared.back()+m_N0-left);
816 for (dim_t i=left; i<m_N0; i++) {
817 sendShared.push_back(m_nodeId[m_N0*(m_N1-1)+i]);
818 recvShared.push_back(first+i-left);
819 }
820 }
821 // corner node from top-right
822 if (faces[1] == 0 && faces[3] == 0) {
823 neighbour.push_back(m_mpiInfo->rank+m_NX+1);
824 const index_t first=m_nodeDistribution[neighbour.back()];
825 offsetInShared.push_back(offsetInShared.back()+1);
826 sendShared.push_back(m_nodeId[m_N0*m_N1-1]);
827 recvShared.push_back(first);
828 }
829 const int numDOF=m_nodeDistribution[m_mpiInfo->rank+1]-m_nodeDistribution[m_mpiInfo->rank];
830 /*
831 cout << "--- rcv_shcomp ---" << endl;
832 cout << "numDOF=" << numDOF << ", numNeighbors=" << neighbour.size() << endl;
833 for (size_t i=0; i<neighbour.size(); i++) {
834 cout << "neighbor[" << i << "]=" << neighbour[i]
835 << " offsetInShared[" << i+1 << "]=" << offsetInShared[i+1] << endl;
836 }
837 for (size_t i=0; i<recvShared.size(); i++) {
838 cout << "shared[" << i << "]=" << recvShared[i] << endl;
839 }
840 cout << "--- snd_shcomp ---" << endl;
841 for (size_t i=0; i<sendShared.size(); i++) {
842 cout << "shared[" << i << "]=" << sendShared[i] << endl;
843 }
844 */
845
846 Paso_SharedComponents *snd_shcomp = Paso_SharedComponents_alloc(
847 numDOF, neighbour.size(), &neighbour[0], &sendShared[0],
848 &offsetInShared[0], 1, 0, m_mpiInfo);
849 Paso_SharedComponents *rcv_shcomp = Paso_SharedComponents_alloc(
850 numDOF, neighbour.size(), &neighbour[0], &recvShared[0],
851 &offsetInShared[0], 1, 0, m_mpiInfo);
852 Paso_Connector* connector = Paso_Connector_alloc(snd_shcomp, rcv_shcomp);
853 Paso_SharedComponents_free(snd_shcomp);
854 Paso_SharedComponents_free(rcv_shcomp);
855
856 // create patterns
857 dim_t M, N;
858 IndexVector ptr(1,0);
859 IndexVector index;
860
861 // main pattern
862 for (index_t i=0; i<numDOF; i++) {
863 // always add the node itself
864 index.push_back(i);
865 int num=insertNeighbours(index, i);
866 ptr.push_back(ptr.back()+num+1);
867 }
868 M=N=ptr.size()-1;
869 // paso will manage the memory
870 index_t* indexC = MEMALLOC(index.size(),index_t);
871 index_t* ptrC = MEMALLOC(ptr.size(), index_t);
872 copy(index.begin(), index.end(), indexC);
873 copy(ptr.begin(), ptr.end(), ptrC);
874 Paso_Pattern *mainPattern = Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT,
875 M, N, ptrC, indexC);
876
877 /*
878 cout << "--- main_pattern ---" << endl;
879 cout << "M=" << M << ", N=" << N << endl;
880 for (size_t i=0; i<ptr.size(); i++) {
881 cout << "ptr[" << i << "]=" << ptr[i] << endl;
882 }
883 for (size_t i=0; i<index.size(); i++) {
884 cout << "index[" << i << "]=" << index[i] << endl;
885 }
886 */
887
888 ptr.clear();
889 index.clear();
890
891 // column & row couple patterns
892 Paso_Pattern *colCouplePattern, *rowCouplePattern;
893 generateCouplePatterns(&colCouplePattern, &rowCouplePattern);
894
895 // allocate paso distribution
896 Paso_Distribution* distribution = Paso_Distribution_alloc(m_mpiInfo,
897 const_cast<index_t*>(&m_nodeDistribution[0]), 1, 0);
898
899 Paso_SystemMatrixPattern* pattern = Paso_SystemMatrixPattern_alloc(
900 MATRIX_FORMAT_DEFAULT, distribution, distribution,
901 mainPattern, colCouplePattern, rowCouplePattern,
902 connector, connector);
903 Paso_Pattern_free(mainPattern);
904 Paso_Pattern_free(colCouplePattern);
905 Paso_Pattern_free(rowCouplePattern);
906 Paso_Distribution_free(distribution);
907 return pattern;
908 }
909
910 void Rectangle::Print_Mesh_Info(const bool full) const
911 {
912 RipleyDomain::Print_Mesh_Info(full);
913 if (full) {
914 cout << " Id Coordinates" << endl;
915 cout.precision(15);
916 cout.setf(ios::scientific, ios::floatfield);
917 pair<double,double> xdx = getFirstCoordAndSpacing(0);
918 pair<double,double> ydy = getFirstCoordAndSpacing(1);
919 for (index_t i=0; i < getNumNodes(); i++) {
920 cout << " " << setw(5) << m_nodeId[i]
921 << " " << xdx.first+(i%m_N0)*xdx.second
922 << " " << ydy.first+(i/m_N0)*ydy.second << endl;
923 }
924 }
925 }
926
927 IndexVector Rectangle::getNumNodesPerDim() const
928 {
929 IndexVector ret;
930 ret.push_back(m_N0);
931 ret.push_back(m_N1);
932 return ret;
933 }
934
935 IndexVector Rectangle::getNumElementsPerDim() const
936 {
937 IndexVector ret;
938 ret.push_back(m_NE0);
939 ret.push_back(m_NE1);
940 return ret;
941 }
942
943 IndexVector Rectangle::getNumFacesPerBoundary() const
944 {
945 IndexVector ret(4, 0);
946 //left
947 if (m_offset0==0)
948 ret[0]=m_NE1;
949 //right
950 if (m_mpiInfo->rank%m_NX==m_NX-1)
951 ret[1]=m_NE1;
952 //bottom
953 if (m_offset1==0)
954 ret[2]=m_NE0;
955 //top
956 if (m_mpiInfo->rank/m_NX==m_NY-1)
957 ret[3]=m_NE0;
958 return ret;
959 }
960
961 pair<double,double> Rectangle::getFirstCoordAndSpacing(dim_t dim) const
962 {
963 if (dim==0) {
964 return pair<double,double>((m_l0*m_offset0)/m_gNE0, m_l0/m_gNE0);
965 } else if (dim==1) {
966 return pair<double,double>((m_l1*m_offset1)/m_gNE1, m_l1/m_gNE1);
967 }
968 throw RipleyException("getFirstCoordAndSpacing(): invalid argument");
969 }
970
971 //protected
972 dim_t Rectangle::getNumFaceElements() const
973 {
974 const IndexVector faces = getNumFacesPerBoundary();
975 dim_t n=0;
976 for (size_t i=0; i<faces.size(); i++)
977 n+=faces[i];
978 return n;
979 }
980
981 //protected
982 void Rectangle::assembleCoordinates(escript::Data& arg) const
983 {
984 escriptDataC x = arg.getDataC();
985 int numDim = m_numDim;
986 if (!isDataPointShapeEqual(&x, 1, &numDim))
987 throw RipleyException("setToX: Invalid Data object shape");
988 if (!numSamplesEqual(&x, 1, getNumNodes()))
989 throw RipleyException("setToX: Illegal number of samples in Data object");
990
991 pair<double,double> xdx = getFirstCoordAndSpacing(0);
992 pair<double,double> ydy = getFirstCoordAndSpacing(1);
993 arg.requireWrite();
994 #pragma omp parallel for
995 for (dim_t i1 = 0; i1 < m_N1; i1++) {
996 for (dim_t i0 = 0; i0 < m_N0; i0++) {
997 double* point = arg.getSampleDataRW(i0+m_N0*i1);
998 point[0] = xdx.first+i0*xdx.second;
999 point[1] = ydy.first+i1*ydy.second;
1000 }
1001 }
1002 }
1003
1004 //private
1005 void Rectangle::populateSampleIds()
1006 {
1007 // identifiers are ordered from left to right, bottom to top on each rank,
1008 // except for the shared nodes which are owned by the rank below / to the
1009 // left of the current rank
1010
1011 // build node distribution vector first.
1012 // m_nodeDistribution[i] is the first node id on rank i, that is
1013 // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes
1014 m_nodeDistribution.assign(m_mpiInfo->size+1, 0);
1015 m_nodeDistribution[1]=getNumNodes();
1016 for (dim_t k=1; k<m_mpiInfo->size-1; k++) {
1017 const index_t x=k%m_NX;
1018 const index_t y=k/m_NX;
1019 index_t numNodes=getNumNodes();
1020 if (x>0)
1021 numNodes-=m_N1;
1022 if (y>0)
1023 numNodes-=m_N0;
1024 if (x>0 && y>0)
1025 numNodes++; // subtracted corner twice -> fix that
1026 m_nodeDistribution[k+1]=m_nodeDistribution[k]+numNodes;
1027 }
1028 m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal();
1029
1030 m_nodeId.resize(getNumNodes());
1031
1032 // the bottom row and left column are not owned by this rank so the
1033 // identifiers need to be computed accordingly
1034 const index_t left = (m_offset0==0 ? 0 : 1);
1035 const index_t bottom = (m_offset1==0 ? 0 : 1);
1036 if (left>0) {
1037 const int neighbour=m_mpiInfo->rank-1;
1038 const index_t leftN0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1039 #pragma omp parallel for
1040 for (dim_t i1=bottom; i1<m_N1; i1++) {
1041 m_nodeId[i1*m_N0]=m_nodeDistribution[neighbour]
1042 + (i1-bottom+1)*leftN0-1;
1043 }
1044 }
1045 if (bottom>0) {
1046 const int neighbour=m_mpiInfo->rank-m_NX;
1047 const index_t bottomN0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1048 const index_t bottomN1=(neighbour/m_NX == 0 ? m_N1 : m_N1-1);
1049 #pragma omp parallel for
1050 for (dim_t i0=left; i0<m_N0; i0++) {
1051 m_nodeId[i0]=m_nodeDistribution[neighbour]
1052 + (bottomN1-1)*bottomN0 + i0 - left;
1053 }
1054 }
1055 if (left>0 && bottom>0) {
1056 const int neighbour=m_mpiInfo->rank-m_NX-1;
1057 const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1058 const index_t N1=(neighbour/m_NX == 0 ? m_N1 : m_N1-1);
1059 m_nodeId[0]=m_nodeDistribution[neighbour]+N0*N1-1;
1060 }
1061
1062 // the rest of the id's are contiguous
1063 const index_t firstId=m_nodeDistribution[m_mpiInfo->rank];
1064 #pragma omp parallel for
1065 for (dim_t i1=bottom; i1<m_N1; i1++) {
1066 for (dim_t i0=left; i0<m_N0; i0++) {
1067 m_nodeId[i0+i1*m_N0] = firstId+i0-left+(i1-bottom)*(m_N0-left);
1068 }
1069 }
1070 m_nodeTags.assign(getNumNodes(), 0);
1071 updateTagsInUse(Nodes);
1072
1073 // elements
1074 m_elementId.resize(getNumElements());
1075 #pragma omp parallel for
1076 for (dim_t k=0; k<getNumElements(); k++) {
1077 m_elementId[k]=k;
1078 }
1079 m_elementTags.assign(getNumElements(), 0);
1080 updateTagsInUse(Elements);
1081
1082 // face element id's
1083 m_faceId.resize(getNumFaceElements());
1084 #pragma omp parallel for
1085 for (dim_t k=0; k<getNumFaceElements(); k++) {
1086 m_faceId[k]=k;
1087 }
1088
1089 // generate face offset vector and set face tags
1090 const IndexVector facesPerEdge = getNumFacesPerBoundary();
1091 const index_t LEFT=1, RIGHT=2, BOTTOM=10, TOP=20;
1092 const index_t faceTag[] = { LEFT, RIGHT, BOTTOM, TOP };
1093 m_faceOffset.assign(facesPerEdge.size(), -1);
1094 m_faceTags.clear();
1095 index_t offset=0;
1096 for (size_t i=0; i<facesPerEdge.size(); i++) {
1097 if (facesPerEdge[i]>0) {
1098 m_faceOffset[i]=offset;
1099 offset+=facesPerEdge[i];
1100 m_faceTags.insert(m_faceTags.end(), facesPerEdge[i], faceTag[i]);
1101 }
1102 }
1103 setTagMap("left", LEFT);
1104 setTagMap("right", RIGHT);
1105 setTagMap("bottom", BOTTOM);
1106 setTagMap("top", TOP);
1107 updateTagsInUse(FaceElements);
1108 }
1109
1110 //private
1111 int Rectangle::insertNeighbours(IndexVector& index, index_t node) const
1112 {
1113 const dim_t myN0 = (m_offset0==0 ? m_N0 : m_N0-1);
1114 const dim_t myN1 = (m_offset1==0 ? m_N1 : m_N1-1);
1115 const int x=node%myN0;
1116 const int y=node/myN0;
1117 int num=0;
1118 if (y>0) {
1119 if (x>0) {
1120 // bottom-left
1121 index.push_back(node-myN0-1);
1122 num++;
1123 }
1124 // bottom
1125 index.push_back(node-myN0);
1126 num++;
1127 if (x<myN0-1) {
1128 // bottom-right
1129 index.push_back(node-myN0+1);
1130 num++;
1131 }
1132 }
1133 if (x<myN0-1) {
1134 // right
1135 index.push_back(node+1);
1136 num++;
1137 if (y<myN1-1) {
1138 // top-right
1139 index.push_back(node+myN0+1);
1140 num++;
1141 }
1142 }
1143 if (y<myN1-1) {
1144 // top
1145 index.push_back(node+myN0);
1146 num++;
1147 if (x>0) {
1148 // top-left
1149 index.push_back(node+myN0-1);
1150 num++;
1151 }
1152 }
1153 if (x>0) {
1154 // left
1155 index.push_back(node-1);
1156 num++;
1157 }
1158
1159 return num;
1160 }
1161
1162 //private
1163 void Rectangle::generateCouplePatterns(Paso_Pattern** colPattern, Paso_Pattern** rowPattern) const
1164 {
1165 IndexVector ptr(1,0);
1166 IndexVector index;
1167 const dim_t myN0 = (m_offset0==0 ? m_N0 : m_N0-1);
1168 const dim_t myN1 = (m_offset1==0 ? m_N1 : m_N1-1);
1169 const IndexVector faces=getNumFacesPerBoundary();
1170
1171 // bottom edge
1172 dim_t n=0;
1173 if (faces[0] == 0) {
1174 index.push_back(2*(myN0+myN1+1));
1175 index.push_back(2*(myN0+myN1+1)+1);
1176 n+=2;
1177 if (faces[2] == 0) {
1178 index.push_back(0);
1179 index.push_back(1);
1180 index.push_back(2);
1181 n+=3;
1182 }
1183 } else if (faces[2] == 0) {
1184 index.push_back(1);
1185 index.push_back(2);
1186 n+=2;
1187 }
1188 // n=neighbours of bottom-left corner node
1189 ptr.push_back(ptr.back()+n);
1190 n=0;
1191 if (faces[2] == 0) {
1192 for (dim_t i=1; i<myN0-1; i++) {
1193 index.push_back(i);
1194 index.push_back(i+1);
1195 index.push_back(i+2);
1196 ptr.push_back(ptr.back()+3);
1197 }
1198 index.push_back(myN0-1);
1199 index.push_back(myN0);
1200 n+=2;
1201 if (faces[1] == 0) {
1202 index.push_back(myN0+1);
1203 index.push_back(myN0+2);
1204 index.push_back(myN0+3);
1205 n+=3;
1206 }
1207 } else {
1208 for (dim_t i=1; i<myN0-1; i++) {
1209 ptr.push_back(ptr.back());
1210 }
1211 if (faces[1] == 0) {
1212 index.push_back(myN0+2);
1213 index.push_back(myN0+3);
1214 n+=2;
1215 }
1216 }
1217 // n=neighbours of bottom-right corner node
1218 ptr.push_back(ptr.back()+n);
1219
1220 // 2nd row to 2nd last row
1221 for (dim_t i=1; i<myN1-1; i++) {
1222 // left edge
1223 if (faces[0] == 0) {
1224 index.push_back(2*(myN0+myN1+2)-i);
1225 index.push_back(2*(myN0+myN1+2)-i-1);
1226 index.push_back(2*(myN0+myN1+2)-i-2);
1227 ptr.push_back(ptr.back()+3);
1228 } else {
1229 ptr.push_back(ptr.back());
1230 }
1231 for (dim_t j=1; j<myN0-1; j++) {
1232 ptr.push_back(ptr.back());
1233 }
1234 // right edge
1235 if (faces[1] == 0) {
1236 index.push_back(myN0+i+1);
1237 index.push_back(myN0+i+2);
1238 index.push_back(myN0+i+3);
1239 ptr.push_back(ptr.back()+3);
1240 } else {
1241 ptr.push_back(ptr.back());
1242 }
1243 }
1244
1245 // top edge
1246 n=0;
1247 if (faces[0] == 0) {
1248 index.push_back(2*myN0+myN1+5);
1249 index.push_back(2*myN0+myN1+4);
1250 n+=2;
1251 if (faces[3] == 0) {
1252 index.push_back(2*myN0+myN1+3);
1253 index.push_back(2*myN0+myN1+2);
1254 index.push_back(2*myN0+myN1+1);
1255 n+=3;
1256 }
1257 } else if (faces[3] == 0) {
1258 index.push_back(2*myN0+myN1+2);
1259 index.push_back(2*myN0+myN1+1);
1260 n+=2;
1261 }
1262 // n=neighbours of top-left corner node
1263 ptr.push_back(ptr.back()+n);
1264 n=0;
1265 if (faces[3] == 0) {
1266 for (dim_t i=1; i<myN0-1; i++) {
1267 index.push_back(2*myN0+myN1+i+1);
1268 index.push_back(2*myN0+myN1+i);
1269 index.push_back(2*myN0+myN1+i-1);
1270 ptr.push_back(ptr.back()+3);
1271 }
1272 index.push_back(myN0+myN1+4);
1273 index.push_back(myN0+myN1+3);
1274 n+=2;
1275 if (faces[1] == 0) {
1276 index.push_back(myN0+myN1+2);
1277 index.push_back(myN0+myN1+1);
1278 index.push_back(myN0+myN1);
1279 n+=3;
1280 }
1281 } else {
1282 for (dim_t i=1; i<myN0-1; i++) {
1283 ptr.push_back(ptr.back());
1284 }
1285 if (faces[1] == 0) {
1286 index.push_back(myN0+myN1+1);
1287 index.push_back(myN0+myN1);
1288 n+=2;
1289 }
1290 }
1291 // n=neighbours of top-right corner node
1292 ptr.push_back(ptr.back()+n);
1293
1294 dim_t M=ptr.size()-1;
1295 map<index_t,index_t> idMap;
1296 dim_t N=0;
1297 for (IndexVector::iterator it=index.begin(); it!=index.end(); it++) {
1298 if (idMap.find(*it)==idMap.end()) {
1299 idMap[*it]=N;
1300 *it=N++;
1301 } else {
1302 *it=idMap[*it];
1303 }
1304 }
1305
1306 /*
1307 cout << "--- colCouple_pattern ---" << endl;
1308 cout << "M=" << M << ", N=" << N << endl;
1309 for (size_t i=0; i<ptr.size(); i++) {
1310 cout << "ptr[" << i << "]=" << ptr[i] << endl;
1311 }
1312 for (size_t i=0; i<index.size(); i++) {
1313 cout << "index[" << i << "]=" << index[i] << endl;
1314 }
1315 */
1316
1317 // now build the row couple pattern
1318 IndexVector ptr2(1,0);
1319 IndexVector index2;
1320 for (dim_t id=0; id<N; id++) {
1321 n=0;
1322 for (dim_t i=0; i<M; i++) {
1323 for (dim_t j=ptr[i]; j<ptr[i+1]; j++) {
1324 if (index[j]==id) {
1325 index2.push_back(i);
1326 n++;
1327 break;
1328 }
1329 }
1330 }
1331 ptr2.push_back(ptr2.back()+n);
1332 }
1333
1334 /*
1335 cout << "--- rowCouple_pattern ---" << endl;
1336 cout << "M=" << N << ", N=" << M << endl;
1337 for (size_t i=0; i<ptr2.size(); i++) {
1338 cout << "ptr[" << i << "]=" << ptr2[i] << endl;
1339 }
1340 for (size_t i=0; i<index2.size(); i++) {
1341 cout << "index[" << i << "]=" << index2[i] << endl;
1342 }
1343 */
1344
1345 // paso will manage the memory
1346 index_t* indexC = MEMALLOC(index.size(), index_t);
1347 index_t* ptrC = MEMALLOC(ptr.size(), index_t);
1348 copy(index.begin(), index.end(), indexC);
1349 copy(ptr.begin(), ptr.end(), ptrC);
1350 *colPattern=Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT, M, N, ptrC, indexC);
1351
1352 // paso will manage the memory
1353 indexC = MEMALLOC(index2.size(), index_t);
1354 ptrC = MEMALLOC(ptr2.size(), index_t);
1355 copy(index2.begin(), index2.end(), indexC);
1356 copy(ptr2.begin(), ptr2.end(), ptrC);
1357 *rowPattern=Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT, N, M, ptrC, indexC);
1358 }
1359
1360 //protected
1361 void Rectangle::interpolateNodesOnElements(escript::Data& out,
1362 escript::Data& in, bool reduced) const
1363 {
1364 const dim_t numComp = in.getDataPointSize();
1365 if (reduced) {
1366 /*** GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS TOP */
1367 const double c0 = .25;
1368 #pragma omp parallel for
1369 for (index_t k1=0; k1 < m_NE1; ++k1) {
1370 for (index_t k0=0; k0 < m_NE0; ++k0) {
1371 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0));
1372 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0));
1373 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0));
1374 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0));
1375 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0));
1376 for (index_t i=0; i < numComp; ++i) {
1377 o[INDEX2(i,numComp,0)] = c0*(f_00[i] + f_01[i] + f_10[i] + f_11[i]);
1378 } /* end of component loop i */
1379 } /* end of k0 loop */
1380 } /* end of k1 loop */
1381 /* GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS BOTTOM */
1382 } else {
1383 /*** GENERATOR SNIP_INTERPOLATE_ELEMENTS TOP */
1384 const double c0 = .16666666666666666667;
1385 const double c1 = .044658198738520451079;
1386 const double c2 = .62200846792814621559;
1387 #pragma omp parallel for
1388 for (index_t k1=0; k1 < m_NE1; ++k1) {
1389 for (index_t k0=0; k0 < m_NE0; ++k0) {
1390 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0));
1391 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0));
1392 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0));
1393 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0));
1394 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0));
1395 for (index_t i=0; i < numComp; ++i) {
1396 o[INDEX2(i,numComp,0)] = f_00[i]*c2 + f_11[i]*c1 + c0*(f_01[i] + f_10[i]);
1397 o[INDEX2(i,numComp,1)] = f_01[i]*c1 + f_10[i]*c2 + c0*(f_00[i] + f_11[i]);
1398 o[INDEX2(i,numComp,2)] = f_01[i]*c2 + f_10[i]*c1 + c0*(f_00[i] + f_11[i]);
1399 o[INDEX2(i,numComp,3)] = f_00[i]*c1 + f_11[i]*c2 + c0*(f_01[i] + f_10[i]);
1400 } /* end of component loop i */
1401 } /* end of k0 loop */
1402 } /* end of k1 loop */
1403 /* GENERATOR SNIP_INTERPOLATE_ELEMENTS BOTTOM */
1404 }
1405 }
1406
1407 //protected
1408 void Rectangle::interpolateNodesOnFaces(escript::Data& out, escript::Data& in,
1409 bool reduced) const
1410 {
1411 const dim_t numComp = in.getDataPointSize();
1412 if (reduced) {
1413 const double c0 = .5;
1414 #pragma omp parallel
1415 {
1416 /*** GENERATOR SNIP_INTERPOLATE_REDUCED_FACES TOP */
1417 if (m_faceOffset[0] > -1) {
1418 #pragma omp for nowait
1419 for (index_t k1=0; k1 < m_NE1; ++k1) {
1420 const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0));
1421 const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0));
1422 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
1423 for (index_t i=0; i < numComp; ++i) {
1424 o[INDEX2(i,numComp,0)] = c0*(f_00[i] + f_01[i]);
1425 } /* end of component loop i */
1426 } /* end of k1 loop */
1427 } /* end of face 0 */
1428 if (m_faceOffset[1] > -1) {
1429 #pragma omp for nowait
1430 for (index_t k1=0; k1 < m_NE1; ++k1) {
1431 const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0));
1432 const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0));
1433 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
1434 for (index_t i=0; i < numComp; ++i) {
1435 o[INDEX2(i,numComp,0)] = c0*(f_10[i] + f_11[i]);
1436 } /* end of component loop i */
1437 } /* end of k1 loop */
1438 } /* end of face 1 */
1439 if (m_faceOffset[2] > -1) {
1440 #pragma omp for nowait
1441 for (index_t k0=0; k0 < m_NE0; ++k0) {
1442 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0));
1443 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0));
1444 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1445 for (index_t i=0; i < numComp; ++i) {
1446 o[INDEX2(i,numComp,0)] = c0*(f_00[i] + f_10[i]);
1447 } /* end of component loop i */
1448 } /* end of k0 loop */
1449 } /* end of face 2 */
1450 if (m_faceOffset[3] > -1) {
1451 #pragma omp for nowait
1452 for (index_t k0=0; k0 < m_NE0; ++k0) {
1453 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0));
1454 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0));
1455 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1456 for (index_t i=0; i < numComp; ++i) {
1457 o[INDEX2(i,numComp,0)] = c0*(f_01[i] + f_11[i]);
1458 } /* end of component loop i */
1459 } /* end of k0 loop */
1460 } /* end of face 3 */
1461 /* GENERATOR SNIP_INTERPOLATE_REDUCED_FACES BOTTOM */
1462 } // end of parallel section
1463 } else {
1464 const double c0 = 0.21132486540518711775;
1465 const double c1 = 0.78867513459481288225;
1466 #pragma omp parallel
1467 {
1468 /*** GENERATOR SNIP_INTERPOLATE_FACES TOP */
1469 if (m_faceOffset[0] > -1) {
1470 #pragma omp for nowait
1471 for (index_t k1=0; k1 < m_NE1; ++k1) {
1472 const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0));
1473 const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0));
1474 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
1475 for (index_t i=0; i < numComp; ++i) {
1476 o[INDEX2(i,numComp,0)] = f_00[i]*c1 + f_01[i]*c0;
1477 o[INDEX2(i,numComp,1)] = f_00[i]*c0 + f_01[i]*c1;
1478 } /* end of component loop i */
1479 } /* end of k1 loop */
1480 } /* end of face 0 */
1481 if (m_faceOffset[1] > -1) {
1482 #pragma omp for nowait
1483 for (index_t k1=0; k1 < m_NE1; ++k1) {
1484 const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0));
1485 const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0));
1486 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
1487 for (index_t i=0; i < numComp; ++i) {
1488 o[INDEX2(i,numComp,0)] = f_10[i]*c1 + f_11[i]*c0;
1489 o[INDEX2(i,numComp,1)] = f_10[i]*c0 + f_11[i]*c1;
1490 } /* end of component loop i */
1491 } /* end of k1 loop */
1492 } /* end of face 1 */
1493 if (m_faceOffset[2] > -1) {
1494 #pragma omp for nowait
1495 for (index_t k0=0; k0 < m_NE0; ++k0) {
1496 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0));
1497 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0));
1498 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1499 for (index_t i=0; i < numComp; ++i) {
1500 o[INDEX2(i,numComp,0)] = f_00[i]*c1 + f_10[i]*c0;
1501 o[INDEX2(i,numComp,1)] = f_00[i]*c0 + f_10[i]*c1;
1502 } /* end of component loop i */
1503 } /* end of k0 loop */
1504 } /* end of face 2 */
1505 if (m_faceOffset[3] > -1) {
1506 #pragma omp for nowait
1507 for (index_t k0=0; k0 < m_NE0; ++k0) {
1508 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0));
1509 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0));
1510 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1511 for (index_t i=0; i < numComp; ++i) {
1512 o[INDEX2(i,numComp,0)] = f_01[i]*c1 + f_11[i]*c0;
1513 o[INDEX2(i,numComp,1)] = f_01[i]*c0 + f_11[i]*c1;
1514 } /* end of component loop i */
1515 } /* end of k0 loop */
1516 } /* end of face 3 */
1517 /* GENERATOR SNIP_INTERPOLATE_FACES BOTTOM */
1518 } // end of parallel section
1519 }
1520 }
1521
1522 } // end of namespace ripley
1523

  ViewVC Help
Powered by ViewVC 1.1.26