/[escript]/branches/ripleygmg_from_3668/ripley/src/Rectangle.cpp
ViewVC logotype

Contents of /branches/ripleygmg_from_3668/ripley/src/Rectangle.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3733 - (show annotations)
Fri Dec 9 04:02:56 2011 UTC (7 years, 11 months ago) by caltinay
File size: 60054 byte(s)
escript on ripley tests pass. However, returning 0 for number of point elements
and DOF id's are bogus.

1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2011 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14 #include <ripley/Rectangle.h>
15 extern "C" {
16 #include "paso/SystemMatrixPattern.h"
17 }
18
19 #if USE_SILO
20 #include <silo.h>
21 #ifdef ESYS_MPI
22 #include <pmpio.h>
23 #endif
24 #endif
25
26 #include <iomanip>
27
28 using namespace std;
29
30 namespace ripley {
31
32 Rectangle::Rectangle(int n0, int n1, double l0, double l1, int d0, int d1) :
33 RipleyDomain(2),
34 m_gNE0(n0),
35 m_gNE1(n1),
36 m_l0(l0),
37 m_l1(l1),
38 m_NX(d0),
39 m_NY(d1)
40 {
41 // ensure number of subdivisions is valid and nodes can be distributed
42 // among number of ranks
43 if (m_NX*m_NY != m_mpiInfo->size)
44 throw RipleyException("Invalid number of spatial subdivisions");
45
46 if (n0%m_NX > 0 || n1%m_NY > 0)
47 throw RipleyException("Number of elements must be separable into number of ranks in each dimension");
48
49 // local number of elements
50 m_NE0 = n0/m_NX;
51 m_NE1 = n1/m_NY;
52 // local number of nodes (not necessarily owned)
53 m_N0 = m_NE0+1;
54 m_N1 = m_NE1+1;
55 // bottom-left node is at (offset0,offset1) in global mesh
56 m_offset0 = m_NE0*(m_mpiInfo->rank%m_NX);
57 m_offset1 = m_NE1*(m_mpiInfo->rank/m_NX);
58 populateSampleIds();
59 }
60
61 Rectangle::~Rectangle()
62 {
63 }
64
65 string Rectangle::getDescription() const
66 {
67 return "ripley::Rectangle";
68 }
69
70 bool Rectangle::operator==(const AbstractDomain& other) const
71 {
72 if (dynamic_cast<const Rectangle*>(&other))
73 return this==&other;
74
75 return false;
76 }
77
78 void Rectangle::dump(const string& fileName) const
79 {
80 #if USE_SILO
81 string fn(fileName);
82 if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
83 fn+=".silo";
84 }
85
86 const int NUM_SILO_FILES = 1;
87 const char* blockDirFmt = "/block%04d";
88 int driver=DB_HDF5;
89 string siloPath;
90 DBfile* dbfile = NULL;
91
92 #ifdef ESYS_MPI
93 PMPIO_baton_t* baton = NULL;
94 #endif
95
96 if (m_mpiInfo->size > 1) {
97 #ifdef ESYS_MPI
98 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
99 0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
100 PMPIO_DefaultClose, (void*)&driver);
101 // try the fallback driver in case of error
102 if (!baton && driver != DB_PDB) {
103 driver = DB_PDB;
104 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
105 0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
106 PMPIO_DefaultClose, (void*)&driver);
107 }
108 if (baton) {
109 char str[64];
110 snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
111 siloPath = str;
112 dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
113 }
114 #endif
115 } else {
116 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
117 getDescription().c_str(), driver);
118 // try the fallback driver in case of error
119 if (!dbfile && driver != DB_PDB) {
120 driver = DB_PDB;
121 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
122 getDescription().c_str(), driver);
123 }
124 }
125
126 if (!dbfile)
127 throw RipleyException("dump: Could not create Silo file");
128
129 /*
130 if (driver==DB_HDF5) {
131 // gzip level 1 already provides good compression with minimal
132 // performance penalty. Some tests showed that gzip levels >3 performed
133 // rather badly on escript data both in terms of time and space
134 DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
135 }
136 */
137
138 boost::scoped_ptr<double> x(new double[m_N0]);
139 boost::scoped_ptr<double> y(new double[m_N1]);
140 double* coords[2] = { x.get(), y.get() };
141 pair<double,double> xdx = getFirstCoordAndSpacing(0);
142 pair<double,double> ydy = getFirstCoordAndSpacing(1);
143 #pragma omp parallel
144 {
145 #pragma omp for nowait
146 for (dim_t i0 = 0; i0 < m_N0; i0++) {
147 coords[0][i0]=xdx.first+i0*xdx.second;
148 }
149 #pragma omp for nowait
150 for (dim_t i1 = 0; i1 < m_N1; i1++) {
151 coords[1][i1]=ydy.first+i1*ydy.second;
152 }
153 }
154 IndexVector dims = getNumNodesPerDim();
155
156 // write mesh
157 DBPutQuadmesh(dbfile, "mesh", NULL, coords, &dims[0], 2, DB_DOUBLE,
158 DB_COLLINEAR, NULL);
159
160 // write node ids
161 DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], &dims[0], 2,
162 NULL, 0, DB_INT, DB_NODECENT, NULL);
163
164 // write element ids
165 dims = getNumElementsPerDim();
166 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
167 &dims[0], 2, NULL, 0, DB_INT, DB_ZONECENT, NULL);
168
169 // rank 0 writes multimesh and multivar
170 if (m_mpiInfo->rank == 0) {
171 vector<string> tempstrings;
172 vector<char*> names;
173 for (dim_t i=0; i<m_mpiInfo->size; i++) {
174 stringstream path;
175 path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
176 tempstrings.push_back(path.str());
177 names.push_back((char*)tempstrings.back().c_str());
178 }
179 vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
180 DBSetDir(dbfile, "/");
181 DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
182 &types[0], NULL);
183 tempstrings.clear();
184 names.clear();
185 for (dim_t i=0; i<m_mpiInfo->size; i++) {
186 stringstream path;
187 path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
188 tempstrings.push_back(path.str());
189 names.push_back((char*)tempstrings.back().c_str());
190 }
191 types.assign(m_mpiInfo->size, DB_QUADVAR);
192 DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
193 &types[0], NULL);
194 tempstrings.clear();
195 names.clear();
196 for (dim_t i=0; i<m_mpiInfo->size; i++) {
197 stringstream path;
198 path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
199 tempstrings.push_back(path.str());
200 names.push_back((char*)tempstrings.back().c_str());
201 }
202 DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
203 &types[0], NULL);
204 }
205
206 if (m_mpiInfo->size > 1) {
207 #ifdef ESYS_MPI
208 PMPIO_HandOffBaton(baton, dbfile);
209 PMPIO_Finish(baton);
210 #endif
211 } else {
212 DBClose(dbfile);
213 }
214
215 #else // USE_SILO
216 throw RipleyException("dump(): no Silo support");
217 #endif
218 }
219
220 const int* Rectangle::borrowSampleReferenceIDs(int fsType) const
221 {
222 switch (fsType) {
223 case Nodes:
224 case DegreesOfFreedom: //FIXME
225 case ReducedDegreesOfFreedom: //FIXME
226 return &m_nodeId[0];
227 case Elements:
228 case ReducedElements:
229 return &m_elementId[0];
230 case FaceElements:
231 case ReducedFaceElements:
232 return &m_faceId[0];
233 default:
234 break;
235 }
236
237 stringstream msg;
238 msg << "borrowSampleReferenceIDs() not implemented for function space type "
239 << functionSpaceTypeAsString(fsType);
240 throw RipleyException(msg.str());
241 }
242
243 bool Rectangle::ownSample(int fsCode, index_t id) const
244 {
245 #ifdef ESYS_MPI
246 if (fsCode != ReducedNodes) {
247 const index_t myFirst=m_nodeDistribution[m_mpiInfo->rank];
248 const index_t myLast=m_nodeDistribution[m_mpiInfo->rank+1]-1;
249 return (m_nodeId[id]>=myFirst && m_nodeId[id]<=myLast);
250 } else {
251 stringstream msg;
252 msg << "ownSample() not implemented for "
253 << functionSpaceTypeAsString(fsCode);
254 throw RipleyException(msg.str());
255 }
256 #else
257 return true;
258 #endif
259 }
260
261 void Rectangle::setToGradient(escript::Data& out, const escript::Data& cIn) const
262 {
263 escript::Data& in = *const_cast<escript::Data*>(&cIn);
264 const dim_t numComp = in.getDataPointSize();
265 const double h0 = m_l0/m_gNE0;
266 const double h1 = m_l1/m_gNE1;
267 const double cx0 = -1./h0;
268 const double cx1 = -.78867513459481288225/h0;
269 const double cx2 = -.5/h0;
270 const double cx3 = -.21132486540518711775/h0;
271 const double cx4 = .21132486540518711775/h0;
272 const double cx5 = .5/h0;
273 const double cx6 = .78867513459481288225/h0;
274 const double cx7 = 1./h0;
275 const double cy0 = -1./h1;
276 const double cy1 = -.78867513459481288225/h1;
277 const double cy2 = -.5/h1;
278 const double cy3 = -.21132486540518711775/h1;
279 const double cy4 = .21132486540518711775/h1;
280 const double cy5 = .5/h1;
281 const double cy6 = .78867513459481288225/h1;
282 const double cy7 = 1./h1;
283
284 if (out.getFunctionSpace().getTypeCode() == Elements) {
285 /*** GENERATOR SNIP_GRAD_ELEMENTS TOP */
286 #pragma omp parallel for
287 for (index_t k1=0; k1 < m_NE1; ++k1) {
288 for (index_t k0=0; k0 < m_NE0; ++k0) {
289 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0));
290 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0));
291 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0));
292 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0));
293 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0));
294 for (index_t i=0; i < numComp; ++i) {
295 o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx1 + f_01[i]*cx3 + f_10[i]*cx6 + f_11[i]*cx4;
296 o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy1 + f_01[i]*cy6 + f_10[i]*cy3 + f_11[i]*cy4;
297 o[INDEX3(i,0,1,numComp,2)] = f_00[i]*cx1 + f_01[i]*cx3 + f_10[i]*cx6 + f_11[i]*cx4;
298 o[INDEX3(i,1,1,numComp,2)] = f_00[i]*cy3 + f_01[i]*cy4 + f_10[i]*cy1 + f_11[i]*cy6;
299 o[INDEX3(i,0,2,numComp,2)] = f_00[i]*cx3 + f_01[i]*cx1 + f_10[i]*cx4 + f_11[i]*cx6;
300 o[INDEX3(i,1,2,numComp,2)] = f_00[i]*cy1 + f_01[i]*cy6 + f_10[i]*cy3 + f_11[i]*cy4;
301 o[INDEX3(i,0,3,numComp,2)] = f_00[i]*cx3 + f_01[i]*cx1 + f_10[i]*cx4 + f_11[i]*cx6;
302 o[INDEX3(i,1,3,numComp,2)] = f_00[i]*cy3 + f_01[i]*cy4 + f_10[i]*cy1 + f_11[i]*cy6;
303 } /* end of component loop i */
304 } /* end of k0 loop */
305 } /* end of k1 loop */
306 /* GENERATOR SNIP_GRAD_ELEMENTS BOTTOM */
307 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
308 /*** GENERATOR SNIP_GRAD_REDUCED_ELEMENTS TOP */
309 #pragma omp parallel for
310 for (index_t k1=0; k1 < m_NE1; ++k1) {
311 for (index_t k0=0; k0 < m_NE0; ++k0) {
312 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0));
313 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0));
314 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0));
315 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0));
316 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0));
317 for (index_t i=0; i < numComp; ++i) {
318 o[INDEX3(i,0,0,numComp,2)] = cx5*(f_10[i] + f_11[i]) + cx2*(f_00[i] + f_01[i]);
319 o[INDEX3(i,1,0,numComp,2)] = cy2*(f_00[i] + f_10[i]) + cy5*(f_01[i] + f_11[i]);
320 } /* end of component loop i */
321 } /* end of k0 loop */
322 } /* end of k1 loop */
323 /* GENERATOR SNIP_GRAD_REDUCED_ELEMENTS BOTTOM */
324 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
325 #pragma omp parallel
326 {
327 /*** GENERATOR SNIP_GRAD_FACES TOP */
328 if (m_faceOffset[0] > -1) {
329 #pragma omp for nowait
330 for (index_t k1=0; k1 < m_NE1; ++k1) {
331 const register double* f_10 = in.getSampleDataRO(INDEX2(1,k1, m_N0));
332 const register double* f_11 = in.getSampleDataRO(INDEX2(1,k1+1, m_N0));
333 const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0));
334 const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0));
335 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
336 for (index_t i=0; i < numComp; ++i) {
337 o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx1 + f_01[i]*cx3 + f_10[i]*cx6 + f_11[i]*cx4;
338 o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy0 + f_01[i]*cy7;
339 o[INDEX3(i,0,1,numComp,2)] = f_00[i]*cx3 + f_01[i]*cx1 + f_10[i]*cx4 + f_11[i]*cx6;
340 o[INDEX3(i,1,1,numComp,2)] = f_00[i]*cy0 + f_01[i]*cy7;
341 } /* end of component loop i */
342 } /* end of k1 loop */
343 } /* end of face 0 */
344 if (m_faceOffset[1] > -1) {
345 #pragma omp for nowait
346 for (index_t k1=0; k1 < m_NE1; ++k1) {
347 const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0));
348 const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0));
349 const register double* f_01 = in.getSampleDataRO(INDEX2(m_N0-2,k1+1, m_N0));
350 const register double* f_00 = in.getSampleDataRO(INDEX2(m_N0-2,k1, m_N0));
351 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
352 for (index_t i=0; i < numComp; ++i) {
353 o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx1 + f_01[i]*cx3 + f_10[i]*cx6 + f_11[i]*cx4;
354 o[INDEX3(i,1,0,numComp,2)] = f_10[i]*cy0 + f_11[i]*cy7;
355 o[INDEX3(i,0,1,numComp,2)] = f_00[i]*cx3 + f_01[i]*cx1 + f_10[i]*cx4 + f_11[i]*cx6;
356 o[INDEX3(i,1,1,numComp,2)] = f_10[i]*cy0 + f_11[i]*cy7;
357 } /* end of component loop i */
358 } /* end of k1 loop */
359 } /* end of face 1 */
360 if (m_faceOffset[2] > -1) {
361 #pragma omp for nowait
362 for (index_t k0=0; k0 < m_NE0; ++k0) {
363 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0));
364 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0));
365 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,1, m_N0));
366 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,1, m_N0));
367 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
368 for (index_t i=0; i < numComp; ++i) {
369 o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx0 + f_10[i]*cx7;
370 o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy1 + f_01[i]*cy6 + f_10[i]*cy3 + f_11[i]*cy4;
371 o[INDEX3(i,0,1,numComp,2)] = f_00[i]*cx0 + f_10[i]*cx7;
372 o[INDEX3(i,1,1,numComp,2)] = f_00[i]*cy3 + f_01[i]*cy4 + f_10[i]*cy1 + f_11[i]*cy6;
373 } /* end of component loop i */
374 } /* end of k0 loop */
375 } /* end of face 2 */
376 if (m_faceOffset[3] > -1) {
377 #pragma omp for nowait
378 for (index_t k0=0; k0 < m_NE0; ++k0) {
379 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0));
380 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0));
381 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,m_N1-2, m_N0));
382 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,m_N1-2, m_N0));
383 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
384 for (index_t i=0; i < numComp; ++i) {
385 o[INDEX3(i,0,0,numComp,2)] = f_01[i]*cx0 + f_11[i]*cx7;
386 o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy1 + f_01[i]*cy6 + f_10[i]*cy3 + f_11[i]*cy4;
387 o[INDEX3(i,0,1,numComp,2)] = f_01[i]*cx0 + f_11[i]*cx7;
388 o[INDEX3(i,1,1,numComp,2)] = f_00[i]*cy3 + f_01[i]*cy4 + f_10[i]*cy1 + f_11[i]*cy6;
389 } /* end of component loop i */
390 } /* end of k0 loop */
391 } /* end of face 3 */
392 /* GENERATOR SNIP_GRAD_FACES BOTTOM */
393 } // end of parallel section
394 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
395 #pragma omp parallel
396 {
397 /*** GENERATOR SNIP_GRAD_REDUCED_FACES TOP */
398 if (m_faceOffset[0] > -1) {
399 #pragma omp for nowait
400 for (index_t k1=0; k1 < m_NE1; ++k1) {
401 const register double* f_10 = in.getSampleDataRO(INDEX2(1,k1, m_N0));
402 const register double* f_11 = in.getSampleDataRO(INDEX2(1,k1+1, m_N0));
403 const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0));
404 const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0));
405 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
406 for (index_t i=0; i < numComp; ++i) {
407 o[INDEX3(i,0,0,numComp,2)] = cx5*(f_10[i] + f_11[i]) + cx2*(f_00[i] + f_01[i]);
408 o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy0 + f_01[i]*cy7;
409 } /* end of component loop i */
410 } /* end of k1 loop */
411 } /* end of face 0 */
412 if (m_faceOffset[1] > -1) {
413 #pragma omp for nowait
414 for (index_t k1=0; k1 < m_NE1; ++k1) {
415 const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0));
416 const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0));
417 const register double* f_01 = in.getSampleDataRO(INDEX2(m_N0-2,k1+1, m_N0));
418 const register double* f_00 = in.getSampleDataRO(INDEX2(m_N0-2,k1, m_N0));
419 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
420 for (index_t i=0; i < numComp; ++i) {
421 o[INDEX3(i,0,0,numComp,2)] = cx5*(f_10[i] + f_11[i]) + cx2*(f_00[i] + f_01[i]);
422 o[INDEX3(i,1,0,numComp,2)] = f_10[i]*cy0 + f_11[i]*cy7;
423 } /* end of component loop i */
424 } /* end of k1 loop */
425 } /* end of face 1 */
426 if (m_faceOffset[2] > -1) {
427 #pragma omp for nowait
428 for (index_t k0=0; k0 < m_NE0; ++k0) {
429 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0));
430 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0));
431 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,1, m_N0));
432 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,1, m_N0));
433 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
434 for (index_t i=0; i < numComp; ++i) {
435 o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx0 + f_10[i]*cx7;
436 o[INDEX3(i,1,0,numComp,2)] = cy2*(f_00[i] + f_10[i]) + cy5*(f_01[i] + f_11[i]);
437 } /* end of component loop i */
438 } /* end of k0 loop */
439 } /* end of face 2 */
440 if (m_faceOffset[3] > -1) {
441 #pragma omp for nowait
442 for (index_t k0=0; k0 < m_NE0; ++k0) {
443 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0));
444 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0));
445 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,m_N1-2, m_N0));
446 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,m_N1-2, m_N0));
447 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
448 for (index_t i=0; i < numComp; ++i) {
449 o[INDEX3(i,0,0,numComp,2)] = f_01[i]*cx0 + f_11[i]*cx7;
450 o[INDEX3(i,1,0,numComp,2)] = cy5*(f_01[i] + f_11[i]) + cy2*(f_00[i] + f_10[i]);
451 } /* end of component loop i */
452 } /* end of k0 loop */
453 } /* end of face 3 */
454 /* GENERATOR SNIP_GRAD_REDUCED_FACES BOTTOM */
455 } // end of parallel section
456 } else {
457 stringstream msg;
458 msg << "setToGradient() not implemented for "
459 << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
460 throw RipleyException(msg.str());
461 }
462 }
463
464 void Rectangle::setToIntegrals(vector<double>& integrals, const escript::Data& arg) const
465 {
466 escript::Data& in = *const_cast<escript::Data*>(&arg);
467 const dim_t numComp = in.getDataPointSize();
468 const double h0 = m_l0/m_gNE0;
469 const double h1 = m_l1/m_gNE1;
470 if (arg.getFunctionSpace().getTypeCode() == Elements) {
471 const double w_0 = h0*h1/4.;
472 #pragma omp parallel
473 {
474 vector<double> int_local(numComp, 0);
475 #pragma omp for nowait
476 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
477 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
478 const double* f = in.getSampleDataRO(INDEX2(k0, k1, m_NE0));
479 for (index_t i=0; i < numComp; ++i) {
480 const register double f_0 = f[INDEX2(i,0,numComp)];
481 const register double f_1 = f[INDEX2(i,1,numComp)];
482 const register double f_2 = f[INDEX2(i,2,numComp)];
483 const register double f_3 = f[INDEX2(i,3,numComp)];
484 int_local[i]+=(f_0+f_1+f_2+f_3)*w_0;
485 } /* end of component loop i */
486 } /* end of k0 loop */
487 } /* end of k1 loop */
488
489 #pragma omp critical
490 for (index_t i=0; i<numComp; i++)
491 integrals[i]+=int_local[i];
492 } // end of parallel section
493 } else if (arg.getFunctionSpace().getTypeCode() == ReducedElements) {
494 const double w_0 = h0*h1;
495 #pragma omp parallel
496 {
497 vector<double> int_local(numComp, 0);
498 #pragma omp for nowait
499 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
500 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
501 const double* f = in.getSampleDataRO(INDEX2(k0, k1, m_NE0));
502 for (index_t i=0; i < numComp; ++i) {
503 int_local[i]+=f[i]*w_0;
504 } /* end of component loop i */
505 } /* end of k0 loop */
506 } /* end of k1 loop */
507
508 #pragma omp critical
509 for (index_t i=0; i<numComp; i++)
510 integrals[i]+=int_local[i];
511 } // end of parallel section
512 } else if (arg.getFunctionSpace().getTypeCode() == FaceElements) {
513 const double w_0 = h0/2.;
514 const double w_1 = h1/2.;
515 #pragma omp parallel
516 {
517 vector<double> int_local(numComp, 0);
518 if (m_faceOffset[0] > -1) {
519 #pragma omp for nowait
520 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
521 const double* f = in.getSampleDataRO(m_faceOffset[0]+k1);
522 for (index_t i=0; i < numComp; ++i) {
523 const register double f_0 = f[INDEX2(i,0,numComp)];
524 const register double f_1 = f[INDEX2(i,1,numComp)];
525 int_local[i]+=(f_0+f_1)*w_1;
526 } /* end of component loop i */
527 } /* end of k1 loop */
528 }
529
530 if (m_faceOffset[1] > -1) {
531 #pragma omp for nowait
532 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
533 const double* f = in.getSampleDataRO(m_faceOffset[1]+k1);
534 for (index_t i=0; i < numComp; ++i) {
535 const register double f_0 = f[INDEX2(i,0,numComp)];
536 const register double f_1 = f[INDEX2(i,1,numComp)];
537 int_local[i]+=(f_0+f_1)*w_1;
538 } /* end of component loop i */
539 } /* end of k1 loop */
540 }
541
542 if (m_faceOffset[2] > -1) {
543 #pragma omp for nowait
544 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
545 const double* f = in.getSampleDataRO(m_faceOffset[2]+k0);
546 for (index_t i=0; i < numComp; ++i) {
547 const register double f_0 = f[INDEX2(i,0,numComp)];
548 const register double f_1 = f[INDEX2(i,1,numComp)];
549 int_local[i]+=(f_0+f_1)*w_0;
550 } /* end of component loop i */
551 } /* end of k0 loop */
552 }
553
554 if (m_faceOffset[3] > -1) {
555 #pragma omp for nowait
556 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
557 const double* f = in.getSampleDataRO(m_faceOffset[3]+k0);
558 for (index_t i=0; i < numComp; ++i) {
559 const register double f_0 = f[INDEX2(i,0,numComp)];
560 const register double f_1 = f[INDEX2(i,1,numComp)];
561 int_local[i]+=(f_0+f_1)*w_0;
562 } /* end of component loop i */
563 } /* end of k0 loop */
564 }
565
566 #pragma omp critical
567 for (index_t i=0; i<numComp; i++)
568 integrals[i]+=int_local[i];
569 } // end of parallel section
570 } else if (arg.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
571 #pragma omp parallel
572 {
573 vector<double> int_local(numComp, 0);
574 if (m_faceOffset[0] > -1) {
575 #pragma omp for nowait
576 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
577 const double* f = in.getSampleDataRO(m_faceOffset[0]+k1);
578 for (index_t i=0; i < numComp; ++i) {
579 int_local[i]+=f[i]*h1;
580 } /* end of component loop i */
581 } /* end of k1 loop */
582 }
583
584 if (m_faceOffset[1] > -1) {
585 #pragma omp for nowait
586 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
587 const double* f = in.getSampleDataRO(m_faceOffset[1]+k1);
588 for (index_t i=0; i < numComp; ++i) {
589 int_local[i]+=f[i]*h1;
590 } /* end of component loop i */
591 } /* end of k1 loop */
592 }
593
594 if (m_faceOffset[2] > -1) {
595 #pragma omp for nowait
596 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
597 const double* f = in.getSampleDataRO(m_faceOffset[2]+k0);
598 for (index_t i=0; i < numComp; ++i) {
599 int_local[i]+=f[i]*h0;
600 } /* end of component loop i */
601 } /* end of k0 loop */
602 }
603
604 if (m_faceOffset[3] > -1) {
605 #pragma omp for nowait
606 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
607 const double* f = in.getSampleDataRO(m_faceOffset[3]+k0);
608 for (index_t i=0; i < numComp; ++i) {
609 int_local[i]+=f[i]*h0;
610 } /* end of component loop i */
611 } /* end of k0 loop */
612 }
613
614 #pragma omp critical
615 for (index_t i=0; i<numComp; i++)
616 integrals[i]+=int_local[i];
617 } // end of parallel section
618 } else {
619 stringstream msg;
620 msg << "setToIntegrals() not implemented for "
621 << functionSpaceTypeAsString(arg.getFunctionSpace().getTypeCode());
622 throw RipleyException(msg.str());
623 }
624 }
625
626 void Rectangle::setToNormal(escript::Data& out) const
627 {
628 if (out.getFunctionSpace().getTypeCode() == FaceElements) {
629 #pragma omp parallel
630 {
631 if (m_faceOffset[0] > -1) {
632 #pragma omp for nowait
633 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
634 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
635 // set vector at two quadrature points
636 *o++ = -1.;
637 *o++ = 0.;
638 *o++ = -1.;
639 *o = 0.;
640 }
641 }
642
643 if (m_faceOffset[1] > -1) {
644 #pragma omp for nowait
645 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
646 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
647 // set vector at two quadrature points
648 *o++ = 1.;
649 *o++ = 0.;
650 *o++ = 1.;
651 *o = 0.;
652 }
653 }
654
655 if (m_faceOffset[2] > -1) {
656 #pragma omp for nowait
657 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
658 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
659 // set vector at two quadrature points
660 *o++ = 0.;
661 *o++ = -1.;
662 *o++ = 0.;
663 *o = -1.;
664 }
665 }
666
667 if (m_faceOffset[3] > -1) {
668 #pragma omp for nowait
669 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
670 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
671 // set vector at two quadrature points
672 *o++ = 0.;
673 *o++ = 1.;
674 *o++ = 0.;
675 *o = 1.;
676 }
677 }
678 } // end of parallel section
679 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
680 #pragma omp parallel
681 {
682 if (m_faceOffset[0] > -1) {
683 #pragma omp for nowait
684 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
685 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
686 *o++ = -1.;
687 *o = 0.;
688 }
689 }
690
691 if (m_faceOffset[1] > -1) {
692 #pragma omp for nowait
693 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
694 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
695 *o++ = 1.;
696 *o = 0.;
697 }
698 }
699
700 if (m_faceOffset[2] > -1) {
701 #pragma omp for nowait
702 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
703 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
704 *o++ = 0.;
705 *o = -1.;
706 }
707 }
708
709 if (m_faceOffset[3] > -1) {
710 #pragma omp for nowait
711 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
712 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
713 *o++ = 0.;
714 *o = 1.;
715 }
716 }
717 } // end of parallel section
718
719 } else {
720 stringstream msg;
721 msg << "setToNormal() not implemented for "
722 << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
723 throw RipleyException(msg.str());
724 }
725 }
726
727 void Rectangle::addPDEToSystem(escript::AbstractSystemMatrix& mat,
728 escript::Data& rhs, const escript::Data& A, const escript::Data& B,
729 const escript::Data& C, const escript::Data& D,
730 const escript::Data& X, const escript::Data& Y,
731 const escript::Data& d, const escript::Data& y,
732 const escript::Data& d_contact, const escript::Data& y_contact,
733 const escript::Data& d_dirac, const escript::Data& y_dirac) const
734 {
735 throw RipleyException("addPDEToSystem() not implemented");
736 }
737
738 Paso_SystemMatrixPattern* Rectangle::getPattern(bool reducedRowOrder,
739 bool reducedColOrder) const
740 {
741 if (reducedRowOrder || reducedColOrder)
742 throw RipleyException("getPattern() not implemented for reduced order");
743
744 // connector
745 RankVector neighbour;
746 IndexVector offsetInShared(1,0);
747 IndexVector sendShared, recvShared;
748 const IndexVector faces=getNumFacesPerBoundary();
749 const index_t left = (m_offset0==0 ? 0 : 1);
750 const index_t bottom = (m_offset1==0 ? 0 : 1);
751 // corner node from bottom-left
752 if (faces[0] == 0 && faces[2] == 0) {
753 neighbour.push_back(m_mpiInfo->rank-m_NX-1);
754 offsetInShared.push_back(offsetInShared.back()+1);
755 sendShared.push_back(m_nodeId[m_N0+left]);
756 recvShared.push_back(m_nodeId[0]);
757 }
758 // bottom edge
759 if (faces[2] == 0) {
760 neighbour.push_back(m_mpiInfo->rank-m_NX);
761 offsetInShared.push_back(offsetInShared.back()+m_N0-left);
762 for (dim_t i=left; i<m_N0; i++) {
763 // easy case, we know the neighbour id's
764 sendShared.push_back(m_nodeId[m_N0+i]);
765 recvShared.push_back(m_nodeId[i]);
766 }
767 }
768 // corner node from bottom-right
769 if (faces[1] == 0 && faces[2] == 0) {
770 neighbour.push_back(m_mpiInfo->rank-m_NX+1);
771 const index_t N0=(neighbour.back()%m_NX == 0 ? m_N0 : m_N0-1);
772 const index_t N1=(neighbour.back()/m_NX == 0 ? m_N1 : m_N1-1);
773 const index_t first=m_nodeDistribution[neighbour.back()];
774 offsetInShared.push_back(offsetInShared.back()+1);
775 sendShared.push_back(m_nodeId[(bottom+1)*m_N0-1]);
776 recvShared.push_back(first+N0*(N1-1));
777 }
778 // left edge
779 if (faces[0] == 0) {
780 neighbour.push_back(m_mpiInfo->rank-1);
781 offsetInShared.push_back(offsetInShared.back()+m_N1-bottom);
782 for (dim_t i=bottom; i<m_N1; i++) {
783 // easy case, we know the neighbour id's
784 sendShared.push_back(m_nodeId[i*m_N0+1]);
785 recvShared.push_back(m_nodeId[i*m_N0]);
786 }
787 }
788 // right edge
789 if (faces[1] == 0) {
790 neighbour.push_back(m_mpiInfo->rank+1);
791 const index_t rightN0=(neighbour.back()%m_NX == 0 ? m_N0 : m_N0-1);
792 const index_t first=m_nodeDistribution[neighbour.back()];
793 offsetInShared.push_back(offsetInShared.back()+m_N1-bottom);
794 for (dim_t i=bottom; i<m_N1; i++) {
795 sendShared.push_back(m_nodeId[(i+1)*m_N0-1]);
796 recvShared.push_back(first+rightN0*(i-bottom));
797 }
798 }
799 // corner node from top-left
800 if (faces[0] == 0 && faces[3] == 0) {
801 neighbour.push_back(m_mpiInfo->rank+m_NX-1);
802 const index_t N0=(neighbour.back()%m_NX == 0 ? m_N0 : m_N0-1);
803 const index_t first=m_nodeDistribution[neighbour.back()];
804 offsetInShared.push_back(offsetInShared.back()+1);
805 sendShared.push_back(m_nodeId[m_N0*(m_N1-1)+left]);
806 recvShared.push_back(first+N0-1);
807 }
808 // top edge
809 if (faces[3] == 0) {
810 neighbour.push_back(m_mpiInfo->rank+m_NX);
811 const index_t first=m_nodeDistribution[neighbour.back()];
812 offsetInShared.push_back(offsetInShared.back()+m_N0-left);
813 for (dim_t i=left; i<m_N0; i++) {
814 sendShared.push_back(m_nodeId[m_N0*(m_N1-1)+i]);
815 recvShared.push_back(first+i-left);
816 }
817 }
818 // corner node from top-right
819 if (faces[1] == 0 && faces[3] == 0) {
820 neighbour.push_back(m_mpiInfo->rank+m_NX+1);
821 const index_t first=m_nodeDistribution[neighbour.back()];
822 offsetInShared.push_back(offsetInShared.back()+1);
823 sendShared.push_back(m_nodeId[m_N0*m_N1-1]);
824 recvShared.push_back(first);
825 }
826 const int numDOF=m_nodeDistribution[m_mpiInfo->rank+1]-m_nodeDistribution[m_mpiInfo->rank];
827 /*
828 cout << "--- rcv_shcomp ---" << endl;
829 cout << "numDOF=" << numDOF << ", numNeighbors=" << neighbour.size() << endl;
830 for (size_t i=0; i<neighbour.size(); i++) {
831 cout << "neighbor[" << i << "]=" << neighbour[i]
832 << " offsetInShared[" << i+1 << "]=" << offsetInShared[i+1] << endl;
833 }
834 for (size_t i=0; i<recvShared.size(); i++) {
835 cout << "shared[" << i << "]=" << recvShared[i] << endl;
836 }
837 cout << "--- snd_shcomp ---" << endl;
838 for (size_t i=0; i<sendShared.size(); i++) {
839 cout << "shared[" << i << "]=" << sendShared[i] << endl;
840 }
841 */
842
843 Paso_SharedComponents *snd_shcomp = Paso_SharedComponents_alloc(
844 numDOF, neighbour.size(), &neighbour[0], &sendShared[0],
845 &offsetInShared[0], 1, 0, m_mpiInfo);
846 Paso_SharedComponents *rcv_shcomp = Paso_SharedComponents_alloc(
847 numDOF, neighbour.size(), &neighbour[0], &recvShared[0],
848 &offsetInShared[0], 1, 0, m_mpiInfo);
849 Paso_Connector* connector = Paso_Connector_alloc(snd_shcomp, rcv_shcomp);
850 Paso_SharedComponents_free(snd_shcomp);
851 Paso_SharedComponents_free(rcv_shcomp);
852
853 // create patterns
854 dim_t M, N;
855 IndexVector ptr(1,0);
856 IndexVector index;
857
858 // main pattern
859 for (index_t i=0; i<numDOF; i++) {
860 // always add the node itself
861 index.push_back(i);
862 int num=insertNeighbours(index, i);
863 ptr.push_back(ptr.back()+num+1);
864 }
865 M=N=ptr.size()-1;
866 // paso will manage the memory
867 index_t* indexC = MEMALLOC(index.size(),index_t);
868 index_t* ptrC = MEMALLOC(ptr.size(), index_t);
869 copy(index.begin(), index.end(), indexC);
870 copy(ptr.begin(), ptr.end(), ptrC);
871 Paso_Pattern *mainPattern = Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT,
872 M, N, ptrC, indexC);
873
874 /*
875 cout << "--- main_pattern ---" << endl;
876 cout << "M=" << M << ", N=" << N << endl;
877 for (size_t i=0; i<ptr.size(); i++) {
878 cout << "ptr[" << i << "]=" << ptr[i] << endl;
879 }
880 for (size_t i=0; i<index.size(); i++) {
881 cout << "index[" << i << "]=" << index[i] << endl;
882 }
883 */
884
885 ptr.clear();
886 index.clear();
887
888 // column & row couple patterns
889 Paso_Pattern *colCouplePattern, *rowCouplePattern;
890 generateCouplePatterns(&colCouplePattern, &rowCouplePattern);
891
892 // allocate paso distribution
893 Paso_Distribution* distribution = Paso_Distribution_alloc(m_mpiInfo,
894 const_cast<index_t*>(&m_nodeDistribution[0]), 1, 0);
895
896 Paso_SystemMatrixPattern* pattern = Paso_SystemMatrixPattern_alloc(
897 MATRIX_FORMAT_DEFAULT, distribution, distribution,
898 mainPattern, colCouplePattern, rowCouplePattern,
899 connector, connector);
900 Paso_Pattern_free(mainPattern);
901 Paso_Pattern_free(colCouplePattern);
902 Paso_Pattern_free(rowCouplePattern);
903 Paso_Distribution_free(distribution);
904 return pattern;
905 }
906
907 void Rectangle::Print_Mesh_Info(const bool full) const
908 {
909 RipleyDomain::Print_Mesh_Info(full);
910 if (full) {
911 cout << " Id Coordinates" << endl;
912 cout.precision(15);
913 cout.setf(ios::scientific, ios::floatfield);
914 pair<double,double> xdx = getFirstCoordAndSpacing(0);
915 pair<double,double> ydy = getFirstCoordAndSpacing(1);
916 for (index_t i=0; i < getNumNodes(); i++) {
917 cout << " " << setw(5) << m_nodeId[i]
918 << " " << xdx.first+(i%m_N0)*xdx.second
919 << " " << ydy.first+(i/m_N0)*ydy.second << endl;
920 }
921 }
922 }
923
924 IndexVector Rectangle::getNumNodesPerDim() const
925 {
926 IndexVector ret;
927 ret.push_back(m_N0);
928 ret.push_back(m_N1);
929 return ret;
930 }
931
932 IndexVector Rectangle::getNumElementsPerDim() const
933 {
934 IndexVector ret;
935 ret.push_back(m_NE0);
936 ret.push_back(m_NE1);
937 return ret;
938 }
939
940 IndexVector Rectangle::getNumFacesPerBoundary() const
941 {
942 IndexVector ret(4, 0);
943 //left
944 if (m_offset0==0)
945 ret[0]=m_NE1;
946 //right
947 if (m_mpiInfo->rank%m_NX==m_NX-1)
948 ret[1]=m_NE1;
949 //bottom
950 if (m_offset1==0)
951 ret[2]=m_NE0;
952 //top
953 if (m_mpiInfo->rank/m_NX==m_NY-1)
954 ret[3]=m_NE0;
955 return ret;
956 }
957
958 pair<double,double> Rectangle::getFirstCoordAndSpacing(dim_t dim) const
959 {
960 if (dim==0) {
961 return pair<double,double>((m_l0*m_offset0)/m_gNE0, m_l0/m_gNE0);
962 } else if (dim==1) {
963 return pair<double,double>((m_l1*m_offset1)/m_gNE1, m_l1/m_gNE1);
964 }
965 throw RipleyException("getFirstCoordAndSpacing(): invalid argument");
966 }
967
968 //protected
969 dim_t Rectangle::getNumFaceElements() const
970 {
971 const IndexVector faces = getNumFacesPerBoundary();
972 dim_t n=0;
973 for (size_t i=0; i<faces.size(); i++)
974 n+=faces[i];
975 return n;
976 }
977
978 //protected
979 void Rectangle::assembleCoordinates(escript::Data& arg) const
980 {
981 escriptDataC x = arg.getDataC();
982 int numDim = m_numDim;
983 if (!isDataPointShapeEqual(&x, 1, &numDim))
984 throw RipleyException("setToX: Invalid Data object shape");
985 if (!numSamplesEqual(&x, 1, getNumNodes()))
986 throw RipleyException("setToX: Illegal number of samples in Data object");
987
988 pair<double,double> xdx = getFirstCoordAndSpacing(0);
989 pair<double,double> ydy = getFirstCoordAndSpacing(1);
990 arg.requireWrite();
991 #pragma omp parallel for
992 for (dim_t i1 = 0; i1 < m_N1; i1++) {
993 for (dim_t i0 = 0; i0 < m_N0; i0++) {
994 double* point = arg.getSampleDataRW(i0+m_N0*i1);
995 point[0] = xdx.first+i0*xdx.second;
996 point[1] = ydy.first+i1*ydy.second;
997 }
998 }
999 }
1000
1001 //private
1002 void Rectangle::populateSampleIds()
1003 {
1004 // identifiers are ordered from left to right, bottom to top on each rank,
1005 // except for the shared nodes which are owned by the rank below / to the
1006 // left of the current rank
1007
1008 // build node distribution vector first.
1009 // m_nodeDistribution[i] is the first node id on rank i, that is
1010 // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes
1011 m_nodeDistribution.assign(m_mpiInfo->size+1, 0);
1012 m_nodeDistribution[1]=getNumNodes();
1013 for (dim_t k=1; k<m_mpiInfo->size-1; k++) {
1014 const index_t x=k%m_NX;
1015 const index_t y=k/m_NX;
1016 index_t numNodes=getNumNodes();
1017 if (x>0)
1018 numNodes-=m_N1;
1019 if (y>0)
1020 numNodes-=m_N0;
1021 if (x>0 && y>0)
1022 numNodes++; // subtracted corner twice -> fix that
1023 m_nodeDistribution[k+1]=m_nodeDistribution[k]+numNodes;
1024 }
1025 m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal();
1026
1027 m_nodeId.resize(getNumNodes());
1028
1029 // the bottom row and left column are not owned by this rank so the
1030 // identifiers need to be computed accordingly
1031 const index_t left = (m_offset0==0 ? 0 : 1);
1032 const index_t bottom = (m_offset1==0 ? 0 : 1);
1033 if (left>0) {
1034 const int neighbour=m_mpiInfo->rank-1;
1035 const index_t leftN0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1036 #pragma omp parallel for
1037 for (dim_t i1=bottom; i1<m_N1; i1++) {
1038 m_nodeId[i1*m_N0]=m_nodeDistribution[neighbour]
1039 + (i1-bottom+1)*leftN0-1;
1040 }
1041 }
1042 if (bottom>0) {
1043 const int neighbour=m_mpiInfo->rank-m_NX;
1044 const index_t bottomN0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1045 const index_t bottomN1=(neighbour/m_NX == 0 ? m_N1 : m_N1-1);
1046 #pragma omp parallel for
1047 for (dim_t i0=left; i0<m_N0; i0++) {
1048 m_nodeId[i0]=m_nodeDistribution[neighbour]
1049 + (bottomN1-1)*bottomN0 + i0 - left;
1050 }
1051 }
1052 if (left>0 && bottom>0) {
1053 const int neighbour=m_mpiInfo->rank-m_NX-1;
1054 const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1055 const index_t N1=(neighbour/m_NX == 0 ? m_N1 : m_N1-1);
1056 m_nodeId[0]=m_nodeDistribution[neighbour]+N0*N1-1;
1057 }
1058
1059 // the rest of the id's are contiguous
1060 const index_t firstId=m_nodeDistribution[m_mpiInfo->rank];
1061 #pragma omp parallel for
1062 for (dim_t i1=bottom; i1<m_N1; i1++) {
1063 for (dim_t i0=left; i0<m_N0; i0++) {
1064 m_nodeId[i0+i1*m_N0] = firstId+i0-left+(i1-bottom)*(m_N0-left);
1065 }
1066 }
1067
1068 // element id's
1069 m_elementId.resize(getNumElements());
1070 #pragma omp parallel for
1071 for (dim_t k=0; k<getNumElements(); k++) {
1072 m_elementId[k]=k;
1073 }
1074
1075 // face element id's
1076 m_faceId.resize(getNumFaceElements());
1077 #pragma omp parallel for
1078 for (dim_t k=0; k<getNumFaceElements(); k++) {
1079 m_faceId[k]=k;
1080 }
1081
1082 // generate face offset vector and set face tags
1083 const IndexVector facesPerEdge = getNumFacesPerBoundary();
1084 const index_t LEFT=1, RIGHT=2, BOTTOM=10, TOP=20;
1085 const index_t faceTag[] = { LEFT, RIGHT, BOTTOM, TOP };
1086 m_faceOffset.assign(facesPerEdge.size(), -1);
1087 m_faceTags.clear();
1088 index_t offset=0;
1089 for (size_t i=0; i<facesPerEdge.size(); i++) {
1090 if (facesPerEdge[i]>0) {
1091 m_faceOffset[i]=offset;
1092 offset+=facesPerEdge[i];
1093 m_faceTags.insert(m_faceTags.end(), facesPerEdge[i], faceTag[i]);
1094 }
1095 }
1096 setTagMap("left", LEFT);
1097 setTagMap("right", RIGHT);
1098 setTagMap("bottom", BOTTOM);
1099 setTagMap("top", TOP);
1100 updateTagsInUse(FaceElements);
1101 }
1102
1103 //private
1104 int Rectangle::insertNeighbours(IndexVector& index, index_t node) const
1105 {
1106 const dim_t myN0 = (m_offset0==0 ? m_N0 : m_N0-1);
1107 const dim_t myN1 = (m_offset1==0 ? m_N1 : m_N1-1);
1108 const int x=node%myN0;
1109 const int y=node/myN0;
1110 int num=0;
1111 if (y>0) {
1112 if (x>0) {
1113 // bottom-left
1114 index.push_back(node-myN0-1);
1115 num++;
1116 }
1117 // bottom
1118 index.push_back(node-myN0);
1119 num++;
1120 if (x<myN0-1) {
1121 // bottom-right
1122 index.push_back(node-myN0+1);
1123 num++;
1124 }
1125 }
1126 if (x<myN0-1) {
1127 // right
1128 index.push_back(node+1);
1129 num++;
1130 if (y<myN1-1) {
1131 // top-right
1132 index.push_back(node+myN0+1);
1133 num++;
1134 }
1135 }
1136 if (y<myN1-1) {
1137 // top
1138 index.push_back(node+myN0);
1139 num++;
1140 if (x>0) {
1141 // top-left
1142 index.push_back(node+myN0-1);
1143 num++;
1144 }
1145 }
1146 if (x>0) {
1147 // left
1148 index.push_back(node-1);
1149 num++;
1150 }
1151
1152 return num;
1153 }
1154
1155 //private
1156 void Rectangle::generateCouplePatterns(Paso_Pattern** colPattern, Paso_Pattern** rowPattern) const
1157 {
1158 IndexVector ptr(1,0);
1159 IndexVector index;
1160 const dim_t myN0 = (m_offset0==0 ? m_N0 : m_N0-1);
1161 const dim_t myN1 = (m_offset1==0 ? m_N1 : m_N1-1);
1162 const IndexVector faces=getNumFacesPerBoundary();
1163
1164 // bottom edge
1165 dim_t n=0;
1166 if (faces[0] == 0) {
1167 index.push_back(2*(myN0+myN1+1));
1168 index.push_back(2*(myN0+myN1+1)+1);
1169 n+=2;
1170 if (faces[2] == 0) {
1171 index.push_back(0);
1172 index.push_back(1);
1173 index.push_back(2);
1174 n+=3;
1175 }
1176 } else if (faces[2] == 0) {
1177 index.push_back(1);
1178 index.push_back(2);
1179 n+=2;
1180 }
1181 // n=neighbours of bottom-left corner node
1182 ptr.push_back(ptr.back()+n);
1183 n=0;
1184 if (faces[2] == 0) {
1185 for (dim_t i=1; i<myN0-1; i++) {
1186 index.push_back(i);
1187 index.push_back(i+1);
1188 index.push_back(i+2);
1189 ptr.push_back(ptr.back()+3);
1190 }
1191 index.push_back(myN0-1);
1192 index.push_back(myN0);
1193 n+=2;
1194 if (faces[1] == 0) {
1195 index.push_back(myN0+1);
1196 index.push_back(myN0+2);
1197 index.push_back(myN0+3);
1198 n+=3;
1199 }
1200 } else {
1201 for (dim_t i=1; i<myN0-1; i++) {
1202 ptr.push_back(ptr.back());
1203 }
1204 if (faces[1] == 0) {
1205 index.push_back(myN0+2);
1206 index.push_back(myN0+3);
1207 n+=2;
1208 }
1209 }
1210 // n=neighbours of bottom-right corner node
1211 ptr.push_back(ptr.back()+n);
1212
1213 // 2nd row to 2nd last row
1214 for (dim_t i=1; i<myN1-1; i++) {
1215 // left edge
1216 if (faces[0] == 0) {
1217 index.push_back(2*(myN0+myN1+2)-i);
1218 index.push_back(2*(myN0+myN1+2)-i-1);
1219 index.push_back(2*(myN0+myN1+2)-i-2);
1220 ptr.push_back(ptr.back()+3);
1221 } else {
1222 ptr.push_back(ptr.back());
1223 }
1224 for (dim_t j=1; j<myN0-1; j++) {
1225 ptr.push_back(ptr.back());
1226 }
1227 // right edge
1228 if (faces[1] == 0) {
1229 index.push_back(myN0+i+1);
1230 index.push_back(myN0+i+2);
1231 index.push_back(myN0+i+3);
1232 ptr.push_back(ptr.back()+3);
1233 } else {
1234 ptr.push_back(ptr.back());
1235 }
1236 }
1237
1238 // top edge
1239 n=0;
1240 if (faces[0] == 0) {
1241 index.push_back(2*myN0+myN1+5);
1242 index.push_back(2*myN0+myN1+4);
1243 n+=2;
1244 if (faces[3] == 0) {
1245 index.push_back(2*myN0+myN1+3);
1246 index.push_back(2*myN0+myN1+2);
1247 index.push_back(2*myN0+myN1+1);
1248 n+=3;
1249 }
1250 } else if (faces[3] == 0) {
1251 index.push_back(2*myN0+myN1+2);
1252 index.push_back(2*myN0+myN1+1);
1253 n+=2;
1254 }
1255 // n=neighbours of top-left corner node
1256 ptr.push_back(ptr.back()+n);
1257 n=0;
1258 if (faces[3] == 0) {
1259 for (dim_t i=1; i<myN0-1; i++) {
1260 index.push_back(2*myN0+myN1+i+1);
1261 index.push_back(2*myN0+myN1+i);
1262 index.push_back(2*myN0+myN1+i-1);
1263 ptr.push_back(ptr.back()+3);
1264 }
1265 index.push_back(myN0+myN1+4);
1266 index.push_back(myN0+myN1+3);
1267 n+=2;
1268 if (faces[1] == 0) {
1269 index.push_back(myN0+myN1+2);
1270 index.push_back(myN0+myN1+1);
1271 index.push_back(myN0+myN1);
1272 n+=3;
1273 }
1274 } else {
1275 for (dim_t i=1; i<myN0-1; i++) {
1276 ptr.push_back(ptr.back());
1277 }
1278 if (faces[1] == 0) {
1279 index.push_back(myN0+myN1+1);
1280 index.push_back(myN0+myN1);
1281 n+=2;
1282 }
1283 }
1284 // n=neighbours of top-right corner node
1285 ptr.push_back(ptr.back()+n);
1286
1287 dim_t M=ptr.size()-1;
1288 map<index_t,index_t> idMap;
1289 dim_t N=0;
1290 for (IndexVector::iterator it=index.begin(); it!=index.end(); it++) {
1291 if (idMap.find(*it)==idMap.end()) {
1292 idMap[*it]=N;
1293 *it=N++;
1294 } else {
1295 *it=idMap[*it];
1296 }
1297 }
1298
1299 /*
1300 cout << "--- colCouple_pattern ---" << endl;
1301 cout << "M=" << M << ", N=" << N << endl;
1302 for (size_t i=0; i<ptr.size(); i++) {
1303 cout << "ptr[" << i << "]=" << ptr[i] << endl;
1304 }
1305 for (size_t i=0; i<index.size(); i++) {
1306 cout << "index[" << i << "]=" << index[i] << endl;
1307 }
1308 */
1309
1310 // now build the row couple pattern
1311 IndexVector ptr2(1,0);
1312 IndexVector index2;
1313 for (dim_t id=0; id<N; id++) {
1314 n=0;
1315 for (dim_t i=0; i<M; i++) {
1316 for (dim_t j=ptr[i]; j<ptr[i+1]; j++) {
1317 if (index[j]==id) {
1318 index2.push_back(i);
1319 n++;
1320 break;
1321 }
1322 }
1323 }
1324 ptr2.push_back(ptr2.back()+n);
1325 }
1326
1327 /*
1328 cout << "--- rowCouple_pattern ---" << endl;
1329 cout << "M=" << N << ", N=" << M << endl;
1330 for (size_t i=0; i<ptr2.size(); i++) {
1331 cout << "ptr[" << i << "]=" << ptr2[i] << endl;
1332 }
1333 for (size_t i=0; i<index2.size(); i++) {
1334 cout << "index[" << i << "]=" << index2[i] << endl;
1335 }
1336 */
1337
1338 // paso will manage the memory
1339 index_t* indexC = MEMALLOC(index.size(), index_t);
1340 index_t* ptrC = MEMALLOC(ptr.size(), index_t);
1341 copy(index.begin(), index.end(), indexC);
1342 copy(ptr.begin(), ptr.end(), ptrC);
1343 *colPattern=Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT, M, N, ptrC, indexC);
1344
1345 // paso will manage the memory
1346 indexC = MEMALLOC(index2.size(), index_t);
1347 ptrC = MEMALLOC(ptr2.size(), index_t);
1348 copy(index2.begin(), index2.end(), indexC);
1349 copy(ptr2.begin(), ptr2.end(), ptrC);
1350 *rowPattern=Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT, N, M, ptrC, indexC);
1351 }
1352
1353 //protected
1354 void Rectangle::interpolateNodesOnElements(escript::Data& out,
1355 escript::Data& in, bool reduced) const
1356 {
1357 const dim_t numComp = in.getDataPointSize();
1358 if (reduced) {
1359 /*** GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS TOP */
1360 const double c0 = .25;
1361 #pragma omp parallel for
1362 for (index_t k1=0; k1 < m_NE1; ++k1) {
1363 for (index_t k0=0; k0 < m_NE0; ++k0) {
1364 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0));
1365 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0));
1366 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0));
1367 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0));
1368 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0));
1369 for (index_t i=0; i < numComp; ++i) {
1370 o[INDEX2(i,numComp,0)] = c0*(f_00[i] + f_01[i] + f_10[i] + f_11[i]);
1371 } /* end of component loop i */
1372 } /* end of k0 loop */
1373 } /* end of k1 loop */
1374 /* GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS BOTTOM */
1375 } else {
1376 /*** GENERATOR SNIP_INTERPOLATE_ELEMENTS TOP */
1377 const double c0 = .16666666666666666667;
1378 const double c1 = .044658198738520451079;
1379 const double c2 = .62200846792814621559;
1380 #pragma omp parallel for
1381 for (index_t k1=0; k1 < m_NE1; ++k1) {
1382 for (index_t k0=0; k0 < m_NE0; ++k0) {
1383 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0));
1384 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0));
1385 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0));
1386 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0));
1387 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0));
1388 for (index_t i=0; i < numComp; ++i) {
1389 o[INDEX2(i,numComp,0)] = f_00[i]*c2 + f_11[i]*c1 + c0*(f_01[i] + f_10[i]);
1390 o[INDEX2(i,numComp,1)] = f_01[i]*c1 + f_10[i]*c2 + c0*(f_00[i] + f_11[i]);
1391 o[INDEX2(i,numComp,2)] = f_01[i]*c2 + f_10[i]*c1 + c0*(f_00[i] + f_11[i]);
1392 o[INDEX2(i,numComp,3)] = f_00[i]*c1 + f_11[i]*c2 + c0*(f_01[i] + f_10[i]);
1393 } /* end of component loop i */
1394 } /* end of k0 loop */
1395 } /* end of k1 loop */
1396 /* GENERATOR SNIP_INTERPOLATE_ELEMENTS BOTTOM */
1397 }
1398 }
1399
1400 //protected
1401 void Rectangle::interpolateNodesOnFaces(escript::Data& out, escript::Data& in,
1402 bool reduced) const
1403 {
1404 const dim_t numComp = in.getDataPointSize();
1405 if (reduced) {
1406 const double c0 = .5;
1407 #pragma omp parallel
1408 {
1409 /*** GENERATOR SNIP_INTERPOLATE_REDUCED_FACES TOP */
1410 if (m_faceOffset[0] > -1) {
1411 #pragma omp for nowait
1412 for (index_t k1=0; k1 < m_NE1; ++k1) {
1413 const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0));
1414 const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0));
1415 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
1416 for (index_t i=0; i < numComp; ++i) {
1417 o[INDEX2(i,numComp,0)] = c0*(f_00[i] + f_01[i]);
1418 } /* end of component loop i */
1419 } /* end of k1 loop */
1420 } /* end of face 0 */
1421 if (m_faceOffset[1] > -1) {
1422 #pragma omp for nowait
1423 for (index_t k1=0; k1 < m_NE1; ++k1) {
1424 const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0));
1425 const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0));
1426 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
1427 for (index_t i=0; i < numComp; ++i) {
1428 o[INDEX2(i,numComp,0)] = c0*(f_10[i] + f_11[i]);
1429 } /* end of component loop i */
1430 } /* end of k1 loop */
1431 } /* end of face 1 */
1432 if (m_faceOffset[2] > -1) {
1433 #pragma omp for nowait
1434 for (index_t k0=0; k0 < m_NE0; ++k0) {
1435 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0));
1436 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0));
1437 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1438 for (index_t i=0; i < numComp; ++i) {
1439 o[INDEX2(i,numComp,0)] = c0*(f_00[i] + f_10[i]);
1440 } /* end of component loop i */
1441 } /* end of k0 loop */
1442 } /* end of face 2 */
1443 if (m_faceOffset[3] > -1) {
1444 #pragma omp for nowait
1445 for (index_t k0=0; k0 < m_NE0; ++k0) {
1446 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0));
1447 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0));
1448 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1449 for (index_t i=0; i < numComp; ++i) {
1450 o[INDEX2(i,numComp,0)] = c0*(f_01[i] + f_11[i]);
1451 } /* end of component loop i */
1452 } /* end of k0 loop */
1453 } /* end of face 3 */
1454 /* GENERATOR SNIP_INTERPOLATE_REDUCED_FACES BOTTOM */
1455 } // end of parallel section
1456 } else {
1457 const double c0 = 0.21132486540518711775;
1458 const double c1 = 0.78867513459481288225;
1459 #pragma omp parallel
1460 {
1461 /*** GENERATOR SNIP_INTERPOLATE_FACES TOP */
1462 if (m_faceOffset[0] > -1) {
1463 #pragma omp for nowait
1464 for (index_t k1=0; k1 < m_NE1; ++k1) {
1465 const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0));
1466 const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0));
1467 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
1468 for (index_t i=0; i < numComp; ++i) {
1469 o[INDEX2(i,numComp,0)] = f_00[i]*c1 + f_01[i]*c0;
1470 o[INDEX2(i,numComp,1)] = f_00[i]*c0 + f_01[i]*c1;
1471 } /* end of component loop i */
1472 } /* end of k1 loop */
1473 } /* end of face 0 */
1474 if (m_faceOffset[1] > -1) {
1475 #pragma omp for nowait
1476 for (index_t k1=0; k1 < m_NE1; ++k1) {
1477 const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0));
1478 const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0));
1479 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
1480 for (index_t i=0; i < numComp; ++i) {
1481 o[INDEX2(i,numComp,0)] = f_10[i]*c1 + f_11[i]*c0;
1482 o[INDEX2(i,numComp,1)] = f_10[i]*c0 + f_11[i]*c1;
1483 } /* end of component loop i */
1484 } /* end of k1 loop */
1485 } /* end of face 1 */
1486 if (m_faceOffset[2] > -1) {
1487 #pragma omp for nowait
1488 for (index_t k0=0; k0 < m_NE0; ++k0) {
1489 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0));
1490 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0));
1491 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1492 for (index_t i=0; i < numComp; ++i) {
1493 o[INDEX2(i,numComp,0)] = f_00[i]*c1 + f_10[i]*c0;
1494 o[INDEX2(i,numComp,1)] = f_00[i]*c0 + f_10[i]*c1;
1495 } /* end of component loop i */
1496 } /* end of k0 loop */
1497 } /* end of face 2 */
1498 if (m_faceOffset[3] > -1) {
1499 #pragma omp for nowait
1500 for (index_t k0=0; k0 < m_NE0; ++k0) {
1501 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0));
1502 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0));
1503 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1504 for (index_t i=0; i < numComp; ++i) {
1505 o[INDEX2(i,numComp,0)] = f_01[i]*c1 + f_11[i]*c0;
1506 o[INDEX2(i,numComp,1)] = f_01[i]*c0 + f_11[i]*c1;
1507 } /* end of component loop i */
1508 } /* end of k0 loop */
1509 } /* end of face 3 */
1510 /* GENERATOR SNIP_INTERPOLATE_FACES BOTTOM */
1511 } // end of parallel section
1512 }
1513 }
1514
1515 } // end of namespace ripley
1516

  ViewVC Help
Powered by ViewVC 1.1.26