/[escript]/branches/ripleygmg_from_3668/ripley/src/Rectangle.cpp
ViewVC logotype

Contents of /branches/ripleygmg_from_3668/ripley/src/Rectangle.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3744 - (show annotations)
Tue Dec 13 06:41:54 2011 UTC (7 years, 11 months ago) by caltinay
File size: 60698 byte(s)
Implemented (Face)Elements->Reduced(Face)Elements interpolation and started
separating DOF and nodes.
Also, implemented operator==() so that a==b for different domain objects a and
b which are in the same state.

1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2011 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14 #include <ripley/Rectangle.h>
15 extern "C" {
16 #include "paso/SystemMatrixPattern.h"
17 }
18
19 #if USE_SILO
20 #include <silo.h>
21 #ifdef ESYS_MPI
22 #include <pmpio.h>
23 #endif
24 #endif
25
26 #include <iomanip>
27
28 using namespace std;
29
30 namespace ripley {
31
32 Rectangle::Rectangle(int n0, int n1, double l0, double l1, int d0, int d1) :
33 RipleyDomain(2),
34 m_gNE0(n0),
35 m_gNE1(n1),
36 m_l0(l0),
37 m_l1(l1),
38 m_NX(d0),
39 m_NY(d1)
40 {
41 // ensure number of subdivisions is valid and nodes can be distributed
42 // among number of ranks
43 if (m_NX*m_NY != m_mpiInfo->size)
44 throw RipleyException("Invalid number of spatial subdivisions");
45
46 if (n0%m_NX > 0 || n1%m_NY > 0)
47 throw RipleyException("Number of elements must be separable into number of ranks in each dimension");
48
49 // local number of elements
50 m_NE0 = n0/m_NX;
51 m_NE1 = n1/m_NY;
52 // local number of nodes (not necessarily owned)
53 m_N0 = m_NE0+1;
54 m_N1 = m_NE1+1;
55 // bottom-left node is at (offset0,offset1) in global mesh
56 m_offset0 = m_NE0*(m_mpiInfo->rank%m_NX);
57 m_offset1 = m_NE1*(m_mpiInfo->rank/m_NX);
58 populateSampleIds();
59 }
60
61 Rectangle::~Rectangle()
62 {
63 }
64
65 string Rectangle::getDescription() const
66 {
67 return "ripley::Rectangle";
68 }
69
70 bool Rectangle::operator==(const AbstractDomain& other) const
71 {
72 const Rectangle* o=dynamic_cast<const Rectangle*>(&other);
73 if (o) {
74 return (RipleyDomain::operator==(other) &&
75 m_gNE0==o->m_gNE0 && m_gNE1==o->m_gNE1
76 && m_l0==o->m_l0 && m_l1==o->m_l1
77 && m_NX==o->m_NX && m_NY==o->m_NY);
78 }
79
80 return false;
81 }
82
83 void Rectangle::dump(const string& fileName) const
84 {
85 #if USE_SILO
86 string fn(fileName);
87 if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
88 fn+=".silo";
89 }
90
91 const int NUM_SILO_FILES = 1;
92 const char* blockDirFmt = "/block%04d";
93 int driver=DB_HDF5;
94 string siloPath;
95 DBfile* dbfile = NULL;
96
97 #ifdef ESYS_MPI
98 PMPIO_baton_t* baton = NULL;
99 #endif
100
101 if (m_mpiInfo->size > 1) {
102 #ifdef ESYS_MPI
103 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
104 0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
105 PMPIO_DefaultClose, (void*)&driver);
106 // try the fallback driver in case of error
107 if (!baton && driver != DB_PDB) {
108 driver = DB_PDB;
109 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
110 0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
111 PMPIO_DefaultClose, (void*)&driver);
112 }
113 if (baton) {
114 char str[64];
115 snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
116 siloPath = str;
117 dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
118 }
119 #endif
120 } else {
121 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
122 getDescription().c_str(), driver);
123 // try the fallback driver in case of error
124 if (!dbfile && driver != DB_PDB) {
125 driver = DB_PDB;
126 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
127 getDescription().c_str(), driver);
128 }
129 }
130
131 if (!dbfile)
132 throw RipleyException("dump: Could not create Silo file");
133
134 /*
135 if (driver==DB_HDF5) {
136 // gzip level 1 already provides good compression with minimal
137 // performance penalty. Some tests showed that gzip levels >3 performed
138 // rather badly on escript data both in terms of time and space
139 DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
140 }
141 */
142
143 boost::scoped_ptr<double> x(new double[m_N0]);
144 boost::scoped_ptr<double> y(new double[m_N1]);
145 double* coords[2] = { x.get(), y.get() };
146 pair<double,double> xdx = getFirstCoordAndSpacing(0);
147 pair<double,double> ydy = getFirstCoordAndSpacing(1);
148 #pragma omp parallel
149 {
150 #pragma omp for nowait
151 for (dim_t i0 = 0; i0 < m_N0; i0++) {
152 coords[0][i0]=xdx.first+i0*xdx.second;
153 }
154 #pragma omp for nowait
155 for (dim_t i1 = 0; i1 < m_N1; i1++) {
156 coords[1][i1]=ydy.first+i1*ydy.second;
157 }
158 }
159 IndexVector dims = getNumNodesPerDim();
160
161 // write mesh
162 DBPutQuadmesh(dbfile, "mesh", NULL, coords, &dims[0], 2, DB_DOUBLE,
163 DB_COLLINEAR, NULL);
164
165 // write node ids
166 DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], &dims[0], 2,
167 NULL, 0, DB_INT, DB_NODECENT, NULL);
168
169 // write element ids
170 dims = getNumElementsPerDim();
171 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
172 &dims[0], 2, NULL, 0, DB_INT, DB_ZONECENT, NULL);
173
174 // rank 0 writes multimesh and multivar
175 if (m_mpiInfo->rank == 0) {
176 vector<string> tempstrings;
177 vector<char*> names;
178 for (dim_t i=0; i<m_mpiInfo->size; i++) {
179 stringstream path;
180 path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
181 tempstrings.push_back(path.str());
182 names.push_back((char*)tempstrings.back().c_str());
183 }
184 vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
185 DBSetDir(dbfile, "/");
186 DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
187 &types[0], NULL);
188 tempstrings.clear();
189 names.clear();
190 for (dim_t i=0; i<m_mpiInfo->size; i++) {
191 stringstream path;
192 path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
193 tempstrings.push_back(path.str());
194 names.push_back((char*)tempstrings.back().c_str());
195 }
196 types.assign(m_mpiInfo->size, DB_QUADVAR);
197 DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
198 &types[0], NULL);
199 tempstrings.clear();
200 names.clear();
201 for (dim_t i=0; i<m_mpiInfo->size; i++) {
202 stringstream path;
203 path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
204 tempstrings.push_back(path.str());
205 names.push_back((char*)tempstrings.back().c_str());
206 }
207 DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
208 &types[0], NULL);
209 }
210
211 if (m_mpiInfo->size > 1) {
212 #ifdef ESYS_MPI
213 PMPIO_HandOffBaton(baton, dbfile);
214 PMPIO_Finish(baton);
215 #endif
216 } else {
217 DBClose(dbfile);
218 }
219
220 #else // USE_SILO
221 throw RipleyException("dump(): no Silo support");
222 #endif
223 }
224
225 const int* Rectangle::borrowSampleReferenceIDs(int fsType) const
226 {
227 switch (fsType) {
228 case Nodes:
229 return &m_nodeId[0];
230 case DegreesOfFreedom:
231 case ReducedDegreesOfFreedom: //FIXME: reduced
232 return &m_dofId[0];
233 case Elements:
234 case ReducedElements:
235 return &m_elementId[0];
236 case FaceElements:
237 case ReducedFaceElements:
238 return &m_faceId[0];
239 default:
240 break;
241 }
242
243 stringstream msg;
244 msg << "borrowSampleReferenceIDs() not implemented for function space type "
245 << functionSpaceTypeAsString(fsType);
246 throw RipleyException(msg.str());
247 }
248
249 bool Rectangle::ownSample(int fsCode, index_t id) const
250 {
251 #ifdef ESYS_MPI
252 if (fsCode != ReducedNodes) {
253 const index_t myFirst=m_nodeDistribution[m_mpiInfo->rank];
254 const index_t myLast=m_nodeDistribution[m_mpiInfo->rank+1]-1;
255 return (m_nodeId[id]>=myFirst && m_nodeId[id]<=myLast);
256 } else {
257 stringstream msg;
258 msg << "ownSample() not implemented for "
259 << functionSpaceTypeAsString(fsCode);
260 throw RipleyException(msg.str());
261 }
262 #else
263 return true;
264 #endif
265 }
266
267 void Rectangle::setToGradient(escript::Data& out, const escript::Data& cIn) const
268 {
269 escript::Data& in = *const_cast<escript::Data*>(&cIn);
270 const dim_t numComp = in.getDataPointSize();
271 const double h0 = m_l0/m_gNE0;
272 const double h1 = m_l1/m_gNE1;
273 const double cx0 = -1./h0;
274 const double cx1 = -.78867513459481288225/h0;
275 const double cx2 = -.5/h0;
276 const double cx3 = -.21132486540518711775/h0;
277 const double cx4 = .21132486540518711775/h0;
278 const double cx5 = .5/h0;
279 const double cx6 = .78867513459481288225/h0;
280 const double cx7 = 1./h0;
281 const double cy0 = -1./h1;
282 const double cy1 = -.78867513459481288225/h1;
283 const double cy2 = -.5/h1;
284 const double cy3 = -.21132486540518711775/h1;
285 const double cy4 = .21132486540518711775/h1;
286 const double cy5 = .5/h1;
287 const double cy6 = .78867513459481288225/h1;
288 const double cy7 = 1./h1;
289
290 if (out.getFunctionSpace().getTypeCode() == Elements) {
291 /*** GENERATOR SNIP_GRAD_ELEMENTS TOP */
292 #pragma omp parallel for
293 for (index_t k1=0; k1 < m_NE1; ++k1) {
294 for (index_t k0=0; k0 < m_NE0; ++k0) {
295 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0));
296 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0));
297 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0));
298 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0));
299 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0));
300 for (index_t i=0; i < numComp; ++i) {
301 o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx1 + f_01[i]*cx3 + f_10[i]*cx6 + f_11[i]*cx4;
302 o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy1 + f_01[i]*cy6 + f_10[i]*cy3 + f_11[i]*cy4;
303 o[INDEX3(i,0,1,numComp,2)] = f_00[i]*cx1 + f_01[i]*cx3 + f_10[i]*cx6 + f_11[i]*cx4;
304 o[INDEX3(i,1,1,numComp,2)] = f_00[i]*cy3 + f_01[i]*cy4 + f_10[i]*cy1 + f_11[i]*cy6;
305 o[INDEX3(i,0,2,numComp,2)] = f_00[i]*cx3 + f_01[i]*cx1 + f_10[i]*cx4 + f_11[i]*cx6;
306 o[INDEX3(i,1,2,numComp,2)] = f_00[i]*cy1 + f_01[i]*cy6 + f_10[i]*cy3 + f_11[i]*cy4;
307 o[INDEX3(i,0,3,numComp,2)] = f_00[i]*cx3 + f_01[i]*cx1 + f_10[i]*cx4 + f_11[i]*cx6;
308 o[INDEX3(i,1,3,numComp,2)] = f_00[i]*cy3 + f_01[i]*cy4 + f_10[i]*cy1 + f_11[i]*cy6;
309 } /* end of component loop i */
310 } /* end of k0 loop */
311 } /* end of k1 loop */
312 /* GENERATOR SNIP_GRAD_ELEMENTS BOTTOM */
313 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
314 /*** GENERATOR SNIP_GRAD_REDUCED_ELEMENTS TOP */
315 #pragma omp parallel for
316 for (index_t k1=0; k1 < m_NE1; ++k1) {
317 for (index_t k0=0; k0 < m_NE0; ++k0) {
318 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0));
319 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0));
320 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0));
321 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0));
322 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0));
323 for (index_t i=0; i < numComp; ++i) {
324 o[INDEX3(i,0,0,numComp,2)] = cx5*(f_10[i] + f_11[i]) + cx2*(f_00[i] + f_01[i]);
325 o[INDEX3(i,1,0,numComp,2)] = cy2*(f_00[i] + f_10[i]) + cy5*(f_01[i] + f_11[i]);
326 } /* end of component loop i */
327 } /* end of k0 loop */
328 } /* end of k1 loop */
329 /* GENERATOR SNIP_GRAD_REDUCED_ELEMENTS BOTTOM */
330 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
331 #pragma omp parallel
332 {
333 /*** GENERATOR SNIP_GRAD_FACES TOP */
334 if (m_faceOffset[0] > -1) {
335 #pragma omp for nowait
336 for (index_t k1=0; k1 < m_NE1; ++k1) {
337 const register double* f_10 = in.getSampleDataRO(INDEX2(1,k1, m_N0));
338 const register double* f_11 = in.getSampleDataRO(INDEX2(1,k1+1, m_N0));
339 const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0));
340 const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0));
341 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
342 for (index_t i=0; i < numComp; ++i) {
343 o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx1 + f_01[i]*cx3 + f_10[i]*cx6 + f_11[i]*cx4;
344 o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy0 + f_01[i]*cy7;
345 o[INDEX3(i,0,1,numComp,2)] = f_00[i]*cx3 + f_01[i]*cx1 + f_10[i]*cx4 + f_11[i]*cx6;
346 o[INDEX3(i,1,1,numComp,2)] = f_00[i]*cy0 + f_01[i]*cy7;
347 } /* end of component loop i */
348 } /* end of k1 loop */
349 } /* end of face 0 */
350 if (m_faceOffset[1] > -1) {
351 #pragma omp for nowait
352 for (index_t k1=0; k1 < m_NE1; ++k1) {
353 const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0));
354 const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0));
355 const register double* f_01 = in.getSampleDataRO(INDEX2(m_N0-2,k1+1, m_N0));
356 const register double* f_00 = in.getSampleDataRO(INDEX2(m_N0-2,k1, m_N0));
357 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
358 for (index_t i=0; i < numComp; ++i) {
359 o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx1 + f_01[i]*cx3 + f_10[i]*cx6 + f_11[i]*cx4;
360 o[INDEX3(i,1,0,numComp,2)] = f_10[i]*cy0 + f_11[i]*cy7;
361 o[INDEX3(i,0,1,numComp,2)] = f_00[i]*cx3 + f_01[i]*cx1 + f_10[i]*cx4 + f_11[i]*cx6;
362 o[INDEX3(i,1,1,numComp,2)] = f_10[i]*cy0 + f_11[i]*cy7;
363 } /* end of component loop i */
364 } /* end of k1 loop */
365 } /* end of face 1 */
366 if (m_faceOffset[2] > -1) {
367 #pragma omp for nowait
368 for (index_t k0=0; k0 < m_NE0; ++k0) {
369 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0));
370 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0));
371 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,1, m_N0));
372 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,1, m_N0));
373 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
374 for (index_t i=0; i < numComp; ++i) {
375 o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx0 + f_10[i]*cx7;
376 o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy1 + f_01[i]*cy6 + f_10[i]*cy3 + f_11[i]*cy4;
377 o[INDEX3(i,0,1,numComp,2)] = f_00[i]*cx0 + f_10[i]*cx7;
378 o[INDEX3(i,1,1,numComp,2)] = f_00[i]*cy3 + f_01[i]*cy4 + f_10[i]*cy1 + f_11[i]*cy6;
379 } /* end of component loop i */
380 } /* end of k0 loop */
381 } /* end of face 2 */
382 if (m_faceOffset[3] > -1) {
383 #pragma omp for nowait
384 for (index_t k0=0; k0 < m_NE0; ++k0) {
385 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0));
386 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0));
387 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,m_N1-2, m_N0));
388 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,m_N1-2, m_N0));
389 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
390 for (index_t i=0; i < numComp; ++i) {
391 o[INDEX3(i,0,0,numComp,2)] = f_01[i]*cx0 + f_11[i]*cx7;
392 o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy1 + f_01[i]*cy6 + f_10[i]*cy3 + f_11[i]*cy4;
393 o[INDEX3(i,0,1,numComp,2)] = f_01[i]*cx0 + f_11[i]*cx7;
394 o[INDEX3(i,1,1,numComp,2)] = f_00[i]*cy3 + f_01[i]*cy4 + f_10[i]*cy1 + f_11[i]*cy6;
395 } /* end of component loop i */
396 } /* end of k0 loop */
397 } /* end of face 3 */
398 /* GENERATOR SNIP_GRAD_FACES BOTTOM */
399 } // end of parallel section
400 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
401 #pragma omp parallel
402 {
403 /*** GENERATOR SNIP_GRAD_REDUCED_FACES TOP */
404 if (m_faceOffset[0] > -1) {
405 #pragma omp for nowait
406 for (index_t k1=0; k1 < m_NE1; ++k1) {
407 const register double* f_10 = in.getSampleDataRO(INDEX2(1,k1, m_N0));
408 const register double* f_11 = in.getSampleDataRO(INDEX2(1,k1+1, m_N0));
409 const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0));
410 const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0));
411 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
412 for (index_t i=0; i < numComp; ++i) {
413 o[INDEX3(i,0,0,numComp,2)] = cx5*(f_10[i] + f_11[i]) + cx2*(f_00[i] + f_01[i]);
414 o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy0 + f_01[i]*cy7;
415 } /* end of component loop i */
416 } /* end of k1 loop */
417 } /* end of face 0 */
418 if (m_faceOffset[1] > -1) {
419 #pragma omp for nowait
420 for (index_t k1=0; k1 < m_NE1; ++k1) {
421 const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0));
422 const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0));
423 const register double* f_01 = in.getSampleDataRO(INDEX2(m_N0-2,k1+1, m_N0));
424 const register double* f_00 = in.getSampleDataRO(INDEX2(m_N0-2,k1, m_N0));
425 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
426 for (index_t i=0; i < numComp; ++i) {
427 o[INDEX3(i,0,0,numComp,2)] = cx5*(f_10[i] + f_11[i]) + cx2*(f_00[i] + f_01[i]);
428 o[INDEX3(i,1,0,numComp,2)] = f_10[i]*cy0 + f_11[i]*cy7;
429 } /* end of component loop i */
430 } /* end of k1 loop */
431 } /* end of face 1 */
432 if (m_faceOffset[2] > -1) {
433 #pragma omp for nowait
434 for (index_t k0=0; k0 < m_NE0; ++k0) {
435 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0));
436 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0));
437 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,1, m_N0));
438 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,1, m_N0));
439 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
440 for (index_t i=0; i < numComp; ++i) {
441 o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx0 + f_10[i]*cx7;
442 o[INDEX3(i,1,0,numComp,2)] = cy2*(f_00[i] + f_10[i]) + cy5*(f_01[i] + f_11[i]);
443 } /* end of component loop i */
444 } /* end of k0 loop */
445 } /* end of face 2 */
446 if (m_faceOffset[3] > -1) {
447 #pragma omp for nowait
448 for (index_t k0=0; k0 < m_NE0; ++k0) {
449 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0));
450 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0));
451 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,m_N1-2, m_N0));
452 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,m_N1-2, m_N0));
453 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
454 for (index_t i=0; i < numComp; ++i) {
455 o[INDEX3(i,0,0,numComp,2)] = f_01[i]*cx0 + f_11[i]*cx7;
456 o[INDEX3(i,1,0,numComp,2)] = cy5*(f_01[i] + f_11[i]) + cy2*(f_00[i] + f_10[i]);
457 } /* end of component loop i */
458 } /* end of k0 loop */
459 } /* end of face 3 */
460 /* GENERATOR SNIP_GRAD_REDUCED_FACES BOTTOM */
461 } // end of parallel section
462 } else {
463 stringstream msg;
464 msg << "setToGradient() not implemented for "
465 << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
466 throw RipleyException(msg.str());
467 }
468 }
469
470 void Rectangle::setToIntegrals(vector<double>& integrals, const escript::Data& arg) const
471 {
472 escript::Data& in = *const_cast<escript::Data*>(&arg);
473 const dim_t numComp = in.getDataPointSize();
474 const double h0 = m_l0/m_gNE0;
475 const double h1 = m_l1/m_gNE1;
476 if (arg.getFunctionSpace().getTypeCode() == Elements) {
477 const double w_0 = h0*h1/4.;
478 #pragma omp parallel
479 {
480 vector<double> int_local(numComp, 0);
481 #pragma omp for nowait
482 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
483 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
484 const double* f = in.getSampleDataRO(INDEX2(k0, k1, m_NE0));
485 for (index_t i=0; i < numComp; ++i) {
486 const register double f_0 = f[INDEX2(i,0,numComp)];
487 const register double f_1 = f[INDEX2(i,1,numComp)];
488 const register double f_2 = f[INDEX2(i,2,numComp)];
489 const register double f_3 = f[INDEX2(i,3,numComp)];
490 int_local[i]+=(f_0+f_1+f_2+f_3)*w_0;
491 } /* end of component loop i */
492 } /* end of k0 loop */
493 } /* end of k1 loop */
494
495 #pragma omp critical
496 for (index_t i=0; i<numComp; i++)
497 integrals[i]+=int_local[i];
498 } // end of parallel section
499 } else if (arg.getFunctionSpace().getTypeCode() == ReducedElements) {
500 const double w_0 = h0*h1;
501 #pragma omp parallel
502 {
503 vector<double> int_local(numComp, 0);
504 #pragma omp for nowait
505 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
506 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
507 const double* f = in.getSampleDataRO(INDEX2(k0, k1, m_NE0));
508 for (index_t i=0; i < numComp; ++i) {
509 int_local[i]+=f[i]*w_0;
510 } /* end of component loop i */
511 } /* end of k0 loop */
512 } /* end of k1 loop */
513
514 #pragma omp critical
515 for (index_t i=0; i<numComp; i++)
516 integrals[i]+=int_local[i];
517 } // end of parallel section
518 } else if (arg.getFunctionSpace().getTypeCode() == FaceElements) {
519 const double w_0 = h0/2.;
520 const double w_1 = h1/2.;
521 #pragma omp parallel
522 {
523 vector<double> int_local(numComp, 0);
524 if (m_faceOffset[0] > -1) {
525 #pragma omp for nowait
526 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
527 const double* f = in.getSampleDataRO(m_faceOffset[0]+k1);
528 for (index_t i=0; i < numComp; ++i) {
529 const register double f_0 = f[INDEX2(i,0,numComp)];
530 const register double f_1 = f[INDEX2(i,1,numComp)];
531 int_local[i]+=(f_0+f_1)*w_1;
532 } /* end of component loop i */
533 } /* end of k1 loop */
534 }
535
536 if (m_faceOffset[1] > -1) {
537 #pragma omp for nowait
538 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
539 const double* f = in.getSampleDataRO(m_faceOffset[1]+k1);
540 for (index_t i=0; i < numComp; ++i) {
541 const register double f_0 = f[INDEX2(i,0,numComp)];
542 const register double f_1 = f[INDEX2(i,1,numComp)];
543 int_local[i]+=(f_0+f_1)*w_1;
544 } /* end of component loop i */
545 } /* end of k1 loop */
546 }
547
548 if (m_faceOffset[2] > -1) {
549 #pragma omp for nowait
550 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
551 const double* f = in.getSampleDataRO(m_faceOffset[2]+k0);
552 for (index_t i=0; i < numComp; ++i) {
553 const register double f_0 = f[INDEX2(i,0,numComp)];
554 const register double f_1 = f[INDEX2(i,1,numComp)];
555 int_local[i]+=(f_0+f_1)*w_0;
556 } /* end of component loop i */
557 } /* end of k0 loop */
558 }
559
560 if (m_faceOffset[3] > -1) {
561 #pragma omp for nowait
562 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
563 const double* f = in.getSampleDataRO(m_faceOffset[3]+k0);
564 for (index_t i=0; i < numComp; ++i) {
565 const register double f_0 = f[INDEX2(i,0,numComp)];
566 const register double f_1 = f[INDEX2(i,1,numComp)];
567 int_local[i]+=(f_0+f_1)*w_0;
568 } /* end of component loop i */
569 } /* end of k0 loop */
570 }
571
572 #pragma omp critical
573 for (index_t i=0; i<numComp; i++)
574 integrals[i]+=int_local[i];
575 } // end of parallel section
576 } else if (arg.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
577 #pragma omp parallel
578 {
579 vector<double> int_local(numComp, 0);
580 if (m_faceOffset[0] > -1) {
581 #pragma omp for nowait
582 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
583 const double* f = in.getSampleDataRO(m_faceOffset[0]+k1);
584 for (index_t i=0; i < numComp; ++i) {
585 int_local[i]+=f[i]*h1;
586 } /* end of component loop i */
587 } /* end of k1 loop */
588 }
589
590 if (m_faceOffset[1] > -1) {
591 #pragma omp for nowait
592 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
593 const double* f = in.getSampleDataRO(m_faceOffset[1]+k1);
594 for (index_t i=0; i < numComp; ++i) {
595 int_local[i]+=f[i]*h1;
596 } /* end of component loop i */
597 } /* end of k1 loop */
598 }
599
600 if (m_faceOffset[2] > -1) {
601 #pragma omp for nowait
602 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
603 const double* f = in.getSampleDataRO(m_faceOffset[2]+k0);
604 for (index_t i=0; i < numComp; ++i) {
605 int_local[i]+=f[i]*h0;
606 } /* end of component loop i */
607 } /* end of k0 loop */
608 }
609
610 if (m_faceOffset[3] > -1) {
611 #pragma omp for nowait
612 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
613 const double* f = in.getSampleDataRO(m_faceOffset[3]+k0);
614 for (index_t i=0; i < numComp; ++i) {
615 int_local[i]+=f[i]*h0;
616 } /* end of component loop i */
617 } /* end of k0 loop */
618 }
619
620 #pragma omp critical
621 for (index_t i=0; i<numComp; i++)
622 integrals[i]+=int_local[i];
623 } // end of parallel section
624 } else {
625 stringstream msg;
626 msg << "setToIntegrals() not implemented for "
627 << functionSpaceTypeAsString(arg.getFunctionSpace().getTypeCode());
628 throw RipleyException(msg.str());
629 }
630 }
631
632 void Rectangle::setToNormal(escript::Data& out) const
633 {
634 if (out.getFunctionSpace().getTypeCode() == FaceElements) {
635 #pragma omp parallel
636 {
637 if (m_faceOffset[0] > -1) {
638 #pragma omp for nowait
639 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
640 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
641 // set vector at two quadrature points
642 *o++ = -1.;
643 *o++ = 0.;
644 *o++ = -1.;
645 *o = 0.;
646 }
647 }
648
649 if (m_faceOffset[1] > -1) {
650 #pragma omp for nowait
651 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
652 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
653 // set vector at two quadrature points
654 *o++ = 1.;
655 *o++ = 0.;
656 *o++ = 1.;
657 *o = 0.;
658 }
659 }
660
661 if (m_faceOffset[2] > -1) {
662 #pragma omp for nowait
663 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
664 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
665 // set vector at two quadrature points
666 *o++ = 0.;
667 *o++ = -1.;
668 *o++ = 0.;
669 *o = -1.;
670 }
671 }
672
673 if (m_faceOffset[3] > -1) {
674 #pragma omp for nowait
675 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
676 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
677 // set vector at two quadrature points
678 *o++ = 0.;
679 *o++ = 1.;
680 *o++ = 0.;
681 *o = 1.;
682 }
683 }
684 } // end of parallel section
685 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
686 #pragma omp parallel
687 {
688 if (m_faceOffset[0] > -1) {
689 #pragma omp for nowait
690 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
691 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
692 *o++ = -1.;
693 *o = 0.;
694 }
695 }
696
697 if (m_faceOffset[1] > -1) {
698 #pragma omp for nowait
699 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
700 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
701 *o++ = 1.;
702 *o = 0.;
703 }
704 }
705
706 if (m_faceOffset[2] > -1) {
707 #pragma omp for nowait
708 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
709 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
710 *o++ = 0.;
711 *o = -1.;
712 }
713 }
714
715 if (m_faceOffset[3] > -1) {
716 #pragma omp for nowait
717 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
718 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
719 *o++ = 0.;
720 *o = 1.;
721 }
722 }
723 } // end of parallel section
724
725 } else {
726 stringstream msg;
727 msg << "setToNormal() not implemented for "
728 << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
729 throw RipleyException(msg.str());
730 }
731 }
732
733 void Rectangle::addPDEToSystem(escript::AbstractSystemMatrix& mat,
734 escript::Data& rhs, const escript::Data& A, const escript::Data& B,
735 const escript::Data& C, const escript::Data& D,
736 const escript::Data& X, const escript::Data& Y,
737 const escript::Data& d, const escript::Data& y,
738 const escript::Data& d_contact, const escript::Data& y_contact,
739 const escript::Data& d_dirac, const escript::Data& y_dirac) const
740 {
741 throw RipleyException("addPDEToSystem() not implemented");
742 }
743
744 Paso_SystemMatrixPattern* Rectangle::getPattern(bool reducedRowOrder,
745 bool reducedColOrder) const
746 {
747 if (reducedRowOrder || reducedColOrder)
748 throw RipleyException("getPattern() not implemented for reduced order");
749
750 // connector
751 RankVector neighbour;
752 IndexVector offsetInShared(1,0);
753 IndexVector sendShared, recvShared;
754 const IndexVector faces=getNumFacesPerBoundary();
755 const index_t left = (m_offset0==0 ? 0 : 1);
756 const index_t bottom = (m_offset1==0 ? 0 : 1);
757 // corner node from bottom-left
758 if (faces[0] == 0 && faces[2] == 0) {
759 neighbour.push_back(m_mpiInfo->rank-m_NX-1);
760 offsetInShared.push_back(offsetInShared.back()+1);
761 sendShared.push_back(m_nodeId[m_N0+left]);
762 recvShared.push_back(m_nodeId[0]);
763 }
764 // bottom edge
765 if (faces[2] == 0) {
766 neighbour.push_back(m_mpiInfo->rank-m_NX);
767 offsetInShared.push_back(offsetInShared.back()+m_N0-left);
768 for (dim_t i=left; i<m_N0; i++) {
769 // easy case, we know the neighbour id's
770 sendShared.push_back(m_nodeId[m_N0+i]);
771 recvShared.push_back(m_nodeId[i]);
772 }
773 }
774 // corner node from bottom-right
775 if (faces[1] == 0 && faces[2] == 0) {
776 neighbour.push_back(m_mpiInfo->rank-m_NX+1);
777 const index_t N0=(neighbour.back()%m_NX == 0 ? m_N0 : m_N0-1);
778 const index_t N1=(neighbour.back()/m_NX == 0 ? m_N1 : m_N1-1);
779 const index_t first=m_nodeDistribution[neighbour.back()];
780 offsetInShared.push_back(offsetInShared.back()+1);
781 sendShared.push_back(m_nodeId[(bottom+1)*m_N0-1]);
782 recvShared.push_back(first+N0*(N1-1));
783 }
784 // left edge
785 if (faces[0] == 0) {
786 neighbour.push_back(m_mpiInfo->rank-1);
787 offsetInShared.push_back(offsetInShared.back()+m_N1-bottom);
788 for (dim_t i=bottom; i<m_N1; i++) {
789 // easy case, we know the neighbour id's
790 sendShared.push_back(m_nodeId[i*m_N0+1]);
791 recvShared.push_back(m_nodeId[i*m_N0]);
792 }
793 }
794 // right edge
795 if (faces[1] == 0) {
796 neighbour.push_back(m_mpiInfo->rank+1);
797 const index_t rightN0=(neighbour.back()%m_NX == 0 ? m_N0 : m_N0-1);
798 const index_t first=m_nodeDistribution[neighbour.back()];
799 offsetInShared.push_back(offsetInShared.back()+m_N1-bottom);
800 for (dim_t i=bottom; i<m_N1; i++) {
801 sendShared.push_back(m_nodeId[(i+1)*m_N0-1]);
802 recvShared.push_back(first+rightN0*(i-bottom));
803 }
804 }
805 // corner node from top-left
806 if (faces[0] == 0 && faces[3] == 0) {
807 neighbour.push_back(m_mpiInfo->rank+m_NX-1);
808 const index_t N0=(neighbour.back()%m_NX == 0 ? m_N0 : m_N0-1);
809 const index_t first=m_nodeDistribution[neighbour.back()];
810 offsetInShared.push_back(offsetInShared.back()+1);
811 sendShared.push_back(m_nodeId[m_N0*(m_N1-1)+left]);
812 recvShared.push_back(first+N0-1);
813 }
814 // top edge
815 if (faces[3] == 0) {
816 neighbour.push_back(m_mpiInfo->rank+m_NX);
817 const index_t first=m_nodeDistribution[neighbour.back()];
818 offsetInShared.push_back(offsetInShared.back()+m_N0-left);
819 for (dim_t i=left; i<m_N0; i++) {
820 sendShared.push_back(m_nodeId[m_N0*(m_N1-1)+i]);
821 recvShared.push_back(first+i-left);
822 }
823 }
824 // corner node from top-right
825 if (faces[1] == 0 && faces[3] == 0) {
826 neighbour.push_back(m_mpiInfo->rank+m_NX+1);
827 const index_t first=m_nodeDistribution[neighbour.back()];
828 offsetInShared.push_back(offsetInShared.back()+1);
829 sendShared.push_back(m_nodeId[m_N0*m_N1-1]);
830 recvShared.push_back(first);
831 }
832 const int numDOF=m_nodeDistribution[m_mpiInfo->rank+1]-m_nodeDistribution[m_mpiInfo->rank];
833 /*
834 cout << "--- rcv_shcomp ---" << endl;
835 cout << "numDOF=" << numDOF << ", numNeighbors=" << neighbour.size() << endl;
836 for (size_t i=0; i<neighbour.size(); i++) {
837 cout << "neighbor[" << i << "]=" << neighbour[i]
838 << " offsetInShared[" << i+1 << "]=" << offsetInShared[i+1] << endl;
839 }
840 for (size_t i=0; i<recvShared.size(); i++) {
841 cout << "shared[" << i << "]=" << recvShared[i] << endl;
842 }
843 cout << "--- snd_shcomp ---" << endl;
844 for (size_t i=0; i<sendShared.size(); i++) {
845 cout << "shared[" << i << "]=" << sendShared[i] << endl;
846 }
847 */
848
849 Paso_SharedComponents *snd_shcomp = Paso_SharedComponents_alloc(
850 numDOF, neighbour.size(), &neighbour[0], &sendShared[0],
851 &offsetInShared[0], 1, 0, m_mpiInfo);
852 Paso_SharedComponents *rcv_shcomp = Paso_SharedComponents_alloc(
853 numDOF, neighbour.size(), &neighbour[0], &recvShared[0],
854 &offsetInShared[0], 1, 0, m_mpiInfo);
855 Paso_Connector* connector = Paso_Connector_alloc(snd_shcomp, rcv_shcomp);
856 Paso_SharedComponents_free(snd_shcomp);
857 Paso_SharedComponents_free(rcv_shcomp);
858
859 // create patterns
860 dim_t M, N;
861 IndexVector ptr(1,0);
862 IndexVector index;
863
864 // main pattern
865 for (index_t i=0; i<numDOF; i++) {
866 // always add the node itself
867 index.push_back(i);
868 int num=insertNeighbours(index, i);
869 ptr.push_back(ptr.back()+num+1);
870 }
871 M=N=ptr.size()-1;
872 // paso will manage the memory
873 index_t* indexC = MEMALLOC(index.size(),index_t);
874 index_t* ptrC = MEMALLOC(ptr.size(), index_t);
875 copy(index.begin(), index.end(), indexC);
876 copy(ptr.begin(), ptr.end(), ptrC);
877 Paso_Pattern *mainPattern = Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT,
878 M, N, ptrC, indexC);
879
880 /*
881 cout << "--- main_pattern ---" << endl;
882 cout << "M=" << M << ", N=" << N << endl;
883 for (size_t i=0; i<ptr.size(); i++) {
884 cout << "ptr[" << i << "]=" << ptr[i] << endl;
885 }
886 for (size_t i=0; i<index.size(); i++) {
887 cout << "index[" << i << "]=" << index[i] << endl;
888 }
889 */
890
891 ptr.clear();
892 index.clear();
893
894 // column & row couple patterns
895 Paso_Pattern *colCouplePattern, *rowCouplePattern;
896 generateCouplePatterns(&colCouplePattern, &rowCouplePattern);
897
898 // allocate paso distribution
899 Paso_Distribution* distribution = Paso_Distribution_alloc(m_mpiInfo,
900 const_cast<index_t*>(&m_nodeDistribution[0]), 1, 0);
901
902 Paso_SystemMatrixPattern* pattern = Paso_SystemMatrixPattern_alloc(
903 MATRIX_FORMAT_DEFAULT, distribution, distribution,
904 mainPattern, colCouplePattern, rowCouplePattern,
905 connector, connector);
906 Paso_Pattern_free(mainPattern);
907 Paso_Pattern_free(colCouplePattern);
908 Paso_Pattern_free(rowCouplePattern);
909 Paso_Distribution_free(distribution);
910 return pattern;
911 }
912
913 void Rectangle::Print_Mesh_Info(const bool full) const
914 {
915 RipleyDomain::Print_Mesh_Info(full);
916 if (full) {
917 cout << " Id Coordinates" << endl;
918 cout.precision(15);
919 cout.setf(ios::scientific, ios::floatfield);
920 pair<double,double> xdx = getFirstCoordAndSpacing(0);
921 pair<double,double> ydy = getFirstCoordAndSpacing(1);
922 for (index_t i=0; i < getNumNodes(); i++) {
923 cout << " " << setw(5) << m_nodeId[i]
924 << " " << xdx.first+(i%m_N0)*xdx.second
925 << " " << ydy.first+(i/m_N0)*ydy.second << endl;
926 }
927 }
928 }
929
930 IndexVector Rectangle::getNumNodesPerDim() const
931 {
932 IndexVector ret;
933 ret.push_back(m_N0);
934 ret.push_back(m_N1);
935 return ret;
936 }
937
938 IndexVector Rectangle::getNumElementsPerDim() const
939 {
940 IndexVector ret;
941 ret.push_back(m_NE0);
942 ret.push_back(m_NE1);
943 return ret;
944 }
945
946 IndexVector Rectangle::getNumFacesPerBoundary() const
947 {
948 IndexVector ret(4, 0);
949 //left
950 if (m_offset0==0)
951 ret[0]=m_NE1;
952 //right
953 if (m_mpiInfo->rank%m_NX==m_NX-1)
954 ret[1]=m_NE1;
955 //bottom
956 if (m_offset1==0)
957 ret[2]=m_NE0;
958 //top
959 if (m_mpiInfo->rank/m_NX==m_NY-1)
960 ret[3]=m_NE0;
961 return ret;
962 }
963
964 pair<double,double> Rectangle::getFirstCoordAndSpacing(dim_t dim) const
965 {
966 if (dim==0) {
967 return pair<double,double>((m_l0*m_offset0)/m_gNE0, m_l0/m_gNE0);
968 } else if (dim==1) {
969 return pair<double,double>((m_l1*m_offset1)/m_gNE1, m_l1/m_gNE1);
970 }
971 throw RipleyException("getFirstCoordAndSpacing(): invalid argument");
972 }
973
974 //protected
975 dim_t Rectangle::getNumDOF() const
976 {
977 return m_nodeDistribution[m_mpiInfo->rank+1]
978 -m_nodeDistribution[m_mpiInfo->rank];
979 }
980
981 //protected
982 dim_t Rectangle::getNumFaceElements() const
983 {
984 const IndexVector faces = getNumFacesPerBoundary();
985 dim_t n=0;
986 for (size_t i=0; i<faces.size(); i++)
987 n+=faces[i];
988 return n;
989 }
990
991 //protected
992 void Rectangle::assembleCoordinates(escript::Data& arg) const
993 {
994 escriptDataC x = arg.getDataC();
995 int numDim = m_numDim;
996 if (!isDataPointShapeEqual(&x, 1, &numDim))
997 throw RipleyException("setToX: Invalid Data object shape");
998 if (!numSamplesEqual(&x, 1, getNumNodes()))
999 throw RipleyException("setToX: Illegal number of samples in Data object");
1000
1001 pair<double,double> xdx = getFirstCoordAndSpacing(0);
1002 pair<double,double> ydy = getFirstCoordAndSpacing(1);
1003 arg.requireWrite();
1004 #pragma omp parallel for
1005 for (dim_t i1 = 0; i1 < m_N1; i1++) {
1006 for (dim_t i0 = 0; i0 < m_N0; i0++) {
1007 double* point = arg.getSampleDataRW(i0+m_N0*i1);
1008 point[0] = xdx.first+i0*xdx.second;
1009 point[1] = ydy.first+i1*ydy.second;
1010 }
1011 }
1012 }
1013
1014 //private
1015 void Rectangle::populateSampleIds()
1016 {
1017 // identifiers are ordered from left to right, bottom to top on each rank,
1018 // except for the shared nodes which are owned by the rank below / to the
1019 // left of the current rank
1020
1021 // build node distribution vector first.
1022 // m_nodeDistribution[i] is the first node id on rank i, that is
1023 // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes
1024 m_nodeDistribution.assign(m_mpiInfo->size+1, 0);
1025 m_nodeDistribution[1]=getNumNodes();
1026 for (dim_t k=1; k<m_mpiInfo->size-1; k++) {
1027 const index_t x=k%m_NX;
1028 const index_t y=k/m_NX;
1029 index_t numNodes=getNumNodes();
1030 if (x>0)
1031 numNodes-=m_N1;
1032 if (y>0)
1033 numNodes-=m_N0;
1034 if (x>0 && y>0)
1035 numNodes++; // subtracted corner twice -> fix that
1036 m_nodeDistribution[k+1]=m_nodeDistribution[k]+numNodes;
1037 }
1038 m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal();
1039
1040 m_dofId.resize(getNumDOF());
1041 m_nodeId.resize(getNumNodes());
1042
1043 // the bottom row and left column are not owned by this rank so the
1044 // identifiers need to be computed accordingly
1045 const index_t left = (m_offset0==0 ? 0 : 1);
1046 const index_t bottom = (m_offset1==0 ? 0 : 1);
1047 if (left>0) {
1048 const int neighbour=m_mpiInfo->rank-1;
1049 const index_t leftN0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1050 #pragma omp parallel for
1051 for (dim_t i1=bottom; i1<m_N1; i1++) {
1052 m_nodeId[i1*m_N0]=m_nodeDistribution[neighbour]
1053 + (i1-bottom+1)*leftN0-1;
1054 }
1055 }
1056 if (bottom>0) {
1057 const int neighbour=m_mpiInfo->rank-m_NX;
1058 const index_t bottomN0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1059 const index_t bottomN1=(neighbour/m_NX == 0 ? m_N1 : m_N1-1);
1060 #pragma omp parallel for
1061 for (dim_t i0=left; i0<m_N0; i0++) {
1062 m_nodeId[i0]=m_nodeDistribution[neighbour]
1063 + (bottomN1-1)*bottomN0 + i0 - left;
1064 }
1065 }
1066 if (left>0 && bottom>0) {
1067 const int neighbour=m_mpiInfo->rank-m_NX-1;
1068 const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1069 const index_t N1=(neighbour/m_NX == 0 ? m_N1 : m_N1-1);
1070 m_nodeId[0]=m_nodeDistribution[neighbour]+N0*N1-1;
1071 }
1072
1073 // the rest of the id's are contiguous
1074 const index_t firstId=m_nodeDistribution[m_mpiInfo->rank];
1075 #pragma omp parallel for
1076 for (dim_t i1=bottom; i1<m_N1; i1++) {
1077 for (dim_t i0=left; i0<m_N0; i0++) {
1078 const index_t idx=i0-left+(i1-bottom)*(m_N0-left);
1079 m_nodeId[i0+i1*m_N0] = firstId+idx;
1080 m_dofId[idx] = firstId+idx;
1081 }
1082 }
1083 m_nodeTags.assign(getNumNodes(), 0);
1084 updateTagsInUse(Nodes);
1085
1086 // elements
1087 m_elementId.resize(getNumElements());
1088 #pragma omp parallel for
1089 for (dim_t k=0; k<getNumElements(); k++) {
1090 m_elementId[k]=k;
1091 }
1092 m_elementTags.assign(getNumElements(), 0);
1093 updateTagsInUse(Elements);
1094
1095 // face element id's
1096 m_faceId.resize(getNumFaceElements());
1097 #pragma omp parallel for
1098 for (dim_t k=0; k<getNumFaceElements(); k++) {
1099 m_faceId[k]=k;
1100 }
1101
1102 // generate face offset vector and set face tags
1103 const IndexVector facesPerEdge = getNumFacesPerBoundary();
1104 const index_t LEFT=1, RIGHT=2, BOTTOM=10, TOP=20;
1105 const index_t faceTag[] = { LEFT, RIGHT, BOTTOM, TOP };
1106 m_faceOffset.assign(facesPerEdge.size(), -1);
1107 m_faceTags.clear();
1108 index_t offset=0;
1109 for (size_t i=0; i<facesPerEdge.size(); i++) {
1110 if (facesPerEdge[i]>0) {
1111 m_faceOffset[i]=offset;
1112 offset+=facesPerEdge[i];
1113 m_faceTags.insert(m_faceTags.end(), facesPerEdge[i], faceTag[i]);
1114 }
1115 }
1116 setTagMap("left", LEFT);
1117 setTagMap("right", RIGHT);
1118 setTagMap("bottom", BOTTOM);
1119 setTagMap("top", TOP);
1120 updateTagsInUse(FaceElements);
1121 }
1122
1123 //private
1124 int Rectangle::insertNeighbours(IndexVector& index, index_t node) const
1125 {
1126 const dim_t myN0 = (m_offset0==0 ? m_N0 : m_N0-1);
1127 const dim_t myN1 = (m_offset1==0 ? m_N1 : m_N1-1);
1128 const int x=node%myN0;
1129 const int y=node/myN0;
1130 int num=0;
1131 if (y>0) {
1132 if (x>0) {
1133 // bottom-left
1134 index.push_back(node-myN0-1);
1135 num++;
1136 }
1137 // bottom
1138 index.push_back(node-myN0);
1139 num++;
1140 if (x<myN0-1) {
1141 // bottom-right
1142 index.push_back(node-myN0+1);
1143 num++;
1144 }
1145 }
1146 if (x<myN0-1) {
1147 // right
1148 index.push_back(node+1);
1149 num++;
1150 if (y<myN1-1) {
1151 // top-right
1152 index.push_back(node+myN0+1);
1153 num++;
1154 }
1155 }
1156 if (y<myN1-1) {
1157 // top
1158 index.push_back(node+myN0);
1159 num++;
1160 if (x>0) {
1161 // top-left
1162 index.push_back(node+myN0-1);
1163 num++;
1164 }
1165 }
1166 if (x>0) {
1167 // left
1168 index.push_back(node-1);
1169 num++;
1170 }
1171
1172 return num;
1173 }
1174
1175 //private
1176 void Rectangle::generateCouplePatterns(Paso_Pattern** colPattern, Paso_Pattern** rowPattern) const
1177 {
1178 IndexVector ptr(1,0);
1179 IndexVector index;
1180 const dim_t myN0 = (m_offset0==0 ? m_N0 : m_N0-1);
1181 const dim_t myN1 = (m_offset1==0 ? m_N1 : m_N1-1);
1182 const IndexVector faces=getNumFacesPerBoundary();
1183
1184 // bottom edge
1185 dim_t n=0;
1186 if (faces[0] == 0) {
1187 index.push_back(2*(myN0+myN1+1));
1188 index.push_back(2*(myN0+myN1+1)+1);
1189 n+=2;
1190 if (faces[2] == 0) {
1191 index.push_back(0);
1192 index.push_back(1);
1193 index.push_back(2);
1194 n+=3;
1195 }
1196 } else if (faces[2] == 0) {
1197 index.push_back(1);
1198 index.push_back(2);
1199 n+=2;
1200 }
1201 // n=neighbours of bottom-left corner node
1202 ptr.push_back(ptr.back()+n);
1203 n=0;
1204 if (faces[2] == 0) {
1205 for (dim_t i=1; i<myN0-1; i++) {
1206 index.push_back(i);
1207 index.push_back(i+1);
1208 index.push_back(i+2);
1209 ptr.push_back(ptr.back()+3);
1210 }
1211 index.push_back(myN0-1);
1212 index.push_back(myN0);
1213 n+=2;
1214 if (faces[1] == 0) {
1215 index.push_back(myN0+1);
1216 index.push_back(myN0+2);
1217 index.push_back(myN0+3);
1218 n+=3;
1219 }
1220 } else {
1221 for (dim_t i=1; i<myN0-1; i++) {
1222 ptr.push_back(ptr.back());
1223 }
1224 if (faces[1] == 0) {
1225 index.push_back(myN0+2);
1226 index.push_back(myN0+3);
1227 n+=2;
1228 }
1229 }
1230 // n=neighbours of bottom-right corner node
1231 ptr.push_back(ptr.back()+n);
1232
1233 // 2nd row to 2nd last row
1234 for (dim_t i=1; i<myN1-1; i++) {
1235 // left edge
1236 if (faces[0] == 0) {
1237 index.push_back(2*(myN0+myN1+2)-i);
1238 index.push_back(2*(myN0+myN1+2)-i-1);
1239 index.push_back(2*(myN0+myN1+2)-i-2);
1240 ptr.push_back(ptr.back()+3);
1241 } else {
1242 ptr.push_back(ptr.back());
1243 }
1244 for (dim_t j=1; j<myN0-1; j++) {
1245 ptr.push_back(ptr.back());
1246 }
1247 // right edge
1248 if (faces[1] == 0) {
1249 index.push_back(myN0+i+1);
1250 index.push_back(myN0+i+2);
1251 index.push_back(myN0+i+3);
1252 ptr.push_back(ptr.back()+3);
1253 } else {
1254 ptr.push_back(ptr.back());
1255 }
1256 }
1257
1258 // top edge
1259 n=0;
1260 if (faces[0] == 0) {
1261 index.push_back(2*myN0+myN1+5);
1262 index.push_back(2*myN0+myN1+4);
1263 n+=2;
1264 if (faces[3] == 0) {
1265 index.push_back(2*myN0+myN1+3);
1266 index.push_back(2*myN0+myN1+2);
1267 index.push_back(2*myN0+myN1+1);
1268 n+=3;
1269 }
1270 } else if (faces[3] == 0) {
1271 index.push_back(2*myN0+myN1+2);
1272 index.push_back(2*myN0+myN1+1);
1273 n+=2;
1274 }
1275 // n=neighbours of top-left corner node
1276 ptr.push_back(ptr.back()+n);
1277 n=0;
1278 if (faces[3] == 0) {
1279 for (dim_t i=1; i<myN0-1; i++) {
1280 index.push_back(2*myN0+myN1+i+1);
1281 index.push_back(2*myN0+myN1+i);
1282 index.push_back(2*myN0+myN1+i-1);
1283 ptr.push_back(ptr.back()+3);
1284 }
1285 index.push_back(myN0+myN1+4);
1286 index.push_back(myN0+myN1+3);
1287 n+=2;
1288 if (faces[1] == 0) {
1289 index.push_back(myN0+myN1+2);
1290 index.push_back(myN0+myN1+1);
1291 index.push_back(myN0+myN1);
1292 n+=3;
1293 }
1294 } else {
1295 for (dim_t i=1; i<myN0-1; i++) {
1296 ptr.push_back(ptr.back());
1297 }
1298 if (faces[1] == 0) {
1299 index.push_back(myN0+myN1+1);
1300 index.push_back(myN0+myN1);
1301 n+=2;
1302 }
1303 }
1304 // n=neighbours of top-right corner node
1305 ptr.push_back(ptr.back()+n);
1306
1307 dim_t M=ptr.size()-1;
1308 map<index_t,index_t> idMap;
1309 dim_t N=0;
1310 for (IndexVector::iterator it=index.begin(); it!=index.end(); it++) {
1311 if (idMap.find(*it)==idMap.end()) {
1312 idMap[*it]=N;
1313 *it=N++;
1314 } else {
1315 *it=idMap[*it];
1316 }
1317 }
1318
1319 /*
1320 cout << "--- colCouple_pattern ---" << endl;
1321 cout << "M=" << M << ", N=" << N << endl;
1322 for (size_t i=0; i<ptr.size(); i++) {
1323 cout << "ptr[" << i << "]=" << ptr[i] << endl;
1324 }
1325 for (size_t i=0; i<index.size(); i++) {
1326 cout << "index[" << i << "]=" << index[i] << endl;
1327 }
1328 */
1329
1330 // now build the row couple pattern
1331 IndexVector ptr2(1,0);
1332 IndexVector index2;
1333 for (dim_t id=0; id<N; id++) {
1334 n=0;
1335 for (dim_t i=0; i<M; i++) {
1336 for (dim_t j=ptr[i]; j<ptr[i+1]; j++) {
1337 if (index[j]==id) {
1338 index2.push_back(i);
1339 n++;
1340 break;
1341 }
1342 }
1343 }
1344 ptr2.push_back(ptr2.back()+n);
1345 }
1346
1347 /*
1348 cout << "--- rowCouple_pattern ---" << endl;
1349 cout << "M=" << N << ", N=" << M << endl;
1350 for (size_t i=0; i<ptr2.size(); i++) {
1351 cout << "ptr[" << i << "]=" << ptr2[i] << endl;
1352 }
1353 for (size_t i=0; i<index2.size(); i++) {
1354 cout << "index[" << i << "]=" << index2[i] << endl;
1355 }
1356 */
1357
1358 // paso will manage the memory
1359 index_t* indexC = MEMALLOC(index.size(), index_t);
1360 index_t* ptrC = MEMALLOC(ptr.size(), index_t);
1361 copy(index.begin(), index.end(), indexC);
1362 copy(ptr.begin(), ptr.end(), ptrC);
1363 *colPattern=Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT, M, N, ptrC, indexC);
1364
1365 // paso will manage the memory
1366 indexC = MEMALLOC(index2.size(), index_t);
1367 ptrC = MEMALLOC(ptr2.size(), index_t);
1368 copy(index2.begin(), index2.end(), indexC);
1369 copy(ptr2.begin(), ptr2.end(), ptrC);
1370 *rowPattern=Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT, N, M, ptrC, indexC);
1371 }
1372
1373 //protected
1374 void Rectangle::interpolateNodesOnElements(escript::Data& out,
1375 escript::Data& in, bool reduced) const
1376 {
1377 const dim_t numComp = in.getDataPointSize();
1378 if (reduced) {
1379 /*** GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS TOP */
1380 const double c0 = .25;
1381 #pragma omp parallel for
1382 for (index_t k1=0; k1 < m_NE1; ++k1) {
1383 for (index_t k0=0; k0 < m_NE0; ++k0) {
1384 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0));
1385 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0));
1386 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0));
1387 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0));
1388 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0));
1389 for (index_t i=0; i < numComp; ++i) {
1390 o[INDEX2(i,numComp,0)] = c0*(f_00[i] + f_01[i] + f_10[i] + f_11[i]);
1391 } /* end of component loop i */
1392 } /* end of k0 loop */
1393 } /* end of k1 loop */
1394 /* GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS BOTTOM */
1395 } else {
1396 /*** GENERATOR SNIP_INTERPOLATE_ELEMENTS TOP */
1397 const double c0 = .16666666666666666667;
1398 const double c1 = .044658198738520451079;
1399 const double c2 = .62200846792814621559;
1400 #pragma omp parallel for
1401 for (index_t k1=0; k1 < m_NE1; ++k1) {
1402 for (index_t k0=0; k0 < m_NE0; ++k0) {
1403 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0));
1404 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0));
1405 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0));
1406 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0));
1407 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0));
1408 for (index_t i=0; i < numComp; ++i) {
1409 o[INDEX2(i,numComp,0)] = f_00[i]*c2 + f_11[i]*c1 + c0*(f_01[i] + f_10[i]);
1410 o[INDEX2(i,numComp,1)] = f_01[i]*c1 + f_10[i]*c2 + c0*(f_00[i] + f_11[i]);
1411 o[INDEX2(i,numComp,2)] = f_01[i]*c2 + f_10[i]*c1 + c0*(f_00[i] + f_11[i]);
1412 o[INDEX2(i,numComp,3)] = f_00[i]*c1 + f_11[i]*c2 + c0*(f_01[i] + f_10[i]);
1413 } /* end of component loop i */
1414 } /* end of k0 loop */
1415 } /* end of k1 loop */
1416 /* GENERATOR SNIP_INTERPOLATE_ELEMENTS BOTTOM */
1417 }
1418 }
1419
1420 //protected
1421 void Rectangle::interpolateNodesOnFaces(escript::Data& out, escript::Data& in,
1422 bool reduced) const
1423 {
1424 const dim_t numComp = in.getDataPointSize();
1425 if (reduced) {
1426 const double c0 = .5;
1427 #pragma omp parallel
1428 {
1429 /*** GENERATOR SNIP_INTERPOLATE_REDUCED_FACES TOP */
1430 if (m_faceOffset[0] > -1) {
1431 #pragma omp for nowait
1432 for (index_t k1=0; k1 < m_NE1; ++k1) {
1433 const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0));
1434 const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0));
1435 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
1436 for (index_t i=0; i < numComp; ++i) {
1437 o[INDEX2(i,numComp,0)] = c0*(f_00[i] + f_01[i]);
1438 } /* end of component loop i */
1439 } /* end of k1 loop */
1440 } /* end of face 0 */
1441 if (m_faceOffset[1] > -1) {
1442 #pragma omp for nowait
1443 for (index_t k1=0; k1 < m_NE1; ++k1) {
1444 const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0));
1445 const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0));
1446 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
1447 for (index_t i=0; i < numComp; ++i) {
1448 o[INDEX2(i,numComp,0)] = c0*(f_10[i] + f_11[i]);
1449 } /* end of component loop i */
1450 } /* end of k1 loop */
1451 } /* end of face 1 */
1452 if (m_faceOffset[2] > -1) {
1453 #pragma omp for nowait
1454 for (index_t k0=0; k0 < m_NE0; ++k0) {
1455 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0));
1456 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0));
1457 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1458 for (index_t i=0; i < numComp; ++i) {
1459 o[INDEX2(i,numComp,0)] = c0*(f_00[i] + f_10[i]);
1460 } /* end of component loop i */
1461 } /* end of k0 loop */
1462 } /* end of face 2 */
1463 if (m_faceOffset[3] > -1) {
1464 #pragma omp for nowait
1465 for (index_t k0=0; k0 < m_NE0; ++k0) {
1466 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0));
1467 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0));
1468 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1469 for (index_t i=0; i < numComp; ++i) {
1470 o[INDEX2(i,numComp,0)] = c0*(f_01[i] + f_11[i]);
1471 } /* end of component loop i */
1472 } /* end of k0 loop */
1473 } /* end of face 3 */
1474 /* GENERATOR SNIP_INTERPOLATE_REDUCED_FACES BOTTOM */
1475 } // end of parallel section
1476 } else {
1477 const double c0 = 0.21132486540518711775;
1478 const double c1 = 0.78867513459481288225;
1479 #pragma omp parallel
1480 {
1481 /*** GENERATOR SNIP_INTERPOLATE_FACES TOP */
1482 if (m_faceOffset[0] > -1) {
1483 #pragma omp for nowait
1484 for (index_t k1=0; k1 < m_NE1; ++k1) {
1485 const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0));
1486 const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0));
1487 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
1488 for (index_t i=0; i < numComp; ++i) {
1489 o[INDEX2(i,numComp,0)] = f_00[i]*c1 + f_01[i]*c0;
1490 o[INDEX2(i,numComp,1)] = f_00[i]*c0 + f_01[i]*c1;
1491 } /* end of component loop i */
1492 } /* end of k1 loop */
1493 } /* end of face 0 */
1494 if (m_faceOffset[1] > -1) {
1495 #pragma omp for nowait
1496 for (index_t k1=0; k1 < m_NE1; ++k1) {
1497 const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0));
1498 const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0));
1499 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
1500 for (index_t i=0; i < numComp; ++i) {
1501 o[INDEX2(i,numComp,0)] = f_10[i]*c1 + f_11[i]*c0;
1502 o[INDEX2(i,numComp,1)] = f_10[i]*c0 + f_11[i]*c1;
1503 } /* end of component loop i */
1504 } /* end of k1 loop */
1505 } /* end of face 1 */
1506 if (m_faceOffset[2] > -1) {
1507 #pragma omp for nowait
1508 for (index_t k0=0; k0 < m_NE0; ++k0) {
1509 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0));
1510 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0));
1511 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1512 for (index_t i=0; i < numComp; ++i) {
1513 o[INDEX2(i,numComp,0)] = f_00[i]*c1 + f_10[i]*c0;
1514 o[INDEX2(i,numComp,1)] = f_00[i]*c0 + f_10[i]*c1;
1515 } /* end of component loop i */
1516 } /* end of k0 loop */
1517 } /* end of face 2 */
1518 if (m_faceOffset[3] > -1) {
1519 #pragma omp for nowait
1520 for (index_t k0=0; k0 < m_NE0; ++k0) {
1521 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0));
1522 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0));
1523 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1524 for (index_t i=0; i < numComp; ++i) {
1525 o[INDEX2(i,numComp,0)] = f_01[i]*c1 + f_11[i]*c0;
1526 o[INDEX2(i,numComp,1)] = f_01[i]*c0 + f_11[i]*c1;
1527 } /* end of component loop i */
1528 } /* end of k0 loop */
1529 } /* end of face 3 */
1530 /* GENERATOR SNIP_INTERPOLATE_FACES BOTTOM */
1531 } // end of parallel section
1532 }
1533 }
1534
1535 } // end of namespace ripley
1536

  ViewVC Help
Powered by ViewVC 1.1.26