/[escript]/branches/ripleygmg_from_3668/ripley/src/Rectangle.cpp
ViewVC logotype

Contents of /branches/ripleygmg_from_3668/ripley/src/Rectangle.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3724 - (show annotations)
Wed Dec 7 07:35:21 2011 UTC (8 years ago) by caltinay
File size: 59905 byte(s)
Edits to generated code in Rectangle (renamed constants & removed duplicates,
more nowait optimizations)

1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2011 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14 #include <ripley/Rectangle.h>
15 extern "C" {
16 #include "paso/SystemMatrixPattern.h"
17 }
18
19 #if USE_SILO
20 #include <silo.h>
21 #ifdef ESYS_MPI
22 #include <pmpio.h>
23 #endif
24 #endif
25
26 #include <iomanip>
27
28 using namespace std;
29
30 namespace ripley {
31
32 Rectangle::Rectangle(int n0, int n1, double l0, double l1, int d0, int d1) :
33 RipleyDomain(2),
34 m_gNE0(n0),
35 m_gNE1(n1),
36 m_l0(l0),
37 m_l1(l1),
38 m_NX(d0),
39 m_NY(d1)
40 {
41 // ensure number of subdivisions is valid and nodes can be distributed
42 // among number of ranks
43 if (m_NX*m_NY != m_mpiInfo->size)
44 throw RipleyException("Invalid number of spatial subdivisions");
45
46 if (n0%m_NX > 0 || n1%m_NY > 0)
47 throw RipleyException("Number of elements must be separable into number of ranks in each dimension");
48
49 // local number of elements
50 m_NE0 = n0/m_NX;
51 m_NE1 = n1/m_NY;
52 // local number of nodes (not necessarily owned)
53 m_N0 = m_NE0+1;
54 m_N1 = m_NE1+1;
55 // bottom-left node is at (offset0,offset1) in global mesh
56 m_offset0 = m_NE0*(m_mpiInfo->rank%m_NX);
57 m_offset1 = m_NE1*(m_mpiInfo->rank/m_NX);
58 populateSampleIds();
59 }
60
61 Rectangle::~Rectangle()
62 {
63 }
64
65 string Rectangle::getDescription() const
66 {
67 return "ripley::Rectangle";
68 }
69
70 bool Rectangle::operator==(const AbstractDomain& other) const
71 {
72 if (dynamic_cast<const Rectangle*>(&other))
73 return this==&other;
74
75 return false;
76 }
77
78 void Rectangle::dump(const string& fileName) const
79 {
80 #if USE_SILO
81 string fn(fileName);
82 if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
83 fn+=".silo";
84 }
85
86 const int NUM_SILO_FILES = 1;
87 const char* blockDirFmt = "/block%04d";
88 int driver=DB_HDF5;
89 string siloPath;
90 DBfile* dbfile = NULL;
91
92 #ifdef ESYS_MPI
93 PMPIO_baton_t* baton = NULL;
94 #endif
95
96 if (m_mpiInfo->size > 1) {
97 #ifdef ESYS_MPI
98 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
99 0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
100 PMPIO_DefaultClose, (void*)&driver);
101 // try the fallback driver in case of error
102 if (!baton && driver != DB_PDB) {
103 driver = DB_PDB;
104 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
105 0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
106 PMPIO_DefaultClose, (void*)&driver);
107 }
108 if (baton) {
109 char str[64];
110 snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
111 siloPath = str;
112 dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
113 }
114 #endif
115 } else {
116 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
117 getDescription().c_str(), driver);
118 // try the fallback driver in case of error
119 if (!dbfile && driver != DB_PDB) {
120 driver = DB_PDB;
121 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
122 getDescription().c_str(), driver);
123 }
124 }
125
126 if (!dbfile)
127 throw RipleyException("dump: Could not create Silo file");
128
129 /*
130 if (driver==DB_HDF5) {
131 // gzip level 1 already provides good compression with minimal
132 // performance penalty. Some tests showed that gzip levels >3 performed
133 // rather badly on escript data both in terms of time and space
134 DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
135 }
136 */
137
138 boost::scoped_ptr<double> x(new double[m_N0]);
139 boost::scoped_ptr<double> y(new double[m_N1]);
140 double* coords[2] = { x.get(), y.get() };
141 pair<double,double> xdx = getFirstCoordAndSpacing(0);
142 pair<double,double> ydy = getFirstCoordAndSpacing(1);
143 #pragma omp parallel
144 {
145 #pragma omp for nowait
146 for (dim_t i0 = 0; i0 < m_N0; i0++) {
147 coords[0][i0]=xdx.first+i0*xdx.second;
148 }
149 #pragma omp for nowait
150 for (dim_t i1 = 0; i1 < m_N1; i1++) {
151 coords[1][i1]=ydy.first+i1*ydy.second;
152 }
153 }
154 IndexVector dims = getNumNodesPerDim();
155
156 // write mesh
157 DBPutQuadmesh(dbfile, "mesh", NULL, coords, &dims[0], 2, DB_DOUBLE,
158 DB_COLLINEAR, NULL);
159
160 // write node ids
161 DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], &dims[0], 2,
162 NULL, 0, DB_INT, DB_NODECENT, NULL);
163
164 // write element ids
165 dims = getNumElementsPerDim();
166 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
167 &dims[0], 2, NULL, 0, DB_INT, DB_ZONECENT, NULL);
168
169 // rank 0 writes multimesh and multivar
170 if (m_mpiInfo->rank == 0) {
171 vector<string> tempstrings;
172 vector<char*> names;
173 for (dim_t i=0; i<m_mpiInfo->size; i++) {
174 stringstream path;
175 path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
176 tempstrings.push_back(path.str());
177 names.push_back((char*)tempstrings.back().c_str());
178 }
179 vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
180 DBSetDir(dbfile, "/");
181 DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
182 &types[0], NULL);
183 tempstrings.clear();
184 names.clear();
185 for (dim_t i=0; i<m_mpiInfo->size; i++) {
186 stringstream path;
187 path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
188 tempstrings.push_back(path.str());
189 names.push_back((char*)tempstrings.back().c_str());
190 }
191 types.assign(m_mpiInfo->size, DB_QUADVAR);
192 DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
193 &types[0], NULL);
194 tempstrings.clear();
195 names.clear();
196 for (dim_t i=0; i<m_mpiInfo->size; i++) {
197 stringstream path;
198 path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
199 tempstrings.push_back(path.str());
200 names.push_back((char*)tempstrings.back().c_str());
201 }
202 DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
203 &types[0], NULL);
204 }
205
206 if (m_mpiInfo->size > 1) {
207 #ifdef ESYS_MPI
208 PMPIO_HandOffBaton(baton, dbfile);
209 PMPIO_Finish(baton);
210 #endif
211 } else {
212 DBClose(dbfile);
213 }
214
215 #else // USE_SILO
216 throw RipleyException("dump(): no Silo support");
217 #endif
218 }
219
220 const int* Rectangle::borrowSampleReferenceIDs(int fsType) const
221 {
222 switch (fsType) {
223 case Nodes:
224 return &m_nodeId[0];
225 case Elements:
226 return &m_elementId[0];
227 case FaceElements:
228 return &m_faceId[0];
229 default:
230 break;
231 }
232
233 stringstream msg;
234 msg << "borrowSampleReferenceIDs() not implemented for function space type "
235 << functionSpaceTypeAsString(fsType);
236 throw RipleyException(msg.str());
237 }
238
239 bool Rectangle::ownSample(int fsCode, index_t id) const
240 {
241 #ifdef ESYS_MPI
242 if (fsCode != ReducedNodes) {
243 const index_t myFirst=m_nodeDistribution[m_mpiInfo->rank];
244 const index_t myLast=m_nodeDistribution[m_mpiInfo->rank+1]-1;
245 return (m_nodeId[id]>=myFirst && m_nodeId[id]<=myLast);
246 } else {
247 stringstream msg;
248 msg << "ownSample() not implemented for "
249 << functionSpaceTypeAsString(fsCode);
250 throw RipleyException(msg.str());
251 }
252 #else
253 return true;
254 #endif
255 }
256
257 void Rectangle::setToGradient(escript::Data& out, const escript::Data& cIn) const
258 {
259 escript::Data& in = *const_cast<escript::Data*>(&cIn);
260 const dim_t numComp = in.getDataPointSize();
261 const double h0 = m_l0/m_gNE0;
262 const double h1 = m_l1/m_gNE1;
263 const double cx0 = -1./h0;
264 const double cx1 = -.78867513459481288225/h0;
265 const double cx2 = -.5/h0;
266 const double cx3 = -.21132486540518711775/h0;
267 const double cx4 = .21132486540518711775/h0;
268 const double cx5 = .5/h0;
269 const double cx6 = .78867513459481288225/h0;
270 const double cx7 = 1./h0;
271 const double cy0 = -1./h1;
272 const double cy1 = -.78867513459481288225/h1;
273 const double cy2 = -.5/h1;
274 const double cy3 = -.21132486540518711775/h1;
275 const double cy4 = .21132486540518711775/h1;
276 const double cy5 = .5/h1;
277 const double cy6 = .78867513459481288225/h1;
278 const double cy7 = 1./h1;
279
280 if (out.getFunctionSpace().getTypeCode() == Elements) {
281 /*** GENERATOR SNIP_GRAD_ELEMENTS TOP */
282 #pragma omp parallel for
283 for (index_t k1=0; k1 < m_NE1; ++k1) {
284 for (index_t k0=0; k0 < m_NE0; ++k0) {
285 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0));
286 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0));
287 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0));
288 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0));
289 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0));
290 for (index_t i=0; i < numComp; ++i) {
291 o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx1 + f_01[i]*cx3 + f_10[i]*cx6 + f_11[i]*cx4;
292 o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy1 + f_01[i]*cy6 + f_10[i]*cy3 + f_11[i]*cy4;
293 o[INDEX3(i,0,1,numComp,2)] = f_00[i]*cx1 + f_01[i]*cx3 + f_10[i]*cx6 + f_11[i]*cx4;
294 o[INDEX3(i,1,1,numComp,2)] = f_00[i]*cy3 + f_01[i]*cy4 + f_10[i]*cy1 + f_11[i]*cy6;
295 o[INDEX3(i,0,2,numComp,2)] = f_00[i]*cx3 + f_01[i]*cx1 + f_10[i]*cx4 + f_11[i]*cx6;
296 o[INDEX3(i,1,2,numComp,2)] = f_00[i]*cy1 + f_01[i]*cy6 + f_10[i]*cy3 + f_11[i]*cy4;
297 o[INDEX3(i,0,3,numComp,2)] = f_00[i]*cx3 + f_01[i]*cx1 + f_10[i]*cx4 + f_11[i]*cx6;
298 o[INDEX3(i,1,3,numComp,2)] = f_00[i]*cy3 + f_01[i]*cy4 + f_10[i]*cy1 + f_11[i]*cy6;
299 } /* end of component loop i */
300 } /* end of k0 loop */
301 } /* end of k1 loop */
302 /* GENERATOR SNIP_GRAD_ELEMENTS BOTTOM */
303 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
304 /*** GENERATOR SNIP_GRAD_REDUCED_ELEMENTS TOP */
305 #pragma omp parallel for
306 for (index_t k1=0; k1 < m_NE1; ++k1) {
307 for (index_t k0=0; k0 < m_NE0; ++k0) {
308 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0));
309 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0));
310 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0));
311 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0));
312 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0));
313 for (index_t i=0; i < numComp; ++i) {
314 o[INDEX3(i,0,0,numComp,2)] = cx5*(f_10[i] + f_11[i]) + cx2*(f_00[i] + f_01[i]);
315 o[INDEX3(i,1,0,numComp,2)] = cy2*(f_00[i] + f_10[i]) + cy5*(f_01[i] + f_11[i]);
316 } /* end of component loop i */
317 } /* end of k0 loop */
318 } /* end of k1 loop */
319 /* GENERATOR SNIP_GRAD_REDUCED_ELEMENTS BOTTOM */
320 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
321 #pragma omp parallel
322 {
323 /*** GENERATOR SNIP_GRAD_FACES TOP */
324 if (m_faceOffset[0] > -1) {
325 #pragma omp for nowait
326 for (index_t k1=0; k1 < m_NE1; ++k1) {
327 const register double* f_10 = in.getSampleDataRO(INDEX2(1,k1, m_N0));
328 const register double* f_11 = in.getSampleDataRO(INDEX2(1,k1+1, m_N0));
329 const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0));
330 const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0));
331 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
332 for (index_t i=0; i < numComp; ++i) {
333 o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx1 + f_01[i]*cx3 + f_10[i]*cx6 + f_11[i]*cx4;
334 o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy0 + f_01[i]*cy7;
335 o[INDEX3(i,0,1,numComp,2)] = f_00[i]*cx3 + f_01[i]*cx1 + f_10[i]*cx4 + f_11[i]*cx6;
336 o[INDEX3(i,1,1,numComp,2)] = f_00[i]*cy0 + f_01[i]*cy7;
337 } /* end of component loop i */
338 } /* end of k1 loop */
339 } /* end of face 0 */
340 if (m_faceOffset[1] > -1) {
341 #pragma omp for nowait
342 for (index_t k1=0; k1 < m_NE1; ++k1) {
343 const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0));
344 const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0));
345 const register double* f_01 = in.getSampleDataRO(INDEX2(m_N0-2,k1+1, m_N0));
346 const register double* f_00 = in.getSampleDataRO(INDEX2(m_N0-2,k1, m_N0));
347 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
348 for (index_t i=0; i < numComp; ++i) {
349 o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx1 + f_01[i]*cx3 + f_10[i]*cx6 + f_11[i]*cx4;
350 o[INDEX3(i,1,0,numComp,2)] = f_10[i]*cy0 + f_11[i]*cy7;
351 o[INDEX3(i,0,1,numComp,2)] = f_00[i]*cx3 + f_01[i]*cx1 + f_10[i]*cx4 + f_11[i]*cx6;
352 o[INDEX3(i,1,1,numComp,2)] = f_10[i]*cy0 + f_11[i]*cy7;
353 } /* end of component loop i */
354 } /* end of k1 loop */
355 } /* end of face 1 */
356 if (m_faceOffset[2] > -1) {
357 #pragma omp for nowait
358 for (index_t k0=0; k0 < m_NE0; ++k0) {
359 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0));
360 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0));
361 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,1, m_N0));
362 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,1, m_N0));
363 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
364 for (index_t i=0; i < numComp; ++i) {
365 o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx0 + f_10[i]*cx7;
366 o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy1 + f_01[i]*cy6 + f_10[i]*cy3 + f_11[i]*cy4;
367 o[INDEX3(i,0,1,numComp,2)] = f_00[i]*cx0 + f_10[i]*cx7;
368 o[INDEX3(i,1,1,numComp,2)] = f_00[i]*cy3 + f_01[i]*cy4 + f_10[i]*cy1 + f_11[i]*cy6;
369 } /* end of component loop i */
370 } /* end of k0 loop */
371 } /* end of face 2 */
372 if (m_faceOffset[3] > -1) {
373 #pragma omp for nowait
374 for (index_t k0=0; k0 < m_NE0; ++k0) {
375 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0));
376 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0));
377 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,m_N1-2, m_N0));
378 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,m_N1-2, m_N0));
379 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
380 for (index_t i=0; i < numComp; ++i) {
381 o[INDEX3(i,0,0,numComp,2)] = f_01[i]*cx0 + f_11[i]*cx7;
382 o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy1 + f_01[i]*cy6 + f_10[i]*cy3 + f_11[i]*cy4;
383 o[INDEX3(i,0,1,numComp,2)] = f_01[i]*cx0 + f_11[i]*cx7;
384 o[INDEX3(i,1,1,numComp,2)] = f_00[i]*cy3 + f_01[i]*cy4 + f_10[i]*cy1 + f_11[i]*cy6;
385 } /* end of component loop i */
386 } /* end of k0 loop */
387 } /* end of face 3 */
388 /* GENERATOR SNIP_GRAD_FACES BOTTOM */
389 } // end of parallel section
390 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
391 #pragma omp parallel
392 {
393 /*** GENERATOR SNIP_GRAD_REDUCED_FACES TOP */
394 if (m_faceOffset[0] > -1) {
395 #pragma omp for nowait
396 for (index_t k1=0; k1 < m_NE1; ++k1) {
397 const register double* f_10 = in.getSampleDataRO(INDEX2(1,k1, m_N0));
398 const register double* f_11 = in.getSampleDataRO(INDEX2(1,k1+1, m_N0));
399 const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0));
400 const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0));
401 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
402 for (index_t i=0; i < numComp; ++i) {
403 o[INDEX3(i,0,0,numComp,2)] = cx5*(f_10[i] + f_11[i]) + cx2*(f_00[i] + f_01[i]);
404 o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy0 + f_01[i]*cy7;
405 } /* end of component loop i */
406 } /* end of k1 loop */
407 } /* end of face 0 */
408 if (m_faceOffset[1] > -1) {
409 #pragma omp for nowait
410 for (index_t k1=0; k1 < m_NE1; ++k1) {
411 const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0));
412 const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0));
413 const register double* f_01 = in.getSampleDataRO(INDEX2(m_N0-2,k1+1, m_N0));
414 const register double* f_00 = in.getSampleDataRO(INDEX2(m_N0-2,k1, m_N0));
415 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
416 for (index_t i=0; i < numComp; ++i) {
417 o[INDEX3(i,0,0,numComp,2)] = cx5*(f_10[i] + f_11[i]) + cx2*(f_00[i] + f_01[i]);
418 o[INDEX3(i,1,0,numComp,2)] = f_10[i]*cy0 + f_11[i]*cy7;
419 } /* end of component loop i */
420 } /* end of k1 loop */
421 } /* end of face 1 */
422 if (m_faceOffset[2] > -1) {
423 #pragma omp for nowait
424 for (index_t k0=0; k0 < m_NE0; ++k0) {
425 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0));
426 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0));
427 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,1, m_N0));
428 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,1, m_N0));
429 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
430 for (index_t i=0; i < numComp; ++i) {
431 o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx0 + f_10[i]*cx7;
432 o[INDEX3(i,1,0,numComp,2)] = cy2*(f_00[i] + f_10[i]) + cy5*(f_01[i] + f_11[i]);
433 } /* end of component loop i */
434 } /* end of k0 loop */
435 } /* end of face 2 */
436 if (m_faceOffset[3] > -1) {
437 #pragma omp for nowait
438 for (index_t k0=0; k0 < m_NE0; ++k0) {
439 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0));
440 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0));
441 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,m_N1-2, m_N0));
442 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,m_N1-2, m_N0));
443 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
444 for (index_t i=0; i < numComp; ++i) {
445 o[INDEX3(i,0,0,numComp,2)] = f_01[i]*cx0 + f_11[i]*cx7;
446 o[INDEX3(i,1,0,numComp,2)] = cy5*(f_01[i] + f_11[i]) + cy2*(f_00[i] + f_10[i]);
447 } /* end of component loop i */
448 } /* end of k0 loop */
449 } /* end of face 3 */
450 /* GENERATOR SNIP_GRAD_REDUCED_FACES BOTTOM */
451 } // end of parallel section
452 } else {
453 stringstream msg;
454 msg << "setToGradient() not implemented for "
455 << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
456 throw RipleyException(msg.str());
457 }
458 }
459
460 void Rectangle::setToIntegrals(vector<double>& integrals, const escript::Data& arg) const
461 {
462 escript::Data& in = *const_cast<escript::Data*>(&arg);
463 const dim_t numComp = in.getDataPointSize();
464 const double h0 = m_l0/m_gNE0;
465 const double h1 = m_l1/m_gNE1;
466 if (arg.getFunctionSpace().getTypeCode() == Elements) {
467 const double w_0 = h0*h1/4.;
468 #pragma omp parallel
469 {
470 vector<double> int_local(numComp, 0);
471 #pragma omp for nowait
472 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
473 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
474 const double* f = in.getSampleDataRO(INDEX2(k0, k1, m_NE0));
475 for (index_t i=0; i < numComp; ++i) {
476 const register double f_0 = f[INDEX2(i,0,numComp)];
477 const register double f_1 = f[INDEX2(i,1,numComp)];
478 const register double f_2 = f[INDEX2(i,2,numComp)];
479 const register double f_3 = f[INDEX2(i,3,numComp)];
480 int_local[i]+=(f_0+f_1+f_2+f_3)*w_0;
481 } /* end of component loop i */
482 } /* end of k0 loop */
483 } /* end of k1 loop */
484
485 #pragma omp critical
486 for (index_t i=0; i<numComp; i++)
487 integrals[i]+=int_local[i];
488 } // end of parallel section
489 } else if (arg.getFunctionSpace().getTypeCode() == ReducedElements) {
490 const double w_0 = h0*h1;
491 #pragma omp parallel
492 {
493 vector<double> int_local(numComp, 0);
494 #pragma omp for nowait
495 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
496 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
497 const double* f = in.getSampleDataRO(INDEX2(k0, k1, m_NE0));
498 for (index_t i=0; i < numComp; ++i) {
499 int_local[i]+=f[i]*w_0;
500 } /* end of component loop i */
501 } /* end of k0 loop */
502 } /* end of k1 loop */
503
504 #pragma omp critical
505 for (index_t i=0; i<numComp; i++)
506 integrals[i]+=int_local[i];
507 } // end of parallel section
508 } else if (arg.getFunctionSpace().getTypeCode() == FaceElements) {
509 const double w_0 = h0/2.;
510 const double w_1 = h1/2.;
511 #pragma omp parallel
512 {
513 vector<double> int_local(numComp, 0);
514 if (m_faceOffset[0] > -1) {
515 #pragma omp for nowait
516 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
517 const double* f = in.getSampleDataRO(m_faceOffset[0]+k1);
518 for (index_t i=0; i < numComp; ++i) {
519 const register double f_0 = f[INDEX2(i,0,numComp)];
520 const register double f_1 = f[INDEX2(i,1,numComp)];
521 int_local[i]+=(f_0+f_1)*w_1;
522 } /* end of component loop i */
523 } /* end of k1 loop */
524 }
525
526 if (m_faceOffset[1] > -1) {
527 #pragma omp for nowait
528 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
529 const double* f = in.getSampleDataRO(m_faceOffset[1]+k1);
530 for (index_t i=0; i < numComp; ++i) {
531 const register double f_0 = f[INDEX2(i,0,numComp)];
532 const register double f_1 = f[INDEX2(i,1,numComp)];
533 int_local[i]+=(f_0+f_1)*w_1;
534 } /* end of component loop i */
535 } /* end of k1 loop */
536 }
537
538 if (m_faceOffset[2] > -1) {
539 #pragma omp for nowait
540 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
541 const double* f = in.getSampleDataRO(m_faceOffset[2]+k0);
542 for (index_t i=0; i < numComp; ++i) {
543 const register double f_0 = f[INDEX2(i,0,numComp)];
544 const register double f_1 = f[INDEX2(i,1,numComp)];
545 int_local[i]+=(f_0+f_1)*w_0;
546 } /* end of component loop i */
547 } /* end of k0 loop */
548 }
549
550 if (m_faceOffset[3] > -1) {
551 #pragma omp for nowait
552 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
553 const double* f = in.getSampleDataRO(m_faceOffset[3]+k0);
554 for (index_t i=0; i < numComp; ++i) {
555 const register double f_0 = f[INDEX2(i,0,numComp)];
556 const register double f_1 = f[INDEX2(i,1,numComp)];
557 int_local[i]+=(f_0+f_1)*w_0;
558 } /* end of component loop i */
559 } /* end of k0 loop */
560 }
561
562 #pragma omp critical
563 for (index_t i=0; i<numComp; i++)
564 integrals[i]+=int_local[i];
565 } // end of parallel section
566 } else if (arg.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
567 #pragma omp parallel
568 {
569 vector<double> int_local(numComp, 0);
570 if (m_faceOffset[0] > -1) {
571 #pragma omp for nowait
572 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
573 const double* f = in.getSampleDataRO(m_faceOffset[0]+k1);
574 for (index_t i=0; i < numComp; ++i) {
575 int_local[i]+=f[i]*h1;
576 } /* end of component loop i */
577 } /* end of k1 loop */
578 }
579
580 if (m_faceOffset[1] > -1) {
581 #pragma omp for nowait
582 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
583 const double* f = in.getSampleDataRO(m_faceOffset[1]+k1);
584 for (index_t i=0; i < numComp; ++i) {
585 int_local[i]+=f[i]*h1;
586 } /* end of component loop i */
587 } /* end of k1 loop */
588 }
589
590 if (m_faceOffset[2] > -1) {
591 #pragma omp for nowait
592 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
593 const double* f = in.getSampleDataRO(m_faceOffset[2]+k0);
594 for (index_t i=0; i < numComp; ++i) {
595 int_local[i]+=f[i]*h0;
596 } /* end of component loop i */
597 } /* end of k0 loop */
598 }
599
600 if (m_faceOffset[3] > -1) {
601 #pragma omp for nowait
602 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
603 const double* f = in.getSampleDataRO(m_faceOffset[3]+k0);
604 for (index_t i=0; i < numComp; ++i) {
605 int_local[i]+=f[i]*h0;
606 } /* end of component loop i */
607 } /* end of k0 loop */
608 }
609
610 #pragma omp critical
611 for (index_t i=0; i<numComp; i++)
612 integrals[i]+=int_local[i];
613 } // end of parallel section
614 } else {
615 stringstream msg;
616 msg << "setToIntegrals() not implemented for "
617 << functionSpaceTypeAsString(arg.getFunctionSpace().getTypeCode());
618 throw RipleyException(msg.str());
619 }
620 }
621
622 void Rectangle::setToNormal(escript::Data& out) const
623 {
624 if (out.getFunctionSpace().getTypeCode() == FaceElements) {
625 #pragma omp parallel
626 {
627 if (m_faceOffset[0] > -1) {
628 #pragma omp for nowait
629 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
630 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
631 // set vector at two quadrature points
632 *o++ = -1.;
633 *o++ = 0.;
634 *o++ = -1.;
635 *o = 0.;
636 }
637 }
638
639 if (m_faceOffset[1] > -1) {
640 #pragma omp for nowait
641 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
642 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
643 // set vector at two quadrature points
644 *o++ = 1.;
645 *o++ = 0.;
646 *o++ = 1.;
647 *o = 0.;
648 }
649 }
650
651 if (m_faceOffset[2] > -1) {
652 #pragma omp for nowait
653 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
654 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
655 // set vector at two quadrature points
656 *o++ = 0.;
657 *o++ = -1.;
658 *o++ = 0.;
659 *o = -1.;
660 }
661 }
662
663 if (m_faceOffset[3] > -1) {
664 #pragma omp for nowait
665 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
666 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
667 // set vector at two quadrature points
668 *o++ = 0.;
669 *o++ = 1.;
670 *o++ = 0.;
671 *o = 1.;
672 }
673 }
674 } // end of parallel section
675 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
676 #pragma omp parallel
677 {
678 if (m_faceOffset[0] > -1) {
679 #pragma omp for nowait
680 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
681 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
682 *o++ = -1.;
683 *o = 0.;
684 }
685 }
686
687 if (m_faceOffset[1] > -1) {
688 #pragma omp for nowait
689 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
690 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
691 *o++ = 1.;
692 *o = 0.;
693 }
694 }
695
696 if (m_faceOffset[2] > -1) {
697 #pragma omp for nowait
698 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
699 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
700 *o++ = 0.;
701 *o = -1.;
702 }
703 }
704
705 if (m_faceOffset[3] > -1) {
706 #pragma omp for nowait
707 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
708 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
709 *o++ = 0.;
710 *o = 1.;
711 }
712 }
713 } // end of parallel section
714
715 } else {
716 stringstream msg;
717 msg << "setToNormal() not implemented for "
718 << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
719 throw RipleyException(msg.str());
720 }
721 }
722
723 void Rectangle::addPDEToSystem(escript::AbstractSystemMatrix& mat,
724 escript::Data& rhs, const escript::Data& A, const escript::Data& B,
725 const escript::Data& C, const escript::Data& D,
726 const escript::Data& X, const escript::Data& Y,
727 const escript::Data& d, const escript::Data& y,
728 const escript::Data& d_contact, const escript::Data& y_contact,
729 const escript::Data& d_dirac, const escript::Data& y_dirac) const
730 {
731 throw RipleyException("addPDEToSystem() not implemented");
732 }
733
734 Paso_SystemMatrixPattern* Rectangle::getPattern(bool reducedRowOrder,
735 bool reducedColOrder) const
736 {
737 if (reducedRowOrder || reducedColOrder)
738 throw RipleyException("getPattern() not implemented for reduced order");
739
740 // connector
741 RankVector neighbour;
742 IndexVector offsetInShared(1,0);
743 IndexVector sendShared, recvShared;
744 const IndexVector faces=getNumFacesPerBoundary();
745 const index_t left = (m_offset0==0 ? 0 : 1);
746 const index_t bottom = (m_offset1==0 ? 0 : 1);
747 // corner node from bottom-left
748 if (faces[0] == 0 && faces[2] == 0) {
749 neighbour.push_back(m_mpiInfo->rank-m_NX-1);
750 offsetInShared.push_back(offsetInShared.back()+1);
751 sendShared.push_back(m_nodeId[m_N0+left]);
752 recvShared.push_back(m_nodeId[0]);
753 }
754 // bottom edge
755 if (faces[2] == 0) {
756 neighbour.push_back(m_mpiInfo->rank-m_NX);
757 offsetInShared.push_back(offsetInShared.back()+m_N0-left);
758 for (dim_t i=left; i<m_N0; i++) {
759 // easy case, we know the neighbour id's
760 sendShared.push_back(m_nodeId[m_N0+i]);
761 recvShared.push_back(m_nodeId[i]);
762 }
763 }
764 // corner node from bottom-right
765 if (faces[1] == 0 && faces[2] == 0) {
766 neighbour.push_back(m_mpiInfo->rank-m_NX+1);
767 const index_t N0=(neighbour.back()%m_NX == 0 ? m_N0 : m_N0-1);
768 const index_t N1=(neighbour.back()/m_NX == 0 ? m_N1 : m_N1-1);
769 const index_t first=m_nodeDistribution[neighbour.back()];
770 offsetInShared.push_back(offsetInShared.back()+1);
771 sendShared.push_back(m_nodeId[(bottom+1)*m_N0-1]);
772 recvShared.push_back(first+N0*(N1-1));
773 }
774 // left edge
775 if (faces[0] == 0) {
776 neighbour.push_back(m_mpiInfo->rank-1);
777 offsetInShared.push_back(offsetInShared.back()+m_N1-bottom);
778 for (dim_t i=bottom; i<m_N1; i++) {
779 // easy case, we know the neighbour id's
780 sendShared.push_back(m_nodeId[i*m_N0+1]);
781 recvShared.push_back(m_nodeId[i*m_N0]);
782 }
783 }
784 // right edge
785 if (faces[1] == 0) {
786 neighbour.push_back(m_mpiInfo->rank+1);
787 const index_t rightN0=(neighbour.back()%m_NX == 0 ? m_N0 : m_N0-1);
788 const index_t first=m_nodeDistribution[neighbour.back()];
789 offsetInShared.push_back(offsetInShared.back()+m_N1-bottom);
790 for (dim_t i=bottom; i<m_N1; i++) {
791 sendShared.push_back(m_nodeId[(i+1)*m_N0-1]);
792 recvShared.push_back(first+rightN0*(i-bottom));
793 }
794 }
795 // corner node from top-left
796 if (faces[0] == 0 && faces[3] == 0) {
797 neighbour.push_back(m_mpiInfo->rank+m_NX-1);
798 const index_t N0=(neighbour.back()%m_NX == 0 ? m_N0 : m_N0-1);
799 const index_t first=m_nodeDistribution[neighbour.back()];
800 offsetInShared.push_back(offsetInShared.back()+1);
801 sendShared.push_back(m_nodeId[m_N0*(m_N1-1)+left]);
802 recvShared.push_back(first+N0-1);
803 }
804 // top edge
805 if (faces[3] == 0) {
806 neighbour.push_back(m_mpiInfo->rank+m_NX);
807 const index_t first=m_nodeDistribution[neighbour.back()];
808 offsetInShared.push_back(offsetInShared.back()+m_N0-left);
809 for (dim_t i=left; i<m_N0; i++) {
810 sendShared.push_back(m_nodeId[m_N0*(m_N1-1)+i]);
811 recvShared.push_back(first+i-left);
812 }
813 }
814 // corner node from top-right
815 if (faces[1] == 0 && faces[3] == 0) {
816 neighbour.push_back(m_mpiInfo->rank+m_NX+1);
817 const index_t first=m_nodeDistribution[neighbour.back()];
818 offsetInShared.push_back(offsetInShared.back()+1);
819 sendShared.push_back(m_nodeId[m_N0*m_N1-1]);
820 recvShared.push_back(first);
821 }
822 const int numDOF=m_nodeDistribution[m_mpiInfo->rank+1]-m_nodeDistribution[m_mpiInfo->rank];
823 /*
824 cout << "--- rcv_shcomp ---" << endl;
825 cout << "numDOF=" << numDOF << ", numNeighbors=" << neighbour.size() << endl;
826 for (size_t i=0; i<neighbour.size(); i++) {
827 cout << "neighbor[" << i << "]=" << neighbour[i]
828 << " offsetInShared[" << i+1 << "]=" << offsetInShared[i+1] << endl;
829 }
830 for (size_t i=0; i<recvShared.size(); i++) {
831 cout << "shared[" << i << "]=" << recvShared[i] << endl;
832 }
833 cout << "--- snd_shcomp ---" << endl;
834 for (size_t i=0; i<sendShared.size(); i++) {
835 cout << "shared[" << i << "]=" << sendShared[i] << endl;
836 }
837 */
838
839 Paso_SharedComponents *snd_shcomp = Paso_SharedComponents_alloc(
840 numDOF, neighbour.size(), &neighbour[0], &sendShared[0],
841 &offsetInShared[0], 1, 0, m_mpiInfo);
842 Paso_SharedComponents *rcv_shcomp = Paso_SharedComponents_alloc(
843 numDOF, neighbour.size(), &neighbour[0], &recvShared[0],
844 &offsetInShared[0], 1, 0, m_mpiInfo);
845 Paso_Connector* connector = Paso_Connector_alloc(snd_shcomp, rcv_shcomp);
846 Paso_SharedComponents_free(snd_shcomp);
847 Paso_SharedComponents_free(rcv_shcomp);
848
849 // create patterns
850 dim_t M, N;
851 IndexVector ptr(1,0);
852 IndexVector index;
853
854 // main pattern
855 for (index_t i=0; i<numDOF; i++) {
856 // always add the node itself
857 index.push_back(i);
858 int num=insertNeighbours(index, i);
859 ptr.push_back(ptr.back()+num+1);
860 }
861 M=N=ptr.size()-1;
862 // paso will manage the memory
863 index_t* indexC = MEMALLOC(index.size(),index_t);
864 index_t* ptrC = MEMALLOC(ptr.size(), index_t);
865 copy(index.begin(), index.end(), indexC);
866 copy(ptr.begin(), ptr.end(), ptrC);
867 Paso_Pattern *mainPattern = Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT,
868 M, N, ptrC, indexC);
869
870 /*
871 cout << "--- main_pattern ---" << endl;
872 cout << "M=" << M << ", N=" << N << endl;
873 for (size_t i=0; i<ptr.size(); i++) {
874 cout << "ptr[" << i << "]=" << ptr[i] << endl;
875 }
876 for (size_t i=0; i<index.size(); i++) {
877 cout << "index[" << i << "]=" << index[i] << endl;
878 }
879 */
880
881 ptr.clear();
882 index.clear();
883
884 // column & row couple patterns
885 Paso_Pattern *colCouplePattern, *rowCouplePattern;
886 generateCouplePatterns(&colCouplePattern, &rowCouplePattern);
887
888 // allocate paso distribution
889 Paso_Distribution* distribution = Paso_Distribution_alloc(m_mpiInfo,
890 const_cast<index_t*>(&m_nodeDistribution[0]), 1, 0);
891
892 Paso_SystemMatrixPattern* pattern = Paso_SystemMatrixPattern_alloc(
893 MATRIX_FORMAT_DEFAULT, distribution, distribution,
894 mainPattern, colCouplePattern, rowCouplePattern,
895 connector, connector);
896 Paso_Pattern_free(mainPattern);
897 Paso_Pattern_free(colCouplePattern);
898 Paso_Pattern_free(rowCouplePattern);
899 Paso_Distribution_free(distribution);
900 return pattern;
901 }
902
903 void Rectangle::Print_Mesh_Info(const bool full) const
904 {
905 RipleyDomain::Print_Mesh_Info(full);
906 if (full) {
907 cout << " Id Coordinates" << endl;
908 cout.precision(15);
909 cout.setf(ios::scientific, ios::floatfield);
910 pair<double,double> xdx = getFirstCoordAndSpacing(0);
911 pair<double,double> ydy = getFirstCoordAndSpacing(1);
912 for (index_t i=0; i < getNumNodes(); i++) {
913 cout << " " << setw(5) << m_nodeId[i]
914 << " " << xdx.first+(i%m_N0)*xdx.second
915 << " " << ydy.first+(i/m_N0)*ydy.second << endl;
916 }
917 }
918 }
919
920 IndexVector Rectangle::getNumNodesPerDim() const
921 {
922 IndexVector ret;
923 ret.push_back(m_N0);
924 ret.push_back(m_N1);
925 return ret;
926 }
927
928 IndexVector Rectangle::getNumElementsPerDim() const
929 {
930 IndexVector ret;
931 ret.push_back(m_NE0);
932 ret.push_back(m_NE1);
933 return ret;
934 }
935
936 IndexVector Rectangle::getNumFacesPerBoundary() const
937 {
938 IndexVector ret(4, 0);
939 //left
940 if (m_offset0==0)
941 ret[0]=m_NE1;
942 //right
943 if (m_mpiInfo->rank%m_NX==m_NX-1)
944 ret[1]=m_NE1;
945 //bottom
946 if (m_offset1==0)
947 ret[2]=m_NE0;
948 //top
949 if (m_mpiInfo->rank/m_NX==m_NY-1)
950 ret[3]=m_NE0;
951 return ret;
952 }
953
954 pair<double,double> Rectangle::getFirstCoordAndSpacing(dim_t dim) const
955 {
956 if (dim==0) {
957 return pair<double,double>((m_l0*m_offset0)/m_gNE0, m_l0/m_gNE0);
958 } else if (dim==1) {
959 return pair<double,double>((m_l1*m_offset1)/m_gNE1, m_l1/m_gNE1);
960 }
961 throw RipleyException("getFirstCoordAndSpacing(): invalid argument");
962 }
963
964 //protected
965 dim_t Rectangle::getNumFaceElements() const
966 {
967 const IndexVector faces = getNumFacesPerBoundary();
968 dim_t n=0;
969 for (size_t i=0; i<faces.size(); i++)
970 n+=faces[i];
971 return n;
972 }
973
974 //protected
975 void Rectangle::assembleCoordinates(escript::Data& arg) const
976 {
977 escriptDataC x = arg.getDataC();
978 int numDim = m_numDim;
979 if (!isDataPointShapeEqual(&x, 1, &numDim))
980 throw RipleyException("setToX: Invalid Data object shape");
981 if (!numSamplesEqual(&x, 1, getNumNodes()))
982 throw RipleyException("setToX: Illegal number of samples in Data object");
983
984 pair<double,double> xdx = getFirstCoordAndSpacing(0);
985 pair<double,double> ydy = getFirstCoordAndSpacing(1);
986 arg.requireWrite();
987 #pragma omp parallel for
988 for (dim_t i1 = 0; i1 < m_N1; i1++) {
989 for (dim_t i0 = 0; i0 < m_N0; i0++) {
990 double* point = arg.getSampleDataRW(i0+m_N0*i1);
991 point[0] = xdx.first+i0*xdx.second;
992 point[1] = ydy.first+i1*ydy.second;
993 }
994 }
995 }
996
997 //private
998 void Rectangle::populateSampleIds()
999 {
1000 // identifiers are ordered from left to right, bottom to top on each rank,
1001 // except for the shared nodes which are owned by the rank below / to the
1002 // left of the current rank
1003
1004 // build node distribution vector first.
1005 // m_nodeDistribution[i] is the first node id on rank i, that is
1006 // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes
1007 m_nodeDistribution.assign(m_mpiInfo->size+1, 0);
1008 m_nodeDistribution[1]=getNumNodes();
1009 for (dim_t k=1; k<m_mpiInfo->size-1; k++) {
1010 const index_t x=k%m_NX;
1011 const index_t y=k/m_NX;
1012 index_t numNodes=getNumNodes();
1013 if (x>0)
1014 numNodes-=m_N1;
1015 if (y>0)
1016 numNodes-=m_N0;
1017 if (x>0 && y>0)
1018 numNodes++; // subtracted corner twice -> fix that
1019 m_nodeDistribution[k+1]=m_nodeDistribution[k]+numNodes;
1020 }
1021 m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal();
1022
1023 m_nodeId.resize(getNumNodes());
1024
1025 // the bottom row and left column are not owned by this rank so the
1026 // identifiers need to be computed accordingly
1027 const index_t left = (m_offset0==0 ? 0 : 1);
1028 const index_t bottom = (m_offset1==0 ? 0 : 1);
1029 if (left>0) {
1030 const int neighbour=m_mpiInfo->rank-1;
1031 const index_t leftN0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1032 #pragma omp parallel for
1033 for (dim_t i1=bottom; i1<m_N1; i1++) {
1034 m_nodeId[i1*m_N0]=m_nodeDistribution[neighbour]
1035 + (i1-bottom+1)*leftN0-1;
1036 }
1037 }
1038 if (bottom>0) {
1039 const int neighbour=m_mpiInfo->rank-m_NX;
1040 const index_t bottomN0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1041 const index_t bottomN1=(neighbour/m_NX == 0 ? m_N1 : m_N1-1);
1042 #pragma omp parallel for
1043 for (dim_t i0=left; i0<m_N0; i0++) {
1044 m_nodeId[i0]=m_nodeDistribution[neighbour]
1045 + (bottomN1-1)*bottomN0 + i0 - left;
1046 }
1047 }
1048 if (left>0 && bottom>0) {
1049 const int neighbour=m_mpiInfo->rank-m_NX-1;
1050 const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1051 const index_t N1=(neighbour/m_NX == 0 ? m_N1 : m_N1-1);
1052 m_nodeId[0]=m_nodeDistribution[neighbour]+N0*N1-1;
1053 }
1054
1055 // the rest of the id's are contiguous
1056 const index_t firstId=m_nodeDistribution[m_mpiInfo->rank];
1057 #pragma omp parallel for
1058 for (dim_t i1=bottom; i1<m_N1; i1++) {
1059 for (dim_t i0=left; i0<m_N0; i0++) {
1060 m_nodeId[i0+i1*m_N0] = firstId+i0-left+(i1-bottom)*(m_N0-left);
1061 }
1062 }
1063
1064 // element id's
1065 m_elementId.resize(getNumElements());
1066 #pragma omp parallel for
1067 for (dim_t k=0; k<getNumElements(); k++) {
1068 m_elementId[k]=k;
1069 }
1070
1071 // face element id's
1072 m_faceId.resize(getNumFaceElements());
1073 #pragma omp parallel for
1074 for (dim_t k=0; k<getNumFaceElements(); k++) {
1075 m_faceId[k]=k;
1076 }
1077
1078 // generate face offset vector and set face tags
1079 const IndexVector facesPerEdge = getNumFacesPerBoundary();
1080 const index_t LEFT=1, RIGHT=2, BOTTOM=10, TOP=20;
1081 const index_t faceTag[] = { LEFT, RIGHT, BOTTOM, TOP };
1082 m_faceOffset.assign(facesPerEdge.size(), -1);
1083 m_faceTags.clear();
1084 index_t offset=0;
1085 for (size_t i=0; i<facesPerEdge.size(); i++) {
1086 if (facesPerEdge[i]>0) {
1087 m_faceOffset[i]=offset;
1088 offset+=facesPerEdge[i];
1089 m_faceTags.insert(m_faceTags.end(), facesPerEdge[i], faceTag[i]);
1090 }
1091 }
1092 setTagMap("left", LEFT);
1093 setTagMap("right", RIGHT);
1094 setTagMap("bottom", BOTTOM);
1095 setTagMap("top", TOP);
1096 updateTagsInUse(FaceElements);
1097 }
1098
1099 //private
1100 int Rectangle::insertNeighbours(IndexVector& index, index_t node) const
1101 {
1102 const dim_t myN0 = (m_offset0==0 ? m_N0 : m_N0-1);
1103 const dim_t myN1 = (m_offset1==0 ? m_N1 : m_N1-1);
1104 const int x=node%myN0;
1105 const int y=node/myN0;
1106 int num=0;
1107 if (y>0) {
1108 if (x>0) {
1109 // bottom-left
1110 index.push_back(node-myN0-1);
1111 num++;
1112 }
1113 // bottom
1114 index.push_back(node-myN0);
1115 num++;
1116 if (x<myN0-1) {
1117 // bottom-right
1118 index.push_back(node-myN0+1);
1119 num++;
1120 }
1121 }
1122 if (x<myN0-1) {
1123 // right
1124 index.push_back(node+1);
1125 num++;
1126 if (y<myN1-1) {
1127 // top-right
1128 index.push_back(node+myN0+1);
1129 num++;
1130 }
1131 }
1132 if (y<myN1-1) {
1133 // top
1134 index.push_back(node+myN0);
1135 num++;
1136 if (x>0) {
1137 // top-left
1138 index.push_back(node+myN0-1);
1139 num++;
1140 }
1141 }
1142 if (x>0) {
1143 // left
1144 index.push_back(node-1);
1145 num++;
1146 }
1147
1148 return num;
1149 }
1150
1151 //private
1152 void Rectangle::generateCouplePatterns(Paso_Pattern** colPattern, Paso_Pattern** rowPattern) const
1153 {
1154 IndexVector ptr(1,0);
1155 IndexVector index;
1156 const dim_t myN0 = (m_offset0==0 ? m_N0 : m_N0-1);
1157 const dim_t myN1 = (m_offset1==0 ? m_N1 : m_N1-1);
1158 const IndexVector faces=getNumFacesPerBoundary();
1159
1160 // bottom edge
1161 dim_t n=0;
1162 if (faces[0] == 0) {
1163 index.push_back(2*(myN0+myN1+1));
1164 index.push_back(2*(myN0+myN1+1)+1);
1165 n+=2;
1166 if (faces[2] == 0) {
1167 index.push_back(0);
1168 index.push_back(1);
1169 index.push_back(2);
1170 n+=3;
1171 }
1172 } else if (faces[2] == 0) {
1173 index.push_back(1);
1174 index.push_back(2);
1175 n+=2;
1176 }
1177 // n=neighbours of bottom-left corner node
1178 ptr.push_back(ptr.back()+n);
1179 n=0;
1180 if (faces[2] == 0) {
1181 for (dim_t i=1; i<myN0-1; i++) {
1182 index.push_back(i);
1183 index.push_back(i+1);
1184 index.push_back(i+2);
1185 ptr.push_back(ptr.back()+3);
1186 }
1187 index.push_back(myN0-1);
1188 index.push_back(myN0);
1189 n+=2;
1190 if (faces[1] == 0) {
1191 index.push_back(myN0+1);
1192 index.push_back(myN0+2);
1193 index.push_back(myN0+3);
1194 n+=3;
1195 }
1196 } else {
1197 for (dim_t i=1; i<myN0-1; i++) {
1198 ptr.push_back(ptr.back());
1199 }
1200 if (faces[1] == 0) {
1201 index.push_back(myN0+2);
1202 index.push_back(myN0+3);
1203 n+=2;
1204 }
1205 }
1206 // n=neighbours of bottom-right corner node
1207 ptr.push_back(ptr.back()+n);
1208
1209 // 2nd row to 2nd last row
1210 for (dim_t i=1; i<myN1-1; i++) {
1211 // left edge
1212 if (faces[0] == 0) {
1213 index.push_back(2*(myN0+myN1+2)-i);
1214 index.push_back(2*(myN0+myN1+2)-i-1);
1215 index.push_back(2*(myN0+myN1+2)-i-2);
1216 ptr.push_back(ptr.back()+3);
1217 } else {
1218 ptr.push_back(ptr.back());
1219 }
1220 for (dim_t j=1; j<myN0-1; j++) {
1221 ptr.push_back(ptr.back());
1222 }
1223 // right edge
1224 if (faces[1] == 0) {
1225 index.push_back(myN0+i+1);
1226 index.push_back(myN0+i+2);
1227 index.push_back(myN0+i+3);
1228 ptr.push_back(ptr.back()+3);
1229 } else {
1230 ptr.push_back(ptr.back());
1231 }
1232 }
1233
1234 // top edge
1235 n=0;
1236 if (faces[0] == 0) {
1237 index.push_back(2*myN0+myN1+5);
1238 index.push_back(2*myN0+myN1+4);
1239 n+=2;
1240 if (faces[3] == 0) {
1241 index.push_back(2*myN0+myN1+3);
1242 index.push_back(2*myN0+myN1+2);
1243 index.push_back(2*myN0+myN1+1);
1244 n+=3;
1245 }
1246 } else if (faces[3] == 0) {
1247 index.push_back(2*myN0+myN1+2);
1248 index.push_back(2*myN0+myN1+1);
1249 n+=2;
1250 }
1251 // n=neighbours of top-left corner node
1252 ptr.push_back(ptr.back()+n);
1253 n=0;
1254 if (faces[3] == 0) {
1255 for (dim_t i=1; i<myN0-1; i++) {
1256 index.push_back(2*myN0+myN1+i+1);
1257 index.push_back(2*myN0+myN1+i);
1258 index.push_back(2*myN0+myN1+i-1);
1259 ptr.push_back(ptr.back()+3);
1260 }
1261 index.push_back(myN0+myN1+4);
1262 index.push_back(myN0+myN1+3);
1263 n+=2;
1264 if (faces[1] == 0) {
1265 index.push_back(myN0+myN1+2);
1266 index.push_back(myN0+myN1+1);
1267 index.push_back(myN0+myN1);
1268 n+=3;
1269 }
1270 } else {
1271 for (dim_t i=1; i<myN0-1; i++) {
1272 ptr.push_back(ptr.back());
1273 }
1274 if (faces[1] == 0) {
1275 index.push_back(myN0+myN1+1);
1276 index.push_back(myN0+myN1);
1277 n+=2;
1278 }
1279 }
1280 // n=neighbours of top-right corner node
1281 ptr.push_back(ptr.back()+n);
1282
1283 dim_t M=ptr.size()-1;
1284 map<index_t,index_t> idMap;
1285 dim_t N=0;
1286 for (IndexVector::iterator it=index.begin(); it!=index.end(); it++) {
1287 if (idMap.find(*it)==idMap.end()) {
1288 idMap[*it]=N;
1289 *it=N++;
1290 } else {
1291 *it=idMap[*it];
1292 }
1293 }
1294
1295 /*
1296 cout << "--- colCouple_pattern ---" << endl;
1297 cout << "M=" << M << ", N=" << N << endl;
1298 for (size_t i=0; i<ptr.size(); i++) {
1299 cout << "ptr[" << i << "]=" << ptr[i] << endl;
1300 }
1301 for (size_t i=0; i<index.size(); i++) {
1302 cout << "index[" << i << "]=" << index[i] << endl;
1303 }
1304 */
1305
1306 // now build the row couple pattern
1307 IndexVector ptr2(1,0);
1308 IndexVector index2;
1309 for (dim_t id=0; id<N; id++) {
1310 n=0;
1311 for (dim_t i=0; i<M; i++) {
1312 for (dim_t j=ptr[i]; j<ptr[i+1]; j++) {
1313 if (index[j]==id) {
1314 index2.push_back(i);
1315 n++;
1316 break;
1317 }
1318 }
1319 }
1320 ptr2.push_back(ptr2.back()+n);
1321 }
1322
1323 /*
1324 cout << "--- rowCouple_pattern ---" << endl;
1325 cout << "M=" << N << ", N=" << M << endl;
1326 for (size_t i=0; i<ptr2.size(); i++) {
1327 cout << "ptr[" << i << "]=" << ptr2[i] << endl;
1328 }
1329 for (size_t i=0; i<index2.size(); i++) {
1330 cout << "index[" << i << "]=" << index2[i] << endl;
1331 }
1332 */
1333
1334 // paso will manage the memory
1335 index_t* indexC = MEMALLOC(index.size(), index_t);
1336 index_t* ptrC = MEMALLOC(ptr.size(), index_t);
1337 copy(index.begin(), index.end(), indexC);
1338 copy(ptr.begin(), ptr.end(), ptrC);
1339 *colPattern=Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT, M, N, ptrC, indexC);
1340
1341 // paso will manage the memory
1342 indexC = MEMALLOC(index2.size(), index_t);
1343 ptrC = MEMALLOC(ptr2.size(), index_t);
1344 copy(index2.begin(), index2.end(), indexC);
1345 copy(ptr2.begin(), ptr2.end(), ptrC);
1346 *rowPattern=Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT, N, M, ptrC, indexC);
1347 }
1348
1349 //protected
1350 void Rectangle::interpolateNodesOnElements(escript::Data& out,
1351 escript::Data& in, bool reduced) const
1352 {
1353 const dim_t numComp = in.getDataPointSize();
1354 if (reduced) {
1355 /*** GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS TOP */
1356 const double c0 = .25;
1357 #pragma omp parallel for
1358 for (index_t k1=0; k1 < m_NE1; ++k1) {
1359 for (index_t k0=0; k0 < m_NE0; ++k0) {
1360 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0));
1361 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0));
1362 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0));
1363 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0));
1364 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0));
1365 for (index_t i=0; i < numComp; ++i) {
1366 o[INDEX2(i,numComp,0)] = c0*(f_00[i] + f_01[i] + f_10[i] + f_11[i]);
1367 } /* end of component loop i */
1368 } /* end of k0 loop */
1369 } /* end of k1 loop */
1370 /* GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS BOTTOM */
1371 } else {
1372 /*** GENERATOR SNIP_INTERPOLATE_ELEMENTS TOP */
1373 const double c0 = .16666666666666666667;
1374 const double c1 = .044658198738520451079;
1375 const double c2 = .62200846792814621559;
1376 #pragma omp parallel for
1377 for (index_t k1=0; k1 < m_NE1; ++k1) {
1378 for (index_t k0=0; k0 < m_NE0; ++k0) {
1379 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0));
1380 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0));
1381 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0));
1382 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0));
1383 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0));
1384 for (index_t i=0; i < numComp; ++i) {
1385 o[INDEX2(i,numComp,0)] = f_00[i]*c2 + f_11[i]*c1 + c0*(f_01[i] + f_10[i]);
1386 o[INDEX2(i,numComp,1)] = f_01[i]*c1 + f_10[i]*c2 + c0*(f_00[i] + f_11[i]);
1387 o[INDEX2(i,numComp,2)] = f_01[i]*c2 + f_10[i]*c1 + c0*(f_00[i] + f_11[i]);
1388 o[INDEX2(i,numComp,3)] = f_00[i]*c1 + f_11[i]*c2 + c0*(f_01[i] + f_10[i]);
1389 } /* end of component loop i */
1390 } /* end of k0 loop */
1391 } /* end of k1 loop */
1392 /* GENERATOR SNIP_INTERPOLATE_ELEMENTS BOTTOM */
1393 }
1394 }
1395
1396 //protected
1397 void Rectangle::interpolateNodesOnFaces(escript::Data& out, escript::Data& in,
1398 bool reduced) const
1399 {
1400 const dim_t numComp = in.getDataPointSize();
1401 if (reduced) {
1402 const double c0 = .5;
1403 #pragma omp parallel
1404 {
1405 /*** GENERATOR SNIP_INTERPOLATE_REDUCED_FACES TOP */
1406 if (m_faceOffset[0] > -1) {
1407 #pragma omp for nowait
1408 for (index_t k1=0; k1 < m_NE1; ++k1) {
1409 const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0));
1410 const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0));
1411 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
1412 for (index_t i=0; i < numComp; ++i) {
1413 o[INDEX2(i,numComp,0)] = c0*(f_00[i] + f_01[i]);
1414 } /* end of component loop i */
1415 } /* end of k1 loop */
1416 } /* end of face 0 */
1417 if (m_faceOffset[1] > -1) {
1418 #pragma omp for nowait
1419 for (index_t k1=0; k1 < m_NE1; ++k1) {
1420 const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0));
1421 const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0));
1422 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
1423 for (index_t i=0; i < numComp; ++i) {
1424 o[INDEX2(i,numComp,0)] = c0*(f_10[i] + f_11[i]);
1425 } /* end of component loop i */
1426 } /* end of k1 loop */
1427 } /* end of face 1 */
1428 if (m_faceOffset[2] > -1) {
1429 #pragma omp for nowait
1430 for (index_t k0=0; k0 < m_NE0; ++k0) {
1431 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0));
1432 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0));
1433 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1434 for (index_t i=0; i < numComp; ++i) {
1435 o[INDEX2(i,numComp,0)] = c0*(f_00[i] + f_10[i]);
1436 } /* end of component loop i */
1437 } /* end of k0 loop */
1438 } /* end of face 2 */
1439 if (m_faceOffset[3] > -1) {
1440 #pragma omp for nowait
1441 for (index_t k0=0; k0 < m_NE0; ++k0) {
1442 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0));
1443 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0));
1444 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1445 for (index_t i=0; i < numComp; ++i) {
1446 o[INDEX2(i,numComp,0)] = c0*(f_01[i] + f_11[i]);
1447 } /* end of component loop i */
1448 } /* end of k0 loop */
1449 } /* end of face 3 */
1450 /* GENERATOR SNIP_INTERPOLATE_REDUCED_FACES BOTTOM */
1451 } // end of parallel section
1452 } else {
1453 const double c0 = 0.21132486540518711775;
1454 const double c1 = 0.78867513459481288225;
1455 #pragma omp parallel
1456 {
1457 /*** GENERATOR SNIP_INTERPOLATE_FACES TOP */
1458 if (m_faceOffset[0] > -1) {
1459 #pragma omp for nowait
1460 for (index_t k1=0; k1 < m_NE1; ++k1) {
1461 const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0));
1462 const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0));
1463 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
1464 for (index_t i=0; i < numComp; ++i) {
1465 o[INDEX2(i,numComp,0)] = f_00[i]*c1 + f_01[i]*c0;
1466 o[INDEX2(i,numComp,1)] = f_00[i]*c0 + f_01[i]*c1;
1467 } /* end of component loop i */
1468 } /* end of k1 loop */
1469 } /* end of face 0 */
1470 if (m_faceOffset[1] > -1) {
1471 #pragma omp for nowait
1472 for (index_t k1=0; k1 < m_NE1; ++k1) {
1473 const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0));
1474 const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0));
1475 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
1476 for (index_t i=0; i < numComp; ++i) {
1477 o[INDEX2(i,numComp,0)] = f_10[i]*c1 + f_11[i]*c0;
1478 o[INDEX2(i,numComp,1)] = f_10[i]*c0 + f_11[i]*c1;
1479 } /* end of component loop i */
1480 } /* end of k1 loop */
1481 } /* end of face 1 */
1482 if (m_faceOffset[2] > -1) {
1483 #pragma omp for nowait
1484 for (index_t k0=0; k0 < m_NE0; ++k0) {
1485 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0));
1486 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0));
1487 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1488 for (index_t i=0; i < numComp; ++i) {
1489 o[INDEX2(i,numComp,0)] = f_00[i]*c1 + f_10[i]*c0;
1490 o[INDEX2(i,numComp,1)] = f_00[i]*c0 + f_10[i]*c1;
1491 } /* end of component loop i */
1492 } /* end of k0 loop */
1493 } /* end of face 2 */
1494 if (m_faceOffset[3] > -1) {
1495 #pragma omp for nowait
1496 for (index_t k0=0; k0 < m_NE0; ++k0) {
1497 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0));
1498 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0));
1499 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1500 for (index_t i=0; i < numComp; ++i) {
1501 o[INDEX2(i,numComp,0)] = f_01[i]*c1 + f_11[i]*c0;
1502 o[INDEX2(i,numComp,1)] = f_01[i]*c0 + f_11[i]*c1;
1503 } /* end of component loop i */
1504 } /* end of k0 loop */
1505 } /* end of face 3 */
1506 /* GENERATOR SNIP_INTERPOLATE_FACES BOTTOM */
1507 } // end of parallel section
1508 }
1509 }
1510
1511 } // end of namespace ripley
1512

  ViewVC Help
Powered by ViewVC 1.1.26