/[escript]/branches/ripleygmg_from_3668/ripley/src/Rectangle.cpp
ViewVC logotype

Contents of /branches/ripleygmg_from_3668/ripley/src/Rectangle.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3713 - (show annotations)
Tue Dec 6 04:43:29 2011 UTC (8 years ago) by caltinay
File size: 59378 byte(s)
Integrals in 2D and 3D for rectangle & brick elements & face elements in
regular and reduced orders.

1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2011 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14 #include <ripley/Rectangle.h>
15 extern "C" {
16 #include "paso/SystemMatrixPattern.h"
17 }
18
19 #if USE_SILO
20 #include <silo.h>
21 #ifdef ESYS_MPI
22 #include <pmpio.h>
23 #endif
24 #endif
25
26 #include <iomanip>
27
28 using namespace std;
29
30 namespace ripley {
31
32 Rectangle::Rectangle(int n0, int n1, double l0, double l1, int d0, int d1) :
33 RipleyDomain(2),
34 m_gNE0(n0),
35 m_gNE1(n1),
36 m_l0(l0),
37 m_l1(l1),
38 m_NX(d0),
39 m_NY(d1)
40 {
41 // ensure number of subdivisions is valid and nodes can be distributed
42 // among number of ranks
43 if (m_NX*m_NY != m_mpiInfo->size)
44 throw RipleyException("Invalid number of spatial subdivisions");
45
46 if (n0%m_NX > 0 || n1%m_NY > 0)
47 throw RipleyException("Number of elements must be separable into number of ranks in each dimension");
48
49 // local number of elements
50 m_NE0 = n0/m_NX;
51 m_NE1 = n1/m_NY;
52 // local number of nodes (not necessarily owned)
53 m_N0 = m_NE0+1;
54 m_N1 = m_NE1+1;
55 // bottom-left node is at (offset0,offset1) in global mesh
56 m_offset0 = m_NE0*(m_mpiInfo->rank%m_NX);
57 m_offset1 = m_NE1*(m_mpiInfo->rank/m_NX);
58 populateSampleIds();
59 }
60
61 Rectangle::~Rectangle()
62 {
63 }
64
65 string Rectangle::getDescription() const
66 {
67 return "ripley::Rectangle";
68 }
69
70 bool Rectangle::operator==(const AbstractDomain& other) const
71 {
72 if (dynamic_cast<const Rectangle*>(&other))
73 return this==&other;
74
75 return false;
76 }
77
78 void Rectangle::dump(const string& fileName) const
79 {
80 #if USE_SILO
81 string fn(fileName);
82 if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
83 fn+=".silo";
84 }
85
86 const int NUM_SILO_FILES = 1;
87 const char* blockDirFmt = "/block%04d";
88 int driver=DB_HDF5;
89 string siloPath;
90 DBfile* dbfile = NULL;
91
92 #ifdef ESYS_MPI
93 PMPIO_baton_t* baton = NULL;
94 #endif
95
96 if (m_mpiInfo->size > 1) {
97 #ifdef ESYS_MPI
98 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
99 0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
100 PMPIO_DefaultClose, (void*)&driver);
101 // try the fallback driver in case of error
102 if (!baton && driver != DB_PDB) {
103 driver = DB_PDB;
104 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
105 0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
106 PMPIO_DefaultClose, (void*)&driver);
107 }
108 if (baton) {
109 char str[64];
110 snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
111 siloPath = str;
112 dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
113 }
114 #endif
115 } else {
116 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
117 getDescription().c_str(), driver);
118 // try the fallback driver in case of error
119 if (!dbfile && driver != DB_PDB) {
120 driver = DB_PDB;
121 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
122 getDescription().c_str(), driver);
123 }
124 }
125
126 if (!dbfile)
127 throw RipleyException("dump: Could not create Silo file");
128
129 /*
130 if (driver==DB_HDF5) {
131 // gzip level 1 already provides good compression with minimal
132 // performance penalty. Some tests showed that gzip levels >3 performed
133 // rather badly on escript data both in terms of time and space
134 DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
135 }
136 */
137
138 boost::scoped_ptr<double> x(new double[m_N0]);
139 boost::scoped_ptr<double> y(new double[m_N1]);
140 double* coords[2] = { x.get(), y.get() };
141 pair<double,double> xdx = getFirstCoordAndSpacing(0);
142 pair<double,double> ydy = getFirstCoordAndSpacing(1);
143 #pragma omp parallel
144 {
145 #pragma omp for
146 for (dim_t i0 = 0; i0 < m_N0; i0++) {
147 coords[0][i0]=xdx.first+i0*xdx.second;
148 }
149 #pragma omp for
150 for (dim_t i1 = 0; i1 < m_N1; i1++) {
151 coords[1][i1]=ydy.first+i1*ydy.second;
152 }
153 }
154 IndexVector dims = getNumNodesPerDim();
155
156 // write mesh
157 DBPutQuadmesh(dbfile, "mesh", NULL, coords, &dims[0], 2, DB_DOUBLE,
158 DB_COLLINEAR, NULL);
159
160 // write node ids
161 DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], &dims[0], 2,
162 NULL, 0, DB_INT, DB_NODECENT, NULL);
163
164 // write element ids
165 dims = getNumElementsPerDim();
166 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
167 &dims[0], 2, NULL, 0, DB_INT, DB_ZONECENT, NULL);
168
169 // rank 0 writes multimesh and multivar
170 if (m_mpiInfo->rank == 0) {
171 vector<string> tempstrings;
172 vector<char*> names;
173 for (dim_t i=0; i<m_mpiInfo->size; i++) {
174 stringstream path;
175 path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
176 tempstrings.push_back(path.str());
177 names.push_back((char*)tempstrings.back().c_str());
178 }
179 vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
180 DBSetDir(dbfile, "/");
181 DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
182 &types[0], NULL);
183 tempstrings.clear();
184 names.clear();
185 for (dim_t i=0; i<m_mpiInfo->size; i++) {
186 stringstream path;
187 path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
188 tempstrings.push_back(path.str());
189 names.push_back((char*)tempstrings.back().c_str());
190 }
191 types.assign(m_mpiInfo->size, DB_QUADVAR);
192 DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
193 &types[0], NULL);
194 tempstrings.clear();
195 names.clear();
196 for (dim_t i=0; i<m_mpiInfo->size; i++) {
197 stringstream path;
198 path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
199 tempstrings.push_back(path.str());
200 names.push_back((char*)tempstrings.back().c_str());
201 }
202 DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
203 &types[0], NULL);
204 }
205
206 if (m_mpiInfo->size > 1) {
207 #ifdef ESYS_MPI
208 PMPIO_HandOffBaton(baton, dbfile);
209 PMPIO_Finish(baton);
210 #endif
211 } else {
212 DBClose(dbfile);
213 }
214
215 #else // USE_SILO
216 throw RipleyException("dump(): no Silo support");
217 #endif
218 }
219
220 const int* Rectangle::borrowSampleReferenceIDs(int fsType) const
221 {
222 switch (fsType) {
223 case Nodes:
224 return &m_nodeId[0];
225 case Elements:
226 return &m_elementId[0];
227 case FaceElements:
228 return &m_faceId[0];
229 default:
230 break;
231 }
232
233 stringstream msg;
234 msg << "borrowSampleReferenceIDs() not implemented for function space type "
235 << functionSpaceTypeAsString(fsType);
236 throw RipleyException(msg.str());
237 }
238
239 bool Rectangle::ownSample(int fsCode, index_t id) const
240 {
241 #ifdef ESYS_MPI
242 if (fsCode != ReducedNodes) {
243 const index_t myFirst=m_nodeDistribution[m_mpiInfo->rank];
244 const index_t myLast=m_nodeDistribution[m_mpiInfo->rank+1]-1;
245 return (m_nodeId[id]>=myFirst && m_nodeId[id]<=myLast);
246 } else {
247 stringstream msg;
248 msg << "ownSample() not implemented for "
249 << functionSpaceTypeAsString(fsCode);
250 throw RipleyException(msg.str());
251 }
252 #else
253 return true;
254 #endif
255 }
256
257 void Rectangle::setToGradient(escript::Data& out, const escript::Data& cIn) const
258 {
259 escript::Data& in = *const_cast<escript::Data*>(&cIn);
260 const dim_t numComp = in.getDataPointSize();
261 const double h0 = m_l0/m_gNE0;
262 const double h1 = m_l1/m_gNE1;
263 if (out.getFunctionSpace().getTypeCode() == Elements) {
264 /* GENERATOR SNIP_GRAD_ELEMENTS TOP */
265 const double tmp0_13 = 0.78867513459481288225/h1;
266 const double tmp0_0 = 0.78867513459481288225/h0;
267 const double tmp0_4 = 0.21132486540518711775/h0;
268 const double tmp0_10 = 0.78867513459481288225/h1;
269 const double tmp0_1 = 0.21132486540518711775/h0;
270 const double tmp0_8 = -0.21132486540518711775/h1;
271 const double tmp0_14 = 0.21132486540518711775/h1;
272 const double tmp0_5 = 0.78867513459481288225/h0;
273 const double tmp0_11 = -0.78867513459481288225/h1;
274 const double tmp0_2 = -0.21132486540518711775/h0;
275 const double tmp0_9 = 0.21132486540518711775/h1;
276 const double tmp0_15 = -0.21132486540518711775/h1;
277 const double tmp0_6 = -0.78867513459481288225/h0;
278 const double tmp0_3 = -0.78867513459481288225/h0;
279 const double tmp0_12 = -0.78867513459481288225/h1;
280 const double tmp0_7 = -0.21132486540518711775/h0;
281 #pragma omp parallel for
282 for (index_t k1=0; k1 < m_NE1; ++k1) {
283 for (index_t k0=0; k0 < m_NE0; ++k0) {
284 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0));
285 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0));
286 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0));
287 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0));
288 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0));
289 for (index_t i=0; i < numComp; ++i) {
290 o[INDEX3(i,0,0,numComp,2)] = f_00[i]*tmp0_3 + f_01[i]*tmp0_2 + f_10[i]*tmp0_0 + f_11[i]*tmp0_1;
291 o[INDEX3(i,1,0,numComp,2)] = f_00[i]*tmp0_11 + f_01[i]*tmp0_10 + f_10[i]*tmp0_8 + f_11[i]*tmp0_9;
292 o[INDEX3(i,0,1,numComp,2)] = f_00[i]*tmp0_3 + f_01[i]*tmp0_2 + f_10[i]*tmp0_0 + f_11[i]*tmp0_1;
293 o[INDEX3(i,1,1,numComp,2)] = f_00[i]*tmp0_15 + f_01[i]*tmp0_14 + f_10[i]*tmp0_12 + f_11[i]*tmp0_13;
294 o[INDEX3(i,0,2,numComp,2)] = f_00[i]*tmp0_7 + f_01[i]*tmp0_6 + f_10[i]*tmp0_4 + f_11[i]*tmp0_5;
295 o[INDEX3(i,1,2,numComp,2)] = f_00[i]*tmp0_11 + f_01[i]*tmp0_10 + f_10[i]*tmp0_8 + f_11[i]*tmp0_9;
296 o[INDEX3(i,0,3,numComp,2)] = f_00[i]*tmp0_7 + f_01[i]*tmp0_6 + f_10[i]*tmp0_4 + f_11[i]*tmp0_5;
297 o[INDEX3(i,1,3,numComp,2)] = f_00[i]*tmp0_15 + f_01[i]*tmp0_14 + f_10[i]*tmp0_12 + f_11[i]*tmp0_13;
298 } /* end of component loop i */
299 } /* end of k0 loop */
300 } /* end of k1 loop */
301 /* GENERATOR SNIP_GRAD_ELEMENTS BOTTOM */
302 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
303 /* GENERATOR SNIP_GRAD_REDUCED_ELEMENTS TOP */
304 const double tmp0_3 = 0.5/h1;
305 const double tmp0_2 = -0.5/h1;
306 const double tmp0_1 = -0.5/h0;
307 const double tmp0_0 = 0.5/h0;
308 #pragma omp parallel for
309 for (index_t k1=0; k1 < m_NE1; ++k1) {
310 for (index_t k0=0; k0 < m_NE0; ++k0) {
311 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0));
312 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0));
313 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0));
314 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0));
315 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0));
316 for (index_t i=0; i < numComp; ++i) {
317 o[INDEX3(i,0,0,numComp,2)] = tmp0_0*(f_10[i] + f_11[i]) + tmp0_1*(f_00[i] + f_01[i]);
318 o[INDEX3(i,1,0,numComp,2)] = tmp0_2*(f_00[i] + f_10[i]) + tmp0_3*(f_01[i] + f_11[i]);
319 } /* end of component loop i */
320 } /* end of k0 loop */
321 } /* end of k1 loop */
322 /* GENERATOR SNIP_GRAD_REDUCED_ELEMENTS BOTTOM */
323 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
324 /* GENERATOR SNIP_GRAD_FACES TOP */
325 if (m_faceOffset[0] > -1) {
326 const double tmp0_0 = 0.78867513459481288225/h0;
327 const double tmp0_4 = 0.21132486540518711775/h0;
328 const double tmp0_1 = 0.21132486540518711775/h0;
329 const double tmp0_8 = 1.0/h1;
330 const double tmp0_5 = 0.78867513459481288225/h0;
331 const double tmp0_2 = -0.21132486540518711775/h0;
332 const double tmp0_9 = -1/h1;
333 const double tmp0_6 = -0.78867513459481288225/h0;
334 const double tmp0_3 = -0.78867513459481288225/h0;
335 const double tmp0_7 = -0.21132486540518711775/h0;
336 #pragma omp parallel for
337 for (index_t k1=0; k1 < m_NE1; ++k1) {
338 const register double* f_10 = in.getSampleDataRO(INDEX2(1,k1, m_N0));
339 const register double* f_11 = in.getSampleDataRO(INDEX2(1,k1+1, m_N0));
340 const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0));
341 const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0));
342 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
343 for (index_t i=0; i < numComp; ++i) {
344 o[INDEX3(i,0,0,numComp,2)] = f_00[i]*tmp0_3 + f_01[i]*tmp0_2 + f_10[i]*tmp0_0 + f_11[i]*tmp0_1;
345 o[INDEX3(i,1,0,numComp,2)] = f_00[i]*tmp0_9 + f_01[i]*tmp0_8;
346 o[INDEX3(i,0,1,numComp,2)] = f_00[i]*tmp0_7 + f_01[i]*tmp0_6 + f_10[i]*tmp0_4 + f_11[i]*tmp0_5;
347 o[INDEX3(i,1,1,numComp,2)] = f_00[i]*tmp0_9 + f_01[i]*tmp0_8;
348 } /* end of component loop i */
349 } /* end of k1 loop */
350 } /* end of face 0 */
351 if (m_faceOffset[1] > -1) {
352 const double tmp0_0 = 0.78867513459481288225/h0;
353 const double tmp0_4 = 0.21132486540518711775/h0;
354 const double tmp0_1 = 0.21132486540518711775/h0;
355 const double tmp0_8 = -1/h1;
356 const double tmp0_5 = 0.78867513459481288225/h0;
357 const double tmp0_2 = -0.21132486540518711775/h0;
358 const double tmp0_9 = 1.0/h1;
359 const double tmp0_6 = -0.78867513459481288225/h0;
360 const double tmp0_3 = -0.78867513459481288225/h0;
361 const double tmp0_7 = -0.21132486540518711775/h0;
362 #pragma omp parallel for
363 for (index_t k1=0; k1 < m_NE1; ++k1) {
364 const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0));
365 const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0));
366 const register double* f_01 = in.getSampleDataRO(INDEX2(m_N0-2,k1+1, m_N0));
367 const register double* f_00 = in.getSampleDataRO(INDEX2(m_N0-2,k1, m_N0));
368 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
369 for (index_t i=0; i < numComp; ++i) {
370 o[INDEX3(i,0,0,numComp,2)] = f_00[i]*tmp0_3 + f_01[i]*tmp0_2 + f_10[i]*tmp0_0 + f_11[i]*tmp0_1;
371 o[INDEX3(i,1,0,numComp,2)] = f_10[i]*tmp0_8 + f_11[i]*tmp0_9;
372 o[INDEX3(i,0,1,numComp,2)] = f_00[i]*tmp0_7 + f_01[i]*tmp0_6 + f_10[i]*tmp0_4 + f_11[i]*tmp0_5;
373 o[INDEX3(i,1,1,numComp,2)] = f_10[i]*tmp0_8 + f_11[i]*tmp0_9;
374 } /* end of component loop i */
375 } /* end of k1 loop */
376 } /* end of face 1 */
377 if (m_faceOffset[2] > -1) {
378 const double tmp0_0 = -1/h0;
379 const double tmp0_4 = 0.21132486540518711775/h1;
380 const double tmp0_1 = 1.0/h0;
381 const double tmp0_8 = 0.78867513459481288225/h1;
382 const double tmp0_5 = 0.78867513459481288225/h1;
383 const double tmp0_2 = -0.78867513459481288225/h1;
384 const double tmp0_9 = 0.21132486540518711775/h1;
385 const double tmp0_6 = -0.21132486540518711775/h1;
386 const double tmp0_3 = -0.21132486540518711775/h1;
387 const double tmp0_7 = -0.78867513459481288225/h1;
388 #pragma omp parallel for
389 for (index_t k0=0; k0 < m_NE0; ++k0) {
390 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0));
391 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0));
392 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,1, m_N0));
393 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,1, m_N0));
394 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
395 for (index_t i=0; i < numComp; ++i) {
396 o[INDEX3(i,0,0,numComp,2)] = f_00[i]*tmp0_0 + f_10[i]*tmp0_1;
397 o[INDEX3(i,1,0,numComp,2)] = f_00[i]*tmp0_2 + f_01[i]*tmp0_5 + f_10[i]*tmp0_3 + f_11[i]*tmp0_4;
398 o[INDEX3(i,0,1,numComp,2)] = f_00[i]*tmp0_0 + f_10[i]*tmp0_1;
399 o[INDEX3(i,1,1,numComp,2)] = f_00[i]*tmp0_6 + f_01[i]*tmp0_9 + f_10[i]*tmp0_7 + f_11[i]*tmp0_8;
400 } /* end of component loop i */
401 } /* end of k0 loop */
402 } /* end of face 2 */
403 if (m_faceOffset[3] > -1) {
404 const double tmp0_0 = 1.0/h0;
405 const double tmp0_4 = -0.21132486540518711775/h1;
406 const double tmp0_1 = -1/h0;
407 const double tmp0_8 = -0.78867513459481288225/h1;
408 const double tmp0_5 = -0.78867513459481288225/h1;
409 const double tmp0_2 = 0.21132486540518711775/h1;
410 const double tmp0_9 = -0.21132486540518711775/h1;
411 const double tmp0_6 = 0.78867513459481288225/h1;
412 const double tmp0_3 = 0.78867513459481288225/h1;
413 const double tmp0_7 = 0.21132486540518711775/h1;
414 #pragma omp parallel for
415 for (index_t k0=0; k0 < m_NE0; ++k0) {
416 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0));
417 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0));
418 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,m_N1-2, m_N0));
419 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,m_N1-2, m_N0));
420 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
421 for (index_t i=0; i < numComp; ++i) {
422 o[INDEX3(i,0,0,numComp,2)] = f_01[i]*tmp0_1 + f_11[i]*tmp0_0;
423 o[INDEX3(i,1,0,numComp,2)] = f_00[i]*tmp0_5 + f_01[i]*tmp0_3 + f_10[i]*tmp0_4 + f_11[i]*tmp0_2;
424 o[INDEX3(i,0,1,numComp,2)] = f_01[i]*tmp0_1 + f_11[i]*tmp0_0;
425 o[INDEX3(i,1,1,numComp,2)] = f_00[i]*tmp0_9 + f_01[i]*tmp0_7 + f_10[i]*tmp0_8 + f_11[i]*tmp0_6;
426 } /* end of component loop i */
427 } /* end of k0 loop */
428 } /* end of face 3 */
429 /* GENERATOR SNIP_GRAD_FACES BOTTOM */
430 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
431 /* GENERATOR SNIP_GRAD_REDUCED_FACES TOP */
432 if (m_faceOffset[0] > -1) {
433 const double tmp0_3 = -1/h1;
434 const double tmp0_2 = 1.0/h1;
435 const double tmp0_1 = -0.5/h0;
436 const double tmp0_0 = 0.5/h0;
437 #pragma omp parallel for
438 for (index_t k1=0; k1 < m_NE1; ++k1) {
439 const register double* f_10 = in.getSampleDataRO(INDEX2(1,k1, m_N0));
440 const register double* f_11 = in.getSampleDataRO(INDEX2(1,k1+1, m_N0));
441 const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0));
442 const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0));
443 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
444 for (index_t i=0; i < numComp; ++i) {
445 o[INDEX3(i,0,0,numComp,2)] = tmp0_0*(f_10[i] + f_11[i]) + tmp0_1*(f_00[i] + f_01[i]);
446 o[INDEX3(i,1,0,numComp,2)] = f_00[i]*tmp0_3 + f_01[i]*tmp0_2;
447 } /* end of component loop i */
448 } /* end of k1 loop */
449 } /* end of face 0 */
450 if (m_faceOffset[1] > -1) {
451 const double tmp0_3 = 1.0/h1;
452 const double tmp0_2 = -1/h1;
453 const double tmp0_1 = -0.5/h0;
454 const double tmp0_0 = 0.5/h0;
455 #pragma omp parallel for
456 for (index_t k1=0; k1 < m_NE1; ++k1) {
457 const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0));
458 const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0));
459 const register double* f_01 = in.getSampleDataRO(INDEX2(m_N0-2,k1+1, m_N0));
460 const register double* f_00 = in.getSampleDataRO(INDEX2(m_N0-2,k1, m_N0));
461 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
462 for (index_t i=0; i < numComp; ++i) {
463 o[INDEX3(i,0,0,numComp,2)] = tmp0_0*(f_10[i] + f_11[i]) + tmp0_1*(f_00[i] + f_01[i]);
464 o[INDEX3(i,1,0,numComp,2)] = f_10[i]*tmp0_2 + f_11[i]*tmp0_3;
465 } /* end of component loop i */
466 } /* end of k1 loop */
467 } /* end of face 1 */
468 if (m_faceOffset[2] > -1) {
469 const double tmp0_3 = 0.5/h1;
470 const double tmp0_2 = -0.5/h1;
471 const double tmp0_1 = 1.0/h0;
472 const double tmp0_0 = -1/h0;
473 #pragma omp parallel for
474 for (index_t k0=0; k0 < m_NE0; ++k0) {
475 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0));
476 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0));
477 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,1, m_N0));
478 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,1, m_N0));
479 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
480 for (index_t i=0; i < numComp; ++i) {
481 o[INDEX3(i,0,0,numComp,2)] = f_00[i]*tmp0_0 + f_10[i]*tmp0_1;
482 o[INDEX3(i,1,0,numComp,2)] = tmp0_2*(f_00[i] + f_10[i]) + tmp0_3*(f_01[i] + f_11[i]);
483 } /* end of component loop i */
484 } /* end of k0 loop */
485 } /* end of face 2 */
486 if (m_faceOffset[3] > -1) {
487 const double tmp0_3 = -0.5/h1;
488 const double tmp0_2 = 0.5/h1;
489 const double tmp0_1 = -1/h0;
490 const double tmp0_0 = 1.0/h0;
491 #pragma omp parallel for
492 for (index_t k0=0; k0 < m_NE0; ++k0) {
493 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0));
494 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0));
495 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,m_N1-2, m_N0));
496 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,m_N1-2, m_N0));
497 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
498 for (index_t i=0; i < numComp; ++i) {
499 o[INDEX3(i,0,0,numComp,2)] = f_01[i]*tmp0_1 + f_11[i]*tmp0_0;
500 o[INDEX3(i,1,0,numComp,2)] = tmp0_2*(f_01[i] + f_11[i]) + tmp0_3*(f_00[i] + f_10[i]);
501 } /* end of component loop i */
502 } /* end of k0 loop */
503 } /* end of face 3 */
504 /* GENERATOR SNIP_GRAD_REDUCED_FACES BOTTOM */
505 } else {
506 stringstream msg;
507 msg << "setToGradient() not implemented for "
508 << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
509 throw RipleyException(msg.str());
510 }
511 }
512
513 void Rectangle::setToIntegrals(vector<double>& integrals, const escript::Data& arg) const
514 {
515 escript::Data& in = *const_cast<escript::Data*>(&arg);
516 const dim_t numComp = in.getDataPointSize();
517 const double h0 = m_l0/m_gNE0;
518 const double h1 = m_l1/m_gNE1;
519 if (arg.getFunctionSpace().getTypeCode() == Elements) {
520 const double w_0 = h0*h1/4.;
521 #pragma omp parallel
522 {
523 vector<double> int_local(numComp, 0);
524 #pragma omp for
525 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
526 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
527 const double* f = in.getSampleDataRO(INDEX2(k0, k1, m_NE0));
528 for (index_t i=0; i < numComp; ++i) {
529 const register double f_0 = f[INDEX2(i,0,numComp)];
530 const register double f_1 = f[INDEX2(i,1,numComp)];
531 const register double f_2 = f[INDEX2(i,2,numComp)];
532 const register double f_3 = f[INDEX2(i,3,numComp)];
533 int_local[i]+=(f_0+f_1+f_2+f_3)*w_0;
534 } /* end of component loop i */
535 } /* end of k0 loop */
536 } /* end of k1 loop */
537
538 #pragma omp critical
539 for (index_t i=0; i<numComp; i++)
540 integrals[i]+=int_local[i];
541 }
542 } else if (arg.getFunctionSpace().getTypeCode() == ReducedElements) {
543 const double w_0 = h0*h1;
544 #pragma omp parallel
545 {
546 vector<double> int_local(numComp, 0);
547 #pragma omp for
548 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
549 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
550 const double* f = in.getSampleDataRO(INDEX2(k0, k1, m_NE0));
551 for (index_t i=0; i < numComp; ++i) {
552 int_local[i]+=f[i]*w_0;
553 } /* end of component loop i */
554 } /* end of k0 loop */
555 } /* end of k1 loop */
556
557 #pragma omp critical
558 for (index_t i=0; i<numComp; i++)
559 integrals[i]+=int_local[i];
560 }
561 } else if (arg.getFunctionSpace().getTypeCode() == FaceElements) {
562 const double w_0 = h0/2.;
563 const double w_1 = h1/2.;
564 #pragma omp parallel
565 {
566 vector<double> int_local(numComp, 0);
567 if (m_faceOffset[0] > -1) {
568 #pragma omp for
569 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
570 const double* f = in.getSampleDataRO(m_faceOffset[0]+k1);
571 for (index_t i=0; i < numComp; ++i) {
572 const register double f_0 = f[INDEX2(i,0,numComp)];
573 const register double f_1 = f[INDEX2(i,1,numComp)];
574 int_local[i]+=(f_0+f_1)*w_1;
575 } /* end of component loop i */
576 } /* end of k1 loop */
577 }
578
579 if (m_faceOffset[1] > -1) {
580 #pragma omp for
581 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
582 const double* f = in.getSampleDataRO(m_faceOffset[1]+k1);
583 for (index_t i=0; i < numComp; ++i) {
584 const register double f_0 = f[INDEX2(i,0,numComp)];
585 const register double f_1 = f[INDEX2(i,1,numComp)];
586 int_local[i]+=(f_0+f_1)*w_1;
587 } /* end of component loop i */
588 } /* end of k1 loop */
589 }
590
591 if (m_faceOffset[2] > -1) {
592 #pragma omp for
593 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
594 const double* f = in.getSampleDataRO(m_faceOffset[2]+k0);
595 for (index_t i=0; i < numComp; ++i) {
596 const register double f_0 = f[INDEX2(i,0,numComp)];
597 const register double f_1 = f[INDEX2(i,1,numComp)];
598 int_local[i]+=(f_0+f_1)*w_0;
599 } /* end of component loop i */
600 } /* end of k0 loop */
601 }
602
603 if (m_faceOffset[3] > -1) {
604 #pragma omp for
605 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
606 const double* f = in.getSampleDataRO(m_faceOffset[3]+k0);
607 for (index_t i=0; i < numComp; ++i) {
608 const register double f_0 = f[INDEX2(i,0,numComp)];
609 const register double f_1 = f[INDEX2(i,1,numComp)];
610 int_local[i]+=(f_0+f_1)*w_0;
611 } /* end of component loop i */
612 } /* end of k0 loop */
613 }
614
615 #pragma omp critical
616 for (index_t i=0; i<numComp; i++)
617 integrals[i]+=int_local[i];
618 }
619 } else if (arg.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
620 #pragma omp parallel
621 {
622 vector<double> int_local(numComp, 0);
623 if (m_faceOffset[0] > -1) {
624 #pragma omp for
625 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
626 const double* f = in.getSampleDataRO(m_faceOffset[0]+k1);
627 for (index_t i=0; i < numComp; ++i) {
628 int_local[i]+=f[i]*h1;
629 } /* end of component loop i */
630 } /* end of k1 loop */
631 }
632
633 if (m_faceOffset[1] > -1) {
634 #pragma omp for
635 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
636 const double* f = in.getSampleDataRO(m_faceOffset[1]+k1);
637 for (index_t i=0; i < numComp; ++i) {
638 int_local[i]+=f[i]*h1;
639 } /* end of component loop i */
640 } /* end of k1 loop */
641 }
642
643 if (m_faceOffset[2] > -1) {
644 #pragma omp for
645 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
646 const double* f = in.getSampleDataRO(m_faceOffset[2]+k0);
647 for (index_t i=0; i < numComp; ++i) {
648 int_local[i]+=f[i]*h0;
649 } /* end of component loop i */
650 } /* end of k0 loop */
651 }
652
653 if (m_faceOffset[3] > -1) {
654 #pragma omp for
655 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
656 const double* f = in.getSampleDataRO(m_faceOffset[3]+k0);
657 for (index_t i=0; i < numComp; ++i) {
658 int_local[i]+=f[i]*h0;
659 } /* end of component loop i */
660 } /* end of k0 loop */
661 }
662
663 #pragma omp critical
664 for (index_t i=0; i<numComp; i++)
665 integrals[i]+=int_local[i];
666 }
667 } else {
668 stringstream msg;
669 msg << "setToIntegrals() not implemented for "
670 << functionSpaceTypeAsString(arg.getFunctionSpace().getTypeCode());
671 throw RipleyException(msg.str());
672 }
673 }
674
675 void Rectangle::addPDEToSystem(escript::AbstractSystemMatrix& mat,
676 escript::Data& rhs, const escript::Data& A, const escript::Data& B,
677 const escript::Data& C, const escript::Data& D,
678 const escript::Data& X, const escript::Data& Y,
679 const escript::Data& d, const escript::Data& y,
680 const escript::Data& d_contact, const escript::Data& y_contact,
681 const escript::Data& d_dirac, const escript::Data& y_dirac) const
682 {
683 throw RipleyException("addPDEToSystem() not implemented");
684 }
685
686 Paso_SystemMatrixPattern* Rectangle::getPattern(bool reducedRowOrder,
687 bool reducedColOrder) const
688 {
689 if (reducedRowOrder || reducedColOrder)
690 throw RipleyException("getPattern() not implemented for reduced order");
691
692 // connector
693 RankVector neighbour;
694 IndexVector offsetInShared(1,0);
695 IndexVector sendShared, recvShared;
696 const IndexVector faces=getNumFacesPerBoundary();
697 const index_t left = (m_offset0==0 ? 0 : 1);
698 const index_t bottom = (m_offset1==0 ? 0 : 1);
699 // corner node from bottom-left
700 if (faces[0] == 0 && faces[2] == 0) {
701 neighbour.push_back(m_mpiInfo->rank-m_NX-1);
702 offsetInShared.push_back(offsetInShared.back()+1);
703 sendShared.push_back(m_nodeId[m_N0+left]);
704 recvShared.push_back(m_nodeId[0]);
705 }
706 // bottom edge
707 if (faces[2] == 0) {
708 neighbour.push_back(m_mpiInfo->rank-m_NX);
709 offsetInShared.push_back(offsetInShared.back()+m_N0-left);
710 for (dim_t i=left; i<m_N0; i++) {
711 // easy case, we know the neighbour id's
712 sendShared.push_back(m_nodeId[m_N0+i]);
713 recvShared.push_back(m_nodeId[i]);
714 }
715 }
716 // corner node from bottom-right
717 if (faces[1] == 0 && faces[2] == 0) {
718 neighbour.push_back(m_mpiInfo->rank-m_NX+1);
719 const index_t N0=(neighbour.back()%m_NX == 0 ? m_N0 : m_N0-1);
720 const index_t N1=(neighbour.back()/m_NX == 0 ? m_N1 : m_N1-1);
721 const index_t first=m_nodeDistribution[neighbour.back()];
722 offsetInShared.push_back(offsetInShared.back()+1);
723 sendShared.push_back(m_nodeId[(bottom+1)*m_N0-1]);
724 recvShared.push_back(first+N0*(N1-1));
725 }
726 // left edge
727 if (faces[0] == 0) {
728 neighbour.push_back(m_mpiInfo->rank-1);
729 offsetInShared.push_back(offsetInShared.back()+m_N1-bottom);
730 for (dim_t i=bottom; i<m_N1; i++) {
731 // easy case, we know the neighbour id's
732 sendShared.push_back(m_nodeId[i*m_N0+1]);
733 recvShared.push_back(m_nodeId[i*m_N0]);
734 }
735 }
736 // right edge
737 if (faces[1] == 0) {
738 neighbour.push_back(m_mpiInfo->rank+1);
739 const index_t rightN0=(neighbour.back()%m_NX == 0 ? m_N0 : m_N0-1);
740 const index_t first=m_nodeDistribution[neighbour.back()];
741 offsetInShared.push_back(offsetInShared.back()+m_N1-bottom);
742 for (dim_t i=bottom; i<m_N1; i++) {
743 sendShared.push_back(m_nodeId[(i+1)*m_N0-1]);
744 recvShared.push_back(first+rightN0*(i-bottom));
745 }
746 }
747 // corner node from top-left
748 if (faces[0] == 0 && faces[3] == 0) {
749 neighbour.push_back(m_mpiInfo->rank+m_NX-1);
750 const index_t N0=(neighbour.back()%m_NX == 0 ? m_N0 : m_N0-1);
751 const index_t first=m_nodeDistribution[neighbour.back()];
752 offsetInShared.push_back(offsetInShared.back()+1);
753 sendShared.push_back(m_nodeId[m_N0*(m_N1-1)+left]);
754 recvShared.push_back(first+N0-1);
755 }
756 // top edge
757 if (faces[3] == 0) {
758 neighbour.push_back(m_mpiInfo->rank+m_NX);
759 const index_t first=m_nodeDistribution[neighbour.back()];
760 offsetInShared.push_back(offsetInShared.back()+m_N0-left);
761 for (dim_t i=left; i<m_N0; i++) {
762 sendShared.push_back(m_nodeId[m_N0*(m_N1-1)+i]);
763 recvShared.push_back(first+i-left);
764 }
765 }
766 // corner node from top-right
767 if (faces[1] == 0 && faces[3] == 0) {
768 neighbour.push_back(m_mpiInfo->rank+m_NX+1);
769 const index_t first=m_nodeDistribution[neighbour.back()];
770 offsetInShared.push_back(offsetInShared.back()+1);
771 sendShared.push_back(m_nodeId[m_N0*m_N1-1]);
772 recvShared.push_back(first);
773 }
774 const int numDOF=m_nodeDistribution[m_mpiInfo->rank+1]-m_nodeDistribution[m_mpiInfo->rank];
775 /*
776 cout << "--- rcv_shcomp ---" << endl;
777 cout << "numDOF=" << numDOF << ", numNeighbors=" << neighbour.size() << endl;
778 for (size_t i=0; i<neighbour.size(); i++) {
779 cout << "neighbor[" << i << "]=" << neighbour[i]
780 << " offsetInShared[" << i+1 << "]=" << offsetInShared[i+1] << endl;
781 }
782 for (size_t i=0; i<recvShared.size(); i++) {
783 cout << "shared[" << i << "]=" << recvShared[i] << endl;
784 }
785 cout << "--- snd_shcomp ---" << endl;
786 for (size_t i=0; i<sendShared.size(); i++) {
787 cout << "shared[" << i << "]=" << sendShared[i] << endl;
788 }
789 */
790
791 Paso_SharedComponents *snd_shcomp = Paso_SharedComponents_alloc(
792 numDOF, neighbour.size(), &neighbour[0], &sendShared[0],
793 &offsetInShared[0], 1, 0, m_mpiInfo);
794 Paso_SharedComponents *rcv_shcomp = Paso_SharedComponents_alloc(
795 numDOF, neighbour.size(), &neighbour[0], &recvShared[0],
796 &offsetInShared[0], 1, 0, m_mpiInfo);
797 Paso_Connector* connector = Paso_Connector_alloc(snd_shcomp, rcv_shcomp);
798 Paso_SharedComponents_free(snd_shcomp);
799 Paso_SharedComponents_free(rcv_shcomp);
800
801 // create patterns
802 dim_t M, N;
803 IndexVector ptr(1,0);
804 IndexVector index;
805
806 // main pattern
807 for (index_t i=0; i<numDOF; i++) {
808 // always add the node itself
809 index.push_back(i);
810 int num=insertNeighbours(index, i);
811 ptr.push_back(ptr.back()+num+1);
812 }
813 M=N=ptr.size()-1;
814 // paso will manage the memory
815 index_t* indexC = MEMALLOC(index.size(),index_t);
816 index_t* ptrC = MEMALLOC(ptr.size(), index_t);
817 copy(index.begin(), index.end(), indexC);
818 copy(ptr.begin(), ptr.end(), ptrC);
819 Paso_Pattern *mainPattern = Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT,
820 M, N, ptrC, indexC);
821
822 /*
823 cout << "--- main_pattern ---" << endl;
824 cout << "M=" << M << ", N=" << N << endl;
825 for (size_t i=0; i<ptr.size(); i++) {
826 cout << "ptr[" << i << "]=" << ptr[i] << endl;
827 }
828 for (size_t i=0; i<index.size(); i++) {
829 cout << "index[" << i << "]=" << index[i] << endl;
830 }
831 */
832
833 ptr.clear();
834 index.clear();
835
836 // column & row couple patterns
837 Paso_Pattern *colCouplePattern, *rowCouplePattern;
838 generateCouplePatterns(&colCouplePattern, &rowCouplePattern);
839
840 // allocate paso distribution
841 Paso_Distribution* distribution = Paso_Distribution_alloc(m_mpiInfo,
842 const_cast<index_t*>(&m_nodeDistribution[0]), 1, 0);
843
844 Paso_SystemMatrixPattern* pattern = Paso_SystemMatrixPattern_alloc(
845 MATRIX_FORMAT_DEFAULT, distribution, distribution,
846 mainPattern, colCouplePattern, rowCouplePattern,
847 connector, connector);
848 Paso_Pattern_free(mainPattern);
849 Paso_Pattern_free(colCouplePattern);
850 Paso_Pattern_free(rowCouplePattern);
851 Paso_Distribution_free(distribution);
852 return pattern;
853 }
854
855 void Rectangle::Print_Mesh_Info(const bool full) const
856 {
857 RipleyDomain::Print_Mesh_Info(full);
858 if (full) {
859 cout << " Id Coordinates" << endl;
860 cout.precision(15);
861 cout.setf(ios::scientific, ios::floatfield);
862 pair<double,double> xdx = getFirstCoordAndSpacing(0);
863 pair<double,double> ydy = getFirstCoordAndSpacing(1);
864 for (index_t i=0; i < getNumNodes(); i++) {
865 cout << " " << setw(5) << m_nodeId[i]
866 << " " << xdx.first+(i%m_N0)*xdx.second
867 << " " << ydy.first+(i/m_N0)*ydy.second << endl;
868 }
869 }
870 }
871
872 IndexVector Rectangle::getNumNodesPerDim() const
873 {
874 IndexVector ret;
875 ret.push_back(m_N0);
876 ret.push_back(m_N1);
877 return ret;
878 }
879
880 IndexVector Rectangle::getNumElementsPerDim() const
881 {
882 IndexVector ret;
883 ret.push_back(m_NE0);
884 ret.push_back(m_NE1);
885 return ret;
886 }
887
888 IndexVector Rectangle::getNumFacesPerBoundary() const
889 {
890 IndexVector ret(4, 0);
891 //left
892 if (m_offset0==0)
893 ret[0]=m_NE1;
894 //right
895 if (m_mpiInfo->rank%m_NX==m_NX-1)
896 ret[1]=m_NE1;
897 //bottom
898 if (m_offset1==0)
899 ret[2]=m_NE0;
900 //top
901 if (m_mpiInfo->rank/m_NX==m_NY-1)
902 ret[3]=m_NE0;
903 return ret;
904 }
905
906 pair<double,double> Rectangle::getFirstCoordAndSpacing(dim_t dim) const
907 {
908 if (dim==0) {
909 return pair<double,double>((m_l0*m_offset0)/m_gNE0, m_l0/m_gNE0);
910 } else if (dim==1) {
911 return pair<double,double>((m_l1*m_offset1)/m_gNE1, m_l1/m_gNE1);
912 }
913 throw RipleyException("getFirstCoordAndSpacing(): invalid argument");
914 }
915
916 //protected
917 dim_t Rectangle::getNumFaceElements() const
918 {
919 const IndexVector faces = getNumFacesPerBoundary();
920 dim_t n=0;
921 for (size_t i=0; i<faces.size(); i++)
922 n+=faces[i];
923 return n;
924 }
925
926 //protected
927 void Rectangle::assembleCoordinates(escript::Data& arg) const
928 {
929 escriptDataC x = arg.getDataC();
930 int numDim = m_numDim;
931 if (!isDataPointShapeEqual(&x, 1, &numDim))
932 throw RipleyException("setToX: Invalid Data object shape");
933 if (!numSamplesEqual(&x, 1, getNumNodes()))
934 throw RipleyException("setToX: Illegal number of samples in Data object");
935
936 pair<double,double> xdx = getFirstCoordAndSpacing(0);
937 pair<double,double> ydy = getFirstCoordAndSpacing(1);
938 arg.requireWrite();
939 #pragma omp parallel for
940 for (dim_t i1 = 0; i1 < m_N1; i1++) {
941 for (dim_t i0 = 0; i0 < m_N0; i0++) {
942 double* point = arg.getSampleDataRW(i0+m_N0*i1);
943 point[0] = xdx.first+i0*xdx.second;
944 point[1] = ydy.first+i1*ydy.second;
945 }
946 }
947 }
948
949 //private
950 void Rectangle::populateSampleIds()
951 {
952 // identifiers are ordered from left to right, bottom to top on each rank,
953 // except for the shared nodes which are owned by the rank below / to the
954 // left of the current rank
955
956 // build node distribution vector first.
957 // m_nodeDistribution[i] is the first node id on rank i, that is
958 // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes
959 m_nodeDistribution.assign(m_mpiInfo->size+1, 0);
960 m_nodeDistribution[1]=getNumNodes();
961 for (dim_t k=1; k<m_mpiInfo->size-1; k++) {
962 const index_t x=k%m_NX;
963 const index_t y=k/m_NX;
964 index_t numNodes=getNumNodes();
965 if (x>0)
966 numNodes-=m_N1;
967 if (y>0)
968 numNodes-=m_N0;
969 if (x>0 && y>0)
970 numNodes++; // subtracted corner twice -> fix that
971 m_nodeDistribution[k+1]=m_nodeDistribution[k]+numNodes;
972 }
973 m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal();
974
975 m_nodeId.resize(getNumNodes());
976
977 // the bottom row and left column are not owned by this rank so the
978 // identifiers need to be computed accordingly
979 const index_t left = (m_offset0==0 ? 0 : 1);
980 const index_t bottom = (m_offset1==0 ? 0 : 1);
981 if (left>0) {
982 const int neighbour=m_mpiInfo->rank-1;
983 const index_t leftN0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
984 #pragma omp parallel for
985 for (dim_t i1=bottom; i1<m_N1; i1++) {
986 m_nodeId[i1*m_N0]=m_nodeDistribution[neighbour]
987 + (i1-bottom+1)*leftN0-1;
988 }
989 }
990 if (bottom>0) {
991 const int neighbour=m_mpiInfo->rank-m_NX;
992 const index_t bottomN0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
993 const index_t bottomN1=(neighbour/m_NX == 0 ? m_N1 : m_N1-1);
994 #pragma omp parallel for
995 for (dim_t i0=left; i0<m_N0; i0++) {
996 m_nodeId[i0]=m_nodeDistribution[neighbour]
997 + (bottomN1-1)*bottomN0 + i0 - left;
998 }
999 }
1000 if (left>0 && bottom>0) {
1001 const int neighbour=m_mpiInfo->rank-m_NX-1;
1002 const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1003 const index_t N1=(neighbour/m_NX == 0 ? m_N1 : m_N1-1);
1004 m_nodeId[0]=m_nodeDistribution[neighbour]+N0*N1-1;
1005 }
1006
1007 // the rest of the id's are contiguous
1008 const index_t firstId=m_nodeDistribution[m_mpiInfo->rank];
1009 #pragma omp parallel for
1010 for (dim_t i1=bottom; i1<m_N1; i1++) {
1011 for (dim_t i0=left; i0<m_N0; i0++) {
1012 m_nodeId[i0+i1*m_N0] = firstId+i0-left+(i1-bottom)*(m_N0-left);
1013 }
1014 }
1015
1016 // element id's
1017 m_elementId.resize(getNumElements());
1018 #pragma omp parallel for
1019 for (dim_t k=0; k<getNumElements(); k++) {
1020 m_elementId[k]=k;
1021 }
1022
1023 // face element id's
1024 m_faceId.resize(getNumFaceElements());
1025 #pragma omp parallel for
1026 for (dim_t k=0; k<getNumFaceElements(); k++) {
1027 m_faceId[k]=k;
1028 }
1029
1030 // generate face offset vector
1031 const IndexVector facesPerEdge = getNumFacesPerBoundary();
1032 m_faceOffset.assign(facesPerEdge.size(), -1);
1033 index_t offset=0;
1034 for (size_t i=0; i<facesPerEdge.size(); i++) {
1035 if (facesPerEdge[i]>0) {
1036 m_faceOffset[i]=offset;
1037 offset+=facesPerEdge[i];
1038 }
1039 }
1040 }
1041
1042 //private
1043 int Rectangle::insertNeighbours(IndexVector& index, index_t node) const
1044 {
1045 const dim_t myN0 = (m_offset0==0 ? m_N0 : m_N0-1);
1046 const dim_t myN1 = (m_offset1==0 ? m_N1 : m_N1-1);
1047 const int x=node%myN0;
1048 const int y=node/myN0;
1049 int num=0;
1050 if (y>0) {
1051 if (x>0) {
1052 // bottom-left
1053 index.push_back(node-myN0-1);
1054 num++;
1055 }
1056 // bottom
1057 index.push_back(node-myN0);
1058 num++;
1059 if (x<myN0-1) {
1060 // bottom-right
1061 index.push_back(node-myN0+1);
1062 num++;
1063 }
1064 }
1065 if (x<myN0-1) {
1066 // right
1067 index.push_back(node+1);
1068 num++;
1069 if (y<myN1-1) {
1070 // top-right
1071 index.push_back(node+myN0+1);
1072 num++;
1073 }
1074 }
1075 if (y<myN1-1) {
1076 // top
1077 index.push_back(node+myN0);
1078 num++;
1079 if (x>0) {
1080 // top-left
1081 index.push_back(node+myN0-1);
1082 num++;
1083 }
1084 }
1085 if (x>0) {
1086 // left
1087 index.push_back(node-1);
1088 num++;
1089 }
1090
1091 return num;
1092 }
1093
1094 //private
1095 void Rectangle::generateCouplePatterns(Paso_Pattern** colPattern, Paso_Pattern** rowPattern) const
1096 {
1097 IndexVector ptr(1,0);
1098 IndexVector index;
1099 const dim_t myN0 = (m_offset0==0 ? m_N0 : m_N0-1);
1100 const dim_t myN1 = (m_offset1==0 ? m_N1 : m_N1-1);
1101 const IndexVector faces=getNumFacesPerBoundary();
1102
1103 // bottom edge
1104 dim_t n=0;
1105 if (faces[0] == 0) {
1106 index.push_back(2*(myN0+myN1+1));
1107 index.push_back(2*(myN0+myN1+1)+1);
1108 n+=2;
1109 if (faces[2] == 0) {
1110 index.push_back(0);
1111 index.push_back(1);
1112 index.push_back(2);
1113 n+=3;
1114 }
1115 } else if (faces[2] == 0) {
1116 index.push_back(1);
1117 index.push_back(2);
1118 n+=2;
1119 }
1120 // n=neighbours of bottom-left corner node
1121 ptr.push_back(ptr.back()+n);
1122 n=0;
1123 if (faces[2] == 0) {
1124 for (dim_t i=1; i<myN0-1; i++) {
1125 index.push_back(i);
1126 index.push_back(i+1);
1127 index.push_back(i+2);
1128 ptr.push_back(ptr.back()+3);
1129 }
1130 index.push_back(myN0-1);
1131 index.push_back(myN0);
1132 n+=2;
1133 if (faces[1] == 0) {
1134 index.push_back(myN0+1);
1135 index.push_back(myN0+2);
1136 index.push_back(myN0+3);
1137 n+=3;
1138 }
1139 } else {
1140 for (dim_t i=1; i<myN0-1; i++) {
1141 ptr.push_back(ptr.back());
1142 }
1143 if (faces[1] == 0) {
1144 index.push_back(myN0+2);
1145 index.push_back(myN0+3);
1146 n+=2;
1147 }
1148 }
1149 // n=neighbours of bottom-right corner node
1150 ptr.push_back(ptr.back()+n);
1151
1152 // 2nd row to 2nd last row
1153 for (dim_t i=1; i<myN1-1; i++) {
1154 // left edge
1155 if (faces[0] == 0) {
1156 index.push_back(2*(myN0+myN1+2)-i);
1157 index.push_back(2*(myN0+myN1+2)-i-1);
1158 index.push_back(2*(myN0+myN1+2)-i-2);
1159 ptr.push_back(ptr.back()+3);
1160 } else {
1161 ptr.push_back(ptr.back());
1162 }
1163 for (dim_t j=1; j<myN0-1; j++) {
1164 ptr.push_back(ptr.back());
1165 }
1166 // right edge
1167 if (faces[1] == 0) {
1168 index.push_back(myN0+i+1);
1169 index.push_back(myN0+i+2);
1170 index.push_back(myN0+i+3);
1171 ptr.push_back(ptr.back()+3);
1172 } else {
1173 ptr.push_back(ptr.back());
1174 }
1175 }
1176
1177 // top edge
1178 n=0;
1179 if (faces[0] == 0) {
1180 index.push_back(2*myN0+myN1+5);
1181 index.push_back(2*myN0+myN1+4);
1182 n+=2;
1183 if (faces[3] == 0) {
1184 index.push_back(2*myN0+myN1+3);
1185 index.push_back(2*myN0+myN1+2);
1186 index.push_back(2*myN0+myN1+1);
1187 n+=3;
1188 }
1189 } else if (faces[3] == 0) {
1190 index.push_back(2*myN0+myN1+2);
1191 index.push_back(2*myN0+myN1+1);
1192 n+=2;
1193 }
1194 // n=neighbours of top-left corner node
1195 ptr.push_back(ptr.back()+n);
1196 n=0;
1197 if (faces[3] == 0) {
1198 for (dim_t i=1; i<myN0-1; i++) {
1199 index.push_back(2*myN0+myN1+i+1);
1200 index.push_back(2*myN0+myN1+i);
1201 index.push_back(2*myN0+myN1+i-1);
1202 ptr.push_back(ptr.back()+3);
1203 }
1204 index.push_back(myN0+myN1+4);
1205 index.push_back(myN0+myN1+3);
1206 n+=2;
1207 if (faces[1] == 0) {
1208 index.push_back(myN0+myN1+2);
1209 index.push_back(myN0+myN1+1);
1210 index.push_back(myN0+myN1);
1211 n+=3;
1212 }
1213 } else {
1214 for (dim_t i=1; i<myN0-1; i++) {
1215 ptr.push_back(ptr.back());
1216 }
1217 if (faces[1] == 0) {
1218 index.push_back(myN0+myN1+1);
1219 index.push_back(myN0+myN1);
1220 n+=2;
1221 }
1222 }
1223 // n=neighbours of top-right corner node
1224 ptr.push_back(ptr.back()+n);
1225
1226 dim_t M=ptr.size()-1;
1227 map<index_t,index_t> idMap;
1228 dim_t N=0;
1229 for (IndexVector::iterator it=index.begin(); it!=index.end(); it++) {
1230 if (idMap.find(*it)==idMap.end()) {
1231 idMap[*it]=N;
1232 *it=N++;
1233 } else {
1234 *it=idMap[*it];
1235 }
1236 }
1237
1238 /*
1239 cout << "--- colCouple_pattern ---" << endl;
1240 cout << "M=" << M << ", N=" << N << endl;
1241 for (size_t i=0; i<ptr.size(); i++) {
1242 cout << "ptr[" << i << "]=" << ptr[i] << endl;
1243 }
1244 for (size_t i=0; i<index.size(); i++) {
1245 cout << "index[" << i << "]=" << index[i] << endl;
1246 }
1247 */
1248
1249 // now build the row couple pattern
1250 IndexVector ptr2(1,0);
1251 IndexVector index2;
1252 for (dim_t id=0; id<N; id++) {
1253 n=0;
1254 for (dim_t i=0; i<M; i++) {
1255 for (dim_t j=ptr[i]; j<ptr[i+1]; j++) {
1256 if (index[j]==id) {
1257 index2.push_back(i);
1258 n++;
1259 break;
1260 }
1261 }
1262 }
1263 ptr2.push_back(ptr2.back()+n);
1264 }
1265
1266 /*
1267 cout << "--- rowCouple_pattern ---" << endl;
1268 cout << "M=" << N << ", N=" << M << endl;
1269 for (size_t i=0; i<ptr2.size(); i++) {
1270 cout << "ptr[" << i << "]=" << ptr2[i] << endl;
1271 }
1272 for (size_t i=0; i<index2.size(); i++) {
1273 cout << "index[" << i << "]=" << index2[i] << endl;
1274 }
1275 */
1276
1277 // paso will manage the memory
1278 index_t* indexC = MEMALLOC(index.size(), index_t);
1279 index_t* ptrC = MEMALLOC(ptr.size(), index_t);
1280 copy(index.begin(), index.end(), indexC);
1281 copy(ptr.begin(), ptr.end(), ptrC);
1282 *colPattern=Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT, M, N, ptrC, indexC);
1283
1284 // paso will manage the memory
1285 indexC = MEMALLOC(index2.size(), index_t);
1286 ptrC = MEMALLOC(ptr2.size(), index_t);
1287 copy(index2.begin(), index2.end(), indexC);
1288 copy(ptr2.begin(), ptr2.end(), ptrC);
1289 *rowPattern=Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT, N, M, ptrC, indexC);
1290 }
1291
1292 //protected
1293 void Rectangle::interpolateNodesOnElements(escript::Data& out,
1294 escript::Data& in, bool reduced) const
1295 {
1296 const dim_t numComp = in.getDataPointSize();
1297 if (reduced) {
1298 /* GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS TOP */
1299 const double tmp0_0 = 0.25000000000000000000;
1300 #pragma omp parallel for
1301 for (index_t k1=0; k1 < m_NE1; ++k1) {
1302 for (index_t k0=0; k0 < m_NE0; ++k0) {
1303 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0));
1304 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0));
1305 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0));
1306 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0));
1307 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0));
1308 for (index_t i=0; i < numComp; ++i) {
1309 o[INDEX2(i,numComp,0)] = tmp0_0*(f_00[i] + f_01[i] + f_10[i] + f_11[i]);
1310 } /* end of component loop i */
1311 } /* end of k0 loop */
1312 } /* end of k1 loop */
1313 /* GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS BOTTOM */
1314 } else {
1315 /* GENERATOR SNIP_INTERPOLATE_ELEMENTS TOP */
1316 const double tmp0_2 = 0.62200846792814621559;
1317 const double tmp0_1 = 0.044658198738520451079;
1318 const double tmp0_0 = 0.16666666666666666667;
1319 #pragma omp parallel for
1320 for (index_t k1=0; k1 < m_NE1; ++k1) {
1321 for (index_t k0=0; k0 < m_NE0; ++k0) {
1322 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0));
1323 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0));
1324 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0));
1325 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0));
1326 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0));
1327 for (index_t i=0; i < numComp; ++i) {
1328 o[INDEX2(i,numComp,0)] = f_00[i]*tmp0_2 + f_11[i]*tmp0_1 + tmp0_0*(f_01[i] + f_10[i]);
1329 o[INDEX2(i,numComp,1)] = f_01[i]*tmp0_1 + f_10[i]*tmp0_2 + tmp0_0*(f_00[i] + f_11[i]);
1330 o[INDEX2(i,numComp,2)] = f_01[i]*tmp0_2 + f_10[i]*tmp0_1 + tmp0_0*(f_00[i] + f_11[i]);
1331 o[INDEX2(i,numComp,3)] = f_00[i]*tmp0_1 + f_11[i]*tmp0_2 + tmp0_0*(f_01[i] + f_10[i]);
1332 } /* end of component loop i */
1333 } /* end of k0 loop */
1334 } /* end of k1 loop */
1335 /* GENERATOR SNIP_INTERPOLATE_ELEMENTS BOTTOM */
1336 }
1337 }
1338
1339 //protected
1340 void Rectangle::interpolateNodesOnFaces(escript::Data& out, escript::Data& in,
1341 bool reduced) const
1342 {
1343 const dim_t numComp = in.getDataPointSize();
1344 if (reduced) {
1345 /* GENERATOR SNIP_INTERPOLATE_REDUCED_FACES TOP */
1346 if (m_faceOffset[0] > -1) {
1347 const double tmp0_0 = 0.50000000000000000000;
1348 #pragma omp parallel for
1349 for (index_t k1=0; k1 < m_NE1; ++k1) {
1350 const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0));
1351 const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0));
1352 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
1353 for (index_t i=0; i < numComp; ++i) {
1354 o[INDEX2(i,numComp,0)] = tmp0_0*(f_00[i] + f_01[i]);
1355 } /* end of component loop i */
1356 } /* end of k1 loop */
1357 } /* end of face 0 */
1358 if (m_faceOffset[1] > -1) {
1359 const double tmp0_0 = 0.50000000000000000000;
1360 #pragma omp parallel for
1361 for (index_t k1=0; k1 < m_NE1; ++k1) {
1362 const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0));
1363 const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0));
1364 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
1365 for (index_t i=0; i < numComp; ++i) {
1366 o[INDEX2(i,numComp,0)] = tmp0_0*(f_10[i] + f_11[i]);
1367 } /* end of component loop i */
1368 } /* end of k1 loop */
1369 } /* end of face 1 */
1370 if (m_faceOffset[2] > -1) {
1371 const double tmp0_0 = 0.50000000000000000000;
1372 #pragma omp parallel for
1373 for (index_t k0=0; k0 < m_NE0; ++k0) {
1374 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0));
1375 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0));
1376 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1377 for (index_t i=0; i < numComp; ++i) {
1378 o[INDEX2(i,numComp,0)] = tmp0_0*(f_00[i] + f_10[i]);
1379 } /* end of component loop i */
1380 } /* end of k0 loop */
1381 } /* end of face 2 */
1382 if (m_faceOffset[3] > -1) {
1383 const double tmp0_0 = 0.50000000000000000000;
1384 #pragma omp parallel for
1385 for (index_t k0=0; k0 < m_NE0; ++k0) {
1386 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0));
1387 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0));
1388 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1389 for (index_t i=0; i < numComp; ++i) {
1390 o[INDEX2(i,numComp,0)] = tmp0_0*(f_01[i] + f_11[i]);
1391 } /* end of component loop i */
1392 } /* end of k0 loop */
1393 } /* end of face 3 */
1394 /* GENERATOR SNIP_INTERPOLATE_REDUCED_FACES BOTTOM */
1395 } else {
1396 /* GENERATOR SNIP_INTERPOLATE_FACES TOP */
1397 if (m_faceOffset[0] > -1) {
1398 const double tmp0_1 = 0.78867513459481288225;
1399 const double tmp0_0 = 0.21132486540518711775;
1400 #pragma omp parallel for
1401 for (index_t k1=0; k1 < m_NE1; ++k1) {
1402 const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0));
1403 const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0));
1404 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
1405 for (index_t i=0; i < numComp; ++i) {
1406 o[INDEX2(i,numComp,0)] = f_00[i]*tmp0_1 + f_01[i]*tmp0_0;
1407 o[INDEX2(i,numComp,1)] = f_00[i]*tmp0_0 + f_01[i]*tmp0_1;
1408 } /* end of component loop i */
1409 } /* end of k1 loop */
1410 } /* end of face 0 */
1411 if (m_faceOffset[1] > -1) {
1412 const double tmp0_1 = 0.21132486540518711775;
1413 const double tmp0_0 = 0.78867513459481288225;
1414 #pragma omp parallel for
1415 for (index_t k1=0; k1 < m_NE1; ++k1) {
1416 const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0));
1417 const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0));
1418 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
1419 for (index_t i=0; i < numComp; ++i) {
1420 o[INDEX2(i,numComp,0)] = f_10[i]*tmp0_0 + f_11[i]*tmp0_1;
1421 o[INDEX2(i,numComp,1)] = f_10[i]*tmp0_1 + f_11[i]*tmp0_0;
1422 } /* end of component loop i */
1423 } /* end of k1 loop */
1424 } /* end of face 1 */
1425 if (m_faceOffset[2] > -1) {
1426 const double tmp0_1 = 0.78867513459481288225;
1427 const double tmp0_0 = 0.21132486540518711775;
1428 #pragma omp parallel for
1429 for (index_t k0=0; k0 < m_NE0; ++k0) {
1430 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0));
1431 const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0));
1432 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1433 for (index_t i=0; i < numComp; ++i) {
1434 o[INDEX2(i,numComp,0)] = f_00[i]*tmp0_1 + f_10[i]*tmp0_0;
1435 o[INDEX2(i,numComp,1)] = f_00[i]*tmp0_0 + f_10[i]*tmp0_1;
1436 } /* end of component loop i */
1437 } /* end of k0 loop */
1438 } /* end of face 2 */
1439 if (m_faceOffset[3] > -1) {
1440 const double tmp0_1 = 0.78867513459481288225;
1441 const double tmp0_0 = 0.21132486540518711775;
1442 #pragma omp parallel for
1443 for (index_t k0=0; k0 < m_NE0; ++k0) {
1444 const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0));
1445 const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0));
1446 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1447 for (index_t i=0; i < numComp; ++i) {
1448 o[INDEX2(i,numComp,0)] = f_01[i]*tmp0_1 + f_11[i]*tmp0_0;
1449 o[INDEX2(i,numComp,1)] = f_01[i]*tmp0_0 + f_11[i]*tmp0_1;
1450 } /* end of component loop i */
1451 } /* end of k0 loop */
1452 } /* end of face 3 */
1453 /* GENERATOR SNIP_INTERPOLATE_FACES BOTTOM */
1454 }
1455 }
1456
1457 } // end of namespace ripley
1458

  ViewVC Help
Powered by ViewVC 1.1.26