/[escript]/branches/ripleygmg_from_3668/ripley/src/Brick.cpp
ViewVC logotype

Annotation of /branches/ripleygmg_from_3668/ripley/src/Brick.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3703 - (hide annotations)
Sun Dec 4 23:42:52 2011 UTC (7 years, 5 months ago) by caltinay
File size: 31602 byte(s)
Gradient and interpolation on elements for Brick domain.

1 caltinay 3691
2     /*******************************************************
3     *
4     * Copyright (c) 2003-2011 by University of Queensland
5     * Earth Systems Science Computational Center (ESSCC)
6     * http://www.uq.edu.au/esscc
7     *
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13    
14     #include <ripley/Brick.h>
15     extern "C" {
16     #include "paso/SystemMatrixPattern.h"
17     }
18    
19     #if USE_SILO
20     #include <silo.h>
21     #ifdef ESYS_MPI
22     #include <pmpio.h>
23     #endif
24     #endif
25    
26     #include <iomanip>
27    
28     using namespace std;
29    
30     namespace ripley {
31    
32     Brick::Brick(int n0, int n1, int n2, double l0, double l1, double l2, int d0,
33     int d1, int d2) :
34     RipleyDomain(3),
35     m_gNE0(n0),
36     m_gNE1(n1),
37     m_gNE2(n2),
38     m_l0(l0),
39     m_l1(l1),
40     m_l2(l2),
41     m_NX(d0),
42     m_NY(d1),
43     m_NZ(d2)
44     {
45     // ensure number of subdivisions is valid and nodes can be distributed
46     // among number of ranks
47     if (m_NX*m_NY*m_NZ != m_mpiInfo->size)
48     throw RipleyException("Invalid number of spatial subdivisions");
49    
50     if (n0%m_NX > 0 || n1%m_NY > 0 || n2%m_NZ > 0)
51     throw RipleyException("Number of elements must be separable into number of ranks in each dimension");
52    
53     // local number of elements
54     m_NE0 = n0/m_NX;
55     m_NE1 = n1/m_NY;
56     m_NE2 = n2/m_NZ;
57     // local number of nodes (not necessarily owned)
58     m_N0 = m_NE0+1;
59     m_N1 = m_NE1+1;
60     m_N2 = m_NE2+1;
61     // bottom-left-front node is at (offset0,offset1,offset2) in global mesh
62     m_offset0 = m_NE0*(m_mpiInfo->rank%m_NX);
63     m_offset1 = m_NE1*(m_mpiInfo->rank%(m_NX*m_NY)/m_NX);
64     m_offset2 = m_NE2*(m_mpiInfo->rank/(m_NX*m_NY));
65     populateSampleIds();
66     }
67    
68    
69     Brick::~Brick()
70     {
71     }
72    
73     string Brick::getDescription() const
74     {
75     return "ripley::Brick";
76     }
77    
78     bool Brick::operator==(const AbstractDomain& other) const
79     {
80     if (dynamic_cast<const Brick*>(&other))
81     return this==&other;
82    
83     return false;
84     }
85    
86     void Brick::dump(const string& fileName) const
87     {
88     #if USE_SILO
89     string fn(fileName);
90     if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
91     fn+=".silo";
92     }
93    
94     const int NUM_SILO_FILES = 1;
95     const char* blockDirFmt = "/block%04d";
96     int driver=DB_HDF5;
97     string siloPath;
98     DBfile* dbfile = NULL;
99    
100     #ifdef ESYS_MPI
101     PMPIO_baton_t* baton = NULL;
102     #endif
103    
104     if (m_mpiInfo->size > 1) {
105     #ifdef ESYS_MPI
106     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
107     0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
108     PMPIO_DefaultClose, (void*)&driver);
109     // try the fallback driver in case of error
110     if (!baton && driver != DB_PDB) {
111     driver = DB_PDB;
112     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
113     0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
114     PMPIO_DefaultClose, (void*)&driver);
115     }
116     if (baton) {
117     char str[64];
118     snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
119     siloPath = str;
120     dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
121     }
122     #endif
123     } else {
124     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
125     getDescription().c_str(), driver);
126     // try the fallback driver in case of error
127     if (!dbfile && driver != DB_PDB) {
128     driver = DB_PDB;
129     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
130     getDescription().c_str(), driver);
131     }
132     }
133    
134     if (!dbfile)
135     throw RipleyException("dump: Could not create Silo file");
136    
137     /*
138     if (driver==DB_HDF5) {
139     // gzip level 1 already provides good compression with minimal
140     // performance penalty. Some tests showed that gzip levels >3 performed
141     // rather badly on escript data both in terms of time and space
142     DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
143     }
144     */
145    
146     boost::scoped_ptr<double> x(new double[m_N0]);
147     boost::scoped_ptr<double> y(new double[m_N1]);
148     boost::scoped_ptr<double> z(new double[m_N2]);
149     double* coords[3] = { x.get(), y.get(), z.get() };
150 caltinay 3698 pair<double,double> xdx = getFirstCoordAndSpacing(0);
151     pair<double,double> ydy = getFirstCoordAndSpacing(1);
152     pair<double,double> zdz = getFirstCoordAndSpacing(2);
153 caltinay 3691 #pragma omp parallel
154     {
155     #pragma omp for
156     for (dim_t i0 = 0; i0 < m_N0; i0++) {
157 caltinay 3698 coords[0][i0]=xdx.first+i0*xdx.second;
158 caltinay 3691 }
159     #pragma omp for
160     for (dim_t i1 = 0; i1 < m_N1; i1++) {
161 caltinay 3698 coords[1][i1]=ydy.first+i1*ydy.second;
162 caltinay 3691 }
163     #pragma omp for
164     for (dim_t i2 = 0; i2 < m_N2; i2++) {
165 caltinay 3698 coords[2][i2]=zdz.first+i2*zdz.second;
166 caltinay 3691 }
167     }
168 caltinay 3698 IndexVector dims = getNumNodesPerDim();
169     DBPutQuadmesh(dbfile, "mesh", NULL, coords, &dims[0], 3, DB_DOUBLE,
170 caltinay 3691 DB_COLLINEAR, NULL);
171    
172 caltinay 3698 DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], &dims[0], 3,
173     NULL, 0, DB_INT, DB_NODECENT, NULL);
174 caltinay 3691
175 caltinay 3698 // write element ids
176     dims = getNumElementsPerDim();
177     DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
178     &dims[0], 3, NULL, 0, DB_INT, DB_ZONECENT, NULL);
179    
180     // rank 0 writes multimesh and multivar
181 caltinay 3691 if (m_mpiInfo->rank == 0) {
182     vector<string> tempstrings;
183     vector<char*> names;
184     for (dim_t i=0; i<m_mpiInfo->size; i++) {
185     stringstream path;
186     path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
187     tempstrings.push_back(path.str());
188     names.push_back((char*)tempstrings.back().c_str());
189     }
190     vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
191     DBSetDir(dbfile, "/");
192     DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
193     &types[0], NULL);
194     tempstrings.clear();
195     names.clear();
196     for (dim_t i=0; i<m_mpiInfo->size; i++) {
197     stringstream path;
198     path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
199     tempstrings.push_back(path.str());
200     names.push_back((char*)tempstrings.back().c_str());
201     }
202     types.assign(m_mpiInfo->size, DB_QUADVAR);
203     DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
204     &types[0], NULL);
205 caltinay 3698 tempstrings.clear();
206     names.clear();
207     for (dim_t i=0; i<m_mpiInfo->size; i++) {
208     stringstream path;
209     path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
210     tempstrings.push_back(path.str());
211     names.push_back((char*)tempstrings.back().c_str());
212     }
213     DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
214     &types[0], NULL);
215 caltinay 3691 }
216    
217     if (m_mpiInfo->size > 1) {
218     #ifdef ESYS_MPI
219     PMPIO_HandOffBaton(baton, dbfile);
220     PMPIO_Finish(baton);
221     #endif
222     } else {
223     DBClose(dbfile);
224     }
225    
226     #else // USE_SILO
227     throw RipleyException("dump(): no Silo support");
228     #endif
229     }
230    
231     const int* Brick::borrowSampleReferenceIDs(int fsType) const
232     {
233 caltinay 3697 switch (fsType) {
234     case Nodes:
235     return &m_nodeId[0];
236     case Elements:
237     return &m_elementId[0];
238     case FaceElements:
239     return &m_faceId[0];
240     default:
241     break;
242     }
243 caltinay 3691
244 caltinay 3697 stringstream msg;
245     msg << "borrowSampleReferenceIDs() not implemented for function space type "
246     << fsType;
247     throw RipleyException(msg.str());
248 caltinay 3691 }
249    
250     bool Brick::ownSample(int fsCode, index_t id) const
251     {
252     #ifdef ESYS_MPI
253     if (fsCode == Nodes) {
254 caltinay 3698 const index_t myFirst=m_nodeDistribution[m_mpiInfo->rank];
255     const index_t myLast=m_nodeDistribution[m_mpiInfo->rank+1]-1;
256 caltinay 3691 return (m_nodeId[id]>=myFirst && m_nodeId[id]<=myLast);
257     } else
258     throw RipleyException("ownSample() only implemented for Nodes");
259     #else
260     return true;
261     #endif
262     }
263    
264 caltinay 3703 void Brick::setToGradient(escript::Data& out, const escript::Data& cIn) const
265     {
266     escript::Data& in = *const_cast<escript::Data*>(&cIn);
267     const dim_t numComp = in.getDataPointSize();
268     const double h0 = m_l0/m_gNE0;
269     const double h1 = m_l1/m_gNE1;
270     const double h2 = m_l1/m_gNE2;
271     if (out.getFunctionSpace().getTypeCode() == Elements) {
272     /* GENERATOR SNIP_GRAD_ELEMENTS TOP */
273     const double tmp0_22 = -0.044658198738520451079/h1;
274     const double tmp0_16 = 0.16666666666666666667/h0;
275     const double tmp0_33 = -0.62200846792814621559/h1;
276     const double tmp0_0 = -0.62200846792814621559/h0;
277     const double tmp0_21 = -0.16666666666666666667/h1;
278     const double tmp0_17 = 0.62200846792814621559/h0;
279     const double tmp0_52 = -0.044658198738520451079/h2;
280     const double tmp0_1 = -0.16666666666666666667/h0;
281     const double tmp0_20 = -0.62200846792814621559/h1;
282     const double tmp0_14 = -0.044658198738520451079/h0;
283     const double tmp0_53 = -0.62200846792814621559/h2;
284     const double tmp0_49 = 0.16666666666666666667/h2;
285     const double tmp0_2 = 0.16666666666666666667/h0;
286     const double tmp0_27 = -0.044658198738520451079/h1;
287     const double tmp0_15 = -0.16666666666666666667/h0;
288     const double tmp0_50 = -0.16666666666666666667/h2;
289     const double tmp0_48 = 0.62200846792814621559/h2;
290     const double tmp0_3 = 0.044658198738520451079/h0;
291     const double tmp0_26 = -0.16666666666666666667/h1;
292     const double tmp0_12 = -0.62200846792814621559/h0;
293     const double tmp0_51 = 0.044658198738520451079/h2;
294     const double tmp0_25 = 0.62200846792814621559/h1;
295     const double tmp0_13 = 0.16666666666666666667/h0;
296     const double tmp0_56 = 0.16666666666666666667/h2;
297     const double tmp0_24 = 0.16666666666666666667/h1;
298     const double tmp0_10 = 0.62200846792814621559/h0;
299     const double tmp0_57 = 0.62200846792814621559/h2;
300     const double tmp0_11 = -0.16666666666666666667/h0;
301     const double tmp0_54 = -0.044658198738520451079/h2;
302     const double tmp0_38 = 0.16666666666666666667/h1;
303     const double tmp0_34 = -0.044658198738520451079/h1;
304     const double tmp0_42 = 0.16666666666666666667/h2;
305     const double tmp0_35 = -0.16666666666666666667/h1;
306     const double tmp0_36 = -0.62200846792814621559/h1;
307     const double tmp0_41 = 0.62200846792814621559/h2;
308     const double tmp0_8 = 0.044658198738520451079/h0;
309     const double tmp0_37 = 0.62200846792814621559/h1;
310     const double tmp0_29 = 0.16666666666666666667/h1;
311     const double tmp0_40 = -0.62200846792814621559/h2;
312     const double tmp0_9 = 0.16666666666666666667/h0;
313     const double tmp0_30 = 0.62200846792814621559/h1;
314     const double tmp0_28 = -0.16666666666666666667/h1;
315     const double tmp0_43 = 0.044658198738520451079/h2;
316     const double tmp0_32 = 0.16666666666666666667/h1;
317     const double tmp0_31 = 0.044658198738520451079/h1;
318     const double tmp0_39 = 0.044658198738520451079/h1;
319     const double tmp0_58 = -0.62200846792814621559/h2;
320     const double tmp0_55 = 0.044658198738520451079/h2;
321     const double tmp0_18 = -0.62200846792814621559/h0;
322     const double tmp0_45 = -0.16666666666666666667/h2;
323     const double tmp0_59 = -0.16666666666666666667/h2;
324     const double tmp0_4 = -0.044658198738520451079/h0;
325     const double tmp0_19 = 0.044658198738520451079/h0;
326     const double tmp0_44 = -0.044658198738520451079/h2;
327     const double tmp0_5 = 0.62200846792814621559/h0;
328     const double tmp0_47 = 0.16666666666666666667/h2;
329     const double tmp0_6 = -0.16666666666666666667/h0;
330     const double tmp0_23 = 0.044658198738520451079/h1;
331     const double tmp0_46 = -0.16666666666666666667/h2;
332     const double tmp0_7 = -0.044658198738520451079/h0;
333     #pragma omp parallel for
334     for (index_t k2 =0; k2 < m_NE2; ++k2) {
335     for (index_t k1 =0; k1 < m_NE1; ++k1) {
336     for (index_t k0 =0; k0 < m_NE0; ++k0) {
337     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
338     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
339     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
340     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
341     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
342     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
343     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
344     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
345     double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
346     for (index_t i=0; i < numComp; ++i) {
347     o[INDEX3(i,0,0,numComp,3)] = f_000[i]*tmp0_0 + f_011[i]*tmp0_4 + f_100[i]*tmp0_5 + f_111[i]*tmp0_3 + tmp0_1*(f_001[i] + f_010[i]) + tmp0_2*(f_101[i] + f_110[i]);
348     o[INDEX3(i,1,0,numComp,3)] = f_000[i]*tmp0_20 + f_010[i]*tmp0_25 + f_101[i]*tmp0_22 + f_111[i]*tmp0_23 + tmp0_21*(f_001[i] + f_100[i]) + tmp0_24*(f_011[i] + f_110[i]);
349     o[INDEX3(i,2,0,numComp,3)] = f_000[i]*tmp0_40 + f_001[i]*tmp0_41 + f_110[i]*tmp0_44 + f_111[i]*tmp0_43 + tmp0_42*(f_011[i] + f_101[i]) + tmp0_45*(f_010[i] + f_100[i]);
350     o[INDEX3(i,0,1,numComp,3)] = f_000[i]*tmp0_0 + f_011[i]*tmp0_4 + f_100[i]*tmp0_5 + f_111[i]*tmp0_3 + tmp0_1*(f_001[i] + f_010[i]) + tmp0_2*(f_101[i] + f_110[i]);
351     o[INDEX3(i,1,1,numComp,3)] = f_000[i]*tmp0_26 + f_001[i]*tmp0_27 + f_010[i]*tmp0_32 + f_011[i]*tmp0_31 + f_100[i]*tmp0_33 + f_101[i]*tmp0_28 + f_110[i]*tmp0_30 + f_111[i]*tmp0_29;
352     o[INDEX3(i,2,1,numComp,3)] = f_000[i]*tmp0_46 + f_001[i]*tmp0_47 + f_010[i]*tmp0_52 + f_011[i]*tmp0_51 + f_100[i]*tmp0_53 + f_101[i]*tmp0_48 + f_110[i]*tmp0_50 + f_111[i]*tmp0_49;
353     o[INDEX3(i,0,2,numComp,3)] = f_000[i]*tmp0_6 + f_001[i]*tmp0_7 + f_010[i]*tmp0_12 + f_011[i]*tmp0_11 + f_100[i]*tmp0_13 + f_101[i]*tmp0_8 + f_110[i]*tmp0_10 + f_111[i]*tmp0_9;
354     o[INDEX3(i,1,2,numComp,3)] = f_000[i]*tmp0_20 + f_010[i]*tmp0_25 + f_101[i]*tmp0_22 + f_111[i]*tmp0_23 + tmp0_21*(f_001[i] + f_100[i]) + tmp0_24*(f_011[i] + f_110[i]);
355     o[INDEX3(i,2,2,numComp,3)] = f_000[i]*tmp0_46 + f_001[i]*tmp0_47 + f_010[i]*tmp0_53 + f_011[i]*tmp0_48 + f_100[i]*tmp0_52 + f_101[i]*tmp0_51 + f_110[i]*tmp0_50 + f_111[i]*tmp0_49;
356     o[INDEX3(i,0,3,numComp,3)] = f_000[i]*tmp0_6 + f_001[i]*tmp0_7 + f_010[i]*tmp0_12 + f_011[i]*tmp0_11 + f_100[i]*tmp0_13 + f_101[i]*tmp0_8 + f_110[i]*tmp0_10 + f_111[i]*tmp0_9;
357     o[INDEX3(i,1,3,numComp,3)] = f_000[i]*tmp0_26 + f_001[i]*tmp0_27 + f_010[i]*tmp0_32 + f_011[i]*tmp0_31 + f_100[i]*tmp0_33 + f_101[i]*tmp0_28 + f_110[i]*tmp0_30 + f_111[i]*tmp0_29;
358     o[INDEX3(i,2,3,numComp,3)] = f_000[i]*tmp0_54 + f_001[i]*tmp0_55 + f_110[i]*tmp0_58 + f_111[i]*tmp0_57 + tmp0_56*(f_011[i] + f_101[i]) + tmp0_59*(f_010[i] + f_100[i]);
359     o[INDEX3(i,0,4,numComp,3)] = f_000[i]*tmp0_6 + f_001[i]*tmp0_12 + f_010[i]*tmp0_7 + f_011[i]*tmp0_11 + f_100[i]*tmp0_13 + f_101[i]*tmp0_10 + f_110[i]*tmp0_8 + f_111[i]*tmp0_9;
360     o[INDEX3(i,1,4,numComp,3)] = f_000[i]*tmp0_26 + f_001[i]*tmp0_33 + f_010[i]*tmp0_32 + f_011[i]*tmp0_30 + f_100[i]*tmp0_27 + f_101[i]*tmp0_28 + f_110[i]*tmp0_31 + f_111[i]*tmp0_29;
361     o[INDEX3(i,2,4,numComp,3)] = f_000[i]*tmp0_40 + f_001[i]*tmp0_41 + f_110[i]*tmp0_44 + f_111[i]*tmp0_43 + tmp0_42*(f_011[i] + f_101[i]) + tmp0_45*(f_010[i] + f_100[i]);
362     o[INDEX3(i,0,5,numComp,3)] = f_000[i]*tmp0_6 + f_001[i]*tmp0_12 + f_010[i]*tmp0_7 + f_011[i]*tmp0_11 + f_100[i]*tmp0_13 + f_101[i]*tmp0_10 + f_110[i]*tmp0_8 + f_111[i]*tmp0_9;
363     o[INDEX3(i,1,5,numComp,3)] = f_000[i]*tmp0_34 + f_010[i]*tmp0_39 + f_101[i]*tmp0_36 + f_111[i]*tmp0_37 + tmp0_35*(f_001[i] + f_100[i]) + tmp0_38*(f_011[i] + f_110[i]);
364     o[INDEX3(i,2,5,numComp,3)] = f_000[i]*tmp0_46 + f_001[i]*tmp0_47 + f_010[i]*tmp0_52 + f_011[i]*tmp0_51 + f_100[i]*tmp0_53 + f_101[i]*tmp0_48 + f_110[i]*tmp0_50 + f_111[i]*tmp0_49;
365     o[INDEX3(i,0,6,numComp,3)] = f_000[i]*tmp0_14 + f_011[i]*tmp0_18 + f_100[i]*tmp0_19 + f_111[i]*tmp0_17 + tmp0_15*(f_001[i] + f_010[i]) + tmp0_16*(f_101[i] + f_110[i]);
366     o[INDEX3(i,1,6,numComp,3)] = f_000[i]*tmp0_26 + f_001[i]*tmp0_33 + f_010[i]*tmp0_32 + f_011[i]*tmp0_30 + f_100[i]*tmp0_27 + f_101[i]*tmp0_28 + f_110[i]*tmp0_31 + f_111[i]*tmp0_29;
367     o[INDEX3(i,2,6,numComp,3)] = f_000[i]*tmp0_46 + f_001[i]*tmp0_47 + f_010[i]*tmp0_53 + f_011[i]*tmp0_48 + f_100[i]*tmp0_52 + f_101[i]*tmp0_51 + f_110[i]*tmp0_50 + f_111[i]*tmp0_49;
368     o[INDEX3(i,0,7,numComp,3)] = f_000[i]*tmp0_14 + f_011[i]*tmp0_18 + f_100[i]*tmp0_19 + f_111[i]*tmp0_17 + tmp0_15*(f_001[i] + f_010[i]) + tmp0_16*(f_101[i] + f_110[i]);
369     o[INDEX3(i,1,7,numComp,3)] = f_000[i]*tmp0_34 + f_010[i]*tmp0_39 + f_101[i]*tmp0_36 + f_111[i]*tmp0_37 + tmp0_35*(f_001[i] + f_100[i]) + tmp0_38*(f_011[i] + f_110[i]);
370     o[INDEX3(i,2,7,numComp,3)] = f_000[i]*tmp0_54 + f_001[i]*tmp0_55 + f_110[i]*tmp0_58 + f_111[i]*tmp0_57 + tmp0_56*(f_011[i] + f_101[i]) + tmp0_59*(f_010[i] + f_100[i]);
371     } /* end of component loop i */
372     } /* end of k0 loop */
373     } /* end of k1 loop */
374     } /* end of k2 loop */
375     /* GENERATOR SNIP_GRAD_ELEMENTS BOTTOM */
376     } else {
377     throw RipleyException("setToGradient() not implemented");
378     }
379     }
380    
381 caltinay 3691 Paso_SystemMatrixPattern* Brick::getPattern(bool reducedRowOrder,
382     bool reducedColOrder) const
383     {
384     if (reducedRowOrder || reducedColOrder)
385     throw RipleyException("getPattern() not implemented for reduced order");
386    
387     throw RipleyException("getPattern() not implemented");
388     }
389    
390     void Brick::Print_Mesh_Info(const bool full) const
391     {
392     RipleyDomain::Print_Mesh_Info(full);
393     if (full) {
394     cout << " Id Coordinates" << endl;
395     cout.precision(15);
396     cout.setf(ios::scientific, ios::floatfield);
397 caltinay 3698 pair<double,double> xdx = getFirstCoordAndSpacing(0);
398     pair<double,double> ydy = getFirstCoordAndSpacing(1);
399     pair<double,double> zdz = getFirstCoordAndSpacing(2);
400 caltinay 3691 for (index_t i=0; i < getNumNodes(); i++) {
401     cout << " " << setw(5) << m_nodeId[i]
402 caltinay 3698 << " " << xdx.first+(i%m_N0)*xdx.second
403     << " " << ydy.first+(i%(m_N0*m_N1)/m_N0)*ydy.second
404     << " " << zdz.first+(i/(m_N0*m_N1))*zdz.second << endl;
405 caltinay 3691 }
406     }
407     }
408    
409 caltinay 3698 IndexVector Brick::getNumNodesPerDim() const
410     {
411     IndexVector ret;
412     ret.push_back(m_N0);
413     ret.push_back(m_N1);
414     ret.push_back(m_N2);
415     return ret;
416     }
417    
418     IndexVector Brick::getNumElementsPerDim() const
419     {
420     IndexVector ret;
421     ret.push_back(m_NE0);
422     ret.push_back(m_NE1);
423     ret.push_back(m_NE2);
424     return ret;
425     }
426    
427     IndexVector Brick::getNumFacesPerBoundary() const
428     {
429     IndexVector ret(6, 0);
430     //left
431     if (m_offset0==0)
432     ret[0]=m_NE1*m_NE2;
433     //right
434     if (m_mpiInfo->rank%m_NX==m_NX-1)
435     ret[1]=m_NE1*m_NE2;
436     //bottom
437     if (m_offset1==0)
438     ret[2]=m_NE0*m_NE2;
439     //top
440     if (m_mpiInfo->rank%(m_NX*m_NY)/m_NX==m_NY-1)
441     ret[3]=m_NE0*m_NE2;
442     //front
443     if (m_offset2==0)
444     ret[4]=m_NE0*m_NE1;
445     //back
446     if (m_mpiInfo->rank/(m_NX*m_NY)==m_NZ-1)
447     ret[5]=m_NE0*m_NE1;
448     return ret;
449     }
450    
451     pair<double,double> Brick::getFirstCoordAndSpacing(dim_t dim) const
452     {
453     if (dim==0)
454     return pair<double,double>((m_l0*m_offset0)/m_gNE0, m_l0/m_gNE0);
455     else if (dim==1)
456     return pair<double,double>((m_l1*m_offset1)/m_gNE1, m_l1/m_gNE1);
457     else if (dim==2)
458     return pair<double,double>((m_l2*m_offset2)/m_gNE2, m_l2/m_gNE2);
459    
460     throw RipleyException("getFirstCoordAndSpacing(): invalid argument");
461     }
462    
463    
464 caltinay 3691 //protected
465     dim_t Brick::getNumFaceElements() const
466     {
467     dim_t n=0;
468     //left
469     if (m_offset0==0)
470     n+=m_NE1*m_NE2;
471     //right
472     if (m_mpiInfo->rank%m_NX==m_NX-1)
473     n+=m_NE1*m_NE2;
474     //bottom
475     if (m_offset1==0)
476     n+=m_NE0*m_NE2;
477     //top
478     if (m_mpiInfo->rank%(m_NX*m_NY)/m_NX==m_NY-1)
479     n+=m_NE0*m_NE2;
480     //front
481     if (m_offset2==0)
482     n+=m_NE0*m_NE1;
483     //back
484     if (m_mpiInfo->rank/(m_NX*m_NY)==m_NZ-1)
485     n+=m_NE0*m_NE1;
486    
487     return n;
488     }
489    
490     //protected
491     void Brick::assembleCoordinates(escript::Data& arg) const
492     {
493     escriptDataC x = arg.getDataC();
494     int numDim = m_numDim;
495     if (!isDataPointShapeEqual(&x, 1, &numDim))
496     throw RipleyException("setToX: Invalid Data object shape");
497     if (!numSamplesEqual(&x, 1, getNumNodes()))
498     throw RipleyException("setToX: Illegal number of samples in Data object");
499    
500 caltinay 3698 pair<double,double> xdx = getFirstCoordAndSpacing(0);
501     pair<double,double> ydy = getFirstCoordAndSpacing(1);
502     pair<double,double> zdz = getFirstCoordAndSpacing(2);
503 caltinay 3691 arg.requireWrite();
504     #pragma omp parallel for
505     for (dim_t i2 = 0; i2 < m_N2; i2++) {
506     for (dim_t i1 = 0; i1 < m_N1; i1++) {
507     for (dim_t i0 = 0; i0 < m_N0; i0++) {
508     double* point = arg.getSampleDataRW(i0+m_N0*i1+m_N0*m_N1*i2);
509 caltinay 3698 point[0] = xdx.first+i0*xdx.second;
510     point[1] = ydy.first+i1*ydy.second;
511     point[2] = zdz.first+i2*zdz.second;
512 caltinay 3691 }
513     }
514     }
515     }
516    
517     //private
// Fills the id arrays of this rank:
//  - m_nodeDistribution: first node id of every rank plus a final entry
//    holding getNumDataPointsGlobal()
//  - m_nodeId: id of every local node; nodes on the left/bottom/front
//    planes of a rank that has a neighbour on that side receive an id from
//    the neighbouring rank's range, all other nodes are numbered
//    contiguously starting at this rank's first id
//  - m_elementId and m_faceId: plain local indices 0..n-1
518     void Brick::populateSampleIds()
519     {
520 caltinay 3697 // identifiers are ordered from left to right, bottom to top, front to back
521     // on each rank, except for the shared nodes which are owned by the rank
522     // below / to the left / to the front of the current rank
523 caltinay 3698
524     // build node distribution vector first.
525     // m_nodeDistribution[i] is the first node id on rank i, that is
526     // rank i owns m_nodeDistribution[i+1]-m_nodeDistribution[i] nodes
527     m_nodeDistribution.assign(m_mpiInfo->size+1, 0);
// rank 0 has no neighbours below/left/in front, so all its nodes count
528     m_nodeDistribution[1]=getNumNodes();
529     for (dim_t k=1; k<m_mpiInfo->size-1; k++) {
// position of rank k in the rank grid (k = x + m_NX*y + m_NX*m_NY*z)
530     const index_t x = k%m_NX;
531     const index_t y = k%(m_NX*m_NY)/m_NX;
532     const index_t z = k/(m_NX*m_NY);
533     index_t numNodes=getNumNodes();
// remove the planes that are shared with a lower neighbour
534     if (x>0)
535     numNodes-=m_N1*m_N2;
536     if (y>0)
537     numNodes-=m_N0*m_N2;
538     if (z>0)
539     numNodes-=m_N0*m_N1;
540     // if an edge was subtracted twice add it back
541     if (x>0 && y>0)
542     numNodes+=m_N2;
543     if (x>0 && z>0)
544     numNodes+=m_N1;
545     if (y>0 && z>0)
546     numNodes+=m_N0;
547     // the corner node was removed 3x and added back 3x, so subtract it
548     if (x>0 && y>0 && z>0)
549     numNodes--;
550     m_nodeDistribution[k+1]=m_nodeDistribution[k]+numNodes;
551     }
552     m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal();
553    
554 caltinay 3691 m_nodeId.resize(getNumNodes());
555 caltinay 3698
556     // the bottom, left and front planes are not owned by this rank so the
557     // identifiers need to be computed accordingly
// each flag is 1 if this rank has a neighbour on that side, else 0; the
// flags also serve as the first locally numbered index in that dimension
558     const index_t left = (m_offset0==0 ? 0 : 1);
559     const index_t bottom = (m_offset1==0 ? 0 : 1);
560     const index_t front = (m_offset2==0 ? 0 : 1);
561    
562     // case 1: all nodes on left plane are owned by rank on the left
563     if (left>0) {
564     const int neighbour=m_mpiInfo->rank-1;
// extent of the neighbour's own id block in x and y
565     const index_t leftN0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
566     const index_t leftN1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
567 caltinay 3691 #pragma omp parallel for
568 caltinay 3698 for (dim_t i2=front; i2<m_N2; i2++) {
569     for (dim_t i1=bottom; i1<m_N1; i1++) {
570     m_nodeId[i1*m_N0+i2*m_N0*m_N1]=m_nodeDistribution[neighbour]
571     + (i1-bottom+1)*leftN0
572     + (i2-front)*leftN0*leftN1 - 1;
573     }
574     }
575 caltinay 3691 }
576 caltinay 3698 // case 2: all nodes on bottom plane are owned by rank below
577     if (bottom>0) {
578     const int neighbour=m_mpiInfo->rank-m_NX;
579     const index_t bottomN0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
580     const index_t bottomN1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
581     #pragma omp parallel for
582     for (dim_t i2=front; i2<m_N2; i2++) {
583     for (dim_t i0=left; i0<m_N0; i0++) {
584     m_nodeId[i0+i2*m_N0*m_N1]=m_nodeDistribution[neighbour]
585     + bottomN0*(bottomN1-1)
586     + (i2-front)*bottomN0*bottomN1 + i0-left;
587     }
588     }
589     }
590     // case 3: all nodes on front plane are owned by rank in front
591     if (front>0) {
592     const int neighbour=m_mpiInfo->rank-m_NX*m_NY;
593     const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
594     const index_t N1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
595     const index_t N2=(neighbour/(m_NX*m_NY)==0 ? m_N2 : m_N2-1);
596     #pragma omp parallel for
597     for (dim_t i1=bottom; i1<m_N1; i1++) {
598     for (dim_t i0=left; i0<m_N0; i0++) {
599     m_nodeId[i0+i1*m_N0]=m_nodeDistribution[neighbour]
600     + N0*N1*(N2-1)+(i1-bottom)*N0 + i0-left;
601     }
602     }
603     }
604     // case 4: nodes on front bottom edge are owned by the corresponding rank
605     if (front>0 && bottom>0) {
606     const int neighbour=m_mpiInfo->rank-m_NX*(m_NY+1);
607     const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
608     const index_t N1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
609     const index_t N2=(neighbour/(m_NX*m_NY)==0 ? m_N2 : m_N2-1);
610     #pragma omp parallel for
611     for (dim_t i0=left; i0<m_N0; i0++) {
612     m_nodeId[i0]=m_nodeDistribution[neighbour]
613     + N0*N1*(N2-1)+(N1-1)*N0 + i0-left;
614     }
615     }
616     // case 5: nodes on left bottom edge are owned by the corresponding rank
617     if (left>0 && bottom>0) {
618     const int neighbour=m_mpiInfo->rank-m_NX-1;
619     const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
620     const index_t N1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
621     #pragma omp parallel for
622     for (dim_t i2=front; i2<m_N2; i2++) {
623     m_nodeId[i2*m_N0*m_N1]=m_nodeDistribution[neighbour]
624     + (1+i2-front)*N0*N1-1;
625     }
626     }
627     // case 6: nodes on left front edge are owned by the corresponding rank
628     if (left>0 && front>0) {
629     const int neighbour=m_mpiInfo->rank-m_NX*m_NY-1;
630     const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
631     const index_t N1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
632     const index_t N2=(neighbour/(m_NX*m_NY)==0 ? m_N2 : m_N2-1);
633     #pragma omp parallel for
634     for (dim_t i1=bottom; i1<m_N1; i1++) {
635     m_nodeId[i1*m_N0]=m_nodeDistribution[neighbour]
636     + N0*N1*(N2-1)+N0-1+(i1-bottom)*N0;
637     }
638     }
639     // case 7: bottom-left-front corner node owned by corresponding rank
640     if (left>0 && bottom>0 && front>0) {
641     const int neighbour=m_mpiInfo->rank-m_NX*(m_NY+1)-1;
642     const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
643     const index_t N1=(neighbour%(m_NX*m_NY)/m_NX==0 ? m_N1 : m_N1-1);
644     const index_t N2=(neighbour/(m_NX*m_NY) == 0 ? m_N2 : m_N2-1);
645     m_nodeId[0]=m_nodeDistribution[neighbour]+N0*N1*N2-1;
646     }
647 caltinay 3697
648 caltinay 3698 // the rest of the id's are contiguous
649     const index_t firstId=m_nodeDistribution[m_mpiInfo->rank];
650     #pragma omp parallel for
651     for (dim_t i2=front; i2<m_N2; i2++) {
652     for (dim_t i1=bottom; i1<m_N1; i1++) {
653     for (dim_t i0=left; i0<m_N0; i0++) {
654     m_nodeId[i0+i1*m_N0+i2*m_N0*m_N1] = firstId+i0-left
655     +(i1-bottom)*(m_N0-left)
656     +(i2-front)*(m_N0-left)*(m_N1-bottom);
657     }
658     }
659     }
660    
661 caltinay 3697 // elements
662     m_elementId.resize(getNumElements());
663     #pragma omp parallel for
664     for (dim_t k=0; k<getNumElements(); k++) {
665     m_elementId[k]=k;
666     }
667    
668     // face elements
669     m_faceId.resize(getNumFaceElements());
670     #pragma omp parallel for
671     for (dim_t k=0; k<getNumFaceElements(); k++) {
672     m_faceId[k]=k;
673     }
674 caltinay 3691 }
675    
676 caltinay 3703 //protected
677     void Brick::interpolateNodesOnElements(escript::Data& out, escript::Data& in) const
678     {
679     const dim_t numComp = in.getDataPointSize();
680     /* GENERATOR SNIP_INTERPOLATE_ELEMENTS TOP */
681     const double tmp0_3 = 0.0094373878376559314545;
682     const double tmp0_2 = 0.035220810900864519624;
683     const double tmp0_1 = 0.13144585576580214704;
684     const double tmp0_0 = 0.49056261216234406855;
685     #pragma omp parallel for
686     for (index_t k2=0; k2 < m_NE2; ++k2) {
687     for (index_t k1=0; k1 < m_NE1; ++k1) {
688     for (index_t k0=0; k0 < m_NE0; ++k0) {
689     const register double* f_000 = in.getSampleDataRO(INDEX3(k0,k1,k2, m_N0,m_N1));
690     const register double* f_001 = in.getSampleDataRO(INDEX3(k0,k1,k2+1, m_N0,m_N1));
691     const register double* f_101 = in.getSampleDataRO(INDEX3(k0+1,k1,k2+1, m_N0,m_N1));
692     const register double* f_011 = in.getSampleDataRO(INDEX3(k0,k1+1,k2+1, m_N0,m_N1));
693     const register double* f_110 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2, m_N0,m_N1));
694     const register double* f_010 = in.getSampleDataRO(INDEX3(k0,k1+1,k2, m_N0,m_N1));
695     const register double* f_100 = in.getSampleDataRO(INDEX3(k0+1,k1,k2, m_N0,m_N1));
696     const register double* f_111 = in.getSampleDataRO(INDEX3(k0+1,k1+1,k2+1, m_N0,m_N1));
697     double* o = out.getSampleDataRW(INDEX3(k0,k1,k2,m_NE0,m_NE1));
698     for (index_t i=0; i < numComp; ++i) {
699     o[INDEX2(i,numComp,0)] = f_000[i]*tmp0_0 + f_111[i]*tmp0_3 + tmp0_1*(f_001[i] + f_010[i] + f_100[i]) + tmp0_2*(f_011[i] + f_101[i] + f_110[i]);
700     o[INDEX2(i,numComp,1)] = f_011[i]*tmp0_3 + f_100[i]*tmp0_0 + tmp0_1*(f_000[i] + f_101[i] + f_110[i]) + tmp0_2*(f_001[i] + f_010[i] + f_111[i]);
701     o[INDEX2(i,numComp,2)] = f_010[i]*tmp0_0 + f_101[i]*tmp0_3 + tmp0_1*(f_000[i] + f_011[i] + f_110[i]) + tmp0_2*(f_001[i] + f_100[i] + f_111[i]);
702     o[INDEX2(i,numComp,3)] = f_001[i]*tmp0_3 + f_110[i]*tmp0_0 + tmp0_1*(f_010[i] + f_100[i] + f_111[i]) + tmp0_2*(f_000[i] + f_011[i] + f_101[i]);
703     o[INDEX2(i,numComp,4)] = f_001[i]*tmp0_0 + f_110[i]*tmp0_3 + tmp0_1*(f_000[i] + f_011[i] + f_101[i]) + tmp0_2*(f_010[i] + f_100[i] + f_111[i]);
704     o[INDEX2(i,numComp,5)] = f_010[i]*tmp0_3 + f_101[i]*tmp0_0 + tmp0_1*(f_001[i] + f_100[i] + f_111[i]) + tmp0_2*(f_000[i] + f_011[i] + f_110[i]);
705     o[INDEX2(i,numComp,6)] = f_011[i]*tmp0_0 + f_100[i]*tmp0_3 + tmp0_1*(f_001[i] + f_010[i] + f_111[i]) + tmp0_2*(f_000[i] + f_101[i] + f_110[i]);
706     o[INDEX2(i,numComp,7)] = f_000[i]*tmp0_3 + f_111[i]*tmp0_0 + tmp0_1*(f_011[i] + f_101[i] + f_110[i]) + tmp0_2*(f_001[i] + f_010[i] + f_100[i]);
707     } /* end of component loop i */
708     } /* end of k0 loop */
709     } /* end of k1 loop */
710     } /* end of k2 loop */
711     /* GENERATOR SNIP_INTERPOLATE_ELEMENTS BOTTOM */
712     }
713    
714     //protected
715     void Brick::interpolateNodesOnFaces(escript::Data& out, escript::Data& in) const
716     {
717     throw RipleyException("interpolateNodesOnFaces() not implemented");
718     }
719    
720 caltinay 3691 } // end of namespace ripley
721    

  ViewVC Help
Powered by ViewVC 1.1.26