/[escript]/branches/ripleygmg_from_3668/ripley/src/Rectangle.cpp
ViewVC logotype

Annotation of /branches/ripleygmg_from_3668/ripley/src/Rectangle.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3707 - (hide annotations)
Mon Dec 5 05:48:25 2011 UTC (8 years ago) by caltinay
File size: 42671 byte(s)
Fixes to the face element code.

1 caltinay 3691
2     /*******************************************************
3     *
4     * Copyright (c) 2003-2011 by University of Queensland
5     * Earth Systems Science Computational Center (ESSCC)
6     * http://www.uq.edu.au/esscc
7     *
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13    
14     #include <ripley/Rectangle.h>
15     extern "C" {
16     #include "paso/SystemMatrixPattern.h"
17     }
18    
19     #if USE_SILO
20     #include <silo.h>
21     #ifdef ESYS_MPI
22     #include <pmpio.h>
23     #endif
24     #endif
25    
26     #include <iomanip>
27    
28     using namespace std;
29    
30     namespace ripley {
31    
32     Rectangle::Rectangle(int n0, int n1, double l0, double l1, int d0, int d1) :
33     RipleyDomain(2),
34     m_gNE0(n0),
35     m_gNE1(n1),
36     m_l0(l0),
37     m_l1(l1),
38     m_NX(d0),
39     m_NY(d1)
40     {
41     // ensure number of subdivisions is valid and nodes can be distributed
42     // among number of ranks
43     if (m_NX*m_NY != m_mpiInfo->size)
44     throw RipleyException("Invalid number of spatial subdivisions");
45    
46     if (n0%m_NX > 0 || n1%m_NY > 0)
47     throw RipleyException("Number of elements must be separable into number of ranks in each dimension");
48    
49     // local number of elements
50     m_NE0 = n0/m_NX;
51     m_NE1 = n1/m_NY;
52     // local number of nodes (not necessarily owned)
53     m_N0 = m_NE0+1;
54     m_N1 = m_NE1+1;
55     // bottom-left node is at (offset0,offset1) in global mesh
56     m_offset0 = m_NE0*(m_mpiInfo->rank%m_NX);
57     m_offset1 = m_NE1*(m_mpiInfo->rank/m_NX);
58     populateSampleIds();
59     }
60    
61     Rectangle::~Rectangle()
62     {
63     }
64    
65     string Rectangle::getDescription() const
66     {
67     return "ripley::Rectangle";
68     }
69    
70     bool Rectangle::operator==(const AbstractDomain& other) const
71     {
72     if (dynamic_cast<const Rectangle*>(&other))
73     return this==&other;
74    
75     return false;
76     }
77    
78     void Rectangle::dump(const string& fileName) const
79     {
80     #if USE_SILO
81     string fn(fileName);
82     if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
83     fn+=".silo";
84     }
85    
86     const int NUM_SILO_FILES = 1;
87     const char* blockDirFmt = "/block%04d";
88     int driver=DB_HDF5;
89     string siloPath;
90     DBfile* dbfile = NULL;
91    
92     #ifdef ESYS_MPI
93     PMPIO_baton_t* baton = NULL;
94     #endif
95    
96     if (m_mpiInfo->size > 1) {
97     #ifdef ESYS_MPI
98     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
99     0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
100     PMPIO_DefaultClose, (void*)&driver);
101     // try the fallback driver in case of error
102     if (!baton && driver != DB_PDB) {
103     driver = DB_PDB;
104     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
105     0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
106     PMPIO_DefaultClose, (void*)&driver);
107     }
108     if (baton) {
109     char str[64];
110     snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
111     siloPath = str;
112     dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
113     }
114     #endif
115     } else {
116     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
117     getDescription().c_str(), driver);
118     // try the fallback driver in case of error
119     if (!dbfile && driver != DB_PDB) {
120     driver = DB_PDB;
121     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
122     getDescription().c_str(), driver);
123     }
124     }
125    
126     if (!dbfile)
127     throw RipleyException("dump: Could not create Silo file");
128    
129     /*
130     if (driver==DB_HDF5) {
131     // gzip level 1 already provides good compression with minimal
132     // performance penalty. Some tests showed that gzip levels >3 performed
133     // rather badly on escript data both in terms of time and space
134     DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
135     }
136     */
137    
138     boost::scoped_ptr<double> x(new double[m_N0]);
139     boost::scoped_ptr<double> y(new double[m_N1]);
140     double* coords[2] = { x.get(), y.get() };
141 caltinay 3697 pair<double,double> xdx = getFirstCoordAndSpacing(0);
142     pair<double,double> ydy = getFirstCoordAndSpacing(1);
143 caltinay 3691 #pragma omp parallel
144     {
145     #pragma omp for
146     for (dim_t i0 = 0; i0 < m_N0; i0++) {
147 caltinay 3697 coords[0][i0]=xdx.first+i0*xdx.second;
148 caltinay 3691 }
149     #pragma omp for
150     for (dim_t i1 = 0; i1 < m_N1; i1++) {
151 caltinay 3697 coords[1][i1]=ydy.first+i1*ydy.second;
152 caltinay 3691 }
153     }
154 caltinay 3697 IndexVector dims = getNumNodesPerDim();
155    
156     // write mesh
157     DBPutQuadmesh(dbfile, "mesh", NULL, coords, &dims[0], 2, DB_DOUBLE,
158 caltinay 3691 DB_COLLINEAR, NULL);
159    
160 caltinay 3697 // write node ids
161     DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], &dims[0], 2,
162     NULL, 0, DB_INT, DB_NODECENT, NULL);
163    
164     // write element ids
165     dims = getNumElementsPerDim();
166     DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
167     &dims[0], 2, NULL, 0, DB_INT, DB_ZONECENT, NULL);
168    
169     // rank 0 writes multimesh and multivar
170 caltinay 3691 if (m_mpiInfo->rank == 0) {
171     vector<string> tempstrings;
172 caltinay 3697 vector<char*> names;
173 caltinay 3691 for (dim_t i=0; i<m_mpiInfo->size; i++) {
174     stringstream path;
175     path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
176     tempstrings.push_back(path.str());
177 caltinay 3697 names.push_back((char*)tempstrings.back().c_str());
178 caltinay 3691 }
179 caltinay 3697 vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
180 caltinay 3691 DBSetDir(dbfile, "/");
181 caltinay 3697 DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
182     &types[0], NULL);
183     tempstrings.clear();
184     names.clear();
185     for (dim_t i=0; i<m_mpiInfo->size; i++) {
186     stringstream path;
187     path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
188     tempstrings.push_back(path.str());
189     names.push_back((char*)tempstrings.back().c_str());
190     }
191     types.assign(m_mpiInfo->size, DB_QUADVAR);
192     DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
193     &types[0], NULL);
194     tempstrings.clear();
195     names.clear();
196     for (dim_t i=0; i<m_mpiInfo->size; i++) {
197     stringstream path;
198     path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
199     tempstrings.push_back(path.str());
200     names.push_back((char*)tempstrings.back().c_str());
201     }
202     DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
203     &types[0], NULL);
204 caltinay 3691 }
205    
206     if (m_mpiInfo->size > 1) {
207     #ifdef ESYS_MPI
208     PMPIO_HandOffBaton(baton, dbfile);
209     PMPIO_Finish(baton);
210     #endif
211     } else {
212     DBClose(dbfile);
213     }
214    
215     #else // USE_SILO
216     throw RipleyException("dump(): no Silo support");
217     #endif
218     }
219    
220 caltinay 3697 const int* Rectangle::borrowSampleReferenceIDs(int fsType) const
221 caltinay 3691 {
222 caltinay 3697 switch (fsType) {
223 caltinay 3691 case Nodes:
224     return &m_nodeId[0];
225     case Elements:
226     return &m_elementId[0];
227     case FaceElements:
228     return &m_faceId[0];
229     default:
230     break;
231     }
232    
233     stringstream msg;
234     msg << "borrowSampleReferenceIDs() not implemented for function space type "
235 caltinay 3701 << functionSpaceTypeAsString(fsType);
236 caltinay 3691 throw RipleyException(msg.str());
237     }
238    
239     bool Rectangle::ownSample(int fsCode, index_t id) const
240     {
241     #ifdef ESYS_MPI
242 caltinay 3702 if (fsCode != ReducedNodes) {
243 caltinay 3699 const index_t myFirst=m_nodeDistribution[m_mpiInfo->rank];
244     const index_t myLast=m_nodeDistribution[m_mpiInfo->rank+1]-1;
245 caltinay 3691 return (m_nodeId[id]>=myFirst && m_nodeId[id]<=myLast);
246 caltinay 3702 } else {
247     stringstream msg;
248     msg << "ownSample() not implemented for "
249     << functionSpaceTypeAsString(fsCode);
250     throw RipleyException(msg.str());
251     }
252 caltinay 3691 #else
253     return true;
254     #endif
255     }
256    
257 caltinay 3702 void Rectangle::setToGradient(escript::Data& out, const escript::Data& cIn) const
258 caltinay 3691 {
259 caltinay 3702 escript::Data& in = *const_cast<escript::Data*>(&cIn);
260 caltinay 3701 const dim_t numComp = in.getDataPointSize();
261 caltinay 3702 const double h0 = m_l0/m_gNE0;
262     const double h1 = m_l1/m_gNE1;
263     if (out.getFunctionSpace().getTypeCode() == Elements) {
264     /* GENERATOR SNIP_GRAD_ELEMENTS TOP */
265     const double tmp0_13 = 0.78867513459481288225/h1;
266     const double tmp0_0 = 0.78867513459481288225/h0;
267     const double tmp0_4 = 0.21132486540518711775/h0;
268     const double tmp0_10 = 0.78867513459481288225/h1;
269     const double tmp0_1 = 0.21132486540518711775/h0;
270     const double tmp0_8 = -0.21132486540518711775/h1;
271     const double tmp0_14 = 0.21132486540518711775/h1;
272     const double tmp0_5 = 0.78867513459481288225/h0;
273     const double tmp0_11 = -0.78867513459481288225/h1;
274     const double tmp0_2 = -0.21132486540518711775/h0;
275     const double tmp0_9 = 0.21132486540518711775/h1;
276     const double tmp0_15 = -0.21132486540518711775/h1;
277     const double tmp0_6 = -0.78867513459481288225/h0;
278     const double tmp0_3 = -0.78867513459481288225/h0;
279     const double tmp0_12 = -0.78867513459481288225/h1;
280     const double tmp0_7 = -0.21132486540518711775/h0;
281 caltinay 3697 #pragma omp parallel for
282 caltinay 3707 for (index_t k1=0; k1 < m_NE1; ++k1) {
283     for (index_t k0=0; k0 < m_NE0; ++k0) {
284 caltinay 3702 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0));
285     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0));
286     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0));
287     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0));
288     double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0));
289     for (index_t i=0; i < numComp; ++i) {
290     o[INDEX3(i,0,0,numComp,2)] = f_00[i]*tmp0_3 + f_01[i]*tmp0_2 + f_10[i]*tmp0_0 + f_11[i]*tmp0_1;
291     o[INDEX3(i,1,0,numComp,2)] = f_00[i]*tmp0_11 + f_01[i]*tmp0_10 + f_10[i]*tmp0_8 + f_11[i]*tmp0_9;
292     o[INDEX3(i,0,1,numComp,2)] = f_00[i]*tmp0_3 + f_01[i]*tmp0_2 + f_10[i]*tmp0_0 + f_11[i]*tmp0_1;
293     o[INDEX3(i,1,1,numComp,2)] = f_00[i]*tmp0_15 + f_01[i]*tmp0_14 + f_10[i]*tmp0_12 + f_11[i]*tmp0_13;
294     o[INDEX3(i,0,2,numComp,2)] = f_00[i]*tmp0_7 + f_01[i]*tmp0_6 + f_10[i]*tmp0_4 + f_11[i]*tmp0_5;
295     o[INDEX3(i,1,2,numComp,2)] = f_00[i]*tmp0_11 + f_01[i]*tmp0_10 + f_10[i]*tmp0_8 + f_11[i]*tmp0_9;
296     o[INDEX3(i,0,3,numComp,2)] = f_00[i]*tmp0_7 + f_01[i]*tmp0_6 + f_10[i]*tmp0_4 + f_11[i]*tmp0_5;
297     o[INDEX3(i,1,3,numComp,2)] = f_00[i]*tmp0_15 + f_01[i]*tmp0_14 + f_10[i]*tmp0_12 + f_11[i]*tmp0_13;
298     } /* end of component loop i */
299     } /* end of k0 loop */
300     } /* end of k1 loop */
301     /* GENERATOR SNIP_GRAD_ELEMENTS BOTTOM */
302 caltinay 3707 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
303     /* GENERATOR SNIP_GRAD_FACES TOP */
304     if (m_faceOffset[0] > -1) {
305     const double tmp0_0 = 0.78867513459481288225/h0;
306     const double tmp0_4 = 0.21132486540518711775/h0;
307     const double tmp0_1 = 0.21132486540518711775/h0;
308     const double tmp0_8 = 1.0/h1;
309     const double tmp0_5 = 0.78867513459481288225/h0;
310     const double tmp0_2 = -0.21132486540518711775/h0;
311     const double tmp0_9 = -1/h1;
312     const double tmp0_6 = -0.78867513459481288225/h0;
313     const double tmp0_3 = -0.78867513459481288225/h0;
314     const double tmp0_7 = -0.21132486540518711775/h0;
315     #pragma omp parallel for
316     for (index_t k1=0; k1 < m_NE1; ++k1) {
317     const register double* f_10 = in.getSampleDataRO(INDEX2(1,k1, m_N0));
318     const register double* f_11 = in.getSampleDataRO(INDEX2(1,k1+1, m_N0));
319     const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0));
320     const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0));
321     double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
322     for (index_t i=0; i < numComp; ++i) {
323     o[INDEX3(i,0,0,numComp,2)] = f_00[i]*tmp0_3 + f_01[i]*tmp0_2 + f_10[i]*tmp0_0 + f_11[i]*tmp0_1;
324     o[INDEX3(i,1,0,numComp,2)] = f_00[i]*tmp0_9 + f_01[i]*tmp0_8;
325     o[INDEX3(i,0,1,numComp,2)] = f_00[i]*tmp0_7 + f_01[i]*tmp0_6 + f_10[i]*tmp0_4 + f_11[i]*tmp0_5;
326     o[INDEX3(i,1,1,numComp,2)] = f_00[i]*tmp0_9 + f_01[i]*tmp0_8;
327     } /* end of component loop i */
328     } /* end of k1 loop */
329     } /* end of face 0 */
330     if (m_faceOffset[1] > -1) {
331     const double tmp0_0 = 0.78867513459481288225/h0;
332     const double tmp0_4 = 0.21132486540518711775/h0;
333     const double tmp0_1 = 0.21132486540518711775/h0;
334     const double tmp0_8 = -1/h1;
335     const double tmp0_5 = 0.78867513459481288225/h0;
336     const double tmp0_2 = -0.21132486540518711775/h0;
337     const double tmp0_9 = 1.0/h1;
338     const double tmp0_6 = -0.78867513459481288225/h0;
339     const double tmp0_3 = -0.78867513459481288225/h0;
340     const double tmp0_7 = -0.21132486540518711775/h0;
341     #pragma omp parallel for
342     for (index_t k1=0; k1 < m_NE1; ++k1) {
343     const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0));
344     const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0));
345     const register double* f_01 = in.getSampleDataRO(INDEX2(m_N0-2,k1+1, m_N0));
346     const register double* f_00 = in.getSampleDataRO(INDEX2(m_N0-2,k1, m_N0));
347     double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
348     for (index_t i=0; i < numComp; ++i) {
349     o[INDEX3(i,0,0,numComp,2)] = f_00[i]*tmp0_3 + f_01[i]*tmp0_2 + f_10[i]*tmp0_0 + f_11[i]*tmp0_1;
350     o[INDEX3(i,1,0,numComp,2)] = f_10[i]*tmp0_8 + f_11[i]*tmp0_9;
351     o[INDEX3(i,0,1,numComp,2)] = f_00[i]*tmp0_7 + f_01[i]*tmp0_6 + f_10[i]*tmp0_4 + f_11[i]*tmp0_5;
352     o[INDEX3(i,1,1,numComp,2)] = f_10[i]*tmp0_8 + f_11[i]*tmp0_9;
353     } /* end of component loop i */
354     } /* end of k1 loop */
355     } /* end of face 1 */
356     if (m_faceOffset[2] > -1) {
357     const double tmp0_0 = -1/h0;
358     const double tmp0_4 = 0.21132486540518711775/h1;
359     const double tmp0_1 = 1.0/h0;
360     const double tmp0_8 = 0.78867513459481288225/h1;
361     const double tmp0_5 = 0.78867513459481288225/h1;
362     const double tmp0_2 = -0.78867513459481288225/h1;
363     const double tmp0_9 = 0.21132486540518711775/h1;
364     const double tmp0_6 = -0.21132486540518711775/h1;
365     const double tmp0_3 = -0.21132486540518711775/h1;
366     const double tmp0_7 = -0.78867513459481288225/h1;
367     #pragma omp parallel for
368     for (index_t k0=0; k0 < m_NE0; ++k0) {
369     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0));
370     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0));
371     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,1, m_N0));
372     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,1, m_N0));
373     double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
374     for (index_t i=0; i < numComp; ++i) {
375     o[INDEX3(i,0,0,numComp,2)] = f_00[i]*tmp0_0 + f_10[i]*tmp0_1;
376     o[INDEX3(i,1,0,numComp,2)] = f_00[i]*tmp0_2 + f_01[i]*tmp0_5 + f_10[i]*tmp0_3 + f_11[i]*tmp0_4;
377     o[INDEX3(i,0,1,numComp,2)] = f_00[i]*tmp0_0 + f_10[i]*tmp0_1;
378     o[INDEX3(i,1,1,numComp,2)] = f_00[i]*tmp0_6 + f_01[i]*tmp0_9 + f_10[i]*tmp0_7 + f_11[i]*tmp0_8;
379     } /* end of component loop i */
380     } /* end of k0 loop */
381     } /* end of face 2 */
382     if (m_faceOffset[3] > -1) {
383     const double tmp0_0 = 1.0/h0;
384     const double tmp0_4 = -0.21132486540518711775/h1;
385     const double tmp0_1 = -1/h0;
386     const double tmp0_8 = -0.78867513459481288225/h1;
387     const double tmp0_5 = -0.78867513459481288225/h1;
388     const double tmp0_2 = 0.21132486540518711775/h1;
389     const double tmp0_9 = -0.21132486540518711775/h1;
390     const double tmp0_6 = 0.78867513459481288225/h1;
391     const double tmp0_3 = 0.78867513459481288225/h1;
392     const double tmp0_7 = 0.21132486540518711775/h1;
393     #pragma omp parallel for
394     for (index_t k0=0; k0 < m_NE0; ++k0) {
395     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0));
396     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0));
397     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,m_N1-2, m_N0));
398     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,m_N1-2, m_N0));
399     double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
400     for (index_t i=0; i < numComp; ++i) {
401     o[INDEX3(i,0,0,numComp,2)] = f_01[i]*tmp0_1 + f_11[i]*tmp0_0;
402     o[INDEX3(i,1,0,numComp,2)] = f_00[i]*tmp0_5 + f_01[i]*tmp0_3 + f_10[i]*tmp0_4 + f_11[i]*tmp0_2;
403     o[INDEX3(i,0,1,numComp,2)] = f_01[i]*tmp0_1 + f_11[i]*tmp0_0;
404     o[INDEX3(i,1,1,numComp,2)] = f_00[i]*tmp0_9 + f_01[i]*tmp0_7 + f_10[i]*tmp0_8 + f_11[i]*tmp0_6;
405     } /* end of component loop i */
406     } /* end of k0 loop */
407     } /* end of face 3 */
408     /* GENERATOR SNIP_GRAD_FACES BOTTOM */
409 caltinay 3702 } else {
410 caltinay 3707 stringstream msg;
411     msg << "setToGradient() not implemented for "
412     << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
413     throw RipleyException(msg.str());
414 caltinay 3702 }
415 caltinay 3701 }
416 caltinay 3697
417 caltinay 3701 void Rectangle::addPDEToSystem(escript::AbstractSystemMatrix& mat,
418     escript::Data& rhs, const escript::Data& A, const escript::Data& B,
419     const escript::Data& C, const escript::Data& D,
420     const escript::Data& X, const escript::Data& Y,
421     const escript::Data& d, const escript::Data& y,
422     const escript::Data& d_contact, const escript::Data& y_contact,
423     const escript::Data& d_dirac, const escript::Data& y_dirac) const
424     {
425     throw RipleyException("addPDEToSystem() not implemented");
426     }
427    
428 caltinay 3691 Paso_SystemMatrixPattern* Rectangle::getPattern(bool reducedRowOrder,
429     bool reducedColOrder) const
430     {
431     if (reducedRowOrder || reducedColOrder)
432     throw RipleyException("getPattern() not implemented for reduced order");
433    
434 caltinay 3699 // connector
435     RankVector neighbour;
436     IndexVector offsetInShared(1,0);
437     IndexVector sendShared, recvShared;
438     const IndexVector faces=getNumFacesPerBoundary();
439     const index_t left = (m_offset0==0 ? 0 : 1);
440     const index_t bottom = (m_offset1==0 ? 0 : 1);
441     // corner node from bottom-left
442     if (faces[0] == 0 && faces[2] == 0) {
443     neighbour.push_back(m_mpiInfo->rank-m_NX-1);
444     offsetInShared.push_back(offsetInShared.back()+1);
445     sendShared.push_back(m_nodeId[m_N0+left]);
446     recvShared.push_back(m_nodeId[0]);
447     }
448     // bottom edge
449     if (faces[2] == 0) {
450     neighbour.push_back(m_mpiInfo->rank-m_NX);
451     offsetInShared.push_back(offsetInShared.back()+m_N0-left);
452     for (dim_t i=left; i<m_N0; i++) {
453     // easy case, we know the neighbour id's
454     sendShared.push_back(m_nodeId[m_N0+i]);
455     recvShared.push_back(m_nodeId[i]);
456     }
457     }
458     // corner node from bottom-right
459     if (faces[1] == 0 && faces[2] == 0) {
460     neighbour.push_back(m_mpiInfo->rank-m_NX+1);
461     const index_t N0=(neighbour.back()%m_NX == 0 ? m_N0 : m_N0-1);
462     const index_t N1=(neighbour.back()/m_NX == 0 ? m_N1 : m_N1-1);
463     const index_t first=m_nodeDistribution[neighbour.back()];
464     offsetInShared.push_back(offsetInShared.back()+1);
465     sendShared.push_back(m_nodeId[(bottom+1)*m_N0-1]);
466     recvShared.push_back(first+N0*(N1-1));
467     }
468     // left edge
469     if (faces[0] == 0) {
470     neighbour.push_back(m_mpiInfo->rank-1);
471     offsetInShared.push_back(offsetInShared.back()+m_N1-bottom);
472     for (dim_t i=bottom; i<m_N1; i++) {
473     // easy case, we know the neighbour id's
474     sendShared.push_back(m_nodeId[i*m_N0+1]);
475     recvShared.push_back(m_nodeId[i*m_N0]);
476     }
477     }
478     // right edge
479     if (faces[1] == 0) {
480     neighbour.push_back(m_mpiInfo->rank+1);
481     const index_t rightN0=(neighbour.back()%m_NX == 0 ? m_N0 : m_N0-1);
482     const index_t first=m_nodeDistribution[neighbour.back()];
483     offsetInShared.push_back(offsetInShared.back()+m_N1-bottom);
484     for (dim_t i=bottom; i<m_N1; i++) {
485     sendShared.push_back(m_nodeId[(i+1)*m_N0-1]);
486     recvShared.push_back(first+rightN0*(i-bottom));
487     }
488     }
489     // corner node from top-left
490     if (faces[0] == 0 && faces[3] == 0) {
491     neighbour.push_back(m_mpiInfo->rank+m_NX-1);
492     const index_t N0=(neighbour.back()%m_NX == 0 ? m_N0 : m_N0-1);
493     const index_t first=m_nodeDistribution[neighbour.back()];
494     offsetInShared.push_back(offsetInShared.back()+1);
495     sendShared.push_back(m_nodeId[m_N0*(m_N1-1)+left]);
496     recvShared.push_back(first+N0-1);
497     }
498     // top edge
499     if (faces[3] == 0) {
500     neighbour.push_back(m_mpiInfo->rank+m_NX);
501     const index_t first=m_nodeDistribution[neighbour.back()];
502     offsetInShared.push_back(offsetInShared.back()+m_N0-left);
503     for (dim_t i=left; i<m_N0; i++) {
504     sendShared.push_back(m_nodeId[m_N0*(m_N1-1)+i]);
505     recvShared.push_back(first+i-left);
506     }
507     }
508     // corner node from top-right
509     if (faces[1] == 0 && faces[3] == 0) {
510     neighbour.push_back(m_mpiInfo->rank+m_NX+1);
511     const index_t first=m_nodeDistribution[neighbour.back()];
512     offsetInShared.push_back(offsetInShared.back()+1);
513     sendShared.push_back(m_nodeId[m_N0*m_N1-1]);
514     recvShared.push_back(first);
515     }
516     const int numDOF=m_nodeDistribution[m_mpiInfo->rank+1]-m_nodeDistribution[m_mpiInfo->rank];
517 caltinay 3702 /*
518 caltinay 3699 cout << "--- rcv_shcomp ---" << endl;
519     cout << "numDOF=" << numDOF << ", numNeighbors=" << neighbour.size() << endl;
520     for (size_t i=0; i<neighbour.size(); i++) {
521     cout << "neighbor[" << i << "]=" << neighbour[i]
522     << " offsetInShared[" << i+1 << "]=" << offsetInShared[i+1] << endl;
523     }
524     for (size_t i=0; i<recvShared.size(); i++) {
525     cout << "shared[" << i << "]=" << recvShared[i] << endl;
526     }
527     cout << "--- snd_shcomp ---" << endl;
528     for (size_t i=0; i<sendShared.size(); i++) {
529     cout << "shared[" << i << "]=" << sendShared[i] << endl;
530     }
531 caltinay 3702 */
532 caltinay 3691
533     Paso_SharedComponents *snd_shcomp = Paso_SharedComponents_alloc(
534 caltinay 3699 numDOF, neighbour.size(), &neighbour[0], &sendShared[0],
535 caltinay 3697 &offsetInShared[0], 1, 0, m_mpiInfo);
536 caltinay 3691 Paso_SharedComponents *rcv_shcomp = Paso_SharedComponents_alloc(
537 caltinay 3699 numDOF, neighbour.size(), &neighbour[0], &recvShared[0],
538 caltinay 3697 &offsetInShared[0], 1, 0, m_mpiInfo);
539 caltinay 3691 Paso_Connector* connector = Paso_Connector_alloc(snd_shcomp, rcv_shcomp);
540 caltinay 3699 Paso_SharedComponents_free(snd_shcomp);
541     Paso_SharedComponents_free(rcv_shcomp);
542 caltinay 3691
543     // create patterns
544 caltinay 3699 dim_t M, N;
545     IndexVector ptr(1,0);
546     IndexVector index;
547    
548     // main pattern
549     for (index_t i=0; i<numDOF; i++) {
550     // always add the node itself
551     index.push_back(i);
552     int num=insertNeighbours(index, i);
553     ptr.push_back(ptr.back()+num+1);
554     }
555     M=N=ptr.size()-1;
556     // paso will manage the memory
557     index_t* indexC = MEMALLOC(index.size(),index_t);
558     index_t* ptrC = MEMALLOC(ptr.size(), index_t);
559     copy(index.begin(), index.end(), indexC);
560     copy(ptr.begin(), ptr.end(), ptrC);
561 caltinay 3691 Paso_Pattern *mainPattern = Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT,
562 caltinay 3699 M, N, ptrC, indexC);
563 caltinay 3691
564 caltinay 3702 /*
565 caltinay 3699 cout << "--- main_pattern ---" << endl;
566     cout << "M=" << M << ", N=" << N << endl;
567     for (size_t i=0; i<ptr.size(); i++) {
568     cout << "ptr[" << i << "]=" << ptr[i] << endl;
569     }
570     for (size_t i=0; i<index.size(); i++) {
571     cout << "index[" << i << "]=" << index[i] << endl;
572     }
573 caltinay 3702 */
574 caltinay 3699
575     ptr.clear();
576     index.clear();
577    
578     // column & row couple patterns
579     Paso_Pattern *colCouplePattern, *rowCouplePattern;
580     generateCouplePatterns(&colCouplePattern, &rowCouplePattern);
581    
582     // allocate paso distribution
583     Paso_Distribution* distribution = Paso_Distribution_alloc(m_mpiInfo,
584     const_cast<index_t*>(&m_nodeDistribution[0]), 1, 0);
585    
586 caltinay 3691 Paso_SystemMatrixPattern* pattern = Paso_SystemMatrixPattern_alloc(
587     MATRIX_FORMAT_DEFAULT, distribution, distribution,
588     mainPattern, colCouplePattern, rowCouplePattern,
589     connector, connector);
590     Paso_Pattern_free(mainPattern);
591     Paso_Pattern_free(colCouplePattern);
592     Paso_Pattern_free(rowCouplePattern);
593     Paso_Distribution_free(distribution);
594 caltinay 3697 return pattern;
595 caltinay 3691 }
596    
597     void Rectangle::Print_Mesh_Info(const bool full) const
598     {
599     RipleyDomain::Print_Mesh_Info(full);
600     if (full) {
601     cout << " Id Coordinates" << endl;
602     cout.precision(15);
603     cout.setf(ios::scientific, ios::floatfield);
604 caltinay 3697 pair<double,double> xdx = getFirstCoordAndSpacing(0);
605     pair<double,double> ydy = getFirstCoordAndSpacing(1);
606 caltinay 3691 for (index_t i=0; i < getNumNodes(); i++) {
607     cout << " " << setw(5) << m_nodeId[i]
608 caltinay 3697 << " " << xdx.first+(i%m_N0)*xdx.second
609     << " " << ydy.first+(i/m_N0)*ydy.second << endl;
610 caltinay 3691 }
611     }
612     }
613    
614 caltinay 3697 IndexVector Rectangle::getNumNodesPerDim() const
615     {
616     IndexVector ret;
617     ret.push_back(m_N0);
618     ret.push_back(m_N1);
619     return ret;
620     }
621    
622     IndexVector Rectangle::getNumElementsPerDim() const
623     {
624     IndexVector ret;
625     ret.push_back(m_NE0);
626     ret.push_back(m_NE1);
627     return ret;
628     }
629    
630     IndexVector Rectangle::getNumFacesPerBoundary() const
631     {
632     IndexVector ret(4, 0);
633     //left
634     if (m_offset0==0)
635     ret[0]=m_NE1;
636     //right
637     if (m_mpiInfo->rank%m_NX==m_NX-1)
638     ret[1]=m_NE1;
639     //bottom
640     if (m_offset1==0)
641     ret[2]=m_NE0;
642     //top
643     if (m_mpiInfo->rank/m_NX==m_NY-1)
644     ret[3]=m_NE0;
645     return ret;
646     }
647    
648     pair<double,double> Rectangle::getFirstCoordAndSpacing(dim_t dim) const
649     {
650     if (dim==0) {
651     return pair<double,double>((m_l0*m_offset0)/m_gNE0, m_l0/m_gNE0);
652     } else if (dim==1) {
653     return pair<double,double>((m_l1*m_offset1)/m_gNE1, m_l1/m_gNE1);
654     }
655     throw RipleyException("getFirstCoordAndSpacing(): invalid argument");
656     }
657    
658 caltinay 3691 //protected
659     dim_t Rectangle::getNumFaceElements() const
660     {
661 caltinay 3699 const IndexVector faces = getNumFacesPerBoundary();
662 caltinay 3691 dim_t n=0;
663 caltinay 3699 for (size_t i=0; i<faces.size(); i++)
664     n+=faces[i];
665 caltinay 3691 return n;
666     }
667    
668     //protected
669     void Rectangle::assembleCoordinates(escript::Data& arg) const
670     {
671     escriptDataC x = arg.getDataC();
672     int numDim = m_numDim;
673     if (!isDataPointShapeEqual(&x, 1, &numDim))
674     throw RipleyException("setToX: Invalid Data object shape");
675     if (!numSamplesEqual(&x, 1, getNumNodes()))
676     throw RipleyException("setToX: Illegal number of samples in Data object");
677    
678 caltinay 3697 pair<double,double> xdx = getFirstCoordAndSpacing(0);
679     pair<double,double> ydy = getFirstCoordAndSpacing(1);
680 caltinay 3691 arg.requireWrite();
681     #pragma omp parallel for
682     for (dim_t i1 = 0; i1 < m_N1; i1++) {
683     for (dim_t i0 = 0; i0 < m_N0; i0++) {
684     double* point = arg.getSampleDataRW(i0+m_N0*i1);
685 caltinay 3697 point[0] = xdx.first+i0*xdx.second;
686     point[1] = ydy.first+i1*ydy.second;
687 caltinay 3691 }
688     }
689     }
690    
691     //private
692     void Rectangle::populateSampleIds()
693     {
694 caltinay 3697 // identifiers are ordered from left to right, bottom to top on each rank,
695     // except for the shared nodes which are owned by the rank below / to the
696     // left of the current rank
697    
698     // build node distribution vector first.
699     // m_nodeDistribution[i] is the first node id on rank i, that is
700     // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes
701     m_nodeDistribution.assign(m_mpiInfo->size+1, 0);
702     m_nodeDistribution[1]=getNumNodes();
703     for (dim_t k=1; k<m_mpiInfo->size-1; k++) {
704     const index_t x=k%m_NX;
705     const index_t y=k/m_NX;
706     index_t numNodes=getNumNodes();
707     if (x>0)
708     numNodes-=m_N1;
709     if (y>0)
710     numNodes-=m_N0;
711     if (x>0 && y>0)
712     numNodes++; // subtracted corner twice -> fix that
713     m_nodeDistribution[k+1]=m_nodeDistribution[k]+numNodes;
714     }
715     m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal();
716    
717 caltinay 3691 m_nodeId.resize(getNumNodes());
718 caltinay 3697
719     // the bottom row and left column are not owned by this rank so the
720     // identifiers need to be computed accordingly
721     const index_t left = (m_offset0==0 ? 0 : 1);
722     const index_t bottom = (m_offset1==0 ? 0 : 1);
723     if (left>0) {
724     const int neighbour=m_mpiInfo->rank-1;
725     const index_t leftN0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
726 caltinay 3691 #pragma omp parallel for
727 caltinay 3697 for (dim_t i1=bottom; i1<m_N1; i1++) {
728     m_nodeId[i1*m_N0]=m_nodeDistribution[neighbour]
729     + (i1-bottom+1)*leftN0-1;
730     }
731     }
732     if (bottom>0) {
733     const int neighbour=m_mpiInfo->rank-m_NX;
734     const index_t bottomN0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
735     const index_t bottomN1=(neighbour/m_NX == 0 ? m_N1 : m_N1-1);
736     #pragma omp parallel for
737     for (dim_t i0=left; i0<m_N0; i0++) {
738     m_nodeId[i0]=m_nodeDistribution[neighbour]
739     + (bottomN1-1)*bottomN0 + i0 - left;
740     }
741     }
742     if (left>0 && bottom>0) {
743     const int neighbour=m_mpiInfo->rank-m_NX-1;
744 caltinay 3699 const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
745     const index_t N1=(neighbour/m_NX == 0 ? m_N1 : m_N1-1);
746     m_nodeId[0]=m_nodeDistribution[neighbour]+N0*N1-1;
747 caltinay 3697 }
748    
749     // the rest of the id's are contiguous
750     const index_t firstId=m_nodeDistribution[m_mpiInfo->rank];
751     #pragma omp parallel for
752     for (dim_t i1=bottom; i1<m_N1; i1++) {
753     for (dim_t i0=left; i0<m_N0; i0++) {
754     m_nodeId[i0+i1*m_N0] = firstId+i0-left+(i1-bottom)*(m_N0-left);
755     }
756     }
757    
758 caltinay 3704 // element id's
759 caltinay 3697 m_elementId.resize(getNumElements());
760     #pragma omp parallel for
761     for (dim_t k=0; k<getNumElements(); k++) {
762     m_elementId[k]=k;
763     }
764    
765 caltinay 3704 // face element id's
766 caltinay 3697 m_faceId.resize(getNumFaceElements());
767     #pragma omp parallel for
768     for (dim_t k=0; k<getNumFaceElements(); k++) {
769     m_faceId[k]=k;
770     }
771 caltinay 3704
772     // generate face offset vector
773     const IndexVector facesPerEdge = getNumFacesPerBoundary();
774     m_faceOffset.assign(facesPerEdge.size(), -1);
775     index_t offset=0;
776     for (size_t i=0; i<facesPerEdge.size(); i++) {
777     if (facesPerEdge[i]>0) {
778     m_faceOffset[i]=offset;
779     offset+=facesPerEdge[i];
780     }
781     }
782 caltinay 3691 }
783    
784 caltinay 3699 //private
785     int Rectangle::insertNeighbours(IndexVector& index, index_t node) const
786     {
787     const dim_t myN0 = (m_offset0==0 ? m_N0 : m_N0-1);
788     const dim_t myN1 = (m_offset1==0 ? m_N1 : m_N1-1);
789     const int x=node%myN0;
790     const int y=node/myN0;
791     int num=0;
792     if (y>0) {
793     if (x>0) {
794     // bottom-left
795     index.push_back(node-myN0-1);
796     num++;
797     }
798     // bottom
799     index.push_back(node-myN0);
800     num++;
801     if (x<myN0-1) {
802     // bottom-right
803     index.push_back(node-myN0+1);
804     num++;
805     }
806     }
807     if (x<myN0-1) {
808     // right
809     index.push_back(node+1);
810     num++;
811     if (y<myN1-1) {
812     // top-right
813     index.push_back(node+myN0+1);
814     num++;
815     }
816     }
817     if (y<myN1-1) {
818     // top
819     index.push_back(node+myN0);
820     num++;
821     if (x>0) {
822     // top-left
823     index.push_back(node+myN0-1);
824     num++;
825     }
826     }
827     if (x>0) {
828     // left
829     index.push_back(node-1);
830     num++;
831     }
832    
833     return num;
834     }
835    
836     //private
837     void Rectangle::generateCouplePatterns(Paso_Pattern** colPattern, Paso_Pattern** rowPattern) const
838     {
839     IndexVector ptr(1,0);
840     IndexVector index;
841     const dim_t myN0 = (m_offset0==0 ? m_N0 : m_N0-1);
842     const dim_t myN1 = (m_offset1==0 ? m_N1 : m_N1-1);
843     const IndexVector faces=getNumFacesPerBoundary();
844    
845     // bottom edge
846     dim_t n=0;
847     if (faces[0] == 0) {
848     index.push_back(2*(myN0+myN1+1));
849     index.push_back(2*(myN0+myN1+1)+1);
850     n+=2;
851     if (faces[2] == 0) {
852     index.push_back(0);
853     index.push_back(1);
854     index.push_back(2);
855     n+=3;
856     }
857     } else if (faces[2] == 0) {
858     index.push_back(1);
859     index.push_back(2);
860     n+=2;
861     }
862     // n=neighbours of bottom-left corner node
863     ptr.push_back(ptr.back()+n);
864     n=0;
865     if (faces[2] == 0) {
866     for (dim_t i=1; i<myN0-1; i++) {
867     index.push_back(i);
868     index.push_back(i+1);
869     index.push_back(i+2);
870     ptr.push_back(ptr.back()+3);
871     }
872     index.push_back(myN0-1);
873     index.push_back(myN0);
874     n+=2;
875     if (faces[1] == 0) {
876     index.push_back(myN0+1);
877     index.push_back(myN0+2);
878     index.push_back(myN0+3);
879     n+=3;
880     }
881     } else {
882     for (dim_t i=1; i<myN0-1; i++) {
883     ptr.push_back(ptr.back());
884     }
885     if (faces[1] == 0) {
886     index.push_back(myN0+2);
887     index.push_back(myN0+3);
888     n+=2;
889     }
890     }
891     // n=neighbours of bottom-right corner node
892     ptr.push_back(ptr.back()+n);
893    
894     // 2nd row to 2nd last row
895     for (dim_t i=1; i<myN1-1; i++) {
896     // left edge
897     if (faces[0] == 0) {
898     index.push_back(2*(myN0+myN1+2)-i);
899     index.push_back(2*(myN0+myN1+2)-i-1);
900     index.push_back(2*(myN0+myN1+2)-i-2);
901     ptr.push_back(ptr.back()+3);
902     } else {
903     ptr.push_back(ptr.back());
904     }
905     for (dim_t j=1; j<myN0-1; j++) {
906     ptr.push_back(ptr.back());
907     }
908     // right edge
909     if (faces[1] == 0) {
910     index.push_back(myN0+i+1);
911     index.push_back(myN0+i+2);
912     index.push_back(myN0+i+3);
913     ptr.push_back(ptr.back()+3);
914     } else {
915     ptr.push_back(ptr.back());
916     }
917     }
918    
919     // top edge
920     n=0;
921     if (faces[0] == 0) {
922     index.push_back(2*myN0+myN1+5);
923     index.push_back(2*myN0+myN1+4);
924     n+=2;
925     if (faces[3] == 0) {
926     index.push_back(2*myN0+myN1+3);
927     index.push_back(2*myN0+myN1+2);
928     index.push_back(2*myN0+myN1+1);
929     n+=3;
930     }
931     } else if (faces[3] == 0) {
932     index.push_back(2*myN0+myN1+2);
933     index.push_back(2*myN0+myN1+1);
934     n+=2;
935     }
936     // n=neighbours of top-left corner node
937     ptr.push_back(ptr.back()+n);
938     n=0;
939     if (faces[3] == 0) {
940     for (dim_t i=1; i<myN0-1; i++) {
941     index.push_back(2*myN0+myN1+i+1);
942     index.push_back(2*myN0+myN1+i);
943     index.push_back(2*myN0+myN1+i-1);
944     ptr.push_back(ptr.back()+3);
945     }
946     index.push_back(myN0+myN1+4);
947     index.push_back(myN0+myN1+3);
948     n+=2;
949     if (faces[1] == 0) {
950     index.push_back(myN0+myN1+2);
951     index.push_back(myN0+myN1+1);
952     index.push_back(myN0+myN1);
953     n+=3;
954     }
955     } else {
956     for (dim_t i=1; i<myN0-1; i++) {
957     ptr.push_back(ptr.back());
958     }
959     if (faces[1] == 0) {
960     index.push_back(myN0+myN1+1);
961     index.push_back(myN0+myN1);
962     n+=2;
963     }
964     }
965     // n=neighbours of top-right corner node
966     ptr.push_back(ptr.back()+n);
967    
968     dim_t M=ptr.size()-1;
969     map<index_t,index_t> idMap;
970     dim_t N=0;
971     for (IndexVector::iterator it=index.begin(); it!=index.end(); it++) {
972     if (idMap.find(*it)==idMap.end()) {
973     idMap[*it]=N;
974     *it=N++;
975     } else {
976     *it=idMap[*it];
977     }
978     }
979    
980 caltinay 3702 /*
981 caltinay 3699 cout << "--- colCouple_pattern ---" << endl;
982     cout << "M=" << M << ", N=" << N << endl;
983     for (size_t i=0; i<ptr.size(); i++) {
984     cout << "ptr[" << i << "]=" << ptr[i] << endl;
985     }
986     for (size_t i=0; i<index.size(); i++) {
987     cout << "index[" << i << "]=" << index[i] << endl;
988     }
989 caltinay 3702 */
990 caltinay 3699
991     // now build the row couple pattern
992     IndexVector ptr2(1,0);
993     IndexVector index2;
994     for (dim_t id=0; id<N; id++) {
995     n=0;
996     for (dim_t i=0; i<M; i++) {
997     for (dim_t j=ptr[i]; j<ptr[i+1]; j++) {
998     if (index[j]==id) {
999     index2.push_back(i);
1000     n++;
1001     break;
1002     }
1003     }
1004     }
1005     ptr2.push_back(ptr2.back()+n);
1006     }
1007    
1008 caltinay 3702 /*
1009 caltinay 3699 cout << "--- rowCouple_pattern ---" << endl;
1010     cout << "M=" << N << ", N=" << M << endl;
1011     for (size_t i=0; i<ptr2.size(); i++) {
1012     cout << "ptr[" << i << "]=" << ptr2[i] << endl;
1013     }
1014     for (size_t i=0; i<index2.size(); i++) {
1015     cout << "index[" << i << "]=" << index2[i] << endl;
1016     }
1017 caltinay 3702 */
1018 caltinay 3699
1019     // paso will manage the memory
1020     index_t* indexC = MEMALLOC(index.size(), index_t);
1021     index_t* ptrC = MEMALLOC(ptr.size(), index_t);
1022     copy(index.begin(), index.end(), indexC);
1023     copy(ptr.begin(), ptr.end(), ptrC);
1024     *colPattern=Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT, M, N, ptrC, indexC);
1025    
1026     // paso will manage the memory
1027     indexC = MEMALLOC(index2.size(), index_t);
1028     ptrC = MEMALLOC(ptr2.size(), index_t);
1029     copy(index2.begin(), index2.end(), indexC);
1030     copy(ptr2.begin(), ptr2.end(), ptrC);
1031     *rowPattern=Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT, N, M, ptrC, indexC);
1032     }
1033    
1034 caltinay 3702 //protected
1035     void Rectangle::interpolateNodesOnElements(escript::Data& out, escript::Data& in) const
1036     {
1037     const dim_t numComp = in.getDataPointSize();
1038     /* GENERATOR SNIP_INTERPOLATE_ELEMENTS TOP */
1039     const double tmp0_2 = 0.62200846792814621559;
1040     const double tmp0_1 = 0.044658198738520451079;
1041     const double tmp0_0 = 0.16666666666666666667;
1042     #pragma omp parallel for
1043     for (index_t k1=0; k1 < m_NE1; ++k1) {
1044     for (index_t k0=0; k0 < m_NE0; ++k0) {
1045     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0));
1046     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0));
1047     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0));
1048     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0));
1049     double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0));
1050     for (index_t i=0; i < numComp; ++i) {
1051     o[INDEX2(i,numComp,0)] = f_00[i]*tmp0_2 + f_11[i]*tmp0_1 + tmp0_0*(f_01[i] + f_10[i]);
1052     o[INDEX2(i,numComp,1)] = f_01[i]*tmp0_1 + f_10[i]*tmp0_2 + tmp0_0*(f_00[i] + f_11[i]);
1053     o[INDEX2(i,numComp,2)] = f_01[i]*tmp0_2 + f_10[i]*tmp0_1 + tmp0_0*(f_00[i] + f_11[i]);
1054     o[INDEX2(i,numComp,3)] = f_00[i]*tmp0_1 + f_11[i]*tmp0_2 + tmp0_0*(f_01[i] + f_10[i]);
1055     } /* end of component loop i */
1056     } /* end of k0 loop */
1057     } /* end of k1 loop */
1058     /* GENERATOR SNIP_INTERPOLATE_ELEMENTS BOTTOM */
1059     }
1060    
1061     //protected
1062     void Rectangle::interpolateNodesOnFaces(escript::Data& out, escript::Data& in) const
1063     {
1064 caltinay 3704 const dim_t numComp = in.getDataPointSize();
1065     /* GENERATOR SNIP_INTERPOLATE_FACES TOP */
1066     if (m_faceOffset[0] > -1) {
1067     const double tmp0_1 = 0.78867513459481288225;
1068     const double tmp0_0 = 0.21132486540518711775;
1069     #pragma omp parallel for
1070     for (index_t k1=0; k1 < m_NE1; ++k1) {
1071     const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0));
1072     const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0));
1073 caltinay 3707 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
1074 caltinay 3704 for (index_t i=0; i < numComp; ++i) {
1075     o[INDEX2(i,numComp,0)] = f_00[i]*tmp0_1 + f_01[i]*tmp0_0;
1076     o[INDEX2(i,numComp,1)] = f_00[i]*tmp0_0 + f_01[i]*tmp0_1;
1077     } /* end of component loop i */
1078     } /* end of k1 loop */
1079     } /* end of face 0 */
1080     if (m_faceOffset[1] > -1) {
1081     const double tmp0_1 = 0.21132486540518711775;
1082     const double tmp0_0 = 0.78867513459481288225;
1083     #pragma omp parallel for
1084     for (index_t k1=0; k1 < m_NE1; ++k1) {
1085     const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0));
1086     const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0));
1087 caltinay 3707 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
1088 caltinay 3704 for (index_t i=0; i < numComp; ++i) {
1089     o[INDEX2(i,numComp,0)] = f_10[i]*tmp0_0 + f_11[i]*tmp0_1;
1090     o[INDEX2(i,numComp,1)] = f_10[i]*tmp0_1 + f_11[i]*tmp0_0;
1091     } /* end of component loop i */
1092     } /* end of k1 loop */
1093     } /* end of face 1 */
1094     if (m_faceOffset[2] > -1) {
1095     const double tmp0_1 = 0.78867513459481288225;
1096     const double tmp0_0 = 0.21132486540518711775;
1097     #pragma omp parallel for
1098     for (index_t k0=0; k0 < m_NE0; ++k0) {
1099     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0));
1100     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0));
1101 caltinay 3707 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1102 caltinay 3704 for (index_t i=0; i < numComp; ++i) {
1103     o[INDEX2(i,numComp,0)] = f_00[i]*tmp0_1 + f_10[i]*tmp0_0;
1104     o[INDEX2(i,numComp,1)] = f_00[i]*tmp0_0 + f_10[i]*tmp0_1;
1105     } /* end of component loop i */
1106     } /* end of k0 loop */
1107     } /* end of face 2 */
1108     if (m_faceOffset[3] > -1) {
1109     const double tmp0_1 = 0.78867513459481288225;
1110     const double tmp0_0 = 0.21132486540518711775;
1111     #pragma omp parallel for
1112     for (index_t k0=0; k0 < m_NE0; ++k0) {
1113     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0));
1114     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0));
1115 caltinay 3707 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1116 caltinay 3704 for (index_t i=0; i < numComp; ++i) {
1117     o[INDEX2(i,numComp,0)] = f_01[i]*tmp0_1 + f_11[i]*tmp0_0;
1118     o[INDEX2(i,numComp,1)] = f_01[i]*tmp0_0 + f_11[i]*tmp0_1;
1119     } /* end of component loop i */
1120     } /* end of k0 loop */
1121     } /* end of face 3 */
1122     /* GENERATOR SNIP_INTERPOLATE_FACES BOTTOM */
1123 caltinay 3702 }
1124    
1125 caltinay 3691 } // end of namespace ripley
1126    

  ViewVC Help
Powered by ViewVC 1.1.26