/[escript]/branches/ripleygmg_from_3668/ripley/src/Rectangle.cpp
ViewVC logotype

Annotation of /branches/ripleygmg_from_3668/ripley/src/Rectangle.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3711 - (hide annotations)
Tue Dec 6 00:24:43 2011 UTC (8 years ago) by caltinay
File size: 52598 byte(s)
Interpolation & Gradient for reduced elements & faces.

1 caltinay 3691
2     /*******************************************************
3     *
4     * Copyright (c) 2003-2011 by University of Queensland
5     * Earth Systems Science Computational Center (ESSCC)
6     * http://www.uq.edu.au/esscc
7     *
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13    
14     #include <ripley/Rectangle.h>
15     extern "C" {
16     #include "paso/SystemMatrixPattern.h"
17     }
18    
19     #if USE_SILO
20     #include <silo.h>
21     #ifdef ESYS_MPI
22     #include <pmpio.h>
23     #endif
24     #endif
25    
26     #include <iomanip>
27    
28     using namespace std;
29    
30     namespace ripley {
31    
32     Rectangle::Rectangle(int n0, int n1, double l0, double l1, int d0, int d1) :
33     RipleyDomain(2),
34     m_gNE0(n0),
35     m_gNE1(n1),
36     m_l0(l0),
37     m_l1(l1),
38     m_NX(d0),
39     m_NY(d1)
40     {
41     // ensure number of subdivisions is valid and nodes can be distributed
42     // among number of ranks
43     if (m_NX*m_NY != m_mpiInfo->size)
44     throw RipleyException("Invalid number of spatial subdivisions");
45    
46     if (n0%m_NX > 0 || n1%m_NY > 0)
47     throw RipleyException("Number of elements must be separable into number of ranks in each dimension");
48    
49     // local number of elements
50     m_NE0 = n0/m_NX;
51     m_NE1 = n1/m_NY;
52     // local number of nodes (not necessarily owned)
53     m_N0 = m_NE0+1;
54     m_N1 = m_NE1+1;
55     // bottom-left node is at (offset0,offset1) in global mesh
56     m_offset0 = m_NE0*(m_mpiInfo->rank%m_NX);
57     m_offset1 = m_NE1*(m_mpiInfo->rank/m_NX);
58     populateSampleIds();
59     }
60    
61     Rectangle::~Rectangle()
62     {
63     }
64    
65     string Rectangle::getDescription() const
66     {
67     return "ripley::Rectangle";
68     }
69    
70     bool Rectangle::operator==(const AbstractDomain& other) const
71     {
72     if (dynamic_cast<const Rectangle*>(&other))
73     return this==&other;
74    
75     return false;
76     }
77    
78     void Rectangle::dump(const string& fileName) const
79     {
80     #if USE_SILO
81     string fn(fileName);
82     if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
83     fn+=".silo";
84     }
85    
86     const int NUM_SILO_FILES = 1;
87     const char* blockDirFmt = "/block%04d";
88     int driver=DB_HDF5;
89     string siloPath;
90     DBfile* dbfile = NULL;
91    
92     #ifdef ESYS_MPI
93     PMPIO_baton_t* baton = NULL;
94     #endif
95    
96     if (m_mpiInfo->size > 1) {
97     #ifdef ESYS_MPI
98     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
99     0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
100     PMPIO_DefaultClose, (void*)&driver);
101     // try the fallback driver in case of error
102     if (!baton && driver != DB_PDB) {
103     driver = DB_PDB;
104     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
105     0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
106     PMPIO_DefaultClose, (void*)&driver);
107     }
108     if (baton) {
109     char str[64];
110     snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
111     siloPath = str;
112     dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
113     }
114     #endif
115     } else {
116     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
117     getDescription().c_str(), driver);
118     // try the fallback driver in case of error
119     if (!dbfile && driver != DB_PDB) {
120     driver = DB_PDB;
121     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
122     getDescription().c_str(), driver);
123     }
124     }
125    
126     if (!dbfile)
127     throw RipleyException("dump: Could not create Silo file");
128    
129     /*
130     if (driver==DB_HDF5) {
131     // gzip level 1 already provides good compression with minimal
132     // performance penalty. Some tests showed that gzip levels >3 performed
133     // rather badly on escript data both in terms of time and space
134     DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
135     }
136     */
137    
138     boost::scoped_ptr<double> x(new double[m_N0]);
139     boost::scoped_ptr<double> y(new double[m_N1]);
140     double* coords[2] = { x.get(), y.get() };
141 caltinay 3697 pair<double,double> xdx = getFirstCoordAndSpacing(0);
142     pair<double,double> ydy = getFirstCoordAndSpacing(1);
143 caltinay 3691 #pragma omp parallel
144     {
145     #pragma omp for
146     for (dim_t i0 = 0; i0 < m_N0; i0++) {
147 caltinay 3697 coords[0][i0]=xdx.first+i0*xdx.second;
148 caltinay 3691 }
149     #pragma omp for
150     for (dim_t i1 = 0; i1 < m_N1; i1++) {
151 caltinay 3697 coords[1][i1]=ydy.first+i1*ydy.second;
152 caltinay 3691 }
153     }
154 caltinay 3697 IndexVector dims = getNumNodesPerDim();
155    
156     // write mesh
157     DBPutQuadmesh(dbfile, "mesh", NULL, coords, &dims[0], 2, DB_DOUBLE,
158 caltinay 3691 DB_COLLINEAR, NULL);
159    
160 caltinay 3697 // write node ids
161     DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], &dims[0], 2,
162     NULL, 0, DB_INT, DB_NODECENT, NULL);
163    
164     // write element ids
165     dims = getNumElementsPerDim();
166     DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
167     &dims[0], 2, NULL, 0, DB_INT, DB_ZONECENT, NULL);
168    
169     // rank 0 writes multimesh and multivar
170 caltinay 3691 if (m_mpiInfo->rank == 0) {
171     vector<string> tempstrings;
172 caltinay 3697 vector<char*> names;
173 caltinay 3691 for (dim_t i=0; i<m_mpiInfo->size; i++) {
174     stringstream path;
175     path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
176     tempstrings.push_back(path.str());
177 caltinay 3697 names.push_back((char*)tempstrings.back().c_str());
178 caltinay 3691 }
179 caltinay 3697 vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
180 caltinay 3691 DBSetDir(dbfile, "/");
181 caltinay 3697 DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
182     &types[0], NULL);
183     tempstrings.clear();
184     names.clear();
185     for (dim_t i=0; i<m_mpiInfo->size; i++) {
186     stringstream path;
187     path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
188     tempstrings.push_back(path.str());
189     names.push_back((char*)tempstrings.back().c_str());
190     }
191     types.assign(m_mpiInfo->size, DB_QUADVAR);
192     DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
193     &types[0], NULL);
194     tempstrings.clear();
195     names.clear();
196     for (dim_t i=0; i<m_mpiInfo->size; i++) {
197     stringstream path;
198     path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
199     tempstrings.push_back(path.str());
200     names.push_back((char*)tempstrings.back().c_str());
201     }
202     DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
203     &types[0], NULL);
204 caltinay 3691 }
205    
206     if (m_mpiInfo->size > 1) {
207     #ifdef ESYS_MPI
208     PMPIO_HandOffBaton(baton, dbfile);
209     PMPIO_Finish(baton);
210     #endif
211     } else {
212     DBClose(dbfile);
213     }
214    
215     #else // USE_SILO
216     throw RipleyException("dump(): no Silo support");
217     #endif
218     }
219    
220 caltinay 3697 const int* Rectangle::borrowSampleReferenceIDs(int fsType) const
221 caltinay 3691 {
222 caltinay 3697 switch (fsType) {
223 caltinay 3691 case Nodes:
224     return &m_nodeId[0];
225     case Elements:
226     return &m_elementId[0];
227     case FaceElements:
228     return &m_faceId[0];
229     default:
230     break;
231     }
232    
233     stringstream msg;
234     msg << "borrowSampleReferenceIDs() not implemented for function space type "
235 caltinay 3701 << functionSpaceTypeAsString(fsType);
236 caltinay 3691 throw RipleyException(msg.str());
237     }
238    
239     bool Rectangle::ownSample(int fsCode, index_t id) const
240     {
241     #ifdef ESYS_MPI
242 caltinay 3702 if (fsCode != ReducedNodes) {
243 caltinay 3699 const index_t myFirst=m_nodeDistribution[m_mpiInfo->rank];
244     const index_t myLast=m_nodeDistribution[m_mpiInfo->rank+1]-1;
245 caltinay 3691 return (m_nodeId[id]>=myFirst && m_nodeId[id]<=myLast);
246 caltinay 3702 } else {
247     stringstream msg;
248     msg << "ownSample() not implemented for "
249     << functionSpaceTypeAsString(fsCode);
250     throw RipleyException(msg.str());
251     }
252 caltinay 3691 #else
253     return true;
254     #endif
255     }
256    
257 caltinay 3702 void Rectangle::setToGradient(escript::Data& out, const escript::Data& cIn) const
258 caltinay 3691 {
259 caltinay 3702 escript::Data& in = *const_cast<escript::Data*>(&cIn);
260 caltinay 3701 const dim_t numComp = in.getDataPointSize();
261 caltinay 3702 const double h0 = m_l0/m_gNE0;
262     const double h1 = m_l1/m_gNE1;
263     if (out.getFunctionSpace().getTypeCode() == Elements) {
264     /* GENERATOR SNIP_GRAD_ELEMENTS TOP */
265     const double tmp0_13 = 0.78867513459481288225/h1;
266     const double tmp0_0 = 0.78867513459481288225/h0;
267     const double tmp0_4 = 0.21132486540518711775/h0;
268     const double tmp0_10 = 0.78867513459481288225/h1;
269     const double tmp0_1 = 0.21132486540518711775/h0;
270     const double tmp0_8 = -0.21132486540518711775/h1;
271     const double tmp0_14 = 0.21132486540518711775/h1;
272     const double tmp0_5 = 0.78867513459481288225/h0;
273     const double tmp0_11 = -0.78867513459481288225/h1;
274     const double tmp0_2 = -0.21132486540518711775/h0;
275     const double tmp0_9 = 0.21132486540518711775/h1;
276     const double tmp0_15 = -0.21132486540518711775/h1;
277     const double tmp0_6 = -0.78867513459481288225/h0;
278     const double tmp0_3 = -0.78867513459481288225/h0;
279     const double tmp0_12 = -0.78867513459481288225/h1;
280     const double tmp0_7 = -0.21132486540518711775/h0;
281 caltinay 3697 #pragma omp parallel for
282 caltinay 3707 for (index_t k1=0; k1 < m_NE1; ++k1) {
283     for (index_t k0=0; k0 < m_NE0; ++k0) {
284 caltinay 3702 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0));
285     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0));
286     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0));
287     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0));
288     double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0));
289     for (index_t i=0; i < numComp; ++i) {
290     o[INDEX3(i,0,0,numComp,2)] = f_00[i]*tmp0_3 + f_01[i]*tmp0_2 + f_10[i]*tmp0_0 + f_11[i]*tmp0_1;
291     o[INDEX3(i,1,0,numComp,2)] = f_00[i]*tmp0_11 + f_01[i]*tmp0_10 + f_10[i]*tmp0_8 + f_11[i]*tmp0_9;
292     o[INDEX3(i,0,1,numComp,2)] = f_00[i]*tmp0_3 + f_01[i]*tmp0_2 + f_10[i]*tmp0_0 + f_11[i]*tmp0_1;
293     o[INDEX3(i,1,1,numComp,2)] = f_00[i]*tmp0_15 + f_01[i]*tmp0_14 + f_10[i]*tmp0_12 + f_11[i]*tmp0_13;
294     o[INDEX3(i,0,2,numComp,2)] = f_00[i]*tmp0_7 + f_01[i]*tmp0_6 + f_10[i]*tmp0_4 + f_11[i]*tmp0_5;
295     o[INDEX3(i,1,2,numComp,2)] = f_00[i]*tmp0_11 + f_01[i]*tmp0_10 + f_10[i]*tmp0_8 + f_11[i]*tmp0_9;
296     o[INDEX3(i,0,3,numComp,2)] = f_00[i]*tmp0_7 + f_01[i]*tmp0_6 + f_10[i]*tmp0_4 + f_11[i]*tmp0_5;
297     o[INDEX3(i,1,3,numComp,2)] = f_00[i]*tmp0_15 + f_01[i]*tmp0_14 + f_10[i]*tmp0_12 + f_11[i]*tmp0_13;
298     } /* end of component loop i */
299     } /* end of k0 loop */
300     } /* end of k1 loop */
301     /* GENERATOR SNIP_GRAD_ELEMENTS BOTTOM */
302 caltinay 3711 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
303     /* GENERATOR SNIP_GRAD_REDUCED_ELEMENTS TOP */
304     const double tmp0_3 = 0.5/h1;
305     const double tmp0_2 = -0.5/h1;
306     const double tmp0_1 = -0.5/h0;
307     const double tmp0_0 = 0.5/h0;
308     #pragma omp parallel for
309     for (index_t k1=0; k1 < m_NE1; ++k1) {
310     for (index_t k0=0; k0 < m_NE0; ++k0) {
311     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0));
312     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0));
313     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0));
314     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0));
315     double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0));
316     for (index_t i=0; i < numComp; ++i) {
317     o[INDEX3(i,0,0,numComp,2)] = tmp0_0*(f_10[i] + f_11[i]) + tmp0_1*(f_00[i] + f_01[i]);
318     o[INDEX3(i,1,0,numComp,2)] = tmp0_2*(f_00[i] + f_10[i]) + tmp0_3*(f_01[i] + f_11[i]);
319     } /* end of component loop i */
320     } /* end of k0 loop */
321     } /* end of k1 loop */
322     /* GENERATOR SNIP_GRAD_REDUCED_ELEMENTS BOTTOM */
323 caltinay 3707 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
324     /* GENERATOR SNIP_GRAD_FACES TOP */
325     if (m_faceOffset[0] > -1) {
326     const double tmp0_0 = 0.78867513459481288225/h0;
327     const double tmp0_4 = 0.21132486540518711775/h0;
328     const double tmp0_1 = 0.21132486540518711775/h0;
329     const double tmp0_8 = 1.0/h1;
330     const double tmp0_5 = 0.78867513459481288225/h0;
331     const double tmp0_2 = -0.21132486540518711775/h0;
332     const double tmp0_9 = -1/h1;
333     const double tmp0_6 = -0.78867513459481288225/h0;
334     const double tmp0_3 = -0.78867513459481288225/h0;
335     const double tmp0_7 = -0.21132486540518711775/h0;
336     #pragma omp parallel for
337     for (index_t k1=0; k1 < m_NE1; ++k1) {
338     const register double* f_10 = in.getSampleDataRO(INDEX2(1,k1, m_N0));
339     const register double* f_11 = in.getSampleDataRO(INDEX2(1,k1+1, m_N0));
340     const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0));
341     const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0));
342     double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
343     for (index_t i=0; i < numComp; ++i) {
344     o[INDEX3(i,0,0,numComp,2)] = f_00[i]*tmp0_3 + f_01[i]*tmp0_2 + f_10[i]*tmp0_0 + f_11[i]*tmp0_1;
345     o[INDEX3(i,1,0,numComp,2)] = f_00[i]*tmp0_9 + f_01[i]*tmp0_8;
346     o[INDEX3(i,0,1,numComp,2)] = f_00[i]*tmp0_7 + f_01[i]*tmp0_6 + f_10[i]*tmp0_4 + f_11[i]*tmp0_5;
347     o[INDEX3(i,1,1,numComp,2)] = f_00[i]*tmp0_9 + f_01[i]*tmp0_8;
348     } /* end of component loop i */
349     } /* end of k1 loop */
350     } /* end of face 0 */
351     if (m_faceOffset[1] > -1) {
352     const double tmp0_0 = 0.78867513459481288225/h0;
353     const double tmp0_4 = 0.21132486540518711775/h0;
354     const double tmp0_1 = 0.21132486540518711775/h0;
355     const double tmp0_8 = -1/h1;
356     const double tmp0_5 = 0.78867513459481288225/h0;
357     const double tmp0_2 = -0.21132486540518711775/h0;
358     const double tmp0_9 = 1.0/h1;
359     const double tmp0_6 = -0.78867513459481288225/h0;
360     const double tmp0_3 = -0.78867513459481288225/h0;
361     const double tmp0_7 = -0.21132486540518711775/h0;
362     #pragma omp parallel for
363     for (index_t k1=0; k1 < m_NE1; ++k1) {
364     const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0));
365     const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0));
366     const register double* f_01 = in.getSampleDataRO(INDEX2(m_N0-2,k1+1, m_N0));
367     const register double* f_00 = in.getSampleDataRO(INDEX2(m_N0-2,k1, m_N0));
368     double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
369     for (index_t i=0; i < numComp; ++i) {
370     o[INDEX3(i,0,0,numComp,2)] = f_00[i]*tmp0_3 + f_01[i]*tmp0_2 + f_10[i]*tmp0_0 + f_11[i]*tmp0_1;
371     o[INDEX3(i,1,0,numComp,2)] = f_10[i]*tmp0_8 + f_11[i]*tmp0_9;
372     o[INDEX3(i,0,1,numComp,2)] = f_00[i]*tmp0_7 + f_01[i]*tmp0_6 + f_10[i]*tmp0_4 + f_11[i]*tmp0_5;
373     o[INDEX3(i,1,1,numComp,2)] = f_10[i]*tmp0_8 + f_11[i]*tmp0_9;
374     } /* end of component loop i */
375     } /* end of k1 loop */
376     } /* end of face 1 */
377     if (m_faceOffset[2] > -1) {
378     const double tmp0_0 = -1/h0;
379     const double tmp0_4 = 0.21132486540518711775/h1;
380     const double tmp0_1 = 1.0/h0;
381     const double tmp0_8 = 0.78867513459481288225/h1;
382     const double tmp0_5 = 0.78867513459481288225/h1;
383     const double tmp0_2 = -0.78867513459481288225/h1;
384     const double tmp0_9 = 0.21132486540518711775/h1;
385     const double tmp0_6 = -0.21132486540518711775/h1;
386     const double tmp0_3 = -0.21132486540518711775/h1;
387     const double tmp0_7 = -0.78867513459481288225/h1;
388     #pragma omp parallel for
389     for (index_t k0=0; k0 < m_NE0; ++k0) {
390     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0));
391     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0));
392     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,1, m_N0));
393     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,1, m_N0));
394     double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
395     for (index_t i=0; i < numComp; ++i) {
396     o[INDEX3(i,0,0,numComp,2)] = f_00[i]*tmp0_0 + f_10[i]*tmp0_1;
397     o[INDEX3(i,1,0,numComp,2)] = f_00[i]*tmp0_2 + f_01[i]*tmp0_5 + f_10[i]*tmp0_3 + f_11[i]*tmp0_4;
398     o[INDEX3(i,0,1,numComp,2)] = f_00[i]*tmp0_0 + f_10[i]*tmp0_1;
399     o[INDEX3(i,1,1,numComp,2)] = f_00[i]*tmp0_6 + f_01[i]*tmp0_9 + f_10[i]*tmp0_7 + f_11[i]*tmp0_8;
400     } /* end of component loop i */
401     } /* end of k0 loop */
402     } /* end of face 2 */
403     if (m_faceOffset[3] > -1) {
404     const double tmp0_0 = 1.0/h0;
405     const double tmp0_4 = -0.21132486540518711775/h1;
406     const double tmp0_1 = -1/h0;
407     const double tmp0_8 = -0.78867513459481288225/h1;
408     const double tmp0_5 = -0.78867513459481288225/h1;
409     const double tmp0_2 = 0.21132486540518711775/h1;
410     const double tmp0_9 = -0.21132486540518711775/h1;
411     const double tmp0_6 = 0.78867513459481288225/h1;
412     const double tmp0_3 = 0.78867513459481288225/h1;
413     const double tmp0_7 = 0.21132486540518711775/h1;
414     #pragma omp parallel for
415     for (index_t k0=0; k0 < m_NE0; ++k0) {
416     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0));
417     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0));
418     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,m_N1-2, m_N0));
419     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,m_N1-2, m_N0));
420     double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
421     for (index_t i=0; i < numComp; ++i) {
422     o[INDEX3(i,0,0,numComp,2)] = f_01[i]*tmp0_1 + f_11[i]*tmp0_0;
423     o[INDEX3(i,1,0,numComp,2)] = f_00[i]*tmp0_5 + f_01[i]*tmp0_3 + f_10[i]*tmp0_4 + f_11[i]*tmp0_2;
424     o[INDEX3(i,0,1,numComp,2)] = f_01[i]*tmp0_1 + f_11[i]*tmp0_0;
425     o[INDEX3(i,1,1,numComp,2)] = f_00[i]*tmp0_9 + f_01[i]*tmp0_7 + f_10[i]*tmp0_8 + f_11[i]*tmp0_6;
426     } /* end of component loop i */
427     } /* end of k0 loop */
428     } /* end of face 3 */
429     /* GENERATOR SNIP_GRAD_FACES BOTTOM */
430 caltinay 3711 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
431     /* GENERATOR SNIP_GRAD_REDUCED_FACES TOP */
432     if (m_faceOffset[0] > -1) {
433     const double tmp0_3 = -1/h1;
434     const double tmp0_2 = 1.0/h1;
435     const double tmp0_1 = -0.5/h0;
436     const double tmp0_0 = 0.5/h0;
437     #pragma omp parallel for
438     for (index_t k1=0; k1 < m_NE1; ++k1) {
439     const register double* f_10 = in.getSampleDataRO(INDEX2(1,k1, m_N0));
440     const register double* f_11 = in.getSampleDataRO(INDEX2(1,k1+1, m_N0));
441     const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0));
442     const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0));
443     double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
444     for (index_t i=0; i < numComp; ++i) {
445     o[INDEX3(i,0,0,numComp,2)] = tmp0_0*(f_10[i] + f_11[i]) + tmp0_1*(f_00[i] + f_01[i]);
446     o[INDEX3(i,1,0,numComp,2)] = f_00[i]*tmp0_3 + f_01[i]*tmp0_2;
447     } /* end of component loop i */
448     } /* end of k1 loop */
449     } /* end of face 0 */
450     if (m_faceOffset[1] > -1) {
451     const double tmp0_3 = 1.0/h1;
452     const double tmp0_2 = -1/h1;
453     const double tmp0_1 = -0.5/h0;
454     const double tmp0_0 = 0.5/h0;
455     #pragma omp parallel for
456     for (index_t k1=0; k1 < m_NE1; ++k1) {
457     const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0));
458     const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0));
459     const register double* f_01 = in.getSampleDataRO(INDEX2(m_N0-2,k1+1, m_N0));
460     const register double* f_00 = in.getSampleDataRO(INDEX2(m_N0-2,k1, m_N0));
461     double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
462     for (index_t i=0; i < numComp; ++i) {
463     o[INDEX3(i,0,0,numComp,2)] = tmp0_0*(f_10[i] + f_11[i]) + tmp0_1*(f_00[i] + f_01[i]);
464     o[INDEX3(i,1,0,numComp,2)] = f_10[i]*tmp0_2 + f_11[i]*tmp0_3;
465     } /* end of component loop i */
466     } /* end of k1 loop */
467     } /* end of face 1 */
468     if (m_faceOffset[2] > -1) {
469     const double tmp0_3 = 0.5/h1;
470     const double tmp0_2 = -0.5/h1;
471     const double tmp0_1 = 1.0/h0;
472     const double tmp0_0 = -1/h0;
473     #pragma omp parallel for
474     for (index_t k0=0; k0 < m_NE0; ++k0) {
475     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0));
476     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0));
477     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,1, m_N0));
478     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,1, m_N0));
479     double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
480     for (index_t i=0; i < numComp; ++i) {
481     o[INDEX3(i,0,0,numComp,2)] = f_00[i]*tmp0_0 + f_10[i]*tmp0_1;
482     o[INDEX3(i,1,0,numComp,2)] = tmp0_2*(f_00[i] + f_10[i]) + tmp0_3*(f_01[i] + f_11[i]);
483     } /* end of component loop i */
484     } /* end of k0 loop */
485     } /* end of face 2 */
486     if (m_faceOffset[3] > -1) {
487     const double tmp0_3 = -0.5/h1;
488     const double tmp0_2 = 0.5/h1;
489     const double tmp0_1 = -1/h0;
490     const double tmp0_0 = 1.0/h0;
491     #pragma omp parallel for
492     for (index_t k0=0; k0 < m_NE0; ++k0) {
493     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0));
494     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0));
495     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,m_N1-2, m_N0));
496     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,m_N1-2, m_N0));
497     double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
498     for (index_t i=0; i < numComp; ++i) {
499     o[INDEX3(i,0,0,numComp,2)] = f_01[i]*tmp0_1 + f_11[i]*tmp0_0;
500     o[INDEX3(i,1,0,numComp,2)] = tmp0_2*(f_01[i] + f_11[i]) + tmp0_3*(f_00[i] + f_10[i]);
501     } /* end of component loop i */
502     } /* end of k0 loop */
503     } /* end of face 3 */
504     /* GENERATOR SNIP_GRAD_REDUCED_FACES BOTTOM */
505 caltinay 3702 } else {
506 caltinay 3707 stringstream msg;
507     msg << "setToGradient() not implemented for "
508     << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
509     throw RipleyException(msg.str());
510 caltinay 3702 }
511 caltinay 3701 }
512 caltinay 3697
513 caltinay 3701 void Rectangle::addPDEToSystem(escript::AbstractSystemMatrix& mat,
514     escript::Data& rhs, const escript::Data& A, const escript::Data& B,
515     const escript::Data& C, const escript::Data& D,
516     const escript::Data& X, const escript::Data& Y,
517     const escript::Data& d, const escript::Data& y,
518     const escript::Data& d_contact, const escript::Data& y_contact,
519     const escript::Data& d_dirac, const escript::Data& y_dirac) const
520     {
521     throw RipleyException("addPDEToSystem() not implemented");
522     }
523    
524 caltinay 3691 Paso_SystemMatrixPattern* Rectangle::getPattern(bool reducedRowOrder,
525     bool reducedColOrder) const
526     {
527     if (reducedRowOrder || reducedColOrder)
528     throw RipleyException("getPattern() not implemented for reduced order");
529    
530 caltinay 3699 // connector
531     RankVector neighbour;
532     IndexVector offsetInShared(1,0);
533     IndexVector sendShared, recvShared;
534     const IndexVector faces=getNumFacesPerBoundary();
535     const index_t left = (m_offset0==0 ? 0 : 1);
536     const index_t bottom = (m_offset1==0 ? 0 : 1);
537     // corner node from bottom-left
538     if (faces[0] == 0 && faces[2] == 0) {
539     neighbour.push_back(m_mpiInfo->rank-m_NX-1);
540     offsetInShared.push_back(offsetInShared.back()+1);
541     sendShared.push_back(m_nodeId[m_N0+left]);
542     recvShared.push_back(m_nodeId[0]);
543     }
544     // bottom edge
545     if (faces[2] == 0) {
546     neighbour.push_back(m_mpiInfo->rank-m_NX);
547     offsetInShared.push_back(offsetInShared.back()+m_N0-left);
548     for (dim_t i=left; i<m_N0; i++) {
549     // easy case, we know the neighbour id's
550     sendShared.push_back(m_nodeId[m_N0+i]);
551     recvShared.push_back(m_nodeId[i]);
552     }
553     }
554     // corner node from bottom-right
555     if (faces[1] == 0 && faces[2] == 0) {
556     neighbour.push_back(m_mpiInfo->rank-m_NX+1);
557     const index_t N0=(neighbour.back()%m_NX == 0 ? m_N0 : m_N0-1);
558     const index_t N1=(neighbour.back()/m_NX == 0 ? m_N1 : m_N1-1);
559     const index_t first=m_nodeDistribution[neighbour.back()];
560     offsetInShared.push_back(offsetInShared.back()+1);
561     sendShared.push_back(m_nodeId[(bottom+1)*m_N0-1]);
562     recvShared.push_back(first+N0*(N1-1));
563     }
564     // left edge
565     if (faces[0] == 0) {
566     neighbour.push_back(m_mpiInfo->rank-1);
567     offsetInShared.push_back(offsetInShared.back()+m_N1-bottom);
568     for (dim_t i=bottom; i<m_N1; i++) {
569     // easy case, we know the neighbour id's
570     sendShared.push_back(m_nodeId[i*m_N0+1]);
571     recvShared.push_back(m_nodeId[i*m_N0]);
572     }
573     }
574     // right edge
575     if (faces[1] == 0) {
576     neighbour.push_back(m_mpiInfo->rank+1);
577     const index_t rightN0=(neighbour.back()%m_NX == 0 ? m_N0 : m_N0-1);
578     const index_t first=m_nodeDistribution[neighbour.back()];
579     offsetInShared.push_back(offsetInShared.back()+m_N1-bottom);
580     for (dim_t i=bottom; i<m_N1; i++) {
581     sendShared.push_back(m_nodeId[(i+1)*m_N0-1]);
582     recvShared.push_back(first+rightN0*(i-bottom));
583     }
584     }
585     // corner node from top-left
586     if (faces[0] == 0 && faces[3] == 0) {
587     neighbour.push_back(m_mpiInfo->rank+m_NX-1);
588     const index_t N0=(neighbour.back()%m_NX == 0 ? m_N0 : m_N0-1);
589     const index_t first=m_nodeDistribution[neighbour.back()];
590     offsetInShared.push_back(offsetInShared.back()+1);
591     sendShared.push_back(m_nodeId[m_N0*(m_N1-1)+left]);
592     recvShared.push_back(first+N0-1);
593     }
594     // top edge
595     if (faces[3] == 0) {
596     neighbour.push_back(m_mpiInfo->rank+m_NX);
597     const index_t first=m_nodeDistribution[neighbour.back()];
598     offsetInShared.push_back(offsetInShared.back()+m_N0-left);
599     for (dim_t i=left; i<m_N0; i++) {
600     sendShared.push_back(m_nodeId[m_N0*(m_N1-1)+i]);
601     recvShared.push_back(first+i-left);
602     }
603     }
604     // corner node from top-right
605     if (faces[1] == 0 && faces[3] == 0) {
606     neighbour.push_back(m_mpiInfo->rank+m_NX+1);
607     const index_t first=m_nodeDistribution[neighbour.back()];
608     offsetInShared.push_back(offsetInShared.back()+1);
609     sendShared.push_back(m_nodeId[m_N0*m_N1-1]);
610     recvShared.push_back(first);
611     }
612     const int numDOF=m_nodeDistribution[m_mpiInfo->rank+1]-m_nodeDistribution[m_mpiInfo->rank];
613 caltinay 3702 /*
614 caltinay 3699 cout << "--- rcv_shcomp ---" << endl;
615     cout << "numDOF=" << numDOF << ", numNeighbors=" << neighbour.size() << endl;
616     for (size_t i=0; i<neighbour.size(); i++) {
617     cout << "neighbor[" << i << "]=" << neighbour[i]
618     << " offsetInShared[" << i+1 << "]=" << offsetInShared[i+1] << endl;
619     }
620     for (size_t i=0; i<recvShared.size(); i++) {
621     cout << "shared[" << i << "]=" << recvShared[i] << endl;
622     }
623     cout << "--- snd_shcomp ---" << endl;
624     for (size_t i=0; i<sendShared.size(); i++) {
625     cout << "shared[" << i << "]=" << sendShared[i] << endl;
626     }
627 caltinay 3702 */
628 caltinay 3691
629     Paso_SharedComponents *snd_shcomp = Paso_SharedComponents_alloc(
630 caltinay 3699 numDOF, neighbour.size(), &neighbour[0], &sendShared[0],
631 caltinay 3697 &offsetInShared[0], 1, 0, m_mpiInfo);
632 caltinay 3691 Paso_SharedComponents *rcv_shcomp = Paso_SharedComponents_alloc(
633 caltinay 3699 numDOF, neighbour.size(), &neighbour[0], &recvShared[0],
634 caltinay 3697 &offsetInShared[0], 1, 0, m_mpiInfo);
635 caltinay 3691 Paso_Connector* connector = Paso_Connector_alloc(snd_shcomp, rcv_shcomp);
636 caltinay 3699 Paso_SharedComponents_free(snd_shcomp);
637     Paso_SharedComponents_free(rcv_shcomp);
638 caltinay 3691
639     // create patterns
640 caltinay 3699 dim_t M, N;
641     IndexVector ptr(1,0);
642     IndexVector index;
643    
644     // main pattern
645     for (index_t i=0; i<numDOF; i++) {
646     // always add the node itself
647     index.push_back(i);
648     int num=insertNeighbours(index, i);
649     ptr.push_back(ptr.back()+num+1);
650     }
651     M=N=ptr.size()-1;
652     // paso will manage the memory
653     index_t* indexC = MEMALLOC(index.size(),index_t);
654     index_t* ptrC = MEMALLOC(ptr.size(), index_t);
655     copy(index.begin(), index.end(), indexC);
656     copy(ptr.begin(), ptr.end(), ptrC);
657 caltinay 3691 Paso_Pattern *mainPattern = Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT,
658 caltinay 3699 M, N, ptrC, indexC);
659 caltinay 3691
660 caltinay 3702 /*
661 caltinay 3699 cout << "--- main_pattern ---" << endl;
662     cout << "M=" << M << ", N=" << N << endl;
663     for (size_t i=0; i<ptr.size(); i++) {
664     cout << "ptr[" << i << "]=" << ptr[i] << endl;
665     }
666     for (size_t i=0; i<index.size(); i++) {
667     cout << "index[" << i << "]=" << index[i] << endl;
668     }
669 caltinay 3702 */
670 caltinay 3699
671     ptr.clear();
672     index.clear();
673    
674     // column & row couple patterns
675     Paso_Pattern *colCouplePattern, *rowCouplePattern;
676     generateCouplePatterns(&colCouplePattern, &rowCouplePattern);
677    
678     // allocate paso distribution
679     Paso_Distribution* distribution = Paso_Distribution_alloc(m_mpiInfo,
680     const_cast<index_t*>(&m_nodeDistribution[0]), 1, 0);
681    
682 caltinay 3691 Paso_SystemMatrixPattern* pattern = Paso_SystemMatrixPattern_alloc(
683     MATRIX_FORMAT_DEFAULT, distribution, distribution,
684     mainPattern, colCouplePattern, rowCouplePattern,
685     connector, connector);
686     Paso_Pattern_free(mainPattern);
687     Paso_Pattern_free(colCouplePattern);
688     Paso_Pattern_free(rowCouplePattern);
689     Paso_Distribution_free(distribution);
690 caltinay 3697 return pattern;
691 caltinay 3691 }
692    
693     void Rectangle::Print_Mesh_Info(const bool full) const
694     {
695     RipleyDomain::Print_Mesh_Info(full);
696     if (full) {
697     cout << " Id Coordinates" << endl;
698     cout.precision(15);
699     cout.setf(ios::scientific, ios::floatfield);
700 caltinay 3697 pair<double,double> xdx = getFirstCoordAndSpacing(0);
701     pair<double,double> ydy = getFirstCoordAndSpacing(1);
702 caltinay 3691 for (index_t i=0; i < getNumNodes(); i++) {
703     cout << " " << setw(5) << m_nodeId[i]
704 caltinay 3697 << " " << xdx.first+(i%m_N0)*xdx.second
705     << " " << ydy.first+(i/m_N0)*ydy.second << endl;
706 caltinay 3691 }
707     }
708     }
709    
710 caltinay 3697 IndexVector Rectangle::getNumNodesPerDim() const
711     {
712     IndexVector ret;
713     ret.push_back(m_N0);
714     ret.push_back(m_N1);
715     return ret;
716     }
717    
718     IndexVector Rectangle::getNumElementsPerDim() const
719     {
720     IndexVector ret;
721     ret.push_back(m_NE0);
722     ret.push_back(m_NE1);
723     return ret;
724     }
725    
726     IndexVector Rectangle::getNumFacesPerBoundary() const
727     {
728     IndexVector ret(4, 0);
729     //left
730     if (m_offset0==0)
731     ret[0]=m_NE1;
732     //right
733     if (m_mpiInfo->rank%m_NX==m_NX-1)
734     ret[1]=m_NE1;
735     //bottom
736     if (m_offset1==0)
737     ret[2]=m_NE0;
738     //top
739     if (m_mpiInfo->rank/m_NX==m_NY-1)
740     ret[3]=m_NE0;
741     return ret;
742     }
743    
744     pair<double,double> Rectangle::getFirstCoordAndSpacing(dim_t dim) const
745     {
746     if (dim==0) {
747     return pair<double,double>((m_l0*m_offset0)/m_gNE0, m_l0/m_gNE0);
748     } else if (dim==1) {
749     return pair<double,double>((m_l1*m_offset1)/m_gNE1, m_l1/m_gNE1);
750     }
751     throw RipleyException("getFirstCoordAndSpacing(): invalid argument");
752     }
753    
754 caltinay 3691 //protected
755     dim_t Rectangle::getNumFaceElements() const
756     {
757 caltinay 3699 const IndexVector faces = getNumFacesPerBoundary();
758 caltinay 3691 dim_t n=0;
759 caltinay 3699 for (size_t i=0; i<faces.size(); i++)
760     n+=faces[i];
761 caltinay 3691 return n;
762     }
763    
764     //protected
765     void Rectangle::assembleCoordinates(escript::Data& arg) const
766     {
767     escriptDataC x = arg.getDataC();
768     int numDim = m_numDim;
769     if (!isDataPointShapeEqual(&x, 1, &numDim))
770     throw RipleyException("setToX: Invalid Data object shape");
771     if (!numSamplesEqual(&x, 1, getNumNodes()))
772     throw RipleyException("setToX: Illegal number of samples in Data object");
773    
774 caltinay 3697 pair<double,double> xdx = getFirstCoordAndSpacing(0);
775     pair<double,double> ydy = getFirstCoordAndSpacing(1);
776 caltinay 3691 arg.requireWrite();
777     #pragma omp parallel for
778     for (dim_t i1 = 0; i1 < m_N1; i1++) {
779     for (dim_t i0 = 0; i0 < m_N0; i0++) {
780     double* point = arg.getSampleDataRW(i0+m_N0*i1);
781 caltinay 3697 point[0] = xdx.first+i0*xdx.second;
782     point[1] = ydy.first+i1*ydy.second;
783 caltinay 3691 }
784     }
785     }
786    
787     //private
788     void Rectangle::populateSampleIds()
789     {
790 caltinay 3697 // identifiers are ordered from left to right, bottom to top on each rank,
791     // except for the shared nodes which are owned by the rank below / to the
792     // left of the current rank
793    
794     // build node distribution vector first.
795     // m_nodeDistribution[i] is the first node id on rank i, that is
796     // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes
797     m_nodeDistribution.assign(m_mpiInfo->size+1, 0);
798     m_nodeDistribution[1]=getNumNodes();
799     for (dim_t k=1; k<m_mpiInfo->size-1; k++) {
800     const index_t x=k%m_NX;
801     const index_t y=k/m_NX;
802     index_t numNodes=getNumNodes();
803     if (x>0)
804     numNodes-=m_N1;
805     if (y>0)
806     numNodes-=m_N0;
807     if (x>0 && y>0)
808     numNodes++; // subtracted corner twice -> fix that
809     m_nodeDistribution[k+1]=m_nodeDistribution[k]+numNodes;
810     }
811     m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal();
812    
813 caltinay 3691 m_nodeId.resize(getNumNodes());
814 caltinay 3697
815     // the bottom row and left column are not owned by this rank so the
816     // identifiers need to be computed accordingly
817     const index_t left = (m_offset0==0 ? 0 : 1);
818     const index_t bottom = (m_offset1==0 ? 0 : 1);
819     if (left>0) {
820     const int neighbour=m_mpiInfo->rank-1;
821     const index_t leftN0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
822 caltinay 3691 #pragma omp parallel for
823 caltinay 3697 for (dim_t i1=bottom; i1<m_N1; i1++) {
824     m_nodeId[i1*m_N0]=m_nodeDistribution[neighbour]
825     + (i1-bottom+1)*leftN0-1;
826     }
827     }
828     if (bottom>0) {
829     const int neighbour=m_mpiInfo->rank-m_NX;
830     const index_t bottomN0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
831     const index_t bottomN1=(neighbour/m_NX == 0 ? m_N1 : m_N1-1);
832     #pragma omp parallel for
833     for (dim_t i0=left; i0<m_N0; i0++) {
834     m_nodeId[i0]=m_nodeDistribution[neighbour]
835     + (bottomN1-1)*bottomN0 + i0 - left;
836     }
837     }
838     if (left>0 && bottom>0) {
839     const int neighbour=m_mpiInfo->rank-m_NX-1;
840 caltinay 3699 const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
841     const index_t N1=(neighbour/m_NX == 0 ? m_N1 : m_N1-1);
842     m_nodeId[0]=m_nodeDistribution[neighbour]+N0*N1-1;
843 caltinay 3697 }
844    
845     // the rest of the id's are contiguous
846     const index_t firstId=m_nodeDistribution[m_mpiInfo->rank];
847     #pragma omp parallel for
848     for (dim_t i1=bottom; i1<m_N1; i1++) {
849     for (dim_t i0=left; i0<m_N0; i0++) {
850     m_nodeId[i0+i1*m_N0] = firstId+i0-left+(i1-bottom)*(m_N0-left);
851     }
852     }
853    
854 caltinay 3704 // element id's
855 caltinay 3697 m_elementId.resize(getNumElements());
856     #pragma omp parallel for
857     for (dim_t k=0; k<getNumElements(); k++) {
858     m_elementId[k]=k;
859     }
860    
861 caltinay 3704 // face element id's
862 caltinay 3697 m_faceId.resize(getNumFaceElements());
863     #pragma omp parallel for
864     for (dim_t k=0; k<getNumFaceElements(); k++) {
865     m_faceId[k]=k;
866     }
867 caltinay 3704
868     // generate face offset vector
869     const IndexVector facesPerEdge = getNumFacesPerBoundary();
870     m_faceOffset.assign(facesPerEdge.size(), -1);
871     index_t offset=0;
872     for (size_t i=0; i<facesPerEdge.size(); i++) {
873     if (facesPerEdge[i]>0) {
874     m_faceOffset[i]=offset;
875     offset+=facesPerEdge[i];
876     }
877     }
878 caltinay 3691 }
879    
880 caltinay 3699 //private
881     int Rectangle::insertNeighbours(IndexVector& index, index_t node) const
882     {
883     const dim_t myN0 = (m_offset0==0 ? m_N0 : m_N0-1);
884     const dim_t myN1 = (m_offset1==0 ? m_N1 : m_N1-1);
885     const int x=node%myN0;
886     const int y=node/myN0;
887     int num=0;
888     if (y>0) {
889     if (x>0) {
890     // bottom-left
891     index.push_back(node-myN0-1);
892     num++;
893     }
894     // bottom
895     index.push_back(node-myN0);
896     num++;
897     if (x<myN0-1) {
898     // bottom-right
899     index.push_back(node-myN0+1);
900     num++;
901     }
902     }
903     if (x<myN0-1) {
904     // right
905     index.push_back(node+1);
906     num++;
907     if (y<myN1-1) {
908     // top-right
909     index.push_back(node+myN0+1);
910     num++;
911     }
912     }
913     if (y<myN1-1) {
914     // top
915     index.push_back(node+myN0);
916     num++;
917     if (x>0) {
918     // top-left
919     index.push_back(node+myN0-1);
920     num++;
921     }
922     }
923     if (x>0) {
924     // left
925     index.push_back(node-1);
926     num++;
927     }
928    
929     return num;
930     }
931    
932     //private
933     void Rectangle::generateCouplePatterns(Paso_Pattern** colPattern, Paso_Pattern** rowPattern) const
934     {
935     IndexVector ptr(1,0);
936     IndexVector index;
937     const dim_t myN0 = (m_offset0==0 ? m_N0 : m_N0-1);
938     const dim_t myN1 = (m_offset1==0 ? m_N1 : m_N1-1);
939     const IndexVector faces=getNumFacesPerBoundary();
940    
941     // bottom edge
942     dim_t n=0;
943     if (faces[0] == 0) {
944     index.push_back(2*(myN0+myN1+1));
945     index.push_back(2*(myN0+myN1+1)+1);
946     n+=2;
947     if (faces[2] == 0) {
948     index.push_back(0);
949     index.push_back(1);
950     index.push_back(2);
951     n+=3;
952     }
953     } else if (faces[2] == 0) {
954     index.push_back(1);
955     index.push_back(2);
956     n+=2;
957     }
958     // n=neighbours of bottom-left corner node
959     ptr.push_back(ptr.back()+n);
960     n=0;
961     if (faces[2] == 0) {
962     for (dim_t i=1; i<myN0-1; i++) {
963     index.push_back(i);
964     index.push_back(i+1);
965     index.push_back(i+2);
966     ptr.push_back(ptr.back()+3);
967     }
968     index.push_back(myN0-1);
969     index.push_back(myN0);
970     n+=2;
971     if (faces[1] == 0) {
972     index.push_back(myN0+1);
973     index.push_back(myN0+2);
974     index.push_back(myN0+3);
975     n+=3;
976     }
977     } else {
978     for (dim_t i=1; i<myN0-1; i++) {
979     ptr.push_back(ptr.back());
980     }
981     if (faces[1] == 0) {
982     index.push_back(myN0+2);
983     index.push_back(myN0+3);
984     n+=2;
985     }
986     }
987     // n=neighbours of bottom-right corner node
988     ptr.push_back(ptr.back()+n);
989    
990     // 2nd row to 2nd last row
991     for (dim_t i=1; i<myN1-1; i++) {
992     // left edge
993     if (faces[0] == 0) {
994     index.push_back(2*(myN0+myN1+2)-i);
995     index.push_back(2*(myN0+myN1+2)-i-1);
996     index.push_back(2*(myN0+myN1+2)-i-2);
997     ptr.push_back(ptr.back()+3);
998     } else {
999     ptr.push_back(ptr.back());
1000     }
1001     for (dim_t j=1; j<myN0-1; j++) {
1002     ptr.push_back(ptr.back());
1003     }
1004     // right edge
1005     if (faces[1] == 0) {
1006     index.push_back(myN0+i+1);
1007     index.push_back(myN0+i+2);
1008     index.push_back(myN0+i+3);
1009     ptr.push_back(ptr.back()+3);
1010     } else {
1011     ptr.push_back(ptr.back());
1012     }
1013     }
1014    
1015     // top edge
1016     n=0;
1017     if (faces[0] == 0) {
1018     index.push_back(2*myN0+myN1+5);
1019     index.push_back(2*myN0+myN1+4);
1020     n+=2;
1021     if (faces[3] == 0) {
1022     index.push_back(2*myN0+myN1+3);
1023     index.push_back(2*myN0+myN1+2);
1024     index.push_back(2*myN0+myN1+1);
1025     n+=3;
1026     }
1027     } else if (faces[3] == 0) {
1028     index.push_back(2*myN0+myN1+2);
1029     index.push_back(2*myN0+myN1+1);
1030     n+=2;
1031     }
1032     // n=neighbours of top-left corner node
1033     ptr.push_back(ptr.back()+n);
1034     n=0;
1035     if (faces[3] == 0) {
1036     for (dim_t i=1; i<myN0-1; i++) {
1037     index.push_back(2*myN0+myN1+i+1);
1038     index.push_back(2*myN0+myN1+i);
1039     index.push_back(2*myN0+myN1+i-1);
1040     ptr.push_back(ptr.back()+3);
1041     }
1042     index.push_back(myN0+myN1+4);
1043     index.push_back(myN0+myN1+3);
1044     n+=2;
1045     if (faces[1] == 0) {
1046     index.push_back(myN0+myN1+2);
1047     index.push_back(myN0+myN1+1);
1048     index.push_back(myN0+myN1);
1049     n+=3;
1050     }
1051     } else {
1052     for (dim_t i=1; i<myN0-1; i++) {
1053     ptr.push_back(ptr.back());
1054     }
1055     if (faces[1] == 0) {
1056     index.push_back(myN0+myN1+1);
1057     index.push_back(myN0+myN1);
1058     n+=2;
1059     }
1060     }
1061     // n=neighbours of top-right corner node
1062     ptr.push_back(ptr.back()+n);
1063    
1064     dim_t M=ptr.size()-1;
1065     map<index_t,index_t> idMap;
1066     dim_t N=0;
1067     for (IndexVector::iterator it=index.begin(); it!=index.end(); it++) {
1068     if (idMap.find(*it)==idMap.end()) {
1069     idMap[*it]=N;
1070     *it=N++;
1071     } else {
1072     *it=idMap[*it];
1073     }
1074     }
1075    
1076 caltinay 3702 /*
1077 caltinay 3699 cout << "--- colCouple_pattern ---" << endl;
1078     cout << "M=" << M << ", N=" << N << endl;
1079     for (size_t i=0; i<ptr.size(); i++) {
1080     cout << "ptr[" << i << "]=" << ptr[i] << endl;
1081     }
1082     for (size_t i=0; i<index.size(); i++) {
1083     cout << "index[" << i << "]=" << index[i] << endl;
1084     }
1085 caltinay 3702 */
1086 caltinay 3699
1087     // now build the row couple pattern
1088     IndexVector ptr2(1,0);
1089     IndexVector index2;
1090     for (dim_t id=0; id<N; id++) {
1091     n=0;
1092     for (dim_t i=0; i<M; i++) {
1093     for (dim_t j=ptr[i]; j<ptr[i+1]; j++) {
1094     if (index[j]==id) {
1095     index2.push_back(i);
1096     n++;
1097     break;
1098     }
1099     }
1100     }
1101     ptr2.push_back(ptr2.back()+n);
1102     }
1103    
1104 caltinay 3702 /*
1105 caltinay 3699 cout << "--- rowCouple_pattern ---" << endl;
1106     cout << "M=" << N << ", N=" << M << endl;
1107     for (size_t i=0; i<ptr2.size(); i++) {
1108     cout << "ptr[" << i << "]=" << ptr2[i] << endl;
1109     }
1110     for (size_t i=0; i<index2.size(); i++) {
1111     cout << "index[" << i << "]=" << index2[i] << endl;
1112     }
1113 caltinay 3702 */
1114 caltinay 3699
1115     // paso will manage the memory
1116     index_t* indexC = MEMALLOC(index.size(), index_t);
1117     index_t* ptrC = MEMALLOC(ptr.size(), index_t);
1118     copy(index.begin(), index.end(), indexC);
1119     copy(ptr.begin(), ptr.end(), ptrC);
1120     *colPattern=Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT, M, N, ptrC, indexC);
1121    
1122     // paso will manage the memory
1123     indexC = MEMALLOC(index2.size(), index_t);
1124     ptrC = MEMALLOC(ptr2.size(), index_t);
1125     copy(index2.begin(), index2.end(), indexC);
1126     copy(ptr2.begin(), ptr2.end(), ptrC);
1127     *rowPattern=Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT, N, M, ptrC, indexC);
1128     }
1129    
1130 caltinay 3702 //protected
1131 caltinay 3711 void Rectangle::interpolateNodesOnElements(escript::Data& out,
1132     escript::Data& in, bool reduced) const
1133 caltinay 3702 {
1134     const dim_t numComp = in.getDataPointSize();
1135 caltinay 3711 if (reduced) {
1136     /* GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS TOP */
1137     const double tmp0_0 = 0.25000000000000000000;
1138 caltinay 3702 #pragma omp parallel for
1139 caltinay 3711 for (index_t k1=0; k1 < m_NE1; ++k1) {
1140     for (index_t k0=0; k0 < m_NE0; ++k0) {
1141     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0));
1142     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0));
1143     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0));
1144     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0));
1145     double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0));
1146     for (index_t i=0; i < numComp; ++i) {
1147     o[INDEX2(i,numComp,0)] = tmp0_0*(f_00[i] + f_01[i] + f_10[i] + f_11[i]);
1148     } /* end of component loop i */
1149     } /* end of k0 loop */
1150     } /* end of k1 loop */
1151     /* GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS BOTTOM */
1152     } else {
1153     /* GENERATOR SNIP_INTERPOLATE_ELEMENTS TOP */
1154     const double tmp0_2 = 0.62200846792814621559;
1155     const double tmp0_1 = 0.044658198738520451079;
1156     const double tmp0_0 = 0.16666666666666666667;
1157     #pragma omp parallel for
1158     for (index_t k1=0; k1 < m_NE1; ++k1) {
1159     for (index_t k0=0; k0 < m_NE0; ++k0) {
1160     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0));
1161     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0));
1162     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0));
1163     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0));
1164     double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0));
1165     for (index_t i=0; i < numComp; ++i) {
1166     o[INDEX2(i,numComp,0)] = f_00[i]*tmp0_2 + f_11[i]*tmp0_1 + tmp0_0*(f_01[i] + f_10[i]);
1167     o[INDEX2(i,numComp,1)] = f_01[i]*tmp0_1 + f_10[i]*tmp0_2 + tmp0_0*(f_00[i] + f_11[i]);
1168     o[INDEX2(i,numComp,2)] = f_01[i]*tmp0_2 + f_10[i]*tmp0_1 + tmp0_0*(f_00[i] + f_11[i]);
1169     o[INDEX2(i,numComp,3)] = f_00[i]*tmp0_1 + f_11[i]*tmp0_2 + tmp0_0*(f_01[i] + f_10[i]);
1170     } /* end of component loop i */
1171     } /* end of k0 loop */
1172     } /* end of k1 loop */
1173     /* GENERATOR SNIP_INTERPOLATE_ELEMENTS BOTTOM */
1174     }
1175 caltinay 3702 }
1176    
1177     //protected
1178 caltinay 3711 void Rectangle::interpolateNodesOnFaces(escript::Data& out, escript::Data& in,
1179     bool reduced) const
1180 caltinay 3702 {
1181 caltinay 3704 const dim_t numComp = in.getDataPointSize();
1182 caltinay 3711 if (reduced) {
1183     /* GENERATOR SNIP_INTERPOLATE_REDUCED_FACES TOP */
1184     if (m_faceOffset[0] > -1) {
1185     const double tmp0_0 = 0.50000000000000000000;
1186 caltinay 3704 #pragma omp parallel for
1187 caltinay 3711 for (index_t k1=0; k1 < m_NE1; ++k1) {
1188     const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0));
1189     const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0));
1190     double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
1191     for (index_t i=0; i < numComp; ++i) {
1192     o[INDEX2(i,numComp,0)] = tmp0_0*(f_00[i] + f_01[i]);
1193     } /* end of component loop i */
1194     } /* end of k1 loop */
1195     } /* end of face 0 */
1196     if (m_faceOffset[1] > -1) {
1197     const double tmp0_0 = 0.50000000000000000000;
1198 caltinay 3704 #pragma omp parallel for
1199 caltinay 3711 for (index_t k1=0; k1 < m_NE1; ++k1) {
1200     const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0));
1201     const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0));
1202     double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
1203     for (index_t i=0; i < numComp; ++i) {
1204     o[INDEX2(i,numComp,0)] = tmp0_0*(f_10[i] + f_11[i]);
1205     } /* end of component loop i */
1206     } /* end of k1 loop */
1207     } /* end of face 1 */
1208     if (m_faceOffset[2] > -1) {
1209     const double tmp0_0 = 0.50000000000000000000;
1210 caltinay 3704 #pragma omp parallel for
1211 caltinay 3711 for (index_t k0=0; k0 < m_NE0; ++k0) {
1212     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0));
1213     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0));
1214     double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1215     for (index_t i=0; i < numComp; ++i) {
1216     o[INDEX2(i,numComp,0)] = tmp0_0*(f_00[i] + f_10[i]);
1217     } /* end of component loop i */
1218     } /* end of k0 loop */
1219     } /* end of face 2 */
1220     if (m_faceOffset[3] > -1) {
1221     const double tmp0_0 = 0.50000000000000000000;
1222 caltinay 3704 #pragma omp parallel for
1223 caltinay 3711 for (index_t k0=0; k0 < m_NE0; ++k0) {
1224     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0));
1225     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0));
1226     double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1227     for (index_t i=0; i < numComp; ++i) {
1228     o[INDEX2(i,numComp,0)] = tmp0_0*(f_01[i] + f_11[i]);
1229     } /* end of component loop i */
1230     } /* end of k0 loop */
1231     } /* end of face 3 */
1232     /* GENERATOR SNIP_INTERPOLATE_REDUCED_FACES BOTTOM */
1233     } else {
1234     /* GENERATOR SNIP_INTERPOLATE_FACES TOP */
1235     if (m_faceOffset[0] > -1) {
1236     const double tmp0_1 = 0.78867513459481288225;
1237     const double tmp0_0 = 0.21132486540518711775;
1238     #pragma omp parallel for
1239     for (index_t k1=0; k1 < m_NE1; ++k1) {
1240     const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0));
1241     const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0));
1242     double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
1243     for (index_t i=0; i < numComp; ++i) {
1244     o[INDEX2(i,numComp,0)] = f_00[i]*tmp0_1 + f_01[i]*tmp0_0;
1245     o[INDEX2(i,numComp,1)] = f_00[i]*tmp0_0 + f_01[i]*tmp0_1;
1246     } /* end of component loop i */
1247     } /* end of k1 loop */
1248     } /* end of face 0 */
1249     if (m_faceOffset[1] > -1) {
1250     const double tmp0_1 = 0.21132486540518711775;
1251     const double tmp0_0 = 0.78867513459481288225;
1252     #pragma omp parallel for
1253     for (index_t k1=0; k1 < m_NE1; ++k1) {
1254     const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0));
1255     const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0));
1256     double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
1257     for (index_t i=0; i < numComp; ++i) {
1258     o[INDEX2(i,numComp,0)] = f_10[i]*tmp0_0 + f_11[i]*tmp0_1;
1259     o[INDEX2(i,numComp,1)] = f_10[i]*tmp0_1 + f_11[i]*tmp0_0;
1260     } /* end of component loop i */
1261     } /* end of k1 loop */
1262     } /* end of face 1 */
1263     if (m_faceOffset[2] > -1) {
1264     const double tmp0_1 = 0.78867513459481288225;
1265     const double tmp0_0 = 0.21132486540518711775;
1266     #pragma omp parallel for
1267     for (index_t k0=0; k0 < m_NE0; ++k0) {
1268     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0));
1269     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0));
1270     double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1271     for (index_t i=0; i < numComp; ++i) {
1272     o[INDEX2(i,numComp,0)] = f_00[i]*tmp0_1 + f_10[i]*tmp0_0;
1273     o[INDEX2(i,numComp,1)] = f_00[i]*tmp0_0 + f_10[i]*tmp0_1;
1274     } /* end of component loop i */
1275     } /* end of k0 loop */
1276     } /* end of face 2 */
1277     if (m_faceOffset[3] > -1) {
1278     const double tmp0_1 = 0.78867513459481288225;
1279     const double tmp0_0 = 0.21132486540518711775;
1280     #pragma omp parallel for
1281     for (index_t k0=0; k0 < m_NE0; ++k0) {
1282     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0));
1283     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0));
1284     double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1285     for (index_t i=0; i < numComp; ++i) {
1286     o[INDEX2(i,numComp,0)] = f_01[i]*tmp0_1 + f_11[i]*tmp0_0;
1287     o[INDEX2(i,numComp,1)] = f_01[i]*tmp0_0 + f_11[i]*tmp0_1;
1288     } /* end of component loop i */
1289     } /* end of k0 loop */
1290     } /* end of face 3 */
1291     /* GENERATOR SNIP_INTERPOLATE_FACES BOTTOM */
1292     }
1293 caltinay 3702 }
1294    
1295 caltinay 3691 } // end of namespace ripley
1296    

  ViewVC Help
Powered by ViewVC 1.1.26