/[escript]/trunk/ripley/src/Rectangle.cpp
ViewVC logotype

Annotation of /trunk/ripley/src/Rectangle.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3713 - (hide annotations)
Tue Dec 6 04:43:29 2011 UTC (7 years, 11 months ago) by caltinay
Original Path: branches/ripleygmg_from_3668/ripley/src/Rectangle.cpp
File size: 59378 byte(s)
Integrals in 2D and 3D for rectangle & brick elements & face elements in
regular and reduced orders.

1 caltinay 3691
2     /*******************************************************
3     *
4     * Copyright (c) 2003-2011 by University of Queensland
5     * Earth Systems Science Computational Center (ESSCC)
6     * http://www.uq.edu.au/esscc
7     *
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13    
14     #include <ripley/Rectangle.h>
15     extern "C" {
16     #include "paso/SystemMatrixPattern.h"
17     }
18    
19     #if USE_SILO
20     #include <silo.h>
21     #ifdef ESYS_MPI
22     #include <pmpio.h>
23     #endif
24     #endif
25    
26     #include <iomanip>
27    
28     using namespace std;
29    
30     namespace ripley {
31    
32     Rectangle::Rectangle(int n0, int n1, double l0, double l1, int d0, int d1) :
33     RipleyDomain(2),
34     m_gNE0(n0),
35     m_gNE1(n1),
36     m_l0(l0),
37     m_l1(l1),
38     m_NX(d0),
39     m_NY(d1)
40     {
41     // ensure number of subdivisions is valid and nodes can be distributed
42     // among number of ranks
43     if (m_NX*m_NY != m_mpiInfo->size)
44     throw RipleyException("Invalid number of spatial subdivisions");
45    
46     if (n0%m_NX > 0 || n1%m_NY > 0)
47     throw RipleyException("Number of elements must be separable into number of ranks in each dimension");
48    
49     // local number of elements
50     m_NE0 = n0/m_NX;
51     m_NE1 = n1/m_NY;
52     // local number of nodes (not necessarily owned)
53     m_N0 = m_NE0+1;
54     m_N1 = m_NE1+1;
55     // bottom-left node is at (offset0,offset1) in global mesh
56     m_offset0 = m_NE0*(m_mpiInfo->rank%m_NX);
57     m_offset1 = m_NE1*(m_mpiInfo->rank/m_NX);
58     populateSampleIds();
59     }
60    
61     Rectangle::~Rectangle()
62     {
63     }
64    
65     string Rectangle::getDescription() const
66     {
67     return "ripley::Rectangle";
68     }
69    
70     bool Rectangle::operator==(const AbstractDomain& other) const
71     {
72     if (dynamic_cast<const Rectangle*>(&other))
73     return this==&other;
74    
75     return false;
76     }
77    
78     void Rectangle::dump(const string& fileName) const
79     {
80     #if USE_SILO
81     string fn(fileName);
82     if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
83     fn+=".silo";
84     }
85    
86     const int NUM_SILO_FILES = 1;
87     const char* blockDirFmt = "/block%04d";
88     int driver=DB_HDF5;
89     string siloPath;
90     DBfile* dbfile = NULL;
91    
92     #ifdef ESYS_MPI
93     PMPIO_baton_t* baton = NULL;
94     #endif
95    
96     if (m_mpiInfo->size > 1) {
97     #ifdef ESYS_MPI
98     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
99     0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
100     PMPIO_DefaultClose, (void*)&driver);
101     // try the fallback driver in case of error
102     if (!baton && driver != DB_PDB) {
103     driver = DB_PDB;
104     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
105     0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
106     PMPIO_DefaultClose, (void*)&driver);
107     }
108     if (baton) {
109     char str[64];
110     snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
111     siloPath = str;
112     dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
113     }
114     #endif
115     } else {
116     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
117     getDescription().c_str(), driver);
118     // try the fallback driver in case of error
119     if (!dbfile && driver != DB_PDB) {
120     driver = DB_PDB;
121     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
122     getDescription().c_str(), driver);
123     }
124     }
125    
126     if (!dbfile)
127     throw RipleyException("dump: Could not create Silo file");
128    
129     /*
130     if (driver==DB_HDF5) {
131     // gzip level 1 already provides good compression with minimal
132     // performance penalty. Some tests showed that gzip levels >3 performed
133     // rather badly on escript data both in terms of time and space
134     DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
135     }
136     */
137    
138     boost::scoped_ptr<double> x(new double[m_N0]);
139     boost::scoped_ptr<double> y(new double[m_N1]);
140     double* coords[2] = { x.get(), y.get() };
141 caltinay 3697 pair<double,double> xdx = getFirstCoordAndSpacing(0);
142     pair<double,double> ydy = getFirstCoordAndSpacing(1);
143 caltinay 3691 #pragma omp parallel
144     {
145     #pragma omp for
146     for (dim_t i0 = 0; i0 < m_N0; i0++) {
147 caltinay 3697 coords[0][i0]=xdx.first+i0*xdx.second;
148 caltinay 3691 }
149     #pragma omp for
150     for (dim_t i1 = 0; i1 < m_N1; i1++) {
151 caltinay 3697 coords[1][i1]=ydy.first+i1*ydy.second;
152 caltinay 3691 }
153     }
154 caltinay 3697 IndexVector dims = getNumNodesPerDim();
155    
156     // write mesh
157     DBPutQuadmesh(dbfile, "mesh", NULL, coords, &dims[0], 2, DB_DOUBLE,
158 caltinay 3691 DB_COLLINEAR, NULL);
159    
160 caltinay 3697 // write node ids
161     DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], &dims[0], 2,
162     NULL, 0, DB_INT, DB_NODECENT, NULL);
163    
164     // write element ids
165     dims = getNumElementsPerDim();
166     DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
167     &dims[0], 2, NULL, 0, DB_INT, DB_ZONECENT, NULL);
168    
169     // rank 0 writes multimesh and multivar
170 caltinay 3691 if (m_mpiInfo->rank == 0) {
171     vector<string> tempstrings;
172 caltinay 3697 vector<char*> names;
173 caltinay 3691 for (dim_t i=0; i<m_mpiInfo->size; i++) {
174     stringstream path;
175     path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
176     tempstrings.push_back(path.str());
177 caltinay 3697 names.push_back((char*)tempstrings.back().c_str());
178 caltinay 3691 }
179 caltinay 3697 vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
180 caltinay 3691 DBSetDir(dbfile, "/");
181 caltinay 3697 DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
182     &types[0], NULL);
183     tempstrings.clear();
184     names.clear();
185     for (dim_t i=0; i<m_mpiInfo->size; i++) {
186     stringstream path;
187     path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
188     tempstrings.push_back(path.str());
189     names.push_back((char*)tempstrings.back().c_str());
190     }
191     types.assign(m_mpiInfo->size, DB_QUADVAR);
192     DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
193     &types[0], NULL);
194     tempstrings.clear();
195     names.clear();
196     for (dim_t i=0; i<m_mpiInfo->size; i++) {
197     stringstream path;
198     path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
199     tempstrings.push_back(path.str());
200     names.push_back((char*)tempstrings.back().c_str());
201     }
202     DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
203     &types[0], NULL);
204 caltinay 3691 }
205    
206     if (m_mpiInfo->size > 1) {
207     #ifdef ESYS_MPI
208     PMPIO_HandOffBaton(baton, dbfile);
209     PMPIO_Finish(baton);
210     #endif
211     } else {
212     DBClose(dbfile);
213     }
214    
215     #else // USE_SILO
216     throw RipleyException("dump(): no Silo support");
217     #endif
218     }
219    
220 caltinay 3697 const int* Rectangle::borrowSampleReferenceIDs(int fsType) const
221 caltinay 3691 {
222 caltinay 3697 switch (fsType) {
223 caltinay 3691 case Nodes:
224     return &m_nodeId[0];
225     case Elements:
226     return &m_elementId[0];
227     case FaceElements:
228     return &m_faceId[0];
229     default:
230     break;
231     }
232    
233     stringstream msg;
234     msg << "borrowSampleReferenceIDs() not implemented for function space type "
235 caltinay 3701 << functionSpaceTypeAsString(fsType);
236 caltinay 3691 throw RipleyException(msg.str());
237     }
238    
239     bool Rectangle::ownSample(int fsCode, index_t id) const
240     {
241     #ifdef ESYS_MPI
242 caltinay 3702 if (fsCode != ReducedNodes) {
243 caltinay 3699 const index_t myFirst=m_nodeDistribution[m_mpiInfo->rank];
244     const index_t myLast=m_nodeDistribution[m_mpiInfo->rank+1]-1;
245 caltinay 3691 return (m_nodeId[id]>=myFirst && m_nodeId[id]<=myLast);
246 caltinay 3702 } else {
247     stringstream msg;
248     msg << "ownSample() not implemented for "
249     << functionSpaceTypeAsString(fsCode);
250     throw RipleyException(msg.str());
251     }
252 caltinay 3691 #else
253     return true;
254     #endif
255     }
256    
257 caltinay 3702 void Rectangle::setToGradient(escript::Data& out, const escript::Data& cIn) const
258 caltinay 3691 {
259 caltinay 3702 escript::Data& in = *const_cast<escript::Data*>(&cIn);
260 caltinay 3701 const dim_t numComp = in.getDataPointSize();
261 caltinay 3702 const double h0 = m_l0/m_gNE0;
262     const double h1 = m_l1/m_gNE1;
263     if (out.getFunctionSpace().getTypeCode() == Elements) {
264     /* GENERATOR SNIP_GRAD_ELEMENTS TOP */
265     const double tmp0_13 = 0.78867513459481288225/h1;
266     const double tmp0_0 = 0.78867513459481288225/h0;
267     const double tmp0_4 = 0.21132486540518711775/h0;
268     const double tmp0_10 = 0.78867513459481288225/h1;
269     const double tmp0_1 = 0.21132486540518711775/h0;
270     const double tmp0_8 = -0.21132486540518711775/h1;
271     const double tmp0_14 = 0.21132486540518711775/h1;
272     const double tmp0_5 = 0.78867513459481288225/h0;
273     const double tmp0_11 = -0.78867513459481288225/h1;
274     const double tmp0_2 = -0.21132486540518711775/h0;
275     const double tmp0_9 = 0.21132486540518711775/h1;
276     const double tmp0_15 = -0.21132486540518711775/h1;
277     const double tmp0_6 = -0.78867513459481288225/h0;
278     const double tmp0_3 = -0.78867513459481288225/h0;
279     const double tmp0_12 = -0.78867513459481288225/h1;
280     const double tmp0_7 = -0.21132486540518711775/h0;
281 caltinay 3697 #pragma omp parallel for
282 caltinay 3707 for (index_t k1=0; k1 < m_NE1; ++k1) {
283     for (index_t k0=0; k0 < m_NE0; ++k0) {
284 caltinay 3702 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0));
285     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0));
286     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0));
287     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0));
288     double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0));
289     for (index_t i=0; i < numComp; ++i) {
290     o[INDEX3(i,0,0,numComp,2)] = f_00[i]*tmp0_3 + f_01[i]*tmp0_2 + f_10[i]*tmp0_0 + f_11[i]*tmp0_1;
291     o[INDEX3(i,1,0,numComp,2)] = f_00[i]*tmp0_11 + f_01[i]*tmp0_10 + f_10[i]*tmp0_8 + f_11[i]*tmp0_9;
292     o[INDEX3(i,0,1,numComp,2)] = f_00[i]*tmp0_3 + f_01[i]*tmp0_2 + f_10[i]*tmp0_0 + f_11[i]*tmp0_1;
293     o[INDEX3(i,1,1,numComp,2)] = f_00[i]*tmp0_15 + f_01[i]*tmp0_14 + f_10[i]*tmp0_12 + f_11[i]*tmp0_13;
294     o[INDEX3(i,0,2,numComp,2)] = f_00[i]*tmp0_7 + f_01[i]*tmp0_6 + f_10[i]*tmp0_4 + f_11[i]*tmp0_5;
295     o[INDEX3(i,1,2,numComp,2)] = f_00[i]*tmp0_11 + f_01[i]*tmp0_10 + f_10[i]*tmp0_8 + f_11[i]*tmp0_9;
296     o[INDEX3(i,0,3,numComp,2)] = f_00[i]*tmp0_7 + f_01[i]*tmp0_6 + f_10[i]*tmp0_4 + f_11[i]*tmp0_5;
297     o[INDEX3(i,1,3,numComp,2)] = f_00[i]*tmp0_15 + f_01[i]*tmp0_14 + f_10[i]*tmp0_12 + f_11[i]*tmp0_13;
298     } /* end of component loop i */
299     } /* end of k0 loop */
300     } /* end of k1 loop */
301     /* GENERATOR SNIP_GRAD_ELEMENTS BOTTOM */
302 caltinay 3711 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
303     /* GENERATOR SNIP_GRAD_REDUCED_ELEMENTS TOP */
304     const double tmp0_3 = 0.5/h1;
305     const double tmp0_2 = -0.5/h1;
306     const double tmp0_1 = -0.5/h0;
307     const double tmp0_0 = 0.5/h0;
308     #pragma omp parallel for
309     for (index_t k1=0; k1 < m_NE1; ++k1) {
310     for (index_t k0=0; k0 < m_NE0; ++k0) {
311     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0));
312     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0));
313     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0));
314     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0));
315     double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0));
316     for (index_t i=0; i < numComp; ++i) {
317     o[INDEX3(i,0,0,numComp,2)] = tmp0_0*(f_10[i] + f_11[i]) + tmp0_1*(f_00[i] + f_01[i]);
318     o[INDEX3(i,1,0,numComp,2)] = tmp0_2*(f_00[i] + f_10[i]) + tmp0_3*(f_01[i] + f_11[i]);
319     } /* end of component loop i */
320     } /* end of k0 loop */
321     } /* end of k1 loop */
322     /* GENERATOR SNIP_GRAD_REDUCED_ELEMENTS BOTTOM */
323 caltinay 3707 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
324     /* GENERATOR SNIP_GRAD_FACES TOP */
325     if (m_faceOffset[0] > -1) {
326     const double tmp0_0 = 0.78867513459481288225/h0;
327     const double tmp0_4 = 0.21132486540518711775/h0;
328     const double tmp0_1 = 0.21132486540518711775/h0;
329     const double tmp0_8 = 1.0/h1;
330     const double tmp0_5 = 0.78867513459481288225/h0;
331     const double tmp0_2 = -0.21132486540518711775/h0;
332     const double tmp0_9 = -1/h1;
333     const double tmp0_6 = -0.78867513459481288225/h0;
334     const double tmp0_3 = -0.78867513459481288225/h0;
335     const double tmp0_7 = -0.21132486540518711775/h0;
336     #pragma omp parallel for
337     for (index_t k1=0; k1 < m_NE1; ++k1) {
338     const register double* f_10 = in.getSampleDataRO(INDEX2(1,k1, m_N0));
339     const register double* f_11 = in.getSampleDataRO(INDEX2(1,k1+1, m_N0));
340     const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0));
341     const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0));
342     double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
343     for (index_t i=0; i < numComp; ++i) {
344     o[INDEX3(i,0,0,numComp,2)] = f_00[i]*tmp0_3 + f_01[i]*tmp0_2 + f_10[i]*tmp0_0 + f_11[i]*tmp0_1;
345     o[INDEX3(i,1,0,numComp,2)] = f_00[i]*tmp0_9 + f_01[i]*tmp0_8;
346     o[INDEX3(i,0,1,numComp,2)] = f_00[i]*tmp0_7 + f_01[i]*tmp0_6 + f_10[i]*tmp0_4 + f_11[i]*tmp0_5;
347     o[INDEX3(i,1,1,numComp,2)] = f_00[i]*tmp0_9 + f_01[i]*tmp0_8;
348     } /* end of component loop i */
349     } /* end of k1 loop */
350     } /* end of face 0 */
351     if (m_faceOffset[1] > -1) {
352     const double tmp0_0 = 0.78867513459481288225/h0;
353     const double tmp0_4 = 0.21132486540518711775/h0;
354     const double tmp0_1 = 0.21132486540518711775/h0;
355     const double tmp0_8 = -1/h1;
356     const double tmp0_5 = 0.78867513459481288225/h0;
357     const double tmp0_2 = -0.21132486540518711775/h0;
358     const double tmp0_9 = 1.0/h1;
359     const double tmp0_6 = -0.78867513459481288225/h0;
360     const double tmp0_3 = -0.78867513459481288225/h0;
361     const double tmp0_7 = -0.21132486540518711775/h0;
362     #pragma omp parallel for
363     for (index_t k1=0; k1 < m_NE1; ++k1) {
364     const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0));
365     const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0));
366     const register double* f_01 = in.getSampleDataRO(INDEX2(m_N0-2,k1+1, m_N0));
367     const register double* f_00 = in.getSampleDataRO(INDEX2(m_N0-2,k1, m_N0));
368     double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
369     for (index_t i=0; i < numComp; ++i) {
370     o[INDEX3(i,0,0,numComp,2)] = f_00[i]*tmp0_3 + f_01[i]*tmp0_2 + f_10[i]*tmp0_0 + f_11[i]*tmp0_1;
371     o[INDEX3(i,1,0,numComp,2)] = f_10[i]*tmp0_8 + f_11[i]*tmp0_9;
372     o[INDEX3(i,0,1,numComp,2)] = f_00[i]*tmp0_7 + f_01[i]*tmp0_6 + f_10[i]*tmp0_4 + f_11[i]*tmp0_5;
373     o[INDEX3(i,1,1,numComp,2)] = f_10[i]*tmp0_8 + f_11[i]*tmp0_9;
374     } /* end of component loop i */
375     } /* end of k1 loop */
376     } /* end of face 1 */
377     if (m_faceOffset[2] > -1) {
378     const double tmp0_0 = -1/h0;
379     const double tmp0_4 = 0.21132486540518711775/h1;
380     const double tmp0_1 = 1.0/h0;
381     const double tmp0_8 = 0.78867513459481288225/h1;
382     const double tmp0_5 = 0.78867513459481288225/h1;
383     const double tmp0_2 = -0.78867513459481288225/h1;
384     const double tmp0_9 = 0.21132486540518711775/h1;
385     const double tmp0_6 = -0.21132486540518711775/h1;
386     const double tmp0_3 = -0.21132486540518711775/h1;
387     const double tmp0_7 = -0.78867513459481288225/h1;
388     #pragma omp parallel for
389     for (index_t k0=0; k0 < m_NE0; ++k0) {
390     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0));
391     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0));
392     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,1, m_N0));
393     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,1, m_N0));
394     double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
395     for (index_t i=0; i < numComp; ++i) {
396     o[INDEX3(i,0,0,numComp,2)] = f_00[i]*tmp0_0 + f_10[i]*tmp0_1;
397     o[INDEX3(i,1,0,numComp,2)] = f_00[i]*tmp0_2 + f_01[i]*tmp0_5 + f_10[i]*tmp0_3 + f_11[i]*tmp0_4;
398     o[INDEX3(i,0,1,numComp,2)] = f_00[i]*tmp0_0 + f_10[i]*tmp0_1;
399     o[INDEX3(i,1,1,numComp,2)] = f_00[i]*tmp0_6 + f_01[i]*tmp0_9 + f_10[i]*tmp0_7 + f_11[i]*tmp0_8;
400     } /* end of component loop i */
401     } /* end of k0 loop */
402     } /* end of face 2 */
403     if (m_faceOffset[3] > -1) {
404     const double tmp0_0 = 1.0/h0;
405     const double tmp0_4 = -0.21132486540518711775/h1;
406     const double tmp0_1 = -1/h0;
407     const double tmp0_8 = -0.78867513459481288225/h1;
408     const double tmp0_5 = -0.78867513459481288225/h1;
409     const double tmp0_2 = 0.21132486540518711775/h1;
410     const double tmp0_9 = -0.21132486540518711775/h1;
411     const double tmp0_6 = 0.78867513459481288225/h1;
412     const double tmp0_3 = 0.78867513459481288225/h1;
413     const double tmp0_7 = 0.21132486540518711775/h1;
414     #pragma omp parallel for
415     for (index_t k0=0; k0 < m_NE0; ++k0) {
416     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0));
417     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0));
418     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,m_N1-2, m_N0));
419     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,m_N1-2, m_N0));
420     double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
421     for (index_t i=0; i < numComp; ++i) {
422     o[INDEX3(i,0,0,numComp,2)] = f_01[i]*tmp0_1 + f_11[i]*tmp0_0;
423     o[INDEX3(i,1,0,numComp,2)] = f_00[i]*tmp0_5 + f_01[i]*tmp0_3 + f_10[i]*tmp0_4 + f_11[i]*tmp0_2;
424     o[INDEX3(i,0,1,numComp,2)] = f_01[i]*tmp0_1 + f_11[i]*tmp0_0;
425     o[INDEX3(i,1,1,numComp,2)] = f_00[i]*tmp0_9 + f_01[i]*tmp0_7 + f_10[i]*tmp0_8 + f_11[i]*tmp0_6;
426     } /* end of component loop i */
427     } /* end of k0 loop */
428     } /* end of face 3 */
429     /* GENERATOR SNIP_GRAD_FACES BOTTOM */
430 caltinay 3711 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
431     /* GENERATOR SNIP_GRAD_REDUCED_FACES TOP */
432     if (m_faceOffset[0] > -1) {
433     const double tmp0_3 = -1/h1;
434     const double tmp0_2 = 1.0/h1;
435     const double tmp0_1 = -0.5/h0;
436     const double tmp0_0 = 0.5/h0;
437     #pragma omp parallel for
438     for (index_t k1=0; k1 < m_NE1; ++k1) {
439     const register double* f_10 = in.getSampleDataRO(INDEX2(1,k1, m_N0));
440     const register double* f_11 = in.getSampleDataRO(INDEX2(1,k1+1, m_N0));
441     const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0));
442     const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0));
443     double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
444     for (index_t i=0; i < numComp; ++i) {
445     o[INDEX3(i,0,0,numComp,2)] = tmp0_0*(f_10[i] + f_11[i]) + tmp0_1*(f_00[i] + f_01[i]);
446     o[INDEX3(i,1,0,numComp,2)] = f_00[i]*tmp0_3 + f_01[i]*tmp0_2;
447     } /* end of component loop i */
448     } /* end of k1 loop */
449     } /* end of face 0 */
450     if (m_faceOffset[1] > -1) {
451     const double tmp0_3 = 1.0/h1;
452     const double tmp0_2 = -1/h1;
453     const double tmp0_1 = -0.5/h0;
454     const double tmp0_0 = 0.5/h0;
455     #pragma omp parallel for
456     for (index_t k1=0; k1 < m_NE1; ++k1) {
457     const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0));
458     const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0));
459     const register double* f_01 = in.getSampleDataRO(INDEX2(m_N0-2,k1+1, m_N0));
460     const register double* f_00 = in.getSampleDataRO(INDEX2(m_N0-2,k1, m_N0));
461     double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
462     for (index_t i=0; i < numComp; ++i) {
463     o[INDEX3(i,0,0,numComp,2)] = tmp0_0*(f_10[i] + f_11[i]) + tmp0_1*(f_00[i] + f_01[i]);
464     o[INDEX3(i,1,0,numComp,2)] = f_10[i]*tmp0_2 + f_11[i]*tmp0_3;
465     } /* end of component loop i */
466     } /* end of k1 loop */
467     } /* end of face 1 */
468     if (m_faceOffset[2] > -1) {
469     const double tmp0_3 = 0.5/h1;
470     const double tmp0_2 = -0.5/h1;
471     const double tmp0_1 = 1.0/h0;
472     const double tmp0_0 = -1/h0;
473     #pragma omp parallel for
474     for (index_t k0=0; k0 < m_NE0; ++k0) {
475     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0));
476     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0));
477     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,1, m_N0));
478     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,1, m_N0));
479     double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
480     for (index_t i=0; i < numComp; ++i) {
481     o[INDEX3(i,0,0,numComp,2)] = f_00[i]*tmp0_0 + f_10[i]*tmp0_1;
482     o[INDEX3(i,1,0,numComp,2)] = tmp0_2*(f_00[i] + f_10[i]) + tmp0_3*(f_01[i] + f_11[i]);
483     } /* end of component loop i */
484     } /* end of k0 loop */
485     } /* end of face 2 */
486     if (m_faceOffset[3] > -1) {
487     const double tmp0_3 = -0.5/h1;
488     const double tmp0_2 = 0.5/h1;
489     const double tmp0_1 = -1/h0;
490     const double tmp0_0 = 1.0/h0;
491     #pragma omp parallel for
492     for (index_t k0=0; k0 < m_NE0; ++k0) {
493     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0));
494     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0));
495     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,m_N1-2, m_N0));
496     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,m_N1-2, m_N0));
497     double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
498     for (index_t i=0; i < numComp; ++i) {
499     o[INDEX3(i,0,0,numComp,2)] = f_01[i]*tmp0_1 + f_11[i]*tmp0_0;
500     o[INDEX3(i,1,0,numComp,2)] = tmp0_2*(f_01[i] + f_11[i]) + tmp0_3*(f_00[i] + f_10[i]);
501     } /* end of component loop i */
502     } /* end of k0 loop */
503     } /* end of face 3 */
504     /* GENERATOR SNIP_GRAD_REDUCED_FACES BOTTOM */
505 caltinay 3702 } else {
506 caltinay 3707 stringstream msg;
507     msg << "setToGradient() not implemented for "
508     << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
509     throw RipleyException(msg.str());
510 caltinay 3702 }
511 caltinay 3701 }
512 caltinay 3697
513 caltinay 3713 void Rectangle::setToIntegrals(vector<double>& integrals, const escript::Data& arg) const
514     {
515     escript::Data& in = *const_cast<escript::Data*>(&arg);
516     const dim_t numComp = in.getDataPointSize();
517     const double h0 = m_l0/m_gNE0;
518     const double h1 = m_l1/m_gNE1;
519     if (arg.getFunctionSpace().getTypeCode() == Elements) {
520     const double w_0 = h0*h1/4.;
521     #pragma omp parallel
522     {
523     vector<double> int_local(numComp, 0);
524     #pragma omp for
525     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
526     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
527     const double* f = in.getSampleDataRO(INDEX2(k0, k1, m_NE0));
528     for (index_t i=0; i < numComp; ++i) {
529     const register double f_0 = f[INDEX2(i,0,numComp)];
530     const register double f_1 = f[INDEX2(i,1,numComp)];
531     const register double f_2 = f[INDEX2(i,2,numComp)];
532     const register double f_3 = f[INDEX2(i,3,numComp)];
533     int_local[i]+=(f_0+f_1+f_2+f_3)*w_0;
534     } /* end of component loop i */
535     } /* end of k0 loop */
536     } /* end of k1 loop */
537    
538     #pragma omp critical
539     for (index_t i=0; i<numComp; i++)
540     integrals[i]+=int_local[i];
541     }
542     } else if (arg.getFunctionSpace().getTypeCode() == ReducedElements) {
543     const double w_0 = h0*h1;
544     #pragma omp parallel
545     {
546     vector<double> int_local(numComp, 0);
547     #pragma omp for
548     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
549     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
550     const double* f = in.getSampleDataRO(INDEX2(k0, k1, m_NE0));
551     for (index_t i=0; i < numComp; ++i) {
552     int_local[i]+=f[i]*w_0;
553     } /* end of component loop i */
554     } /* end of k0 loop */
555     } /* end of k1 loop */
556    
557     #pragma omp critical
558     for (index_t i=0; i<numComp; i++)
559     integrals[i]+=int_local[i];
560     }
561     } else if (arg.getFunctionSpace().getTypeCode() == FaceElements) {
562     const double w_0 = h0/2.;
563     const double w_1 = h1/2.;
564     #pragma omp parallel
565     {
566     vector<double> int_local(numComp, 0);
567     if (m_faceOffset[0] > -1) {
568     #pragma omp for
569     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
570     const double* f = in.getSampleDataRO(m_faceOffset[0]+k1);
571     for (index_t i=0; i < numComp; ++i) {
572     const register double f_0 = f[INDEX2(i,0,numComp)];
573     const register double f_1 = f[INDEX2(i,1,numComp)];
574     int_local[i]+=(f_0+f_1)*w_1;
575     } /* end of component loop i */
576     } /* end of k1 loop */
577     }
578    
579     if (m_faceOffset[1] > -1) {
580     #pragma omp for
581     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
582     const double* f = in.getSampleDataRO(m_faceOffset[1]+k1);
583     for (index_t i=0; i < numComp; ++i) {
584     const register double f_0 = f[INDEX2(i,0,numComp)];
585     const register double f_1 = f[INDEX2(i,1,numComp)];
586     int_local[i]+=(f_0+f_1)*w_1;
587     } /* end of component loop i */
588     } /* end of k1 loop */
589     }
590    
591     if (m_faceOffset[2] > -1) {
592     #pragma omp for
593     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
594     const double* f = in.getSampleDataRO(m_faceOffset[2]+k0);
595     for (index_t i=0; i < numComp; ++i) {
596     const register double f_0 = f[INDEX2(i,0,numComp)];
597     const register double f_1 = f[INDEX2(i,1,numComp)];
598     int_local[i]+=(f_0+f_1)*w_0;
599     } /* end of component loop i */
600     } /* end of k0 loop */
601     }
602    
603     if (m_faceOffset[3] > -1) {
604     #pragma omp for
605     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
606     const double* f = in.getSampleDataRO(m_faceOffset[3]+k0);
607     for (index_t i=0; i < numComp; ++i) {
608     const register double f_0 = f[INDEX2(i,0,numComp)];
609     const register double f_1 = f[INDEX2(i,1,numComp)];
610     int_local[i]+=(f_0+f_1)*w_0;
611     } /* end of component loop i */
612     } /* end of k0 loop */
613     }
614    
615     #pragma omp critical
616     for (index_t i=0; i<numComp; i++)
617     integrals[i]+=int_local[i];
618     }
619     } else if (arg.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
620     #pragma omp parallel
621     {
622     vector<double> int_local(numComp, 0);
623     if (m_faceOffset[0] > -1) {
624     #pragma omp for
625     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
626     const double* f = in.getSampleDataRO(m_faceOffset[0]+k1);
627     for (index_t i=0; i < numComp; ++i) {
628     int_local[i]+=f[i]*h1;
629     } /* end of component loop i */
630     } /* end of k1 loop */
631     }
632    
633     if (m_faceOffset[1] > -1) {
634     #pragma omp for
635     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
636     const double* f = in.getSampleDataRO(m_faceOffset[1]+k1);
637     for (index_t i=0; i < numComp; ++i) {
638     int_local[i]+=f[i]*h1;
639     } /* end of component loop i */
640     } /* end of k1 loop */
641     }
642    
643     if (m_faceOffset[2] > -1) {
644     #pragma omp for
645     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
646     const double* f = in.getSampleDataRO(m_faceOffset[2]+k0);
647     for (index_t i=0; i < numComp; ++i) {
648     int_local[i]+=f[i]*h0;
649     } /* end of component loop i */
650     } /* end of k0 loop */
651     }
652    
653     if (m_faceOffset[3] > -1) {
654     #pragma omp for
655     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
656     const double* f = in.getSampleDataRO(m_faceOffset[3]+k0);
657     for (index_t i=0; i < numComp; ++i) {
658     int_local[i]+=f[i]*h0;
659     } /* end of component loop i */
660     } /* end of k0 loop */
661     }
662    
663     #pragma omp critical
664     for (index_t i=0; i<numComp; i++)
665     integrals[i]+=int_local[i];
666     }
667     } else {
668     stringstream msg;
669     msg << "setToIntegrals() not implemented for "
670     << functionSpaceTypeAsString(arg.getFunctionSpace().getTypeCode());
671     throw RipleyException(msg.str());
672     }
673     }
674    
675 caltinay 3701 void Rectangle::addPDEToSystem(escript::AbstractSystemMatrix& mat,
676     escript::Data& rhs, const escript::Data& A, const escript::Data& B,
677     const escript::Data& C, const escript::Data& D,
678     const escript::Data& X, const escript::Data& Y,
679     const escript::Data& d, const escript::Data& y,
680     const escript::Data& d_contact, const escript::Data& y_contact,
681     const escript::Data& d_dirac, const escript::Data& y_dirac) const
682     {
683     throw RipleyException("addPDEToSystem() not implemented");
684     }
685    
686 caltinay 3691 Paso_SystemMatrixPattern* Rectangle::getPattern(bool reducedRowOrder,
687     bool reducedColOrder) const
688     {
689     if (reducedRowOrder || reducedColOrder)
690     throw RipleyException("getPattern() not implemented for reduced order");
691    
692 caltinay 3699 // connector
693     RankVector neighbour;
694     IndexVector offsetInShared(1,0);
695     IndexVector sendShared, recvShared;
696     const IndexVector faces=getNumFacesPerBoundary();
697     const index_t left = (m_offset0==0 ? 0 : 1);
698     const index_t bottom = (m_offset1==0 ? 0 : 1);
699     // corner node from bottom-left
700     if (faces[0] == 0 && faces[2] == 0) {
701     neighbour.push_back(m_mpiInfo->rank-m_NX-1);
702     offsetInShared.push_back(offsetInShared.back()+1);
703     sendShared.push_back(m_nodeId[m_N0+left]);
704     recvShared.push_back(m_nodeId[0]);
705     }
706     // bottom edge
707     if (faces[2] == 0) {
708     neighbour.push_back(m_mpiInfo->rank-m_NX);
709     offsetInShared.push_back(offsetInShared.back()+m_N0-left);
710     for (dim_t i=left; i<m_N0; i++) {
711     // easy case, we know the neighbour id's
712     sendShared.push_back(m_nodeId[m_N0+i]);
713     recvShared.push_back(m_nodeId[i]);
714     }
715     }
716     // corner node from bottom-right
717     if (faces[1] == 0 && faces[2] == 0) {
718     neighbour.push_back(m_mpiInfo->rank-m_NX+1);
719     const index_t N0=(neighbour.back()%m_NX == 0 ? m_N0 : m_N0-1);
720     const index_t N1=(neighbour.back()/m_NX == 0 ? m_N1 : m_N1-1);
721     const index_t first=m_nodeDistribution[neighbour.back()];
722     offsetInShared.push_back(offsetInShared.back()+1);
723     sendShared.push_back(m_nodeId[(bottom+1)*m_N0-1]);
724     recvShared.push_back(first+N0*(N1-1));
725     }
726     // left edge
727     if (faces[0] == 0) {
728     neighbour.push_back(m_mpiInfo->rank-1);
729     offsetInShared.push_back(offsetInShared.back()+m_N1-bottom);
730     for (dim_t i=bottom; i<m_N1; i++) {
731     // easy case, we know the neighbour id's
732     sendShared.push_back(m_nodeId[i*m_N0+1]);
733     recvShared.push_back(m_nodeId[i*m_N0]);
734     }
735     }
736     // right edge
737     if (faces[1] == 0) {
738     neighbour.push_back(m_mpiInfo->rank+1);
739     const index_t rightN0=(neighbour.back()%m_NX == 0 ? m_N0 : m_N0-1);
740     const index_t first=m_nodeDistribution[neighbour.back()];
741     offsetInShared.push_back(offsetInShared.back()+m_N1-bottom);
742     for (dim_t i=bottom; i<m_N1; i++) {
743     sendShared.push_back(m_nodeId[(i+1)*m_N0-1]);
744     recvShared.push_back(first+rightN0*(i-bottom));
745     }
746     }
747     // corner node from top-left
748     if (faces[0] == 0 && faces[3] == 0) {
749     neighbour.push_back(m_mpiInfo->rank+m_NX-1);
750     const index_t N0=(neighbour.back()%m_NX == 0 ? m_N0 : m_N0-1);
751     const index_t first=m_nodeDistribution[neighbour.back()];
752     offsetInShared.push_back(offsetInShared.back()+1);
753     sendShared.push_back(m_nodeId[m_N0*(m_N1-1)+left]);
754     recvShared.push_back(first+N0-1);
755     }
756     // top edge
757     if (faces[3] == 0) {
758     neighbour.push_back(m_mpiInfo->rank+m_NX);
759     const index_t first=m_nodeDistribution[neighbour.back()];
760     offsetInShared.push_back(offsetInShared.back()+m_N0-left);
761     for (dim_t i=left; i<m_N0; i++) {
762     sendShared.push_back(m_nodeId[m_N0*(m_N1-1)+i]);
763     recvShared.push_back(first+i-left);
764     }
765     }
766     // corner node from top-right
767     if (faces[1] == 0 && faces[3] == 0) {
768     neighbour.push_back(m_mpiInfo->rank+m_NX+1);
769     const index_t first=m_nodeDistribution[neighbour.back()];
770     offsetInShared.push_back(offsetInShared.back()+1);
771     sendShared.push_back(m_nodeId[m_N0*m_N1-1]);
772     recvShared.push_back(first);
773     }
774     const int numDOF=m_nodeDistribution[m_mpiInfo->rank+1]-m_nodeDistribution[m_mpiInfo->rank];
775 caltinay 3702 /*
776 caltinay 3699 cout << "--- rcv_shcomp ---" << endl;
777     cout << "numDOF=" << numDOF << ", numNeighbors=" << neighbour.size() << endl;
778     for (size_t i=0; i<neighbour.size(); i++) {
779     cout << "neighbor[" << i << "]=" << neighbour[i]
780     << " offsetInShared[" << i+1 << "]=" << offsetInShared[i+1] << endl;
781     }
782     for (size_t i=0; i<recvShared.size(); i++) {
783     cout << "shared[" << i << "]=" << recvShared[i] << endl;
784     }
785     cout << "--- snd_shcomp ---" << endl;
786     for (size_t i=0; i<sendShared.size(); i++) {
787     cout << "shared[" << i << "]=" << sendShared[i] << endl;
788     }
789 caltinay 3702 */
790 caltinay 3691
791     Paso_SharedComponents *snd_shcomp = Paso_SharedComponents_alloc(
792 caltinay 3699 numDOF, neighbour.size(), &neighbour[0], &sendShared[0],
793 caltinay 3697 &offsetInShared[0], 1, 0, m_mpiInfo);
794 caltinay 3691 Paso_SharedComponents *rcv_shcomp = Paso_SharedComponents_alloc(
795 caltinay 3699 numDOF, neighbour.size(), &neighbour[0], &recvShared[0],
796 caltinay 3697 &offsetInShared[0], 1, 0, m_mpiInfo);
797 caltinay 3691 Paso_Connector* connector = Paso_Connector_alloc(snd_shcomp, rcv_shcomp);
798 caltinay 3699 Paso_SharedComponents_free(snd_shcomp);
799     Paso_SharedComponents_free(rcv_shcomp);
800 caltinay 3691
801     // create patterns
802 caltinay 3699 dim_t M, N;
803     IndexVector ptr(1,0);
804     IndexVector index;
805    
806     // main pattern
807     for (index_t i=0; i<numDOF; i++) {
808     // always add the node itself
809     index.push_back(i);
810     int num=insertNeighbours(index, i);
811     ptr.push_back(ptr.back()+num+1);
812     }
813     M=N=ptr.size()-1;
814     // paso will manage the memory
815     index_t* indexC = MEMALLOC(index.size(),index_t);
816     index_t* ptrC = MEMALLOC(ptr.size(), index_t);
817     copy(index.begin(), index.end(), indexC);
818     copy(ptr.begin(), ptr.end(), ptrC);
819 caltinay 3691 Paso_Pattern *mainPattern = Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT,
820 caltinay 3699 M, N, ptrC, indexC);
821 caltinay 3691
822 caltinay 3702 /*
823 caltinay 3699 cout << "--- main_pattern ---" << endl;
824     cout << "M=" << M << ", N=" << N << endl;
825     for (size_t i=0; i<ptr.size(); i++) {
826     cout << "ptr[" << i << "]=" << ptr[i] << endl;
827     }
828     for (size_t i=0; i<index.size(); i++) {
829     cout << "index[" << i << "]=" << index[i] << endl;
830     }
831 caltinay 3702 */
832 caltinay 3699
833     ptr.clear();
834     index.clear();
835    
836     // column & row couple patterns
837     Paso_Pattern *colCouplePattern, *rowCouplePattern;
838     generateCouplePatterns(&colCouplePattern, &rowCouplePattern);
839    
840     // allocate paso distribution
841     Paso_Distribution* distribution = Paso_Distribution_alloc(m_mpiInfo,
842     const_cast<index_t*>(&m_nodeDistribution[0]), 1, 0);
843    
844 caltinay 3691 Paso_SystemMatrixPattern* pattern = Paso_SystemMatrixPattern_alloc(
845     MATRIX_FORMAT_DEFAULT, distribution, distribution,
846     mainPattern, colCouplePattern, rowCouplePattern,
847     connector, connector);
848     Paso_Pattern_free(mainPattern);
849     Paso_Pattern_free(colCouplePattern);
850     Paso_Pattern_free(rowCouplePattern);
851     Paso_Distribution_free(distribution);
852 caltinay 3697 return pattern;
853 caltinay 3691 }
854    
855     void Rectangle::Print_Mesh_Info(const bool full) const
856     {
857     RipleyDomain::Print_Mesh_Info(full);
858     if (full) {
859     cout << " Id Coordinates" << endl;
860     cout.precision(15);
861     cout.setf(ios::scientific, ios::floatfield);
862 caltinay 3697 pair<double,double> xdx = getFirstCoordAndSpacing(0);
863     pair<double,double> ydy = getFirstCoordAndSpacing(1);
864 caltinay 3691 for (index_t i=0; i < getNumNodes(); i++) {
865     cout << " " << setw(5) << m_nodeId[i]
866 caltinay 3697 << " " << xdx.first+(i%m_N0)*xdx.second
867     << " " << ydy.first+(i/m_N0)*ydy.second << endl;
868 caltinay 3691 }
869     }
870     }
871    
872 caltinay 3697 IndexVector Rectangle::getNumNodesPerDim() const
873     {
874     IndexVector ret;
875     ret.push_back(m_N0);
876     ret.push_back(m_N1);
877     return ret;
878     }
879    
880     IndexVector Rectangle::getNumElementsPerDim() const
881     {
882     IndexVector ret;
883     ret.push_back(m_NE0);
884     ret.push_back(m_NE1);
885     return ret;
886     }
887    
888     IndexVector Rectangle::getNumFacesPerBoundary() const
889     {
890     IndexVector ret(4, 0);
891     //left
892     if (m_offset0==0)
893     ret[0]=m_NE1;
894     //right
895     if (m_mpiInfo->rank%m_NX==m_NX-1)
896     ret[1]=m_NE1;
897     //bottom
898     if (m_offset1==0)
899     ret[2]=m_NE0;
900     //top
901     if (m_mpiInfo->rank/m_NX==m_NY-1)
902     ret[3]=m_NE0;
903     return ret;
904     }
905    
906     pair<double,double> Rectangle::getFirstCoordAndSpacing(dim_t dim) const
907     {
908     if (dim==0) {
909     return pair<double,double>((m_l0*m_offset0)/m_gNE0, m_l0/m_gNE0);
910     } else if (dim==1) {
911     return pair<double,double>((m_l1*m_offset1)/m_gNE1, m_l1/m_gNE1);
912     }
913     throw RipleyException("getFirstCoordAndSpacing(): invalid argument");
914     }
915    
916 caltinay 3691 //protected
917     dim_t Rectangle::getNumFaceElements() const
918     {
919 caltinay 3699 const IndexVector faces = getNumFacesPerBoundary();
920 caltinay 3691 dim_t n=0;
921 caltinay 3699 for (size_t i=0; i<faces.size(); i++)
922     n+=faces[i];
923 caltinay 3691 return n;
924     }
925    
926     //protected
927     void Rectangle::assembleCoordinates(escript::Data& arg) const
928     {
929     escriptDataC x = arg.getDataC();
930     int numDim = m_numDim;
931     if (!isDataPointShapeEqual(&x, 1, &numDim))
932     throw RipleyException("setToX: Invalid Data object shape");
933     if (!numSamplesEqual(&x, 1, getNumNodes()))
934     throw RipleyException("setToX: Illegal number of samples in Data object");
935    
936 caltinay 3697 pair<double,double> xdx = getFirstCoordAndSpacing(0);
937     pair<double,double> ydy = getFirstCoordAndSpacing(1);
938 caltinay 3691 arg.requireWrite();
939     #pragma omp parallel for
940     for (dim_t i1 = 0; i1 < m_N1; i1++) {
941     for (dim_t i0 = 0; i0 < m_N0; i0++) {
942     double* point = arg.getSampleDataRW(i0+m_N0*i1);
943 caltinay 3697 point[0] = xdx.first+i0*xdx.second;
944     point[1] = ydy.first+i1*ydy.second;
945 caltinay 3691 }
946     }
947     }
948    
949     //private
950     void Rectangle::populateSampleIds()
951     {
952 caltinay 3697 // identifiers are ordered from left to right, bottom to top on each rank,
953     // except for the shared nodes which are owned by the rank below / to the
954     // left of the current rank
955    
956     // build node distribution vector first.
957     // m_nodeDistribution[i] is the first node id on rank i, that is
958     // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes
959     m_nodeDistribution.assign(m_mpiInfo->size+1, 0);
960     m_nodeDistribution[1]=getNumNodes();
961     for (dim_t k=1; k<m_mpiInfo->size-1; k++) {
962     const index_t x=k%m_NX;
963     const index_t y=k/m_NX;
964     index_t numNodes=getNumNodes();
965     if (x>0)
966     numNodes-=m_N1;
967     if (y>0)
968     numNodes-=m_N0;
969     if (x>0 && y>0)
970     numNodes++; // subtracted corner twice -> fix that
971     m_nodeDistribution[k+1]=m_nodeDistribution[k]+numNodes;
972     }
973     m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal();
974    
975 caltinay 3691 m_nodeId.resize(getNumNodes());
976 caltinay 3697
977     // the bottom row and left column are not owned by this rank so the
978     // identifiers need to be computed accordingly
979     const index_t left = (m_offset0==0 ? 0 : 1);
980     const index_t bottom = (m_offset1==0 ? 0 : 1);
981     if (left>0) {
982     const int neighbour=m_mpiInfo->rank-1;
983     const index_t leftN0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
984 caltinay 3691 #pragma omp parallel for
985 caltinay 3697 for (dim_t i1=bottom; i1<m_N1; i1++) {
986     m_nodeId[i1*m_N0]=m_nodeDistribution[neighbour]
987     + (i1-bottom+1)*leftN0-1;
988     }
989     }
990     if (bottom>0) {
991     const int neighbour=m_mpiInfo->rank-m_NX;
992     const index_t bottomN0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
993     const index_t bottomN1=(neighbour/m_NX == 0 ? m_N1 : m_N1-1);
994     #pragma omp parallel for
995     for (dim_t i0=left; i0<m_N0; i0++) {
996     m_nodeId[i0]=m_nodeDistribution[neighbour]
997     + (bottomN1-1)*bottomN0 + i0 - left;
998     }
999     }
1000     if (left>0 && bottom>0) {
1001     const int neighbour=m_mpiInfo->rank-m_NX-1;
1002 caltinay 3699 const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1003     const index_t N1=(neighbour/m_NX == 0 ? m_N1 : m_N1-1);
1004     m_nodeId[0]=m_nodeDistribution[neighbour]+N0*N1-1;
1005 caltinay 3697 }
1006    
1007     // the rest of the id's are contiguous
1008     const index_t firstId=m_nodeDistribution[m_mpiInfo->rank];
1009     #pragma omp parallel for
1010     for (dim_t i1=bottom; i1<m_N1; i1++) {
1011     for (dim_t i0=left; i0<m_N0; i0++) {
1012     m_nodeId[i0+i1*m_N0] = firstId+i0-left+(i1-bottom)*(m_N0-left);
1013     }
1014     }
1015    
1016 caltinay 3704 // element id's
1017 caltinay 3697 m_elementId.resize(getNumElements());
1018     #pragma omp parallel for
1019     for (dim_t k=0; k<getNumElements(); k++) {
1020     m_elementId[k]=k;
1021     }
1022    
1023 caltinay 3704 // face element id's
1024 caltinay 3697 m_faceId.resize(getNumFaceElements());
1025     #pragma omp parallel for
1026     for (dim_t k=0; k<getNumFaceElements(); k++) {
1027     m_faceId[k]=k;
1028     }
1029 caltinay 3704
1030     // generate face offset vector
1031     const IndexVector facesPerEdge = getNumFacesPerBoundary();
1032     m_faceOffset.assign(facesPerEdge.size(), -1);
1033     index_t offset=0;
1034     for (size_t i=0; i<facesPerEdge.size(); i++) {
1035     if (facesPerEdge[i]>0) {
1036     m_faceOffset[i]=offset;
1037     offset+=facesPerEdge[i];
1038     }
1039     }
1040 caltinay 3691 }
1041    
1042 caltinay 3699 //private
1043     int Rectangle::insertNeighbours(IndexVector& index, index_t node) const
1044     {
1045     const dim_t myN0 = (m_offset0==0 ? m_N0 : m_N0-1);
1046     const dim_t myN1 = (m_offset1==0 ? m_N1 : m_N1-1);
1047     const int x=node%myN0;
1048     const int y=node/myN0;
1049     int num=0;
1050     if (y>0) {
1051     if (x>0) {
1052     // bottom-left
1053     index.push_back(node-myN0-1);
1054     num++;
1055     }
1056     // bottom
1057     index.push_back(node-myN0);
1058     num++;
1059     if (x<myN0-1) {
1060     // bottom-right
1061     index.push_back(node-myN0+1);
1062     num++;
1063     }
1064     }
1065     if (x<myN0-1) {
1066     // right
1067     index.push_back(node+1);
1068     num++;
1069     if (y<myN1-1) {
1070     // top-right
1071     index.push_back(node+myN0+1);
1072     num++;
1073     }
1074     }
1075     if (y<myN1-1) {
1076     // top
1077     index.push_back(node+myN0);
1078     num++;
1079     if (x>0) {
1080     // top-left
1081     index.push_back(node+myN0-1);
1082     num++;
1083     }
1084     }
1085     if (x>0) {
1086     // left
1087     index.push_back(node-1);
1088     num++;
1089     }
1090    
1091     return num;
1092     }
1093    
1094     //private
1095     void Rectangle::generateCouplePatterns(Paso_Pattern** colPattern, Paso_Pattern** rowPattern) const
1096     {
1097     IndexVector ptr(1,0);
1098     IndexVector index;
1099     const dim_t myN0 = (m_offset0==0 ? m_N0 : m_N0-1);
1100     const dim_t myN1 = (m_offset1==0 ? m_N1 : m_N1-1);
1101     const IndexVector faces=getNumFacesPerBoundary();
1102    
1103     // bottom edge
1104     dim_t n=0;
1105     if (faces[0] == 0) {
1106     index.push_back(2*(myN0+myN1+1));
1107     index.push_back(2*(myN0+myN1+1)+1);
1108     n+=2;
1109     if (faces[2] == 0) {
1110     index.push_back(0);
1111     index.push_back(1);
1112     index.push_back(2);
1113     n+=3;
1114     }
1115     } else if (faces[2] == 0) {
1116     index.push_back(1);
1117     index.push_back(2);
1118     n+=2;
1119     }
1120     // n=neighbours of bottom-left corner node
1121     ptr.push_back(ptr.back()+n);
1122     n=0;
1123     if (faces[2] == 0) {
1124     for (dim_t i=1; i<myN0-1; i++) {
1125     index.push_back(i);
1126     index.push_back(i+1);
1127     index.push_back(i+2);
1128     ptr.push_back(ptr.back()+3);
1129     }
1130     index.push_back(myN0-1);
1131     index.push_back(myN0);
1132     n+=2;
1133     if (faces[1] == 0) {
1134     index.push_back(myN0+1);
1135     index.push_back(myN0+2);
1136     index.push_back(myN0+3);
1137     n+=3;
1138     }
1139     } else {
1140     for (dim_t i=1; i<myN0-1; i++) {
1141     ptr.push_back(ptr.back());
1142     }
1143     if (faces[1] == 0) {
1144     index.push_back(myN0+2);
1145     index.push_back(myN0+3);
1146     n+=2;
1147     }
1148     }
1149     // n=neighbours of bottom-right corner node
1150     ptr.push_back(ptr.back()+n);
1151    
1152     // 2nd row to 2nd last row
1153     for (dim_t i=1; i<myN1-1; i++) {
1154     // left edge
1155     if (faces[0] == 0) {
1156     index.push_back(2*(myN0+myN1+2)-i);
1157     index.push_back(2*(myN0+myN1+2)-i-1);
1158     index.push_back(2*(myN0+myN1+2)-i-2);
1159     ptr.push_back(ptr.back()+3);
1160     } else {
1161     ptr.push_back(ptr.back());
1162     }
1163     for (dim_t j=1; j<myN0-1; j++) {
1164     ptr.push_back(ptr.back());
1165     }
1166     // right edge
1167     if (faces[1] == 0) {
1168     index.push_back(myN0+i+1);
1169     index.push_back(myN0+i+2);
1170     index.push_back(myN0+i+3);
1171     ptr.push_back(ptr.back()+3);
1172     } else {
1173     ptr.push_back(ptr.back());
1174     }
1175     }
1176    
1177     // top edge
1178     n=0;
1179     if (faces[0] == 0) {
1180     index.push_back(2*myN0+myN1+5);
1181     index.push_back(2*myN0+myN1+4);
1182     n+=2;
1183     if (faces[3] == 0) {
1184     index.push_back(2*myN0+myN1+3);
1185     index.push_back(2*myN0+myN1+2);
1186     index.push_back(2*myN0+myN1+1);
1187     n+=3;
1188     }
1189     } else if (faces[3] == 0) {
1190     index.push_back(2*myN0+myN1+2);
1191     index.push_back(2*myN0+myN1+1);
1192     n+=2;
1193     }
1194     // n=neighbours of top-left corner node
1195     ptr.push_back(ptr.back()+n);
1196     n=0;
1197     if (faces[3] == 0) {
1198     for (dim_t i=1; i<myN0-1; i++) {
1199     index.push_back(2*myN0+myN1+i+1);
1200     index.push_back(2*myN0+myN1+i);
1201     index.push_back(2*myN0+myN1+i-1);
1202     ptr.push_back(ptr.back()+3);
1203     }
1204     index.push_back(myN0+myN1+4);
1205     index.push_back(myN0+myN1+3);
1206     n+=2;
1207     if (faces[1] == 0) {
1208     index.push_back(myN0+myN1+2);
1209     index.push_back(myN0+myN1+1);
1210     index.push_back(myN0+myN1);
1211     n+=3;
1212     }
1213     } else {
1214     for (dim_t i=1; i<myN0-1; i++) {
1215     ptr.push_back(ptr.back());
1216     }
1217     if (faces[1] == 0) {
1218     index.push_back(myN0+myN1+1);
1219     index.push_back(myN0+myN1);
1220     n+=2;
1221     }
1222     }
1223     // n=neighbours of top-right corner node
1224     ptr.push_back(ptr.back()+n);
1225    
1226     dim_t M=ptr.size()-1;
1227     map<index_t,index_t> idMap;
1228     dim_t N=0;
1229     for (IndexVector::iterator it=index.begin(); it!=index.end(); it++) {
1230     if (idMap.find(*it)==idMap.end()) {
1231     idMap[*it]=N;
1232     *it=N++;
1233     } else {
1234     *it=idMap[*it];
1235     }
1236     }
1237    
1238 caltinay 3702 /*
1239 caltinay 3699 cout << "--- colCouple_pattern ---" << endl;
1240     cout << "M=" << M << ", N=" << N << endl;
1241     for (size_t i=0; i<ptr.size(); i++) {
1242     cout << "ptr[" << i << "]=" << ptr[i] << endl;
1243     }
1244     for (size_t i=0; i<index.size(); i++) {
1245     cout << "index[" << i << "]=" << index[i] << endl;
1246     }
1247 caltinay 3702 */
1248 caltinay 3699
1249     // now build the row couple pattern
1250     IndexVector ptr2(1,0);
1251     IndexVector index2;
1252     for (dim_t id=0; id<N; id++) {
1253     n=0;
1254     for (dim_t i=0; i<M; i++) {
1255     for (dim_t j=ptr[i]; j<ptr[i+1]; j++) {
1256     if (index[j]==id) {
1257     index2.push_back(i);
1258     n++;
1259     break;
1260     }
1261     }
1262     }
1263     ptr2.push_back(ptr2.back()+n);
1264     }
1265    
1266 caltinay 3702 /*
1267 caltinay 3699 cout << "--- rowCouple_pattern ---" << endl;
1268     cout << "M=" << N << ", N=" << M << endl;
1269     for (size_t i=0; i<ptr2.size(); i++) {
1270     cout << "ptr[" << i << "]=" << ptr2[i] << endl;
1271     }
1272     for (size_t i=0; i<index2.size(); i++) {
1273     cout << "index[" << i << "]=" << index2[i] << endl;
1274     }
1275 caltinay 3702 */
1276 caltinay 3699
1277     // paso will manage the memory
1278     index_t* indexC = MEMALLOC(index.size(), index_t);
1279     index_t* ptrC = MEMALLOC(ptr.size(), index_t);
1280     copy(index.begin(), index.end(), indexC);
1281     copy(ptr.begin(), ptr.end(), ptrC);
1282     *colPattern=Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT, M, N, ptrC, indexC);
1283    
1284     // paso will manage the memory
1285     indexC = MEMALLOC(index2.size(), index_t);
1286     ptrC = MEMALLOC(ptr2.size(), index_t);
1287     copy(index2.begin(), index2.end(), indexC);
1288     copy(ptr2.begin(), ptr2.end(), ptrC);
1289     *rowPattern=Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT, N, M, ptrC, indexC);
1290     }
1291    
1292 caltinay 3702 //protected
1293 caltinay 3711 void Rectangle::interpolateNodesOnElements(escript::Data& out,
1294     escript::Data& in, bool reduced) const
1295 caltinay 3702 {
1296     const dim_t numComp = in.getDataPointSize();
1297 caltinay 3711 if (reduced) {
1298     /* GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS TOP */
1299     const double tmp0_0 = 0.25000000000000000000;
1300 caltinay 3702 #pragma omp parallel for
1301 caltinay 3711 for (index_t k1=0; k1 < m_NE1; ++k1) {
1302     for (index_t k0=0; k0 < m_NE0; ++k0) {
1303     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0));
1304     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0));
1305     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0));
1306     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0));
1307     double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0));
1308     for (index_t i=0; i < numComp; ++i) {
1309     o[INDEX2(i,numComp,0)] = tmp0_0*(f_00[i] + f_01[i] + f_10[i] + f_11[i]);
1310     } /* end of component loop i */
1311     } /* end of k0 loop */
1312     } /* end of k1 loop */
1313     /* GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS BOTTOM */
1314     } else {
1315     /* GENERATOR SNIP_INTERPOLATE_ELEMENTS TOP */
1316     const double tmp0_2 = 0.62200846792814621559;
1317     const double tmp0_1 = 0.044658198738520451079;
1318     const double tmp0_0 = 0.16666666666666666667;
1319     #pragma omp parallel for
1320     for (index_t k1=0; k1 < m_NE1; ++k1) {
1321     for (index_t k0=0; k0 < m_NE0; ++k0) {
1322     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0));
1323     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0));
1324     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0));
1325     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0));
1326     double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0));
1327     for (index_t i=0; i < numComp; ++i) {
1328     o[INDEX2(i,numComp,0)] = f_00[i]*tmp0_2 + f_11[i]*tmp0_1 + tmp0_0*(f_01[i] + f_10[i]);
1329     o[INDEX2(i,numComp,1)] = f_01[i]*tmp0_1 + f_10[i]*tmp0_2 + tmp0_0*(f_00[i] + f_11[i]);
1330     o[INDEX2(i,numComp,2)] = f_01[i]*tmp0_2 + f_10[i]*tmp0_1 + tmp0_0*(f_00[i] + f_11[i]);
1331     o[INDEX2(i,numComp,3)] = f_00[i]*tmp0_1 + f_11[i]*tmp0_2 + tmp0_0*(f_01[i] + f_10[i]);
1332     } /* end of component loop i */
1333     } /* end of k0 loop */
1334     } /* end of k1 loop */
1335     /* GENERATOR SNIP_INTERPOLATE_ELEMENTS BOTTOM */
1336     }
1337 caltinay 3702 }
1338    
1339     //protected
1340 caltinay 3711 void Rectangle::interpolateNodesOnFaces(escript::Data& out, escript::Data& in,
1341     bool reduced) const
1342 caltinay 3702 {
1343 caltinay 3704 const dim_t numComp = in.getDataPointSize();
1344 caltinay 3711 if (reduced) {
1345     /* GENERATOR SNIP_INTERPOLATE_REDUCED_FACES TOP */
1346     if (m_faceOffset[0] > -1) {
1347     const double tmp0_0 = 0.50000000000000000000;
1348 caltinay 3704 #pragma omp parallel for
1349 caltinay 3711 for (index_t k1=0; k1 < m_NE1; ++k1) {
1350     const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0));
1351     const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0));
1352     double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
1353     for (index_t i=0; i < numComp; ++i) {
1354     o[INDEX2(i,numComp,0)] = tmp0_0*(f_00[i] + f_01[i]);
1355     } /* end of component loop i */
1356     } /* end of k1 loop */
1357     } /* end of face 0 */
1358     if (m_faceOffset[1] > -1) {
1359     const double tmp0_0 = 0.50000000000000000000;
1360 caltinay 3704 #pragma omp parallel for
1361 caltinay 3711 for (index_t k1=0; k1 < m_NE1; ++k1) {
1362     const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0));
1363     const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0));
1364     double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
1365     for (index_t i=0; i < numComp; ++i) {
1366     o[INDEX2(i,numComp,0)] = tmp0_0*(f_10[i] + f_11[i]);
1367     } /* end of component loop i */
1368     } /* end of k1 loop */
1369     } /* end of face 1 */
1370     if (m_faceOffset[2] > -1) {
1371     const double tmp0_0 = 0.50000000000000000000;
1372 caltinay 3704 #pragma omp parallel for
1373 caltinay 3711 for (index_t k0=0; k0 < m_NE0; ++k0) {
1374     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0));
1375     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0));
1376     double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1377     for (index_t i=0; i < numComp; ++i) {
1378     o[INDEX2(i,numComp,0)] = tmp0_0*(f_00[i] + f_10[i]);
1379     } /* end of component loop i */
1380     } /* end of k0 loop */
1381     } /* end of face 2 */
1382     if (m_faceOffset[3] > -1) {
1383     const double tmp0_0 = 0.50000000000000000000;
1384 caltinay 3704 #pragma omp parallel for
1385 caltinay 3711 for (index_t k0=0; k0 < m_NE0; ++k0) {
1386     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0));
1387     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0));
1388     double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1389     for (index_t i=0; i < numComp; ++i) {
1390     o[INDEX2(i,numComp,0)] = tmp0_0*(f_01[i] + f_11[i]);
1391     } /* end of component loop i */
1392     } /* end of k0 loop */
1393     } /* end of face 3 */
1394     /* GENERATOR SNIP_INTERPOLATE_REDUCED_FACES BOTTOM */
1395     } else {
1396     /* GENERATOR SNIP_INTERPOLATE_FACES TOP */
1397     if (m_faceOffset[0] > -1) {
1398     const double tmp0_1 = 0.78867513459481288225;
1399     const double tmp0_0 = 0.21132486540518711775;
1400     #pragma omp parallel for
1401     for (index_t k1=0; k1 < m_NE1; ++k1) {
1402     const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0));
1403     const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0));
1404     double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
1405     for (index_t i=0; i < numComp; ++i) {
1406     o[INDEX2(i,numComp,0)] = f_00[i]*tmp0_1 + f_01[i]*tmp0_0;
1407     o[INDEX2(i,numComp,1)] = f_00[i]*tmp0_0 + f_01[i]*tmp0_1;
1408     } /* end of component loop i */
1409     } /* end of k1 loop */
1410     } /* end of face 0 */
1411     if (m_faceOffset[1] > -1) {
1412     const double tmp0_1 = 0.21132486540518711775;
1413     const double tmp0_0 = 0.78867513459481288225;
1414     #pragma omp parallel for
1415     for (index_t k1=0; k1 < m_NE1; ++k1) {
1416     const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0));
1417     const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0));
1418     double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
1419     for (index_t i=0; i < numComp; ++i) {
1420     o[INDEX2(i,numComp,0)] = f_10[i]*tmp0_0 + f_11[i]*tmp0_1;
1421     o[INDEX2(i,numComp,1)] = f_10[i]*tmp0_1 + f_11[i]*tmp0_0;
1422     } /* end of component loop i */
1423     } /* end of k1 loop */
1424     } /* end of face 1 */
1425     if (m_faceOffset[2] > -1) {
1426     const double tmp0_1 = 0.78867513459481288225;
1427     const double tmp0_0 = 0.21132486540518711775;
1428     #pragma omp parallel for
1429     for (index_t k0=0; k0 < m_NE0; ++k0) {
1430     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0));
1431     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0));
1432     double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1433     for (index_t i=0; i < numComp; ++i) {
1434     o[INDEX2(i,numComp,0)] = f_00[i]*tmp0_1 + f_10[i]*tmp0_0;
1435     o[INDEX2(i,numComp,1)] = f_00[i]*tmp0_0 + f_10[i]*tmp0_1;
1436     } /* end of component loop i */
1437     } /* end of k0 loop */
1438     } /* end of face 2 */
1439     if (m_faceOffset[3] > -1) {
1440     const double tmp0_1 = 0.78867513459481288225;
1441     const double tmp0_0 = 0.21132486540518711775;
1442     #pragma omp parallel for
1443     for (index_t k0=0; k0 < m_NE0; ++k0) {
1444     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0));
1445     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0));
1446     double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1447     for (index_t i=0; i < numComp; ++i) {
1448     o[INDEX2(i,numComp,0)] = f_01[i]*tmp0_1 + f_11[i]*tmp0_0;
1449     o[INDEX2(i,numComp,1)] = f_01[i]*tmp0_0 + f_11[i]*tmp0_1;
1450     } /* end of component loop i */
1451     } /* end of k0 loop */
1452     } /* end of face 3 */
1453     /* GENERATOR SNIP_INTERPOLATE_FACES BOTTOM */
1454     }
1455 caltinay 3702 }
1456    
1457 caltinay 3691 } // end of namespace ripley
1458    

  ViewVC Help
Powered by ViewVC 1.1.26