/[escript]/trunk/ripley/src/Rectangle.cpp
ViewVC logotype

Annotation of /trunk/ripley/src/Rectangle.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3724 - (hide annotations)
Wed Dec 7 07:35:21 2011 UTC (7 years, 10 months ago) by caltinay
Original Path: branches/ripleygmg_from_3668/ripley/src/Rectangle.cpp
File size: 59905 byte(s)
Edits to generated code in Rectangle (renamed constants & removed duplicates,
more nowait optimizations)

1 caltinay 3691
2     /*******************************************************
3     *
4     * Copyright (c) 2003-2011 by University of Queensland
5     * Earth Systems Science Computational Center (ESSCC)
6     * http://www.uq.edu.au/esscc
7     *
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13    
14     #include <ripley/Rectangle.h>
15     extern "C" {
16     #include "paso/SystemMatrixPattern.h"
17     }
18    
19     #if USE_SILO
20     #include <silo.h>
21     #ifdef ESYS_MPI
22     #include <pmpio.h>
23     #endif
24     #endif
25    
26     #include <iomanip>
27    
28     using namespace std;
29    
30     namespace ripley {
31    
32     Rectangle::Rectangle(int n0, int n1, double l0, double l1, int d0, int d1) :
33     RipleyDomain(2),
34     m_gNE0(n0),
35     m_gNE1(n1),
36     m_l0(l0),
37     m_l1(l1),
38     m_NX(d0),
39     m_NY(d1)
40     {
41     // ensure number of subdivisions is valid and nodes can be distributed
42     // among number of ranks
43     if (m_NX*m_NY != m_mpiInfo->size)
44     throw RipleyException("Invalid number of spatial subdivisions");
45    
46     if (n0%m_NX > 0 || n1%m_NY > 0)
47     throw RipleyException("Number of elements must be separable into number of ranks in each dimension");
48    
49     // local number of elements
50     m_NE0 = n0/m_NX;
51     m_NE1 = n1/m_NY;
52     // local number of nodes (not necessarily owned)
53     m_N0 = m_NE0+1;
54     m_N1 = m_NE1+1;
55     // bottom-left node is at (offset0,offset1) in global mesh
56     m_offset0 = m_NE0*(m_mpiInfo->rank%m_NX);
57     m_offset1 = m_NE1*(m_mpiInfo->rank/m_NX);
58     populateSampleIds();
59     }
60    
61     Rectangle::~Rectangle()
62     {
63     }
64    
65     string Rectangle::getDescription() const
66     {
67     return "ripley::Rectangle";
68     }
69    
70     bool Rectangle::operator==(const AbstractDomain& other) const
71     {
72     if (dynamic_cast<const Rectangle*>(&other))
73     return this==&other;
74    
75     return false;
76     }
77    
78     void Rectangle::dump(const string& fileName) const
79     {
80     #if USE_SILO
81     string fn(fileName);
82     if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
83     fn+=".silo";
84     }
85    
86     const int NUM_SILO_FILES = 1;
87     const char* blockDirFmt = "/block%04d";
88     int driver=DB_HDF5;
89     string siloPath;
90     DBfile* dbfile = NULL;
91    
92     #ifdef ESYS_MPI
93     PMPIO_baton_t* baton = NULL;
94     #endif
95    
96     if (m_mpiInfo->size > 1) {
97     #ifdef ESYS_MPI
98     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
99     0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
100     PMPIO_DefaultClose, (void*)&driver);
101     // try the fallback driver in case of error
102     if (!baton && driver != DB_PDB) {
103     driver = DB_PDB;
104     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
105     0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
106     PMPIO_DefaultClose, (void*)&driver);
107     }
108     if (baton) {
109     char str[64];
110     snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
111     siloPath = str;
112     dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
113     }
114     #endif
115     } else {
116     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
117     getDescription().c_str(), driver);
118     // try the fallback driver in case of error
119     if (!dbfile && driver != DB_PDB) {
120     driver = DB_PDB;
121     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
122     getDescription().c_str(), driver);
123     }
124     }
125    
126     if (!dbfile)
127     throw RipleyException("dump: Could not create Silo file");
128    
129     /*
130     if (driver==DB_HDF5) {
131     // gzip level 1 already provides good compression with minimal
132     // performance penalty. Some tests showed that gzip levels >3 performed
133     // rather badly on escript data both in terms of time and space
134     DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
135     }
136     */
137    
138     boost::scoped_ptr<double> x(new double[m_N0]);
139     boost::scoped_ptr<double> y(new double[m_N1]);
140     double* coords[2] = { x.get(), y.get() };
141 caltinay 3697 pair<double,double> xdx = getFirstCoordAndSpacing(0);
142     pair<double,double> ydy = getFirstCoordAndSpacing(1);
143 caltinay 3691 #pragma omp parallel
144     {
145 caltinay 3722 #pragma omp for nowait
146 caltinay 3691 for (dim_t i0 = 0; i0 < m_N0; i0++) {
147 caltinay 3697 coords[0][i0]=xdx.first+i0*xdx.second;
148 caltinay 3691 }
149 caltinay 3722 #pragma omp for nowait
150 caltinay 3691 for (dim_t i1 = 0; i1 < m_N1; i1++) {
151 caltinay 3697 coords[1][i1]=ydy.first+i1*ydy.second;
152 caltinay 3691 }
153     }
154 caltinay 3697 IndexVector dims = getNumNodesPerDim();
155    
156     // write mesh
157     DBPutQuadmesh(dbfile, "mesh", NULL, coords, &dims[0], 2, DB_DOUBLE,
158 caltinay 3691 DB_COLLINEAR, NULL);
159    
160 caltinay 3697 // write node ids
161     DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], &dims[0], 2,
162     NULL, 0, DB_INT, DB_NODECENT, NULL);
163    
164     // write element ids
165     dims = getNumElementsPerDim();
166     DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
167     &dims[0], 2, NULL, 0, DB_INT, DB_ZONECENT, NULL);
168    
169     // rank 0 writes multimesh and multivar
170 caltinay 3691 if (m_mpiInfo->rank == 0) {
171     vector<string> tempstrings;
172 caltinay 3697 vector<char*> names;
173 caltinay 3691 for (dim_t i=0; i<m_mpiInfo->size; i++) {
174     stringstream path;
175     path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
176     tempstrings.push_back(path.str());
177 caltinay 3697 names.push_back((char*)tempstrings.back().c_str());
178 caltinay 3691 }
179 caltinay 3697 vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
180 caltinay 3691 DBSetDir(dbfile, "/");
181 caltinay 3697 DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
182     &types[0], NULL);
183     tempstrings.clear();
184     names.clear();
185     for (dim_t i=0; i<m_mpiInfo->size; i++) {
186     stringstream path;
187     path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
188     tempstrings.push_back(path.str());
189     names.push_back((char*)tempstrings.back().c_str());
190     }
191     types.assign(m_mpiInfo->size, DB_QUADVAR);
192     DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
193     &types[0], NULL);
194     tempstrings.clear();
195     names.clear();
196     for (dim_t i=0; i<m_mpiInfo->size; i++) {
197     stringstream path;
198     path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
199     tempstrings.push_back(path.str());
200     names.push_back((char*)tempstrings.back().c_str());
201     }
202     DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
203     &types[0], NULL);
204 caltinay 3691 }
205    
206     if (m_mpiInfo->size > 1) {
207     #ifdef ESYS_MPI
208     PMPIO_HandOffBaton(baton, dbfile);
209     PMPIO_Finish(baton);
210     #endif
211     } else {
212     DBClose(dbfile);
213     }
214    
215     #else // USE_SILO
216     throw RipleyException("dump(): no Silo support");
217     #endif
218     }
219    
220 caltinay 3697 const int* Rectangle::borrowSampleReferenceIDs(int fsType) const
221 caltinay 3691 {
222 caltinay 3697 switch (fsType) {
223 caltinay 3691 case Nodes:
224     return &m_nodeId[0];
225     case Elements:
226     return &m_elementId[0];
227     case FaceElements:
228     return &m_faceId[0];
229     default:
230     break;
231     }
232    
233     stringstream msg;
234     msg << "borrowSampleReferenceIDs() not implemented for function space type "
235 caltinay 3701 << functionSpaceTypeAsString(fsType);
236 caltinay 3691 throw RipleyException(msg.str());
237     }
238    
239     bool Rectangle::ownSample(int fsCode, index_t id) const
240     {
241     #ifdef ESYS_MPI
242 caltinay 3702 if (fsCode != ReducedNodes) {
243 caltinay 3699 const index_t myFirst=m_nodeDistribution[m_mpiInfo->rank];
244     const index_t myLast=m_nodeDistribution[m_mpiInfo->rank+1]-1;
245 caltinay 3691 return (m_nodeId[id]>=myFirst && m_nodeId[id]<=myLast);
246 caltinay 3702 } else {
247     stringstream msg;
248     msg << "ownSample() not implemented for "
249     << functionSpaceTypeAsString(fsCode);
250     throw RipleyException(msg.str());
251     }
252 caltinay 3691 #else
253     return true;
254     #endif
255     }
256    
257 caltinay 3702 void Rectangle::setToGradient(escript::Data& out, const escript::Data& cIn) const
258 caltinay 3691 {
259 caltinay 3702 escript::Data& in = *const_cast<escript::Data*>(&cIn);
260 caltinay 3701 const dim_t numComp = in.getDataPointSize();
261 caltinay 3702 const double h0 = m_l0/m_gNE0;
262     const double h1 = m_l1/m_gNE1;
263 caltinay 3724 const double cx0 = -1./h0;
264     const double cx1 = -.78867513459481288225/h0;
265     const double cx2 = -.5/h0;
266     const double cx3 = -.21132486540518711775/h0;
267     const double cx4 = .21132486540518711775/h0;
268     const double cx5 = .5/h0;
269     const double cx6 = .78867513459481288225/h0;
270     const double cx7 = 1./h0;
271     const double cy0 = -1./h1;
272     const double cy1 = -.78867513459481288225/h1;
273     const double cy2 = -.5/h1;
274     const double cy3 = -.21132486540518711775/h1;
275     const double cy4 = .21132486540518711775/h1;
276     const double cy5 = .5/h1;
277     const double cy6 = .78867513459481288225/h1;
278     const double cy7 = 1./h1;
279    
280 caltinay 3702 if (out.getFunctionSpace().getTypeCode() == Elements) {
281 caltinay 3724 /*** GENERATOR SNIP_GRAD_ELEMENTS TOP */
282 caltinay 3697 #pragma omp parallel for
283 caltinay 3707 for (index_t k1=0; k1 < m_NE1; ++k1) {
284     for (index_t k0=0; k0 < m_NE0; ++k0) {
285 caltinay 3702 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0));
286     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0));
287     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0));
288     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0));
289     double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0));
290     for (index_t i=0; i < numComp; ++i) {
291 caltinay 3724 o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx1 + f_01[i]*cx3 + f_10[i]*cx6 + f_11[i]*cx4;
292     o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy1 + f_01[i]*cy6 + f_10[i]*cy3 + f_11[i]*cy4;
293     o[INDEX3(i,0,1,numComp,2)] = f_00[i]*cx1 + f_01[i]*cx3 + f_10[i]*cx6 + f_11[i]*cx4;
294     o[INDEX3(i,1,1,numComp,2)] = f_00[i]*cy3 + f_01[i]*cy4 + f_10[i]*cy1 + f_11[i]*cy6;
295     o[INDEX3(i,0,2,numComp,2)] = f_00[i]*cx3 + f_01[i]*cx1 + f_10[i]*cx4 + f_11[i]*cx6;
296     o[INDEX3(i,1,2,numComp,2)] = f_00[i]*cy1 + f_01[i]*cy6 + f_10[i]*cy3 + f_11[i]*cy4;
297     o[INDEX3(i,0,3,numComp,2)] = f_00[i]*cx3 + f_01[i]*cx1 + f_10[i]*cx4 + f_11[i]*cx6;
298     o[INDEX3(i,1,3,numComp,2)] = f_00[i]*cy3 + f_01[i]*cy4 + f_10[i]*cy1 + f_11[i]*cy6;
299 caltinay 3702 } /* end of component loop i */
300     } /* end of k0 loop */
301     } /* end of k1 loop */
302     /* GENERATOR SNIP_GRAD_ELEMENTS BOTTOM */
303 caltinay 3711 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
304 caltinay 3724 /*** GENERATOR SNIP_GRAD_REDUCED_ELEMENTS TOP */
305 caltinay 3711 #pragma omp parallel for
306     for (index_t k1=0; k1 < m_NE1; ++k1) {
307     for (index_t k0=0; k0 < m_NE0; ++k0) {
308     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0));
309     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0));
310     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0));
311     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0));
312     double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0));
313     for (index_t i=0; i < numComp; ++i) {
314 caltinay 3724 o[INDEX3(i,0,0,numComp,2)] = cx5*(f_10[i] + f_11[i]) + cx2*(f_00[i] + f_01[i]);
315     o[INDEX3(i,1,0,numComp,2)] = cy2*(f_00[i] + f_10[i]) + cy5*(f_01[i] + f_11[i]);
316 caltinay 3711 } /* end of component loop i */
317     } /* end of k0 loop */
318     } /* end of k1 loop */
319     /* GENERATOR SNIP_GRAD_REDUCED_ELEMENTS BOTTOM */
320 caltinay 3707 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
321 caltinay 3722 #pragma omp parallel
322     {
323 caltinay 3724 /*** GENERATOR SNIP_GRAD_FACES TOP */
324 caltinay 3722 if (m_faceOffset[0] > -1) {
325     #pragma omp for nowait
326     for (index_t k1=0; k1 < m_NE1; ++k1) {
327     const register double* f_10 = in.getSampleDataRO(INDEX2(1,k1, m_N0));
328     const register double* f_11 = in.getSampleDataRO(INDEX2(1,k1+1, m_N0));
329     const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0));
330     const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0));
331     double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
332     for (index_t i=0; i < numComp; ++i) {
333 caltinay 3724 o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx1 + f_01[i]*cx3 + f_10[i]*cx6 + f_11[i]*cx4;
334     o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy0 + f_01[i]*cy7;
335     o[INDEX3(i,0,1,numComp,2)] = f_00[i]*cx3 + f_01[i]*cx1 + f_10[i]*cx4 + f_11[i]*cx6;
336     o[INDEX3(i,1,1,numComp,2)] = f_00[i]*cy0 + f_01[i]*cy7;
337 caltinay 3722 } /* end of component loop i */
338     } /* end of k1 loop */
339     } /* end of face 0 */
340     if (m_faceOffset[1] > -1) {
341     #pragma omp for nowait
342     for (index_t k1=0; k1 < m_NE1; ++k1) {
343     const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0));
344     const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0));
345     const register double* f_01 = in.getSampleDataRO(INDEX2(m_N0-2,k1+1, m_N0));
346     const register double* f_00 = in.getSampleDataRO(INDEX2(m_N0-2,k1, m_N0));
347     double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
348     for (index_t i=0; i < numComp; ++i) {
349 caltinay 3724 o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx1 + f_01[i]*cx3 + f_10[i]*cx6 + f_11[i]*cx4;
350     o[INDEX3(i,1,0,numComp,2)] = f_10[i]*cy0 + f_11[i]*cy7;
351     o[INDEX3(i,0,1,numComp,2)] = f_00[i]*cx3 + f_01[i]*cx1 + f_10[i]*cx4 + f_11[i]*cx6;
352     o[INDEX3(i,1,1,numComp,2)] = f_10[i]*cy0 + f_11[i]*cy7;
353 caltinay 3722 } /* end of component loop i */
354     } /* end of k1 loop */
355     } /* end of face 1 */
356     if (m_faceOffset[2] > -1) {
357     #pragma omp for nowait
358     for (index_t k0=0; k0 < m_NE0; ++k0) {
359     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0));
360     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0));
361     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,1, m_N0));
362     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,1, m_N0));
363     double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
364     for (index_t i=0; i < numComp; ++i) {
365 caltinay 3724 o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx0 + f_10[i]*cx7;
366     o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy1 + f_01[i]*cy6 + f_10[i]*cy3 + f_11[i]*cy4;
367     o[INDEX3(i,0,1,numComp,2)] = f_00[i]*cx0 + f_10[i]*cx7;
368     o[INDEX3(i,1,1,numComp,2)] = f_00[i]*cy3 + f_01[i]*cy4 + f_10[i]*cy1 + f_11[i]*cy6;
369 caltinay 3722 } /* end of component loop i */
370     } /* end of k0 loop */
371     } /* end of face 2 */
372     if (m_faceOffset[3] > -1) {
373     #pragma omp for nowait
374     for (index_t k0=0; k0 < m_NE0; ++k0) {
375     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0));
376     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0));
377     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,m_N1-2, m_N0));
378     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,m_N1-2, m_N0));
379     double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
380     for (index_t i=0; i < numComp; ++i) {
381 caltinay 3724 o[INDEX3(i,0,0,numComp,2)] = f_01[i]*cx0 + f_11[i]*cx7;
382     o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy1 + f_01[i]*cy6 + f_10[i]*cy3 + f_11[i]*cy4;
383     o[INDEX3(i,0,1,numComp,2)] = f_01[i]*cx0 + f_11[i]*cx7;
384     o[INDEX3(i,1,1,numComp,2)] = f_00[i]*cy3 + f_01[i]*cy4 + f_10[i]*cy1 + f_11[i]*cy6;
385 caltinay 3722 } /* end of component loop i */
386     } /* end of k0 loop */
387     } /* end of face 3 */
388     /* GENERATOR SNIP_GRAD_FACES BOTTOM */
389     } // end of parallel section
390 caltinay 3711 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
391 caltinay 3722 #pragma omp parallel
392     {
393 caltinay 3724 /*** GENERATOR SNIP_GRAD_REDUCED_FACES TOP */
394 caltinay 3722 if (m_faceOffset[0] > -1) {
395     #pragma omp for nowait
396     for (index_t k1=0; k1 < m_NE1; ++k1) {
397     const register double* f_10 = in.getSampleDataRO(INDEX2(1,k1, m_N0));
398     const register double* f_11 = in.getSampleDataRO(INDEX2(1,k1+1, m_N0));
399     const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0));
400     const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0));
401     double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
402     for (index_t i=0; i < numComp; ++i) {
403 caltinay 3724 o[INDEX3(i,0,0,numComp,2)] = cx5*(f_10[i] + f_11[i]) + cx2*(f_00[i] + f_01[i]);
404     o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy0 + f_01[i]*cy7;
405 caltinay 3722 } /* end of component loop i */
406     } /* end of k1 loop */
407     } /* end of face 0 */
408     if (m_faceOffset[1] > -1) {
409     #pragma omp for nowait
410     for (index_t k1=0; k1 < m_NE1; ++k1) {
411     const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0));
412     const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0));
413     const register double* f_01 = in.getSampleDataRO(INDEX2(m_N0-2,k1+1, m_N0));
414     const register double* f_00 = in.getSampleDataRO(INDEX2(m_N0-2,k1, m_N0));
415     double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
416     for (index_t i=0; i < numComp; ++i) {
417 caltinay 3724 o[INDEX3(i,0,0,numComp,2)] = cx5*(f_10[i] + f_11[i]) + cx2*(f_00[i] + f_01[i]);
418     o[INDEX3(i,1,0,numComp,2)] = f_10[i]*cy0 + f_11[i]*cy7;
419 caltinay 3722 } /* end of component loop i */
420     } /* end of k1 loop */
421     } /* end of face 1 */
422     if (m_faceOffset[2] > -1) {
423     #pragma omp for nowait
424     for (index_t k0=0; k0 < m_NE0; ++k0) {
425     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0));
426     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0));
427     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,1, m_N0));
428     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,1, m_N0));
429     double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
430     for (index_t i=0; i < numComp; ++i) {
431 caltinay 3724 o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx0 + f_10[i]*cx7;
432     o[INDEX3(i,1,0,numComp,2)] = cy2*(f_00[i] + f_10[i]) + cy5*(f_01[i] + f_11[i]);
433 caltinay 3722 } /* end of component loop i */
434     } /* end of k0 loop */
435     } /* end of face 2 */
436     if (m_faceOffset[3] > -1) {
437     #pragma omp for nowait
438     for (index_t k0=0; k0 < m_NE0; ++k0) {
439     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0));
440     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0));
441     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,m_N1-2, m_N0));
442     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,m_N1-2, m_N0));
443     double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
444     for (index_t i=0; i < numComp; ++i) {
445 caltinay 3724 o[INDEX3(i,0,0,numComp,2)] = f_01[i]*cx0 + f_11[i]*cx7;
446     o[INDEX3(i,1,0,numComp,2)] = cy5*(f_01[i] + f_11[i]) + cy2*(f_00[i] + f_10[i]);
447 caltinay 3722 } /* end of component loop i */
448     } /* end of k0 loop */
449     } /* end of face 3 */
450     /* GENERATOR SNIP_GRAD_REDUCED_FACES BOTTOM */
451     } // end of parallel section
452 caltinay 3702 } else {
453 caltinay 3707 stringstream msg;
454     msg << "setToGradient() not implemented for "
455     << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
456     throw RipleyException(msg.str());
457 caltinay 3702 }
458 caltinay 3701 }
459 caltinay 3697
460 caltinay 3713 void Rectangle::setToIntegrals(vector<double>& integrals, const escript::Data& arg) const
461     {
462     escript::Data& in = *const_cast<escript::Data*>(&arg);
463     const dim_t numComp = in.getDataPointSize();
464     const double h0 = m_l0/m_gNE0;
465     const double h1 = m_l1/m_gNE1;
466     if (arg.getFunctionSpace().getTypeCode() == Elements) {
467     const double w_0 = h0*h1/4.;
468     #pragma omp parallel
469     {
470     vector<double> int_local(numComp, 0);
471 caltinay 3722 #pragma omp for nowait
472 caltinay 3713 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
473     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
474     const double* f = in.getSampleDataRO(INDEX2(k0, k1, m_NE0));
475     for (index_t i=0; i < numComp; ++i) {
476     const register double f_0 = f[INDEX2(i,0,numComp)];
477     const register double f_1 = f[INDEX2(i,1,numComp)];
478     const register double f_2 = f[INDEX2(i,2,numComp)];
479     const register double f_3 = f[INDEX2(i,3,numComp)];
480     int_local[i]+=(f_0+f_1+f_2+f_3)*w_0;
481     } /* end of component loop i */
482     } /* end of k0 loop */
483     } /* end of k1 loop */
484    
485     #pragma omp critical
486     for (index_t i=0; i<numComp; i++)
487     integrals[i]+=int_local[i];
488 caltinay 3722 } // end of parallel section
489 caltinay 3713 } else if (arg.getFunctionSpace().getTypeCode() == ReducedElements) {
490     const double w_0 = h0*h1;
491     #pragma omp parallel
492     {
493     vector<double> int_local(numComp, 0);
494 caltinay 3722 #pragma omp for nowait
495 caltinay 3713 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
496     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
497     const double* f = in.getSampleDataRO(INDEX2(k0, k1, m_NE0));
498     for (index_t i=0; i < numComp; ++i) {
499     int_local[i]+=f[i]*w_0;
500     } /* end of component loop i */
501     } /* end of k0 loop */
502     } /* end of k1 loop */
503    
504     #pragma omp critical
505     for (index_t i=0; i<numComp; i++)
506     integrals[i]+=int_local[i];
507 caltinay 3722 } // end of parallel section
508 caltinay 3713 } else if (arg.getFunctionSpace().getTypeCode() == FaceElements) {
509     const double w_0 = h0/2.;
510     const double w_1 = h1/2.;
511     #pragma omp parallel
512     {
513     vector<double> int_local(numComp, 0);
514     if (m_faceOffset[0] > -1) {
515 caltinay 3722 #pragma omp for nowait
516 caltinay 3713 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
517     const double* f = in.getSampleDataRO(m_faceOffset[0]+k1);
518     for (index_t i=0; i < numComp; ++i) {
519     const register double f_0 = f[INDEX2(i,0,numComp)];
520     const register double f_1 = f[INDEX2(i,1,numComp)];
521     int_local[i]+=(f_0+f_1)*w_1;
522     } /* end of component loop i */
523     } /* end of k1 loop */
524     }
525    
526     if (m_faceOffset[1] > -1) {
527 caltinay 3722 #pragma omp for nowait
528 caltinay 3713 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
529     const double* f = in.getSampleDataRO(m_faceOffset[1]+k1);
530     for (index_t i=0; i < numComp; ++i) {
531     const register double f_0 = f[INDEX2(i,0,numComp)];
532     const register double f_1 = f[INDEX2(i,1,numComp)];
533     int_local[i]+=(f_0+f_1)*w_1;
534     } /* end of component loop i */
535     } /* end of k1 loop */
536     }
537    
538     if (m_faceOffset[2] > -1) {
539 caltinay 3722 #pragma omp for nowait
540 caltinay 3713 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
541     const double* f = in.getSampleDataRO(m_faceOffset[2]+k0);
542     for (index_t i=0; i < numComp; ++i) {
543     const register double f_0 = f[INDEX2(i,0,numComp)];
544     const register double f_1 = f[INDEX2(i,1,numComp)];
545     int_local[i]+=(f_0+f_1)*w_0;
546     } /* end of component loop i */
547     } /* end of k0 loop */
548     }
549    
550     if (m_faceOffset[3] > -1) {
551 caltinay 3722 #pragma omp for nowait
552 caltinay 3713 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
553     const double* f = in.getSampleDataRO(m_faceOffset[3]+k0);
554     for (index_t i=0; i < numComp; ++i) {
555     const register double f_0 = f[INDEX2(i,0,numComp)];
556     const register double f_1 = f[INDEX2(i,1,numComp)];
557     int_local[i]+=(f_0+f_1)*w_0;
558     } /* end of component loop i */
559     } /* end of k0 loop */
560     }
561    
562     #pragma omp critical
563     for (index_t i=0; i<numComp; i++)
564     integrals[i]+=int_local[i];
565 caltinay 3722 } // end of parallel section
566 caltinay 3713 } else if (arg.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
567     #pragma omp parallel
568     {
569     vector<double> int_local(numComp, 0);
570     if (m_faceOffset[0] > -1) {
571 caltinay 3722 #pragma omp for nowait
572 caltinay 3713 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
573     const double* f = in.getSampleDataRO(m_faceOffset[0]+k1);
574     for (index_t i=0; i < numComp; ++i) {
575     int_local[i]+=f[i]*h1;
576     } /* end of component loop i */
577     } /* end of k1 loop */
578     }
579    
580     if (m_faceOffset[1] > -1) {
581 caltinay 3722 #pragma omp for nowait
582 caltinay 3713 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
583     const double* f = in.getSampleDataRO(m_faceOffset[1]+k1);
584     for (index_t i=0; i < numComp; ++i) {
585     int_local[i]+=f[i]*h1;
586     } /* end of component loop i */
587     } /* end of k1 loop */
588     }
589    
590     if (m_faceOffset[2] > -1) {
591 caltinay 3722 #pragma omp for nowait
592 caltinay 3713 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
593     const double* f = in.getSampleDataRO(m_faceOffset[2]+k0);
594     for (index_t i=0; i < numComp; ++i) {
595     int_local[i]+=f[i]*h0;
596     } /* end of component loop i */
597     } /* end of k0 loop */
598     }
599    
600     if (m_faceOffset[3] > -1) {
601 caltinay 3722 #pragma omp for nowait
602 caltinay 3713 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
603     const double* f = in.getSampleDataRO(m_faceOffset[3]+k0);
604     for (index_t i=0; i < numComp; ++i) {
605     int_local[i]+=f[i]*h0;
606     } /* end of component loop i */
607     } /* end of k0 loop */
608     }
609    
610     #pragma omp critical
611     for (index_t i=0; i<numComp; i++)
612     integrals[i]+=int_local[i];
613 caltinay 3722 } // end of parallel section
614 caltinay 3713 } else {
615     stringstream msg;
616     msg << "setToIntegrals() not implemented for "
617     << functionSpaceTypeAsString(arg.getFunctionSpace().getTypeCode());
618     throw RipleyException(msg.str());
619     }
620     }
621    
622 caltinay 3722 void Rectangle::setToNormal(escript::Data& out) const
623     {
624     if (out.getFunctionSpace().getTypeCode() == FaceElements) {
625     #pragma omp parallel
626     {
627     if (m_faceOffset[0] > -1) {
628     #pragma omp for nowait
629     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
630     double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
631     // set vector at two quadrature points
632     *o++ = -1.;
633     *o++ = 0.;
634     *o++ = -1.;
635     *o = 0.;
636     }
637     }
638    
639     if (m_faceOffset[1] > -1) {
640     #pragma omp for nowait
641     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
642     double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
643     // set vector at two quadrature points
644     *o++ = 1.;
645     *o++ = 0.;
646     *o++ = 1.;
647     *o = 0.;
648     }
649     }
650    
651     if (m_faceOffset[2] > -1) {
652     #pragma omp for nowait
653     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
654     double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
655     // set vector at two quadrature points
656     *o++ = 0.;
657     *o++ = -1.;
658     *o++ = 0.;
659     *o = -1.;
660     }
661     }
662    
663     if (m_faceOffset[3] > -1) {
664     #pragma omp for nowait
665     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
666     double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
667     // set vector at two quadrature points
668     *o++ = 0.;
669     *o++ = 1.;
670     *o++ = 0.;
671     *o = 1.;
672     }
673     }
674     } // end of parallel section
675     } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
676     #pragma omp parallel
677     {
678     if (m_faceOffset[0] > -1) {
679     #pragma omp for nowait
680     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
681     double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
682     *o++ = -1.;
683     *o = 0.;
684     }
685     }
686    
687     if (m_faceOffset[1] > -1) {
688     #pragma omp for nowait
689     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
690     double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
691     *o++ = 1.;
692     *o = 0.;
693     }
694     }
695    
696     if (m_faceOffset[2] > -1) {
697     #pragma omp for nowait
698     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
699     double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
700     *o++ = 0.;
701     *o = -1.;
702     }
703     }
704    
705     if (m_faceOffset[3] > -1) {
706     #pragma omp for nowait
707     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
708     double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
709     *o++ = 0.;
710     *o = 1.;
711     }
712     }
713     } // end of parallel section
714    
715     } else {
716     stringstream msg;
717     msg << "setToNormal() not implemented for "
718     << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
719     throw RipleyException(msg.str());
720     }
721     }
722    
723 caltinay 3701 void Rectangle::addPDEToSystem(escript::AbstractSystemMatrix& mat,
724     escript::Data& rhs, const escript::Data& A, const escript::Data& B,
725     const escript::Data& C, const escript::Data& D,
726     const escript::Data& X, const escript::Data& Y,
727     const escript::Data& d, const escript::Data& y,
728     const escript::Data& d_contact, const escript::Data& y_contact,
729     const escript::Data& d_dirac, const escript::Data& y_dirac) const
730     {
731     throw RipleyException("addPDEToSystem() not implemented");
732     }
733    
734 caltinay 3691 Paso_SystemMatrixPattern* Rectangle::getPattern(bool reducedRowOrder,
735     bool reducedColOrder) const
736     {
737     if (reducedRowOrder || reducedColOrder)
738     throw RipleyException("getPattern() not implemented for reduced order");
739    
740 caltinay 3699 // connector
741     RankVector neighbour;
742     IndexVector offsetInShared(1,0);
743     IndexVector sendShared, recvShared;
744     const IndexVector faces=getNumFacesPerBoundary();
745     const index_t left = (m_offset0==0 ? 0 : 1);
746     const index_t bottom = (m_offset1==0 ? 0 : 1);
747     // corner node from bottom-left
748     if (faces[0] == 0 && faces[2] == 0) {
749     neighbour.push_back(m_mpiInfo->rank-m_NX-1);
750     offsetInShared.push_back(offsetInShared.back()+1);
751     sendShared.push_back(m_nodeId[m_N0+left]);
752     recvShared.push_back(m_nodeId[0]);
753     }
754     // bottom edge
755     if (faces[2] == 0) {
756     neighbour.push_back(m_mpiInfo->rank-m_NX);
757     offsetInShared.push_back(offsetInShared.back()+m_N0-left);
758     for (dim_t i=left; i<m_N0; i++) {
759     // easy case, we know the neighbour id's
760     sendShared.push_back(m_nodeId[m_N0+i]);
761     recvShared.push_back(m_nodeId[i]);
762     }
763     }
764     // corner node from bottom-right
765     if (faces[1] == 0 && faces[2] == 0) {
766     neighbour.push_back(m_mpiInfo->rank-m_NX+1);
767     const index_t N0=(neighbour.back()%m_NX == 0 ? m_N0 : m_N0-1);
768     const index_t N1=(neighbour.back()/m_NX == 0 ? m_N1 : m_N1-1);
769     const index_t first=m_nodeDistribution[neighbour.back()];
770     offsetInShared.push_back(offsetInShared.back()+1);
771     sendShared.push_back(m_nodeId[(bottom+1)*m_N0-1]);
772     recvShared.push_back(first+N0*(N1-1));
773     }
774     // left edge
775     if (faces[0] == 0) {
776     neighbour.push_back(m_mpiInfo->rank-1);
777     offsetInShared.push_back(offsetInShared.back()+m_N1-bottom);
778     for (dim_t i=bottom; i<m_N1; i++) {
779     // easy case, we know the neighbour id's
780     sendShared.push_back(m_nodeId[i*m_N0+1]);
781     recvShared.push_back(m_nodeId[i*m_N0]);
782     }
783     }
784     // right edge
785     if (faces[1] == 0) {
786     neighbour.push_back(m_mpiInfo->rank+1);
787     const index_t rightN0=(neighbour.back()%m_NX == 0 ? m_N0 : m_N0-1);
788     const index_t first=m_nodeDistribution[neighbour.back()];
789     offsetInShared.push_back(offsetInShared.back()+m_N1-bottom);
790     for (dim_t i=bottom; i<m_N1; i++) {
791     sendShared.push_back(m_nodeId[(i+1)*m_N0-1]);
792     recvShared.push_back(first+rightN0*(i-bottom));
793     }
794     }
795     // corner node from top-left
796     if (faces[0] == 0 && faces[3] == 0) {
797     neighbour.push_back(m_mpiInfo->rank+m_NX-1);
798     const index_t N0=(neighbour.back()%m_NX == 0 ? m_N0 : m_N0-1);
799     const index_t first=m_nodeDistribution[neighbour.back()];
800     offsetInShared.push_back(offsetInShared.back()+1);
801     sendShared.push_back(m_nodeId[m_N0*(m_N1-1)+left]);
802     recvShared.push_back(first+N0-1);
803     }
804     // top edge
805     if (faces[3] == 0) {
806     neighbour.push_back(m_mpiInfo->rank+m_NX);
807     const index_t first=m_nodeDistribution[neighbour.back()];
808     offsetInShared.push_back(offsetInShared.back()+m_N0-left);
809     for (dim_t i=left; i<m_N0; i++) {
810     sendShared.push_back(m_nodeId[m_N0*(m_N1-1)+i]);
811     recvShared.push_back(first+i-left);
812     }
813     }
814     // corner node from top-right
815     if (faces[1] == 0 && faces[3] == 0) {
816     neighbour.push_back(m_mpiInfo->rank+m_NX+1);
817     const index_t first=m_nodeDistribution[neighbour.back()];
818     offsetInShared.push_back(offsetInShared.back()+1);
819     sendShared.push_back(m_nodeId[m_N0*m_N1-1]);
820     recvShared.push_back(first);
821     }
822     const int numDOF=m_nodeDistribution[m_mpiInfo->rank+1]-m_nodeDistribution[m_mpiInfo->rank];
823 caltinay 3702 /*
824 caltinay 3699 cout << "--- rcv_shcomp ---" << endl;
825     cout << "numDOF=" << numDOF << ", numNeighbors=" << neighbour.size() << endl;
826     for (size_t i=0; i<neighbour.size(); i++) {
827     cout << "neighbor[" << i << "]=" << neighbour[i]
828     << " offsetInShared[" << i+1 << "]=" << offsetInShared[i+1] << endl;
829     }
830     for (size_t i=0; i<recvShared.size(); i++) {
831     cout << "shared[" << i << "]=" << recvShared[i] << endl;
832     }
833     cout << "--- snd_shcomp ---" << endl;
834     for (size_t i=0; i<sendShared.size(); i++) {
835     cout << "shared[" << i << "]=" << sendShared[i] << endl;
836     }
837 caltinay 3702 */
838 caltinay 3691
839     Paso_SharedComponents *snd_shcomp = Paso_SharedComponents_alloc(
840 caltinay 3699 numDOF, neighbour.size(), &neighbour[0], &sendShared[0],
841 caltinay 3697 &offsetInShared[0], 1, 0, m_mpiInfo);
842 caltinay 3691 Paso_SharedComponents *rcv_shcomp = Paso_SharedComponents_alloc(
843 caltinay 3699 numDOF, neighbour.size(), &neighbour[0], &recvShared[0],
844 caltinay 3697 &offsetInShared[0], 1, 0, m_mpiInfo);
845 caltinay 3691 Paso_Connector* connector = Paso_Connector_alloc(snd_shcomp, rcv_shcomp);
846 caltinay 3699 Paso_SharedComponents_free(snd_shcomp);
847     Paso_SharedComponents_free(rcv_shcomp);
848 caltinay 3691
849     // create patterns
850 caltinay 3699 dim_t M, N;
851     IndexVector ptr(1,0);
852     IndexVector index;
853    
854     // main pattern
855     for (index_t i=0; i<numDOF; i++) {
856     // always add the node itself
857     index.push_back(i);
858     int num=insertNeighbours(index, i);
859     ptr.push_back(ptr.back()+num+1);
860     }
861     M=N=ptr.size()-1;
862     // paso will manage the memory
863     index_t* indexC = MEMALLOC(index.size(),index_t);
864     index_t* ptrC = MEMALLOC(ptr.size(), index_t);
865     copy(index.begin(), index.end(), indexC);
866     copy(ptr.begin(), ptr.end(), ptrC);
867 caltinay 3691 Paso_Pattern *mainPattern = Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT,
868 caltinay 3699 M, N, ptrC, indexC);
869 caltinay 3691
870 caltinay 3702 /*
871 caltinay 3699 cout << "--- main_pattern ---" << endl;
872     cout << "M=" << M << ", N=" << N << endl;
873     for (size_t i=0; i<ptr.size(); i++) {
874     cout << "ptr[" << i << "]=" << ptr[i] << endl;
875     }
876     for (size_t i=0; i<index.size(); i++) {
877     cout << "index[" << i << "]=" << index[i] << endl;
878     }
879 caltinay 3702 */
880 caltinay 3699
881     ptr.clear();
882     index.clear();
883    
884     // column & row couple patterns
885     Paso_Pattern *colCouplePattern, *rowCouplePattern;
886     generateCouplePatterns(&colCouplePattern, &rowCouplePattern);
887    
888     // allocate paso distribution
889     Paso_Distribution* distribution = Paso_Distribution_alloc(m_mpiInfo,
890     const_cast<index_t*>(&m_nodeDistribution[0]), 1, 0);
891    
892 caltinay 3691 Paso_SystemMatrixPattern* pattern = Paso_SystemMatrixPattern_alloc(
893     MATRIX_FORMAT_DEFAULT, distribution, distribution,
894     mainPattern, colCouplePattern, rowCouplePattern,
895     connector, connector);
896     Paso_Pattern_free(mainPattern);
897     Paso_Pattern_free(colCouplePattern);
898     Paso_Pattern_free(rowCouplePattern);
899     Paso_Distribution_free(distribution);
900 caltinay 3697 return pattern;
901 caltinay 3691 }
902    
903     void Rectangle::Print_Mesh_Info(const bool full) const
904     {
905     RipleyDomain::Print_Mesh_Info(full);
906     if (full) {
907     cout << " Id Coordinates" << endl;
908     cout.precision(15);
909     cout.setf(ios::scientific, ios::floatfield);
910 caltinay 3697 pair<double,double> xdx = getFirstCoordAndSpacing(0);
911     pair<double,double> ydy = getFirstCoordAndSpacing(1);
912 caltinay 3691 for (index_t i=0; i < getNumNodes(); i++) {
913     cout << " " << setw(5) << m_nodeId[i]
914 caltinay 3697 << " " << xdx.first+(i%m_N0)*xdx.second
915     << " " << ydy.first+(i/m_N0)*ydy.second << endl;
916 caltinay 3691 }
917     }
918     }
919    
920 caltinay 3697 IndexVector Rectangle::getNumNodesPerDim() const
921     {
922     IndexVector ret;
923     ret.push_back(m_N0);
924     ret.push_back(m_N1);
925     return ret;
926     }
927    
928     IndexVector Rectangle::getNumElementsPerDim() const
929     {
930     IndexVector ret;
931     ret.push_back(m_NE0);
932     ret.push_back(m_NE1);
933     return ret;
934     }
935    
936     IndexVector Rectangle::getNumFacesPerBoundary() const
937     {
938     IndexVector ret(4, 0);
939     //left
940     if (m_offset0==0)
941     ret[0]=m_NE1;
942     //right
943     if (m_mpiInfo->rank%m_NX==m_NX-1)
944     ret[1]=m_NE1;
945     //bottom
946     if (m_offset1==0)
947     ret[2]=m_NE0;
948     //top
949     if (m_mpiInfo->rank/m_NX==m_NY-1)
950     ret[3]=m_NE0;
951     return ret;
952     }
953    
954     pair<double,double> Rectangle::getFirstCoordAndSpacing(dim_t dim) const
955     {
956     if (dim==0) {
957     return pair<double,double>((m_l0*m_offset0)/m_gNE0, m_l0/m_gNE0);
958     } else if (dim==1) {
959     return pair<double,double>((m_l1*m_offset1)/m_gNE1, m_l1/m_gNE1);
960     }
961     throw RipleyException("getFirstCoordAndSpacing(): invalid argument");
962     }
963    
964 caltinay 3691 //protected
965     dim_t Rectangle::getNumFaceElements() const
966     {
967 caltinay 3699 const IndexVector faces = getNumFacesPerBoundary();
968 caltinay 3691 dim_t n=0;
969 caltinay 3699 for (size_t i=0; i<faces.size(); i++)
970     n+=faces[i];
971 caltinay 3691 return n;
972     }
973    
974     //protected
975     void Rectangle::assembleCoordinates(escript::Data& arg) const
976     {
977     escriptDataC x = arg.getDataC();
978     int numDim = m_numDim;
979     if (!isDataPointShapeEqual(&x, 1, &numDim))
980     throw RipleyException("setToX: Invalid Data object shape");
981     if (!numSamplesEqual(&x, 1, getNumNodes()))
982     throw RipleyException("setToX: Illegal number of samples in Data object");
983    
984 caltinay 3697 pair<double,double> xdx = getFirstCoordAndSpacing(0);
985     pair<double,double> ydy = getFirstCoordAndSpacing(1);
986 caltinay 3691 arg.requireWrite();
987     #pragma omp parallel for
988     for (dim_t i1 = 0; i1 < m_N1; i1++) {
989     for (dim_t i0 = 0; i0 < m_N0; i0++) {
990     double* point = arg.getSampleDataRW(i0+m_N0*i1);
991 caltinay 3697 point[0] = xdx.first+i0*xdx.second;
992     point[1] = ydy.first+i1*ydy.second;
993 caltinay 3691 }
994     }
995     }
996    
997     //private
998     void Rectangle::populateSampleIds()
999     {
1000 caltinay 3697 // identifiers are ordered from left to right, bottom to top on each rank,
1001     // except for the shared nodes which are owned by the rank below / to the
1002     // left of the current rank
1003    
1004     // build node distribution vector first.
1005     // m_nodeDistribution[i] is the first node id on rank i, that is
1006     // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes
1007     m_nodeDistribution.assign(m_mpiInfo->size+1, 0);
1008     m_nodeDistribution[1]=getNumNodes();
1009     for (dim_t k=1; k<m_mpiInfo->size-1; k++) {
1010     const index_t x=k%m_NX;
1011     const index_t y=k/m_NX;
1012     index_t numNodes=getNumNodes();
1013     if (x>0)
1014     numNodes-=m_N1;
1015     if (y>0)
1016     numNodes-=m_N0;
1017     if (x>0 && y>0)
1018     numNodes++; // subtracted corner twice -> fix that
1019     m_nodeDistribution[k+1]=m_nodeDistribution[k]+numNodes;
1020     }
1021     m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal();
1022    
1023 caltinay 3691 m_nodeId.resize(getNumNodes());
1024 caltinay 3697
1025     // the bottom row and left column are not owned by this rank so the
1026     // identifiers need to be computed accordingly
1027     const index_t left = (m_offset0==0 ? 0 : 1);
1028     const index_t bottom = (m_offset1==0 ? 0 : 1);
1029     if (left>0) {
1030     const int neighbour=m_mpiInfo->rank-1;
1031     const index_t leftN0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1032 caltinay 3691 #pragma omp parallel for
1033 caltinay 3697 for (dim_t i1=bottom; i1<m_N1; i1++) {
1034     m_nodeId[i1*m_N0]=m_nodeDistribution[neighbour]
1035     + (i1-bottom+1)*leftN0-1;
1036     }
1037     }
1038     if (bottom>0) {
1039     const int neighbour=m_mpiInfo->rank-m_NX;
1040     const index_t bottomN0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1041     const index_t bottomN1=(neighbour/m_NX == 0 ? m_N1 : m_N1-1);
1042     #pragma omp parallel for
1043     for (dim_t i0=left; i0<m_N0; i0++) {
1044     m_nodeId[i0]=m_nodeDistribution[neighbour]
1045     + (bottomN1-1)*bottomN0 + i0 - left;
1046     }
1047     }
1048     if (left>0 && bottom>0) {
1049     const int neighbour=m_mpiInfo->rank-m_NX-1;
1050 caltinay 3699 const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1051     const index_t N1=(neighbour/m_NX == 0 ? m_N1 : m_N1-1);
1052     m_nodeId[0]=m_nodeDistribution[neighbour]+N0*N1-1;
1053 caltinay 3697 }
1054    
1055     // the rest of the id's are contiguous
1056     const index_t firstId=m_nodeDistribution[m_mpiInfo->rank];
1057     #pragma omp parallel for
1058     for (dim_t i1=bottom; i1<m_N1; i1++) {
1059     for (dim_t i0=left; i0<m_N0; i0++) {
1060     m_nodeId[i0+i1*m_N0] = firstId+i0-left+(i1-bottom)*(m_N0-left);
1061     }
1062     }
1063    
1064 caltinay 3704 // element id's
1065 caltinay 3697 m_elementId.resize(getNumElements());
1066     #pragma omp parallel for
1067     for (dim_t k=0; k<getNumElements(); k++) {
1068     m_elementId[k]=k;
1069     }
1070    
1071 caltinay 3704 // face element id's
1072 caltinay 3697 m_faceId.resize(getNumFaceElements());
1073     #pragma omp parallel for
1074     for (dim_t k=0; k<getNumFaceElements(); k++) {
1075     m_faceId[k]=k;
1076     }
1077 caltinay 3704
1078 caltinay 3722 // generate face offset vector and set face tags
1079 caltinay 3704 const IndexVector facesPerEdge = getNumFacesPerBoundary();
1080 caltinay 3722 const index_t LEFT=1, RIGHT=2, BOTTOM=10, TOP=20;
1081     const index_t faceTag[] = { LEFT, RIGHT, BOTTOM, TOP };
1082 caltinay 3704 m_faceOffset.assign(facesPerEdge.size(), -1);
1083 caltinay 3722 m_faceTags.clear();
1084 caltinay 3704 index_t offset=0;
1085     for (size_t i=0; i<facesPerEdge.size(); i++) {
1086     if (facesPerEdge[i]>0) {
1087     m_faceOffset[i]=offset;
1088     offset+=facesPerEdge[i];
1089 caltinay 3722 m_faceTags.insert(m_faceTags.end(), facesPerEdge[i], faceTag[i]);
1090 caltinay 3704 }
1091     }
1092 caltinay 3722 setTagMap("left", LEFT);
1093     setTagMap("right", RIGHT);
1094     setTagMap("bottom", BOTTOM);
1095     setTagMap("top", TOP);
1096     updateTagsInUse(FaceElements);
1097 caltinay 3691 }
1098    
1099 caltinay 3699 //private
1100     int Rectangle::insertNeighbours(IndexVector& index, index_t node) const
1101     {
1102     const dim_t myN0 = (m_offset0==0 ? m_N0 : m_N0-1);
1103     const dim_t myN1 = (m_offset1==0 ? m_N1 : m_N1-1);
1104     const int x=node%myN0;
1105     const int y=node/myN0;
1106     int num=0;
1107     if (y>0) {
1108     if (x>0) {
1109     // bottom-left
1110     index.push_back(node-myN0-1);
1111     num++;
1112     }
1113     // bottom
1114     index.push_back(node-myN0);
1115     num++;
1116     if (x<myN0-1) {
1117     // bottom-right
1118     index.push_back(node-myN0+1);
1119     num++;
1120     }
1121     }
1122     if (x<myN0-1) {
1123     // right
1124     index.push_back(node+1);
1125     num++;
1126     if (y<myN1-1) {
1127     // top-right
1128     index.push_back(node+myN0+1);
1129     num++;
1130     }
1131     }
1132     if (y<myN1-1) {
1133     // top
1134     index.push_back(node+myN0);
1135     num++;
1136     if (x>0) {
1137     // top-left
1138     index.push_back(node+myN0-1);
1139     num++;
1140     }
1141     }
1142     if (x>0) {
1143     // left
1144     index.push_back(node-1);
1145     num++;
1146     }
1147    
1148     return num;
1149     }
1150    
1151     //private
1152     void Rectangle::generateCouplePatterns(Paso_Pattern** colPattern, Paso_Pattern** rowPattern) const
1153     {
1154     IndexVector ptr(1,0);
1155     IndexVector index;
1156     const dim_t myN0 = (m_offset0==0 ? m_N0 : m_N0-1);
1157     const dim_t myN1 = (m_offset1==0 ? m_N1 : m_N1-1);
1158     const IndexVector faces=getNumFacesPerBoundary();
1159    
1160     // bottom edge
1161     dim_t n=0;
1162     if (faces[0] == 0) {
1163     index.push_back(2*(myN0+myN1+1));
1164     index.push_back(2*(myN0+myN1+1)+1);
1165     n+=2;
1166     if (faces[2] == 0) {
1167     index.push_back(0);
1168     index.push_back(1);
1169     index.push_back(2);
1170     n+=3;
1171     }
1172     } else if (faces[2] == 0) {
1173     index.push_back(1);
1174     index.push_back(2);
1175     n+=2;
1176     }
1177     // n=neighbours of bottom-left corner node
1178     ptr.push_back(ptr.back()+n);
1179     n=0;
1180     if (faces[2] == 0) {
1181     for (dim_t i=1; i<myN0-1; i++) {
1182     index.push_back(i);
1183     index.push_back(i+1);
1184     index.push_back(i+2);
1185     ptr.push_back(ptr.back()+3);
1186     }
1187     index.push_back(myN0-1);
1188     index.push_back(myN0);
1189     n+=2;
1190     if (faces[1] == 0) {
1191     index.push_back(myN0+1);
1192     index.push_back(myN0+2);
1193     index.push_back(myN0+3);
1194     n+=3;
1195     }
1196     } else {
1197     for (dim_t i=1; i<myN0-1; i++) {
1198     ptr.push_back(ptr.back());
1199     }
1200     if (faces[1] == 0) {
1201     index.push_back(myN0+2);
1202     index.push_back(myN0+3);
1203     n+=2;
1204     }
1205     }
1206     // n=neighbours of bottom-right corner node
1207     ptr.push_back(ptr.back()+n);
1208    
1209     // 2nd row to 2nd last row
1210     for (dim_t i=1; i<myN1-1; i++) {
1211     // left edge
1212     if (faces[0] == 0) {
1213     index.push_back(2*(myN0+myN1+2)-i);
1214     index.push_back(2*(myN0+myN1+2)-i-1);
1215     index.push_back(2*(myN0+myN1+2)-i-2);
1216     ptr.push_back(ptr.back()+3);
1217     } else {
1218     ptr.push_back(ptr.back());
1219     }
1220     for (dim_t j=1; j<myN0-1; j++) {
1221     ptr.push_back(ptr.back());
1222     }
1223     // right edge
1224     if (faces[1] == 0) {
1225     index.push_back(myN0+i+1);
1226     index.push_back(myN0+i+2);
1227     index.push_back(myN0+i+3);
1228     ptr.push_back(ptr.back()+3);
1229     } else {
1230     ptr.push_back(ptr.back());
1231     }
1232     }
1233    
1234     // top edge
1235     n=0;
1236     if (faces[0] == 0) {
1237     index.push_back(2*myN0+myN1+5);
1238     index.push_back(2*myN0+myN1+4);
1239     n+=2;
1240     if (faces[3] == 0) {
1241     index.push_back(2*myN0+myN1+3);
1242     index.push_back(2*myN0+myN1+2);
1243     index.push_back(2*myN0+myN1+1);
1244     n+=3;
1245     }
1246     } else if (faces[3] == 0) {
1247     index.push_back(2*myN0+myN1+2);
1248     index.push_back(2*myN0+myN1+1);
1249     n+=2;
1250     }
1251     // n=neighbours of top-left corner node
1252     ptr.push_back(ptr.back()+n);
1253     n=0;
1254     if (faces[3] == 0) {
1255     for (dim_t i=1; i<myN0-1; i++) {
1256     index.push_back(2*myN0+myN1+i+1);
1257     index.push_back(2*myN0+myN1+i);
1258     index.push_back(2*myN0+myN1+i-1);
1259     ptr.push_back(ptr.back()+3);
1260     }
1261     index.push_back(myN0+myN1+4);
1262     index.push_back(myN0+myN1+3);
1263     n+=2;
1264     if (faces[1] == 0) {
1265     index.push_back(myN0+myN1+2);
1266     index.push_back(myN0+myN1+1);
1267     index.push_back(myN0+myN1);
1268     n+=3;
1269     }
1270     } else {
1271     for (dim_t i=1; i<myN0-1; i++) {
1272     ptr.push_back(ptr.back());
1273     }
1274     if (faces[1] == 0) {
1275     index.push_back(myN0+myN1+1);
1276     index.push_back(myN0+myN1);
1277     n+=2;
1278     }
1279     }
1280     // n=neighbours of top-right corner node
1281     ptr.push_back(ptr.back()+n);
1282    
1283     dim_t M=ptr.size()-1;
1284     map<index_t,index_t> idMap;
1285     dim_t N=0;
1286     for (IndexVector::iterator it=index.begin(); it!=index.end(); it++) {
1287     if (idMap.find(*it)==idMap.end()) {
1288     idMap[*it]=N;
1289     *it=N++;
1290     } else {
1291     *it=idMap[*it];
1292     }
1293     }
1294    
1295 caltinay 3702 /*
1296 caltinay 3699 cout << "--- colCouple_pattern ---" << endl;
1297     cout << "M=" << M << ", N=" << N << endl;
1298     for (size_t i=0; i<ptr.size(); i++) {
1299     cout << "ptr[" << i << "]=" << ptr[i] << endl;
1300     }
1301     for (size_t i=0; i<index.size(); i++) {
1302     cout << "index[" << i << "]=" << index[i] << endl;
1303     }
1304 caltinay 3702 */
1305 caltinay 3699
1306     // now build the row couple pattern
1307     IndexVector ptr2(1,0);
1308     IndexVector index2;
1309     for (dim_t id=0; id<N; id++) {
1310     n=0;
1311     for (dim_t i=0; i<M; i++) {
1312     for (dim_t j=ptr[i]; j<ptr[i+1]; j++) {
1313     if (index[j]==id) {
1314     index2.push_back(i);
1315     n++;
1316     break;
1317     }
1318     }
1319     }
1320     ptr2.push_back(ptr2.back()+n);
1321     }
1322    
1323 caltinay 3702 /*
1324 caltinay 3699 cout << "--- rowCouple_pattern ---" << endl;
1325     cout << "M=" << N << ", N=" << M << endl;
1326     for (size_t i=0; i<ptr2.size(); i++) {
1327     cout << "ptr[" << i << "]=" << ptr2[i] << endl;
1328     }
1329     for (size_t i=0; i<index2.size(); i++) {
1330     cout << "index[" << i << "]=" << index2[i] << endl;
1331     }
1332 caltinay 3702 */
1333 caltinay 3699
1334     // paso will manage the memory
1335     index_t* indexC = MEMALLOC(index.size(), index_t);
1336     index_t* ptrC = MEMALLOC(ptr.size(), index_t);
1337     copy(index.begin(), index.end(), indexC);
1338     copy(ptr.begin(), ptr.end(), ptrC);
1339     *colPattern=Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT, M, N, ptrC, indexC);
1340    
1341     // paso will manage the memory
1342     indexC = MEMALLOC(index2.size(), index_t);
1343     ptrC = MEMALLOC(ptr2.size(), index_t);
1344     copy(index2.begin(), index2.end(), indexC);
1345     copy(ptr2.begin(), ptr2.end(), ptrC);
1346     *rowPattern=Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT, N, M, ptrC, indexC);
1347     }
1348    
1349 caltinay 3702 //protected
1350 caltinay 3711 void Rectangle::interpolateNodesOnElements(escript::Data& out,
1351     escript::Data& in, bool reduced) const
1352 caltinay 3702 {
1353     const dim_t numComp = in.getDataPointSize();
1354 caltinay 3711 if (reduced) {
1355 caltinay 3724 /*** GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS TOP */
1356     const double c0 = .25;
1357 caltinay 3702 #pragma omp parallel for
1358 caltinay 3711 for (index_t k1=0; k1 < m_NE1; ++k1) {
1359     for (index_t k0=0; k0 < m_NE0; ++k0) {
1360     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0));
1361     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0));
1362     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0));
1363     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0));
1364     double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0));
1365     for (index_t i=0; i < numComp; ++i) {
1366 caltinay 3724 o[INDEX2(i,numComp,0)] = c0*(f_00[i] + f_01[i] + f_10[i] + f_11[i]);
1367 caltinay 3711 } /* end of component loop i */
1368     } /* end of k0 loop */
1369     } /* end of k1 loop */
1370     /* GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS BOTTOM */
1371     } else {
1372 caltinay 3724 /*** GENERATOR SNIP_INTERPOLATE_ELEMENTS TOP */
1373     const double c0 = .16666666666666666667;
1374     const double c1 = .044658198738520451079;
1375     const double c2 = .62200846792814621559;
1376 caltinay 3711 #pragma omp parallel for
1377     for (index_t k1=0; k1 < m_NE1; ++k1) {
1378     for (index_t k0=0; k0 < m_NE0; ++k0) {
1379     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0));
1380     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0));
1381     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0));
1382     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0));
1383     double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0));
1384     for (index_t i=0; i < numComp; ++i) {
1385 caltinay 3724 o[INDEX2(i,numComp,0)] = f_00[i]*c2 + f_11[i]*c1 + c0*(f_01[i] + f_10[i]);
1386     o[INDEX2(i,numComp,1)] = f_01[i]*c1 + f_10[i]*c2 + c0*(f_00[i] + f_11[i]);
1387     o[INDEX2(i,numComp,2)] = f_01[i]*c2 + f_10[i]*c1 + c0*(f_00[i] + f_11[i]);
1388     o[INDEX2(i,numComp,3)] = f_00[i]*c1 + f_11[i]*c2 + c0*(f_01[i] + f_10[i]);
1389 caltinay 3711 } /* end of component loop i */
1390     } /* end of k0 loop */
1391     } /* end of k1 loop */
1392     /* GENERATOR SNIP_INTERPOLATE_ELEMENTS BOTTOM */
1393     }
1394 caltinay 3702 }
1395    
1396     //protected
1397 caltinay 3711 void Rectangle::interpolateNodesOnFaces(escript::Data& out, escript::Data& in,
1398     bool reduced) const
1399 caltinay 3702 {
1400 caltinay 3704 const dim_t numComp = in.getDataPointSize();
1401 caltinay 3711 if (reduced) {
1402 caltinay 3724 const double c0 = .5;
1403     #pragma omp parallel
1404     {
1405     /*** GENERATOR SNIP_INTERPOLATE_REDUCED_FACES TOP */
1406     if (m_faceOffset[0] > -1) {
1407     #pragma omp for nowait
1408     for (index_t k1=0; k1 < m_NE1; ++k1) {
1409     const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0));
1410     const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0));
1411     double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
1412     for (index_t i=0; i < numComp; ++i) {
1413     o[INDEX2(i,numComp,0)] = c0*(f_00[i] + f_01[i]);
1414     } /* end of component loop i */
1415     } /* end of k1 loop */
1416     } /* end of face 0 */
1417     if (m_faceOffset[1] > -1) {
1418     #pragma omp for nowait
1419     for (index_t k1=0; k1 < m_NE1; ++k1) {
1420     const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0));
1421     const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0));
1422     double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
1423     for (index_t i=0; i < numComp; ++i) {
1424     o[INDEX2(i,numComp,0)] = c0*(f_10[i] + f_11[i]);
1425     } /* end of component loop i */
1426     } /* end of k1 loop */
1427     } /* end of face 1 */
1428     if (m_faceOffset[2] > -1) {
1429     #pragma omp for nowait
1430     for (index_t k0=0; k0 < m_NE0; ++k0) {
1431     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0));
1432     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0));
1433     double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1434     for (index_t i=0; i < numComp; ++i) {
1435     o[INDEX2(i,numComp,0)] = c0*(f_00[i] + f_10[i]);
1436     } /* end of component loop i */
1437     } /* end of k0 loop */
1438     } /* end of face 2 */
1439     if (m_faceOffset[3] > -1) {
1440     #pragma omp for nowait
1441     for (index_t k0=0; k0 < m_NE0; ++k0) {
1442     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0));
1443     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0));
1444     double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1445     for (index_t i=0; i < numComp; ++i) {
1446     o[INDEX2(i,numComp,0)] = c0*(f_01[i] + f_11[i]);
1447     } /* end of component loop i */
1448     } /* end of k0 loop */
1449     } /* end of face 3 */
1450     /* GENERATOR SNIP_INTERPOLATE_REDUCED_FACES BOTTOM */
1451     } // end of parallel section
1452 caltinay 3711 } else {
1453 caltinay 3724 const double c0 = 0.21132486540518711775;
1454     const double c1 = 0.78867513459481288225;
1455     #pragma omp parallel
1456     {
1457     /*** GENERATOR SNIP_INTERPOLATE_FACES TOP */
1458     if (m_faceOffset[0] > -1) {
1459     #pragma omp for nowait
1460     for (index_t k1=0; k1 < m_NE1; ++k1) {
1461     const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0));
1462     const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0));
1463     double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
1464     for (index_t i=0; i < numComp; ++i) {
1465     o[INDEX2(i,numComp,0)] = f_00[i]*c1 + f_01[i]*c0;
1466     o[INDEX2(i,numComp,1)] = f_00[i]*c0 + f_01[i]*c1;
1467     } /* end of component loop i */
1468     } /* end of k1 loop */
1469     } /* end of face 0 */
1470     if (m_faceOffset[1] > -1) {
1471     #pragma omp for nowait
1472     for (index_t k1=0; k1 < m_NE1; ++k1) {
1473     const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0));
1474     const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0));
1475     double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
1476     for (index_t i=0; i < numComp; ++i) {
1477     o[INDEX2(i,numComp,0)] = f_10[i]*c1 + f_11[i]*c0;
1478     o[INDEX2(i,numComp,1)] = f_10[i]*c0 + f_11[i]*c1;
1479     } /* end of component loop i */
1480     } /* end of k1 loop */
1481     } /* end of face 1 */
1482     if (m_faceOffset[2] > -1) {
1483     #pragma omp for nowait
1484     for (index_t k0=0; k0 < m_NE0; ++k0) {
1485     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0));
1486     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0));
1487     double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1488     for (index_t i=0; i < numComp; ++i) {
1489     o[INDEX2(i,numComp,0)] = f_00[i]*c1 + f_10[i]*c0;
1490     o[INDEX2(i,numComp,1)] = f_00[i]*c0 + f_10[i]*c1;
1491     } /* end of component loop i */
1492     } /* end of k0 loop */
1493     } /* end of face 2 */
1494     if (m_faceOffset[3] > -1) {
1495     #pragma omp for nowait
1496     for (index_t k0=0; k0 < m_NE0; ++k0) {
1497     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0));
1498     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0));
1499     double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1500     for (index_t i=0; i < numComp; ++i) {
1501     o[INDEX2(i,numComp,0)] = f_01[i]*c1 + f_11[i]*c0;
1502     o[INDEX2(i,numComp,1)] = f_01[i]*c0 + f_11[i]*c1;
1503     } /* end of component loop i */
1504     } /* end of k0 loop */
1505     } /* end of face 3 */
1506     /* GENERATOR SNIP_INTERPOLATE_FACES BOTTOM */
1507     } // end of parallel section
1508 caltinay 3711 }
1509 caltinay 3702 }
1510    
1511 caltinay 3691 } // end of namespace ripley
1512    

  ViewVC Help
Powered by ViewVC 1.1.26