/[escript]/trunk/ripley/src/Rectangle.cpp
ViewVC logotype

Annotation of /trunk/ripley/src/Rectangle.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3733 - (hide annotations)
Fri Dec 9 04:02:56 2011 UTC (7 years, 10 months ago) by caltinay
Original Path: branches/ripleygmg_from_3668/ripley/src/Rectangle.cpp
File size: 60054 byte(s)
escript on ripley tests pass. However, returning 0 for number of point elements
and DOF id's are bogus.

1 caltinay 3691
2     /*******************************************************
3     *
4     * Copyright (c) 2003-2011 by University of Queensland
5     * Earth Systems Science Computational Center (ESSCC)
6     * http://www.uq.edu.au/esscc
7     *
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13    
14     #include <ripley/Rectangle.h>
15     extern "C" {
16     #include "paso/SystemMatrixPattern.h"
17     }
18    
19     #if USE_SILO
20     #include <silo.h>
21     #ifdef ESYS_MPI
22     #include <pmpio.h>
23     #endif
24     #endif
25    
26     #include <iomanip>
27    
28     using namespace std;
29    
30     namespace ripley {
31    
32     Rectangle::Rectangle(int n0, int n1, double l0, double l1, int d0, int d1) :
33     RipleyDomain(2),
34     m_gNE0(n0),
35     m_gNE1(n1),
36     m_l0(l0),
37     m_l1(l1),
38     m_NX(d0),
39     m_NY(d1)
40     {
41     // ensure number of subdivisions is valid and nodes can be distributed
42     // among number of ranks
43     if (m_NX*m_NY != m_mpiInfo->size)
44     throw RipleyException("Invalid number of spatial subdivisions");
45    
46     if (n0%m_NX > 0 || n1%m_NY > 0)
47     throw RipleyException("Number of elements must be separable into number of ranks in each dimension");
48    
49     // local number of elements
50     m_NE0 = n0/m_NX;
51     m_NE1 = n1/m_NY;
52     // local number of nodes (not necessarily owned)
53     m_N0 = m_NE0+1;
54     m_N1 = m_NE1+1;
55     // bottom-left node is at (offset0,offset1) in global mesh
56     m_offset0 = m_NE0*(m_mpiInfo->rank%m_NX);
57     m_offset1 = m_NE1*(m_mpiInfo->rank/m_NX);
58     populateSampleIds();
59     }
60    
61     Rectangle::~Rectangle()
62     {
63     }
64    
65     string Rectangle::getDescription() const
66     {
67     return "ripley::Rectangle";
68     }
69    
70     bool Rectangle::operator==(const AbstractDomain& other) const
71     {
72     if (dynamic_cast<const Rectangle*>(&other))
73     return this==&other;
74    
75     return false;
76     }
77    
78     void Rectangle::dump(const string& fileName) const
79     {
80     #if USE_SILO
81     string fn(fileName);
82     if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
83     fn+=".silo";
84     }
85    
86     const int NUM_SILO_FILES = 1;
87     const char* blockDirFmt = "/block%04d";
88     int driver=DB_HDF5;
89     string siloPath;
90     DBfile* dbfile = NULL;
91    
92     #ifdef ESYS_MPI
93     PMPIO_baton_t* baton = NULL;
94     #endif
95    
96     if (m_mpiInfo->size > 1) {
97     #ifdef ESYS_MPI
98     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
99     0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
100     PMPIO_DefaultClose, (void*)&driver);
101     // try the fallback driver in case of error
102     if (!baton && driver != DB_PDB) {
103     driver = DB_PDB;
104     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
105     0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
106     PMPIO_DefaultClose, (void*)&driver);
107     }
108     if (baton) {
109     char str[64];
110     snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
111     siloPath = str;
112     dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
113     }
114     #endif
115     } else {
116     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
117     getDescription().c_str(), driver);
118     // try the fallback driver in case of error
119     if (!dbfile && driver != DB_PDB) {
120     driver = DB_PDB;
121     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
122     getDescription().c_str(), driver);
123     }
124     }
125    
126     if (!dbfile)
127     throw RipleyException("dump: Could not create Silo file");
128    
129     /*
130     if (driver==DB_HDF5) {
131     // gzip level 1 already provides good compression with minimal
132     // performance penalty. Some tests showed that gzip levels >3 performed
133     // rather badly on escript data both in terms of time and space
134     DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
135     }
136     */
137    
138     boost::scoped_ptr<double> x(new double[m_N0]);
139     boost::scoped_ptr<double> y(new double[m_N1]);
140     double* coords[2] = { x.get(), y.get() };
141 caltinay 3697 pair<double,double> xdx = getFirstCoordAndSpacing(0);
142     pair<double,double> ydy = getFirstCoordAndSpacing(1);
143 caltinay 3691 #pragma omp parallel
144     {
145 caltinay 3722 #pragma omp for nowait
146 caltinay 3691 for (dim_t i0 = 0; i0 < m_N0; i0++) {
147 caltinay 3697 coords[0][i0]=xdx.first+i0*xdx.second;
148 caltinay 3691 }
149 caltinay 3722 #pragma omp for nowait
150 caltinay 3691 for (dim_t i1 = 0; i1 < m_N1; i1++) {
151 caltinay 3697 coords[1][i1]=ydy.first+i1*ydy.second;
152 caltinay 3691 }
153     }
154 caltinay 3697 IndexVector dims = getNumNodesPerDim();
155    
156     // write mesh
157     DBPutQuadmesh(dbfile, "mesh", NULL, coords, &dims[0], 2, DB_DOUBLE,
158 caltinay 3691 DB_COLLINEAR, NULL);
159    
160 caltinay 3697 // write node ids
161     DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], &dims[0], 2,
162     NULL, 0, DB_INT, DB_NODECENT, NULL);
163    
164     // write element ids
165     dims = getNumElementsPerDim();
166     DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
167     &dims[0], 2, NULL, 0, DB_INT, DB_ZONECENT, NULL);
168    
169     // rank 0 writes multimesh and multivar
170 caltinay 3691 if (m_mpiInfo->rank == 0) {
171     vector<string> tempstrings;
172 caltinay 3697 vector<char*> names;
173 caltinay 3691 for (dim_t i=0; i<m_mpiInfo->size; i++) {
174     stringstream path;
175     path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
176     tempstrings.push_back(path.str());
177 caltinay 3697 names.push_back((char*)tempstrings.back().c_str());
178 caltinay 3691 }
179 caltinay 3697 vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
180 caltinay 3691 DBSetDir(dbfile, "/");
181 caltinay 3697 DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
182     &types[0], NULL);
183     tempstrings.clear();
184     names.clear();
185     for (dim_t i=0; i<m_mpiInfo->size; i++) {
186     stringstream path;
187     path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
188     tempstrings.push_back(path.str());
189     names.push_back((char*)tempstrings.back().c_str());
190     }
191     types.assign(m_mpiInfo->size, DB_QUADVAR);
192     DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
193     &types[0], NULL);
194     tempstrings.clear();
195     names.clear();
196     for (dim_t i=0; i<m_mpiInfo->size; i++) {
197     stringstream path;
198     path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
199     tempstrings.push_back(path.str());
200     names.push_back((char*)tempstrings.back().c_str());
201     }
202     DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
203     &types[0], NULL);
204 caltinay 3691 }
205    
206     if (m_mpiInfo->size > 1) {
207     #ifdef ESYS_MPI
208     PMPIO_HandOffBaton(baton, dbfile);
209     PMPIO_Finish(baton);
210     #endif
211     } else {
212     DBClose(dbfile);
213     }
214    
215     #else // USE_SILO
216     throw RipleyException("dump(): no Silo support");
217     #endif
218     }
219    
220 caltinay 3697 const int* Rectangle::borrowSampleReferenceIDs(int fsType) const
221 caltinay 3691 {
222 caltinay 3697 switch (fsType) {
223 caltinay 3691 case Nodes:
224 caltinay 3733 case DegreesOfFreedom: //FIXME
225     case ReducedDegreesOfFreedom: //FIXME
226 caltinay 3691 return &m_nodeId[0];
227     case Elements:
228 caltinay 3733 case ReducedElements:
229 caltinay 3691 return &m_elementId[0];
230     case FaceElements:
231 caltinay 3733 case ReducedFaceElements:
232 caltinay 3691 return &m_faceId[0];
233     default:
234     break;
235     }
236    
237     stringstream msg;
238     msg << "borrowSampleReferenceIDs() not implemented for function space type "
239 caltinay 3701 << functionSpaceTypeAsString(fsType);
240 caltinay 3691 throw RipleyException(msg.str());
241     }
242    
243     bool Rectangle::ownSample(int fsCode, index_t id) const
244     {
245     #ifdef ESYS_MPI
246 caltinay 3702 if (fsCode != ReducedNodes) {
247 caltinay 3699 const index_t myFirst=m_nodeDistribution[m_mpiInfo->rank];
248     const index_t myLast=m_nodeDistribution[m_mpiInfo->rank+1]-1;
249 caltinay 3691 return (m_nodeId[id]>=myFirst && m_nodeId[id]<=myLast);
250 caltinay 3702 } else {
251     stringstream msg;
252     msg << "ownSample() not implemented for "
253     << functionSpaceTypeAsString(fsCode);
254     throw RipleyException(msg.str());
255     }
256 caltinay 3691 #else
257     return true;
258     #endif
259     }
260    
261 caltinay 3702 void Rectangle::setToGradient(escript::Data& out, const escript::Data& cIn) const
262 caltinay 3691 {
263 caltinay 3702 escript::Data& in = *const_cast<escript::Data*>(&cIn);
264 caltinay 3701 const dim_t numComp = in.getDataPointSize();
265 caltinay 3702 const double h0 = m_l0/m_gNE0;
266     const double h1 = m_l1/m_gNE1;
267 caltinay 3724 const double cx0 = -1./h0;
268     const double cx1 = -.78867513459481288225/h0;
269     const double cx2 = -.5/h0;
270     const double cx3 = -.21132486540518711775/h0;
271     const double cx4 = .21132486540518711775/h0;
272     const double cx5 = .5/h0;
273     const double cx6 = .78867513459481288225/h0;
274     const double cx7 = 1./h0;
275     const double cy0 = -1./h1;
276     const double cy1 = -.78867513459481288225/h1;
277     const double cy2 = -.5/h1;
278     const double cy3 = -.21132486540518711775/h1;
279     const double cy4 = .21132486540518711775/h1;
280     const double cy5 = .5/h1;
281     const double cy6 = .78867513459481288225/h1;
282     const double cy7 = 1./h1;
283    
284 caltinay 3702 if (out.getFunctionSpace().getTypeCode() == Elements) {
285 caltinay 3724 /*** GENERATOR SNIP_GRAD_ELEMENTS TOP */
286 caltinay 3697 #pragma omp parallel for
287 caltinay 3707 for (index_t k1=0; k1 < m_NE1; ++k1) {
288     for (index_t k0=0; k0 < m_NE0; ++k0) {
289 caltinay 3702 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0));
290     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0));
291     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0));
292     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0));
293     double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0));
294     for (index_t i=0; i < numComp; ++i) {
295 caltinay 3724 o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx1 + f_01[i]*cx3 + f_10[i]*cx6 + f_11[i]*cx4;
296     o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy1 + f_01[i]*cy6 + f_10[i]*cy3 + f_11[i]*cy4;
297     o[INDEX3(i,0,1,numComp,2)] = f_00[i]*cx1 + f_01[i]*cx3 + f_10[i]*cx6 + f_11[i]*cx4;
298     o[INDEX3(i,1,1,numComp,2)] = f_00[i]*cy3 + f_01[i]*cy4 + f_10[i]*cy1 + f_11[i]*cy6;
299     o[INDEX3(i,0,2,numComp,2)] = f_00[i]*cx3 + f_01[i]*cx1 + f_10[i]*cx4 + f_11[i]*cx6;
300     o[INDEX3(i,1,2,numComp,2)] = f_00[i]*cy1 + f_01[i]*cy6 + f_10[i]*cy3 + f_11[i]*cy4;
301     o[INDEX3(i,0,3,numComp,2)] = f_00[i]*cx3 + f_01[i]*cx1 + f_10[i]*cx4 + f_11[i]*cx6;
302     o[INDEX3(i,1,3,numComp,2)] = f_00[i]*cy3 + f_01[i]*cy4 + f_10[i]*cy1 + f_11[i]*cy6;
303 caltinay 3702 } /* end of component loop i */
304     } /* end of k0 loop */
305     } /* end of k1 loop */
306     /* GENERATOR SNIP_GRAD_ELEMENTS BOTTOM */
307 caltinay 3711 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
308 caltinay 3724 /*** GENERATOR SNIP_GRAD_REDUCED_ELEMENTS TOP */
309 caltinay 3711 #pragma omp parallel for
310     for (index_t k1=0; k1 < m_NE1; ++k1) {
311     for (index_t k0=0; k0 < m_NE0; ++k0) {
312     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0));
313     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0));
314     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0));
315     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0));
316     double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0));
317     for (index_t i=0; i < numComp; ++i) {
318 caltinay 3724 o[INDEX3(i,0,0,numComp,2)] = cx5*(f_10[i] + f_11[i]) + cx2*(f_00[i] + f_01[i]);
319     o[INDEX3(i,1,0,numComp,2)] = cy2*(f_00[i] + f_10[i]) + cy5*(f_01[i] + f_11[i]);
320 caltinay 3711 } /* end of component loop i */
321     } /* end of k0 loop */
322     } /* end of k1 loop */
323     /* GENERATOR SNIP_GRAD_REDUCED_ELEMENTS BOTTOM */
324 caltinay 3707 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
325 caltinay 3722 #pragma omp parallel
326     {
327 caltinay 3724 /*** GENERATOR SNIP_GRAD_FACES TOP */
328 caltinay 3722 if (m_faceOffset[0] > -1) {
329     #pragma omp for nowait
330     for (index_t k1=0; k1 < m_NE1; ++k1) {
331     const register double* f_10 = in.getSampleDataRO(INDEX2(1,k1, m_N0));
332     const register double* f_11 = in.getSampleDataRO(INDEX2(1,k1+1, m_N0));
333     const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0));
334     const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0));
335     double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
336     for (index_t i=0; i < numComp; ++i) {
337 caltinay 3724 o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx1 + f_01[i]*cx3 + f_10[i]*cx6 + f_11[i]*cx4;
338     o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy0 + f_01[i]*cy7;
339     o[INDEX3(i,0,1,numComp,2)] = f_00[i]*cx3 + f_01[i]*cx1 + f_10[i]*cx4 + f_11[i]*cx6;
340     o[INDEX3(i,1,1,numComp,2)] = f_00[i]*cy0 + f_01[i]*cy7;
341 caltinay 3722 } /* end of component loop i */
342     } /* end of k1 loop */
343     } /* end of face 0 */
344     if (m_faceOffset[1] > -1) {
345     #pragma omp for nowait
346     for (index_t k1=0; k1 < m_NE1; ++k1) {
347     const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0));
348     const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0));
349     const register double* f_01 = in.getSampleDataRO(INDEX2(m_N0-2,k1+1, m_N0));
350     const register double* f_00 = in.getSampleDataRO(INDEX2(m_N0-2,k1, m_N0));
351     double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
352     for (index_t i=0; i < numComp; ++i) {
353 caltinay 3724 o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx1 + f_01[i]*cx3 + f_10[i]*cx6 + f_11[i]*cx4;
354     o[INDEX3(i,1,0,numComp,2)] = f_10[i]*cy0 + f_11[i]*cy7;
355     o[INDEX3(i,0,1,numComp,2)] = f_00[i]*cx3 + f_01[i]*cx1 + f_10[i]*cx4 + f_11[i]*cx6;
356     o[INDEX3(i,1,1,numComp,2)] = f_10[i]*cy0 + f_11[i]*cy7;
357 caltinay 3722 } /* end of component loop i */
358     } /* end of k1 loop */
359     } /* end of face 1 */
360     if (m_faceOffset[2] > -1) {
361     #pragma omp for nowait
362     for (index_t k0=0; k0 < m_NE0; ++k0) {
363     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0));
364     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0));
365     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,1, m_N0));
366     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,1, m_N0));
367     double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
368     for (index_t i=0; i < numComp; ++i) {
369 caltinay 3724 o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx0 + f_10[i]*cx7;
370     o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy1 + f_01[i]*cy6 + f_10[i]*cy3 + f_11[i]*cy4;
371     o[INDEX3(i,0,1,numComp,2)] = f_00[i]*cx0 + f_10[i]*cx7;
372     o[INDEX3(i,1,1,numComp,2)] = f_00[i]*cy3 + f_01[i]*cy4 + f_10[i]*cy1 + f_11[i]*cy6;
373 caltinay 3722 } /* end of component loop i */
374     } /* end of k0 loop */
375     } /* end of face 2 */
376     if (m_faceOffset[3] > -1) {
377     #pragma omp for nowait
378     for (index_t k0=0; k0 < m_NE0; ++k0) {
379     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0));
380     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0));
381     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,m_N1-2, m_N0));
382     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,m_N1-2, m_N0));
383     double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
384     for (index_t i=0; i < numComp; ++i) {
385 caltinay 3724 o[INDEX3(i,0,0,numComp,2)] = f_01[i]*cx0 + f_11[i]*cx7;
386     o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy1 + f_01[i]*cy6 + f_10[i]*cy3 + f_11[i]*cy4;
387     o[INDEX3(i,0,1,numComp,2)] = f_01[i]*cx0 + f_11[i]*cx7;
388     o[INDEX3(i,1,1,numComp,2)] = f_00[i]*cy3 + f_01[i]*cy4 + f_10[i]*cy1 + f_11[i]*cy6;
389 caltinay 3722 } /* end of component loop i */
390     } /* end of k0 loop */
391     } /* end of face 3 */
392     /* GENERATOR SNIP_GRAD_FACES BOTTOM */
393     } // end of parallel section
394 caltinay 3711 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
395 caltinay 3722 #pragma omp parallel
396     {
397 caltinay 3724 /*** GENERATOR SNIP_GRAD_REDUCED_FACES TOP */
398 caltinay 3722 if (m_faceOffset[0] > -1) {
399     #pragma omp for nowait
400     for (index_t k1=0; k1 < m_NE1; ++k1) {
401     const register double* f_10 = in.getSampleDataRO(INDEX2(1,k1, m_N0));
402     const register double* f_11 = in.getSampleDataRO(INDEX2(1,k1+1, m_N0));
403     const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0));
404     const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0));
405     double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
406     for (index_t i=0; i < numComp; ++i) {
407 caltinay 3724 o[INDEX3(i,0,0,numComp,2)] = cx5*(f_10[i] + f_11[i]) + cx2*(f_00[i] + f_01[i]);
408     o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy0 + f_01[i]*cy7;
409 caltinay 3722 } /* end of component loop i */
410     } /* end of k1 loop */
411     } /* end of face 0 */
412     if (m_faceOffset[1] > -1) {
413     #pragma omp for nowait
414     for (index_t k1=0; k1 < m_NE1; ++k1) {
415     const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0));
416     const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0));
417     const register double* f_01 = in.getSampleDataRO(INDEX2(m_N0-2,k1+1, m_N0));
418     const register double* f_00 = in.getSampleDataRO(INDEX2(m_N0-2,k1, m_N0));
419     double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
420     for (index_t i=0; i < numComp; ++i) {
421 caltinay 3724 o[INDEX3(i,0,0,numComp,2)] = cx5*(f_10[i] + f_11[i]) + cx2*(f_00[i] + f_01[i]);
422     o[INDEX3(i,1,0,numComp,2)] = f_10[i]*cy0 + f_11[i]*cy7;
423 caltinay 3722 } /* end of component loop i */
424     } /* end of k1 loop */
425     } /* end of face 1 */
426     if (m_faceOffset[2] > -1) {
427     #pragma omp for nowait
428     for (index_t k0=0; k0 < m_NE0; ++k0) {
429     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0));
430     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0));
431     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,1, m_N0));
432     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,1, m_N0));
433     double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
434     for (index_t i=0; i < numComp; ++i) {
435 caltinay 3724 o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx0 + f_10[i]*cx7;
436     o[INDEX3(i,1,0,numComp,2)] = cy2*(f_00[i] + f_10[i]) + cy5*(f_01[i] + f_11[i]);
437 caltinay 3722 } /* end of component loop i */
438     } /* end of k0 loop */
439     } /* end of face 2 */
440     if (m_faceOffset[3] > -1) {
441     #pragma omp for nowait
442     for (index_t k0=0; k0 < m_NE0; ++k0) {
443     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0));
444     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0));
445     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,m_N1-2, m_N0));
446     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,m_N1-2, m_N0));
447     double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
448     for (index_t i=0; i < numComp; ++i) {
449 caltinay 3724 o[INDEX3(i,0,0,numComp,2)] = f_01[i]*cx0 + f_11[i]*cx7;
450     o[INDEX3(i,1,0,numComp,2)] = cy5*(f_01[i] + f_11[i]) + cy2*(f_00[i] + f_10[i]);
451 caltinay 3722 } /* end of component loop i */
452     } /* end of k0 loop */
453     } /* end of face 3 */
454     /* GENERATOR SNIP_GRAD_REDUCED_FACES BOTTOM */
455     } // end of parallel section
456 caltinay 3702 } else {
457 caltinay 3707 stringstream msg;
458     msg << "setToGradient() not implemented for "
459     << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
460     throw RipleyException(msg.str());
461 caltinay 3702 }
462 caltinay 3701 }
463 caltinay 3697
464 caltinay 3713 void Rectangle::setToIntegrals(vector<double>& integrals, const escript::Data& arg) const
465     {
466     escript::Data& in = *const_cast<escript::Data*>(&arg);
467     const dim_t numComp = in.getDataPointSize();
468     const double h0 = m_l0/m_gNE0;
469     const double h1 = m_l1/m_gNE1;
470     if (arg.getFunctionSpace().getTypeCode() == Elements) {
471     const double w_0 = h0*h1/4.;
472     #pragma omp parallel
473     {
474     vector<double> int_local(numComp, 0);
475 caltinay 3722 #pragma omp for nowait
476 caltinay 3713 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
477     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
478     const double* f = in.getSampleDataRO(INDEX2(k0, k1, m_NE0));
479     for (index_t i=0; i < numComp; ++i) {
480     const register double f_0 = f[INDEX2(i,0,numComp)];
481     const register double f_1 = f[INDEX2(i,1,numComp)];
482     const register double f_2 = f[INDEX2(i,2,numComp)];
483     const register double f_3 = f[INDEX2(i,3,numComp)];
484     int_local[i]+=(f_0+f_1+f_2+f_3)*w_0;
485     } /* end of component loop i */
486     } /* end of k0 loop */
487     } /* end of k1 loop */
488    
489     #pragma omp critical
490     for (index_t i=0; i<numComp; i++)
491     integrals[i]+=int_local[i];
492 caltinay 3722 } // end of parallel section
493 caltinay 3713 } else if (arg.getFunctionSpace().getTypeCode() == ReducedElements) {
494     const double w_0 = h0*h1;
495     #pragma omp parallel
496     {
497     vector<double> int_local(numComp, 0);
498 caltinay 3722 #pragma omp for nowait
499 caltinay 3713 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
500     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
501     const double* f = in.getSampleDataRO(INDEX2(k0, k1, m_NE0));
502     for (index_t i=0; i < numComp; ++i) {
503     int_local[i]+=f[i]*w_0;
504     } /* end of component loop i */
505     } /* end of k0 loop */
506     } /* end of k1 loop */
507    
508     #pragma omp critical
509     for (index_t i=0; i<numComp; i++)
510     integrals[i]+=int_local[i];
511 caltinay 3722 } // end of parallel section
512 caltinay 3713 } else if (arg.getFunctionSpace().getTypeCode() == FaceElements) {
513     const double w_0 = h0/2.;
514     const double w_1 = h1/2.;
515     #pragma omp parallel
516     {
517     vector<double> int_local(numComp, 0);
518     if (m_faceOffset[0] > -1) {
519 caltinay 3722 #pragma omp for nowait
520 caltinay 3713 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
521     const double* f = in.getSampleDataRO(m_faceOffset[0]+k1);
522     for (index_t i=0; i < numComp; ++i) {
523     const register double f_0 = f[INDEX2(i,0,numComp)];
524     const register double f_1 = f[INDEX2(i,1,numComp)];
525     int_local[i]+=(f_0+f_1)*w_1;
526     } /* end of component loop i */
527     } /* end of k1 loop */
528     }
529    
530     if (m_faceOffset[1] > -1) {
531 caltinay 3722 #pragma omp for nowait
532 caltinay 3713 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
533     const double* f = in.getSampleDataRO(m_faceOffset[1]+k1);
534     for (index_t i=0; i < numComp; ++i) {
535     const register double f_0 = f[INDEX2(i,0,numComp)];
536     const register double f_1 = f[INDEX2(i,1,numComp)];
537     int_local[i]+=(f_0+f_1)*w_1;
538     } /* end of component loop i */
539     } /* end of k1 loop */
540     }
541    
542     if (m_faceOffset[2] > -1) {
543 caltinay 3722 #pragma omp for nowait
544 caltinay 3713 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
545     const double* f = in.getSampleDataRO(m_faceOffset[2]+k0);
546     for (index_t i=0; i < numComp; ++i) {
547     const register double f_0 = f[INDEX2(i,0,numComp)];
548     const register double f_1 = f[INDEX2(i,1,numComp)];
549     int_local[i]+=(f_0+f_1)*w_0;
550     } /* end of component loop i */
551     } /* end of k0 loop */
552     }
553    
554     if (m_faceOffset[3] > -1) {
555 caltinay 3722 #pragma omp for nowait
556 caltinay 3713 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
557     const double* f = in.getSampleDataRO(m_faceOffset[3]+k0);
558     for (index_t i=0; i < numComp; ++i) {
559     const register double f_0 = f[INDEX2(i,0,numComp)];
560     const register double f_1 = f[INDEX2(i,1,numComp)];
561     int_local[i]+=(f_0+f_1)*w_0;
562     } /* end of component loop i */
563     } /* end of k0 loop */
564     }
565    
566     #pragma omp critical
567     for (index_t i=0; i<numComp; i++)
568     integrals[i]+=int_local[i];
569 caltinay 3722 } // end of parallel section
570 caltinay 3713 } else if (arg.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
571     #pragma omp parallel
572     {
573     vector<double> int_local(numComp, 0);
574     if (m_faceOffset[0] > -1) {
575 caltinay 3722 #pragma omp for nowait
576 caltinay 3713 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
577     const double* f = in.getSampleDataRO(m_faceOffset[0]+k1);
578     for (index_t i=0; i < numComp; ++i) {
579     int_local[i]+=f[i]*h1;
580     } /* end of component loop i */
581     } /* end of k1 loop */
582     }
583    
584     if (m_faceOffset[1] > -1) {
585 caltinay 3722 #pragma omp for nowait
586 caltinay 3713 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
587     const double* f = in.getSampleDataRO(m_faceOffset[1]+k1);
588     for (index_t i=0; i < numComp; ++i) {
589     int_local[i]+=f[i]*h1;
590     } /* end of component loop i */
591     } /* end of k1 loop */
592     }
593    
594     if (m_faceOffset[2] > -1) {
595 caltinay 3722 #pragma omp for nowait
596 caltinay 3713 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
597     const double* f = in.getSampleDataRO(m_faceOffset[2]+k0);
598     for (index_t i=0; i < numComp; ++i) {
599     int_local[i]+=f[i]*h0;
600     } /* end of component loop i */
601     } /* end of k0 loop */
602     }
603    
604     if (m_faceOffset[3] > -1) {
605 caltinay 3722 #pragma omp for nowait
606 caltinay 3713 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
607     const double* f = in.getSampleDataRO(m_faceOffset[3]+k0);
608     for (index_t i=0; i < numComp; ++i) {
609     int_local[i]+=f[i]*h0;
610     } /* end of component loop i */
611     } /* end of k0 loop */
612     }
613    
614     #pragma omp critical
615     for (index_t i=0; i<numComp; i++)
616     integrals[i]+=int_local[i];
617 caltinay 3722 } // end of parallel section
618 caltinay 3713 } else {
619     stringstream msg;
620     msg << "setToIntegrals() not implemented for "
621     << functionSpaceTypeAsString(arg.getFunctionSpace().getTypeCode());
622     throw RipleyException(msg.str());
623     }
624     }
625    
626 caltinay 3722 void Rectangle::setToNormal(escript::Data& out) const
627     {
628     if (out.getFunctionSpace().getTypeCode() == FaceElements) {
629     #pragma omp parallel
630     {
631     if (m_faceOffset[0] > -1) {
632     #pragma omp for nowait
633     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
634     double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
635     // set vector at two quadrature points
636     *o++ = -1.;
637     *o++ = 0.;
638     *o++ = -1.;
639     *o = 0.;
640     }
641     }
642    
643     if (m_faceOffset[1] > -1) {
644     #pragma omp for nowait
645     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
646     double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
647     // set vector at two quadrature points
648     *o++ = 1.;
649     *o++ = 0.;
650     *o++ = 1.;
651     *o = 0.;
652     }
653     }
654    
655     if (m_faceOffset[2] > -1) {
656     #pragma omp for nowait
657     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
658     double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
659     // set vector at two quadrature points
660     *o++ = 0.;
661     *o++ = -1.;
662     *o++ = 0.;
663     *o = -1.;
664     }
665     }
666    
667     if (m_faceOffset[3] > -1) {
668     #pragma omp for nowait
669     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
670     double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
671     // set vector at two quadrature points
672     *o++ = 0.;
673     *o++ = 1.;
674     *o++ = 0.;
675     *o = 1.;
676     }
677     }
678     } // end of parallel section
679     } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
680     #pragma omp parallel
681     {
682     if (m_faceOffset[0] > -1) {
683     #pragma omp for nowait
684     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
685     double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
686     *o++ = -1.;
687     *o = 0.;
688     }
689     }
690    
691     if (m_faceOffset[1] > -1) {
692     #pragma omp for nowait
693     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
694     double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
695     *o++ = 1.;
696     *o = 0.;
697     }
698     }
699    
700     if (m_faceOffset[2] > -1) {
701     #pragma omp for nowait
702     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
703     double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
704     *o++ = 0.;
705     *o = -1.;
706     }
707     }
708    
709     if (m_faceOffset[3] > -1) {
710     #pragma omp for nowait
711     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
712     double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
713     *o++ = 0.;
714     *o = 1.;
715     }
716     }
717     } // end of parallel section
718    
719     } else {
720     stringstream msg;
721     msg << "setToNormal() not implemented for "
722     << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
723     throw RipleyException(msg.str());
724     }
725     }
726    
727 caltinay 3701 void Rectangle::addPDEToSystem(escript::AbstractSystemMatrix& mat,
728     escript::Data& rhs, const escript::Data& A, const escript::Data& B,
729     const escript::Data& C, const escript::Data& D,
730     const escript::Data& X, const escript::Data& Y,
731     const escript::Data& d, const escript::Data& y,
732     const escript::Data& d_contact, const escript::Data& y_contact,
733     const escript::Data& d_dirac, const escript::Data& y_dirac) const
734     {
735     throw RipleyException("addPDEToSystem() not implemented");
736     }
737    
738 caltinay 3691 Paso_SystemMatrixPattern* Rectangle::getPattern(bool reducedRowOrder,
739     bool reducedColOrder) const
740     {
741     if (reducedRowOrder || reducedColOrder)
742     throw RipleyException("getPattern() not implemented for reduced order");
743    
744 caltinay 3699 // connector
745     RankVector neighbour;
746     IndexVector offsetInShared(1,0);
747     IndexVector sendShared, recvShared;
748     const IndexVector faces=getNumFacesPerBoundary();
749     const index_t left = (m_offset0==0 ? 0 : 1);
750     const index_t bottom = (m_offset1==0 ? 0 : 1);
751     // corner node from bottom-left
752     if (faces[0] == 0 && faces[2] == 0) {
753     neighbour.push_back(m_mpiInfo->rank-m_NX-1);
754     offsetInShared.push_back(offsetInShared.back()+1);
755     sendShared.push_back(m_nodeId[m_N0+left]);
756     recvShared.push_back(m_nodeId[0]);
757     }
758     // bottom edge
759     if (faces[2] == 0) {
760     neighbour.push_back(m_mpiInfo->rank-m_NX);
761     offsetInShared.push_back(offsetInShared.back()+m_N0-left);
762     for (dim_t i=left; i<m_N0; i++) {
763     // easy case, we know the neighbour id's
764     sendShared.push_back(m_nodeId[m_N0+i]);
765     recvShared.push_back(m_nodeId[i]);
766     }
767     }
768     // corner node from bottom-right
769     if (faces[1] == 0 && faces[2] == 0) {
770     neighbour.push_back(m_mpiInfo->rank-m_NX+1);
771     const index_t N0=(neighbour.back()%m_NX == 0 ? m_N0 : m_N0-1);
772     const index_t N1=(neighbour.back()/m_NX == 0 ? m_N1 : m_N1-1);
773     const index_t first=m_nodeDistribution[neighbour.back()];
774     offsetInShared.push_back(offsetInShared.back()+1);
775     sendShared.push_back(m_nodeId[(bottom+1)*m_N0-1]);
776     recvShared.push_back(first+N0*(N1-1));
777     }
778     // left edge
779     if (faces[0] == 0) {
780     neighbour.push_back(m_mpiInfo->rank-1);
781     offsetInShared.push_back(offsetInShared.back()+m_N1-bottom);
782     for (dim_t i=bottom; i<m_N1; i++) {
783     // easy case, we know the neighbour id's
784     sendShared.push_back(m_nodeId[i*m_N0+1]);
785     recvShared.push_back(m_nodeId[i*m_N0]);
786     }
787     }
788     // right edge
789     if (faces[1] == 0) {
790     neighbour.push_back(m_mpiInfo->rank+1);
791     const index_t rightN0=(neighbour.back()%m_NX == 0 ? m_N0 : m_N0-1);
792     const index_t first=m_nodeDistribution[neighbour.back()];
793     offsetInShared.push_back(offsetInShared.back()+m_N1-bottom);
794     for (dim_t i=bottom; i<m_N1; i++) {
795     sendShared.push_back(m_nodeId[(i+1)*m_N0-1]);
796     recvShared.push_back(first+rightN0*(i-bottom));
797     }
798     }
799     // corner node from top-left
800     if (faces[0] == 0 && faces[3] == 0) {
801     neighbour.push_back(m_mpiInfo->rank+m_NX-1);
802     const index_t N0=(neighbour.back()%m_NX == 0 ? m_N0 : m_N0-1);
803     const index_t first=m_nodeDistribution[neighbour.back()];
804     offsetInShared.push_back(offsetInShared.back()+1);
805     sendShared.push_back(m_nodeId[m_N0*(m_N1-1)+left]);
806     recvShared.push_back(first+N0-1);
807     }
808     // top edge
809     if (faces[3] == 0) {
810     neighbour.push_back(m_mpiInfo->rank+m_NX);
811     const index_t first=m_nodeDistribution[neighbour.back()];
812     offsetInShared.push_back(offsetInShared.back()+m_N0-left);
813     for (dim_t i=left; i<m_N0; i++) {
814     sendShared.push_back(m_nodeId[m_N0*(m_N1-1)+i]);
815     recvShared.push_back(first+i-left);
816     }
817     }
818     // corner node from top-right
819     if (faces[1] == 0 && faces[3] == 0) {
820     neighbour.push_back(m_mpiInfo->rank+m_NX+1);
821     const index_t first=m_nodeDistribution[neighbour.back()];
822     offsetInShared.push_back(offsetInShared.back()+1);
823     sendShared.push_back(m_nodeId[m_N0*m_N1-1]);
824     recvShared.push_back(first);
825     }
826     const int numDOF=m_nodeDistribution[m_mpiInfo->rank+1]-m_nodeDistribution[m_mpiInfo->rank];
827 caltinay 3702 /*
828 caltinay 3699 cout << "--- rcv_shcomp ---" << endl;
829     cout << "numDOF=" << numDOF << ", numNeighbors=" << neighbour.size() << endl;
830     for (size_t i=0; i<neighbour.size(); i++) {
831     cout << "neighbor[" << i << "]=" << neighbour[i]
832     << " offsetInShared[" << i+1 << "]=" << offsetInShared[i+1] << endl;
833     }
834     for (size_t i=0; i<recvShared.size(); i++) {
835     cout << "shared[" << i << "]=" << recvShared[i] << endl;
836     }
837     cout << "--- snd_shcomp ---" << endl;
838     for (size_t i=0; i<sendShared.size(); i++) {
839     cout << "shared[" << i << "]=" << sendShared[i] << endl;
840     }
841 caltinay 3702 */
842 caltinay 3691
843     Paso_SharedComponents *snd_shcomp = Paso_SharedComponents_alloc(
844 caltinay 3699 numDOF, neighbour.size(), &neighbour[0], &sendShared[0],
845 caltinay 3697 &offsetInShared[0], 1, 0, m_mpiInfo);
846 caltinay 3691 Paso_SharedComponents *rcv_shcomp = Paso_SharedComponents_alloc(
847 caltinay 3699 numDOF, neighbour.size(), &neighbour[0], &recvShared[0],
848 caltinay 3697 &offsetInShared[0], 1, 0, m_mpiInfo);
849 caltinay 3691 Paso_Connector* connector = Paso_Connector_alloc(snd_shcomp, rcv_shcomp);
850 caltinay 3699 Paso_SharedComponents_free(snd_shcomp);
851     Paso_SharedComponents_free(rcv_shcomp);
852 caltinay 3691
853     // create patterns
854 caltinay 3699 dim_t M, N;
855     IndexVector ptr(1,0);
856     IndexVector index;
857    
858     // main pattern
859     for (index_t i=0; i<numDOF; i++) {
860     // always add the node itself
861     index.push_back(i);
862     int num=insertNeighbours(index, i);
863     ptr.push_back(ptr.back()+num+1);
864     }
865     M=N=ptr.size()-1;
866     // paso will manage the memory
867     index_t* indexC = MEMALLOC(index.size(),index_t);
868     index_t* ptrC = MEMALLOC(ptr.size(), index_t);
869     copy(index.begin(), index.end(), indexC);
870     copy(ptr.begin(), ptr.end(), ptrC);
871 caltinay 3691 Paso_Pattern *mainPattern = Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT,
872 caltinay 3699 M, N, ptrC, indexC);
873 caltinay 3691
874 caltinay 3702 /*
875 caltinay 3699 cout << "--- main_pattern ---" << endl;
876     cout << "M=" << M << ", N=" << N << endl;
877     for (size_t i=0; i<ptr.size(); i++) {
878     cout << "ptr[" << i << "]=" << ptr[i] << endl;
879     }
880     for (size_t i=0; i<index.size(); i++) {
881     cout << "index[" << i << "]=" << index[i] << endl;
882     }
883 caltinay 3702 */
884 caltinay 3699
885     ptr.clear();
886     index.clear();
887    
888     // column & row couple patterns
889     Paso_Pattern *colCouplePattern, *rowCouplePattern;
890     generateCouplePatterns(&colCouplePattern, &rowCouplePattern);
891    
892     // allocate paso distribution
893     Paso_Distribution* distribution = Paso_Distribution_alloc(m_mpiInfo,
894     const_cast<index_t*>(&m_nodeDistribution[0]), 1, 0);
895    
896 caltinay 3691 Paso_SystemMatrixPattern* pattern = Paso_SystemMatrixPattern_alloc(
897     MATRIX_FORMAT_DEFAULT, distribution, distribution,
898     mainPattern, colCouplePattern, rowCouplePattern,
899     connector, connector);
900     Paso_Pattern_free(mainPattern);
901     Paso_Pattern_free(colCouplePattern);
902     Paso_Pattern_free(rowCouplePattern);
903     Paso_Distribution_free(distribution);
904 caltinay 3697 return pattern;
905 caltinay 3691 }
906    
907     void Rectangle::Print_Mesh_Info(const bool full) const
908     {
909     RipleyDomain::Print_Mesh_Info(full);
910     if (full) {
911     cout << " Id Coordinates" << endl;
912     cout.precision(15);
913     cout.setf(ios::scientific, ios::floatfield);
914 caltinay 3697 pair<double,double> xdx = getFirstCoordAndSpacing(0);
915     pair<double,double> ydy = getFirstCoordAndSpacing(1);
916 caltinay 3691 for (index_t i=0; i < getNumNodes(); i++) {
917     cout << " " << setw(5) << m_nodeId[i]
918 caltinay 3697 << " " << xdx.first+(i%m_N0)*xdx.second
919     << " " << ydy.first+(i/m_N0)*ydy.second << endl;
920 caltinay 3691 }
921     }
922     }
923    
924 caltinay 3697 IndexVector Rectangle::getNumNodesPerDim() const
925     {
926     IndexVector ret;
927     ret.push_back(m_N0);
928     ret.push_back(m_N1);
929     return ret;
930     }
931    
932     IndexVector Rectangle::getNumElementsPerDim() const
933     {
934     IndexVector ret;
935     ret.push_back(m_NE0);
936     ret.push_back(m_NE1);
937     return ret;
938     }
939    
940     IndexVector Rectangle::getNumFacesPerBoundary() const
941     {
942     IndexVector ret(4, 0);
943     //left
944     if (m_offset0==0)
945     ret[0]=m_NE1;
946     //right
947     if (m_mpiInfo->rank%m_NX==m_NX-1)
948     ret[1]=m_NE1;
949     //bottom
950     if (m_offset1==0)
951     ret[2]=m_NE0;
952     //top
953     if (m_mpiInfo->rank/m_NX==m_NY-1)
954     ret[3]=m_NE0;
955     return ret;
956     }
957    
958     pair<double,double> Rectangle::getFirstCoordAndSpacing(dim_t dim) const
959     {
960     if (dim==0) {
961     return pair<double,double>((m_l0*m_offset0)/m_gNE0, m_l0/m_gNE0);
962     } else if (dim==1) {
963     return pair<double,double>((m_l1*m_offset1)/m_gNE1, m_l1/m_gNE1);
964     }
965     throw RipleyException("getFirstCoordAndSpacing(): invalid argument");
966     }
967    
968 caltinay 3691 //protected
969     dim_t Rectangle::getNumFaceElements() const
970     {
971 caltinay 3699 const IndexVector faces = getNumFacesPerBoundary();
972 caltinay 3691 dim_t n=0;
973 caltinay 3699 for (size_t i=0; i<faces.size(); i++)
974     n+=faces[i];
975 caltinay 3691 return n;
976     }
977    
978     //protected
979     void Rectangle::assembleCoordinates(escript::Data& arg) const
980     {
981     escriptDataC x = arg.getDataC();
982     int numDim = m_numDim;
983     if (!isDataPointShapeEqual(&x, 1, &numDim))
984     throw RipleyException("setToX: Invalid Data object shape");
985     if (!numSamplesEqual(&x, 1, getNumNodes()))
986     throw RipleyException("setToX: Illegal number of samples in Data object");
987    
988 caltinay 3697 pair<double,double> xdx = getFirstCoordAndSpacing(0);
989     pair<double,double> ydy = getFirstCoordAndSpacing(1);
990 caltinay 3691 arg.requireWrite();
991     #pragma omp parallel for
992     for (dim_t i1 = 0; i1 < m_N1; i1++) {
993     for (dim_t i0 = 0; i0 < m_N0; i0++) {
994     double* point = arg.getSampleDataRW(i0+m_N0*i1);
995 caltinay 3697 point[0] = xdx.first+i0*xdx.second;
996     point[1] = ydy.first+i1*ydy.second;
997 caltinay 3691 }
998     }
999     }
1000    
1001     //private
1002     void Rectangle::populateSampleIds()
1003     {
1004 caltinay 3697 // identifiers are ordered from left to right, bottom to top on each rank,
1005     // except for the shared nodes which are owned by the rank below / to the
1006     // left of the current rank
1007    
1008     // build node distribution vector first.
1009     // m_nodeDistribution[i] is the first node id on rank i, that is
1010     // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes
1011     m_nodeDistribution.assign(m_mpiInfo->size+1, 0);
1012     m_nodeDistribution[1]=getNumNodes();
1013     for (dim_t k=1; k<m_mpiInfo->size-1; k++) {
1014     const index_t x=k%m_NX;
1015     const index_t y=k/m_NX;
1016     index_t numNodes=getNumNodes();
1017     if (x>0)
1018     numNodes-=m_N1;
1019     if (y>0)
1020     numNodes-=m_N0;
1021     if (x>0 && y>0)
1022     numNodes++; // subtracted corner twice -> fix that
1023     m_nodeDistribution[k+1]=m_nodeDistribution[k]+numNodes;
1024     }
1025     m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal();
1026    
1027 caltinay 3691 m_nodeId.resize(getNumNodes());
1028 caltinay 3697
1029     // the bottom row and left column are not owned by this rank so the
1030     // identifiers need to be computed accordingly
1031     const index_t left = (m_offset0==0 ? 0 : 1);
1032     const index_t bottom = (m_offset1==0 ? 0 : 1);
1033     if (left>0) {
1034     const int neighbour=m_mpiInfo->rank-1;
1035     const index_t leftN0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1036 caltinay 3691 #pragma omp parallel for
1037 caltinay 3697 for (dim_t i1=bottom; i1<m_N1; i1++) {
1038     m_nodeId[i1*m_N0]=m_nodeDistribution[neighbour]
1039     + (i1-bottom+1)*leftN0-1;
1040     }
1041     }
1042     if (bottom>0) {
1043     const int neighbour=m_mpiInfo->rank-m_NX;
1044     const index_t bottomN0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1045     const index_t bottomN1=(neighbour/m_NX == 0 ? m_N1 : m_N1-1);
1046     #pragma omp parallel for
1047     for (dim_t i0=left; i0<m_N0; i0++) {
1048     m_nodeId[i0]=m_nodeDistribution[neighbour]
1049     + (bottomN1-1)*bottomN0 + i0 - left;
1050     }
1051     }
1052     if (left>0 && bottom>0) {
1053     const int neighbour=m_mpiInfo->rank-m_NX-1;
1054 caltinay 3699 const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1055     const index_t N1=(neighbour/m_NX == 0 ? m_N1 : m_N1-1);
1056     m_nodeId[0]=m_nodeDistribution[neighbour]+N0*N1-1;
1057 caltinay 3697 }
1058    
1059     // the rest of the id's are contiguous
1060     const index_t firstId=m_nodeDistribution[m_mpiInfo->rank];
1061     #pragma omp parallel for
1062     for (dim_t i1=bottom; i1<m_N1; i1++) {
1063     for (dim_t i0=left; i0<m_N0; i0++) {
1064     m_nodeId[i0+i1*m_N0] = firstId+i0-left+(i1-bottom)*(m_N0-left);
1065     }
1066     }
1067    
1068 caltinay 3704 // element id's
1069 caltinay 3697 m_elementId.resize(getNumElements());
1070     #pragma omp parallel for
1071     for (dim_t k=0; k<getNumElements(); k++) {
1072     m_elementId[k]=k;
1073     }
1074    
1075 caltinay 3704 // face element id's
1076 caltinay 3697 m_faceId.resize(getNumFaceElements());
1077     #pragma omp parallel for
1078     for (dim_t k=0; k<getNumFaceElements(); k++) {
1079     m_faceId[k]=k;
1080     }
1081 caltinay 3704
1082 caltinay 3722 // generate face offset vector and set face tags
1083 caltinay 3704 const IndexVector facesPerEdge = getNumFacesPerBoundary();
1084 caltinay 3722 const index_t LEFT=1, RIGHT=2, BOTTOM=10, TOP=20;
1085     const index_t faceTag[] = { LEFT, RIGHT, BOTTOM, TOP };
1086 caltinay 3704 m_faceOffset.assign(facesPerEdge.size(), -1);
1087 caltinay 3722 m_faceTags.clear();
1088 caltinay 3704 index_t offset=0;
1089     for (size_t i=0; i<facesPerEdge.size(); i++) {
1090     if (facesPerEdge[i]>0) {
1091     m_faceOffset[i]=offset;
1092     offset+=facesPerEdge[i];
1093 caltinay 3722 m_faceTags.insert(m_faceTags.end(), facesPerEdge[i], faceTag[i]);
1094 caltinay 3704 }
1095     }
1096 caltinay 3722 setTagMap("left", LEFT);
1097     setTagMap("right", RIGHT);
1098     setTagMap("bottom", BOTTOM);
1099     setTagMap("top", TOP);
1100     updateTagsInUse(FaceElements);
1101 caltinay 3691 }
1102    
1103 caltinay 3699 //private
1104     int Rectangle::insertNeighbours(IndexVector& index, index_t node) const
1105     {
1106     const dim_t myN0 = (m_offset0==0 ? m_N0 : m_N0-1);
1107     const dim_t myN1 = (m_offset1==0 ? m_N1 : m_N1-1);
1108     const int x=node%myN0;
1109     const int y=node/myN0;
1110     int num=0;
1111     if (y>0) {
1112     if (x>0) {
1113     // bottom-left
1114     index.push_back(node-myN0-1);
1115     num++;
1116     }
1117     // bottom
1118     index.push_back(node-myN0);
1119     num++;
1120     if (x<myN0-1) {
1121     // bottom-right
1122     index.push_back(node-myN0+1);
1123     num++;
1124     }
1125     }
1126     if (x<myN0-1) {
1127     // right
1128     index.push_back(node+1);
1129     num++;
1130     if (y<myN1-1) {
1131     // top-right
1132     index.push_back(node+myN0+1);
1133     num++;
1134     }
1135     }
1136     if (y<myN1-1) {
1137     // top
1138     index.push_back(node+myN0);
1139     num++;
1140     if (x>0) {
1141     // top-left
1142     index.push_back(node+myN0-1);
1143     num++;
1144     }
1145     }
1146     if (x>0) {
1147     // left
1148     index.push_back(node-1);
1149     num++;
1150     }
1151    
1152     return num;
1153     }
1154    
1155     //private
1156     void Rectangle::generateCouplePatterns(Paso_Pattern** colPattern, Paso_Pattern** rowPattern) const
1157     {
1158     IndexVector ptr(1,0);
1159     IndexVector index;
1160     const dim_t myN0 = (m_offset0==0 ? m_N0 : m_N0-1);
1161     const dim_t myN1 = (m_offset1==0 ? m_N1 : m_N1-1);
1162     const IndexVector faces=getNumFacesPerBoundary();
1163    
1164     // bottom edge
1165     dim_t n=0;
1166     if (faces[0] == 0) {
1167     index.push_back(2*(myN0+myN1+1));
1168     index.push_back(2*(myN0+myN1+1)+1);
1169     n+=2;
1170     if (faces[2] == 0) {
1171     index.push_back(0);
1172     index.push_back(1);
1173     index.push_back(2);
1174     n+=3;
1175     }
1176     } else if (faces[2] == 0) {
1177     index.push_back(1);
1178     index.push_back(2);
1179     n+=2;
1180     }
1181     // n=neighbours of bottom-left corner node
1182     ptr.push_back(ptr.back()+n);
1183     n=0;
1184     if (faces[2] == 0) {
1185     for (dim_t i=1; i<myN0-1; i++) {
1186     index.push_back(i);
1187     index.push_back(i+1);
1188     index.push_back(i+2);
1189     ptr.push_back(ptr.back()+3);
1190     }
1191     index.push_back(myN0-1);
1192     index.push_back(myN0);
1193     n+=2;
1194     if (faces[1] == 0) {
1195     index.push_back(myN0+1);
1196     index.push_back(myN0+2);
1197     index.push_back(myN0+3);
1198     n+=3;
1199     }
1200     } else {
1201     for (dim_t i=1; i<myN0-1; i++) {
1202     ptr.push_back(ptr.back());
1203     }
1204     if (faces[1] == 0) {
1205     index.push_back(myN0+2);
1206     index.push_back(myN0+3);
1207     n+=2;
1208     }
1209     }
1210     // n=neighbours of bottom-right corner node
1211     ptr.push_back(ptr.back()+n);
1212    
1213     // 2nd row to 2nd last row
1214     for (dim_t i=1; i<myN1-1; i++) {
1215     // left edge
1216     if (faces[0] == 0) {
1217     index.push_back(2*(myN0+myN1+2)-i);
1218     index.push_back(2*(myN0+myN1+2)-i-1);
1219     index.push_back(2*(myN0+myN1+2)-i-2);
1220     ptr.push_back(ptr.back()+3);
1221     } else {
1222     ptr.push_back(ptr.back());
1223     }
1224     for (dim_t j=1; j<myN0-1; j++) {
1225     ptr.push_back(ptr.back());
1226     }
1227     // right edge
1228     if (faces[1] == 0) {
1229     index.push_back(myN0+i+1);
1230     index.push_back(myN0+i+2);
1231     index.push_back(myN0+i+3);
1232     ptr.push_back(ptr.back()+3);
1233     } else {
1234     ptr.push_back(ptr.back());
1235     }
1236     }
1237    
1238     // top edge
1239     n=0;
1240     if (faces[0] == 0) {
1241     index.push_back(2*myN0+myN1+5);
1242     index.push_back(2*myN0+myN1+4);
1243     n+=2;
1244     if (faces[3] == 0) {
1245     index.push_back(2*myN0+myN1+3);
1246     index.push_back(2*myN0+myN1+2);
1247     index.push_back(2*myN0+myN1+1);
1248     n+=3;
1249     }
1250     } else if (faces[3] == 0) {
1251     index.push_back(2*myN0+myN1+2);
1252     index.push_back(2*myN0+myN1+1);
1253     n+=2;
1254     }
1255     // n=neighbours of top-left corner node
1256     ptr.push_back(ptr.back()+n);
1257     n=0;
1258     if (faces[3] == 0) {
1259     for (dim_t i=1; i<myN0-1; i++) {
1260     index.push_back(2*myN0+myN1+i+1);
1261     index.push_back(2*myN0+myN1+i);
1262     index.push_back(2*myN0+myN1+i-1);
1263     ptr.push_back(ptr.back()+3);
1264     }
1265     index.push_back(myN0+myN1+4);
1266     index.push_back(myN0+myN1+3);
1267     n+=2;
1268     if (faces[1] == 0) {
1269     index.push_back(myN0+myN1+2);
1270     index.push_back(myN0+myN1+1);
1271     index.push_back(myN0+myN1);
1272     n+=3;
1273     }
1274     } else {
1275     for (dim_t i=1; i<myN0-1; i++) {
1276     ptr.push_back(ptr.back());
1277     }
1278     if (faces[1] == 0) {
1279     index.push_back(myN0+myN1+1);
1280     index.push_back(myN0+myN1);
1281     n+=2;
1282     }
1283     }
1284     // n=neighbours of top-right corner node
1285     ptr.push_back(ptr.back()+n);
1286    
1287     dim_t M=ptr.size()-1;
1288     map<index_t,index_t> idMap;
1289     dim_t N=0;
1290     for (IndexVector::iterator it=index.begin(); it!=index.end(); it++) {
1291     if (idMap.find(*it)==idMap.end()) {
1292     idMap[*it]=N;
1293     *it=N++;
1294     } else {
1295     *it=idMap[*it];
1296     }
1297     }
1298    
1299 caltinay 3702 /*
1300 caltinay 3699 cout << "--- colCouple_pattern ---" << endl;
1301     cout << "M=" << M << ", N=" << N << endl;
1302     for (size_t i=0; i<ptr.size(); i++) {
1303     cout << "ptr[" << i << "]=" << ptr[i] << endl;
1304     }
1305     for (size_t i=0; i<index.size(); i++) {
1306     cout << "index[" << i << "]=" << index[i] << endl;
1307     }
1308 caltinay 3702 */
1309 caltinay 3699
1310     // now build the row couple pattern
1311     IndexVector ptr2(1,0);
1312     IndexVector index2;
1313     for (dim_t id=0; id<N; id++) {
1314     n=0;
1315     for (dim_t i=0; i<M; i++) {
1316     for (dim_t j=ptr[i]; j<ptr[i+1]; j++) {
1317     if (index[j]==id) {
1318     index2.push_back(i);
1319     n++;
1320     break;
1321     }
1322     }
1323     }
1324     ptr2.push_back(ptr2.back()+n);
1325     }
1326    
1327 caltinay 3702 /*
1328 caltinay 3699 cout << "--- rowCouple_pattern ---" << endl;
1329     cout << "M=" << N << ", N=" << M << endl;
1330     for (size_t i=0; i<ptr2.size(); i++) {
1331     cout << "ptr[" << i << "]=" << ptr2[i] << endl;
1332     }
1333     for (size_t i=0; i<index2.size(); i++) {
1334     cout << "index[" << i << "]=" << index2[i] << endl;
1335     }
1336 caltinay 3702 */
1337 caltinay 3699
1338     // paso will manage the memory
1339     index_t* indexC = MEMALLOC(index.size(), index_t);
1340     index_t* ptrC = MEMALLOC(ptr.size(), index_t);
1341     copy(index.begin(), index.end(), indexC);
1342     copy(ptr.begin(), ptr.end(), ptrC);
1343     *colPattern=Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT, M, N, ptrC, indexC);
1344    
1345     // paso will manage the memory
1346     indexC = MEMALLOC(index2.size(), index_t);
1347     ptrC = MEMALLOC(ptr2.size(), index_t);
1348     copy(index2.begin(), index2.end(), indexC);
1349     copy(ptr2.begin(), ptr2.end(), ptrC);
1350     *rowPattern=Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT, N, M, ptrC, indexC);
1351     }
1352    
1353 caltinay 3702 //protected
1354 caltinay 3711 void Rectangle::interpolateNodesOnElements(escript::Data& out,
1355     escript::Data& in, bool reduced) const
1356 caltinay 3702 {
1357     const dim_t numComp = in.getDataPointSize();
1358 caltinay 3711 if (reduced) {
1359 caltinay 3724 /*** GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS TOP */
1360     const double c0 = .25;
1361 caltinay 3702 #pragma omp parallel for
1362 caltinay 3711 for (index_t k1=0; k1 < m_NE1; ++k1) {
1363     for (index_t k0=0; k0 < m_NE0; ++k0) {
1364     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0));
1365     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0));
1366     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0));
1367     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0));
1368     double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0));
1369     for (index_t i=0; i < numComp; ++i) {
1370 caltinay 3724 o[INDEX2(i,numComp,0)] = c0*(f_00[i] + f_01[i] + f_10[i] + f_11[i]);
1371 caltinay 3711 } /* end of component loop i */
1372     } /* end of k0 loop */
1373     } /* end of k1 loop */
1374     /* GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS BOTTOM */
1375     } else {
1376 caltinay 3724 /*** GENERATOR SNIP_INTERPOLATE_ELEMENTS TOP */
1377     const double c0 = .16666666666666666667;
1378     const double c1 = .044658198738520451079;
1379     const double c2 = .62200846792814621559;
1380 caltinay 3711 #pragma omp parallel for
1381     for (index_t k1=0; k1 < m_NE1; ++k1) {
1382     for (index_t k0=0; k0 < m_NE0; ++k0) {
1383     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0));
1384     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0));
1385     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0));
1386     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0));
1387     double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0));
1388     for (index_t i=0; i < numComp; ++i) {
1389 caltinay 3724 o[INDEX2(i,numComp,0)] = f_00[i]*c2 + f_11[i]*c1 + c0*(f_01[i] + f_10[i]);
1390     o[INDEX2(i,numComp,1)] = f_01[i]*c1 + f_10[i]*c2 + c0*(f_00[i] + f_11[i]);
1391     o[INDEX2(i,numComp,2)] = f_01[i]*c2 + f_10[i]*c1 + c0*(f_00[i] + f_11[i]);
1392     o[INDEX2(i,numComp,3)] = f_00[i]*c1 + f_11[i]*c2 + c0*(f_01[i] + f_10[i]);
1393 caltinay 3711 } /* end of component loop i */
1394     } /* end of k0 loop */
1395     } /* end of k1 loop */
1396     /* GENERATOR SNIP_INTERPOLATE_ELEMENTS BOTTOM */
1397     }
1398 caltinay 3702 }
1399    
1400     //protected
1401 caltinay 3711 void Rectangle::interpolateNodesOnFaces(escript::Data& out, escript::Data& in,
1402     bool reduced) const
1403 caltinay 3702 {
1404 caltinay 3704 const dim_t numComp = in.getDataPointSize();
1405 caltinay 3711 if (reduced) {
1406 caltinay 3724 const double c0 = .5;
1407     #pragma omp parallel
1408     {
1409     /*** GENERATOR SNIP_INTERPOLATE_REDUCED_FACES TOP */
1410     if (m_faceOffset[0] > -1) {
1411     #pragma omp for nowait
1412     for (index_t k1=0; k1 < m_NE1; ++k1) {
1413     const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0));
1414     const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0));
1415     double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
1416     for (index_t i=0; i < numComp; ++i) {
1417     o[INDEX2(i,numComp,0)] = c0*(f_00[i] + f_01[i]);
1418     } /* end of component loop i */
1419     } /* end of k1 loop */
1420     } /* end of face 0 */
1421     if (m_faceOffset[1] > -1) {
1422     #pragma omp for nowait
1423     for (index_t k1=0; k1 < m_NE1; ++k1) {
1424     const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0));
1425     const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0));
1426     double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
1427     for (index_t i=0; i < numComp; ++i) {
1428     o[INDEX2(i,numComp,0)] = c0*(f_10[i] + f_11[i]);
1429     } /* end of component loop i */
1430     } /* end of k1 loop */
1431     } /* end of face 1 */
1432     if (m_faceOffset[2] > -1) {
1433     #pragma omp for nowait
1434     for (index_t k0=0; k0 < m_NE0; ++k0) {
1435     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0));
1436     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0));
1437     double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1438     for (index_t i=0; i < numComp; ++i) {
1439     o[INDEX2(i,numComp,0)] = c0*(f_00[i] + f_10[i]);
1440     } /* end of component loop i */
1441     } /* end of k0 loop */
1442     } /* end of face 2 */
1443     if (m_faceOffset[3] > -1) {
1444     #pragma omp for nowait
1445     for (index_t k0=0; k0 < m_NE0; ++k0) {
1446     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0));
1447     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0));
1448     double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1449     for (index_t i=0; i < numComp; ++i) {
1450     o[INDEX2(i,numComp,0)] = c0*(f_01[i] + f_11[i]);
1451     } /* end of component loop i */
1452     } /* end of k0 loop */
1453     } /* end of face 3 */
1454     /* GENERATOR SNIP_INTERPOLATE_REDUCED_FACES BOTTOM */
1455     } // end of parallel section
1456 caltinay 3711 } else {
1457 caltinay 3724 const double c0 = 0.21132486540518711775;
1458     const double c1 = 0.78867513459481288225;
1459     #pragma omp parallel
1460     {
1461     /*** GENERATOR SNIP_INTERPOLATE_FACES TOP */
1462     if (m_faceOffset[0] > -1) {
1463     #pragma omp for nowait
1464     for (index_t k1=0; k1 < m_NE1; ++k1) {
1465     const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0));
1466     const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0));
1467     double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
1468     for (index_t i=0; i < numComp; ++i) {
1469     o[INDEX2(i,numComp,0)] = f_00[i]*c1 + f_01[i]*c0;
1470     o[INDEX2(i,numComp,1)] = f_00[i]*c0 + f_01[i]*c1;
1471     } /* end of component loop i */
1472     } /* end of k1 loop */
1473     } /* end of face 0 */
1474     if (m_faceOffset[1] > -1) {
1475     #pragma omp for nowait
1476     for (index_t k1=0; k1 < m_NE1; ++k1) {
1477     const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0));
1478     const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0));
1479     double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
1480     for (index_t i=0; i < numComp; ++i) {
1481     o[INDEX2(i,numComp,0)] = f_10[i]*c1 + f_11[i]*c0;
1482     o[INDEX2(i,numComp,1)] = f_10[i]*c0 + f_11[i]*c1;
1483     } /* end of component loop i */
1484     } /* end of k1 loop */
1485     } /* end of face 1 */
1486     if (m_faceOffset[2] > -1) {
1487     #pragma omp for nowait
1488     for (index_t k0=0; k0 < m_NE0; ++k0) {
1489     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0));
1490     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0));
1491     double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1492     for (index_t i=0; i < numComp; ++i) {
1493     o[INDEX2(i,numComp,0)] = f_00[i]*c1 + f_10[i]*c0;
1494     o[INDEX2(i,numComp,1)] = f_00[i]*c0 + f_10[i]*c1;
1495     } /* end of component loop i */
1496     } /* end of k0 loop */
1497     } /* end of face 2 */
1498     if (m_faceOffset[3] > -1) {
1499     #pragma omp for nowait
1500     for (index_t k0=0; k0 < m_NE0; ++k0) {
1501     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0));
1502     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0));
1503     double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1504     for (index_t i=0; i < numComp; ++i) {
1505     o[INDEX2(i,numComp,0)] = f_01[i]*c1 + f_11[i]*c0;
1506     o[INDEX2(i,numComp,1)] = f_01[i]*c0 + f_11[i]*c1;
1507     } /* end of component loop i */
1508     } /* end of k0 loop */
1509     } /* end of face 3 */
1510     /* GENERATOR SNIP_INTERPOLATE_FACES BOTTOM */
1511     } // end of parallel section
1512 caltinay 3711 }
1513 caltinay 3702 }
1514    
1515 caltinay 3691 } // end of namespace ripley
1516    

  ViewVC Help
Powered by ViewVC 1.1.26