/[escript]/branches/ripleygmg_from_3668/ripley/src/Rectangle.cpp
ViewVC logotype

Annotation of /branches/ripleygmg_from_3668/ripley/src/Rectangle.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3746 - (hide annotations)
Thu Dec 15 00:02:22 2011 UTC (8 years ago) by caltinay
File size: 60325 byte(s)
In Ripley Solution==ContinuousFunction and ReducedSolution==ReducedCF now.
Removed a test in escript that relied on failure when trying to tag Data on
Solution/ReducedSolution.

1 caltinay 3691
2     /*******************************************************
3     *
4     * Copyright (c) 2003-2011 by University of Queensland
5     * Earth Systems Science Computational Center (ESSCC)
6     * http://www.uq.edu.au/esscc
7     *
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13    
14     #include <ripley/Rectangle.h>
15     extern "C" {
16     #include "paso/SystemMatrixPattern.h"
17     }
18    
19     #if USE_SILO
20     #include <silo.h>
21     #ifdef ESYS_MPI
22     #include <pmpio.h>
23     #endif
24     #endif
25    
26     #include <iomanip>
27    
28     using namespace std;
29    
30     namespace ripley {
31    
32     Rectangle::Rectangle(int n0, int n1, double l0, double l1, int d0, int d1) :
33     RipleyDomain(2),
34     m_gNE0(n0),
35     m_gNE1(n1),
36     m_l0(l0),
37     m_l1(l1),
38     m_NX(d0),
39     m_NY(d1)
40     {
41     // ensure number of subdivisions is valid and nodes can be distributed
42     // among number of ranks
43     if (m_NX*m_NY != m_mpiInfo->size)
44     throw RipleyException("Invalid number of spatial subdivisions");
45    
46     if (n0%m_NX > 0 || n1%m_NY > 0)
47     throw RipleyException("Number of elements must be separable into number of ranks in each dimension");
48    
49     // local number of elements
50     m_NE0 = n0/m_NX;
51     m_NE1 = n1/m_NY;
52     // local number of nodes (not necessarily owned)
53     m_N0 = m_NE0+1;
54     m_N1 = m_NE1+1;
55     // bottom-left node is at (offset0,offset1) in global mesh
56     m_offset0 = m_NE0*(m_mpiInfo->rank%m_NX);
57     m_offset1 = m_NE1*(m_mpiInfo->rank/m_NX);
58     populateSampleIds();
59     }
60    
61     Rectangle::~Rectangle()
62     {
63     }
64    
65     string Rectangle::getDescription() const
66     {
67     return "ripley::Rectangle";
68     }
69    
70     bool Rectangle::operator==(const AbstractDomain& other) const
71     {
72 caltinay 3744 const Rectangle* o=dynamic_cast<const Rectangle*>(&other);
73     if (o) {
74     return (RipleyDomain::operator==(other) &&
75     m_gNE0==o->m_gNE0 && m_gNE1==o->m_gNE1
76     && m_l0==o->m_l0 && m_l1==o->m_l1
77     && m_NX==o->m_NX && m_NY==o->m_NY);
78     }
79 caltinay 3691
80     return false;
81     }
82    
83     void Rectangle::dump(const string& fileName) const
84     {
85     #if USE_SILO
86     string fn(fileName);
87     if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
88     fn+=".silo";
89     }
90    
91     const int NUM_SILO_FILES = 1;
92     const char* blockDirFmt = "/block%04d";
93     int driver=DB_HDF5;
94     string siloPath;
95     DBfile* dbfile = NULL;
96    
97     #ifdef ESYS_MPI
98     PMPIO_baton_t* baton = NULL;
99     #endif
100    
101     if (m_mpiInfo->size > 1) {
102     #ifdef ESYS_MPI
103     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
104     0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
105     PMPIO_DefaultClose, (void*)&driver);
106     // try the fallback driver in case of error
107     if (!baton && driver != DB_PDB) {
108     driver = DB_PDB;
109     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
110     0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
111     PMPIO_DefaultClose, (void*)&driver);
112     }
113     if (baton) {
114     char str[64];
115     snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
116     siloPath = str;
117     dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
118     }
119     #endif
120     } else {
121     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
122     getDescription().c_str(), driver);
123     // try the fallback driver in case of error
124     if (!dbfile && driver != DB_PDB) {
125     driver = DB_PDB;
126     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
127     getDescription().c_str(), driver);
128     }
129     }
130    
131     if (!dbfile)
132     throw RipleyException("dump: Could not create Silo file");
133    
134     /*
135     if (driver==DB_HDF5) {
136     // gzip level 1 already provides good compression with minimal
137     // performance penalty. Some tests showed that gzip levels >3 performed
138     // rather badly on escript data both in terms of time and space
139     DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
140     }
141     */
142    
143     boost::scoped_ptr<double> x(new double[m_N0]);
144     boost::scoped_ptr<double> y(new double[m_N1]);
145     double* coords[2] = { x.get(), y.get() };
146 caltinay 3697 pair<double,double> xdx = getFirstCoordAndSpacing(0);
147     pair<double,double> ydy = getFirstCoordAndSpacing(1);
148 caltinay 3691 #pragma omp parallel
149     {
150 caltinay 3722 #pragma omp for nowait
151 caltinay 3691 for (dim_t i0 = 0; i0 < m_N0; i0++) {
152 caltinay 3697 coords[0][i0]=xdx.first+i0*xdx.second;
153 caltinay 3691 }
154 caltinay 3722 #pragma omp for nowait
155 caltinay 3691 for (dim_t i1 = 0; i1 < m_N1; i1++) {
156 caltinay 3697 coords[1][i1]=ydy.first+i1*ydy.second;
157 caltinay 3691 }
158     }
159 caltinay 3697 IndexVector dims = getNumNodesPerDim();
160    
161     // write mesh
162     DBPutQuadmesh(dbfile, "mesh", NULL, coords, &dims[0], 2, DB_DOUBLE,
163 caltinay 3691 DB_COLLINEAR, NULL);
164    
165 caltinay 3697 // write node ids
166     DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], &dims[0], 2,
167     NULL, 0, DB_INT, DB_NODECENT, NULL);
168    
169     // write element ids
170     dims = getNumElementsPerDim();
171     DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
172     &dims[0], 2, NULL, 0, DB_INT, DB_ZONECENT, NULL);
173    
174     // rank 0 writes multimesh and multivar
175 caltinay 3691 if (m_mpiInfo->rank == 0) {
176     vector<string> tempstrings;
177 caltinay 3697 vector<char*> names;
178 caltinay 3691 for (dim_t i=0; i<m_mpiInfo->size; i++) {
179     stringstream path;
180     path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
181     tempstrings.push_back(path.str());
182 caltinay 3697 names.push_back((char*)tempstrings.back().c_str());
183 caltinay 3691 }
184 caltinay 3697 vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
185 caltinay 3691 DBSetDir(dbfile, "/");
186 caltinay 3697 DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
187     &types[0], NULL);
188     tempstrings.clear();
189     names.clear();
190     for (dim_t i=0; i<m_mpiInfo->size; i++) {
191     stringstream path;
192     path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
193     tempstrings.push_back(path.str());
194     names.push_back((char*)tempstrings.back().c_str());
195     }
196     types.assign(m_mpiInfo->size, DB_QUADVAR);
197     DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
198     &types[0], NULL);
199     tempstrings.clear();
200     names.clear();
201     for (dim_t i=0; i<m_mpiInfo->size; i++) {
202     stringstream path;
203     path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
204     tempstrings.push_back(path.str());
205     names.push_back((char*)tempstrings.back().c_str());
206     }
207     DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
208     &types[0], NULL);
209 caltinay 3691 }
210    
211     if (m_mpiInfo->size > 1) {
212     #ifdef ESYS_MPI
213     PMPIO_HandOffBaton(baton, dbfile);
214     PMPIO_Finish(baton);
215     #endif
216     } else {
217     DBClose(dbfile);
218     }
219    
220     #else // USE_SILO
221     throw RipleyException("dump(): no Silo support");
222     #endif
223     }
224    
225 caltinay 3697 const int* Rectangle::borrowSampleReferenceIDs(int fsType) const
226 caltinay 3691 {
227 caltinay 3697 switch (fsType) {
228 caltinay 3691 case Nodes:
229     return &m_nodeId[0];
230     case Elements:
231 caltinay 3733 case ReducedElements:
232 caltinay 3691 return &m_elementId[0];
233     case FaceElements:
234 caltinay 3733 case ReducedFaceElements:
235 caltinay 3691 return &m_faceId[0];
236     default:
237     break;
238     }
239    
240     stringstream msg;
241     msg << "borrowSampleReferenceIDs() not implemented for function space type "
242 caltinay 3701 << functionSpaceTypeAsString(fsType);
243 caltinay 3691 throw RipleyException(msg.str());
244     }
245    
246     bool Rectangle::ownSample(int fsCode, index_t id) const
247     {
248     #ifdef ESYS_MPI
249 caltinay 3702 if (fsCode != ReducedNodes) {
250 caltinay 3699 const index_t myFirst=m_nodeDistribution[m_mpiInfo->rank];
251     const index_t myLast=m_nodeDistribution[m_mpiInfo->rank+1]-1;
252 caltinay 3691 return (m_nodeId[id]>=myFirst && m_nodeId[id]<=myLast);
253 caltinay 3702 } else {
254     stringstream msg;
255     msg << "ownSample() not implemented for "
256     << functionSpaceTypeAsString(fsCode);
257     throw RipleyException(msg.str());
258     }
259 caltinay 3691 #else
260     return true;
261     #endif
262     }
263    
264 caltinay 3702 void Rectangle::setToGradient(escript::Data& out, const escript::Data& cIn) const
265 caltinay 3691 {
266 caltinay 3702 escript::Data& in = *const_cast<escript::Data*>(&cIn);
267 caltinay 3701 const dim_t numComp = in.getDataPointSize();
268 caltinay 3702 const double h0 = m_l0/m_gNE0;
269     const double h1 = m_l1/m_gNE1;
270 caltinay 3724 const double cx0 = -1./h0;
271     const double cx1 = -.78867513459481288225/h0;
272     const double cx2 = -.5/h0;
273     const double cx3 = -.21132486540518711775/h0;
274     const double cx4 = .21132486540518711775/h0;
275     const double cx5 = .5/h0;
276     const double cx6 = .78867513459481288225/h0;
277     const double cx7 = 1./h0;
278     const double cy0 = -1./h1;
279     const double cy1 = -.78867513459481288225/h1;
280     const double cy2 = -.5/h1;
281     const double cy3 = -.21132486540518711775/h1;
282     const double cy4 = .21132486540518711775/h1;
283     const double cy5 = .5/h1;
284     const double cy6 = .78867513459481288225/h1;
285     const double cy7 = 1./h1;
286    
287 caltinay 3702 if (out.getFunctionSpace().getTypeCode() == Elements) {
288 caltinay 3724 /*** GENERATOR SNIP_GRAD_ELEMENTS TOP */
289 caltinay 3697 #pragma omp parallel for
290 caltinay 3707 for (index_t k1=0; k1 < m_NE1; ++k1) {
291     for (index_t k0=0; k0 < m_NE0; ++k0) {
292 caltinay 3702 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0));
293     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0));
294     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0));
295     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0));
296     double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0));
297     for (index_t i=0; i < numComp; ++i) {
298 caltinay 3724 o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx1 + f_01[i]*cx3 + f_10[i]*cx6 + f_11[i]*cx4;
299     o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy1 + f_01[i]*cy6 + f_10[i]*cy3 + f_11[i]*cy4;
300     o[INDEX3(i,0,1,numComp,2)] = f_00[i]*cx1 + f_01[i]*cx3 + f_10[i]*cx6 + f_11[i]*cx4;
301     o[INDEX3(i,1,1,numComp,2)] = f_00[i]*cy3 + f_01[i]*cy4 + f_10[i]*cy1 + f_11[i]*cy6;
302     o[INDEX3(i,0,2,numComp,2)] = f_00[i]*cx3 + f_01[i]*cx1 + f_10[i]*cx4 + f_11[i]*cx6;
303     o[INDEX3(i,1,2,numComp,2)] = f_00[i]*cy1 + f_01[i]*cy6 + f_10[i]*cy3 + f_11[i]*cy4;
304     o[INDEX3(i,0,3,numComp,2)] = f_00[i]*cx3 + f_01[i]*cx1 + f_10[i]*cx4 + f_11[i]*cx6;
305     o[INDEX3(i,1,3,numComp,2)] = f_00[i]*cy3 + f_01[i]*cy4 + f_10[i]*cy1 + f_11[i]*cy6;
306 caltinay 3702 } /* end of component loop i */
307     } /* end of k0 loop */
308     } /* end of k1 loop */
309     /* GENERATOR SNIP_GRAD_ELEMENTS BOTTOM */
310 caltinay 3711 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
311 caltinay 3724 /*** GENERATOR SNIP_GRAD_REDUCED_ELEMENTS TOP */
312 caltinay 3711 #pragma omp parallel for
313     for (index_t k1=0; k1 < m_NE1; ++k1) {
314     for (index_t k0=0; k0 < m_NE0; ++k0) {
315     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0));
316     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0));
317     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0));
318     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0));
319     double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0));
320     for (index_t i=0; i < numComp; ++i) {
321 caltinay 3724 o[INDEX3(i,0,0,numComp,2)] = cx5*(f_10[i] + f_11[i]) + cx2*(f_00[i] + f_01[i]);
322     o[INDEX3(i,1,0,numComp,2)] = cy2*(f_00[i] + f_10[i]) + cy5*(f_01[i] + f_11[i]);
323 caltinay 3711 } /* end of component loop i */
324     } /* end of k0 loop */
325     } /* end of k1 loop */
326     /* GENERATOR SNIP_GRAD_REDUCED_ELEMENTS BOTTOM */
327 caltinay 3707 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
328 caltinay 3722 #pragma omp parallel
329     {
330 caltinay 3724 /*** GENERATOR SNIP_GRAD_FACES TOP */
331 caltinay 3722 if (m_faceOffset[0] > -1) {
332     #pragma omp for nowait
333     for (index_t k1=0; k1 < m_NE1; ++k1) {
334     const register double* f_10 = in.getSampleDataRO(INDEX2(1,k1, m_N0));
335     const register double* f_11 = in.getSampleDataRO(INDEX2(1,k1+1, m_N0));
336     const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0));
337     const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0));
338     double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
339     for (index_t i=0; i < numComp; ++i) {
340 caltinay 3724 o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx1 + f_01[i]*cx3 + f_10[i]*cx6 + f_11[i]*cx4;
341     o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy0 + f_01[i]*cy7;
342     o[INDEX3(i,0,1,numComp,2)] = f_00[i]*cx3 + f_01[i]*cx1 + f_10[i]*cx4 + f_11[i]*cx6;
343     o[INDEX3(i,1,1,numComp,2)] = f_00[i]*cy0 + f_01[i]*cy7;
344 caltinay 3722 } /* end of component loop i */
345     } /* end of k1 loop */
346     } /* end of face 0 */
347     if (m_faceOffset[1] > -1) {
348     #pragma omp for nowait
349     for (index_t k1=0; k1 < m_NE1; ++k1) {
350     const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0));
351     const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0));
352     const register double* f_01 = in.getSampleDataRO(INDEX2(m_N0-2,k1+1, m_N0));
353     const register double* f_00 = in.getSampleDataRO(INDEX2(m_N0-2,k1, m_N0));
354     double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
355     for (index_t i=0; i < numComp; ++i) {
356 caltinay 3724 o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx1 + f_01[i]*cx3 + f_10[i]*cx6 + f_11[i]*cx4;
357     o[INDEX3(i,1,0,numComp,2)] = f_10[i]*cy0 + f_11[i]*cy7;
358     o[INDEX3(i,0,1,numComp,2)] = f_00[i]*cx3 + f_01[i]*cx1 + f_10[i]*cx4 + f_11[i]*cx6;
359     o[INDEX3(i,1,1,numComp,2)] = f_10[i]*cy0 + f_11[i]*cy7;
360 caltinay 3722 } /* end of component loop i */
361     } /* end of k1 loop */
362     } /* end of face 1 */
363     if (m_faceOffset[2] > -1) {
364     #pragma omp for nowait
365     for (index_t k0=0; k0 < m_NE0; ++k0) {
366     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0));
367     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0));
368     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,1, m_N0));
369     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,1, m_N0));
370     double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
371     for (index_t i=0; i < numComp; ++i) {
372 caltinay 3724 o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx0 + f_10[i]*cx7;
373     o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy1 + f_01[i]*cy6 + f_10[i]*cy3 + f_11[i]*cy4;
374     o[INDEX3(i,0,1,numComp,2)] = f_00[i]*cx0 + f_10[i]*cx7;
375     o[INDEX3(i,1,1,numComp,2)] = f_00[i]*cy3 + f_01[i]*cy4 + f_10[i]*cy1 + f_11[i]*cy6;
376 caltinay 3722 } /* end of component loop i */
377     } /* end of k0 loop */
378     } /* end of face 2 */
379     if (m_faceOffset[3] > -1) {
380     #pragma omp for nowait
381     for (index_t k0=0; k0 < m_NE0; ++k0) {
382     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0));
383     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0));
384     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,m_N1-2, m_N0));
385     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,m_N1-2, m_N0));
386     double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
387     for (index_t i=0; i < numComp; ++i) {
388 caltinay 3724 o[INDEX3(i,0,0,numComp,2)] = f_01[i]*cx0 + f_11[i]*cx7;
389     o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy1 + f_01[i]*cy6 + f_10[i]*cy3 + f_11[i]*cy4;
390     o[INDEX3(i,0,1,numComp,2)] = f_01[i]*cx0 + f_11[i]*cx7;
391     o[INDEX3(i,1,1,numComp,2)] = f_00[i]*cy3 + f_01[i]*cy4 + f_10[i]*cy1 + f_11[i]*cy6;
392 caltinay 3722 } /* end of component loop i */
393     } /* end of k0 loop */
394     } /* end of face 3 */
395     /* GENERATOR SNIP_GRAD_FACES BOTTOM */
396     } // end of parallel section
397 caltinay 3711 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
398 caltinay 3722 #pragma omp parallel
399     {
400 caltinay 3724 /*** GENERATOR SNIP_GRAD_REDUCED_FACES TOP */
401 caltinay 3722 if (m_faceOffset[0] > -1) {
402     #pragma omp for nowait
403     for (index_t k1=0; k1 < m_NE1; ++k1) {
404     const register double* f_10 = in.getSampleDataRO(INDEX2(1,k1, m_N0));
405     const register double* f_11 = in.getSampleDataRO(INDEX2(1,k1+1, m_N0));
406     const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0));
407     const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0));
408     double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
409     for (index_t i=0; i < numComp; ++i) {
410 caltinay 3724 o[INDEX3(i,0,0,numComp,2)] = cx5*(f_10[i] + f_11[i]) + cx2*(f_00[i] + f_01[i]);
411     o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy0 + f_01[i]*cy7;
412 caltinay 3722 } /* end of component loop i */
413     } /* end of k1 loop */
414     } /* end of face 0 */
415     if (m_faceOffset[1] > -1) {
416     #pragma omp for nowait
417     for (index_t k1=0; k1 < m_NE1; ++k1) {
418     const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0));
419     const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0));
420     const register double* f_01 = in.getSampleDataRO(INDEX2(m_N0-2,k1+1, m_N0));
421     const register double* f_00 = in.getSampleDataRO(INDEX2(m_N0-2,k1, m_N0));
422     double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
423     for (index_t i=0; i < numComp; ++i) {
424 caltinay 3724 o[INDEX3(i,0,0,numComp,2)] = cx5*(f_10[i] + f_11[i]) + cx2*(f_00[i] + f_01[i]);
425     o[INDEX3(i,1,0,numComp,2)] = f_10[i]*cy0 + f_11[i]*cy7;
426 caltinay 3722 } /* end of component loop i */
427     } /* end of k1 loop */
428     } /* end of face 1 */
429     if (m_faceOffset[2] > -1) {
430     #pragma omp for nowait
431     for (index_t k0=0; k0 < m_NE0; ++k0) {
432     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0));
433     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0));
434     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,1, m_N0));
435     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,1, m_N0));
436     double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
437     for (index_t i=0; i < numComp; ++i) {
438 caltinay 3724 o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx0 + f_10[i]*cx7;
439     o[INDEX3(i,1,0,numComp,2)] = cy2*(f_00[i] + f_10[i]) + cy5*(f_01[i] + f_11[i]);
440 caltinay 3722 } /* end of component loop i */
441     } /* end of k0 loop */
442     } /* end of face 2 */
443     if (m_faceOffset[3] > -1) {
444     #pragma omp for nowait
445     for (index_t k0=0; k0 < m_NE0; ++k0) {
446     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0));
447     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0));
448     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,m_N1-2, m_N0));
449     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,m_N1-2, m_N0));
450     double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
451     for (index_t i=0; i < numComp; ++i) {
452 caltinay 3724 o[INDEX3(i,0,0,numComp,2)] = f_01[i]*cx0 + f_11[i]*cx7;
453     o[INDEX3(i,1,0,numComp,2)] = cy5*(f_01[i] + f_11[i]) + cy2*(f_00[i] + f_10[i]);
454 caltinay 3722 } /* end of component loop i */
455     } /* end of k0 loop */
456     } /* end of face 3 */
457     /* GENERATOR SNIP_GRAD_REDUCED_FACES BOTTOM */
458     } // end of parallel section
459 caltinay 3702 } else {
460 caltinay 3707 stringstream msg;
461     msg << "setToGradient() not implemented for "
462     << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
463     throw RipleyException(msg.str());
464 caltinay 3702 }
465 caltinay 3701 }
466 caltinay 3697
467 caltinay 3713 void Rectangle::setToIntegrals(vector<double>& integrals, const escript::Data& arg) const
468     {
469     escript::Data& in = *const_cast<escript::Data*>(&arg);
470     const dim_t numComp = in.getDataPointSize();
471     const double h0 = m_l0/m_gNE0;
472     const double h1 = m_l1/m_gNE1;
473     if (arg.getFunctionSpace().getTypeCode() == Elements) {
474     const double w_0 = h0*h1/4.;
475     #pragma omp parallel
476     {
477     vector<double> int_local(numComp, 0);
478 caltinay 3722 #pragma omp for nowait
479 caltinay 3713 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
480     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
481     const double* f = in.getSampleDataRO(INDEX2(k0, k1, m_NE0));
482     for (index_t i=0; i < numComp; ++i) {
483     const register double f_0 = f[INDEX2(i,0,numComp)];
484     const register double f_1 = f[INDEX2(i,1,numComp)];
485     const register double f_2 = f[INDEX2(i,2,numComp)];
486     const register double f_3 = f[INDEX2(i,3,numComp)];
487     int_local[i]+=(f_0+f_1+f_2+f_3)*w_0;
488     } /* end of component loop i */
489     } /* end of k0 loop */
490     } /* end of k1 loop */
491    
492     #pragma omp critical
493     for (index_t i=0; i<numComp; i++)
494     integrals[i]+=int_local[i];
495 caltinay 3722 } // end of parallel section
496 caltinay 3713 } else if (arg.getFunctionSpace().getTypeCode() == ReducedElements) {
497     const double w_0 = h0*h1;
498     #pragma omp parallel
499     {
500     vector<double> int_local(numComp, 0);
501 caltinay 3722 #pragma omp for nowait
502 caltinay 3713 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
503     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
504     const double* f = in.getSampleDataRO(INDEX2(k0, k1, m_NE0));
505     for (index_t i=0; i < numComp; ++i) {
506     int_local[i]+=f[i]*w_0;
507     } /* end of component loop i */
508     } /* end of k0 loop */
509     } /* end of k1 loop */
510    
511     #pragma omp critical
512     for (index_t i=0; i<numComp; i++)
513     integrals[i]+=int_local[i];
514 caltinay 3722 } // end of parallel section
515 caltinay 3713 } else if (arg.getFunctionSpace().getTypeCode() == FaceElements) {
516     const double w_0 = h0/2.;
517     const double w_1 = h1/2.;
518     #pragma omp parallel
519     {
520     vector<double> int_local(numComp, 0);
521     if (m_faceOffset[0] > -1) {
522 caltinay 3722 #pragma omp for nowait
523 caltinay 3713 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
524     const double* f = in.getSampleDataRO(m_faceOffset[0]+k1);
525     for (index_t i=0; i < numComp; ++i) {
526     const register double f_0 = f[INDEX2(i,0,numComp)];
527     const register double f_1 = f[INDEX2(i,1,numComp)];
528     int_local[i]+=(f_0+f_1)*w_1;
529     } /* end of component loop i */
530     } /* end of k1 loop */
531     }
532    
533     if (m_faceOffset[1] > -1) {
534 caltinay 3722 #pragma omp for nowait
535 caltinay 3713 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
536     const double* f = in.getSampleDataRO(m_faceOffset[1]+k1);
537     for (index_t i=0; i < numComp; ++i) {
538     const register double f_0 = f[INDEX2(i,0,numComp)];
539     const register double f_1 = f[INDEX2(i,1,numComp)];
540     int_local[i]+=(f_0+f_1)*w_1;
541     } /* end of component loop i */
542     } /* end of k1 loop */
543     }
544    
545     if (m_faceOffset[2] > -1) {
546 caltinay 3722 #pragma omp for nowait
547 caltinay 3713 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
548     const double* f = in.getSampleDataRO(m_faceOffset[2]+k0);
549     for (index_t i=0; i < numComp; ++i) {
550     const register double f_0 = f[INDEX2(i,0,numComp)];
551     const register double f_1 = f[INDEX2(i,1,numComp)];
552     int_local[i]+=(f_0+f_1)*w_0;
553     } /* end of component loop i */
554     } /* end of k0 loop */
555     }
556    
557     if (m_faceOffset[3] > -1) {
558 caltinay 3722 #pragma omp for nowait
559 caltinay 3713 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
560     const double* f = in.getSampleDataRO(m_faceOffset[3]+k0);
561     for (index_t i=0; i < numComp; ++i) {
562     const register double f_0 = f[INDEX2(i,0,numComp)];
563     const register double f_1 = f[INDEX2(i,1,numComp)];
564     int_local[i]+=(f_0+f_1)*w_0;
565     } /* end of component loop i */
566     } /* end of k0 loop */
567     }
568    
569     #pragma omp critical
570     for (index_t i=0; i<numComp; i++)
571     integrals[i]+=int_local[i];
572 caltinay 3722 } // end of parallel section
573 caltinay 3713 } else if (arg.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
574     #pragma omp parallel
575     {
576     vector<double> int_local(numComp, 0);
577     if (m_faceOffset[0] > -1) {
578 caltinay 3722 #pragma omp for nowait
579 caltinay 3713 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
580     const double* f = in.getSampleDataRO(m_faceOffset[0]+k1);
581     for (index_t i=0; i < numComp; ++i) {
582     int_local[i]+=f[i]*h1;
583     } /* end of component loop i */
584     } /* end of k1 loop */
585     }
586    
587     if (m_faceOffset[1] > -1) {
588 caltinay 3722 #pragma omp for nowait
589 caltinay 3713 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
590     const double* f = in.getSampleDataRO(m_faceOffset[1]+k1);
591     for (index_t i=0; i < numComp; ++i) {
592     int_local[i]+=f[i]*h1;
593     } /* end of component loop i */
594     } /* end of k1 loop */
595     }
596    
597     if (m_faceOffset[2] > -1) {
598 caltinay 3722 #pragma omp for nowait
599 caltinay 3713 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
600     const double* f = in.getSampleDataRO(m_faceOffset[2]+k0);
601     for (index_t i=0; i < numComp; ++i) {
602     int_local[i]+=f[i]*h0;
603     } /* end of component loop i */
604     } /* end of k0 loop */
605     }
606    
607     if (m_faceOffset[3] > -1) {
608 caltinay 3722 #pragma omp for nowait
609 caltinay 3713 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
610     const double* f = in.getSampleDataRO(m_faceOffset[3]+k0);
611     for (index_t i=0; i < numComp; ++i) {
612     int_local[i]+=f[i]*h0;
613     } /* end of component loop i */
614     } /* end of k0 loop */
615     }
616    
617     #pragma omp critical
618     for (index_t i=0; i<numComp; i++)
619     integrals[i]+=int_local[i];
620 caltinay 3722 } // end of parallel section
621 caltinay 3713 } else {
622     stringstream msg;
623     msg << "setToIntegrals() not implemented for "
624     << functionSpaceTypeAsString(arg.getFunctionSpace().getTypeCode());
625     throw RipleyException(msg.str());
626     }
627     }
628    
629 caltinay 3722 void Rectangle::setToNormal(escript::Data& out) const
630     {
631     if (out.getFunctionSpace().getTypeCode() == FaceElements) {
632     #pragma omp parallel
633     {
634     if (m_faceOffset[0] > -1) {
635     #pragma omp for nowait
636     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
637     double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
638     // set vector at two quadrature points
639     *o++ = -1.;
640     *o++ = 0.;
641     *o++ = -1.;
642     *o = 0.;
643     }
644     }
645    
646     if (m_faceOffset[1] > -1) {
647     #pragma omp for nowait
648     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
649     double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
650     // set vector at two quadrature points
651     *o++ = 1.;
652     *o++ = 0.;
653     *o++ = 1.;
654     *o = 0.;
655     }
656     }
657    
658     if (m_faceOffset[2] > -1) {
659     #pragma omp for nowait
660     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
661     double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
662     // set vector at two quadrature points
663     *o++ = 0.;
664     *o++ = -1.;
665     *o++ = 0.;
666     *o = -1.;
667     }
668     }
669    
670     if (m_faceOffset[3] > -1) {
671     #pragma omp for nowait
672     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
673     double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
674     // set vector at two quadrature points
675     *o++ = 0.;
676     *o++ = 1.;
677     *o++ = 0.;
678     *o = 1.;
679     }
680     }
681     } // end of parallel section
682     } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
683     #pragma omp parallel
684     {
685     if (m_faceOffset[0] > -1) {
686     #pragma omp for nowait
687     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
688     double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
689     *o++ = -1.;
690     *o = 0.;
691     }
692     }
693    
694     if (m_faceOffset[1] > -1) {
695     #pragma omp for nowait
696     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
697     double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
698     *o++ = 1.;
699     *o = 0.;
700     }
701     }
702    
703     if (m_faceOffset[2] > -1) {
704     #pragma omp for nowait
705     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
706     double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
707     *o++ = 0.;
708     *o = -1.;
709     }
710     }
711    
712     if (m_faceOffset[3] > -1) {
713     #pragma omp for nowait
714     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
715     double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
716     *o++ = 0.;
717     *o = 1.;
718     }
719     }
720     } // end of parallel section
721    
722     } else {
723     stringstream msg;
724     msg << "setToNormal() not implemented for "
725     << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
726     throw RipleyException(msg.str());
727     }
728     }
729    
730 caltinay 3701 void Rectangle::addPDEToSystem(escript::AbstractSystemMatrix& mat,
731     escript::Data& rhs, const escript::Data& A, const escript::Data& B,
732     const escript::Data& C, const escript::Data& D,
733     const escript::Data& X, const escript::Data& Y,
734     const escript::Data& d, const escript::Data& y,
735     const escript::Data& d_contact, const escript::Data& y_contact,
736     const escript::Data& d_dirac, const escript::Data& y_dirac) const
737     {
738     throw RipleyException("addPDEToSystem() not implemented");
739     }
740    
741 caltinay 3691 Paso_SystemMatrixPattern* Rectangle::getPattern(bool reducedRowOrder,
742     bool reducedColOrder) const
743     {
744     if (reducedRowOrder || reducedColOrder)
745     throw RipleyException("getPattern() not implemented for reduced order");
746    
747 caltinay 3699 // connector
748     RankVector neighbour;
749     IndexVector offsetInShared(1,0);
750     IndexVector sendShared, recvShared;
751     const IndexVector faces=getNumFacesPerBoundary();
752     const index_t left = (m_offset0==0 ? 0 : 1);
753     const index_t bottom = (m_offset1==0 ? 0 : 1);
754     // corner node from bottom-left
755     if (faces[0] == 0 && faces[2] == 0) {
756     neighbour.push_back(m_mpiInfo->rank-m_NX-1);
757     offsetInShared.push_back(offsetInShared.back()+1);
758     sendShared.push_back(m_nodeId[m_N0+left]);
759     recvShared.push_back(m_nodeId[0]);
760     }
761     // bottom edge
762     if (faces[2] == 0) {
763     neighbour.push_back(m_mpiInfo->rank-m_NX);
764     offsetInShared.push_back(offsetInShared.back()+m_N0-left);
765     for (dim_t i=left; i<m_N0; i++) {
766     // easy case, we know the neighbour id's
767     sendShared.push_back(m_nodeId[m_N0+i]);
768     recvShared.push_back(m_nodeId[i]);
769     }
770     }
771     // corner node from bottom-right
772     if (faces[1] == 0 && faces[2] == 0) {
773     neighbour.push_back(m_mpiInfo->rank-m_NX+1);
774     const index_t N0=(neighbour.back()%m_NX == 0 ? m_N0 : m_N0-1);
775     const index_t N1=(neighbour.back()/m_NX == 0 ? m_N1 : m_N1-1);
776     const index_t first=m_nodeDistribution[neighbour.back()];
777     offsetInShared.push_back(offsetInShared.back()+1);
778     sendShared.push_back(m_nodeId[(bottom+1)*m_N0-1]);
779     recvShared.push_back(first+N0*(N1-1));
780     }
781     // left edge
782     if (faces[0] == 0) {
783     neighbour.push_back(m_mpiInfo->rank-1);
784     offsetInShared.push_back(offsetInShared.back()+m_N1-bottom);
785     for (dim_t i=bottom; i<m_N1; i++) {
786     // easy case, we know the neighbour id's
787     sendShared.push_back(m_nodeId[i*m_N0+1]);
788     recvShared.push_back(m_nodeId[i*m_N0]);
789     }
790     }
791     // right edge
792     if (faces[1] == 0) {
793     neighbour.push_back(m_mpiInfo->rank+1);
794     const index_t rightN0=(neighbour.back()%m_NX == 0 ? m_N0 : m_N0-1);
795     const index_t first=m_nodeDistribution[neighbour.back()];
796     offsetInShared.push_back(offsetInShared.back()+m_N1-bottom);
797     for (dim_t i=bottom; i<m_N1; i++) {
798     sendShared.push_back(m_nodeId[(i+1)*m_N0-1]);
799     recvShared.push_back(first+rightN0*(i-bottom));
800     }
801     }
802     // corner node from top-left
803     if (faces[0] == 0 && faces[3] == 0) {
804     neighbour.push_back(m_mpiInfo->rank+m_NX-1);
805     const index_t N0=(neighbour.back()%m_NX == 0 ? m_N0 : m_N0-1);
806     const index_t first=m_nodeDistribution[neighbour.back()];
807     offsetInShared.push_back(offsetInShared.back()+1);
808     sendShared.push_back(m_nodeId[m_N0*(m_N1-1)+left]);
809     recvShared.push_back(first+N0-1);
810     }
811     // top edge
812     if (faces[3] == 0) {
813     neighbour.push_back(m_mpiInfo->rank+m_NX);
814     const index_t first=m_nodeDistribution[neighbour.back()];
815     offsetInShared.push_back(offsetInShared.back()+m_N0-left);
816     for (dim_t i=left; i<m_N0; i++) {
817     sendShared.push_back(m_nodeId[m_N0*(m_N1-1)+i]);
818     recvShared.push_back(first+i-left);
819     }
820     }
821     // corner node from top-right
822     if (faces[1] == 0 && faces[3] == 0) {
823     neighbour.push_back(m_mpiInfo->rank+m_NX+1);
824     const index_t first=m_nodeDistribution[neighbour.back()];
825     offsetInShared.push_back(offsetInShared.back()+1);
826     sendShared.push_back(m_nodeId[m_N0*m_N1-1]);
827     recvShared.push_back(first);
828     }
829     const int numDOF=m_nodeDistribution[m_mpiInfo->rank+1]-m_nodeDistribution[m_mpiInfo->rank];
830 caltinay 3702 /*
831 caltinay 3699 cout << "--- rcv_shcomp ---" << endl;
832     cout << "numDOF=" << numDOF << ", numNeighbors=" << neighbour.size() << endl;
833     for (size_t i=0; i<neighbour.size(); i++) {
834     cout << "neighbor[" << i << "]=" << neighbour[i]
835     << " offsetInShared[" << i+1 << "]=" << offsetInShared[i+1] << endl;
836     }
837     for (size_t i=0; i<recvShared.size(); i++) {
838     cout << "shared[" << i << "]=" << recvShared[i] << endl;
839     }
840     cout << "--- snd_shcomp ---" << endl;
841     for (size_t i=0; i<sendShared.size(); i++) {
842     cout << "shared[" << i << "]=" << sendShared[i] << endl;
843     }
844 caltinay 3702 */
845 caltinay 3691
846     Paso_SharedComponents *snd_shcomp = Paso_SharedComponents_alloc(
847 caltinay 3699 numDOF, neighbour.size(), &neighbour[0], &sendShared[0],
848 caltinay 3697 &offsetInShared[0], 1, 0, m_mpiInfo);
849 caltinay 3691 Paso_SharedComponents *rcv_shcomp = Paso_SharedComponents_alloc(
850 caltinay 3699 numDOF, neighbour.size(), &neighbour[0], &recvShared[0],
851 caltinay 3697 &offsetInShared[0], 1, 0, m_mpiInfo);
852 caltinay 3691 Paso_Connector* connector = Paso_Connector_alloc(snd_shcomp, rcv_shcomp);
853 caltinay 3699 Paso_SharedComponents_free(snd_shcomp);
854     Paso_SharedComponents_free(rcv_shcomp);
855 caltinay 3691
856     // create patterns
857 caltinay 3699 dim_t M, N;
858     IndexVector ptr(1,0);
859     IndexVector index;
860    
861     // main pattern
862     for (index_t i=0; i<numDOF; i++) {
863     // always add the node itself
864     index.push_back(i);
865     int num=insertNeighbours(index, i);
866     ptr.push_back(ptr.back()+num+1);
867     }
868     M=N=ptr.size()-1;
869     // paso will manage the memory
870     index_t* indexC = MEMALLOC(index.size(),index_t);
871     index_t* ptrC = MEMALLOC(ptr.size(), index_t);
872     copy(index.begin(), index.end(), indexC);
873     copy(ptr.begin(), ptr.end(), ptrC);
874 caltinay 3691 Paso_Pattern *mainPattern = Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT,
875 caltinay 3699 M, N, ptrC, indexC);
876 caltinay 3691
877 caltinay 3702 /*
878 caltinay 3699 cout << "--- main_pattern ---" << endl;
879     cout << "M=" << M << ", N=" << N << endl;
880     for (size_t i=0; i<ptr.size(); i++) {
881     cout << "ptr[" << i << "]=" << ptr[i] << endl;
882     }
883     for (size_t i=0; i<index.size(); i++) {
884     cout << "index[" << i << "]=" << index[i] << endl;
885     }
886 caltinay 3702 */
887 caltinay 3699
888     ptr.clear();
889     index.clear();
890    
891     // column & row couple patterns
892     Paso_Pattern *colCouplePattern, *rowCouplePattern;
893     generateCouplePatterns(&colCouplePattern, &rowCouplePattern);
894    
895     // allocate paso distribution
896     Paso_Distribution* distribution = Paso_Distribution_alloc(m_mpiInfo,
897     const_cast<index_t*>(&m_nodeDistribution[0]), 1, 0);
898    
899 caltinay 3691 Paso_SystemMatrixPattern* pattern = Paso_SystemMatrixPattern_alloc(
900     MATRIX_FORMAT_DEFAULT, distribution, distribution,
901     mainPattern, colCouplePattern, rowCouplePattern,
902     connector, connector);
903     Paso_Pattern_free(mainPattern);
904     Paso_Pattern_free(colCouplePattern);
905     Paso_Pattern_free(rowCouplePattern);
906     Paso_Distribution_free(distribution);
907 caltinay 3697 return pattern;
908 caltinay 3691 }
909    
910     void Rectangle::Print_Mesh_Info(const bool full) const
911     {
912     RipleyDomain::Print_Mesh_Info(full);
913     if (full) {
914     cout << " Id Coordinates" << endl;
915     cout.precision(15);
916     cout.setf(ios::scientific, ios::floatfield);
917 caltinay 3697 pair<double,double> xdx = getFirstCoordAndSpacing(0);
918     pair<double,double> ydy = getFirstCoordAndSpacing(1);
919 caltinay 3691 for (index_t i=0; i < getNumNodes(); i++) {
920     cout << " " << setw(5) << m_nodeId[i]
921 caltinay 3697 << " " << xdx.first+(i%m_N0)*xdx.second
922     << " " << ydy.first+(i/m_N0)*ydy.second << endl;
923 caltinay 3691 }
924     }
925     }
926    
927 caltinay 3697 IndexVector Rectangle::getNumNodesPerDim() const
928     {
929     IndexVector ret;
930     ret.push_back(m_N0);
931     ret.push_back(m_N1);
932     return ret;
933     }
934    
935     IndexVector Rectangle::getNumElementsPerDim() const
936     {
937     IndexVector ret;
938     ret.push_back(m_NE0);
939     ret.push_back(m_NE1);
940     return ret;
941     }
942    
943     IndexVector Rectangle::getNumFacesPerBoundary() const
944     {
945     IndexVector ret(4, 0);
946     //left
947     if (m_offset0==0)
948     ret[0]=m_NE1;
949     //right
950     if (m_mpiInfo->rank%m_NX==m_NX-1)
951     ret[1]=m_NE1;
952     //bottom
953     if (m_offset1==0)
954     ret[2]=m_NE0;
955     //top
956     if (m_mpiInfo->rank/m_NX==m_NY-1)
957     ret[3]=m_NE0;
958     return ret;
959     }
960    
961     pair<double,double> Rectangle::getFirstCoordAndSpacing(dim_t dim) const
962     {
963     if (dim==0) {
964     return pair<double,double>((m_l0*m_offset0)/m_gNE0, m_l0/m_gNE0);
965     } else if (dim==1) {
966     return pair<double,double>((m_l1*m_offset1)/m_gNE1, m_l1/m_gNE1);
967     }
968     throw RipleyException("getFirstCoordAndSpacing(): invalid argument");
969     }
970    
971 caltinay 3691 //protected
972     dim_t Rectangle::getNumFaceElements() const
973     {
974 caltinay 3699 const IndexVector faces = getNumFacesPerBoundary();
975 caltinay 3691 dim_t n=0;
976 caltinay 3699 for (size_t i=0; i<faces.size(); i++)
977     n+=faces[i];
978 caltinay 3691 return n;
979     }
980    
981     //protected
982     void Rectangle::assembleCoordinates(escript::Data& arg) const
983     {
984     escriptDataC x = arg.getDataC();
985     int numDim = m_numDim;
986     if (!isDataPointShapeEqual(&x, 1, &numDim))
987     throw RipleyException("setToX: Invalid Data object shape");
988     if (!numSamplesEqual(&x, 1, getNumNodes()))
989     throw RipleyException("setToX: Illegal number of samples in Data object");
990    
991 caltinay 3697 pair<double,double> xdx = getFirstCoordAndSpacing(0);
992     pair<double,double> ydy = getFirstCoordAndSpacing(1);
993 caltinay 3691 arg.requireWrite();
994     #pragma omp parallel for
995     for (dim_t i1 = 0; i1 < m_N1; i1++) {
996     for (dim_t i0 = 0; i0 < m_N0; i0++) {
997     double* point = arg.getSampleDataRW(i0+m_N0*i1);
998 caltinay 3697 point[0] = xdx.first+i0*xdx.second;
999     point[1] = ydy.first+i1*ydy.second;
1000 caltinay 3691 }
1001     }
1002     }
1003    
1004     //private
1005     void Rectangle::populateSampleIds()
1006     {
1007 caltinay 3697 // identifiers are ordered from left to right, bottom to top on each rank,
1008     // except for the shared nodes which are owned by the rank below / to the
1009     // left of the current rank
1010    
1011     // build node distribution vector first.
1012     // m_nodeDistribution[i] is the first node id on rank i, that is
1013     // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes
1014     m_nodeDistribution.assign(m_mpiInfo->size+1, 0);
1015     m_nodeDistribution[1]=getNumNodes();
1016     for (dim_t k=1; k<m_mpiInfo->size-1; k++) {
1017     const index_t x=k%m_NX;
1018     const index_t y=k/m_NX;
1019     index_t numNodes=getNumNodes();
1020     if (x>0)
1021     numNodes-=m_N1;
1022     if (y>0)
1023     numNodes-=m_N0;
1024     if (x>0 && y>0)
1025     numNodes++; // subtracted corner twice -> fix that
1026     m_nodeDistribution[k+1]=m_nodeDistribution[k]+numNodes;
1027     }
1028     m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal();
1029    
1030 caltinay 3691 m_nodeId.resize(getNumNodes());
1031 caltinay 3697
1032     // the bottom row and left column are not owned by this rank so the
1033     // identifiers need to be computed accordingly
1034     const index_t left = (m_offset0==0 ? 0 : 1);
1035     const index_t bottom = (m_offset1==0 ? 0 : 1);
1036     if (left>0) {
1037     const int neighbour=m_mpiInfo->rank-1;
1038     const index_t leftN0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1039 caltinay 3691 #pragma omp parallel for
1040 caltinay 3697 for (dim_t i1=bottom; i1<m_N1; i1++) {
1041     m_nodeId[i1*m_N0]=m_nodeDistribution[neighbour]
1042     + (i1-bottom+1)*leftN0-1;
1043     }
1044     }
1045     if (bottom>0) {
1046     const int neighbour=m_mpiInfo->rank-m_NX;
1047     const index_t bottomN0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1048     const index_t bottomN1=(neighbour/m_NX == 0 ? m_N1 : m_N1-1);
1049     #pragma omp parallel for
1050     for (dim_t i0=left; i0<m_N0; i0++) {
1051     m_nodeId[i0]=m_nodeDistribution[neighbour]
1052     + (bottomN1-1)*bottomN0 + i0 - left;
1053     }
1054     }
1055     if (left>0 && bottom>0) {
1056     const int neighbour=m_mpiInfo->rank-m_NX-1;
1057 caltinay 3699 const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1058     const index_t N1=(neighbour/m_NX == 0 ? m_N1 : m_N1-1);
1059     m_nodeId[0]=m_nodeDistribution[neighbour]+N0*N1-1;
1060 caltinay 3697 }
1061    
1062     // the rest of the id's are contiguous
1063     const index_t firstId=m_nodeDistribution[m_mpiInfo->rank];
1064     #pragma omp parallel for
1065     for (dim_t i1=bottom; i1<m_N1; i1++) {
1066     for (dim_t i0=left; i0<m_N0; i0++) {
1067 caltinay 3746 m_nodeId[i0+i1*m_N0] = firstId+i0-left+(i1-bottom)*(m_N0-left);
1068 caltinay 3697 }
1069     }
1070 caltinay 3735 m_nodeTags.assign(getNumNodes(), 0);
1071     updateTagsInUse(Nodes);
1072 caltinay 3697
1073 caltinay 3735 // elements
1074 caltinay 3697 m_elementId.resize(getNumElements());
1075     #pragma omp parallel for
1076     for (dim_t k=0; k<getNumElements(); k++) {
1077     m_elementId[k]=k;
1078     }
1079 caltinay 3735 m_elementTags.assign(getNumElements(), 0);
1080     updateTagsInUse(Elements);
1081 caltinay 3697
1082 caltinay 3704 // face element id's
1083 caltinay 3697 m_faceId.resize(getNumFaceElements());
1084     #pragma omp parallel for
1085     for (dim_t k=0; k<getNumFaceElements(); k++) {
1086     m_faceId[k]=k;
1087     }
1088 caltinay 3704
1089 caltinay 3722 // generate face offset vector and set face tags
1090 caltinay 3704 const IndexVector facesPerEdge = getNumFacesPerBoundary();
1091 caltinay 3722 const index_t LEFT=1, RIGHT=2, BOTTOM=10, TOP=20;
1092     const index_t faceTag[] = { LEFT, RIGHT, BOTTOM, TOP };
1093 caltinay 3704 m_faceOffset.assign(facesPerEdge.size(), -1);
1094 caltinay 3722 m_faceTags.clear();
1095 caltinay 3704 index_t offset=0;
1096     for (size_t i=0; i<facesPerEdge.size(); i++) {
1097     if (facesPerEdge[i]>0) {
1098     m_faceOffset[i]=offset;
1099     offset+=facesPerEdge[i];
1100 caltinay 3722 m_faceTags.insert(m_faceTags.end(), facesPerEdge[i], faceTag[i]);
1101 caltinay 3704 }
1102     }
1103 caltinay 3722 setTagMap("left", LEFT);
1104     setTagMap("right", RIGHT);
1105     setTagMap("bottom", BOTTOM);
1106     setTagMap("top", TOP);
1107     updateTagsInUse(FaceElements);
1108 caltinay 3691 }
1109    
1110 caltinay 3699 //private
1111     int Rectangle::insertNeighbours(IndexVector& index, index_t node) const
1112     {
1113     const dim_t myN0 = (m_offset0==0 ? m_N0 : m_N0-1);
1114     const dim_t myN1 = (m_offset1==0 ? m_N1 : m_N1-1);
1115     const int x=node%myN0;
1116     const int y=node/myN0;
1117     int num=0;
1118     if (y>0) {
1119     if (x>0) {
1120     // bottom-left
1121     index.push_back(node-myN0-1);
1122     num++;
1123     }
1124     // bottom
1125     index.push_back(node-myN0);
1126     num++;
1127     if (x<myN0-1) {
1128     // bottom-right
1129     index.push_back(node-myN0+1);
1130     num++;
1131     }
1132     }
1133     if (x<myN0-1) {
1134     // right
1135     index.push_back(node+1);
1136     num++;
1137     if (y<myN1-1) {
1138     // top-right
1139     index.push_back(node+myN0+1);
1140     num++;
1141     }
1142     }
1143     if (y<myN1-1) {
1144     // top
1145     index.push_back(node+myN0);
1146     num++;
1147     if (x>0) {
1148     // top-left
1149     index.push_back(node+myN0-1);
1150     num++;
1151     }
1152     }
1153     if (x>0) {
1154     // left
1155     index.push_back(node-1);
1156     num++;
1157     }
1158    
1159     return num;
1160     }
1161    
1162     //private
1163     void Rectangle::generateCouplePatterns(Paso_Pattern** colPattern, Paso_Pattern** rowPattern) const
1164     {
1165     IndexVector ptr(1,0);
1166     IndexVector index;
1167     const dim_t myN0 = (m_offset0==0 ? m_N0 : m_N0-1);
1168     const dim_t myN1 = (m_offset1==0 ? m_N1 : m_N1-1);
1169     const IndexVector faces=getNumFacesPerBoundary();
1170    
1171     // bottom edge
1172     dim_t n=0;
1173     if (faces[0] == 0) {
1174     index.push_back(2*(myN0+myN1+1));
1175     index.push_back(2*(myN0+myN1+1)+1);
1176     n+=2;
1177     if (faces[2] == 0) {
1178     index.push_back(0);
1179     index.push_back(1);
1180     index.push_back(2);
1181     n+=3;
1182     }
1183     } else if (faces[2] == 0) {
1184     index.push_back(1);
1185     index.push_back(2);
1186     n+=2;
1187     }
1188     // n=neighbours of bottom-left corner node
1189     ptr.push_back(ptr.back()+n);
1190     n=0;
1191     if (faces[2] == 0) {
1192     for (dim_t i=1; i<myN0-1; i++) {
1193     index.push_back(i);
1194     index.push_back(i+1);
1195     index.push_back(i+2);
1196     ptr.push_back(ptr.back()+3);
1197     }
1198     index.push_back(myN0-1);
1199     index.push_back(myN0);
1200     n+=2;
1201     if (faces[1] == 0) {
1202     index.push_back(myN0+1);
1203     index.push_back(myN0+2);
1204     index.push_back(myN0+3);
1205     n+=3;
1206     }
1207     } else {
1208     for (dim_t i=1; i<myN0-1; i++) {
1209     ptr.push_back(ptr.back());
1210     }
1211     if (faces[1] == 0) {
1212     index.push_back(myN0+2);
1213     index.push_back(myN0+3);
1214     n+=2;
1215     }
1216     }
1217     // n=neighbours of bottom-right corner node
1218     ptr.push_back(ptr.back()+n);
1219    
1220     // 2nd row to 2nd last row
1221     for (dim_t i=1; i<myN1-1; i++) {
1222     // left edge
1223     if (faces[0] == 0) {
1224     index.push_back(2*(myN0+myN1+2)-i);
1225     index.push_back(2*(myN0+myN1+2)-i-1);
1226     index.push_back(2*(myN0+myN1+2)-i-2);
1227     ptr.push_back(ptr.back()+3);
1228     } else {
1229     ptr.push_back(ptr.back());
1230     }
1231     for (dim_t j=1; j<myN0-1; j++) {
1232     ptr.push_back(ptr.back());
1233     }
1234     // right edge
1235     if (faces[1] == 0) {
1236     index.push_back(myN0+i+1);
1237     index.push_back(myN0+i+2);
1238     index.push_back(myN0+i+3);
1239     ptr.push_back(ptr.back()+3);
1240     } else {
1241     ptr.push_back(ptr.back());
1242     }
1243     }
1244    
1245     // top edge
1246     n=0;
1247     if (faces[0] == 0) {
1248     index.push_back(2*myN0+myN1+5);
1249     index.push_back(2*myN0+myN1+4);
1250     n+=2;
1251     if (faces[3] == 0) {
1252     index.push_back(2*myN0+myN1+3);
1253     index.push_back(2*myN0+myN1+2);
1254     index.push_back(2*myN0+myN1+1);
1255     n+=3;
1256     }
1257     } else if (faces[3] == 0) {
1258     index.push_back(2*myN0+myN1+2);
1259     index.push_back(2*myN0+myN1+1);
1260     n+=2;
1261     }
1262     // n=neighbours of top-left corner node
1263     ptr.push_back(ptr.back()+n);
1264     n=0;
1265     if (faces[3] == 0) {
1266     for (dim_t i=1; i<myN0-1; i++) {
1267     index.push_back(2*myN0+myN1+i+1);
1268     index.push_back(2*myN0+myN1+i);
1269     index.push_back(2*myN0+myN1+i-1);
1270     ptr.push_back(ptr.back()+3);
1271     }
1272     index.push_back(myN0+myN1+4);
1273     index.push_back(myN0+myN1+3);
1274     n+=2;
1275     if (faces[1] == 0) {
1276     index.push_back(myN0+myN1+2);
1277     index.push_back(myN0+myN1+1);
1278     index.push_back(myN0+myN1);
1279     n+=3;
1280     }
1281     } else {
1282     for (dim_t i=1; i<myN0-1; i++) {
1283     ptr.push_back(ptr.back());
1284     }
1285     if (faces[1] == 0) {
1286     index.push_back(myN0+myN1+1);
1287     index.push_back(myN0+myN1);
1288     n+=2;
1289     }
1290     }
1291     // n=neighbours of top-right corner node
1292     ptr.push_back(ptr.back()+n);
1293    
1294     dim_t M=ptr.size()-1;
1295     map<index_t,index_t> idMap;
1296     dim_t N=0;
1297     for (IndexVector::iterator it=index.begin(); it!=index.end(); it++) {
1298     if (idMap.find(*it)==idMap.end()) {
1299     idMap[*it]=N;
1300     *it=N++;
1301     } else {
1302     *it=idMap[*it];
1303     }
1304     }
1305    
1306 caltinay 3702 /*
1307 caltinay 3699 cout << "--- colCouple_pattern ---" << endl;
1308     cout << "M=" << M << ", N=" << N << endl;
1309     for (size_t i=0; i<ptr.size(); i++) {
1310     cout << "ptr[" << i << "]=" << ptr[i] << endl;
1311     }
1312     for (size_t i=0; i<index.size(); i++) {
1313     cout << "index[" << i << "]=" << index[i] << endl;
1314     }
1315 caltinay 3702 */
1316 caltinay 3699
1317     // now build the row couple pattern
1318     IndexVector ptr2(1,0);
1319     IndexVector index2;
1320     for (dim_t id=0; id<N; id++) {
1321     n=0;
1322     for (dim_t i=0; i<M; i++) {
1323     for (dim_t j=ptr[i]; j<ptr[i+1]; j++) {
1324     if (index[j]==id) {
1325     index2.push_back(i);
1326     n++;
1327     break;
1328     }
1329     }
1330     }
1331     ptr2.push_back(ptr2.back()+n);
1332     }
1333    
1334 caltinay 3702 /*
1335 caltinay 3699 cout << "--- rowCouple_pattern ---" << endl;
1336     cout << "M=" << N << ", N=" << M << endl;
1337     for (size_t i=0; i<ptr2.size(); i++) {
1338     cout << "ptr[" << i << "]=" << ptr2[i] << endl;
1339     }
1340     for (size_t i=0; i<index2.size(); i++) {
1341     cout << "index[" << i << "]=" << index2[i] << endl;
1342     }
1343 caltinay 3702 */
1344 caltinay 3699
1345     // paso will manage the memory
1346     index_t* indexC = MEMALLOC(index.size(), index_t);
1347     index_t* ptrC = MEMALLOC(ptr.size(), index_t);
1348     copy(index.begin(), index.end(), indexC);
1349     copy(ptr.begin(), ptr.end(), ptrC);
1350     *colPattern=Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT, M, N, ptrC, indexC);
1351    
1352     // paso will manage the memory
1353     indexC = MEMALLOC(index2.size(), index_t);
1354     ptrC = MEMALLOC(ptr2.size(), index_t);
1355     copy(index2.begin(), index2.end(), indexC);
1356     copy(ptr2.begin(), ptr2.end(), ptrC);
1357     *rowPattern=Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT, N, M, ptrC, indexC);
1358     }
1359    
1360 caltinay 3702 //protected
1361 caltinay 3711 void Rectangle::interpolateNodesOnElements(escript::Data& out,
1362     escript::Data& in, bool reduced) const
1363 caltinay 3702 {
1364     const dim_t numComp = in.getDataPointSize();
1365 caltinay 3711 if (reduced) {
1366 caltinay 3724 /*** GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS TOP */
1367     const double c0 = .25;
1368 caltinay 3702 #pragma omp parallel for
1369 caltinay 3711 for (index_t k1=0; k1 < m_NE1; ++k1) {
1370     for (index_t k0=0; k0 < m_NE0; ++k0) {
1371     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0));
1372     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0));
1373     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0));
1374     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0));
1375     double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0));
1376     for (index_t i=0; i < numComp; ++i) {
1377 caltinay 3724 o[INDEX2(i,numComp,0)] = c0*(f_00[i] + f_01[i] + f_10[i] + f_11[i]);
1378 caltinay 3711 } /* end of component loop i */
1379     } /* end of k0 loop */
1380     } /* end of k1 loop */
1381     /* GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS BOTTOM */
1382     } else {
1383 caltinay 3724 /*** GENERATOR SNIP_INTERPOLATE_ELEMENTS TOP */
1384     const double c0 = .16666666666666666667;
1385     const double c1 = .044658198738520451079;
1386     const double c2 = .62200846792814621559;
1387 caltinay 3711 #pragma omp parallel for
1388     for (index_t k1=0; k1 < m_NE1; ++k1) {
1389     for (index_t k0=0; k0 < m_NE0; ++k0) {
1390     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0));
1391     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0));
1392     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0));
1393     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0));
1394     double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0));
1395     for (index_t i=0; i < numComp; ++i) {
1396 caltinay 3724 o[INDEX2(i,numComp,0)] = f_00[i]*c2 + f_11[i]*c1 + c0*(f_01[i] + f_10[i]);
1397     o[INDEX2(i,numComp,1)] = f_01[i]*c1 + f_10[i]*c2 + c0*(f_00[i] + f_11[i]);
1398     o[INDEX2(i,numComp,2)] = f_01[i]*c2 + f_10[i]*c1 + c0*(f_00[i] + f_11[i]);
1399     o[INDEX2(i,numComp,3)] = f_00[i]*c1 + f_11[i]*c2 + c0*(f_01[i] + f_10[i]);
1400 caltinay 3711 } /* end of component loop i */
1401     } /* end of k0 loop */
1402     } /* end of k1 loop */
1403     /* GENERATOR SNIP_INTERPOLATE_ELEMENTS BOTTOM */
1404     }
1405 caltinay 3702 }
1406    
1407     //protected
1408 caltinay 3711 void Rectangle::interpolateNodesOnFaces(escript::Data& out, escript::Data& in,
1409     bool reduced) const
1410 caltinay 3702 {
1411 caltinay 3704 const dim_t numComp = in.getDataPointSize();
1412 caltinay 3711 if (reduced) {
1413 caltinay 3724 const double c0 = .5;
1414     #pragma omp parallel
1415     {
1416     /*** GENERATOR SNIP_INTERPOLATE_REDUCED_FACES TOP */
1417     if (m_faceOffset[0] > -1) {
1418     #pragma omp for nowait
1419     for (index_t k1=0; k1 < m_NE1; ++k1) {
1420     const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0));
1421     const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0));
1422     double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
1423     for (index_t i=0; i < numComp; ++i) {
1424     o[INDEX2(i,numComp,0)] = c0*(f_00[i] + f_01[i]);
1425     } /* end of component loop i */
1426     } /* end of k1 loop */
1427     } /* end of face 0 */
1428     if (m_faceOffset[1] > -1) {
1429     #pragma omp for nowait
1430     for (index_t k1=0; k1 < m_NE1; ++k1) {
1431     const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0));
1432     const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0));
1433     double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
1434     for (index_t i=0; i < numComp; ++i) {
1435     o[INDEX2(i,numComp,0)] = c0*(f_10[i] + f_11[i]);
1436     } /* end of component loop i */
1437     } /* end of k1 loop */
1438     } /* end of face 1 */
1439     if (m_faceOffset[2] > -1) {
1440     #pragma omp for nowait
1441     for (index_t k0=0; k0 < m_NE0; ++k0) {
1442     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0));
1443     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0));
1444     double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1445     for (index_t i=0; i < numComp; ++i) {
1446     o[INDEX2(i,numComp,0)] = c0*(f_00[i] + f_10[i]);
1447     } /* end of component loop i */
1448     } /* end of k0 loop */
1449     } /* end of face 2 */
1450     if (m_faceOffset[3] > -1) {
1451     #pragma omp for nowait
1452     for (index_t k0=0; k0 < m_NE0; ++k0) {
1453     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0));
1454     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0));
1455     double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1456     for (index_t i=0; i < numComp; ++i) {
1457     o[INDEX2(i,numComp,0)] = c0*(f_01[i] + f_11[i]);
1458     } /* end of component loop i */
1459     } /* end of k0 loop */
1460     } /* end of face 3 */
1461     /* GENERATOR SNIP_INTERPOLATE_REDUCED_FACES BOTTOM */
1462     } // end of parallel section
1463 caltinay 3711 } else {
1464 caltinay 3724 const double c0 = 0.21132486540518711775;
1465     const double c1 = 0.78867513459481288225;
1466     #pragma omp parallel
1467     {
1468     /*** GENERATOR SNIP_INTERPOLATE_FACES TOP */
1469     if (m_faceOffset[0] > -1) {
1470     #pragma omp for nowait
1471     for (index_t k1=0; k1 < m_NE1; ++k1) {
1472     const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0));
1473     const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0));
1474     double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
1475     for (index_t i=0; i < numComp; ++i) {
1476     o[INDEX2(i,numComp,0)] = f_00[i]*c1 + f_01[i]*c0;
1477     o[INDEX2(i,numComp,1)] = f_00[i]*c0 + f_01[i]*c1;
1478     } /* end of component loop i */
1479     } /* end of k1 loop */
1480     } /* end of face 0 */
1481     if (m_faceOffset[1] > -1) {
1482     #pragma omp for nowait
1483     for (index_t k1=0; k1 < m_NE1; ++k1) {
1484     const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0));
1485     const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0));
1486     double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
1487     for (index_t i=0; i < numComp; ++i) {
1488     o[INDEX2(i,numComp,0)] = f_10[i]*c1 + f_11[i]*c0;
1489     o[INDEX2(i,numComp,1)] = f_10[i]*c0 + f_11[i]*c1;
1490     } /* end of component loop i */
1491     } /* end of k1 loop */
1492     } /* end of face 1 */
1493     if (m_faceOffset[2] > -1) {
1494     #pragma omp for nowait
1495     for (index_t k0=0; k0 < m_NE0; ++k0) {
1496     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0));
1497     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0));
1498     double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1499     for (index_t i=0; i < numComp; ++i) {
1500     o[INDEX2(i,numComp,0)] = f_00[i]*c1 + f_10[i]*c0;
1501     o[INDEX2(i,numComp,1)] = f_00[i]*c0 + f_10[i]*c1;
1502     } /* end of component loop i */
1503     } /* end of k0 loop */
1504     } /* end of face 2 */
1505     if (m_faceOffset[3] > -1) {
1506     #pragma omp for nowait
1507     for (index_t k0=0; k0 < m_NE0; ++k0) {
1508     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0));
1509     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0));
1510     double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1511     for (index_t i=0; i < numComp; ++i) {
1512     o[INDEX2(i,numComp,0)] = f_01[i]*c1 + f_11[i]*c0;
1513     o[INDEX2(i,numComp,1)] = f_01[i]*c0 + f_11[i]*c1;
1514     } /* end of component loop i */
1515     } /* end of k0 loop */
1516     } /* end of face 3 */
1517     /* GENERATOR SNIP_INTERPOLATE_FACES BOTTOM */
1518     } // end of parallel section
1519 caltinay 3711 }
1520 caltinay 3702 }
1521    
1522 caltinay 3691 } // end of namespace ripley
1523    

  ViewVC Help
Powered by ViewVC 1.1.26