/[escript]/branches/ripleygmg_from_3668/ripley/src/Rectangle.cpp
ViewVC logotype

Annotation of /branches/ripleygmg_from_3668/ripley/src/Rectangle.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3744 - (hide annotations)
Tue Dec 13 06:41:54 2011 UTC (8 years ago) by caltinay
File size: 60698 byte(s)
Implemented (Face)Elements->Reduced(Face)Elements interpolation and started
separating DOF and nodes.
Also, implemented operator==() so that a==b for different domain objects a and
b which are in the same state.

1 caltinay 3691
2     /*******************************************************
3     *
4     * Copyright (c) 2003-2011 by University of Queensland
5     * Earth Systems Science Computational Center (ESSCC)
6     * http://www.uq.edu.au/esscc
7     *
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13    
14     #include <ripley/Rectangle.h>
15     extern "C" {
16     #include "paso/SystemMatrixPattern.h"
17     }
18    
19     #if USE_SILO
20     #include <silo.h>
21     #ifdef ESYS_MPI
22     #include <pmpio.h>
23     #endif
24     #endif
25    
26     #include <iomanip>
27    
28     using namespace std;
29    
30     namespace ripley {
31    
32     Rectangle::Rectangle(int n0, int n1, double l0, double l1, int d0, int d1) :
33     RipleyDomain(2),
34     m_gNE0(n0),
35     m_gNE1(n1),
36     m_l0(l0),
37     m_l1(l1),
38     m_NX(d0),
39     m_NY(d1)
40     {
41     // ensure number of subdivisions is valid and nodes can be distributed
42     // among number of ranks
43     if (m_NX*m_NY != m_mpiInfo->size)
44     throw RipleyException("Invalid number of spatial subdivisions");
45    
46     if (n0%m_NX > 0 || n1%m_NY > 0)
47     throw RipleyException("Number of elements must be separable into number of ranks in each dimension");
48    
49     // local number of elements
50     m_NE0 = n0/m_NX;
51     m_NE1 = n1/m_NY;
52     // local number of nodes (not necessarily owned)
53     m_N0 = m_NE0+1;
54     m_N1 = m_NE1+1;
55     // bottom-left node is at (offset0,offset1) in global mesh
56     m_offset0 = m_NE0*(m_mpiInfo->rank%m_NX);
57     m_offset1 = m_NE1*(m_mpiInfo->rank/m_NX);
58     populateSampleIds();
59     }
60    
61     Rectangle::~Rectangle()
62     {
63     }
64    
65     string Rectangle::getDescription() const
66     {
67     return "ripley::Rectangle";
68     }
69    
70     bool Rectangle::operator==(const AbstractDomain& other) const
71     {
72 caltinay 3744 const Rectangle* o=dynamic_cast<const Rectangle*>(&other);
73     if (o) {
74     return (RipleyDomain::operator==(other) &&
75     m_gNE0==o->m_gNE0 && m_gNE1==o->m_gNE1
76     && m_l0==o->m_l0 && m_l1==o->m_l1
77     && m_NX==o->m_NX && m_NY==o->m_NY);
78     }
79 caltinay 3691
80     return false;
81     }
82    
83     void Rectangle::dump(const string& fileName) const
84     {
85     #if USE_SILO
86     string fn(fileName);
87     if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
88     fn+=".silo";
89     }
90    
91     const int NUM_SILO_FILES = 1;
92     const char* blockDirFmt = "/block%04d";
93     int driver=DB_HDF5;
94     string siloPath;
95     DBfile* dbfile = NULL;
96    
97     #ifdef ESYS_MPI
98     PMPIO_baton_t* baton = NULL;
99     #endif
100    
101     if (m_mpiInfo->size > 1) {
102     #ifdef ESYS_MPI
103     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
104     0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
105     PMPIO_DefaultClose, (void*)&driver);
106     // try the fallback driver in case of error
107     if (!baton && driver != DB_PDB) {
108     driver = DB_PDB;
109     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
110     0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
111     PMPIO_DefaultClose, (void*)&driver);
112     }
113     if (baton) {
114     char str[64];
115     snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
116     siloPath = str;
117     dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
118     }
119     #endif
120     } else {
121     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
122     getDescription().c_str(), driver);
123     // try the fallback driver in case of error
124     if (!dbfile && driver != DB_PDB) {
125     driver = DB_PDB;
126     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
127     getDescription().c_str(), driver);
128     }
129     }
130    
131     if (!dbfile)
132     throw RipleyException("dump: Could not create Silo file");
133    
134     /*
135     if (driver==DB_HDF5) {
136     // gzip level 1 already provides good compression with minimal
137     // performance penalty. Some tests showed that gzip levels >3 performed
138     // rather badly on escript data both in terms of time and space
139     DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
140     }
141     */
142    
143     boost::scoped_ptr<double> x(new double[m_N0]);
144     boost::scoped_ptr<double> y(new double[m_N1]);
145     double* coords[2] = { x.get(), y.get() };
146 caltinay 3697 pair<double,double> xdx = getFirstCoordAndSpacing(0);
147     pair<double,double> ydy = getFirstCoordAndSpacing(1);
148 caltinay 3691 #pragma omp parallel
149     {
150 caltinay 3722 #pragma omp for nowait
151 caltinay 3691 for (dim_t i0 = 0; i0 < m_N0; i0++) {
152 caltinay 3697 coords[0][i0]=xdx.first+i0*xdx.second;
153 caltinay 3691 }
154 caltinay 3722 #pragma omp for nowait
155 caltinay 3691 for (dim_t i1 = 0; i1 < m_N1; i1++) {
156 caltinay 3697 coords[1][i1]=ydy.first+i1*ydy.second;
157 caltinay 3691 }
158     }
159 caltinay 3697 IndexVector dims = getNumNodesPerDim();
160    
161     // write mesh
162     DBPutQuadmesh(dbfile, "mesh", NULL, coords, &dims[0], 2, DB_DOUBLE,
163 caltinay 3691 DB_COLLINEAR, NULL);
164    
165 caltinay 3697 // write node ids
166     DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], &dims[0], 2,
167     NULL, 0, DB_INT, DB_NODECENT, NULL);
168    
169     // write element ids
170     dims = getNumElementsPerDim();
171     DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
172     &dims[0], 2, NULL, 0, DB_INT, DB_ZONECENT, NULL);
173    
174     // rank 0 writes multimesh and multivar
175 caltinay 3691 if (m_mpiInfo->rank == 0) {
176     vector<string> tempstrings;
177 caltinay 3697 vector<char*> names;
178 caltinay 3691 for (dim_t i=0; i<m_mpiInfo->size; i++) {
179     stringstream path;
180     path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
181     tempstrings.push_back(path.str());
182 caltinay 3697 names.push_back((char*)tempstrings.back().c_str());
183 caltinay 3691 }
184 caltinay 3697 vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
185 caltinay 3691 DBSetDir(dbfile, "/");
186 caltinay 3697 DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
187     &types[0], NULL);
188     tempstrings.clear();
189     names.clear();
190     for (dim_t i=0; i<m_mpiInfo->size; i++) {
191     stringstream path;
192     path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
193     tempstrings.push_back(path.str());
194     names.push_back((char*)tempstrings.back().c_str());
195     }
196     types.assign(m_mpiInfo->size, DB_QUADVAR);
197     DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
198     &types[0], NULL);
199     tempstrings.clear();
200     names.clear();
201     for (dim_t i=0; i<m_mpiInfo->size; i++) {
202     stringstream path;
203     path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
204     tempstrings.push_back(path.str());
205     names.push_back((char*)tempstrings.back().c_str());
206     }
207     DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
208     &types[0], NULL);
209 caltinay 3691 }
210    
211     if (m_mpiInfo->size > 1) {
212     #ifdef ESYS_MPI
213     PMPIO_HandOffBaton(baton, dbfile);
214     PMPIO_Finish(baton);
215     #endif
216     } else {
217     DBClose(dbfile);
218     }
219    
220     #else // USE_SILO
221     throw RipleyException("dump(): no Silo support");
222     #endif
223     }
224    
225 caltinay 3697 const int* Rectangle::borrowSampleReferenceIDs(int fsType) const
226 caltinay 3691 {
227 caltinay 3697 switch (fsType) {
228 caltinay 3691 case Nodes:
229     return &m_nodeId[0];
230 caltinay 3744 case DegreesOfFreedom:
231     case ReducedDegreesOfFreedom: //FIXME: reduced
232     return &m_dofId[0];
233 caltinay 3691 case Elements:
234 caltinay 3733 case ReducedElements:
235 caltinay 3691 return &m_elementId[0];
236     case FaceElements:
237 caltinay 3733 case ReducedFaceElements:
238 caltinay 3691 return &m_faceId[0];
239     default:
240     break;
241     }
242    
243     stringstream msg;
244     msg << "borrowSampleReferenceIDs() not implemented for function space type "
245 caltinay 3701 << functionSpaceTypeAsString(fsType);
246 caltinay 3691 throw RipleyException(msg.str());
247     }
248    
249     bool Rectangle::ownSample(int fsCode, index_t id) const
250     {
251     #ifdef ESYS_MPI
252 caltinay 3702 if (fsCode != ReducedNodes) {
253 caltinay 3699 const index_t myFirst=m_nodeDistribution[m_mpiInfo->rank];
254     const index_t myLast=m_nodeDistribution[m_mpiInfo->rank+1]-1;
255 caltinay 3691 return (m_nodeId[id]>=myFirst && m_nodeId[id]<=myLast);
256 caltinay 3702 } else {
257     stringstream msg;
258     msg << "ownSample() not implemented for "
259     << functionSpaceTypeAsString(fsCode);
260     throw RipleyException(msg.str());
261     }
262 caltinay 3691 #else
263     return true;
264     #endif
265     }
266    
267 caltinay 3702 void Rectangle::setToGradient(escript::Data& out, const escript::Data& cIn) const
268 caltinay 3691 {
269 caltinay 3702 escript::Data& in = *const_cast<escript::Data*>(&cIn);
270 caltinay 3701 const dim_t numComp = in.getDataPointSize();
271 caltinay 3702 const double h0 = m_l0/m_gNE0;
272     const double h1 = m_l1/m_gNE1;
273 caltinay 3724 const double cx0 = -1./h0;
274     const double cx1 = -.78867513459481288225/h0;
275     const double cx2 = -.5/h0;
276     const double cx3 = -.21132486540518711775/h0;
277     const double cx4 = .21132486540518711775/h0;
278     const double cx5 = .5/h0;
279     const double cx6 = .78867513459481288225/h0;
280     const double cx7 = 1./h0;
281     const double cy0 = -1./h1;
282     const double cy1 = -.78867513459481288225/h1;
283     const double cy2 = -.5/h1;
284     const double cy3 = -.21132486540518711775/h1;
285     const double cy4 = .21132486540518711775/h1;
286     const double cy5 = .5/h1;
287     const double cy6 = .78867513459481288225/h1;
288     const double cy7 = 1./h1;
289    
290 caltinay 3702 if (out.getFunctionSpace().getTypeCode() == Elements) {
291 caltinay 3724 /*** GENERATOR SNIP_GRAD_ELEMENTS TOP */
292 caltinay 3697 #pragma omp parallel for
293 caltinay 3707 for (index_t k1=0; k1 < m_NE1; ++k1) {
294     for (index_t k0=0; k0 < m_NE0; ++k0) {
295 caltinay 3702 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0));
296     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0));
297     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0));
298     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0));
299     double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0));
300     for (index_t i=0; i < numComp; ++i) {
301 caltinay 3724 o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx1 + f_01[i]*cx3 + f_10[i]*cx6 + f_11[i]*cx4;
302     o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy1 + f_01[i]*cy6 + f_10[i]*cy3 + f_11[i]*cy4;
303     o[INDEX3(i,0,1,numComp,2)] = f_00[i]*cx1 + f_01[i]*cx3 + f_10[i]*cx6 + f_11[i]*cx4;
304     o[INDEX3(i,1,1,numComp,2)] = f_00[i]*cy3 + f_01[i]*cy4 + f_10[i]*cy1 + f_11[i]*cy6;
305     o[INDEX3(i,0,2,numComp,2)] = f_00[i]*cx3 + f_01[i]*cx1 + f_10[i]*cx4 + f_11[i]*cx6;
306     o[INDEX3(i,1,2,numComp,2)] = f_00[i]*cy1 + f_01[i]*cy6 + f_10[i]*cy3 + f_11[i]*cy4;
307     o[INDEX3(i,0,3,numComp,2)] = f_00[i]*cx3 + f_01[i]*cx1 + f_10[i]*cx4 + f_11[i]*cx6;
308     o[INDEX3(i,1,3,numComp,2)] = f_00[i]*cy3 + f_01[i]*cy4 + f_10[i]*cy1 + f_11[i]*cy6;
309 caltinay 3702 } /* end of component loop i */
310     } /* end of k0 loop */
311     } /* end of k1 loop */
312     /* GENERATOR SNIP_GRAD_ELEMENTS BOTTOM */
313 caltinay 3711 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
314 caltinay 3724 /*** GENERATOR SNIP_GRAD_REDUCED_ELEMENTS TOP */
315 caltinay 3711 #pragma omp parallel for
316     for (index_t k1=0; k1 < m_NE1; ++k1) {
317     for (index_t k0=0; k0 < m_NE0; ++k0) {
318     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0));
319     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0));
320     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0));
321     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0));
322     double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0));
323     for (index_t i=0; i < numComp; ++i) {
324 caltinay 3724 o[INDEX3(i,0,0,numComp,2)] = cx5*(f_10[i] + f_11[i]) + cx2*(f_00[i] + f_01[i]);
325     o[INDEX3(i,1,0,numComp,2)] = cy2*(f_00[i] + f_10[i]) + cy5*(f_01[i] + f_11[i]);
326 caltinay 3711 } /* end of component loop i */
327     } /* end of k0 loop */
328     } /* end of k1 loop */
329     /* GENERATOR SNIP_GRAD_REDUCED_ELEMENTS BOTTOM */
330 caltinay 3707 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
331 caltinay 3722 #pragma omp parallel
332     {
333 caltinay 3724 /*** GENERATOR SNIP_GRAD_FACES TOP */
334 caltinay 3722 if (m_faceOffset[0] > -1) {
335     #pragma omp for nowait
336     for (index_t k1=0; k1 < m_NE1; ++k1) {
337     const register double* f_10 = in.getSampleDataRO(INDEX2(1,k1, m_N0));
338     const register double* f_11 = in.getSampleDataRO(INDEX2(1,k1+1, m_N0));
339     const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0));
340     const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0));
341     double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
342     for (index_t i=0; i < numComp; ++i) {
343 caltinay 3724 o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx1 + f_01[i]*cx3 + f_10[i]*cx6 + f_11[i]*cx4;
344     o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy0 + f_01[i]*cy7;
345     o[INDEX3(i,0,1,numComp,2)] = f_00[i]*cx3 + f_01[i]*cx1 + f_10[i]*cx4 + f_11[i]*cx6;
346     o[INDEX3(i,1,1,numComp,2)] = f_00[i]*cy0 + f_01[i]*cy7;
347 caltinay 3722 } /* end of component loop i */
348     } /* end of k1 loop */
349     } /* end of face 0 */
350     if (m_faceOffset[1] > -1) {
351     #pragma omp for nowait
352     for (index_t k1=0; k1 < m_NE1; ++k1) {
353     const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0));
354     const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0));
355     const register double* f_01 = in.getSampleDataRO(INDEX2(m_N0-2,k1+1, m_N0));
356     const register double* f_00 = in.getSampleDataRO(INDEX2(m_N0-2,k1, m_N0));
357     double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
358     for (index_t i=0; i < numComp; ++i) {
359 caltinay 3724 o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx1 + f_01[i]*cx3 + f_10[i]*cx6 + f_11[i]*cx4;
360     o[INDEX3(i,1,0,numComp,2)] = f_10[i]*cy0 + f_11[i]*cy7;
361     o[INDEX3(i,0,1,numComp,2)] = f_00[i]*cx3 + f_01[i]*cx1 + f_10[i]*cx4 + f_11[i]*cx6;
362     o[INDEX3(i,1,1,numComp,2)] = f_10[i]*cy0 + f_11[i]*cy7;
363 caltinay 3722 } /* end of component loop i */
364     } /* end of k1 loop */
365     } /* end of face 1 */
366     if (m_faceOffset[2] > -1) {
367     #pragma omp for nowait
368     for (index_t k0=0; k0 < m_NE0; ++k0) {
369     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0));
370     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0));
371     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,1, m_N0));
372     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,1, m_N0));
373     double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
374     for (index_t i=0; i < numComp; ++i) {
375 caltinay 3724 o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx0 + f_10[i]*cx7;
376     o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy1 + f_01[i]*cy6 + f_10[i]*cy3 + f_11[i]*cy4;
377     o[INDEX3(i,0,1,numComp,2)] = f_00[i]*cx0 + f_10[i]*cx7;
378     o[INDEX3(i,1,1,numComp,2)] = f_00[i]*cy3 + f_01[i]*cy4 + f_10[i]*cy1 + f_11[i]*cy6;
379 caltinay 3722 } /* end of component loop i */
380     } /* end of k0 loop */
381     } /* end of face 2 */
382     if (m_faceOffset[3] > -1) {
383     #pragma omp for nowait
384     for (index_t k0=0; k0 < m_NE0; ++k0) {
385     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0));
386     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0));
387     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,m_N1-2, m_N0));
388     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,m_N1-2, m_N0));
389     double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
390     for (index_t i=0; i < numComp; ++i) {
391 caltinay 3724 o[INDEX3(i,0,0,numComp,2)] = f_01[i]*cx0 + f_11[i]*cx7;
392     o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy1 + f_01[i]*cy6 + f_10[i]*cy3 + f_11[i]*cy4;
393     o[INDEX3(i,0,1,numComp,2)] = f_01[i]*cx0 + f_11[i]*cx7;
394     o[INDEX3(i,1,1,numComp,2)] = f_00[i]*cy3 + f_01[i]*cy4 + f_10[i]*cy1 + f_11[i]*cy6;
395 caltinay 3722 } /* end of component loop i */
396     } /* end of k0 loop */
397     } /* end of face 3 */
398     /* GENERATOR SNIP_GRAD_FACES BOTTOM */
399     } // end of parallel section
400 caltinay 3711 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
401 caltinay 3722 #pragma omp parallel
402     {
403 caltinay 3724 /*** GENERATOR SNIP_GRAD_REDUCED_FACES TOP */
404 caltinay 3722 if (m_faceOffset[0] > -1) {
405     #pragma omp for nowait
406     for (index_t k1=0; k1 < m_NE1; ++k1) {
407     const register double* f_10 = in.getSampleDataRO(INDEX2(1,k1, m_N0));
408     const register double* f_11 = in.getSampleDataRO(INDEX2(1,k1+1, m_N0));
409     const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0));
410     const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0));
411     double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
412     for (index_t i=0; i < numComp; ++i) {
413 caltinay 3724 o[INDEX3(i,0,0,numComp,2)] = cx5*(f_10[i] + f_11[i]) + cx2*(f_00[i] + f_01[i]);
414     o[INDEX3(i,1,0,numComp,2)] = f_00[i]*cy0 + f_01[i]*cy7;
415 caltinay 3722 } /* end of component loop i */
416     } /* end of k1 loop */
417     } /* end of face 0 */
418     if (m_faceOffset[1] > -1) {
419     #pragma omp for nowait
420     for (index_t k1=0; k1 < m_NE1; ++k1) {
421     const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0));
422     const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0));
423     const register double* f_01 = in.getSampleDataRO(INDEX2(m_N0-2,k1+1, m_N0));
424     const register double* f_00 = in.getSampleDataRO(INDEX2(m_N0-2,k1, m_N0));
425     double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
426     for (index_t i=0; i < numComp; ++i) {
427 caltinay 3724 o[INDEX3(i,0,0,numComp,2)] = cx5*(f_10[i] + f_11[i]) + cx2*(f_00[i] + f_01[i]);
428     o[INDEX3(i,1,0,numComp,2)] = f_10[i]*cy0 + f_11[i]*cy7;
429 caltinay 3722 } /* end of component loop i */
430     } /* end of k1 loop */
431     } /* end of face 1 */
432     if (m_faceOffset[2] > -1) {
433     #pragma omp for nowait
434     for (index_t k0=0; k0 < m_NE0; ++k0) {
435     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0));
436     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0));
437     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,1, m_N0));
438     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,1, m_N0));
439     double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
440     for (index_t i=0; i < numComp; ++i) {
441 caltinay 3724 o[INDEX3(i,0,0,numComp,2)] = f_00[i]*cx0 + f_10[i]*cx7;
442     o[INDEX3(i,1,0,numComp,2)] = cy2*(f_00[i] + f_10[i]) + cy5*(f_01[i] + f_11[i]);
443 caltinay 3722 } /* end of component loop i */
444     } /* end of k0 loop */
445     } /* end of face 2 */
446     if (m_faceOffset[3] > -1) {
447     #pragma omp for nowait
448     for (index_t k0=0; k0 < m_NE0; ++k0) {
449     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0));
450     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0));
451     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,m_N1-2, m_N0));
452     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,m_N1-2, m_N0));
453     double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
454     for (index_t i=0; i < numComp; ++i) {
455 caltinay 3724 o[INDEX3(i,0,0,numComp,2)] = f_01[i]*cx0 + f_11[i]*cx7;
456     o[INDEX3(i,1,0,numComp,2)] = cy5*(f_01[i] + f_11[i]) + cy2*(f_00[i] + f_10[i]);
457 caltinay 3722 } /* end of component loop i */
458     } /* end of k0 loop */
459     } /* end of face 3 */
460     /* GENERATOR SNIP_GRAD_REDUCED_FACES BOTTOM */
461     } // end of parallel section
462 caltinay 3702 } else {
463 caltinay 3707 stringstream msg;
464     msg << "setToGradient() not implemented for "
465     << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
466     throw RipleyException(msg.str());
467 caltinay 3702 }
468 caltinay 3701 }
469 caltinay 3697
470 caltinay 3713 void Rectangle::setToIntegrals(vector<double>& integrals, const escript::Data& arg) const
471     {
472     escript::Data& in = *const_cast<escript::Data*>(&arg);
473     const dim_t numComp = in.getDataPointSize();
474     const double h0 = m_l0/m_gNE0;
475     const double h1 = m_l1/m_gNE1;
476     if (arg.getFunctionSpace().getTypeCode() == Elements) {
477     const double w_0 = h0*h1/4.;
478     #pragma omp parallel
479     {
480     vector<double> int_local(numComp, 0);
481 caltinay 3722 #pragma omp for nowait
482 caltinay 3713 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
483     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
484     const double* f = in.getSampleDataRO(INDEX2(k0, k1, m_NE0));
485     for (index_t i=0; i < numComp; ++i) {
486     const register double f_0 = f[INDEX2(i,0,numComp)];
487     const register double f_1 = f[INDEX2(i,1,numComp)];
488     const register double f_2 = f[INDEX2(i,2,numComp)];
489     const register double f_3 = f[INDEX2(i,3,numComp)];
490     int_local[i]+=(f_0+f_1+f_2+f_3)*w_0;
491     } /* end of component loop i */
492     } /* end of k0 loop */
493     } /* end of k1 loop */
494    
495     #pragma omp critical
496     for (index_t i=0; i<numComp; i++)
497     integrals[i]+=int_local[i];
498 caltinay 3722 } // end of parallel section
499 caltinay 3713 } else if (arg.getFunctionSpace().getTypeCode() == ReducedElements) {
500     const double w_0 = h0*h1;
501     #pragma omp parallel
502     {
503     vector<double> int_local(numComp, 0);
504 caltinay 3722 #pragma omp for nowait
505 caltinay 3713 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
506     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
507     const double* f = in.getSampleDataRO(INDEX2(k0, k1, m_NE0));
508     for (index_t i=0; i < numComp; ++i) {
509     int_local[i]+=f[i]*w_0;
510     } /* end of component loop i */
511     } /* end of k0 loop */
512     } /* end of k1 loop */
513    
514     #pragma omp critical
515     for (index_t i=0; i<numComp; i++)
516     integrals[i]+=int_local[i];
517 caltinay 3722 } // end of parallel section
518 caltinay 3713 } else if (arg.getFunctionSpace().getTypeCode() == FaceElements) {
519     const double w_0 = h0/2.;
520     const double w_1 = h1/2.;
521     #pragma omp parallel
522     {
523     vector<double> int_local(numComp, 0);
524     if (m_faceOffset[0] > -1) {
525 caltinay 3722 #pragma omp for nowait
526 caltinay 3713 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
527     const double* f = in.getSampleDataRO(m_faceOffset[0]+k1);
528     for (index_t i=0; i < numComp; ++i) {
529     const register double f_0 = f[INDEX2(i,0,numComp)];
530     const register double f_1 = f[INDEX2(i,1,numComp)];
531     int_local[i]+=(f_0+f_1)*w_1;
532     } /* end of component loop i */
533     } /* end of k1 loop */
534     }
535    
536     if (m_faceOffset[1] > -1) {
537 caltinay 3722 #pragma omp for nowait
538 caltinay 3713 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
539     const double* f = in.getSampleDataRO(m_faceOffset[1]+k1);
540     for (index_t i=0; i < numComp; ++i) {
541     const register double f_0 = f[INDEX2(i,0,numComp)];
542     const register double f_1 = f[INDEX2(i,1,numComp)];
543     int_local[i]+=(f_0+f_1)*w_1;
544     } /* end of component loop i */
545     } /* end of k1 loop */
546     }
547    
548     if (m_faceOffset[2] > -1) {
549 caltinay 3722 #pragma omp for nowait
550 caltinay 3713 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
551     const double* f = in.getSampleDataRO(m_faceOffset[2]+k0);
552     for (index_t i=0; i < numComp; ++i) {
553     const register double f_0 = f[INDEX2(i,0,numComp)];
554     const register double f_1 = f[INDEX2(i,1,numComp)];
555     int_local[i]+=(f_0+f_1)*w_0;
556     } /* end of component loop i */
557     } /* end of k0 loop */
558     }
559    
560     if (m_faceOffset[3] > -1) {
561 caltinay 3722 #pragma omp for nowait
562 caltinay 3713 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
563     const double* f = in.getSampleDataRO(m_faceOffset[3]+k0);
564     for (index_t i=0; i < numComp; ++i) {
565     const register double f_0 = f[INDEX2(i,0,numComp)];
566     const register double f_1 = f[INDEX2(i,1,numComp)];
567     int_local[i]+=(f_0+f_1)*w_0;
568     } /* end of component loop i */
569     } /* end of k0 loop */
570     }
571    
572     #pragma omp critical
573     for (index_t i=0; i<numComp; i++)
574     integrals[i]+=int_local[i];
575 caltinay 3722 } // end of parallel section
576 caltinay 3713 } else if (arg.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
577     #pragma omp parallel
578     {
579     vector<double> int_local(numComp, 0);
580     if (m_faceOffset[0] > -1) {
581 caltinay 3722 #pragma omp for nowait
582 caltinay 3713 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
583     const double* f = in.getSampleDataRO(m_faceOffset[0]+k1);
584     for (index_t i=0; i < numComp; ++i) {
585     int_local[i]+=f[i]*h1;
586     } /* end of component loop i */
587     } /* end of k1 loop */
588     }
589    
590     if (m_faceOffset[1] > -1) {
591 caltinay 3722 #pragma omp for nowait
592 caltinay 3713 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
593     const double* f = in.getSampleDataRO(m_faceOffset[1]+k1);
594     for (index_t i=0; i < numComp; ++i) {
595     int_local[i]+=f[i]*h1;
596     } /* end of component loop i */
597     } /* end of k1 loop */
598     }
599    
600     if (m_faceOffset[2] > -1) {
601 caltinay 3722 #pragma omp for nowait
602 caltinay 3713 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
603     const double* f = in.getSampleDataRO(m_faceOffset[2]+k0);
604     for (index_t i=0; i < numComp; ++i) {
605     int_local[i]+=f[i]*h0;
606     } /* end of component loop i */
607     } /* end of k0 loop */
608     }
609    
610     if (m_faceOffset[3] > -1) {
611 caltinay 3722 #pragma omp for nowait
612 caltinay 3713 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
613     const double* f = in.getSampleDataRO(m_faceOffset[3]+k0);
614     for (index_t i=0; i < numComp; ++i) {
615     int_local[i]+=f[i]*h0;
616     } /* end of component loop i */
617     } /* end of k0 loop */
618     }
619    
620     #pragma omp critical
621     for (index_t i=0; i<numComp; i++)
622     integrals[i]+=int_local[i];
623 caltinay 3722 } // end of parallel section
624 caltinay 3713 } else {
625     stringstream msg;
626     msg << "setToIntegrals() not implemented for "
627     << functionSpaceTypeAsString(arg.getFunctionSpace().getTypeCode());
628     throw RipleyException(msg.str());
629     }
630     }
631    
632 caltinay 3722 void Rectangle::setToNormal(escript::Data& out) const
633     {
634     if (out.getFunctionSpace().getTypeCode() == FaceElements) {
635     #pragma omp parallel
636     {
637     if (m_faceOffset[0] > -1) {
638     #pragma omp for nowait
639     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
640     double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
641     // set vector at two quadrature points
642     *o++ = -1.;
643     *o++ = 0.;
644     *o++ = -1.;
645     *o = 0.;
646     }
647     }
648    
649     if (m_faceOffset[1] > -1) {
650     #pragma omp for nowait
651     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
652     double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
653     // set vector at two quadrature points
654     *o++ = 1.;
655     *o++ = 0.;
656     *o++ = 1.;
657     *o = 0.;
658     }
659     }
660    
661     if (m_faceOffset[2] > -1) {
662     #pragma omp for nowait
663     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
664     double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
665     // set vector at two quadrature points
666     *o++ = 0.;
667     *o++ = -1.;
668     *o++ = 0.;
669     *o = -1.;
670     }
671     }
672    
673     if (m_faceOffset[3] > -1) {
674     #pragma omp for nowait
675     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
676     double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
677     // set vector at two quadrature points
678     *o++ = 0.;
679     *o++ = 1.;
680     *o++ = 0.;
681     *o = 1.;
682     }
683     }
684     } // end of parallel section
685     } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
686     #pragma omp parallel
687     {
688     if (m_faceOffset[0] > -1) {
689     #pragma omp for nowait
690     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
691     double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
692     *o++ = -1.;
693     *o = 0.;
694     }
695     }
696    
697     if (m_faceOffset[1] > -1) {
698     #pragma omp for nowait
699     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
700     double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
701     *o++ = 1.;
702     *o = 0.;
703     }
704     }
705    
706     if (m_faceOffset[2] > -1) {
707     #pragma omp for nowait
708     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
709     double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
710     *o++ = 0.;
711     *o = -1.;
712     }
713     }
714    
715     if (m_faceOffset[3] > -1) {
716     #pragma omp for nowait
717     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
718     double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
719     *o++ = 0.;
720     *o = 1.;
721     }
722     }
723     } // end of parallel section
724    
725     } else {
726     stringstream msg;
727     msg << "setToNormal() not implemented for "
728     << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
729     throw RipleyException(msg.str());
730     }
731     }
732    
733 caltinay 3701 void Rectangle::addPDEToSystem(escript::AbstractSystemMatrix& mat,
734     escript::Data& rhs, const escript::Data& A, const escript::Data& B,
735     const escript::Data& C, const escript::Data& D,
736     const escript::Data& X, const escript::Data& Y,
737     const escript::Data& d, const escript::Data& y,
738     const escript::Data& d_contact, const escript::Data& y_contact,
739     const escript::Data& d_dirac, const escript::Data& y_dirac) const
740     {
741     throw RipleyException("addPDEToSystem() not implemented");
742     }
743    
744 caltinay 3691 Paso_SystemMatrixPattern* Rectangle::getPattern(bool reducedRowOrder,
745     bool reducedColOrder) const
746     {
747     if (reducedRowOrder || reducedColOrder)
748     throw RipleyException("getPattern() not implemented for reduced order");
749    
750 caltinay 3699 // connector
751     RankVector neighbour;
752     IndexVector offsetInShared(1,0);
753     IndexVector sendShared, recvShared;
754     const IndexVector faces=getNumFacesPerBoundary();
755     const index_t left = (m_offset0==0 ? 0 : 1);
756     const index_t bottom = (m_offset1==0 ? 0 : 1);
757     // corner node from bottom-left
758     if (faces[0] == 0 && faces[2] == 0) {
759     neighbour.push_back(m_mpiInfo->rank-m_NX-1);
760     offsetInShared.push_back(offsetInShared.back()+1);
761     sendShared.push_back(m_nodeId[m_N0+left]);
762     recvShared.push_back(m_nodeId[0]);
763     }
764     // bottom edge
765     if (faces[2] == 0) {
766     neighbour.push_back(m_mpiInfo->rank-m_NX);
767     offsetInShared.push_back(offsetInShared.back()+m_N0-left);
768     for (dim_t i=left; i<m_N0; i++) {
769     // easy case, we know the neighbour id's
770     sendShared.push_back(m_nodeId[m_N0+i]);
771     recvShared.push_back(m_nodeId[i]);
772     }
773     }
774     // corner node from bottom-right
775     if (faces[1] == 0 && faces[2] == 0) {
776     neighbour.push_back(m_mpiInfo->rank-m_NX+1);
777     const index_t N0=(neighbour.back()%m_NX == 0 ? m_N0 : m_N0-1);
778     const index_t N1=(neighbour.back()/m_NX == 0 ? m_N1 : m_N1-1);
779     const index_t first=m_nodeDistribution[neighbour.back()];
780     offsetInShared.push_back(offsetInShared.back()+1);
781     sendShared.push_back(m_nodeId[(bottom+1)*m_N0-1]);
782     recvShared.push_back(first+N0*(N1-1));
783     }
784     // left edge
785     if (faces[0] == 0) {
786     neighbour.push_back(m_mpiInfo->rank-1);
787     offsetInShared.push_back(offsetInShared.back()+m_N1-bottom);
788     for (dim_t i=bottom; i<m_N1; i++) {
789     // easy case, we know the neighbour id's
790     sendShared.push_back(m_nodeId[i*m_N0+1]);
791     recvShared.push_back(m_nodeId[i*m_N0]);
792     }
793     }
794     // right edge
795     if (faces[1] == 0) {
796     neighbour.push_back(m_mpiInfo->rank+1);
797     const index_t rightN0=(neighbour.back()%m_NX == 0 ? m_N0 : m_N0-1);
798     const index_t first=m_nodeDistribution[neighbour.back()];
799     offsetInShared.push_back(offsetInShared.back()+m_N1-bottom);
800     for (dim_t i=bottom; i<m_N1; i++) {
801     sendShared.push_back(m_nodeId[(i+1)*m_N0-1]);
802     recvShared.push_back(first+rightN0*(i-bottom));
803     }
804     }
805     // corner node from top-left
806     if (faces[0] == 0 && faces[3] == 0) {
807     neighbour.push_back(m_mpiInfo->rank+m_NX-1);
808     const index_t N0=(neighbour.back()%m_NX == 0 ? m_N0 : m_N0-1);
809     const index_t first=m_nodeDistribution[neighbour.back()];
810     offsetInShared.push_back(offsetInShared.back()+1);
811     sendShared.push_back(m_nodeId[m_N0*(m_N1-1)+left]);
812     recvShared.push_back(first+N0-1);
813     }
814     // top edge
815     if (faces[3] == 0) {
816     neighbour.push_back(m_mpiInfo->rank+m_NX);
817     const index_t first=m_nodeDistribution[neighbour.back()];
818     offsetInShared.push_back(offsetInShared.back()+m_N0-left);
819     for (dim_t i=left; i<m_N0; i++) {
820     sendShared.push_back(m_nodeId[m_N0*(m_N1-1)+i]);
821     recvShared.push_back(first+i-left);
822     }
823     }
824     // corner node from top-right
825     if (faces[1] == 0 && faces[3] == 0) {
826     neighbour.push_back(m_mpiInfo->rank+m_NX+1);
827     const index_t first=m_nodeDistribution[neighbour.back()];
828     offsetInShared.push_back(offsetInShared.back()+1);
829     sendShared.push_back(m_nodeId[m_N0*m_N1-1]);
830     recvShared.push_back(first);
831     }
832     const int numDOF=m_nodeDistribution[m_mpiInfo->rank+1]-m_nodeDistribution[m_mpiInfo->rank];
833 caltinay 3702 /*
834 caltinay 3699 cout << "--- rcv_shcomp ---" << endl;
835     cout << "numDOF=" << numDOF << ", numNeighbors=" << neighbour.size() << endl;
836     for (size_t i=0; i<neighbour.size(); i++) {
837     cout << "neighbor[" << i << "]=" << neighbour[i]
838     << " offsetInShared[" << i+1 << "]=" << offsetInShared[i+1] << endl;
839     }
840     for (size_t i=0; i<recvShared.size(); i++) {
841     cout << "shared[" << i << "]=" << recvShared[i] << endl;
842     }
843     cout << "--- snd_shcomp ---" << endl;
844     for (size_t i=0; i<sendShared.size(); i++) {
845     cout << "shared[" << i << "]=" << sendShared[i] << endl;
846     }
847 caltinay 3702 */
848 caltinay 3691
849     Paso_SharedComponents *snd_shcomp = Paso_SharedComponents_alloc(
850 caltinay 3699 numDOF, neighbour.size(), &neighbour[0], &sendShared[0],
851 caltinay 3697 &offsetInShared[0], 1, 0, m_mpiInfo);
852 caltinay 3691 Paso_SharedComponents *rcv_shcomp = Paso_SharedComponents_alloc(
853 caltinay 3699 numDOF, neighbour.size(), &neighbour[0], &recvShared[0],
854 caltinay 3697 &offsetInShared[0], 1, 0, m_mpiInfo);
855 caltinay 3691 Paso_Connector* connector = Paso_Connector_alloc(snd_shcomp, rcv_shcomp);
856 caltinay 3699 Paso_SharedComponents_free(snd_shcomp);
857     Paso_SharedComponents_free(rcv_shcomp);
858 caltinay 3691
859     // create patterns
860 caltinay 3699 dim_t M, N;
861     IndexVector ptr(1,0);
862     IndexVector index;
863    
864     // main pattern
865     for (index_t i=0; i<numDOF; i++) {
866     // always add the node itself
867     index.push_back(i);
868     int num=insertNeighbours(index, i);
869     ptr.push_back(ptr.back()+num+1);
870     }
871     M=N=ptr.size()-1;
872     // paso will manage the memory
873     index_t* indexC = MEMALLOC(index.size(),index_t);
874     index_t* ptrC = MEMALLOC(ptr.size(), index_t);
875     copy(index.begin(), index.end(), indexC);
876     copy(ptr.begin(), ptr.end(), ptrC);
877 caltinay 3691 Paso_Pattern *mainPattern = Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT,
878 caltinay 3699 M, N, ptrC, indexC);
879 caltinay 3691
880 caltinay 3702 /*
881 caltinay 3699 cout << "--- main_pattern ---" << endl;
882     cout << "M=" << M << ", N=" << N << endl;
883     for (size_t i=0; i<ptr.size(); i++) {
884     cout << "ptr[" << i << "]=" << ptr[i] << endl;
885     }
886     for (size_t i=0; i<index.size(); i++) {
887     cout << "index[" << i << "]=" << index[i] << endl;
888     }
889 caltinay 3702 */
890 caltinay 3699
891     ptr.clear();
892     index.clear();
893    
894     // column & row couple patterns
895     Paso_Pattern *colCouplePattern, *rowCouplePattern;
896     generateCouplePatterns(&colCouplePattern, &rowCouplePattern);
897    
898     // allocate paso distribution
899     Paso_Distribution* distribution = Paso_Distribution_alloc(m_mpiInfo,
900     const_cast<index_t*>(&m_nodeDistribution[0]), 1, 0);
901    
902 caltinay 3691 Paso_SystemMatrixPattern* pattern = Paso_SystemMatrixPattern_alloc(
903     MATRIX_FORMAT_DEFAULT, distribution, distribution,
904     mainPattern, colCouplePattern, rowCouplePattern,
905     connector, connector);
906     Paso_Pattern_free(mainPattern);
907     Paso_Pattern_free(colCouplePattern);
908     Paso_Pattern_free(rowCouplePattern);
909     Paso_Distribution_free(distribution);
910 caltinay 3697 return pattern;
911 caltinay 3691 }
912    
913     void Rectangle::Print_Mesh_Info(const bool full) const
914     {
915     RipleyDomain::Print_Mesh_Info(full);
916     if (full) {
917     cout << " Id Coordinates" << endl;
918     cout.precision(15);
919     cout.setf(ios::scientific, ios::floatfield);
920 caltinay 3697 pair<double,double> xdx = getFirstCoordAndSpacing(0);
921     pair<double,double> ydy = getFirstCoordAndSpacing(1);
922 caltinay 3691 for (index_t i=0; i < getNumNodes(); i++) {
923     cout << " " << setw(5) << m_nodeId[i]
924 caltinay 3697 << " " << xdx.first+(i%m_N0)*xdx.second
925     << " " << ydy.first+(i/m_N0)*ydy.second << endl;
926 caltinay 3691 }
927     }
928     }
929    
930 caltinay 3697 IndexVector Rectangle::getNumNodesPerDim() const
931     {
932     IndexVector ret;
933     ret.push_back(m_N0);
934     ret.push_back(m_N1);
935     return ret;
936     }
937    
938     IndexVector Rectangle::getNumElementsPerDim() const
939     {
940     IndexVector ret;
941     ret.push_back(m_NE0);
942     ret.push_back(m_NE1);
943     return ret;
944     }
945    
946     IndexVector Rectangle::getNumFacesPerBoundary() const
947     {
948     IndexVector ret(4, 0);
949     //left
950     if (m_offset0==0)
951     ret[0]=m_NE1;
952     //right
953     if (m_mpiInfo->rank%m_NX==m_NX-1)
954     ret[1]=m_NE1;
955     //bottom
956     if (m_offset1==0)
957     ret[2]=m_NE0;
958     //top
959     if (m_mpiInfo->rank/m_NX==m_NY-1)
960     ret[3]=m_NE0;
961     return ret;
962     }
963    
964     pair<double,double> Rectangle::getFirstCoordAndSpacing(dim_t dim) const
965     {
966     if (dim==0) {
967     return pair<double,double>((m_l0*m_offset0)/m_gNE0, m_l0/m_gNE0);
968     } else if (dim==1) {
969     return pair<double,double>((m_l1*m_offset1)/m_gNE1, m_l1/m_gNE1);
970     }
971     throw RipleyException("getFirstCoordAndSpacing(): invalid argument");
972     }
973    
974 caltinay 3691 //protected
975 caltinay 3744 dim_t Rectangle::getNumDOF() const
976     {
977     return m_nodeDistribution[m_mpiInfo->rank+1]
978     -m_nodeDistribution[m_mpiInfo->rank];
979     }
980    
981     //protected
982 caltinay 3691 dim_t Rectangle::getNumFaceElements() const
983     {
984 caltinay 3699 const IndexVector faces = getNumFacesPerBoundary();
985 caltinay 3691 dim_t n=0;
986 caltinay 3699 for (size_t i=0; i<faces.size(); i++)
987     n+=faces[i];
988 caltinay 3691 return n;
989     }
990    
991     //protected
992     void Rectangle::assembleCoordinates(escript::Data& arg) const
993     {
994     escriptDataC x = arg.getDataC();
995     int numDim = m_numDim;
996     if (!isDataPointShapeEqual(&x, 1, &numDim))
997     throw RipleyException("setToX: Invalid Data object shape");
998     if (!numSamplesEqual(&x, 1, getNumNodes()))
999     throw RipleyException("setToX: Illegal number of samples in Data object");
1000    
1001 caltinay 3697 pair<double,double> xdx = getFirstCoordAndSpacing(0);
1002     pair<double,double> ydy = getFirstCoordAndSpacing(1);
1003 caltinay 3691 arg.requireWrite();
1004     #pragma omp parallel for
1005     for (dim_t i1 = 0; i1 < m_N1; i1++) {
1006     for (dim_t i0 = 0; i0 < m_N0; i0++) {
1007     double* point = arg.getSampleDataRW(i0+m_N0*i1);
1008 caltinay 3697 point[0] = xdx.first+i0*xdx.second;
1009     point[1] = ydy.first+i1*ydy.second;
1010 caltinay 3691 }
1011     }
1012     }
1013    
1014     //private
1015     void Rectangle::populateSampleIds()
1016     {
1017 caltinay 3697 // identifiers are ordered from left to right, bottom to top on each rank,
1018     // except for the shared nodes which are owned by the rank below / to the
1019     // left of the current rank
1020    
1021     // build node distribution vector first.
1022     // m_nodeDistribution[i] is the first node id on rank i, that is
1023     // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes
1024     m_nodeDistribution.assign(m_mpiInfo->size+1, 0);
1025     m_nodeDistribution[1]=getNumNodes();
1026     for (dim_t k=1; k<m_mpiInfo->size-1; k++) {
1027     const index_t x=k%m_NX;
1028     const index_t y=k/m_NX;
1029     index_t numNodes=getNumNodes();
1030     if (x>0)
1031     numNodes-=m_N1;
1032     if (y>0)
1033     numNodes-=m_N0;
1034     if (x>0 && y>0)
1035     numNodes++; // subtracted corner twice -> fix that
1036     m_nodeDistribution[k+1]=m_nodeDistribution[k]+numNodes;
1037     }
1038     m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal();
1039    
1040 caltinay 3744 m_dofId.resize(getNumDOF());
1041 caltinay 3691 m_nodeId.resize(getNumNodes());
1042 caltinay 3697
1043     // the bottom row and left column are not owned by this rank so the
1044     // identifiers need to be computed accordingly
1045     const index_t left = (m_offset0==0 ? 0 : 1);
1046     const index_t bottom = (m_offset1==0 ? 0 : 1);
1047     if (left>0) {
1048     const int neighbour=m_mpiInfo->rank-1;
1049     const index_t leftN0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1050 caltinay 3691 #pragma omp parallel for
1051 caltinay 3697 for (dim_t i1=bottom; i1<m_N1; i1++) {
1052     m_nodeId[i1*m_N0]=m_nodeDistribution[neighbour]
1053     + (i1-bottom+1)*leftN0-1;
1054     }
1055     }
1056     if (bottom>0) {
1057     const int neighbour=m_mpiInfo->rank-m_NX;
1058     const index_t bottomN0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1059     const index_t bottomN1=(neighbour/m_NX == 0 ? m_N1 : m_N1-1);
1060     #pragma omp parallel for
1061     for (dim_t i0=left; i0<m_N0; i0++) {
1062     m_nodeId[i0]=m_nodeDistribution[neighbour]
1063     + (bottomN1-1)*bottomN0 + i0 - left;
1064     }
1065     }
1066     if (left>0 && bottom>0) {
1067     const int neighbour=m_mpiInfo->rank-m_NX-1;
1068 caltinay 3699 const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1069     const index_t N1=(neighbour/m_NX == 0 ? m_N1 : m_N1-1);
1070     m_nodeId[0]=m_nodeDistribution[neighbour]+N0*N1-1;
1071 caltinay 3697 }
1072    
1073     // the rest of the id's are contiguous
1074     const index_t firstId=m_nodeDistribution[m_mpiInfo->rank];
1075     #pragma omp parallel for
1076     for (dim_t i1=bottom; i1<m_N1; i1++) {
1077     for (dim_t i0=left; i0<m_N0; i0++) {
1078 caltinay 3744 const index_t idx=i0-left+(i1-bottom)*(m_N0-left);
1079     m_nodeId[i0+i1*m_N0] = firstId+idx;
1080     m_dofId[idx] = firstId+idx;
1081 caltinay 3697 }
1082     }
1083 caltinay 3735 m_nodeTags.assign(getNumNodes(), 0);
1084     updateTagsInUse(Nodes);
1085 caltinay 3697
1086 caltinay 3735 // elements
1087 caltinay 3697 m_elementId.resize(getNumElements());
1088     #pragma omp parallel for
1089     for (dim_t k=0; k<getNumElements(); k++) {
1090     m_elementId[k]=k;
1091     }
1092 caltinay 3735 m_elementTags.assign(getNumElements(), 0);
1093     updateTagsInUse(Elements);
1094 caltinay 3697
1095 caltinay 3704 // face element id's
1096 caltinay 3697 m_faceId.resize(getNumFaceElements());
1097     #pragma omp parallel for
1098     for (dim_t k=0; k<getNumFaceElements(); k++) {
1099     m_faceId[k]=k;
1100     }
1101 caltinay 3704
1102 caltinay 3722 // generate face offset vector and set face tags
1103 caltinay 3704 const IndexVector facesPerEdge = getNumFacesPerBoundary();
1104 caltinay 3722 const index_t LEFT=1, RIGHT=2, BOTTOM=10, TOP=20;
1105     const index_t faceTag[] = { LEFT, RIGHT, BOTTOM, TOP };
1106 caltinay 3704 m_faceOffset.assign(facesPerEdge.size(), -1);
1107 caltinay 3722 m_faceTags.clear();
1108 caltinay 3704 index_t offset=0;
1109     for (size_t i=0; i<facesPerEdge.size(); i++) {
1110     if (facesPerEdge[i]>0) {
1111     m_faceOffset[i]=offset;
1112     offset+=facesPerEdge[i];
1113 caltinay 3722 m_faceTags.insert(m_faceTags.end(), facesPerEdge[i], faceTag[i]);
1114 caltinay 3704 }
1115     }
1116 caltinay 3722 setTagMap("left", LEFT);
1117     setTagMap("right", RIGHT);
1118     setTagMap("bottom", BOTTOM);
1119     setTagMap("top", TOP);
1120     updateTagsInUse(FaceElements);
1121 caltinay 3691 }
1122    
1123 caltinay 3699 //private
1124     int Rectangle::insertNeighbours(IndexVector& index, index_t node) const
1125     {
1126     const dim_t myN0 = (m_offset0==0 ? m_N0 : m_N0-1);
1127     const dim_t myN1 = (m_offset1==0 ? m_N1 : m_N1-1);
1128     const int x=node%myN0;
1129     const int y=node/myN0;
1130     int num=0;
1131     if (y>0) {
1132     if (x>0) {
1133     // bottom-left
1134     index.push_back(node-myN0-1);
1135     num++;
1136     }
1137     // bottom
1138     index.push_back(node-myN0);
1139     num++;
1140     if (x<myN0-1) {
1141     // bottom-right
1142     index.push_back(node-myN0+1);
1143     num++;
1144     }
1145     }
1146     if (x<myN0-1) {
1147     // right
1148     index.push_back(node+1);
1149     num++;
1150     if (y<myN1-1) {
1151     // top-right
1152     index.push_back(node+myN0+1);
1153     num++;
1154     }
1155     }
1156     if (y<myN1-1) {
1157     // top
1158     index.push_back(node+myN0);
1159     num++;
1160     if (x>0) {
1161     // top-left
1162     index.push_back(node+myN0-1);
1163     num++;
1164     }
1165     }
1166     if (x>0) {
1167     // left
1168     index.push_back(node-1);
1169     num++;
1170     }
1171    
1172     return num;
1173     }
1174    
1175     //private
1176     void Rectangle::generateCouplePatterns(Paso_Pattern** colPattern, Paso_Pattern** rowPattern) const
1177     {
1178     IndexVector ptr(1,0);
1179     IndexVector index;
1180     const dim_t myN0 = (m_offset0==0 ? m_N0 : m_N0-1);
1181     const dim_t myN1 = (m_offset1==0 ? m_N1 : m_N1-1);
1182     const IndexVector faces=getNumFacesPerBoundary();
1183    
1184     // bottom edge
1185     dim_t n=0;
1186     if (faces[0] == 0) {
1187     index.push_back(2*(myN0+myN1+1));
1188     index.push_back(2*(myN0+myN1+1)+1);
1189     n+=2;
1190     if (faces[2] == 0) {
1191     index.push_back(0);
1192     index.push_back(1);
1193     index.push_back(2);
1194     n+=3;
1195     }
1196     } else if (faces[2] == 0) {
1197     index.push_back(1);
1198     index.push_back(2);
1199     n+=2;
1200     }
1201     // n=neighbours of bottom-left corner node
1202     ptr.push_back(ptr.back()+n);
1203     n=0;
1204     if (faces[2] == 0) {
1205     for (dim_t i=1; i<myN0-1; i++) {
1206     index.push_back(i);
1207     index.push_back(i+1);
1208     index.push_back(i+2);
1209     ptr.push_back(ptr.back()+3);
1210     }
1211     index.push_back(myN0-1);
1212     index.push_back(myN0);
1213     n+=2;
1214     if (faces[1] == 0) {
1215     index.push_back(myN0+1);
1216     index.push_back(myN0+2);
1217     index.push_back(myN0+3);
1218     n+=3;
1219     }
1220     } else {
1221     for (dim_t i=1; i<myN0-1; i++) {
1222     ptr.push_back(ptr.back());
1223     }
1224     if (faces[1] == 0) {
1225     index.push_back(myN0+2);
1226     index.push_back(myN0+3);
1227     n+=2;
1228     }
1229     }
1230     // n=neighbours of bottom-right corner node
1231     ptr.push_back(ptr.back()+n);
1232    
1233     // 2nd row to 2nd last row
1234     for (dim_t i=1; i<myN1-1; i++) {
1235     // left edge
1236     if (faces[0] == 0) {
1237     index.push_back(2*(myN0+myN1+2)-i);
1238     index.push_back(2*(myN0+myN1+2)-i-1);
1239     index.push_back(2*(myN0+myN1+2)-i-2);
1240     ptr.push_back(ptr.back()+3);
1241     } else {
1242     ptr.push_back(ptr.back());
1243     }
1244     for (dim_t j=1; j<myN0-1; j++) {
1245     ptr.push_back(ptr.back());
1246     }
1247     // right edge
1248     if (faces[1] == 0) {
1249     index.push_back(myN0+i+1);
1250     index.push_back(myN0+i+2);
1251     index.push_back(myN0+i+3);
1252     ptr.push_back(ptr.back()+3);
1253     } else {
1254     ptr.push_back(ptr.back());
1255     }
1256     }
1257    
1258     // top edge
1259     n=0;
1260     if (faces[0] == 0) {
1261     index.push_back(2*myN0+myN1+5);
1262     index.push_back(2*myN0+myN1+4);
1263     n+=2;
1264     if (faces[3] == 0) {
1265     index.push_back(2*myN0+myN1+3);
1266     index.push_back(2*myN0+myN1+2);
1267     index.push_back(2*myN0+myN1+1);
1268     n+=3;
1269     }
1270     } else if (faces[3] == 0) {
1271     index.push_back(2*myN0+myN1+2);
1272     index.push_back(2*myN0+myN1+1);
1273     n+=2;
1274     }
1275     // n=neighbours of top-left corner node
1276     ptr.push_back(ptr.back()+n);
1277     n=0;
1278     if (faces[3] == 0) {
1279     for (dim_t i=1; i<myN0-1; i++) {
1280     index.push_back(2*myN0+myN1+i+1);
1281     index.push_back(2*myN0+myN1+i);
1282     index.push_back(2*myN0+myN1+i-1);
1283     ptr.push_back(ptr.back()+3);
1284     }
1285     index.push_back(myN0+myN1+4);
1286     index.push_back(myN0+myN1+3);
1287     n+=2;
1288     if (faces[1] == 0) {
1289     index.push_back(myN0+myN1+2);
1290     index.push_back(myN0+myN1+1);
1291     index.push_back(myN0+myN1);
1292     n+=3;
1293     }
1294     } else {
1295     for (dim_t i=1; i<myN0-1; i++) {
1296     ptr.push_back(ptr.back());
1297     }
1298     if (faces[1] == 0) {
1299     index.push_back(myN0+myN1+1);
1300     index.push_back(myN0+myN1);
1301     n+=2;
1302     }
1303     }
1304     // n=neighbours of top-right corner node
1305     ptr.push_back(ptr.back()+n);
1306    
1307     dim_t M=ptr.size()-1;
1308     map<index_t,index_t> idMap;
1309     dim_t N=0;
1310     for (IndexVector::iterator it=index.begin(); it!=index.end(); it++) {
1311     if (idMap.find(*it)==idMap.end()) {
1312     idMap[*it]=N;
1313     *it=N++;
1314     } else {
1315     *it=idMap[*it];
1316     }
1317     }
1318    
1319 caltinay 3702 /*
1320 caltinay 3699 cout << "--- colCouple_pattern ---" << endl;
1321     cout << "M=" << M << ", N=" << N << endl;
1322     for (size_t i=0; i<ptr.size(); i++) {
1323     cout << "ptr[" << i << "]=" << ptr[i] << endl;
1324     }
1325     for (size_t i=0; i<index.size(); i++) {
1326     cout << "index[" << i << "]=" << index[i] << endl;
1327     }
1328 caltinay 3702 */
1329 caltinay 3699
1330     // now build the row couple pattern
1331     IndexVector ptr2(1,0);
1332     IndexVector index2;
1333     for (dim_t id=0; id<N; id++) {
1334     n=0;
1335     for (dim_t i=0; i<M; i++) {
1336     for (dim_t j=ptr[i]; j<ptr[i+1]; j++) {
1337     if (index[j]==id) {
1338     index2.push_back(i);
1339     n++;
1340     break;
1341     }
1342     }
1343     }
1344     ptr2.push_back(ptr2.back()+n);
1345     }
1346    
1347 caltinay 3702 /*
1348 caltinay 3699 cout << "--- rowCouple_pattern ---" << endl;
1349     cout << "M=" << N << ", N=" << M << endl;
1350     for (size_t i=0; i<ptr2.size(); i++) {
1351     cout << "ptr[" << i << "]=" << ptr2[i] << endl;
1352     }
1353     for (size_t i=0; i<index2.size(); i++) {
1354     cout << "index[" << i << "]=" << index2[i] << endl;
1355     }
1356 caltinay 3702 */
1357 caltinay 3699
1358     // paso will manage the memory
1359     index_t* indexC = MEMALLOC(index.size(), index_t);
1360     index_t* ptrC = MEMALLOC(ptr.size(), index_t);
1361     copy(index.begin(), index.end(), indexC);
1362     copy(ptr.begin(), ptr.end(), ptrC);
1363     *colPattern=Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT, M, N, ptrC, indexC);
1364    
1365     // paso will manage the memory
1366     indexC = MEMALLOC(index2.size(), index_t);
1367     ptrC = MEMALLOC(ptr2.size(), index_t);
1368     copy(index2.begin(), index2.end(), indexC);
1369     copy(ptr2.begin(), ptr2.end(), ptrC);
1370     *rowPattern=Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT, N, M, ptrC, indexC);
1371     }
1372    
1373 caltinay 3702 //protected
1374 caltinay 3711 void Rectangle::interpolateNodesOnElements(escript::Data& out,
1375     escript::Data& in, bool reduced) const
1376 caltinay 3702 {
1377     const dim_t numComp = in.getDataPointSize();
1378 caltinay 3711 if (reduced) {
1379 caltinay 3724 /*** GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS TOP */
1380     const double c0 = .25;
1381 caltinay 3702 #pragma omp parallel for
1382 caltinay 3711 for (index_t k1=0; k1 < m_NE1; ++k1) {
1383     for (index_t k0=0; k0 < m_NE0; ++k0) {
1384     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0));
1385     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0));
1386     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0));
1387     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0));
1388     double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0));
1389     for (index_t i=0; i < numComp; ++i) {
1390 caltinay 3724 o[INDEX2(i,numComp,0)] = c0*(f_00[i] + f_01[i] + f_10[i] + f_11[i]);
1391 caltinay 3711 } /* end of component loop i */
1392     } /* end of k0 loop */
1393     } /* end of k1 loop */
1394     /* GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS BOTTOM */
1395     } else {
1396 caltinay 3724 /*** GENERATOR SNIP_INTERPOLATE_ELEMENTS TOP */
1397     const double c0 = .16666666666666666667;
1398     const double c1 = .044658198738520451079;
1399     const double c2 = .62200846792814621559;
1400 caltinay 3711 #pragma omp parallel for
1401     for (index_t k1=0; k1 < m_NE1; ++k1) {
1402     for (index_t k0=0; k0 < m_NE0; ++k0) {
1403     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0));
1404     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0));
1405     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0));
1406     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0));
1407     double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0));
1408     for (index_t i=0; i < numComp; ++i) {
1409 caltinay 3724 o[INDEX2(i,numComp,0)] = f_00[i]*c2 + f_11[i]*c1 + c0*(f_01[i] + f_10[i]);
1410     o[INDEX2(i,numComp,1)] = f_01[i]*c1 + f_10[i]*c2 + c0*(f_00[i] + f_11[i]);
1411     o[INDEX2(i,numComp,2)] = f_01[i]*c2 + f_10[i]*c1 + c0*(f_00[i] + f_11[i]);
1412     o[INDEX2(i,numComp,3)] = f_00[i]*c1 + f_11[i]*c2 + c0*(f_01[i] + f_10[i]);
1413 caltinay 3711 } /* end of component loop i */
1414     } /* end of k0 loop */
1415     } /* end of k1 loop */
1416     /* GENERATOR SNIP_INTERPOLATE_ELEMENTS BOTTOM */
1417     }
1418 caltinay 3702 }
1419    
1420     //protected
1421 caltinay 3711 void Rectangle::interpolateNodesOnFaces(escript::Data& out, escript::Data& in,
1422     bool reduced) const
1423 caltinay 3702 {
1424 caltinay 3704 const dim_t numComp = in.getDataPointSize();
1425 caltinay 3711 if (reduced) {
1426 caltinay 3724 const double c0 = .5;
1427     #pragma omp parallel
1428     {
1429     /*** GENERATOR SNIP_INTERPOLATE_REDUCED_FACES TOP */
1430     if (m_faceOffset[0] > -1) {
1431     #pragma omp for nowait
1432     for (index_t k1=0; k1 < m_NE1; ++k1) {
1433     const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0));
1434     const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0));
1435     double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
1436     for (index_t i=0; i < numComp; ++i) {
1437     o[INDEX2(i,numComp,0)] = c0*(f_00[i] + f_01[i]);
1438     } /* end of component loop i */
1439     } /* end of k1 loop */
1440     } /* end of face 0 */
1441     if (m_faceOffset[1] > -1) {
1442     #pragma omp for nowait
1443     for (index_t k1=0; k1 < m_NE1; ++k1) {
1444     const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0));
1445     const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0));
1446     double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
1447     for (index_t i=0; i < numComp; ++i) {
1448     o[INDEX2(i,numComp,0)] = c0*(f_10[i] + f_11[i]);
1449     } /* end of component loop i */
1450     } /* end of k1 loop */
1451     } /* end of face 1 */
1452     if (m_faceOffset[2] > -1) {
1453     #pragma omp for nowait
1454     for (index_t k0=0; k0 < m_NE0; ++k0) {
1455     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0));
1456     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0));
1457     double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1458     for (index_t i=0; i < numComp; ++i) {
1459     o[INDEX2(i,numComp,0)] = c0*(f_00[i] + f_10[i]);
1460     } /* end of component loop i */
1461     } /* end of k0 loop */
1462     } /* end of face 2 */
1463     if (m_faceOffset[3] > -1) {
1464     #pragma omp for nowait
1465     for (index_t k0=0; k0 < m_NE0; ++k0) {
1466     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0));
1467     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0));
1468     double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1469     for (index_t i=0; i < numComp; ++i) {
1470     o[INDEX2(i,numComp,0)] = c0*(f_01[i] + f_11[i]);
1471     } /* end of component loop i */
1472     } /* end of k0 loop */
1473     } /* end of face 3 */
1474     /* GENERATOR SNIP_INTERPOLATE_REDUCED_FACES BOTTOM */
1475     } // end of parallel section
1476 caltinay 3711 } else {
1477 caltinay 3724 const double c0 = 0.21132486540518711775;
1478     const double c1 = 0.78867513459481288225;
1479     #pragma omp parallel
1480     {
1481     /*** GENERATOR SNIP_INTERPOLATE_FACES TOP */
1482     if (m_faceOffset[0] > -1) {
1483     #pragma omp for nowait
1484     for (index_t k1=0; k1 < m_NE1; ++k1) {
1485     const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0));
1486     const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0));
1487     double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
1488     for (index_t i=0; i < numComp; ++i) {
1489     o[INDEX2(i,numComp,0)] = f_00[i]*c1 + f_01[i]*c0;
1490     o[INDEX2(i,numComp,1)] = f_00[i]*c0 + f_01[i]*c1;
1491     } /* end of component loop i */
1492     } /* end of k1 loop */
1493     } /* end of face 0 */
1494     if (m_faceOffset[1] > -1) {
1495     #pragma omp for nowait
1496     for (index_t k1=0; k1 < m_NE1; ++k1) {
1497     const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0));
1498     const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0));
1499     double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
1500     for (index_t i=0; i < numComp; ++i) {
1501     o[INDEX2(i,numComp,0)] = f_10[i]*c1 + f_11[i]*c0;
1502     o[INDEX2(i,numComp,1)] = f_10[i]*c0 + f_11[i]*c1;
1503     } /* end of component loop i */
1504     } /* end of k1 loop */
1505     } /* end of face 1 */
1506     if (m_faceOffset[2] > -1) {
1507     #pragma omp for nowait
1508     for (index_t k0=0; k0 < m_NE0; ++k0) {
1509     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0));
1510     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0));
1511     double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1512     for (index_t i=0; i < numComp; ++i) {
1513     o[INDEX2(i,numComp,0)] = f_00[i]*c1 + f_10[i]*c0;
1514     o[INDEX2(i,numComp,1)] = f_00[i]*c0 + f_10[i]*c1;
1515     } /* end of component loop i */
1516     } /* end of k0 loop */
1517     } /* end of face 2 */
1518     if (m_faceOffset[3] > -1) {
1519     #pragma omp for nowait
1520     for (index_t k0=0; k0 < m_NE0; ++k0) {
1521     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0));
1522     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0));
1523     double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1524     for (index_t i=0; i < numComp; ++i) {
1525     o[INDEX2(i,numComp,0)] = f_01[i]*c1 + f_11[i]*c0;
1526     o[INDEX2(i,numComp,1)] = f_01[i]*c0 + f_11[i]*c1;
1527     } /* end of component loop i */
1528     } /* end of k0 loop */
1529     } /* end of face 3 */
1530     /* GENERATOR SNIP_INTERPOLATE_FACES BOTTOM */
1531     } // end of parallel section
1532 caltinay 3711 }
1533 caltinay 3702 }
1534    
1535 caltinay 3691 } // end of namespace ripley
1536    

  ViewVC Help
Powered by ViewVC 1.1.26