/[escript]/trunk/ripley/src/Rectangle.cpp
ViewVC logotype

Annotation of /trunk/ripley/src/Rectangle.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3722 - (hide annotations)
Wed Dec 7 05:53:22 2011 UTC (7 years, 10 months ago) by caltinay
Original Path: branches/ripleygmg_from_3668/ripley/src/Rectangle.cpp
File size: 64100 byte(s)
All "util on ripley" tests pass now :-)
-added support for node/element/face tags
-implemented setToNormal()
-added a bunch of omp nowait

1 caltinay 3691
2     /*******************************************************
3     *
4     * Copyright (c) 2003-2011 by University of Queensland
5     * Earth Systems Science Computational Center (ESSCC)
6     * http://www.uq.edu.au/esscc
7     *
8     * Primary Business: Queensland, Australia
9     * Licensed under the Open Software License version 3.0
10     * http://www.opensource.org/licenses/osl-3.0.php
11     *
12     *******************************************************/
13    
14     #include <ripley/Rectangle.h>
15     extern "C" {
16     #include "paso/SystemMatrixPattern.h"
17     }
18    
19     #if USE_SILO
20     #include <silo.h>
21     #ifdef ESYS_MPI
22     #include <pmpio.h>
23     #endif
24     #endif
25    
26     #include <iomanip>
27    
28     using namespace std;
29    
30     namespace ripley {
31    
32     Rectangle::Rectangle(int n0, int n1, double l0, double l1, int d0, int d1) :
33     RipleyDomain(2),
34     m_gNE0(n0),
35     m_gNE1(n1),
36     m_l0(l0),
37     m_l1(l1),
38     m_NX(d0),
39     m_NY(d1)
40     {
41     // ensure number of subdivisions is valid and nodes can be distributed
42     // among number of ranks
43     if (m_NX*m_NY != m_mpiInfo->size)
44     throw RipleyException("Invalid number of spatial subdivisions");
45    
46     if (n0%m_NX > 0 || n1%m_NY > 0)
47     throw RipleyException("Number of elements must be separable into number of ranks in each dimension");
48    
49     // local number of elements
50     m_NE0 = n0/m_NX;
51     m_NE1 = n1/m_NY;
52     // local number of nodes (not necessarily owned)
53     m_N0 = m_NE0+1;
54     m_N1 = m_NE1+1;
55     // bottom-left node is at (offset0,offset1) in global mesh
56     m_offset0 = m_NE0*(m_mpiInfo->rank%m_NX);
57     m_offset1 = m_NE1*(m_mpiInfo->rank/m_NX);
58     populateSampleIds();
59     }
60    
61     Rectangle::~Rectangle()
62     {
63     }
64    
65     string Rectangle::getDescription() const
66     {
67     return "ripley::Rectangle";
68     }
69    
70     bool Rectangle::operator==(const AbstractDomain& other) const
71     {
72     if (dynamic_cast<const Rectangle*>(&other))
73     return this==&other;
74    
75     return false;
76     }
77    
78     void Rectangle::dump(const string& fileName) const
79     {
80     #if USE_SILO
81     string fn(fileName);
82     if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
83     fn+=".silo";
84     }
85    
86     const int NUM_SILO_FILES = 1;
87     const char* blockDirFmt = "/block%04d";
88     int driver=DB_HDF5;
89     string siloPath;
90     DBfile* dbfile = NULL;
91    
92     #ifdef ESYS_MPI
93     PMPIO_baton_t* baton = NULL;
94     #endif
95    
96     if (m_mpiInfo->size > 1) {
97     #ifdef ESYS_MPI
98     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
99     0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
100     PMPIO_DefaultClose, (void*)&driver);
101     // try the fallback driver in case of error
102     if (!baton && driver != DB_PDB) {
103     driver = DB_PDB;
104     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
105     0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
106     PMPIO_DefaultClose, (void*)&driver);
107     }
108     if (baton) {
109     char str[64];
110     snprintf(str, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
111     siloPath = str;
112     dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath.c_str());
113     }
114     #endif
115     } else {
116     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
117     getDescription().c_str(), driver);
118     // try the fallback driver in case of error
119     if (!dbfile && driver != DB_PDB) {
120     driver = DB_PDB;
121     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
122     getDescription().c_str(), driver);
123     }
124     }
125    
126     if (!dbfile)
127     throw RipleyException("dump: Could not create Silo file");
128    
129     /*
130     if (driver==DB_HDF5) {
131     // gzip level 1 already provides good compression with minimal
132     // performance penalty. Some tests showed that gzip levels >3 performed
133     // rather badly on escript data both in terms of time and space
134     DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
135     }
136     */
137    
138     boost::scoped_ptr<double> x(new double[m_N0]);
139     boost::scoped_ptr<double> y(new double[m_N1]);
140     double* coords[2] = { x.get(), y.get() };
141 caltinay 3697 pair<double,double> xdx = getFirstCoordAndSpacing(0);
142     pair<double,double> ydy = getFirstCoordAndSpacing(1);
143 caltinay 3691 #pragma omp parallel
144     {
145 caltinay 3722 #pragma omp for nowait
146 caltinay 3691 for (dim_t i0 = 0; i0 < m_N0; i0++) {
147 caltinay 3697 coords[0][i0]=xdx.first+i0*xdx.second;
148 caltinay 3691 }
149 caltinay 3722 #pragma omp for nowait
150 caltinay 3691 for (dim_t i1 = 0; i1 < m_N1; i1++) {
151 caltinay 3697 coords[1][i1]=ydy.first+i1*ydy.second;
152 caltinay 3691 }
153     }
154 caltinay 3697 IndexVector dims = getNumNodesPerDim();
155    
156     // write mesh
157     DBPutQuadmesh(dbfile, "mesh", NULL, coords, &dims[0], 2, DB_DOUBLE,
158 caltinay 3691 DB_COLLINEAR, NULL);
159    
160 caltinay 3697 // write node ids
161     DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], &dims[0], 2,
162     NULL, 0, DB_INT, DB_NODECENT, NULL);
163    
164     // write element ids
165     dims = getNumElementsPerDim();
166     DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
167     &dims[0], 2, NULL, 0, DB_INT, DB_ZONECENT, NULL);
168    
169     // rank 0 writes multimesh and multivar
170 caltinay 3691 if (m_mpiInfo->rank == 0) {
171     vector<string> tempstrings;
172 caltinay 3697 vector<char*> names;
173 caltinay 3691 for (dim_t i=0; i<m_mpiInfo->size; i++) {
174     stringstream path;
175     path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
176     tempstrings.push_back(path.str());
177 caltinay 3697 names.push_back((char*)tempstrings.back().c_str());
178 caltinay 3691 }
179 caltinay 3697 vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
180 caltinay 3691 DBSetDir(dbfile, "/");
181 caltinay 3697 DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
182     &types[0], NULL);
183     tempstrings.clear();
184     names.clear();
185     for (dim_t i=0; i<m_mpiInfo->size; i++) {
186     stringstream path;
187     path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
188     tempstrings.push_back(path.str());
189     names.push_back((char*)tempstrings.back().c_str());
190     }
191     types.assign(m_mpiInfo->size, DB_QUADVAR);
192     DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
193     &types[0], NULL);
194     tempstrings.clear();
195     names.clear();
196     for (dim_t i=0; i<m_mpiInfo->size; i++) {
197     stringstream path;
198     path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
199     tempstrings.push_back(path.str());
200     names.push_back((char*)tempstrings.back().c_str());
201     }
202     DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
203     &types[0], NULL);
204 caltinay 3691 }
205    
206     if (m_mpiInfo->size > 1) {
207     #ifdef ESYS_MPI
208     PMPIO_HandOffBaton(baton, dbfile);
209     PMPIO_Finish(baton);
210     #endif
211     } else {
212     DBClose(dbfile);
213     }
214    
215     #else // USE_SILO
216     throw RipleyException("dump(): no Silo support");
217     #endif
218     }
219    
220 caltinay 3697 const int* Rectangle::borrowSampleReferenceIDs(int fsType) const
221 caltinay 3691 {
222 caltinay 3697 switch (fsType) {
223 caltinay 3691 case Nodes:
224     return &m_nodeId[0];
225     case Elements:
226     return &m_elementId[0];
227     case FaceElements:
228     return &m_faceId[0];
229     default:
230     break;
231     }
232    
233     stringstream msg;
234     msg << "borrowSampleReferenceIDs() not implemented for function space type "
235 caltinay 3701 << functionSpaceTypeAsString(fsType);
236 caltinay 3691 throw RipleyException(msg.str());
237     }
238    
239     bool Rectangle::ownSample(int fsCode, index_t id) const
240     {
241     #ifdef ESYS_MPI
242 caltinay 3702 if (fsCode != ReducedNodes) {
243 caltinay 3699 const index_t myFirst=m_nodeDistribution[m_mpiInfo->rank];
244     const index_t myLast=m_nodeDistribution[m_mpiInfo->rank+1]-1;
245 caltinay 3691 return (m_nodeId[id]>=myFirst && m_nodeId[id]<=myLast);
246 caltinay 3702 } else {
247     stringstream msg;
248     msg << "ownSample() not implemented for "
249     << functionSpaceTypeAsString(fsCode);
250     throw RipleyException(msg.str());
251     }
252 caltinay 3691 #else
253     return true;
254     #endif
255     }
256    
257 caltinay 3702 void Rectangle::setToGradient(escript::Data& out, const escript::Data& cIn) const
258 caltinay 3691 {
259 caltinay 3702 escript::Data& in = *const_cast<escript::Data*>(&cIn);
260 caltinay 3701 const dim_t numComp = in.getDataPointSize();
261 caltinay 3702 const double h0 = m_l0/m_gNE0;
262     const double h1 = m_l1/m_gNE1;
263     if (out.getFunctionSpace().getTypeCode() == Elements) {
264     /* GENERATOR SNIP_GRAD_ELEMENTS TOP */
265     const double tmp0_13 = 0.78867513459481288225/h1;
266     const double tmp0_0 = 0.78867513459481288225/h0;
267     const double tmp0_4 = 0.21132486540518711775/h0;
268     const double tmp0_10 = 0.78867513459481288225/h1;
269     const double tmp0_1 = 0.21132486540518711775/h0;
270     const double tmp0_8 = -0.21132486540518711775/h1;
271     const double tmp0_14 = 0.21132486540518711775/h1;
272     const double tmp0_5 = 0.78867513459481288225/h0;
273     const double tmp0_11 = -0.78867513459481288225/h1;
274     const double tmp0_2 = -0.21132486540518711775/h0;
275     const double tmp0_9 = 0.21132486540518711775/h1;
276     const double tmp0_15 = -0.21132486540518711775/h1;
277     const double tmp0_6 = -0.78867513459481288225/h0;
278     const double tmp0_3 = -0.78867513459481288225/h0;
279     const double tmp0_12 = -0.78867513459481288225/h1;
280     const double tmp0_7 = -0.21132486540518711775/h0;
281 caltinay 3697 #pragma omp parallel for
282 caltinay 3707 for (index_t k1=0; k1 < m_NE1; ++k1) {
283     for (index_t k0=0; k0 < m_NE0; ++k0) {
284 caltinay 3702 const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0));
285     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0));
286     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0));
287     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0));
288     double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0));
289     for (index_t i=0; i < numComp; ++i) {
290     o[INDEX3(i,0,0,numComp,2)] = f_00[i]*tmp0_3 + f_01[i]*tmp0_2 + f_10[i]*tmp0_0 + f_11[i]*tmp0_1;
291     o[INDEX3(i,1,0,numComp,2)] = f_00[i]*tmp0_11 + f_01[i]*tmp0_10 + f_10[i]*tmp0_8 + f_11[i]*tmp0_9;
292     o[INDEX3(i,0,1,numComp,2)] = f_00[i]*tmp0_3 + f_01[i]*tmp0_2 + f_10[i]*tmp0_0 + f_11[i]*tmp0_1;
293     o[INDEX3(i,1,1,numComp,2)] = f_00[i]*tmp0_15 + f_01[i]*tmp0_14 + f_10[i]*tmp0_12 + f_11[i]*tmp0_13;
294     o[INDEX3(i,0,2,numComp,2)] = f_00[i]*tmp0_7 + f_01[i]*tmp0_6 + f_10[i]*tmp0_4 + f_11[i]*tmp0_5;
295     o[INDEX3(i,1,2,numComp,2)] = f_00[i]*tmp0_11 + f_01[i]*tmp0_10 + f_10[i]*tmp0_8 + f_11[i]*tmp0_9;
296     o[INDEX3(i,0,3,numComp,2)] = f_00[i]*tmp0_7 + f_01[i]*tmp0_6 + f_10[i]*tmp0_4 + f_11[i]*tmp0_5;
297     o[INDEX3(i,1,3,numComp,2)] = f_00[i]*tmp0_15 + f_01[i]*tmp0_14 + f_10[i]*tmp0_12 + f_11[i]*tmp0_13;
298     } /* end of component loop i */
299     } /* end of k0 loop */
300     } /* end of k1 loop */
301     /* GENERATOR SNIP_GRAD_ELEMENTS BOTTOM */
302 caltinay 3711 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
303     /* GENERATOR SNIP_GRAD_REDUCED_ELEMENTS TOP */
304     const double tmp0_3 = 0.5/h1;
305     const double tmp0_2 = -0.5/h1;
306     const double tmp0_1 = -0.5/h0;
307     const double tmp0_0 = 0.5/h0;
308     #pragma omp parallel for
309     for (index_t k1=0; k1 < m_NE1; ++k1) {
310     for (index_t k0=0; k0 < m_NE0; ++k0) {
311     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0));
312     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0));
313     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0));
314     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0));
315     double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0));
316     for (index_t i=0; i < numComp; ++i) {
317     o[INDEX3(i,0,0,numComp,2)] = tmp0_0*(f_10[i] + f_11[i]) + tmp0_1*(f_00[i] + f_01[i]);
318     o[INDEX3(i,1,0,numComp,2)] = tmp0_2*(f_00[i] + f_10[i]) + tmp0_3*(f_01[i] + f_11[i]);
319     } /* end of component loop i */
320     } /* end of k0 loop */
321     } /* end of k1 loop */
322     /* GENERATOR SNIP_GRAD_REDUCED_ELEMENTS BOTTOM */
323 caltinay 3707 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
324 caltinay 3722 #pragma omp parallel
325     {
326     /* GENERATOR SNIP_GRAD_FACES TOP */
327     if (m_faceOffset[0] > -1) {
328     const double tmp0_0 = 0.78867513459481288225/h0;
329     const double tmp0_4 = 0.21132486540518711775/h0;
330     const double tmp0_1 = 0.21132486540518711775/h0;
331     const double tmp0_8 = 1.0/h1;
332     const double tmp0_5 = 0.78867513459481288225/h0;
333     const double tmp0_2 = -0.21132486540518711775/h0;
334     const double tmp0_9 = -1/h1;
335     const double tmp0_6 = -0.78867513459481288225/h0;
336     const double tmp0_3 = -0.78867513459481288225/h0;
337     const double tmp0_7 = -0.21132486540518711775/h0;
338     #pragma omp for nowait
339     for (index_t k1=0; k1 < m_NE1; ++k1) {
340     const register double* f_10 = in.getSampleDataRO(INDEX2(1,k1, m_N0));
341     const register double* f_11 = in.getSampleDataRO(INDEX2(1,k1+1, m_N0));
342     const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0));
343     const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0));
344     double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
345     for (index_t i=0; i < numComp; ++i) {
346     o[INDEX3(i,0,0,numComp,2)] = f_00[i]*tmp0_3 + f_01[i]*tmp0_2 + f_10[i]*tmp0_0 + f_11[i]*tmp0_1;
347     o[INDEX3(i,1,0,numComp,2)] = f_00[i]*tmp0_9 + f_01[i]*tmp0_8;
348     o[INDEX3(i,0,1,numComp,2)] = f_00[i]*tmp0_7 + f_01[i]*tmp0_6 + f_10[i]*tmp0_4 + f_11[i]*tmp0_5;
349     o[INDEX3(i,1,1,numComp,2)] = f_00[i]*tmp0_9 + f_01[i]*tmp0_8;
350     } /* end of component loop i */
351     } /* end of k1 loop */
352     } /* end of face 0 */
353     if (m_faceOffset[1] > -1) {
354     const double tmp0_0 = 0.78867513459481288225/h0;
355     const double tmp0_4 = 0.21132486540518711775/h0;
356     const double tmp0_1 = 0.21132486540518711775/h0;
357     const double tmp0_8 = -1/h1;
358     const double tmp0_5 = 0.78867513459481288225/h0;
359     const double tmp0_2 = -0.21132486540518711775/h0;
360     const double tmp0_9 = 1.0/h1;
361     const double tmp0_6 = -0.78867513459481288225/h0;
362     const double tmp0_3 = -0.78867513459481288225/h0;
363     const double tmp0_7 = -0.21132486540518711775/h0;
364     #pragma omp for nowait
365     for (index_t k1=0; k1 < m_NE1; ++k1) {
366     const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0));
367     const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0));
368     const register double* f_01 = in.getSampleDataRO(INDEX2(m_N0-2,k1+1, m_N0));
369     const register double* f_00 = in.getSampleDataRO(INDEX2(m_N0-2,k1, m_N0));
370     double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
371     for (index_t i=0; i < numComp; ++i) {
372     o[INDEX3(i,0,0,numComp,2)] = f_00[i]*tmp0_3 + f_01[i]*tmp0_2 + f_10[i]*tmp0_0 + f_11[i]*tmp0_1;
373     o[INDEX3(i,1,0,numComp,2)] = f_10[i]*tmp0_8 + f_11[i]*tmp0_9;
374     o[INDEX3(i,0,1,numComp,2)] = f_00[i]*tmp0_7 + f_01[i]*tmp0_6 + f_10[i]*tmp0_4 + f_11[i]*tmp0_5;
375     o[INDEX3(i,1,1,numComp,2)] = f_10[i]*tmp0_8 + f_11[i]*tmp0_9;
376     } /* end of component loop i */
377     } /* end of k1 loop */
378     } /* end of face 1 */
379     if (m_faceOffset[2] > -1) {
380     const double tmp0_0 = -1/h0;
381     const double tmp0_4 = 0.21132486540518711775/h1;
382     const double tmp0_1 = 1.0/h0;
383     const double tmp0_8 = 0.78867513459481288225/h1;
384     const double tmp0_5 = 0.78867513459481288225/h1;
385     const double tmp0_2 = -0.78867513459481288225/h1;
386     const double tmp0_9 = 0.21132486540518711775/h1;
387     const double tmp0_6 = -0.21132486540518711775/h1;
388     const double tmp0_3 = -0.21132486540518711775/h1;
389     const double tmp0_7 = -0.78867513459481288225/h1;
390     #pragma omp for nowait
391     for (index_t k0=0; k0 < m_NE0; ++k0) {
392     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0));
393     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0));
394     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,1, m_N0));
395     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,1, m_N0));
396     double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
397     for (index_t i=0; i < numComp; ++i) {
398     o[INDEX3(i,0,0,numComp,2)] = f_00[i]*tmp0_0 + f_10[i]*tmp0_1;
399     o[INDEX3(i,1,0,numComp,2)] = f_00[i]*tmp0_2 + f_01[i]*tmp0_5 + f_10[i]*tmp0_3 + f_11[i]*tmp0_4;
400     o[INDEX3(i,0,1,numComp,2)] = f_00[i]*tmp0_0 + f_10[i]*tmp0_1;
401     o[INDEX3(i,1,1,numComp,2)] = f_00[i]*tmp0_6 + f_01[i]*tmp0_9 + f_10[i]*tmp0_7 + f_11[i]*tmp0_8;
402     } /* end of component loop i */
403     } /* end of k0 loop */
404     } /* end of face 2 */
405     if (m_faceOffset[3] > -1) {
406     const double tmp0_0 = 1.0/h0;
407     const double tmp0_4 = -0.21132486540518711775/h1;
408     const double tmp0_1 = -1/h0;
409     const double tmp0_8 = -0.78867513459481288225/h1;
410     const double tmp0_5 = -0.78867513459481288225/h1;
411     const double tmp0_2 = 0.21132486540518711775/h1;
412     const double tmp0_9 = -0.21132486540518711775/h1;
413     const double tmp0_6 = 0.78867513459481288225/h1;
414     const double tmp0_3 = 0.78867513459481288225/h1;
415     const double tmp0_7 = 0.21132486540518711775/h1;
416     #pragma omp for nowait
417     for (index_t k0=0; k0 < m_NE0; ++k0) {
418     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0));
419     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0));
420     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,m_N1-2, m_N0));
421     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,m_N1-2, m_N0));
422     double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
423     for (index_t i=0; i < numComp; ++i) {
424     o[INDEX3(i,0,0,numComp,2)] = f_01[i]*tmp0_1 + f_11[i]*tmp0_0;
425     o[INDEX3(i,1,0,numComp,2)] = f_00[i]*tmp0_5 + f_01[i]*tmp0_3 + f_10[i]*tmp0_4 + f_11[i]*tmp0_2;
426     o[INDEX3(i,0,1,numComp,2)] = f_01[i]*tmp0_1 + f_11[i]*tmp0_0;
427     o[INDEX3(i,1,1,numComp,2)] = f_00[i]*tmp0_9 + f_01[i]*tmp0_7 + f_10[i]*tmp0_8 + f_11[i]*tmp0_6;
428     } /* end of component loop i */
429     } /* end of k0 loop */
430     } /* end of face 3 */
431     /* GENERATOR SNIP_GRAD_FACES BOTTOM */
432     } // end of parallel section
433 caltinay 3711 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
434 caltinay 3722 #pragma omp parallel
435     {
436     /* GENERATOR SNIP_GRAD_REDUCED_FACES TOP */
437     if (m_faceOffset[0] > -1) {
438     const double tmp0_3 = -1/h1;
439     const double tmp0_2 = 1.0/h1;
440     const double tmp0_1 = -0.5/h0;
441     const double tmp0_0 = 0.5/h0;
442     #pragma omp for nowait
443     for (index_t k1=0; k1 < m_NE1; ++k1) {
444     const register double* f_10 = in.getSampleDataRO(INDEX2(1,k1, m_N0));
445     const register double* f_11 = in.getSampleDataRO(INDEX2(1,k1+1, m_N0));
446     const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0));
447     const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0));
448     double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
449     for (index_t i=0; i < numComp; ++i) {
450     o[INDEX3(i,0,0,numComp,2)] = tmp0_0*(f_10[i] + f_11[i]) + tmp0_1*(f_00[i] + f_01[i]);
451     o[INDEX3(i,1,0,numComp,2)] = f_00[i]*tmp0_3 + f_01[i]*tmp0_2;
452     } /* end of component loop i */
453     } /* end of k1 loop */
454     } /* end of face 0 */
455     if (m_faceOffset[1] > -1) {
456     const double tmp0_3 = 1.0/h1;
457     const double tmp0_2 = -1/h1;
458     const double tmp0_1 = -0.5/h0;
459     const double tmp0_0 = 0.5/h0;
460     #pragma omp for nowait
461     for (index_t k1=0; k1 < m_NE1; ++k1) {
462     const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0));
463     const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0));
464     const register double* f_01 = in.getSampleDataRO(INDEX2(m_N0-2,k1+1, m_N0));
465     const register double* f_00 = in.getSampleDataRO(INDEX2(m_N0-2,k1, m_N0));
466     double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
467     for (index_t i=0; i < numComp; ++i) {
468     o[INDEX3(i,0,0,numComp,2)] = tmp0_0*(f_10[i] + f_11[i]) + tmp0_1*(f_00[i] + f_01[i]);
469     o[INDEX3(i,1,0,numComp,2)] = f_10[i]*tmp0_2 + f_11[i]*tmp0_3;
470     } /* end of component loop i */
471     } /* end of k1 loop */
472     } /* end of face 1 */
473     if (m_faceOffset[2] > -1) {
474     const double tmp0_3 = 0.5/h1;
475     const double tmp0_2 = -0.5/h1;
476     const double tmp0_1 = 1.0/h0;
477     const double tmp0_0 = -1/h0;
478     #pragma omp for nowait
479     for (index_t k0=0; k0 < m_NE0; ++k0) {
480     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0));
481     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0));
482     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,1, m_N0));
483     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,1, m_N0));
484     double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
485     for (index_t i=0; i < numComp; ++i) {
486     o[INDEX3(i,0,0,numComp,2)] = f_00[i]*tmp0_0 + f_10[i]*tmp0_1;
487     o[INDEX3(i,1,0,numComp,2)] = tmp0_2*(f_00[i] + f_10[i]) + tmp0_3*(f_01[i] + f_11[i]);
488     } /* end of component loop i */
489     } /* end of k0 loop */
490     } /* end of face 2 */
491     if (m_faceOffset[3] > -1) {
492     const double tmp0_3 = -0.5/h1;
493     const double tmp0_2 = 0.5/h1;
494     const double tmp0_1 = -1/h0;
495     const double tmp0_0 = 1.0/h0;
496     #pragma omp for nowait
497     for (index_t k0=0; k0 < m_NE0; ++k0) {
498     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0));
499     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0));
500     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,m_N1-2, m_N0));
501     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,m_N1-2, m_N0));
502     double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
503     for (index_t i=0; i < numComp; ++i) {
504     o[INDEX3(i,0,0,numComp,2)] = f_01[i]*tmp0_1 + f_11[i]*tmp0_0;
505     o[INDEX3(i,1,0,numComp,2)] = tmp0_2*(f_01[i] + f_11[i]) + tmp0_3*(f_00[i] + f_10[i]);
506     } /* end of component loop i */
507     } /* end of k0 loop */
508     } /* end of face 3 */
509     /* GENERATOR SNIP_GRAD_REDUCED_FACES BOTTOM */
510     } // end of parallel section
511 caltinay 3702 } else {
512 caltinay 3707 stringstream msg;
513     msg << "setToGradient() not implemented for "
514     << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
515     throw RipleyException(msg.str());
516 caltinay 3702 }
517 caltinay 3701 }
518 caltinay 3697
519 caltinay 3713 void Rectangle::setToIntegrals(vector<double>& integrals, const escript::Data& arg) const
520     {
521     escript::Data& in = *const_cast<escript::Data*>(&arg);
522     const dim_t numComp = in.getDataPointSize();
523     const double h0 = m_l0/m_gNE0;
524     const double h1 = m_l1/m_gNE1;
525     if (arg.getFunctionSpace().getTypeCode() == Elements) {
526     const double w_0 = h0*h1/4.;
527     #pragma omp parallel
528     {
529     vector<double> int_local(numComp, 0);
530 caltinay 3722 #pragma omp for nowait
531 caltinay 3713 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
532     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
533     const double* f = in.getSampleDataRO(INDEX2(k0, k1, m_NE0));
534     for (index_t i=0; i < numComp; ++i) {
535     const register double f_0 = f[INDEX2(i,0,numComp)];
536     const register double f_1 = f[INDEX2(i,1,numComp)];
537     const register double f_2 = f[INDEX2(i,2,numComp)];
538     const register double f_3 = f[INDEX2(i,3,numComp)];
539     int_local[i]+=(f_0+f_1+f_2+f_3)*w_0;
540     } /* end of component loop i */
541     } /* end of k0 loop */
542     } /* end of k1 loop */
543    
544     #pragma omp critical
545     for (index_t i=0; i<numComp; i++)
546     integrals[i]+=int_local[i];
547 caltinay 3722 } // end of parallel section
548 caltinay 3713 } else if (arg.getFunctionSpace().getTypeCode() == ReducedElements) {
549     const double w_0 = h0*h1;
550     #pragma omp parallel
551     {
552     vector<double> int_local(numComp, 0);
553 caltinay 3722 #pragma omp for nowait
554 caltinay 3713 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
555     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
556     const double* f = in.getSampleDataRO(INDEX2(k0, k1, m_NE0));
557     for (index_t i=0; i < numComp; ++i) {
558     int_local[i]+=f[i]*w_0;
559     } /* end of component loop i */
560     } /* end of k0 loop */
561     } /* end of k1 loop */
562    
563     #pragma omp critical
564     for (index_t i=0; i<numComp; i++)
565     integrals[i]+=int_local[i];
566 caltinay 3722 } // end of parallel section
567 caltinay 3713 } else if (arg.getFunctionSpace().getTypeCode() == FaceElements) {
568     const double w_0 = h0/2.;
569     const double w_1 = h1/2.;
570     #pragma omp parallel
571     {
572     vector<double> int_local(numComp, 0);
573     if (m_faceOffset[0] > -1) {
574 caltinay 3722 #pragma omp for nowait
575 caltinay 3713 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
576     const double* f = in.getSampleDataRO(m_faceOffset[0]+k1);
577     for (index_t i=0; i < numComp; ++i) {
578     const register double f_0 = f[INDEX2(i,0,numComp)];
579     const register double f_1 = f[INDEX2(i,1,numComp)];
580     int_local[i]+=(f_0+f_1)*w_1;
581     } /* end of component loop i */
582     } /* end of k1 loop */
583     }
584    
585     if (m_faceOffset[1] > -1) {
586 caltinay 3722 #pragma omp for nowait
587 caltinay 3713 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
588     const double* f = in.getSampleDataRO(m_faceOffset[1]+k1);
589     for (index_t i=0; i < numComp; ++i) {
590     const register double f_0 = f[INDEX2(i,0,numComp)];
591     const register double f_1 = f[INDEX2(i,1,numComp)];
592     int_local[i]+=(f_0+f_1)*w_1;
593     } /* end of component loop i */
594     } /* end of k1 loop */
595     }
596    
597     if (m_faceOffset[2] > -1) {
598 caltinay 3722 #pragma omp for nowait
599 caltinay 3713 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
600     const double* f = in.getSampleDataRO(m_faceOffset[2]+k0);
601     for (index_t i=0; i < numComp; ++i) {
602     const register double f_0 = f[INDEX2(i,0,numComp)];
603     const register double f_1 = f[INDEX2(i,1,numComp)];
604     int_local[i]+=(f_0+f_1)*w_0;
605     } /* end of component loop i */
606     } /* end of k0 loop */
607     }
608    
609     if (m_faceOffset[3] > -1) {
610 caltinay 3722 #pragma omp for nowait
611 caltinay 3713 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
612     const double* f = in.getSampleDataRO(m_faceOffset[3]+k0);
613     for (index_t i=0; i < numComp; ++i) {
614     const register double f_0 = f[INDEX2(i,0,numComp)];
615     const register double f_1 = f[INDEX2(i,1,numComp)];
616     int_local[i]+=(f_0+f_1)*w_0;
617     } /* end of component loop i */
618     } /* end of k0 loop */
619     }
620    
621     #pragma omp critical
622     for (index_t i=0; i<numComp; i++)
623     integrals[i]+=int_local[i];
624 caltinay 3722 } // end of parallel section
625 caltinay 3713 } else if (arg.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
626     #pragma omp parallel
627     {
628     vector<double> int_local(numComp, 0);
629     if (m_faceOffset[0] > -1) {
630 caltinay 3722 #pragma omp for nowait
631 caltinay 3713 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
632     const double* f = in.getSampleDataRO(m_faceOffset[0]+k1);
633     for (index_t i=0; i < numComp; ++i) {
634     int_local[i]+=f[i]*h1;
635     } /* end of component loop i */
636     } /* end of k1 loop */
637     }
638    
639     if (m_faceOffset[1] > -1) {
640 caltinay 3722 #pragma omp for nowait
641 caltinay 3713 for (index_t k1 = 0; k1 < m_NE1; ++k1) {
642     const double* f = in.getSampleDataRO(m_faceOffset[1]+k1);
643     for (index_t i=0; i < numComp; ++i) {
644     int_local[i]+=f[i]*h1;
645     } /* end of component loop i */
646     } /* end of k1 loop */
647     }
648    
649     if (m_faceOffset[2] > -1) {
650 caltinay 3722 #pragma omp for nowait
651 caltinay 3713 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
652     const double* f = in.getSampleDataRO(m_faceOffset[2]+k0);
653     for (index_t i=0; i < numComp; ++i) {
654     int_local[i]+=f[i]*h0;
655     } /* end of component loop i */
656     } /* end of k0 loop */
657     }
658    
659     if (m_faceOffset[3] > -1) {
660 caltinay 3722 #pragma omp for nowait
661 caltinay 3713 for (index_t k0 = 0; k0 < m_NE0; ++k0) {
662     const double* f = in.getSampleDataRO(m_faceOffset[3]+k0);
663     for (index_t i=0; i < numComp; ++i) {
664     int_local[i]+=f[i]*h0;
665     } /* end of component loop i */
666     } /* end of k0 loop */
667     }
668    
669     #pragma omp critical
670     for (index_t i=0; i<numComp; i++)
671     integrals[i]+=int_local[i];
672 caltinay 3722 } // end of parallel section
673 caltinay 3713 } else {
674     stringstream msg;
675     msg << "setToIntegrals() not implemented for "
676     << functionSpaceTypeAsString(arg.getFunctionSpace().getTypeCode());
677     throw RipleyException(msg.str());
678     }
679     }
680    
681 caltinay 3722 void Rectangle::setToNormal(escript::Data& out) const
682     {
683     if (out.getFunctionSpace().getTypeCode() == FaceElements) {
684     #pragma omp parallel
685     {
686     if (m_faceOffset[0] > -1) {
687     #pragma omp for nowait
688     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
689     double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
690     // set vector at two quadrature points
691     *o++ = -1.;
692     *o++ = 0.;
693     *o++ = -1.;
694     *o = 0.;
695     }
696     }
697    
698     if (m_faceOffset[1] > -1) {
699     #pragma omp for nowait
700     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
701     double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
702     // set vector at two quadrature points
703     *o++ = 1.;
704     *o++ = 0.;
705     *o++ = 1.;
706     *o = 0.;
707     }
708     }
709    
710     if (m_faceOffset[2] > -1) {
711     #pragma omp for nowait
712     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
713     double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
714     // set vector at two quadrature points
715     *o++ = 0.;
716     *o++ = -1.;
717     *o++ = 0.;
718     *o = -1.;
719     }
720     }
721    
722     if (m_faceOffset[3] > -1) {
723     #pragma omp for nowait
724     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
725     double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
726     // set vector at two quadrature points
727     *o++ = 0.;
728     *o++ = 1.;
729     *o++ = 0.;
730     *o = 1.;
731     }
732     }
733     } // end of parallel section
734     } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
735     #pragma omp parallel
736     {
737     if (m_faceOffset[0] > -1) {
738     #pragma omp for nowait
739     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
740     double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
741     *o++ = -1.;
742     *o = 0.;
743     }
744     }
745    
746     if (m_faceOffset[1] > -1) {
747     #pragma omp for nowait
748     for (index_t k1 = 0; k1 < m_NE1; ++k1) {
749     double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
750     *o++ = 1.;
751     *o = 0.;
752     }
753     }
754    
755     if (m_faceOffset[2] > -1) {
756     #pragma omp for nowait
757     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
758     double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
759     *o++ = 0.;
760     *o = -1.;
761     }
762     }
763    
764     if (m_faceOffset[3] > -1) {
765     #pragma omp for nowait
766     for (index_t k0 = 0; k0 < m_NE0; ++k0) {
767     double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
768     *o++ = 0.;
769     *o = 1.;
770     }
771     }
772     } // end of parallel section
773    
774     } else {
775     stringstream msg;
776     msg << "setToNormal() not implemented for "
777     << functionSpaceTypeAsString(out.getFunctionSpace().getTypeCode());
778     throw RipleyException(msg.str());
779     }
780     }
781    
782 caltinay 3701 void Rectangle::addPDEToSystem(escript::AbstractSystemMatrix& mat,
783     escript::Data& rhs, const escript::Data& A, const escript::Data& B,
784     const escript::Data& C, const escript::Data& D,
785     const escript::Data& X, const escript::Data& Y,
786     const escript::Data& d, const escript::Data& y,
787     const escript::Data& d_contact, const escript::Data& y_contact,
788     const escript::Data& d_dirac, const escript::Data& y_dirac) const
789     {
790     throw RipleyException("addPDEToSystem() not implemented");
791     }
792    
793 caltinay 3691 Paso_SystemMatrixPattern* Rectangle::getPattern(bool reducedRowOrder,
794     bool reducedColOrder) const
795     {
796     if (reducedRowOrder || reducedColOrder)
797     throw RipleyException("getPattern() not implemented for reduced order");
798    
799 caltinay 3699 // connector
800     RankVector neighbour;
801     IndexVector offsetInShared(1,0);
802     IndexVector sendShared, recvShared;
803     const IndexVector faces=getNumFacesPerBoundary();
804     const index_t left = (m_offset0==0 ? 0 : 1);
805     const index_t bottom = (m_offset1==0 ? 0 : 1);
806     // corner node from bottom-left
807     if (faces[0] == 0 && faces[2] == 0) {
808     neighbour.push_back(m_mpiInfo->rank-m_NX-1);
809     offsetInShared.push_back(offsetInShared.back()+1);
810     sendShared.push_back(m_nodeId[m_N0+left]);
811     recvShared.push_back(m_nodeId[0]);
812     }
813     // bottom edge
814     if (faces[2] == 0) {
815     neighbour.push_back(m_mpiInfo->rank-m_NX);
816     offsetInShared.push_back(offsetInShared.back()+m_N0-left);
817     for (dim_t i=left; i<m_N0; i++) {
818     // easy case, we know the neighbour id's
819     sendShared.push_back(m_nodeId[m_N0+i]);
820     recvShared.push_back(m_nodeId[i]);
821     }
822     }
823     // corner node from bottom-right
824     if (faces[1] == 0 && faces[2] == 0) {
825     neighbour.push_back(m_mpiInfo->rank-m_NX+1);
826     const index_t N0=(neighbour.back()%m_NX == 0 ? m_N0 : m_N0-1);
827     const index_t N1=(neighbour.back()/m_NX == 0 ? m_N1 : m_N1-1);
828     const index_t first=m_nodeDistribution[neighbour.back()];
829     offsetInShared.push_back(offsetInShared.back()+1);
830     sendShared.push_back(m_nodeId[(bottom+1)*m_N0-1]);
831     recvShared.push_back(first+N0*(N1-1));
832     }
833     // left edge
834     if (faces[0] == 0) {
835     neighbour.push_back(m_mpiInfo->rank-1);
836     offsetInShared.push_back(offsetInShared.back()+m_N1-bottom);
837     for (dim_t i=bottom; i<m_N1; i++) {
838     // easy case, we know the neighbour id's
839     sendShared.push_back(m_nodeId[i*m_N0+1]);
840     recvShared.push_back(m_nodeId[i*m_N0]);
841     }
842     }
843     // right edge
844     if (faces[1] == 0) {
845     neighbour.push_back(m_mpiInfo->rank+1);
846     const index_t rightN0=(neighbour.back()%m_NX == 0 ? m_N0 : m_N0-1);
847     const index_t first=m_nodeDistribution[neighbour.back()];
848     offsetInShared.push_back(offsetInShared.back()+m_N1-bottom);
849     for (dim_t i=bottom; i<m_N1; i++) {
850     sendShared.push_back(m_nodeId[(i+1)*m_N0-1]);
851     recvShared.push_back(first+rightN0*(i-bottom));
852     }
853     }
854     // corner node from top-left
855     if (faces[0] == 0 && faces[3] == 0) {
856     neighbour.push_back(m_mpiInfo->rank+m_NX-1);
857     const index_t N0=(neighbour.back()%m_NX == 0 ? m_N0 : m_N0-1);
858     const index_t first=m_nodeDistribution[neighbour.back()];
859     offsetInShared.push_back(offsetInShared.back()+1);
860     sendShared.push_back(m_nodeId[m_N0*(m_N1-1)+left]);
861     recvShared.push_back(first+N0-1);
862     }
863     // top edge
864     if (faces[3] == 0) {
865     neighbour.push_back(m_mpiInfo->rank+m_NX);
866     const index_t first=m_nodeDistribution[neighbour.back()];
867     offsetInShared.push_back(offsetInShared.back()+m_N0-left);
868     for (dim_t i=left; i<m_N0; i++) {
869     sendShared.push_back(m_nodeId[m_N0*(m_N1-1)+i]);
870     recvShared.push_back(first+i-left);
871     }
872     }
873     // corner node from top-right
874     if (faces[1] == 0 && faces[3] == 0) {
875     neighbour.push_back(m_mpiInfo->rank+m_NX+1);
876     const index_t first=m_nodeDistribution[neighbour.back()];
877     offsetInShared.push_back(offsetInShared.back()+1);
878     sendShared.push_back(m_nodeId[m_N0*m_N1-1]);
879     recvShared.push_back(first);
880     }
881     const int numDOF=m_nodeDistribution[m_mpiInfo->rank+1]-m_nodeDistribution[m_mpiInfo->rank];
882 caltinay 3702 /*
883 caltinay 3699 cout << "--- rcv_shcomp ---" << endl;
884     cout << "numDOF=" << numDOF << ", numNeighbors=" << neighbour.size() << endl;
885     for (size_t i=0; i<neighbour.size(); i++) {
886     cout << "neighbor[" << i << "]=" << neighbour[i]
887     << " offsetInShared[" << i+1 << "]=" << offsetInShared[i+1] << endl;
888     }
889     for (size_t i=0; i<recvShared.size(); i++) {
890     cout << "shared[" << i << "]=" << recvShared[i] << endl;
891     }
892     cout << "--- snd_shcomp ---" << endl;
893     for (size_t i=0; i<sendShared.size(); i++) {
894     cout << "shared[" << i << "]=" << sendShared[i] << endl;
895     }
896 caltinay 3702 */
897 caltinay 3691
898     Paso_SharedComponents *snd_shcomp = Paso_SharedComponents_alloc(
899 caltinay 3699 numDOF, neighbour.size(), &neighbour[0], &sendShared[0],
900 caltinay 3697 &offsetInShared[0], 1, 0, m_mpiInfo);
901 caltinay 3691 Paso_SharedComponents *rcv_shcomp = Paso_SharedComponents_alloc(
902 caltinay 3699 numDOF, neighbour.size(), &neighbour[0], &recvShared[0],
903 caltinay 3697 &offsetInShared[0], 1, 0, m_mpiInfo);
904 caltinay 3691 Paso_Connector* connector = Paso_Connector_alloc(snd_shcomp, rcv_shcomp);
905 caltinay 3699 Paso_SharedComponents_free(snd_shcomp);
906     Paso_SharedComponents_free(rcv_shcomp);
907 caltinay 3691
908     // create patterns
909 caltinay 3699 dim_t M, N;
910     IndexVector ptr(1,0);
911     IndexVector index;
912    
913     // main pattern
914     for (index_t i=0; i<numDOF; i++) {
915     // always add the node itself
916     index.push_back(i);
917     int num=insertNeighbours(index, i);
918     ptr.push_back(ptr.back()+num+1);
919     }
920     M=N=ptr.size()-1;
921     // paso will manage the memory
922     index_t* indexC = MEMALLOC(index.size(),index_t);
923     index_t* ptrC = MEMALLOC(ptr.size(), index_t);
924     copy(index.begin(), index.end(), indexC);
925     copy(ptr.begin(), ptr.end(), ptrC);
926 caltinay 3691 Paso_Pattern *mainPattern = Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT,
927 caltinay 3699 M, N, ptrC, indexC);
928 caltinay 3691
929 caltinay 3702 /*
930 caltinay 3699 cout << "--- main_pattern ---" << endl;
931     cout << "M=" << M << ", N=" << N << endl;
932     for (size_t i=0; i<ptr.size(); i++) {
933     cout << "ptr[" << i << "]=" << ptr[i] << endl;
934     }
935     for (size_t i=0; i<index.size(); i++) {
936     cout << "index[" << i << "]=" << index[i] << endl;
937     }
938 caltinay 3702 */
939 caltinay 3699
940     ptr.clear();
941     index.clear();
942    
943     // column & row couple patterns
944     Paso_Pattern *colCouplePattern, *rowCouplePattern;
945     generateCouplePatterns(&colCouplePattern, &rowCouplePattern);
946    
947     // allocate paso distribution
948     Paso_Distribution* distribution = Paso_Distribution_alloc(m_mpiInfo,
949     const_cast<index_t*>(&m_nodeDistribution[0]), 1, 0);
950    
951 caltinay 3691 Paso_SystemMatrixPattern* pattern = Paso_SystemMatrixPattern_alloc(
952     MATRIX_FORMAT_DEFAULT, distribution, distribution,
953     mainPattern, colCouplePattern, rowCouplePattern,
954     connector, connector);
955     Paso_Pattern_free(mainPattern);
956     Paso_Pattern_free(colCouplePattern);
957     Paso_Pattern_free(rowCouplePattern);
958     Paso_Distribution_free(distribution);
959 caltinay 3697 return pattern;
960 caltinay 3691 }
961    
962     void Rectangle::Print_Mesh_Info(const bool full) const
963     {
964     RipleyDomain::Print_Mesh_Info(full);
965     if (full) {
966     cout << " Id Coordinates" << endl;
967     cout.precision(15);
968     cout.setf(ios::scientific, ios::floatfield);
969 caltinay 3697 pair<double,double> xdx = getFirstCoordAndSpacing(0);
970     pair<double,double> ydy = getFirstCoordAndSpacing(1);
971 caltinay 3691 for (index_t i=0; i < getNumNodes(); i++) {
972     cout << " " << setw(5) << m_nodeId[i]
973 caltinay 3697 << " " << xdx.first+(i%m_N0)*xdx.second
974     << " " << ydy.first+(i/m_N0)*ydy.second << endl;
975 caltinay 3691 }
976     }
977     }
978    
979 caltinay 3697 IndexVector Rectangle::getNumNodesPerDim() const
980     {
981     IndexVector ret;
982     ret.push_back(m_N0);
983     ret.push_back(m_N1);
984     return ret;
985     }
986    
987     IndexVector Rectangle::getNumElementsPerDim() const
988     {
989     IndexVector ret;
990     ret.push_back(m_NE0);
991     ret.push_back(m_NE1);
992     return ret;
993     }
994    
995     IndexVector Rectangle::getNumFacesPerBoundary() const
996     {
997     IndexVector ret(4, 0);
998     //left
999     if (m_offset0==0)
1000     ret[0]=m_NE1;
1001     //right
1002     if (m_mpiInfo->rank%m_NX==m_NX-1)
1003     ret[1]=m_NE1;
1004     //bottom
1005     if (m_offset1==0)
1006     ret[2]=m_NE0;
1007     //top
1008     if (m_mpiInfo->rank/m_NX==m_NY-1)
1009     ret[3]=m_NE0;
1010     return ret;
1011     }
1012    
1013     pair<double,double> Rectangle::getFirstCoordAndSpacing(dim_t dim) const
1014     {
1015     if (dim==0) {
1016     return pair<double,double>((m_l0*m_offset0)/m_gNE0, m_l0/m_gNE0);
1017     } else if (dim==1) {
1018     return pair<double,double>((m_l1*m_offset1)/m_gNE1, m_l1/m_gNE1);
1019     }
1020     throw RipleyException("getFirstCoordAndSpacing(): invalid argument");
1021     }
1022    
1023 caltinay 3691 //protected
1024     dim_t Rectangle::getNumFaceElements() const
1025     {
1026 caltinay 3699 const IndexVector faces = getNumFacesPerBoundary();
1027 caltinay 3691 dim_t n=0;
1028 caltinay 3699 for (size_t i=0; i<faces.size(); i++)
1029     n+=faces[i];
1030 caltinay 3691 return n;
1031     }
1032    
1033     //protected
1034     void Rectangle::assembleCoordinates(escript::Data& arg) const
1035     {
1036     escriptDataC x = arg.getDataC();
1037     int numDim = m_numDim;
1038     if (!isDataPointShapeEqual(&x, 1, &numDim))
1039     throw RipleyException("setToX: Invalid Data object shape");
1040     if (!numSamplesEqual(&x, 1, getNumNodes()))
1041     throw RipleyException("setToX: Illegal number of samples in Data object");
1042    
1043 caltinay 3697 pair<double,double> xdx = getFirstCoordAndSpacing(0);
1044     pair<double,double> ydy = getFirstCoordAndSpacing(1);
1045 caltinay 3691 arg.requireWrite();
1046     #pragma omp parallel for
1047     for (dim_t i1 = 0; i1 < m_N1; i1++) {
1048     for (dim_t i0 = 0; i0 < m_N0; i0++) {
1049     double* point = arg.getSampleDataRW(i0+m_N0*i1);
1050 caltinay 3697 point[0] = xdx.first+i0*xdx.second;
1051     point[1] = ydy.first+i1*ydy.second;
1052 caltinay 3691 }
1053     }
1054     }
1055    
1056     //private
1057     void Rectangle::populateSampleIds()
1058     {
1059 caltinay 3697 // identifiers are ordered from left to right, bottom to top on each rank,
1060     // except for the shared nodes which are owned by the rank below / to the
1061     // left of the current rank
1062    
1063     // build node distribution vector first.
1064     // m_nodeDistribution[i] is the first node id on rank i, that is
1065     // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes
1066     m_nodeDistribution.assign(m_mpiInfo->size+1, 0);
1067     m_nodeDistribution[1]=getNumNodes();
1068     for (dim_t k=1; k<m_mpiInfo->size-1; k++) {
1069     const index_t x=k%m_NX;
1070     const index_t y=k/m_NX;
1071     index_t numNodes=getNumNodes();
1072     if (x>0)
1073     numNodes-=m_N1;
1074     if (y>0)
1075     numNodes-=m_N0;
1076     if (x>0 && y>0)
1077     numNodes++; // subtracted corner twice -> fix that
1078     m_nodeDistribution[k+1]=m_nodeDistribution[k]+numNodes;
1079     }
1080     m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal();
1081    
1082 caltinay 3691 m_nodeId.resize(getNumNodes());
1083 caltinay 3697
1084     // the bottom row and left column are not owned by this rank so the
1085     // identifiers need to be computed accordingly
1086     const index_t left = (m_offset0==0 ? 0 : 1);
1087     const index_t bottom = (m_offset1==0 ? 0 : 1);
1088     if (left>0) {
1089     const int neighbour=m_mpiInfo->rank-1;
1090     const index_t leftN0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1091 caltinay 3691 #pragma omp parallel for
1092 caltinay 3697 for (dim_t i1=bottom; i1<m_N1; i1++) {
1093     m_nodeId[i1*m_N0]=m_nodeDistribution[neighbour]
1094     + (i1-bottom+1)*leftN0-1;
1095     }
1096     }
1097     if (bottom>0) {
1098     const int neighbour=m_mpiInfo->rank-m_NX;
1099     const index_t bottomN0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1100     const index_t bottomN1=(neighbour/m_NX == 0 ? m_N1 : m_N1-1);
1101     #pragma omp parallel for
1102     for (dim_t i0=left; i0<m_N0; i0++) {
1103     m_nodeId[i0]=m_nodeDistribution[neighbour]
1104     + (bottomN1-1)*bottomN0 + i0 - left;
1105     }
1106     }
1107     if (left>0 && bottom>0) {
1108     const int neighbour=m_mpiInfo->rank-m_NX-1;
1109 caltinay 3699 const index_t N0=(neighbour%m_NX == 0 ? m_N0 : m_N0-1);
1110     const index_t N1=(neighbour/m_NX == 0 ? m_N1 : m_N1-1);
1111     m_nodeId[0]=m_nodeDistribution[neighbour]+N0*N1-1;
1112 caltinay 3697 }
1113    
1114     // the rest of the id's are contiguous
1115     const index_t firstId=m_nodeDistribution[m_mpiInfo->rank];
1116     #pragma omp parallel for
1117     for (dim_t i1=bottom; i1<m_N1; i1++) {
1118     for (dim_t i0=left; i0<m_N0; i0++) {
1119     m_nodeId[i0+i1*m_N0] = firstId+i0-left+(i1-bottom)*(m_N0-left);
1120     }
1121     }
1122    
1123 caltinay 3704 // element id's
1124 caltinay 3697 m_elementId.resize(getNumElements());
1125     #pragma omp parallel for
1126     for (dim_t k=0; k<getNumElements(); k++) {
1127     m_elementId[k]=k;
1128     }
1129    
1130 caltinay 3704 // face element id's
1131 caltinay 3697 m_faceId.resize(getNumFaceElements());
1132     #pragma omp parallel for
1133     for (dim_t k=0; k<getNumFaceElements(); k++) {
1134     m_faceId[k]=k;
1135     }
1136 caltinay 3704
1137 caltinay 3722 // generate face offset vector and set face tags
1138 caltinay 3704 const IndexVector facesPerEdge = getNumFacesPerBoundary();
1139 caltinay 3722 const index_t LEFT=1, RIGHT=2, BOTTOM=10, TOP=20;
1140     const index_t faceTag[] = { LEFT, RIGHT, BOTTOM, TOP };
1141 caltinay 3704 m_faceOffset.assign(facesPerEdge.size(), -1);
1142 caltinay 3722 m_faceTags.clear();
1143 caltinay 3704 index_t offset=0;
1144     for (size_t i=0; i<facesPerEdge.size(); i++) {
1145     if (facesPerEdge[i]>0) {
1146     m_faceOffset[i]=offset;
1147     offset+=facesPerEdge[i];
1148 caltinay 3722 m_faceTags.insert(m_faceTags.end(), facesPerEdge[i], faceTag[i]);
1149 caltinay 3704 }
1150     }
1151 caltinay 3722 setTagMap("left", LEFT);
1152     setTagMap("right", RIGHT);
1153     setTagMap("bottom", BOTTOM);
1154     setTagMap("top", TOP);
1155     updateTagsInUse(FaceElements);
1156 caltinay 3691 }
1157    
1158 caltinay 3699 //private
1159     int Rectangle::insertNeighbours(IndexVector& index, index_t node) const
1160     {
1161     const dim_t myN0 = (m_offset0==0 ? m_N0 : m_N0-1);
1162     const dim_t myN1 = (m_offset1==0 ? m_N1 : m_N1-1);
1163     const int x=node%myN0;
1164     const int y=node/myN0;
1165     int num=0;
1166     if (y>0) {
1167     if (x>0) {
1168     // bottom-left
1169     index.push_back(node-myN0-1);
1170     num++;
1171     }
1172     // bottom
1173     index.push_back(node-myN0);
1174     num++;
1175     if (x<myN0-1) {
1176     // bottom-right
1177     index.push_back(node-myN0+1);
1178     num++;
1179     }
1180     }
1181     if (x<myN0-1) {
1182     // right
1183     index.push_back(node+1);
1184     num++;
1185     if (y<myN1-1) {
1186     // top-right
1187     index.push_back(node+myN0+1);
1188     num++;
1189     }
1190     }
1191     if (y<myN1-1) {
1192     // top
1193     index.push_back(node+myN0);
1194     num++;
1195     if (x>0) {
1196     // top-left
1197     index.push_back(node+myN0-1);
1198     num++;
1199     }
1200     }
1201     if (x>0) {
1202     // left
1203     index.push_back(node-1);
1204     num++;
1205     }
1206    
1207     return num;
1208     }
1209    
1210     //private
1211     void Rectangle::generateCouplePatterns(Paso_Pattern** colPattern, Paso_Pattern** rowPattern) const
1212     {
1213     IndexVector ptr(1,0);
1214     IndexVector index;
1215     const dim_t myN0 = (m_offset0==0 ? m_N0 : m_N0-1);
1216     const dim_t myN1 = (m_offset1==0 ? m_N1 : m_N1-1);
1217     const IndexVector faces=getNumFacesPerBoundary();
1218    
1219     // bottom edge
1220     dim_t n=0;
1221     if (faces[0] == 0) {
1222     index.push_back(2*(myN0+myN1+1));
1223     index.push_back(2*(myN0+myN1+1)+1);
1224     n+=2;
1225     if (faces[2] == 0) {
1226     index.push_back(0);
1227     index.push_back(1);
1228     index.push_back(2);
1229     n+=3;
1230     }
1231     } else if (faces[2] == 0) {
1232     index.push_back(1);
1233     index.push_back(2);
1234     n+=2;
1235     }
1236     // n=neighbours of bottom-left corner node
1237     ptr.push_back(ptr.back()+n);
1238     n=0;
1239     if (faces[2] == 0) {
1240     for (dim_t i=1; i<myN0-1; i++) {
1241     index.push_back(i);
1242     index.push_back(i+1);
1243     index.push_back(i+2);
1244     ptr.push_back(ptr.back()+3);
1245     }
1246     index.push_back(myN0-1);
1247     index.push_back(myN0);
1248     n+=2;
1249     if (faces[1] == 0) {
1250     index.push_back(myN0+1);
1251     index.push_back(myN0+2);
1252     index.push_back(myN0+3);
1253     n+=3;
1254     }
1255     } else {
1256     for (dim_t i=1; i<myN0-1; i++) {
1257     ptr.push_back(ptr.back());
1258     }
1259     if (faces[1] == 0) {
1260     index.push_back(myN0+2);
1261     index.push_back(myN0+3);
1262     n+=2;
1263     }
1264     }
1265     // n=neighbours of bottom-right corner node
1266     ptr.push_back(ptr.back()+n);
1267    
1268     // 2nd row to 2nd last row
1269     for (dim_t i=1; i<myN1-1; i++) {
1270     // left edge
1271     if (faces[0] == 0) {
1272     index.push_back(2*(myN0+myN1+2)-i);
1273     index.push_back(2*(myN0+myN1+2)-i-1);
1274     index.push_back(2*(myN0+myN1+2)-i-2);
1275     ptr.push_back(ptr.back()+3);
1276     } else {
1277     ptr.push_back(ptr.back());
1278     }
1279     for (dim_t j=1; j<myN0-1; j++) {
1280     ptr.push_back(ptr.back());
1281     }
1282     // right edge
1283     if (faces[1] == 0) {
1284     index.push_back(myN0+i+1);
1285     index.push_back(myN0+i+2);
1286     index.push_back(myN0+i+3);
1287     ptr.push_back(ptr.back()+3);
1288     } else {
1289     ptr.push_back(ptr.back());
1290     }
1291     }
1292    
1293     // top edge
1294     n=0;
1295     if (faces[0] == 0) {
1296     index.push_back(2*myN0+myN1+5);
1297     index.push_back(2*myN0+myN1+4);
1298     n+=2;
1299     if (faces[3] == 0) {
1300     index.push_back(2*myN0+myN1+3);
1301     index.push_back(2*myN0+myN1+2);
1302     index.push_back(2*myN0+myN1+1);
1303     n+=3;
1304     }
1305     } else if (faces[3] == 0) {
1306     index.push_back(2*myN0+myN1+2);
1307     index.push_back(2*myN0+myN1+1);
1308     n+=2;
1309     }
1310     // n=neighbours of top-left corner node
1311     ptr.push_back(ptr.back()+n);
1312     n=0;
1313     if (faces[3] == 0) {
1314     for (dim_t i=1; i<myN0-1; i++) {
1315     index.push_back(2*myN0+myN1+i+1);
1316     index.push_back(2*myN0+myN1+i);
1317     index.push_back(2*myN0+myN1+i-1);
1318     ptr.push_back(ptr.back()+3);
1319     }
1320     index.push_back(myN0+myN1+4);
1321     index.push_back(myN0+myN1+3);
1322     n+=2;
1323     if (faces[1] == 0) {
1324     index.push_back(myN0+myN1+2);
1325     index.push_back(myN0+myN1+1);
1326     index.push_back(myN0+myN1);
1327     n+=3;
1328     }
1329     } else {
1330     for (dim_t i=1; i<myN0-1; i++) {
1331     ptr.push_back(ptr.back());
1332     }
1333     if (faces[1] == 0) {
1334     index.push_back(myN0+myN1+1);
1335     index.push_back(myN0+myN1);
1336     n+=2;
1337     }
1338     }
1339     // n=neighbours of top-right corner node
1340     ptr.push_back(ptr.back()+n);
1341    
1342     dim_t M=ptr.size()-1;
1343     map<index_t,index_t> idMap;
1344     dim_t N=0;
1345     for (IndexVector::iterator it=index.begin(); it!=index.end(); it++) {
1346     if (idMap.find(*it)==idMap.end()) {
1347     idMap[*it]=N;
1348     *it=N++;
1349     } else {
1350     *it=idMap[*it];
1351     }
1352     }
1353    
1354 caltinay 3702 /*
1355 caltinay 3699 cout << "--- colCouple_pattern ---" << endl;
1356     cout << "M=" << M << ", N=" << N << endl;
1357     for (size_t i=0; i<ptr.size(); i++) {
1358     cout << "ptr[" << i << "]=" << ptr[i] << endl;
1359     }
1360     for (size_t i=0; i<index.size(); i++) {
1361     cout << "index[" << i << "]=" << index[i] << endl;
1362     }
1363 caltinay 3702 */
1364 caltinay 3699
1365     // now build the row couple pattern
1366     IndexVector ptr2(1,0);
1367     IndexVector index2;
1368     for (dim_t id=0; id<N; id++) {
1369     n=0;
1370     for (dim_t i=0; i<M; i++) {
1371     for (dim_t j=ptr[i]; j<ptr[i+1]; j++) {
1372     if (index[j]==id) {
1373     index2.push_back(i);
1374     n++;
1375     break;
1376     }
1377     }
1378     }
1379     ptr2.push_back(ptr2.back()+n);
1380     }
1381    
1382 caltinay 3702 /*
1383 caltinay 3699 cout << "--- rowCouple_pattern ---" << endl;
1384     cout << "M=" << N << ", N=" << M << endl;
1385     for (size_t i=0; i<ptr2.size(); i++) {
1386     cout << "ptr[" << i << "]=" << ptr2[i] << endl;
1387     }
1388     for (size_t i=0; i<index2.size(); i++) {
1389     cout << "index[" << i << "]=" << index2[i] << endl;
1390     }
1391 caltinay 3702 */
1392 caltinay 3699
1393     // paso will manage the memory
1394     index_t* indexC = MEMALLOC(index.size(), index_t);
1395     index_t* ptrC = MEMALLOC(ptr.size(), index_t);
1396     copy(index.begin(), index.end(), indexC);
1397     copy(ptr.begin(), ptr.end(), ptrC);
1398     *colPattern=Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT, M, N, ptrC, indexC);
1399    
1400     // paso will manage the memory
1401     indexC = MEMALLOC(index2.size(), index_t);
1402     ptrC = MEMALLOC(ptr2.size(), index_t);
1403     copy(index2.begin(), index2.end(), indexC);
1404     copy(ptr2.begin(), ptr2.end(), ptrC);
1405     *rowPattern=Paso_Pattern_alloc(MATRIX_FORMAT_DEFAULT, N, M, ptrC, indexC);
1406     }
1407    
1408 caltinay 3702 //protected
1409 caltinay 3711 void Rectangle::interpolateNodesOnElements(escript::Data& out,
1410     escript::Data& in, bool reduced) const
1411 caltinay 3702 {
1412     const dim_t numComp = in.getDataPointSize();
1413 caltinay 3711 if (reduced) {
1414     /* GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS TOP */
1415     const double tmp0_0 = 0.25000000000000000000;
1416 caltinay 3702 #pragma omp parallel for
1417 caltinay 3711 for (index_t k1=0; k1 < m_NE1; ++k1) {
1418     for (index_t k0=0; k0 < m_NE0; ++k0) {
1419     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0));
1420     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0));
1421     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0));
1422     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0));
1423     double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0));
1424     for (index_t i=0; i < numComp; ++i) {
1425     o[INDEX2(i,numComp,0)] = tmp0_0*(f_00[i] + f_01[i] + f_10[i] + f_11[i]);
1426     } /* end of component loop i */
1427     } /* end of k0 loop */
1428     } /* end of k1 loop */
1429     /* GENERATOR SNIP_INTERPOLATE_REDUCED_ELEMENTS BOTTOM */
1430     } else {
1431     /* GENERATOR SNIP_INTERPOLATE_ELEMENTS TOP */
1432     const double tmp0_2 = 0.62200846792814621559;
1433     const double tmp0_1 = 0.044658198738520451079;
1434     const double tmp0_0 = 0.16666666666666666667;
1435     #pragma omp parallel for
1436     for (index_t k1=0; k1 < m_NE1; ++k1) {
1437     for (index_t k0=0; k0 < m_NE0; ++k0) {
1438     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,k1, m_N0));
1439     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,k1+1, m_N0));
1440     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,k1+1, m_N0));
1441     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,k1, m_N0));
1442     double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE0));
1443     for (index_t i=0; i < numComp; ++i) {
1444     o[INDEX2(i,numComp,0)] = f_00[i]*tmp0_2 + f_11[i]*tmp0_1 + tmp0_0*(f_01[i] + f_10[i]);
1445     o[INDEX2(i,numComp,1)] = f_01[i]*tmp0_1 + f_10[i]*tmp0_2 + tmp0_0*(f_00[i] + f_11[i]);
1446     o[INDEX2(i,numComp,2)] = f_01[i]*tmp0_2 + f_10[i]*tmp0_1 + tmp0_0*(f_00[i] + f_11[i]);
1447     o[INDEX2(i,numComp,3)] = f_00[i]*tmp0_1 + f_11[i]*tmp0_2 + tmp0_0*(f_01[i] + f_10[i]);
1448     } /* end of component loop i */
1449     } /* end of k0 loop */
1450     } /* end of k1 loop */
1451     /* GENERATOR SNIP_INTERPOLATE_ELEMENTS BOTTOM */
1452     }
1453 caltinay 3702 }
1454    
1455     //protected
1456 caltinay 3711 void Rectangle::interpolateNodesOnFaces(escript::Data& out, escript::Data& in,
1457     bool reduced) const
1458 caltinay 3702 {
1459 caltinay 3704 const dim_t numComp = in.getDataPointSize();
1460 caltinay 3711 if (reduced) {
1461     /* GENERATOR SNIP_INTERPOLATE_REDUCED_FACES TOP */
1462     if (m_faceOffset[0] > -1) {
1463     const double tmp0_0 = 0.50000000000000000000;
1464 caltinay 3704 #pragma omp parallel for
1465 caltinay 3711 for (index_t k1=0; k1 < m_NE1; ++k1) {
1466     const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0));
1467     const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0));
1468     double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
1469     for (index_t i=0; i < numComp; ++i) {
1470     o[INDEX2(i,numComp,0)] = tmp0_0*(f_00[i] + f_01[i]);
1471     } /* end of component loop i */
1472     } /* end of k1 loop */
1473     } /* end of face 0 */
1474     if (m_faceOffset[1] > -1) {
1475     const double tmp0_0 = 0.50000000000000000000;
1476 caltinay 3704 #pragma omp parallel for
1477 caltinay 3711 for (index_t k1=0; k1 < m_NE1; ++k1) {
1478     const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0));
1479     const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0));
1480     double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
1481     for (index_t i=0; i < numComp; ++i) {
1482     o[INDEX2(i,numComp,0)] = tmp0_0*(f_10[i] + f_11[i]);
1483     } /* end of component loop i */
1484     } /* end of k1 loop */
1485     } /* end of face 1 */
1486     if (m_faceOffset[2] > -1) {
1487     const double tmp0_0 = 0.50000000000000000000;
1488 caltinay 3704 #pragma omp parallel for
1489 caltinay 3711 for (index_t k0=0; k0 < m_NE0; ++k0) {
1490     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0));
1491     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0));
1492     double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1493     for (index_t i=0; i < numComp; ++i) {
1494     o[INDEX2(i,numComp,0)] = tmp0_0*(f_00[i] + f_10[i]);
1495     } /* end of component loop i */
1496     } /* end of k0 loop */
1497     } /* end of face 2 */
1498     if (m_faceOffset[3] > -1) {
1499     const double tmp0_0 = 0.50000000000000000000;
1500 caltinay 3704 #pragma omp parallel for
1501 caltinay 3711 for (index_t k0=0; k0 < m_NE0; ++k0) {
1502     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0));
1503     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0));
1504     double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1505     for (index_t i=0; i < numComp; ++i) {
1506     o[INDEX2(i,numComp,0)] = tmp0_0*(f_01[i] + f_11[i]);
1507     } /* end of component loop i */
1508     } /* end of k0 loop */
1509     } /* end of face 3 */
1510     /* GENERATOR SNIP_INTERPOLATE_REDUCED_FACES BOTTOM */
1511     } else {
1512     /* GENERATOR SNIP_INTERPOLATE_FACES TOP */
1513     if (m_faceOffset[0] > -1) {
1514     const double tmp0_1 = 0.78867513459481288225;
1515     const double tmp0_0 = 0.21132486540518711775;
1516     #pragma omp parallel for
1517     for (index_t k1=0; k1 < m_NE1; ++k1) {
1518     const register double* f_01 = in.getSampleDataRO(INDEX2(0,k1+1, m_N0));
1519     const register double* f_00 = in.getSampleDataRO(INDEX2(0,k1, m_N0));
1520     double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
1521     for (index_t i=0; i < numComp; ++i) {
1522     o[INDEX2(i,numComp,0)] = f_00[i]*tmp0_1 + f_01[i]*tmp0_0;
1523     o[INDEX2(i,numComp,1)] = f_00[i]*tmp0_0 + f_01[i]*tmp0_1;
1524     } /* end of component loop i */
1525     } /* end of k1 loop */
1526     } /* end of face 0 */
1527     if (m_faceOffset[1] > -1) {
1528     const double tmp0_1 = 0.21132486540518711775;
1529     const double tmp0_0 = 0.78867513459481288225;
1530     #pragma omp parallel for
1531     for (index_t k1=0; k1 < m_NE1; ++k1) {
1532     const register double* f_10 = in.getSampleDataRO(INDEX2(m_N0-1,k1, m_N0));
1533     const register double* f_11 = in.getSampleDataRO(INDEX2(m_N0-1,k1+1, m_N0));
1534     double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
1535     for (index_t i=0; i < numComp; ++i) {
1536     o[INDEX2(i,numComp,0)] = f_10[i]*tmp0_0 + f_11[i]*tmp0_1;
1537     o[INDEX2(i,numComp,1)] = f_10[i]*tmp0_1 + f_11[i]*tmp0_0;
1538     } /* end of component loop i */
1539     } /* end of k1 loop */
1540     } /* end of face 1 */
1541     if (m_faceOffset[2] > -1) {
1542     const double tmp0_1 = 0.78867513459481288225;
1543     const double tmp0_0 = 0.21132486540518711775;
1544     #pragma omp parallel for
1545     for (index_t k0=0; k0 < m_NE0; ++k0) {
1546     const register double* f_10 = in.getSampleDataRO(INDEX2(k0+1,0, m_N0));
1547     const register double* f_00 = in.getSampleDataRO(INDEX2(k0,0, m_N0));
1548     double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1549     for (index_t i=0; i < numComp; ++i) {
1550     o[INDEX2(i,numComp,0)] = f_00[i]*tmp0_1 + f_10[i]*tmp0_0;
1551     o[INDEX2(i,numComp,1)] = f_00[i]*tmp0_0 + f_10[i]*tmp0_1;
1552     } /* end of component loop i */
1553     } /* end of k0 loop */
1554     } /* end of face 2 */
1555     if (m_faceOffset[3] > -1) {
1556     const double tmp0_1 = 0.78867513459481288225;
1557     const double tmp0_0 = 0.21132486540518711775;
1558     #pragma omp parallel for
1559     for (index_t k0=0; k0 < m_NE0; ++k0) {
1560     const register double* f_11 = in.getSampleDataRO(INDEX2(k0+1,m_N1-1, m_N0));
1561     const register double* f_01 = in.getSampleDataRO(INDEX2(k0,m_N1-1, m_N0));
1562     double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1563     for (index_t i=0; i < numComp; ++i) {
1564     o[INDEX2(i,numComp,0)] = f_01[i]*tmp0_1 + f_11[i]*tmp0_0;
1565     o[INDEX2(i,numComp,1)] = f_01[i]*tmp0_0 + f_11[i]*tmp0_1;
1566     } /* end of component loop i */
1567     } /* end of k0 loop */
1568     } /* end of face 3 */
1569     /* GENERATOR SNIP_INTERPOLATE_FACES BOTTOM */
1570     }
1571 caltinay 3702 }
1572    
1573 caltinay 3691 } // end of namespace ripley
1574    

  ViewVC Help
Powered by ViewVC 1.1.26