/[escript]/trunk/ripley/src/Rectangle.cpp
ViewVC logotype

Annotation of /trunk/ripley/src/Rectangle.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4521 - (hide annotations)
Mon Aug 26 11:51:30 2013 UTC (6 years, 2 months ago) by jfenwick
File size: 201429 byte(s)
Remove bool_t
Part of random.


1 caltinay 3691
2 jfenwick 3981 /*****************************************************************************
3 caltinay 3691 *
4 jfenwick 4154 * Copyright (c) 2003-2013 by University of Queensland
5 jfenwick 3981 * http://www.uq.edu.au
6 caltinay 3691 *
7     * Primary Business: Queensland, Australia
8     * Licensed under the Open Software License version 3.0
9     * http://www.opensource.org/licenses/osl-3.0.php
10     *
11 jfenwick 3981 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12     * Development since 2012 by School of Earth Sciences
13     *
14     *****************************************************************************/
15 caltinay 3691
16     #include <ripley/Rectangle.h>
17 caltinay 3791 #include <paso/SystemMatrix.h>
18 caltinay 4334 #include <esysUtils/esysFileWriter.h>
19 caltinay 3691
20 caltinay 4477 #include <boost/scoped_array.hpp>
21 jfenwick 4521 #include "esysUtils/EsysRandom.h"
22 caltinay 4477
23 caltinay 4013 #ifdef USE_NETCDF
24     #include <netcdfcpp.h>
25     #endif
26    
27 caltinay 3691 #if USE_SILO
28     #include <silo.h>
29     #ifdef ESYS_MPI
30     #include <pmpio.h>
31     #endif
32     #endif
33    
34     #include <iomanip>
35    
36     using namespace std;
37 caltinay 4334 using esysUtils::FileWriter;
38 caltinay 3691
39     namespace ripley {
40    
41 caltinay 3781 Rectangle::Rectangle(int n0, int n1, double x0, double y0, double x1,
42     double y1, int d0, int d1) :
43 caltinay 4334 RipleyDomain(2)
44 caltinay 3691 {
45 caltinay 3943 // ignore subdivision parameters for serial run
46     if (m_mpiInfo->size == 1) {
47     d0=1;
48     d1=1;
49     }
50    
51     bool warn=false;
52     // if number of subdivisions is non-positive, try to subdivide by the same
53     // ratio as the number of elements
54     if (d0<=0 && d1<=0) {
55     warn=true;
56 caltinay 4010 d0=max(1, (int)(sqrt(m_mpiInfo->size*(n0+1)/(float)(n1+1))));
57 caltinay 3943 d1=m_mpiInfo->size/d0;
58     if (d0*d1 != m_mpiInfo->size) {
59     // ratios not the same so subdivide side with more elements only
60     if (n0>n1) {
61     d0=0;
62     d1=1;
63     } else {
64     d0=1;
65     d1=0;
66     }
67     }
68     }
69     if (d0<=0) {
70     // d1 is preset, determine d0 - throw further down if result is no good
71     d0=m_mpiInfo->size/d1;
72     } else if (d1<=0) {
73     // d0 is preset, determine d1 - throw further down if result is no good
74     d1=m_mpiInfo->size/d0;
75     }
76    
77 caltinay 3691 // ensure number of subdivisions is valid and nodes can be distributed
78     // among number of ranks
79 caltinay 4334 if (d0*d1 != m_mpiInfo->size)
80 caltinay 3691 throw RipleyException("Invalid number of spatial subdivisions");
81    
82 caltinay 3943 if (warn) {
83     cout << "Warning: Automatic domain subdivision (d0=" << d0 << ", d1="
84     << d1 << "). This may not be optimal!" << endl;
85     }
86 caltinay 3691
87 caltinay 4334 double l0 = x1-x0;
88     double l1 = y1-y0;
89     m_dx[0] = l0/n0;
90     m_dx[1] = l1/n1;
91    
92     if ((n0+1)%d0 > 0) {
93 caltinay 3943 n0=(int)round((float)(n0+1)/d0+0.5)*d0-1;
94 caltinay 4334 l0=m_dx[0]*n0;
95 caltinay 3943 cout << "Warning: Adjusted number of elements and length. N0="
96 caltinay 4334 << n0 << ", l0=" << l0 << endl;
97 caltinay 3943 }
98 caltinay 4334 if ((n1+1)%d1 > 0) {
99 caltinay 3943 n1=(int)round((float)(n1+1)/d1+0.5)*d1-1;
100 caltinay 4334 l1=m_dx[1]*n1;
101 caltinay 3943 cout << "Warning: Adjusted number of elements and length. N1="
102 caltinay 4334 << n1 << ", l1=" << l1 << endl;
103 caltinay 3943 }
104    
105 caltinay 4334 if ((d0 > 1 && (n0+1)/d0<2) || (d1 > 1 && (n1+1)/d1<2))
106 caltinay 3752 throw RipleyException("Too few elements for the number of ranks");
107    
108 caltinay 4334 m_gNE[0] = n0;
109     m_gNE[1] = n1;
110     m_origin[0] = x0;
111     m_origin[1] = y0;
112     m_length[0] = l0;
113     m_length[1] = l1;
114     m_NX[0] = d0;
115     m_NX[1] = d1;
116    
117 caltinay 3764 // local number of elements (with and without overlap)
118 caltinay 4334 m_NE[0] = m_ownNE[0] = (d0>1 ? (n0+1)/d0 : n0);
119     if (m_mpiInfo->rank%d0>0 && m_mpiInfo->rank%d0<d0-1)
120     m_NE[0]++;
121     else if (d0>1 && m_mpiInfo->rank%d0==d0-1)
122     m_ownNE[0]--;
123 caltinay 3764
124 caltinay 4334 m_NE[1] = m_ownNE[1] = (d1>1 ? (n1+1)/d1 : n1);
125     if (m_mpiInfo->rank/d0>0 && m_mpiInfo->rank/d0<d1-1)
126     m_NE[1]++;
127     else if (d1>1 && m_mpiInfo->rank/d0==d1-1)
128     m_ownNE[1]--;
129 caltinay 3752
130     // local number of nodes
131 caltinay 4334 m_NN[0] = m_NE[0]+1;
132     m_NN[1] = m_NE[1]+1;
133 caltinay 3752
134 caltinay 3691 // bottom-left node is at (offset0,offset1) in global mesh
135 caltinay 4334 m_offset[0] = (n0+1)/d0*(m_mpiInfo->rank%d0);
136     if (m_offset[0] > 0)
137     m_offset[0]--;
138     m_offset[1] = (n1+1)/d1*(m_mpiInfo->rank/d0);
139     if (m_offset[1] > 0)
140     m_offset[1]--;
141 caltinay 3752
142 caltinay 3691 populateSampleIds();
143 caltinay 3756 createPattern();
144 caltinay 3691 }
145    
146     Rectangle::~Rectangle()
147     {
148 caltinay 3785 Paso_SystemMatrixPattern_free(m_pattern);
149     Paso_Connector_free(m_connector);
150 caltinay 3691 }
151    
152     string Rectangle::getDescription() const
153     {
154     return "ripley::Rectangle";
155     }
156    
157     bool Rectangle::operator==(const AbstractDomain& other) const
158     {
159 caltinay 3744 const Rectangle* o=dynamic_cast<const Rectangle*>(&other);
160     if (o) {
161     return (RipleyDomain::operator==(other) &&
162 caltinay 4334 m_gNE[0]==o->m_gNE[0] && m_gNE[1]==o->m_gNE[1]
163     && m_origin[0]==o->m_origin[0] && m_origin[1]==o->m_origin[1]
164     && m_length[0]==o->m_length[0] && m_length[1]==o->m_length[1]
165     && m_NX[0]==o->m_NX[0] && m_NX[1]==o->m_NX[1]);
166 caltinay 3744 }
167 caltinay 3691
168     return false;
169     }
170    
171 caltinay 4013 void Rectangle::readNcGrid(escript::Data& out, string filename, string varname,
172 caltinay 4277 const vector<int>& first, const vector<int>& numValues,
173     const vector<int>& multiplier) const
174 caltinay 4013 {
175     #ifdef USE_NETCDF
176     // check destination function space
177     int myN0, myN1;
178     if (out.getFunctionSpace().getTypeCode() == Nodes) {
179 caltinay 4334 myN0 = m_NN[0];
180     myN1 = m_NN[1];
181 caltinay 4013 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
182     out.getFunctionSpace().getTypeCode() == ReducedElements) {
183 caltinay 4334 myN0 = m_NE[0];
184     myN1 = m_NE[1];
185 caltinay 4013 } else
186     throw RipleyException("readNcGrid(): invalid function space for output data object");
187    
188     if (first.size() != 2)
189     throw RipleyException("readNcGrid(): argument 'first' must have 2 entries");
190    
191     if (numValues.size() != 2)
192     throw RipleyException("readNcGrid(): argument 'numValues' must have 2 entries");
193    
194 caltinay 4277 if (multiplier.size() != 2)
195     throw RipleyException("readNcGrid(): argument 'multiplier' must have 2 entries");
196     for (size_t i=0; i<multiplier.size(); i++)
197     if (multiplier[i]<1)
198     throw RipleyException("readNcGrid(): all multipliers must be positive");
199    
200 caltinay 4013 // check file existence and size
201     NcFile f(filename.c_str(), NcFile::ReadOnly);
202     if (!f.is_valid())
203     throw RipleyException("readNcGrid(): cannot open file");
204    
205     NcVar* var = f.get_var(varname.c_str());
206     if (!var)
207     throw RipleyException("readNcGrid(): invalid variable");
208    
209     // TODO: rank>0 data support
210     const int numComp = out.getDataPointSize();
211     if (numComp > 1)
212     throw RipleyException("readNcGrid(): only scalar data supported");
213    
214     const int dims = var->num_dims();
215 caltinay 4477 boost::scoped_array<long> edges(var->edges());
216 caltinay 4013
217     // is this a slice of the data object (dims!=2)?
218     // note the expected ordering of edges (as in numpy: y,x)
219     if ( (dims==2 && (numValues[1] > edges[0] || numValues[0] > edges[1]))
220     || (dims==1 && numValues[1]>1) ) {
221     throw RipleyException("readNcGrid(): not enough data in file");
222     }
223    
224     // check if this rank contributes anything
225 caltinay 4334 if (first[0] >= m_offset[0]+myN0 || first[0]+numValues[0]*multiplier[0] <= m_offset[0] ||
226     first[1] >= m_offset[1]+myN1 || first[1]+numValues[1]*multiplier[1] <= m_offset[1])
227 caltinay 4013 return;
228    
229     // now determine how much this rank has to write
230    
231     // first coordinates in data object to write to
232 caltinay 4334 const int first0 = max(0, first[0]-m_offset[0]);
233     const int first1 = max(0, first[1]-m_offset[1]);
234 caltinay 4013 // indices to first value in file
235 caltinay 4334 const int idx0 = max(0, m_offset[0]-first[0]);
236     const int idx1 = max(0, m_offset[1]-first[1]);
237 caltinay 4277 // number of values to read
238 caltinay 4013 const int num0 = min(numValues[0]-idx0, myN0-first0);
239     const int num1 = min(numValues[1]-idx1, myN1-first1);
240    
241     vector<double> values(num0*num1);
242     if (dims==2) {
243     var->set_cur(idx1, idx0);
244     var->get(&values[0], num1, num0);
245     } else {
246     var->set_cur(idx0);
247     var->get(&values[0], num0);
248     }
249    
250     const int dpp = out.getNumDataPointsPerSample();
251     out.requireWrite();
252    
253     for (index_t y=0; y<num1; y++) {
254     #pragma omp parallel for
255     for (index_t x=0; x<num0; x++) {
256 caltinay 4277 const int baseIndex = first0+x*multiplier[0]
257     +(first1+y*multiplier[1])*myN0;
258     const int srcIndex = y*num0+x;
259 caltinay 4174 if (!isnan(values[srcIndex])) {
260 caltinay 4277 for (index_t m1=0; m1<multiplier[1]; m1++) {
261     for (index_t m0=0; m0<multiplier[0]; m0++) {
262     const int dataIndex = baseIndex+m0+m1*myN0;
263     double* dest = out.getSampleDataRW(dataIndex);
264     for (index_t q=0; q<dpp; q++) {
265     *dest++ = values[srcIndex];
266     }
267     }
268 caltinay 4174 }
269 caltinay 4013 }
270     }
271     }
272     #else
273     throw RipleyException("readNcGrid(): not compiled with netCDF support");
274     #endif
275     }
276    
277 caltinay 3971 void Rectangle::readBinaryGrid(escript::Data& out, string filename,
278 caltinay 4277 const vector<int>& first,
279     const vector<int>& numValues,
280 caltinay 4495 const std::vector<int>& multiplier,
281     int byteOrder, int dataType) const
282 caltinay 3971 {
283 caltinay 4495 // the mapping is not universally correct but should work on our
284     // supported platforms
285     switch (dataType) {
286     case DATATYPE_INT32:
287     readBinaryGridImpl<int>(out, filename, first, numValues,
288     multiplier, byteOrder);
289     break;
290     case DATATYPE_FLOAT32:
291     readBinaryGridImpl<float>(out, filename, first, numValues,
292     multiplier, byteOrder);
293     break;
294     case DATATYPE_FLOAT64:
295     readBinaryGridImpl<double>(out, filename, first, numValues,
296     multiplier, byteOrder);
297     break;
298     default:
299     throw RipleyException("readBinaryGrid(): invalid or unsupported datatype");
300     }
301     }
302    
303     template<typename ValueType>
304     void Rectangle::readBinaryGridImpl(escript::Data& out, const string& filename,
305     const vector<int>& first,
306     const vector<int>& numValues,
307     const std::vector<int>& multiplier,
308     int byteOrder) const
309     {
310     if (byteOrder != BYTEORDER_NATIVE)
311     throw RipleyException("readBinaryGrid(): only native byte order supported at the moment.");
312    
313 caltinay 3971 // check destination function space
314     int myN0, myN1;
315     if (out.getFunctionSpace().getTypeCode() == Nodes) {
316 caltinay 4334 myN0 = m_NN[0];
317     myN1 = m_NN[1];
318 caltinay 3971 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
319     out.getFunctionSpace().getTypeCode() == ReducedElements) {
320 caltinay 4334 myN0 = m_NE[0];
321     myN1 = m_NE[1];
322 caltinay 3971 } else
323     throw RipleyException("readBinaryGrid(): invalid function space for output data object");
324    
325     // check file existence and size
326     ifstream f(filename.c_str(), ifstream::binary);
327     if (f.fail()) {
328     throw RipleyException("readBinaryGrid(): cannot open file");
329     }
330     f.seekg(0, ios::end);
331     const int numComp = out.getDataPointSize();
332     const int filesize = f.tellg();
333 caltinay 4495 const int reqsize = numValues[0]*numValues[1]*numComp*sizeof(ValueType);
334 caltinay 3971 if (filesize < reqsize) {
335     f.close();
336     throw RipleyException("readBinaryGrid(): not enough data in file");
337     }
338    
339     // check if this rank contributes anything
340 caltinay 4334 if (first[0] >= m_offset[0]+myN0 || first[0]+numValues[0] <= m_offset[0] ||
341     first[1] >= m_offset[1]+myN1 || first[1]+numValues[1] <= m_offset[1]) {
342 caltinay 3971 f.close();
343     return;
344     }
345    
346     // now determine how much this rank has to write
347    
348     // first coordinates in data object to write to
349 caltinay 4334 const int first0 = max(0, first[0]-m_offset[0]);
350     const int first1 = max(0, first[1]-m_offset[1]);
351 caltinay 3971 // indices to first value in file
352 caltinay 4334 const int idx0 = max(0, m_offset[0]-first[0]);
353     const int idx1 = max(0, m_offset[1]-first[1]);
354 caltinay 4277 // number of values to read
355 caltinay 3971 const int num0 = min(numValues[0]-idx0, myN0-first0);
356     const int num1 = min(numValues[1]-idx1, myN1-first1);
357    
358     out.requireWrite();
359 caltinay 4495 vector<ValueType> values(num0*numComp);
360 caltinay 3971 const int dpp = out.getNumDataPointsPerSample();
361    
362     for (index_t y=0; y<num1; y++) {
363     const int fileofs = numComp*(idx0+(idx1+y)*numValues[0]);
364 caltinay 4495 f.seekg(fileofs*sizeof(ValueType));
365     f.read((char*)&values[0], num0*numComp*sizeof(ValueType));
366     for (int x=0; x<num0; x++) {
367 caltinay 4277 const int baseIndex = first0+x*multiplier[0]
368     +(first1+y*multiplier[1])*myN0;
369     for (index_t m1=0; m1<multiplier[1]; m1++) {
370     for (index_t m0=0; m0<multiplier[0]; m0++) {
371     const int dataIndex = baseIndex+m0+m1*myN0;
372     double* dest = out.getSampleDataRW(dataIndex);
373 caltinay 4495 for (int c=0; c<numComp; c++) {
374 jfenwick 4368 if (!std::isnan(values[x*numComp+c])) {
375 caltinay 4495 for (int q=0; q<dpp; q++) {
376 caltinay 4277 *dest++ = static_cast<double>(values[x*numComp+c]);
377     }
378     }
379 caltinay 4174 }
380 caltinay 3971 }
381     }
382     }
383     }
384    
385     f.close();
386     }
387    
388 caltinay 4357 void Rectangle::writeBinaryGrid(const escript::Data& in, string filename,
389     int byteOrder, int dataType) const
390 caltinay 4334 {
391 caltinay 4357 // the mapping is not universally correct but should work on our
392     // supported platforms
393     switch (dataType) {
394     case DATATYPE_INT32:
395     writeBinaryGridImpl<int>(in, filename, byteOrder);
396     break;
397     case DATATYPE_FLOAT32:
398     writeBinaryGridImpl<float>(in, filename, byteOrder);
399     break;
400     case DATATYPE_FLOAT64:
401     writeBinaryGridImpl<double>(in, filename, byteOrder);
402     break;
403     default:
404     throw RipleyException("writeBinaryGrid(): invalid or unsupported datatype");
405     }
406     }
407    
408     template<typename ValueType>
409     void Rectangle::writeBinaryGridImpl(const escript::Data& in,
410     const string& filename, int byteOrder) const
411     {
412 caltinay 4334 // check function space and determine number of points
413     int myN0, myN1;
414     int totalN0, totalN1;
415     if (in.getFunctionSpace().getTypeCode() == Nodes) {
416     myN0 = m_NN[0];
417     myN1 = m_NN[1];
418     totalN0 = m_gNE[0]+1;
419     totalN1 = m_gNE[1]+1;
420     } else if (in.getFunctionSpace().getTypeCode() == Elements ||
421     in.getFunctionSpace().getTypeCode() == ReducedElements) {
422     myN0 = m_NE[0];
423     myN1 = m_NE[1];
424     totalN0 = m_gNE[0];
425     totalN1 = m_gNE[1];
426     } else
427     throw RipleyException("writeBinaryGrid(): invalid function space of data object");
428    
429     const int numComp = in.getDataPointSize();
430     const int dpp = in.getNumDataPointsPerSample();
431    
432     if (numComp > 1 || dpp > 1)
433     throw RipleyException("writeBinaryGrid(): only scalar, single-value data supported");
434    
435     escript::Data* _in = const_cast<escript::Data*>(&in);
436 caltinay 4357 const int fileSize = sizeof(ValueType)*numComp*dpp*totalN0*totalN1;
437 caltinay 4334
438     // from here on we know that each sample consists of one value
439 caltinay 4482 FileWriter fw;
440     fw.openFile(filename, fileSize);
441 caltinay 4334 MPIBarrier();
442    
443     for (index_t y=0; y<myN1; y++) {
444 caltinay 4357 const int fileofs = (m_offset[0]+(m_offset[1]+y)*totalN0)*sizeof(ValueType);
445 caltinay 4334 ostringstream oss;
446    
447     for (index_t x=0; x<myN0; x++) {
448     const double* sample = _in->getSampleDataRO(y*myN0+x);
449 caltinay 4357 ValueType fvalue = static_cast<ValueType>(*sample);
450     if (byteOrder == BYTEORDER_NATIVE) {
451 caltinay 4334 oss.write((char*)&fvalue, sizeof(fvalue));
452     } else {
453     char* value = reinterpret_cast<char*>(&fvalue);
454 caltinay 4357 oss.write(byte_swap32(value), sizeof(fvalue));
455 caltinay 4334 }
456     }
457 caltinay 4482 fw.writeAt(oss, fileofs);
458 caltinay 4334 }
459 caltinay 4482 fw.close();
460 caltinay 4334 }
461    
462 caltinay 3691 void Rectangle::dump(const string& fileName) const
463     {
464     #if USE_SILO
465     string fn(fileName);
466     if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
467     fn+=".silo";
468     }
469    
470     int driver=DB_HDF5;
471     DBfile* dbfile = NULL;
472 gross 3793 const char* blockDirFmt = "/block%04d";
473 caltinay 3691
474     #ifdef ESYS_MPI
475     PMPIO_baton_t* baton = NULL;
476 gross 3793 const int NUM_SILO_FILES = 1;
477 caltinay 3691 #endif
478    
479     if (m_mpiInfo->size > 1) {
480     #ifdef ESYS_MPI
481     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
482     0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
483     PMPIO_DefaultClose, (void*)&driver);
484     // try the fallback driver in case of error
485     if (!baton && driver != DB_PDB) {
486     driver = DB_PDB;
487     baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
488     0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
489     PMPIO_DefaultClose, (void*)&driver);
490     }
491     if (baton) {
492 caltinay 3766 char siloPath[64];
493     snprintf(siloPath, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
494     dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath);
495 caltinay 3691 }
496     #endif
497     } else {
498     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
499     getDescription().c_str(), driver);
500     // try the fallback driver in case of error
501     if (!dbfile && driver != DB_PDB) {
502     driver = DB_PDB;
503     dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
504     getDescription().c_str(), driver);
505     }
506 caltinay 3766 char siloPath[64];
507     snprintf(siloPath, 64, blockDirFmt, 0);
508     DBMkDir(dbfile, siloPath);
509     DBSetDir(dbfile, siloPath);
510 caltinay 3691 }
511    
512     if (!dbfile)
513     throw RipleyException("dump: Could not create Silo file");
514    
515     /*
516     if (driver==DB_HDF5) {
517     // gzip level 1 already provides good compression with minimal
518     // performance penalty. Some tests showed that gzip levels >3 performed
519     // rather badly on escript data both in terms of time and space
520     DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
521     }
522     */
523    
524 caltinay 4334 boost::scoped_ptr<double> x(new double[m_NN[0]]);
525     boost::scoped_ptr<double> y(new double[m_NN[1]]);
526 caltinay 3691 double* coords[2] = { x.get(), y.get() };
527     #pragma omp parallel
528     {
529 caltinay 3722 #pragma omp for nowait
530 caltinay 4334 for (dim_t i0 = 0; i0 < m_NN[0]; i0++) {
531     coords[0][i0]=getLocalCoordinate(i0, 0);
532 caltinay 3691 }
533 caltinay 3722 #pragma omp for nowait
534 caltinay 4334 for (dim_t i1 = 0; i1 < m_NN[1]; i1++) {
535     coords[1][i1]=getLocalCoordinate(i1, 1);
536 caltinay 3691 }
537     }
538 caltinay 4334 int* dims = const_cast<int*>(getNumNodesPerDim());
539 caltinay 3697
540     // write mesh
541 caltinay 4334 DBPutQuadmesh(dbfile, "mesh", NULL, coords, dims, 2, DB_DOUBLE,
542 caltinay 3691 DB_COLLINEAR, NULL);
543    
544 caltinay 3697 // write node ids
545 caltinay 4334 DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], dims, 2,
546 caltinay 3697 NULL, 0, DB_INT, DB_NODECENT, NULL);
547    
548     // write element ids
549 caltinay 4334 dims = const_cast<int*>(getNumElementsPerDim());
550 caltinay 3697 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
551 caltinay 4334 dims, 2, NULL, 0, DB_INT, DB_ZONECENT, NULL);
552 caltinay 3697
553     // rank 0 writes multimesh and multivar
554 caltinay 3691 if (m_mpiInfo->rank == 0) {
555     vector<string> tempstrings;
556 caltinay 3697 vector<char*> names;
557 caltinay 3691 for (dim_t i=0; i<m_mpiInfo->size; i++) {
558     stringstream path;
559     path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
560     tempstrings.push_back(path.str());
561 caltinay 3697 names.push_back((char*)tempstrings.back().c_str());
562 caltinay 3691 }
563 caltinay 3697 vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
564 caltinay 3691 DBSetDir(dbfile, "/");
565 caltinay 3697 DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
566     &types[0], NULL);
567     tempstrings.clear();
568     names.clear();
569     for (dim_t i=0; i<m_mpiInfo->size; i++) {
570     stringstream path;
571     path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
572     tempstrings.push_back(path.str());
573     names.push_back((char*)tempstrings.back().c_str());
574     }
575     types.assign(m_mpiInfo->size, DB_QUADVAR);
576     DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
577     &types[0], NULL);
578     tempstrings.clear();
579     names.clear();
580     for (dim_t i=0; i<m_mpiInfo->size; i++) {
581     stringstream path;
582     path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
583     tempstrings.push_back(path.str());
584     names.push_back((char*)tempstrings.back().c_str());
585     }
586     DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
587     &types[0], NULL);
588 caltinay 3691 }
589    
590     if (m_mpiInfo->size > 1) {
591     #ifdef ESYS_MPI
592     PMPIO_HandOffBaton(baton, dbfile);
593     PMPIO_Finish(baton);
594     #endif
595     } else {
596     DBClose(dbfile);
597     }
598    
599     #else // USE_SILO
600 caltinay 3791 throw RipleyException("dump: no Silo support");
601 caltinay 3691 #endif
602     }
603    
604 caltinay 3697 const int* Rectangle::borrowSampleReferenceIDs(int fsType) const
605 caltinay 3691 {
606 caltinay 3697 switch (fsType) {
607 caltinay 3691 case Nodes:
608 caltinay 3769 case ReducedNodes: // FIXME: reduced
609 caltinay 3691 return &m_nodeId[0];
610 caltinay 3750 case DegreesOfFreedom:
611 caltinay 3769 case ReducedDegreesOfFreedom: // FIXME: reduced
612 caltinay 3750 return &m_dofId[0];
613 caltinay 3691 case Elements:
614 caltinay 3733 case ReducedElements:
615 caltinay 3691 return &m_elementId[0];
616     case FaceElements:
617 caltinay 3733 case ReducedFaceElements:
618 caltinay 3691 return &m_faceId[0];
619     default:
620     break;
621     }
622    
623     stringstream msg;
624 caltinay 3791 msg << "borrowSampleReferenceIDs: invalid function space type " << fsType;
625 caltinay 3691 throw RipleyException(msg.str());
626     }
627    
628 caltinay 3757 bool Rectangle::ownSample(int fsType, index_t id) const
629 caltinay 3691 {
630 caltinay 3759 if (getMPISize()==1)
631     return true;
632    
633 caltinay 3757 switch (fsType) {
634     case Nodes:
635 caltinay 3769 case ReducedNodes: // FIXME: reduced
636 caltinay 3757 return (m_dofMap[id] < getNumDOF());
637     case DegreesOfFreedom:
638     case ReducedDegreesOfFreedom:
639     return true;
640     case Elements:
641     case ReducedElements:
642     // check ownership of element's bottom left node
643 caltinay 4334 return (m_dofMap[id%m_NE[0]+m_NN[0]*(id/m_NE[0])] < getNumDOF());
644 caltinay 3757 case FaceElements:
645     case ReducedFaceElements:
646 caltinay 3759 {
647 caltinay 3764 // determine which face the sample belongs to before
648 caltinay 3768 // checking ownership of corresponding element's first node
649 caltinay 3759 dim_t n=0;
650 caltinay 4334 for (size_t i=0; i<4; i++) {
651     n+=m_faceCount[i];
652 caltinay 3759 if (id<n) {
653     index_t k;
654     if (i==1)
655 caltinay 4334 k=m_NN[0]-2;
656 caltinay 3759 else if (i==3)
657 caltinay 4334 k=m_NN[0]*(m_NN[1]-2);
658 caltinay 3759 else
659     k=0;
660     // determine whether to move right or up
661 caltinay 4334 const index_t delta=(i/2==0 ? m_NN[0] : 1);
662     return (m_dofMap[k+(id-n+m_faceCount[i])*delta] < getNumDOF());
663 caltinay 3759 }
664     }
665     return false;
666     }
667 caltinay 3757 default:
668     break;
669 caltinay 3702 }
670 caltinay 3757
671     stringstream msg;
672 caltinay 3791 msg << "ownSample: invalid function space type " << fsType;
673 caltinay 3757 throw RipleyException(msg.str());
674 caltinay 3691 }
675    
676 caltinay 3764 void Rectangle::setToNormal(escript::Data& out) const
677 caltinay 3691 {
678 caltinay 3764 if (out.getFunctionSpace().getTypeCode() == FaceElements) {
679     out.requireWrite();
680     #pragma omp parallel
681     {
682     if (m_faceOffset[0] > -1) {
683     #pragma omp for nowait
684 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
685 caltinay 3764 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
686     // set vector at two quadrature points
687     *o++ = -1.;
688     *o++ = 0.;
689     *o++ = -1.;
690     *o = 0.;
691     }
692     }
693    
694     if (m_faceOffset[1] > -1) {
695     #pragma omp for nowait
696 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
697 caltinay 3764 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
698     // set vector at two quadrature points
699     *o++ = 1.;
700     *o++ = 0.;
701     *o++ = 1.;
702     *o = 0.;
703     }
704     }
705    
706     if (m_faceOffset[2] > -1) {
707     #pragma omp for nowait
708 caltinay 4334 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
709 caltinay 3764 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
710     // set vector at two quadrature points
711     *o++ = 0.;
712     *o++ = -1.;
713     *o++ = 0.;
714     *o = -1.;
715     }
716     }
717    
718     if (m_faceOffset[3] > -1) {
719     #pragma omp for nowait
720 caltinay 4334 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
721 caltinay 3764 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
722     // set vector at two quadrature points
723     *o++ = 0.;
724     *o++ = 1.;
725     *o++ = 0.;
726     *o = 1.;
727     }
728     }
729     } // end of parallel section
730     } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
731     out.requireWrite();
732     #pragma omp parallel
733     {
734     if (m_faceOffset[0] > -1) {
735     #pragma omp for nowait
736 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
737 caltinay 3764 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
738     *o++ = -1.;
739     *o = 0.;
740     }
741     }
742    
743     if (m_faceOffset[1] > -1) {
744     #pragma omp for nowait
745 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
746 caltinay 3764 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
747     *o++ = 1.;
748     *o = 0.;
749     }
750     }
751    
752     if (m_faceOffset[2] > -1) {
753     #pragma omp for nowait
754 caltinay 4334 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
755 caltinay 3764 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
756     *o++ = 0.;
757     *o = -1.;
758     }
759     }
760    
761     if (m_faceOffset[3] > -1) {
762     #pragma omp for nowait
763 caltinay 4334 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
764 caltinay 3764 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
765     *o++ = 0.;
766     *o = 1.;
767     }
768     }
769     } // end of parallel section
770    
771     } else {
772     stringstream msg;
773 caltinay 3791 msg << "setToNormal: invalid function space type "
774     << out.getFunctionSpace().getTypeCode();
775 caltinay 3764 throw RipleyException(msg.str());
776     }
777     }
778    
779     void Rectangle::setToSize(escript::Data& out) const
780     {
781     if (out.getFunctionSpace().getTypeCode() == Elements
782     || out.getFunctionSpace().getTypeCode() == ReducedElements) {
783     out.requireWrite();
784     const dim_t numQuad=out.getNumDataPointsPerSample();
785 caltinay 4334 const double size=sqrt(m_dx[0]*m_dx[0]+m_dx[1]*m_dx[1]);
786 caltinay 3764 #pragma omp parallel for
787     for (index_t k = 0; k < getNumElements(); ++k) {
788     double* o = out.getSampleDataRW(k);
789     fill(o, o+numQuad, size);
790     }
791     } else if (out.getFunctionSpace().getTypeCode() == FaceElements
792     || out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
793     out.requireWrite();
794     const dim_t numQuad=out.getNumDataPointsPerSample();
795     #pragma omp parallel
796     {
797     if (m_faceOffset[0] > -1) {
798     #pragma omp for nowait
799 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
800 caltinay 3764 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
801 caltinay 4334 fill(o, o+numQuad, m_dx[1]);
802 caltinay 3764 }
803     }
804    
805     if (m_faceOffset[1] > -1) {
806     #pragma omp for nowait
807 caltinay 4334 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
808 caltinay 3764 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
809 caltinay 4334 fill(o, o+numQuad, m_dx[1]);
810 caltinay 3764 }
811     }
812    
813     if (m_faceOffset[2] > -1) {
814     #pragma omp for nowait
815 caltinay 4334 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
816 caltinay 3764 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
817 caltinay 4334 fill(o, o+numQuad, m_dx[0]);
818 caltinay 3764 }
819     }
820    
821     if (m_faceOffset[3] > -1) {
822     #pragma omp for nowait
823 caltinay 4334 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
824 caltinay 3764 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
825 caltinay 4334 fill(o, o+numQuad, m_dx[0]);
826 caltinay 3764 }
827     }
828     } // end of parallel section
829    
830     } else {
831     stringstream msg;
832 caltinay 3791 msg << "setToSize: invalid function space type "
833     << out.getFunctionSpace().getTypeCode();
834 caltinay 3764 throw RipleyException(msg.str());
835     }
836     }
837    
838     void Rectangle::Print_Mesh_Info(const bool full) const
839     {
840     RipleyDomain::Print_Mesh_Info(full);
841     if (full) {
842     cout << " Id Coordinates" << endl;
843     cout.precision(15);
844     cout.setf(ios::scientific, ios::floatfield);
845     for (index_t i=0; i < getNumNodes(); i++) {
846     cout << " " << setw(5) << m_nodeId[i]
847 caltinay 4334 << " " << getLocalCoordinate(i%m_NN[0], 0)
848     << " " << getLocalCoordinate(i/m_NN[0], 1) << endl;
849 caltinay 3764 }
850     }
851     }
852    
853    
854     //protected
855     void Rectangle::assembleCoordinates(escript::Data& arg) const
856     {
857     escriptDataC x = arg.getDataC();
858     int numDim = m_numDim;
859     if (!isDataPointShapeEqual(&x, 1, &numDim))
860     throw RipleyException("setToX: Invalid Data object shape");
861     if (!numSamplesEqual(&x, 1, getNumNodes()))
862     throw RipleyException("setToX: Illegal number of samples in Data object");
863    
864     arg.requireWrite();
865     #pragma omp parallel for
866 caltinay 4334 for (dim_t i1 = 0; i1 < m_NN[1]; i1++) {
867     for (dim_t i0 = 0; i0 < m_NN[0]; i0++) {
868     double* point = arg.getSampleDataRW(i0+m_NN[0]*i1);
869     point[0] = getLocalCoordinate(i0, 0);
870     point[1] = getLocalCoordinate(i1, 1);
871 caltinay 3764 }
872     }
873     }
874    
875     //protected
876     void Rectangle::assembleGradient(escript::Data& out, escript::Data& in) const
877     {
878 caltinay 3701 const dim_t numComp = in.getDataPointSize();
879 caltinay 4375 const double cx0 = .21132486540518711775/m_dx[0];
880     const double cx1 = .78867513459481288225/m_dx[0];
881     const double cx2 = 1./m_dx[0];
882     const double cy0 = .21132486540518711775/m_dx[1];
883     const double cy1 = .78867513459481288225/m_dx[1];
884     const double cy2 = 1./m_dx[1];
885 caltinay 3724
886 caltinay 3702 if (out.getFunctionSpace().getTypeCode() == Elements) {
887 caltinay 3760 out.requireWrite();
888 caltinay 3913 #pragma omp parallel
889     {
890     vector<double> f_00(numComp);
891     vector<double> f_01(numComp);
892     vector<double> f_10(numComp);
893     vector<double> f_11(numComp);
894     #pragma omp for
895 caltinay 4334 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
896     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
897     memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,k1, m_NN[0])), numComp*sizeof(double));
898     memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,k1+1, m_NN[0])), numComp*sizeof(double));
899     memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,k1, m_NN[0])), numComp*sizeof(double));
900     memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,k1+1, m_NN[0])), numComp*sizeof(double));
901     double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE[0]));
902 caltinay 3913 for (index_t i=0; i < numComp; ++i) {
903 caltinay 4375 o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx1 + (f_11[i]-f_01[i])*cx0;
904     o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy1 + (f_11[i]-f_10[i])*cy0;
905     o[INDEX3(i,0,1,numComp,2)] = (f_10[i]-f_00[i])*cx1 + (f_11[i]-f_01[i])*cx0;
906     o[INDEX3(i,1,1,numComp,2)] = (f_01[i]-f_00[i])*cy0 + (f_11[i]-f_10[i])*cy1;
907     o[INDEX3(i,0,2,numComp,2)] = (f_10[i]-f_00[i])*cx0 + (f_11[i]-f_01[i])*cx1;
908     o[INDEX3(i,1,2,numComp,2)] = (f_01[i]-f_00[i])*cy1 + (f_11[i]-f_10[i])*cy0;
909     o[INDEX3(i,0,3,numComp,2)] = (f_10[i]-f_00[i])*cx0 + (f_11[i]-f_01[i])*cx1;
910     o[INDEX3(i,1,3,numComp,2)] = (f_01[i]-f_00[i])*cy0 + (f_11[i]-f_10[i])*cy1;
911 caltinay 3913 } // end of component loop i
912     } // end of k0 loop
913     } // end of k1 loop
914     } // end of parallel section
915 caltinay 3711 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
916 caltinay 3760 out.requireWrite();
917 caltinay 3913 #pragma omp parallel
918     {
919     vector<double> f_00(numComp);
920     vector<double> f_01(numComp);
921     vector<double> f_10(numComp);
922     vector<double> f_11(numComp);
923     #pragma omp for
924 caltinay 4334 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
925     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
926     memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,k1, m_NN[0])), numComp*sizeof(double));
927     memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,k1+1, m_NN[0])), numComp*sizeof(double));
928     memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,k1, m_NN[0])), numComp*sizeof(double));
929     memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,k1+1, m_NN[0])), numComp*sizeof(double));
930     double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE[0]));
931 caltinay 3913 for (index_t i=0; i < numComp; ++i) {
932 caltinay 4375 o[INDEX3(i,0,0,numComp,2)] = (f_10[i] + f_11[i] - f_00[i] - f_01[i])*cx2/2;
933     o[INDEX3(i,1,0,numComp,2)] = (f_01[i] + f_11[i] - f_00[i] - f_10[i])*cy2/2;
934 caltinay 3913 } // end of component loop i
935     } // end of k0 loop
936     } // end of k1 loop
937     } // end of parallel section
938 caltinay 3707 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
939 caltinay 3760 out.requireWrite();
940 caltinay 3722 #pragma omp parallel
941     {
942 caltinay 3913 vector<double> f_00(numComp);
943     vector<double> f_01(numComp);
944     vector<double> f_10(numComp);
945     vector<double> f_11(numComp);
946 caltinay 3722 if (m_faceOffset[0] > -1) {
947     #pragma omp for nowait
948 caltinay 4334 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
949     memcpy(&f_00[0], in.getSampleDataRO(INDEX2(0,k1, m_NN[0])), numComp*sizeof(double));
950     memcpy(&f_01[0], in.getSampleDataRO(INDEX2(0,k1+1, m_NN[0])), numComp*sizeof(double));
951     memcpy(&f_10[0], in.getSampleDataRO(INDEX2(1,k1, m_NN[0])), numComp*sizeof(double));
952     memcpy(&f_11[0], in.getSampleDataRO(INDEX2(1,k1+1, m_NN[0])), numComp*sizeof(double));
953 caltinay 3722 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
954     for (index_t i=0; i < numComp; ++i) {
955 caltinay 4375 o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx1 + (f_11[i]-f_01[i])*cx0;
956     o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy2;
957     o[INDEX3(i,0,1,numComp,2)] = (f_10[i]-f_00[i])*cx0 + (f_11[i]-f_01[i])*cx1;
958     o[INDEX3(i,1,1,numComp,2)] = (f_01[i]-f_00[i])*cy2;
959 caltinay 3800 } // end of component loop i
960     } // end of k1 loop
961     } // end of face 0
962 caltinay 3722 if (m_faceOffset[1] > -1) {
963     #pragma omp for nowait
964 caltinay 4334 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
965     memcpy(&f_00[0], in.getSampleDataRO(INDEX2(m_NN[0]-2,k1, m_NN[0])), numComp*sizeof(double));
966     memcpy(&f_01[0], in.getSampleDataRO(INDEX2(m_NN[0]-2,k1+1, m_NN[0])), numComp*sizeof(double));
967     memcpy(&f_10[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1, m_NN[0])), numComp*sizeof(double));
968     memcpy(&f_11[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1+1, m_NN[0])), numComp*sizeof(double));
969 caltinay 3722 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
970     for (index_t i=0; i < numComp; ++i) {
971 caltinay 4375 o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx1 + (f_11[i]-f_01[i])*cx0;
972     o[INDEX3(i,1,0,numComp,2)] = (f_11[i]-f_10[i])*cy2;
973     o[INDEX3(i,0,1,numComp,2)] = (f_10[i]-f_00[i])*cx0 + (f_11[i]-f_01[i])*cx1;
974     o[INDEX3(i,1,1,numComp,2)] = (f_11[i]-f_10[i])*cy2;
975 caltinay 3800 } // end of component loop i
976     } // end of k1 loop
977     } // end of face 1
978 caltinay 3722 if (m_faceOffset[2] > -1) {
979     #pragma omp for nowait
980 caltinay 4334 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
981     memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,0, m_NN[0])), numComp*sizeof(double));
982     memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,1, m_NN[0])), numComp*sizeof(double));
983     memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,0, m_NN[0])), numComp*sizeof(double));
984     memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,1, m_NN[0])), numComp*sizeof(double));
985 caltinay 3722 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
986     for (index_t i=0; i < numComp; ++i) {
987 caltinay 4375 o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx2;
988     o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy1 + (f_11[i]-f_10[i])*cy0;
989     o[INDEX3(i,0,1,numComp,2)] = (f_10[i]-f_00[i])*cx2;
990     o[INDEX3(i,1,1,numComp,2)] = (f_01[i]-f_00[i])*cy0 + (f_11[i]-f_10[i])*cy1;
991 caltinay 3800 } // end of component loop i
992     } // end of k0 loop
993     } // end of face 2
994 caltinay 3722 if (m_faceOffset[3] > -1) {
995     #pragma omp for nowait
996 caltinay 4334 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
997     memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-2, m_NN[0])), numComp*sizeof(double));
998     memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
999     memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-2, m_NN[0])), numComp*sizeof(double));
1000     memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1001 caltinay 3722 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1002     for (index_t i=0; i < numComp; ++i) {
1003 caltinay 4375 o[INDEX3(i,0,0,numComp,2)] = (f_11[i]-f_01[i])*cx2;
1004     o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy1 + (f_11[i]-f_10[i])*cy0;
1005     o[INDEX3(i,0,1,numComp,2)] = (f_11[i]-f_01[i])*cx2;
1006     o[INDEX3(i,1,1,numComp,2)] = (f_01[i]-f_00[i])*cy0 + (f_11[i]-f_10[i])*cy1;
1007 caltinay 3800 } // end of component loop i
1008     } // end of k0 loop
1009     } // end of face 3
1010 caltinay 3722 } // end of parallel section
1011 caltinay 3800
1012 caltinay 3711 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1013 caltinay 3760 out.requireWrite();
1014 caltinay 3722 #pragma omp parallel
1015     {
1016 caltinay 3913 vector<double> f_00(numComp);
1017     vector<double> f_01(numComp);
1018     vector<double> f_10(numComp);
1019     vector<double> f_11(numComp);
1020 caltinay 3722 if (m_faceOffset[0] > -1) {
1021     #pragma omp for nowait
1022 caltinay 4334 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1023     memcpy(&f_00[0], in.getSampleDataRO(INDEX2(0,k1, m_NN[0])), numComp*sizeof(double));
1024     memcpy(&f_01[0], in.getSampleDataRO(INDEX2(0,k1+1, m_NN[0])), numComp*sizeof(double));
1025     memcpy(&f_10[0], in.getSampleDataRO(INDEX2(1,k1, m_NN[0])), numComp*sizeof(double));
1026     memcpy(&f_11[0], in.getSampleDataRO(INDEX2(1,k1+1, m_NN[0])), numComp*sizeof(double));
1027 caltinay 3722 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
1028     for (index_t i=0; i < numComp; ++i) {
1029 caltinay 4375 o[INDEX3(i,0,0,numComp,2)] = (f_10[i] + f_11[i] - f_00[i] - f_01[i])*cx2/2;
1030     o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy2;
1031 caltinay 3800 } // end of component loop i
1032     } // end of k1 loop
1033     } // end of face 0
1034 caltinay 3722 if (m_faceOffset[1] > -1) {
1035     #pragma omp for nowait
1036 caltinay 4334 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1037     memcpy(&f_00[0], in.getSampleDataRO(INDEX2(m_NN[0]-2,k1, m_NN[0])), numComp*sizeof(double));
1038     memcpy(&f_01[0], in.getSampleDataRO(INDEX2(m_NN[0]-2,k1+1, m_NN[0])), numComp*sizeof(double));
1039     memcpy(&f_10[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1, m_NN[0])), numComp*sizeof(double));
1040     memcpy(&f_11[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1+1, m_NN[0])), numComp*sizeof(double));
1041 caltinay 3722 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
1042     for (index_t i=0; i < numComp; ++i) {
1043 caltinay 4375 o[INDEX3(i,0,0,numComp,2)] = (f_10[i] + f_11[i] - f_00[i] - f_01[i])*cx2/2;
1044     o[INDEX3(i,1,0,numComp,2)] = (f_11[i]-f_10[i])*cy2;
1045 caltinay 3800 } // end of component loop i
1046     } // end of k1 loop
1047     } // end of face 1
1048 caltinay 3722 if (m_faceOffset[2] > -1) {
1049     #pragma omp for nowait
1050 caltinay 4334 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1051     memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,0, m_NN[0])), numComp*sizeof(double));
1052     memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,1, m_NN[0])), numComp*sizeof(double));
1053     memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,0, m_NN[0])), numComp*sizeof(double));
1054     memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,1, m_NN[0])), numComp*sizeof(double));
1055 caltinay 3722 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1056     for (index_t i=0; i < numComp; ++i) {
1057 caltinay 4375 o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx2;
1058     o[INDEX3(i,1,0,numComp,2)] = (f_01[i] + f_11[i] - f_00[i] - f_10[i])*cy2/2;
1059 caltinay 3800 } // end of component loop i
1060     } // end of k0 loop
1061     } // end of face 2
1062 caltinay 3722 if (m_faceOffset[3] > -1) {
1063     #pragma omp for nowait
1064 caltinay 4334 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1065     memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-2, m_NN[0])), numComp*sizeof(double));
1066     memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1067     memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-2, m_NN[0])), numComp*sizeof(double));
1068     memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1069 caltinay 3722 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1070     for (index_t i=0; i < numComp; ++i) {
1071 caltinay 4375 o[INDEX3(i,0,0,numComp,2)] = (f_11[i]-f_01[i])*cx2;
1072     o[INDEX3(i,1,0,numComp,2)] = (f_01[i] + f_11[i] - f_00[i] - f_10[i])*cy2/2;
1073 caltinay 3800 } // end of component loop i
1074     } // end of k0 loop
1075     } // end of face 3
1076 caltinay 3722 } // end of parallel section
1077 caltinay 3702 }
1078 caltinay 3701 }
1079 caltinay 3697
1080 caltinay 3764 //protected
1081     void Rectangle::assembleIntegrate(vector<double>& integrals, escript::Data& arg) const
1082 caltinay 3713 {
1083 caltinay 3764 const dim_t numComp = arg.getDataPointSize();
1084 caltinay 4334 const index_t left = (m_offset[0]==0 ? 0 : 1);
1085     const index_t bottom = (m_offset[1]==0 ? 0 : 1);
1086 caltinay 3800 const int fs=arg.getFunctionSpace().getTypeCode();
1087     if (fs == Elements && arg.actsExpanded()) {
1088 caltinay 3713 #pragma omp parallel
1089     {
1090     vector<double> int_local(numComp, 0);
1091 caltinay 4334 const double w = m_dx[0]*m_dx[1]/4.;
1092 caltinay 3722 #pragma omp for nowait
1093 caltinay 4334 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1094     for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1095     const double* f = arg.getSampleDataRO(INDEX2(k0, k1, m_NE[0]));
1096 caltinay 3713 for (index_t i=0; i < numComp; ++i) {
1097 caltinay 3769 const double f0 = f[INDEX2(i,0,numComp)];
1098     const double f1 = f[INDEX2(i,1,numComp)];
1099     const double f2 = f[INDEX2(i,2,numComp)];
1100     const double f3 = f[INDEX2(i,3,numComp)];
1101 caltinay 3764 int_local[i]+=(f0+f1+f2+f3)*w;
1102 caltinay 3800 } // end of component loop i
1103     } // end of k0 loop
1104     } // end of k1 loop
1105 caltinay 3713 #pragma omp critical
1106     for (index_t i=0; i<numComp; i++)
1107     integrals[i]+=int_local[i];
1108 caltinay 3722 } // end of parallel section
1109 caltinay 3800
1110     } else if (fs==ReducedElements || (fs==Elements && !arg.actsExpanded())) {
1111 caltinay 4334 const double w = m_dx[0]*m_dx[1];
1112 caltinay 3713 #pragma omp parallel
1113     {
1114     vector<double> int_local(numComp, 0);
1115 caltinay 3722 #pragma omp for nowait
1116 caltinay 4334 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1117     for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1118     const double* f = arg.getSampleDataRO(INDEX2(k0, k1, m_NE[0]));
1119 caltinay 3713 for (index_t i=0; i < numComp; ++i) {
1120 caltinay 3764 int_local[i]+=f[i]*w;
1121 caltinay 3800 }
1122     }
1123     }
1124 caltinay 3713 #pragma omp critical
1125     for (index_t i=0; i<numComp; i++)
1126     integrals[i]+=int_local[i];
1127 caltinay 3722 } // end of parallel section
1128 caltinay 3800
1129     } else if (fs == FaceElements && arg.actsExpanded()) {
1130 caltinay 3713 #pragma omp parallel
1131     {
1132     vector<double> int_local(numComp, 0);
1133 caltinay 4334 const double w0 = m_dx[0]/2.;
1134     const double w1 = m_dx[1]/2.;
1135 caltinay 3713 if (m_faceOffset[0] > -1) {
1136 caltinay 3722 #pragma omp for nowait
1137 caltinay 4334 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1138 caltinay 3764 const double* f = arg.getSampleDataRO(m_faceOffset[0]+k1);
1139 caltinay 3713 for (index_t i=0; i < numComp; ++i) {
1140 caltinay 3769 const double f0 = f[INDEX2(i,0,numComp)];
1141     const double f1 = f[INDEX2(i,1,numComp)];
1142 caltinay 3764 int_local[i]+=(f0+f1)*w1;
1143 caltinay 3800 } // end of component loop i
1144     } // end of k1 loop
1145 caltinay 3713 }
1146    
1147     if (m_faceOffset[1] > -1) {
1148 caltinay 3722 #pragma omp for nowait
1149 caltinay 4334 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1150 caltinay 3764 const double* f = arg.getSampleDataRO(m_faceOffset[1]+k1);
1151 caltinay 3713 for (index_t i=0; i < numComp; ++i) {
1152 caltinay 3769 const double f0 = f[INDEX2(i,0,numComp)];
1153     const double f1 = f[INDEX2(i,1,numComp)];
1154 caltinay 3764 int_local[i]+=(f0+f1)*w1;
1155 caltinay 3800 } // end of component loop i
1156     } // end of k1 loop
1157 caltinay 3713 }
1158    
1159     if (m_faceOffset[2] > -1) {
1160 caltinay 3722 #pragma omp for nowait
1161 caltinay 4334 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1162 caltinay 3764 const double* f = arg.getSampleDataRO(m_faceOffset[2]+k0);
1163 caltinay 3713 for (index_t i=0; i < numComp; ++i) {
1164 caltinay 3769 const double f0 = f[INDEX2(i,0,numComp)];
1165     const double f1 = f[INDEX2(i,1,numComp)];
1166 caltinay 3764 int_local[i]+=(f0+f1)*w0;
1167 caltinay 3800 } // end of component loop i
1168     } // end of k0 loop
1169 caltinay 3713 }
1170    
1171     if (m_faceOffset[3] > -1) {
1172 caltinay 3722 #pragma omp for nowait
1173 caltinay 4334 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1174 caltinay 3764 const double* f = arg.getSampleDataRO(m_faceOffset[3]+k0);
1175 caltinay 3713 for (index_t i=0; i < numComp; ++i) {
1176 caltinay 3769 const double f0 = f[INDEX2(i,0,numComp)];
1177     const double f1 = f[INDEX2(i,1,numComp)];
1178 caltinay 3764 int_local[i]+=(f0+f1)*w0;
1179 caltinay 3800 } // end of component loop i
1180     } // end of k0 loop
1181 caltinay 3713 }
1182     #pragma omp critical
1183     for (index_t i=0; i<numComp; i++)
1184     integrals[i]+=int_local[i];
1185 caltinay 3722 } // end of parallel section
1186 caltinay 3800
1187     } else if (fs==ReducedFaceElements || (fs==FaceElements && !arg.actsExpanded())) {
1188 caltinay 3713 #pragma omp parallel
1189     {
1190     vector<double> int_local(numComp, 0);
1191     if (m_faceOffset[0] > -1) {
1192 caltinay 3722 #pragma omp for nowait
1193 caltinay 4334 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1194 caltinay 3764 const double* f = arg.getSampleDataRO(m_faceOffset[0]+k1);
1195 caltinay 3713 for (index_t i=0; i < numComp; ++i) {
1196 caltinay 4334 int_local[i]+=f[i]*m_dx[1];
1197 caltinay 3800 }
1198     }
1199 caltinay 3713 }
1200    
1201     if (m_faceOffset[1] > -1) {
1202 caltinay 3722 #pragma omp for nowait
1203 caltinay 4334 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1204 caltinay 3764 const double* f = arg.getSampleDataRO(m_faceOffset[1]+k1);
1205 caltinay 3713 for (index_t i=0; i < numComp; ++i) {
1206 caltinay 4334 int_local[i]+=f[i]*m_dx[1];
1207 caltinay 3800 }
1208     }
1209 caltinay 3713 }
1210    
1211     if (m_faceOffset[2] > -1) {
1212 caltinay 3722 #pragma omp for nowait
1213 caltinay 4334 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1214 caltinay 3764 const double* f = arg.getSampleDataRO(m_faceOffset[2]+k0);
1215 caltinay 3713 for (index_t i=0; i < numComp; ++i) {
1216 caltinay 4334 int_local[i]+=f[i]*m_dx[0];
1217 caltinay 3800 }
1218     }
1219 caltinay 3713 }
1220    
1221     if (m_faceOffset[3] > -1) {
1222 caltinay 3722 #pragma omp for nowait
1223 caltinay 4334 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1224 caltinay 3764 const double* f = arg.getSampleDataRO(m_faceOffset[3]+k0);
1225 caltinay 3713 for (index_t i=0; i < numComp; ++i) {
1226 caltinay 4334 int_local[i]+=f[i]*m_dx[0];
1227 caltinay 3800 }
1228     }
1229 caltinay 3713 }
1230    
1231     #pragma omp critical
1232     for (index_t i=0; i<numComp; i++)
1233     integrals[i]+=int_local[i];
1234 caltinay 3722 } // end of parallel section
1235 caltinay 3800 } // function space selector
1236 caltinay 3713 }
1237    
1238 caltinay 3691 //protected
1239 caltinay 3756 dim_t Rectangle::insertNeighbourNodes(IndexVector& index, index_t node) const
1240     {
1241 caltinay 4334 const dim_t nDOF0 = (m_gNE[0]+1)/m_NX[0];
1242     const dim_t nDOF1 = (m_gNE[1]+1)/m_NX[1];
1243 caltinay 3756 const int x=node%nDOF0;
1244     const int y=node/nDOF0;
1245     dim_t num=0;
1246     // loop through potential neighbours and add to index if positions are
1247     // within bounds
1248     for (int i1=-1; i1<2; i1++) {
1249     for (int i0=-1; i0<2; i0++) {
1250     // skip node itself
1251     if (i0==0 && i1==0)
1252     continue;
1253     // location of neighbour node
1254     const int nx=x+i0;
1255     const int ny=y+i1;
1256     if (nx>=0 && ny>=0 && nx<nDOF0 && ny<nDOF1) {
1257     index.push_back(ny*nDOF0+nx);
1258     num++;
1259     }
1260     }
1261     }
1262    
1263     return num;
1264     }
1265    
1266     //protected
1267     void Rectangle::nodesToDOF(escript::Data& out, escript::Data& in) const
1268     {
1269     const dim_t numComp = in.getDataPointSize();
1270     out.requireWrite();
1271    
1272 caltinay 4334 const index_t left = (m_offset[0]==0 ? 0 : 1);
1273     const index_t bottom = (m_offset[1]==0 ? 0 : 1);
1274     const dim_t nDOF0 = (m_gNE[0]+1)/m_NX[0];
1275     const dim_t nDOF1 = (m_gNE[1]+1)/m_NX[1];
1276 caltinay 3756 #pragma omp parallel for
1277     for (index_t i=0; i<nDOF1; i++) {
1278     for (index_t j=0; j<nDOF0; j++) {
1279 caltinay 4334 const index_t n=j+left+(i+bottom)*m_NN[0];
1280 caltinay 3756 const double* src=in.getSampleDataRO(n);
1281     copy(src, src+numComp, out.getSampleDataRW(j+i*nDOF0));
1282     }
1283     }
1284     }
1285    
1286     //protected
1287     void Rectangle::dofToNodes(escript::Data& out, escript::Data& in) const
1288     {
1289     const dim_t numComp = in.getDataPointSize();
1290     Paso_Coupler* coupler = Paso_Coupler_alloc(m_connector, numComp);
1291     in.requireWrite();
1292     Paso_Coupler_startCollect(coupler, in.getSampleDataRW(0));
1293    
1294     const dim_t numDOF = getNumDOF();
1295     out.requireWrite();
1296     const double* buffer = Paso_Coupler_finishCollect(coupler);
1297    
1298     #pragma omp parallel for
1299     for (index_t i=0; i<getNumNodes(); i++) {
1300     const double* src=(m_dofMap[i]<numDOF ?
1301     in.getSampleDataRO(m_dofMap[i])
1302     : &buffer[(m_dofMap[i]-numDOF)*numComp]);
1303     copy(src, src+numComp, out.getSampleDataRW(i));
1304     }
1305 caltinay 4002 Paso_Coupler_free(coupler);
1306 caltinay 3756 }
1307    
1308 caltinay 3691 //private
1309     void Rectangle::populateSampleIds()
1310     {
1311 caltinay 4334 // degrees of freedom are numbered from left to right, bottom to top in
1312     // each rank, continuing on the next rank (ranks also go left-right,
1313     // bottom-top).
1314     // This means rank 0 has id 0...n0-1, rank 1 has id n0...n1-1 etc. which
1315     // helps when writing out data rank after rank.
1316 caltinay 3697
1317     // build node distribution vector first.
1318 caltinay 4334 // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes which is
1319     // constant for all ranks in this implementation
1320 caltinay 3697 m_nodeDistribution.assign(m_mpiInfo->size+1, 0);
1321 caltinay 3752 const dim_t numDOF=getNumDOF();
1322     for (dim_t k=1; k<m_mpiInfo->size; k++) {
1323     m_nodeDistribution[k]=k*numDOF;
1324 caltinay 3697 }
1325     m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal();
1326 caltinay 3691 m_nodeId.resize(getNumNodes());
1327 caltinay 3753 m_dofId.resize(numDOF);
1328     m_elementId.resize(getNumElements());
1329 caltinay 4334
1330     // populate face element counts
1331     //left
1332     if (m_offset[0]==0)
1333     m_faceCount[0]=m_NE[1];
1334     else
1335     m_faceCount[0]=0;
1336     //right
1337     if (m_mpiInfo->rank%m_NX[0]==m_NX[0]-1)
1338     m_faceCount[1]=m_NE[1];
1339     else
1340     m_faceCount[1]=0;
1341     //bottom
1342     if (m_offset[1]==0)
1343     m_faceCount[2]=m_NE[0];
1344     else
1345     m_faceCount[2]=0;
1346     //top
1347     if (m_mpiInfo->rank/m_NX[0]==m_NX[1]-1)
1348     m_faceCount[3]=m_NE[0];
1349     else
1350     m_faceCount[3]=0;
1351    
1352 caltinay 3753 m_faceId.resize(getNumFaceElements());
1353 caltinay 3697
1354 caltinay 4334 const index_t left = (m_offset[0]==0 ? 0 : 1);
1355     const index_t bottom = (m_offset[1]==0 ? 0 : 1);
1356     const dim_t nDOF0 = (m_gNE[0]+1)/m_NX[0];
1357     const dim_t nDOF1 = (m_gNE[1]+1)/m_NX[1];
1358    
1359     #define globalNodeId(x,y) \
1360     ((m_offset[0]+x)/nDOF0)*nDOF0*nDOF1+(m_offset[0]+x)%nDOF0 \
1361     + ((m_offset[1]+y)/nDOF1)*nDOF0*nDOF1*m_NX[0]+((m_offset[1]+y)%nDOF1)*nDOF0
1362    
1363     // set corner id's outside the parallel region
1364     m_nodeId[0] = globalNodeId(0, 0);
1365     m_nodeId[m_NN[0]-1] = globalNodeId(m_NN[0]-1, 0);
1366     m_nodeId[m_NN[0]*(m_NN[1]-1)] = globalNodeId(0, m_NN[1]-1);
1367     m_nodeId[m_NN[0]*m_NN[1]-1] = globalNodeId(m_NN[0]-1,m_NN[1]-1);
1368     #undef globalNodeId
1369    
1370 caltinay 3753 #pragma omp parallel
1371     {
1372 caltinay 4334 // populate degrees of freedom and own nodes (identical id)
1373 caltinay 3753 #pragma omp for nowait
1374 caltinay 4334 for (dim_t i=0; i<nDOF1; i++) {
1375     for (dim_t j=0; j<nDOF0; j++) {
1376     const index_t nodeIdx=j+left+(i+bottom)*m_NN[0];
1377     const index_t dofIdx=j+i*nDOF0;
1378     m_dofId[dofIdx] = m_nodeId[nodeIdx]
1379     = m_nodeDistribution[m_mpiInfo->rank]+dofIdx;
1380 caltinay 3753 }
1381 caltinay 3697 }
1382    
1383 caltinay 4334 // populate the rest of the nodes (shared with other ranks)
1384     if (m_faceCount[0]==0) { // left column
1385 caltinay 3753 #pragma omp for nowait
1386 caltinay 4334 for (dim_t i=0; i<nDOF1; i++) {
1387     const index_t nodeIdx=(i+bottom)*m_NN[0];
1388     const index_t dofId=(i+1)*nDOF0-1;
1389     m_nodeId[nodeIdx]
1390     = m_nodeDistribution[m_mpiInfo->rank-1]+dofId;
1391     }
1392     }
1393     if (m_faceCount[1]==0) { // right column
1394     #pragma omp for nowait
1395     for (dim_t i=0; i<nDOF1; i++) {
1396     const index_t nodeIdx=(i+bottom+1)*m_NN[0]-1;
1397     const index_t dofId=i*nDOF0;
1398     m_nodeId[nodeIdx]
1399     = m_nodeDistribution[m_mpiInfo->rank+1]+dofId;
1400     }
1401     }
1402     if (m_faceCount[2]==0) { // bottom row
1403     #pragma omp for nowait
1404     for (dim_t i=0; i<nDOF0; i++) {
1405     const index_t nodeIdx=i+left;
1406     const index_t dofId=nDOF0*(nDOF1-1)+i;
1407     m_nodeId[nodeIdx]
1408     = m_nodeDistribution[m_mpiInfo->rank-m_NX[0]]+dofId;
1409     }
1410     }
1411     if (m_faceCount[3]==0) { // top row
1412     #pragma omp for nowait
1413     for (dim_t i=0; i<nDOF0; i++) {
1414     const index_t nodeIdx=m_NN[0]*(m_NN[1]-1)+i+left;
1415     const index_t dofId=i;
1416     m_nodeId[nodeIdx]
1417     = m_nodeDistribution[m_mpiInfo->rank+m_NX[0]]+dofId;
1418     }
1419     }
1420 caltinay 3752
1421 caltinay 4334 // populate element id's
1422 caltinay 3753 #pragma omp for nowait
1423 caltinay 4334 for (dim_t i1=0; i1<m_NE[1]; i1++) {
1424     for (dim_t i0=0; i0<m_NE[0]; i0++) {
1425     m_elementId[i0+i1*m_NE[0]]=(m_offset[1]+i1)*m_gNE[0]+m_offset[0]+i0;
1426 caltinay 3755 }
1427     }
1428 caltinay 3753
1429     // face elements
1430     #pragma omp for
1431     for (dim_t k=0; k<getNumFaceElements(); k++)
1432     m_faceId[k]=k;
1433     } // end parallel section
1434    
1435 caltinay 3735 m_nodeTags.assign(getNumNodes(), 0);
1436     updateTagsInUse(Nodes);
1437 caltinay 3697
1438 caltinay 3735 m_elementTags.assign(getNumElements(), 0);
1439     updateTagsInUse(Elements);
1440 caltinay 3697
1441 caltinay 3722 // generate face offset vector and set face tags
1442     const index_t LEFT=1, RIGHT=2, BOTTOM=10, TOP=20;
1443     const index_t faceTag[] = { LEFT, RIGHT, BOTTOM, TOP };
1444 caltinay 4334 m_faceOffset.assign(4, -1);
1445 caltinay 3722 m_faceTags.clear();
1446 caltinay 3704 index_t offset=0;
1447 caltinay 4334 for (size_t i=0; i<4; i++) {
1448     if (m_faceCount[i]>0) {
1449 caltinay 3704 m_faceOffset[i]=offset;
1450 caltinay 4334 offset+=m_faceCount[i];
1451     m_faceTags.insert(m_faceTags.end(), m_faceCount[i], faceTag[i]);
1452 caltinay 3704 }
1453     }
1454 caltinay 3722 setTagMap("left", LEFT);
1455     setTagMap("right", RIGHT);
1456     setTagMap("bottom", BOTTOM);
1457     setTagMap("top", TOP);
1458     updateTagsInUse(FaceElements);
1459 caltinay 3691 }
1460    
1461 caltinay 3699 //private
1462 caltinay 3756 void Rectangle::createPattern()
1463 caltinay 3699 {
1464 caltinay 4334 const dim_t nDOF0 = (m_gNE[0]+1)/m_NX[0];
1465     const dim_t nDOF1 = (m_gNE[1]+1)/m_NX[1];
1466     const index_t left = (m_offset[0]==0 ? 0 : 1);
1467     const index_t bottom = (m_offset[1]==0 ? 0 : 1);
1468 caltinay 3756
1469     // populate node->DOF mapping with own degrees of freedom.
1470     // The rest is assigned in the loop further down
1471     m_dofMap.assign(getNumNodes(), 0);
1472     #pragma omp parallel for
1473 caltinay 3766 for (index_t i=bottom; i<bottom+nDOF1; i++) {
1474     for (index_t j=left; j<left+nDOF0; j++) {
1475 caltinay 4334 m_dofMap[i*m_NN[0]+j]=(i-bottom)*nDOF0+j-left;
1476 caltinay 3756 }
1477     }
1478    
1479     // build list of shared components and neighbours by looping through
1480     // all potential neighbouring ranks and checking if positions are
1481 caltinay 3754 // within bounds
1482 caltinay 3756 const dim_t numDOF=nDOF0*nDOF1;
1483     vector<IndexVector> colIndices(numDOF); // for the couple blocks
1484     RankVector neighbour;
1485     IndexVector offsetInShared(1,0);
1486     IndexVector sendShared, recvShared;
1487     int numShared=0;
1488 caltinay 4334 const int x=m_mpiInfo->rank%m_NX[0];
1489     const int y=m_mpiInfo->rank/m_NX[0];
1490 caltinay 3754 for (int i1=-1; i1<2; i1++) {
1491     for (int i0=-1; i0<2; i0++) {
1492 caltinay 3756 // skip this rank
1493 caltinay 3754 if (i0==0 && i1==0)
1494     continue;
1495 caltinay 3756 // location of neighbour rank
1496 caltinay 3754 const int nx=x+i0;
1497     const int ny=y+i1;
1498 caltinay 4334 if (nx>=0 && ny>=0 && nx<m_NX[0] && ny<m_NX[1]) {
1499     neighbour.push_back(ny*m_NX[0]+nx);
1500 caltinay 3756 if (i0==0) {
1501     // sharing top or bottom edge
1502     const int firstDOF=(i1==-1 ? 0 : numDOF-nDOF0);
1503 caltinay 4334 const int firstNode=(i1==-1 ? left : m_NN[0]*(m_NN[1]-1)+left);
1504 caltinay 3756 offsetInShared.push_back(offsetInShared.back()+nDOF0);
1505     for (dim_t i=0; i<nDOF0; i++, numShared++) {
1506     sendShared.push_back(firstDOF+i);
1507     recvShared.push_back(numDOF+numShared);
1508     if (i>0)
1509     colIndices[firstDOF+i-1].push_back(numShared);
1510     colIndices[firstDOF+i].push_back(numShared);
1511     if (i<nDOF0-1)
1512     colIndices[firstDOF+i+1].push_back(numShared);
1513     m_dofMap[firstNode+i]=numDOF+numShared;
1514     }
1515     } else if (i1==0) {
1516     // sharing left or right edge
1517     const int firstDOF=(i0==-1 ? 0 : nDOF0-1);
1518 caltinay 4334 const int firstNode=(i0==-1 ? bottom*m_NN[0] : (bottom+1)*m_NN[0]-1);
1519 caltinay 3756 offsetInShared.push_back(offsetInShared.back()+nDOF1);
1520     for (dim_t i=0; i<nDOF1; i++, numShared++) {
1521     sendShared.push_back(firstDOF+i*nDOF0);
1522     recvShared.push_back(numDOF+numShared);
1523     if (i>0)
1524     colIndices[firstDOF+(i-1)*nDOF0].push_back(numShared);
1525     colIndices[firstDOF+i*nDOF0].push_back(numShared);
1526     if (i<nDOF1-1)
1527     colIndices[firstDOF+(i+1)*nDOF0].push_back(numShared);
1528 caltinay 4334 m_dofMap[firstNode+i*m_NN[0]]=numDOF+numShared;
1529 caltinay 3756 }
1530     } else {
1531     // sharing a node
1532     const int dof=(i0+1)/2*(nDOF0-1)+(i1+1)/2*(numDOF-nDOF0);
1533 caltinay 4334 const int node=(i0+1)/2*(m_NN[0]-1)+(i1+1)/2*m_NN[0]*(m_NN[1]-1);
1534 caltinay 3756 offsetInShared.push_back(offsetInShared.back()+1);
1535     sendShared.push_back(dof);
1536     recvShared.push_back(numDOF+numShared);
1537     colIndices[dof].push_back(numShared);
1538     m_dofMap[node]=numDOF+numShared;
1539     ++numShared;
1540     }
1541 caltinay 3754 }
1542 caltinay 3699 }
1543     }
1544 caltinay 3754
1545 caltinay 3756 // create connector
1546     Paso_SharedComponents *snd_shcomp = Paso_SharedComponents_alloc(
1547     numDOF, neighbour.size(), &neighbour[0], &sendShared[0],
1548     &offsetInShared[0], 1, 0, m_mpiInfo);
1549     Paso_SharedComponents *rcv_shcomp = Paso_SharedComponents_alloc(
1550     numDOF, neighbour.size(), &neighbour[0], &recvShared[0],
1551     &offsetInShared[0], 1, 0, m_mpiInfo);
1552     m_connector = Paso_Connector_alloc(snd_shcomp, rcv_shcomp);
1553     Paso_SharedComponents_free(snd_shcomp);
1554     Paso_SharedComponents_free(rcv_shcomp);
1555 caltinay 3754
1556 caltinay 3756 // create main and couple blocks
1557     Paso_Pattern *mainPattern = createMainPattern();
1558     Paso_Pattern *colPattern, *rowPattern;
1559     createCouplePatterns(colIndices, numShared, &colPattern, &rowPattern);
1560 caltinay 3754
1561 caltinay 3756 // allocate paso distribution
1562     Paso_Distribution* distribution = Paso_Distribution_alloc(m_mpiInfo,
1563     const_cast<index_t*>(&m_nodeDistribution[0]), 1, 0);
1564 caltinay 3755
1565 caltinay 3756 // finally create the system matrix
1566     m_pattern = Paso_SystemMatrixPattern_alloc(MATRIX_FORMAT_DEFAULT,
1567     distribution, distribution, mainPattern, colPattern, rowPattern,
1568     m_connector, m_connector);
1569 caltinay 3755
1570 caltinay 3756 Paso_Distribution_free(distribution);
1571 caltinay 3755
1572 caltinay 3756 // useful debug output
1573     /*
1574     cout << "--- rcv_shcomp ---" << endl;
1575     cout << "numDOF=" << numDOF << ", numNeighbors=" << neighbour.size() << endl;
1576     for (size_t i=0; i<neighbour.size(); i++) {
1577     cout << "neighbor[" << i << "]=" << neighbour[i]
1578     << " offsetInShared[" << i+1 << "]=" << offsetInShared[i+1] << endl;
1579 caltinay 3699 }
1580 caltinay 3756 for (size_t i=0; i<recvShared.size(); i++) {
1581     cout << "shared[" << i << "]=" << recvShared[i] << endl;
1582     }
1583     cout << "--- snd_shcomp ---" << endl;
1584     for (size_t i=0; i<sendShared.size(); i++) {
1585     cout << "shared[" << i << "]=" << sendShared[i] << endl;
1586     }
1587     cout << "--- dofMap ---" << endl;
1588     for (size_t i=0; i<m_dofMap.size(); i++) {
1589     cout << "m_dofMap[" << i << "]=" << m_dofMap[i] << endl;
1590     }
1591     cout << "--- colIndices ---" << endl;
1592     for (size_t i=0; i<colIndices.size(); i++) {
1593     cout << "colIndices[" << i << "].size()=" << colIndices[i].size() << endl;
1594     }
1595     */
1596 caltinay 3754
1597 caltinay 3756 /*
1598     cout << "--- main_pattern ---" << endl;
1599     cout << "M=" << mainPattern->numOutput << ", N=" << mainPattern->numInput << endl;
1600     for (size_t i=0; i<mainPattern->numOutput+1; i++) {
1601     cout << "ptr[" << i << "]=" << mainPattern->ptr[i] << endl;
1602     }
1603     for (size_t i=0; i<mainPattern->ptr[mainPattern->numOutput]; i++) {
1604     cout << "index[" << i << "]=" << mainPattern->index[i] << endl;
1605     }
1606     */
1607 caltinay 3754
1608 caltinay 3756 /*
1609     cout << "--- colCouple_pattern ---" << endl;
1610     cout << "M=" << colPattern->numOutput << ", N=" << colPattern->numInput << endl;
1611     for (size_t i=0; i<colPattern->numOutput+1; i++) {
1612     cout << "ptr[" << i << "]=" << colPattern->ptr[i] << endl;
1613     }
1614     for (size_t i=0; i<colPattern->ptr[colPattern->numOutput]; i++) {
1615     cout << "index[" << i << "]=" << colPattern->index[i] << endl;
1616     }
1617     */
1618 caltinay 3754
1619 caltinay 3756 /*
1620     cout << "--- rowCouple_pattern ---" << endl;
1621     cout << "M=" << rowPattern->numOutput << ", N=" << rowPattern->numInput << endl;
1622     for (size_t i=0; i<rowPattern->numOutput+1; i++) {
1623     cout << "ptr[" << i << "]=" << rowPattern->ptr[i] << endl;
1624 caltinay 3699 }
1625 caltinay 3756 for (size_t i=0; i<rowPattern->ptr[rowPattern->numOutput]; i++) {
1626     cout << "index[" << i << "]=" << rowPattern->index[i] << endl;
1627     }
1628     */
1629    
1630     Paso_Pattern_free(mainPattern);
1631     Paso_Pattern_free(colPattern);
1632     Paso_Pattern_free(rowPattern);
1633 caltinay 3699 }
1634    
1635 caltinay 3776 //private
1636     void Rectangle::addToMatrixAndRHS(Paso_SystemMatrix* S, escript::Data& F,
1637     const vector<double>& EM_S, const vector<double>& EM_F, bool addS,
1638     bool addF, index_t firstNode, dim_t nEq, dim_t nComp) const
1639     {
1640     IndexVector rowIndex;
1641     rowIndex.push_back(m_dofMap[firstNode]);
1642     rowIndex.push_back(m_dofMap[firstNode+1]);
1643 caltinay 4334 rowIndex.push_back(m_dofMap[firstNode+m_NN[0]]);
1644     rowIndex.push_back(m_dofMap[firstNode+m_NN[0]+1]);
1645 caltinay 3776 if (addF) {
1646     double *F_p=F.getSampleDataRW(0);
1647     for (index_t i=0; i<rowIndex.size(); i++) {
1648     if (rowIndex[i]<getNumDOF()) {
1649     for (index_t eq=0; eq<nEq; eq++) {
1650     F_p[INDEX2(eq, rowIndex[i], nEq)]+=EM_F[INDEX2(eq,i,nEq)];
1651     }
1652     }
1653     }
1654     }
1655     if (addS) {
1656     addToSystemMatrix(S, rowIndex, nEq, rowIndex, nComp, EM_S);
1657     }
1658     }
1659    
1660 caltinay 3702 //protected
1661 caltinay 3711 void Rectangle::interpolateNodesOnElements(escript::Data& out,
1662     escript::Data& in, bool reduced) const
1663 caltinay 3702 {
1664     const dim_t numComp = in.getDataPointSize();
1665 caltinay 3711 if (reduced) {
1666 caltinay 3760 out.requireWrite();
1667 caltinay 3913 const double c0 = 0.25;
1668     #pragma omp parallel
1669     {
1670     vector<double> f_00(numComp);
1671     vector<double> f_01(numComp);
1672     vector<double> f_10(numComp);
1673     vector<double> f_11(numComp);
1674     #pragma omp for
1675 caltinay 4334 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1676     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1677     memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,k1, m_NN[0])), numComp*sizeof(double));
1678     memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,k1+1, m_NN[0])), numComp*sizeof(double));
1679     memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,k1, m_NN[0])), numComp*sizeof(double));
1680     memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,k1+1, m_NN[0])), numComp*sizeof(double));
1681     double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE[0]));
1682 caltinay 3913 for (index_t i=0; i < numComp; ++i) {
1683     o[INDEX2(i,numComp,0)] = c0*(f_00[i] + f_01[i] + f_10[i] + f_11[i]);
1684     } /* end of component loop i */
1685     } /* end of k0 loop */
1686     } /* end of k1 loop */
1687     } /* end of parallel section */
1688 caltinay 3711 } else {
1689 caltinay 3760 out.requireWrite();
1690 caltinay 3913 const double c0 = 0.16666666666666666667;
1691     const double c1 = 0.044658198738520451079;
1692     const double c2 = 0.62200846792814621559;
1693     #pragma omp parallel
1694     {
1695     vector<double> f_00(numComp);
1696     vector<double> f_01(numComp);
1697     vector<double> f_10(numComp);
1698     vector<double> f_11(numComp);
1699     #pragma omp for
1700 caltinay 4334 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1701     for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1702     memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,k1, m_NN[0])), numComp*sizeof(double));
1703     memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,k1+1, m_NN[0])), numComp*sizeof(double));
1704     memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,k1, m_NN[0])), numComp*sizeof(double));
1705     memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,k1+1, m_NN[0])), numComp*sizeof(double));
1706     double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE[0]));
1707 caltinay 3913 for (index_t i=0; i < numComp; ++i) {
1708     o[INDEX2(i,numComp,0)] = c0*(f_01[i] + f_10[i]) + c1*f_11[i] + c2*f_00[i];
1709     o[INDEX2(i,numComp,1)] = c0*(f_00[i] + f_11[i]) + c1*f_01[i] + c2*f_10[i];
1710     o[INDEX2(i,numComp,2)] = c0*(f_00[i] + f_11[i]) + c1*f_10[i] + c2*f_01[i];
1711     o[INDEX2(i,numComp,3)] = c0*(f_01[i] + f_10[i]) + c1*f_00[i] + c2*f_11[i];
1712     } /* end of component loop i */
1713     } /* end of k0 loop */
1714     } /* end of k1 loop */
1715     } /* end of parallel section */
1716 caltinay 3711 }
1717 caltinay 3702 }
1718    
1719     //protected
1720 caltinay 3711 void Rectangle::interpolateNodesOnFaces(escript::Data& out, escript::Data& in,
1721     bool reduced) const
1722 caltinay 3702 {
1723 caltinay 3704 const dim_t numComp = in.getDataPointSize();
1724 caltinay 3711 if (reduced) {
1725 caltinay 3760 out.requireWrite();
1726 caltinay 3724 #pragma omp parallel
1727     {
1728 caltinay 3913 vector<double> f_00(numComp);
1729     vector<double> f_01(numComp);
1730     vector<double> f_10(numComp);
1731     vector<double> f_11(numComp);
1732 caltinay 3724 if (m_faceOffset[0] > -1) {
1733     #pragma omp for nowait
1734 caltinay 4334 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1735     memcpy(&f_00[0], in.getSampleDataRO(INDEX2(0,k1, m_NN[0])), numComp*sizeof(double));
1736     memcpy(&f_01[0], in.getSampleDataRO(INDEX2(0,k1+1, m_NN[0])), numComp*sizeof(double));
1737 caltinay 3724 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
1738     for (index_t i=0; i < numComp; ++i) {
1739 caltinay 4375 o[INDEX2(i,numComp,0)] = (f_00[i] + f_01[i])/2;
1740 caltinay 3724 } /* end of component loop i */
1741     } /* end of k1 loop */
1742     } /* end of face 0 */
1743     if (m_faceOffset[1] > -1) {
1744     #pragma omp for nowait
1745 caltinay 4334 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1746     memcpy(&f_10[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1, m_NN[0])), numComp*sizeof(double));
1747     memcpy(&f_11[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1+1, m_NN[0])), numComp*sizeof(double));
1748 caltinay 3724 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
1749     for (index_t i=0; i < numComp; ++i) {
1750 caltinay 4375 o[INDEX2(i,numComp,0)] = (f_10[i] + f_11[i])/2;
1751 caltinay 3724 } /* end of component loop i */
1752     } /* end of k1 loop */
1753     } /* end of face 1 */
1754     if (m_faceOffset[2] > -1) {
1755     #pragma omp for nowait
1756 caltinay 4334 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1757     memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,0, m_NN[0])), numComp*sizeof(double));
1758     memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,0, m_NN[0])), numComp*sizeof(double));
1759 caltinay 3724 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1760     for (index_t i=0; i < numComp; ++i) {
1761 caltinay 4375 o[INDEX2(i,numComp,0)] = (f_00[i] + f_10[i])/2;
1762 caltinay 3724 } /* end of component loop i */
1763     } /* end of k0 loop */
1764     } /* end of face 2 */
1765     if (m_faceOffset[3] > -1) {
1766     #pragma omp for nowait
1767 caltinay 4334 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1768     memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1769     memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1770 caltinay 3724 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1771     for (index_t i=0; i < numComp; ++i) {
1772 caltinay 4375 o[INDEX2(i,numComp,0)] = (f_01[i] + f_11[i])/2;
1773 caltinay 3724 } /* end of component loop i */
1774     } /* end of k0 loop */
1775     } /* end of face 3 */
1776 caltinay 3913 } /* end of parallel section */
1777 caltinay 3711 } else {
1778 caltinay 3760 out.requireWrite();
1779 caltinay 3724 const double c0 = 0.21132486540518711775;
1780     const double c1 = 0.78867513459481288225;
1781     #pragma omp parallel
1782     {
1783 caltinay 3913 vector<double> f_00(numComp);
1784     vector<double> f_01(numComp);
1785     vector<double> f_10(numComp);
1786     vector<double> f_11(numComp);
1787 caltinay 3724 if (m_faceOffset[0] > -1) {
1788     #pragma omp for nowait
1789 caltinay 4334 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1790     memcpy(&f_00[0], in.getSampleDataRO(INDEX2(0,k1, m_NN[0])), numComp*sizeof(double));
1791     memcpy(&f_01[0], in.getSampleDataRO(INDEX2(0,k1+1, m_NN[0])), numComp*sizeof(double));
1792 caltinay 3724 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
1793     for (index_t i=0; i < numComp; ++i) {
1794 caltinay 3913 o[INDEX2(i,numComp,0)] = c0*f_01[i] + c1*f_00[i];
1795     o[INDEX2(i,numComp,1)] = c0*f_00[i] + c1*f_01[i];
1796 caltinay 3724 } /* end of component loop i */
1797     } /* end of k1 loop */
1798     } /* end of face 0 */
1799     if (m_faceOffset[1] > -1) {
1800     #pragma omp for nowait
1801 caltinay 4334 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1802     memcpy(&f_10[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1, m_NN[0])), numComp*sizeof(double));
1803     memcpy(&f_11[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1+1, m_NN[0])), numComp*sizeof(double));
1804 caltinay 3724 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
1805     for (index_t i=0; i < numComp; ++i) {
1806 caltinay 3913 o[INDEX2(i,numComp,0)] = c1*f_10[i] + c0*f_11[i];
1807     o[INDEX2(i,numComp,1)] = c1*f_11[i] + c0*f_10[i];
1808 caltinay 3724 } /* end of component loop i */
1809     } /* end of k1 loop */
1810     } /* end of face 1 */
1811     if (m_faceOffset[2] > -1) {
1812     #pragma omp for nowait
1813 caltinay 4334 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1814     memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,0, m_NN[0])), numComp*sizeof(double));
1815     memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,0, m_NN[0])), numComp*sizeof(double));
1816 caltinay 3724 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1817     for (index_t i=0; i < numComp; ++i) {
1818 caltinay 3913 o[INDEX2(i,numComp,0)] = c0*f_10[i] + c1*f_00[i];
1819     o[INDEX2(i,numComp,1)] = c0*f_00[i] + c1*f_10[i];
1820 caltinay 3724 } /* end of component loop i */
1821     } /* end of k0 loop */
1822     } /* end of face 2 */
1823     if (m_faceOffset[3] > -1) {
1824     #pragma omp for nowait
1825 caltinay 4334 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1826     memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1827     memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1828 caltinay 3724 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1829     for (index_t i=0; i < numComp; ++i) {
1830 caltinay 3913 o[INDEX2(i,numComp,0)] = c0*f_11[i] + c1*f_01[i];
1831     o[INDEX2(i,numComp,1)] = c0*f_01[i] + c1*f_11[i];
1832 caltinay 3724 } /* end of component loop i */
1833     } /* end of k0 loop */
1834     } /* end of face 3 */
1835 caltinay 3913 } /* end of parallel section */
1836 caltinay 3711 }
1837 caltinay 3702 }
1838    
1839 caltinay 3748 //protected
1840     void Rectangle::assemblePDESingle(Paso_SystemMatrix* mat,
1841     escript::Data& rhs, const escript::Data& A, const escript::Data& B,
1842     const escript::Data& C, const escript::Data& D,
1843 caltinay 3769 const escript::Data& X, const escript::Data&