/[escript]/trunk/ripley/src/Rectangle.cpp
ViewVC logotype

Contents of /trunk/ripley/src/Rectangle.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4675 - (show annotations)
Mon Feb 17 01:07:12 2014 UTC (5 years, 9 months ago) by sshaw
File size: 90203 byte(s)
expanded tab chars for consistentcy and removed trailing whitespace
1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2014 by University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Open Software License version 3.0
9 * http://www.opensource.org/licenses/osl-3.0.php
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development 2012-2013 by School of Earth Sciences
13 * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 *
15 *****************************************************************************/
16
17 #include <ripley/Rectangle.h>
18 #include <paso/SystemMatrix.h>
19 #include <esysUtils/esysFileWriter.h>
20 #include <ripley/DefaultAssembler2D.h>
21 #include <ripley/WaveAssembler2D.h>
22 #include <boost/scoped_array.hpp>
23 #include "esysUtils/EsysRandom.h"
24
25 #ifdef USE_NETCDF
26 #include <netcdfcpp.h>
27 #endif
28
29 #if USE_SILO
30 #include <silo.h>
31 #ifdef ESYS_MPI
32 #include <pmpio.h>
33 #endif
34 #endif
35
36 #include <iomanip>
37
38 using namespace std;
39 using esysUtils::FileWriter;
40
41 namespace ripley {
42
43 Rectangle::Rectangle(int n0, int n1, double x0, double y0, double x1,
44 double y1, int d0, int d1,
45 const std::vector<double>& points,
46 const std::vector<int>& tags,
47 const simap_t& tagnamestonums) :
48 RipleyDomain(2)
49 {
50 // ignore subdivision parameters for serial run
51 if (m_mpiInfo->size == 1) {
52 d0=1;
53 d1=1;
54 }
55
56 bool warn=false;
57 // if number of subdivisions is non-positive, try to subdivide by the same
58 // ratio as the number of elements
59 if (d0<=0 && d1<=0) {
60 warn=true;
61 d0=max(1, (int)(sqrt(m_mpiInfo->size*(n0+1)/(float)(n1+1))));
62 d1=m_mpiInfo->size/d0;
63 if (d0*d1 != m_mpiInfo->size) {
64 // ratios not the same so subdivide side with more elements only
65 if (n0>n1) {
66 d0=0;
67 d1=1;
68 } else {
69 d0=1;
70 d1=0;
71 }
72 }
73 }
74 if (d0<=0) {
75 // d1 is preset, determine d0 - throw further down if result is no good
76 d0=m_mpiInfo->size/d1;
77 } else if (d1<=0) {
78 // d0 is preset, determine d1 - throw further down if result is no good
79 d1=m_mpiInfo->size/d0;
80 }
81
82 // ensure number of subdivisions is valid and nodes can be distributed
83 // among number of ranks
84 if (d0*d1 != m_mpiInfo->size)
85 throw RipleyException("Invalid number of spatial subdivisions");
86
87 if (warn) {
88 cout << "Warning: Automatic domain subdivision (d0=" << d0 << ", d1="
89 << d1 << "). This may not be optimal!" << endl;
90 }
91
92 double l0 = x1-x0;
93 double l1 = y1-y0;
94 m_dx[0] = l0/n0;
95 m_dx[1] = l1/n1;
96
97 if ((n0+1)%d0 > 0) {
98 n0=(int)round((float)(n0+1)/d0+0.5)*d0-1;
99 l0=m_dx[0]*n0;
100 cout << "Warning: Adjusted number of elements and length. N0="
101 << n0 << ", l0=" << l0 << endl;
102 }
103 if ((n1+1)%d1 > 0) {
104 n1=(int)round((float)(n1+1)/d1+0.5)*d1-1;
105 l1=m_dx[1]*n1;
106 cout << "Warning: Adjusted number of elements and length. N1="
107 << n1 << ", l1=" << l1 << endl;
108 }
109
110 if ((d0 > 1 && (n0+1)/d0<2) || (d1 > 1 && (n1+1)/d1<2))
111 throw RipleyException("Too few elements for the number of ranks");
112
113 m_gNE[0] = n0;
114 m_gNE[1] = n1;
115 m_origin[0] = x0;
116 m_origin[1] = y0;
117 m_length[0] = l0;
118 m_length[1] = l1;
119 m_NX[0] = d0;
120 m_NX[1] = d1;
121
122 // local number of elements (with and without overlap)
123 m_NE[0] = m_ownNE[0] = (d0>1 ? (n0+1)/d0 : n0);
124 if (m_mpiInfo->rank%d0>0 && m_mpiInfo->rank%d0<d0-1)
125 m_NE[0]++;
126 else if (d0>1 && m_mpiInfo->rank%d0==d0-1)
127 m_ownNE[0]--;
128
129 m_NE[1] = m_ownNE[1] = (d1>1 ? (n1+1)/d1 : n1);
130 if (m_mpiInfo->rank/d0>0 && m_mpiInfo->rank/d0<d1-1)
131 m_NE[1]++;
132 else if (d1>1 && m_mpiInfo->rank/d0==d1-1)
133 m_ownNE[1]--;
134
135 // local number of nodes
136 m_NN[0] = m_NE[0]+1;
137 m_NN[1] = m_NE[1]+1;
138
139 // bottom-left node is at (offset0,offset1) in global mesh
140 m_offset[0] = (n0+1)/d0*(m_mpiInfo->rank%d0);
141 if (m_offset[0] > 0)
142 m_offset[0]--;
143 m_offset[1] = (n1+1)/d1*(m_mpiInfo->rank/d0);
144 if (m_offset[1] > 0)
145 m_offset[1]--;
146
147 populateSampleIds();
148 createPattern();
149 assembler = new DefaultAssembler2D(this, m_dx, m_NX, m_NE, m_NN);
150 for (map<string, int>::const_iterator i = tagnamestonums.begin();
151 i != tagnamestonums.end(); i++) {
152 setTagMap(i->first, i->second);
153 }
154 addPoints(tags.size(), &points[0], &tags[0]);
155 }
156
157 Rectangle::~Rectangle()
158 {
159 Paso_SystemMatrixPattern_free(m_pattern);
160 Paso_Connector_free(m_connector);
161 delete assembler;
162 }
163
164 string Rectangle::getDescription() const
165 {
166 return "ripley::Rectangle";
167 }
168
169 bool Rectangle::operator==(const AbstractDomain& other) const
170 {
171 const Rectangle* o=dynamic_cast<const Rectangle*>(&other);
172 if (o) {
173 return (RipleyDomain::operator==(other) &&
174 m_gNE[0]==o->m_gNE[0] && m_gNE[1]==o->m_gNE[1]
175 && m_origin[0]==o->m_origin[0] && m_origin[1]==o->m_origin[1]
176 && m_length[0]==o->m_length[0] && m_length[1]==o->m_length[1]
177 && m_NX[0]==o->m_NX[0] && m_NX[1]==o->m_NX[1]);
178 }
179
180 return false;
181 }
182
183 void Rectangle::readNcGrid(escript::Data& out, string filename, string varname,
184 const ReaderParameters& params) const
185 {
186 #ifdef USE_NETCDF
187 // check destination function space
188 int myN0, myN1;
189 if (out.getFunctionSpace().getTypeCode() == Nodes) {
190 myN0 = m_NN[0];
191 myN1 = m_NN[1];
192 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
193 out.getFunctionSpace().getTypeCode() == ReducedElements) {
194 myN0 = m_NE[0];
195 myN1 = m_NE[1];
196 } else
197 throw RipleyException("readNcGrid(): invalid function space for output data object");
198
199 if (params.first.size() != 2)
200 throw RipleyException("readNcGrid(): argument 'first' must have 2 entries");
201
202 if (params.numValues.size() != 2)
203 throw RipleyException("readNcGrid(): argument 'numValues' must have 2 entries");
204
205 if (params.multiplier.size() != 2)
206 throw RipleyException("readNcGrid(): argument 'multiplier' must have 2 entries");
207 for (size_t i=0; i<params.multiplier.size(); i++)
208 if (params.multiplier[i]<1)
209 throw RipleyException("readNcGrid(): all multipliers must be positive");
210 if (params.reverse.size() != 2)
211 throw RipleyException("readNcGrid(): argument 'reverse' must have 2 entries");
212
213 // check file existence and size
214 NcFile f(filename.c_str(), NcFile::ReadOnly);
215 if (!f.is_valid())
216 throw RipleyException("readNcGrid(): cannot open file");
217
218 NcVar* var = f.get_var(varname.c_str());
219 if (!var)
220 throw RipleyException("readNcGrid(): invalid variable");
221
222 // TODO: rank>0 data support
223 const int numComp = out.getDataPointSize();
224 if (numComp > 1)
225 throw RipleyException("readNcGrid(): only scalar data supported");
226
227 const int dims = var->num_dims();
228 boost::scoped_array<long> edges(var->edges());
229
230 // is this a slice of the data object (dims!=2)?
231 // note the expected ordering of edges (as in numpy: y,x)
232 if ( (dims==2 && (params.numValues[1] > edges[0] || params.numValues[0] > edges[1]))
233 || (dims==1 && params.numValues[1]>1) ) {
234 throw RipleyException("readNcGrid(): not enough data in file");
235 }
236
237 // check if this rank contributes anything
238 if (params.first[0] >= m_offset[0]+myN0 ||
239 params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
240 params.first[1] >= m_offset[1]+myN1 ||
241 params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1])
242 return;
243
244 // now determine how much this rank has to write
245
246 // first coordinates in data object to write to
247 const int first0 = max(0, params.first[0]-m_offset[0]);
248 const int first1 = max(0, params.first[1]-m_offset[1]);
249 // indices to first value in file (not accounting for reverse yet)
250 int idx0 = max(0, m_offset[0]-params.first[0]);
251 int idx1 = max(0, m_offset[1]-params.first[1]);
252 // number of values to read
253 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
254 const int num1 = min(params.numValues[1]-idx1, myN1-first1);
255
256 // make sure we read the right block if going backwards through file
257 if (params.reverse[0])
258 idx0 = edges[dims-1]-num0-idx0;
259 if (dims>1 && params.reverse[1])
260 idx1 = edges[dims-2]-num1-idx1;
261
262 vector<double> values(num0*num1);
263 if (dims==2) {
264 var->set_cur(idx1, idx0);
265 var->get(&values[0], num1, num0);
266 } else {
267 var->set_cur(idx0);
268 var->get(&values[0], num0);
269 }
270
271 const int dpp = out.getNumDataPointsPerSample();
272 out.requireWrite();
273
274 // helpers for reversing
275 const int x0 = (params.reverse[0] ? num0-1 : 0);
276 const int x_mult = (params.reverse[0] ? -1 : 1);
277 const int y0 = (params.reverse[1] ? num1-1 : 0);
278 const int y_mult = (params.reverse[1] ? -1 : 1);
279
280 for (index_t y=0; y<num1; y++) {
281 #pragma omp parallel for
282 for (index_t x=0; x<num0; x++) {
283 const int baseIndex = first0+x*params.multiplier[0]
284 +(first1+y*params.multiplier[1])*myN0;
285 const int srcIndex = (y0+y_mult*y)*num0+(x0+x_mult*x);
286 if (!isnan(values[srcIndex])) {
287 for (index_t m1=0; m1<params.multiplier[1]; m1++) {
288 for (index_t m0=0; m0<params.multiplier[0]; m0++) {
289 const int dataIndex = baseIndex+m0+m1*myN0;
290 double* dest = out.getSampleDataRW(dataIndex);
291 for (index_t q=0; q<dpp; q++) {
292 *dest++ = values[srcIndex];
293 }
294 }
295 }
296 }
297 }
298 }
299 #else
300 throw RipleyException("readNcGrid(): not compiled with netCDF support");
301 #endif
302 }
303
304 void Rectangle::readBinaryGrid(escript::Data& out, string filename,
305 const ReaderParameters& params) const
306 {
307 // the mapping is not universally correct but should work on our
308 // supported platforms
309 switch (params.dataType) {
310 case DATATYPE_INT32:
311 readBinaryGridImpl<int>(out, filename, params);
312 break;
313 case DATATYPE_FLOAT32:
314 readBinaryGridImpl<float>(out, filename, params);
315 break;
316 case DATATYPE_FLOAT64:
317 readBinaryGridImpl<double>(out, filename, params);
318 break;
319 default:
320 throw RipleyException("readBinaryGrid(): invalid or unsupported datatype");
321 }
322 }
323
324 template<typename ValueType>
325 void Rectangle::readBinaryGridImpl(escript::Data& out, const string& filename,
326 const ReaderParameters& params) const
327 {
328 // check destination function space
329 int myN0, myN1;
330 if (out.getFunctionSpace().getTypeCode() == Nodes) {
331 myN0 = m_NN[0];
332 myN1 = m_NN[1];
333 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
334 out.getFunctionSpace().getTypeCode() == ReducedElements) {
335 myN0 = m_NE[0];
336 myN1 = m_NE[1];
337 } else
338 throw RipleyException("readBinaryGrid(): invalid function space for output data object");
339
340 // check file existence and size
341 ifstream f(filename.c_str(), ifstream::binary);
342 if (f.fail()) {
343 throw RipleyException("readBinaryGrid(): cannot open file");
344 }
345 f.seekg(0, ios::end);
346 const int numComp = out.getDataPointSize();
347 const int filesize = f.tellg();
348 const int reqsize = params.numValues[0]*params.numValues[1]*numComp*sizeof(ValueType);
349 if (filesize < reqsize) {
350 f.close();
351 throw RipleyException("readBinaryGrid(): not enough data in file");
352 }
353
354 // check if this rank contributes anything
355 if (params.first[0] >= m_offset[0]+myN0 ||
356 params.first[0]+params.numValues[0] <= m_offset[0] ||
357 params.first[1] >= m_offset[1]+myN1 ||
358 params.first[1]+params.numValues[1] <= m_offset[1]) {
359 f.close();
360 return;
361 }
362
363 // now determine how much this rank has to write
364
365 // first coordinates in data object to write to
366 const int first0 = max(0, params.first[0]-m_offset[0]);
367 const int first1 = max(0, params.first[1]-m_offset[1]);
368 // indices to first value in file
369 const int idx0 = max(0, m_offset[0]-params.first[0]);
370 const int idx1 = max(0, m_offset[1]-params.first[1]);
371 // number of values to read
372 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
373 const int num1 = min(params.numValues[1]-idx1, myN1-first1);
374
375 out.requireWrite();
376 vector<ValueType> values(num0*numComp);
377 const int dpp = out.getNumDataPointsPerSample();
378
379 for (int y=0; y<num1; y++) {
380 const int fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]);
381 f.seekg(fileofs*sizeof(ValueType));
382 f.read((char*)&values[0], num0*numComp*sizeof(ValueType));
383 for (int x=0; x<num0; x++) {
384 const int baseIndex = first0+x*params.multiplier[0]
385 +(first1+y*params.multiplier[1])*myN0;
386 for (int m1=0; m1<params.multiplier[1]; m1++) {
387 for (int m0=0; m0<params.multiplier[0]; m0++) {
388 const int dataIndex = baseIndex+m0+m1*myN0;
389 double* dest = out.getSampleDataRW(dataIndex);
390 for (int c=0; c<numComp; c++) {
391 ValueType val = values[x*numComp+c];
392
393 if (params.byteOrder != BYTEORDER_NATIVE) {
394 char* cval = reinterpret_cast<char*>(&val);
395 // this will alter val!!
396 byte_swap32(cval);
397 }
398 if (!std::isnan(val)) {
399 for (int q=0; q<dpp; q++) {
400 *dest++ = static_cast<double>(val);
401 }
402 }
403 }
404 }
405 }
406 }
407 }
408
409 f.close();
410 }
411
412 void Rectangle::writeBinaryGrid(const escript::Data& in, string filename,
413 int byteOrder, int dataType) const
414 {
415 // the mapping is not universally correct but should work on our
416 // supported platforms
417 switch (dataType) {
418 case DATATYPE_INT32:
419 writeBinaryGridImpl<int>(in, filename, byteOrder);
420 break;
421 case DATATYPE_FLOAT32:
422 writeBinaryGridImpl<float>(in, filename, byteOrder);
423 break;
424 case DATATYPE_FLOAT64:
425 writeBinaryGridImpl<double>(in, filename, byteOrder);
426 break;
427 default:
428 throw RipleyException("writeBinaryGrid(): invalid or unsupported datatype");
429 }
430 }
431
432 template<typename ValueType>
433 void Rectangle::writeBinaryGridImpl(const escript::Data& in,
434 const string& filename, int byteOrder) const
435 {
436 // check function space and determine number of points
437 int myN0, myN1;
438 int totalN0, totalN1;
439 if (in.getFunctionSpace().getTypeCode() == Nodes) {
440 myN0 = m_NN[0];
441 myN1 = m_NN[1];
442 totalN0 = m_gNE[0]+1;
443 totalN1 = m_gNE[1]+1;
444 } else if (in.getFunctionSpace().getTypeCode() == Elements ||
445 in.getFunctionSpace().getTypeCode() == ReducedElements) {
446 myN0 = m_NE[0];
447 myN1 = m_NE[1];
448 totalN0 = m_gNE[0];
449 totalN1 = m_gNE[1];
450 } else
451 throw RipleyException("writeBinaryGrid(): invalid function space of data object");
452
453 const int numComp = in.getDataPointSize();
454 const int dpp = in.getNumDataPointsPerSample();
455
456 if (numComp > 1 || dpp > 1)
457 throw RipleyException("writeBinaryGrid(): only scalar, single-value data supported");
458
459 const int fileSize = sizeof(ValueType)*numComp*dpp*totalN0*totalN1;
460
461 // from here on we know that each sample consists of one value
462 FileWriter fw;
463 fw.openFile(filename, fileSize);
464 MPIBarrier();
465
466 for (index_t y=0; y<myN1; y++) {
467 const int fileofs = (m_offset[0]+(m_offset[1]+y)*totalN0)*sizeof(ValueType);
468 ostringstream oss;
469
470 for (index_t x=0; x<myN0; x++) {
471 const double* sample = in.getSampleDataRO(y*myN0+x);
472 ValueType fvalue = static_cast<ValueType>(*sample);
473 if (byteOrder == BYTEORDER_NATIVE) {
474 oss.write((char*)&fvalue, sizeof(fvalue));
475 } else {
476 char* value = reinterpret_cast<char*>(&fvalue);
477 oss.write(byte_swap32(value), sizeof(fvalue));
478 }
479 }
480 fw.writeAt(oss, fileofs);
481 }
482 fw.close();
483 }
484
485 void Rectangle::dump(const string& fileName) const
486 {
487 #if USE_SILO
488 string fn(fileName);
489 if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
490 fn+=".silo";
491 }
492
493 int driver=DB_HDF5;
494 DBfile* dbfile = NULL;
495 const char* blockDirFmt = "/block%04d";
496
497 #ifdef ESYS_MPI
498 PMPIO_baton_t* baton = NULL;
499 const int NUM_SILO_FILES = 1;
500 #endif
501
502 if (m_mpiInfo->size > 1) {
503 #ifdef ESYS_MPI
504 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
505 0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
506 PMPIO_DefaultClose, (void*)&driver);
507 // try the fallback driver in case of error
508 if (!baton && driver != DB_PDB) {
509 driver = DB_PDB;
510 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
511 0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
512 PMPIO_DefaultClose, (void*)&driver);
513 }
514 if (baton) {
515 char siloPath[64];
516 snprintf(siloPath, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
517 dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath);
518 }
519 #endif
520 } else {
521 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
522 getDescription().c_str(), driver);
523 // try the fallback driver in case of error
524 if (!dbfile && driver != DB_PDB) {
525 driver = DB_PDB;
526 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
527 getDescription().c_str(), driver);
528 }
529 char siloPath[64];
530 snprintf(siloPath, 64, blockDirFmt, 0);
531 DBMkDir(dbfile, siloPath);
532 DBSetDir(dbfile, siloPath);
533 }
534
535 if (!dbfile)
536 throw RipleyException("dump: Could not create Silo file");
537
538 /*
539 if (driver==DB_HDF5) {
540 // gzip level 1 already provides good compression with minimal
541 // performance penalty. Some tests showed that gzip levels >3 performed
542 // rather badly on escript data both in terms of time and space
543 DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
544 }
545 */
546
547 boost::scoped_ptr<double> x(new double[m_NN[0]]);
548 boost::scoped_ptr<double> y(new double[m_NN[1]]);
549 double* coords[2] = { x.get(), y.get() };
550 #pragma omp parallel
551 {
552 #pragma omp for nowait
553 for (dim_t i0 = 0; i0 < m_NN[0]; i0++) {
554 coords[0][i0]=getLocalCoordinate(i0, 0);
555 }
556 #pragma omp for nowait
557 for (dim_t i1 = 0; i1 < m_NN[1]; i1++) {
558 coords[1][i1]=getLocalCoordinate(i1, 1);
559 }
560 }
561 int* dims = const_cast<int*>(getNumNodesPerDim());
562
563 // write mesh
564 DBPutQuadmesh(dbfile, "mesh", NULL, coords, dims, 2, DB_DOUBLE,
565 DB_COLLINEAR, NULL);
566
567 // write node ids
568 DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], dims, 2,
569 NULL, 0, DB_INT, DB_NODECENT, NULL);
570
571 // write element ids
572 dims = const_cast<int*>(getNumElementsPerDim());
573 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
574 dims, 2, NULL, 0, DB_INT, DB_ZONECENT, NULL);
575
576 // rank 0 writes multimesh and multivar
577 if (m_mpiInfo->rank == 0) {
578 vector<string> tempstrings;
579 vector<char*> names;
580 for (dim_t i=0; i<m_mpiInfo->size; i++) {
581 stringstream path;
582 path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
583 tempstrings.push_back(path.str());
584 names.push_back((char*)tempstrings.back().c_str());
585 }
586 vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
587 DBSetDir(dbfile, "/");
588 DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
589 &types[0], NULL);
590 tempstrings.clear();
591 names.clear();
592 for (dim_t i=0; i<m_mpiInfo->size; i++) {
593 stringstream path;
594 path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
595 tempstrings.push_back(path.str());
596 names.push_back((char*)tempstrings.back().c_str());
597 }
598 types.assign(m_mpiInfo->size, DB_QUADVAR);
599 DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
600 &types[0], NULL);
601 tempstrings.clear();
602 names.clear();
603 for (dim_t i=0; i<m_mpiInfo->size; i++) {
604 stringstream path;
605 path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
606 tempstrings.push_back(path.str());
607 names.push_back((char*)tempstrings.back().c_str());
608 }
609 DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
610 &types[0], NULL);
611 }
612
613 if (m_mpiInfo->size > 1) {
614 #ifdef ESYS_MPI
615 PMPIO_HandOffBaton(baton, dbfile);
616 PMPIO_Finish(baton);
617 #endif
618 } else {
619 DBClose(dbfile);
620 }
621
622 #else // USE_SILO
623 throw RipleyException("dump: no Silo support");
624 #endif
625 }
626
627 const int* Rectangle::borrowSampleReferenceIDs(int fsType) const
628 {
629 switch (fsType) {
630 case Nodes:
631 case ReducedNodes: // FIXME: reduced
632 return &m_nodeId[0];
633 case DegreesOfFreedom:
634 case ReducedDegreesOfFreedom: // FIXME: reduced
635 return &m_dofId[0];
636 case Elements:
637 case ReducedElements:
638 return &m_elementId[0];
639 case FaceElements:
640 case ReducedFaceElements:
641 return &m_faceId[0];
642 case Points:
643 return &m_diracPointNodeIDs[0];
644 default:
645 break;
646 }
647
648 stringstream msg;
649 msg << "borrowSampleReferenceIDs: invalid function space type " << fsType;
650 throw RipleyException(msg.str());
651 }
652
653 bool Rectangle::ownSample(int fsType, index_t id) const
654 {
655 if (getMPISize()==1)
656 return true;
657
658 switch (fsType) {
659 case Nodes:
660 case ReducedNodes: // FIXME: reduced
661 return (m_dofMap[id] < getNumDOF());
662 case DegreesOfFreedom:
663 case ReducedDegreesOfFreedom:
664 return true;
665 case Elements:
666 case ReducedElements:
667 // check ownership of element's bottom left node
668 return (m_dofMap[id%m_NE[0]+m_NN[0]*(id/m_NE[0])] < getNumDOF());
669 case FaceElements:
670 case ReducedFaceElements:
671 {
672 // determine which face the sample belongs to before
673 // checking ownership of corresponding element's first node
674 dim_t n=0;
675 for (size_t i=0; i<4; i++) {
676 n+=m_faceCount[i];
677 if (id<n) {
678 index_t k;
679 if (i==1)
680 k=m_NN[0]-2;
681 else if (i==3)
682 k=m_NN[0]*(m_NN[1]-2);
683 else
684 k=0;
685 // determine whether to move right or up
686 const index_t delta=(i/2==0 ? m_NN[0] : 1);
687 return (m_dofMap[k+(id-n+m_faceCount[i])*delta] < getNumDOF());
688 }
689 }
690 return false;
691 }
692 default:
693 break;
694 }
695
696 stringstream msg;
697 msg << "ownSample: invalid function space type " << fsType;
698 throw RipleyException(msg.str());
699 }
700
701 void Rectangle::setToNormal(escript::Data& out) const
702 {
703 if (out.getFunctionSpace().getTypeCode() == FaceElements) {
704 out.requireWrite();
705 #pragma omp parallel
706 {
707 if (m_faceOffset[0] > -1) {
708 #pragma omp for nowait
709 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
710 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
711 // set vector at two quadrature points
712 *o++ = -1.;
713 *o++ = 0.;
714 *o++ = -1.;
715 *o = 0.;
716 }
717 }
718
719 if (m_faceOffset[1] > -1) {
720 #pragma omp for nowait
721 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
722 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
723 // set vector at two quadrature points
724 *o++ = 1.;
725 *o++ = 0.;
726 *o++ = 1.;
727 *o = 0.;
728 }
729 }
730
731 if (m_faceOffset[2] > -1) {
732 #pragma omp for nowait
733 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
734 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
735 // set vector at two quadrature points
736 *o++ = 0.;
737 *o++ = -1.;
738 *o++ = 0.;
739 *o = -1.;
740 }
741 }
742
743 if (m_faceOffset[3] > -1) {
744 #pragma omp for nowait
745 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
746 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
747 // set vector at two quadrature points
748 *o++ = 0.;
749 *o++ = 1.;
750 *o++ = 0.;
751 *o = 1.;
752 }
753 }
754 } // end of parallel section
755 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
756 out.requireWrite();
757 #pragma omp parallel
758 {
759 if (m_faceOffset[0] > -1) {
760 #pragma omp for nowait
761 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
762 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
763 *o++ = -1.;
764 *o = 0.;
765 }
766 }
767
768 if (m_faceOffset[1] > -1) {
769 #pragma omp for nowait
770 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
771 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
772 *o++ = 1.;
773 *o = 0.;
774 }
775 }
776
777 if (m_faceOffset[2] > -1) {
778 #pragma omp for nowait
779 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
780 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
781 *o++ = 0.;
782 *o = -1.;
783 }
784 }
785
786 if (m_faceOffset[3] > -1) {
787 #pragma omp for nowait
788 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
789 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
790 *o++ = 0.;
791 *o = 1.;
792 }
793 }
794 } // end of parallel section
795
796 } else {
797 stringstream msg;
798 msg << "setToNormal: invalid function space type "
799 << out.getFunctionSpace().getTypeCode();
800 throw RipleyException(msg.str());
801 }
802 }
803
804 void Rectangle::setToSize(escript::Data& out) const
805 {
806 if (out.getFunctionSpace().getTypeCode() == Elements
807 || out.getFunctionSpace().getTypeCode() == ReducedElements) {
808 out.requireWrite();
809 const dim_t numQuad=out.getNumDataPointsPerSample();
810 const double size=sqrt(m_dx[0]*m_dx[0]+m_dx[1]*m_dx[1]);
811 #pragma omp parallel for
812 for (index_t k = 0; k < getNumElements(); ++k) {
813 double* o = out.getSampleDataRW(k);
814 fill(o, o+numQuad, size);
815 }
816 } else if (out.getFunctionSpace().getTypeCode() == FaceElements
817 || out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
818 out.requireWrite();
819 const dim_t numQuad=out.getNumDataPointsPerSample();
820 #pragma omp parallel
821 {
822 if (m_faceOffset[0] > -1) {
823 #pragma omp for nowait
824 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
825 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
826 fill(o, o+numQuad, m_dx[1]);
827 }
828 }
829
830 if (m_faceOffset[1] > -1) {
831 #pragma omp for nowait
832 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
833 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
834 fill(o, o+numQuad, m_dx[1]);
835 }
836 }
837
838 if (m_faceOffset[2] > -1) {
839 #pragma omp for nowait
840 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
841 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
842 fill(o, o+numQuad, m_dx[0]);
843 }
844 }
845
846 if (m_faceOffset[3] > -1) {
847 #pragma omp for nowait
848 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
849 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
850 fill(o, o+numQuad, m_dx[0]);
851 }
852 }
853 } // end of parallel section
854
855 } else {
856 stringstream msg;
857 msg << "setToSize: invalid function space type "
858 << out.getFunctionSpace().getTypeCode();
859 throw RipleyException(msg.str());
860 }
861 }
862
863 void Rectangle::Print_Mesh_Info(const bool full) const
864 {
865 RipleyDomain::Print_Mesh_Info(full);
866 if (full) {
867 cout << " Id Coordinates" << endl;
868 cout.precision(15);
869 cout.setf(ios::scientific, ios::floatfield);
870 for (index_t i=0; i < getNumNodes(); i++) {
871 cout << " " << setw(5) << m_nodeId[i]
872 << " " << getLocalCoordinate(i%m_NN[0], 0)
873 << " " << getLocalCoordinate(i/m_NN[0], 1) << endl;
874 }
875 }
876 }
877
878
879 //protected
880 void Rectangle::assembleCoordinates(escript::Data& arg) const
881 {
882 escriptDataC x = arg.getDataC();
883 int numDim = m_numDim;
884 if (!isDataPointShapeEqual(&x, 1, &numDim))
885 throw RipleyException("setToX: Invalid Data object shape");
886 if (!numSamplesEqual(&x, 1, getNumNodes()))
887 throw RipleyException("setToX: Illegal number of samples in Data object");
888
889 arg.requireWrite();
890 #pragma omp parallel for
891 for (dim_t i1 = 0; i1 < m_NN[1]; i1++) {
892 for (dim_t i0 = 0; i0 < m_NN[0]; i0++) {
893 double* point = arg.getSampleDataRW(i0+m_NN[0]*i1);
894 point[0] = getLocalCoordinate(i0, 0);
895 point[1] = getLocalCoordinate(i1, 1);
896 }
897 }
898 }
899
900 //protected
901 void Rectangle::assembleGradient(escript::Data& out, const escript::Data& in) const
902 {
903 const dim_t numComp = in.getDataPointSize();
904 const double cx0 = .21132486540518711775/m_dx[0];
905 const double cx1 = .78867513459481288225/m_dx[0];
906 const double cx2 = 1./m_dx[0];
907 const double cy0 = .21132486540518711775/m_dx[1];
908 const double cy1 = .78867513459481288225/m_dx[1];
909 const double cy2 = 1./m_dx[1];
910
911 if (out.getFunctionSpace().getTypeCode() == Elements) {
912 out.requireWrite();
913 #pragma omp parallel
914 {
915 vector<double> f_00(numComp);
916 vector<double> f_01(numComp);
917 vector<double> f_10(numComp);
918 vector<double> f_11(numComp);
919 #pragma omp for
920 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
921 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
922 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,k1, m_NN[0])), numComp*sizeof(double));
923 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,k1+1, m_NN[0])), numComp*sizeof(double));
924 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,k1, m_NN[0])), numComp*sizeof(double));
925 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,k1+1, m_NN[0])), numComp*sizeof(double));
926 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE[0]));
927 for (index_t i=0; i < numComp; ++i) {
928 o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx1 + (f_11[i]-f_01[i])*cx0;
929 o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy1 + (f_11[i]-f_10[i])*cy0;
930 o[INDEX3(i,0,1,numComp,2)] = (f_10[i]-f_00[i])*cx1 + (f_11[i]-f_01[i])*cx0;
931 o[INDEX3(i,1,1,numComp,2)] = (f_01[i]-f_00[i])*cy0 + (f_11[i]-f_10[i])*cy1;
932 o[INDEX3(i,0,2,numComp,2)] = (f_10[i]-f_00[i])*cx0 + (f_11[i]-f_01[i])*cx1;
933 o[INDEX3(i,1,2,numComp,2)] = (f_01[i]-f_00[i])*cy1 + (f_11[i]-f_10[i])*cy0;
934 o[INDEX3(i,0,3,numComp,2)] = (f_10[i]-f_00[i])*cx0 + (f_11[i]-f_01[i])*cx1;
935 o[INDEX3(i,1,3,numComp,2)] = (f_01[i]-f_00[i])*cy0 + (f_11[i]-f_10[i])*cy1;
936 } // end of component loop i
937 } // end of k0 loop
938 } // end of k1 loop
939 } // end of parallel section
940 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
941 out.requireWrite();
942 #pragma omp parallel
943 {
944 vector<double> f_00(numComp);
945 vector<double> f_01(numComp);
946 vector<double> f_10(numComp);
947 vector<double> f_11(numComp);
948 #pragma omp for
949 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
950 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
951 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,k1, m_NN[0])), numComp*sizeof(double));
952 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,k1+1, m_NN[0])), numComp*sizeof(double));
953 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,k1, m_NN[0])), numComp*sizeof(double));
954 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,k1+1, m_NN[0])), numComp*sizeof(double));
955 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE[0]));
956 for (index_t i=0; i < numComp; ++i) {
957 o[INDEX3(i,0,0,numComp,2)] = (f_10[i] + f_11[i] - f_00[i] - f_01[i])*cx2/2;
958 o[INDEX3(i,1,0,numComp,2)] = (f_01[i] + f_11[i] - f_00[i] - f_10[i])*cy2/2;
959 } // end of component loop i
960 } // end of k0 loop
961 } // end of k1 loop
962 } // end of parallel section
963 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
964 out.requireWrite();
965 #pragma omp parallel
966 {
967 vector<double> f_00(numComp);
968 vector<double> f_01(numComp);
969 vector<double> f_10(numComp);
970 vector<double> f_11(numComp);
971 if (m_faceOffset[0] > -1) {
972 #pragma omp for nowait
973 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
974 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(0,k1, m_NN[0])), numComp*sizeof(double));
975 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(0,k1+1, m_NN[0])), numComp*sizeof(double));
976 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(1,k1, m_NN[0])), numComp*sizeof(double));
977 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(1,k1+1, m_NN[0])), numComp*sizeof(double));
978 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
979 for (index_t i=0; i < numComp; ++i) {
980 o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx1 + (f_11[i]-f_01[i])*cx0;
981 o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy2;
982 o[INDEX3(i,0,1,numComp,2)] = (f_10[i]-f_00[i])*cx0 + (f_11[i]-f_01[i])*cx1;
983 o[INDEX3(i,1,1,numComp,2)] = (f_01[i]-f_00[i])*cy2;
984 } // end of component loop i
985 } // end of k1 loop
986 } // end of face 0
987 if (m_faceOffset[1] > -1) {
988 #pragma omp for nowait
989 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
990 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(m_NN[0]-2,k1, m_NN[0])), numComp*sizeof(double));
991 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(m_NN[0]-2,k1+1, m_NN[0])), numComp*sizeof(double));
992 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1, m_NN[0])), numComp*sizeof(double));
993 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1+1, m_NN[0])), numComp*sizeof(double));
994 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
995 for (index_t i=0; i < numComp; ++i) {
996 o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx1 + (f_11[i]-f_01[i])*cx0;
997 o[INDEX3(i,1,0,numComp,2)] = (f_11[i]-f_10[i])*cy2;
998 o[INDEX3(i,0,1,numComp,2)] = (f_10[i]-f_00[i])*cx0 + (f_11[i]-f_01[i])*cx1;
999 o[INDEX3(i,1,1,numComp,2)] = (f_11[i]-f_10[i])*cy2;
1000 } // end of component loop i
1001 } // end of k1 loop
1002 } // end of face 1
1003 if (m_faceOffset[2] > -1) {
1004 #pragma omp for nowait
1005 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1006 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,0, m_NN[0])), numComp*sizeof(double));
1007 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,1, m_NN[0])), numComp*sizeof(double));
1008 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,0, m_NN[0])), numComp*sizeof(double));
1009 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,1, m_NN[0])), numComp*sizeof(double));
1010 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1011 for (index_t i=0; i < numComp; ++i) {
1012 o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx2;
1013 o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy1 + (f_11[i]-f_10[i])*cy0;
1014 o[INDEX3(i,0,1,numComp,2)] = (f_10[i]-f_00[i])*cx2;
1015 o[INDEX3(i,1,1,numComp,2)] = (f_01[i]-f_00[i])*cy0 + (f_11[i]-f_10[i])*cy1;
1016 } // end of component loop i
1017 } // end of k0 loop
1018 } // end of face 2
1019 if (m_faceOffset[3] > -1) {
1020 #pragma omp for nowait
1021 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1022 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-2, m_NN[0])), numComp*sizeof(double));
1023 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1024 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-2, m_NN[0])), numComp*sizeof(double));
1025 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1026 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1027 for (index_t i=0; i < numComp; ++i) {
1028 o[INDEX3(i,0,0,numComp,2)] = (f_11[i]-f_01[i])*cx2;
1029 o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy1 + (f_11[i]-f_10[i])*cy0;
1030 o[INDEX3(i,0,1,numComp,2)] = (f_11[i]-f_01[i])*cx2;
1031 o[INDEX3(i,1,1,numComp,2)] = (f_01[i]-f_00[i])*cy0 + (f_11[i]-f_10[i])*cy1;
1032 } // end of component loop i
1033 } // end of k0 loop
1034 } // end of face 3
1035 } // end of parallel section
1036
1037 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1038 out.requireWrite();
1039 #pragma omp parallel
1040 {
1041 vector<double> f_00(numComp);
1042 vector<double> f_01(numComp);
1043 vector<double> f_10(numComp);
1044 vector<double> f_11(numComp);
1045 if (m_faceOffset[0] > -1) {
1046 #pragma omp for nowait
1047 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1048 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(0,k1, m_NN[0])), numComp*sizeof(double));
1049 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(0,k1+1, m_NN[0])), numComp*sizeof(double));
1050 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(1,k1, m_NN[0])), numComp*sizeof(double));
1051 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(1,k1+1, m_NN[0])), numComp*sizeof(double));
1052 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
1053 for (index_t i=0; i < numComp; ++i) {
1054 o[INDEX3(i,0,0,numComp,2)] = (f_10[i] + f_11[i] - f_00[i] - f_01[i])*cx2/2;
1055 o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy2;
1056 } // end of component loop i
1057 } // end of k1 loop
1058 } // end of face 0
1059 if (m_faceOffset[1] > -1) {
1060 #pragma omp for nowait
1061 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1062 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(m_NN[0]-2,k1, m_NN[0])), numComp*sizeof(double));
1063 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(m_NN[0]-2,k1+1, m_NN[0])), numComp*sizeof(double));
1064 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1, m_NN[0])), numComp*sizeof(double));
1065 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1+1, m_NN[0])), numComp*sizeof(double));
1066 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
1067 for (index_t i=0; i < numComp; ++i) {
1068 o[INDEX3(i,0,0,numComp,2)] = (f_10[i] + f_11[i] - f_00[i] - f_01[i])*cx2/2;
1069 o[INDEX3(i,1,0,numComp,2)] = (f_11[i]-f_10[i])*cy2;
1070 } // end of component loop i
1071 } // end of k1 loop
1072 } // end of face 1
1073 if (m_faceOffset[2] > -1) {
1074 #pragma omp for nowait
1075 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1076 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,0, m_NN[0])), numComp*sizeof(double));
1077 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,1, m_NN[0])), numComp*sizeof(double));
1078 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,0, m_NN[0])), numComp*sizeof(double));
1079 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,1, m_NN[0])), numComp*sizeof(double));
1080 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1081 for (index_t i=0; i < numComp; ++i) {
1082 o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx2;
1083 o[INDEX3(i,1,0,numComp,2)] = (f_01[i] + f_11[i] - f_00[i] - f_10[i])*cy2/2;
1084 } // end of component loop i
1085 } // end of k0 loop
1086 } // end of face 2
1087 if (m_faceOffset[3] > -1) {
1088 #pragma omp for nowait
1089 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1090 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-2, m_NN[0])), numComp*sizeof(double));
1091 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1092 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-2, m_NN[0])), numComp*sizeof(double));
1093 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1094 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1095 for (index_t i=0; i < numComp; ++i) {
1096 o[INDEX3(i,0,0,numComp,2)] = (f_11[i]-f_01[i])*cx2;
1097 o[INDEX3(i,1,0,numComp,2)] = (f_01[i] + f_11[i] - f_00[i] - f_10[i])*cy2/2;
1098 } // end of component loop i
1099 } // end of k0 loop
1100 } // end of face 3
1101 } // end of parallel section
1102 }
1103 }
1104
1105 //protected
1106 void Rectangle::assembleIntegrate(vector<double>& integrals,
1107 const escript::Data& arg) const
1108 {
1109 const dim_t numComp = arg.getDataPointSize();
1110 const index_t left = (m_offset[0]==0 ? 0 : 1);
1111 const index_t bottom = (m_offset[1]==0 ? 0 : 1);
1112 const int fs=arg.getFunctionSpace().getTypeCode();
1113 if (fs == Elements && arg.actsExpanded()) {
1114 #pragma omp parallel
1115 {
1116 vector<double> int_local(numComp, 0);
1117 const double w = m_dx[0]*m_dx[1]/4.;
1118 #pragma omp for nowait
1119 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1120 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1121 const double* f = arg.getSampleDataRO(INDEX2(k0, k1, m_NE[0]));
1122 for (index_t i=0; i < numComp; ++i) {
1123 const double f0 = f[INDEX2(i,0,numComp)];
1124 const double f1 = f[INDEX2(i,1,numComp)];
1125 const double f2 = f[INDEX2(i,2,numComp)];
1126 const double f3 = f[INDEX2(i,3,numComp)];
1127 int_local[i]+=(f0+f1+f2+f3)*w;
1128 } // end of component loop i
1129 } // end of k0 loop
1130 } // end of k1 loop
1131 #pragma omp critical
1132 for (index_t i=0; i<numComp; i++)
1133 integrals[i]+=int_local[i];
1134 } // end of parallel section
1135
1136 } else if (fs==ReducedElements || (fs==Elements && !arg.actsExpanded())) {
1137 const double w = m_dx[0]*m_dx[1];
1138 #pragma omp parallel
1139 {
1140 vector<double> int_local(numComp, 0);
1141 #pragma omp for nowait
1142 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1143 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1144 const double* f = arg.getSampleDataRO(INDEX2(k0, k1, m_NE[0]));
1145 for (index_t i=0; i < numComp; ++i) {
1146 int_local[i]+=f[i]*w;
1147 }
1148 }
1149 }
1150 #pragma omp critical
1151 for (index_t i=0; i<numComp; i++)
1152 integrals[i]+=int_local[i];
1153 } // end of parallel section
1154
1155 } else if (fs == FaceElements && arg.actsExpanded()) {
1156 #pragma omp parallel
1157 {
1158 vector<double> int_local(numComp, 0);
1159 const double w0 = m_dx[0]/2.;
1160 const double w1 = m_dx[1]/2.;
1161 if (m_faceOffset[0] > -1) {
1162 #pragma omp for nowait
1163 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1164 const double* f = arg.getSampleDataRO(m_faceOffset[0]+k1);
1165 for (index_t i=0; i < numComp; ++i) {
1166 const double f0 = f[INDEX2(i,0,numComp)];
1167 const double f1 = f[INDEX2(i,1,numComp)];
1168 int_local[i]+=(f0+f1)*w1;
1169 } // end of component loop i
1170 } // end of k1 loop
1171 }
1172
1173 if (m_faceOffset[1] > -1) {
1174 #pragma omp for nowait
1175 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1176 const double* f = arg.getSampleDataRO(m_faceOffset[1]+k1);
1177 for (index_t i=0; i < numComp; ++i) {
1178 const double f0 = f[INDEX2(i,0,numComp)];
1179 const double f1 = f[INDEX2(i,1,numComp)];
1180 int_local[i]+=(f0+f1)*w1;
1181 } // end of component loop i
1182 } // end of k1 loop
1183 }
1184
1185 if (m_faceOffset[2] > -1) {
1186 #pragma omp for nowait
1187 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1188 const double* f = arg.getSampleDataRO(m_faceOffset[2]+k0);
1189 for (index_t i=0; i < numComp; ++i) {
1190 const double f0 = f[INDEX2(i,0,numComp)];
1191 const double f1 = f[INDEX2(i,1,numComp)];
1192 int_local[i]+=(f0+f1)*w0;
1193 } // end of component loop i
1194 } // end of k0 loop
1195 }
1196
1197 if (m_faceOffset[3] > -1) {
1198 #pragma omp for nowait
1199 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1200 const double* f = arg.getSampleDataRO(m_faceOffset[3]+k0);
1201 for (index_t i=0; i < numComp; ++i) {
1202 const double f0 = f[INDEX2(i,0,numComp)];
1203 const double f1 = f[INDEX2(i,1,numComp)];
1204 int_local[i]+=(f0+f1)*w0;
1205 } // end of component loop i
1206 } // end of k0 loop
1207 }
1208 #pragma omp critical
1209 for (index_t i=0; i<numComp; i++)
1210 integrals[i]+=int_local[i];
1211 } // end of parallel section
1212
1213 } else if (fs==ReducedFaceElements || (fs==FaceElements && !arg.actsExpanded())) {
1214 #pragma omp parallel
1215 {
1216 vector<double> int_local(numComp, 0);
1217 if (m_faceOffset[0] > -1) {
1218 #pragma omp for nowait
1219 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1220 const double* f = arg.getSampleDataRO(m_faceOffset[0]+k1);
1221 for (index_t i=0; i < numComp; ++i) {
1222 int_local[i]+=f[i]*m_dx[1];
1223 }
1224 }
1225 }
1226
1227 if (m_faceOffset[1] > -1) {
1228 #pragma omp for nowait
1229 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1230 const double* f = arg.getSampleDataRO(m_faceOffset[1]+k1);
1231 for (index_t i=0; i < numComp; ++i) {
1232 int_local[i]+=f[i]*m_dx[1];
1233 }
1234 }
1235 }
1236
1237 if (m_faceOffset[2] > -1) {
1238 #pragma omp for nowait
1239 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1240 const double* f = arg.getSampleDataRO(m_faceOffset[2]+k0);
1241 for (index_t i=0; i < numComp; ++i) {
1242 int_local[i]+=f[i]*m_dx[0];
1243 }
1244 }
1245 }
1246
1247 if (m_faceOffset[3] > -1) {
1248 #pragma omp for nowait
1249 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1250 const double* f = arg.getSampleDataRO(m_faceOffset[3]+k0);
1251 for (index_t i=0; i < numComp; ++i) {
1252 int_local[i]+=f[i]*m_dx[0];
1253 }
1254 }
1255 }
1256
1257 #pragma omp critical
1258 for (index_t i=0; i<numComp; i++)
1259 integrals[i]+=int_local[i];
1260 } // end of parallel section
1261 } // function space selector
1262 }
1263
1264 //protected
1265 dim_t Rectangle::insertNeighbourNodes(IndexVector& index, index_t node) const
1266 {
1267 const dim_t nDOF0 = (m_gNE[0]+1)/m_NX[0];
1268 const dim_t nDOF1 = (m_gNE[1]+1)/m_NX[1];
1269 const int x=node%nDOF0;
1270 const int y=node/nDOF0;
1271 dim_t num=0;
1272 // loop through potential neighbours and add to index if positions are
1273 // within bounds
1274 for (int i1=-1; i1<2; i1++) {
1275 for (int i0=-1; i0<2; i0++) {
1276 // skip node itself
1277 if (i0==0 && i1==0)
1278 continue;
1279 // location of neighbour node
1280 const int nx=x+i0;
1281 const int ny=y+i1;
1282 if (nx>=0 && ny>=0 && nx<nDOF0 && ny<nDOF1) {
1283 index.push_back(ny*nDOF0+nx);
1284 num++;
1285 }
1286 }
1287 }
1288
1289 return num;
1290 }
1291
1292 //protected
1293 void Rectangle::nodesToDOF(escript::Data& out, const escript::Data& in) const
1294 {
1295 const dim_t numComp = in.getDataPointSize();
1296 out.requireWrite();
1297
1298 const index_t left = (m_offset[0]==0 ? 0 : 1);
1299 const index_t bottom = (m_offset[1]==0 ? 0 : 1);
1300 const dim_t nDOF0 = (m_gNE[0]+1)/m_NX[0];
1301 const dim_t nDOF1 = (m_gNE[1]+1)/m_NX[1];
1302 #pragma omp parallel for
1303 for (index_t i=0; i<nDOF1; i++) {
1304 for (index_t j=0; j<nDOF0; j++) {
1305 const index_t n=j+left+(i+bottom)*m_NN[0];
1306 const double* src=in.getSampleDataRO(n);
1307 copy(src, src+numComp, out.getSampleDataRW(j+i*nDOF0));
1308 }
1309 }
1310 }
1311
1312 //protected
1313 void Rectangle::dofToNodes(escript::Data& out, const escript::Data& in) const
1314 {
1315 const dim_t numComp = in.getDataPointSize();
1316 Paso_Coupler* coupler = Paso_Coupler_alloc(m_connector, numComp);
1317 // expand data object if necessary to be able to grab the whole data
1318 const_cast<escript::Data*>(&in)->expand();
1319 Paso_Coupler_startCollect(coupler, in.getSampleDataRO(0));
1320
1321 const dim_t numDOF = getNumDOF();
1322 out.requireWrite();
1323 const double* buffer = Paso_Coupler_finishCollect(coupler);
1324
1325 #pragma omp parallel for
1326 for (index_t i=0; i<getNumNodes(); i++) {
1327 const double* src=(m_dofMap[i]<numDOF ?
1328 in.getSampleDataRO(m_dofMap[i])
1329 : &buffer[(m_dofMap[i]-numDOF)*numComp]);
1330 copy(src, src+numComp, out.getSampleDataRW(i));
1331 }
1332 Paso_Coupler_free(coupler);
1333 }
1334
1335 //private
1336 void Rectangle::populateSampleIds()
1337 {
1338 // degrees of freedom are numbered from left to right, bottom to top in
1339 // each rank, continuing on the next rank (ranks also go left-right,
1340 // bottom-top).
1341 // This means rank 0 has id 0...n0-1, rank 1 has id n0...n1-1 etc. which
1342 // helps when writing out data rank after rank.
1343
1344 // build node distribution vector first.
1345 // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes which is
1346 // constant for all ranks in this implementation
1347 m_nodeDistribution.assign(m_mpiInfo->size+1, 0);
1348 const dim_t numDOF=getNumDOF();
1349 for (dim_t k=1; k<m_mpiInfo->size; k++) {
1350 m_nodeDistribution[k]=k*numDOF;
1351 }
1352 m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal();
1353 m_nodeId.resize(getNumNodes());
1354 m_dofId.resize(numDOF);
1355 m_elementId.resize(getNumElements());
1356
1357 // populate face element counts
1358 //left
1359 if (m_offset[0]==0)
1360 m_faceCount[0]=m_NE[1];
1361 else
1362 m_faceCount[0]=0;
1363 //right
1364 if (m_mpiInfo->rank%m_NX[0]==m_NX[0]-1)
1365 m_faceCount[1]=m_NE[1];
1366 else
1367 m_faceCount[1]=0;
1368 //bottom
1369 if (m_offset[1]==0)
1370 m_faceCount[2]=m_NE[0];
1371 else
1372 m_faceCount[2]=0;
1373 //top
1374 if (m_mpiInfo->rank/m_NX[0]==m_NX[1]-1)
1375 m_faceCount[3]=m_NE[0];
1376 else
1377 m_faceCount[3]=0;
1378
1379 m_faceId.resize(getNumFaceElements());
1380
1381 const index_t left = (m_offset[0]==0 ? 0 : 1);
1382 const index_t bottom = (m_offset[1]==0 ? 0 : 1);
1383 const dim_t nDOF0 = (m_gNE[0]+1)/m_NX[0];
1384 const dim_t nDOF1 = (m_gNE[1]+1)/m_NX[1];
1385
1386 #define globalNodeId(x,y) \
1387 ((m_offset[0]+x)/nDOF0)*nDOF0*nDOF1+(m_offset[0]+x)%nDOF0 \
1388 + ((m_offset[1]+y)/nDOF1)*nDOF0*nDOF1*m_NX[0]+((m_offset[1]+y)%nDOF1)*nDOF0
1389
1390 // set corner id's outside the parallel region
1391 m_nodeId[0] = globalNodeId(0, 0);
1392 m_nodeId[m_NN[0]-1] = globalNodeId(m_NN[0]-1, 0);
1393 m_nodeId[m_NN[0]*(m_NN[1]-1)] = globalNodeId(0, m_NN[1]-1);
1394 m_nodeId[m_NN[0]*m_NN[1]-1] = globalNodeId(m_NN[0]-1,m_NN[1]-1);
1395 #undef globalNodeId
1396
1397 #pragma omp parallel
1398 {
1399 // populate degrees of freedom and own nodes (identical id)
1400 #pragma omp for nowait
1401 for (dim_t i=0; i<nDOF1; i++) {
1402 for (dim_t j=0; j<nDOF0; j++) {
1403 const index_t nodeIdx=j+left+(i+bottom)*m_NN[0];
1404 const index_t dofIdx=j+i*nDOF0;
1405 m_dofId[dofIdx] = m_nodeId[nodeIdx]
1406 = m_nodeDistribution[m_mpiInfo->rank]+dofIdx;
1407 }
1408 }
1409
1410 // populate the rest of the nodes (shared with other ranks)
1411 if (m_faceCount[0]==0) { // left column
1412 #pragma omp for nowait
1413 for (dim_t i=0; i<nDOF1; i++) {
1414 const index_t nodeIdx=(i+bottom)*m_NN[0];
1415 const index_t dofId=(i+1)*nDOF0-1;
1416 m_nodeId[nodeIdx]
1417 = m_nodeDistribution[m_mpiInfo->rank-1]+dofId;
1418 }
1419 }
1420 if (m_faceCount[1]==0) { // right column
1421 #pragma omp for nowait
1422 for (dim_t i=0; i<nDOF1; i++) {
1423 const index_t nodeIdx=(i+bottom+1)*m_NN[0]-1;
1424 const index_t dofId=i*nDOF0;
1425 m_nodeId[nodeIdx]
1426 = m_nodeDistribution[m_mpiInfo->rank+1]+dofId;
1427 }
1428 }
1429 if (m_faceCount[2]==0) { // bottom row
1430 #pragma omp for nowait
1431 for (dim_t i=0; i<nDOF0; i++) {
1432 const index_t nodeIdx=i+left;
1433 const index_t dofId=nDOF0*(nDOF1-1)+i;
1434 m_nodeId[nodeIdx]
1435 = m_nodeDistribution[m_mpiInfo->rank-m_NX[0]]+dofId;
1436 }
1437 }
1438 if (m_faceCount[3]==0) { // top row
1439 #pragma omp for nowait
1440 for (dim_t i=0; i<nDOF0; i++) {
1441 const index_t nodeIdx=m_NN[0]*(m_NN[1]-1)+i+left;
1442 const index_t dofId=i;
1443 m_nodeId[nodeIdx]
1444 = m_nodeDistribution[m_mpiInfo->rank+m_NX[0]]+dofId;
1445 }
1446 }
1447
1448 // populate element id's
1449 #pragma omp for nowait
1450 for (dim_t i1=0; i1<m_NE[1]; i1++) {
1451 for (dim_t i0=0; i0<m_NE[0]; i0++) {
1452 m_elementId[i0+i1*m_NE[0]]=(m_offset[1]+i1)*m_gNE[0]+m_offset[0]+i0;
1453 }
1454 }
1455
1456 // face elements
1457 #pragma omp for
1458 for (dim_t k=0; k<getNumFaceElements(); k++)
1459 m_faceId[k]=k;
1460 } // end parallel section
1461
1462 m_nodeTags.assign(getNumNodes(), 0);
1463 updateTagsInUse(Nodes);
1464
1465 m_elementTags.assign(getNumElements(), 0);
1466 updateTagsInUse(Elements);
1467
1468 // generate face offset vector and set face tags
1469 const index_t LEFT=1, RIGHT=2, BOTTOM=10, TOP=20;
1470 const index_t faceTag[] = { LEFT, RIGHT, BOTTOM, TOP };
1471 m_faceOffset.assign(4, -1);
1472 m_faceTags.clear();
1473 index_t offset=0;
1474 for (size_t i=0; i<4; i++) {
1475 if (m_faceCount[i]>0) {
1476 m_faceOffset[i]=offset;
1477 offset+=m_faceCount[i];
1478 m_faceTags.insert(m_faceTags.end(), m_faceCount[i], faceTag[i]);
1479 }
1480 }
1481 setTagMap("left", LEFT);
1482 setTagMap("right", RIGHT);
1483 setTagMap("bottom", BOTTOM);
1484 setTagMap("top", TOP);
1485 updateTagsInUse(FaceElements);
1486 }
1487
1488 //private
1489 void Rectangle::createPattern()
1490 {
1491 const dim_t nDOF0 = (m_gNE[0]+1)/m_NX[0];
1492 const dim_t nDOF1 = (m_gNE[1]+1)/m_NX[1];
1493 const index_t left = (m_offset[0]==0 ? 0 : 1);
1494 const index_t bottom = (m_offset[1]==0 ? 0 : 1);
1495
1496 // populate node->DOF mapping with own degrees of freedom.
1497 // The rest is assigned in the loop further down
1498 m_dofMap.assign(getNumNodes(), 0);
1499 #pragma omp parallel for
1500 for (index_t i=bottom; i<bottom+nDOF1; i++) {
1501 for (index_t j=left; j<left+nDOF0; j++) {
1502 m_dofMap[i*m_NN[0]+j]=(i-bottom)*nDOF0+j-left;
1503 }
1504 }
1505
1506 // build list of shared components and neighbours by looping through
1507 // all potential neighbouring ranks and checking if positions are
1508 // within bounds
1509 const dim_t numDOF=nDOF0*nDOF1;
1510 vector<IndexVector> colIndices(numDOF); // for the couple blocks
1511 RankVector neighbour;
1512 IndexVector offsetInShared(1,0);
1513 IndexVector sendShared, recvShared;
1514 int numShared=0;
1515 const int x=m_mpiInfo->rank%m_NX[0];
1516 const int y=m_mpiInfo->rank/m_NX[0];
1517 for (int i1=-1; i1<2; i1++) {
1518 for (int i0=-1; i0<2; i0++) {
1519 // skip this rank
1520 if (i0==0 && i1==0)
1521 continue;
1522 // location of neighbour rank
1523 const int nx=x+i0;
1524 const int ny=y+i1;
1525 if (nx>=0 && ny>=0 && nx<m_NX[0] && ny<m_NX[1]) {
1526 neighbour.push_back(ny*m_NX[0]+nx);
1527 if (i0==0) {
1528 // sharing top or bottom edge
1529 const int firstDOF=(i1==-1 ? 0 : numDOF-nDOF0);
1530 const int firstNode=(i1==-1 ? left : m_NN[0]*(m_NN[1]-1)+left);
1531 offsetInShared.push_back(offsetInShared.back()+nDOF0);
1532 for (dim_t i=0; i<nDOF0; i++, numShared++) {
1533 sendShared.push_back(firstDOF+i);
1534 recvShared.push_back(numDOF+numShared);
1535 if (i>0)
1536 colIndices[firstDOF+i-1].push_back(numShared);
1537 colIndices[firstDOF+i].push_back(numShared);
1538 if (i<nDOF0-1)
1539 colIndices[firstDOF+i+1].push_back(numShared);
1540 m_dofMap[firstNode+i]=numDOF+numShared;
1541 }
1542 } else if (i1==0) {
1543 // sharing left or right edge
1544 const int firstDOF=(i0==-1 ? 0 : nDOF0-1);
1545 const int firstNode=(i0==-1 ? bottom*m_NN[0] : (bottom+1)*m_NN[0]-1);
1546 offsetInShared.push_back(offsetInShared.back()+nDOF1);
1547 for (dim_t i=0; i<nDOF1; i++, numShared++) {
1548 sendShared.push_back(firstDOF+i*nDOF0);
1549 recvShared.push_back(numDOF+numShared);
1550 if (i>0)
1551 colIndices[firstDOF+(i-1)*nDOF0].push_back(numShared);
1552 colIndices[firstDOF+i*nDOF0].push_back(numShared);
1553 if (i<nDOF1-1)
1554 colIndices[firstDOF+(i+1)*nDOF0].push_back(numShared);
1555 m_dofMap[firstNode+i*m_NN[0]]=numDOF+numShared;
1556 }
1557 } else {
1558 // sharing a node
1559 const int dof=(i0+1)/2*(nDOF0-1)+(i1+1)/2*(numDOF-nDOF0);
1560 const int node=(i0+1)/2*(m_NN[0]-1)+(i1+1)/2*m_NN[0]*(m_NN[1]-1);
1561 offsetInShared.push_back(offsetInShared.back()+1);
1562 sendShared.push_back(dof);
1563 recvShared.push_back(numDOF+numShared);
1564 colIndices[dof].push_back(numShared);
1565 m_dofMap[node]=numDOF+numShared;
1566 ++numShared;
1567 }
1568 }
1569 }
1570 }
1571
1572 // create connector
1573 Paso_SharedComponents *snd_shcomp = Paso_SharedComponents_alloc(
1574 numDOF, neighbour.size(), &neighbour[0], &sendShared[0],
1575 &offsetInShared[0], 1, 0, m_mpiInfo);
1576 Paso_SharedComponents *rcv_shcomp = Paso_SharedComponents_alloc(
1577 numDOF, neighbour.size(), &neighbour[0], &recvShared[0],
1578 &offsetInShared[0], 1, 0, m_mpiInfo);
1579 m_connector = Paso_Connector_alloc(snd_shcomp, rcv_shcomp);
1580 Paso_SharedComponents_free(snd_shcomp);
1581 Paso_SharedComponents_free(rcv_shcomp);
1582
1583 // create main and couple blocks
1584 Paso_Pattern *mainPattern = createMainPattern();
1585 Paso_Pattern *colPattern, *rowPattern;
1586 createCouplePatterns(colIndices, numShared, &colPattern, &rowPattern);
1587
1588 // allocate paso distribution
1589 Paso_Distribution* distribution = Paso_Distribution_alloc(m_mpiInfo,
1590 const_cast<index_t*>(&m_nodeDistribution[0]), 1, 0);
1591
1592 // finally create the system matrix
1593 m_pattern = Paso_SystemMatrixPattern_alloc(MATRIX_FORMAT_DEFAULT,
1594 distribution, distribution, mainPattern, colPattern, rowPattern,
1595 m_connector, m_connector);
1596
1597 Paso_Distribution_free(distribution);
1598
1599 // useful debug output
1600 /*
1601 cout << "--- rcv_shcomp ---" << endl;
1602 cout << "numDOF=" << numDOF << ", numNeighbors=" << neighbour.size() << endl;
1603 for (size_t i=0; i<neighbour.size(); i++) {
1604 cout << "neighbor[" << i << "]=" << neighbour[i]
1605 << " offsetInShared[" << i+1 << "]=" << offsetInShared[i+1] << endl;
1606 }
1607 for (size_t i=0; i<recvShared.size(); i++) {
1608 cout << "shared[" << i << "]=" << recvShared[i] << endl;
1609 }
1610 cout << "--- snd_shcomp ---" << endl;
1611 for (size_t i=0; i<sendShared.size(); i++) {
1612 cout << "shared[" << i << "]=" << sendShared[i] << endl;
1613 }
1614 cout << "--- dofMap ---" << endl;
1615 for (size_t i=0; i<m_dofMap.size(); i++) {
1616 cout << "m_dofMap[" << i << "]=" << m_dofMap[i] << endl;
1617 }
1618 cout << "--- colIndices ---" << endl;
1619 for (size_t i=0; i<colIndices.size(); i++) {
1620 cout << "colIndices[" << i << "].size()=" << colIndices[i].size() << endl;
1621 }
1622 */
1623
1624 /*
1625 cout << "--- main_pattern ---" << endl;
1626 cout << "M=" << mainPattern->numOutput << ", N=" << mainPattern->numInput << endl;
1627 for (size_t i=0; i<mainPattern->numOutput+1; i++) {
1628 cout << "ptr[" << i << "]=" << mainPattern->ptr[i] << endl;
1629 }
1630 for (size_t i=0; i<mainPattern->ptr[mainPattern->numOutput]; i++) {
1631 cout << "index[" << i << "]=" << mainPattern->index[i] << endl;
1632 }
1633 */
1634
1635 /*
1636 cout << "--- colCouple_pattern ---" << endl;
1637 cout << "M=" << colPattern->numOutput << ", N=" << colPattern->numInput << endl;
1638 for (size_t i=0; i<colPattern->numOutput+1; i++) {
1639 cout << "ptr[" << i << "]=" << colPattern->ptr[i] << endl;
1640 }
1641 for (size_t i=0; i<colPattern->ptr[colPattern->numOutput]; i++) {
1642 cout << "index[" << i << "]=" << colPattern->index[i] << endl;
1643 }
1644 */
1645
1646 /*
1647 cout << "--- rowCouple_pattern ---" << endl;
1648 cout << "M=" << rowPattern->numOutput << ", N=" << rowPattern->numInput << endl;
1649 for (size_t i=0; i<rowPattern->numOutput+1; i++) {
1650 cout << "ptr[" << i << "]=" << rowPattern->ptr[i] << endl;
1651 }
1652 for (size_t i=0; i<rowPattern->ptr[rowPattern->numOutput]; i++) {
1653 cout << "index[" << i << "]=" << rowPattern->index[i] << endl;
1654 }
1655 */
1656
1657 Paso_Pattern_free(mainPattern);
1658 Paso_Pattern_free(colPattern);
1659 Paso_Pattern_free(rowPattern);
1660 }
1661
1662 //private
1663 void Rectangle::addToMatrixAndRHS(Paso_SystemMatrix* S, escript::Data& F,
1664 const vector<double>& EM_S, const vector<double>& EM_F, bool addS,
1665 bool addF, index_t firstNode, dim_t nEq, dim_t nComp) const
1666 {
1667 IndexVector rowIndex;
1668 rowIndex.push_back(m_dofMap[firstNode]);
1669 rowIndex.push_back(m_dofMap[firstNode+1]);
1670 rowIndex.push_back(m_dofMap[firstNode+m_NN[0]]);
1671 rowIndex.push_back(m_dofMap[firstNode+m_NN[0]+1]);
1672 if (addF) {
1673 double *F_p=F.getSampleDataRW(0);
1674 for (index_t i=0; i<rowIndex.size(); i++) {
1675 if (rowIndex[i]<getNumDOF()) {
1676 for (index_t eq=0; eq<nEq; eq++) {
1677 F_p[INDEX2(eq, rowIndex[i], nEq)]+=EM_F[INDEX2(eq,i,nEq)];
1678 }
1679 }
1680 }
1681 }
1682 if (addS) {
1683 addToSystemMatrix(S, rowIndex, nEq, rowIndex, nComp, EM_S);
1684 }
1685 }
1686
1687 //protected
1688 void Rectangle::interpolateNodesOnElements(escript::Data& out,
1689 const escript::Data& in,
1690 bool reduced) const
1691 {
1692 const dim_t numComp = in.getDataPointSize();
1693 if (reduced) {
1694 out.requireWrite();
1695 const double c0 = 0.25;
1696 #pragma omp parallel
1697 {
1698 vector<double> f_00(numComp);
1699 vector<double> f_01(numComp);
1700 vector<double> f_10(numComp);
1701 vector<double> f_11(numComp);
1702 #pragma omp for
1703 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1704 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1705 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,k1, m_NN[0])), numComp*sizeof(double));
1706 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,k1+1, m_NN[0])), numComp*sizeof(double));
1707 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,k1, m_NN[0])), numComp*sizeof(double));
1708 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,k1+1, m_NN[0])), numComp*sizeof(double));
1709 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE[0]));
1710 for (index_t i=0; i < numComp; ++i) {
1711 o[INDEX2(i,numComp,0)] = c0*(f_00[i] + f_01[i] + f_10[i] + f_11[i]);
1712 } /* end of component loop i */
1713 } /* end of k0 loop */
1714 } /* end of k1 loop */
1715 } /* end of parallel section */
1716 } else {
1717 out.requireWrite();
1718 const double c0 = 0.16666666666666666667;
1719 const double c1 = 0.044658198738520451079;
1720 const double c2 = 0.62200846792814621559;
1721 #pragma omp parallel
1722 {
1723 vector<double> f_00(numComp);
1724 vector<double> f_01(numComp);
1725 vector<double> f_10(numComp);
1726 vector<double> f_11(numComp);
1727 #pragma omp for
1728 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1729 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1730 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,k1, m_NN[0])), numComp*sizeof(double));
1731 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,k1+1, m_NN[0])), numComp*sizeof(double));
1732 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,k1, m_NN[0])), numComp*sizeof(double));
1733 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,k1+1, m_NN[0])), numComp*sizeof(double));
1734 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE[0]));
1735 for (index_t i=0; i < numComp; ++i) {
1736 o[INDEX2(i,numComp,0)] = c0*(f_01[i] + f_10[i]) + c1*f_11[i] + c2*f_00[i];
1737 o[INDEX2(i,numComp,1)] = c0*(f_00[i] + f_11[i]) + c1*f_01[i] + c2*f_10[i];
1738 o[INDEX2(i,numComp,2)] = c0*(f_00[i] + f_11[i]) + c1*f_10[i] + c2*f_01[i];
1739 o[INDEX2(i,numComp,3)] = c0*(f_01[i] + f_10[i]) + c1*f_00[i] + c2*f_11[i];
1740 } /* end of component loop i */
1741 } /* end of k0 loop */
1742 } /* end of k1 loop */
1743 } /* end of parallel section */
1744 }
1745 }
1746
1747 //protected
1748 void Rectangle::interpolateNodesOnFaces(escript::Data& out,
1749 const escript::Data& in,
1750 bool reduced) const
1751 {
1752 const dim_t numComp = in.getDataPointSize();
1753 if (reduced) {
1754 out.requireWrite();
1755 #pragma omp parallel
1756 {
1757 vector<double> f_00(numComp);
1758 vector<double> f_01(numComp);
1759 vector<double> f_10(numComp);
1760 vector<double> f_11(numComp);
1761 if (m_faceOffset[0] > -1) {
1762 #pragma omp for nowait
1763 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1764 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(0,k1, m_NN[0])), numComp*sizeof(double));
1765 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(0,k1+1, m_NN[0])), numComp*sizeof(double));
1766 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
1767 for (index_t i=0; i < numComp; ++i) {
1768 o[INDEX2(i,numComp,0)] = (f_00[i] + f_01[i])/2;
1769 } /* end of component loop i */
1770 } /* end of k1 loop */
1771 } /* end of face 0 */
1772 if (m_faceOffset[1] > -1) {
1773 #pragma omp for nowait
1774 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1775 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1, m_NN[0])), numComp*sizeof(double));
1776 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1+1, m_NN[0])), numComp*sizeof(double));
1777 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
1778 for (index_t i=0; i < numComp; ++i) {
1779 o[INDEX2(i,numComp,0)] = (f_10[i] + f_11[i])/2;
1780 } /* end of component loop i */
1781 } /* end of k1 loop */
1782 } /* end of face 1 */
1783 if (m_faceOffset[2] > -1) {
1784 #pragma omp for nowait
1785 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1786 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,0, m_NN[0])), numComp*sizeof(double));
1787 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,0, m_NN[0])), numComp*sizeof(double));
1788 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1789 for (index_t i=0; i < numComp; ++i) {
1790 o[INDEX2(i,numComp,0)] = (f_00[i] + f_10[i])/2;
1791 } /* end of component loop i */
1792 } /* end of k0 loop */
1793 } /* end of face 2 */
1794 if (m_faceOffset[3] > -1) {
1795 #pragma omp for nowait
1796 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1797 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1798 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1799 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1800 for (index_t i=0; i < numComp; ++i) {
1801 o[INDEX2(i,numComp,0)] = (f_01[i] + f_11[i])/2;
1802 } /* end of component loop i */
1803 } /* end of k0 loop */
1804 } /* end of face 3 */
1805 } /* end of parallel section */
1806 } else {
1807 out.requireWrite();
1808 const double c0 = 0.21132486540518711775;
1809 const double c1 = 0.78867513459481288225;
1810 #pragma omp parallel
1811 {
1812 vector<double> f_00(numComp);
1813 vector<double> f_01(numComp);
1814 vector<double> f_10(numComp);
1815 vector<double> f_11(numComp);
1816 if (m_faceOffset[0] > -1) {
1817 #pragma omp for nowait
1818 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1819 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(0,k1, m_NN[0])), numComp*sizeof(double));
1820 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(0,k1+1, m_NN[0])), numComp*sizeof(double));
1821 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
1822 for (index_t i=0; i < numComp; ++i) {
1823 o[INDEX2(i,numComp,0)] = c0*f_01[i] + c1*f_00[i];
1824 o[INDEX2(i,numComp,1)] = c0*f_00[i] + c1*f_01[i];
1825 } /* end of component loop i */
1826 } /* end of k1 loop */
1827 } /* end of face 0 */
1828 if (m_faceOffset[1] > -1) {
1829 #pragma omp for nowait
1830 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1831 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1, m_NN[0])), numComp*sizeof(double));
1832 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1+1, m_NN[0])), numComp*sizeof(double));
1833 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
1834 for (index_t i=0; i < numComp; ++i) {
1835 o[INDEX2(i,numComp,0)] = c1*f_10[i] + c0*f_11[i];
1836 o[INDEX2(i,numComp,1)] = c1*f_11[i] + c0*f_10[i];
1837 } /* end of component loop i */
1838 } /* end of k1 loop */
1839 } /* end of face 1 */
1840 if (m_faceOffset[2] > -1) {
1841 #pragma omp for nowait
1842 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1843 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,0, m_NN[0])), numComp*sizeof(double));
1844 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,0, m_NN[0])), numComp*sizeof(double));
1845 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1846 for (index_t i=0; i < numComp; ++i) {
1847 o[INDEX2(i,numComp,0)] = c0*f_10[i] + c1*f_00[i];
1848 o[INDEX2(i,numComp,1)] = c0*f_00[i] + c1*f_10[i];
1849 } /* end of component loop i */
1850 } /* end of k0 loop */
1851 } /* end of face 2 */
1852 if (m_faceOffset[3] > -1) {
1853 #pragma omp for nowait
1854 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1855 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1856 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1857 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1858 for (index_t i=0; i < numComp; ++i) {
1859 o[INDEX2(i,numComp,0)] = c0*f_11[i] + c1*f_01[i];
1860 o[INDEX2(i,numComp,1)] = c0*f_01[i] + c1*f_11[i];
1861 } /* end of component loop i */
1862 } /* end of k0 loop */
1863 } /* end of face 3 */
1864 } /* end of parallel section */
1865 }
1866 }
1867
1868
1869
1870 namespace
1871 {
1872 // Calculates a guassian blur colvolution matrix for 2D
1873 // See wiki article on the subject
1874 double* get2DGauss(unsigned radius, double sigma)
1875 {
1876 double* arr=new double[(radius*2+1)*(radius*2+1)];
1877 double common=M_1_PI*0.5*1/(sigma*sigma);
1878 double total=0;
1879 int r=static_cast<int>(radius);
1880 for (int y=-r;y<=r;++y)
1881 {
1882 for (int x=-r;x<=r;++x)
1883 {
1884 arr[(x+r)+(y+r)*(r*2+1)]=common*exp(-(x*x+y*y)/(2*sigma*sigma));
1885 // cout << (x+y*(r*2+1)) << " " << arr[(x+r)+(y+r)*(r*2+1)] << endl;
1886 total+=arr[(x+r)+(y+r)*(r*2+1)];
1887 }
1888 }
1889 double invtotal=1/total;
1890 //cout << "Inv total is " << invtotal << endl;
1891 for (size_t p=0;p<(radius*2+1)*(radius*2+1);++p)
1892 {
1893 arr[p]*=invtotal;
1894 //cout << p << "->" << arr[p] << endl;
1895 }
1896 return arr;
1897 }
1898
1899 // applies conv to source to get a point.
1900 // (xp, yp) are the coords in the source matrix not the destination matrix
1901 double Convolve2D(double* conv, double* source, size_t xp, size_t yp, unsigned radius, size_t width)
1902 {
1903 size_t bx=xp-radius, by=yp-radius;
1904 size_t sbase=bx+by*width;
1905 double result=0;
1906 for (int y=0;y<2*radius+1;++y)
1907 {
1908 for (int x=0;x<2*radius+1;++x)
1909 {
1910 result+=conv[x+y*(2*radius+1)] * source[sbase + x+y*width];
1911 }
1912 }
1913 return result;
1914 }
1915 }
1916
1917
1918
1919 /* This routine produces a Data object filled with smoothed random data.
1920 The dimensions of the rectangle being filled are internal[0] x internal[1] points.
1921 A parameter radius gives the size of the stencil used for the smoothing.
1922 A point on the left hand edge for example, will still require `radius` extra points to the left
1923 in order to complete the stencil.
1924
1925 All local calculation is done on an array called `src`, with
1926 dimensions = ext[0] * ext[1].
1927 Where ext[i]= internal[i]+2*radius.
1928
1929 Now for MPI there is overlap to deal with. We need to share both the overlapping
1930 values themselves but also the external region.
1931
1932 In a hypothetical 1-D case:
1933
1934
1935 1234567
1936 would be split into two ranks thus:
1937 123(4) (4)567 [4 being a shared element]
1938
1939 If the radius is 2. There will be padding elements on the outside:
1940
1941 pp123(4) (4)567pp
1942
1943 To ensure that 4 can be correctly computed on both ranks, values from the other rank
1944 need to be known.
1945
1946 pp123(4)56 23(4)567pp
1947
1948 Now in our case, we wout set all the values 23456 on the left rank and send them to the
1949 right hand rank.
1950
1951 So the edges _may_ need to be shared at a distance `inset` from all boundaries.
1952 inset=2*radius+1
1953 This is to ensure that values at distance `radius` from the shared/overlapped element
1954 that ripley has.
1955
1956
1957 */
1958 escript::Data Rectangle::randomFill(long seed, const boost::python::tuple& filter) const
1959 {
1960 if (m_numDim!=2)
1961 {
1962 throw RipleyException("Only 2D supported at this time.");
1963 }
1964 if (len(filter)!=3) {
1965 throw RipleyException("Unsupported random filter");
1966 }
1967 boost::python::extract<string> ex(filter[0]);
1968 if (!ex.check() || (ex()!="gaussian"))
1969 {
1970 throw RipleyException("Unsupported random filter");
1971 }
1972 boost::python::extract<unsigned int> ex1(filter[1]);
1973 if (!ex1.check())
1974 {
1975 throw RipleyException("Radius of gaussian filter must be a positive integer.");
1976 }
1977 unsigned int radius=ex1();
1978 double sigma=0.5;
1979 boost::python::extract<double> ex2(filter[2]);
1980 if (!ex2.check() || (sigma=ex2())<=0)
1981 {
1982 throw RipleyException("Sigma must be a postive floating point number.");
1983 }
1984
1985 size_t internal[2];
1986 internal[0]=m_NE[0]+1; // number of points in the internal region
1987 internal[1]=m_NE[1]+1; // that is, the ones we need smoothed versions of
1988 size_t ext[2];
1989 ext[0]=internal[0]+2*radius; // includes points we need as input
1990 ext[1]=internal[1]+2*radius; // for smoothing
1991
1992 // now we check to see if the radius is acceptable
1993 // That is, would not cross multiple ranks in MPI
1994
1995 if ((2*radius>=internal[0]) || (2*radius>=internal[1]))
1996 {
1997 throw RipleyException("Radius of gaussian filter must be less than half the width/height of a rank");
1998 }
1999
2000
2001 double* src=new double[ext[0]*ext[1]];
2002 esysUtils::randomFillArray(seed, src, ext[0]*ext[1]);
2003
2004
2005 #ifdef ESYS_MPI
2006 size_t inset=2*radius+1;
2007 size_t Eheight=ext[1]-2*inset; // how high the E (shared) region is
2008 size_t Swidth=ext[0]-2*inset;
2009
2010 double* SWin=new double[inset*inset]; memset(SWin, 0, inset*inset*sizeof(double));
2011 double* SEin=new double[inset*inset]; memset(SEin, 0, inset*inset*sizeof(double));
2012 double* NWin=new double[inset*inset]; memset(NWin, 0, inset*inset*sizeof(double));
2013 double* Sin=new double[inset*Swidth]; memset(Sin, 0, inset*Swidth*sizeof(double));
2014 double* Win=new double[inset*Eheight]; memset(Win, 0, inset*Eheight*sizeof(double));
2015
2016 double* NEout=new double[inset*inset]; memset(NEout, 0, inset*inset*sizeof(double));
2017 unsigned int base=ext[0]-inset+(ext[1]-inset)*ext[0];
2018 for (unsigned int i=0;i<inset;++i)
2019 {
2020 memcpy(NEout+inset*i, src+base, inset*sizeof(double));
2021 base+=ext[0];
2022 }
2023 double* NWout=new double[inset*inset]; memset(NWout, 0, inset*inset*sizeof(double));
2024 base=(ext[1]-inset)*ext[0];
2025 for (unsigned int i=0;i<inset;++i)
2026 {
2027 memcpy(NWout+inset*i, src+base, inset*sizeof(double));
2028 base+=ext[0];
2029 }
2030
2031 double* SEout=new double[inset*inset]; memset(SEout, 0, inset*inset*sizeof(double));
2032 base=ext[0]-inset;
2033 for (int i=0;i<inset;++i)
2034 {
2035 memcpy(SEout+inset*i, src+base, inset*sizeof(double));
2036 base+=ext[0];
2037 }
2038 double* Nout=new double[inset*Swidth]; memset(Nout, 0, inset*Swidth*sizeof(double));
2039 base=inset+(ext[1]-inset)*ext[0];
2040 for (unsigned int i=0;i<inset;++i)
2041 {
2042 memcpy(Nout+Swidth*i, src+base, Swidth*sizeof(double));
2043 base+=ext[0];
2044 }
2045
2046 double* Eout=new double[inset*Eheight]; memset(Eout, 0, inset*Eheight*sizeof(double));
2047 base=ext[0]-inset+inset*ext[0];
2048 for (unsigned int i=0;i<Eheight;++i)
2049 {
2050 memcpy(Eout+i*inset, src+base, inset*sizeof(double));
2051 base+=ext[0];
2052 }
2053
2054 MPI_Request reqs[10];
2055 MPI_Status stats[10];
2056 short rused=0;
2057
2058 dim_t X=m_mpiInfo->rank%m_NX[0];
2059 dim_t Y=m_mpiInfo->rank/m_NX[0];
2060 dim_t row=m_NX[0];
2061 bool swused=false; // These vars will be true if data needs to be copied out of them
2062 bool seused=false;
2063 bool nwused=false;
2064 bool sused=false;
2065 bool wused=false;
2066
2067 // Tags:
2068 // 10 : EW transfer (middle)
2069 // 8 : NS transfer (middle)
2070 // 7 : NE corner -> to N, E and NE
2071 // 11 : NW corner to SW corner (only used on the left hand edge
2072 // 12 : SE corner to SW corner (only used on the bottom edge
2073
2074
2075
2076 int comserr=0;
2077 if (Y!=0) // not on bottom row,
2078 {
2079 if (X!=0) // not on the left hand edge
2080 {
2081 // recv bottomleft from SW
2082 comserr|=MPI_Irecv(SWin, inset*inset, MPI_DOUBLE, (X-1)+(Y-1)*row, 7, m_mpiInfo->comm, reqs+(rused++));
2083 swused=true;
2084 comserr|=MPI_Irecv(Win, Eheight*inset, MPI_DOUBLE, X-1+Y*row, 10, m_mpiInfo->comm, reqs+(rused++));
2085 wused=true;
2086 }
2087 else // on the left hand edge
2088 {
2089 comserr|=MPI_Irecv(SWin, inset*inset, MPI_DOUBLE, (Y-1)*row, 11, m_mpiInfo->comm, reqs+(rused++));
2090 swused=true;
2091 }
2092 comserr|=MPI_Irecv(Sin, Swidth*inset, MPI_DOUBLE, X+(Y-1)*row, 8, m_mpiInfo->comm, reqs+(rused++));
2093 sused=true;
2094 comserr|=MPI_Irecv(SEin, inset*inset, MPI_DOUBLE, X+(Y-1)*row, 7, m_mpiInfo->comm, reqs+(rused++));
2095 seused=true;
2096
2097
2098 }
2099 else // on the bottom row
2100 {
2101 if (X!=0)
2102 {
2103 comserr|=MPI_Irecv(Win, Eheight*inset, MPI_DOUBLE, X-1+Y*row, 10, m_mpiInfo->comm, reqs+(rused++));
2104 wused=true;
2105 // Need to use tag 12 here because SW is coming from the East not South East
2106 comserr|=MPI_Irecv(SWin, inset*inset, MPI_DOUBLE, X-1+Y*row, 12, m_mpiInfo->comm, reqs+(rused++));
2107 swused=true;
2108 }
2109 if (X!=(row-1))
2110 {
2111 comserr|=MPI_Isend(SEout, inset*inset, MPI_DOUBLE, X+1+(Y)*row, 12, m_mpiInfo->comm, reqs+(rused++));
2112 }
2113 }
2114
2115 if (Y!=(m_NX[1]-1)) // not on the top row
2116 {
2117 comserr|=MPI_Isend(Nout, inset*Swidth, MPI_DOUBLE, X+(Y+1)*row, 8, m_mpiInfo->comm, reqs+(rused++));
2118 comserr|=MPI_Isend(NEout, inset*inset, MPI_DOUBLE, X+(Y+1)*row, 7, m_mpiInfo->comm, reqs+(rused++));
2119 if (X!=(row-1)) // not on right hand edge
2120 {
2121 comserr|=MPI_Isend(NEout, inset*inset, MPI_DOUBLE, X+1+(Y+1)*row, 7, m_mpiInfo->comm, reqs+(rused++));
2122 }
2123 if (X==0) // left hand edge
2124 {
2125 comserr|=MPI_Isend(NWout, inset*inset, MPI_DOUBLE, (Y+1)*row,11, m_mpiInfo->comm, reqs+(rused++));
2126 }
2127 }
2128 if (X!=(row-1)) // not on right hand edge
2129 {
2130 comserr|=MPI_Isend(NEout, inset*inset, MPI_DOUBLE, X+1+(Y)*row, 7, m_mpiInfo->comm, reqs+(rused++));
2131 comserr|=MPI_Isend(Eout, Eheight*inset, MPI_DOUBLE, X+1+(Y)*row, 10, m_mpiInfo->comm, reqs+(rused++));
2132 }
2133 if (X!=0)
2134 {
2135 comserr|=MPI_Irecv(NWin, inset*inset, MPI_DOUBLE, (X-1)+Y*row, 7, m_mpiInfo->comm, reqs+(rused++));
2136 nwused=true;
2137
2138
2139 }
2140
2141 if (!comserr)
2142 {
2143 comserr=MPI_Waitall(rused, reqs, stats);
2144 }
2145
2146 if (comserr)
2147 {
2148 // Yes this is throwing an exception as a result of an MPI error.
2149 // and no we don't inform the other ranks that we are doing this.
2150 // however, we have no reason to believe coms work at this point anyway
2151 throw RipleyException("Error in coms for randomFill");
2152 }
2153
2154
2155 // Need to copy the values back from the buffers
2156 // Copy from SW
2157
2158 if (swused)
2159 {
2160 base=0;
2161 for (unsigned int i=0;i<inset;++i)
2162 {
2163 memcpy(src+base, SWin+i*inset, inset*sizeof(double));
2164 base+=ext[0];
2165 }
2166 }
2167 if (seused)
2168 {
2169 base=ext[0]-inset;
2170 for (unsigned int i=0;i<inset;++i)
2171 {
2172 memcpy(src+base, SEin+i*inset, inset*sizeof(double));
2173 base+=ext[0];
2174 }
2175 }
2176 if (nwused)
2177 {
2178 base=(ext[1]-inset)*ext[0];
2179 for (unsigned int i=0;i<inset;++i)
2180 {
2181 memcpy(src+base, NWin+i*inset, inset*sizeof(double));
2182 base+=ext[0];
2183 }
2184 }
2185 if (sused)
2186 {
2187 base=inset;
2188 for (unsigned int i=0;i<inset;++i)
2189 {
2190 memcpy(src+base, Sin+i*Swidth, Swidth*sizeof(double));
2191 base+=ext[0];
2192 }
2193 }
2194 if (wused)
2195 {
2196 base=inset*ext[0];
2197 for (unsigned int i=0;i<Eheight;++i)
2198 {
2199 memcpy(src+base, Win+i*inset, inset*sizeof(double));
2200 base+=ext[0];
2201 }
2202
2203 }
2204
2205 delete[] SWin;
2206 delete[] SEin;
2207 delete[] NWin;
2208 delete[] Sin;
2209 delete[] Win;
2210
2211 delete[] NEout;
2212 delete[] NWout;
2213 delete[] SEout;
2214 delete[] Nout;
2215 delete[] Eout;
2216 #endif
2217 escript::FunctionSpace fs(getPtr(), getContinuousFunctionCode());
2218 escript::Data resdat(0, escript::DataTypes::scalarShape, fs , true);
2219 // don't need to check for exwrite because we just made it
2220 escript::DataVector& dv=resdat.getExpandedVectorReference();
2221 double* convolution=get2DGauss(radius, sigma);
2222 for (size_t y=0;y<(internal[1]);++y)
2223 {
2224 for (size_t x=0;x<(internal[0]);++x)
2225 {
2226 dv[x+y*(internal[0])]=Convolve2D(convolution, src, x+radius, y+radius, radius, ext[0]);
2227
2228 }
2229 }
2230 delete[] convolution;
2231 delete[] src;
2232 return resdat;
2233 }
2234
2235 int Rectangle::findNode(const double *coords) const {
2236 const int NOT_MINE = -1;
2237 //is the found element even owned by this rank
2238 // (inside owned or shared elements but will map to an owned element)
2239 for (int dim = 0; dim < m_numDim; dim++) {
2240 double min = m_origin[dim] + m_offset[dim]* m_dx[dim]
2241 - m_dx[dim]/2.; //allows for point outside mapping onto node
2242 double max = m_origin[dim] + (m_offset[dim] + m_NE[dim])*m_dx[dim]
2243 + m_dx[dim]/2.;
2244 if (min > coords[dim] || max < coords[dim]) {
2245 return NOT_MINE;
2246 }
2247 }
2248 // get distance from origin
2249 double x = coords[0] - m_origin[0];
2250 double y = coords[1] - m_origin[1];
2251 // distance in elements
2252 int ex = (int) floor(x / m_dx[0]);
2253 int ey = (int) floor(y / m_dx[1]);
2254 // set the min distance high enough to be outside the element plus a bit
2255 int closest = NOT_MINE;
2256 double minDist = 1;
2257 for (int dim = 0; dim < m_numDim; dim++) {
2258 minDist += m_dx[dim]*m_dx[dim];
2259 }
2260 //find the closest node
2261 for (int dx = 0; dx < 1; dx++) {
2262 double xdist = (x - (ex + dx)*m_dx[0]);
2263 for (int dy = 0; dy < 1; dy++) {
2264 double ydist = (y - (ey + dy)*m_dx[1]);
2265 double total = xdist*xdist + ydist*ydist;
2266 if (total < minDist) {
2267 closest = INDEX2(ex+dx-m_offset[0], ey+dy-m_offset[1], m_NE[0] + 1);
2268 minDist = total;
2269 }
2270 }
2271 }
2272 //if this happens, we've let a dirac point slip through, which is awful
2273 if (closest == NOT_MINE) {
2274 throw RipleyException("Unable to map appropriate dirac point to a node,"
2275 " implementation problem in Rectangle::findNode()");
2276 }
2277 return closest;
2278 }
2279
2280 void Rectangle::setAssembler(std::string type, std::map<std::string,
2281 escript::Data> constants) {
2282 if (type.compare("WaveAssembler") == 0) {
2283 delete assembler;
2284 assembler = new WaveAssembler2D(this, m_dx, m_NX, m_NE, m_NN, constants);
2285 } else { //else ifs would go before this for other types
2286 throw RipleyException("Ripley::Rectangle does not support the"
2287 " requested assembler");
2288 }
2289 }
2290
2291 } // end of namespace ripley
2292

  ViewVC Help
Powered by ViewVC 1.1.26