/[escript]/branches/split/ripley/src/Rectangle.cpp
ViewVC logotype

Contents of /branches/split/ripley/src/Rectangle.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4763 - (show annotations)
Tue Mar 18 05:20:23 2014 UTC (3 years, 9 months ago) by jfenwick
File size: 88968 byte(s)
Replaced references to getSampleDataRO(0) with getDataRO()
This was to deal with the case where processes with no data where trying to compute the offset of sample 0 when all that was really needed was a null.

1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2014 by University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Open Software License version 3.0
9 * http://www.opensource.org/licenses/osl-3.0.php
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development 2012-2013 by School of Earth Sciences
13 * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 *
15 *****************************************************************************/
16
17 #include <algorithm>
18
19 #include <ripley/Rectangle.h>
20 #include <paso/SystemMatrix.h>
21 #include <esysUtils/esysFileWriter.h>
22 #include <ripley/DefaultAssembler2D.h>
23 #include <ripley/WaveAssembler2D.h>
24 #include <ripley/LameAssembler2D.h>
25 #include <ripley/domainhelpers.h>
26 #include <boost/scoped_array.hpp>
27 #include "esysUtils/EsysRandom.h"
28 #include "blocktools.h"
29
30 #ifdef USE_NETCDF
31 #include <netcdfcpp.h>
32 #endif
33
34 #if USE_SILO
35 #include <silo.h>
36 #ifdef ESYS_MPI
37 #include <pmpio.h>
38 #endif
39 #endif
40
41 #include <iomanip>
42
43 using namespace std;
44 using esysUtils::FileWriter;
45
46 namespace ripley {
47
48 Rectangle::Rectangle(int n0, int n1, double x0, double y0, double x1,
49 double y1, int d0, int d1,
50 const std::vector<double>& points,
51 const std::vector<int>& tags,
52 const simap_t& tagnamestonums) :
53 RipleyDomain(2)
54 {
55 // ignore subdivision parameters for serial run
56 if (m_mpiInfo->size == 1) {
57 d0=1;
58 d1=1;
59 }
60
61 bool warn=false;
62 std::vector<int> factors;
63 int ranks = m_mpiInfo->size;
64 int epr[2] = {n0,n1};
65 int d[2] = {d0,d1};
66 if (d0<=0 || d1<=0) {
67 for (int i = 0; i < 2; i++) {
68 if (d[i] < 1) {
69 d[i] = 1;
70 continue;
71 }
72 epr[i] = -1; // can no longer be max
73 //remove
74 if (ranks % d[i] != 0) {
75 throw RipleyException("Invalid number of spatial subdivisions");
76 }
77 ranks /= d[i];
78 }
79 factorise(factors, ranks);
80 if (factors.size() != 0) {
81 warn = true;
82 }
83 }
84
85 while (factors.size() > 0) {
86 int i = epr[0] > epr[1] ? 0 : 1;
87 int f = factors.back();
88 factors.pop_back();
89 d[i] *= f;
90 epr[i] /= f;
91 }
92 d0 = d[0]; d1 = d[1];
93
94 // ensure number of subdivisions is valid and nodes can be distributed
95 // among number of ranks
96 if (d0*d1 != m_mpiInfo->size)
97 throw RipleyException("Invalid number of spatial subdivisions");
98
99 if (warn) {
100 cout << "Warning: Automatic domain subdivision (d0=" << d0 << ", d1="
101 << d1 << "). This may not be optimal!" << endl;
102 }
103
104 double l0 = x1-x0;
105 double l1 = y1-y0;
106 m_dx[0] = l0/n0;
107 m_dx[1] = l1/n1;
108
109 if ((n0+1)%d0 > 0) {
110 n0=(int)round((float)(n0+1)/d0+0.5)*d0-1;
111 l0=m_dx[0]*n0;
112 cout << "Warning: Adjusted number of elements and length. N0="
113 << n0 << ", l0=" << l0 << endl;
114 }
115 if ((n1+1)%d1 > 0) {
116 n1=(int)round((float)(n1+1)/d1+0.5)*d1-1;
117 l1=m_dx[1]*n1;
118 cout << "Warning: Adjusted number of elements and length. N1="
119 << n1 << ", l1=" << l1 << endl;
120 }
121
122 if ((d0 > 1 && (n0+1)/d0<2) || (d1 > 1 && (n1+1)/d1<2))
123 throw RipleyException("Too few elements for the number of ranks");
124
125 m_gNE[0] = n0;
126 m_gNE[1] = n1;
127 m_origin[0] = x0;
128 m_origin[1] = y0;
129 m_length[0] = l0;
130 m_length[1] = l1;
131 m_NX[0] = d0;
132 m_NX[1] = d1;
133
134 // local number of elements (with and without overlap)
135 m_NE[0] = m_ownNE[0] = (d0>1 ? (n0+1)/d0 : n0);
136 if (m_mpiInfo->rank%d0>0 && m_mpiInfo->rank%d0<d0-1)
137 m_NE[0]++;
138 else if (d0>1 && m_mpiInfo->rank%d0==d0-1)
139 m_ownNE[0]--;
140
141 m_NE[1] = m_ownNE[1] = (d1>1 ? (n1+1)/d1 : n1);
142 if (m_mpiInfo->rank/d0>0 && m_mpiInfo->rank/d0<d1-1)
143 m_NE[1]++;
144 else if (d1>1 && m_mpiInfo->rank/d0==d1-1)
145 m_ownNE[1]--;
146
147 // local number of nodes
148 m_NN[0] = m_NE[0]+1;
149 m_NN[1] = m_NE[1]+1;
150
151 // bottom-left node is at (offset0,offset1) in global mesh
152 m_offset[0] = (n0+1)/d0*(m_mpiInfo->rank%d0);
153 if (m_offset[0] > 0)
154 m_offset[0]--;
155 m_offset[1] = (n1+1)/d1*(m_mpiInfo->rank/d0);
156 if (m_offset[1] > 0)
157 m_offset[1]--;
158
159 populateSampleIds();
160 createPattern();
161 assembler = new DefaultAssembler2D(this, m_dx, m_NX, m_NE, m_NN);
162 for (map<string, int>::const_iterator i = tagnamestonums.begin();
163 i != tagnamestonums.end(); i++) {
164 setTagMap(i->first, i->second);
165 }
166 addPoints(tags.size(), &points[0], &tags[0]);
167 }
168
169 Rectangle::~Rectangle()
170 {
171 Paso_SystemMatrixPattern_free(m_pattern);
172 Paso_Connector_free(m_connector);
173 delete assembler;
174 }
175
176 string Rectangle::getDescription() const
177 {
178 return "ripley::Rectangle";
179 }
180
181 bool Rectangle::operator==(const AbstractDomain& other) const
182 {
183 const Rectangle* o=dynamic_cast<const Rectangle*>(&other);
184 if (o) {
185 return (RipleyDomain::operator==(other) &&
186 m_gNE[0]==o->m_gNE[0] && m_gNE[1]==o->m_gNE[1]
187 && m_origin[0]==o->m_origin[0] && m_origin[1]==o->m_origin[1]
188 && m_length[0]==o->m_length[0] && m_length[1]==o->m_length[1]
189 && m_NX[0]==o->m_NX[0] && m_NX[1]==o->m_NX[1]);
190 }
191
192 return false;
193 }
194
195 void Rectangle::readNcGrid(escript::Data& out, string filename, string varname,
196 const ReaderParameters& params) const
197 {
198 #ifdef USE_NETCDF
199 // check destination function space
200 int myN0, myN1;
201 if (out.getFunctionSpace().getTypeCode() == Nodes) {
202 myN0 = m_NN[0];
203 myN1 = m_NN[1];
204 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
205 out.getFunctionSpace().getTypeCode() == ReducedElements) {
206 myN0 = m_NE[0];
207 myN1 = m_NE[1];
208 } else
209 throw RipleyException("readNcGrid(): invalid function space for output data object");
210
211 if (params.first.size() != 2)
212 throw RipleyException("readNcGrid(): argument 'first' must have 2 entries");
213
214 if (params.numValues.size() != 2)
215 throw RipleyException("readNcGrid(): argument 'numValues' must have 2 entries");
216
217 if (params.multiplier.size() != 2)
218 throw RipleyException("readNcGrid(): argument 'multiplier' must have 2 entries");
219 for (size_t i=0; i<params.multiplier.size(); i++)
220 if (params.multiplier[i]<1)
221 throw RipleyException("readNcGrid(): all multipliers must be positive");
222 if (params.reverse.size() != 2)
223 throw RipleyException("readNcGrid(): argument 'reverse' must have 2 entries");
224
225 // check file existence and size
226 NcFile f(filename.c_str(), NcFile::ReadOnly);
227 if (!f.is_valid())
228 throw RipleyException("readNcGrid(): cannot open file");
229
230 NcVar* var = f.get_var(varname.c_str());
231 if (!var)
232 throw RipleyException("readNcGrid(): invalid variable");
233
234 // TODO: rank>0 data support
235 const int numComp = out.getDataPointSize();
236 if (numComp > 1)
237 throw RipleyException("readNcGrid(): only scalar data supported");
238
239 const int dims = var->num_dims();
240 boost::scoped_array<long> edges(var->edges());
241
242 // is this a slice of the data object (dims!=2)?
243 // note the expected ordering of edges (as in numpy: y,x)
244 if ( (dims==2 && (params.numValues[1] > edges[0] || params.numValues[0] > edges[1]))
245 || (dims==1 && params.numValues[1]>1) ) {
246 throw RipleyException("readNcGrid(): not enough data in file");
247 }
248
249 // check if this rank contributes anything
250 if (params.first[0] >= m_offset[0]+myN0 ||
251 params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
252 params.first[1] >= m_offset[1]+myN1 ||
253 params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1])
254 return;
255
256 // now determine how much this rank has to write
257
258 // first coordinates in data object to write to
259 const int first0 = max(0, params.first[0]-m_offset[0]);
260 const int first1 = max(0, params.first[1]-m_offset[1]);
261 // indices to first value in file (not accounting for reverse yet)
262 int idx0 = max(0, m_offset[0]-params.first[0]);
263 int idx1 = max(0, m_offset[1]-params.first[1]);
264 // number of values to read
265 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
266 const int num1 = min(params.numValues[1]-idx1, myN1-first1);
267
268 // make sure we read the right block if going backwards through file
269 if (params.reverse[0])
270 idx0 = edges[dims-1]-num0-idx0;
271 if (dims>1 && params.reverse[1])
272 idx1 = edges[dims-2]-num1-idx1;
273
274 vector<double> values(num0*num1);
275 if (dims==2) {
276 var->set_cur(idx1, idx0);
277 var->get(&values[0], num1, num0);
278 } else {
279 var->set_cur(idx0);
280 var->get(&values[0], num0);
281 }
282
283 const int dpp = out.getNumDataPointsPerSample();
284 out.requireWrite();
285
286 // helpers for reversing
287 const int x0 = (params.reverse[0] ? num0-1 : 0);
288 const int x_mult = (params.reverse[0] ? -1 : 1);
289 const int y0 = (params.reverse[1] ? num1-1 : 0);
290 const int y_mult = (params.reverse[1] ? -1 : 1);
291
292 for (index_t y=0; y<num1; y++) {
293 #pragma omp parallel for
294 for (index_t x=0; x<num0; x++) {
295 const int baseIndex = first0+x*params.multiplier[0]
296 +(first1+y*params.multiplier[1])*myN0;
297 const int srcIndex = (y0+y_mult*y)*num0+(x0+x_mult*x);
298 if (!isnan(values[srcIndex])) {
299 for (index_t m1=0; m1<params.multiplier[1]; m1++) {
300 for (index_t m0=0; m0<params.multiplier[0]; m0++) {
301 const int dataIndex = baseIndex+m0+m1*myN0;
302 double* dest = out.getSampleDataRW(dataIndex);
303 for (index_t q=0; q<dpp; q++) {
304 *dest++ = values[srcIndex];
305 }
306 }
307 }
308 }
309 }
310 }
311 #else
312 throw RipleyException("readNcGrid(): not compiled with netCDF support");
313 #endif
314 }
315
316 void Rectangle::readBinaryGrid(escript::Data& out, string filename,
317 const ReaderParameters& params) const
318 {
319 // the mapping is not universally correct but should work on our
320 // supported platforms
321 switch (params.dataType) {
322 case DATATYPE_INT32:
323 readBinaryGridImpl<int>(out, filename, params);
324 break;
325 case DATATYPE_FLOAT32:
326 readBinaryGridImpl<float>(out, filename, params);
327 break;
328 case DATATYPE_FLOAT64:
329 readBinaryGridImpl<double>(out, filename, params);
330 break;
331 default:
332 throw RipleyException("readBinaryGrid(): invalid or unsupported datatype");
333 }
334 }
335
336 template<typename ValueType>
337 void Rectangle::readBinaryGridImpl(escript::Data& out, const string& filename,
338 const ReaderParameters& params) const
339 {
340 // check destination function space
341 int myN0, myN1;
342 if (out.getFunctionSpace().getTypeCode() == Nodes) {
343 myN0 = m_NN[0];
344 myN1 = m_NN[1];
345 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
346 out.getFunctionSpace().getTypeCode() == ReducedElements) {
347 myN0 = m_NE[0];
348 myN1 = m_NE[1];
349 } else
350 throw RipleyException("readBinaryGrid(): invalid function space for output data object");
351
352 // check file existence and size
353 ifstream f(filename.c_str(), ifstream::binary);
354 if (f.fail()) {
355 throw RipleyException("readBinaryGrid(): cannot open file");
356 }
357 f.seekg(0, ios::end);
358 const int numComp = out.getDataPointSize();
359 const int filesize = f.tellg();
360 const int reqsize = params.numValues[0]*params.numValues[1]*numComp*sizeof(ValueType);
361 if (filesize < reqsize) {
362 f.close();
363 throw RipleyException("readBinaryGrid(): not enough data in file");
364 }
365
366 // check if this rank contributes anything
367 if (params.first[0] >= m_offset[0]+myN0 ||
368 params.first[0]+params.numValues[0] <= m_offset[0] ||
369 params.first[1] >= m_offset[1]+myN1 ||
370 params.first[1]+params.numValues[1] <= m_offset[1]) {
371 f.close();
372 return;
373 }
374
375 // now determine how much this rank has to write
376
377 // first coordinates in data object to write to
378 const int first0 = max(0, params.first[0]-m_offset[0]);
379 const int first1 = max(0, params.first[1]-m_offset[1]);
380 // indices to first value in file
381 const int idx0 = max(0, m_offset[0]-params.first[0]);
382 const int idx1 = max(0, m_offset[1]-params.first[1]);
383 // number of values to read
384 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
385 const int num1 = min(params.numValues[1]-idx1, myN1-first1);
386
387 out.requireWrite();
388 vector<ValueType> values(num0*numComp);
389 const int dpp = out.getNumDataPointsPerSample();
390
391 for (int y=0; y<num1; y++) {
392 const int fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]);
393 f.seekg(fileofs*sizeof(ValueType));
394 f.read((char*)&values[0], num0*numComp*sizeof(ValueType));
395 for (int x=0; x<num0; x++) {
396 const int baseIndex = first0+x*params.multiplier[0]
397 +(first1+y*params.multiplier[1])*myN0;
398 for (int m1=0; m1<params.multiplier[1]; m1++) {
399 for (int m0=0; m0<params.multiplier[0]; m0++) {
400 const int dataIndex = baseIndex+m0+m1*myN0;
401 double* dest = out.getSampleDataRW(dataIndex);
402 for (int c=0; c<numComp; c++) {
403 ValueType val = values[x*numComp+c];
404
405 if (params.byteOrder != BYTEORDER_NATIVE) {
406 char* cval = reinterpret_cast<char*>(&val);
407 // this will alter val!!
408 byte_swap32(cval);
409 }
410 if (!std::isnan(val)) {
411 for (int q=0; q<dpp; q++) {
412 *dest++ = static_cast<double>(val);
413 }
414 }
415 }
416 }
417 }
418 }
419 }
420
421 f.close();
422 }
423
424 void Rectangle::writeBinaryGrid(const escript::Data& in, string filename,
425 int byteOrder, int dataType) const
426 {
427 // the mapping is not universally correct but should work on our
428 // supported platforms
429 switch (dataType) {
430 case DATATYPE_INT32:
431 writeBinaryGridImpl<int>(in, filename, byteOrder);
432 break;
433 case DATATYPE_FLOAT32:
434 writeBinaryGridImpl<float>(in, filename, byteOrder);
435 break;
436 case DATATYPE_FLOAT64:
437 writeBinaryGridImpl<double>(in, filename, byteOrder);
438 break;
439 default:
440 throw RipleyException("writeBinaryGrid(): invalid or unsupported datatype");
441 }
442 }
443
444 template<typename ValueType>
445 void Rectangle::writeBinaryGridImpl(const escript::Data& in,
446 const string& filename, int byteOrder) const
447 {
448 // check function space and determine number of points
449 int myN0, myN1;
450 int totalN0, totalN1;
451 if (in.getFunctionSpace().getTypeCode() == Nodes) {
452 myN0 = m_NN[0];
453 myN1 = m_NN[1];
454 totalN0 = m_gNE[0]+1;
455 totalN1 = m_gNE[1]+1;
456 } else if (in.getFunctionSpace().getTypeCode() == Elements ||
457 in.getFunctionSpace().getTypeCode() == ReducedElements) {
458 myN0 = m_NE[0];
459 myN1 = m_NE[1];
460 totalN0 = m_gNE[0];
461 totalN1 = m_gNE[1];
462 } else
463 throw RipleyException("writeBinaryGrid(): invalid function space of data object");
464
465 const int numComp = in.getDataPointSize();
466 const int dpp = in.getNumDataPointsPerSample();
467
468 if (numComp > 1 || dpp > 1)
469 throw RipleyException("writeBinaryGrid(): only scalar, single-value data supported");
470
471 const int fileSize = sizeof(ValueType)*numComp*dpp*totalN0*totalN1;
472
473 // from here on we know that each sample consists of one value
474 FileWriter fw;
475 fw.openFile(filename, fileSize);
476 MPIBarrier();
477
478 for (index_t y=0; y<myN1; y++) {
479 const int fileofs = (m_offset[0]+(m_offset[1]+y)*totalN0)*sizeof(ValueType);
480 ostringstream oss;
481
482 for (index_t x=0; x<myN0; x++) {
483 const double* sample = in.getSampleDataRO(y*myN0+x);
484 ValueType fvalue = static_cast<ValueType>(*sample);
485 if (byteOrder == BYTEORDER_NATIVE) {
486 oss.write((char*)&fvalue, sizeof(fvalue));
487 } else {
488 char* value = reinterpret_cast<char*>(&fvalue);
489 oss.write(byte_swap32(value), sizeof(fvalue));
490 }
491 }
492 fw.writeAt(oss, fileofs);
493 }
494 fw.close();
495 }
496
497 void Rectangle::dump(const string& fileName) const
498 {
499 #if USE_SILO
500 string fn(fileName);
501 if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
502 fn+=".silo";
503 }
504
505 int driver=DB_HDF5;
506 DBfile* dbfile = NULL;
507 const char* blockDirFmt = "/block%04d";
508
509 #ifdef ESYS_MPI
510 PMPIO_baton_t* baton = NULL;
511 const int NUM_SILO_FILES = 1;
512 #endif
513
514 if (m_mpiInfo->size > 1) {
515 #ifdef ESYS_MPI
516 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
517 0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
518 PMPIO_DefaultClose, (void*)&driver);
519 // try the fallback driver in case of error
520 if (!baton && driver != DB_PDB) {
521 driver = DB_PDB;
522 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
523 0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
524 PMPIO_DefaultClose, (void*)&driver);
525 }
526 if (baton) {
527 char siloPath[64];
528 snprintf(siloPath, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
529 dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath);
530 }
531 #endif
532 } else {
533 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
534 getDescription().c_str(), driver);
535 // try the fallback driver in case of error
536 if (!dbfile && driver != DB_PDB) {
537 driver = DB_PDB;
538 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
539 getDescription().c_str(), driver);
540 }
541 char siloPath[64];
542 snprintf(siloPath, 64, blockDirFmt, 0);
543 DBMkDir(dbfile, siloPath);
544 DBSetDir(dbfile, siloPath);
545 }
546
547 if (!dbfile)
548 throw RipleyException("dump: Could not create Silo file");
549
550 /*
551 if (driver==DB_HDF5) {
552 // gzip level 1 already provides good compression with minimal
553 // performance penalty. Some tests showed that gzip levels >3 performed
554 // rather badly on escript data both in terms of time and space
555 DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
556 }
557 */
558
559 boost::scoped_ptr<double> x(new double[m_NN[0]]);
560 boost::scoped_ptr<double> y(new double[m_NN[1]]);
561 double* coords[2] = { x.get(), y.get() };
562 #pragma omp parallel
563 {
564 #pragma omp for nowait
565 for (dim_t i0 = 0; i0 < m_NN[0]; i0++) {
566 coords[0][i0]=getLocalCoordinate(i0, 0);
567 }
568 #pragma omp for nowait
569 for (dim_t i1 = 0; i1 < m_NN[1]; i1++) {
570 coords[1][i1]=getLocalCoordinate(i1, 1);
571 }
572 }
573 int* dims = const_cast<int*>(getNumNodesPerDim());
574
575 // write mesh
576 DBPutQuadmesh(dbfile, "mesh", NULL, coords, dims, 2, DB_DOUBLE,
577 DB_COLLINEAR, NULL);
578
579 // write node ids
580 DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], dims, 2,
581 NULL, 0, DB_INT, DB_NODECENT, NULL);
582
583 // write element ids
584 dims = const_cast<int*>(getNumElementsPerDim());
585 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
586 dims, 2, NULL, 0, DB_INT, DB_ZONECENT, NULL);
587
588 // rank 0 writes multimesh and multivar
589 if (m_mpiInfo->rank == 0) {
590 vector<string> tempstrings;
591 vector<char*> names;
592 for (dim_t i=0; i<m_mpiInfo->size; i++) {
593 stringstream path;
594 path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
595 tempstrings.push_back(path.str());
596 names.push_back((char*)tempstrings.back().c_str());
597 }
598 vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
599 DBSetDir(dbfile, "/");
600 DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
601 &types[0], NULL);
602 tempstrings.clear();
603 names.clear();
604 for (dim_t i=0; i<m_mpiInfo->size; i++) {
605 stringstream path;
606 path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
607 tempstrings.push_back(path.str());
608 names.push_back((char*)tempstrings.back().c_str());
609 }
610 types.assign(m_mpiInfo->size, DB_QUADVAR);
611 DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
612 &types[0], NULL);
613 tempstrings.clear();
614 names.clear();
615 for (dim_t i=0; i<m_mpiInfo->size; i++) {
616 stringstream path;
617 path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
618 tempstrings.push_back(path.str());
619 names.push_back((char*)tempstrings.back().c_str());
620 }
621 DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
622 &types[0], NULL);
623 }
624
625 if (m_mpiInfo->size > 1) {
626 #ifdef ESYS_MPI
627 PMPIO_HandOffBaton(baton, dbfile);
628 PMPIO_Finish(baton);
629 #endif
630 } else {
631 DBClose(dbfile);
632 }
633
634 #else // USE_SILO
635 throw RipleyException("dump: no Silo support");
636 #endif
637 }
638
639 const int* Rectangle::borrowSampleReferenceIDs(int fsType) const
640 {
641 switch (fsType) {
642 case Nodes:
643 case ReducedNodes: // FIXME: reduced
644 return &m_nodeId[0];
645 case DegreesOfFreedom:
646 case ReducedDegreesOfFreedom: // FIXME: reduced
647 return &m_dofId[0];
648 case Elements:
649 case ReducedElements:
650 return &m_elementId[0];
651 case FaceElements:
652 case ReducedFaceElements:
653 return &m_faceId[0];
654 case Points:
655 return &m_diracPointNodeIDs[0];
656 default:
657 break;
658 }
659
660 stringstream msg;
661 msg << "borrowSampleReferenceIDs: invalid function space type " << fsType;
662 throw RipleyException(msg.str());
663 }
664
665 bool Rectangle::ownSample(int fsType, index_t id) const
666 {
667 if (getMPISize()==1)
668 return true;
669
670 switch (fsType) {
671 case Nodes:
672 case ReducedNodes: // FIXME: reduced
673 return (m_dofMap[id] < getNumDOF());
674 case DegreesOfFreedom:
675 case ReducedDegreesOfFreedom:
676 return true;
677 case Elements:
678 case ReducedElements:
679 // check ownership of element's bottom left node
680 return (m_dofMap[id%m_NE[0]+m_NN[0]*(id/m_NE[0])] < getNumDOF());
681 case FaceElements:
682 case ReducedFaceElements:
683 {
684 // determine which face the sample belongs to before
685 // checking ownership of corresponding element's first node
686 dim_t n=0;
687 for (size_t i=0; i<4; i++) {
688 n+=m_faceCount[i];
689 if (id<n) {
690 index_t k;
691 if (i==1)
692 k=m_NN[0]-2;
693 else if (i==3)
694 k=m_NN[0]*(m_NN[1]-2);
695 else
696 k=0;
697 // determine whether to move right or up
698 const index_t delta=(i/2==0 ? m_NN[0] : 1);
699 return (m_dofMap[k+(id-n+m_faceCount[i])*delta] < getNumDOF());
700 }
701 }
702 return false;
703 }
704 default:
705 break;
706 }
707
708 stringstream msg;
709 msg << "ownSample: invalid function space type " << fsType;
710 throw RipleyException(msg.str());
711 }
712
713 void Rectangle::setToNormal(escript::Data& out) const
714 {
715 if (out.getFunctionSpace().getTypeCode() == FaceElements) {
716 out.requireWrite();
717 #pragma omp parallel
718 {
719 if (m_faceOffset[0] > -1) {
720 #pragma omp for nowait
721 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
722 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
723 // set vector at two quadrature points
724 *o++ = -1.;
725 *o++ = 0.;
726 *o++ = -1.;
727 *o = 0.;
728 }
729 }
730
731 if (m_faceOffset[1] > -1) {
732 #pragma omp for nowait
733 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
734 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
735 // set vector at two quadrature points
736 *o++ = 1.;
737 *o++ = 0.;
738 *o++ = 1.;
739 *o = 0.;
740 }
741 }
742
743 if (m_faceOffset[2] > -1) {
744 #pragma omp for nowait
745 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
746 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
747 // set vector at two quadrature points
748 *o++ = 0.;
749 *o++ = -1.;
750 *o++ = 0.;
751 *o = -1.;
752 }
753 }
754
755 if (m_faceOffset[3] > -1) {
756 #pragma omp for nowait
757 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
758 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
759 // set vector at two quadrature points
760 *o++ = 0.;
761 *o++ = 1.;
762 *o++ = 0.;
763 *o = 1.;
764 }
765 }
766 } // end of parallel section
767 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
768 out.requireWrite();
769 #pragma omp parallel
770 {
771 if (m_faceOffset[0] > -1) {
772 #pragma omp for nowait
773 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
774 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
775 *o++ = -1.;
776 *o = 0.;
777 }
778 }
779
780 if (m_faceOffset[1] > -1) {
781 #pragma omp for nowait
782 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
783 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
784 *o++ = 1.;
785 *o = 0.;
786 }
787 }
788
789 if (m_faceOffset[2] > -1) {
790 #pragma omp for nowait
791 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
792 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
793 *o++ = 0.;
794 *o = -1.;
795 }
796 }
797
798 if (m_faceOffset[3] > -1) {
799 #pragma omp for nowait
800 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
801 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
802 *o++ = 0.;
803 *o = 1.;
804 }
805 }
806 } // end of parallel section
807
808 } else {
809 stringstream msg;
810 msg << "setToNormal: invalid function space type "
811 << out.getFunctionSpace().getTypeCode();
812 throw RipleyException(msg.str());
813 }
814 }
815
816 void Rectangle::setToSize(escript::Data& out) const
817 {
818 if (out.getFunctionSpace().getTypeCode() == Elements
819 || out.getFunctionSpace().getTypeCode() == ReducedElements) {
820 out.requireWrite();
821 const dim_t numQuad=out.getNumDataPointsPerSample();
822 const double size=sqrt(m_dx[0]*m_dx[0]+m_dx[1]*m_dx[1]);
823 #pragma omp parallel for
824 for (index_t k = 0; k < getNumElements(); ++k) {
825 double* o = out.getSampleDataRW(k);
826 fill(o, o+numQuad, size);
827 }
828 } else if (out.getFunctionSpace().getTypeCode() == FaceElements
829 || out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
830 out.requireWrite();
831 const dim_t numQuad=out.getNumDataPointsPerSample();
832 #pragma omp parallel
833 {
834 if (m_faceOffset[0] > -1) {
835 #pragma omp for nowait
836 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
837 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
838 fill(o, o+numQuad, m_dx[1]);
839 }
840 }
841
842 if (m_faceOffset[1] > -1) {
843 #pragma omp for nowait
844 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
845 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
846 fill(o, o+numQuad, m_dx[1]);
847 }
848 }
849
850 if (m_faceOffset[2] > -1) {
851 #pragma omp for nowait
852 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
853 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
854 fill(o, o+numQuad, m_dx[0]);
855 }
856 }
857
858 if (m_faceOffset[3] > -1) {
859 #pragma omp for nowait
860 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
861 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
862 fill(o, o+numQuad, m_dx[0]);
863 }
864 }
865 } // end of parallel section
866
867 } else {
868 stringstream msg;
869 msg << "setToSize: invalid function space type "
870 << out.getFunctionSpace().getTypeCode();
871 throw RipleyException(msg.str());
872 }
873 }
874
875 void Rectangle::Print_Mesh_Info(const bool full) const
876 {
877 RipleyDomain::Print_Mesh_Info(full);
878 if (full) {
879 cout << " Id Coordinates" << endl;
880 cout.precision(15);
881 cout.setf(ios::scientific, ios::floatfield);
882 for (index_t i=0; i < getNumNodes(); i++) {
883 cout << " " << setw(5) << m_nodeId[i]
884 << " " << getLocalCoordinate(i%m_NN[0], 0)
885 << " " << getLocalCoordinate(i/m_NN[0], 1) << endl;
886 }
887 }
888 }
889
890
891 //protected
892 void Rectangle::assembleCoordinates(escript::Data& arg) const
893 {
894 escriptDataC x = arg.getDataC();
895 int numDim = m_numDim;
896 if (!isDataPointShapeEqual(&x, 1, &numDim))
897 throw RipleyException("setToX: Invalid Data object shape");
898 if (!numSamplesEqual(&x, 1, getNumNodes()))
899 throw RipleyException("setToX: Illegal number of samples in Data object");
900
901 arg.requireWrite();
902 #pragma omp parallel for
903 for (dim_t i1 = 0; i1 < m_NN[1]; i1++) {
904 for (dim_t i0 = 0; i0 < m_NN[0]; i0++) {
905 double* point = arg.getSampleDataRW(i0+m_NN[0]*i1);
906 point[0] = getLocalCoordinate(i0, 0);
907 point[1] = getLocalCoordinate(i1, 1);
908 }
909 }
910 }
911
912 //protected
913 void Rectangle::assembleGradient(escript::Data& out, const escript::Data& in) const
914 {
915 const dim_t numComp = in.getDataPointSize();
916 const double cx0 = .21132486540518711775/m_dx[0];
917 const double cx1 = .78867513459481288225/m_dx[0];
918 const double cx2 = 1./m_dx[0];
919 const double cy0 = .21132486540518711775/m_dx[1];
920 const double cy1 = .78867513459481288225/m_dx[1];
921 const double cy2 = 1./m_dx[1];
922
923 if (out.getFunctionSpace().getTypeCode() == Elements) {
924 out.requireWrite();
925 #pragma omp parallel
926 {
927 vector<double> f_00(numComp);
928 vector<double> f_01(numComp);
929 vector<double> f_10(numComp);
930 vector<double> f_11(numComp);
931 #pragma omp for
932 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
933 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
934 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,k1, m_NN[0])), numComp*sizeof(double));
935 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,k1+1, m_NN[0])), numComp*sizeof(double));
936 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,k1, m_NN[0])), numComp*sizeof(double));
937 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,k1+1, m_NN[0])), numComp*sizeof(double));
938 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE[0]));
939 for (index_t i=0; i < numComp; ++i) {
940 o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx1 + (f_11[i]-f_01[i])*cx0;
941 o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy1 + (f_11[i]-f_10[i])*cy0;
942 o[INDEX3(i,0,1,numComp,2)] = (f_10[i]-f_00[i])*cx1 + (f_11[i]-f_01[i])*cx0;
943 o[INDEX3(i,1,1,numComp,2)] = (f_01[i]-f_00[i])*cy0 + (f_11[i]-f_10[i])*cy1;
944 o[INDEX3(i,0,2,numComp,2)] = (f_10[i]-f_00[i])*cx0 + (f_11[i]-f_01[i])*cx1;
945 o[INDEX3(i,1,2,numComp,2)] = (f_01[i]-f_00[i])*cy1 + (f_11[i]-f_10[i])*cy0;
946 o[INDEX3(i,0,3,numComp,2)] = (f_10[i]-f_00[i])*cx0 + (f_11[i]-f_01[i])*cx1;
947 o[INDEX3(i,1,3,numComp,2)] = (f_01[i]-f_00[i])*cy0 + (f_11[i]-f_10[i])*cy1;
948 } // end of component loop i
949 } // end of k0 loop
950 } // end of k1 loop
951 } // end of parallel section
952 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
953 out.requireWrite();
954 #pragma omp parallel
955 {
956 vector<double> f_00(numComp);
957 vector<double> f_01(numComp);
958 vector<double> f_10(numComp);
959 vector<double> f_11(numComp);
960 #pragma omp for
961 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
962 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
963 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,k1, m_NN[0])), numComp*sizeof(double));
964 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,k1+1, m_NN[0])), numComp*sizeof(double));
965 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,k1, m_NN[0])), numComp*sizeof(double));
966 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,k1+1, m_NN[0])), numComp*sizeof(double));
967 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE[0]));
968 for (index_t i=0; i < numComp; ++i) {
969 o[INDEX3(i,0,0,numComp,2)] = (f_10[i] + f_11[i] - f_00[i] - f_01[i])*cx2/2;
970 o[INDEX3(i,1,0,numComp,2)] = (f_01[i] + f_11[i] - f_00[i] - f_10[i])*cy2/2;
971 } // end of component loop i
972 } // end of k0 loop
973 } // end of k1 loop
974 } // end of parallel section
975 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
976 out.requireWrite();
977 #pragma omp parallel
978 {
979 vector<double> f_00(numComp);
980 vector<double> f_01(numComp);
981 vector<double> f_10(numComp);
982 vector<double> f_11(numComp);
983 if (m_faceOffset[0] > -1) {
984 #pragma omp for nowait
985 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
986 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(0,k1, m_NN[0])), numComp*sizeof(double));
987 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(0,k1+1, m_NN[0])), numComp*sizeof(double));
988 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(1,k1, m_NN[0])), numComp*sizeof(double));
989 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(1,k1+1, m_NN[0])), numComp*sizeof(double));
990 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
991 for (index_t i=0; i < numComp; ++i) {
992 o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx1 + (f_11[i]-f_01[i])*cx0;
993 o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy2;
994 o[INDEX3(i,0,1,numComp,2)] = (f_10[i]-f_00[i])*cx0 + (f_11[i]-f_01[i])*cx1;
995 o[INDEX3(i,1,1,numComp,2)] = (f_01[i]-f_00[i])*cy2;
996 } // end of component loop i
997 } // end of k1 loop
998 } // end of face 0
999 if (m_faceOffset[1] > -1) {
1000 #pragma omp for nowait
1001 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1002 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(m_NN[0]-2,k1, m_NN[0])), numComp*sizeof(double));
1003 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(m_NN[0]-2,k1+1, m_NN[0])), numComp*sizeof(double));
1004 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1, m_NN[0])), numComp*sizeof(double));
1005 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1+1, m_NN[0])), numComp*sizeof(double));
1006 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
1007 for (index_t i=0; i < numComp; ++i) {
1008 o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx1 + (f_11[i]-f_01[i])*cx0;
1009 o[INDEX3(i,1,0,numComp,2)] = (f_11[i]-f_10[i])*cy2;
1010 o[INDEX3(i,0,1,numComp,2)] = (f_10[i]-f_00[i])*cx0 + (f_11[i]-f_01[i])*cx1;
1011 o[INDEX3(i,1,1,numComp,2)] = (f_11[i]-f_10[i])*cy2;
1012 } // end of component loop i
1013 } // end of k1 loop
1014 } // end of face 1
1015 if (m_faceOffset[2] > -1) {
1016 #pragma omp for nowait
1017 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1018 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,0, m_NN[0])), numComp*sizeof(double));
1019 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,1, m_NN[0])), numComp*sizeof(double));
1020 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,0, m_NN[0])), numComp*sizeof(double));
1021 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,1, m_NN[0])), numComp*sizeof(double));
1022 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1023 for (index_t i=0; i < numComp; ++i) {
1024 o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx2;
1025 o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy1 + (f_11[i]-f_10[i])*cy0;
1026 o[INDEX3(i,0,1,numComp,2)] = (f_10[i]-f_00[i])*cx2;
1027 o[INDEX3(i,1,1,numComp,2)] = (f_01[i]-f_00[i])*cy0 + (f_11[i]-f_10[i])*cy1;
1028 } // end of component loop i
1029 } // end of k0 loop
1030 } // end of face 2
1031 if (m_faceOffset[3] > -1) {
1032 #pragma omp for nowait
1033 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1034 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-2, m_NN[0])), numComp*sizeof(double));
1035 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1036 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-2, m_NN[0])), numComp*sizeof(double));
1037 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1038 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1039 for (index_t i=0; i < numComp; ++i) {
1040 o[INDEX3(i,0,0,numComp,2)] = (f_11[i]-f_01[i])*cx2;
1041 o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy1 + (f_11[i]-f_10[i])*cy0;
1042 o[INDEX3(i,0,1,numComp,2)] = (f_11[i]-f_01[i])*cx2;
1043 o[INDEX3(i,1,1,numComp,2)] = (f_01[i]-f_00[i])*cy0 + (f_11[i]-f_10[i])*cy1;
1044 } // end of component loop i
1045 } // end of k0 loop
1046 } // end of face 3
1047 } // end of parallel section
1048
1049 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1050 out.requireWrite();
1051 #pragma omp parallel
1052 {
1053 vector<double> f_00(numComp);
1054 vector<double> f_01(numComp);
1055 vector<double> f_10(numComp);
1056 vector<double> f_11(numComp);
1057 if (m_faceOffset[0] > -1) {
1058 #pragma omp for nowait
1059 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1060 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(0,k1, m_NN[0])), numComp*sizeof(double));
1061 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(0,k1+1, m_NN[0])), numComp*sizeof(double));
1062 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(1,k1, m_NN[0])), numComp*sizeof(double));
1063 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(1,k1+1, m_NN[0])), numComp*sizeof(double));
1064 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
1065 for (index_t i=0; i < numComp; ++i) {
1066 o[INDEX3(i,0,0,numComp,2)] = (f_10[i] + f_11[i] - f_00[i] - f_01[i])*cx2/2;
1067 o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy2;
1068 } // end of component loop i
1069 } // end of k1 loop
1070 } // end of face 0
1071 if (m_faceOffset[1] > -1) {
1072 #pragma omp for nowait
1073 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1074 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(m_NN[0]-2,k1, m_NN[0])), numComp*sizeof(double));
1075 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(m_NN[0]-2,k1+1, m_NN[0])), numComp*sizeof(double));
1076 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1, m_NN[0])), numComp*sizeof(double));
1077 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1+1, m_NN[0])), numComp*sizeof(double));
1078 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
1079 for (index_t i=0; i < numComp; ++i) {
1080 o[INDEX3(i,0,0,numComp,2)] = (f_10[i] + f_11[i] - f_00[i] - f_01[i])*cx2/2;
1081 o[INDEX3(i,1,0,numComp,2)] = (f_11[i]-f_10[i])*cy2;
1082 } // end of component loop i
1083 } // end of k1 loop
1084 } // end of face 1
1085 if (m_faceOffset[2] > -1) {
1086 #pragma omp for nowait
1087 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1088 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,0, m_NN[0])), numComp*sizeof(double));
1089 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,1, m_NN[0])), numComp*sizeof(double));
1090 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,0, m_NN[0])), numComp*sizeof(double));
1091 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,1, m_NN[0])), numComp*sizeof(double));
1092 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1093 for (index_t i=0; i < numComp; ++i) {
1094 o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx2;
1095 o[INDEX3(i,1,0,numComp,2)] = (f_01[i] + f_11[i] - f_00[i] - f_10[i])*cy2/2;
1096 } // end of component loop i
1097 } // end of k0 loop
1098 } // end of face 2
1099 if (m_faceOffset[3] > -1) {
1100 #pragma omp for nowait
1101 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1102 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-2, m_NN[0])), numComp*sizeof(double));
1103 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1104 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-2, m_NN[0])), numComp*sizeof(double));
1105 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1106 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1107 for (index_t i=0; i < numComp; ++i) {
1108 o[INDEX3(i,0,0,numComp,2)] = (f_11[i]-f_01[i])*cx2;
1109 o[INDEX3(i,1,0,numComp,2)] = (f_01[i] + f_11[i] - f_00[i] - f_10[i])*cy2/2;
1110 } // end of component loop i
1111 } // end of k0 loop
1112 } // end of face 3
1113 } // end of parallel section
1114 }
1115 }
1116
1117 //protected
1118 void Rectangle::assembleIntegrate(vector<double>& integrals,
1119 const escript::Data& arg) const
1120 {
1121 const dim_t numComp = arg.getDataPointSize();
1122 const index_t left = (m_offset[0]==0 ? 0 : 1);
1123 const index_t bottom = (m_offset[1]==0 ? 0 : 1);
1124 const int fs=arg.getFunctionSpace().getTypeCode();
1125 if (fs == Elements && arg.actsExpanded()) {
1126 #pragma omp parallel
1127 {
1128 vector<double> int_local(numComp, 0);
1129 const double w = m_dx[0]*m_dx[1]/4.;
1130 #pragma omp for nowait
1131 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1132 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1133 const double* f = arg.getSampleDataRO(INDEX2(k0, k1, m_NE[0]));
1134 for (index_t i=0; i < numComp; ++i) {
1135 const double f0 = f[INDEX2(i,0,numComp)];
1136 const double f1 = f[INDEX2(i,1,numComp)];
1137 const double f2 = f[INDEX2(i,2,numComp)];
1138 const double f3 = f[INDEX2(i,3,numComp)];
1139 int_local[i]+=(f0+f1+f2+f3)*w;
1140 } // end of component loop i
1141 } // end of k0 loop
1142 } // end of k1 loop
1143 #pragma omp critical
1144 for (index_t i=0; i<numComp; i++)
1145 integrals[i]+=int_local[i];
1146 } // end of parallel section
1147
1148 } else if (fs==ReducedElements || (fs==Elements && !arg.actsExpanded())) {
1149 const double w = m_dx[0]*m_dx[1];
1150 #pragma omp parallel
1151 {
1152 vector<double> int_local(numComp, 0);
1153 #pragma omp for nowait
1154 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1155 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1156 const double* f = arg.getSampleDataRO(INDEX2(k0, k1, m_NE[0]));
1157 for (index_t i=0; i < numComp; ++i) {
1158 int_local[i]+=f[i]*w;
1159 }
1160 }
1161 }
1162 #pragma omp critical
1163 for (index_t i=0; i<numComp; i++)
1164 integrals[i]+=int_local[i];
1165 } // end of parallel section
1166
1167 } else if (fs == FaceElements && arg.actsExpanded()) {
1168 #pragma omp parallel
1169 {
1170 vector<double> int_local(numComp, 0);
1171 const double w0 = m_dx[0]/2.;
1172 const double w1 = m_dx[1]/2.;
1173 if (m_faceOffset[0] > -1) {
1174 #pragma omp for nowait
1175 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1176 const double* f = arg.getSampleDataRO(m_faceOffset[0]+k1);
1177 for (index_t i=0; i < numComp; ++i) {
1178 const double f0 = f[INDEX2(i,0,numComp)];
1179 const double f1 = f[INDEX2(i,1,numComp)];
1180 int_local[i]+=(f0+f1)*w1;
1181 } // end of component loop i
1182 } // end of k1 loop
1183 }
1184
1185 if (m_faceOffset[1] > -1) {
1186 #pragma omp for nowait
1187 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1188 const double* f = arg.getSampleDataRO(m_faceOffset[1]+k1);
1189 for (index_t i=0; i < numComp; ++i) {
1190 const double f0 = f[INDEX2(i,0,numComp)];
1191 const double f1 = f[INDEX2(i,1,numComp)];
1192 int_local[i]+=(f0+f1)*w1;
1193 } // end of component loop i
1194 } // end of k1 loop
1195 }
1196
1197 if (m_faceOffset[2] > -1) {
1198 #pragma omp for nowait
1199 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1200 const double* f = arg.getSampleDataRO(m_faceOffset[2]+k0);
1201 for (index_t i=0; i < numComp; ++i) {
1202 const double f0 = f[INDEX2(i,0,numComp)];
1203 const double f1 = f[INDEX2(i,1,numComp)];
1204 int_local[i]+=(f0+f1)*w0;
1205 } // end of component loop i
1206 } // end of k0 loop
1207 }
1208
1209 if (m_faceOffset[3] > -1) {
1210 #pragma omp for nowait
1211 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1212 const double* f = arg.getSampleDataRO(m_faceOffset[3]+k0);
1213 for (index_t i=0; i < numComp; ++i) {
1214 const double f0 = f[INDEX2(i,0,numComp)];
1215 const double f1 = f[INDEX2(i,1,numComp)];
1216 int_local[i]+=(f0+f1)*w0;
1217 } // end of component loop i
1218 } // end of k0 loop
1219 }
1220 #pragma omp critical
1221 for (index_t i=0; i<numComp; i++)
1222 integrals[i]+=int_local[i];
1223 } // end of parallel section
1224
1225 } else if (fs==ReducedFaceElements || (fs==FaceElements && !arg.actsExpanded())) {
1226 #pragma omp parallel
1227 {
1228 vector<double> int_local(numComp, 0);
1229 if (m_faceOffset[0] > -1) {
1230 #pragma omp for nowait
1231 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1232 const double* f = arg.getSampleDataRO(m_faceOffset[0]+k1);
1233 for (index_t i=0; i < numComp; ++i) {
1234 int_local[i]+=f[i]*m_dx[1];
1235 }
1236 }
1237 }
1238
1239 if (m_faceOffset[1] > -1) {
1240 #pragma omp for nowait
1241 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1242 const double* f = arg.getSampleDataRO(m_faceOffset[1]+k1);
1243 for (index_t i=0; i < numComp; ++i) {
1244 int_local[i]+=f[i]*m_dx[1];
1245 }
1246 }
1247 }
1248
1249 if (m_faceOffset[2] > -1) {
1250 #pragma omp for nowait
1251 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1252 const double* f = arg.getSampleDataRO(m_faceOffset[2]+k0);
1253 for (index_t i=0; i < numComp; ++i) {
1254 int_local[i]+=f[i]*m_dx[0];
1255 }
1256 }
1257 }
1258
1259 if (m_faceOffset[3] > -1) {
1260 #pragma omp for nowait
1261 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1262 const double* f = arg.getSampleDataRO(m_faceOffset[3]+k0);
1263 for (index_t i=0; i < numComp; ++i) {
1264 int_local[i]+=f[i]*m_dx[0];
1265 }
1266 }
1267 }
1268
1269 #pragma omp critical
1270 for (index_t i=0; i<numComp; i++)
1271 integrals[i]+=int_local[i];
1272 } // end of parallel section
1273 } // function space selector
1274 }
1275
1276 //protected
1277 dim_t Rectangle::insertNeighbourNodes(IndexVector& index, index_t node) const
1278 {
1279 const dim_t nDOF0 = (m_gNE[0]+1)/m_NX[0];
1280 const dim_t nDOF1 = (m_gNE[1]+1)/m_NX[1];
1281 const int x=node%nDOF0;
1282 const int y=node/nDOF0;
1283 dim_t num=0;
1284 // loop through potential neighbours and add to index if positions are
1285 // within bounds
1286 for (int i1=-1; i1<2; i1++) {
1287 for (int i0=-1; i0<2; i0++) {
1288 // skip node itself
1289 if (i0==0 && i1==0)
1290 continue;
1291 // location of neighbour node
1292 const int nx=x+i0;
1293 const int ny=y+i1;
1294 if (nx>=0 && ny>=0 && nx<nDOF0 && ny<nDOF1) {
1295 index.push_back(ny*nDOF0+nx);
1296 num++;
1297 }
1298 }
1299 }
1300
1301 return num;
1302 }
1303
1304 //protected
1305 void Rectangle::nodesToDOF(escript::Data& out, const escript::Data& in) const
1306 {
1307 const dim_t numComp = in.getDataPointSize();
1308 out.requireWrite();
1309
1310 const index_t left = (m_offset[0]==0 ? 0 : 1);
1311 const index_t bottom = (m_offset[1]==0 ? 0 : 1);
1312 const dim_t nDOF0 = (m_gNE[0]+1)/m_NX[0];
1313 const dim_t nDOF1 = (m_gNE[1]+1)/m_NX[1];
1314 #pragma omp parallel for
1315 for (index_t i=0; i<nDOF1; i++) {
1316 for (index_t j=0; j<nDOF0; j++) {
1317 const index_t n=j+left+(i+bottom)*m_NN[0];
1318 const double* src=in.getSampleDataRO(n);
1319 copy(src, src+numComp, out.getSampleDataRW(j+i*nDOF0));
1320 }
1321 }
1322 }
1323
1324 //protected
1325 void Rectangle::dofToNodes(escript::Data& out, const escript::Data& in) const
1326 {
1327 const dim_t numComp = in.getDataPointSize();
1328 Paso_Coupler* coupler = Paso_Coupler_alloc(m_connector, numComp);
1329 // expand data object if necessary to be able to grab the whole data
1330 const_cast<escript::Data*>(&in)->expand();
1331 Paso_Coupler_startCollect(coupler, in.getDataRO());
1332
1333 const dim_t numDOF = getNumDOF();
1334 out.requireWrite();
1335 const double* buffer = Paso_Coupler_finishCollect(coupler);
1336
1337 #pragma omp parallel for
1338 for (index_t i=0; i<getNumNodes(); i++) {
1339 const double* src=(m_dofMap[i]<numDOF ?
1340 in.getSampleDataRO(m_dofMap[i])
1341 : &buffer[(m_dofMap[i]-numDOF)*numComp]);
1342 copy(src, src+numComp, out.getSampleDataRW(i));
1343 }
1344 Paso_Coupler_free(coupler);
1345 }
1346
1347 //private
1348 void Rectangle::populateSampleIds()
1349 {
1350 // degrees of freedom are numbered from left to right, bottom to top in
1351 // each rank, continuing on the next rank (ranks also go left-right,
1352 // bottom-top).
1353 // This means rank 0 has id 0...n0-1, rank 1 has id n0...n1-1 etc. which
1354 // helps when writing out data rank after rank.
1355
1356 // build node distribution vector first.
1357 // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes which is
1358 // constant for all ranks in this implementation
1359 m_nodeDistribution.assign(m_mpiInfo->size+1, 0);
1360 const dim_t numDOF=getNumDOF();
1361 for (dim_t k=1; k<m_mpiInfo->size; k++) {
1362 m_nodeDistribution[k]=k*numDOF;
1363 }
1364 m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal();
1365 m_nodeId.resize(getNumNodes());
1366 m_dofId.resize(numDOF);
1367 m_elementId.resize(getNumElements());
1368
1369 // populate face element counts
1370 //left
1371 if (m_offset[0]==0)
1372 m_faceCount[0]=m_NE[1];
1373 else
1374 m_faceCount[0]=0;
1375 //right
1376 if (m_mpiInfo->rank%m_NX[0]==m_NX[0]-1)
1377 m_faceCount[1]=m_NE[1];
1378 else
1379 m_faceCount[1]=0;
1380 //bottom
1381 if (m_offset[1]==0)
1382 m_faceCount[2]=m_NE[0];
1383 else
1384 m_faceCount[2]=0;
1385 //top
1386 if (m_mpiInfo->rank/m_NX[0]==m_NX[1]-1)
1387 m_faceCount[3]=m_NE[0];
1388 else
1389 m_faceCount[3]=0;
1390
1391 m_faceId.resize(getNumFaceElements());
1392
1393 const index_t left = (m_offset[0]==0 ? 0 : 1);
1394 const index_t bottom = (m_offset[1]==0 ? 0 : 1);
1395 const dim_t nDOF0 = (m_gNE[0]+1)/m_NX[0];
1396 const dim_t nDOF1 = (m_gNE[1]+1)/m_NX[1];
1397
1398 #define globalNodeId(x,y) \
1399 ((m_offset[0]+x)/nDOF0)*nDOF0*nDOF1+(m_offset[0]+x)%nDOF0 \
1400 + ((m_offset[1]+y)/nDOF1)*nDOF0*nDOF1*m_NX[0]+((m_offset[1]+y)%nDOF1)*nDOF0
1401
1402 // set corner id's outside the parallel region
1403 m_nodeId[0] = globalNodeId(0, 0);
1404 m_nodeId[m_NN[0]-1] = globalNodeId(m_NN[0]-1, 0);
1405 m_nodeId[m_NN[0]*(m_NN[1]-1)] = globalNodeId(0, m_NN[1]-1);
1406 m_nodeId[m_NN[0]*m_NN[1]-1] = globalNodeId(m_NN[0]-1,m_NN[1]-1);
1407 #undef globalNodeId
1408
1409 #pragma omp parallel
1410 {
1411 // populate degrees of freedom and own nodes (identical id)
1412 #pragma omp for nowait
1413 for (dim_t i=0; i<nDOF1; i++) {
1414 for (dim_t j=0; j<nDOF0; j++) {
1415 const index_t nodeIdx=j+left+(i+bottom)*m_NN[0];
1416 const index_t dofIdx=j+i*nDOF0;
1417 m_dofId[dofIdx] = m_nodeId[nodeIdx]
1418 = m_nodeDistribution[m_mpiInfo->rank]+dofIdx;
1419 }
1420 }
1421
1422 // populate the rest of the nodes (shared with other ranks)
1423 if (m_faceCount[0]==0) { // left column
1424 #pragma omp for nowait
1425 for (dim_t i=0; i<nDOF1; i++) {
1426 const index_t nodeIdx=(i+bottom)*m_NN[0];
1427 const index_t dofId=(i+1)*nDOF0-1;
1428 m_nodeId[nodeIdx]
1429 = m_nodeDistribution[m_mpiInfo->rank-1]+dofId;
1430 }
1431 }
1432 if (m_faceCount[1]==0) { // right column
1433 #pragma omp for nowait
1434 for (dim_t i=0; i<nDOF1; i++) {
1435 const index_t nodeIdx=(i+bottom+1)*m_NN[0]-1;
1436 const index_t dofId=i*nDOF0;
1437 m_nodeId[nodeIdx]
1438 = m_nodeDistribution[m_mpiInfo->rank+1]+dofId;
1439 }
1440 }
1441 if (m_faceCount[2]==0) { // bottom row
1442 #pragma omp for nowait
1443 for (dim_t i=0; i<nDOF0; i++) {
1444 const index_t nodeIdx=i+left;
1445 const index_t dofId=nDOF0*(nDOF1-1)+i;
1446 m_nodeId[nodeIdx]
1447 = m_nodeDistribution[m_mpiInfo->rank-m_NX[0]]+dofId;
1448 }
1449 }
1450 if (m_faceCount[3]==0) { // top row
1451 #pragma omp for nowait
1452 for (dim_t i=0; i<nDOF0; i++) {
1453 const index_t nodeIdx=m_NN[0]*(m_NN[1]-1)+i+left;
1454 const index_t dofId=i;
1455 m_nodeId[nodeIdx]
1456 = m_nodeDistribution[m_mpiInfo->rank+m_NX[0]]+dofId;
1457 }
1458 }
1459
1460 // populate element id's
1461 #pragma omp for nowait
1462 for (dim_t i1=0; i1<m_NE[1]; i1++) {
1463 for (dim_t i0=0; i0<m_NE[0]; i0++) {
1464 m_elementId[i0+i1*m_NE[0]]=(m_offset[1]+i1)*m_gNE[0]+m_offset[0]+i0;
1465 }
1466 }
1467
1468 // face elements
1469 #pragma omp for
1470 for (dim_t k=0; k<getNumFaceElements(); k++)
1471 m_faceId[k]=k;
1472 } // end parallel section
1473
1474 m_nodeTags.assign(getNumNodes(), 0);
1475 updateTagsInUse(Nodes);
1476
1477 m_elementTags.assign(getNumElements(), 0);
1478 updateTagsInUse(Elements);
1479
1480 // generate face offset vector and set face tags
1481 const index_t LEFT=1, RIGHT=2, BOTTOM=10, TOP=20;
1482 const index_t faceTag[] = { LEFT, RIGHT, BOTTOM, TOP };
1483 m_faceOffset.assign(4, -1);
1484 m_faceTags.clear();
1485 index_t offset=0;
1486 for (size_t i=0; i<4; i++) {
1487 if (m_faceCount[i]>0) {
1488 m_faceOffset[i]=offset;
1489 offset+=m_faceCount[i];
1490 m_faceTags.insert(m_faceTags.end(), m_faceCount[i], faceTag[i]);
1491 }
1492 }
1493 setTagMap("left", LEFT);
1494 setTagMap("right", RIGHT);
1495 setTagMap("bottom", BOTTOM);
1496 setTagMap("top", TOP);
1497 updateTagsInUse(FaceElements);
1498 }
1499
1500 //private
1501 void Rectangle::createPattern()
1502 {
1503 const dim_t nDOF0 = (m_gNE[0]+1)/m_NX[0];
1504 const dim_t nDOF1 = (m_gNE[1]+1)/m_NX[1];
1505 const index_t left = (m_offset[0]==0 ? 0 : 1);
1506 const index_t bottom = (m_offset[1]==0 ? 0 : 1);
1507
1508 // populate node->DOF mapping with own degrees of freedom.
1509 // The rest is assigned in the loop further down
1510 m_dofMap.assign(getNumNodes(), 0);
1511 #pragma omp parallel for
1512 for (index_t i=bottom; i<bottom+nDOF1; i++) {
1513 for (index_t j=left; j<left+nDOF0; j++) {
1514 m_dofMap[i*m_NN[0]+j]=(i-bottom)*nDOF0+j-left;
1515 }
1516 }
1517
1518 // build list of shared components and neighbours by looping through
1519 // all potential neighbouring ranks and checking if positions are
1520 // within bounds
1521 const dim_t numDOF=nDOF0*nDOF1;
1522 vector<IndexVector> colIndices(numDOF); // for the couple blocks
1523 RankVector neighbour;
1524 IndexVector offsetInShared(1,0);
1525 IndexVector sendShared, recvShared;
1526 int numShared=0, expectedShared=0;
1527 const int x=m_mpiInfo->rank%m_NX[0];
1528 const int y=m_mpiInfo->rank/m_NX[0];
1529 if (x > 0)
1530 expectedShared += nDOF1;
1531 if (x < m_NX[0] - 1)
1532 expectedShared += nDOF1;
1533 if (y > 0)
1534 expectedShared += nDOF0;
1535 if (y < m_NX[1] - 1)
1536 expectedShared += nDOF0;
1537 if (x > 0 && y > 0) expectedShared++;
1538 if (x > 0 && y < m_NX[1] - 1) expectedShared++;
1539 if (x < m_NX[0] - 1 && y > 0) expectedShared++;
1540 if (x < m_NX[0] - 1 && y < m_NX[1] - 1) expectedShared++;
1541
1542 vector<IndexVector> rowIndices(expectedShared);
1543
1544 for (int i1=-1; i1<2; i1++) {
1545 for (int i0=-1; i0<2; i0++) {
1546 // skip this rank
1547 if (i0==0 && i1==0)
1548 continue;
1549 // location of neighbour rank
1550 const int nx=x+i0;
1551 const int ny=y+i1;
1552 if (nx>=0 && ny>=0 && nx<m_NX[0] && ny<m_NX[1]) {
1553 neighbour.push_back(ny*m_NX[0]+nx);
1554 if (i0==0) {
1555 // sharing top or bottom edge
1556 const int firstDOF=(i1==-1 ? 0 : numDOF-nDOF0);
1557 const int firstNode=(i1==-1 ? left : m_NN[0]*(m_NN[1]-1)+left);
1558 offsetInShared.push_back(offsetInShared.back()+nDOF0);
1559 for (dim_t i=0; i<nDOF0; i++, numShared++) {
1560 sendShared.push_back(firstDOF+i);
1561 recvShared.push_back(numDOF+numShared);
1562 if (i>0)
1563 doublyLink(colIndices, rowIndices, firstDOF+i-1, numShared);
1564 doublyLink(colIndices, rowIndices, firstDOF+i, numShared);
1565 if (i<nDOF0-1)
1566 doublyLink(colIndices, rowIndices, firstDOF+i+1, numShared);
1567 m_dofMap[firstNode+i]=numDOF+numShared;
1568 }
1569 } else if (i1==0) {
1570 // sharing left or right edge
1571 const int firstDOF=(i0==-1 ? 0 : nDOF0-1);
1572 const int firstNode=(i0==-1 ? bottom*m_NN[0] : (bottom+1)*m_NN[0]-1);
1573 offsetInShared.push_back(offsetInShared.back()+nDOF1);
1574 for (dim_t i=0; i<nDOF1; i++, numShared++) {
1575 sendShared.push_back(firstDOF+i*nDOF0);
1576 recvShared.push_back(numDOF+numShared);
1577 if (i>0)
1578 doublyLink(colIndices, rowIndices, firstDOF+(i-1)*nDOF0, numShared);
1579 doublyLink(colIndices, rowIndices, firstDOF+i*nDOF0, numShared);
1580 if (i<nDOF1-1)
1581 doublyLink(colIndices, rowIndices, firstDOF+(i+1)*nDOF0, numShared);
1582 m_dofMap[firstNode+i*m_NN[0]]=numDOF+numShared;
1583 }
1584 } else {
1585 // sharing a node
1586 const int dof=(i0+1)/2*(nDOF0-1)+(i1+1)/2*(numDOF-nDOF0);
1587 const int node=(i0+1)/2*(m_NN[0]-1)+(i1+1)/2*m_NN[0]*(m_NN[1]-1);
1588 offsetInShared.push_back(offsetInShared.back()+1);
1589 sendShared.push_back(dof);
1590 recvShared.push_back(numDOF+numShared);
1591 doublyLink(colIndices, rowIndices, dof, numShared);
1592 m_dofMap[node]=numDOF+numShared;
1593 ++numShared;
1594 }
1595 }
1596 }
1597 }
1598
1599 #pragma omp parallel for
1600 for (int i = 0; i < numShared; i++) {
1601 std::sort(rowIndices[i].begin(), rowIndices[i].end());
1602 }
1603
1604 // create connector
1605 Paso_SharedComponents *snd_shcomp = Paso_SharedComponents_alloc(
1606 numDOF, neighbour.size(), &neighbour[0], &sendShared[0],
1607 &offsetInShared[0], 1, 0, m_mpiInfo);
1608 Paso_SharedComponents *rcv_shcomp = Paso_SharedComponents_alloc(
1609 numDOF, neighbour.size(), &neighbour[0], &recvShared[0],
1610 &offsetInShared[0], 1, 0, m_mpiInfo);
1611 m_connector = Paso_Connector_alloc(snd_shcomp, rcv_shcomp);
1612 Paso_SharedComponents_free(snd_shcomp);
1613 Paso_SharedComponents_free(rcv_shcomp);
1614
1615 // create main and couple blocks
1616 Paso_Pattern *mainPattern = createMainPattern();
1617 Paso_Pattern *colPattern, *rowPattern;
1618 createCouplePatterns(colIndices, rowIndices, numShared, &colPattern, &rowPattern);
1619
1620 // allocate paso distribution
1621 Paso_Distribution* distribution = Paso_Distribution_alloc(m_mpiInfo,
1622 const_cast<index_t*>(&m_nodeDistribution[0]), 1, 0);
1623
1624 // finally create the system matrix
1625 m_pattern = Paso_SystemMatrixPattern_alloc(MATRIX_FORMAT_DEFAULT,
1626 distribution, distribution, mainPattern, colPattern, rowPattern,
1627 m_connector, m_connector);
1628
1629 Paso_Distribution_free(distribution);
1630
1631 // useful debug output
1632 /*
1633 cout << "--- rcv_shcomp ---" << endl;
1634 cout << "numDOF=" << numDOF << ", numNeighbors=" << neighbour.size() << endl;
1635 for (size_t i=0; i<neighbour.size(); i++) {
1636 cout << "neighbor[" << i << "]=" << neighbour[i]
1637 << " offsetInShared[" << i+1 << "]=" << offsetInShared[i+1] << endl;
1638 }
1639 for (size_t i=0; i<recvShared.size(); i++) {
1640 cout << "shared[" << i << "]=" << recvShared[i] << endl;
1641 }
1642 cout << "--- snd_shcomp ---" << endl;
1643 for (size_t i=0; i<sendShared.size(); i++) {
1644 cout << "shared[" << i << "]=" << sendShared[i] << endl;
1645 }
1646 cout << "--- dofMap ---" << endl;
1647 for (size_t i=0; i<m_dofMap.size(); i++) {
1648 cout << "m_dofMap[" << i << "]=" << m_dofMap[i] << endl;
1649 }
1650 cout << "--- colIndices ---" << endl;
1651 for (size_t i=0; i<colIndices.size(); i++) {
1652 cout << "colIndices[" << i << "].size()=" << colIndices[i].size() << endl;
1653 }
1654 */
1655
1656 /*
1657 cout << "--- main_pattern ---" << endl;
1658 cout << "M=" << mainPattern->numOutput << ", N=" << mainPattern->numInput << endl;
1659 for (size_t i=0; i<mainPattern->numOutput+1; i++) {
1660 cout << "ptr[" << i << "]=" << mainPattern->ptr[i] << endl;
1661 }
1662 for (size_t i=0; i<mainPattern->ptr[mainPattern->numOutput]; i++) {
1663 cout << "index[" << i << "]=" << mainPattern->index[i] << endl;
1664 }
1665 */
1666
1667 /*
1668 cout << "--- colCouple_pattern ---" << endl;
1669 cout << "M=" << colPattern->numOutput << ", N=" << colPattern->numInput << endl;
1670 for (size_t i=0; i<colPattern->numOutput+1; i++) {
1671 cout << "ptr[" << i << "]=" << colPattern->ptr[i] << endl;
1672 }
1673 for (size_t i=0; i<colPattern->ptr[colPattern->numOutput]; i++) {
1674 cout << "index[" << i << "]=" << colPattern->index[i] << endl;
1675 }
1676 */
1677
1678 /*
1679 cout << "--- rowCouple_pattern ---" << endl;
1680 cout << "M=" << rowPattern->numOutput << ", N=" << rowPattern->numInput << endl;
1681 for (size_t i=0; i<rowPattern->numOutput+1; i++) {
1682 cout << "ptr[" << i << "]=" << rowPattern->ptr[i] << endl;
1683 }
1684 for (size_t i=0; i<rowPattern->ptr[rowPattern->numOutput]; i++) {
1685 cout << "index[" << i << "]=" << rowPattern->index[i] << endl;
1686 }
1687 */
1688
1689 Paso_Pattern_free(mainPattern);
1690 Paso_Pattern_free(colPattern);
1691 Paso_Pattern_free(rowPattern);
1692 }
1693
1694 //private
1695 void Rectangle::addToMatrixAndRHS(Paso_SystemMatrix* S, escript::Data& F,
1696 const vector<double>& EM_S, const vector<double>& EM_F, bool addS,
1697 bool addF, index_t firstNode, dim_t nEq, dim_t nComp) const
1698 {
1699 IndexVector rowIndex;
1700 rowIndex.push_back(m_dofMap[firstNode]);
1701 rowIndex.push_back(m_dofMap[firstNode+1]);
1702 rowIndex.push_back(m_dofMap[firstNode+m_NN[0]]);
1703 rowIndex.push_back(m_dofMap[firstNode+m_NN[0]+1]);
1704 if (addF) {
1705 double *F_p=F.getSampleDataRW(0);
1706 for (index_t i=0; i<rowIndex.size(); i++) {
1707 if (rowIndex[i]<getNumDOF()) {
1708 for (index_t eq=0; eq<nEq; eq++) {
1709 F_p[INDEX2(eq, rowIndex[i], nEq)]+=EM_F[INDEX2(eq,i,nEq)];
1710 }
1711 }
1712 }
1713 }
1714 if (addS) {
1715 addToSystemMatrix(S, rowIndex, nEq, rowIndex, nComp, EM_S);
1716 }
1717 }
1718
1719 //protected
1720 void Rectangle::interpolateNodesOnElements(escript::Data& out,
1721 const escript::Data& in,
1722 bool reduced) const
1723 {
1724 const dim_t numComp = in.getDataPointSize();
1725 if (reduced) {
1726 out.requireWrite();
1727 const double c0 = 0.25;
1728 #pragma omp parallel
1729 {
1730 vector<double> f_00(numComp);
1731 vector<double> f_01(numComp);
1732 vector<double> f_10(numComp);
1733 vector<double> f_11(numComp);
1734 #pragma omp for
1735 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1736 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1737 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,k1, m_NN[0])), numComp*sizeof(double));
1738 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,k1+1, m_NN[0])), numComp*sizeof(double));
1739 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,k1, m_NN[0])), numComp*sizeof(double));
1740 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,k1+1, m_NN[0])), numComp*sizeof(double));
1741 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE[0]));
1742 for (index_t i=0; i < numComp; ++i) {
1743 o[INDEX2(i,numComp,0)] = c0*(f_00[i] + f_01[i] + f_10[i] + f_11[i]);
1744 } /* end of component loop i */
1745 } /* end of k0 loop */
1746 } /* end of k1 loop */
1747 } /* end of parallel section */
1748 } else {
1749 out.requireWrite();
1750 const double c0 = 0.16666666666666666667;
1751 const double c1 = 0.044658198738520451079;
1752 const double c2 = 0.62200846792814621559;
1753 #pragma omp parallel
1754 {
1755 vector<double> f_00(numComp);
1756 vector<double> f_01(numComp);
1757 vector<double> f_10(numComp);
1758 vector<double> f_11(numComp);
1759 #pragma omp for
1760 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1761 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1762 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,k1, m_NN[0])), numComp*sizeof(double));
1763 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,k1+1, m_NN[0])), numComp*sizeof(double));
1764 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,k1, m_NN[0])), numComp*sizeof(double));
1765 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,k1+1, m_NN[0])), numComp*sizeof(double));
1766 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE[0]));
1767 for (index_t i=0; i < numComp; ++i) {
1768 o[INDEX2(i,numComp,0)] = c0*(f_01[i] + f_10[i]) + c1*f_11[i] + c2*f_00[i];
1769 o[INDEX2(i,numComp,1)] = c0*(f_00[i] + f_11[i]) + c1*f_01[i] + c2*f_10[i];
1770 o[INDEX2(i,numComp,2)] = c0*(f_00[i] + f_11[i]) + c1*f_10[i] + c2*f_01[i];
1771 o[INDEX2(i,numComp,3)] = c0*(f_01[i] + f_10[i]) + c1*f_00[i] + c2*f_11[i];
1772 } /* end of component loop i */
1773 } /* end of k0 loop */
1774 } /* end of k1 loop */
1775 } /* end of parallel section */
1776 }
1777 }
1778
1779 //protected
1780 void Rectangle::interpolateNodesOnFaces(escript::Data& out,
1781 const escript::Data& in,
1782 bool reduced) const
1783 {
1784 const dim_t numComp = in.getDataPointSize();
1785 if (reduced) {
1786 out.requireWrite();
1787 #pragma omp parallel
1788 {
1789 vector<double> f_00(numComp);
1790 vector<double> f_01(numComp);
1791 vector<double> f_10(numComp);
1792 vector<double> f_11(numComp);
1793 if (m_faceOffset[0] > -1) {
1794 #pragma omp for nowait
1795 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1796 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(0,k1, m_NN[0])), numComp*sizeof(double));
1797 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(0,k1+1, m_NN[0])), numComp*sizeof(double));
1798 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
1799 for (index_t i=0; i < numComp; ++i) {
1800 o[INDEX2(i,numComp,0)] = (f_00[i] + f_01[i])/2;
1801 } /* end of component loop i */
1802 } /* end of k1 loop */
1803 } /* end of face 0 */
1804 if (m_faceOffset[1] > -1) {
1805 #pragma omp for nowait
1806 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1807 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1, m_NN[0])), numComp*sizeof(double));
1808 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1+1, m_NN[0])), numComp*sizeof(double));
1809 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
1810 for (index_t i=0; i < numComp; ++i) {
1811 o[INDEX2(i,numComp,0)] = (f_10[i] + f_11[i])/2;
1812 } /* end of component loop i */
1813 } /* end of k1 loop */
1814 } /* end of face 1 */
1815 if (m_faceOffset[2] > -1) {
1816 #pragma omp for nowait
1817 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1818 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,0, m_NN[0])), numComp*sizeof(double));
1819 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,0, m_NN[0])), numComp*sizeof(double));
1820 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1821 for (index_t i=0; i < numComp; ++i) {
1822 o[INDEX2(i,numComp,0)] = (f_00[i] + f_10[i])/2;
1823 } /* end of component loop i */
1824 } /* end of k0 loop */
1825 } /* end of face 2 */
1826 if (m_faceOffset[3] > -1) {
1827 #pragma omp for nowait
1828 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1829 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1830 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1831 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1832 for (index_t i=0; i < numComp; ++i) {
1833 o[INDEX2(i,numComp,0)] = (f_01[i] + f_11[i])/2;
1834 } /* end of component loop i */
1835 } /* end of k0 loop */
1836 } /* end of face 3 */
1837 } /* end of parallel section */
1838 } else {
1839 out.requireWrite();
1840 const double c0 = 0.21132486540518711775;
1841 const double c1 = 0.78867513459481288225;
1842 #pragma omp parallel
1843 {
1844 vector<double> f_00(numComp);
1845 vector<double> f_01(numComp);
1846 vector<double> f_10(numComp);
1847 vector<double> f_11(numComp);
1848 if (m_faceOffset[0] > -1) {
1849 #pragma omp for nowait
1850 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1851 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(0,k1, m_NN[0])), numComp*sizeof(double));
1852 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(0,k1+1, m_NN[0])), numComp*sizeof(double));
1853 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
1854 for (index_t i=0; i < numComp; ++i) {
1855 o[INDEX2(i,numComp,0)] = c0*f_01[i] + c1*f_00[i];
1856 o[INDEX2(i,numComp,1)] = c0*f_00[i] + c1*f_01[i];
1857 } /* end of component loop i */
1858 } /* end of k1 loop */
1859 } /* end of face 0 */
1860 if (m_faceOffset[1] > -1) {
1861 #pragma omp for nowait
1862 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1863 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1, m_NN[0])), numComp*sizeof(double));
1864 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1+1, m_NN[0])), numComp*sizeof(double));
1865 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
1866 for (index_t i=0; i < numComp; ++i) {
1867 o[INDEX2(i,numComp,0)] = c1*f_10[i] + c0*f_11[i];
1868 o[INDEX2(i,numComp,1)] = c1*f_11[i] + c0*f_10[i];
1869 } /* end of component loop i */
1870 } /* end of k1 loop */
1871 } /* end of face 1 */
1872 if (m_faceOffset[2] > -1) {
1873 #pragma omp for nowait
1874 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1875 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,0, m_NN[0])), numComp*sizeof(double));
1876 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,0, m_NN[0])), numComp*sizeof(double));
1877 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1878 for (index_t i=0; i < numComp; ++i) {
1879 o[INDEX2(i,numComp,0)] = c0*f_10[i] + c1*f_00[i];
1880 o[INDEX2(i,numComp,1)] = c0*f_00[i] + c1*f_10[i];
1881 } /* end of component loop i */
1882 } /* end of k0 loop */
1883 } /* end of face 2 */
1884 if (m_faceOffset[3] > -1) {
1885 #pragma omp for nowait
1886 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1887 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1888 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1889 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1890 for (index_t i=0; i < numComp; ++i) {
1891 o[INDEX2(i,numComp,0)] = c0*f_11[i] + c1*f_01[i];
1892 o[INDEX2(i,numComp,1)] = c0*f_01[i] + c1*f_11[i];
1893 } /* end of component loop i */
1894 } /* end of k0 loop */
1895 } /* end of face 3 */
1896 } /* end of parallel section */
1897 }
1898 }
1899
1900
1901
1902 namespace
1903 {
1904 // Calculates a guassian blur colvolution matrix for 2D
1905 // See wiki article on the subject
1906 double* get2DGauss(unsigned radius, double sigma)
1907 {
1908 double* arr=new double[(radius*2+1)*(radius*2+1)];
1909 double common=M_1_PI*0.5*1/(sigma*sigma);
1910 double total=0;
1911 int r=static_cast<int>(radius);
1912 for (int y=-r;y<=r;++y)
1913 {
1914 for (int x=-r;x<=r;++x)
1915 {
1916 arr[(x+r)+(y+r)*(r*2+1)]=common*exp(-(x*x+y*y)/(2*sigma*sigma));
1917 total+=arr[(x+r)+(y+r)*(r*2+1)];
1918 }
1919 }
1920 double invtotal=1/total;
1921 for (size_t p=0;p<(radius*2+1)*(radius*2+1);++p)
1922 {
1923 arr[p]*=invtotal;
1924 }
1925 return arr;
1926 }
1927
1928 // applies conv to source to get a point.
1929 // (xp, yp) are the coords in the source matrix not the destination matrix
1930 double Convolve2D(double* conv, double* source, size_t xp, size_t yp, unsigned radius, size_t width)
1931 {
1932 size_t bx=xp-radius, by=yp-radius;
1933 size_t sbase=bx+by*width;
1934 double result=0;
1935 for (int y=0;y<2*radius+1;++y)
1936 {
1937 for (int x=0;x<2*radius+1;++x)
1938 {
1939 result+=conv[x+y*(2*radius+1)] * source[sbase + x+y*width];
1940 }
1941 }
1942 return result;
1943 }
1944 }
1945
1946
1947 /* This is a wrapper for filtered (and non-filtered) randoms
1948 * For detailed doco see randomFillWorker
1949 */
1950 escript::Data Rectangle::randomFill(const escript::DataTypes::ShapeType& shape,
1951 const escript::FunctionSpace& what,
1952 long seed, const boost::python::tuple& filter) const
1953 {
1954 int numvals=escript::DataTypes::noValues(shape);
1955 if (len(filter)>0 && (numvals!=1))
1956 {
1957 throw RipleyException("Ripley only supports filters for scalar data.");
1958 }
1959 escript::Data res=randomFillWorker(shape, seed, filter);
1960 if (res.getFunctionSpace()!=what)
1961 {
1962 escript::Data r=escript::Data(res, what);
1963 return r;
1964 }
1965 return res;
1966 }
1967
1968
1969 /* This routine produces a Data object filled with smoothed random data.
1970 The dimensions of the rectangle being filled are internal[0] x internal[1] points.
1971 A parameter radius gives the size of the stencil used for the smoothing.
1972 A point on the left hand edge for example, will still require `radius` extra points to the left
1973 in order to complete the stencil.
1974
1975 All local calculation is done on an array called `src`, with
1976 dimensions = ext[0] * ext[1].
1977 Where ext[i]= internal[i]+2*radius.
1978
1979 Now for MPI there is overlap to deal with. We need to share both the overlapping
1980 values themselves but also the external region.
1981
1982 In a hypothetical 1-D case:
1983
1984
1985 1234567
1986 would be split into two ranks thus:
1987 123(4) (4)567 [4 being a shared element]
1988
1989 If the radius is 2. There will be padding elements on the outside:
1990
1991 pp123(4) (4)567pp
1992
1993 To ensure that 4 can be correctly computed on both ranks, values from the other rank
1994 need to be known.
1995
1996 pp123(4)56 23(4)567pp
1997
1998 Now in our case, we wout set all the values 23456 on the left rank and send them to the
1999 right hand rank.
2000
2001 So the edges _may_ need to be shared at a distance `inset` from all boundaries.
2002 inset=2*radius+1
2003 This is to ensure that values at distance `radius` from the shared/overlapped element
2004 that ripley has.
2005
2006
2007 */
2008 escript::Data Rectangle::randomFillWorker(const escript::DataTypes::ShapeType& shape,
2009 long seed, const boost::python::tuple& filter) const
2010 {
2011 if (m_numDim!=2)
2012 {
2013 throw RipleyException("Only 2D supported at this time.");
2014 }
2015
2016 unsigned int radius=0; // these are only used by gaussian
2017 double sigma=0.5;
2018
2019 unsigned int numvals=escript::DataTypes::noValues(shape);
2020
2021
2022 if (len(filter)==0)
2023 {
2024 // nothing special required here yet
2025 }
2026 else if (len(filter)==3)
2027 {
2028 boost::python::extract<string> ex(filter[0]);
2029 if (!ex.check() || (ex()!="gaussian"))
2030 {
2031 throw RipleyException("Unsupported random filter");
2032 }
2033 boost::python::extract<unsigned int> ex1(filter[1]);
2034 if (!ex1.check())
2035 {
2036 throw RipleyException("Radius of gaussian filter must be a positive integer.");
2037 }
2038 radius=ex1();
2039 sigma=0.5;
2040 boost::python::extract<double> ex2(filter[2]);
2041 if (!ex2.check() || (sigma=ex2())<=0)
2042 {
2043 throw RipleyException("Sigma must be a postive floating point number.");
2044 }
2045 }
2046 else
2047 {
2048 throw RipleyException("Unsupported random filter for Rectangle.");
2049 }
2050
2051
2052
2053 size_t internal[2];
2054 internal[0]=m_NE[0]+1; // number of points in the internal region
2055 internal[1]=m_NE[1]+1; // that is, the ones we need smoothed versions of
2056 size_t ext[2];
2057 ext[0]=internal[0]+2*radius; // includes points we need as input
2058 ext[1]=internal[1]+2*radius; // for smoothing
2059
2060 // now we check to see if the radius is acceptable
2061 // That is, would not cross multiple ranks in MPI
2062
2063 if (2*radius>=internal[0]-4)
2064 {
2065 throw RipleyException("Radius of gaussian filter is too large for X dimension of a rank");
2066 }
2067 if (2*radius>=internal[1]-4)
2068 {
2069 throw RipleyException("Radius of gaussian filter is too large for Y dimension of a rank");
2070 }
2071
2072 double* src=new double[ext[0]*ext[1]*numvals];
2073 esysUtils::randomFillArray(seed, src, ext[0]*ext[1]*numvals);
2074
2075
2076
2077 #ifdef ESYS_MPI
2078 if ((internal[0]<5) || (internal[1]<5))
2079 {
2080 // since the dimensions are equal for all ranks, this exception
2081 // will be thrown on all ranks
2082 throw RipleyException("Random Data in Ripley requries at least five elements per side per rank.");
2083 }
2084 dim_t X=m_mpiInfo->rank%m_NX[0];
2085 dim_t Y=m_mpiInfo->rank%(m_NX[0]*m_NX[1])/m_NX[0];
2086 #endif
2087
2088 /*
2089 // if we wanted to test a repeating pattern
2090 size_t basex=0;
2091 size_t basey=0;
2092 #ifdef ESYS_MPI
2093 basex=X*m_gNE[0]/m_NX[0];
2094 basey=Y*m_gNE[1]/m_NX[1];
2095 #endif
2096
2097 esysUtils::patternFillArray2D(ext[0], ext[1], src, 4, basex, basey, numvals);
2098 */
2099
2100
2101 #ifdef ESYS_MPI
2102
2103 BlockGrid2 grid(m_NX[0]-1, m_NX[1]-1);
2104 size_t inset=2*radius+2; // Its +2 not +1 because a whole element is shared (and hence
2105 // there is an overlap of two points both of which need to have "radius" points on either side.
2106
2107 size_t xmidlen=ext[0]-2*inset; // how wide is the x-dimension between the two insets
2108 size_t ymidlen=ext[1]-2*inset;
2109
2110 Block2 block(ext[0], ext[1], inset, xmidlen, ymidlen, numvals);
2111
2112 MPI_Request reqs[40]; // a non-tight upper bound on how many we need
2113 MPI_Status stats[40];
2114 short rused=0;
2115
2116 messvec incoms;
2117 messvec outcoms;
2118
2119 grid.generateInNeighbours(X, Y, incoms);
2120 grid.generateOutNeighbours(X, Y, outcoms);
2121
2122 block.copyAllToBuffer(src);
2123
2124 int comserr=0;
2125 for (size_t i=0;i<incoms.size();++i)
2126 {
2127 message& m=incoms[i];
2128 comserr|=MPI_Irecv(block.getInBuffer(m.destbuffid), block.getBuffSize(m.destbuffid) , MPI_DOUBLE, m.sourceID, m.tag, m_mpiInfo->comm, reqs+(rused++));
2129 block.setUsed(m.destbuffid);
2130 }
2131
2132 for (size_t i=0;i<outcoms.size();++i)
2133 {
2134 message& m=outcoms[i];
2135 comserr|=MPI_Isend(block.getOutBuffer(m.srcbuffid), block.getBuffSize(m.srcbuffid) , MPI_DOUBLE, m.destID, m.tag, m_mpiInfo->comm, reqs+(rused++));
2136 }
2137
2138 if (!comserr)
2139 {
2140 comserr=MPI_Waitall(rused, reqs, stats);
2141 }
2142
2143 if (comserr)
2144 {
2145 // Yes this is throwing an exception as a result of an MPI error.
2146 // and no we don't inform the other ranks that we are doing this.
2147 // however, we have no reason to believe coms work at this point anyway
2148 throw RipleyException("Error in coms for randomFill");
2149 }
2150
2151 block.copyUsedFromBuffer(src);
2152
2153 #endif
2154
2155 if (radius==0 || numvals>1) // the truth of either should imply the truth of the other but let's be safe
2156 {
2157
2158 escript::FunctionSpace fs(getPtr(), getContinuousFunctionCode());
2159 escript::Data resdat(0, shape, fs , true);
2160 // don't need to check for exwrite because we just made it
2161 escript::DataVector& dv=resdat.getExpandedVectorReference();
2162
2163
2164 // now we need to copy values over
2165 for (size_t y=0;y<(internal[1]);++y)
2166 {
2167 for (size_t x=0;x<(internal[0]);++x)
2168 {
2169 for (unsigned int i=0;i<numvals;++i)
2170 {
2171 dv[i+(x+y*(internal[0]))*numvals]=src[i+(x+y*ext[0])*numvals];
2172 }
2173 }
2174 }
2175 delete[] src;
2176 return resdat;
2177 }
2178 else // filter enabled
2179 {
2180 escript::FunctionSpace fs(getPtr(), getContinuousFunctionCode());
2181 escript::Data resdat(0, escript::DataTypes::scalarShape, fs , true);
2182 // don't need to check for exwrite because we just made it
2183 escript::DataVector& dv=resdat.getExpandedVectorReference();
2184 double* convolution=get2DGauss(radius, sigma);
2185 for (size_t y=0;y<(internal[1]);++y)
2186 {
2187 for (size_t x=0;x<(internal[0]);++x)
2188 {
2189 dv[x+y*(internal[0])]=Convolve2D(convolution, src, x+radius, y+radius, radius, ext[0]);
2190
2191 }
2192 }
2193 delete[] convolution;
2194 delete[] src;
2195 return resdat;
2196 }
2197 }
2198
2199 int Rectangle::findNode(const double *coords) const {
2200 const int NOT_MINE = -1;
2201 //is the found element even owned by this rank
2202 // (inside owned or shared elements but will map to an owned element)
2203 for (int dim = 0; dim < m_numDim; dim++) {
2204 double min = m_origin[dim] + m_offset[dim]* m_dx[dim]
2205 - m_dx[dim]/2.; //allows for point outside mapping onto node
2206 double max = m_origin[dim] + (m_offset[dim] + m_NE[dim])*m_dx[dim]
2207 + m_dx[dim]/2.;
2208 if (min > coords[dim] || max < coords[dim]) {
2209 return NOT_MINE;
2210 }
2211 }
2212 // get distance from origin
2213 double x = coords[0] - m_origin[0];
2214 double y = coords[1] - m_origin[1];
2215 // distance in elements
2216 int ex = (int) floor(x / m_dx[0]);
2217 int ey = (int) floor(y / m_dx[1]);
2218 // set the min distance high enough to be outside the element plus a bit
2219 int closest = NOT_MINE;
2220 double minDist = 1;
2221 for (int dim = 0; dim < m_numDim; dim++) {
2222 minDist += m_dx[dim]*m_dx[dim];
2223 }
2224 //find the closest node
2225 for (int dx = 0; dx < 1; dx++) {
2226 double xdist = (x - (ex + dx)*m_dx[0]);
2227 for (int dy = 0; dy < 1; dy++) {
2228 double ydist = (y - (ey + dy)*m_dx[1]);
2229 double total = xdist*xdist + ydist*ydist;
2230 if (total < minDist) {
2231 closest = INDEX2(ex+dx-m_offset[0], ey+dy-m_offset[1], m_NE[0] + 1);
2232 minDist = total;
2233 }
2234 }
2235 }
2236 //if this happens, we've let a dirac point slip through, which is awful
2237 if (closest == NOT_MINE) {
2238 throw RipleyException("Unable to map appropriate dirac point to a node,"
2239 " implementation problem in Rectangle::findNode()");
2240 }
2241 return closest;
2242 }
2243
2244 void Rectangle::setAssembler(std::string type, std::map<std::string,
2245 escript::Data> constants) {
2246 if (type.compare("WaveAssembler") == 0) {
2247 delete assembler;
2248 assembler = new WaveAssembler2D(this, m_dx, m_NX, m_NE, m_NN, constants);
2249 } else if (type.compare("LameAssembler") == 0) {
2250 delete assembler;
2251 assembler = new LameAssembler2D(this, m_dx, m_NX, m_NE, m_NN);
2252 } else { //else ifs would go before this for other types
2253 throw RipleyException("Ripley::Rectangle does not support the"
2254 " requested assembler");
2255 }
2256 }
2257
2258 } // end of namespace ripley
2259

  ViewVC Help
Powered by ViewVC 1.1.26