/[escript]/trunk/ripley/src/Rectangle.cpp
ViewVC logotype

Contents of /trunk/ripley/src/Rectangle.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4622 - (show annotations)
Fri Jan 17 04:55:41 2014 UTC (5 years, 9 months ago) by sshaw
File size: 88644 byte(s)
Added dirac support to ripley, added interface for custom assemblers for ripleydomains (also added the custom assembler for 2D VTI waves), changed synthetic_VTI example to use the new, faster custom assembler

1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2013 by University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Open Software License version 3.0
9 * http://www.opensource.org/licenses/osl-3.0.php
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development since 2012 by School of Earth Sciences
13 *
14 *****************************************************************************/
15
16 #include <ripley/Rectangle.h>
17 #include <paso/SystemMatrix.h>
18 #include <esysUtils/esysFileWriter.h>
19 #include <ripley/DefaultAssembler2D.h>
20 #include <ripley/WaveAssembler2D.h>
21 #include <boost/scoped_array.hpp>
22 #include "esysUtils/EsysRandom.h"
23
24 #ifdef USE_NETCDF
25 #include <netcdfcpp.h>
26 #endif
27
28 #if USE_SILO
29 #include <silo.h>
30 #ifdef ESYS_MPI
31 #include <pmpio.h>
32 #endif
33 #endif
34
35 #include <iomanip>
36
37 using namespace std;
38 using esysUtils::FileWriter;
39
40 namespace ripley {
41
42 Rectangle::Rectangle(int n0, int n1, double x0, double y0, double x1,
43 double y1, int d0, int d1,
44 const std::vector<double>& points,
45 const std::vector<int>& tags,
46 const std::map<std::string, int>& tagnamestonums) :
47 RipleyDomain(2)
48 {
49 // ignore subdivision parameters for serial run
50 if (m_mpiInfo->size == 1) {
51 d0=1;
52 d1=1;
53 }
54
55 bool warn=false;
56 // if number of subdivisions is non-positive, try to subdivide by the same
57 // ratio as the number of elements
58 if (d0<=0 && d1<=0) {
59 warn=true;
60 d0=max(1, (int)(sqrt(m_mpiInfo->size*(n0+1)/(float)(n1+1))));
61 d1=m_mpiInfo->size/d0;
62 if (d0*d1 != m_mpiInfo->size) {
63 // ratios not the same so subdivide side with more elements only
64 if (n0>n1) {
65 d0=0;
66 d1=1;
67 } else {
68 d0=1;
69 d1=0;
70 }
71 }
72 }
73 if (d0<=0) {
74 // d1 is preset, determine d0 - throw further down if result is no good
75 d0=m_mpiInfo->size/d1;
76 } else if (d1<=0) {
77 // d0 is preset, determine d1 - throw further down if result is no good
78 d1=m_mpiInfo->size/d0;
79 }
80
81 // ensure number of subdivisions is valid and nodes can be distributed
82 // among number of ranks
83 if (d0*d1 != m_mpiInfo->size)
84 throw RipleyException("Invalid number of spatial subdivisions");
85
86 if (warn) {
87 cout << "Warning: Automatic domain subdivision (d0=" << d0 << ", d1="
88 << d1 << "). This may not be optimal!" << endl;
89 }
90
91 double l0 = x1-x0;
92 double l1 = y1-y0;
93 m_dx[0] = l0/n0;
94 m_dx[1] = l1/n1;
95
96 if ((n0+1)%d0 > 0) {
97 n0=(int)round((float)(n0+1)/d0+0.5)*d0-1;
98 l0=m_dx[0]*n0;
99 cout << "Warning: Adjusted number of elements and length. N0="
100 << n0 << ", l0=" << l0 << endl;
101 }
102 if ((n1+1)%d1 > 0) {
103 n1=(int)round((float)(n1+1)/d1+0.5)*d1-1;
104 l1=m_dx[1]*n1;
105 cout << "Warning: Adjusted number of elements and length. N1="
106 << n1 << ", l1=" << l1 << endl;
107 }
108
109 if ((d0 > 1 && (n0+1)/d0<2) || (d1 > 1 && (n1+1)/d1<2))
110 throw RipleyException("Too few elements for the number of ranks");
111
112 m_gNE[0] = n0;
113 m_gNE[1] = n1;
114 m_origin[0] = x0;
115 m_origin[1] = y0;
116 m_length[0] = l0;
117 m_length[1] = l1;
118 m_NX[0] = d0;
119 m_NX[1] = d1;
120
121 // local number of elements (with and without overlap)
122 m_NE[0] = m_ownNE[0] = (d0>1 ? (n0+1)/d0 : n0);
123 if (m_mpiInfo->rank%d0>0 && m_mpiInfo->rank%d0<d0-1)
124 m_NE[0]++;
125 else if (d0>1 && m_mpiInfo->rank%d0==d0-1)
126 m_ownNE[0]--;
127
128 m_NE[1] = m_ownNE[1] = (d1>1 ? (n1+1)/d1 : n1);
129 if (m_mpiInfo->rank/d0>0 && m_mpiInfo->rank/d0<d1-1)
130 m_NE[1]++;
131 else if (d1>1 && m_mpiInfo->rank/d0==d1-1)
132 m_ownNE[1]--;
133
134 // local number of nodes
135 m_NN[0] = m_NE[0]+1;
136 m_NN[1] = m_NE[1]+1;
137
138 // bottom-left node is at (offset0,offset1) in global mesh
139 m_offset[0] = (n0+1)/d0*(m_mpiInfo->rank%d0);
140 if (m_offset[0] > 0)
141 m_offset[0]--;
142 m_offset[1] = (n1+1)/d1*(m_mpiInfo->rank/d0);
143 if (m_offset[1] > 0)
144 m_offset[1]--;
145
146 populateSampleIds();
147 createPattern();
148 assembler = new DefaultAssembler2D(this, m_dx, m_NX, m_NE, m_NN);
149 for (map<string, int>::const_iterator i = tagnamestonums.begin();
150 i != tagnamestonums.end(); i++) {
151 setTagMap(i->first, i->second);
152 }
153 addPoints(tags.size(), &points[0], &tags[0]);
154 }
155
156 Rectangle::~Rectangle()
157 {
158 Paso_SystemMatrixPattern_free(m_pattern);
159 Paso_Connector_free(m_connector);
160 delete assembler;
161 }
162
163 string Rectangle::getDescription() const
164 {
165 return "ripley::Rectangle";
166 }
167
168 bool Rectangle::operator==(const AbstractDomain& other) const
169 {
170 const Rectangle* o=dynamic_cast<const Rectangle*>(&other);
171 if (o) {
172 return (RipleyDomain::operator==(other) &&
173 m_gNE[0]==o->m_gNE[0] && m_gNE[1]==o->m_gNE[1]
174 && m_origin[0]==o->m_origin[0] && m_origin[1]==o->m_origin[1]
175 && m_length[0]==o->m_length[0] && m_length[1]==o->m_length[1]
176 && m_NX[0]==o->m_NX[0] && m_NX[1]==o->m_NX[1]);
177 }
178
179 return false;
180 }
181
182 void Rectangle::readNcGrid(escript::Data& out, string filename, string varname,
183 const ReaderParameters& params) const
184 {
185 #ifdef USE_NETCDF
186 // check destination function space
187 int myN0, myN1;
188 if (out.getFunctionSpace().getTypeCode() == Nodes) {
189 myN0 = m_NN[0];
190 myN1 = m_NN[1];
191 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
192 out.getFunctionSpace().getTypeCode() == ReducedElements) {
193 myN0 = m_NE[0];
194 myN1 = m_NE[1];
195 } else
196 throw RipleyException("readNcGrid(): invalid function space for output data object");
197
198 if (params.first.size() != 2)
199 throw RipleyException("readNcGrid(): argument 'first' must have 2 entries");
200
201 if (params.numValues.size() != 2)
202 throw RipleyException("readNcGrid(): argument 'numValues' must have 2 entries");
203
204 if (params.multiplier.size() != 2)
205 throw RipleyException("readNcGrid(): argument 'multiplier' must have 2 entries");
206 for (size_t i=0; i<params.multiplier.size(); i++)
207 if (params.multiplier[i]<1)
208 throw RipleyException("readNcGrid(): all multipliers must be positive");
209 if (params.reverse.size() != 2)
210 throw RipleyException("readNcGrid(): argument 'reverse' must have 2 entries");
211
212 // check file existence and size
213 NcFile f(filename.c_str(), NcFile::ReadOnly);
214 if (!f.is_valid())
215 throw RipleyException("readNcGrid(): cannot open file");
216
217 NcVar* var = f.get_var(varname.c_str());
218 if (!var)
219 throw RipleyException("readNcGrid(): invalid variable");
220
221 // TODO: rank>0 data support
222 const int numComp = out.getDataPointSize();
223 if (numComp > 1)
224 throw RipleyException("readNcGrid(): only scalar data supported");
225
226 const int dims = var->num_dims();
227 boost::scoped_array<long> edges(var->edges());
228
229 // is this a slice of the data object (dims!=2)?
230 // note the expected ordering of edges (as in numpy: y,x)
231 if ( (dims==2 && (params.numValues[1] > edges[0] || params.numValues[0] > edges[1]))
232 || (dims==1 && params.numValues[1]>1) ) {
233 throw RipleyException("readNcGrid(): not enough data in file");
234 }
235
236 // check if this rank contributes anything
237 if (params.first[0] >= m_offset[0]+myN0 ||
238 params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
239 params.first[1] >= m_offset[1]+myN1 ||
240 params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1])
241 return;
242
243 // now determine how much this rank has to write
244
245 // first coordinates in data object to write to
246 const int first0 = max(0, params.first[0]-m_offset[0]);
247 const int first1 = max(0, params.first[1]-m_offset[1]);
248 // indices to first value in file (not accounting for reverse yet)
249 int idx0 = max(0, m_offset[0]-params.first[0]);
250 int idx1 = max(0, m_offset[1]-params.first[1]);
251 // number of values to read
252 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
253 const int num1 = min(params.numValues[1]-idx1, myN1-first1);
254
255 // make sure we read the right block if going backwards through file
256 if (params.reverse[0])
257 idx0 = edges[dims-1]-num0-idx0;
258 if (dims>1 && params.reverse[1])
259 idx1 = edges[dims-2]-num1-idx1;
260
261 vector<double> values(num0*num1);
262 if (dims==2) {
263 var->set_cur(idx1, idx0);
264 var->get(&values[0], num1, num0);
265 } else {
266 var->set_cur(idx0);
267 var->get(&values[0], num0);
268 }
269
270 const int dpp = out.getNumDataPointsPerSample();
271 out.requireWrite();
272
273 // helpers for reversing
274 const int x0 = (params.reverse[0] ? num0-1 : 0);
275 const int x_mult = (params.reverse[0] ? -1 : 1);
276 const int y0 = (params.reverse[1] ? num1-1 : 0);
277 const int y_mult = (params.reverse[1] ? -1 : 1);
278
279 for (index_t y=0; y<num1; y++) {
280 #pragma omp parallel for
281 for (index_t x=0; x<num0; x++) {
282 const int baseIndex = first0+x*params.multiplier[0]
283 +(first1+y*params.multiplier[1])*myN0;
284 const int srcIndex = (y0+y_mult*y)*num0+(x0+x_mult*x);
285 if (!isnan(values[srcIndex])) {
286 for (index_t m1=0; m1<params.multiplier[1]; m1++) {
287 for (index_t m0=0; m0<params.multiplier[0]; m0++) {
288 const int dataIndex = baseIndex+m0+m1*myN0;
289 double* dest = out.getSampleDataRW(dataIndex);
290 for (index_t q=0; q<dpp; q++) {
291 *dest++ = values[srcIndex];
292 }
293 }
294 }
295 }
296 }
297 }
298 #else
299 throw RipleyException("readNcGrid(): not compiled with netCDF support");
300 #endif
301 }
302
303 void Rectangle::readBinaryGrid(escript::Data& out, string filename,
304 const ReaderParameters& params) const
305 {
306 // the mapping is not universally correct but should work on our
307 // supported platforms
308 switch (params.dataType) {
309 case DATATYPE_INT32:
310 readBinaryGridImpl<int>(out, filename, params);
311 break;
312 case DATATYPE_FLOAT32:
313 readBinaryGridImpl<float>(out, filename, params);
314 break;
315 case DATATYPE_FLOAT64:
316 readBinaryGridImpl<double>(out, filename, params);
317 break;
318 default:
319 throw RipleyException("readBinaryGrid(): invalid or unsupported datatype");
320 }
321 }
322
323 template<typename ValueType>
324 void Rectangle::readBinaryGridImpl(escript::Data& out, const string& filename,
325 const ReaderParameters& params) const
326 {
327 // check destination function space
328 int myN0, myN1;
329 if (out.getFunctionSpace().getTypeCode() == Nodes) {
330 myN0 = m_NN[0];
331 myN1 = m_NN[1];
332 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
333 out.getFunctionSpace().getTypeCode() == ReducedElements) {
334 myN0 = m_NE[0];
335 myN1 = m_NE[1];
336 } else
337 throw RipleyException("readBinaryGrid(): invalid function space for output data object");
338
339 // check file existence and size
340 ifstream f(filename.c_str(), ifstream::binary);
341 if (f.fail()) {
342 throw RipleyException("readBinaryGrid(): cannot open file");
343 }
344 f.seekg(0, ios::end);
345 const int numComp = out.getDataPointSize();
346 const int filesize = f.tellg();
347 const int reqsize = params.numValues[0]*params.numValues[1]*numComp*sizeof(ValueType);
348 if (filesize < reqsize) {
349 f.close();
350 throw RipleyException("readBinaryGrid(): not enough data in file");
351 }
352
353 // check if this rank contributes anything
354 if (params.first[0] >= m_offset[0]+myN0 ||
355 params.first[0]+params.numValues[0] <= m_offset[0] ||
356 params.first[1] >= m_offset[1]+myN1 ||
357 params.first[1]+params.numValues[1] <= m_offset[1]) {
358 f.close();
359 return;
360 }
361
362 // now determine how much this rank has to write
363
364 // first coordinates in data object to write to
365 const int first0 = max(0, params.first[0]-m_offset[0]);
366 const int first1 = max(0, params.first[1]-m_offset[1]);
367 // indices to first value in file
368 const int idx0 = max(0, m_offset[0]-params.first[0]);
369 const int idx1 = max(0, m_offset[1]-params.first[1]);
370 // number of values to read
371 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
372 const int num1 = min(params.numValues[1]-idx1, myN1-first1);
373
374 out.requireWrite();
375 vector<ValueType> values(num0*numComp);
376 const int dpp = out.getNumDataPointsPerSample();
377
378 for (int y=0; y<num1; y++) {
379 const int fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]);
380 f.seekg(fileofs*sizeof(ValueType));
381 f.read((char*)&values[0], num0*numComp*sizeof(ValueType));
382 for (int x=0; x<num0; x++) {
383 const int baseIndex = first0+x*params.multiplier[0]
384 +(first1+y*params.multiplier[1])*myN0;
385 for (int m1=0; m1<params.multiplier[1]; m1++) {
386 for (int m0=0; m0<params.multiplier[0]; m0++) {
387 const int dataIndex = baseIndex+m0+m1*myN0;
388 double* dest = out.getSampleDataRW(dataIndex);
389 for (int c=0; c<numComp; c++) {
390 ValueType val = values[x*numComp+c];
391
392 if (params.byteOrder != BYTEORDER_NATIVE) {
393 char* cval = reinterpret_cast<char*>(&val);
394 // this will alter val!!
395 byte_swap32(cval);
396 }
397 if (!std::isnan(val)) {
398 for (int q=0; q<dpp; q++) {
399 *dest++ = static_cast<double>(val);
400 }
401 }
402 }
403 }
404 }
405 }
406 }
407
408 f.close();
409 }
410
411 void Rectangle::writeBinaryGrid(const escript::Data& in, string filename,
412 int byteOrder, int dataType) const
413 {
414 // the mapping is not universally correct but should work on our
415 // supported platforms
416 switch (dataType) {
417 case DATATYPE_INT32:
418 writeBinaryGridImpl<int>(in, filename, byteOrder);
419 break;
420 case DATATYPE_FLOAT32:
421 writeBinaryGridImpl<float>(in, filename, byteOrder);
422 break;
423 case DATATYPE_FLOAT64:
424 writeBinaryGridImpl<double>(in, filename, byteOrder);
425 break;
426 default:
427 throw RipleyException("writeBinaryGrid(): invalid or unsupported datatype");
428 }
429 }
430
431 template<typename ValueType>
432 void Rectangle::writeBinaryGridImpl(const escript::Data& in,
433 const string& filename, int byteOrder) const
434 {
435 // check function space and determine number of points
436 int myN0, myN1;
437 int totalN0, totalN1;
438 if (in.getFunctionSpace().getTypeCode() == Nodes) {
439 myN0 = m_NN[0];
440 myN1 = m_NN[1];
441 totalN0 = m_gNE[0]+1;
442 totalN1 = m_gNE[1]+1;
443 } else if (in.getFunctionSpace().getTypeCode() == Elements ||
444 in.getFunctionSpace().getTypeCode() == ReducedElements) {
445 myN0 = m_NE[0];
446 myN1 = m_NE[1];
447 totalN0 = m_gNE[0];
448 totalN1 = m_gNE[1];
449 } else
450 throw RipleyException("writeBinaryGrid(): invalid function space of data object");
451
452 const int numComp = in.getDataPointSize();
453 const int dpp = in.getNumDataPointsPerSample();
454
455 if (numComp > 1 || dpp > 1)
456 throw RipleyException("writeBinaryGrid(): only scalar, single-value data supported");
457
458 escript::Data* _in = const_cast<escript::Data*>(&in);
459 const int fileSize = sizeof(ValueType)*numComp*dpp*totalN0*totalN1;
460
461 // from here on we know that each sample consists of one value
462 FileWriter fw;
463 fw.openFile(filename, fileSize);
464 MPIBarrier();
465
466 for (index_t y=0; y<myN1; y++) {
467 const int fileofs = (m_offset[0]+(m_offset[1]+y)*totalN0)*sizeof(ValueType);
468 ostringstream oss;
469
470 for (index_t x=0; x<myN0; x++) {
471 const double* sample = _in->getSampleDataRO(y*myN0+x);
472 ValueType fvalue = static_cast<ValueType>(*sample);
473 if (byteOrder == BYTEORDER_NATIVE) {
474 oss.write((char*)&fvalue, sizeof(fvalue));
475 } else {
476 char* value = reinterpret_cast<char*>(&fvalue);
477 oss.write(byte_swap32(value), sizeof(fvalue));
478 }
479 }
480 fw.writeAt(oss, fileofs);
481 }
482 fw.close();
483 }
484
485 void Rectangle::dump(const string& fileName) const
486 {
487 #if USE_SILO
488 string fn(fileName);
489 if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
490 fn+=".silo";
491 }
492
493 int driver=DB_HDF5;
494 DBfile* dbfile = NULL;
495 const char* blockDirFmt = "/block%04d";
496
497 #ifdef ESYS_MPI
498 PMPIO_baton_t* baton = NULL;
499 const int NUM_SILO_FILES = 1;
500 #endif
501
502 if (m_mpiInfo->size > 1) {
503 #ifdef ESYS_MPI
504 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
505 0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
506 PMPIO_DefaultClose, (void*)&driver);
507 // try the fallback driver in case of error
508 if (!baton && driver != DB_PDB) {
509 driver = DB_PDB;
510 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
511 0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
512 PMPIO_DefaultClose, (void*)&driver);
513 }
514 if (baton) {
515 char siloPath[64];
516 snprintf(siloPath, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
517 dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath);
518 }
519 #endif
520 } else {
521 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
522 getDescription().c_str(), driver);
523 // try the fallback driver in case of error
524 if (!dbfile && driver != DB_PDB) {
525 driver = DB_PDB;
526 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
527 getDescription().c_str(), driver);
528 }
529 char siloPath[64];
530 snprintf(siloPath, 64, blockDirFmt, 0);
531 DBMkDir(dbfile, siloPath);
532 DBSetDir(dbfile, siloPath);
533 }
534
535 if (!dbfile)
536 throw RipleyException("dump: Could not create Silo file");
537
538 /*
539 if (driver==DB_HDF5) {
540 // gzip level 1 already provides good compression with minimal
541 // performance penalty. Some tests showed that gzip levels >3 performed
542 // rather badly on escript data both in terms of time and space
543 DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
544 }
545 */
546
547 boost::scoped_ptr<double> x(new double[m_NN[0]]);
548 boost::scoped_ptr<double> y(new double[m_NN[1]]);
549 double* coords[2] = { x.get(), y.get() };
550 #pragma omp parallel
551 {
552 #pragma omp for nowait
553 for (dim_t i0 = 0; i0 < m_NN[0]; i0++) {
554 coords[0][i0]=getLocalCoordinate(i0, 0);
555 }
556 #pragma omp for nowait
557 for (dim_t i1 = 0; i1 < m_NN[1]; i1++) {
558 coords[1][i1]=getLocalCoordinate(i1, 1);
559 }
560 }
561 int* dims = const_cast<int*>(getNumNodesPerDim());
562
563 // write mesh
564 DBPutQuadmesh(dbfile, "mesh", NULL, coords, dims, 2, DB_DOUBLE,
565 DB_COLLINEAR, NULL);
566
567 // write node ids
568 DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], dims, 2,
569 NULL, 0, DB_INT, DB_NODECENT, NULL);
570
571 // write element ids
572 dims = const_cast<int*>(getNumElementsPerDim());
573 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
574 dims, 2, NULL, 0, DB_INT, DB_ZONECENT, NULL);
575
576 // rank 0 writes multimesh and multivar
577 if (m_mpiInfo->rank == 0) {
578 vector<string> tempstrings;
579 vector<char*> names;
580 for (dim_t i=0; i<m_mpiInfo->size; i++) {
581 stringstream path;
582 path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
583 tempstrings.push_back(path.str());
584 names.push_back((char*)tempstrings.back().c_str());
585 }
586 vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
587 DBSetDir(dbfile, "/");
588 DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
589 &types[0], NULL);
590 tempstrings.clear();
591 names.clear();
592 for (dim_t i=0; i<m_mpiInfo->size; i++) {
593 stringstream path;
594 path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
595 tempstrings.push_back(path.str());
596 names.push_back((char*)tempstrings.back().c_str());
597 }
598 types.assign(m_mpiInfo->size, DB_QUADVAR);
599 DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
600 &types[0], NULL);
601 tempstrings.clear();
602 names.clear();
603 for (dim_t i=0; i<m_mpiInfo->size; i++) {
604 stringstream path;
605 path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
606 tempstrings.push_back(path.str());
607 names.push_back((char*)tempstrings.back().c_str());
608 }
609 DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
610 &types[0], NULL);
611 }
612
613 if (m_mpiInfo->size > 1) {
614 #ifdef ESYS_MPI
615 PMPIO_HandOffBaton(baton, dbfile);
616 PMPIO_Finish(baton);
617 #endif
618 } else {
619 DBClose(dbfile);
620 }
621
622 #else // USE_SILO
623 throw RipleyException("dump: no Silo support");
624 #endif
625 }
626
627 const int* Rectangle::borrowSampleReferenceIDs(int fsType) const
628 {
629 switch (fsType) {
630 case Nodes:
631 case ReducedNodes: // FIXME: reduced
632 return &m_nodeId[0];
633 case DegreesOfFreedom:
634 case ReducedDegreesOfFreedom: // FIXME: reduced
635 return &m_dofId[0];
636 case Elements:
637 case ReducedElements:
638 return &m_elementId[0];
639 case FaceElements:
640 case ReducedFaceElements:
641 return &m_faceId[0];
642 default:
643 break;
644 }
645
646 stringstream msg;
647 msg << "borrowSampleReferenceIDs: invalid function space type " << fsType;
648 throw RipleyException(msg.str());
649 }
650
651 bool Rectangle::ownSample(int fsType, index_t id) const
652 {
653 if (getMPISize()==1)
654 return true;
655
656 switch (fsType) {
657 case Nodes:
658 case ReducedNodes: // FIXME: reduced
659 return (m_dofMap[id] < getNumDOF());
660 case DegreesOfFreedom:
661 case ReducedDegreesOfFreedom:
662 return true;
663 case Elements:
664 case ReducedElements:
665 // check ownership of element's bottom left node
666 return (m_dofMap[id%m_NE[0]+m_NN[0]*(id/m_NE[0])] < getNumDOF());
667 case FaceElements:
668 case ReducedFaceElements:
669 {
670 // determine which face the sample belongs to before
671 // checking ownership of corresponding element's first node
672 dim_t n=0;
673 for (size_t i=0; i<4; i++) {
674 n+=m_faceCount[i];
675 if (id<n) {
676 index_t k;
677 if (i==1)
678 k=m_NN[0]-2;
679 else if (i==3)
680 k=m_NN[0]*(m_NN[1]-2);
681 else
682 k=0;
683 // determine whether to move right or up
684 const index_t delta=(i/2==0 ? m_NN[0] : 1);
685 return (m_dofMap[k+(id-n+m_faceCount[i])*delta] < getNumDOF());
686 }
687 }
688 return false;
689 }
690 default:
691 break;
692 }
693
694 stringstream msg;
695 msg << "ownSample: invalid function space type " << fsType;
696 throw RipleyException(msg.str());
697 }
698
699 void Rectangle::setToNormal(escript::Data& out) const
700 {
701 if (out.getFunctionSpace().getTypeCode() == FaceElements) {
702 out.requireWrite();
703 #pragma omp parallel
704 {
705 if (m_faceOffset[0] > -1) {
706 #pragma omp for nowait
707 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
708 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
709 // set vector at two quadrature points
710 *o++ = -1.;
711 *o++ = 0.;
712 *o++ = -1.;
713 *o = 0.;
714 }
715 }
716
717 if (m_faceOffset[1] > -1) {
718 #pragma omp for nowait
719 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
720 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
721 // set vector at two quadrature points
722 *o++ = 1.;
723 *o++ = 0.;
724 *o++ = 1.;
725 *o = 0.;
726 }
727 }
728
729 if (m_faceOffset[2] > -1) {
730 #pragma omp for nowait
731 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
732 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
733 // set vector at two quadrature points
734 *o++ = 0.;
735 *o++ = -1.;
736 *o++ = 0.;
737 *o = -1.;
738 }
739 }
740
741 if (m_faceOffset[3] > -1) {
742 #pragma omp for nowait
743 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
744 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
745 // set vector at two quadrature points
746 *o++ = 0.;
747 *o++ = 1.;
748 *o++ = 0.;
749 *o = 1.;
750 }
751 }
752 } // end of parallel section
753 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
754 out.requireWrite();
755 #pragma omp parallel
756 {
757 if (m_faceOffset[0] > -1) {
758 #pragma omp for nowait
759 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
760 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
761 *o++ = -1.;
762 *o = 0.;
763 }
764 }
765
766 if (m_faceOffset[1] > -1) {
767 #pragma omp for nowait
768 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
769 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
770 *o++ = 1.;
771 *o = 0.;
772 }
773 }
774
775 if (m_faceOffset[2] > -1) {
776 #pragma omp for nowait
777 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
778 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
779 *o++ = 0.;
780 *o = -1.;
781 }
782 }
783
784 if (m_faceOffset[3] > -1) {
785 #pragma omp for nowait
786 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
787 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
788 *o++ = 0.;
789 *o = 1.;
790 }
791 }
792 } // end of parallel section
793
794 } else {
795 stringstream msg;
796 msg << "setToNormal: invalid function space type "
797 << out.getFunctionSpace().getTypeCode();
798 throw RipleyException(msg.str());
799 }
800 }
801
802 void Rectangle::setToSize(escript::Data& out) const
803 {
804 if (out.getFunctionSpace().getTypeCode() == Elements
805 || out.getFunctionSpace().getTypeCode() == ReducedElements) {
806 out.requireWrite();
807 const dim_t numQuad=out.getNumDataPointsPerSample();
808 const double size=sqrt(m_dx[0]*m_dx[0]+m_dx[1]*m_dx[1]);
809 #pragma omp parallel for
810 for (index_t k = 0; k < getNumElements(); ++k) {
811 double* o = out.getSampleDataRW(k);
812 fill(o, o+numQuad, size);
813 }
814 } else if (out.getFunctionSpace().getTypeCode() == FaceElements
815 || out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
816 out.requireWrite();
817 const dim_t numQuad=out.getNumDataPointsPerSample();
818 #pragma omp parallel
819 {
820 if (m_faceOffset[0] > -1) {
821 #pragma omp for nowait
822 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
823 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
824 fill(o, o+numQuad, m_dx[1]);
825 }
826 }
827
828 if (m_faceOffset[1] > -1) {
829 #pragma omp for nowait
830 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
831 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
832 fill(o, o+numQuad, m_dx[1]);
833 }
834 }
835
836 if (m_faceOffset[2] > -1) {
837 #pragma omp for nowait
838 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
839 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
840 fill(o, o+numQuad, m_dx[0]);
841 }
842 }
843
844 if (m_faceOffset[3] > -1) {
845 #pragma omp for nowait
846 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
847 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
848 fill(o, o+numQuad, m_dx[0]);
849 }
850 }
851 } // end of parallel section
852
853 } else {
854 stringstream msg;
855 msg << "setToSize: invalid function space type "
856 << out.getFunctionSpace().getTypeCode();
857 throw RipleyException(msg.str());
858 }
859 }
860
861 void Rectangle::Print_Mesh_Info(const bool full) const
862 {
863 RipleyDomain::Print_Mesh_Info(full);
864 if (full) {
865 cout << " Id Coordinates" << endl;
866 cout.precision(15);
867 cout.setf(ios::scientific, ios::floatfield);
868 for (index_t i=0; i < getNumNodes(); i++) {
869 cout << " " << setw(5) << m_nodeId[i]
870 << " " << getLocalCoordinate(i%m_NN[0], 0)
871 << " " << getLocalCoordinate(i/m_NN[0], 1) << endl;
872 }
873 }
874 }
875
876
877 //protected
878 void Rectangle::assembleCoordinates(escript::Data& arg) const
879 {
880 escriptDataC x = arg.getDataC();
881 int numDim = m_numDim;
882 if (!isDataPointShapeEqual(&x, 1, &numDim))
883 throw RipleyException("setToX: Invalid Data object shape");
884 if (!numSamplesEqual(&x, 1, getNumNodes()))
885 throw RipleyException("setToX: Illegal number of samples in Data object");
886
887 arg.requireWrite();
888 #pragma omp parallel for
889 for (dim_t i1 = 0; i1 < m_NN[1]; i1++) {
890 for (dim_t i0 = 0; i0 < m_NN[0]; i0++) {
891 double* point = arg.getSampleDataRW(i0+m_NN[0]*i1);
892 point[0] = getLocalCoordinate(i0, 0);
893 point[1] = getLocalCoordinate(i1, 1);
894 }
895 }
896 }
897
898 //protected
899 void Rectangle::assembleGradient(escript::Data& out, escript::Data& in) const
900 {
901 const dim_t numComp = in.getDataPointSize();
902 const double cx0 = .21132486540518711775/m_dx[0];
903 const double cx1 = .78867513459481288225/m_dx[0];
904 const double cx2 = 1./m_dx[0];
905 const double cy0 = .21132486540518711775/m_dx[1];
906 const double cy1 = .78867513459481288225/m_dx[1];
907 const double cy2 = 1./m_dx[1];
908
909 if (out.getFunctionSpace().getTypeCode() == Elements) {
910 out.requireWrite();
911 #pragma omp parallel
912 {
913 vector<double> f_00(numComp);
914 vector<double> f_01(numComp);
915 vector<double> f_10(numComp);
916 vector<double> f_11(numComp);
917 #pragma omp for
918 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
919 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
920 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,k1, m_NN[0])), numComp*sizeof(double));
921 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,k1+1, m_NN[0])), numComp*sizeof(double));
922 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,k1, m_NN[0])), numComp*sizeof(double));
923 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,k1+1, m_NN[0])), numComp*sizeof(double));
924 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE[0]));
925 for (index_t i=0; i < numComp; ++i) {
926 o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx1 + (f_11[i]-f_01[i])*cx0;
927 o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy1 + (f_11[i]-f_10[i])*cy0;
928 o[INDEX3(i,0,1,numComp,2)] = (f_10[i]-f_00[i])*cx1 + (f_11[i]-f_01[i])*cx0;
929 o[INDEX3(i,1,1,numComp,2)] = (f_01[i]-f_00[i])*cy0 + (f_11[i]-f_10[i])*cy1;
930 o[INDEX3(i,0,2,numComp,2)] = (f_10[i]-f_00[i])*cx0 + (f_11[i]-f_01[i])*cx1;
931 o[INDEX3(i,1,2,numComp,2)] = (f_01[i]-f_00[i])*cy1 + (f_11[i]-f_10[i])*cy0;
932 o[INDEX3(i,0,3,numComp,2)] = (f_10[i]-f_00[i])*cx0 + (f_11[i]-f_01[i])*cx1;
933 o[INDEX3(i,1,3,numComp,2)] = (f_01[i]-f_00[i])*cy0 + (f_11[i]-f_10[i])*cy1;
934 } // end of component loop i
935 } // end of k0 loop
936 } // end of k1 loop
937 } // end of parallel section
938 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
939 out.requireWrite();
940 #pragma omp parallel
941 {
942 vector<double> f_00(numComp);
943 vector<double> f_01(numComp);
944 vector<double> f_10(numComp);
945 vector<double> f_11(numComp);
946 #pragma omp for
947 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
948 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
949 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,k1, m_NN[0])), numComp*sizeof(double));
950 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,k1+1, m_NN[0])), numComp*sizeof(double));
951 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,k1, m_NN[0])), numComp*sizeof(double));
952 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,k1+1, m_NN[0])), numComp*sizeof(double));
953 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE[0]));
954 for (index_t i=0; i < numComp; ++i) {
955 o[INDEX3(i,0,0,numComp,2)] = (f_10[i] + f_11[i] - f_00[i] - f_01[i])*cx2/2;
956 o[INDEX3(i,1,0,numComp,2)] = (f_01[i] + f_11[i] - f_00[i] - f_10[i])*cy2/2;
957 } // end of component loop i
958 } // end of k0 loop
959 } // end of k1 loop
960 } // end of parallel section
961 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
962 out.requireWrite();
963 #pragma omp parallel
964 {
965 vector<double> f_00(numComp);
966 vector<double> f_01(numComp);
967 vector<double> f_10(numComp);
968 vector<double> f_11(numComp);
969 if (m_faceOffset[0] > -1) {
970 #pragma omp for nowait
971 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
972 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(0,k1, m_NN[0])), numComp*sizeof(double));
973 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(0,k1+1, m_NN[0])), numComp*sizeof(double));
974 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(1,k1, m_NN[0])), numComp*sizeof(double));
975 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(1,k1+1, m_NN[0])), numComp*sizeof(double));
976 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
977 for (index_t i=0; i < numComp; ++i) {
978 o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx1 + (f_11[i]-f_01[i])*cx0;
979 o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy2;
980 o[INDEX3(i,0,1,numComp,2)] = (f_10[i]-f_00[i])*cx0 + (f_11[i]-f_01[i])*cx1;
981 o[INDEX3(i,1,1,numComp,2)] = (f_01[i]-f_00[i])*cy2;
982 } // end of component loop i
983 } // end of k1 loop
984 } // end of face 0
985 if (m_faceOffset[1] > -1) {
986 #pragma omp for nowait
987 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
988 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(m_NN[0]-2,k1, m_NN[0])), numComp*sizeof(double));
989 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(m_NN[0]-2,k1+1, m_NN[0])), numComp*sizeof(double));
990 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1, m_NN[0])), numComp*sizeof(double));
991 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1+1, m_NN[0])), numComp*sizeof(double));
992 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
993 for (index_t i=0; i < numComp; ++i) {
994 o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx1 + (f_11[i]-f_01[i])*cx0;
995 o[INDEX3(i,1,0,numComp,2)] = (f_11[i]-f_10[i])*cy2;
996 o[INDEX3(i,0,1,numComp,2)] = (f_10[i]-f_00[i])*cx0 + (f_11[i]-f_01[i])*cx1;
997 o[INDEX3(i,1,1,numComp,2)] = (f_11[i]-f_10[i])*cy2;
998 } // end of component loop i
999 } // end of k1 loop
1000 } // end of face 1
1001 if (m_faceOffset[2] > -1) {
1002 #pragma omp for nowait
1003 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1004 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,0, m_NN[0])), numComp*sizeof(double));
1005 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,1, m_NN[0])), numComp*sizeof(double));
1006 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,0, m_NN[0])), numComp*sizeof(double));
1007 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,1, m_NN[0])), numComp*sizeof(double));
1008 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1009 for (index_t i=0; i < numComp; ++i) {
1010 o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx2;
1011 o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy1 + (f_11[i]-f_10[i])*cy0;
1012 o[INDEX3(i,0,1,numComp,2)] = (f_10[i]-f_00[i])*cx2;
1013 o[INDEX3(i,1,1,numComp,2)] = (f_01[i]-f_00[i])*cy0 + (f_11[i]-f_10[i])*cy1;
1014 } // end of component loop i
1015 } // end of k0 loop
1016 } // end of face 2
1017 if (m_faceOffset[3] > -1) {
1018 #pragma omp for nowait
1019 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1020 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-2, m_NN[0])), numComp*sizeof(double));
1021 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1022 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-2, m_NN[0])), numComp*sizeof(double));
1023 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1024 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1025 for (index_t i=0; i < numComp; ++i) {
1026 o[INDEX3(i,0,0,numComp,2)] = (f_11[i]-f_01[i])*cx2;
1027 o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy1 + (f_11[i]-f_10[i])*cy0;
1028 o[INDEX3(i,0,1,numComp,2)] = (f_11[i]-f_01[i])*cx2;
1029 o[INDEX3(i,1,1,numComp,2)] = (f_01[i]-f_00[i])*cy0 + (f_11[i]-f_10[i])*cy1;
1030 } // end of component loop i
1031 } // end of k0 loop
1032 } // end of face 3
1033 } // end of parallel section
1034
1035 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1036 out.requireWrite();
1037 #pragma omp parallel
1038 {
1039 vector<double> f_00(numComp);
1040 vector<double> f_01(numComp);
1041 vector<double> f_10(numComp);
1042 vector<double> f_11(numComp);
1043 if (m_faceOffset[0] > -1) {
1044 #pragma omp for nowait
1045 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1046 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(0,k1, m_NN[0])), numComp*sizeof(double));
1047 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(0,k1+1, m_NN[0])), numComp*sizeof(double));
1048 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(1,k1, m_NN[0])), numComp*sizeof(double));
1049 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(1,k1+1, m_NN[0])), numComp*sizeof(double));
1050 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
1051 for (index_t i=0; i < numComp; ++i) {
1052 o[INDEX3(i,0,0,numComp,2)] = (f_10[i] + f_11[i] - f_00[i] - f_01[i])*cx2/2;
1053 o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy2;
1054 } // end of component loop i
1055 } // end of k1 loop
1056 } // end of face 0
1057 if (m_faceOffset[1] > -1) {
1058 #pragma omp for nowait
1059 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1060 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(m_NN[0]-2,k1, m_NN[0])), numComp*sizeof(double));
1061 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(m_NN[0]-2,k1+1, m_NN[0])), numComp*sizeof(double));
1062 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1, m_NN[0])), numComp*sizeof(double));
1063 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1+1, m_NN[0])), numComp*sizeof(double));
1064 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
1065 for (index_t i=0; i < numComp; ++i) {
1066 o[INDEX3(i,0,0,numComp,2)] = (f_10[i] + f_11[i] - f_00[i] - f_01[i])*cx2/2;
1067 o[INDEX3(i,1,0,numComp,2)] = (f_11[i]-f_10[i])*cy2;
1068 } // end of component loop i
1069 } // end of k1 loop
1070 } // end of face 1
1071 if (m_faceOffset[2] > -1) {
1072 #pragma omp for nowait
1073 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1074 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,0, m_NN[0])), numComp*sizeof(double));
1075 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,1, m_NN[0])), numComp*sizeof(double));
1076 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,0, m_NN[0])), numComp*sizeof(double));
1077 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,1, m_NN[0])), numComp*sizeof(double));
1078 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1079 for (index_t i=0; i < numComp; ++i) {
1080 o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx2;
1081 o[INDEX3(i,1,0,numComp,2)] = (f_01[i] + f_11[i] - f_00[i] - f_10[i])*cy2/2;
1082 } // end of component loop i
1083 } // end of k0 loop
1084 } // end of face 2
1085 if (m_faceOffset[3] > -1) {
1086 #pragma omp for nowait
1087 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1088 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-2, m_NN[0])), numComp*sizeof(double));
1089 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1090 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-2, m_NN[0])), numComp*sizeof(double));
1091 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1092 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1093 for (index_t i=0; i < numComp; ++i) {
1094 o[INDEX3(i,0,0,numComp,2)] = (f_11[i]-f_01[i])*cx2;
1095 o[INDEX3(i,1,0,numComp,2)] = (f_01[i] + f_11[i] - f_00[i] - f_10[i])*cy2/2;
1096 } // end of component loop i
1097 } // end of k0 loop
1098 } // end of face 3
1099 } // end of parallel section
1100 }
1101 }
1102
1103 //protected
1104 void Rectangle::assembleIntegrate(vector<double>& integrals, escript::Data& arg) const
1105 {
1106 const dim_t numComp = arg.getDataPointSize();
1107 const index_t left = (m_offset[0]==0 ? 0 : 1);
1108 const index_t bottom = (m_offset[1]==0 ? 0 : 1);
1109 const int fs=arg.getFunctionSpace().getTypeCode();
1110 if (fs == Elements && arg.actsExpanded()) {
1111 #pragma omp parallel
1112 {
1113 vector<double> int_local(numComp, 0);
1114 const double w = m_dx[0]*m_dx[1]/4.;
1115 #pragma omp for nowait
1116 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1117 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1118 const double* f = arg.getSampleDataRO(INDEX2(k0, k1, m_NE[0]));
1119 for (index_t i=0; i < numComp; ++i) {
1120 const double f0 = f[INDEX2(i,0,numComp)];
1121 const double f1 = f[INDEX2(i,1,numComp)];
1122 const double f2 = f[INDEX2(i,2,numComp)];
1123 const double f3 = f[INDEX2(i,3,numComp)];
1124 int_local[i]+=(f0+f1+f2+f3)*w;
1125 } // end of component loop i
1126 } // end of k0 loop
1127 } // end of k1 loop
1128 #pragma omp critical
1129 for (index_t i=0; i<numComp; i++)
1130 integrals[i]+=int_local[i];
1131 } // end of parallel section
1132
1133 } else if (fs==ReducedElements || (fs==Elements && !arg.actsExpanded())) {
1134 const double w = m_dx[0]*m_dx[1];
1135 #pragma omp parallel
1136 {
1137 vector<double> int_local(numComp, 0);
1138 #pragma omp for nowait
1139 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1140 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1141 const double* f = arg.getSampleDataRO(INDEX2(k0, k1, m_NE[0]));
1142 for (index_t i=0; i < numComp; ++i) {
1143 int_local[i]+=f[i]*w;
1144 }
1145 }
1146 }
1147 #pragma omp critical
1148 for (index_t i=0; i<numComp; i++)
1149 integrals[i]+=int_local[i];
1150 } // end of parallel section
1151
1152 } else if (fs == FaceElements && arg.actsExpanded()) {
1153 #pragma omp parallel
1154 {
1155 vector<double> int_local(numComp, 0);
1156 const double w0 = m_dx[0]/2.;
1157 const double w1 = m_dx[1]/2.;
1158 if (m_faceOffset[0] > -1) {
1159 #pragma omp for nowait
1160 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1161 const double* f = arg.getSampleDataRO(m_faceOffset[0]+k1);
1162 for (index_t i=0; i < numComp; ++i) {
1163 const double f0 = f[INDEX2(i,0,numComp)];
1164 const double f1 = f[INDEX2(i,1,numComp)];
1165 int_local[i]+=(f0+f1)*w1;
1166 } // end of component loop i
1167 } // end of k1 loop
1168 }
1169
1170 if (m_faceOffset[1] > -1) {
1171 #pragma omp for nowait
1172 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1173 const double* f = arg.getSampleDataRO(m_faceOffset[1]+k1);
1174 for (index_t i=0; i < numComp; ++i) {
1175 const double f0 = f[INDEX2(i,0,numComp)];
1176 const double f1 = f[INDEX2(i,1,numComp)];
1177 int_local[i]+=(f0+f1)*w1;
1178 } // end of component loop i
1179 } // end of k1 loop
1180 }
1181
1182 if (m_faceOffset[2] > -1) {
1183 #pragma omp for nowait
1184 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1185 const double* f = arg.getSampleDataRO(m_faceOffset[2]+k0);
1186 for (index_t i=0; i < numComp; ++i) {
1187 const double f0 = f[INDEX2(i,0,numComp)];
1188 const double f1 = f[INDEX2(i,1,numComp)];
1189 int_local[i]+=(f0+f1)*w0;
1190 } // end of component loop i
1191 } // end of k0 loop
1192 }
1193
1194 if (m_faceOffset[3] > -1) {
1195 #pragma omp for nowait
1196 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1197 const double* f = arg.getSampleDataRO(m_faceOffset[3]+k0);
1198 for (index_t i=0; i < numComp; ++i) {
1199 const double f0 = f[INDEX2(i,0,numComp)];
1200 const double f1 = f[INDEX2(i,1,numComp)];
1201 int_local[i]+=(f0+f1)*w0;
1202 } // end of component loop i
1203 } // end of k0 loop
1204 }
1205 #pragma omp critical
1206 for (index_t i=0; i<numComp; i++)
1207 integrals[i]+=int_local[i];
1208 } // end of parallel section
1209
1210 } else if (fs==ReducedFaceElements || (fs==FaceElements && !arg.actsExpanded())) {
1211 #pragma omp parallel
1212 {
1213 vector<double> int_local(numComp, 0);
1214 if (m_faceOffset[0] > -1) {
1215 #pragma omp for nowait
1216 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1217 const double* f = arg.getSampleDataRO(m_faceOffset[0]+k1);
1218 for (index_t i=0; i < numComp; ++i) {
1219 int_local[i]+=f[i]*m_dx[1];
1220 }
1221 }
1222 }
1223
1224 if (m_faceOffset[1] > -1) {
1225 #pragma omp for nowait
1226 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1227 const double* f = arg.getSampleDataRO(m_faceOffset[1]+k1);
1228 for (index_t i=0; i < numComp; ++i) {
1229 int_local[i]+=f[i]*m_dx[1];
1230 }
1231 }
1232 }
1233
1234 if (m_faceOffset[2] > -1) {
1235 #pragma omp for nowait
1236 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1237 const double* f = arg.getSampleDataRO(m_faceOffset[2]+k0);
1238 for (index_t i=0; i < numComp; ++i) {
1239 int_local[i]+=f[i]*m_dx[0];
1240 }
1241 }
1242 }
1243
1244 if (m_faceOffset[3] > -1) {
1245 #pragma omp for nowait
1246 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1247 const double* f = arg.getSampleDataRO(m_faceOffset[3]+k0);
1248 for (index_t i=0; i < numComp; ++i) {
1249 int_local[i]+=f[i]*m_dx[0];
1250 }
1251 }
1252 }
1253
1254 #pragma omp critical
1255 for (index_t i=0; i<numComp; i++)
1256 integrals[i]+=int_local[i];
1257 } // end of parallel section
1258 } // function space selector
1259 }
1260
1261 //protected
1262 dim_t Rectangle::insertNeighbourNodes(IndexVector& index, index_t node) const
1263 {
1264 const dim_t nDOF0 = (m_gNE[0]+1)/m_NX[0];
1265 const dim_t nDOF1 = (m_gNE[1]+1)/m_NX[1];
1266 const int x=node%nDOF0;
1267 const int y=node/nDOF0;
1268 dim_t num=0;
1269 // loop through potential neighbours and add to index if positions are
1270 // within bounds
1271 for (int i1=-1; i1<2; i1++) {
1272 for (int i0=-1; i0<2; i0++) {
1273 // skip node itself
1274 if (i0==0 && i1==0)
1275 continue;
1276 // location of neighbour node
1277 const int nx=x+i0;
1278 const int ny=y+i1;
1279 if (nx>=0 && ny>=0 && nx<nDOF0 && ny<nDOF1) {
1280 index.push_back(ny*nDOF0+nx);
1281 num++;
1282 }
1283 }
1284 }
1285
1286 return num;
1287 }
1288
1289 //protected
1290 void Rectangle::nodesToDOF(escript::Data& out, escript::Data& in) const
1291 {
1292 const dim_t numComp = in.getDataPointSize();
1293 out.requireWrite();
1294
1295 const index_t left = (m_offset[0]==0 ? 0 : 1);
1296 const index_t bottom = (m_offset[1]==0 ? 0 : 1);
1297 const dim_t nDOF0 = (m_gNE[0]+1)/m_NX[0];
1298 const dim_t nDOF1 = (m_gNE[1]+1)/m_NX[1];
1299 #pragma omp parallel for
1300 for (index_t i=0; i<nDOF1; i++) {
1301 for (index_t j=0; j<nDOF0; j++) {
1302 const index_t n=j+left+(i+bottom)*m_NN[0];
1303 const double* src=in.getSampleDataRO(n);
1304 copy(src, src+numComp, out.getSampleDataRW(j+i*nDOF0));
1305 }
1306 }
1307 }
1308
1309 //protected
1310 void Rectangle::dofToNodes(escript::Data& out, escript::Data& in) const
1311 {
1312 const dim_t numComp = in.getDataPointSize();
1313 Paso_Coupler* coupler = Paso_Coupler_alloc(m_connector, numComp);
1314 in.requireWrite();
1315 Paso_Coupler_startCollect(coupler, in.getSampleDataRW(0));
1316
1317 const dim_t numDOF = getNumDOF();
1318 out.requireWrite();
1319 const double* buffer = Paso_Coupler_finishCollect(coupler);
1320
1321 #pragma omp parallel for
1322 for (index_t i=0; i<getNumNodes(); i++) {
1323 const double* src=(m_dofMap[i]<numDOF ?
1324 in.getSampleDataRO(m_dofMap[i])
1325 : &buffer[(m_dofMap[i]-numDOF)*numComp]);
1326 copy(src, src+numComp, out.getSampleDataRW(i));
1327 }
1328 Paso_Coupler_free(coupler);
1329 }
1330
1331 //private
1332 void Rectangle::populateSampleIds()
1333 {
1334 // degrees of freedom are numbered from left to right, bottom to top in
1335 // each rank, continuing on the next rank (ranks also go left-right,
1336 // bottom-top).
1337 // This means rank 0 has id 0...n0-1, rank 1 has id n0...n1-1 etc. which
1338 // helps when writing out data rank after rank.
1339
1340 // build node distribution vector first.
1341 // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes which is
1342 // constant for all ranks in this implementation
1343 m_nodeDistribution.assign(m_mpiInfo->size+1, 0);
1344 const dim_t numDOF=getNumDOF();
1345 for (dim_t k=1; k<m_mpiInfo->size; k++) {
1346 m_nodeDistribution[k]=k*numDOF;
1347 }
1348 m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal();
1349 m_nodeId.resize(getNumNodes());
1350 m_dofId.resize(numDOF);
1351 m_elementId.resize(getNumElements());
1352
1353 // populate face element counts
1354 //left
1355 if (m_offset[0]==0)
1356 m_faceCount[0]=m_NE[1];
1357 else
1358 m_faceCount[0]=0;
1359 //right
1360 if (m_mpiInfo->rank%m_NX[0]==m_NX[0]-1)
1361 m_faceCount[1]=m_NE[1];
1362 else
1363 m_faceCount[1]=0;
1364 //bottom
1365 if (m_offset[1]==0)
1366 m_faceCount[2]=m_NE[0];
1367 else
1368 m_faceCount[2]=0;
1369 //top
1370 if (m_mpiInfo->rank/m_NX[0]==m_NX[1]-1)
1371 m_faceCount[3]=m_NE[0];
1372 else
1373 m_faceCount[3]=0;
1374
1375 m_faceId.resize(getNumFaceElements());
1376
1377 const index_t left = (m_offset[0]==0 ? 0 : 1);
1378 const index_t bottom = (m_offset[1]==0 ? 0 : 1);
1379 const dim_t nDOF0 = (m_gNE[0]+1)/m_NX[0];
1380 const dim_t nDOF1 = (m_gNE[1]+1)/m_NX[1];
1381
1382 #define globalNodeId(x,y) \
1383 ((m_offset[0]+x)/nDOF0)*nDOF0*nDOF1+(m_offset[0]+x)%nDOF0 \
1384 + ((m_offset[1]+y)/nDOF1)*nDOF0*nDOF1*m_NX[0]+((m_offset[1]+y)%nDOF1)*nDOF0
1385
1386 // set corner id's outside the parallel region
1387 m_nodeId[0] = globalNodeId(0, 0);
1388 m_nodeId[m_NN[0]-1] = globalNodeId(m_NN[0]-1, 0);
1389 m_nodeId[m_NN[0]*(m_NN[1]-1)] = globalNodeId(0, m_NN[1]-1);
1390 m_nodeId[m_NN[0]*m_NN[1]-1] = globalNodeId(m_NN[0]-1,m_NN[1]-1);
1391 #undef globalNodeId
1392
1393 #pragma omp parallel
1394 {
1395 // populate degrees of freedom and own nodes (identical id)
1396 #pragma omp for nowait
1397 for (dim_t i=0; i<nDOF1; i++) {
1398 for (dim_t j=0; j<nDOF0; j++) {
1399 const index_t nodeIdx=j+left+(i+bottom)*m_NN[0];
1400 const index_t dofIdx=j+i*nDOF0;
1401 m_dofId[dofIdx] = m_nodeId[nodeIdx]
1402 = m_nodeDistribution[m_mpiInfo->rank]+dofIdx;
1403 }
1404 }
1405
1406 // populate the rest of the nodes (shared with other ranks)
1407 if (m_faceCount[0]==0) { // left column
1408 #pragma omp for nowait
1409 for (dim_t i=0; i<nDOF1; i++) {
1410 const index_t nodeIdx=(i+bottom)*m_NN[0];
1411 const index_t dofId=(i+1)*nDOF0-1;
1412 m_nodeId[nodeIdx]
1413 = m_nodeDistribution[m_mpiInfo->rank-1]+dofId;
1414 }
1415 }
1416 if (m_faceCount[1]==0) { // right column
1417 #pragma omp for nowait
1418 for (dim_t i=0; i<nDOF1; i++) {
1419 const index_t nodeIdx=(i+bottom+1)*m_NN[0]-1;
1420 const index_t dofId=i*nDOF0;
1421 m_nodeId[nodeIdx]
1422 = m_nodeDistribution[m_mpiInfo->rank+1]+dofId;
1423 }
1424 }
1425 if (m_faceCount[2]==0) { // bottom row
1426 #pragma omp for nowait
1427 for (dim_t i=0; i<nDOF0; i++) {
1428 const index_t nodeIdx=i+left;
1429 const index_t dofId=nDOF0*(nDOF1-1)+i;
1430 m_nodeId[nodeIdx]
1431 = m_nodeDistribution[m_mpiInfo->rank-m_NX[0]]+dofId;
1432 }
1433 }
1434 if (m_faceCount[3]==0) { // top row
1435 #pragma omp for nowait
1436 for (dim_t i=0; i<nDOF0; i++) {
1437 const index_t nodeIdx=m_NN[0]*(m_NN[1]-1)+i+left;
1438 const index_t dofId=i;
1439 m_nodeId[nodeIdx]
1440 = m_nodeDistribution[m_mpiInfo->rank+m_NX[0]]+dofId;
1441 }
1442 }
1443
1444 // populate element id's
1445 #pragma omp for nowait
1446 for (dim_t i1=0; i1<m_NE[1]; i1++) {
1447 for (dim_t i0=0; i0<m_NE[0]; i0++) {
1448 m_elementId[i0+i1*m_NE[0]]=(m_offset[1]+i1)*m_gNE[0]+m_offset[0]+i0;
1449 }
1450 }
1451
1452 // face elements
1453 #pragma omp for
1454 for (dim_t k=0; k<getNumFaceElements(); k++)
1455 m_faceId[k]=k;
1456 } // end parallel section
1457
1458 m_nodeTags.assign(getNumNodes(), 0);
1459 updateTagsInUse(Nodes);
1460
1461 m_elementTags.assign(getNumElements(), 0);
1462 updateTagsInUse(Elements);
1463
1464 // generate face offset vector and set face tags
1465 const index_t LEFT=1, RIGHT=2, BOTTOM=10, TOP=20;
1466 const index_t faceTag[] = { LEFT, RIGHT, BOTTOM, TOP };
1467 m_faceOffset.assign(4, -1);
1468 m_faceTags.clear();
1469 index_t offset=0;
1470 for (size_t i=0; i<4; i++) {
1471 if (m_faceCount[i]>0) {
1472 m_faceOffset[i]=offset;
1473 offset+=m_faceCount[i];
1474 m_faceTags.insert(m_faceTags.end(), m_faceCount[i], faceTag[i]);
1475 }
1476 }
1477 setTagMap("left", LEFT);
1478 setTagMap("right", RIGHT);
1479 setTagMap("bottom", BOTTOM);
1480 setTagMap("top", TOP);
1481 updateTagsInUse(FaceElements);
1482 }
1483
1484 //private
1485 void Rectangle::createPattern()
1486 {
1487 const dim_t nDOF0 = (m_gNE[0]+1)/m_NX[0];
1488 const dim_t nDOF1 = (m_gNE[1]+1)/m_NX[1];
1489 const index_t left = (m_offset[0]==0 ? 0 : 1);
1490 const index_t bottom = (m_offset[1]==0 ? 0 : 1);
1491
1492 // populate node->DOF mapping with own degrees of freedom.
1493 // The rest is assigned in the loop further down
1494 m_dofMap.assign(getNumNodes(), 0);
1495 #pragma omp parallel for
1496 for (index_t i=bottom; i<bottom+nDOF1; i++) {
1497 for (index_t j=left; j<left+nDOF0; j++) {
1498 m_dofMap[i*m_NN[0]+j]=(i-bottom)*nDOF0+j-left;
1499 }
1500 }
1501
1502 // build list of shared components and neighbours by looping through
1503 // all potential neighbouring ranks and checking if positions are
1504 // within bounds
1505 const dim_t numDOF=nDOF0*nDOF1;
1506 vector<IndexVector> colIndices(numDOF); // for the couple blocks
1507 RankVector neighbour;
1508 IndexVector offsetInShared(1,0);
1509 IndexVector sendShared, recvShared;
1510 int numShared=0;
1511 const int x=m_mpiInfo->rank%m_NX[0];
1512 const int y=m_mpiInfo->rank/m_NX[0];
1513 for (int i1=-1; i1<2; i1++) {
1514 for (int i0=-1; i0<2; i0++) {
1515 // skip this rank
1516 if (i0==0 && i1==0)
1517 continue;
1518 // location of neighbour rank
1519 const int nx=x+i0;
1520 const int ny=y+i1;
1521 if (nx>=0 && ny>=0 && nx<m_NX[0] && ny<m_NX[1]) {
1522 neighbour.push_back(ny*m_NX[0]+nx);
1523 if (i0==0) {
1524 // sharing top or bottom edge
1525 const int firstDOF=(i1==-1 ? 0 : numDOF-nDOF0);
1526 const int firstNode=(i1==-1 ? left : m_NN[0]*(m_NN[1]-1)+left);
1527 offsetInShared.push_back(offsetInShared.back()+nDOF0);
1528 for (dim_t i=0; i<nDOF0; i++, numShared++) {
1529 sendShared.push_back(firstDOF+i);
1530 recvShared.push_back(numDOF+numShared);
1531 if (i>0)
1532 colIndices[firstDOF+i-1].push_back(numShared);
1533 colIndices[firstDOF+i].push_back(numShared);
1534 if (i<nDOF0-1)
1535 colIndices[firstDOF+i+1].push_back(numShared);
1536 m_dofMap[firstNode+i]=numDOF+numShared;
1537 }
1538 } else if (i1==0) {
1539 // sharing left or right edge
1540 const int firstDOF=(i0==-1 ? 0 : nDOF0-1);
1541 const int firstNode=(i0==-1 ? bottom*m_NN[0] : (bottom+1)*m_NN[0]-1);
1542 offsetInShared.push_back(offsetInShared.back()+nDOF1);
1543 for (dim_t i=0; i<nDOF1; i++, numShared++) {
1544 sendShared.push_back(firstDOF+i*nDOF0);
1545 recvShared.push_back(numDOF+numShared);
1546 if (i>0)
1547 colIndices[firstDOF+(i-1)*nDOF0].push_back(numShared);
1548 colIndices[firstDOF+i*nDOF0].push_back(numShared);
1549 if (i<nDOF1-1)
1550 colIndices[firstDOF+(i+1)*nDOF0].push_back(numShared);
1551 m_dofMap[firstNode+i*m_NN[0]]=numDOF+numShared;
1552 }
1553 } else {
1554 // sharing a node
1555 const int dof=(i0+1)/2*(nDOF0-1)+(i1+1)/2*(numDOF-nDOF0);
1556 const int node=(i0+1)/2*(m_NN[0]-1)+(i1+1)/2*m_NN[0]*(m_NN[1]-1);
1557 offsetInShared.push_back(offsetInShared.back()+1);
1558 sendShared.push_back(dof);
1559 recvShared.push_back(numDOF+numShared);
1560 colIndices[dof].push_back(numShared);
1561 m_dofMap[node]=numDOF+numShared;
1562 ++numShared;
1563 }
1564 }
1565 }
1566 }
1567
1568 // create connector
1569 Paso_SharedComponents *snd_shcomp = Paso_SharedComponents_alloc(
1570 numDOF, neighbour.size(), &neighbour[0], &sendShared[0],
1571 &offsetInShared[0], 1, 0, m_mpiInfo);
1572 Paso_SharedComponents *rcv_shcomp = Paso_SharedComponents_alloc(
1573 numDOF, neighbour.size(), &neighbour[0], &recvShared[0],
1574 &offsetInShared[0], 1, 0, m_mpiInfo);
1575 m_connector = Paso_Connector_alloc(snd_shcomp, rcv_shcomp);
1576 Paso_SharedComponents_free(snd_shcomp);
1577 Paso_SharedComponents_free(rcv_shcomp);
1578
1579 // create main and couple blocks
1580 Paso_Pattern *mainPattern = createMainPattern();
1581 Paso_Pattern *colPattern, *rowPattern;
1582 createCouplePatterns(colIndices, numShared, &colPattern, &rowPattern);
1583
1584 // allocate paso distribution
1585 Paso_Distribution* distribution = Paso_Distribution_alloc(m_mpiInfo,
1586 const_cast<index_t*>(&m_nodeDistribution[0]), 1, 0);
1587
1588 // finally create the system matrix
1589 m_pattern = Paso_SystemMatrixPattern_alloc(MATRIX_FORMAT_DEFAULT,
1590 distribution, distribution, mainPattern, colPattern, rowPattern,
1591 m_connector, m_connector);
1592
1593 Paso_Distribution_free(distribution);
1594
1595 // useful debug output
1596 /*
1597 cout << "--- rcv_shcomp ---" << endl;
1598 cout << "numDOF=" << numDOF << ", numNeighbors=" << neighbour.size() << endl;
1599 for (size_t i=0; i<neighbour.size(); i++) {
1600 cout << "neighbor[" << i << "]=" << neighbour[i]
1601 << " offsetInShared[" << i+1 << "]=" << offsetInShared[i+1] << endl;
1602 }
1603 for (size_t i=0; i<recvShared.size(); i++) {
1604 cout << "shared[" << i << "]=" << recvShared[i] << endl;
1605 }
1606 cout << "--- snd_shcomp ---" << endl;
1607 for (size_t i=0; i<sendShared.size(); i++) {
1608 cout << "shared[" << i << "]=" << sendShared[i] << endl;
1609 }
1610 cout << "--- dofMap ---" << endl;
1611 for (size_t i=0; i<m_dofMap.size(); i++) {
1612 cout << "m_dofMap[" << i << "]=" << m_dofMap[i] << endl;
1613 }
1614 cout << "--- colIndices ---" << endl;
1615 for (size_t i=0; i<colIndices.size(); i++) {
1616 cout << "colIndices[" << i << "].size()=" << colIndices[i].size() << endl;
1617 }
1618 */
1619
1620 /*
1621 cout << "--- main_pattern ---" << endl;
1622 cout << "M=" << mainPattern->numOutput << ", N=" << mainPattern->numInput << endl;
1623 for (size_t i=0; i<mainPattern->numOutput+1; i++) {
1624 cout << "ptr[" << i << "]=" << mainPattern->ptr[i] << endl;
1625 }
1626 for (size_t i=0; i<mainPattern->ptr[mainPattern->numOutput]; i++) {
1627 cout << "index[" << i << "]=" << mainPattern->index[i] << endl;
1628 }
1629 */
1630
1631 /*
1632 cout << "--- colCouple_pattern ---" << endl;
1633 cout << "M=" << colPattern->numOutput << ", N=" << colPattern->numInput << endl;
1634 for (size_t i=0; i<colPattern->numOutput+1; i++) {
1635 cout << "ptr[" << i << "]=" << colPattern->ptr[i] << endl;
1636 }
1637 for (size_t i=0; i<colPattern->ptr[colPattern->numOutput]; i++) {
1638 cout << "index[" << i << "]=" << colPattern->index[i] << endl;
1639 }
1640 */
1641
1642 /*
1643 cout << "--- rowCouple_pattern ---" << endl;
1644 cout << "M=" << rowPattern->numOutput << ", N=" << rowPattern->numInput << endl;
1645 for (size_t i=0; i<rowPattern->numOutput+1; i++) {
1646 cout << "ptr[" << i << "]=" << rowPattern->ptr[i] << endl;
1647 }
1648 for (size_t i=0; i<rowPattern->ptr[rowPattern->numOutput]; i++) {
1649 cout << "index[" << i << "]=" << rowPattern->index[i] << endl;
1650 }
1651 */
1652
1653 Paso_Pattern_free(mainPattern);
1654 Paso_Pattern_free(colPattern);
1655 Paso_Pattern_free(rowPattern);
1656 }
1657
1658 //private
1659 void Rectangle::addToMatrixAndRHS(Paso_SystemMatrix* S, escript::Data& F,
1660 const vector<double>& EM_S, const vector<double>& EM_F, bool addS,
1661 bool addF, index_t firstNode, dim_t nEq, dim_t nComp) const
1662 {
1663 IndexVector rowIndex;
1664 rowIndex.push_back(m_dofMap[firstNode]);
1665 rowIndex.push_back(m_dofMap[firstNode+1]);
1666 rowIndex.push_back(m_dofMap[firstNode+m_NN[0]]);
1667 rowIndex.push_back(m_dofMap[firstNode+m_NN[0]+1]);
1668 if (addF) {
1669 double *F_p=F.getSampleDataRW(0);
1670 for (index_t i=0; i<rowIndex.size(); i++) {
1671 if (rowIndex[i]<getNumDOF()) {
1672 for (index_t eq=0; eq<nEq; eq++) {
1673 F_p[INDEX2(eq, rowIndex[i], nEq)]+=EM_F[INDEX2(eq,i,nEq)];
1674 }
1675 }
1676 }
1677 }
1678 if (addS) {
1679 addToSystemMatrix(S, rowIndex, nEq, rowIndex, nComp, EM_S);
1680 }
1681 }
1682
1683 //protected
1684 void Rectangle::interpolateNodesOnElements(escript::Data& out,
1685 escript::Data& in, bool reduced) const
1686 {
1687 const dim_t numComp = in.getDataPointSize();
1688 if (reduced) {
1689 out.requireWrite();
1690 const double c0 = 0.25;
1691 #pragma omp parallel
1692 {
1693 vector<double> f_00(numComp);
1694 vector<double> f_01(numComp);
1695 vector<double> f_10(numComp);
1696 vector<double> f_11(numComp);
1697 #pragma omp for
1698 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1699 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1700 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,k1, m_NN[0])), numComp*sizeof(double));
1701 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,k1+1, m_NN[0])), numComp*sizeof(double));
1702 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,k1, m_NN[0])), numComp*sizeof(double));
1703 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,k1+1, m_NN[0])), numComp*sizeof(double));
1704 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE[0]));
1705 for (index_t i=0; i < numComp; ++i) {
1706 o[INDEX2(i,numComp,0)] = c0*(f_00[i] + f_01[i] + f_10[i] + f_11[i]);
1707 } /* end of component loop i */
1708 } /* end of k0 loop */
1709 } /* end of k1 loop */
1710 } /* end of parallel section */
1711 } else {
1712 out.requireWrite();
1713 const double c0 = 0.16666666666666666667;
1714 const double c1 = 0.044658198738520451079;
1715 const double c2 = 0.62200846792814621559;
1716 #pragma omp parallel
1717 {
1718 vector<double> f_00(numComp);
1719 vector<double> f_01(numComp);
1720 vector<double> f_10(numComp);
1721 vector<double> f_11(numComp);
1722 #pragma omp for
1723 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1724 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1725 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,k1, m_NN[0])), numComp*sizeof(double));
1726 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,k1+1, m_NN[0])), numComp*sizeof(double));
1727 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,k1, m_NN[0])), numComp*sizeof(double));
1728 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,k1+1, m_NN[0])), numComp*sizeof(double));
1729 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE[0]));
1730 for (index_t i=0; i < numComp; ++i) {
1731 o[INDEX2(i,numComp,0)] = c0*(f_01[i] + f_10[i]) + c1*f_11[i] + c2*f_00[i];
1732 o[INDEX2(i,numComp,1)] = c0*(f_00[i] + f_11[i]) + c1*f_01[i] + c2*f_10[i];
1733 o[INDEX2(i,numComp,2)] = c0*(f_00[i] + f_11[i]) + c1*f_10[i] + c2*f_01[i];
1734 o[INDEX2(i,numComp,3)] = c0*(f_01[i] + f_10[i]) + c1*f_00[i] + c2*f_11[i];
1735 } /* end of component loop i */
1736 } /* end of k0 loop */
1737 } /* end of k1 loop */
1738 } /* end of parallel section */
1739 }
1740 }
1741
1742 //protected
1743 void Rectangle::interpolateNodesOnFaces(escript::Data& out, escript::Data& in,
1744 bool reduced) const
1745 {
1746 const dim_t numComp = in.getDataPointSize();
1747 if (reduced) {
1748 out.requireWrite();
1749 #pragma omp parallel
1750 {
1751 vector<double> f_00(numComp);
1752 vector<double> f_01(numComp);
1753 vector<double> f_10(numComp);
1754 vector<double> f_11(numComp);
1755 if (m_faceOffset[0] > -1) {
1756 #pragma omp for nowait
1757 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1758 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(0,k1, m_NN[0])), numComp*sizeof(double));
1759 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(0,k1+1, m_NN[0])), numComp*sizeof(double));
1760 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
1761 for (index_t i=0; i < numComp; ++i) {
1762 o[INDEX2(i,numComp,0)] = (f_00[i] + f_01[i])/2;
1763 } /* end of component loop i */
1764 } /* end of k1 loop */
1765 } /* end of face 0 */
1766 if (m_faceOffset[1] > -1) {
1767 #pragma omp for nowait
1768 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1769 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1, m_NN[0])), numComp*sizeof(double));
1770 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1+1, m_NN[0])), numComp*sizeof(double));
1771 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
1772 for (index_t i=0; i < numComp; ++i) {
1773 o[INDEX2(i,numComp,0)] = (f_10[i] + f_11[i])/2;
1774 } /* end of component loop i */
1775 } /* end of k1 loop */
1776 } /* end of face 1 */
1777 if (m_faceOffset[2] > -1) {
1778 #pragma omp for nowait
1779 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1780 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,0, m_NN[0])), numComp*sizeof(double));
1781 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,0, m_NN[0])), numComp*sizeof(double));
1782 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1783 for (index_t i=0; i < numComp; ++i) {
1784 o[INDEX2(i,numComp,0)] = (f_00[i] + f_10[i])/2;
1785 } /* end of component loop i */
1786 } /* end of k0 loop */
1787 } /* end of face 2 */
1788 if (m_faceOffset[3] > -1) {
1789 #pragma omp for nowait
1790 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1791 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1792 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1793 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1794 for (index_t i=0; i < numComp; ++i) {
1795 o[INDEX2(i,numComp,0)] = (f_01[i] + f_11[i])/2;
1796 } /* end of component loop i */
1797 } /* end of k0 loop */
1798 } /* end of face 3 */
1799 } /* end of parallel section */
1800 } else {
1801 out.requireWrite();
1802 const double c0 = 0.21132486540518711775;
1803 const double c1 = 0.78867513459481288225;
1804 #pragma omp parallel
1805 {
1806 vector<double> f_00(numComp);
1807 vector<double> f_01(numComp);
1808 vector<double> f_10(numComp);
1809 vector<double> f_11(numComp);
1810 if (m_faceOffset[0] > -1) {
1811 #pragma omp for nowait
1812 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1813 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(0,k1, m_NN[0])), numComp*sizeof(double));
1814 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(0,k1+1, m_NN[0])), numComp*sizeof(double));
1815 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
1816 for (index_t i=0; i < numComp; ++i) {
1817 o[INDEX2(i,numComp,0)] = c0*f_01[i] + c1*f_00[i];
1818 o[INDEX2(i,numComp,1)] = c0*f_00[i] + c1*f_01[i];
1819 } /* end of component loop i */
1820 } /* end of k1 loop */
1821 } /* end of face 0 */
1822 if (m_faceOffset[1] > -1) {
1823 #pragma omp for nowait
1824 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1825 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1, m_NN[0])), numComp*sizeof(double));
1826 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1+1, m_NN[0])), numComp*sizeof(double));
1827 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
1828 for (index_t i=0; i < numComp; ++i) {
1829 o[INDEX2(i,numComp,0)] = c1*f_10[i] + c0*f_11[i];
1830 o[INDEX2(i,numComp,1)] = c1*f_11[i] + c0*f_10[i];
1831 } /* end of component loop i */
1832 } /* end of k1 loop */
1833 } /* end of face 1 */
1834 if (m_faceOffset[2] > -1) {
1835 #pragma omp for nowait
1836 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1837 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,0, m_NN[0])), numComp*sizeof(double));
1838 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,0, m_NN[0])), numComp*sizeof(double));
1839 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1840 for (index_t i=0; i < numComp; ++i) {
1841 o[INDEX2(i,numComp,0)] = c0*f_10[i] + c1*f_00[i];
1842 o[INDEX2(i,numComp,1)] = c0*f_00[i] + c1*f_10[i];
1843 } /* end of component loop i */
1844 } /* end of k0 loop */
1845 } /* end of face 2 */
1846 if (m_faceOffset[3] > -1) {
1847 #pragma omp for nowait
1848 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1849 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1850 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1851 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1852 for (index_t i=0; i < numComp; ++i) {
1853 o[INDEX2(i,numComp,0)] = c0*f_11[i] + c1*f_01[i];
1854 o[INDEX2(i,numComp,1)] = c0*f_01[i] + c1*f_11[i];
1855 } /* end of component loop i */
1856 } /* end of k0 loop */
1857 } /* end of face 3 */
1858 } /* end of parallel section */
1859 }
1860 }
1861
1862
1863
1864 namespace
1865 {
1866 // Calculates a guassian blur colvolution matrix for 2D
1867 double* get2DGauss(unsigned radius, double sigma)
1868 {
1869 double* arr=new double[(radius*2+1)*(radius*2+1)];
1870 double common=M_1_PI*0.5*1/(sigma*sigma);
1871 double total=0;
1872 int r=static_cast<int>(radius);
1873 for (int y=-r;y<=r;++y)
1874 {
1875 for (int x=-r;x<=r;++x)
1876 {
1877 arr[(x+r)+(y+r)*(r*2+1)]=common*exp(-(x*x+y*y)/(2*sigma*sigma));
1878 // cout << (x+y*(r*2+1)) << " " << arr[(x+r)+(y+r)*(r*2+1)] << endl;
1879 total+=arr[(x+r)+(y+r)*(r*2+1)];
1880 }
1881 }
1882 double invtotal=1/total;
1883 //cout << "Inv total is " << invtotal << endl;
1884 for (size_t p=0;p<(radius*2+1)*(radius*2+1);++p)
1885 {
1886 arr[p]*=invtotal;
1887 //cout << p << "->" << arr[p] << endl;
1888 }
1889 return arr;
1890 }
1891
1892 // applies conv to source to get a point.
1893 // (xp, yp) are the coords in the source matrix not the destination matrix
1894 double Convolve2D(double* conv, double* source, size_t xp, size_t yp, unsigned radius, size_t width)
1895 {
1896 size_t bx=xp-radius, by=yp-radius;
1897 size_t sbase=bx+by*width;
1898 double result=0;
1899 for (int y=0;y<2*radius+1;++y)
1900 {
1901 for (int x=0;x<2*radius+1;++x)
1902 {
1903 result+=conv[x+y*(2*radius+1)] * source[sbase + x+y*width];
1904 }
1905 }
1906 return result;
1907 }
1908 }
1909
1910
1911
1912 /* This routine produces a Data object filled with smoothed random data.
1913 The dimensions of the rectangle being filled are internal[0] x internal[1] points.
1914 A parameter radius dives the size of the stencil used for the smoothing.
1915 A point on the left hand edge for example, will still require `radius` extra points to the left
1916 in order to complete the stencil.
1917
1918 All local calculation is done on an array called `src`, with
1919 dimensions = ext[0] * ext[1].
1920 Where ext[i]= internal[i]+2*radius.
1921
1922 Now for MPI there is overlap to deal with. We need to share both the overlapping
1923 values themselves but also the external region.
1924
1925 In a hypothetical 1-D case:
1926
1927
1928 1234567
1929 would be split into two ranks thus:
1930 123(4) (4)567 [4 being a shared element]
1931
1932 If the radius is 2. There will be padding elements on the outside:
1933
1934 pp123(4) (4)567pp
1935
1936 To ensure that 4 can be correctly computed on both ranks, values from the other rank
1937 need to be known.
1938
1939 pp123(4)56 23(4)567pp
1940
1941 Now in our case, we wout set all the values 23456 on the left rank and send them to the
1942 right hand rank.
1943
1944 So the edges _may_ need to be shared at a distance `inset` from all boundaries.
1945 inset=2*radius+1
1946 This is to ensure that values at distance `radius` from the shared/overlapped element
1947 that ripley has.
1948
1949
1950 */
1951 escript::Data Rectangle::randomFill(long seed, const boost::python::tuple& filter) const
1952 {
1953 if (m_numDim!=2)
1954 {
1955 throw RipleyException("Only 2D supported at this time.");
1956 }
1957 if (len(filter)!=3) {
1958 throw RipleyException("Unsupported random filter");
1959 }
1960 boost::python::extract<string> ex(filter[0]);
1961 if (!ex.check() || (ex()!="gaussian"))
1962 {
1963 throw RipleyException("Unsupported random filter");
1964 }
1965 boost::python::extract<unsigned int> ex1(filter[1]);
1966 if (!ex1.check())
1967 {
1968 throw RipleyException("Radius of gaussian filter must be a positive integer.");
1969 }
1970 unsigned int radius=ex1();
1971 double sigma=0.5;
1972 boost::python::extract<double> ex2(filter[2]);
1973 if (!ex2.check() || (sigma=ex2())<=0)
1974 {
1975 throw RipleyException("Sigma must be a postive floating point number.");
1976 }
1977
1978 size_t internal[2];
1979 internal[0]=m_NE[0]+1; // number of points in the internal region
1980 internal[1]=m_NE[1]+1; // that is, the ones we need smoothed versions of
1981 size_t ext[2];
1982 ext[0]=internal[0]+2*radius; // includes points we need as input
1983 ext[1]=internal[1]+2*radius; // for smoothing
1984
1985 // now we check to see if the radius is acceptable
1986 // That is, would not cross multiple ranks in MPI
1987
1988 if ((2*radius>=internal[0]) || (2*radius>=internal[1]))
1989 {
1990 throw RipleyException("Radius of gaussian filter must be less than half the width/height of a rank");
1991 }
1992
1993
1994 double* src=new double[ext[0]*ext[1]];
1995 esysUtils::randomFillArray(seed, src, ext[0]*ext[1]);
1996
1997
1998 #ifdef ESYS_MPI
1999 size_t inset=2*radius+1;
2000 size_t Eheight=ext[1]-2*inset; // how high the E (shared) region is
2001 size_t Swidth=ext[0]-2*inset;
2002
2003 double* SWin=new double[inset*inset]; memset(SWin, 0, inset*inset*sizeof(double));
2004 double* SEin=new double[inset*inset]; memset(SEin, 0, inset*inset*sizeof(double));
2005 double* NWin=new double[inset*inset]; memset(NWin, 0, inset*inset*sizeof(double));
2006 double* Sin=new double[inset*Swidth]; memset(Sin, 0, inset*Swidth*sizeof(double));
2007 double* Win=new double[inset*Eheight]; memset(Win, 0, inset*Eheight*sizeof(double));
2008
2009 double* NEout=new double[inset*inset]; memset(NEout, 0, inset*inset*sizeof(double));
2010 unsigned int base=ext[0]-inset+(ext[1]-inset)*ext[0];
2011 for (unsigned int i=0;i<inset;++i)
2012 {
2013 memcpy(NEout+inset*i, src+base, inset*sizeof(double));
2014 base+=ext[0];
2015 }
2016 double* NWout=new double[inset*inset]; memset(NWout, 0, inset*inset*sizeof(double));
2017 base=(ext[1]-inset)*ext[0];
2018 for (unsigned int i=0;i<inset;++i)
2019 {
2020 memcpy(NWout+inset*i, src+base, inset*sizeof(double));
2021 base+=ext[0];
2022 }
2023
2024 double* SEout=new double[inset*inset]; memset(SEout, 0, inset*inset*sizeof(double));
2025 base=ext[0]-inset;
2026 for (int i=0;i<inset;++i)
2027 {
2028 memcpy(SEout+inset*i, src+base, inset*sizeof(double));
2029 base+=ext[0];
2030 }
2031 double* Nout=new double[inset*Swidth]; memset(Nout, 0, inset*Swidth*sizeof(double));
2032 base=inset+(ext[1]-inset)*ext[0];
2033 for (unsigned int i=0;i<inset;++i)
2034 {
2035 memcpy(Nout+Swidth*i, src+base, Swidth*sizeof(double));
2036 base+=ext[0];
2037 }
2038
2039 double* Eout=new double[inset*Eheight]; memset(Eout, 0, inset*Eheight*sizeof(double));
2040 base=ext[0]-inset+inset*ext[0];
2041 for (unsigned int i=0;i<Eheight;++i)
2042 {
2043 memcpy(Eout+i*inset, src+base, inset*sizeof(double));
2044 base+=ext[0];
2045 }
2046
2047 MPI_Request reqs[10];
2048 MPI_Status stats[10];
2049 short rused=0;
2050
2051 dim_t X=m_mpiInfo->rank%m_NX[0];
2052 dim_t Y=m_mpiInfo->rank/m_NX[0];
2053 dim_t row=m_NX[0];
2054 bool swused=false; // These vars will be true if data needs to be copied out of them
2055 bool seused=false;
2056 bool nwused=false;
2057 bool sused=false;
2058 bool wused=false;
2059
2060 // Tags:
2061 // 10 : EW transfer (middle)
2062 // 8 : NS transfer (middle)
2063 // 7 : NE corner -> to N, E and NE
2064 // 11 : NW corner to SW corner (only used on the left hand edge
2065 // 12 : SE corner to SW corner (only used on the bottom edge
2066
2067
2068
2069 int comserr=0;
2070 if (Y!=0) // not on bottom row,
2071 {
2072 if (X!=0) // not on the left hand edge
2073 {
2074 // recv bottomleft from SW
2075 comserr|=MPI_Irecv(SWin, inset*inset, MPI_DOUBLE, (X-1)+(Y-1)*row, 7, m_mpiInfo->comm, reqs+(rused++));
2076 swused=true;
2077 comserr|=MPI_Irecv(Win, Eheight*inset, MPI_DOUBLE, X-1+Y*row, 10, m_mpiInfo->comm, reqs+(rused++));
2078 wused=true;
2079 }
2080 else // on the left hand edge
2081 {
2082 comserr|=MPI_Irecv(SWin, inset*inset, MPI_DOUBLE, (Y-1)*row, 11, m_mpiInfo->comm, reqs+(rused++));
2083 swused=true;
2084 }
2085 comserr|=MPI_Irecv(Sin, Swidth*inset, MPI_DOUBLE, X+(Y-1)*row, 8, m_mpiInfo->comm, reqs+(rused++));
2086 sused=true;
2087 comserr|=MPI_Irecv(SEin, inset*inset, MPI_DOUBLE, X+(Y-1)*row, 7, m_mpiInfo->comm, reqs+(rused++));
2088 seused=true;
2089
2090
2091 }
2092 else // on the bottom row
2093 {
2094 if (X!=0)
2095 {
2096 comserr|=MPI_Irecv(Win, Eheight*inset, MPI_DOUBLE, X-1+Y*row, 10, m_mpiInfo->comm, reqs+(rused++));
2097 wused=true;
2098 // Need to use tag 12 here because SW is coming from the East not South East
2099 comserr|=MPI_Irecv(SWin, inset*inset, MPI_DOUBLE, X-1+Y*row, 12, m_mpiInfo->comm, reqs+(rused++));
2100 swused=true;
2101 }
2102 if (X!=(row-1))
2103 {
2104 comserr|=MPI_Isend(SEout, inset*inset, MPI_DOUBLE, X+1+(Y)*row, 12, m_mpiInfo->comm, reqs+(rused++));
2105 }
2106 }
2107
2108 if (Y!=(m_NX[1]-1)) // not on the top row
2109 {
2110 comserr|=MPI_Isend(Nout, inset*Swidth, MPI_DOUBLE, X+(Y+1)*row, 8, m_mpiInfo->comm, reqs+(rused++));
2111 comserr|=MPI_Isend(NEout, inset*inset, MPI_DOUBLE, X+(Y+1)*row, 7, m_mpiInfo->comm, reqs+(rused++));
2112 if (X!=(row-1)) // not on right hand edge
2113 {
2114 comserr|=MPI_Isend(NEout, inset*inset, MPI_DOUBLE, X+1+(Y+1)*row, 7, m_mpiInfo->comm, reqs+(rused++));
2115 }
2116 if (X==0) // left hand edge
2117 {
2118 comserr|=MPI_Isend(NWout, inset*inset, MPI_DOUBLE, (Y+1)*row,11, m_mpiInfo->comm, reqs+(rused++));
2119 }
2120 }
2121 if (X!=(row-1)) // not on right hand edge
2122 {
2123 comserr|=MPI_Isend(NEout, inset*inset, MPI_DOUBLE, X+1+(Y)*row, 7, m_mpiInfo->comm, reqs+(rused++));
2124 comserr|=MPI_Isend(Eout, Eheight*inset, MPI_DOUBLE, X+1+(Y)*row, 10, m_mpiInfo->comm, reqs+(rused++));
2125 }
2126 if (X!=0)
2127 {
2128 comserr|=MPI_Irecv(NWin, inset*inset, MPI_DOUBLE, (X-1)+Y*row, 7, m_mpiInfo->comm, reqs+(rused++));
2129 nwused=true;
2130
2131
2132 }
2133
2134 if (!comserr)
2135 {
2136 comserr=MPI_Waitall(rused, reqs, stats);
2137 }
2138
2139 if (comserr)
2140 {
2141 // Yes this is throwing an exception as a result of an MPI error.
2142 // and no we don't inform the other ranks that we are doing this.
2143 // however, we have no reason to believe coms work at this point anyway
2144 throw RipleyException("Error in coms for randomFill");
2145 }
2146
2147
2148 // Need to copy the values back from the buffers
2149 // Copy from SW
2150
2151 if (swused)
2152 {
2153 base=0;
2154 for (unsigned int i=0;i<inset;++i)
2155 {
2156 memcpy(src+base, SWin+i*inset, inset*sizeof(double));
2157 base+=ext[0];
2158 }
2159 }
2160 if (seused)
2161 {
2162 base=ext[0]-inset;
2163 for (unsigned int i=0;i<inset;++i)
2164 {
2165 memcpy(src+base, SEin+i*inset, inset*sizeof(double));
2166 base+=ext[0];
2167 }
2168 }
2169 if (nwused)
2170 {
2171 base=(ext[1]-inset)*ext[0];
2172 for (unsigned int i=0;i<inset;++i)
2173 {
2174 memcpy(src+base, NWin+i*inset, inset*sizeof(double));
2175 base+=ext[0];
2176 }
2177 }
2178 if (sused)
2179 {
2180 base=inset;
2181 for (unsigned int i=0;i<inset;++i)
2182 {
2183 memcpy(src+base, Sin+i*Swidth, Swidth*sizeof(double));
2184 base+=ext[0];
2185 }
2186 }
2187 if (wused)
2188 {
2189 base=inset*ext[0];
2190 for (unsigned int i=0;i<Eheight;++i)
2191 {
2192 memcpy(src+base, Win+i*inset, inset*sizeof(double));
2193 base+=ext[0];
2194 }
2195
2196 }
2197
2198 delete[] SWin;
2199 delete[] SEin;
2200 delete[] NWin;
2201 delete[] Sin;
2202 delete[] Win;
2203
2204 delete[] NEout;
2205 delete[] NWout;
2206 delete[] SEout;
2207 delete[] Nout;
2208 delete[] Eout;
2209 #endif
2210 escript::FunctionSpace fs(getPtr(), getContinuousFunctionCode());
2211 escript::Data resdat(0, escript::DataTypes::scalarShape, fs , true);
2212 // don't need to check for exwrite because we just made it
2213 escript::DataVector& dv=resdat.getExpandedVectorReference();
2214 double* convolution=get2DGauss(radius, sigma);
2215 for (size_t y=0;y<(internal[1]);++y)
2216 {
2217 for (size_t x=0;x<(internal[0]);++x)
2218 {
2219 dv[x+y*(internal[0])]=Convolve2D(convolution, src, x+radius, y+radius, radius, ext[0]);
2220
2221 }
2222 }
2223 delete[] convolution;
2224 delete[] src;
2225 return resdat;
2226 }
2227
2228 int Rectangle::findNode(double *coords) const {
2229 const int NOT_MINE = -1;
2230 //is the found element even owned by this rank
2231 for (int dim = 0; dim < m_numDim; dim++) {
2232 if (m_origin[dim] + m_offset[dim] > coords[dim] || m_origin[dim]
2233 + m_offset[dim] + m_dx[dim]*m_ownNE[dim] < coords[dim]) {
2234 return NOT_MINE;
2235 }
2236 }
2237 // get distance from origin
2238 double x = coords[0] - m_origin[0];
2239 double y = coords[1] - m_origin[1];
2240 // distance in elements
2241 int ex = (int) floor(x / m_dx[0]);
2242 int ey = (int) floor(y / m_dx[1]);
2243 // set the min distance high enough to be outside the element plus a bit
2244 int closest = NOT_MINE;
2245 double minDist = 1;
2246 for (int dim = 0; dim < m_numDim; dim++) {
2247 minDist += m_dx[dim]*m_dx[dim];
2248 }
2249 //find the closest node
2250 for (int dx = 0; dx < 2; dx++) {
2251 double xdist = (x - (ex + dx)*m_dx[0]);
2252 for (int dy = 0; dy < 2; dy++) {
2253 double ydist = (y - (ey + dy)*m_dx[1]);
2254 double total = xdist*xdist + ydist*ydist;
2255 if (total < minDist) {
2256 closest = INDEX2(ex+dx, ey+dy, m_NE[0] + 1);
2257 minDist = total;
2258 }
2259 }
2260 }
2261 return closest;
2262 }
2263
2264 void Rectangle::setAssembler(std::string type, std::map<std::string,
2265 escript::Data> constants) {
2266 if (type.compare("WaveAssembler") == 0) {
2267 delete assembler;
2268 assembler = new WaveAssembler2D(this, m_dx, m_NX, m_NE, m_NN, constants);
2269 } else { //else ifs would go before this for other types
2270 throw RipleyException("Ripley::Rectangle does not support the"
2271 " requested assembler");
2272 }
2273 }
2274
2275 } // end of namespace ripley
2276

  ViewVC Help
Powered by ViewVC 1.1.26