/[escript]/trunk/ripley/src/Rectangle.cpp
ViewVC logotype

Contents of /trunk/ripley/src/Rectangle.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4629 - (show annotations)
Fri Jan 24 03:29:25 2014 UTC (5 years, 9 months ago) by sshaw
File size: 88788 byte(s)
added dirac point interpolation to ripley (and fixed dirac points in Brick)
added fast wave assembler for Brick and minor correction to fast wave assembler for Rectangle

1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2013 by University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Open Software License version 3.0
9 * http://www.opensource.org/licenses/osl-3.0.php
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development since 2012 by School of Earth Sciences
13 *
14 *****************************************************************************/
15
16 #include <ripley/Rectangle.h>
17 #include <paso/SystemMatrix.h>
18 #include <esysUtils/esysFileWriter.h>
19 #include <ripley/DefaultAssembler2D.h>
20 #include <ripley/WaveAssembler2D.h>
21 #include <boost/scoped_array.hpp>
22 #include "esysUtils/EsysRandom.h"
23
24 #ifdef USE_NETCDF
25 #include <netcdfcpp.h>
26 #endif
27
28 #if USE_SILO
29 #include <silo.h>
30 #ifdef ESYS_MPI
31 #include <pmpio.h>
32 #endif
33 #endif
34
35 #include <iomanip>
36
37 using namespace std;
38 using esysUtils::FileWriter;
39
40 namespace ripley {
41
42 Rectangle::Rectangle(int n0, int n1, double x0, double y0, double x1,
43 double y1, int d0, int d1,
44 const std::vector<double>& points,
45 const std::vector<int>& tags,
46 const std::map<std::string, int>& tagnamestonums) :
47 RipleyDomain(2)
48 {
49 // ignore subdivision parameters for serial run
50 if (m_mpiInfo->size == 1) {
51 d0=1;
52 d1=1;
53 }
54
55 bool warn=false;
56 // if number of subdivisions is non-positive, try to subdivide by the same
57 // ratio as the number of elements
58 if (d0<=0 && d1<=0) {
59 warn=true;
60 d0=max(1, (int)(sqrt(m_mpiInfo->size*(n0+1)/(float)(n1+1))));
61 d1=m_mpiInfo->size/d0;
62 if (d0*d1 != m_mpiInfo->size) {
63 // ratios not the same so subdivide side with more elements only
64 if (n0>n1) {
65 d0=0;
66 d1=1;
67 } else {
68 d0=1;
69 d1=0;
70 }
71 }
72 }
73 if (d0<=0) {
74 // d1 is preset, determine d0 - throw further down if result is no good
75 d0=m_mpiInfo->size/d1;
76 } else if (d1<=0) {
77 // d0 is preset, determine d1 - throw further down if result is no good
78 d1=m_mpiInfo->size/d0;
79 }
80
81 // ensure number of subdivisions is valid and nodes can be distributed
82 // among number of ranks
83 if (d0*d1 != m_mpiInfo->size)
84 throw RipleyException("Invalid number of spatial subdivisions");
85
86 if (warn) {
87 cout << "Warning: Automatic domain subdivision (d0=" << d0 << ", d1="
88 << d1 << "). This may not be optimal!" << endl;
89 }
90
91 double l0 = x1-x0;
92 double l1 = y1-y0;
93 m_dx[0] = l0/n0;
94 m_dx[1] = l1/n1;
95
96 if ((n0+1)%d0 > 0) {
97 n0=(int)round((float)(n0+1)/d0+0.5)*d0-1;
98 l0=m_dx[0]*n0;
99 cout << "Warning: Adjusted number of elements and length. N0="
100 << n0 << ", l0=" << l0 << endl;
101 }
102 if ((n1+1)%d1 > 0) {
103 n1=(int)round((float)(n1+1)/d1+0.5)*d1-1;
104 l1=m_dx[1]*n1;
105 cout << "Warning: Adjusted number of elements and length. N1="
106 << n1 << ", l1=" << l1 << endl;
107 }
108
109 if ((d0 > 1 && (n0+1)/d0<2) || (d1 > 1 && (n1+1)/d1<2))
110 throw RipleyException("Too few elements for the number of ranks");
111
112 m_gNE[0] = n0;
113 m_gNE[1] = n1;
114 m_origin[0] = x0;
115 m_origin[1] = y0;
116 m_length[0] = l0;
117 m_length[1] = l1;
118 m_NX[0] = d0;
119 m_NX[1] = d1;
120
121 // local number of elements (with and without overlap)
122 m_NE[0] = m_ownNE[0] = (d0>1 ? (n0+1)/d0 : n0);
123 if (m_mpiInfo->rank%d0>0 && m_mpiInfo->rank%d0<d0-1)
124 m_NE[0]++;
125 else if (d0>1 && m_mpiInfo->rank%d0==d0-1)
126 m_ownNE[0]--;
127
128 m_NE[1] = m_ownNE[1] = (d1>1 ? (n1+1)/d1 : n1);
129 if (m_mpiInfo->rank/d0>0 && m_mpiInfo->rank/d0<d1-1)
130 m_NE[1]++;
131 else if (d1>1 && m_mpiInfo->rank/d0==d1-1)
132 m_ownNE[1]--;
133
134 // local number of nodes
135 m_NN[0] = m_NE[0]+1;
136 m_NN[1] = m_NE[1]+1;
137
138 // bottom-left node is at (offset0,offset1) in global mesh
139 m_offset[0] = (n0+1)/d0*(m_mpiInfo->rank%d0);
140 if (m_offset[0] > 0)
141 m_offset[0]--;
142 m_offset[1] = (n1+1)/d1*(m_mpiInfo->rank/d0);
143 if (m_offset[1] > 0)
144 m_offset[1]--;
145
146 populateSampleIds();
147 createPattern();
148 assembler = new DefaultAssembler2D(this, m_dx, m_NX, m_NE, m_NN);
149 for (map<string, int>::const_iterator i = tagnamestonums.begin();
150 i != tagnamestonums.end(); i++) {
151 setTagMap(i->first, i->second);
152 }
153 addPoints(tags.size(), &points[0], &tags[0]);
154 }
155
156 Rectangle::~Rectangle()
157 {
158 Paso_SystemMatrixPattern_free(m_pattern);
159 Paso_Connector_free(m_connector);
160 delete assembler;
161 }
162
163 string Rectangle::getDescription() const
164 {
165 return "ripley::Rectangle";
166 }
167
168 bool Rectangle::operator==(const AbstractDomain& other) const
169 {
170 const Rectangle* o=dynamic_cast<const Rectangle*>(&other);
171 if (o) {
172 return (RipleyDomain::operator==(other) &&
173 m_gNE[0]==o->m_gNE[0] && m_gNE[1]==o->m_gNE[1]
174 && m_origin[0]==o->m_origin[0] && m_origin[1]==o->m_origin[1]
175 && m_length[0]==o->m_length[0] && m_length[1]==o->m_length[1]
176 && m_NX[0]==o->m_NX[0] && m_NX[1]==o->m_NX[1]);
177 }
178
179 return false;
180 }
181
182 void Rectangle::readNcGrid(escript::Data& out, string filename, string varname,
183 const ReaderParameters& params) const
184 {
185 #ifdef USE_NETCDF
186 // check destination function space
187 int myN0, myN1;
188 if (out.getFunctionSpace().getTypeCode() == Nodes) {
189 myN0 = m_NN[0];
190 myN1 = m_NN[1];
191 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
192 out.getFunctionSpace().getTypeCode() == ReducedElements) {
193 myN0 = m_NE[0];
194 myN1 = m_NE[1];
195 } else
196 throw RipleyException("readNcGrid(): invalid function space for output data object");
197
198 if (params.first.size() != 2)
199 throw RipleyException("readNcGrid(): argument 'first' must have 2 entries");
200
201 if (params.numValues.size() != 2)
202 throw RipleyException("readNcGrid(): argument 'numValues' must have 2 entries");
203
204 if (params.multiplier.size() != 2)
205 throw RipleyException("readNcGrid(): argument 'multiplier' must have 2 entries");
206 for (size_t i=0; i<params.multiplier.size(); i++)
207 if (params.multiplier[i]<1)
208 throw RipleyException("readNcGrid(): all multipliers must be positive");
209 if (params.reverse.size() != 2)
210 throw RipleyException("readNcGrid(): argument 'reverse' must have 2 entries");
211
212 // check file existence and size
213 NcFile f(filename.c_str(), NcFile::ReadOnly);
214 if (!f.is_valid())
215 throw RipleyException("readNcGrid(): cannot open file");
216
217 NcVar* var = f.get_var(varname.c_str());
218 if (!var)
219 throw RipleyException("readNcGrid(): invalid variable");
220
221 // TODO: rank>0 data support
222 const int numComp = out.getDataPointSize();
223 if (numComp > 1)
224 throw RipleyException("readNcGrid(): only scalar data supported");
225
226 const int dims = var->num_dims();
227 boost::scoped_array<long> edges(var->edges());
228
229 // is this a slice of the data object (dims!=2)?
230 // note the expected ordering of edges (as in numpy: y,x)
231 if ( (dims==2 && (params.numValues[1] > edges[0] || params.numValues[0] > edges[1]))
232 || (dims==1 && params.numValues[1]>1) ) {
233 throw RipleyException("readNcGrid(): not enough data in file");
234 }
235
236 // check if this rank contributes anything
237 if (params.first[0] >= m_offset[0]+myN0 ||
238 params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
239 params.first[1] >= m_offset[1]+myN1 ||
240 params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1])
241 return;
242
243 // now determine how much this rank has to write
244
245 // first coordinates in data object to write to
246 const int first0 = max(0, params.first[0]-m_offset[0]);
247 const int first1 = max(0, params.first[1]-m_offset[1]);
248 // indices to first value in file (not accounting for reverse yet)
249 int idx0 = max(0, m_offset[0]-params.first[0]);
250 int idx1 = max(0, m_offset[1]-params.first[1]);
251 // number of values to read
252 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
253 const int num1 = min(params.numValues[1]-idx1, myN1-first1);
254
255 // make sure we read the right block if going backwards through file
256 if (params.reverse[0])
257 idx0 = edges[dims-1]-num0-idx0;
258 if (dims>1 && params.reverse[1])
259 idx1 = edges[dims-2]-num1-idx1;
260
261 vector<double> values(num0*num1);
262 if (dims==2) {
263 var->set_cur(idx1, idx0);
264 var->get(&values[0], num1, num0);
265 } else {
266 var->set_cur(idx0);
267 var->get(&values[0], num0);
268 }
269
270 const int dpp = out.getNumDataPointsPerSample();
271 out.requireWrite();
272
273 // helpers for reversing
274 const int x0 = (params.reverse[0] ? num0-1 : 0);
275 const int x_mult = (params.reverse[0] ? -1 : 1);
276 const int y0 = (params.reverse[1] ? num1-1 : 0);
277 const int y_mult = (params.reverse[1] ? -1 : 1);
278
279 for (index_t y=0; y<num1; y++) {
280 #pragma omp parallel for
281 for (index_t x=0; x<num0; x++) {
282 const int baseIndex = first0+x*params.multiplier[0]
283 +(first1+y*params.multiplier[1])*myN0;
284 const int srcIndex = (y0+y_mult*y)*num0+(x0+x_mult*x);
285 if (!isnan(values[srcIndex])) {
286 for (index_t m1=0; m1<params.multiplier[1]; m1++) {
287 for (index_t m0=0; m0<params.multiplier[0]; m0++) {
288 const int dataIndex = baseIndex+m0+m1*myN0;
289 double* dest = out.getSampleDataRW(dataIndex);
290 for (index_t q=0; q<dpp; q++) {
291 *dest++ = values[srcIndex];
292 }
293 }
294 }
295 }
296 }
297 }
298 #else
299 throw RipleyException("readNcGrid(): not compiled with netCDF support");
300 #endif
301 }
302
303 void Rectangle::readBinaryGrid(escript::Data& out, string filename,
304 const ReaderParameters& params) const
305 {
306 // the mapping is not universally correct but should work on our
307 // supported platforms
308 switch (params.dataType) {
309 case DATATYPE_INT32:
310 readBinaryGridImpl<int>(out, filename, params);
311 break;
312 case DATATYPE_FLOAT32:
313 readBinaryGridImpl<float>(out, filename, params);
314 break;
315 case DATATYPE_FLOAT64:
316 readBinaryGridImpl<double>(out, filename, params);
317 break;
318 default:
319 throw RipleyException("readBinaryGrid(): invalid or unsupported datatype");
320 }
321 }
322
323 template<typename ValueType>
324 void Rectangle::readBinaryGridImpl(escript::Data& out, const string& filename,
325 const ReaderParameters& params) const
326 {
327 // check destination function space
328 int myN0, myN1;
329 if (out.getFunctionSpace().getTypeCode() == Nodes) {
330 myN0 = m_NN[0];
331 myN1 = m_NN[1];
332 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
333 out.getFunctionSpace().getTypeCode() == ReducedElements) {
334 myN0 = m_NE[0];
335 myN1 = m_NE[1];
336 } else
337 throw RipleyException("readBinaryGrid(): invalid function space for output data object");
338
339 // check file existence and size
340 ifstream f(filename.c_str(), ifstream::binary);
341 if (f.fail()) {
342 throw RipleyException("readBinaryGrid(): cannot open file");
343 }
344 f.seekg(0, ios::end);
345 const int numComp = out.getDataPointSize();
346 const int filesize = f.tellg();
347 const int reqsize = params.numValues[0]*params.numValues[1]*numComp*sizeof(ValueType);
348 if (filesize < reqsize) {
349 f.close();
350 throw RipleyException("readBinaryGrid(): not enough data in file");
351 }
352
353 // check if this rank contributes anything
354 if (params.first[0] >= m_offset[0]+myN0 ||
355 params.first[0]+params.numValues[0] <= m_offset[0] ||
356 params.first[1] >= m_offset[1]+myN1 ||
357 params.first[1]+params.numValues[1] <= m_offset[1]) {
358 f.close();
359 return;
360 }
361
362 // now determine how much this rank has to write
363
364 // first coordinates in data object to write to
365 const int first0 = max(0, params.first[0]-m_offset[0]);
366 const int first1 = max(0, params.first[1]-m_offset[1]);
367 // indices to first value in file
368 const int idx0 = max(0, m_offset[0]-params.first[0]);
369 const int idx1 = max(0, m_offset[1]-params.first[1]);
370 // number of values to read
371 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
372 const int num1 = min(params.numValues[1]-idx1, myN1-first1);
373
374 out.requireWrite();
375 vector<ValueType> values(num0*numComp);
376 const int dpp = out.getNumDataPointsPerSample();
377
378 for (int y=0; y<num1; y++) {
379 const int fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]);
380 f.seekg(fileofs*sizeof(ValueType));
381 f.read((char*)&values[0], num0*numComp*sizeof(ValueType));
382 for (int x=0; x<num0; x++) {
383 const int baseIndex = first0+x*params.multiplier[0]
384 +(first1+y*params.multiplier[1])*myN0;
385 for (int m1=0; m1<params.multiplier[1]; m1++) {
386 for (int m0=0; m0<params.multiplier[0]; m0++) {
387 const int dataIndex = baseIndex+m0+m1*myN0;
388 double* dest = out.getSampleDataRW(dataIndex);
389 for (int c=0; c<numComp; c++) {
390 ValueType val = values[x*numComp+c];
391
392 if (params.byteOrder != BYTEORDER_NATIVE) {
393 char* cval = reinterpret_cast<char*>(&val);
394 // this will alter val!!
395 byte_swap32(cval);
396 }
397 if (!std::isnan(val)) {
398 for (int q=0; q<dpp; q++) {
399 *dest++ = static_cast<double>(val);
400 }
401 }
402 }
403 }
404 }
405 }
406 }
407
408 f.close();
409 }
410
411 void Rectangle::writeBinaryGrid(const escript::Data& in, string filename,
412 int byteOrder, int dataType) const
413 {
414 // the mapping is not universally correct but should work on our
415 // supported platforms
416 switch (dataType) {
417 case DATATYPE_INT32:
418 writeBinaryGridImpl<int>(in, filename, byteOrder);
419 break;
420 case DATATYPE_FLOAT32:
421 writeBinaryGridImpl<float>(in, filename, byteOrder);
422 break;
423 case DATATYPE_FLOAT64:
424 writeBinaryGridImpl<double>(in, filename, byteOrder);
425 break;
426 default:
427 throw RipleyException("writeBinaryGrid(): invalid or unsupported datatype");
428 }
429 }
430
431 template<typename ValueType>
432 void Rectangle::writeBinaryGridImpl(const escript::Data& in,
433 const string& filename, int byteOrder) const
434 {
435 // check function space and determine number of points
436 int myN0, myN1;
437 int totalN0, totalN1;
438 if (in.getFunctionSpace().getTypeCode() == Nodes) {
439 myN0 = m_NN[0];
440 myN1 = m_NN[1];
441 totalN0 = m_gNE[0]+1;
442 totalN1 = m_gNE[1]+1;
443 } else if (in.getFunctionSpace().getTypeCode() == Elements ||
444 in.getFunctionSpace().getTypeCode() == ReducedElements) {
445 myN0 = m_NE[0];
446 myN1 = m_NE[1];
447 totalN0 = m_gNE[0];
448 totalN1 = m_gNE[1];
449 } else
450 throw RipleyException("writeBinaryGrid(): invalid function space of data object");
451
452 const int numComp = in.getDataPointSize();
453 const int dpp = in.getNumDataPointsPerSample();
454
455 if (numComp > 1 || dpp > 1)
456 throw RipleyException("writeBinaryGrid(): only scalar, single-value data supported");
457
458 const int fileSize = sizeof(ValueType)*numComp*dpp*totalN0*totalN1;
459
460 // from here on we know that each sample consists of one value
461 FileWriter fw;
462 fw.openFile(filename, fileSize);
463 MPIBarrier();
464
465 for (index_t y=0; y<myN1; y++) {
466 const int fileofs = (m_offset[0]+(m_offset[1]+y)*totalN0)*sizeof(ValueType);
467 ostringstream oss;
468
469 for (index_t x=0; x<myN0; x++) {
470 const double* sample = in.getSampleDataRO(y*myN0+x);
471 ValueType fvalue = static_cast<ValueType>(*sample);
472 if (byteOrder == BYTEORDER_NATIVE) {
473 oss.write((char*)&fvalue, sizeof(fvalue));
474 } else {
475 char* value = reinterpret_cast<char*>(&fvalue);
476 oss.write(byte_swap32(value), sizeof(fvalue));
477 }
478 }
479 fw.writeAt(oss, fileofs);
480 }
481 fw.close();
482 }
483
484 void Rectangle::dump(const string& fileName) const
485 {
486 #if USE_SILO
487 string fn(fileName);
488 if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
489 fn+=".silo";
490 }
491
492 int driver=DB_HDF5;
493 DBfile* dbfile = NULL;
494 const char* blockDirFmt = "/block%04d";
495
496 #ifdef ESYS_MPI
497 PMPIO_baton_t* baton = NULL;
498 const int NUM_SILO_FILES = 1;
499 #endif
500
501 if (m_mpiInfo->size > 1) {
502 #ifdef ESYS_MPI
503 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
504 0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
505 PMPIO_DefaultClose, (void*)&driver);
506 // try the fallback driver in case of error
507 if (!baton && driver != DB_PDB) {
508 driver = DB_PDB;
509 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
510 0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
511 PMPIO_DefaultClose, (void*)&driver);
512 }
513 if (baton) {
514 char siloPath[64];
515 snprintf(siloPath, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
516 dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath);
517 }
518 #endif
519 } else {
520 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
521 getDescription().c_str(), driver);
522 // try the fallback driver in case of error
523 if (!dbfile && driver != DB_PDB) {
524 driver = DB_PDB;
525 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
526 getDescription().c_str(), driver);
527 }
528 char siloPath[64];
529 snprintf(siloPath, 64, blockDirFmt, 0);
530 DBMkDir(dbfile, siloPath);
531 DBSetDir(dbfile, siloPath);
532 }
533
534 if (!dbfile)
535 throw RipleyException("dump: Could not create Silo file");
536
537 /*
538 if (driver==DB_HDF5) {
539 // gzip level 1 already provides good compression with minimal
540 // performance penalty. Some tests showed that gzip levels >3 performed
541 // rather badly on escript data both in terms of time and space
542 DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
543 }
544 */
545
546 boost::scoped_ptr<double> x(new double[m_NN[0]]);
547 boost::scoped_ptr<double> y(new double[m_NN[1]]);
548 double* coords[2] = { x.get(), y.get() };
549 #pragma omp parallel
550 {
551 #pragma omp for nowait
552 for (dim_t i0 = 0; i0 < m_NN[0]; i0++) {
553 coords[0][i0]=getLocalCoordinate(i0, 0);
554 }
555 #pragma omp for nowait
556 for (dim_t i1 = 0; i1 < m_NN[1]; i1++) {
557 coords[1][i1]=getLocalCoordinate(i1, 1);
558 }
559 }
560 int* dims = const_cast<int*>(getNumNodesPerDim());
561
562 // write mesh
563 DBPutQuadmesh(dbfile, "mesh", NULL, coords, dims, 2, DB_DOUBLE,
564 DB_COLLINEAR, NULL);
565
566 // write node ids
567 DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], dims, 2,
568 NULL, 0, DB_INT, DB_NODECENT, NULL);
569
570 // write element ids
571 dims = const_cast<int*>(getNumElementsPerDim());
572 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
573 dims, 2, NULL, 0, DB_INT, DB_ZONECENT, NULL);
574
575 // rank 0 writes multimesh and multivar
576 if (m_mpiInfo->rank == 0) {
577 vector<string> tempstrings;
578 vector<char*> names;
579 for (dim_t i=0; i<m_mpiInfo->size; i++) {
580 stringstream path;
581 path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
582 tempstrings.push_back(path.str());
583 names.push_back((char*)tempstrings.back().c_str());
584 }
585 vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
586 DBSetDir(dbfile, "/");
587 DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
588 &types[0], NULL);
589 tempstrings.clear();
590 names.clear();
591 for (dim_t i=0; i<m_mpiInfo->size; i++) {
592 stringstream path;
593 path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
594 tempstrings.push_back(path.str());
595 names.push_back((char*)tempstrings.back().c_str());
596 }
597 types.assign(m_mpiInfo->size, DB_QUADVAR);
598 DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
599 &types[0], NULL);
600 tempstrings.clear();
601 names.clear();
602 for (dim_t i=0; i<m_mpiInfo->size; i++) {
603 stringstream path;
604 path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
605 tempstrings.push_back(path.str());
606 names.push_back((char*)tempstrings.back().c_str());
607 }
608 DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
609 &types[0], NULL);
610 }
611
612 if (m_mpiInfo->size > 1) {
613 #ifdef ESYS_MPI
614 PMPIO_HandOffBaton(baton, dbfile);
615 PMPIO_Finish(baton);
616 #endif
617 } else {
618 DBClose(dbfile);
619 }
620
621 #else // USE_SILO
622 throw RipleyException("dump: no Silo support");
623 #endif
624 }
625
626 const int* Rectangle::borrowSampleReferenceIDs(int fsType) const
627 {
628 switch (fsType) {
629 case Nodes:
630 case ReducedNodes: // FIXME: reduced
631 return &m_nodeId[0];
632 case DegreesOfFreedom:
633 case ReducedDegreesOfFreedom: // FIXME: reduced
634 return &m_dofId[0];
635 case Elements:
636 case ReducedElements:
637 return &m_elementId[0];
638 case FaceElements:
639 case ReducedFaceElements:
640 return &m_faceId[0];
641 case Points:
642 return &m_diracPointNodeIDs[0];
643 default:
644 break;
645 }
646
647 stringstream msg;
648 msg << "borrowSampleReferenceIDs: invalid function space type " << fsType;
649 throw RipleyException(msg.str());
650 }
651
652 bool Rectangle::ownSample(int fsType, index_t id) const
653 {
654 if (getMPISize()==1)
655 return true;
656
657 switch (fsType) {
658 case Nodes:
659 case ReducedNodes: // FIXME: reduced
660 return (m_dofMap[id] < getNumDOF());
661 case DegreesOfFreedom:
662 case ReducedDegreesOfFreedom:
663 return true;
664 case Elements:
665 case ReducedElements:
666 // check ownership of element's bottom left node
667 return (m_dofMap[id%m_NE[0]+m_NN[0]*(id/m_NE[0])] < getNumDOF());
668 case FaceElements:
669 case ReducedFaceElements:
670 {
671 // determine which face the sample belongs to before
672 // checking ownership of corresponding element's first node
673 dim_t n=0;
674 for (size_t i=0; i<4; i++) {
675 n+=m_faceCount[i];
676 if (id<n) {
677 index_t k;
678 if (i==1)
679 k=m_NN[0]-2;
680 else if (i==3)
681 k=m_NN[0]*(m_NN[1]-2);
682 else
683 k=0;
684 // determine whether to move right or up
685 const index_t delta=(i/2==0 ? m_NN[0] : 1);
686 return (m_dofMap[k+(id-n+m_faceCount[i])*delta] < getNumDOF());
687 }
688 }
689 return false;
690 }
691 default:
692 break;
693 }
694
695 stringstream msg;
696 msg << "ownSample: invalid function space type " << fsType;
697 throw RipleyException(msg.str());
698 }
699
700 void Rectangle::setToNormal(escript::Data& out) const
701 {
702 if (out.getFunctionSpace().getTypeCode() == FaceElements) {
703 out.requireWrite();
704 #pragma omp parallel
705 {
706 if (m_faceOffset[0] > -1) {
707 #pragma omp for nowait
708 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
709 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
710 // set vector at two quadrature points
711 *o++ = -1.;
712 *o++ = 0.;
713 *o++ = -1.;
714 *o = 0.;
715 }
716 }
717
718 if (m_faceOffset[1] > -1) {
719 #pragma omp for nowait
720 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
721 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
722 // set vector at two quadrature points
723 *o++ = 1.;
724 *o++ = 0.;
725 *o++ = 1.;
726 *o = 0.;
727 }
728 }
729
730 if (m_faceOffset[2] > -1) {
731 #pragma omp for nowait
732 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
733 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
734 // set vector at two quadrature points
735 *o++ = 0.;
736 *o++ = -1.;
737 *o++ = 0.;
738 *o = -1.;
739 }
740 }
741
742 if (m_faceOffset[3] > -1) {
743 #pragma omp for nowait
744 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
745 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
746 // set vector at two quadrature points
747 *o++ = 0.;
748 *o++ = 1.;
749 *o++ = 0.;
750 *o = 1.;
751 }
752 }
753 } // end of parallel section
754 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
755 out.requireWrite();
756 #pragma omp parallel
757 {
758 if (m_faceOffset[0] > -1) {
759 #pragma omp for nowait
760 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
761 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
762 *o++ = -1.;
763 *o = 0.;
764 }
765 }
766
767 if (m_faceOffset[1] > -1) {
768 #pragma omp for nowait
769 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
770 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
771 *o++ = 1.;
772 *o = 0.;
773 }
774 }
775
776 if (m_faceOffset[2] > -1) {
777 #pragma omp for nowait
778 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
779 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
780 *o++ = 0.;
781 *o = -1.;
782 }
783 }
784
785 if (m_faceOffset[3] > -1) {
786 #pragma omp for nowait
787 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
788 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
789 *o++ = 0.;
790 *o = 1.;
791 }
792 }
793 } // end of parallel section
794
795 } else {
796 stringstream msg;
797 msg << "setToNormal: invalid function space type "
798 << out.getFunctionSpace().getTypeCode();
799 throw RipleyException(msg.str());
800 }
801 }
802
803 void Rectangle::setToSize(escript::Data& out) const
804 {
805 if (out.getFunctionSpace().getTypeCode() == Elements
806 || out.getFunctionSpace().getTypeCode() == ReducedElements) {
807 out.requireWrite();
808 const dim_t numQuad=out.getNumDataPointsPerSample();
809 const double size=sqrt(m_dx[0]*m_dx[0]+m_dx[1]*m_dx[1]);
810 #pragma omp parallel for
811 for (index_t k = 0; k < getNumElements(); ++k) {
812 double* o = out.getSampleDataRW(k);
813 fill(o, o+numQuad, size);
814 }
815 } else if (out.getFunctionSpace().getTypeCode() == FaceElements
816 || out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
817 out.requireWrite();
818 const dim_t numQuad=out.getNumDataPointsPerSample();
819 #pragma omp parallel
820 {
821 if (m_faceOffset[0] > -1) {
822 #pragma omp for nowait
823 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
824 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
825 fill(o, o+numQuad, m_dx[1]);
826 }
827 }
828
829 if (m_faceOffset[1] > -1) {
830 #pragma omp for nowait
831 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
832 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
833 fill(o, o+numQuad, m_dx[1]);
834 }
835 }
836
837 if (m_faceOffset[2] > -1) {
838 #pragma omp for nowait
839 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
840 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
841 fill(o, o+numQuad, m_dx[0]);
842 }
843 }
844
845 if (m_faceOffset[3] > -1) {
846 #pragma omp for nowait
847 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
848 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
849 fill(o, o+numQuad, m_dx[0]);
850 }
851 }
852 } // end of parallel section
853
854 } else {
855 stringstream msg;
856 msg << "setToSize: invalid function space type "
857 << out.getFunctionSpace().getTypeCode();
858 throw RipleyException(msg.str());
859 }
860 }
861
862 void Rectangle::Print_Mesh_Info(const bool full) const
863 {
864 RipleyDomain::Print_Mesh_Info(full);
865 if (full) {
866 cout << " Id Coordinates" << endl;
867 cout.precision(15);
868 cout.setf(ios::scientific, ios::floatfield);
869 for (index_t i=0; i < getNumNodes(); i++) {
870 cout << " " << setw(5) << m_nodeId[i]
871 << " " << getLocalCoordinate(i%m_NN[0], 0)
872 << " " << getLocalCoordinate(i/m_NN[0], 1) << endl;
873 }
874 }
875 }
876
877
878 //protected
879 void Rectangle::assembleCoordinates(escript::Data& arg) const
880 {
881 escriptDataC x = arg.getDataC();
882 int numDim = m_numDim;
883 if (!isDataPointShapeEqual(&x, 1, &numDim))
884 throw RipleyException("setToX: Invalid Data object shape");
885 if (!numSamplesEqual(&x, 1, getNumNodes()))
886 throw RipleyException("setToX: Illegal number of samples in Data object");
887
888 arg.requireWrite();
889 #pragma omp parallel for
890 for (dim_t i1 = 0; i1 < m_NN[1]; i1++) {
891 for (dim_t i0 = 0; i0 < m_NN[0]; i0++) {
892 double* point = arg.getSampleDataRW(i0+m_NN[0]*i1);
893 point[0] = getLocalCoordinate(i0, 0);
894 point[1] = getLocalCoordinate(i1, 1);
895 }
896 }
897 }
898
899 //protected
900 void Rectangle::assembleGradient(escript::Data& out, const escript::Data& in) const
901 {
902 const dim_t numComp = in.getDataPointSize();
903 const double cx0 = .21132486540518711775/m_dx[0];
904 const double cx1 = .78867513459481288225/m_dx[0];
905 const double cx2 = 1./m_dx[0];
906 const double cy0 = .21132486540518711775/m_dx[1];
907 const double cy1 = .78867513459481288225/m_dx[1];
908 const double cy2 = 1./m_dx[1];
909
910 if (out.getFunctionSpace().getTypeCode() == Elements) {
911 out.requireWrite();
912 #pragma omp parallel
913 {
914 vector<double> f_00(numComp);
915 vector<double> f_01(numComp);
916 vector<double> f_10(numComp);
917 vector<double> f_11(numComp);
918 #pragma omp for
919 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
920 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
921 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,k1, m_NN[0])), numComp*sizeof(double));
922 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,k1+1, m_NN[0])), numComp*sizeof(double));
923 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,k1, m_NN[0])), numComp*sizeof(double));
924 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,k1+1, m_NN[0])), numComp*sizeof(double));
925 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE[0]));
926 for (index_t i=0; i < numComp; ++i) {
927 o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx1 + (f_11[i]-f_01[i])*cx0;
928 o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy1 + (f_11[i]-f_10[i])*cy0;
929 o[INDEX3(i,0,1,numComp,2)] = (f_10[i]-f_00[i])*cx1 + (f_11[i]-f_01[i])*cx0;
930 o[INDEX3(i,1,1,numComp,2)] = (f_01[i]-f_00[i])*cy0 + (f_11[i]-f_10[i])*cy1;
931 o[INDEX3(i,0,2,numComp,2)] = (f_10[i]-f_00[i])*cx0 + (f_11[i]-f_01[i])*cx1;
932 o[INDEX3(i,1,2,numComp,2)] = (f_01[i]-f_00[i])*cy1 + (f_11[i]-f_10[i])*cy0;
933 o[INDEX3(i,0,3,numComp,2)] = (f_10[i]-f_00[i])*cx0 + (f_11[i]-f_01[i])*cx1;
934 o[INDEX3(i,1,3,numComp,2)] = (f_01[i]-f_00[i])*cy0 + (f_11[i]-f_10[i])*cy1;
935 } // end of component loop i
936 } // end of k0 loop
937 } // end of k1 loop
938 } // end of parallel section
939 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
940 out.requireWrite();
941 #pragma omp parallel
942 {
943 vector<double> f_00(numComp);
944 vector<double> f_01(numComp);
945 vector<double> f_10(numComp);
946 vector<double> f_11(numComp);
947 #pragma omp for
948 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
949 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
950 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,k1, m_NN[0])), numComp*sizeof(double));
951 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,k1+1, m_NN[0])), numComp*sizeof(double));
952 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,k1, m_NN[0])), numComp*sizeof(double));
953 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,k1+1, m_NN[0])), numComp*sizeof(double));
954 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE[0]));
955 for (index_t i=0; i < numComp; ++i) {
956 o[INDEX3(i,0,0,numComp,2)] = (f_10[i] + f_11[i] - f_00[i] - f_01[i])*cx2/2;
957 o[INDEX3(i,1,0,numComp,2)] = (f_01[i] + f_11[i] - f_00[i] - f_10[i])*cy2/2;
958 } // end of component loop i
959 } // end of k0 loop
960 } // end of k1 loop
961 } // end of parallel section
962 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
963 out.requireWrite();
964 #pragma omp parallel
965 {
966 vector<double> f_00(numComp);
967 vector<double> f_01(numComp);
968 vector<double> f_10(numComp);
969 vector<double> f_11(numComp);
970 if (m_faceOffset[0] > -1) {
971 #pragma omp for nowait
972 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
973 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(0,k1, m_NN[0])), numComp*sizeof(double));
974 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(0,k1+1, m_NN[0])), numComp*sizeof(double));
975 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(1,k1, m_NN[0])), numComp*sizeof(double));
976 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(1,k1+1, m_NN[0])), numComp*sizeof(double));
977 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
978 for (index_t i=0; i < numComp; ++i) {
979 o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx1 + (f_11[i]-f_01[i])*cx0;
980 o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy2;
981 o[INDEX3(i,0,1,numComp,2)] = (f_10[i]-f_00[i])*cx0 + (f_11[i]-f_01[i])*cx1;
982 o[INDEX3(i,1,1,numComp,2)] = (f_01[i]-f_00[i])*cy2;
983 } // end of component loop i
984 } // end of k1 loop
985 } // end of face 0
986 if (m_faceOffset[1] > -1) {
987 #pragma omp for nowait
988 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
989 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(m_NN[0]-2,k1, m_NN[0])), numComp*sizeof(double));
990 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(m_NN[0]-2,k1+1, m_NN[0])), numComp*sizeof(double));
991 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1, m_NN[0])), numComp*sizeof(double));
992 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1+1, m_NN[0])), numComp*sizeof(double));
993 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
994 for (index_t i=0; i < numComp; ++i) {
995 o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx1 + (f_11[i]-f_01[i])*cx0;
996 o[INDEX3(i,1,0,numComp,2)] = (f_11[i]-f_10[i])*cy2;
997 o[INDEX3(i,0,1,numComp,2)] = (f_10[i]-f_00[i])*cx0 + (f_11[i]-f_01[i])*cx1;
998 o[INDEX3(i,1,1,numComp,2)] = (f_11[i]-f_10[i])*cy2;
999 } // end of component loop i
1000 } // end of k1 loop
1001 } // end of face 1
1002 if (m_faceOffset[2] > -1) {
1003 #pragma omp for nowait
1004 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1005 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,0, m_NN[0])), numComp*sizeof(double));
1006 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,1, m_NN[0])), numComp*sizeof(double));
1007 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,0, m_NN[0])), numComp*sizeof(double));
1008 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,1, m_NN[0])), numComp*sizeof(double));
1009 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1010 for (index_t i=0; i < numComp; ++i) {
1011 o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx2;
1012 o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy1 + (f_11[i]-f_10[i])*cy0;
1013 o[INDEX3(i,0,1,numComp,2)] = (f_10[i]-f_00[i])*cx2;
1014 o[INDEX3(i,1,1,numComp,2)] = (f_01[i]-f_00[i])*cy0 + (f_11[i]-f_10[i])*cy1;
1015 } // end of component loop i
1016 } // end of k0 loop
1017 } // end of face 2
1018 if (m_faceOffset[3] > -1) {
1019 #pragma omp for nowait
1020 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1021 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-2, m_NN[0])), numComp*sizeof(double));
1022 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1023 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-2, m_NN[0])), numComp*sizeof(double));
1024 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1025 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1026 for (index_t i=0; i < numComp; ++i) {
1027 o[INDEX3(i,0,0,numComp,2)] = (f_11[i]-f_01[i])*cx2;
1028 o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy1 + (f_11[i]-f_10[i])*cy0;
1029 o[INDEX3(i,0,1,numComp,2)] = (f_11[i]-f_01[i])*cx2;
1030 o[INDEX3(i,1,1,numComp,2)] = (f_01[i]-f_00[i])*cy0 + (f_11[i]-f_10[i])*cy1;
1031 } // end of component loop i
1032 } // end of k0 loop
1033 } // end of face 3
1034 } // end of parallel section
1035
1036 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1037 out.requireWrite();
1038 #pragma omp parallel
1039 {
1040 vector<double> f_00(numComp);
1041 vector<double> f_01(numComp);
1042 vector<double> f_10(numComp);
1043 vector<double> f_11(numComp);
1044 if (m_faceOffset[0] > -1) {
1045 #pragma omp for nowait
1046 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1047 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(0,k1, m_NN[0])), numComp*sizeof(double));
1048 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(0,k1+1, m_NN[0])), numComp*sizeof(double));
1049 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(1,k1, m_NN[0])), numComp*sizeof(double));
1050 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(1,k1+1, m_NN[0])), numComp*sizeof(double));
1051 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
1052 for (index_t i=0; i < numComp; ++i) {
1053 o[INDEX3(i,0,0,numComp,2)] = (f_10[i] + f_11[i] - f_00[i] - f_01[i])*cx2/2;
1054 o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy2;
1055 } // end of component loop i
1056 } // end of k1 loop
1057 } // end of face 0
1058 if (m_faceOffset[1] > -1) {
1059 #pragma omp for nowait
1060 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1061 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(m_NN[0]-2,k1, m_NN[0])), numComp*sizeof(double));
1062 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(m_NN[0]-2,k1+1, m_NN[0])), numComp*sizeof(double));
1063 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1, m_NN[0])), numComp*sizeof(double));
1064 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1+1, m_NN[0])), numComp*sizeof(double));
1065 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
1066 for (index_t i=0; i < numComp; ++i) {
1067 o[INDEX3(i,0,0,numComp,2)] = (f_10[i] + f_11[i] - f_00[i] - f_01[i])*cx2/2;
1068 o[INDEX3(i,1,0,numComp,2)] = (f_11[i]-f_10[i])*cy2;
1069 } // end of component loop i
1070 } // end of k1 loop
1071 } // end of face 1
1072 if (m_faceOffset[2] > -1) {
1073 #pragma omp for nowait
1074 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1075 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,0, m_NN[0])), numComp*sizeof(double));
1076 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,1, m_NN[0])), numComp*sizeof(double));
1077 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,0, m_NN[0])), numComp*sizeof(double));
1078 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,1, m_NN[0])), numComp*sizeof(double));
1079 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1080 for (index_t i=0; i < numComp; ++i) {
1081 o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx2;
1082 o[INDEX3(i,1,0,numComp,2)] = (f_01[i] + f_11[i] - f_00[i] - f_10[i])*cy2/2;
1083 } // end of component loop i
1084 } // end of k0 loop
1085 } // end of face 2
1086 if (m_faceOffset[3] > -1) {
1087 #pragma omp for nowait
1088 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1089 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-2, m_NN[0])), numComp*sizeof(double));
1090 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1091 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-2, m_NN[0])), numComp*sizeof(double));
1092 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1093 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1094 for (index_t i=0; i < numComp; ++i) {
1095 o[INDEX3(i,0,0,numComp,2)] = (f_11[i]-f_01[i])*cx2;
1096 o[INDEX3(i,1,0,numComp,2)] = (f_01[i] + f_11[i] - f_00[i] - f_10[i])*cy2/2;
1097 } // end of component loop i
1098 } // end of k0 loop
1099 } // end of face 3
1100 } // end of parallel section
1101 }
1102 }
1103
1104 //protected
1105 void Rectangle::assembleIntegrate(vector<double>& integrals,
1106 const escript::Data& arg) const
1107 {
1108 const dim_t numComp = arg.getDataPointSize();
1109 const index_t left = (m_offset[0]==0 ? 0 : 1);
1110 const index_t bottom = (m_offset[1]==0 ? 0 : 1);
1111 const int fs=arg.getFunctionSpace().getTypeCode();
1112 if (fs == Elements && arg.actsExpanded()) {
1113 #pragma omp parallel
1114 {
1115 vector<double> int_local(numComp, 0);
1116 const double w = m_dx[0]*m_dx[1]/4.;
1117 #pragma omp for nowait
1118 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1119 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1120 const double* f = arg.getSampleDataRO(INDEX2(k0, k1, m_NE[0]));
1121 for (index_t i=0; i < numComp; ++i) {
1122 const double f0 = f[INDEX2(i,0,numComp)];
1123 const double f1 = f[INDEX2(i,1,numComp)];
1124 const double f2 = f[INDEX2(i,2,numComp)];
1125 const double f3 = f[INDEX2(i,3,numComp)];
1126 int_local[i]+=(f0+f1+f2+f3)*w;
1127 } // end of component loop i
1128 } // end of k0 loop
1129 } // end of k1 loop
1130 #pragma omp critical
1131 for (index_t i=0; i<numComp; i++)
1132 integrals[i]+=int_local[i];
1133 } // end of parallel section
1134
1135 } else if (fs==ReducedElements || (fs==Elements && !arg.actsExpanded())) {
1136 const double w = m_dx[0]*m_dx[1];
1137 #pragma omp parallel
1138 {
1139 vector<double> int_local(numComp, 0);
1140 #pragma omp for nowait
1141 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1142 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1143 const double* f = arg.getSampleDataRO(INDEX2(k0, k1, m_NE[0]));
1144 for (index_t i=0; i < numComp; ++i) {
1145 int_local[i]+=f[i]*w;
1146 }
1147 }
1148 }
1149 #pragma omp critical
1150 for (index_t i=0; i<numComp; i++)
1151 integrals[i]+=int_local[i];
1152 } // end of parallel section
1153
1154 } else if (fs == FaceElements && arg.actsExpanded()) {
1155 #pragma omp parallel
1156 {
1157 vector<double> int_local(numComp, 0);
1158 const double w0 = m_dx[0]/2.;
1159 const double w1 = m_dx[1]/2.;
1160 if (m_faceOffset[0] > -1) {
1161 #pragma omp for nowait
1162 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1163 const double* f = arg.getSampleDataRO(m_faceOffset[0]+k1);
1164 for (index_t i=0; i < numComp; ++i) {
1165 const double f0 = f[INDEX2(i,0,numComp)];
1166 const double f1 = f[INDEX2(i,1,numComp)];
1167 int_local[i]+=(f0+f1)*w1;
1168 } // end of component loop i
1169 } // end of k1 loop
1170 }
1171
1172 if (m_faceOffset[1] > -1) {
1173 #pragma omp for nowait
1174 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1175 const double* f = arg.getSampleDataRO(m_faceOffset[1]+k1);
1176 for (index_t i=0; i < numComp; ++i) {
1177 const double f0 = f[INDEX2(i,0,numComp)];
1178 const double f1 = f[INDEX2(i,1,numComp)];
1179 int_local[i]+=(f0+f1)*w1;
1180 } // end of component loop i
1181 } // end of k1 loop
1182 }
1183
1184 if (m_faceOffset[2] > -1) {
1185 #pragma omp for nowait
1186 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1187 const double* f = arg.getSampleDataRO(m_faceOffset[2]+k0);
1188 for (index_t i=0; i < numComp; ++i) {
1189 const double f0 = f[INDEX2(i,0,numComp)];
1190 const double f1 = f[INDEX2(i,1,numComp)];
1191 int_local[i]+=(f0+f1)*w0;
1192 } // end of component loop i
1193 } // end of k0 loop
1194 }
1195
1196 if (m_faceOffset[3] > -1) {
1197 #pragma omp for nowait
1198 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1199 const double* f = arg.getSampleDataRO(m_faceOffset[3]+k0);
1200 for (index_t i=0; i < numComp; ++i) {
1201 const double f0 = f[INDEX2(i,0,numComp)];
1202 const double f1 = f[INDEX2(i,1,numComp)];
1203 int_local[i]+=(f0+f1)*w0;
1204 } // end of component loop i
1205 } // end of k0 loop
1206 }
1207 #pragma omp critical
1208 for (index_t i=0; i<numComp; i++)
1209 integrals[i]+=int_local[i];
1210 } // end of parallel section
1211
1212 } else if (fs==ReducedFaceElements || (fs==FaceElements && !arg.actsExpanded())) {
1213 #pragma omp parallel
1214 {
1215 vector<double> int_local(numComp, 0);
1216 if (m_faceOffset[0] > -1) {
1217 #pragma omp for nowait
1218 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1219 const double* f = arg.getSampleDataRO(m_faceOffset[0]+k1);
1220 for (index_t i=0; i < numComp; ++i) {
1221 int_local[i]+=f[i]*m_dx[1];
1222 }
1223 }
1224 }
1225
1226 if (m_faceOffset[1] > -1) {
1227 #pragma omp for nowait
1228 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1229 const double* f = arg.getSampleDataRO(m_faceOffset[1]+k1);
1230 for (index_t i=0; i < numComp; ++i) {
1231 int_local[i]+=f[i]*m_dx[1];
1232 }
1233 }
1234 }
1235
1236 if (m_faceOffset[2] > -1) {
1237 #pragma omp for nowait
1238 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1239 const double* f = arg.getSampleDataRO(m_faceOffset[2]+k0);
1240 for (index_t i=0; i < numComp; ++i) {
1241 int_local[i]+=f[i]*m_dx[0];
1242 }
1243 }
1244 }
1245
1246 if (m_faceOffset[3] > -1) {
1247 #pragma omp for nowait
1248 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1249 const double* f = arg.getSampleDataRO(m_faceOffset[3]+k0);
1250 for (index_t i=0; i < numComp; ++i) {
1251 int_local[i]+=f[i]*m_dx[0];
1252 }
1253 }
1254 }
1255
1256 #pragma omp critical
1257 for (index_t i=0; i<numComp; i++)
1258 integrals[i]+=int_local[i];
1259 } // end of parallel section
1260 } // function space selector
1261 }
1262
1263 //protected
1264 dim_t Rectangle::insertNeighbourNodes(IndexVector& index, index_t node) const
1265 {
1266 const dim_t nDOF0 = (m_gNE[0]+1)/m_NX[0];
1267 const dim_t nDOF1 = (m_gNE[1]+1)/m_NX[1];
1268 const int x=node%nDOF0;
1269 const int y=node/nDOF0;
1270 dim_t num=0;
1271 // loop through potential neighbours and add to index if positions are
1272 // within bounds
1273 for (int i1=-1; i1<2; i1++) {
1274 for (int i0=-1; i0<2; i0++) {
1275 // skip node itself
1276 if (i0==0 && i1==0)
1277 continue;
1278 // location of neighbour node
1279 const int nx=x+i0;
1280 const int ny=y+i1;
1281 if (nx>=0 && ny>=0 && nx<nDOF0 && ny<nDOF1) {
1282 index.push_back(ny*nDOF0+nx);
1283 num++;
1284 }
1285 }
1286 }
1287
1288 return num;
1289 }
1290
1291 //protected
1292 void Rectangle::nodesToDOF(escript::Data& out, const escript::Data& in) const
1293 {
1294 const dim_t numComp = in.getDataPointSize();
1295 out.requireWrite();
1296
1297 const index_t left = (m_offset[0]==0 ? 0 : 1);
1298 const index_t bottom = (m_offset[1]==0 ? 0 : 1);
1299 const dim_t nDOF0 = (m_gNE[0]+1)/m_NX[0];
1300 const dim_t nDOF1 = (m_gNE[1]+1)/m_NX[1];
1301 #pragma omp parallel for
1302 for (index_t i=0; i<nDOF1; i++) {
1303 for (index_t j=0; j<nDOF0; j++) {
1304 const index_t n=j+left+(i+bottom)*m_NN[0];
1305 const double* src=in.getSampleDataRO(n);
1306 copy(src, src+numComp, out.getSampleDataRW(j+i*nDOF0));
1307 }
1308 }
1309 }
1310
1311 //protected
1312 void Rectangle::dofToNodes(escript::Data& out, const escript::Data& in) const
1313 {
1314 const dim_t numComp = in.getDataPointSize();
1315 Paso_Coupler* coupler = Paso_Coupler_alloc(m_connector, numComp);
1316 Paso_Coupler_startCollect(coupler, in.getSampleDataRO(0));
1317
1318 const dim_t numDOF = getNumDOF();
1319 out.requireWrite();
1320 const double* buffer = Paso_Coupler_finishCollect(coupler);
1321
1322 #pragma omp parallel for
1323 for (index_t i=0; i<getNumNodes(); i++) {
1324 const double* src=(m_dofMap[i]<numDOF ?
1325 in.getSampleDataRO(m_dofMap[i])
1326 : &buffer[(m_dofMap[i]-numDOF)*numComp]);
1327 copy(src, src+numComp, out.getSampleDataRW(i));
1328 }
1329 Paso_Coupler_free(coupler);
1330 }
1331
1332 //private
1333 void Rectangle::populateSampleIds()
1334 {
1335 // degrees of freedom are numbered from left to right, bottom to top in
1336 // each rank, continuing on the next rank (ranks also go left-right,
1337 // bottom-top).
1338 // This means rank 0 has id 0...n0-1, rank 1 has id n0...n1-1 etc. which
1339 // helps when writing out data rank after rank.
1340
1341 // build node distribution vector first.
1342 // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes which is
1343 // constant for all ranks in this implementation
1344 m_nodeDistribution.assign(m_mpiInfo->size+1, 0);
1345 const dim_t numDOF=getNumDOF();
1346 for (dim_t k=1; k<m_mpiInfo->size; k++) {
1347 m_nodeDistribution[k]=k*numDOF;
1348 }
1349 m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal();
1350 m_nodeId.resize(getNumNodes());
1351 m_dofId.resize(numDOF);
1352 m_elementId.resize(getNumElements());
1353
1354 // populate face element counts
1355 //left
1356 if (m_offset[0]==0)
1357 m_faceCount[0]=m_NE[1];
1358 else
1359 m_faceCount[0]=0;
1360 //right
1361 if (m_mpiInfo->rank%m_NX[0]==m_NX[0]-1)
1362 m_faceCount[1]=m_NE[1];
1363 else
1364 m_faceCount[1]=0;
1365 //bottom
1366 if (m_offset[1]==0)
1367 m_faceCount[2]=m_NE[0];
1368 else
1369 m_faceCount[2]=0;
1370 //top
1371 if (m_mpiInfo->rank/m_NX[0]==m_NX[1]-1)
1372 m_faceCount[3]=m_NE[0];
1373 else
1374 m_faceCount[3]=0;
1375
1376 m_faceId.resize(getNumFaceElements());
1377
1378 const index_t left = (m_offset[0]==0 ? 0 : 1);
1379 const index_t bottom = (m_offset[1]==0 ? 0 : 1);
1380 const dim_t nDOF0 = (m_gNE[0]+1)/m_NX[0];
1381 const dim_t nDOF1 = (m_gNE[1]+1)/m_NX[1];
1382
1383 #define globalNodeId(x,y) \
1384 ((m_offset[0]+x)/nDOF0)*nDOF0*nDOF1+(m_offset[0]+x)%nDOF0 \
1385 + ((m_offset[1]+y)/nDOF1)*nDOF0*nDOF1*m_NX[0]+((m_offset[1]+y)%nDOF1)*nDOF0
1386
1387 // set corner id's outside the parallel region
1388 m_nodeId[0] = globalNodeId(0, 0);
1389 m_nodeId[m_NN[0]-1] = globalNodeId(m_NN[0]-1, 0);
1390 m_nodeId[m_NN[0]*(m_NN[1]-1)] = globalNodeId(0, m_NN[1]-1);
1391 m_nodeId[m_NN[0]*m_NN[1]-1] = globalNodeId(m_NN[0]-1,m_NN[1]-1);
1392 #undef globalNodeId
1393
1394 #pragma omp parallel
1395 {
1396 // populate degrees of freedom and own nodes (identical id)
1397 #pragma omp for nowait
1398 for (dim_t i=0; i<nDOF1; i++) {
1399 for (dim_t j=0; j<nDOF0; j++) {
1400 const index_t nodeIdx=j+left+(i+bottom)*m_NN[0];
1401 const index_t dofIdx=j+i*nDOF0;
1402 m_dofId[dofIdx] = m_nodeId[nodeIdx]
1403 = m_nodeDistribution[m_mpiInfo->rank]+dofIdx;
1404 }
1405 }
1406
1407 // populate the rest of the nodes (shared with other ranks)
1408 if (m_faceCount[0]==0) { // left column
1409 #pragma omp for nowait
1410 for (dim_t i=0; i<nDOF1; i++) {
1411 const index_t nodeIdx=(i+bottom)*m_NN[0];
1412 const index_t dofId=(i+1)*nDOF0-1;
1413 m_nodeId[nodeIdx]
1414 = m_nodeDistribution[m_mpiInfo->rank-1]+dofId;
1415 }
1416 }
1417 if (m_faceCount[1]==0) { // right column
1418 #pragma omp for nowait
1419 for (dim_t i=0; i<nDOF1; i++) {
1420 const index_t nodeIdx=(i+bottom+1)*m_NN[0]-1;
1421 const index_t dofId=i*nDOF0;
1422 m_nodeId[nodeIdx]
1423 = m_nodeDistribution[m_mpiInfo->rank+1]+dofId;
1424 }
1425 }
1426 if (m_faceCount[2]==0) { // bottom row
1427 #pragma omp for nowait
1428 for (dim_t i=0; i<nDOF0; i++) {
1429 const index_t nodeIdx=i+left;
1430 const index_t dofId=nDOF0*(nDOF1-1)+i;
1431 m_nodeId[nodeIdx]
1432 = m_nodeDistribution[m_mpiInfo->rank-m_NX[0]]+dofId;
1433 }
1434 }
1435 if (m_faceCount[3]==0) { // top row
1436 #pragma omp for nowait
1437 for (dim_t i=0; i<nDOF0; i++) {
1438 const index_t nodeIdx=m_NN[0]*(m_NN[1]-1)+i+left;
1439 const index_t dofId=i;
1440 m_nodeId[nodeIdx]
1441 = m_nodeDistribution[m_mpiInfo->rank+m_NX[0]]+dofId;
1442 }
1443 }
1444
1445 // populate element id's
1446 #pragma omp for nowait
1447 for (dim_t i1=0; i1<m_NE[1]; i1++) {
1448 for (dim_t i0=0; i0<m_NE[0]; i0++) {
1449 m_elementId[i0+i1*m_NE[0]]=(m_offset[1]+i1)*m_gNE[0]+m_offset[0]+i0;
1450 }
1451 }
1452
1453 // face elements
1454 #pragma omp for
1455 for (dim_t k=0; k<getNumFaceElements(); k++)
1456 m_faceId[k]=k;
1457 } // end parallel section
1458
1459 m_nodeTags.assign(getNumNodes(), 0);
1460 updateTagsInUse(Nodes);
1461
1462 m_elementTags.assign(getNumElements(), 0);
1463 updateTagsInUse(Elements);
1464
1465 // generate face offset vector and set face tags
1466 const index_t LEFT=1, RIGHT=2, BOTTOM=10, TOP=20;
1467 const index_t faceTag[] = { LEFT, RIGHT, BOTTOM, TOP };
1468 m_faceOffset.assign(4, -1);
1469 m_faceTags.clear();
1470 index_t offset=0;
1471 for (size_t i=0; i<4; i++) {
1472 if (m_faceCount[i]>0) {
1473 m_faceOffset[i]=offset;
1474 offset+=m_faceCount[i];
1475 m_faceTags.insert(m_faceTags.end(), m_faceCount[i], faceTag[i]);
1476 }
1477 }
1478 setTagMap("left", LEFT);
1479 setTagMap("right", RIGHT);
1480 setTagMap("bottom", BOTTOM);
1481 setTagMap("top", TOP);
1482 updateTagsInUse(FaceElements);
1483 }
1484
1485 //private
1486 void Rectangle::createPattern()
1487 {
1488 const dim_t nDOF0 = (m_gNE[0]+1)/m_NX[0];
1489 const dim_t nDOF1 = (m_gNE[1]+1)/m_NX[1];
1490 const index_t left = (m_offset[0]==0 ? 0 : 1);
1491 const index_t bottom = (m_offset[1]==0 ? 0 : 1);
1492
1493 // populate node->DOF mapping with own degrees of freedom.
1494 // The rest is assigned in the loop further down
1495 m_dofMap.assign(getNumNodes(), 0);
1496 #pragma omp parallel for
1497 for (index_t i=bottom; i<bottom+nDOF1; i++) {
1498 for (index_t j=left; j<left+nDOF0; j++) {
1499 m_dofMap[i*m_NN[0]+j]=(i-bottom)*nDOF0+j-left;
1500 }
1501 }
1502
1503 // build list of shared components and neighbours by looping through
1504 // all potential neighbouring ranks and checking if positions are
1505 // within bounds
1506 const dim_t numDOF=nDOF0*nDOF1;
1507 vector<IndexVector> colIndices(numDOF); // for the couple blocks
1508 RankVector neighbour;
1509 IndexVector offsetInShared(1,0);
1510 IndexVector sendShared, recvShared;
1511 int numShared=0;
1512 const int x=m_mpiInfo->rank%m_NX[0];
1513 const int y=m_mpiInfo->rank/m_NX[0];
1514 for (int i1=-1; i1<2; i1++) {
1515 for (int i0=-1; i0<2; i0++) {
1516 // skip this rank
1517 if (i0==0 && i1==0)
1518 continue;
1519 // location of neighbour rank
1520 const int nx=x+i0;
1521 const int ny=y+i1;
1522 if (nx>=0 && ny>=0 && nx<m_NX[0] && ny<m_NX[1]) {
1523 neighbour.push_back(ny*m_NX[0]+nx);
1524 if (i0==0) {
1525 // sharing top or bottom edge
1526 const int firstDOF=(i1==-1 ? 0 : numDOF-nDOF0);
1527 const int firstNode=(i1==-1 ? left : m_NN[0]*(m_NN[1]-1)+left);
1528 offsetInShared.push_back(offsetInShared.back()+nDOF0);
1529 for (dim_t i=0; i<nDOF0; i++, numShared++) {
1530 sendShared.push_back(firstDOF+i);
1531 recvShared.push_back(numDOF+numShared);
1532 if (i>0)
1533 colIndices[firstDOF+i-1].push_back(numShared);
1534 colIndices[firstDOF+i].push_back(numShared);
1535 if (i<nDOF0-1)
1536 colIndices[firstDOF+i+1].push_back(numShared);
1537 m_dofMap[firstNode+i]=numDOF+numShared;
1538 }
1539 } else if (i1==0) {
1540 // sharing left or right edge
1541 const int firstDOF=(i0==-1 ? 0 : nDOF0-1);
1542 const int firstNode=(i0==-1 ? bottom*m_NN[0] : (bottom+1)*m_NN[0]-1);
1543 offsetInShared.push_back(offsetInShared.back()+nDOF1);
1544 for (dim_t i=0; i<nDOF1; i++, numShared++) {
1545 sendShared.push_back(firstDOF+i*nDOF0);
1546 recvShared.push_back(numDOF+numShared);
1547 if (i>0)
1548 colIndices[firstDOF+(i-1)*nDOF0].push_back(numShared);
1549 colIndices[firstDOF+i*nDOF0].push_back(numShared);
1550 if (i<nDOF1-1)
1551 colIndices[firstDOF+(i+1)*nDOF0].push_back(numShared);
1552 m_dofMap[firstNode+i*m_NN[0]]=numDOF+numShared;
1553 }
1554 } else {
1555 // sharing a node
1556 const int dof=(i0+1)/2*(nDOF0-1)+(i1+1)/2*(numDOF-nDOF0);
1557 const int node=(i0+1)/2*(m_NN[0]-1)+(i1+1)/2*m_NN[0]*(m_NN[1]-1);
1558 offsetInShared.push_back(offsetInShared.back()+1);
1559 sendShared.push_back(dof);
1560 recvShared.push_back(numDOF+numShared);
1561 colIndices[dof].push_back(numShared);
1562 m_dofMap[node]=numDOF+numShared;
1563 ++numShared;
1564 }
1565 }
1566 }
1567 }
1568
1569 // create connector
1570 Paso_SharedComponents *snd_shcomp = Paso_SharedComponents_alloc(
1571 numDOF, neighbour.size(), &neighbour[0], &sendShared[0],
1572 &offsetInShared[0], 1, 0, m_mpiInfo);
1573 Paso_SharedComponents *rcv_shcomp = Paso_SharedComponents_alloc(
1574 numDOF, neighbour.size(), &neighbour[0], &recvShared[0],
1575 &offsetInShared[0], 1, 0, m_mpiInfo);
1576 m_connector = Paso_Connector_alloc(snd_shcomp, rcv_shcomp);
1577 Paso_SharedComponents_free(snd_shcomp);
1578 Paso_SharedComponents_free(rcv_shcomp);
1579
1580 // create main and couple blocks
1581 Paso_Pattern *mainPattern = createMainPattern();
1582 Paso_Pattern *colPattern, *rowPattern;
1583 createCouplePatterns(colIndices, numShared, &colPattern, &rowPattern);
1584
1585 // allocate paso distribution
1586 Paso_Distribution* distribution = Paso_Distribution_alloc(m_mpiInfo,
1587 const_cast<index_t*>(&m_nodeDistribution[0]), 1, 0);
1588
1589 // finally create the system matrix
1590 m_pattern = Paso_SystemMatrixPattern_alloc(MATRIX_FORMAT_DEFAULT,
1591 distribution, distribution, mainPattern, colPattern, rowPattern,
1592 m_connector, m_connector);
1593
1594 Paso_Distribution_free(distribution);
1595
1596 // useful debug output
1597 /*
1598 cout << "--- rcv_shcomp ---" << endl;
1599 cout << "numDOF=" << numDOF << ", numNeighbors=" << neighbour.size() << endl;
1600 for (size_t i=0; i<neighbour.size(); i++) {
1601 cout << "neighbor[" << i << "]=" << neighbour[i]
1602 << " offsetInShared[" << i+1 << "]=" << offsetInShared[i+1] << endl;
1603 }
1604 for (size_t i=0; i<recvShared.size(); i++) {
1605 cout << "shared[" << i << "]=" << recvShared[i] << endl;
1606 }
1607 cout << "--- snd_shcomp ---" << endl;
1608 for (size_t i=0; i<sendShared.size(); i++) {
1609 cout << "shared[" << i << "]=" << sendShared[i] << endl;
1610 }
1611 cout << "--- dofMap ---" << endl;
1612 for (size_t i=0; i<m_dofMap.size(); i++) {
1613 cout << "m_dofMap[" << i << "]=" << m_dofMap[i] << endl;
1614 }
1615 cout << "--- colIndices ---" << endl;
1616 for (size_t i=0; i<colIndices.size(); i++) {
1617 cout << "colIndices[" << i << "].size()=" << colIndices[i].size() << endl;
1618 }
1619 */
1620
1621 /*
1622 cout << "--- main_pattern ---" << endl;
1623 cout << "M=" << mainPattern->numOutput << ", N=" << mainPattern->numInput << endl;
1624 for (size_t i=0; i<mainPattern->numOutput+1; i++) {
1625 cout << "ptr[" << i << "]=" << mainPattern->ptr[i] << endl;
1626 }
1627 for (size_t i=0; i<mainPattern->ptr[mainPattern->numOutput]; i++) {
1628 cout << "index[" << i << "]=" << mainPattern->index[i] << endl;
1629 }
1630 */
1631
1632 /*
1633 cout << "--- colCouple_pattern ---" << endl;
1634 cout << "M=" << colPattern->numOutput << ", N=" << colPattern->numInput << endl;
1635 for (size_t i=0; i<colPattern->numOutput+1; i++) {
1636 cout << "ptr[" << i << "]=" << colPattern->ptr[i] << endl;
1637 }
1638 for (size_t i=0; i<colPattern->ptr[colPattern->numOutput]; i++) {
1639 cout << "index[" << i << "]=" << colPattern->index[i] << endl;
1640 }
1641 */
1642
1643 /*
1644 cout << "--- rowCouple_pattern ---" << endl;
1645 cout << "M=" << rowPattern->numOutput << ", N=" << rowPattern->numInput << endl;
1646 for (size_t i=0; i<rowPattern->numOutput+1; i++) {
1647 cout << "ptr[" << i << "]=" << rowPattern->ptr[i] << endl;
1648 }
1649 for (size_t i=0; i<rowPattern->ptr[rowPattern->numOutput]; i++) {
1650 cout << "index[" << i << "]=" << rowPattern->index[i] << endl;
1651 }
1652 */
1653
1654 Paso_Pattern_free(mainPattern);
1655 Paso_Pattern_free(colPattern);
1656 Paso_Pattern_free(rowPattern);
1657 }
1658
1659 //private
1660 void Rectangle::addToMatrixAndRHS(Paso_SystemMatrix* S, escript::Data& F,
1661 const vector<double>& EM_S, const vector<double>& EM_F, bool addS,
1662 bool addF, index_t firstNode, dim_t nEq, dim_t nComp) const
1663 {
1664 IndexVector rowIndex;
1665 rowIndex.push_back(m_dofMap[firstNode]);
1666 rowIndex.push_back(m_dofMap[firstNode+1]);
1667 rowIndex.push_back(m_dofMap[firstNode+m_NN[0]]);
1668 rowIndex.push_back(m_dofMap[firstNode+m_NN[0]+1]);
1669 if (addF) {
1670 double *F_p=F.getSampleDataRW(0);
1671 for (index_t i=0; i<rowIndex.size(); i++) {
1672 if (rowIndex[i]<getNumDOF()) {
1673 for (index_t eq=0; eq<nEq; eq++) {
1674 F_p[INDEX2(eq, rowIndex[i], nEq)]+=EM_F[INDEX2(eq,i,nEq)];
1675 }
1676 }
1677 }
1678 }
1679 if (addS) {
1680 addToSystemMatrix(S, rowIndex, nEq, rowIndex, nComp, EM_S);
1681 }
1682 }
1683
1684 //protected
1685 void Rectangle::interpolateNodesOnElements(escript::Data& out,
1686 const escript::Data& in,
1687 bool reduced) const
1688 {
1689 const dim_t numComp = in.getDataPointSize();
1690 if (reduced) {
1691 out.requireWrite();
1692 const double c0 = 0.25;
1693 #pragma omp parallel
1694 {
1695 vector<double> f_00(numComp);
1696 vector<double> f_01(numComp);
1697 vector<double> f_10(numComp);
1698 vector<double> f_11(numComp);
1699 #pragma omp for
1700 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1701 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1702 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,k1, m_NN[0])), numComp*sizeof(double));
1703 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,k1+1, m_NN[0])), numComp*sizeof(double));
1704 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,k1, m_NN[0])), numComp*sizeof(double));
1705 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,k1+1, m_NN[0])), numComp*sizeof(double));
1706 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE[0]));
1707 for (index_t i=0; i < numComp; ++i) {
1708 o[INDEX2(i,numComp,0)] = c0*(f_00[i] + f_01[i] + f_10[i] + f_11[i]);
1709 } /* end of component loop i */
1710 } /* end of k0 loop */
1711 } /* end of k1 loop */
1712 } /* end of parallel section */
1713 } else {
1714 out.requireWrite();
1715 const double c0 = 0.16666666666666666667;
1716 const double c1 = 0.044658198738520451079;
1717 const double c2 = 0.62200846792814621559;
1718 #pragma omp parallel
1719 {
1720 vector<double> f_00(numComp);
1721 vector<double> f_01(numComp);
1722 vector<double> f_10(numComp);
1723 vector<double> f_11(numComp);
1724 #pragma omp for
1725 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1726 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1727 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,k1, m_NN[0])), numComp*sizeof(double));
1728 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,k1+1, m_NN[0])), numComp*sizeof(double));
1729 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,k1, m_NN[0])), numComp*sizeof(double));
1730 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,k1+1, m_NN[0])), numComp*sizeof(double));
1731 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE[0]));
1732 for (index_t i=0; i < numComp; ++i) {
1733 o[INDEX2(i,numComp,0)] = c0*(f_01[i] + f_10[i]) + c1*f_11[i] + c2*f_00[i];
1734 o[INDEX2(i,numComp,1)] = c0*(f_00[i] + f_11[i]) + c1*f_01[i] + c2*f_10[i];
1735 o[INDEX2(i,numComp,2)] = c0*(f_00[i] + f_11[i]) + c1*f_10[i] + c2*f_01[i];
1736 o[INDEX2(i,numComp,3)] = c0*(f_01[i] + f_10[i]) + c1*f_00[i] + c2*f_11[i];
1737 } /* end of component loop i */
1738 } /* end of k0 loop */
1739 } /* end of k1 loop */
1740 } /* end of parallel section */
1741 }
1742 }
1743
1744 //protected
1745 void Rectangle::interpolateNodesOnFaces(escript::Data& out,
1746 const escript::Data& in,
1747 bool reduced) const
1748 {
1749 const dim_t numComp = in.getDataPointSize();
1750 if (reduced) {
1751 out.requireWrite();
1752 #pragma omp parallel
1753 {
1754 vector<double> f_00(numComp);
1755 vector<double> f_01(numComp);
1756 vector<double> f_10(numComp);
1757 vector<double> f_11(numComp);
1758 if (m_faceOffset[0] > -1) {
1759 #pragma omp for nowait
1760 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1761 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(0,k1, m_NN[0])), numComp*sizeof(double));
1762 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(0,k1+1, m_NN[0])), numComp*sizeof(double));
1763 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
1764 for (index_t i=0; i < numComp; ++i) {
1765 o[INDEX2(i,numComp,0)] = (f_00[i] + f_01[i])/2;
1766 } /* end of component loop i */
1767 } /* end of k1 loop */
1768 } /* end of face 0 */
1769 if (m_faceOffset[1] > -1) {
1770 #pragma omp for nowait
1771 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1772 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1, m_NN[0])), numComp*sizeof(double));
1773 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1+1, m_NN[0])), numComp*sizeof(double));
1774 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
1775 for (index_t i=0; i < numComp; ++i) {
1776 o[INDEX2(i,numComp,0)] = (f_10[i] + f_11[i])/2;
1777 } /* end of component loop i */
1778 } /* end of k1 loop */
1779 } /* end of face 1 */
1780 if (m_faceOffset[2] > -1) {
1781 #pragma omp for nowait
1782 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1783 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,0, m_NN[0])), numComp*sizeof(double));
1784 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,0, m_NN[0])), numComp*sizeof(double));
1785 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1786 for (index_t i=0; i < numComp; ++i) {
1787 o[INDEX2(i,numComp,0)] = (f_00[i] + f_10[i])/2;
1788 } /* end of component loop i */
1789 } /* end of k0 loop */
1790 } /* end of face 2 */
1791 if (m_faceOffset[3] > -1) {
1792 #pragma omp for nowait
1793 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1794 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1795 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1796 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1797 for (index_t i=0; i < numComp; ++i) {
1798 o[INDEX2(i,numComp,0)] = (f_01[i] + f_11[i])/2;
1799 } /* end of component loop i */
1800 } /* end of k0 loop */
1801 } /* end of face 3 */
1802 } /* end of parallel section */
1803 } else {
1804 out.requireWrite();
1805 const double c0 = 0.21132486540518711775;
1806 const double c1 = 0.78867513459481288225;
1807 #pragma omp parallel
1808 {
1809 vector<double> f_00(numComp);
1810 vector<double> f_01(numComp);
1811 vector<double> f_10(numComp);
1812 vector<double> f_11(numComp);
1813 if (m_faceOffset[0] > -1) {
1814 #pragma omp for nowait
1815 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1816 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(0,k1, m_NN[0])), numComp*sizeof(double));
1817 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(0,k1+1, m_NN[0])), numComp*sizeof(double));
1818 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
1819 for (index_t i=0; i < numComp; ++i) {
1820 o[INDEX2(i,numComp,0)] = c0*f_01[i] + c1*f_00[i];
1821 o[INDEX2(i,numComp,1)] = c0*f_00[i] + c1*f_01[i];
1822 } /* end of component loop i */
1823 } /* end of k1 loop */
1824 } /* end of face 0 */
1825 if (m_faceOffset[1] > -1) {
1826 #pragma omp for nowait
1827 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1828 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1, m_NN[0])), numComp*sizeof(double));
1829 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1+1, m_NN[0])), numComp*sizeof(double));
1830 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
1831 for (index_t i=0; i < numComp; ++i) {
1832 o[INDEX2(i,numComp,0)] = c1*f_10[i] + c0*f_11[i];
1833 o[INDEX2(i,numComp,1)] = c1*f_11[i] + c0*f_10[i];
1834 } /* end of component loop i */
1835 } /* end of k1 loop */
1836 } /* end of face 1 */
1837 if (m_faceOffset[2] > -1) {
1838 #pragma omp for nowait
1839 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1840 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,0, m_NN[0])), numComp*sizeof(double));
1841 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,0, m_NN[0])), numComp*sizeof(double));
1842 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1843 for (index_t i=0; i < numComp; ++i) {
1844 o[INDEX2(i,numComp,0)] = c0*f_10[i] + c1*f_00[i];
1845 o[INDEX2(i,numComp,1)] = c0*f_00[i] + c1*f_10[i];
1846 } /* end of component loop i */
1847 } /* end of k0 loop */
1848 } /* end of face 2 */
1849 if (m_faceOffset[3] > -1) {
1850 #pragma omp for nowait
1851 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1852 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1853 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1854 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1855 for (index_t i=0; i < numComp; ++i) {
1856 o[INDEX2(i,numComp,0)] = c0*f_11[i] + c1*f_01[i];
1857 o[INDEX2(i,numComp,1)] = c0*f_01[i] + c1*f_11[i];
1858 } /* end of component loop i */
1859 } /* end of k0 loop */
1860 } /* end of face 3 */
1861 } /* end of parallel section */
1862 }
1863 }
1864
1865
1866
1867 namespace
1868 {
1869 // Calculates a guassian blur colvolution matrix for 2D
1870 double* get2DGauss(unsigned radius, double sigma)
1871 {
1872 double* arr=new double[(radius*2+1)*(radius*2+1)];
1873 double common=M_1_PI*0.5*1/(sigma*sigma);
1874 double total=0;
1875 int r=static_cast<int>(radius);
1876 for (int y=-r;y<=r;++y)
1877 {
1878 for (int x=-r;x<=r;++x)
1879 {
1880 arr[(x+r)+(y+r)*(r*2+1)]=common*exp(-(x*x+y*y)/(2*sigma*sigma));
1881 // cout << (x+y*(r*2+1)) << " " << arr[(x+r)+(y+r)*(r*2+1)] << endl;
1882 total+=arr[(x+r)+(y+r)*(r*2+1)];
1883 }
1884 }
1885 double invtotal=1/total;
1886 //cout << "Inv total is " << invtotal << endl;
1887 for (size_t p=0;p<(radius*2+1)*(radius*2+1);++p)
1888 {
1889 arr[p]*=invtotal;
1890 //cout << p << "->" << arr[p] << endl;
1891 }
1892 return arr;
1893 }
1894
1895 // applies conv to source to get a point.
1896 // (xp, yp) are the coords in the source matrix not the destination matrix
1897 double Convolve2D(double* conv, double* source, size_t xp, size_t yp, unsigned radius, size_t width)
1898 {
1899 size_t bx=xp-radius, by=yp-radius;
1900 size_t sbase=bx+by*width;
1901 double result=0;
1902 for (int y=0;y<2*radius+1;++y)
1903 {
1904 for (int x=0;x<2*radius+1;++x)
1905 {
1906 result+=conv[x+y*(2*radius+1)] * source[sbase + x+y*width];
1907 }
1908 }
1909 return result;
1910 }
1911 }
1912
1913
1914
1915 /* This routine produces a Data object filled with smoothed random data.
1916 The dimensions of the rectangle being filled are internal[0] x internal[1] points.
1917 A parameter radius dives the size of the stencil used for the smoothing.
1918 A point on the left hand edge for example, will still require `radius` extra points to the left
1919 in order to complete the stencil.
1920
1921 All local calculation is done on an array called `src`, with
1922 dimensions = ext[0] * ext[1].
1923 Where ext[i]= internal[i]+2*radius.
1924
1925 Now for MPI there is overlap to deal with. We need to share both the overlapping
1926 values themselves but also the external region.
1927
1928 In a hypothetical 1-D case:
1929
1930
1931 1234567
1932 would be split into two ranks thus:
1933 123(4) (4)567 [4 being a shared element]
1934
1935 If the radius is 2. There will be padding elements on the outside:
1936
1937 pp123(4) (4)567pp
1938
1939 To ensure that 4 can be correctly computed on both ranks, values from the other rank
1940 need to be known.
1941
1942 pp123(4)56 23(4)567pp
1943
1944 Now in our case, we wout set all the values 23456 on the left rank and send them to the
1945 right hand rank.
1946
1947 So the edges _may_ need to be shared at a distance `inset` from all boundaries.
1948 inset=2*radius+1
1949 This is to ensure that values at distance `radius` from the shared/overlapped element
1950 that ripley has.
1951
1952
1953 */
1954 escript::Data Rectangle::randomFill(long seed, const boost::python::tuple& filter) const
1955 {
1956 if (m_numDim!=2)
1957 {
1958 throw RipleyException("Only 2D supported at this time.");
1959 }
1960 if (len(filter)!=3) {
1961 throw RipleyException("Unsupported random filter");
1962 }
1963 boost::python::extract<string> ex(filter[0]);
1964 if (!ex.check() || (ex()!="gaussian"))
1965 {
1966 throw RipleyException("Unsupported random filter");
1967 }
1968 boost::python::extract<unsigned int> ex1(filter[1]);
1969 if (!ex1.check())
1970 {
1971 throw RipleyException("Radius of gaussian filter must be a positive integer.");
1972 }
1973 unsigned int radius=ex1();
1974 double sigma=0.5;
1975 boost::python::extract<double> ex2(filter[2]);
1976 if (!ex2.check() || (sigma=ex2())<=0)
1977 {
1978 throw RipleyException("Sigma must be a postive floating point number.");
1979 }
1980
1981 size_t internal[2];
1982 internal[0]=m_NE[0]+1; // number of points in the internal region
1983 internal[1]=m_NE[1]+1; // that is, the ones we need smoothed versions of
1984 size_t ext[2];
1985 ext[0]=internal[0]+2*radius; // includes points we need as input
1986 ext[1]=internal[1]+2*radius; // for smoothing
1987
1988 // now we check to see if the radius is acceptable
1989 // That is, would not cross multiple ranks in MPI
1990
1991 if ((2*radius>=internal[0]) || (2*radius>=internal[1]))
1992 {
1993 throw RipleyException("Radius of gaussian filter must be less than half the width/height of a rank");
1994 }
1995
1996
1997 double* src=new double[ext[0]*ext[1]];
1998 esysUtils::randomFillArray(seed, src, ext[0]*ext[1]);
1999
2000
2001 #ifdef ESYS_MPI
2002 size_t inset=2*radius+1;
2003 size_t Eheight=ext[1]-2*inset; // how high the E (shared) region is
2004 size_t Swidth=ext[0]-2*inset;
2005
2006 double* SWin=new double[inset*inset]; memset(SWin, 0, inset*inset*sizeof(double));
2007 double* SEin=new double[inset*inset]; memset(SEin, 0, inset*inset*sizeof(double));
2008 double* NWin=new double[inset*inset]; memset(NWin, 0, inset*inset*sizeof(double));
2009 double* Sin=new double[inset*Swidth]; memset(Sin, 0, inset*Swidth*sizeof(double));
2010 double* Win=new double[inset*Eheight]; memset(Win, 0, inset*Eheight*sizeof(double));
2011
2012 double* NEout=new double[inset*inset]; memset(NEout, 0, inset*inset*sizeof(double));
2013 unsigned int base=ext[0]-inset+(ext[1]-inset)*ext[0];
2014 for (unsigned int i=0;i<inset;++i)
2015 {
2016 memcpy(NEout+inset*i, src+base, inset*sizeof(double));
2017 base+=ext[0];
2018 }
2019 double* NWout=new double[inset*inset]; memset(NWout, 0, inset*inset*sizeof(double));
2020 base=(ext[1]-inset)*ext[0];
2021 for (unsigned int i=0;i<inset;++i)
2022 {
2023 memcpy(NWout+inset*i, src+base, inset*sizeof(double));
2024 base+=ext[0];
2025 }
2026
2027 double* SEout=new double[inset*inset]; memset(SEout, 0, inset*inset*sizeof(double));
2028 base=ext[0]-inset;
2029 for (int i=0;i<inset;++i)
2030 {
2031 memcpy(SEout+inset*i, src+base, inset*sizeof(double));
2032 base+=ext[0];
2033 }
2034 double* Nout=new double[inset*Swidth]; memset(Nout, 0, inset*Swidth*sizeof(double));
2035 base=inset+(ext[1]-inset)*ext[0];
2036 for (unsigned int i=0;i<inset;++i)
2037 {
2038 memcpy(Nout+Swidth*i, src+base, Swidth*sizeof(double));
2039 base+=ext[0];
2040 }
2041
2042 double* Eout=new double[inset*Eheight]; memset(Eout, 0, inset*Eheight*sizeof(double));
2043 base=ext[0]-inset+inset*ext[0];
2044 for (unsigned int i=0;i<Eheight;++i)
2045 {
2046 memcpy(Eout+i*inset, src+base, inset*sizeof(double));
2047 base+=ext[0];
2048 }
2049
2050 MPI_Request reqs[10];
2051 MPI_Status stats[10];
2052 short rused=0;
2053
2054 dim_t X=m_mpiInfo->rank%m_NX[0];
2055 dim_t Y=m_mpiInfo->rank/m_NX[0];
2056 dim_t row=m_NX[0];
2057 bool swused=false; // These vars will be true if data needs to be copied out of them
2058 bool seused=false;
2059 bool nwused=false;
2060 bool sused=false;
2061 bool wused=false;
2062
2063 // Tags:
2064 // 10 : EW transfer (middle)
2065 // 8 : NS transfer (middle)
2066 // 7 : NE corner -> to N, E and NE
2067 // 11 : NW corner to SW corner (only used on the left hand edge
2068 // 12 : SE corner to SW corner (only used on the bottom edge
2069
2070
2071
2072 int comserr=0;
2073 if (Y!=0) // not on bottom row,
2074 {
2075 if (X!=0) // not on the left hand edge
2076 {
2077 // recv bottomleft from SW
2078 comserr|=MPI_Irecv(SWin, inset*inset, MPI_DOUBLE, (X-1)+(Y-1)*row, 7, m_mpiInfo->comm, reqs+(rused++));
2079 swused=true;
2080 comserr|=MPI_Irecv(Win, Eheight*inset, MPI_DOUBLE, X-1+Y*row, 10, m_mpiInfo->comm, reqs+(rused++));
2081 wused=true;
2082 }
2083 else // on the left hand edge
2084 {
2085 comserr|=MPI_Irecv(SWin, inset*inset, MPI_DOUBLE, (Y-1)*row, 11, m_mpiInfo->comm, reqs+(rused++));
2086 swused=true;
2087 }
2088 comserr|=MPI_Irecv(Sin, Swidth*inset, MPI_DOUBLE, X+(Y-1)*row, 8, m_mpiInfo->comm, reqs+(rused++));
2089 sused=true;
2090 comserr|=MPI_Irecv(SEin, inset*inset, MPI_DOUBLE, X+(Y-1)*row, 7, m_mpiInfo->comm, reqs+(rused++));
2091 seused=true;
2092
2093
2094 }
2095 else // on the bottom row
2096 {
2097 if (X!=0)
2098 {
2099 comserr|=MPI_Irecv(Win, Eheight*inset, MPI_DOUBLE, X-1+Y*row, 10, m_mpiInfo->comm, reqs+(rused++));
2100 wused=true;
2101 // Need to use tag 12 here because SW is coming from the East not South East
2102 comserr|=MPI_Irecv(SWin, inset*inset, MPI_DOUBLE, X-1+Y*row, 12, m_mpiInfo->comm, reqs+(rused++));
2103 swused=true;
2104 }
2105 if (X!=(row-1))
2106 {
2107 comserr|=MPI_Isend(SEout, inset*inset, MPI_DOUBLE, X+1+(Y)*row, 12, m_mpiInfo->comm, reqs+(rused++));
2108 }
2109 }
2110
2111 if (Y!=(m_NX[1]-1)) // not on the top row
2112 {
2113 comserr|=MPI_Isend(Nout, inset*Swidth, MPI_DOUBLE, X+(Y+1)*row, 8, m_mpiInfo->comm, reqs+(rused++));
2114 comserr|=MPI_Isend(NEout, inset*inset, MPI_DOUBLE, X+(Y+1)*row, 7, m_mpiInfo->comm, reqs+(rused++));
2115 if (X!=(row-1)) // not on right hand edge
2116 {
2117 comserr|=MPI_Isend(NEout, inset*inset, MPI_DOUBLE, X+1+(Y+1)*row, 7, m_mpiInfo->comm, reqs+(rused++));
2118 }
2119 if (X==0) // left hand edge
2120 {
2121 comserr|=MPI_Isend(NWout, inset*inset, MPI_DOUBLE, (Y+1)*row,11, m_mpiInfo->comm, reqs+(rused++));
2122 }
2123 }
2124 if (X!=(row-1)) // not on right hand edge
2125 {
2126 comserr|=MPI_Isend(NEout, inset*inset, MPI_DOUBLE, X+1+(Y)*row, 7, m_mpiInfo->comm, reqs+(rused++));
2127 comserr|=MPI_Isend(Eout, Eheight*inset, MPI_DOUBLE, X+1+(Y)*row, 10, m_mpiInfo->comm, reqs+(rused++));
2128 }
2129 if (X!=0)
2130 {
2131 comserr|=MPI_Irecv(NWin, inset*inset, MPI_DOUBLE, (X-1)+Y*row, 7, m_mpiInfo->comm, reqs+(rused++));
2132 nwused=true;
2133
2134
2135 }
2136
2137 if (!comserr)
2138 {
2139 comserr=MPI_Waitall(rused, reqs, stats);
2140 }
2141
2142 if (comserr)
2143 {
2144 // Yes this is throwing an exception as a result of an MPI error.
2145 // and no we don't inform the other ranks that we are doing this.
2146 // however, we have no reason to believe coms work at this point anyway
2147 throw RipleyException("Error in coms for randomFill");
2148 }
2149
2150
2151 // Need to copy the values back from the buffers
2152 // Copy from SW
2153
2154 if (swused)
2155 {
2156 base=0;
2157 for (unsigned int i=0;i<inset;++i)
2158 {
2159 memcpy(src+base, SWin+i*inset, inset*sizeof(double));
2160 base+=ext[0];
2161 }
2162 }
2163 if (seused)
2164 {
2165 base=ext[0]-inset;
2166 for (unsigned int i=0;i<inset;++i)
2167 {
2168 memcpy(src+base, SEin+i*inset, inset*sizeof(double));
2169 base+=ext[0];
2170 }
2171 }
2172 if (nwused)
2173 {
2174 base=(ext[1]-inset)*ext[0];
2175 for (unsigned int i=0;i<inset;++i)
2176 {
2177 memcpy(src+base, NWin+i*inset, inset*sizeof(double));
2178 base+=ext[0];
2179 }
2180 }
2181 if (sused)
2182 {
2183 base=inset;
2184 for (unsigned int i=0;i<inset;++i)
2185 {
2186 memcpy(src+base, Sin+i*Swidth, Swidth*sizeof(double));
2187 base+=ext[0];
2188 }
2189 }
2190 if (wused)
2191 {
2192 base=inset*ext[0];
2193 for (unsigned int i=0;i<Eheight;++i)
2194 {
2195 memcpy(src+base, Win+i*inset, inset*sizeof(double));
2196 base+=ext[0];
2197 }
2198
2199 }
2200
2201 delete[] SWin;
2202 delete[] SEin;
2203 delete[] NWin;
2204 delete[] Sin;
2205 delete[] Win;
2206
2207 delete[] NEout;
2208 delete[] NWout;
2209 delete[] SEout;
2210 delete[] Nout;
2211 delete[] Eout;
2212 #endif
2213 escript::FunctionSpace fs(getPtr(), getContinuousFunctionCode());
2214 escript::Data resdat(0, escript::DataTypes::scalarShape, fs , true);
2215 // don't need to check for exwrite because we just made it
2216 escript::DataVector& dv=resdat.getExpandedVectorReference();
2217 double* convolution=get2DGauss(radius, sigma);
2218 for (size_t y=0;y<(internal[1]);++y)
2219 {
2220 for (size_t x=0;x<(internal[0]);++x)
2221 {
2222 dv[x+y*(internal[0])]=Convolve2D(convolution, src, x+radius, y+radius, radius, ext[0]);
2223
2224 }
2225 }
2226 delete[] convolution;
2227 delete[] src;
2228 return resdat;
2229 }
2230
2231 int Rectangle::findNode(const double *coords) const {
2232 const int NOT_MINE = -1;
2233 //is the found element even owned by this rank
2234 for (int dim = 0; dim < m_numDim; dim++) {
2235 if (m_origin[dim] + m_offset[dim] > coords[dim] || m_origin[dim]
2236 + m_offset[dim] + m_dx[dim]*m_ownNE[dim] < coords[dim]) {
2237 return NOT_MINE;
2238 }
2239 }
2240 // get distance from origin
2241 double x = coords[0] - m_origin[0];
2242 double y = coords[1] - m_origin[1];
2243 // distance in elements
2244 int ex = (int) floor(x / m_dx[0]);
2245 int ey = (int) floor(y / m_dx[1]);
2246 // set the min distance high enough to be outside the element plus a bit
2247 int closest = NOT_MINE;
2248 double minDist = 1;
2249 for (int dim = 0; dim < m_numDim; dim++) {
2250 minDist += m_dx[dim]*m_dx[dim];
2251 }
2252 //find the closest node
2253 for (int dx = 0; dx < 2; dx++) {
2254 double xdist = (x - (ex + dx)*m_dx[0]);
2255 for (int dy = 0; dy < 2; dy++) {
2256 double ydist = (y - (ey + dy)*m_dx[1]);
2257 double total = xdist*xdist + ydist*ydist;
2258 if (total < minDist) {
2259 closest = INDEX2(ex+dx, ey+dy, m_NE[0] + 1);
2260 minDist = total;
2261 }
2262 }
2263 }
2264 return closest;
2265 }
2266
2267 void Rectangle::setAssembler(std::string type, std::map<std::string,
2268 escript::Data> constants) {
2269 if (type.compare("WaveAssembler") == 0) {
2270 delete assembler;
2271 assembler = new WaveAssembler2D(this, m_dx, m_NX, m_NE, m_NN, constants);
2272 } else { //else ifs would go before this for other types
2273 throw RipleyException("Ripley::Rectangle does not support the"
2274 " requested assembler");
2275 }
2276 }
2277
2278 } // end of namespace ripley
2279

  ViewVC Help
Powered by ViewVC 1.1.26