/[escript]/trunk/ripley/src/Rectangle.cpp
ViewVC logotype

Contents of /trunk/ripley/src/Rectangle.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4638 - (show annotations)
Thu Jan 30 06:25:10 2014 UTC (5 years, 2 months ago) by jfenwick
File size: 88889 byte(s)
Fixes necessary to get a clang/macports build working on 10.9
1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2013 by University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Open Software License version 3.0
9 * http://www.opensource.org/licenses/osl-3.0.php
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development since 2012 by School of Earth Sciences
13 *
14 *****************************************************************************/
15
16 #include <ripley/Rectangle.h>
17 #include <paso/SystemMatrix.h>
18 #include <esysUtils/esysFileWriter.h>
19 #include <ripley/DefaultAssembler2D.h>
20 #include <ripley/WaveAssembler2D.h>
21 #include <boost/scoped_array.hpp>
22 #include "esysUtils/EsysRandom.h"
23
24 #ifdef USE_NETCDF
25 #include <netcdfcpp.h>
26 #endif
27
28 #if USE_SILO
29 #include <silo.h>
30 #ifdef ESYS_MPI
31 #include <pmpio.h>
32 #endif
33 #endif
34
35 #include <iomanip>
36
37 using namespace std;
38 using esysUtils::FileWriter;
39
40 namespace ripley {
41
42 Rectangle::Rectangle(int n0, int n1, double x0, double y0, double x1,
43 double y1, int d0, int d1,
44 const std::vector<double>& points,
45 const std::vector<int>& tags,
46 const simap_t& tagnamestonums) :
47 RipleyDomain(2)
48 {
49 // ignore subdivision parameters for serial run
50 if (m_mpiInfo->size == 1) {
51 d0=1;
52 d1=1;
53 }
54
55 bool warn=false;
56 // if number of subdivisions is non-positive, try to subdivide by the same
57 // ratio as the number of elements
58 if (d0<=0 && d1<=0) {
59 warn=true;
60 d0=max(1, (int)(sqrt(m_mpiInfo->size*(n0+1)/(float)(n1+1))));
61 d1=m_mpiInfo->size/d0;
62 if (d0*d1 != m_mpiInfo->size) {
63 // ratios not the same so subdivide side with more elements only
64 if (n0>n1) {
65 d0=0;
66 d1=1;
67 } else {
68 d0=1;
69 d1=0;
70 }
71 }
72 }
73 if (d0<=0) {
74 // d1 is preset, determine d0 - throw further down if result is no good
75 d0=m_mpiInfo->size/d1;
76 } else if (d1<=0) {
77 // d0 is preset, determine d1 - throw further down if result is no good
78 d1=m_mpiInfo->size/d0;
79 }
80
81 // ensure number of subdivisions is valid and nodes can be distributed
82 // among number of ranks
83 if (d0*d1 != m_mpiInfo->size)
84 throw RipleyException("Invalid number of spatial subdivisions");
85
86 if (warn) {
87 cout << "Warning: Automatic domain subdivision (d0=" << d0 << ", d1="
88 << d1 << "). This may not be optimal!" << endl;
89 }
90
91 double l0 = x1-x0;
92 double l1 = y1-y0;
93 m_dx[0] = l0/n0;
94 m_dx[1] = l1/n1;
95
96 if ((n0+1)%d0 > 0) {
97 n0=(int)round((float)(n0+1)/d0+0.5)*d0-1;
98 l0=m_dx[0]*n0;
99 cout << "Warning: Adjusted number of elements and length. N0="
100 << n0 << ", l0=" << l0 << endl;
101 }
102 if ((n1+1)%d1 > 0) {
103 n1=(int)round((float)(n1+1)/d1+0.5)*d1-1;
104 l1=m_dx[1]*n1;
105 cout << "Warning: Adjusted number of elements and length. N1="
106 << n1 << ", l1=" << l1 << endl;
107 }
108
109 if ((d0 > 1 && (n0+1)/d0<2) || (d1 > 1 && (n1+1)/d1<2))
110 throw RipleyException("Too few elements for the number of ranks");
111
112 m_gNE[0] = n0;
113 m_gNE[1] = n1;
114 m_origin[0] = x0;
115 m_origin[1] = y0;
116 m_length[0] = l0;
117 m_length[1] = l1;
118 m_NX[0] = d0;
119 m_NX[1] = d1;
120
121 // local number of elements (with and without overlap)
122 m_NE[0] = m_ownNE[0] = (d0>1 ? (n0+1)/d0 : n0);
123 if (m_mpiInfo->rank%d0>0 && m_mpiInfo->rank%d0<d0-1)
124 m_NE[0]++;
125 else if (d0>1 && m_mpiInfo->rank%d0==d0-1)
126 m_ownNE[0]--;
127
128 m_NE[1] = m_ownNE[1] = (d1>1 ? (n1+1)/d1 : n1);
129 if (m_mpiInfo->rank/d0>0 && m_mpiInfo->rank/d0<d1-1)
130 m_NE[1]++;
131 else if (d1>1 && m_mpiInfo->rank/d0==d1-1)
132 m_ownNE[1]--;
133
134 // local number of nodes
135 m_NN[0] = m_NE[0]+1;
136 m_NN[1] = m_NE[1]+1;
137
138 // bottom-left node is at (offset0,offset1) in global mesh
139 m_offset[0] = (n0+1)/d0*(m_mpiInfo->rank%d0);
140 if (m_offset[0] > 0)
141 m_offset[0]--;
142 m_offset[1] = (n1+1)/d1*(m_mpiInfo->rank/d0);
143 if (m_offset[1] > 0)
144 m_offset[1]--;
145
146 populateSampleIds();
147 createPattern();
148 assembler = new DefaultAssembler2D(this, m_dx, m_NX, m_NE, m_NN);
149 for (map<string, int>::const_iterator i = tagnamestonums.begin();
150 i != tagnamestonums.end(); i++) {
151 setTagMap(i->first, i->second);
152 }
153 addPoints(tags.size(), &points[0], &tags[0]);
154 }
155
156 Rectangle::~Rectangle()
157 {
158 Paso_SystemMatrixPattern_free(m_pattern);
159 Paso_Connector_free(m_connector);
160 delete assembler;
161 }
162
163 string Rectangle::getDescription() const
164 {
165 return "ripley::Rectangle";
166 }
167
168 bool Rectangle::operator==(const AbstractDomain& other) const
169 {
170 const Rectangle* o=dynamic_cast<const Rectangle*>(&other);
171 if (o) {
172 return (RipleyDomain::operator==(other) &&
173 m_gNE[0]==o->m_gNE[0] && m_gNE[1]==o->m_gNE[1]
174 && m_origin[0]==o->m_origin[0] && m_origin[1]==o->m_origin[1]
175 && m_length[0]==o->m_length[0] && m_length[1]==o->m_length[1]
176 && m_NX[0]==o->m_NX[0] && m_NX[1]==o->m_NX[1]);
177 }
178
179 return false;
180 }
181
182 void Rectangle::readNcGrid(escript::Data& out, string filename, string varname,
183 const ReaderParameters& params) const
184 {
185 #ifdef USE_NETCDF
186 // check destination function space
187 int myN0, myN1;
188 if (out.getFunctionSpace().getTypeCode() == Nodes) {
189 myN0 = m_NN[0];
190 myN1 = m_NN[1];
191 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
192 out.getFunctionSpace().getTypeCode() == ReducedElements) {
193 myN0 = m_NE[0];
194 myN1 = m_NE[1];
195 } else
196 throw RipleyException("readNcGrid(): invalid function space for output data object");
197
198 if (params.first.size() != 2)
199 throw RipleyException("readNcGrid(): argument 'first' must have 2 entries");
200
201 if (params.numValues.size() != 2)
202 throw RipleyException("readNcGrid(): argument 'numValues' must have 2 entries");
203
204 if (params.multiplier.size() != 2)
205 throw RipleyException("readNcGrid(): argument 'multiplier' must have 2 entries");
206 for (size_t i=0; i<params.multiplier.size(); i++)
207 if (params.multiplier[i]<1)
208 throw RipleyException("readNcGrid(): all multipliers must be positive");
209 if (params.reverse.size() != 2)
210 throw RipleyException("readNcGrid(): argument 'reverse' must have 2 entries");
211
212 // check file existence and size
213 NcFile f(filename.c_str(), NcFile::ReadOnly);
214 if (!f.is_valid())
215 throw RipleyException("readNcGrid(): cannot open file");
216
217 NcVar* var = f.get_var(varname.c_str());
218 if (!var)
219 throw RipleyException("readNcGrid(): invalid variable");
220
221 // TODO: rank>0 data support
222 const int numComp = out.getDataPointSize();
223 if (numComp > 1)
224 throw RipleyException("readNcGrid(): only scalar data supported");
225
226 const int dims = var->num_dims();
227 boost::scoped_array<long> edges(var->edges());
228
229 // is this a slice of the data object (dims!=2)?
230 // note the expected ordering of edges (as in numpy: y,x)
231 if ( (dims==2 && (params.numValues[1] > edges[0] || params.numValues[0] > edges[1]))
232 || (dims==1 && params.numValues[1]>1) ) {
233 throw RipleyException("readNcGrid(): not enough data in file");
234 }
235
236 // check if this rank contributes anything
237 if (params.first[0] >= m_offset[0]+myN0 ||
238 params.first[0]+params.numValues[0]*params.multiplier[0] <= m_offset[0] ||
239 params.first[1] >= m_offset[1]+myN1 ||
240 params.first[1]+params.numValues[1]*params.multiplier[1] <= m_offset[1])
241 return;
242
243 // now determine how much this rank has to write
244
245 // first coordinates in data object to write to
246 const int first0 = max(0, params.first[0]-m_offset[0]);
247 const int first1 = max(0, params.first[1]-m_offset[1]);
248 // indices to first value in file (not accounting for reverse yet)
249 int idx0 = max(0, m_offset[0]-params.first[0]);
250 int idx1 = max(0, m_offset[1]-params.first[1]);
251 // number of values to read
252 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
253 const int num1 = min(params.numValues[1]-idx1, myN1-first1);
254
255 // make sure we read the right block if going backwards through file
256 if (params.reverse[0])
257 idx0 = edges[dims-1]-num0-idx0;
258 if (dims>1 && params.reverse[1])
259 idx1 = edges[dims-2]-num1-idx1;
260
261 vector<double> values(num0*num1);
262 if (dims==2) {
263 var->set_cur(idx1, idx0);
264 var->get(&values[0], num1, num0);
265 } else {
266 var->set_cur(idx0);
267 var->get(&values[0], num0);
268 }
269
270 const int dpp = out.getNumDataPointsPerSample();
271 out.requireWrite();
272
273 // helpers for reversing
274 const int x0 = (params.reverse[0] ? num0-1 : 0);
275 const int x_mult = (params.reverse[0] ? -1 : 1);
276 const int y0 = (params.reverse[1] ? num1-1 : 0);
277 const int y_mult = (params.reverse[1] ? -1 : 1);
278
279 for (index_t y=0; y<num1; y++) {
280 #pragma omp parallel for
281 for (index_t x=0; x<num0; x++) {
282 const int baseIndex = first0+x*params.multiplier[0]
283 +(first1+y*params.multiplier[1])*myN0;
284 const int srcIndex = (y0+y_mult*y)*num0+(x0+x_mult*x);
285 if (!isnan(values[srcIndex])) {
286 for (index_t m1=0; m1<params.multiplier[1]; m1++) {
287 for (index_t m0=0; m0<params.multiplier[0]; m0++) {
288 const int dataIndex = baseIndex+m0+m1*myN0;
289 double* dest = out.getSampleDataRW(dataIndex);
290 for (index_t q=0; q<dpp; q++) {
291 *dest++ = values[srcIndex];
292 }
293 }
294 }
295 }
296 }
297 }
298 #else
299 throw RipleyException("readNcGrid(): not compiled with netCDF support");
300 #endif
301 }
302
303 void Rectangle::readBinaryGrid(escript::Data& out, string filename,
304 const ReaderParameters& params) const
305 {
306 // the mapping is not universally correct but should work on our
307 // supported platforms
308 switch (params.dataType) {
309 case DATATYPE_INT32:
310 readBinaryGridImpl<int>(out, filename, params);
311 break;
312 case DATATYPE_FLOAT32:
313 readBinaryGridImpl<float>(out, filename, params);
314 break;
315 case DATATYPE_FLOAT64:
316 readBinaryGridImpl<double>(out, filename, params);
317 break;
318 default:
319 throw RipleyException("readBinaryGrid(): invalid or unsupported datatype");
320 }
321 }
322
323 template<typename ValueType>
324 void Rectangle::readBinaryGridImpl(escript::Data& out, const string& filename,
325 const ReaderParameters& params) const
326 {
327 // check destination function space
328 int myN0, myN1;
329 if (out.getFunctionSpace().getTypeCode() == Nodes) {
330 myN0 = m_NN[0];
331 myN1 = m_NN[1];
332 } else if (out.getFunctionSpace().getTypeCode() == Elements ||
333 out.getFunctionSpace().getTypeCode() == ReducedElements) {
334 myN0 = m_NE[0];
335 myN1 = m_NE[1];
336 } else
337 throw RipleyException("readBinaryGrid(): invalid function space for output data object");
338
339 // check file existence and size
340 ifstream f(filename.c_str(), ifstream::binary);
341 if (f.fail()) {
342 throw RipleyException("readBinaryGrid(): cannot open file");
343 }
344 f.seekg(0, ios::end);
345 const int numComp = out.getDataPointSize();
346 const int filesize = f.tellg();
347 const int reqsize = params.numValues[0]*params.numValues[1]*numComp*sizeof(ValueType);
348 if (filesize < reqsize) {
349 f.close();
350 throw RipleyException("readBinaryGrid(): not enough data in file");
351 }
352
353 // check if this rank contributes anything
354 if (params.first[0] >= m_offset[0]+myN0 ||
355 params.first[0]+params.numValues[0] <= m_offset[0] ||
356 params.first[1] >= m_offset[1]+myN1 ||
357 params.first[1]+params.numValues[1] <= m_offset[1]) {
358 f.close();
359 return;
360 }
361
362 // now determine how much this rank has to write
363
364 // first coordinates in data object to write to
365 const int first0 = max(0, params.first[0]-m_offset[0]);
366 const int first1 = max(0, params.first[1]-m_offset[1]);
367 // indices to first value in file
368 const int idx0 = max(0, m_offset[0]-params.first[0]);
369 const int idx1 = max(0, m_offset[1]-params.first[1]);
370 // number of values to read
371 const int num0 = min(params.numValues[0]-idx0, myN0-first0);
372 const int num1 = min(params.numValues[1]-idx1, myN1-first1);
373
374 out.requireWrite();
375 vector<ValueType> values(num0*numComp);
376 const int dpp = out.getNumDataPointsPerSample();
377
378 for (int y=0; y<num1; y++) {
379 const int fileofs = numComp*(idx0+(idx1+y)*params.numValues[0]);
380 f.seekg(fileofs*sizeof(ValueType));
381 f.read((char*)&values[0], num0*numComp*sizeof(ValueType));
382 for (int x=0; x<num0; x++) {
383 const int baseIndex = first0+x*params.multiplier[0]
384 +(first1+y*params.multiplier[1])*myN0;
385 for (int m1=0; m1<params.multiplier[1]; m1++) {
386 for (int m0=0; m0<params.multiplier[0]; m0++) {
387 const int dataIndex = baseIndex+m0+m1*myN0;
388 double* dest = out.getSampleDataRW(dataIndex);
389 for (int c=0; c<numComp; c++) {
390 ValueType val = values[x*numComp+c];
391
392 if (params.byteOrder != BYTEORDER_NATIVE) {
393 char* cval = reinterpret_cast<char*>(&val);
394 // this will alter val!!
395 byte_swap32(cval);
396 }
397 if (!std::isnan(val)) {
398 for (int q=0; q<dpp; q++) {
399 *dest++ = static_cast<double>(val);
400 }
401 }
402 }
403 }
404 }
405 }
406 }
407
408 f.close();
409 }
410
411 void Rectangle::writeBinaryGrid(const escript::Data& in, string filename,
412 int byteOrder, int dataType) const
413 {
414 // the mapping is not universally correct but should work on our
415 // supported platforms
416 switch (dataType) {
417 case DATATYPE_INT32:
418 writeBinaryGridImpl<int>(in, filename, byteOrder);
419 break;
420 case DATATYPE_FLOAT32:
421 writeBinaryGridImpl<float>(in, filename, byteOrder);
422 break;
423 case DATATYPE_FLOAT64:
424 writeBinaryGridImpl<double>(in, filename, byteOrder);
425 break;
426 default:
427 throw RipleyException("writeBinaryGrid(): invalid or unsupported datatype");
428 }
429 }
430
431 template<typename ValueType>
432 void Rectangle::writeBinaryGridImpl(const escript::Data& in,
433 const string& filename, int byteOrder) const
434 {
435 // check function space and determine number of points
436 int myN0, myN1;
437 int totalN0, totalN1;
438 if (in.getFunctionSpace().getTypeCode() == Nodes) {
439 myN0 = m_NN[0];
440 myN1 = m_NN[1];
441 totalN0 = m_gNE[0]+1;
442 totalN1 = m_gNE[1]+1;
443 } else if (in.getFunctionSpace().getTypeCode() == Elements ||
444 in.getFunctionSpace().getTypeCode() == ReducedElements) {
445 myN0 = m_NE[0];
446 myN1 = m_NE[1];
447 totalN0 = m_gNE[0];
448 totalN1 = m_gNE[1];
449 } else
450 throw RipleyException("writeBinaryGrid(): invalid function space of data object");
451
452 const int numComp = in.getDataPointSize();
453 const int dpp = in.getNumDataPointsPerSample();
454
455 if (numComp > 1 || dpp > 1)
456 throw RipleyException("writeBinaryGrid(): only scalar, single-value data supported");
457
458 const int fileSize = sizeof(ValueType)*numComp*dpp*totalN0*totalN1;
459
460 // from here on we know that each sample consists of one value
461 FileWriter fw;
462 fw.openFile(filename, fileSize);
463 MPIBarrier();
464
465 for (index_t y=0; y<myN1; y++) {
466 const int fileofs = (m_offset[0]+(m_offset[1]+y)*totalN0)*sizeof(ValueType);
467 ostringstream oss;
468
469 for (index_t x=0; x<myN0; x++) {
470 const double* sample = in.getSampleDataRO(y*myN0+x);
471 ValueType fvalue = static_cast<ValueType>(*sample);
472 if (byteOrder == BYTEORDER_NATIVE) {
473 oss.write((char*)&fvalue, sizeof(fvalue));
474 } else {
475 char* value = reinterpret_cast<char*>(&fvalue);
476 oss.write(byte_swap32(value), sizeof(fvalue));
477 }
478 }
479 fw.writeAt(oss, fileofs);
480 }
481 fw.close();
482 }
483
484 void Rectangle::dump(const string& fileName) const
485 {
486 #if USE_SILO
487 string fn(fileName);
488 if (fileName.length() < 6 || fileName.compare(fileName.length()-5, 5, ".silo") != 0) {
489 fn+=".silo";
490 }
491
492 int driver=DB_HDF5;
493 DBfile* dbfile = NULL;
494 const char* blockDirFmt = "/block%04d";
495
496 #ifdef ESYS_MPI
497 PMPIO_baton_t* baton = NULL;
498 const int NUM_SILO_FILES = 1;
499 #endif
500
501 if (m_mpiInfo->size > 1) {
502 #ifdef ESYS_MPI
503 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
504 0x1337, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
505 PMPIO_DefaultClose, (void*)&driver);
506 // try the fallback driver in case of error
507 if (!baton && driver != DB_PDB) {
508 driver = DB_PDB;
509 baton = PMPIO_Init(NUM_SILO_FILES, PMPIO_WRITE, m_mpiInfo->comm,
510 0x1338, PMPIO_DefaultCreate, PMPIO_DefaultOpen,
511 PMPIO_DefaultClose, (void*)&driver);
512 }
513 if (baton) {
514 char siloPath[64];
515 snprintf(siloPath, 64, blockDirFmt, PMPIO_RankInGroup(baton, m_mpiInfo->rank));
516 dbfile = (DBfile*) PMPIO_WaitForBaton(baton, fn.c_str(), siloPath);
517 }
518 #endif
519 } else {
520 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
521 getDescription().c_str(), driver);
522 // try the fallback driver in case of error
523 if (!dbfile && driver != DB_PDB) {
524 driver = DB_PDB;
525 dbfile = DBCreate(fn.c_str(), DB_CLOBBER, DB_LOCAL,
526 getDescription().c_str(), driver);
527 }
528 char siloPath[64];
529 snprintf(siloPath, 64, blockDirFmt, 0);
530 DBMkDir(dbfile, siloPath);
531 DBSetDir(dbfile, siloPath);
532 }
533
534 if (!dbfile)
535 throw RipleyException("dump: Could not create Silo file");
536
537 /*
538 if (driver==DB_HDF5) {
539 // gzip level 1 already provides good compression with minimal
540 // performance penalty. Some tests showed that gzip levels >3 performed
541 // rather badly on escript data both in terms of time and space
542 DBSetCompression("ERRMODE=FALLBACK METHOD=GZIP LEVEL=1");
543 }
544 */
545
546 boost::scoped_ptr<double> x(new double[m_NN[0]]);
547 boost::scoped_ptr<double> y(new double[m_NN[1]]);
548 double* coords[2] = { x.get(), y.get() };
549 #pragma omp parallel
550 {
551 #pragma omp for nowait
552 for (dim_t i0 = 0; i0 < m_NN[0]; i0++) {
553 coords[0][i0]=getLocalCoordinate(i0, 0);
554 }
555 #pragma omp for nowait
556 for (dim_t i1 = 0; i1 < m_NN[1]; i1++) {
557 coords[1][i1]=getLocalCoordinate(i1, 1);
558 }
559 }
560 int* dims = const_cast<int*>(getNumNodesPerDim());
561
562 // write mesh
563 DBPutQuadmesh(dbfile, "mesh", NULL, coords, dims, 2, DB_DOUBLE,
564 DB_COLLINEAR, NULL);
565
566 // write node ids
567 DBPutQuadvar1(dbfile, "nodeId", "mesh", (void*)&m_nodeId[0], dims, 2,
568 NULL, 0, DB_INT, DB_NODECENT, NULL);
569
570 // write element ids
571 dims = const_cast<int*>(getNumElementsPerDim());
572 DBPutQuadvar1(dbfile, "elementId", "mesh", (void*)&m_elementId[0],
573 dims, 2, NULL, 0, DB_INT, DB_ZONECENT, NULL);
574
575 // rank 0 writes multimesh and multivar
576 if (m_mpiInfo->rank == 0) {
577 vector<string> tempstrings;
578 vector<char*> names;
579 for (dim_t i=0; i<m_mpiInfo->size; i++) {
580 stringstream path;
581 path << "/block" << setw(4) << setfill('0') << right << i << "/mesh";
582 tempstrings.push_back(path.str());
583 names.push_back((char*)tempstrings.back().c_str());
584 }
585 vector<int> types(m_mpiInfo->size, DB_QUAD_RECT);
586 DBSetDir(dbfile, "/");
587 DBPutMultimesh(dbfile, "multimesh", m_mpiInfo->size, &names[0],
588 &types[0], NULL);
589 tempstrings.clear();
590 names.clear();
591 for (dim_t i=0; i<m_mpiInfo->size; i++) {
592 stringstream path;
593 path << "/block" << setw(4) << setfill('0') << right << i << "/nodeId";
594 tempstrings.push_back(path.str());
595 names.push_back((char*)tempstrings.back().c_str());
596 }
597 types.assign(m_mpiInfo->size, DB_QUADVAR);
598 DBPutMultivar(dbfile, "nodeId", m_mpiInfo->size, &names[0],
599 &types[0], NULL);
600 tempstrings.clear();
601 names.clear();
602 for (dim_t i=0; i<m_mpiInfo->size; i++) {
603 stringstream path;
604 path << "/block" << setw(4) << setfill('0') << right << i << "/elementId";
605 tempstrings.push_back(path.str());
606 names.push_back((char*)tempstrings.back().c_str());
607 }
608 DBPutMultivar(dbfile, "elementId", m_mpiInfo->size, &names[0],
609 &types[0], NULL);
610 }
611
612 if (m_mpiInfo->size > 1) {
613 #ifdef ESYS_MPI
614 PMPIO_HandOffBaton(baton, dbfile);
615 PMPIO_Finish(baton);
616 #endif
617 } else {
618 DBClose(dbfile);
619 }
620
621 #else // USE_SILO
622 throw RipleyException("dump: no Silo support");
623 #endif
624 }
625
626 const int* Rectangle::borrowSampleReferenceIDs(int fsType) const
627 {
628 switch (fsType) {
629 case Nodes:
630 case ReducedNodes: // FIXME: reduced
631 return &m_nodeId[0];
632 case DegreesOfFreedom:
633 case ReducedDegreesOfFreedom: // FIXME: reduced
634 return &m_dofId[0];
635 case Elements:
636 case ReducedElements:
637 return &m_elementId[0];
638 case FaceElements:
639 case ReducedFaceElements:
640 return &m_faceId[0];
641 case Points:
642 return &m_diracPointNodeIDs[0];
643 default:
644 break;
645 }
646
647 stringstream msg;
648 msg << "borrowSampleReferenceIDs: invalid function space type " << fsType;
649 throw RipleyException(msg.str());
650 }
651
652 bool Rectangle::ownSample(int fsType, index_t id) const
653 {
654 if (getMPISize()==1)
655 return true;
656
657 switch (fsType) {
658 case Nodes:
659 case ReducedNodes: // FIXME: reduced
660 return (m_dofMap[id] < getNumDOF());
661 case DegreesOfFreedom:
662 case ReducedDegreesOfFreedom:
663 return true;
664 case Elements:
665 case ReducedElements:
666 // check ownership of element's bottom left node
667 return (m_dofMap[id%m_NE[0]+m_NN[0]*(id/m_NE[0])] < getNumDOF());
668 case FaceElements:
669 case ReducedFaceElements:
670 {
671 // determine which face the sample belongs to before
672 // checking ownership of corresponding element's first node
673 dim_t n=0;
674 for (size_t i=0; i<4; i++) {
675 n+=m_faceCount[i];
676 if (id<n) {
677 index_t k;
678 if (i==1)
679 k=m_NN[0]-2;
680 else if (i==3)
681 k=m_NN[0]*(m_NN[1]-2);
682 else
683 k=0;
684 // determine whether to move right or up
685 const index_t delta=(i/2==0 ? m_NN[0] : 1);
686 return (m_dofMap[k+(id-n+m_faceCount[i])*delta] < getNumDOF());
687 }
688 }
689 return false;
690 }
691 default:
692 break;
693 }
694
695 stringstream msg;
696 msg << "ownSample: invalid function space type " << fsType;
697 throw RipleyException(msg.str());
698 }
699
700 void Rectangle::setToNormal(escript::Data& out) const
701 {
702 if (out.getFunctionSpace().getTypeCode() == FaceElements) {
703 out.requireWrite();
704 #pragma omp parallel
705 {
706 if (m_faceOffset[0] > -1) {
707 #pragma omp for nowait
708 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
709 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
710 // set vector at two quadrature points
711 *o++ = -1.;
712 *o++ = 0.;
713 *o++ = -1.;
714 *o = 0.;
715 }
716 }
717
718 if (m_faceOffset[1] > -1) {
719 #pragma omp for nowait
720 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
721 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
722 // set vector at two quadrature points
723 *o++ = 1.;
724 *o++ = 0.;
725 *o++ = 1.;
726 *o = 0.;
727 }
728 }
729
730 if (m_faceOffset[2] > -1) {
731 #pragma omp for nowait
732 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
733 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
734 // set vector at two quadrature points
735 *o++ = 0.;
736 *o++ = -1.;
737 *o++ = 0.;
738 *o = -1.;
739 }
740 }
741
742 if (m_faceOffset[3] > -1) {
743 #pragma omp for nowait
744 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
745 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
746 // set vector at two quadrature points
747 *o++ = 0.;
748 *o++ = 1.;
749 *o++ = 0.;
750 *o = 1.;
751 }
752 }
753 } // end of parallel section
754 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
755 out.requireWrite();
756 #pragma omp parallel
757 {
758 if (m_faceOffset[0] > -1) {
759 #pragma omp for nowait
760 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
761 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
762 *o++ = -1.;
763 *o = 0.;
764 }
765 }
766
767 if (m_faceOffset[1] > -1) {
768 #pragma omp for nowait
769 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
770 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
771 *o++ = 1.;
772 *o = 0.;
773 }
774 }
775
776 if (m_faceOffset[2] > -1) {
777 #pragma omp for nowait
778 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
779 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
780 *o++ = 0.;
781 *o = -1.;
782 }
783 }
784
785 if (m_faceOffset[3] > -1) {
786 #pragma omp for nowait
787 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
788 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
789 *o++ = 0.;
790 *o = 1.;
791 }
792 }
793 } // end of parallel section
794
795 } else {
796 stringstream msg;
797 msg << "setToNormal: invalid function space type "
798 << out.getFunctionSpace().getTypeCode();
799 throw RipleyException(msg.str());
800 }
801 }
802
803 void Rectangle::setToSize(escript::Data& out) const
804 {
805 if (out.getFunctionSpace().getTypeCode() == Elements
806 || out.getFunctionSpace().getTypeCode() == ReducedElements) {
807 out.requireWrite();
808 const dim_t numQuad=out.getNumDataPointsPerSample();
809 const double size=sqrt(m_dx[0]*m_dx[0]+m_dx[1]*m_dx[1]);
810 #pragma omp parallel for
811 for (index_t k = 0; k < getNumElements(); ++k) {
812 double* o = out.getSampleDataRW(k);
813 fill(o, o+numQuad, size);
814 }
815 } else if (out.getFunctionSpace().getTypeCode() == FaceElements
816 || out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
817 out.requireWrite();
818 const dim_t numQuad=out.getNumDataPointsPerSample();
819 #pragma omp parallel
820 {
821 if (m_faceOffset[0] > -1) {
822 #pragma omp for nowait
823 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
824 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
825 fill(o, o+numQuad, m_dx[1]);
826 }
827 }
828
829 if (m_faceOffset[1] > -1) {
830 #pragma omp for nowait
831 for (index_t k1 = 0; k1 < m_NE[1]; ++k1) {
832 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
833 fill(o, o+numQuad, m_dx[1]);
834 }
835 }
836
837 if (m_faceOffset[2] > -1) {
838 #pragma omp for nowait
839 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
840 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
841 fill(o, o+numQuad, m_dx[0]);
842 }
843 }
844
845 if (m_faceOffset[3] > -1) {
846 #pragma omp for nowait
847 for (index_t k0 = 0; k0 < m_NE[0]; ++k0) {
848 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
849 fill(o, o+numQuad, m_dx[0]);
850 }
851 }
852 } // end of parallel section
853
854 } else {
855 stringstream msg;
856 msg << "setToSize: invalid function space type "
857 << out.getFunctionSpace().getTypeCode();
858 throw RipleyException(msg.str());
859 }
860 }
861
862 void Rectangle::Print_Mesh_Info(const bool full) const
863 {
864 RipleyDomain::Print_Mesh_Info(full);
865 if (full) {
866 cout << " Id Coordinates" << endl;
867 cout.precision(15);
868 cout.setf(ios::scientific, ios::floatfield);
869 for (index_t i=0; i < getNumNodes(); i++) {
870 cout << " " << setw(5) << m_nodeId[i]
871 << " " << getLocalCoordinate(i%m_NN[0], 0)
872 << " " << getLocalCoordinate(i/m_NN[0], 1) << endl;
873 }
874 }
875 }
876
877
878 //protected
879 void Rectangle::assembleCoordinates(escript::Data& arg) const
880 {
881 escriptDataC x = arg.getDataC();
882 int numDim = m_numDim;
883 if (!isDataPointShapeEqual(&x, 1, &numDim))
884 throw RipleyException("setToX: Invalid Data object shape");
885 if (!numSamplesEqual(&x, 1, getNumNodes()))
886 throw RipleyException("setToX: Illegal number of samples in Data object");
887
888 arg.requireWrite();
889 #pragma omp parallel for
890 for (dim_t i1 = 0; i1 < m_NN[1]; i1++) {
891 for (dim_t i0 = 0; i0 < m_NN[0]; i0++) {
892 double* point = arg.getSampleDataRW(i0+m_NN[0]*i1);
893 point[0] = getLocalCoordinate(i0, 0);
894 point[1] = getLocalCoordinate(i1, 1);
895 }
896 }
897 }
898
899 //protected
900 void Rectangle::assembleGradient(escript::Data& out, const escript::Data& in) const
901 {
902 const dim_t numComp = in.getDataPointSize();
903 const double cx0 = .21132486540518711775/m_dx[0];
904 const double cx1 = .78867513459481288225/m_dx[0];
905 const double cx2 = 1./m_dx[0];
906 const double cy0 = .21132486540518711775/m_dx[1];
907 const double cy1 = .78867513459481288225/m_dx[1];
908 const double cy2 = 1./m_dx[1];
909
910 if (out.getFunctionSpace().getTypeCode() == Elements) {
911 out.requireWrite();
912 #pragma omp parallel
913 {
914 vector<double> f_00(numComp);
915 vector<double> f_01(numComp);
916 vector<double> f_10(numComp);
917 vector<double> f_11(numComp);
918 #pragma omp for
919 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
920 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
921 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,k1, m_NN[0])), numComp*sizeof(double));
922 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,k1+1, m_NN[0])), numComp*sizeof(double));
923 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,k1, m_NN[0])), numComp*sizeof(double));
924 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,k1+1, m_NN[0])), numComp*sizeof(double));
925 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE[0]));
926 for (index_t i=0; i < numComp; ++i) {
927 o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx1 + (f_11[i]-f_01[i])*cx0;
928 o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy1 + (f_11[i]-f_10[i])*cy0;
929 o[INDEX3(i,0,1,numComp,2)] = (f_10[i]-f_00[i])*cx1 + (f_11[i]-f_01[i])*cx0;
930 o[INDEX3(i,1,1,numComp,2)] = (f_01[i]-f_00[i])*cy0 + (f_11[i]-f_10[i])*cy1;
931 o[INDEX3(i,0,2,numComp,2)] = (f_10[i]-f_00[i])*cx0 + (f_11[i]-f_01[i])*cx1;
932 o[INDEX3(i,1,2,numComp,2)] = (f_01[i]-f_00[i])*cy1 + (f_11[i]-f_10[i])*cy0;
933 o[INDEX3(i,0,3,numComp,2)] = (f_10[i]-f_00[i])*cx0 + (f_11[i]-f_01[i])*cx1;
934 o[INDEX3(i,1,3,numComp,2)] = (f_01[i]-f_00[i])*cy0 + (f_11[i]-f_10[i])*cy1;
935 } // end of component loop i
936 } // end of k0 loop
937 } // end of k1 loop
938 } // end of parallel section
939 } else if (out.getFunctionSpace().getTypeCode() == ReducedElements) {
940 out.requireWrite();
941 #pragma omp parallel
942 {
943 vector<double> f_00(numComp);
944 vector<double> f_01(numComp);
945 vector<double> f_10(numComp);
946 vector<double> f_11(numComp);
947 #pragma omp for
948 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
949 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
950 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,k1, m_NN[0])), numComp*sizeof(double));
951 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,k1+1, m_NN[0])), numComp*sizeof(double));
952 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,k1, m_NN[0])), numComp*sizeof(double));
953 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,k1+1, m_NN[0])), numComp*sizeof(double));
954 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE[0]));
955 for (index_t i=0; i < numComp; ++i) {
956 o[INDEX3(i,0,0,numComp,2)] = (f_10[i] + f_11[i] - f_00[i] - f_01[i])*cx2/2;
957 o[INDEX3(i,1,0,numComp,2)] = (f_01[i] + f_11[i] - f_00[i] - f_10[i])*cy2/2;
958 } // end of component loop i
959 } // end of k0 loop
960 } // end of k1 loop
961 } // end of parallel section
962 } else if (out.getFunctionSpace().getTypeCode() == FaceElements) {
963 out.requireWrite();
964 #pragma omp parallel
965 {
966 vector<double> f_00(numComp);
967 vector<double> f_01(numComp);
968 vector<double> f_10(numComp);
969 vector<double> f_11(numComp);
970 if (m_faceOffset[0] > -1) {
971 #pragma omp for nowait
972 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
973 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(0,k1, m_NN[0])), numComp*sizeof(double));
974 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(0,k1+1, m_NN[0])), numComp*sizeof(double));
975 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(1,k1, m_NN[0])), numComp*sizeof(double));
976 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(1,k1+1, m_NN[0])), numComp*sizeof(double));
977 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
978 for (index_t i=0; i < numComp; ++i) {
979 o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx1 + (f_11[i]-f_01[i])*cx0;
980 o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy2;
981 o[INDEX3(i,0,1,numComp,2)] = (f_10[i]-f_00[i])*cx0 + (f_11[i]-f_01[i])*cx1;
982 o[INDEX3(i,1,1,numComp,2)] = (f_01[i]-f_00[i])*cy2;
983 } // end of component loop i
984 } // end of k1 loop
985 } // end of face 0
986 if (m_faceOffset[1] > -1) {
987 #pragma omp for nowait
988 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
989 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(m_NN[0]-2,k1, m_NN[0])), numComp*sizeof(double));
990 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(m_NN[0]-2,k1+1, m_NN[0])), numComp*sizeof(double));
991 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1, m_NN[0])), numComp*sizeof(double));
992 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1+1, m_NN[0])), numComp*sizeof(double));
993 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
994 for (index_t i=0; i < numComp; ++i) {
995 o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx1 + (f_11[i]-f_01[i])*cx0;
996 o[INDEX3(i,1,0,numComp,2)] = (f_11[i]-f_10[i])*cy2;
997 o[INDEX3(i,0,1,numComp,2)] = (f_10[i]-f_00[i])*cx0 + (f_11[i]-f_01[i])*cx1;
998 o[INDEX3(i,1,1,numComp,2)] = (f_11[i]-f_10[i])*cy2;
999 } // end of component loop i
1000 } // end of k1 loop
1001 } // end of face 1
1002 if (m_faceOffset[2] > -1) {
1003 #pragma omp for nowait
1004 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1005 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,0, m_NN[0])), numComp*sizeof(double));
1006 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,1, m_NN[0])), numComp*sizeof(double));
1007 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,0, m_NN[0])), numComp*sizeof(double));
1008 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,1, m_NN[0])), numComp*sizeof(double));
1009 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1010 for (index_t i=0; i < numComp; ++i) {
1011 o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx2;
1012 o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy1 + (f_11[i]-f_10[i])*cy0;
1013 o[INDEX3(i,0,1,numComp,2)] = (f_10[i]-f_00[i])*cx2;
1014 o[INDEX3(i,1,1,numComp,2)] = (f_01[i]-f_00[i])*cy0 + (f_11[i]-f_10[i])*cy1;
1015 } // end of component loop i
1016 } // end of k0 loop
1017 } // end of face 2
1018 if (m_faceOffset[3] > -1) {
1019 #pragma omp for nowait
1020 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1021 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-2, m_NN[0])), numComp*sizeof(double));
1022 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1023 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-2, m_NN[0])), numComp*sizeof(double));
1024 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1025 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1026 for (index_t i=0; i < numComp; ++i) {
1027 o[INDEX3(i,0,0,numComp,2)] = (f_11[i]-f_01[i])*cx2;
1028 o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy1 + (f_11[i]-f_10[i])*cy0;
1029 o[INDEX3(i,0,1,numComp,2)] = (f_11[i]-f_01[i])*cx2;
1030 o[INDEX3(i,1,1,numComp,2)] = (f_01[i]-f_00[i])*cy0 + (f_11[i]-f_10[i])*cy1;
1031 } // end of component loop i
1032 } // end of k0 loop
1033 } // end of face 3
1034 } // end of parallel section
1035
1036 } else if (out.getFunctionSpace().getTypeCode() == ReducedFaceElements) {
1037 out.requireWrite();
1038 #pragma omp parallel
1039 {
1040 vector<double> f_00(numComp);
1041 vector<double> f_01(numComp);
1042 vector<double> f_10(numComp);
1043 vector<double> f_11(numComp);
1044 if (m_faceOffset[0] > -1) {
1045 #pragma omp for nowait
1046 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1047 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(0,k1, m_NN[0])), numComp*sizeof(double));
1048 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(0,k1+1, m_NN[0])), numComp*sizeof(double));
1049 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(1,k1, m_NN[0])), numComp*sizeof(double));
1050 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(1,k1+1, m_NN[0])), numComp*sizeof(double));
1051 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
1052 for (index_t i=0; i < numComp; ++i) {
1053 o[INDEX3(i,0,0,numComp,2)] = (f_10[i] + f_11[i] - f_00[i] - f_01[i])*cx2/2;
1054 o[INDEX3(i,1,0,numComp,2)] = (f_01[i]-f_00[i])*cy2;
1055 } // end of component loop i
1056 } // end of k1 loop
1057 } // end of face 0
1058 if (m_faceOffset[1] > -1) {
1059 #pragma omp for nowait
1060 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1061 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(m_NN[0]-2,k1, m_NN[0])), numComp*sizeof(double));
1062 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(m_NN[0]-2,k1+1, m_NN[0])), numComp*sizeof(double));
1063 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1, m_NN[0])), numComp*sizeof(double));
1064 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1+1, m_NN[0])), numComp*sizeof(double));
1065 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
1066 for (index_t i=0; i < numComp; ++i) {
1067 o[INDEX3(i,0,0,numComp,2)] = (f_10[i] + f_11[i] - f_00[i] - f_01[i])*cx2/2;
1068 o[INDEX3(i,1,0,numComp,2)] = (f_11[i]-f_10[i])*cy2;
1069 } // end of component loop i
1070 } // end of k1 loop
1071 } // end of face 1
1072 if (m_faceOffset[2] > -1) {
1073 #pragma omp for nowait
1074 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1075 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,0, m_NN[0])), numComp*sizeof(double));
1076 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,1, m_NN[0])), numComp*sizeof(double));
1077 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,0, m_NN[0])), numComp*sizeof(double));
1078 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,1, m_NN[0])), numComp*sizeof(double));
1079 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1080 for (index_t i=0; i < numComp; ++i) {
1081 o[INDEX3(i,0,0,numComp,2)] = (f_10[i]-f_00[i])*cx2;
1082 o[INDEX3(i,1,0,numComp,2)] = (f_01[i] + f_11[i] - f_00[i] - f_10[i])*cy2/2;
1083 } // end of component loop i
1084 } // end of k0 loop
1085 } // end of face 2
1086 if (m_faceOffset[3] > -1) {
1087 #pragma omp for nowait
1088 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1089 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-2, m_NN[0])), numComp*sizeof(double));
1090 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1091 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-2, m_NN[0])), numComp*sizeof(double));
1092 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1093 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1094 for (index_t i=0; i < numComp; ++i) {
1095 o[INDEX3(i,0,0,numComp,2)] = (f_11[i]-f_01[i])*cx2;
1096 o[INDEX3(i,1,0,numComp,2)] = (f_01[i] + f_11[i] - f_00[i] - f_10[i])*cy2/2;
1097 } // end of component loop i
1098 } // end of k0 loop
1099 } // end of face 3
1100 } // end of parallel section
1101 }
1102 }
1103
1104 //protected
1105 void Rectangle::assembleIntegrate(vector<double>& integrals,
1106 const escript::Data& arg) const
1107 {
1108 const dim_t numComp = arg.getDataPointSize();
1109 const index_t left = (m_offset[0]==0 ? 0 : 1);
1110 const index_t bottom = (m_offset[1]==0 ? 0 : 1);
1111 const int fs=arg.getFunctionSpace().getTypeCode();
1112 if (fs == Elements && arg.actsExpanded()) {
1113 #pragma omp parallel
1114 {
1115 vector<double> int_local(numComp, 0);
1116 const double w = m_dx[0]*m_dx[1]/4.;
1117 #pragma omp for nowait
1118 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1119 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1120 const double* f = arg.getSampleDataRO(INDEX2(k0, k1, m_NE[0]));
1121 for (index_t i=0; i < numComp; ++i) {
1122 const double f0 = f[INDEX2(i,0,numComp)];
1123 const double f1 = f[INDEX2(i,1,numComp)];
1124 const double f2 = f[INDEX2(i,2,numComp)];
1125 const double f3 = f[INDEX2(i,3,numComp)];
1126 int_local[i]+=(f0+f1+f2+f3)*w;
1127 } // end of component loop i
1128 } // end of k0 loop
1129 } // end of k1 loop
1130 #pragma omp critical
1131 for (index_t i=0; i<numComp; i++)
1132 integrals[i]+=int_local[i];
1133 } // end of parallel section
1134
1135 } else if (fs==ReducedElements || (fs==Elements && !arg.actsExpanded())) {
1136 const double w = m_dx[0]*m_dx[1];
1137 #pragma omp parallel
1138 {
1139 vector<double> int_local(numComp, 0);
1140 #pragma omp for nowait
1141 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1142 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1143 const double* f = arg.getSampleDataRO(INDEX2(k0, k1, m_NE[0]));
1144 for (index_t i=0; i < numComp; ++i) {
1145 int_local[i]+=f[i]*w;
1146 }
1147 }
1148 }
1149 #pragma omp critical
1150 for (index_t i=0; i<numComp; i++)
1151 integrals[i]+=int_local[i];
1152 } // end of parallel section
1153
1154 } else if (fs == FaceElements && arg.actsExpanded()) {
1155 #pragma omp parallel
1156 {
1157 vector<double> int_local(numComp, 0);
1158 const double w0 = m_dx[0]/2.;
1159 const double w1 = m_dx[1]/2.;
1160 if (m_faceOffset[0] > -1) {
1161 #pragma omp for nowait
1162 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1163 const double* f = arg.getSampleDataRO(m_faceOffset[0]+k1);
1164 for (index_t i=0; i < numComp; ++i) {
1165 const double f0 = f[INDEX2(i,0,numComp)];
1166 const double f1 = f[INDEX2(i,1,numComp)];
1167 int_local[i]+=(f0+f1)*w1;
1168 } // end of component loop i
1169 } // end of k1 loop
1170 }
1171
1172 if (m_faceOffset[1] > -1) {
1173 #pragma omp for nowait
1174 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1175 const double* f = arg.getSampleDataRO(m_faceOffset[1]+k1);
1176 for (index_t i=0; i < numComp; ++i) {
1177 const double f0 = f[INDEX2(i,0,numComp)];
1178 const double f1 = f[INDEX2(i,1,numComp)];
1179 int_local[i]+=(f0+f1)*w1;
1180 } // end of component loop i
1181 } // end of k1 loop
1182 }
1183
1184 if (m_faceOffset[2] > -1) {
1185 #pragma omp for nowait
1186 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1187 const double* f = arg.getSampleDataRO(m_faceOffset[2]+k0);
1188 for (index_t i=0; i < numComp; ++i) {
1189 const double f0 = f[INDEX2(i,0,numComp)];
1190 const double f1 = f[INDEX2(i,1,numComp)];
1191 int_local[i]+=(f0+f1)*w0;
1192 } // end of component loop i
1193 } // end of k0 loop
1194 }
1195
1196 if (m_faceOffset[3] > -1) {
1197 #pragma omp for nowait
1198 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1199 const double* f = arg.getSampleDataRO(m_faceOffset[3]+k0);
1200 for (index_t i=0; i < numComp; ++i) {
1201 const double f0 = f[INDEX2(i,0,numComp)];
1202 const double f1 = f[INDEX2(i,1,numComp)];
1203 int_local[i]+=(f0+f1)*w0;
1204 } // end of component loop i
1205 } // end of k0 loop
1206 }
1207 #pragma omp critical
1208 for (index_t i=0; i<numComp; i++)
1209 integrals[i]+=int_local[i];
1210 } // end of parallel section
1211
1212 } else if (fs==ReducedFaceElements || (fs==FaceElements && !arg.actsExpanded())) {
1213 #pragma omp parallel
1214 {
1215 vector<double> int_local(numComp, 0);
1216 if (m_faceOffset[0] > -1) {
1217 #pragma omp for nowait
1218 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1219 const double* f = arg.getSampleDataRO(m_faceOffset[0]+k1);
1220 for (index_t i=0; i < numComp; ++i) {
1221 int_local[i]+=f[i]*m_dx[1];
1222 }
1223 }
1224 }
1225
1226 if (m_faceOffset[1] > -1) {
1227 #pragma omp for nowait
1228 for (index_t k1 = bottom; k1 < bottom+m_ownNE[1]; ++k1) {
1229 const double* f = arg.getSampleDataRO(m_faceOffset[1]+k1);
1230 for (index_t i=0; i < numComp; ++i) {
1231 int_local[i]+=f[i]*m_dx[1];
1232 }
1233 }
1234 }
1235
1236 if (m_faceOffset[2] > -1) {
1237 #pragma omp for nowait
1238 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1239 const double* f = arg.getSampleDataRO(m_faceOffset[2]+k0);
1240 for (index_t i=0; i < numComp; ++i) {
1241 int_local[i]+=f[i]*m_dx[0];
1242 }
1243 }
1244 }
1245
1246 if (m_faceOffset[3] > -1) {
1247 #pragma omp for nowait
1248 for (index_t k0 = left; k0 < left+m_ownNE[0]; ++k0) {
1249 const double* f = arg.getSampleDataRO(m_faceOffset[3]+k0);
1250 for (index_t i=0; i < numComp; ++i) {
1251 int_local[i]+=f[i]*m_dx[0];
1252 }
1253 }
1254 }
1255
1256 #pragma omp critical
1257 for (index_t i=0; i<numComp; i++)
1258 integrals[i]+=int_local[i];
1259 } // end of parallel section
1260 } // function space selector
1261 }
1262
1263 //protected
1264 dim_t Rectangle::insertNeighbourNodes(IndexVector& index, index_t node) const
1265 {
1266 const dim_t nDOF0 = (m_gNE[0]+1)/m_NX[0];
1267 const dim_t nDOF1 = (m_gNE[1]+1)/m_NX[1];
1268 const int x=node%nDOF0;
1269 const int y=node/nDOF0;
1270 dim_t num=0;
1271 // loop through potential neighbours and add to index if positions are
1272 // within bounds
1273 for (int i1=-1; i1<2; i1++) {
1274 for (int i0=-1; i0<2; i0++) {
1275 // skip node itself
1276 if (i0==0 && i1==0)
1277 continue;
1278 // location of neighbour node
1279 const int nx=x+i0;
1280 const int ny=y+i1;
1281 if (nx>=0 && ny>=0 && nx<nDOF0 && ny<nDOF1) {
1282 index.push_back(ny*nDOF0+nx);
1283 num++;
1284 }
1285 }
1286 }
1287
1288 return num;
1289 }
1290
1291 //protected
1292 void Rectangle::nodesToDOF(escript::Data& out, const escript::Data& in) const
1293 {
1294 const dim_t numComp = in.getDataPointSize();
1295 out.requireWrite();
1296
1297 const index_t left = (m_offset[0]==0 ? 0 : 1);
1298 const index_t bottom = (m_offset[1]==0 ? 0 : 1);
1299 const dim_t nDOF0 = (m_gNE[0]+1)/m_NX[0];
1300 const dim_t nDOF1 = (m_gNE[1]+1)/m_NX[1];
1301 #pragma omp parallel for
1302 for (index_t i=0; i<nDOF1; i++) {
1303 for (index_t j=0; j<nDOF0; j++) {
1304 const index_t n=j+left+(i+bottom)*m_NN[0];
1305 const double* src=in.getSampleDataRO(n);
1306 copy(src, src+numComp, out.getSampleDataRW(j+i*nDOF0));
1307 }
1308 }
1309 }
1310
1311 //protected
1312 void Rectangle::dofToNodes(escript::Data& out, const escript::Data& in) const
1313 {
1314 const dim_t numComp = in.getDataPointSize();
1315 Paso_Coupler* coupler = Paso_Coupler_alloc(m_connector, numComp);
1316 // expand data object if necessary to be able to grab the whole data
1317 const_cast<escript::Data*>(&in)->expand();
1318 Paso_Coupler_startCollect(coupler, in.getSampleDataRO(0));
1319
1320 const dim_t numDOF = getNumDOF();
1321 out.requireWrite();
1322 const double* buffer = Paso_Coupler_finishCollect(coupler);
1323
1324 #pragma omp parallel for
1325 for (index_t i=0; i<getNumNodes(); i++) {
1326 const double* src=(m_dofMap[i]<numDOF ?
1327 in.getSampleDataRO(m_dofMap[i])
1328 : &buffer[(m_dofMap[i]-numDOF)*numComp]);
1329 copy(src, src+numComp, out.getSampleDataRW(i));
1330 }
1331 Paso_Coupler_free(coupler);
1332 }
1333
1334 //private
1335 void Rectangle::populateSampleIds()
1336 {
1337 // degrees of freedom are numbered from left to right, bottom to top in
1338 // each rank, continuing on the next rank (ranks also go left-right,
1339 // bottom-top).
1340 // This means rank 0 has id 0...n0-1, rank 1 has id n0...n1-1 etc. which
1341 // helps when writing out data rank after rank.
1342
1343 // build node distribution vector first.
1344 // rank i owns m_nodeDistribution[i+1]-nodeDistribution[i] nodes which is
1345 // constant for all ranks in this implementation
1346 m_nodeDistribution.assign(m_mpiInfo->size+1, 0);
1347 const dim_t numDOF=getNumDOF();
1348 for (dim_t k=1; k<m_mpiInfo->size; k++) {
1349 m_nodeDistribution[k]=k*numDOF;
1350 }
1351 m_nodeDistribution[m_mpiInfo->size]=getNumDataPointsGlobal();
1352 m_nodeId.resize(getNumNodes());
1353 m_dofId.resize(numDOF);
1354 m_elementId.resize(getNumElements());
1355
1356 // populate face element counts
1357 //left
1358 if (m_offset[0]==0)
1359 m_faceCount[0]=m_NE[1];
1360 else
1361 m_faceCount[0]=0;
1362 //right
1363 if (m_mpiInfo->rank%m_NX[0]==m_NX[0]-1)
1364 m_faceCount[1]=m_NE[1];
1365 else
1366 m_faceCount[1]=0;
1367 //bottom
1368 if (m_offset[1]==0)
1369 m_faceCount[2]=m_NE[0];
1370 else
1371 m_faceCount[2]=0;
1372 //top
1373 if (m_mpiInfo->rank/m_NX[0]==m_NX[1]-1)
1374 m_faceCount[3]=m_NE[0];
1375 else
1376 m_faceCount[3]=0;
1377
1378 m_faceId.resize(getNumFaceElements());
1379
1380 const index_t left = (m_offset[0]==0 ? 0 : 1);
1381 const index_t bottom = (m_offset[1]==0 ? 0 : 1);
1382 const dim_t nDOF0 = (m_gNE[0]+1)/m_NX[0];
1383 const dim_t nDOF1 = (m_gNE[1]+1)/m_NX[1];
1384
1385 #define globalNodeId(x,y) \
1386 ((m_offset[0]+x)/nDOF0)*nDOF0*nDOF1+(m_offset[0]+x)%nDOF0 \
1387 + ((m_offset[1]+y)/nDOF1)*nDOF0*nDOF1*m_NX[0]+((m_offset[1]+y)%nDOF1)*nDOF0
1388
1389 // set corner id's outside the parallel region
1390 m_nodeId[0] = globalNodeId(0, 0);
1391 m_nodeId[m_NN[0]-1] = globalNodeId(m_NN[0]-1, 0);
1392 m_nodeId[m_NN[0]*(m_NN[1]-1)] = globalNodeId(0, m_NN[1]-1);
1393 m_nodeId[m_NN[0]*m_NN[1]-1] = globalNodeId(m_NN[0]-1,m_NN[1]-1);
1394 #undef globalNodeId
1395
1396 #pragma omp parallel
1397 {
1398 // populate degrees of freedom and own nodes (identical id)
1399 #pragma omp for nowait
1400 for (dim_t i=0; i<nDOF1; i++) {
1401 for (dim_t j=0; j<nDOF0; j++) {
1402 const index_t nodeIdx=j+left+(i+bottom)*m_NN[0];
1403 const index_t dofIdx=j+i*nDOF0;
1404 m_dofId[dofIdx] = m_nodeId[nodeIdx]
1405 = m_nodeDistribution[m_mpiInfo->rank]+dofIdx;
1406 }
1407 }
1408
1409 // populate the rest of the nodes (shared with other ranks)
1410 if (m_faceCount[0]==0) { // left column
1411 #pragma omp for nowait
1412 for (dim_t i=0; i<nDOF1; i++) {
1413 const index_t nodeIdx=(i+bottom)*m_NN[0];
1414 const index_t dofId=(i+1)*nDOF0-1;
1415 m_nodeId[nodeIdx]
1416 = m_nodeDistribution[m_mpiInfo->rank-1]+dofId;
1417 }
1418 }
1419 if (m_faceCount[1]==0) { // right column
1420 #pragma omp for nowait
1421 for (dim_t i=0; i<nDOF1; i++) {
1422 const index_t nodeIdx=(i+bottom+1)*m_NN[0]-1;
1423 const index_t dofId=i*nDOF0;
1424 m_nodeId[nodeIdx]
1425 = m_nodeDistribution[m_mpiInfo->rank+1]+dofId;
1426 }
1427 }
1428 if (m_faceCount[2]==0) { // bottom row
1429 #pragma omp for nowait
1430 for (dim_t i=0; i<nDOF0; i++) {
1431 const index_t nodeIdx=i+left;
1432 const index_t dofId=nDOF0*(nDOF1-1)+i;
1433 m_nodeId[nodeIdx]
1434 = m_nodeDistribution[m_mpiInfo->rank-m_NX[0]]+dofId;
1435 }
1436 }
1437 if (m_faceCount[3]==0) { // top row
1438 #pragma omp for nowait
1439 for (dim_t i=0; i<nDOF0; i++) {
1440 const index_t nodeIdx=m_NN[0]*(m_NN[1]-1)+i+left;
1441 const index_t dofId=i;
1442 m_nodeId[nodeIdx]
1443 = m_nodeDistribution[m_mpiInfo->rank+m_NX[0]]+dofId;
1444 }
1445 }
1446
1447 // populate element id's
1448 #pragma omp for nowait
1449 for (dim_t i1=0; i1<m_NE[1]; i1++) {
1450 for (dim_t i0=0; i0<m_NE[0]; i0++) {
1451 m_elementId[i0+i1*m_NE[0]]=(m_offset[1]+i1)*m_gNE[0]+m_offset[0]+i0;
1452 }
1453 }
1454
1455 // face elements
1456 #pragma omp for
1457 for (dim_t k=0; k<getNumFaceElements(); k++)
1458 m_faceId[k]=k;
1459 } // end parallel section
1460
1461 m_nodeTags.assign(getNumNodes(), 0);
1462 updateTagsInUse(Nodes);
1463
1464 m_elementTags.assign(getNumElements(), 0);
1465 updateTagsInUse(Elements);
1466
1467 // generate face offset vector and set face tags
1468 const index_t LEFT=1, RIGHT=2, BOTTOM=10, TOP=20;
1469 const index_t faceTag[] = { LEFT, RIGHT, BOTTOM, TOP };
1470 m_faceOffset.assign(4, -1);
1471 m_faceTags.clear();
1472 index_t offset=0;
1473 for (size_t i=0; i<4; i++) {
1474 if (m_faceCount[i]>0) {
1475 m_faceOffset[i]=offset;
1476 offset+=m_faceCount[i];
1477 m_faceTags.insert(m_faceTags.end(), m_faceCount[i], faceTag[i]);
1478 }
1479 }
1480 setTagMap("left", LEFT);
1481 setTagMap("right", RIGHT);
1482 setTagMap("bottom", BOTTOM);
1483 setTagMap("top", TOP);
1484 updateTagsInUse(FaceElements);
1485 }
1486
1487 //private
1488 void Rectangle::createPattern()
1489 {
1490 const dim_t nDOF0 = (m_gNE[0]+1)/m_NX[0];
1491 const dim_t nDOF1 = (m_gNE[1]+1)/m_NX[1];
1492 const index_t left = (m_offset[0]==0 ? 0 : 1);
1493 const index_t bottom = (m_offset[1]==0 ? 0 : 1);
1494
1495 // populate node->DOF mapping with own degrees of freedom.
1496 // The rest is assigned in the loop further down
1497 m_dofMap.assign(getNumNodes(), 0);
1498 #pragma omp parallel for
1499 for (index_t i=bottom; i<bottom+nDOF1; i++) {
1500 for (index_t j=left; j<left+nDOF0; j++) {
1501 m_dofMap[i*m_NN[0]+j]=(i-bottom)*nDOF0+j-left;
1502 }
1503 }
1504
1505 // build list of shared components and neighbours by looping through
1506 // all potential neighbouring ranks and checking if positions are
1507 // within bounds
1508 const dim_t numDOF=nDOF0*nDOF1;
1509 vector<IndexVector> colIndices(numDOF); // for the couple blocks
1510 RankVector neighbour;
1511 IndexVector offsetInShared(1,0);
1512 IndexVector sendShared, recvShared;
1513 int numShared=0;
1514 const int x=m_mpiInfo->rank%m_NX[0];
1515 const int y=m_mpiInfo->rank/m_NX[0];
1516 for (int i1=-1; i1<2; i1++) {
1517 for (int i0=-1; i0<2; i0++) {
1518 // skip this rank
1519 if (i0==0 && i1==0)
1520 continue;
1521 // location of neighbour rank
1522 const int nx=x+i0;
1523 const int ny=y+i1;
1524 if (nx>=0 && ny>=0 && nx<m_NX[0] && ny<m_NX[1]) {
1525 neighbour.push_back(ny*m_NX[0]+nx);
1526 if (i0==0) {
1527 // sharing top or bottom edge
1528 const int firstDOF=(i1==-1 ? 0 : numDOF-nDOF0);
1529 const int firstNode=(i1==-1 ? left : m_NN[0]*(m_NN[1]-1)+left);
1530 offsetInShared.push_back(offsetInShared.back()+nDOF0);
1531 for (dim_t i=0; i<nDOF0; i++, numShared++) {
1532 sendShared.push_back(firstDOF+i);
1533 recvShared.push_back(numDOF+numShared);
1534 if (i>0)
1535 colIndices[firstDOF+i-1].push_back(numShared);
1536 colIndices[firstDOF+i].push_back(numShared);
1537 if (i<nDOF0-1)
1538 colIndices[firstDOF+i+1].push_back(numShared);
1539 m_dofMap[firstNode+i]=numDOF+numShared;
1540 }
1541 } else if (i1==0) {
1542 // sharing left or right edge
1543 const int firstDOF=(i0==-1 ? 0 : nDOF0-1);
1544 const int firstNode=(i0==-1 ? bottom*m_NN[0] : (bottom+1)*m_NN[0]-1);
1545 offsetInShared.push_back(offsetInShared.back()+nDOF1);
1546 for (dim_t i=0; i<nDOF1; i++, numShared++) {
1547 sendShared.push_back(firstDOF+i*nDOF0);
1548 recvShared.push_back(numDOF+numShared);
1549 if (i>0)
1550 colIndices[firstDOF+(i-1)*nDOF0].push_back(numShared);
1551 colIndices[firstDOF+i*nDOF0].push_back(numShared);
1552 if (i<nDOF1-1)
1553 colIndices[firstDOF+(i+1)*nDOF0].push_back(numShared);
1554 m_dofMap[firstNode+i*m_NN[0]]=numDOF+numShared;
1555 }
1556 } else {
1557 // sharing a node
1558 const int dof=(i0+1)/2*(nDOF0-1)+(i1+1)/2*(numDOF-nDOF0);
1559 const int node=(i0+1)/2*(m_NN[0]-1)+(i1+1)/2*m_NN[0]*(m_NN[1]-1);
1560 offsetInShared.push_back(offsetInShared.back()+1);
1561 sendShared.push_back(dof);
1562 recvShared.push_back(numDOF+numShared);
1563 colIndices[dof].push_back(numShared);
1564 m_dofMap[node]=numDOF+numShared;
1565 ++numShared;
1566 }
1567 }
1568 }
1569 }
1570
1571 // create connector
1572 Paso_SharedComponents *snd_shcomp = Paso_SharedComponents_alloc(
1573 numDOF, neighbour.size(), &neighbour[0], &sendShared[0],
1574 &offsetInShared[0], 1, 0, m_mpiInfo);
1575 Paso_SharedComponents *rcv_shcomp = Paso_SharedComponents_alloc(
1576 numDOF, neighbour.size(), &neighbour[0], &recvShared[0],
1577 &offsetInShared[0], 1, 0, m_mpiInfo);
1578 m_connector = Paso_Connector_alloc(snd_shcomp, rcv_shcomp);
1579 Paso_SharedComponents_free(snd_shcomp);
1580 Paso_SharedComponents_free(rcv_shcomp);
1581
1582 // create main and couple blocks
1583 Paso_Pattern *mainPattern = createMainPattern();
1584 Paso_Pattern *colPattern, *rowPattern;
1585 createCouplePatterns(colIndices, numShared, &colPattern, &rowPattern);
1586
1587 // allocate paso distribution
1588 Paso_Distribution* distribution = Paso_Distribution_alloc(m_mpiInfo,
1589 const_cast<index_t*>(&m_nodeDistribution[0]), 1, 0);
1590
1591 // finally create the system matrix
1592 m_pattern = Paso_SystemMatrixPattern_alloc(MATRIX_FORMAT_DEFAULT,
1593 distribution, distribution, mainPattern, colPattern, rowPattern,
1594 m_connector, m_connector);
1595
1596 Paso_Distribution_free(distribution);
1597
1598 // useful debug output
1599 /*
1600 cout << "--- rcv_shcomp ---" << endl;
1601 cout << "numDOF=" << numDOF << ", numNeighbors=" << neighbour.size() << endl;
1602 for (size_t i=0; i<neighbour.size(); i++) {
1603 cout << "neighbor[" << i << "]=" << neighbour[i]
1604 << " offsetInShared[" << i+1 << "]=" << offsetInShared[i+1] << endl;
1605 }
1606 for (size_t i=0; i<recvShared.size(); i++) {
1607 cout << "shared[" << i << "]=" << recvShared[i] << endl;
1608 }
1609 cout << "--- snd_shcomp ---" << endl;
1610 for (size_t i=0; i<sendShared.size(); i++) {
1611 cout << "shared[" << i << "]=" << sendShared[i] << endl;
1612 }
1613 cout << "--- dofMap ---" << endl;
1614 for (size_t i=0; i<m_dofMap.size(); i++) {
1615 cout << "m_dofMap[" << i << "]=" << m_dofMap[i] << endl;
1616 }
1617 cout << "--- colIndices ---" << endl;
1618 for (size_t i=0; i<colIndices.size(); i++) {
1619 cout << "colIndices[" << i << "].size()=" << colIndices[i].size() << endl;
1620 }
1621 */
1622
1623 /*
1624 cout << "--- main_pattern ---" << endl;
1625 cout << "M=" << mainPattern->numOutput << ", N=" << mainPattern->numInput << endl;
1626 for (size_t i=0; i<mainPattern->numOutput+1; i++) {
1627 cout << "ptr[" << i << "]=" << mainPattern->ptr[i] << endl;
1628 }
1629 for (size_t i=0; i<mainPattern->ptr[mainPattern->numOutput]; i++) {
1630 cout << "index[" << i << "]=" << mainPattern->index[i] << endl;
1631 }
1632 */
1633
1634 /*
1635 cout << "--- colCouple_pattern ---" << endl;
1636 cout << "M=" << colPattern->numOutput << ", N=" << colPattern->numInput << endl;
1637 for (size_t i=0; i<colPattern->numOutput+1; i++) {
1638 cout << "ptr[" << i << "]=" << colPattern->ptr[i] << endl;
1639 }
1640 for (size_t i=0; i<colPattern->ptr[colPattern->numOutput]; i++) {
1641 cout << "index[" << i << "]=" << colPattern->index[i] << endl;
1642 }
1643 */
1644
1645 /*
1646 cout << "--- rowCouple_pattern ---" << endl;
1647 cout << "M=" << rowPattern->numOutput << ", N=" << rowPattern->numInput << endl;
1648 for (size_t i=0; i<rowPattern->numOutput+1; i++) {
1649 cout << "ptr[" << i << "]=" << rowPattern->ptr[i] << endl;
1650 }
1651 for (size_t i=0; i<rowPattern->ptr[rowPattern->numOutput]; i++) {
1652 cout << "index[" << i << "]=" << rowPattern->index[i] << endl;
1653 }
1654 */
1655
1656 Paso_Pattern_free(mainPattern);
1657 Paso_Pattern_free(colPattern);
1658 Paso_Pattern_free(rowPattern);
1659 }
1660
1661 //private
1662 void Rectangle::addToMatrixAndRHS(Paso_SystemMatrix* S, escript::Data& F,
1663 const vector<double>& EM_S, const vector<double>& EM_F, bool addS,
1664 bool addF, index_t firstNode, dim_t nEq, dim_t nComp) const
1665 {
1666 IndexVector rowIndex;
1667 rowIndex.push_back(m_dofMap[firstNode]);
1668 rowIndex.push_back(m_dofMap[firstNode+1]);
1669 rowIndex.push_back(m_dofMap[firstNode+m_NN[0]]);
1670 rowIndex.push_back(m_dofMap[firstNode+m_NN[0]+1]);
1671 if (addF) {
1672 double *F_p=F.getSampleDataRW(0);
1673 for (index_t i=0; i<rowIndex.size(); i++) {
1674 if (rowIndex[i]<getNumDOF()) {
1675 for (index_t eq=0; eq<nEq; eq++) {
1676 F_p[INDEX2(eq, rowIndex[i], nEq)]+=EM_F[INDEX2(eq,i,nEq)];
1677 }
1678 }
1679 }
1680 }
1681 if (addS) {
1682 addToSystemMatrix(S, rowIndex, nEq, rowIndex, nComp, EM_S);
1683 }
1684 }
1685
1686 //protected
1687 void Rectangle::interpolateNodesOnElements(escript::Data& out,
1688 const escript::Data& in,
1689 bool reduced) const
1690 {
1691 const dim_t numComp = in.getDataPointSize();
1692 if (reduced) {
1693 out.requireWrite();
1694 const double c0 = 0.25;
1695 #pragma omp parallel
1696 {
1697 vector<double> f_00(numComp);
1698 vector<double> f_01(numComp);
1699 vector<double> f_10(numComp);
1700 vector<double> f_11(numComp);
1701 #pragma omp for
1702 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1703 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1704 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,k1, m_NN[0])), numComp*sizeof(double));
1705 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,k1+1, m_NN[0])), numComp*sizeof(double));
1706 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,k1, m_NN[0])), numComp*sizeof(double));
1707 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,k1+1, m_NN[0])), numComp*sizeof(double));
1708 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE[0]));
1709 for (index_t i=0; i < numComp; ++i) {
1710 o[INDEX2(i,numComp,0)] = c0*(f_00[i] + f_01[i] + f_10[i] + f_11[i]);
1711 } /* end of component loop i */
1712 } /* end of k0 loop */
1713 } /* end of k1 loop */
1714 } /* end of parallel section */
1715 } else {
1716 out.requireWrite();
1717 const double c0 = 0.16666666666666666667;
1718 const double c1 = 0.044658198738520451079;
1719 const double c2 = 0.62200846792814621559;
1720 #pragma omp parallel
1721 {
1722 vector<double> f_00(numComp);
1723 vector<double> f_01(numComp);
1724 vector<double> f_10(numComp);
1725 vector<double> f_11(numComp);
1726 #pragma omp for
1727 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1728 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1729 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,k1, m_NN[0])), numComp*sizeof(double));
1730 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,k1+1, m_NN[0])), numComp*sizeof(double));
1731 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,k1, m_NN[0])), numComp*sizeof(double));
1732 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,k1+1, m_NN[0])), numComp*sizeof(double));
1733 double* o = out.getSampleDataRW(INDEX2(k0,k1,m_NE[0]));
1734 for (index_t i=0; i < numComp; ++i) {
1735 o[INDEX2(i,numComp,0)] = c0*(f_01[i] + f_10[i]) + c1*f_11[i] + c2*f_00[i];
1736 o[INDEX2(i,numComp,1)] = c0*(f_00[i] + f_11[i]) + c1*f_01[i] + c2*f_10[i];
1737 o[INDEX2(i,numComp,2)] = c0*(f_00[i] + f_11[i]) + c1*f_10[i] + c2*f_01[i];
1738 o[INDEX2(i,numComp,3)] = c0*(f_01[i] + f_10[i]) + c1*f_00[i] + c2*f_11[i];
1739 } /* end of component loop i */
1740 } /* end of k0 loop */
1741 } /* end of k1 loop */
1742 } /* end of parallel section */
1743 }
1744 }
1745
1746 //protected
1747 void Rectangle::interpolateNodesOnFaces(escript::Data& out,
1748 const escript::Data& in,
1749 bool reduced) const
1750 {
1751 const dim_t numComp = in.getDataPointSize();
1752 if (reduced) {
1753 out.requireWrite();
1754 #pragma omp parallel
1755 {
1756 vector<double> f_00(numComp);
1757 vector<double> f_01(numComp);
1758 vector<double> f_10(numComp);
1759 vector<double> f_11(numComp);
1760 if (m_faceOffset[0] > -1) {
1761 #pragma omp for nowait
1762 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1763 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(0,k1, m_NN[0])), numComp*sizeof(double));
1764 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(0,k1+1, m_NN[0])), numComp*sizeof(double));
1765 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
1766 for (index_t i=0; i < numComp; ++i) {
1767 o[INDEX2(i,numComp,0)] = (f_00[i] + f_01[i])/2;
1768 } /* end of component loop i */
1769 } /* end of k1 loop */
1770 } /* end of face 0 */
1771 if (m_faceOffset[1] > -1) {
1772 #pragma omp for nowait
1773 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1774 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1, m_NN[0])), numComp*sizeof(double));
1775 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1+1, m_NN[0])), numComp*sizeof(double));
1776 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
1777 for (index_t i=0; i < numComp; ++i) {
1778 o[INDEX2(i,numComp,0)] = (f_10[i] + f_11[i])/2;
1779 } /* end of component loop i */
1780 } /* end of k1 loop */
1781 } /* end of face 1 */
1782 if (m_faceOffset[2] > -1) {
1783 #pragma omp for nowait
1784 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1785 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,0, m_NN[0])), numComp*sizeof(double));
1786 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,0, m_NN[0])), numComp*sizeof(double));
1787 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1788 for (index_t i=0; i < numComp; ++i) {
1789 o[INDEX2(i,numComp,0)] = (f_00[i] + f_10[i])/2;
1790 } /* end of component loop i */
1791 } /* end of k0 loop */
1792 } /* end of face 2 */
1793 if (m_faceOffset[3] > -1) {
1794 #pragma omp for nowait
1795 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1796 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1797 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1798 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1799 for (index_t i=0; i < numComp; ++i) {
1800 o[INDEX2(i,numComp,0)] = (f_01[i] + f_11[i])/2;
1801 } /* end of component loop i */
1802 } /* end of k0 loop */
1803 } /* end of face 3 */
1804 } /* end of parallel section */
1805 } else {
1806 out.requireWrite();
1807 const double c0 = 0.21132486540518711775;
1808 const double c1 = 0.78867513459481288225;
1809 #pragma omp parallel
1810 {
1811 vector<double> f_00(numComp);
1812 vector<double> f_01(numComp);
1813 vector<double> f_10(numComp);
1814 vector<double> f_11(numComp);
1815 if (m_faceOffset[0] > -1) {
1816 #pragma omp for nowait
1817 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1818 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(0,k1, m_NN[0])), numComp*sizeof(double));
1819 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(0,k1+1, m_NN[0])), numComp*sizeof(double));
1820 double* o = out.getSampleDataRW(m_faceOffset[0]+k1);
1821 for (index_t i=0; i < numComp; ++i) {
1822 o[INDEX2(i,numComp,0)] = c0*f_01[i] + c1*f_00[i];
1823 o[INDEX2(i,numComp,1)] = c0*f_00[i] + c1*f_01[i];
1824 } /* end of component loop i */
1825 } /* end of k1 loop */
1826 } /* end of face 0 */
1827 if (m_faceOffset[1] > -1) {
1828 #pragma omp for nowait
1829 for (index_t k1=0; k1 < m_NE[1]; ++k1) {
1830 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1, m_NN[0])), numComp*sizeof(double));
1831 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(m_NN[0]-1,k1+1, m_NN[0])), numComp*sizeof(double));
1832 double* o = out.getSampleDataRW(m_faceOffset[1]+k1);
1833 for (index_t i=0; i < numComp; ++i) {
1834 o[INDEX2(i,numComp,0)] = c1*f_10[i] + c0*f_11[i];
1835 o[INDEX2(i,numComp,1)] = c1*f_11[i] + c0*f_10[i];
1836 } /* end of component loop i */
1837 } /* end of k1 loop */
1838 } /* end of face 1 */
1839 if (m_faceOffset[2] > -1) {
1840 #pragma omp for nowait
1841 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1842 memcpy(&f_00[0], in.getSampleDataRO(INDEX2(k0,0, m_NN[0])), numComp*sizeof(double));
1843 memcpy(&f_10[0], in.getSampleDataRO(INDEX2(k0+1,0, m_NN[0])), numComp*sizeof(double));
1844 double* o = out.getSampleDataRW(m_faceOffset[2]+k0);
1845 for (index_t i=0; i < numComp; ++i) {
1846 o[INDEX2(i,numComp,0)] = c0*f_10[i] + c1*f_00[i];
1847 o[INDEX2(i,numComp,1)] = c0*f_00[i] + c1*f_10[i];
1848 } /* end of component loop i */
1849 } /* end of k0 loop */
1850 } /* end of face 2 */
1851 if (m_faceOffset[3] > -1) {
1852 #pragma omp for nowait
1853 for (index_t k0=0; k0 < m_NE[0]; ++k0) {
1854 memcpy(&f_01[0], in.getSampleDataRO(INDEX2(k0,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1855 memcpy(&f_11[0], in.getSampleDataRO(INDEX2(k0+1,m_NN[1]-1, m_NN[0])), numComp*sizeof(double));
1856 double* o = out.getSampleDataRW(m_faceOffset[3]+k0);
1857 for (index_t i=0; i < numComp; ++i) {
1858 o[INDEX2(i,numComp,0)] = c0*f_11[i] + c1*f_01[i];
1859 o[INDEX2(i,numComp,1)] = c0*f_01[i] + c1*f_11[i];
1860 } /* end of component loop i */
1861 } /* end of k0 loop */
1862 } /* end of face 3 */
1863 } /* end of parallel section */
1864 }
1865 }
1866
1867
1868
1869 namespace
1870 {
1871 // Calculates a guassian blur colvolution matrix for 2D
1872 double* get2DGauss(unsigned radius, double sigma)
1873 {
1874 double* arr=new double[(radius*2+1)*(radius*2+1)];
1875 double common=M_1_PI*0.5*1/(sigma*sigma);
1876 double total=0;
1877 int r=static_cast<int>(radius);
1878 for (int y=-r;y<=r;++y)
1879 {
1880 for (int x=-r;x<=r;++x)
1881 {
1882 arr[(x+r)+(y+r)*(r*2+1)]=common*exp(-(x*x+y*y)/(2*sigma*sigma));
1883 // cout << (x+y*(r*2+1)) << " " << arr[(x+r)+(y+r)*(r*2+1)] << endl;
1884 total+=arr[(x+r)+(y+r)*(r*2+1)];
1885 }
1886 }
1887 double invtotal=1/total;
1888 //cout << "Inv total is " << invtotal << endl;
1889 for (size_t p=0;p<(radius*2+1)*(radius*2+1);++p)
1890 {
1891 arr[p]*=invtotal;
1892 //cout << p << "->" << arr[p] << endl;
1893 }
1894 return arr;
1895 }
1896
1897 // applies conv to source to get a point.
1898 // (xp, yp) are the coords in the source matrix not the destination matrix
1899 double Convolve2D(double* conv, double* source, size_t xp, size_t yp, unsigned radius, size_t width)
1900 {
1901 size_t bx=xp-radius, by=yp-radius;
1902 size_t sbase=bx+by*width;
1903 double result=0;
1904 for (int y=0;y<2*radius+1;++y)
1905 {
1906 for (int x=0;x<2*radius+1;++x)
1907 {
1908 result+=conv[x+y*(2*radius+1)] * source[sbase + x+y*width];
1909 }
1910 }
1911 return result;
1912 }
1913 }
1914
1915
1916
1917 /* This routine produces a Data object filled with smoothed random data.
1918 The dimensions of the rectangle being filled are internal[0] x internal[1] points.
1919 A parameter radius dives the size of the stencil used for the smoothing.
1920 A point on the left hand edge for example, will still require `radius` extra points to the left
1921 in order to complete the stencil.
1922
1923 All local calculation is done on an array called `src`, with
1924 dimensions = ext[0] * ext[1].
1925 Where ext[i]= internal[i]+2*radius.
1926
1927 Now for MPI there is overlap to deal with. We need to share both the overlapping
1928 values themselves but also the external region.
1929
1930 In a hypothetical 1-D case:
1931
1932
1933 1234567
1934 would be split into two ranks thus:
1935 123(4) (4)567 [4 being a shared element]
1936
1937 If the radius is 2. There will be padding elements on the outside:
1938
1939 pp123(4) (4)567pp
1940
1941 To ensure that 4 can be correctly computed on both ranks, values from the other rank
1942 need to be known.
1943
1944 pp123(4)56 23(4)567pp
1945
1946 Now in our case, we wout set all the values 23456 on the left rank and send them to the
1947 right hand rank.
1948
1949 So the edges _may_ need to be shared at a distance `inset` from all boundaries.
1950 inset=2*radius+1
1951 This is to ensure that values at distance `radius` from the shared/overlapped element
1952 that ripley has.
1953
1954
1955 */
1956 escript::Data Rectangle::randomFill(long seed, const boost::python::tuple& filter) const
1957 {
1958 if (m_numDim!=2)
1959 {
1960 throw RipleyException("Only 2D supported at this time.");
1961 }
1962 if (len(filter)!=3) {
1963 throw RipleyException("Unsupported random filter");
1964 }
1965 boost::python::extract<string> ex(filter[0]);
1966 if (!ex.check() || (ex()!="gaussian"))
1967 {
1968 throw RipleyException("Unsupported random filter");
1969 }
1970 boost::python::extract<unsigned int> ex1(filter[1]);
1971 if (!ex1.check())
1972 {
1973 throw RipleyException("Radius of gaussian filter must be a positive integer.");
1974 }
1975 unsigned int radius=ex1();
1976 double sigma=0.5;
1977 boost::python::extract<double> ex2(filter[2]);
1978 if (!ex2.check() || (sigma=ex2())<=0)
1979 {
1980 throw RipleyException("Sigma must be a postive floating point number.");
1981 }
1982
1983 size_t internal[2];
1984 internal[0]=m_NE[0]+1; // number of points in the internal region
1985 internal[1]=m_NE[1]+1; // that is, the ones we need smoothed versions of
1986 size_t ext[2];
1987 ext[0]=internal[0]+2*radius; // includes points we need as input
1988 ext[1]=internal[1]+2*radius; // for smoothing
1989
1990 // now we check to see if the radius is acceptable
1991 // That is, would not cross multiple ranks in MPI
1992
1993 if ((2*radius>=internal[0]) || (2*radius>=internal[1]))
1994 {
1995 throw RipleyException("Radius of gaussian filter must be less than half the width/height of a rank");
1996 }
1997
1998
1999 double* src=new double[ext[0]*ext[1]];
2000 esysUtils::randomFillArray(seed, src, ext[0]*ext[1]);
2001
2002
2003 #ifdef ESYS_MPI
2004 size_t inset=2*radius+1;
2005 size_t Eheight=ext[1]-2*inset; // how high the E (shared) region is
2006 size_t Swidth=ext[0]-2*inset;
2007
2008 double* SWin=new double[inset*inset]; memset(SWin, 0, inset*inset*sizeof(double));
2009 double* SEin=new double[inset*inset]; memset(SEin, 0, inset*inset*sizeof(double));
2010 double* NWin=new double[inset*inset]; memset(NWin, 0, inset*inset*sizeof(double));
2011 double* Sin=new double[inset*Swidth]; memset(Sin, 0, inset*Swidth*sizeof(double));
2012 double* Win=new double[inset*Eheight]; memset(Win, 0, inset*Eheight*sizeof(double));
2013
2014 double* NEout=new double[inset*inset]; memset(NEout, 0, inset*inset*sizeof(double));
2015 unsigned int base=ext[0]-inset+(ext[1]-inset)*ext[0];
2016 for (unsigned int i=0;i<inset;++i)
2017 {
2018 memcpy(NEout+inset*i, src+base, inset*sizeof(double));
2019 base+=ext[0];
2020 }
2021 double* NWout=new double[inset*inset]; memset(NWout, 0, inset*inset*sizeof(double));
2022 base=(ext[1]-inset)*ext[0];
2023 for (unsigned int i=0;i<inset;++i)
2024 {
2025 memcpy(NWout+inset*i, src+base, inset*sizeof(double));
2026 base+=ext[0];
2027 }
2028
2029 double* SEout=new double[inset*inset]; memset(SEout, 0, inset*inset*sizeof(double));
2030 base=ext[0]-inset;
2031 for (int i=0;i<inset;++i)
2032 {
2033 memcpy(SEout+inset*i, src+base, inset*sizeof(double));
2034 base+=ext[0];
2035 }
2036 double* Nout=new double[inset*Swidth]; memset(Nout, 0, inset*Swidth*sizeof(double));
2037 base=inset+(ext[1]-inset)*ext[0];
2038 for (unsigned int i=0;i<inset;++i)
2039 {
2040 memcpy(Nout+Swidth*i, src+base, Swidth*sizeof(double));
2041 base+=ext[0];
2042 }
2043
2044 double* Eout=new double[inset*Eheight]; memset(Eout, 0, inset*Eheight*sizeof(double));
2045 base=ext[0]-inset+inset*ext[0];
2046 for (unsigned int i=0;i<Eheight;++i)
2047 {
2048 memcpy(Eout+i*inset, src+base, inset*sizeof(double));
2049 base+=ext[0];
2050 }
2051
2052 MPI_Request reqs[10];
2053 MPI_Status stats[10];
2054 short rused=0;
2055
2056 dim_t X=m_mpiInfo->rank%m_NX[0];
2057 dim_t Y=m_mpiInfo->rank/m_NX[0];
2058 dim_t row=m_NX[0];
2059 bool swused=false; // These vars will be true if data needs to be copied out of them
2060 bool seused=false;
2061 bool nwused=false;
2062 bool sused=false;
2063 bool wused=false;
2064
2065 // Tags:
2066 // 10 : EW transfer (middle)
2067 // 8 : NS transfer (middle)
2068 // 7 : NE corner -> to N, E and NE
2069 // 11 : NW corner to SW corner (only used on the left hand edge
2070 // 12 : SE corner to SW corner (only used on the bottom edge
2071
2072
2073
2074 int comserr=0;
2075 if (Y!=0) // not on bottom row,
2076 {
2077 if (X!=0) // not on the left hand edge
2078 {
2079 // recv bottomleft from SW
2080 comserr|=MPI_Irecv(SWin, inset*inset, MPI_DOUBLE, (X-1)+(Y-1)*row, 7, m_mpiInfo->comm, reqs+(rused++));
2081 swused=true;
2082 comserr|=MPI_Irecv(Win, Eheight*inset, MPI_DOUBLE, X-1+Y*row, 10, m_mpiInfo->comm, reqs+(rused++));
2083 wused=true;
2084 }
2085 else // on the left hand edge
2086 {
2087 comserr|=MPI_Irecv(SWin, inset*inset, MPI_DOUBLE, (Y-1)*row, 11, m_mpiInfo->comm, reqs+(rused++));
2088 swused=true;
2089 }
2090 comserr|=MPI_Irecv(Sin, Swidth*inset, MPI_DOUBLE, X+(Y-1)*row, 8, m_mpiInfo->comm, reqs+(rused++));
2091 sused=true;
2092 comserr|=MPI_Irecv(SEin, inset*inset, MPI_DOUBLE, X+(Y-1)*row, 7, m_mpiInfo->comm, reqs+(rused++));
2093 seused=true;
2094
2095
2096 }
2097 else // on the bottom row
2098 {
2099 if (X!=0)
2100 {
2101 comserr|=MPI_Irecv(Win, Eheight*inset, MPI_DOUBLE, X-1+Y*row, 10, m_mpiInfo->comm, reqs+(rused++));
2102 wused=true;
2103 // Need to use tag 12 here because SW is coming from the East not South East
2104 comserr|=MPI_Irecv(SWin, inset*inset, MPI_DOUBLE, X-1+Y*row, 12, m_mpiInfo->comm, reqs+(rused++));
2105 swused=true;
2106 }
2107 if (X!=(row-1))
2108 {
2109 comserr|=MPI_Isend(SEout, inset*inset, MPI_DOUBLE, X+1+(Y)*row, 12, m_mpiInfo->comm, reqs+(rused++));
2110 }
2111 }
2112
2113 if (Y!=(m_NX[1]-1)) // not on the top row
2114 {
2115 comserr|=MPI_Isend(Nout, inset*Swidth, MPI_DOUBLE, X+(Y+1)*row, 8, m_mpiInfo->comm, reqs+(rused++));
2116 comserr|=MPI_Isend(NEout, inset*inset, MPI_DOUBLE, X+(Y+1)*row, 7, m_mpiInfo->comm, reqs+(rused++));
2117 if (X!=(row-1)) // not on right hand edge
2118 {
2119 comserr|=MPI_Isend(NEout, inset*inset, MPI_DOUBLE, X+1+(Y+1)*row, 7, m_mpiInfo->comm, reqs+(rused++));
2120 }
2121 if (X==0) // left hand edge
2122 {
2123 comserr|=MPI_Isend(NWout, inset*inset, MPI_DOUBLE, (Y+1)*row,11, m_mpiInfo->comm, reqs+(rused++));
2124 }
2125 }
2126 if (X!=(row-1)) // not on right hand edge
2127 {
2128 comserr|=MPI_Isend(NEout, inset*inset, MPI_DOUBLE, X+1+(Y)*row, 7, m_mpiInfo->comm, reqs+(rused++));
2129 comserr|=MPI_Isend(Eout, Eheight*inset, MPI_DOUBLE, X+1+(Y)*row, 10, m_mpiInfo->comm, reqs+(rused++));
2130 }
2131 if (X!=0)
2132 {
2133 comserr|=MPI_Irecv(NWin, inset*inset, MPI_DOUBLE, (X-1)+Y*row, 7, m_mpiInfo->comm, reqs+(rused++));
2134 nwused=true;
2135
2136
2137 }
2138
2139 if (!comserr)
2140 {
2141 comserr=MPI_Waitall(rused, reqs, stats);
2142 }
2143
2144 if (comserr)
2145 {
2146 // Yes this is throwing an exception as a result of an MPI error.
2147 // and no we don't inform the other ranks that we are doing this.
2148 // however, we have no reason to believe coms work at this point anyway
2149 throw RipleyException("Error in coms for randomFill");
2150 }
2151
2152
2153 // Need to copy the values back from the buffers
2154 // Copy from SW
2155
2156 if (swused)
2157 {
2158 base=0;
2159 for (unsigned int i=0;i<inset;++i)
2160 {
2161 memcpy(src+base, SWin+i*inset, inset*sizeof(double));
2162 base+=ext[0];
2163 }
2164 }
2165 if (seused)
2166 {
2167 base=ext[0]-inset;
2168 for (unsigned int i=0;i<inset;++i)
2169 {
2170 memcpy(src+base, SEin+i*inset, inset*sizeof(double));
2171 base+=ext[0];
2172 }
2173 }
2174 if (nwused)
2175 {
2176 base=(ext[1]-inset)*ext[0];
2177 for (unsigned int i=0;i<inset;++i)
2178 {
2179 memcpy(src+base, NWin+i*inset, inset*sizeof(double));
2180 base+=ext[0];
2181 }
2182 }
2183 if (sused)
2184 {
2185 base=inset;
2186 for (unsigned int i=0;i<inset;++i)
2187 {
2188 memcpy(src+base, Sin+i*Swidth, Swidth*sizeof(double));
2189 base+=ext[0];
2190 }
2191 }
2192 if (wused)
2193 {
2194 base=inset*ext[0];
2195 for (unsigned int i=0;i<Eheight;++i)
2196 {
2197 memcpy(src+base, Win+i*inset, inset*sizeof(double));
2198 base+=ext[0];
2199 }
2200
2201 }
2202
2203 delete[] SWin;
2204 delete[] SEin;
2205 delete[] NWin;
2206 delete[] Sin;
2207 delete[] Win;
2208
2209 delete[] NEout;
2210 delete[] NWout;
2211 delete[] SEout;
2212 delete[] Nout;
2213 delete[] Eout;
2214 #endif
2215 escript::FunctionSpace fs(getPtr(), getContinuousFunctionCode());
2216 escript::Data resdat(0, escript::DataTypes::scalarShape, fs , true);
2217 // don't need to check for exwrite because we just made it
2218 escript::DataVector& dv=resdat.getExpandedVectorReference();
2219 double* convolution=get2DGauss(radius, sigma);
2220 for (size_t y=0;y<(internal[1]);++y)
2221 {
2222 for (size_t x=0;x<(internal[0]);++x)
2223 {
2224 dv[x+y*(internal[0])]=Convolve2D(convolution, src, x+radius, y+radius, radius, ext[0]);
2225
2226 }
2227 }
2228 delete[] convolution;
2229 delete[] src;
2230 return resdat;
2231 }
2232
2233 int Rectangle::findNode(const double *coords) const {
2234 const int NOT_MINE = -1;
2235 //is the found element even owned by this rank
2236 for (int dim = 0; dim < m_numDim; dim++) {
2237 if (m_origin[dim] + m_offset[dim] > coords[dim] || m_origin[dim]
2238 + m_offset[dim] + m_dx[dim]*m_ownNE[dim] < coords[dim]) {
2239 return NOT_MINE;
2240 }
2241 }
2242 // get distance from origin
2243 double x = coords[0] - m_origin[0];
2244 double y = coords[1] - m_origin[1];
2245 // distance in elements
2246 int ex = (int) floor(x / m_dx[0]);
2247 int ey = (int) floor(y / m_dx[1]);
2248 // set the min distance high enough to be outside the element plus a bit
2249 int closest = NOT_MINE;
2250 double minDist = 1;
2251 for (int dim = 0; dim < m_numDim; dim++) {
2252 minDist += m_dx[dim]*m_dx[dim];
2253 }
2254 //find the closest node
2255 for (int dx = 0; dx < 2; dx++) {
2256 double xdist = (x - (ex + dx)*m_dx[0]);
2257 for (int dy = 0; dy < 2; dy++) {
2258 double ydist = (y - (ey + dy)*m_dx[1]);
2259 double total = xdist*xdist + ydist*ydist;
2260 if (total < minDist) {
2261 closest = INDEX2(ex+dx, ey+dy, m_NE[0] + 1);
2262 minDist = total;
2263 }
2264 }
2265 }
2266 return closest;
2267 }
2268
2269 void Rectangle::setAssembler(std::string type, std::map<std::string,
2270 escript::Data> constants) {
2271 if (type.compare("WaveAssembler") == 0) {
2272 delete assembler;
2273 assembler = new WaveAssembler2D(this, m_dx, m_NX, m_NE, m_NN, constants);
2274 } else { //else ifs would go before this for other types
2275 throw RipleyException("Ripley::Rectangle does not support the"
2276 " requested assembler");
2277 }
2278 }
2279
2280 } // end of namespace ripley
2281

  ViewVC Help
Powered by ViewVC 1.1.26