/[escript]/trunk/escriptcore/src/MPIDataReducer.cpp
/*****************************************************************************
*
* Copyright (c) 2014-2018 by The University of Queensland
* http://www.uq.edu.au
*
* Primary Business: Queensland, Australia
* Licensed under the Apache License, version 2.0
* http://www.apache.org/licenses/LICENSE-2.0
*
* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
* Development 2012-2013 by School of Earth Sciences
* Development from 2014 by Centre for Geoscience Computing (GeoComp)
*
*****************************************************************************/

#include "MPIDataReducer.h"
#include "SplitWorldException.h"

#include <limits>
#include <sstream>
#include <boost/python/extract.hpp>
#include <boost/scoped_array.hpp>

using namespace boost::python;
using namespace escript;

namespace {

void combineData(Data& d1, const Data& d2, MPI_Op op)
{
    if (op==MPI_SUM)
    {
        d1+=d2;
    }
    else if (op==MPI_OP_NULL)
    {
        throw SplitWorldException("Multiple 'simultaneous' attempts to export a 'SET' variable.");
    }
}

} // anonymous namespace


namespace escript {

Reducer_ptr makeDataReducer(std::string type)
{
    MPI_Op op;
    if (type=="SUM")
    {
        op=MPI_SUM;
    }
    else if (type=="SET")
    {
        op=MPI_OP_NULL;
    }
    else
    {
        throw SplitWorldException("Unsupported operation for makeDataReducer.");
    }
    MPIDataReducer* m=new MPIDataReducer(op);
    return Reducer_ptr(m);
}
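
// Usage sketch (illustrative, not taken from the original source):
//   Reducer_ptr r = makeDataReducer("SUM"); // exports are combined by addition
//   Reducer_ptr s = makeDataReducer("SET"); // at most one export per round allowed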

MPIDataReducer::MPIDataReducer(MPI_Op op)
    : reduceop(op), had_an_export_this_round(false)
{
    valueadded=false;
    if ((op==MPI_SUM) || (op==MPI_OP_NULL))
    {
        // deliberately left blank
    }
    else
    {
        throw SplitWorldException("Unsupported MPI_Op");
    }
}

void MPIDataReducer::newRunJobs()
{
    had_an_export_this_round=false;
}

void MPIDataReducer::setDomain(escript::Domain_ptr d)
{
    dom=d;
}

std::string MPIDataReducer::description()
{
    std::string op="SUM";
    if (reduceop==MPI_OP_NULL)
    {
        op="SET";
    }
    return "Reducer("+op+") for Data objects";
}

bool MPIDataReducer::valueCompatible(boost::python::object v)
{
    extract<Data&> ex(v);
    if (!ex.check())
    {
        return false;
    }
    if (dom.get()!=0)
    {
        const Data& d=ex();
        if (d.getDomain().get()!=dom.get())
        {
            return false; // the domains don't match
        }
    }
    return true;
}


bool MPIDataReducer::reduceLocalValue(boost::python::object v, std::string& errstring)
{
    extract<Data&> ex(v);
    if (!ex.check())
    {
        errstring="reduceLocalValue: expected Data object. Got something else.";
        return false;
    }
    Data& d=ex();
    if (d.isEmpty())
    {
        errstring="reduceLocalValue: Got an empty Data object. Not allowed to reduce those.";
        return false;
    }
    if ((d.getDomain()!=dom) && (dom.get()!=0))
    {
        errstring="reduceLocalValue: Got a Data object, but it was not using the SubWorld's domain.";
        return false;
    }
    d.expand(); // because I don't want to mess about with types of Data
    if (!valueadded || !had_an_export_this_round) // first value this round, so the answer becomes this one
    {
        value=d;
        dom=d.getDomain();
        had_an_export_this_round=true;
        valueadded=true;
    }
    else
    {
        if (reduceop==MPI_OP_NULL)
        {
            if (had_an_export_this_round)
            {
                reset();
                errstring="reduceLocalValue: Multiple 'simultaneous' attempts to export a 'SET' variable.";
                return false;
            }
            value=d;
            dom=d.getDomain();
            had_an_export_this_round=true;
        }
        else
        {
            had_an_export_this_round=true;
            if (d.getFunctionSpace()!=value.getFunctionSpace())
            {
                errstring="reduceLocalValue: FunctionSpaces for Data objects being combined must match.";
                return false;
            }
            combineData(value, d, reduceop);
        }
    }
    return true;
}

void MPIDataReducer::reset()
{
    valueadded=false;
    value=Data();
}

bool MPIDataReducer::checkRemoteCompatibility(JMPI& mpi_info, std::string& errstring)
{
#ifdef ESYS_MPI
    // a value can only be added if it uses the proper domain, so we just need
    // to check that the descriptions gathered from each rank agree

    std::vector<unsigned> compat;
    getCompatibilityInfo(compat); // resizes compat to 8 entries

    // still need to incorporate domain version into this
    // or are domains not mutable in any way that matters?
    int* rbuff=new int[mpi_info->size*compat.size()];
    boost::scoped_array<int> dummy(rbuff); // to ensure cleanup
    for (int i=0;i<mpi_info->size*static_cast<int>(compat.size());++i)
    {
        rbuff[i]=0; // since this won't match any valid value we can use it as a failure check
    }
    if (MPI_Allgather(&compat[0], compat.size(), MPI_UNSIGNED, rbuff,
            compat.size(), MPI_UNSIGNED, mpi_info->comm)!=MPI_SUCCESS)
    {
        errstring="MPI failure in checkRemoteCompatibility.";
        return false;
    }
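
    // Layout after the Allgather (illustrative): with R ranks and 8 entries per
    // rank, rank k's description occupies rbuff[8*k .. 8*k+7]; e.g. for R=2,
    //   rbuff = { p0[0..7], p1[0..7] }
    // Entry 0 of a block is 1 when that rank holds no value yet.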
    for (int i=0;i<(mpi_info->size-1);++i)
    {
        if ((rbuff[i*compat.size()]==1) || (rbuff[(i+1)*compat.size()]==1)) // one of them doesn't have a value
        {
            continue;
        }
        for (size_t j=0;j<compat.size();++j)
        {
            if (rbuff[i*compat.size()+j]!=rbuff[(i+1)*compat.size()+j])
            {
                std::ostringstream oss;
                oss << "Incompatible value found for SubWorld " << i+1 << '.';
                errstring=oss.str();
                return false;
            }
        }
    }
    return true;
#else
    return true;
#endif
}

// By the time this function is called, we know that all the values
// are compatible
bool MPIDataReducer::reduceRemoteValues(MPI_Comm& comm)
{
#ifdef ESYS_MPI
    DataTypes::RealVectorType& vr=value.getExpandedVectorReference();
    Data result(0, value.getDataPointShape(), value.getFunctionSpace(), true);
    DataTypes::RealVectorType& rr=result.getExpandedVectorReference();
    if (reduceop==MPI_OP_NULL)
    {
        reset(); // we can't be sure what the value should be
        return false; // this will stop bad things happening but won't give an informative error message
    }
    if (MPI_Allreduce(&(vr[0]), &(rr[0]), vr.size(), MPI_DOUBLE, reduceop, comm)!=MPI_SUCCESS)
    {
        return false;
    }
    value=result;
    return true;
#else
    return true;
#endif
}

// Populate a vector of ints with enough information to check that two values
// are compatible, or to construct a container for incoming data.
// Format:
// [0] Type of Data: {0: error, 1: no value, 10: constant, 11: tagged, 12: expanded}
// [1] FunctionSpace type code
// [2] Only used for tagged --- gives the number of tags (which exist in the data object)
// [3..6] Components of the shape
// [7] Complexity: {0: real, 1: complex}
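// Example (illustrative): an expanded, real-valued Data object with shape (3,2)
// on a FunctionSpace with type code 1 would be described as
//   params = {12, 1, 0, 3, 2, 0, 0, 0}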
void MPIDataReducer::getCompatibilityInfo(std::vector<unsigned>& params)
{
    params.resize(8);
    for (int i=0;i<8;++i)
    {
        params[i]=0;
    }
    if (!valueadded)
    {
        params[0]=1;
        return;
    }
    if (value.isConstant())
    {
        params[0]=10;
    }
    else if (value.isTagged())
    {
        params[0]=11;
    }
    else if (value.isExpanded())
    {
        params[0]=12;
    }
    else // This could be DataEmpty or some other weirdness but we won't allow that
    {
        params[0]=0; // invalid type to send
        return;
    }
    params[1]=value.getFunctionSpace().getTypeCode();
    params[2]=static_cast<unsigned>(value.getNumberOfTaggedValues());
    const DataTypes::ShapeType& s=value.getDataPointShape();
    for (size_t i=0;i<s.size();++i)
    {
        params[3+i]=s[i];
    }
    params[7]=value.isComplex();
}


// Get a value for this variable from another process.
// This is not a reduction and will replace any existing value.
bool MPIDataReducer::recvFrom(int localid, int source, JMPI& mpiinfo)
{
#ifdef ESYS_MPI
    // first we need to find out what we are expecting
    // (8 entries, matching what getCompatibilityInfo() produces and sendTo() sends)
    unsigned params[8];
    MPI_Status stat;
    if (MPI_Recv(params, 8, MPI_UNSIGNED, source, PARAMTAG, mpiinfo->comm, &stat)!=MPI_SUCCESS)
    {
        return false;
    }
    if (params[0]<10) // the sender somehow tried to send something invalid
    {
        return false;
    }
    // now we put the shape object together
    escript::DataTypes::ShapeType s;
    for (int i=0;i<4;++i)
    {
        if (params[3+i]>0)
        {
            s.push_back(params[3+i]);
        }
        else
        {
            break;
        }
    }
    // Now we need the FunctionSpace
    FunctionSpace fs=FunctionSpace(dom, static_cast<int>(params[1]));
    value=Data(0, s, fs, params[0]==12);
    if (params[0]==11) // The Data is tagged so we need to work out what tags we need
    {
        // TODO: Need to ship the tags and names over but for now just make sure there
        // are the same number of tags
        value.tag();

        DataTypes::RealVectorType dv(DataTypes::noValues(s), 0, 1);
        for (unsigned i=0;i<params[2];++i)
        {
            value.setTaggedValueFromCPP(static_cast<int>(i)+1, s, dv, 0);
        }
        return false; // because I don't trust this yet
    }
    // Receive the data payload itself, mirroring the MPI_Send in sendTo().
    // (Assumption: the original code ended here without receiving the payload;
    // only the real-valued case is handled, matching sendTo's real branch and
    // the receive pattern used in groupSend below.)
    DataTypes::real_t* vect=&(value.getExpandedVectorReference(0)[0]);
    if (MPI_Recv(vect, value.getLength(), MPI_DOUBLE, source, PARAMTAG, mpiinfo->comm, &stat)!=MPI_SUCCESS)
    {
        return false;
    }
    valueadded=true;
#endif
    return true;
}

// Send the value of this variable to another process.
// This is not a reduction and will replace any existing value on the receiver.
bool MPIDataReducer::sendTo(int localid, int target, JMPI& mpiinfo)
{
    if (!valueadded)
    {
        return false; // May be misinterpreted as an MPI failure
    }
#ifdef ESYS_MPI
    // first step is to let the other world know what sort of thing it needs to make
    if (value.isLazy())
    {
        value.resolve();
    }
    std::vector<unsigned> params;
    getCompatibilityInfo(params);
    // send all 8 description entries so the receiver sees the full shape and complexity
    if (MPI_Send(&params[0], params.size(), MPI_UNSIGNED, target, PARAMTAG, mpiinfo->comm)!=MPI_SUCCESS)
    {
        return false;
    }
    // now we have informed the other end of what is coming;
    // are we done or is there actually data to send?
    if (params[0]<10)
    {
        return false;
    }

    if (value.isComplex())
    {
        DataTypes::cplx_t dummy=0;
        // at this point, we know there is data to send
        const DataTypes::cplx_t* vect=value.getDataRO(dummy);
        // now the receiver knows how much data it should receive;
        // need to make sure that we aren't trying to send data with no local samples
        if (vect!=0)
        {
            // MPI v3 has this first param as a const void* (as it should be)
            // but some older implementations expect void*
            // we don't require MPIv3 yet ... so we can't use MPI_CXX_DOUBLE_COMPLEX
            // We'll try just sending twice as many doubles
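            // (e.g. a buffer of N cplx_t values goes out as 2N doubles with
            // real and imaginary parts interleaved, matching the layout of
            // std::complex<double>)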
            //if (MPI_Send(const_cast<DataTypes::cplx_t*>(vect), value.getLength(), MPI_CXX_DOUBLE_COMPLEX, target, PARAMTAG, mpiinfo->comm)!=MPI_SUCCESS)
            if (MPI_Send(const_cast<DataTypes::cplx_t*>(vect), 2*value.getLength(), MPI_DOUBLE, target, PARAMTAG, mpiinfo->comm)!=MPI_SUCCESS)
            {
                return false;
            }
        }
    }
    else
    {
        DataTypes::real_t dummy=0;
        // at this point, we know there is data to send
        const DataTypes::real_t* vect=value.getDataRO(dummy);
        // now the receiver knows how much data it should receive;
        // need to make sure that we aren't trying to send data with no local samples
        if (vect!=0)
        {
            // MPI v3 has this first param as a const void* (as it should be)
            // but some older implementations expect void*
            if (MPI_Send(const_cast<DataTypes::real_t*>(vect), value.getLength(), MPI_DOUBLE, target, PARAMTAG, mpiinfo->comm)!=MPI_SUCCESS)
            {
                return false;
            }
        }
    }
#endif
    return true;
}

boost::python::object MPIDataReducer::getPyObj()
{
    boost::python::object o(value);
    return o;
}


// Broadcast the value from rank 0 of the communicator to all others.
// The second argument is true if this rank is the sender.
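// Example pairing (illustrative): rank 0 calls groupSend(comm, true) while
// every other rank in comm calls groupSend(comm, false); the MPI_Bcast calls
// on the two sides then match up.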
bool MPIDataReducer::groupSend(MPI_Comm& comm, bool imsending)
{
    if (dom.get()==0)
    {
        return false; // trying to avoid throwing here
        // this will still cause a lockup if it happens
    }
#ifdef ESYS_MPI
    if (imsending)
    {
        // first step is to let the other world know what sort of thing it needs to make
        if (value.isLazy())
        {
            value.resolve();
        }
        std::vector<unsigned> params;
        getCompatibilityInfo(params);
        if (MPI_Bcast(&params[0], params.size(), MPI_UNSIGNED, 0, comm)!=MPI_SUCCESS)
        {
            return false;
        }
        // now we have informed the other end of what is coming;
        // are we done or is there actually data to send?
        if (params[0]<10)
        {
            return false;
        }

        if (value.isComplex())
        {
            DataTypes::cplx_t dummy=0;
            // at this point, we know there is data to send
            const DataTypes::cplx_t* vect=value.getDataRO(dummy);
            // now the receiver knows how much data it should receive;
            // need to make sure that we aren't trying to send data with no local samples
            if (vect!=0)
            {
                // we don't require MPIv3 yet ... so we can't use MPI_CXX_DOUBLE_COMPLEX
                // We'll try just sending twice as many doubles
                //if (MPI_Bcast(const_cast<DataTypes::cplx_t*>(vect), value.getLength(), MPI_CXX_DOUBLE_COMPLEX, 0, comm)!=MPI_SUCCESS)
                if (MPI_Bcast(const_cast<DataTypes::cplx_t*>(vect), value.getLength()*2, MPI_DOUBLE, 0, comm)!=MPI_SUCCESS)
                {
                    return false;
                }
            }
        }
        else
        {
            DataTypes::real_t dummy=0;
            // at this point, we know there is data to send
            const DataTypes::real_t* vect=value.getDataRO(dummy);
            // now the receiver knows how much data it should receive;
            // need to make sure that we aren't trying to send data with no local samples
            if (vect!=0)
            {
                if (MPI_Bcast(const_cast<DataTypes::real_t*>(vect), value.getLength(), MPI_DOUBLE, 0, comm)!=MPI_SUCCESS)
                {
                    return false;
                }
            }
        }
    }
    else // we are receiving
    {
        // first we need to find out what we are expecting
        unsigned params[8];
        if (MPI_Bcast(params, 8, MPI_UNSIGNED, 0, comm)!=MPI_SUCCESS)
        {
            return false;
        }
        if (params[0]<10) // the sender somehow tried to send something invalid
        {
            return false;
        }
        // params[7] carries the sender's complexity flag
        // (assumption: the original initialised createcplx to false and never
        // updated it, so complex values were never reconstructed; reading
        // params[7] here appears to be the intent)
        bool createcplx=(params[7]==1);
        // now we put the shape object together
        escript::DataTypes::ShapeType s;
        for (int i=0;i<4;++i)
        {
            if (params[3+i]>0)
            {
                s.push_back(params[3+i]);
            }
            else
            {
                break;
            }
        }
        // Now we need the FunctionSpace
        FunctionSpace fs=FunctionSpace(dom, static_cast<int>(params[1]));

        if (createcplx) // we need to make a complex Data object
        {
            value=Data(0, s, fs, params[0]==12);
            value.complicate();
            if (params[0]==11) // The Data is tagged so we need to work out what tags we need
            {
                // TODO: Need to ship the tags and names over but for now just make sure there
                // are the same number of tags
                value.tag();

                DataTypes::CplxVectorType dv(DataTypes::noValues(s), 0, 1);
                for (unsigned i=0;i<params[2];++i)
                {
                    value.setTaggedValueFromCPP(static_cast<int>(i)+1, s, dv, 0);
                }
                return false; // because I don't trust this yet
            }
            DataTypes::cplx_t* vect=&(value.getExpandedVectorReference(DataTypes::cplx_t(0))[0]);
            //if (MPI_Bcast(vect, value.getLength(), MPI_CXX_DOUBLE_COMPLEX, 0, comm)!=MPI_SUCCESS)
            if (MPI_Bcast(vect, value.getLength()*2, MPI_DOUBLE, 0, comm)!=MPI_SUCCESS)
            {
                return false;
            }
        }
        else
        {
            value=Data(0, s, fs, params[0]==12);
            if (params[0]==11) // The Data is tagged so we need to work out what tags we need
            {
                // TODO: Need to ship the tags and names over but for now just make sure there
                // are the same number of tags
                value.tag();

                DataTypes::RealVectorType dv(DataTypes::noValues(s), 0, 1);
                for (unsigned i=0;i<params[2];++i)
                {
                    value.setTaggedValueFromCPP(static_cast<int>(i)+1, s, dv, 0);
                }
                return false; // because I don't trust this yet
            }
            DataTypes::real_t* vect=&(value.getExpandedVectorReference(0)[0]);
            if (MPI_Bcast(vect, value.getLength(), MPI_DOUBLE, 0, comm)!=MPI_SUCCESS)
            {
                return false;
            }
        }
        valueadded=true;
    }
#endif
    return true;
}

// We assume compatible values at this point
bool MPIDataReducer::groupReduce(MPI_Comm& com, char mystate)
{
    throw SplitWorldException("groupReduce Not implemented yet.");
}

void MPIDataReducer::copyValueFrom(boost::shared_ptr<AbstractReducer>& src)
{
    MPIDataReducer* sr=dynamic_cast<MPIDataReducer*>(src.get());
    if (sr==0)
    {
        throw SplitWorldException("Source and destination need to be the same reducer types.");
    }
    if (sr->value.isEmpty())
    {
        throw SplitWorldException("Attempt to copy DataEmpty.");
    }
    if (sr==this)
    {
        throw SplitWorldException("Source and destination can not be the same variable.");
    }
    value.copy(sr->value);
    valueadded=true;
}

bool MPIDataReducer::canClash()
{
    return (reduceop==MPI_OP_NULL);
}

} // namespace escript

