/[escript]/trunk/escriptcore/src/MPIDataReducer.cpp
ViewVC logotype

Annotation of /trunk/escriptcore/src/MPIDataReducer.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 5775 - (hide annotations)
Thu Jul 30 08:01:06 2015 UTC (3 years, 11 months ago) by sshaw
File size: 13770 byte(s)
pushing release to trunk
1 jfenwick 5490 /*****************************************************************************
2     *
3 jfenwick 5593 * Copyright (c) 2014-2015 by The University of Queensland
4 jfenwick 5490 * http://www.uq.edu.au
5     *
6     * Primary Business: Queensland, Australia
7     * Licensed under the Open Software License version 3.0
8     * http://www.opensource.org/licenses/osl-3.0.php
9     *
10     * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
11     * Development 2012-2013 by School of Earth Sciences
12     * Development from 2014 by Centre for Geoscience Computing (GeoComp)
13     *
14     *****************************************************************************/
15    
16     #define ESNEEDPYTHON
17     #include "esysUtils/first.h"
18    
19    
20     #include <sstream>
21     #include <limits>
22     #include <boost/python/extract.hpp>
23     #include <boost/scoped_array.hpp>
24    
25     #include "MPIDataReducer.h"
26     #include "SplitWorldException.h"
27    
28     using namespace boost::python;
29     using namespace escript;
30    
31    
32     namespace escript
33     {
34     Reducer_ptr makeDataReducer(std::string type)
35     {
36     MPI_Op op;
37     if (type=="SUM")
38     {
39     op=MPI_SUM;
40     }
41 jfenwick 5643 else if (type=="SET")
42     {
43     op=MPI_OP_NULL;
44     }
45 jfenwick 5490 else
46     {
47     throw SplitWorldException("Unsupported operation for makeDataReducer.");
48     }
49     MPIDataReducer* m=new MPIDataReducer(op);
50     return Reducer_ptr(m);
51     }
52    
53     }
54    
55     namespace
56     {
57    
58     void combineData(Data& d1, const Data& d2, MPI_Op op)
59     {
60     if (op==MPI_SUM)
61     {
62     d1+=d2;
63 jfenwick 5643 }
64     else if (op==MPI_OP_NULL)
65     {
66     throw SplitWorldException("Multiple 'simultaneous' attempts to export a 'SET' variable.");
67 jfenwick 5490 }
68     }
69    
70     }
71    
72     MPIDataReducer::MPIDataReducer(MPI_Op op)
73 jfenwick 5643 : reduceop(op), had_an_export_this_round(false)
74 jfenwick 5490 {
75     valueadded=false;
76 jfenwick 5643 if ((op==MPI_SUM) || (op==MPI_OP_NULL))
77 jfenwick 5490 {
78     // deliberately left blank
79     }
80     else
81     {
82     throw SplitWorldException("Unsupported MPI_Op");
83     }
84     }
85    
86 jfenwick 5643 void MPIDataReducer::newRunJobs()
87     {
88     had_an_export_this_round=false;
89     }
90 jfenwick 5490
91     void MPIDataReducer::setDomain(escript::Domain_ptr d)
92     {
93     dom=d;
94     }
95    
96     std::string MPIDataReducer::description()
97     {
98     std::string op="SUM";
99 jfenwick 5643 if (reduceop==MPI_OP_NULL)
100     {
101     op="SET";
102     }
103 jfenwick 5490 return "Reducer("+op+") for Data objects";
104     }
105    
106     bool MPIDataReducer::valueCompatible(boost::python::object v)
107     {
108     extract<Data&> ex(v);
109     if (!ex.check())
110     {
111     return false;
112     }
113     if (dom.get()!=0)
114     {
115     const Data& d=ex();
116     if (d.getDomain().get()!=dom.get())
117     {
118     return false; // the domains don't match
119     }
120     }
121     return true;
122     }
123    
124    
125     bool MPIDataReducer::reduceLocalValue(boost::python::object v, std::string& errstring)
126     {
127     extract<Data&> ex(v);
128     if (!ex.check())
129     {
130     errstring="reduceLocalValue: expected Data object. Got something else.";
131     return false;
132     }
133     Data& d=ex();
134 jfenwick 5680 if (d.isEmpty())
135     {
136     errstring="reduceLocalValue: Got an empty Data object. Not allowed to reduce those.";
137     return false;
138     }
139 jfenwick 5490 if ((d.getDomain()!=dom) && (dom.get()!=0))
140     {
141     errstring="reduceLocalValue: Got a Data object, but it was not using the SubWorld's domain.";
142     return false;
143     }
144     d.expand(); // because I don't want to mess about with types of Data
145 jfenwick 5683 if (!valueadded || !had_an_export_this_round) // first value so answer becomes this one
146 jfenwick 5490 {
147     value=d;
148     dom=d.getDomain();
149 jfenwick 5643 had_an_export_this_round=true;
150 jfenwick 5645 valueadded=true;
151 jfenwick 5490 }
152     else
153     {
154 jfenwick 5643 if (reduceop==MPI_OP_NULL)
155 jfenwick 5490 {
156 jfenwick 5643 if (had_an_export_this_round)
157     {
158 jfenwick 5697 reset();
159 jfenwick 5643 errstring="reduceLocalValue: Multiple 'simultaneous' attempts to export a 'SET' variable.";
160     return false;
161     }
162     value=d;
163     dom=d.getDomain();
164     had_an_export_this_round=true;
165 jfenwick 5490 }
166 jfenwick 5643 else
167     {
168     had_an_export_this_round=true;
169     if (d.getFunctionSpace()!=value.getFunctionSpace())
170     {
171     errstring="reduceLocalValue: FunctionSpaces for Data objects being combined must match.";
172     return false;
173     }
174     combineData(value, d, reduceop);
175     }
176 jfenwick 5490 }
177     return true;
178     }
179    
180     void MPIDataReducer::reset()
181     {
182     valueadded=false;
183     value=Data();
184     }
185    
186     bool MPIDataReducer::checkRemoteCompatibility(esysUtils::JMPI& mpi_info, std::string& errstring)
187     {
188     #ifdef ESYS_MPI
189     // since they can't add it unless it is using the proper domain, we need to check
190    
191     std::vector<unsigned> compat(6);
192     getCompatibilityInfo(compat);
193    
194     // still need to incorporate domain version into this
195     // or are domains not mutable in any way that matters?
196     int* rbuff=new int[mpi_info->size*compat.size()];
197     boost::scoped_array<int> dummy(rbuff); // to ensure cleanup
198     for (int i=0;i<mpi_info->size;++i)
199     {
200     rbuff[i]=0; // since this won't match any valid value we can use it as a failure check
201     }
202     if (MPI_Allgather(&compat[0], compat.size(), MPI_UNSIGNED, rbuff,
203     compat.size(), MPI_UNSIGNED, mpi_info->comm)!=MPI_SUCCESS)
204     {
205     errstring="MPI failure in checkRemoteCompatibility.";
206     return false;
207     }
208 jfenwick 5680 for (int i=0;i<(mpi_info->size-1);++i)
209 jfenwick 5490 {
210 jfenwick 5680 if ((rbuff[i*compat.size()]==1) || (rbuff[(i+1)*compat.size()]==1)) // one of them doesn't have a value
211 jfenwick 5490 {
212 jfenwick 5680 continue;
213     }
214     for (int j=0;j<compat.size();++j)
215     {
216 jfenwick 5490 if (rbuff[i*compat.size()+j]!=rbuff[(i+1)*compat.size()+j])
217     {
218     std::ostringstream oss;
219     oss << "Incompatible value found for SubWorld " << i+1 << '.';
220     errstring=oss.str();
221     return false;
222     }
223     }
224     }
225     return true;
226     #else
227     return true;
228     #endif
229     }
230    
231     // By the time this function is called, we know that all the values
232     // are compatible
233 jfenwick 5697 bool MPIDataReducer::reduceRemoteValues(MPI_Comm& comm)
234 jfenwick 5490 {
235     #ifdef ESYS_MPI
236     DataTypes::ValueType& vr=value.getExpandedVectorReference();
237     Data result(0, value.getDataPointShape(), value.getFunctionSpace(), true);
238 jfenwick 5680 DataTypes::ValueType& rr=result.getExpandedVectorReference();
239 jfenwick 5643 if (reduceop==MPI_OP_NULL)
240     {
241 jfenwick 5697 reset(); // we can't be sure what the value should be
242 jfenwick 5643 return false; // this will stop bad things happening but won't give an informative error message
243     }
244 jfenwick 5697 if (MPI_Allreduce(&(vr[0]), &(rr[0]), vr.size(), MPI_DOUBLE, reduceop, comm)!=MPI_SUCCESS)
245 jfenwick 5490 {
246     return false;
247     }
248 jfenwick 5680 value=result;
249 jfenwick 5490 return true;
250     #else
251     return true;
252     #endif
253     }
254    
255     // populate a vector of ints with enough information to ensure two values are compatible
256     // or to construct a container for incomming data
257     // Format for this:
258 jfenwick 5680 // [0] Type of Data: {0 : error, 1:no value, 10: constant, 11:tagged, 12:expanded}
259 jfenwick 5490 // [1] Functionspace type code
260     // [2] Only used for tagged --- gives the number of tags (which exist in the data object)
261     // [3..6] Components of the shape
262     void MPIDataReducer::getCompatibilityInfo(std::vector<unsigned>& params)
263     {
264     params.resize(7);
265     for (int i=0;i<7;++i)
266     {
267     params[0]=0;
268     }
269 jfenwick 5680 if (!valueadded)
270     {
271     params[0]=1;
272     return;
273     }
274 jfenwick 5490 if (value.isConstant())
275     {
276     params[0]=10;
277     }
278     else if (value.isTagged())
279     {
280     params[0]=11;
281     }
282     else if (value.isExpanded())
283     {
284     params[0]=12;
285     }
286     else // This could be DataEmpty or some other weirdness but we won't allow that
287     {
288     params[0]=0; // invalid type to send
289 jfenwick 5680 return;
290 jfenwick 5490 }
291     params[1]=value.getFunctionSpace().getTypeCode();
292     params[2]=static_cast<unsigned>(value.getNumberOfTaggedValues());
293     const DataTypes::ShapeType& s=value.getDataPointShape();
294     for (int i=0;i<s.size();++i)
295     {
296     params[3+i]=s[i];
297     }
298     }
299    
300    
301     // Get a value for this variable from another process
302     // This is not a reduction and will replace any existing value
303     bool MPIDataReducer::recvFrom(Esys_MPI_rank localid, Esys_MPI_rank source, esysUtils::JMPI& mpiinfo)
304     {
305 jfenwick 5697 #ifdef ESYS_MPI
306 jfenwick 5490 // first we need to find out what we are expecting
307     unsigned params[7];
308     MPI_Status stat;
309     if (MPI_Recv(params, 7, MPI_UNSIGNED, source, PARAMTAG, mpiinfo->comm, &stat)!=MPI_SUCCESS)
310     {
311     return false;
312     }
313     if (params[0]<10) // the sender somehow tried to send something invalid
314     {
315     return false;
316     }
317     // now we put the shape object together
318     escript::DataTypes::ShapeType s;
319     for (int i=0;i<4;++i)
320     {
321     if (params[3+i]>0)
322     {
323     s.push_back(params[3+i]);
324     }
325     else
326     {
327     break;
328     }
329     }
330     // Now we need the FunctionSpace
331     FunctionSpace fs=FunctionSpace(dom, static_cast<int>(params[1]));
332     value=Data(0, s, fs, params[0]==12);
333     if (params[0]==11) // The Data is tagged so we need to work out what tags we need
334     {
335     // TODO: Need to ship the tags and names over but for now just make sure there
336     // are the same number of tags
337     value.tag();
338    
339     DataVector dv(DataTypes::noValues(s), 0, 1);
340     for (unsigned i=0;i<params[2];++i)
341     {
342     value.setTaggedValueFromCPP(static_cast<int>(i)+1, s, dv, 0);
343     }
344     return false; // because I don't trust this yet
345     }
346     #endif
347     return true;
348     }
349    
350     // Send a value to this variable to another process
351     // This is not a reduction and will replace any existing value
352     bool MPIDataReducer::sendTo(Esys_MPI_rank localid, Esys_MPI_rank target, esysUtils::JMPI& mpiinfo)
353     {
354 jfenwick 5680 if (!valueadded)
355     {
356     return false; // May be misinterpreted as an MPI failure
357     }
358 jfenwick 5490 #ifdef ESYS_MPI
359     // first step is to let the other world know what sort of thing it needs to make
360     if (value.isLazy())
361     {
362     value.resolve();
363     }
364     std::vector<unsigned> params;
365     getCompatibilityInfo(params);
366     if (MPI_Send(&params[0], 6, MPI_UNSIGNED, target, PARAMTAG, mpiinfo->comm)!=MPI_SUCCESS)
367     {
368     return false;
369     }
370     // now we have informed the other end of what happened
371     // are we done or is there actually data to send
372     if (params[0]<10)
373     {
374     return false;
375     }
376     // at this point, we know there is data to send
377     const DataAbstract::ValueType::value_type* vect=value.getDataRO();
378     // now the receiver knows how much data it should be receive
379     // need to make sure that we aren't trying to send data with no local samples
380     if (vect!=0)
381     {
382     // MPI v3 has this first param as a const void* (as it should be)
383     // Version on my machine expects void*
384     if (MPI_Send(const_cast<DataAbstract::ValueType::value_type*>(vect), value.getLength(), MPI_DOUBLE, target, PARAMTAG, mpiinfo->comm)!=MPI_SUCCESS)
385     {
386     return false;
387     }
388     }
389     #endif
390     return true;
391     }
392    
393     boost::python::object MPIDataReducer::getPyObj()
394     {
395 jfenwick 5647 boost::python::object o(value);
396     return o;
397 jfenwick 5490 }
398    
399    
400     // send from proc 0 in the communicator to all others
401 jfenwick 5683 // second argument is true if this rank is sending
402     bool MPIDataReducer::groupSend(MPI_Comm& comm, bool imsending)
403 jfenwick 5490 {
404 jfenwick 5683 if (dom.get()==0)
405     {
406     return 0; // trying to avoid throwing here
407     // this will still cause a lockup if it happens
408     }
409     #ifdef ESYS_MPI
410     if (imsending)
411     {
412     // first step is to let the other world know what sort of thing it needs to make
413     if (value.isLazy())
414     {
415     value.resolve();
416     }
417     std::vector<unsigned> params;
418     getCompatibilityInfo(params);
419     if (MPI_Bcast(&params[0], params.size(), MPI_UNSIGNED, 0,comm)!=MPI_SUCCESS)
420     {
421     return false;
422     }
423     // now we have informed the other end of what happened
424     // are we done or is there actually data to send
425     if (params[0]<10)
426     {
427     return false;
428     }
429     // at this point, we know there is data to send
430     const DataAbstract::ValueType::value_type* vect=value.getDataRO();
431     // now the receiver knows how much data it should be receive
432     // need to make sure that we aren't trying to send data with no local samples
433     if (vect!=0)
434     {
435     if (MPI_Bcast(const_cast<DataAbstract::ValueType::value_type*>(vect), value.getLength(), MPI_DOUBLE, 0, comm)!=MPI_SUCCESS)
436     {
437     return false;
438     }
439     }
440     }
441     else // we are receiving
442     {
443 jfenwick 5697
444 jfenwick 5683 // first we need to find out what we are expecting
445     unsigned params[7];
446     if (MPI_Bcast(params, 7, MPI_UNSIGNED, 0, comm)!=MPI_SUCCESS)
447     {
448     return false;
449     }
450     if (params[0]<10) // the sender somehow tried to send something invalid
451     {
452     return false;
453     }
454     // now we put the shape object together
455     escript::DataTypes::ShapeType s;
456     for (int i=0;i<4;++i)
457     {
458     if (params[3+i]>0)
459     {
460     s.push_back(params[3+i]);
461     }
462     else
463     {
464     break;
465     }
466     }
467     // Now we need the FunctionSpace
468     FunctionSpace fs=FunctionSpace(dom, static_cast<int>(params[1]));
469     value=Data(0, s, fs, params[0]==12);
470     if (params[0]==11) // The Data is tagged so we need to work out what tags we need
471     {
472     // TODO: Need to ship the tags and names over but for now just make sure there
473     // are the same number of tags
474     value.tag();
475    
476     DataVector dv(DataTypes::noValues(s), 0, 1);
477     for (unsigned i=0;i<params[2];++i)
478     {
479     value.setTaggedValueFromCPP(static_cast<int>(i)+1, s, dv, 0);
480     }
481     return false; // because I don't trust this yet
482     }
483     DataAbstract::ValueType::value_type* vect=&(value.getExpandedVectorReference()[0]);
484     if (MPI_Bcast(const_cast<DataAbstract::ValueType::value_type*>(vect), value.getLength(), MPI_DOUBLE, 0, comm)!=MPI_SUCCESS)
485     {
486     return false;
487     }
488     valueadded=true;
489     }
490     #endif
491     return true;
492 jfenwick 5490 }
493    
494 jfenwick 5697 // We assume compatible values at this point
495 jfenwick 5490 bool MPIDataReducer::groupReduce(MPI_Comm& com, char mystate)
496     {
497     throw SplitWorldException("groupReduce Not implemented yet.");
498     }
499    
500 jfenwick 5649 void MPIDataReducer::copyValueFrom(boost::shared_ptr<AbstractReducer>& src)
501     {
502     MPIDataReducer* sr=dynamic_cast<MPIDataReducer*>(src.get());
503     if (sr==0)
504     {
505     throw SplitWorldException("Source and destination need to be the same reducer types.");
506     }
507 jfenwick 5680 if (sr->value.isEmpty())
508     {
509     throw SplitWorldException("Attempt to copy DataEmpty.");
510     }
511 jfenwick 5683 if (sr==this)
512     {
513     throw SplitWorldException("Source and destination can not be the same variable.");
514     }
515     value.copy(sr->value);
516 jfenwick 5649 valueadded=true;
517     }
518    
519 jfenwick 5697 bool MPIDataReducer::canClash()
520     {
521     return (reduceop==MPI_OP_NULL);
522 sshaw 5775 }
523    

  ViewVC Help
Powered by ViewVC 1.1.26