
Diff of /branches/split/escriptcore/src/SplitWorld.cpp


--- revision 4733 by jfenwick, Mon Mar 10 04:20:03 2014 UTC
+++ revision 4734 by jfenwick, Mon Mar 10 06:38:54 2014 UTC
# Line 14
 *****************************************************************************/
 
 #include "esysUtils/Esys_MPI.h"
-#include "WorldSplitter.h"
+#include "SplitWorld.h"
 #include "AbstractDomain.h"
-#include "DomainException.h"
+#include "SplitWorldException.h"
 #include "SplitWorldException.h"
 
 #include <iostream>
# Line 24
 using namespace boost::python;
 using namespace escript;
 
-WorldSplitter::WorldSplitter(unsigned int numgroups, MPI_Comm global)
+SplitWorld::SplitWorld(unsigned int numgroups, MPI_Comm global)
     :globalcom(global), subcom(MPI_COMM_NULL), localworld((SubWorld*)0), groupcount(numgroups)
 {
     int gsize;
     int grank;
     if ((MPI_Comm_size(global, &gsize)!=MPI_SUCCESS) || (MPI_Comm_rank(global, &grank)!=MPI_SUCCESS))
     {
-    throw DomainException("MPI appears to be inoperative.");
+    throw SplitWorldException("MPI appears to be inoperative.");
     }
     if (gsize%numgroups!=0)
     {
-    throw DomainException("WorldSplitter error: requested number of groups is not a factor of global communicator size.");
+    throw SplitWorldException("SplitWorld error: requested number of groups is not a factor of global communicator size.");
     }
     int wsize=gsize/numgroups;  // each world has this many processes
     int res=MPI_Comm_split(MPI_COMM_WORLD, grank/wsize, grank%wsize, &subcom);
     if (res!=MPI_SUCCESS)
     {
-    throw DomainException("WorldSplitter error: Unable to form communicator.");
+    throw SplitWorldException("SplitWorld error: Unable to form communicator.");
     }
     localworld=SubWorld_ptr(new SubWorld(subcom));
     localid=grank/wsize;
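Note on the constructor above: MPI_Comm_split gives each process a colour grank/wsize (which subworld it joins) and a key grank%wsize (its rank order inside that subworld). A minimal standalone sketch of the same partitioning pattern, with illustrative values that are not part of this revision:

    // build: mpic++ split_sketch.cpp && mpirun -np 4 ./a.out
    #include <mpi.h>
    #include <cstdio>

    int main(int argc, char** argv)
    {
        MPI_Init(&argc, &argv);
        int gsize, grank;
        MPI_Comm_size(MPI_COMM_WORLD, &gsize);
        MPI_Comm_rank(MPI_COMM_WORLD, &grank);

        const int numgroups = 2;              // illustrative; must divide gsize
        const int wsize = gsize / numgroups;  // processes per subworld

        // colour picks the subworld, key preserves global rank order inside it
        MPI_Comm subcom;
        MPI_Comm_split(MPI_COMM_WORLD, grank / wsize, grank % wsize, &subcom);

        int srank;
        MPI_Comm_rank(subcom, &srank);
        std::printf("global rank %d -> subworld %d, local rank %d\n",
                    grank, grank / wsize, srank);

        MPI_Comm_free(&subcom);
        MPI_Finalize();
        return 0;
    }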
# Line 49  WorldSplitter::WorldSplitter(unsigned in
 
 // We may need to look into this more closely.
 // What if the domain lives longer than the world splitter?
-WorldSplitter::~WorldSplitter()
+SplitWorld::~SplitWorld()
 {
     if (subcom!=MPI_COMM_NULL)
     {
# Line 59  WorldSplitter::~WorldSplitter()
 
 
 // The boost wrapper will ensure that there is at least one entry in the tuple
-object WorldSplitter::buildDomains(tuple t, dict kwargs)
+object SplitWorld::buildDomains(tuple t, dict kwargs)
 {
     int tsize=len(t);
     // get the callable that we will invoke in a sec
# Line 86  object WorldSplitter::buildDomains(tuple
     // now do a sanity check to see if the domain has respected the communicator info we passed it.
     if (dptr->getMPIComm()!=localworld->getComm())
     {
-    throw DomainException("The newly constructed domain is not using the correct communicator.");
+    throw SplitWorldException("The newly constructed domain is not using the correct communicator.");
     }
     localworld->setDomain(dptr);
     return object();    // return None
 }
 
 /** a list of tuples/sequences:  (Job class, number of instances)*/
-void WorldSplitter::runJobs(boost::python::list l)
+void SplitWorld::runJobs(boost::python::list l)
 {
     // first count up how many jobs we have in total
     unsigned int numjobs=0;
# Line 127  void WorldSplitter::runJobs(boost::pytho
     unsigned int classnum=0;
     unsigned int lowend=1;
     unsigned int highend=lowend+numjobs/groupcount+(numjobs%groupcount);
-std::cout << localid << std::endl;
+// std::cout << localid << std::endl;
     for (int i=1;i<=localid;++i)
     {
     lowend=highend;
# Line 137  std::cout << localid << std::endl;
         highend++;
     }
     }
-std::cout << "There are " << numjobs << " jobs with range [" << lowend << ", " << highend << ")\n";
+// std::cout << "There are " << numjobs << " jobs with range [" << lowend << ", " << highend << ")\n";
     // We could do something more clever about trying to fit Jobs to subworlds
     // to ensure that instances sharing the same Job class would share the same
     // world as much as possible but for now we'll do this:
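The middle of the range-computation loop above is cut off by the hunk boundary, so the exact remainder handling is not visible here. As a hypothetical sketch only (not a reconstruction of the elided loop body), a common way to block-distribute numjobs over groupcount worlds as half-open ranges [lowend, highend), with job numbers starting at 1 and the remainder spread one extra job per leading world:

    #include <iostream>

    int main()
    {
        const unsigned int numjobs = 10, groupcount = 4;   // illustrative values
        const unsigned int quota = numjobs / groupcount;
        const unsigned int rem   = numjobs % groupcount;

        unsigned int lowend = 1;
        for (unsigned int id = 0; id < groupcount; ++id)
        {
            // worlds with id < rem absorb one remainder job each
            const unsigned int highend = lowend + quota + (id < rem ? 1 : 0);
            std::cout << "world " << id << " runs jobs [" << lowend
                      << ", " << highend << ")\n";
            lowend = highend;
        }
        return 0;
    }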
# Line 150  std::cout << "There are " << numjobs <<
     // now if this is one of the job numbers in our local range,
     // create an instance of the appropriate class
     if (j>=lowend and j<highend)
     {
-std::cout << "Added job\n";
         object o=classvec[classnum](localworld->getDomain(), object(j));
         localworld->addJob(o);
     }
     }
-
-    // now we actually need to run the jobs
-    // everybody will be executing their localworld's jobs
-    localworld->runJobs();
+    int mres=0;
+    do
+    {
+    // now we actually need to run the jobs
+    // everybody will be executing their localworld's jobs
+    int res=localworld->runJobs();
+    // now we find out about the other worlds
+    mres=0;
+    if (MPI_Allreduce(&res, &mres, 1, MPI_INT, MPI_MAX, globalcom)!=MPI_SUCCESS)
+    {
+        throw SplitWorldException("MPI appears to have failed.");
+    }
+    } while (mres==1);
+    if (mres==2)
+    {
+    throw SplitWorldException("At least one Job's work() function did not return True/False.");
+    }
+    else if (mres==3)
+    {
+    throw SplitWorldException("At least one Job's work() function raised an exception.");
+    }
 }
 
 namespace escript
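The do/while introduced above is the substance of this revision: each world runs its local jobs, then max-reduces its status across the global communicator so every world sees the same outcome and leaves the loop in the same iteration. Judging from the error messages, the convention appears to be 0 = all done, 1 = at least one job wants another pass, 2/3 = error states; MPI_MAX makes the most severe status win. A self-contained sketch of the pattern, with runLocalJobs() as a stand-in for localworld->runJobs():

    #include <mpi.h>
    #include <cstdio>
    #include <stdexcept>

    // stand-in for localworld->runJobs(): report "finished" immediately
    static int runLocalJobs() { return 0; }

    static void runToCompletion(MPI_Comm globalcom)
    {
        int mres = 0;
        do
        {
            int res = runLocalJobs();   // local status for this world
            // max-reduce so the most severe status becomes the global one
            if (MPI_Allreduce(&res, &mres, 1, MPI_INT, MPI_MAX, globalcom) != MPI_SUCCESS)
                throw std::runtime_error("MPI appears to have failed.");
        } while (mres == 1);            // anyone still working? go round again
        if (mres > 1)
            throw std::runtime_error("at least one job failed");
    }

    int main(int argc, char** argv)
    {
        MPI_Init(&argc, &argv);
        runToCompletion(MPI_COMM_WORLD);
        std::printf("all worlds finished cleanly\n");
        MPI_Finalize();
        return 0;
    }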
# Line 170  Line 186  boost::python::object raw_buildDomains(b
     int l=len(t);
     if (l<2)
     {
-    throw DomainException("Insufficient parameters to buildDomains.");
+    throw SplitWorldException("Insufficient parameters to buildDomains.");
     }
-    extract<WorldSplitter&> exw(t[0]);
+    extract<SplitWorld&> exw(t[0]);
     if (!exw.check())
     {
-    throw DomainException("First parameter to buildDomains must be a WorldSplitter.");
+    throw SplitWorldException("First parameter to buildDomains must be a SplitWorld.");
     }
-    WorldSplitter& ws=exw();
+    SplitWorld& ws=exw();
     tuple ntup=tuple(t.slice(1,l)); // strip off the object param
     return ws.buildDomains(ntup, kwargs);
 }
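raw_buildDomains has the (tuple, dict) shape that boost::python expects of a raw function. Its registration is not part of this diff, but a function like it would typically be exposed through boost::python::raw_function; the module name and stand-in below are assumptions for illustration, not escript's actual wrapper code:

    #include <boost/python.hpp>
    #include <boost/python/raw_function.hpp>
    #include <stdexcept>

    namespace {
    // hypothetical stand-in with the same (tuple, dict) shape as escript::raw_buildDomains
    boost::python::object raw_example(boost::python::tuple t, boost::python::dict kwargs)
    {
        (void)kwargs;   // unused in this sketch
        if (boost::python::len(t) < 2)
            throw std::invalid_argument("Insufficient parameters.");
        return boost::python::object();   // None
    }
    }

    BOOST_PYTHON_MODULE(example)
    {
        using namespace boost::python;
        // min_args=2: the SplitWorld itself plus at least one builder argument
        def("buildDomains", raw_function(&raw_example, 2));
    }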
