/[escript]/branches/split/escriptcore/src/SplitWorld.cpp
ViewVC logotype

Diff of /branches/split/escriptcore/src/SplitWorld.cpp

Parent Directory | Revision Log | View Patch

revision 4746 by jfenwick, Thu Mar 13 06:23:15 2014 UTC | revision 4747 by jfenwick, Thu Mar 13 22:52:45 2014 UTC
# Line 25  using namespace boost::python; Line 25  using namespace boost::python;
25  using namespace escript;  using namespace escript;
26    
27  SplitWorld::SplitWorld(unsigned int numgroups, MPI_Comm global)  SplitWorld::SplitWorld(unsigned int numgroups, MPI_Comm global)
28      :globalcom(global), subcom(MPI_COMM_NULL), localworld((SubWorld*)0), swcount(numgroups>0?numgroups:1), jobcounter(1)      :localworld((SubWorld*)0), swcount(numgroups>0?numgroups:1), jobcounter(1)
29  {  {
30        globalcom=esysUtils::makeInfo(global);
31        
32      int grank=0;      int grank=0;
33      int wsize=1;        // each world has this many processes      int wsize=1;        // each world has this many processes
34      #ifdef ESYS_MPI      #ifdef ESYS_MPI
35      int gsize=1;      int gsize=globalcom->size;
36      if ((MPI_Comm_size(global, &gsize)!=MPI_SUCCESS) || (MPI_Comm_rank(global, &grank)!=MPI_SUCCESS))      grank=globalcom->rank;
     {  
         throw SplitWorldException("MPI appears to be inoperative.");  
     }  
37      if (gsize%swcount!=0)      if (gsize%swcount!=0)
38      {      {
39          throw SplitWorldException("SplitWorld error: requested number of groups is not a factor of global communicator size.");          throw SplitWorldException("SplitWorld error: requested number of groups is not a factor of global communicator size.");
40      }      }
41      wsize=gsize/swcount;    // each world has this many processes      wsize=gsize/swcount;    // each world has this many processes
42      int res=MPI_Comm_split(MPI_COMM_WORLD, grank/wsize, grank%wsize, &subcom);      MPI_Comm sub;
43        int res=MPI_Comm_split(MPI_COMM_WORLD, grank/wsize, grank%wsize, &sub);
44      if (res!=MPI_SUCCESS)      if (res!=MPI_SUCCESS)
45      {      {
46          throw SplitWorldException("SplitWorld error: Unable to form communicator.");          throw SplitWorldException("SplitWorld error: Unable to form communicator.");
47      }      }
48        subcom=esysUtils::makeInfo(sub,true);
49        #else
50        subcom=esysUtils::makeInfo(0);
51      #endif      #endif
52      localworld=SubWorld_ptr(new SubWorld(subcom));      localworld=SubWorld_ptr(new SubWorld(subcom));
53      localid=grank/wsize;      localid=grank/wsize;
54  }  }
55    
 // We may need to look into this more closely.  
 // What if the domain lives longer than the world splitter?  
56  SplitWorld::~SplitWorld()  SplitWorld::~SplitWorld()
57  {  {
58  #ifdef ESYS_MPI        // communicator cleanup handled by the MPI_Info
     if (subcom!=MPI_COMM_NULL)  
     {  
     MPI_Comm_free(&subcom);  
     }  
 #endif      
59  }  }
60    
61    
# Line 89  object SplitWorld::buildDomains(tuple t, Line 85  object SplitWorld::buildDomains(tuple t,
85      Domain_ptr dptr=ex1();      Domain_ptr dptr=ex1();
86            
87      // now do a sanity check to see if the domain has respected the communicator info we passed it.      // now do a sanity check to see if the domain has respected the communicator info we passed it.
88      if (dptr->getMPIComm()!=localworld->getComm())      if (dptr->getMPIComm()!=localworld->getMPI()->comm)
89      {      {
90      throw SplitWorldException("The newly constructed domain is not using the correct communicator.");      throw SplitWorldException("The newly constructed domain is not using the correct communicator.");
91      }      }
# Line 110  void SplitWorld::runJobs() Line 106  void SplitWorld::runJobs()
106      // everybody will be executing their localworld's jobs      // everybody will be executing their localworld's jobs
107      int res=localworld->runJobs(err);        int res=localworld->runJobs(err);  
108      // now we find out about the other worlds      // now we find out about the other worlds
109      if (!esysUtils::checkResult(res, mres, globalcom))      if (!esysUtils::checkResult(res, mres, globalcom->comm))
110      {      {
111          throw SplitWorldException("MPI appears to have failed.");          throw SplitWorldException("MPI appears to have failed.");
112      }      }
 std::cerr << "I got a res of " << mres << std::endl;      
       
113      } while (mres==1);      } while (mres==1);
114      if (mres==2)      if (mres==2)
115      {      {
# Line 123  std::cerr << "I got a res of " << mres < Line 117  std::cerr << "I got a res of " << mres <
117      }      }
118      else if (mres==3)      else if (mres==3)
119      {      {
 std::cerr << "My err string is [" << err <<"]\n";    
         
120      char* resultstr=0;      char* resultstr=0;
121      // now we ship around the error message      // now we ship around the error message
122      if (!esysUtils::shipString(err.c_str(), &resultstr, globalcom))      if (!esysUtils::shipString(err.c_str(), &resultstr, globalcom->comm))
123      {      {
124          throw SplitWorldException("MPI appears to have failed.");          throw SplitWorldException("MPI appears to have failed.");
125      }      }
126      //throw SplitWorldException("At least one Job's work() function raised an exception.");      //throw SplitWorldException("At least one Job's work() function raised an exception.");
127      std::string s("At least one Job's work() function raised the following exception:\n");      std::string s("At least one Job's work() function raised the following exception:\n");
128      s+=resultstr;      s+=resultstr;
 // std::cerr << "My combined [" << s.c_str() << std::endl;  
 //  char* testing=new char[s.size()+1];  
 //  strcpy(testing, s.c_str());  
 std::cerr << "Pre-throw [[[" << s << "]]]\n";    
129      throw SplitWorldException(s);      throw SplitWorldException(s);
130      }      }
131  }  }
# Line 295  std::cerr << "Numjobs=" << numjobs << " Line 283  std::cerr << "Numjobs=" << numjobs << "
283            
284      // MPI check to ensure that it worked for everybody      // MPI check to ensure that it worked for everybody
285      int mstat=0;      int mstat=0;
286      if (!esysUtils::checkResult(errstat, mstat, globalcom))      if (!esysUtils::checkResult(errstat, mstat, globalcom->comm))
287      {      {
288      throw SplitWorldException("MPI appears to have failed.");      throw SplitWorldException("MPI appears to have failed.");
289      }      }

Legend:
Removed from v.4746  
changed lines
  Added in v.4747

  ViewVC Help
Powered by ViewVC 1.1.26