// Example no. 1
// 0
int main(int argc, char **argv) {
    int me;
    int g_a;
    int status;
    int i,j;
    int dims[] = {n,n};
    int proc_group[PROC_LIST_SIZE],proclist[PROC_LIST_SIZE],inode;
    int sbuf[1],rbuf[1];
    MPI_Comm comm;

    MP_INIT(argc,argv);
    GA_Initialize();
    me = GA_Nodeid();

    status = MA_init(MT_DBL, 100000, 100000);
    if (!status) GA_Error("ma_init failed",-1);
    status = MA_set_auto_verify(1);
    status = MA_set_hard_fail(1);
    status = MA_set_error_print(1);

    inode = GA_Cluster_nodeid();
    if (me == 0) {
        printf("there are %d nodes, node 0 has %d procs\n",
                GA_Cluster_nnodes(), GA_Cluster_nprocs(0));
        fflush(stdout);
    }
    GA_Sync();
    for (i=0; i<GA_Cluster_nnodes(); ++i) {
        for (j=0; j<GA_Cluster_nprocs(i); ++j) {
            proclist[j]=GA_Cluster_procid(i,j);
        }
        proc_group[i]=GA_Pgroup_create(proclist,GA_Cluster_nprocs(i));
    }
    GA_Sync();
    for (i=0; i<GA_Cluster_nnodes(); ++i) {
        if (i == inode) {
            printf("%d joining group %d\n", me, proc_group[inode]);
            GA_Pgroup_set_default(proc_group[inode]);
            g_a = NGA_Create(C_DBL, 2, dims, "a", NULL);
            if (!g_a) GA_Error("NGA_Create failed",-1);
            printf("%d Created array of  group %d as proc no. %d\n",
                    me, proc_group[inode], GA_Nodeid());
            GA_Print_distribution(g_a);
            comm = GA_MPI_Comm_pgroup_default();
            if (comm != MPI_COMM_NULL) {
                sbuf[0] = GA_Nodeid();
                status = MPI_Allreduce(sbuf, rbuf, 1, MPI_INT, MPI_MAX, comm);
                printf("%d max nodeid is %d\n", me, rbuf[0]);
                if ((rbuf[0]+1) != GA_Cluster_nprocs(i)) {
                    GA_Error("MPI_Allreduce failed",1);
                }
            }
            else {
                printf("MPI_Comm was null!\n");
            }
            GA_Pgroup_set_default(GA_Pgroup_get_world());
        }
        GA_Sync();
    }

    GA_Terminate();
    MP_FINALIZE();

    return 0;
}
// -------------------------------------------------------------
// GraphPartitionerImplementation::partition
// -------------------------------------------------------------
/**
 * Partition the graph held in p_adjacency_list and determine the
 * destination process of every locally held node and edge.
 *
 * Members rebuilt by this call:
 *  - p_node_destinations: filled by p_partition()
 *  - p_edge_destinations: per local edge, the destination of its
 *    lowest numbered node
 *  - p_ghost_edge_destinations: per local edge, the destination of
 *    its highest numbered node
 *  - p_ghost_node_destinations: per local node, the sorted list of
 *    distinct destinations recorded for it by edges whose two end
 *    nodes go to different processes
 *
 * Every process in communicator() must call this routine: it uses
 * sync(), barrier() and all_reduce() on that communicator.  The GA
 * default process group is switched to the communicator's group for
 * the duration of the call and restored on normal return.
 *
 * @throws Exception if the global node or edge count is not positive
 */
void
GraphPartitionerImplementation::partition(void)
{
  static const bool verbose(false);

  // Make sure that all GA communication has been flushed from the system
  communicator().sync();

  // Timing is off while timer is NULL; enable the assignment below
  // to collect timings.
  gridpack::utility::CoarseTimer *timer;
  timer = NULL;
  // timer = gridpack::utility::CoarseTimer::instance();

  // Timer category handles; only meaningful when timer is not NULL
  int t_total(0), t_adj(0), t_part(0), t_node_dest(0), t_edge_dest(0),
    t_gnode_dest(0), t_gedge_dest(0);

  if (timer != NULL) {
    t_total = timer->createCategory("GraphPartitioner::partition(): Total");
    t_adj = timer->createCategory("GraphPartitioner::partition: Adjacency");
    t_part = timer->createCategory("GraphPartitioner::partition: Partitioner");
    t_node_dest = timer->createCategory("GraphPartitioner::partition: Node Destination");
    t_edge_dest = timer->createCategory("GraphPartitioner::partition: Edge Destinations");
    t_gnode_dest = timer->createCategory("GraphPartitioner::partition: Ghost Node Destination");
    t_gedge_dest = timer->createCategory("GraphPartitioner::partition: Ghost Edge Destination");
  }

  if (timer != NULL) timer->start(t_total);

  if (timer != NULL) timer->start(t_adj);

  p_adjacency_list.ready();

  // maxdim sizes the arrays below, so it has to be a constant
  // expression (a non-const int would make them VLAs)
  const int maxdim(2);
  int dims[maxdim], lo[maxdim], hi[maxdim], ld[maxdim];
  ld[0] = 1;
  ld[1] = 1;

  int locnodes(p_adjacency_list.nodes());
  int locedges(p_adjacency_list.edges());
  int allnodes;
  int alledges;

  communicator().barrier();
  boost::mpi::all_reduce(communicator(),
                         locnodes, allnodes, std::plus<int>());
  boost::mpi::all_reduce(communicator(),
                         locedges, alledges, std::plus<int>());

  // The counts are global, so every process takes the same branch here
  if (allnodes <= 0 || alledges <= 0) {
    boost::format fmt("%d: GraphPartitioner::partition(): called without nodes (%d) or edges (%d)");

    std::string msg = boost::str(fmt % communicator().worldRank() % allnodes % alledges);
    throw Exception(msg);
  }

  if (timer != NULL) timer->stop(t_adj);

  if (timer != NULL) timer->start(t_part);

  this->p_partition();          // fills p_node_destinations

  if (timer != NULL) timer->stop(t_part);

  // make two GAs, one that holds the node source process and another
  // that holds the node destination process; each is indexed by
  // global node index

  if (timer != NULL) timer->start(t_node_dest);
  int theGAgroup(communicator().getGroup());
  int oldGAgroup = GA_Pgroup_get_default();
  GA_Pgroup_set_default(theGAgroup);

  // GA scatter/gather take an array of pointers to subscripts; for a
  // 1D array each subscript is a single int, so point into nodeidx
  std::vector<int> nodeidx(locnodes);
  std::vector<int *> subscripts(locnodes);
  for (Index n = 0; n < static_cast<Index>(locnodes); ++n) {
    nodeidx[n] = p_adjacency_list.node_index(n);
    subscripts[n] = &nodeidx[n];
  }

  dims[0] = allnodes;
  boost::scoped_ptr<GA::GlobalArray>
    node_dest(new GA::GlobalArray(MT_C_INT, 1, dims, "Node Destinations Process", NULL)),
    node_src(new GA::GlobalArray(MT_C_INT, 1, dims, "Node Source Process", NULL));

  // A process may own no nodes; taking &v[0] of an empty vector is
  // undefined, and there is nothing to scatter anyway (scatter is a
  // one-sided GA operation, so skipping it locally is safe)
  if (locnodes > 0) {
    node_dest->scatter(&p_node_destinations[0], &subscripts[0], locnodes);

    std::vector<int> nsrc(locnodes, this->processor_rank());
    node_src->scatter(&nsrc[0], &subscripts[0], locnodes);
  }

  communicator().sync();

  if (verbose) {
    node_src->print();
    node_dest->print();
  }

  if (timer != NULL) timer->stop(t_node_dest);

  // edges are assigned to the same partition as the lowest numbered
  // node to which it connects, which are extracted from the node
  // destination GA.

  if (timer != NULL) timer->start(t_edge_dest);

  nodeidx.resize(locedges);
  subscripts.resize(locedges);
  std::vector<int> e1dest(locedges);

  for (Index e = 0; e < static_cast<Index>(locedges); ++e) {
    Index n1, n2;
    p_adjacency_list.edge(e, n1, n2);
    nodeidx[e] = std::min(n1, n2);
    subscripts[e] = &nodeidx[e];
  }

  // same empty-vector guard as for the node scatter above
  if (locedges > 0) {
    node_dest->gather(&e1dest[0], &subscripts[0], locedges);
  }

  if (verbose) {
    for (Index e = 0; e < static_cast<Index>(locedges); ++e) {
      Index n1, n2;
      p_adjacency_list.edge(e, n1, n2);
      std::cout << processor_rank() << ": active edge " << e
                << " (" << n1 << "->" << n2 << "): "
                << "destination: " << e1dest[e] << std::endl;
    }
  }

  p_edge_destinations.assign(e1dest.begin(), e1dest.end());

  if (timer != NULL) timer->stop(t_edge_dest);

  // determine (possible) destinations for ghost edges (highest numbered node)

  if (timer != NULL) timer->start(t_gedge_dest);

  std::vector<int> e2dest(locedges);
  for (Index e = 0; e < static_cast<Index>(locedges); ++e) {
    Index n1, n2;
    p_adjacency_list.edge(e, n1, n2);
    nodeidx[e] = std::max(n1, n2);
    subscripts[e] = &nodeidx[e];
  }

  if (locedges > 0) {
    node_dest->gather(&e2dest[0], &subscripts[0], locedges);
  }

  if (verbose) {
    for (Index e = 0; e < static_cast<Index>(locedges); ++e) {
      Index n1, n2;
      p_adjacency_list.edge(e, n1, n2);
      std::cout << processor_rank() << ": ghost edge " << e
                << " (" << n1 << "->" << n2 << "): "
                << "destination: " << e2dest[e] << std::endl;
    }
  }

  communicator().sync();

  // These are no longer needed

  node_dest.reset();
  node_src.reset();

  // assign() replaces any previous contents, so a repeated call to
  // partition() does not append to the results of an earlier one
  p_ghost_edge_destinations.assign(e2dest.begin(), e2dest.end());

  if (timer != NULL) timer->stop(t_gedge_dest);

  if (timer != NULL) timer->start(t_gnode_dest);

  // determine destinations for ghost nodes: go thru the edges and
  // compare destinations of connected nodes; if they're different,
  // then both ends need to be ghosted (to different processors)

  // It's possible that edges are distributed over multiple processes,
  // which could result in a different set of ghost destinations for a
  // given node on each process. These need to be put together.

  // An earlier implementation collected (node, destination) pairs in
  // a std::set on each process and broadcast every process's set to
  // all others; it was really slow and has been replaced by the
  // GA-based approach below.

  // Here, a 2D GA is used to store ghost node destinations.  Each
  // process takes its set of ghost node destinations and appends
  // those lists already in the GA.

  // Determine the maximum node connectivity. There needs to be enough
  // room in the GA to store all connections to a node.

  size_t lconn(0), maxconn(0);

  for (int l = 0; l < locnodes; ++l) {
    lconn = std::max(lconn,
                     p_adjacency_list.node_neighbors(l));
  }
  // reduce with the operand's own type (was maximum<int> on size_t)
  boost::mpi::all_reduce(communicator(), lconn, maxconn,
                         boost::mpi::maximum<size_t>());
  BOOST_ASSERT(maxconn >= lconn);

  dims[0] = allnodes;
  dims[1] = static_cast<int>(maxconn);
  ld[0] = static_cast<int>(maxconn);

  // node_dest: row = global node index, columns = destinations
  // recorded so far; node_dest_count: number of columns in use per row
  node_dest.reset(new GA::GlobalArray(MT_C_INT, 2, &dims[0],
                                      "Ghost node dest processes", NULL));
  boost::scoped_ptr<GA::GlobalArray>
    node_dest_count(new GA::GlobalArray(MT_C_INT, 1, &dims[0],
                                        "Ghost node dest count", NULL));

  {
    int bogus;
    bogus = -1; node_dest->fill(&bogus);
    bogus = 0; node_dest_count->fill(&bogus);
  }

  // Processes take turns (one per loop iteration, separated by
  // sync()) so that each sees the counts left by the previous ones
  // and appends after them.
  std::vector<int> lcount(allnodes, 0);
  for (int p = 0; p < this->processor_size(); ++p) {
    if (this->processor_rank() == p) {
      lo[0] = 0; hi[0] = allnodes - 1;
      node_dest_count->get(&lo[0], &hi[0], &lcount[0], &ld[0]);

      for (Index e = 0; e < static_cast<Index>(locedges); ++e) {
        Index n1, n2;
        p_adjacency_list.edge(e, n1, n2);

        int n1dest(e1dest[e]);
        int n2dest(e2dest[e]);

        if (verbose) {
          std::cout << processor_rank() << ": edge " << e
                    << " (" << n1 << "->" << n2 << "): "
                    << "destinations: " << n1dest << ", " << n2dest << std::endl;
        }

        if (n1dest != n2dest) {
          int nid, dest;

          // the lower numbered node is ghosted to the higher one's
          // destination ...
          nid = std::min(n1,n2);
          dest = n2dest;
          lo[0] = nid; hi[0] = lo[0];
          lo[1] = lcount[nid]; hi[1] = lo[1];
          node_dest->put(&lo[0], &hi[0], &dest, &ld[0]);
          lcount[nid] += 1;

          // ... and vice versa
          nid = std::max(n1,n2);
          dest = n1dest;
          lo[0] = nid; hi[0] = lo[0];
          lo[1] = lcount[nid]; hi[1] = lo[1];
          node_dest->put(&lo[0], &hi[0], &dest, &ld[0]);
          lcount[nid] += 1;
        }
      }
      lo[0] = 0; hi[0] = allnodes - 1;
      node_dest_count->put(&lo[0], &hi[0], &lcount[0], &ld[0]);
    }
    this->communicator().sync();
  }

  // After all processes have made their contribution to the ghost
  // node destination GA, each process grabs that part that refers to
  // its local nodes and fills p_ghost_node_destinations.

  lo[0] = 0; hi[0] = allnodes - 1;
  node_dest_count->get(&lo[0], &hi[0], &lcount[0], &ld[0]);

  p_ghost_node_destinations.clear();
  p_ghost_node_destinations.resize(locnodes);
  std::vector<int> tmpdest(this->processor_size(), 0);
  for (Index n = 0; n < static_cast<Index>(locnodes); ++n) {
    Index nid(p_adjacency_list.node_index(n));

    if (lcount[nid] > 0) {
      lo[0] = nid;
      hi[0] = nid;
      lo[1] = 0;
      hi[1] = lcount[nid] - 1;
      tmpdest.resize(lcount[nid]);
      node_dest->get(&lo[0], &hi[0], &tmpdest[0], &ld[0]);

      // there may be duplicates, so get rid of them; std::unique only
      // shifts the distinct values to the front, the tail has to be
      // erased explicitly
      std::sort(tmpdest.begin(), tmpdest.end());
      tmpdest.erase(std::unique(tmpdest.begin(), tmpdest.end()),
                    tmpdest.end());

      p_ghost_node_destinations[n].assign(tmpdest.begin(), tmpdest.end());
    }
  }

  if (timer != NULL) timer->stop(t_gnode_dest);

  GA_Pgroup_set_default(oldGAgroup);

  if (timer != NULL) timer->stop(t_total);
  // if (timer) timer->dump();
}
// Example no. 3
// 0
/** Client code. Receives signals from the server to process a task or
    terminate processing and return.

    Each loop iteration blocks in MPI_Recv for a SIGNAL_TAG message with
    this layout:
      buf[0]    action: TERM_CLIENT or TASK_START
      buf[1]    task id (TASK_START only)
      buf[2..]  world ranks of the processes forming the task's group

    For TASK_START the listed processes create a GA process group, make
    it the default group, run process_task_() on it, synchronise, and the
    group's rank 0 sends {TASK_DONE, task id} back to the server.  The
    group is then destroyed and the GA default group that was active on
    entry is restored.  TERM_CLIENT ends the loop.

    Side effects: resets the file-level accumulator t_ptask to 0 and adds
    the time spent in process_task_() to it. */
void client_code() {
  int *buf = NULL, buf_size;
  MPI_Status status;
  Integer p_handle;
  int ntsks=0, src;
  const char *pname = "client_code";
  /* e*: coarse phase timestamps; f*: per-iteration timestamps.  They only
     feed the (currently commented-out) report at the end of the function. */
  double e1, e2, e3, f1, f2, f3, f4,f5,f6,f7,f8;
  double t_prepar=0, t_wait_start=0, t_grp=0,t_sync=0,t_compl=0,t_dest=0;
/*   double get_doit_time_(); */
/*   double get_esp_time_(); */
/*   double get_gm_crt_time_(); */
/*   double get_chrg_set_time_(); */
/*   double get_gm_push_time_(); */
  const int server = GA_Pgroup_absolute_id(ga_pgroup_get_default_(),SVR);
  const int default_grp = ga_pgroup_get_default_(); /*default GA group for this dispatcher instance*/
  const int world_me = GA_Nodeid();
  const int nproc = GA_Nnodes();

  t_ptask = 0.0;
/*   fprintf(stderr, "%d: 0 server=%d %s\n", GA_Nodeid(), server,pname); */

  e1 = util_wallsec_();
/*   fprintf(stderr, "%d: 0 %s\n", GA_Nodeid(), pname); */

/*   GA_Pgroup_set_default(GA_Pgroup_get_world()); */

/*   fprintf(stderr, "%d: 1 %s\n", world_me, pname); */

  /* Largest possible message: a task whose group is every process */
  buf_size = 1+ /*action to perform*/
    1+ /*task id - if TASK_SIGNAL*/
    nproc /*process group info*/
    ;

/*   buf = (int *)malloc(buf_size*sizeof(int)); */
  /* Stack allocation: released automatically when the function returns.
     NOTE(review): alloca is not expected to signal failure by returning
     NULL, so the assert below is unlikely to ever fire -- confirm. */
  buf = (int *)alloca(buf_size*sizeof(int));
  assert(buf != NULL);

/*   fprintf(stderr, "%d: 2 %s\n", world_me, pname); */

  e2 = util_wallsec_();
  while(1) {
    int nelem, grp_me;
    Integer tskid;

    f1 = util_wallsec_();
/*     fprintf(stderr, "%d:: Waiting for work\n", world_me); */
    MPI_Recv(buf, buf_size, MPI_INT, MPI_ANY_SOURCE, SIGNAL_TAG, MPI_COMM_WORLD, &status);
    f2 = util_wallsec_();
    t_wait_start += (f2-f1);
/*     fprintf(stderr, "%d:: Client got msg from %d\n", world_me, status.MPI_SOURCE); */

    /* Number of ints actually received; the group list length varies */
    MPI_Get_elements(&status, MPI_INT, &nelem);
    assert(nelem >= 1);
      
    if(buf[0] == TERM_CLIENT) {
      /*process termination and return*/
/*        fprintf(stderr, "%d:: Recv-ed term signal\n", GA_Nodeid()); */
/*       free(buf); */
/*       fprintf(stderr, "%d:: Terminating client\n", GA_Nodeid()); */
      /* NOTE(review): this guard uses #ifdef while the broadcast below uses
         #if; they disagree when LEADER_BCAST is defined as 0 -- confirm
         which form is intended. */
#ifdef LEADER_BCAST
      signal_termination(SVR,status.MPI_SOURCE);
#endif
      break;
    }
/*     fprintf(stderr, "%d:: got a task to process\n", world_me); */
    /*Got a task to process*/
    assert(buf[0] == TASK_START);
    /* A task message needs the action, the task id and at least one group
       member; otherwise nelem-2 below would be zero or negative. */
    assert(nelem >= 3);
    ntsks += 1;

    /* Only a message that came straight from the server gets its group
       list sorted here */
    if(status.MPI_SOURCE == server) {
      qsort(buf+2, nelem-2, sizeof(int), int_compare);
    }
    f3  = util_wallsec_();
    t_prepar += (f3-f2);

#if LEADER_BCAST
    src = (server==status.MPI_SOURCE)?buf[2]:status.MPI_SOURCE;
    broadcast(nelem-2,buf+2,buf[2],src,buf,nelem*sizeof(int));
#endif

    /*The proc ids are in world group. So create sub-group of world group*/
    GA_Pgroup_set_default(GA_Pgroup_get_world());
    p_handle = GA_Pgroup_create(&buf[2], nelem-2);
    GA_Pgroup_set_default(p_handle);
/*     GA_Pgroup_sync(p_handle); */
    /* NOTE(review): f4/f5 come from MPI_Wtime() while f3/f6 come from
       util_wallsec_(); t_grp and t_sync are only meaningful if both clocks
       share an origin -- confirm.  t_ptask (f5-f4) is not affected. */
    f4 = MPI_Wtime();
    t_grp += (f4-f3);

    tskid = buf[1];
/*     fprintf(stderr, "%d(%d):: Invoking process task tskid=%d\n", grp_me, world_me, tskid); */
    process_task_(&tskid, &p_handle);
    f5 = MPI_Wtime();
    t_ptask += (f5-f4);
    
    GA_Pgroup_sync(p_handle);
    /* The task group is the default group here, so this is the rank
       within the task group */
    grp_me = GA_Nodeid();
    f6 = util_wallsec_();
    t_sync += (f6-f5);

    /* One acknowledgement per task, sent by the group's rank 0 */
    if(grp_me == 0) {
      int v[2] = {TASK_DONE, tskid};
/*        fprintf(stderr, "%d(%d):: Sending ack for task %d to %d\n", */
/*  	      grp_me, world_me, tskid, SERVER); */
      MPI_Send(v, 2, MPI_INT, server, SIGNAL_TAG, MPI_COMM_WORLD);
    }
    f7 = util_wallsec_();
    t_compl += (f7-f6);
/*     GA_Pgroup_sync(p_handle); */
    GA_Pgroup_destroy(p_handle);
    GA_Pgroup_set_default(default_grp);
    f8 = util_wallsec_();
    t_dest += (f8-f7);
  }
  e3 = util_wallsec_();
/*   fprintf(stderr, "%d:: CLIENT total time=%lf\n", ga_nodeid_(), e3-e1); */
/*   fprintf(stderr, "%d:: CLIENT ntsks=%d\n", ga_nodeid_(), ntsks); */
/*   fprintf(stderr, "%d:: CLIENT loop time=%lf\n", ga_nodeid_(), e3-e2); */
/*   fprintf(stderr, "%d:: CLIENT wait start time=%lf\n", ga_nodeid_(),t_wait_start); */
/*   fprintf(stderr, "%d:: CLIENT prepare time=%lf\n", ga_nodeid_(),t_prepar); */
/*   fprintf(stderr, "%d:: CLIENT grp crt time=%lf\n", ga_nodeid_(), t_grp); */
/*   fprintf(stderr, "%d:: CLIENT ptask time=%lf\n", ga_nodeid_(), t_ptask); */
/*   fprintf(stderr, "%d:: CLIENT sync time=%lf\n", ga_nodeid_(), t_sync); */
/*   fprintf(stderr, "%d:: CLIENT compl time=%lf\n", ga_nodeid_(), t_compl); */
/*   fprintf(stderr, "%d:: CLIENT grp dstry time=%lf\n", ga_nodeid_(), t_dest); */
/*   fflush(stdout); */
/*   fprintf(stderr, "%d:: CLIENT doit time=%lf\n",ga_nodeid_(),get_doit_time_()); */
/*   fprintf(stderr, "%d:: CLIENT esp time=%lf\n",ga_nodeid_(),get_esp_time_()); */
/*   fprintf(stderr, "%d:: CLIENT chrg_set time=%lf\n",ga_nodeid_(),get_chrg_set_time_()); */
/*   fprintf(stderr, "%d:: CLIENT gm_crt time=%lf\n",ga_nodeid_(),get_gm_crt_time_()); */
/*   fprintf(stderr, "%d:: CLIENT gm_push time=%lf\n",ga_nodeid_(),get_gm_push_time_()); */
}