int main(int argc, char **argv) { int me; int g_a; int status; int i,j; int dims[] = {n,n}; int proc_group[PROC_LIST_SIZE],proclist[PROC_LIST_SIZE],inode; int sbuf[1],rbuf[1]; MPI_Comm comm; MP_INIT(argc,argv); GA_Initialize(); me = GA_Nodeid(); status = MA_init(MT_DBL, 100000, 100000); if (!status) GA_Error("ma_init failed",-1); status = MA_set_auto_verify(1); status = MA_set_hard_fail(1); status = MA_set_error_print(1); inode = GA_Cluster_nodeid(); if (me == 0) { printf("there are %d nodes, node 0 has %d procs\n", GA_Cluster_nnodes(), GA_Cluster_nprocs(0)); fflush(stdout); } GA_Sync(); for (i=0; i<GA_Cluster_nnodes(); ++i) { for (j=0; j<GA_Cluster_nprocs(i); ++j) { proclist[j]=GA_Cluster_procid(i,j); } proc_group[i]=GA_Pgroup_create(proclist,GA_Cluster_nprocs(i)); } GA_Sync(); for (i=0; i<GA_Cluster_nnodes(); ++i) { if (i == inode) { printf("%d joining group %d\n", me, proc_group[inode]); GA_Pgroup_set_default(proc_group[inode]); g_a = NGA_Create(C_DBL, 2, dims, "a", NULL); if (!g_a) GA_Error("NGA_Create failed",-1); printf("%d Created array of group %d as proc no. %d\n", me, proc_group[inode], GA_Nodeid()); GA_Print_distribution(g_a); comm = GA_MPI_Comm_pgroup_default(); if (comm != MPI_COMM_NULL) { sbuf[0] = GA_Nodeid(); status = MPI_Allreduce(sbuf, rbuf, 1, MPI_INT, MPI_MAX, comm); printf("%d max nodeid is %d\n", me, rbuf[0]); if ((rbuf[0]+1) != GA_Cluster_nprocs(i)) { GA_Error("MPI_Allreduce failed",1); } } else { printf("MPI_Comm was null!\n"); } GA_Pgroup_set_default(GA_Pgroup_get_world()); } GA_Sync(); } GA_Terminate(); MP_FINALIZE(); return 0; }
// ------------------------------------------------------------- // GraphPartitionerImplementation::partition // ------------------------------------------------------------- void GraphPartitionerImplementation::partition(void) { static const bool verbose(false); // Make sure that all GA communication has been flushed from the system communicator().sync(); gridpack::utility::CoarseTimer *timer; timer = NULL; // timer = gridpack::utility::CoarseTimer::instance(); int t_total, t_adj, t_part, t_node_dest, t_edge_dest, t_gnode_dest, t_gedge_dest; if (timer != NULL) { t_total = timer->createCategory("GraphPartitioner::partition(): Total"); t_adj = timer->createCategory("GraphPartitioner::partition: Adjacency"); t_part = timer->createCategory("GraphPartitioner::partition: Partitioner"); t_node_dest = timer->createCategory("GraphPartitioner::partition: Node Destination"); t_edge_dest = timer->createCategory("GraphPartitioner::partition: Edge Destinations"); t_gnode_dest = timer->createCategory("GraphPartitioner::partition: Ghost Node Destination"); t_gedge_dest = timer->createCategory("GraphPartitioner::partition: Ghost Edge Destination"); } if (timer != NULL) timer->start(t_total); if (timer != NULL) timer->start(t_adj); p_adjacency_list.ready(); int maxdim(2); int dims[maxdim], lo[maxdim], hi[maxdim], ld[maxdim]; ld[0] = 1; ld[1] = 1; int locnodes(p_adjacency_list.nodes()); int locedges(p_adjacency_list.edges()); int allnodes; int alledges; communicator().barrier(); boost::mpi::all_reduce(communicator(), locnodes, allnodes, std::plus<int>()); boost::mpi::all_reduce(communicator(), locedges, alledges, std::plus<int>()); if (allnodes <= 0 || alledges <= 0) { boost::format fmt("%d: GraphPartitioner::partition(): called without nodes (%d) or edges (%)"); std::string msg = boost::str(fmt % communicator().worldRank() % allnodes % alledges); throw Exception(msg); } if (timer != NULL) timer->stop(t_adj); if (timer != NULL) timer->start(t_part); this->p_partition(); // fills p_node_destinations if (timer != NULL) timer->stop(t_part); // make two GAs, one that holds the node source and another that // node destination; each is indexed by global node index if (timer != NULL) timer->start(t_node_dest); int theGAgroup(communicator().getGroup()); int oldGAgroup = GA_Pgroup_get_default(); GA_Pgroup_set_default(theGAgroup); std::vector<int> nodeidx(locnodes); std::vector<int *> stupid(locnodes); for (Index n = 0; n < static_cast<Index>(locnodes); ++n) { nodeidx[n] = p_adjacency_list.node_index(n); stupid[n] = &nodeidx[n]; } dims[0] = allnodes; boost::scoped_ptr<GA::GlobalArray> node_dest(new GA::GlobalArray(MT_C_INT, 1, dims, "Node Destinations Process", NULL)), node_src(new GA::GlobalArray(MT_C_INT, 1, dims, "Node Source Process", NULL)); node_dest->scatter(&p_node_destinations[0], &stupid[0], locnodes); { std::vector<int> nsrc(locnodes, this->processor_rank()); node_src->scatter(&nsrc[0], &stupid[0], locnodes); } communicator().sync(); if (verbose) { node_src->print(); node_dest->print(); } if (timer != NULL) timer->stop(t_node_dest); // edges are assigned to the same partition as the lowest numbered // node to which it connects, which are extracted from the node // destination GA. if (timer != NULL) timer->start(t_edge_dest); nodeidx.resize(locedges); stupid.resize(locedges); std::vector<int> e1dest(locedges); for (Index e = 0; e < static_cast<Index>(locedges); ++e) { Index n1, n2; p_adjacency_list.edge(e, n1, n2); nodeidx[e] = std::min(n1, n2); stupid[e] = &nodeidx[e]; } node_dest->gather(&e1dest[0], &stupid[0], locedges); if (verbose) { for (Index e = 0; e < static_cast<Index>(locedges); ++e) { Index n1, n2; p_adjacency_list.edge(e, n1, n2); std::cout << processor_rank() << ": active edge " << e << " (" << n1 << "->" << n2 << "): " << "destination: " << e1dest[e] << std::endl; } } p_edge_destinations.clear(); p_edge_destinations.reserve(locedges); std::copy(e1dest.begin(), e1dest.end(), std::back_inserter(p_edge_destinations)); if (timer != NULL) timer->stop(t_edge_dest); // determine (possible) destinations for ghost edges (highest numbered node) if (timer != NULL) timer->start(t_gedge_dest); std::vector<int> e2dest(locedges); for (Index e = 0; e < static_cast<Index>(locedges); ++e) { Index n1, n2; p_adjacency_list.edge(e, n1, n2); nodeidx[e] = std::max(n1, n2); stupid[e] = &nodeidx[e]; } node_dest->gather(&e2dest[0], &stupid[0], locedges); if (verbose) { for (Index e = 0; e < static_cast<Index>(locedges); ++e) { Index n1, n2; p_adjacency_list.edge(e, n1, n2); std::cout << processor_rank() << ": ghost edge " << e << " (" << n1 << "->" << n2 << "): " << "destination: " << e2dest[e] << std::endl; } } communicator().sync(); // These are no longer needed node_dest.reset(); node_src.reset(); p_ghost_edge_destinations.reserve(locedges); std::copy(e2dest.begin(), e2dest.end(), std::back_inserter(p_ghost_edge_destinations)); if (timer != NULL) timer->stop(t_gedge_dest); if (timer != NULL) timer->start(t_gnode_dest); // determine destinations for ghost nodes: go thru the edges and // compare destinations of connected nodes; if they're different, // then both ends need to be ghosted (to different processors) // It's possible that edges are distributed over multiple processes, // which could result in a different set of ghost destinations for a // given node on each process. These need to be put together. // In this approach, which is really slow, take each local list of // ghost node, send it to all processes. Each process extracts the // ghost node destination for its locally owned nodes. // a particular node and destination needs to be unique, hence the // use of set<>; this may be too slow with large networks and // processors // typedef std::set< std::pair<Index, int> > DestList; // DestList gnodedest; // for (Index e = 0; e < locedges; ++e) { // Index n1, n2; // p_adjacency_list.edge(e, n1, n2); // int n1dest(e1dest[e]); // int n2dest(e2dest[e]); // if (verbose) { // std::cout << processor_rank() << ": edge " << e // << " (" << n1 << "->" << n2 << "): " // << "destinations: " << n1dest << ", " << n2dest << std::endl; // } // if (n1dest != n2dest) { // gnodedest.insert(std::make_pair(std::min(n1,n2), n2dest)); // gnodedest.insert(std::make_pair(std::max(n1,n2), n1dest)); // } // } // if (verbose) { // if (this->processor_rank() == 0) { // std::cout << "Ghost node destinations: " << std::endl; // } // for (int p = 0; p < this->processor_size(); ++p) { // if (this->processor_rank() == p) { // std::cout << p << ": "; // for (DestList::const_iterator i = gnodedest.begin(); // i != gnodedest.end(); ++i) { // std::cout << "(" << i->first << ":" << i->second << "),"; // } // std::cout << std::endl; // } // this->communicator().barrier(); // } // } // p_ghost_node_destinations.resize(locnodes); // DestList tmp; // for (int p = 0; p < this->processor_size(); ++p) { // tmp.clear(); // if (this->processor_rank() == p) { // tmp = gnodedest; // } // broadcast(communicator().getCommunicator(), tmp, p); // for (Index n = 0; n < locnodes; ++n) { // Index nodeidx(p_adjacency_list.node_index(n)); // for (DestList::const_iterator i = tmp.begin(); // i != tmp.end(); ++i) { // if (nodeidx == i->first) { // p_ghost_node_destinations[n].push_back(i->second); // } // } // } // } // Here, a 2D GA is used to store ghost node destinations. Each // process takes it's set of ghost node destinations and appends // those lists already in the GA. // Determine the maximum node connectivity. There needs to be enough // room in the GA to store all connections to a node. size_t lconn(0), maxconn(0); for (int l = 0; l < locnodes; ++l) { lconn = std::max(lconn, p_adjacency_list.node_neighbors(l)); } boost::mpi::all_reduce(communicator(), lconn, maxconn, boost::mpi::maximum<int>()); BOOST_ASSERT(maxconn >= lconn); dims[0] = allnodes; dims[1] = maxconn; ld[0] = maxconn; node_dest.reset(new GA::GlobalArray(MT_C_INT, 2, &dims[0], "Ghost node dest processes", NULL)); boost::scoped_ptr<GA::GlobalArray> node_dest_count(new GA::GlobalArray(MT_C_INT, 1, &dims[0], "Ghost node dest count", NULL)); { int bogus; bogus = -1; node_dest->fill(&bogus); bogus = 0; node_dest_count->fill(&bogus); } std::vector<int> lcount(allnodes, 0); for (int p = 0; p < this->processor_size(); ++p) { if (this->processor_rank() == p) { lo[0] = 0; hi[0] = allnodes - 1; node_dest_count->get(&lo[0], &hi[0], &lcount[0], &ld[0]); for (Index e = 0; e < static_cast<Index>(locedges); ++e) { Index n1, n2; p_adjacency_list.edge(e, n1, n2); int n1dest(e1dest[e]); int n2dest(e2dest[e]); if (verbose) { std::cout << processor_rank() << ": edge " << e << " (" << n1 << "->" << n2 << "): " << "destinations: " << n1dest << ", " << n2dest << std::endl; } if (n1dest != n2dest) { int nid, dest; nid = std::min(n1,n2); dest = n2dest; lo[0] = nid; hi[0] = lo[0]; lo[1] = lcount[nid]; hi[1] = lo[1]; node_dest->put(&lo[0], &hi[0], &dest, &ld[0]); lcount[nid] += 1; nid = std::max(n1,n2); dest = n1dest; lo[0] = nid; hi[0] = lo[0]; lo[1] = lcount[nid]; hi[1] = lo[1]; node_dest->put(&lo[0], &hi[0], &dest, &ld[0]); lcount[nid] += 1; } } lo[0] = 0; hi[0] = allnodes - 1; node_dest_count->put(&lo[0], &hi[0], &lcount[0], &ld[0]); } this->communicator().sync(); } // After all processes have made their contribution to the ghost // node destination GA, each process grabs that part that refers to // its local nodes and fills p_ghost_edge_destinations. lo[0] = 0; hi[0] = allnodes - 1; node_dest_count->get(&lo[0], &hi[0], &lcount[0], &ld[0]); p_ghost_node_destinations.clear(); p_ghost_node_destinations.resize(locnodes); std::vector<int> tmpdest(this->processor_size(), 0); for (Index n = 0; n < static_cast<Index>(locnodes); ++n) { Index nid(p_adjacency_list.node_index(n)); p_ghost_node_destinations[n].clear(); if (lcount[nid] > 0) { lo[0] = nid; hi[0] = nid; lo[1] = 0; hi[1] = lcount[nid] - 1; tmpdest.resize(lcount[nid]); node_dest->get(&lo[0], &hi[0], &tmpdest[0], &ld[0]); // there may be duplicates, so get rid of them if (tmpdest.size() > 1) { std::stable_sort(tmpdest.begin(), tmpdest.end()); std::unique(tmpdest.begin(), tmpdest.end()); } p_ghost_node_destinations[n].reserve(tmpdest.size()); std::copy(tmpdest.begin(), tmpdest.end(), std::back_inserter(p_ghost_node_destinations[n])); } } if (timer != NULL) timer->stop(t_gnode_dest); GA_Pgroup_set_default(oldGAgroup); if (timer != NULL) timer->stop(t_total); // if (timer) timer->dump(); }
/** Client code. Receives signals from the server to process a task or terminate processing and return*/ void client_code() { int *buf = NULL, buf_size; int flag; MPI_Status status; Integer p_handle; int ntsks=0, src; const char *pname = "client_code"; double e1, e2, e3, e4, e5, f1, f2, f3, f4,f5,f6,f7,f8; double t_prepar=0, t_wait_start=0, t_grp=0,t_sync=0,t_compl=0,t_dest=0; /* double get_doit_time_(); */ /* double get_esp_time_(); */ /* double get_gm_crt_time_(); */ /* double get_chrg_set_time_(); */ /* double get_gm_push_time_(); */ const int server = GA_Pgroup_absolute_id(ga_pgroup_get_default_(),SVR); const int default_grp = ga_pgroup_get_default_();; /*default GA group for this dispatcher instance*/ const int world_me = GA_Nodeid(); const int nproc = GA_Nnodes(); t_ptask = 0.0; /* fprintf(stderr, "%d: 0 server=%d %s\n", GA_Nodeid(), server,pname); */ e1 = util_wallsec_(); /* fprintf(stderr, "%d: 0 %s\n", GA_Nodeid(), pname); */ /* GA_Pgroup_set_default(GA_Pgroup_get_world()); */ /* fprintf(stderr, "%d: 1 %s\n", world_me, pname); */ buf_size = 1+ /*action to perform*/ 1+ /*task id - if TASK_SIGNAL*/ nproc /*process group info*/ ; /* buf = (int *)malloc(buf_size*sizeof(int)); */ buf = (int *)alloca(buf_size*sizeof(int)); assert(buf != NULL); /* fprintf(stderr, "%d: 2 %s\n", world_me, pname); */ e2 = util_wallsec_(); while(1) { int nelem, grp_me; Integer tskid; f1 = util_wallsec_(); /* fprintf(stderr, "%d:: Waiting for work\n", world_me); */ MPI_Recv(buf, buf_size, MPI_INT, MPI_ANY_SOURCE, SIGNAL_TAG, MPI_COMM_WORLD, &status); f2 = util_wallsec_(); t_wait_start += (f2-f1); /* fprintf(stderr, "%d:: Client got msg from %d\n", world_me, status.MPI_SOURCE); */ MPI_Get_elements(&status, MPI_INT, &nelem); assert(nelem >= 1); if(buf[0] == TERM_CLIENT) { /*process termination and return*/ /* fprintf(stderr, "%d:: Recv-ed term signal\n", GA_Nodeid()); */ /* free(buf); */ /* fprintf(stderr, "%d:: Terminating client\n", GA_Nodeid()); */ #ifdef LEADER_BCAST signal_termination(SVR,status.MPI_SOURCE); #endif break; } /* fprintf(stderr, "%d:: got a task to process\n", world_me); */ /*Got a task to process*/ assert(buf[0] == TASK_START); ntsks += 1; if(status.MPI_SOURCE == server) { qsort(buf+2, nelem-2, sizeof(int), int_compare); } f3 = util_wallsec_(); t_prepar += (f3-f2); #if LEADER_BCAST src = (server==status.MPI_SOURCE)?buf[2]:status.MPI_SOURCE; broadcast(nelem-2,buf+2,buf[2],src,buf,nelem*sizeof(int)); #endif /*The proc ids are in world group. So create sub-group of world group*/ GA_Pgroup_set_default(GA_Pgroup_get_world()); p_handle = GA_Pgroup_create(&buf[2], nelem-2); GA_Pgroup_set_default(p_handle); /* GA_Pgroup_sync(p_handle); */ f4 = MPI_Wtime(); t_grp += (f4-f3); tskid = buf[1]; /* fprintf(stderr, "%d(%d):: Invoking process task tskid=%d\n", grp_me, world_me, tskid); */ process_task_(&tskid, &p_handle); f5 = MPI_Wtime(); t_ptask += (f5-f4); GA_Pgroup_sync(p_handle); grp_me = GA_Nodeid(); f6 = util_wallsec_(); t_sync += (f6-f5); if(grp_me == 0) { int v[2] = {TASK_DONE, tskid}; /* fprintf(stderr, "%d(%d):: Sending ack for task %d to %d\n", */ /* grp_me, world_me, tskid, SERVER); */ MPI_Send(v, 2, MPI_INT, server, SIGNAL_TAG, MPI_COMM_WORLD); } f7 = util_wallsec_(); t_compl += (f7-f6); /* GA_Pgroup_sync(p_handle); */ GA_Pgroup_destroy(p_handle); GA_Pgroup_set_default(default_grp); f8 = util_wallsec_(); t_dest += (f8-f7); } e3 = util_wallsec_(); /* fprintf(stderr, "%d:: CLIENT total time=%lf\n", ga_nodeid_(), e3-e1); */ /* fprintf(stderr, "%d:: CLIENT ntsks=%d\n", ga_nodeid_(), ntsks); */ /* fprintf(stderr, "%d:: CLIENT loop time=%lf\n", ga_nodeid_(), e3-e2); */ /* fprintf(stderr, "%d:: CLIENT wait start time=%lf\n", ga_nodeid_(),t_wait_start); */ /* fprintf(stderr, "%d:: CLIENT prepare time=%lf\n", ga_nodeid_(),t_prepar); */ /* fprintf(stderr, "%d:: CLIENT grp crt time=%lf\n", ga_nodeid_(), t_grp); */ /* fprintf(stderr, "%d:: CLIENT ptask time=%lf\n", ga_nodeid_(), t_ptask); */ /* fprintf(stderr, "%d:: CLIENT sync time=%lf\n", ga_nodeid_(), t_sync); */ /* fprintf(stderr, "%d:: CLIENT compl time=%lf\n", ga_nodeid_(), t_compl); */ /* fprintf(stderr, "%d:: CLIENT grp dstry time=%lf\n", ga_nodeid_(), t_dest); */ /* fflush(stdout); */ /* fprintf(stderr, "%d:: CLIENT doit time=%lf\n",ga_nodeid_(),get_doit_time_()); */ /* fprintf(stderr, "%d:: CLIENT esp time=%lf\n",ga_nodeid_(),get_esp_time_()); */ /* fprintf(stderr, "%d:: CLIENT chrg_set time=%lf\n",ga_nodeid_(),get_chrg_set_time_()); */ /* fprintf(stderr, "%d:: CLIENT gm_crt time=%lf\n",ga_nodeid_(),get_gm_crt_time_()); */ /* fprintf(stderr, "%d:: CLIENT gm_push time=%lf\n",ga_nodeid_(),get_gm_push_time_()); */ }