/* Report partition statistics for a graph.
 *
 * Reduces the per-process vertex count and edge count over all processes and
 * prints the minimum, maximum, total, average and imbalance (max / average)
 * of each, followed by the summed edge cut. Only the process whose
 * PCU_Comm_Self() is 0 prints; every process takes part in the reductions.
 */
void partitionInfo(agi::binGraph* g) {
  lid_t local_vtx_count = g->numTotalVtxs();
  lid_t global_vtx_count = g->numGlobalVtxs();
  /* Edges of the default type plus edges of the split type. */
  lid_t local_edge_count = g->numLocalEdges() + g->numLocalEdges(SPLIT_TYPE);

  /* ---- vertex statistics ---- */
  lid_t vtx_min = PCU_Min_Int(local_vtx_count);
  lid_t vtx_max = PCU_Max_Int(local_vtx_count);
  lid_t vtx_sum = PCU_Add_Long(local_vtx_count);
  double vtx_avg = static_cast<double>(vtx_sum) / PCU_Comm_Peers();
  double vtx_imb = vtx_max / vtx_avg;
  /* Percentage by which the summed per-process count exceeds the global count. */
  double vtx_inc =
      static_cast<double>(vtx_sum - global_vtx_count) / global_vtx_count * 100;
  if (!PCU_Comm_Self())
    printf("Vertices: Min %lu Max %lu Tot %lu Inc %1.4f Avg %1.4f Imb %1.3f\n",
           vtx_min, vtx_max, vtx_sum, vtx_inc, vtx_avg, vtx_imb);

  /* ---- edge statistics ---- */
  lid_t edge_min = PCU_Min_Int(local_edge_count);
  lid_t edge_max = PCU_Max_Int(local_edge_count);
  lid_t edge_sum = PCU_Add_Long(local_edge_count);
  double edge_avg = static_cast<double>(edge_sum) / PCU_Comm_Peers();
  double edge_imb = edge_max / edge_avg;
  if (!PCU_Comm_Self())
    printf("Edges: Min %lu Max %lu Tot %lu Avg %1.4f Imb %1.3f\n",
           edge_min, edge_max, edge_sum, edge_avg, edge_imb);

  /* ---- edge cut ---- */
  /* Count local edges whose v() endpoint is owned by another process. */
  lid_t cut = 0;
  agi::EdgeIterator* edge_it = g->begin(0);
  int visited = 0;
  while (visited < g->numLocalEdges()) {
    agi::GraphEdge* edge = g->iterate(edge_it);
    if (g->owner(g->v(edge)) != PCU_Comm_Self())
      ++cut;
    ++visited;
  }
  /* Every split-type edge is added to the cut as well. */
  cut += g->numLocalEdges(SPLIT_TYPE);
  cut = PCU_Add_Long(cut);
  if (!PCU_Comm_Self())
    printf("Edge Cut: %lu\n", cut);
}
/* Fill `ranks` with a block distribution of the global vertices.
 *
 * Vertex i is assigned to rank i / block, where
 * block = num_global_verts / PCU_Comm_Peers() + 1.
 *
 * ranks: output array; entries [0, num_global_verts) are written.
 * Returns 0.
 */
int binGraph::vert_block_ranks(int32_t* ranks) {
  const uint64_t block = num_global_verts / (uint64_t)PCU_Comm_Peers() + 1;
  uint64_t vtx = 0;
  while (vtx < num_global_verts) {
    ranks[vtx] = vtx / block;
    ++vtx;
  }
  return 0;
}
/* Callback that records a copy living on this same process.
 *
 * Copies whose part differs from PCU_Comm_Self() are ignored. Otherwise one
 * of two peer ids is noted in `u`: PCU_Comm_Self() when i is less than the
 * index of the copy's entity, and PCU_Comm_Peers() otherwise.
 */
static void note_local_link(mds_id i, struct mds_copy c, void* u)
{
  int peer;
  if (c.p != PCU_Comm_Self())
    return;
  if (i < mds_index(c.e))
    peer = PCU_Comm_Self();
  else
    peer = PCU_Comm_Peers(); /* hack id to store self-receivers */
  note_peer(u, peer);
}
/* Release the two link lists that describe on-process matches.
 *
 * Looks up the slot stored under PCU_Comm_Self(); if it is absent nothing is
 * done. Otherwise that slot and the one stored under PCU_Comm_Peers() have
 * their counts zeroed, their arrays freed and their pointers set to NULL.
 */
void mds_free_local_links(struct mds_links* ln)
{
  int send_slot;
  int recv_slot;
  send_slot = find_peer(ln, PCU_Comm_Self());
  if (send_slot == -1)
    return;
  recv_slot = find_peer(ln, PCU_Comm_Peers());
  ln->n[recv_slot] = 0;
  ln->n[send_slot] = 0;
  free(ln->l[send_slot]);
  free(ln->l[recv_slot]);
  ln->l[recv_slot] = NULL;
  ln->l[send_slot] = NULL;
}
//TODO: optimize these operations using PCU //Private Functions etype binGraph::load_edges(char *filename, uint64_t*& read_edges, uint64_t& m_read) { FILE *infp = fopen(filename, "rb"); fseek(infp, 0L, SEEK_END); uint64_t file_size = ftell(infp); fseek(infp, 0L, SEEK_SET); etype t = addEdgeType(); num_global_edges[t] = file_size/(2*sizeof(uint32_t)); uint64_t read_offset_start = PCU_Comm_Self()*2*sizeof(uint32_t)* (num_global_edges[t] / (uint64_t)PCU_Comm_Peers()); uint64_t read_offset_end = (PCU_Comm_Self()+1)*2*sizeof(uint32_t)* (num_global_edges[t] / (uint64_t)PCU_Comm_Peers()); if (PCU_Comm_Self() == PCU_Comm_Peers() - 1) read_offset_end = 2*sizeof(uint32_t)*num_global_edges[t]; m_read = (read_offset_end - read_offset_start)/(2*sizeof(uint32_t)); uint32_t* temp_read = (uint32_t*)malloc(2*m_read*sizeof(uint32_t)); read_edges = (uint64_t*)malloc(2*m_read*sizeof(uint64_t)); fseek(infp, read_offset_start, SEEK_SET); fread(temp_read, m_read, 2*sizeof(uint32_t), infp); fclose(infp); for (uint64_t i = 0; i < m_read*2; ++i) read_edges[i] = (uint64_t)temp_read[i]; free(temp_read); num_global_verts = 0; for (uint64_t i = 0; i < m_read*2; ++i) if (read_edges[i] > num_global_verts) { num_global_verts = read_edges[i]; } MPI_Allreduce(MPI_IN_PLACE, &num_global_verts, 1, MPI_UINT64_T, MPI_MAX, PCU_Get_Comm()); num_global_verts += 1; return t; }
/* Callback that stores one on-process match into the link lists.
 *
 * `u` is the struct mds_links being filled. When the copy is on this process
 * and i is less than the index of the copy's entity, i is appended to the
 * list found under PCU_Comm_Self() and the copy's index is appended to the
 * list found under PCU_Comm_Peers().
 */
static void take_local_link(mds_id i, struct mds_copy c, void* u)
{
  struct mds_links* links = u;
  int send_slot = find_peer(links, PCU_Comm_Self());
  int recv_slot = find_peer(links, PCU_Comm_Peers());
  mds_id j = mds_index(c.e);
  if (PCU_Comm_Self() != c.p)
    return;
  if (!(i < j))
    return;
  /* use ns as (redundant) position keepers */
  links->l[send_slot][links->n[send_slot]] = i;
  links->l[recv_slot][links->n[recv_slot]] = j;
  links->n[send_slot] += 1;
  links->n[recv_slot] += 1;
}
void mds_get_local_matches(struct mds_net* net, struct mds* m, int t, struct mds_links* ln) { int self, other; for_type_net(net, m, t, note_local_link, ln); self = find_peer(ln, PCU_Comm_Self()); if (self == -1) return; other = find_peer(ln, PCU_Comm_Peers()); assert(ln->n[self] == ln->n[other]); ln->l[self] = malloc(ln->n[self] * sizeof(unsigned)); ln->l[other] = malloc(ln->n[other] * sizeof(unsigned)); ln->n[self] = 0; ln->n[other] = 0; for_type_net(net, m, t, take_local_link, ln); }
/* algorithm courtesy of Sebastian Rettenberger: use brokers/routers for the vertex global ids. Although we have used this trick before (see mpas/apfMPAS.cc), I didn't think to use it here, so credit is given. */ static void constructResidence(Mesh2* m, GlobalToVert& globalToVert) { Gid max = getMax(globalToVert); Gid total = max + 1; int peers = PCU_Comm_Peers(); int quotient = total / peers; int remainder = total % peers; int mySize = quotient; int self = PCU_Comm_Self(); if (self == (peers - 1)) mySize += remainder; typedef std::vector< std::vector<int> > TmpParts; TmpParts tmpParts(mySize); /* if we have a vertex, send its global id to the broker for that global id */ PCU_Comm_Begin(); APF_ITERATE(GlobalToVert, globalToVert, it) { int gid = it->first; int to = std::min(peers - 1, gid / quotient); PCU_COMM_PACK(to, gid); }
/* Apply the on-process matches stored in `ln` to entities of type t.
 *
 * Looks up the list stored under PCU_Comm_Self(); if it is absent nothing is
 * done. Otherwise, for every position i, the entity from that list and the
 * entity at the same position in the list stored under PCU_Comm_Peers() are
 * each given a copy entry pointing at the other, with the copy's part set to
 * PCU_Comm_Self().
 */
void mds_set_local_matches(struct mds_net* net, struct mds* m,
                         int t, struct mds_links* ln)
{
  int self, other;
  unsigned i;
  mds_id a, b;
  struct mds_copy c;
  c.p = PCU_Comm_Self();
  self = find_peer(ln, PCU_Comm_Self());
  if (self == -1)
    return;
  other = find_peer(ln, PCU_Comm_Peers());
  /* Both lists must have the same length. This has to be a comparison: an
     assignment here would overwrite ln->n[self] and fail on a zero count. */
  assert(ln->n[self] == ln->n[other]);
  for (i = 0; i < ln->n[self]; ++i) {
    a = mds_identify(t, ln->l[self][i]);
    b = mds_identify(t, ln->l[other][i]);
    /* Link both directions: a -> b and b -> a. */
    c.e = b;
    mds_add_copy(net, m, a, c);
    c.e = a;
    mds_add_copy(net, m, b, c);
  }
}
/* Redistribute the edges of type 0 according to an edge partition.
 *
 * map: for each local edge id, the process that should receive that edge.
 *
 * Phase 1: each mapped edge is sent to its destination as a pair of global
 *          vertex ids; the received pairs become the new edge_list[0].
 * Phase 2: a per-vertex rank array marks every vertex that appears in a
 *          received edge as belonging to this process, and create_dist_csr
 *          is called with it.
 * Phase 3: every local vertex id is sent to every other process. A received
 *          id that is also present in vtx_mapping is recorded as a duplicate
 *          together with its sender, and the SPLIT_TYPE degree/edge lists,
 *          ghost_unmap and owners are built from those duplicates.
 */
void binGraph::migrate(agi::EdgePartitionMap& map) {
  /* ---- Phase 1: ship edges to their new owners ---- */
  EdgePartitionMap::iterator itr;
  PCU_Comm_Begin();
  for (itr = map.begin();itr!=map.end();itr++) {
    lid_t lid = itr->first;
    gid_t v1 = local_unmap[u(lid)];
    gid_t v2 = local_unmap[edge_list[0][lid]];
    PCU_COMM_PACK(itr->second,v1);
    PCU_COMM_PACK(itr->second,v2);
  }
  PCU_Comm_Send();
  std::vector<gid_t> recv_edges;
  while (PCU_Comm_Receive()) {
    gid_t v1;
    PCU_COMM_UNPACK(v1);
    recv_edges.push_back(v1);
  }
  num_global_verts = PCU_Add_Long(num_global_verts);
  if (numEdgeTypes()==0)
    addEdgeType();
  num_local_edges[0] = recv_edges.size()/2;
  num_global_edges[0] = PCU_Add_Long(num_local_edges[0]);
  /* NOTE(review): the previous edge_list[0] is released with scalar delete,
     while this function allocates it with new[] and exchange_edges with
     malloc. Left unchanged because the owning class is not visible here;
     confirm the intended allocator. */
  if (edge_list[0])
    delete edge_list[0];
  edge_list[0] = new gid_t[num_local_edges[0]*2];
  std::copy(recv_edges.begin(),recv_edges.end(),edge_list[0]);
  vtx_mapping.clear();

  /* ---- Phase 2: rebuild the local structure from the received edges ---- */
  int32_t* ranks = (int32_t*)malloc(num_global_verts*sizeof(int32_t));
  for (uint64_t i=0;i<num_global_verts;i++)
    ranks[i] = -1;
  for (lid_t i=0;i<num_local_edges[0]*2;i++) {
    ranks[edge_list[0][i]] = PCU_Comm_Self();
  }
  create_dist_csr(ranks,0,false);
  /* Allocated with malloc above, so it must be released with free. */
  free(ranks);

  /* ---- Phase 3: find vertices duplicated on other processes ---- */
  //TODO: Make much more efficient
  PCU_Comm_Begin();
  for (int i=0;i<num_local_verts;i++) {
    for (int j=1;j<PCU_Comm_Peers();j++)
      PCU_COMM_PACK((PCU_Comm_Self()+j)%PCU_Comm_Peers(),local_unmap[i]);
  }
  PCU_Comm_Send();
  std::vector<part_t> owns;
  std::vector<gid_t> dups;
  degree_list[SPLIT_TYPE] = new lid_t[num_local_verts+1];
  for (int i=0;i<num_local_verts+1;++i)
    degree_list[SPLIT_TYPE][i]=0;

  while (PCU_Comm_Receive()) {
    gid_t gid;
    PCU_COMM_UNPACK(gid);
    map_t::iterator itr = vtx_mapping.find(gid);
    if (itr!=vtx_mapping.end()) {
      dups.push_back(gid);
      owns.push_back(PCU_Comm_Sender());
      degree_list[SPLIT_TYPE][itr->second+1]++;
    }
  }

  /* Turn per-vertex counts into running offsets. */
  for (int i=1;i<num_local_verts+1;++i)
    degree_list[SPLIT_TYPE][i]+=degree_list[SPLIT_TYPE][i-1];

  assert(degree_list[SPLIT_TYPE][num_local_verts] ==dups.size());
  num_ghost_verts = dups.size();
  num_local_edges[SPLIT_TYPE] = dups.size();

  ghost_unmap = new gid_t[dups.size()];
  owners = new part_t[dups.size()];
  /* Working copy of the offsets, advanced as each vertex's slots fill. */
  uint64_t* temp_counts = (uint64_t*)malloc(num_local_verts*sizeof(uint64_t));
  memcpy(temp_counts, degree_list[SPLIT_TYPE], num_local_verts*sizeof(uint64_t));

  edge_list[SPLIT_TYPE] = new lid_t[dups.size()];

  for (unsigned int i=0;i<dups.size();i++) {
    lid_t lid = vtx_mapping[dups[i]];
    /* Duplicate i is numbered after the local vertices. */
    edge_list[SPLIT_TYPE][temp_counts[lid]++] = num_local_verts+i;
    ghost_unmap[i]=dups[i];
    owners[i] = owns[i];
  }
  /* The scratch offsets are no longer needed once the lists are filled. */
  free(temp_counts);
  num_global_edges[SPLIT_TYPE] = PCU_Add_Long(num_local_edges[SPLIT_TYPE]);
}
int binGraph::exchange_edges(uint64_t m_read, uint64_t* read_edges, int32_t* ranks,etype t) { int32_t* scounts = (int32_t*)malloc(PCU_Comm_Peers()*sizeof(int32_t)); int32_t* rcounts = (int32_t*)malloc(PCU_Comm_Peers()*sizeof(int32_t)); int32_t* sdispls = (int32_t*)malloc(PCU_Comm_Peers()*sizeof(int32_t)); int32_t* sdispls_cpy = (int32_t*)malloc(PCU_Comm_Peers()*sizeof(int32_t)); int32_t* rdispls = (int32_t*)malloc(PCU_Comm_Peers()*sizeof(int32_t)); for (int i = 0; i < PCU_Comm_Peers(); ++i) { scounts[i] = 0; rcounts[i] = 0; sdispls[i] = 0; sdispls_cpy[i] = 0; rdispls[i] = 0; } uint64_t n_per_rank = num_global_verts / PCU_Comm_Peers() + 1; for (uint64_t i = 0; i < m_read*2; i+=2) { uint64_t vert = read_edges[i]; int vert_task = ranks[vert]; scounts[vert_task] += 2; } MPI_Alltoall(scounts, 1, MPI_INT32_T, rcounts, 1, MPI_INT32_T, PCU_Get_Comm()); for (uint64_t i = 1; i < PCU_Comm_Peers(); ++i) { sdispls[i] = sdispls[i-1] + scounts[i-1]; sdispls_cpy[i] = sdispls[i]; rdispls[i] = rdispls[i-1] + rcounts[i-1]; } int32_t total_send = sdispls[PCU_Comm_Peers()-1] + scounts[PCU_Comm_Peers()-1]; int32_t total_recv = rdispls[PCU_Comm_Peers()-1] + rcounts[PCU_Comm_Peers()-1]; uint64_t* sendbuf = (uint64_t*)malloc(total_send*sizeof(uint64_t)); edge_list[t] = (uint64_t*)malloc(total_recv*sizeof(uint64_t)); num_local_edges[t] = total_recv / 2; for (uint64_t i = 0; i < m_read*2; i+=2) { uint64_t vert1 = read_edges[i]; uint64_t vert2 = read_edges[i+1]; int vert_task = ranks[vert1]; sendbuf[sdispls_cpy[vert_task]++] = vert1; sendbuf[sdispls_cpy[vert_task]++] = vert2; } MPI_Alltoallv(sendbuf, scounts, sdispls, MPI_UINT64_T, edge_list[t], rcounts, rdispls, MPI_UINT64_T, PCU_Get_Comm()); free(sendbuf); return 0; }
int main(int argc, char* argv[]) { MPI_Init(&argc,&argv); PCU_Comm_Init(); PCU_Debug_Open(); if ( argc != 2&&argc!=3 ) { if ( !PCU_Comm_Self() ) printf("Usage: %s <binary_graph_file> [vertex_partition_file]",argv[0]); PCU_Comm_Free(); MPI_Finalize(); assert(false); } agi::binGraph* g; zagi::ZoltanCutVertex* ptn; int self = PCU_Comm_Self(); int num_parts = PCU_Comm_Peers(); PCU_Switch_Comm(MPI_COMM_SELF); if (!self) { g = new agi::binGraph(argv[1]); ptn = new zagi::ZoltanCutVertex(g,num_parts); ptn->run(); } else g = new agi::binGraph(); agi::EdgePartitionMap map; if (!self) { ptn->createPtn(map); delete ptn; } PCU_Switch_Comm(MPI_COMM_WORLD); g->migrate(map); partitionInfo(g); delete g; if (!PCU_Comm_Self()) printf("Block Partitioning\n"); g = new agi::binGraph(argv[1]); partitionInfo(g); delete g; if (argc==3) { if (!PCU_Comm_Self()) printf("Partitioned: %s\n",argv[2]); g = new agi::binGraph(argv[1],argv[2]); partitionInfo(g); delete g; } PCU_Barrier(); if (!PCU_Comm_Self()) printf("\nAll tests passed\n"); PCU_Comm_Free(); MPI_Finalize(); }