int binGraph::exchange_edges(uint64_t m_read, uint64_t* read_edges, int32_t* ranks,etype t) { int32_t* scounts = (int32_t*)malloc(PCU_Comm_Peers()*sizeof(int32_t)); int32_t* rcounts = (int32_t*)malloc(PCU_Comm_Peers()*sizeof(int32_t)); int32_t* sdispls = (int32_t*)malloc(PCU_Comm_Peers()*sizeof(int32_t)); int32_t* sdispls_cpy = (int32_t*)malloc(PCU_Comm_Peers()*sizeof(int32_t)); int32_t* rdispls = (int32_t*)malloc(PCU_Comm_Peers()*sizeof(int32_t)); for (int i = 0; i < PCU_Comm_Peers(); ++i) { scounts[i] = 0; rcounts[i] = 0; sdispls[i] = 0; sdispls_cpy[i] = 0; rdispls[i] = 0; } uint64_t n_per_rank = num_global_verts / PCU_Comm_Peers() + 1; for (uint64_t i = 0; i < m_read*2; i+=2) { uint64_t vert = read_edges[i]; int vert_task = ranks[vert]; scounts[vert_task] += 2; } MPI_Alltoall(scounts, 1, MPI_INT32_T, rcounts, 1, MPI_INT32_T, PCU_Get_Comm()); for (uint64_t i = 1; i < PCU_Comm_Peers(); ++i) { sdispls[i] = sdispls[i-1] + scounts[i-1]; sdispls_cpy[i] = sdispls[i]; rdispls[i] = rdispls[i-1] + rcounts[i-1]; } int32_t total_send = sdispls[PCU_Comm_Peers()-1] + scounts[PCU_Comm_Peers()-1]; int32_t total_recv = rdispls[PCU_Comm_Peers()-1] + rcounts[PCU_Comm_Peers()-1]; uint64_t* sendbuf = (uint64_t*)malloc(total_send*sizeof(uint64_t)); edge_list[t] = (uint64_t*)malloc(total_recv*sizeof(uint64_t)); num_local_edges[t] = total_recv / 2; for (uint64_t i = 0; i < m_read*2; i+=2) { uint64_t vert1 = read_edges[i]; uint64_t vert2 = read_edges[i+1]; int vert_task = ranks[vert1]; sendbuf[sdispls_cpy[vert_task]++] = vert1; sendbuf[sdispls_cpy[vert_task]++] = vert2; } MPI_Alltoallv(sendbuf, scounts, sdispls, MPI_UINT64_T, edge_list[t], rcounts, rdispls, MPI_UINT64_T, PCU_Get_Comm()); free(sendbuf); return 0; }
static void switchToAll() { MPI_Comm prevComm = PCU_Get_Comm(); PCU_Switch_Comm(MPI_COMM_WORLD); MPI_Comm_free(&prevComm); PCU_Barrier(); }
//TODO: optimize these operations using PCU //Private Functions etype binGraph::load_edges(char *filename, uint64_t*& read_edges, uint64_t& m_read) { FILE *infp = fopen(filename, "rb"); fseek(infp, 0L, SEEK_END); uint64_t file_size = ftell(infp); fseek(infp, 0L, SEEK_SET); etype t = addEdgeType(); num_global_edges[t] = file_size/(2*sizeof(uint32_t)); uint64_t read_offset_start = PCU_Comm_Self()*2*sizeof(uint32_t)* (num_global_edges[t] / (uint64_t)PCU_Comm_Peers()); uint64_t read_offset_end = (PCU_Comm_Self()+1)*2*sizeof(uint32_t)* (num_global_edges[t] / (uint64_t)PCU_Comm_Peers()); if (PCU_Comm_Self() == PCU_Comm_Peers() - 1) read_offset_end = 2*sizeof(uint32_t)*num_global_edges[t]; m_read = (read_offset_end - read_offset_start)/(2*sizeof(uint32_t)); uint32_t* temp_read = (uint32_t*)malloc(2*m_read*sizeof(uint32_t)); read_edges = (uint64_t*)malloc(2*m_read*sizeof(uint64_t)); fseek(infp, read_offset_start, SEEK_SET); fread(temp_read, m_read, 2*sizeof(uint32_t), infp); fclose(infp); for (uint64_t i = 0; i < m_read*2; ++i) read_edges[i] = (uint64_t)temp_read[i]; free(temp_read); num_global_verts = 0; for (uint64_t i = 0; i < m_read*2; ++i) if (read_edges[i] > num_global_verts) { num_global_verts = read_edges[i]; } MPI_Allreduce(MPI_IN_PLACE, &num_global_verts, 1, MPI_UINT64_T, MPI_MAX, PCU_Get_Comm()); num_global_verts += 1; return t; }