/* Returns true if result is valid. Also, updates high 16 bits of each element * of pred to contain the BFS level number (or -1 if not visited) of each * vertex; this is based on the predecessor map if the user didn't provide it. * */ int validate_bfs_result(const tuple_graph* const tg, const int64_t nglobalverts, const size_t nlocalverts, const int64_t root, int64_t* const pred, int64_t* const edge_visit_count_ptr) { assert (tg->edgememory_size >= 0 && tg->max_edgememory_size >= tg->edgememory_size && tg->max_edgememory_size <= tg->nglobaledges); assert (pred); *edge_visit_count_ptr = 0; /* Ensure it is a valid pointer */ int ranges_ok = check_value_ranges(nglobalverts, nlocalverts, pred); if (root < 0 || root >= nglobalverts) { fprintf(stderr, "%d: Validation error: root vertex %" PRId64 " is invalid.\n", rank, root); ranges_ok = 0; } if (!ranges_ok) return 0; /* Fail */ assert (tg->edgememory_size >= 0 && tg->max_edgememory_size >= tg->edgememory_size && tg->max_edgememory_size <= tg->nglobaledges); assert (pred); int validation_passed = 1; int root_owner; size_t root_local; get_vertex_distribution_for_pred(1, &root, &root_owner, &root_local); int root_is_mine = (root_owner == rank); /* Get maximum values so loop counts are consistent across ranks. */ uint64_t maxlocalverts_ui = nlocalverts; MPI_Allreduce(MPI_IN_PLACE, &maxlocalverts_ui, 1, MPI_UINT64_T, MPI_MAX, MPI_COMM_WORLD); size_t maxlocalverts = (size_t)maxlocalverts_ui; ptrdiff_t max_bufsize = tuple_graph_max_bufsize(tg); ptrdiff_t edge_chunk_size = ptrdiff_min(HALF_CHUNKSIZE, max_bufsize); assert (tg->edgememory_size >= 0 && tg->max_edgememory_size >= tg->edgememory_size && tg->max_edgememory_size <= tg->nglobaledges); assert (pred); /* Check that root is its own parent. */ if (root_is_mine) { assert (root_local < nlocalverts); if (get_pred_from_pred_entry(pred[root_local]) != root) { fprintf(stderr, "%d: Validation error: parent of root vertex %" PRId64 " is %" PRId64 ", not the root itself.\n", rank, root, get_pred_from_pred_entry(pred[root_local])); validation_passed = 0; } } assert (tg->edgememory_size >= 0 && tg->max_edgememory_size >= tg->edgememory_size && tg->max_edgememory_size <= tg->nglobaledges); assert (pred); /* Check that nothing else is its own parent. */ { int* restrict pred_owner = (int*)xmalloc(size_min(CHUNKSIZE, nlocalverts) * sizeof(int)); size_t* restrict pred_local = (size_t*)xmalloc(size_min(CHUNKSIZE, nlocalverts) * sizeof(size_t)); int64_t* restrict pred_vtx = (int64_t*)xmalloc(size_min(CHUNKSIZE, nlocalverts) * sizeof(int64_t)); /* Vertex (not depth) part of pred map */ ptrdiff_t ii; for (ii = 0; ii < (ptrdiff_t)nlocalverts; ii += CHUNKSIZE) { ptrdiff_t i_start = ii; ptrdiff_t i_end = ptrdiff_min(ii + CHUNKSIZE, nlocalverts); ptrdiff_t i; assert (i_start >= 0 && i_start <= (ptrdiff_t)nlocalverts); assert (i_end >= 0 && i_end <= (ptrdiff_t)nlocalverts); #pragma omp parallel for for (i = i_start; i < i_end; ++i) { pred_vtx[i - i_start] = get_pred_from_pred_entry(pred[i]); } get_vertex_distribution_for_pred(i_end - i_start, pred_vtx, pred_owner, pred_local); #pragma omp parallel for reduction(&&:validation_passed) for (i = i_start; i < i_end; ++i) { if ((!root_is_mine || (size_t)i != root_local) && get_pred_from_pred_entry(pred[i]) != -1 && pred_owner[i - i_start] == rank && pred_local[i - i_start] == (size_t)i) { fprintf(stderr, "%d: Validation error: parent of non-root vertex %" PRId64 " is itself.\n", rank, vertex_to_global_for_pred(rank, i)); validation_passed = 0; } } } free(pred_owner); free(pred_local); free(pred_vtx); } assert (tg->edgememory_size >= 0 && tg->max_edgememory_size >= tg->edgememory_size && tg->max_edgememory_size <= tg->nglobaledges); assert (pred); if (bfs_writes_depth_map()) { int check_ok = check_bfs_depth_map_using_predecessors(tg, nglobalverts, nlocalverts, maxlocalverts, root, pred); if (!check_ok) validation_passed = 0; } else { /* Create a vertex depth map to use for later validation. */ int pred_ok = build_bfs_depth_map(nglobalverts, nlocalverts, maxlocalverts, root, pred); //shit happened here if (!pred_ok) validation_passed = 0; } { /* Check that all edges connect vertices whose depths differ by at most * one, and check that there is an edge from each vertex to its claimed * predecessor. Also, count visited edges (including duplicates and * self-loops). */ unsigned char* restrict pred_valid = (unsigned char*)xMPI_Alloc_mem(nlocalverts * sizeof(unsigned char)); memset(pred_valid, 0, nlocalverts * sizeof(unsigned char)); int64_t* restrict edge_endpoint = (int64_t*)xmalloc(2 * edge_chunk_size * sizeof(int64_t)); int* restrict edge_owner = (int*)xmalloc(2 * edge_chunk_size * sizeof(int)); size_t* restrict edge_local = (size_t*)xmalloc(2 * edge_chunk_size * sizeof(size_t)); int64_t* restrict edge_preds = (int64_t*)xMPI_Alloc_mem(2 * edge_chunk_size * sizeof(int64_t)); gather* pred_win = init_gather((void*)pred, nlocalverts, sizeof(int64_t), edge_preds, 2 * edge_chunk_size, 2 * edge_chunk_size, MPI_INT64_T); unsigned char one = 1; scatter_constant* pred_valid_win = init_scatter_constant((void*)pred_valid, nlocalverts, sizeof(unsigned char), &one, 2 * edge_chunk_size, MPI_UNSIGNED_CHAR); int64_t edge_visit_count = 0; ITERATE_TUPLE_GRAPH_BEGIN(tg, buf, bufsize) { ptrdiff_t ii; for (ii = 0; ii < max_bufsize; ii += HALF_CHUNKSIZE) { ptrdiff_t i_start = ptrdiff_min(ii, bufsize); ptrdiff_t i_end = ptrdiff_min(ii + HALF_CHUNKSIZE, bufsize); assert (i_end - i_start <= edge_chunk_size); ptrdiff_t i; #pragma omp parallel for for (i = i_start; i < i_end; ++i) { int64_t v0 = get_v0_from_edge(&buf[i]); int64_t v1 = get_v1_from_edge(&buf[i]); edge_endpoint[(i - i_start) * 2 + 0] = v0; edge_endpoint[(i - i_start) * 2 + 1] = v1; } get_vertex_distribution_for_pred(2 * (i_end - i_start), edge_endpoint, edge_owner, edge_local); begin_gather(pred_win); #pragma omp parallel for for (i = i_start; i < i_end; ++i) { add_gather_request(pred_win, (i - i_start) * 2 + 0, edge_owner[(i - i_start) * 2 + 0], edge_local[(i - i_start) * 2 + 0], (i - i_start) * 2 + 0); add_gather_request(pred_win, (i - i_start) * 2 + 1, edge_owner[(i - i_start) * 2 + 1], edge_local[(i - i_start) * 2 + 1], (i - i_start) * 2 + 1); } end_gather(pred_win); begin_scatter_constant(pred_valid_win); #pragma omp parallel for reduction(&&:validation_passed) reduction(+:edge_visit_count) for (i = i_start; i < i_end; ++i) { int64_t src = get_v0_from_edge(&buf[i]); int64_t tgt = get_v1_from_edge(&buf[i]); uint16_t src_depth = get_depth_from_pred_entry(edge_preds[(i - i_start) * 2 + 0]); uint16_t tgt_depth = get_depth_from_pred_entry(edge_preds[(i - i_start) * 2 + 1]); if (src_depth != UINT16_MAX && tgt_depth == UINT16_MAX) { fprintf(stderr, "%d: Validation error: edge connects vertex %" PRId64 " in the BFS tree (depth %" PRIu16 ") to vertex %" PRId64 " outside the tree.\n", rank, src, src_depth, tgt); validation_passed = 0; } else if (src_depth == UINT16_MAX && tgt_depth != UINT16_MAX) { fprintf(stderr, "%d: Validation error: edge connects vertex %" PRId64 " in the BFS tree (depth %" PRIu16 ") to vertex %" PRId64 " outside the tree.\n", rank, tgt, tgt_depth, src); validation_passed = 0; } else if (src_depth - tgt_depth < -1 || src_depth - tgt_depth > 1) { fprintf(stderr, "%d: Validation error: depths of edge endpoints %" PRId64 " (depth %" PRIu16 ") and %" PRId64 " (depth %" PRIu16 ") are too far apart (abs. val. > 1).\n", rank, src, src_depth, tgt, tgt_depth); validation_passed = 0; } else if (src_depth != UINT16_MAX) { ++edge_visit_count; } if (get_pred_from_pred_entry(edge_preds[(i - i_start) * 2 + 0]) == tgt) { add_scatter_constant_request(pred_valid_win, edge_owner[(i - i_start) * 2 + 0], edge_local[(i - i_start) * 2 + 0], (i - i_start) * 2 + 0); } if (get_pred_from_pred_entry(edge_preds[(i - i_start) * 2 + 1]) == src) { add_scatter_constant_request(pred_valid_win, edge_owner[(i - i_start) * 2 + 1], edge_local[(i - i_start) * 2 + 1], (i - i_start) * 2 + 1); } } end_scatter_constant(pred_valid_win); } } ITERATE_TUPLE_GRAPH_END; destroy_gather(pred_win); MPI_Free_mem(edge_preds); free(edge_owner); free(edge_local); free(edge_endpoint); destroy_scatter_constant(pred_valid_win); ptrdiff_t i; #pragma omp parallel for reduction(&&:validation_passed) for (i = 0; i < (ptrdiff_t)nlocalverts; ++i) { int64_t p = get_pred_from_pred_entry(pred[i]); if (p == -1) continue; int found_pred_edge = pred_valid[i]; if (root_owner == rank && root_local == (size_t)i) found_pred_edge = 1; /* Root vertex */ if (!found_pred_edge) { int64_t v = vertex_to_global_for_pred(rank, i); fprintf(stderr, "%d: Validation error: no graph edge from vertex %" PRId64 " to its parent %" PRId64 ".\n", rank, v, get_pred_from_pred_entry(pred[i])); validation_passed = 0; } } MPI_Free_mem(pred_valid); MPI_Allreduce(MPI_IN_PLACE, &edge_visit_count, 1, MPI_INT64_T, MPI_SUM, MPI_COMM_WORLD); *edge_visit_count_ptr = edge_visit_count; }
int main(int argc, char** argv) { MPI_Init(&argc, &argv); setup_globals(); /* Parse arguments. */ int SCALE = 16; int edgefactor = 16; /* nedges / nvertices, i.e., 2*avg. degree */ // if (argc >= 2) SCALE = atoi(argv[1]); // if (argc >= 3) edgefactor = atoi(argv[2]); char* name = argv[1]; if (argc >= 3) SCALE = atoi(argv[2]); if (argc >= 4) edgefactor = atoi(argv[3]); // if (argc <= 1 || argc >= 4 || SCALE == 0 || edgefactor == 0) { // if (rank == 0) { // fprintf(stderr, "Usage: %s SCALE edgefactor\n SCALE = log_2(# vertices) [integer, required]\n edgefactor = (# edges) / (# vertices) = .5 * (average vertex degree) [integer, defaults to 16]\n(Random number seed and Kronecker initiator are in main.c)\n", argv[0]); // } if (argc <= 2 || argc >= 5 || SCALE == 0 || edgefactor == 0) { if (rank == 0) { fprintf(stderr, "Usage: %s filename SCALE edgefactor\n SCALE = log_2(# vertices) [integer, required]\n edgefactor = (# edges) / (# vertices) = .5 * (average vertex degree) [integer, defaults to 16]\n(Random number seed and Kronecker initiator are in main.c)\n", argv[0]); } MPI_Abort(MPI_COMM_WORLD, 1); } uint64_t seed1 = 2, seed2 = 3; // const char* filename = getenv("TMPFILE"); const char* filename = name; /* If filename is NULL, store data in memory */ tuple_graph tg; tg.nglobaledges = (int64_t)(edgefactor) << SCALE; int64_t nglobalverts = (int64_t)(1) << SCALE; tg.data_in_file = (filename != NULL); if (tg.data_in_file) { printf("data in file \n"); MPI_File_set_errhandler(MPI_FILE_NULL, MPI_ERRORS_ARE_FATAL); // MPI_File_open(MPI_COMM_WORLD, (char*)filename, MPI_MODE_RDWR | MPI_MODE_CREATE | MPI_MODE_EXCL | MPI_MODE_DELETE_ON_CLOSE | MPI_MODE_UNIQUE_OPEN, MPI_INFO_NULL, &tg.edgefile); MPI_File_open(MPI_COMM_WORLD, (char*)filename, MPI_MODE_RDWR | MPI_MODE_CREATE | MPI_MODE_EXCL | MPI_MODE_UNIQUE_OPEN, MPI_INFO_NULL, &tg.edgefile); MPI_File_set_size(tg.edgefile, tg.nglobaledges * sizeof(packed_edge)); MPI_File_set_view(tg.edgefile, 0, packed_edge_mpi_type, packed_edge_mpi_type, "native", MPI_INFO_NULL); MPI_File_set_atomicity(tg.edgefile, 0); } /* Make the raw graph edges. */ /* Get roots for BFS runs, plus maximum vertex with non-zero degree (used by * validator). */ int num_bfs_roots = 64; int64_t* bfs_roots = (int64_t*)xmalloc(num_bfs_roots * sizeof(int64_t)); int64_t max_used_vertex = 0; double make_graph_start = MPI_Wtime(); { /* Spread the two 64-bit numbers into five nonzero values in the correct * range. */ uint_fast32_t seed[5]; make_mrg_seed(seed1, seed2, seed); /* As the graph is being generated, also keep a bitmap of vertices with * incident edges. We keep a grid of processes, each row of which has a * separate copy of the bitmap (distributed among the processes in the * row), and then do an allreduce at the end. This scheme is used to avoid * non-local communication and reading the file separately just to find BFS * roots. */ MPI_Offset nchunks_in_file = (tg.nglobaledges + FILE_CHUNKSIZE - 1) / FILE_CHUNKSIZE; int64_t bitmap_size_in_bytes = int64_min(BITMAPSIZE, (nglobalverts + CHAR_BIT - 1) / CHAR_BIT); if (bitmap_size_in_bytes * size * CHAR_BIT < nglobalverts) { bitmap_size_in_bytes = (nglobalverts + size * CHAR_BIT - 1) / (size * CHAR_BIT); } int ranks_per_row = ((nglobalverts + CHAR_BIT - 1) / CHAR_BIT + bitmap_size_in_bytes - 1) / bitmap_size_in_bytes; int nrows = size / ranks_per_row; int my_row = -1, my_col = -1; unsigned char* restrict has_edge = NULL; MPI_Comm cart_comm; { int dims[2] = {size / ranks_per_row, ranks_per_row}; int periods[2] = {0, 0}; MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, 1, &cart_comm); } int in_generating_rectangle = 0; if (cart_comm != MPI_COMM_NULL) { in_generating_rectangle = 1; { int dims[2], periods[2], coords[2]; MPI_Cart_get(cart_comm, 2, dims, periods, coords); my_row = coords[0]; my_col = coords[1]; } MPI_Comm this_col; MPI_Comm_split(cart_comm, my_col, my_row, &this_col); MPI_Comm_free(&cart_comm); has_edge = (unsigned char*)xMPI_Alloc_mem(bitmap_size_in_bytes); memset(has_edge, 0, bitmap_size_in_bytes); /* Every rank in a given row creates the same vertices (for updating the * bitmap); only one writes them to the file (or final memory buffer). */ packed_edge* buf = (packed_edge*)xmalloc(FILE_CHUNKSIZE * sizeof(packed_edge)); MPI_Offset block_limit = (nchunks_in_file + nrows - 1) / nrows; // fprintf(stderr, "%d: nchunks_in_file = %" PRId64 ", block_limit = %" PRId64 " in grid of %d rows, %d cols\n", rank, (int64_t)nchunks_in_file, (int64_t)block_limit, nrows, ranks_per_row); if (tg.data_in_file) { tg.edgememory_size = 0; tg.edgememory = NULL; } else { int my_pos = my_row + my_col * nrows; int last_pos = (tg.nglobaledges % ((int64_t)FILE_CHUNKSIZE * nrows * ranks_per_row) != 0) ? (tg.nglobaledges / FILE_CHUNKSIZE) % (nrows * ranks_per_row) : -1; int64_t edges_left = tg.nglobaledges % FILE_CHUNKSIZE; int64_t nedges = FILE_CHUNKSIZE * (tg.nglobaledges / ((int64_t)FILE_CHUNKSIZE * nrows * ranks_per_row)) + FILE_CHUNKSIZE * (my_pos < (tg.nglobaledges / FILE_CHUNKSIZE) % (nrows * ranks_per_row)) + (my_pos == last_pos ? edges_left : 0); /* fprintf(stderr, "%d: nedges = %" PRId64 " of %" PRId64 "\n", rank, (int64_t)nedges, (int64_t)tg.nglobaledges); */ tg.edgememory_size = nedges; tg.edgememory = (packed_edge*)xmalloc(nedges * sizeof(packed_edge)); } MPI_Offset block_idx; for (block_idx = 0; block_idx < block_limit; ++block_idx) { /* fprintf(stderr, "%d: On block %d of %d\n", rank, (int)block_idx, (int)block_limit); */ MPI_Offset start_edge_index = int64_min(FILE_CHUNKSIZE * (block_idx * nrows + my_row), tg.nglobaledges); MPI_Offset edge_count = int64_min(tg.nglobaledges - start_edge_index, FILE_CHUNKSIZE); packed_edge* actual_buf = (!tg.data_in_file && block_idx % ranks_per_row == my_col) ? tg.edgememory + FILE_CHUNKSIZE * (block_idx / ranks_per_row) : buf; /* fprintf(stderr, "%d: My range is [%" PRId64 ", %" PRId64 ") %swriting into index %" PRId64 "\n", rank, (int64_t)start_edge_index, (int64_t)(start_edge_index + edge_count), (my_col == (block_idx % ranks_per_row)) ? "" : "not ", (int64_t)(FILE_CHUNKSIZE * (block_idx / ranks_per_row))); */ if (!tg.data_in_file && block_idx % ranks_per_row == my_col) { assert (FILE_CHUNKSIZE * (block_idx / ranks_per_row) + edge_count <= tg.edgememory_size); } // debug char* wtxbuf = (char*)xmalloc(FILE_CHUNKSIZE * sizeof(packed_edge)); // generate_kronecker_range(seed, SCALE, start_edge_index, start_edge_index + edge_count, actual_buf); generate_kronecker_range(seed, SCALE, start_edge_index, start_edge_index + edge_count, actual_buf); if (tg.data_in_file && my_col == (block_idx % ranks_per_row)) { /* Try to spread writes among ranks */ // MPI_File_write_at(tg.edgefile, start_edge_index, actual_buf, edge_count, packed_edge_mpi_type, MPI_STATUS_IGNORE); // debug printf("%d: %d, %d\n", rank, start_edge_index, edge_count); int i; // for (i = start_edge_index; i < start_edge_index + 3; i++) { // if(block_idx == 0) { // for (i = 0; i < 3; i++) { // if (edge_count > 3) // printf("%d: %d\t%d\n", rank, actual_buf[i].v0, actual_buf[i].v1); // } // } MPI_File_write_at(tg.edgefile, start_edge_index, actual_buf, edge_count, packed_edge_mpi_type, MPI_STATUS_IGNORE); } ptrdiff_t i; #ifdef _OPENMP #pragma omp parallel for #endif for (i = 0; i < edge_count; ++i) { int64_t src = get_v0_from_edge(&actual_buf[i]); int64_t tgt = get_v1_from_edge(&actual_buf[i]); if (src == tgt) continue; if (src / bitmap_size_in_bytes / CHAR_BIT == my_col) { #ifdef _OPENMP #pragma omp atomic #endif has_edge[(src / CHAR_BIT) % bitmap_size_in_bytes] |= (1 << (src % CHAR_BIT)); } if (tgt / bitmap_size_in_bytes / CHAR_BIT == my_col) { #ifdef _OPENMP #pragma omp atomic #endif has_edge[(tgt / CHAR_BIT) % bitmap_size_in_bytes] |= (1 << (tgt % CHAR_BIT)); } } } free(buf); #if 0 /* The allreduce for each root acts like we did this: */ MPI_Allreduce(MPI_IN_PLACE, has_edge, bitmap_size_in_bytes, MPI_UNSIGNED_CHAR, MPI_BOR, this_col); #endif MPI_Comm_free(&this_col); } else { tg.edgememory = NULL; tg.edgememory_size = 0; } MPI_Allreduce(&tg.edgememory_size, &tg.max_edgememory_size, 1, MPI_INT64_T, MPI_MAX, MPI_COMM_WORLD); #ifndef GEN_ONLY /* Find roots and max used vertex */ { uint64_t counter = 0; int bfs_root_idx; for (bfs_root_idx = 0; bfs_root_idx < num_bfs_roots; ++bfs_root_idx) { int64_t root; while (1) { double d[2]; make_random_numbers(2, seed1, seed2, counter, d); root = (int64_t)((d[0] + d[1]) * nglobalverts) % nglobalverts; counter += 2; if (counter > 2 * nglobalverts) break; int is_duplicate = 0; int i; for (i = 0; i < bfs_root_idx; ++i) { if (root == bfs_roots[i]) { is_duplicate = 1; break; } } if (is_duplicate) continue; /* Everyone takes the same path here */ int root_ok = 0; if (in_generating_rectangle && (root / CHAR_BIT / bitmap_size_in_bytes) == my_col) { root_ok = (has_edge[(root / CHAR_BIT) % bitmap_size_in_bytes] & (1 << (root % CHAR_BIT))) != 0; } MPI_Allreduce(MPI_IN_PLACE, &root_ok, 1, MPI_INT, MPI_LOR, MPI_COMM_WORLD); if (root_ok) break; } bfs_roots[bfs_root_idx] = root; } num_bfs_roots = bfs_root_idx; /* Find maximum non-zero-degree vertex. */ { int64_t i; max_used_vertex = 0; if (in_generating_rectangle) { for (i = bitmap_size_in_bytes * CHAR_BIT; i > 0; --i) { if (i > nglobalverts) continue; if (has_edge[(i - 1) / CHAR_BIT] & (1 << ((i - 1) % CHAR_BIT))) { max_used_vertex = (i - 1) + my_col * CHAR_BIT * bitmap_size_in_bytes; break; } } } MPI_Allreduce(MPI_IN_PLACE, &max_used_vertex, 1, MPI_INT64_T, MPI_MAX, MPI_COMM_WORLD); } } #endif if (in_generating_rectangle) { MPI_Free_mem(has_edge); } if (tg.data_in_file) { MPI_File_sync(tg.edgefile); } } double make_graph_stop = MPI_Wtime(); double make_graph_time = make_graph_stop - make_graph_start; if (rank == 0) { /* Not an official part of the results */ fprintf(stderr, "graph_generation: %f s\n", make_graph_time); } //debug #ifndef GEN_ONLY //!GEN_ONLY /* Make user's graph data structure. */ double data_struct_start = MPI_Wtime(); make_graph_data_structure(&tg); double data_struct_stop = MPI_Wtime(); double data_struct_time = data_struct_stop - data_struct_start; if (rank == 0) { /* Not an official part of the results */ fprintf(stderr, "construction_time: %f s\n", data_struct_time); } /* Number of edges visited in each BFS; a double so get_statistics can be * used directly. */ double* edge_counts = (double*)xmalloc(num_bfs_roots * sizeof(double)); /* Run BFS. */ int validation_passed = 1; double* bfs_times = (double*)xmalloc(num_bfs_roots * sizeof(double)); double* validate_times = (double*)xmalloc(num_bfs_roots * sizeof(double)); uint64_t nlocalverts = get_nlocalverts_for_pred(); int64_t* pred = (int64_t*)xMPI_Alloc_mem(nlocalverts * sizeof(int64_t)); int bfs_root_idx; for (bfs_root_idx = 0; bfs_root_idx < num_bfs_roots; ++bfs_root_idx) { int64_t root = bfs_roots[bfs_root_idx]; if (rank == 0) fprintf(stderr, "Running BFS %d\n", bfs_root_idx); /* Clear the pred array. */ memset(pred, 0, nlocalverts * sizeof(int64_t)); /* Do the actual BFS. */ double bfs_start = MPI_Wtime(); run_bfs(root, &pred[0]); double bfs_stop = MPI_Wtime(); bfs_times[bfs_root_idx] = bfs_stop - bfs_start; if (rank == 0) fprintf(stderr, "Time for BFS %d is %f\n", bfs_root_idx, bfs_times[bfs_root_idx]); /* Validate result. */ if (rank == 0) fprintf(stderr, "Validating BFS %d\n", bfs_root_idx); double validate_start = MPI_Wtime(); int64_t edge_visit_count; int validation_passed_one = validate_bfs_result(&tg, max_used_vertex + 1, nlocalverts, root, pred, &edge_visit_count); double validate_stop = MPI_Wtime(); validate_times[bfs_root_idx] = validate_stop - validate_start; if (rank == 0) fprintf(stderr, "Validate time for BFS %d is %f\n", bfs_root_idx, validate_times[bfs_root_idx]); edge_counts[bfs_root_idx] = (double)edge_visit_count; if (rank == 0) fprintf(stderr, "TEPS for BFS %d is %g\n", bfs_root_idx, edge_visit_count / bfs_times[bfs_root_idx]); if (!validation_passed_one) { validation_passed = 0; if (rank == 0) fprintf(stderr, "Validation failed for this BFS root; skipping rest.\n"); break; } } MPI_Free_mem(pred); free(bfs_roots); free_graph_data_structure(); #endif //!GEN_ONLY if (tg.data_in_file) { MPI_File_close(&tg.edgefile); } else { free(tg.edgememory); tg.edgememory = NULL; } #ifndef GEN_ONLY /* Print results. */ if (rank == 0) { if (!validation_passed) { fprintf(stdout, "No results printed for invalid run.\n"); } else { int i; fprintf(stdout, "SCALE: %d\n", SCALE); fprintf(stdout, "edgefactor: %d\n", edgefactor); fprintf(stdout, "NBFS: %d\n", num_bfs_roots); fprintf(stdout, "graph_generation: %g\n", make_graph_time); fprintf(stdout, "num_mpi_processes: %d\n", size); fprintf(stdout, "construction_time: %g\n", data_struct_time); double stats[s_LAST]; get_statistics(bfs_times, num_bfs_roots, stats); fprintf(stdout, "min_time: %g\n", stats[s_minimum]); fprintf(stdout, "firstquartile_time: %g\n", stats[s_firstquartile]); fprintf(stdout, "median_time: %g\n", stats[s_median]); fprintf(stdout, "thirdquartile_time: %g\n", stats[s_thirdquartile]); fprintf(stdout, "max_time: %g\n", stats[s_maximum]); fprintf(stdout, "mean_time: %g\n", stats[s_mean]); fprintf(stdout, "stddev_time: %g\n", stats[s_std]); get_statistics(edge_counts, num_bfs_roots, stats); fprintf(stdout, "min_nedge: %.11g\n", stats[s_minimum]); fprintf(stdout, "firstquartile_nedge: %.11g\n", stats[s_firstquartile]); fprintf(stdout, "median_nedge: %.11g\n", stats[s_median]); fprintf(stdout, "thirdquartile_nedge: %.11g\n", stats[s_thirdquartile]); fprintf(stdout, "max_nedge: %.11g\n", stats[s_maximum]); fprintf(stdout, "mean_nedge: %.11g\n", stats[s_mean]); fprintf(stdout, "stddev_nedge: %.11g\n", stats[s_std]); double* secs_per_edge = (double*)xmalloc(num_bfs_roots * sizeof(double)); for (i = 0; i < num_bfs_roots; ++i) secs_per_edge[i] = bfs_times[i] / edge_counts[i]; get_statistics(secs_per_edge, num_bfs_roots, stats); fprintf(stdout, "min_TEPS: %g\n", 1. / stats[s_maximum]); fprintf(stdout, "firstquartile_TEPS: %g\n", 1. / stats[s_thirdquartile]); fprintf(stdout, "median_TEPS: %g\n", 1. / stats[s_median]); fprintf(stdout, "thirdquartile_TEPS: %g\n", 1. / stats[s_firstquartile]); fprintf(stdout, "max_TEPS: %g\n", 1. / stats[s_minimum]); fprintf(stdout, "harmonic_mean_TEPS: %g\n", 1. / stats[s_mean]); /* Formula from: * Title: The Standard Errors of the Geometric and Harmonic Means and * Their Application to Index Numbers * Author(s): Nilan Norris * Source: The Annals of Mathematical Statistics, Vol. 11, No. 4 (Dec., 1940), pp. 445-448 * Publisher(s): Institute of Mathematical Statistics * Stable URL: http://www.jstor.org/stable/2235723 * (same source as in specification). */ fprintf(stdout, "harmonic_stddev_TEPS: %g\n", stats[s_std] / (stats[s_mean] * stats[s_mean] * sqrt(num_bfs_roots - 1))); free(secs_per_edge); secs_per_edge = NULL; free(edge_counts); edge_counts = NULL; get_statistics(validate_times, num_bfs_roots, stats); fprintf(stdout, "min_validate: %g\n", stats[s_minimum]); fprintf(stdout, "firstquartile_validate: %g\n", stats[s_firstquartile]); fprintf(stdout, "median_validate: %g\n", stats[s_median]); fprintf(stdout, "thirdquartile_validate: %g\n", stats[s_thirdquartile]); fprintf(stdout, "max_validate: %g\n", stats[s_maximum]); fprintf(stdout, "mean_validate: %g\n", stats[s_mean]); fprintf(stdout, "stddev_validate: %g\n", stats[s_std]); #if 0 for (i = 0; i < num_bfs_roots; ++i) { fprintf(stdout, "Run %3d: %g s, validation %g s\n", i + 1, bfs_times[i], validate_times[i]); } #endif } } free(bfs_times); free(validate_times); #endif cleanup_globals(); MPI_Finalize(); return 0; }
int main(int argc, char* argv[]) { struct timeval currentTime; gettimeofday(¤tTime, NULL); int seed = currentTime.tv_sec ^ currentTime.tv_usec; seed ^= seed >> 12; seed ^= seed << 25; seed ^= seed >> 27; FILE *fout; if (argc < 2 || argc > 10) { printError(); } // define all the variables int log_numverts = -1; char * filename = ""; long int numEdges; double start, time_taken, start_write, time_taken_write; int64_t nedges; packed_edge* result; int binary = 0; // set default to be not binary, normal tsv numEdges = 16; // default 16 fout = stdout; // default the stdout int opt; while(optind < argc) { if ((opt = getopt(argc, argv, "+e:o:s:b")) != -1) { switch (opt) { case 'e': numEdges = atoi(optarg); break; case 'o': filename = optarg; fout = fopen(optarg, "wb"); if (fout == NULL) { fprintf(stderr, "%s -- ", optarg); perror("fopen for write failed"); exit(1); } break; case 's': seed = atoi(optarg); break; case 'b': binary = 1; break; default: printError(); break; } } else { if(argv[optind] == NULL) { printError(); } else { log_numverts = atoi(argv[optind]); // In base 2 optind++; } } } if( log_numverts < 0 ) { printError(); } //Start of graph generation timing start = get_time(); make_graph(log_numverts, numEdges << log_numverts, seed, seed, &nedges, &result); time_taken = get_time() - start; printf("For 2^%d\n", log_numverts); printf("\t%f seconds for making graph\n", time_taken); if (binary == 0) { // print to the file start_write = get_time(); for (int i = 0; i < (numEdges << log_numverts); i++) { fprintf(fout, "%lu\t%lu\n", get_v0_from_edge(result + i), get_v1_from_edge(result + i)); } time_taken_write = get_time() - start_write; printf("\t%f seconds for writing ascii version\n", time_taken_write); } else { // need to print binary start_write = get_time(); for (int i = 0; i < (numEdges << log_numverts); i++) { uint32_t from = get_v0_from_edge(result + i); uint32_t to = get_v1_from_edge(result + i); // add the check for not exceed the uint32_t max fwrite((const void*) & from,sizeof(uint32_t),1,fout); fwrite((const void*) & to,sizeof(uint32_t),1,fout); } time_taken_write = get_time() - start_write; printf("\t%f seconds for writing binary version\n", time_taken_write); } int check_correctness; check_correctness = fclose(fout); if (check_correctness == EOF) { fprintf(stderr, "%s -- ", filename); perror("fclose failed"); exit(1); } return 0; }
int main (int argc, char **argv) { int * restrict has_adj; int fd; int64_t desired_nedge; if (sizeof (int64_t) < 8) { fprintf (stderr, "No 64-bit support.\n"); return EXIT_FAILURE; } if (argc > 1) get_options (argc, argv); nvtx_scale = 1L<<SCALE; init_random (); desired_nedge = nvtx_scale * edgefactor; /* Catch a few possible overflows. */ assert (desired_nedge >= nvtx_scale); assert (desired_nedge >= edgefactor); if (VERBOSE) fprintf (stderr, "Generating edge list..."); if (use_RMAT) { nedge = desired_nedge; IJ = xmalloc_large_ext (nedge * sizeof (*IJ)); rmat_edgelist (IJ, nedge, SCALE, A, B, C); } else { make_graph(SCALE, desired_nedge, userseed, userseed, &nedge, (struct packed_edge**)(&IJ)); } if (VERBOSE) fprintf (stderr, " done.\n"); if (dumpname) fd = open (dumpname, O_WRONLY|O_CREAT|O_TRUNC, 0666); else fd = 1; if (fd < 0) { fprintf (stderr, "Cannot open output file : %s\n", (dumpname? dumpname : "stdout")); return EXIT_FAILURE; } write (fd, IJ, 2 * nedge * sizeof (*IJ)); int buflen = strlen(dumpname) + strlen(".graph") + 1; char * graphname = (char *) malloc(buflen * sizeof(char)); snprintf(graphname, buflen, "%s.graph", dumpname); FILE * file = fopen(graphname, "w"); if(!file){ fprintf (stderr, "Cannot open output file : %s\n", (graphname? graphname : "stdout")); return EXIT_FAILURE; } for (int64_t k = 0; k < nedge; ++k) { const int64_t i = get_v0_from_edge(&IJ[k]); const int64_t j = get_v1_from_edge(&IJ[k]); if (i != j) fprintf(file, "%" PRId64 " %" PRId64 "\n", i+1, j+1); } fclose(file); //close (fd); free(graphname); return EXIT_SUCCESS; }
// Modified part void produce_graph(int64_t M, packed_edge** result_ptr_in, FILE *fout, int64_t binary) { uint32_t element_count = M * 2; uint32_t buffer_size = M * 2 * sizeof(uint32_t); uint32_t buffer_constant = 1 << 20; if (binary == 0) { #ifdef GRAPH_GENERATOR_OMP #pragma omp parallel #endif { char* buff = (char*)xmalloc(buffer_constant); int total_length = 0; #ifdef GRAPH_GENERATOR_OMP #pragma omp for #endif for (int64_t i = 0; i < M; i++) { char temp[50]; int temp_length; int check_correctness; uint32_t from = get_v0_from_edge(*result_ptr_in + i); uint32_t to = get_v1_from_edge(*result_ptr_in + i); temp_length = snprintf(temp, 50, "%u\t%u\n", from, to); if (temp_length < 0) { fprintf(stderr, "snprintf error\n"); exit(1); } if (total_length + temp_length < buffer_constant) { // still enough room available check_correctness = snprintf(&(buff[total_length]), buffer_constant - total_length, "%s", temp); if (check_correctness < 0) { fprintf(stderr, "snprintf error\n"); exit(1); } total_length += temp_length; } else { // the buffer is run out of memory #ifdef GRAPH_GENERATOR_OMP #pragma omp critical #endif { check_correctness = fprintf(fout, "%s", buff); if (check_correctness < 0) { fprintf(stderr, "fprintf error;\n"); exit(1); } } buff[0] = '\0'; check_correctness = snprintf(&(buff[0]), buffer_constant, "%s", temp); if (check_correctness < 0) { fprintf(stderr, "snprintf error;\n"); exit(1); } total_length = temp_length; } } #ifdef GRAPH_GENERATOR_OMP #pragma omp critical #endif { int check_correctness; check_correctness = fprintf(fout, "%s", buff); if (check_correctness < 0) { fprintf(stderr, "fprintf error;\n"); exit(1); } } } } else { uint32_t* buff = (uint32_t*)xmalloc(buffer_size); #ifdef GRAPH_GENERATOR_OMP #pragma omp parallel for #endif for (int64_t i = 0; i < M; i++) { uint32_t from = get_v0_from_edge(*result_ptr_in + i); buff[2 * i] = from; uint32_t to = get_v1_from_edge(*result_ptr_in + i); buff[2 * i + 1] = to; } size_t check_correctness; check_correctness = fwrite(buff, sizeof(uint32_t), element_count, fout); if (check_correctness != element_count) { fprintf(stderr, "fwrite error;\n"); exit(1); } } }