/* Returns true if the BFS result is valid.  Also updates the high 16 bits of
 * each element of pred to contain the BFS level number of the corresponding
 * vertex (or -1 if it was not visited); when the BFS implementation did not
 * write the levels itself, they are computed here from the predecessor map.
 * */
int validate_bfs_result(const tuple_graph* const tg, const int64_t nglobalverts, const size_t nlocalverts, const int64_t root, int64_t* const pred, int64_t* const edge_visit_count_ptr) {

  assert (tg->edgememory_size >= 0 && tg->max_edgememory_size >= tg->edgememory_size && tg->max_edgememory_size <= tg->nglobaledges);
  assert (pred);
  *edge_visit_count_ptr = 0; /* Ensure it is a valid pointer */
  int ranges_ok = check_value_ranges(nglobalverts, nlocalverts, pred);
  if (root < 0 || root >= nglobalverts) {
    fprintf(stderr, "%d: Validation error: root vertex %" PRId64 " is invalid.\n", rank, root);
    ranges_ok = 0;
  }
  if (!ranges_ok) return 0; /* Fail */

  assert (tg->edgememory_size >= 0 && tg->max_edgememory_size >= tg->edgememory_size && tg->max_edgememory_size <= tg->nglobaledges);
  assert (pred);

  int validation_passed = 1;
  int root_owner;
  size_t root_local;
  get_vertex_distribution_for_pred(1, &root, &root_owner, &root_local);
  int root_is_mine = (root_owner == rank);

  /* Get maximum values so loop counts are consistent across ranks. */
  uint64_t maxlocalverts_ui = nlocalverts;
  MPI_Allreduce(MPI_IN_PLACE, &maxlocalverts_ui, 1, MPI_UINT64_T, MPI_MAX, MPI_COMM_WORLD);
  size_t maxlocalverts = (size_t)maxlocalverts_ui;

  ptrdiff_t max_bufsize = tuple_graph_max_bufsize(tg);
  ptrdiff_t edge_chunk_size = ptrdiff_min(HALF_CHUNKSIZE, max_bufsize);

  assert (tg->edgememory_size >= 0 && tg->max_edgememory_size >= tg->edgememory_size && tg->max_edgememory_size <= tg->nglobaledges);
  assert (pred);

  /* Check that root is its own parent. */
  if (root_is_mine) {
    assert (root_local < nlocalverts);
    if (get_pred_from_pred_entry(pred[root_local]) != root) {
      fprintf(stderr, "%d: Validation error: parent of root vertex %" PRId64 " is %" PRId64 ", not the root itself.\n", rank, root, get_pred_from_pred_entry(pred[root_local]));
      validation_passed = 0;
    }
  }

  assert (tg->edgememory_size >= 0 && tg->max_edgememory_size >= tg->edgememory_size && tg->max_edgememory_size <= tg->nglobaledges);
  assert (pred);
  
  /* Check that nothing else is its own parent. */
  {
    int* restrict pred_owner = (int*)xmalloc(size_min(CHUNKSIZE, nlocalverts) * sizeof(int));
    size_t* restrict pred_local = (size_t*)xmalloc(size_min(CHUNKSIZE, nlocalverts) * sizeof(size_t));
    int64_t* restrict pred_vtx = (int64_t*)xmalloc(size_min(CHUNKSIZE, nlocalverts) * sizeof(int64_t)); /* Vertex (not depth) part of pred map */
    ptrdiff_t ii;
    for (ii = 0; ii < (ptrdiff_t)nlocalverts; ii += CHUNKSIZE) {
      ptrdiff_t i_start = ii;
      ptrdiff_t i_end = ptrdiff_min(ii + CHUNKSIZE, nlocalverts);
      ptrdiff_t i;
      assert (i_start >= 0 && i_start <= (ptrdiff_t)nlocalverts);
      assert (i_end >= 0 && i_end <= (ptrdiff_t)nlocalverts);
#pragma omp parallel for
      for (i = i_start; i < i_end; ++i) {
        pred_vtx[i - i_start] = get_pred_from_pred_entry(pred[i]);
      }
      get_vertex_distribution_for_pred(i_end - i_start, pred_vtx, pred_owner, pred_local);
#pragma omp parallel for reduction(&&:validation_passed)
      for (i = i_start; i < i_end; ++i) {

        if ((!root_is_mine || (size_t)i != root_local) &&
            get_pred_from_pred_entry(pred[i]) != -1 &&
            pred_owner[i - i_start] == rank &&
            pred_local[i - i_start] == (size_t)i) {
          fprintf(stderr, "%d: Validation error: parent of non-root vertex %" PRId64 " is itself.\n", rank, vertex_to_global_for_pred(rank, i));
          validation_passed = 0;
        }
      }
    }
    free(pred_owner);
    free(pred_local);
    free(pred_vtx);
  }

  assert (tg->edgememory_size >= 0 && tg->max_edgememory_size >= tg->edgememory_size && tg->max_edgememory_size <= tg->nglobaledges);
  assert (pred);

  if (bfs_writes_depth_map()) {
    int check_ok = check_bfs_depth_map_using_predecessors(tg, nglobalverts, nlocalverts, maxlocalverts, root, pred);
    if (!check_ok) validation_passed = 0;
  } else {
    
    /* Create a vertex depth map to use for later validation. */
    int pred_ok = build_bfs_depth_map(nglobalverts, nlocalverts, maxlocalverts, root, pred); //shit happened here
    if (!pred_ok) validation_passed = 0;
  }

  {
    /* Check that all edges connect vertices whose depths differ by at most
     * one, and check that there is an edge from each vertex to its claimed
     * predecessor.  Also, count visited edges (including duplicates and
     * self-loops).  */
    unsigned char* restrict pred_valid = (unsigned char*)xMPI_Alloc_mem(nlocalverts * sizeof(unsigned char));
    memset(pred_valid, 0, nlocalverts * sizeof(unsigned char));
    int64_t* restrict edge_endpoint = (int64_t*)xmalloc(2 * edge_chunk_size * sizeof(int64_t));
    int* restrict edge_owner = (int*)xmalloc(2 * edge_chunk_size * sizeof(int));
    size_t* restrict edge_local = (size_t*)xmalloc(2 * edge_chunk_size * sizeof(size_t));
    int64_t* restrict edge_preds = (int64_t*)xMPI_Alloc_mem(2 * edge_chunk_size * sizeof(int64_t));
    gather* pred_win = init_gather((void*)pred, nlocalverts, sizeof(int64_t), edge_preds, 2 * edge_chunk_size, 2 * edge_chunk_size, MPI_INT64_T);
    unsigned char one = 1;
    scatter_constant* pred_valid_win = init_scatter_constant((void*)pred_valid, nlocalverts, sizeof(unsigned char), &one, 2 * edge_chunk_size, MPI_UNSIGNED_CHAR);
    int64_t edge_visit_count = 0;
    ITERATE_TUPLE_GRAPH_BEGIN(tg, buf, bufsize) {
      ptrdiff_t ii;
      for (ii = 0; ii < max_bufsize; ii += HALF_CHUNKSIZE) {
        ptrdiff_t i_start = ptrdiff_min(ii, bufsize);
        ptrdiff_t i_end = ptrdiff_min(ii + HALF_CHUNKSIZE, bufsize);
        assert (i_end - i_start <= edge_chunk_size);
        ptrdiff_t i;
#pragma omp parallel for
        for (i = i_start; i < i_end; ++i) {
          int64_t v0 = get_v0_from_edge(&buf[i]);
          int64_t v1 = get_v1_from_edge(&buf[i]);
          edge_endpoint[(i - i_start) * 2 + 0] = v0;
          edge_endpoint[(i - i_start) * 2 + 1] = v1;
        }
        get_vertex_distribution_for_pred(2 * (i_end - i_start), edge_endpoint, edge_owner, edge_local);
        begin_gather(pred_win);
#pragma omp parallel for
        for (i = i_start; i < i_end; ++i) {
          add_gather_request(pred_win, (i - i_start) * 2 + 0, edge_owner[(i - i_start) * 2 + 0], edge_local[(i - i_start) * 2 + 0], (i - i_start) * 2 + 0);
          add_gather_request(pred_win, (i - i_start) * 2 + 1, edge_owner[(i - i_start) * 2 + 1], edge_local[(i - i_start) * 2 + 1], (i - i_start) * 2 + 1);
        }
        end_gather(pred_win);
        begin_scatter_constant(pred_valid_win);
#pragma omp parallel for reduction(&&:validation_passed) reduction(+:edge_visit_count)
        for (i = i_start; i < i_end; ++i) {
          int64_t src = get_v0_from_edge(&buf[i]);
          int64_t tgt = get_v1_from_edge(&buf[i]);
          uint16_t src_depth = get_depth_from_pred_entry(edge_preds[(i - i_start) * 2 + 0]);
          uint16_t tgt_depth = get_depth_from_pred_entry(edge_preds[(i - i_start) * 2 + 1]);
          if (src_depth != UINT16_MAX && tgt_depth == UINT16_MAX) {
            fprintf(stderr, "%d: Validation error: edge connects vertex %" PRId64 " in the BFS tree (depth %" PRIu16 ") to vertex %" PRId64 " outside the tree.\n", rank, src, src_depth, tgt);
            validation_passed = 0;
          } else if (src_depth == UINT16_MAX && tgt_depth != UINT16_MAX) {
            fprintf(stderr, "%d: Validation error: edge connects vertex %" PRId64 " in the BFS tree (depth %" PRIu16 ") to vertex %" PRId64 " outside the tree.\n", rank, tgt, tgt_depth, src);
            validation_passed = 0;
          } else if (src_depth - tgt_depth < -1 ||
                     src_depth - tgt_depth > 1) {
            fprintf(stderr, "%d: Validation error: depths of edge endpoints %" PRId64 " (depth %" PRIu16 ") and %" PRId64 " (depth %" PRIu16 ") are too far apart (abs. val. > 1).\n", rank, src, src_depth, tgt, tgt_depth);
            validation_passed = 0;
          } else if (src_depth != UINT16_MAX) {
            ++edge_visit_count;
          }
          if (get_pred_from_pred_entry(edge_preds[(i - i_start) * 2 + 0]) == tgt) {
            add_scatter_constant_request(pred_valid_win, edge_owner[(i - i_start) * 2 + 0], edge_local[(i - i_start) * 2 + 0], (i - i_start) * 2 + 0);
          }
          if (get_pred_from_pred_entry(edge_preds[(i - i_start) * 2 + 1]) == src) {
            add_scatter_constant_request(pred_valid_win, edge_owner[(i - i_start) * 2 + 1], edge_local[(i - i_start) * 2 + 1], (i - i_start) * 2 + 1);
          }
        }
        end_scatter_constant(pred_valid_win);
      }
    } ITERATE_TUPLE_GRAPH_END;
    destroy_gather(pred_win);
    MPI_Free_mem(edge_preds);
    free(edge_owner);
    free(edge_local);
    free(edge_endpoint);
    destroy_scatter_constant(pred_valid_win);
    ptrdiff_t i;
#pragma omp parallel for reduction(&&:validation_passed)
    for (i = 0; i < (ptrdiff_t)nlocalverts; ++i) {
      int64_t p = get_pred_from_pred_entry(pred[i]);
      if (p == -1) continue;
      int found_pred_edge = pred_valid[i];
      if (root_owner == rank && root_local == (size_t)i) found_pred_edge = 1; /* Root vertex */
      if (!found_pred_edge) {
        int64_t v = vertex_to_global_for_pred(rank, i);
        fprintf(stderr, "%d: Validation error: no graph edge from vertex %" PRId64 " to its parent %" PRId64 ".\n", rank, v, get_pred_from_pred_entry(pred[i]));
        validation_passed = 0;
      }
    }
    MPI_Free_mem(pred_valid);

    MPI_Allreduce(MPI_IN_PLACE, &edge_visit_count, 1, MPI_INT64_T, MPI_SUM, MPI_COMM_WORLD);
    *edge_visit_count_ptr = edge_visit_count;
  }
Exemple #2
0
/* MPI driver: generates a Kronecker edge list, picks BFS roots, then runs and
 * validates one BFS per root and prints timing/TEPS statistics on rank 0.
 *
 * Command line: <prog> filename SCALE [edgefactor]
 *   filename   - edge file created through MPI-IO (must not already exist,
 *                because it is opened with MPI_MODE_EXCL)
 *   SCALE      - log_2(# vertices)
 *   edgefactor - (# edges) / (# vertices), defaults to 16
 *
 * Returns 0 on completion; a usage error aborts the whole MPI job. */
int main(int argc, char** argv) {
  MPI_Init(&argc, &argv);

  setup_globals();

  /* Parse arguments. */
  int SCALE = 16;
  int edgefactor = 16; /* nedges / nvertices, i.e., 2*avg. degree */
  char* name = argv[1]; /* argv[argc] is NULL, so this is safe when argc == 1 */
  if (argc >= 3) SCALE = atoi(argv[2]);
  if (argc >= 4) edgefactor = atoi(argv[3]);
  if (argc <= 2 || argc >= 5 || SCALE == 0 || edgefactor == 0) {
    if (rank == 0) {
      fprintf(stderr, "Usage: %s filename SCALE edgefactor\n  SCALE = log_2(# vertices) [integer, required]\n  edgefactor = (# edges) / (# vertices) = .5 * (average vertex degree) [integer, defaults to 16]\n(Random number seed and Kronecker initiator are in main.c)\n", argv[0]);
    }
    MPI_Abort(MPI_COMM_WORLD, 1);
  }
  uint64_t seed1 = 2, seed2 = 3;

  const char* filename = name;

  /* If filename is NULL, store data in memory */

  tuple_graph tg;
  tg.nglobaledges = (int64_t)(edgefactor) << SCALE;
  int64_t nglobalverts = (int64_t)(1) << SCALE;

  tg.data_in_file = (filename != NULL);

  if (tg.data_in_file) {
    printf("data in file \n");

    MPI_File_set_errhandler(MPI_FILE_NULL, MPI_ERRORS_ARE_FATAL);
    /* Unlike the reference code, MPI_MODE_DELETE_ON_CLOSE is not requested,
     * so the generated edge file survives the run. */
    MPI_File_open(MPI_COMM_WORLD, (char*)filename, MPI_MODE_RDWR | MPI_MODE_CREATE | MPI_MODE_EXCL | MPI_MODE_UNIQUE_OPEN, MPI_INFO_NULL, &tg.edgefile);
    MPI_File_set_size(tg.edgefile, tg.nglobaledges * sizeof(packed_edge));
    MPI_File_set_view(tg.edgefile, 0, packed_edge_mpi_type, packed_edge_mpi_type, "native", MPI_INFO_NULL);
    MPI_File_set_atomicity(tg.edgefile, 0);
  }

  /* Make the raw graph edges. */
  /* Get roots for BFS runs, plus maximum vertex with non-zero degree (used by
   * validator). */
  int num_bfs_roots = 64;
  int64_t* bfs_roots = (int64_t*)xmalloc(num_bfs_roots * sizeof(int64_t));
  int64_t max_used_vertex = 0;

  double make_graph_start = MPI_Wtime();
  {
    /* Spread the two 64-bit numbers into five nonzero values in the correct
     * range. */
    uint_fast32_t seed[5];
    make_mrg_seed(seed1, seed2, seed);

    /* As the graph is being generated, also keep a bitmap of vertices with
     * incident edges.  We keep a grid of processes, each row of which has a
     * separate copy of the bitmap (distributed among the processes in the
     * row), and then do an allreduce at the end.  This scheme is used to avoid
     * non-local communication and reading the file separately just to find BFS
     * roots. */
    MPI_Offset nchunks_in_file = (tg.nglobaledges + FILE_CHUNKSIZE - 1) / FILE_CHUNKSIZE;
    int64_t bitmap_size_in_bytes = int64_min(BITMAPSIZE, (nglobalverts + CHAR_BIT - 1) / CHAR_BIT);
    if (bitmap_size_in_bytes * size * CHAR_BIT < nglobalverts) {
      bitmap_size_in_bytes = (nglobalverts + size * CHAR_BIT - 1) / (size * CHAR_BIT);
    }
    int ranks_per_row = ((nglobalverts + CHAR_BIT - 1) / CHAR_BIT + bitmap_size_in_bytes - 1) / bitmap_size_in_bytes;
    int nrows = size / ranks_per_row;
    int my_row = -1, my_col = -1;
    unsigned char* restrict has_edge = NULL;
    MPI_Comm cart_comm;
    {
      int dims[2] = {size / ranks_per_row, ranks_per_row};
      int periods[2] = {0, 0};
      MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, 1, &cart_comm);
    }
    int in_generating_rectangle = 0;
    if (cart_comm != MPI_COMM_NULL) {
      in_generating_rectangle = 1;
      {
        int dims[2], periods[2], coords[2];
        MPI_Cart_get(cart_comm, 2, dims, periods, coords);
        my_row = coords[0];
        my_col = coords[1];
      }
      MPI_Comm this_col;
      MPI_Comm_split(cart_comm, my_col, my_row, &this_col);
      MPI_Comm_free(&cart_comm);
      has_edge = (unsigned char*)xMPI_Alloc_mem(bitmap_size_in_bytes);
      memset(has_edge, 0, bitmap_size_in_bytes);
      /* Every rank in a given row creates the same vertices (for updating the
       * bitmap); only one writes them to the file (or final memory buffer). */
      packed_edge* buf = (packed_edge*)xmalloc(FILE_CHUNKSIZE * sizeof(packed_edge));
      MPI_Offset block_limit = (nchunks_in_file + nrows - 1) / nrows;
      if (tg.data_in_file) {
        tg.edgememory_size = 0;
        tg.edgememory = NULL;
      } else {
        int my_pos = my_row + my_col * nrows;
        int last_pos = (tg.nglobaledges % ((int64_t)FILE_CHUNKSIZE * nrows * ranks_per_row) != 0) ?
                       (tg.nglobaledges / FILE_CHUNKSIZE) % (nrows * ranks_per_row) :
                       -1;
        int64_t edges_left = tg.nglobaledges % FILE_CHUNKSIZE;
        int64_t nedges = FILE_CHUNKSIZE * (tg.nglobaledges / ((int64_t)FILE_CHUNKSIZE * nrows * ranks_per_row)) +
                         FILE_CHUNKSIZE * (my_pos < (tg.nglobaledges / FILE_CHUNKSIZE) % (nrows * ranks_per_row)) +
                         (my_pos == last_pos ? edges_left : 0);
        tg.edgememory_size = nedges;
        tg.edgememory = (packed_edge*)xmalloc(nedges * sizeof(packed_edge));
      }
      MPI_Offset block_idx;
      for (block_idx = 0; block_idx < block_limit; ++block_idx) {
        MPI_Offset start_edge_index = int64_min(FILE_CHUNKSIZE * (block_idx * nrows + my_row), tg.nglobaledges);
        MPI_Offset edge_count = int64_min(tg.nglobaledges - start_edge_index, FILE_CHUNKSIZE);
        /* Generate straight into the in-memory edge list when this rank owns
         * the block; otherwise use the scratch buffer. */
        packed_edge* actual_buf = (!tg.data_in_file && block_idx % ranks_per_row == my_col) ?
                                  tg.edgememory + FILE_CHUNKSIZE * (block_idx / ranks_per_row) :
                                  buf;
        if (!tg.data_in_file && block_idx % ranks_per_row == my_col) {
          assert (FILE_CHUNKSIZE * (block_idx / ranks_per_row) + edge_count <= tg.edgememory_size);
        }

        /* The per-iteration scratch allocation that used to live here was
         * never used or freed (one leaked chunk per block); it is gone. */
        generate_kronecker_range(seed, SCALE, start_edge_index, start_edge_index + edge_count, actual_buf);
        if (tg.data_in_file && my_col == (block_idx % ranks_per_row)) { /* Try to spread writes among ranks */
          /* Debug trace of the range this rank writes.  MPI_Offset is not
           * guaranteed to be int, so print it through int64_t. */
          printf("%d: %" PRId64 ", %" PRId64 "\n", rank, (int64_t)start_edge_index, (int64_t)edge_count);

          MPI_File_write_at(tg.edgefile, start_edge_index, actual_buf, edge_count, packed_edge_mpi_type, MPI_STATUS_IGNORE);
        }
        ptrdiff_t i;
#ifdef _OPENMP
#pragma omp parallel for
#endif
        for (i = 0; i < edge_count; ++i) {
          int64_t src = get_v0_from_edge(&actual_buf[i]);
          int64_t tgt = get_v1_from_edge(&actual_buf[i]);
          if (src == tgt) continue; /* Self-loops do not mark a vertex as used */
          if (src / bitmap_size_in_bytes / CHAR_BIT == my_col) {
#ifdef _OPENMP
#pragma omp atomic
#endif
            has_edge[(src / CHAR_BIT) % bitmap_size_in_bytes] |= (1 << (src % CHAR_BIT));
          }
          if (tgt / bitmap_size_in_bytes / CHAR_BIT == my_col) {
#ifdef _OPENMP
#pragma omp atomic
#endif
            has_edge[(tgt / CHAR_BIT) % bitmap_size_in_bytes] |= (1 << (tgt % CHAR_BIT));
          }
        }
      }
      free(buf);
#if 0
      /* The allreduce for each root acts like we did this: */
      MPI_Allreduce(MPI_IN_PLACE, has_edge, bitmap_size_in_bytes, MPI_UNSIGNED_CHAR, MPI_BOR, this_col);
#endif
      MPI_Comm_free(&this_col);
    } else {
      tg.edgememory = NULL;
      tg.edgememory_size = 0;
    }
    MPI_Allreduce(&tg.edgememory_size, &tg.max_edgememory_size, 1, MPI_INT64_T, MPI_MAX, MPI_COMM_WORLD);

#ifndef GEN_ONLY
    /* Find roots and max used vertex */
    {
      uint64_t counter = 0;
      int bfs_root_idx;
      for (bfs_root_idx = 0; bfs_root_idx < num_bfs_roots; ++bfs_root_idx) {
        int64_t root;
        while (1) {
          double d[2];
          make_random_numbers(2, seed1, seed2, counter, d);
          root = (int64_t)((d[0] + d[1]) * nglobalverts) % nglobalverts;
          counter += 2;
          /* Give up on this slot once the candidate budget is exhausted. */
          if (counter > 2 * nglobalverts) break;
          int is_duplicate = 0;
          int i;
          for (i = 0; i < bfs_root_idx; ++i) {
            if (root == bfs_roots[i]) {
              is_duplicate = 1;
              break;
            }
          }
          if (is_duplicate) continue; /* Everyone takes the same path here */
          int root_ok = 0;
          if (in_generating_rectangle && (root / CHAR_BIT / bitmap_size_in_bytes) == my_col) {
            root_ok = (has_edge[(root / CHAR_BIT) % bitmap_size_in_bytes] & (1 << (root % CHAR_BIT))) != 0;
          }
          MPI_Allreduce(MPI_IN_PLACE, &root_ok, 1, MPI_INT, MPI_LOR, MPI_COMM_WORLD);
          if (root_ok) break;
        }
        bfs_roots[bfs_root_idx] = root;
      }
      num_bfs_roots = bfs_root_idx;

      /* Find maximum non-zero-degree vertex. */
      {
        int64_t i;
        max_used_vertex = 0;
        if (in_generating_rectangle) {
          for (i = bitmap_size_in_bytes * CHAR_BIT; i > 0; --i) {
            if (i > nglobalverts) continue;
            if (has_edge[(i - 1) / CHAR_BIT] & (1 << ((i - 1) % CHAR_BIT))) {
              max_used_vertex = (i - 1) + my_col * CHAR_BIT * bitmap_size_in_bytes;
              break;
            }
          }
        }
        MPI_Allreduce(MPI_IN_PLACE, &max_used_vertex, 1, MPI_INT64_T, MPI_MAX, MPI_COMM_WORLD);
      }
    }
#endif

    if (in_generating_rectangle) {
      MPI_Free_mem(has_edge);
    }
    if (tg.data_in_file) {
      MPI_File_sync(tg.edgefile);
    }
  }

  double make_graph_stop = MPI_Wtime();
  double make_graph_time = make_graph_stop - make_graph_start;
  if (rank == 0) { /* Not an official part of the results */
    fprintf(stderr, "graph_generation:               %f s\n", make_graph_time);
  }

#ifndef GEN_ONLY //!GEN_ONLY

  /* Make user's graph data structure. */
  double data_struct_start = MPI_Wtime();
  make_graph_data_structure(&tg);
  double data_struct_stop = MPI_Wtime();
  double data_struct_time = data_struct_stop - data_struct_start;
  if (rank == 0) { /* Not an official part of the results */
    fprintf(stderr, "construction_time:              %f s\n", data_struct_time);
  }

  /* Number of edges visited in each BFS; a double so get_statistics can be
   * used directly. */
  double* edge_counts = (double*)xmalloc(num_bfs_roots * sizeof(double));

  /* Run BFS. */
  int validation_passed = 1;
  double* bfs_times = (double*)xmalloc(num_bfs_roots * sizeof(double));
  double* validate_times = (double*)xmalloc(num_bfs_roots * sizeof(double));
  uint64_t nlocalverts = get_nlocalverts_for_pred();
  int64_t* pred = (int64_t*)xMPI_Alloc_mem(nlocalverts * sizeof(int64_t));

  int bfs_root_idx;
  for (bfs_root_idx = 0; bfs_root_idx < num_bfs_roots; ++bfs_root_idx) {
    int64_t root = bfs_roots[bfs_root_idx];

    if (rank == 0) fprintf(stderr, "Running BFS %d\n", bfs_root_idx);

    /* Clear the pred array. */
    memset(pred, 0, nlocalverts * sizeof(int64_t));

    /* Do the actual BFS. */
    double bfs_start = MPI_Wtime();
    run_bfs(root, &pred[0]);
    double bfs_stop = MPI_Wtime();
    bfs_times[bfs_root_idx] = bfs_stop - bfs_start;
    if (rank == 0) fprintf(stderr, "Time for BFS %d is %f\n", bfs_root_idx, bfs_times[bfs_root_idx]);

    /* Validate result. */
    if (rank == 0) fprintf(stderr, "Validating BFS %d\n", bfs_root_idx);

    double validate_start = MPI_Wtime();
    int64_t edge_visit_count;
    int validation_passed_one = validate_bfs_result(&tg, max_used_vertex + 1, nlocalverts, root, pred, &edge_visit_count);
    double validate_stop = MPI_Wtime();
    validate_times[bfs_root_idx] = validate_stop - validate_start;
    if (rank == 0) fprintf(stderr, "Validate time for BFS %d is %f\n", bfs_root_idx, validate_times[bfs_root_idx]);
    edge_counts[bfs_root_idx] = (double)edge_visit_count;
    if (rank == 0) fprintf(stderr, "TEPS for BFS %d is %g\n", bfs_root_idx, edge_visit_count / bfs_times[bfs_root_idx]);

    if (!validation_passed_one) {
      validation_passed = 0;
      if (rank == 0) fprintf(stderr, "Validation failed for this BFS root; skipping rest.\n");
      break;
    }
  }

  MPI_Free_mem(pred);
  free_graph_data_structure();

#endif //!GEN_ONLY

  /* Released outside the GEN_ONLY guard: the array is allocated either way. */
  free(bfs_roots);

  if (tg.data_in_file) {
    MPI_File_close(&tg.edgefile);
  } else {
    free(tg.edgememory); tg.edgememory = NULL;
  }

#ifndef GEN_ONLY
  /* Print results. */
  if (rank == 0) {
    if (!validation_passed) {
      fprintf(stdout, "No results printed for invalid run.\n");
    } else {
      int i;
      fprintf(stdout, "SCALE:                          %d\n", SCALE);
      fprintf(stdout, "edgefactor:                     %d\n", edgefactor);
      fprintf(stdout, "NBFS:                           %d\n", num_bfs_roots);
      fprintf(stdout, "graph_generation:               %g\n", make_graph_time);
      fprintf(stdout, "num_mpi_processes:              %d\n", size);
      fprintf(stdout, "construction_time:              %g\n", data_struct_time);
      double stats[s_LAST];
      get_statistics(bfs_times, num_bfs_roots, stats);
      fprintf(stdout, "min_time:                       %g\n", stats[s_minimum]);
      fprintf(stdout, "firstquartile_time:             %g\n", stats[s_firstquartile]);
      fprintf(stdout, "median_time:                    %g\n", stats[s_median]);
      fprintf(stdout, "thirdquartile_time:             %g\n", stats[s_thirdquartile]);
      fprintf(stdout, "max_time:                       %g\n", stats[s_maximum]);
      fprintf(stdout, "mean_time:                      %g\n", stats[s_mean]);
      fprintf(stdout, "stddev_time:                    %g\n", stats[s_std]);
      get_statistics(edge_counts, num_bfs_roots, stats);
      fprintf(stdout, "min_nedge:                      %.11g\n", stats[s_minimum]);
      fprintf(stdout, "firstquartile_nedge:            %.11g\n", stats[s_firstquartile]);
      fprintf(stdout, "median_nedge:                   %.11g\n", stats[s_median]);
      fprintf(stdout, "thirdquartile_nedge:            %.11g\n", stats[s_thirdquartile]);
      fprintf(stdout, "max_nedge:                      %.11g\n", stats[s_maximum]);
      fprintf(stdout, "mean_nedge:                     %.11g\n", stats[s_mean]);
      fprintf(stdout, "stddev_nedge:                   %.11g\n", stats[s_std]);
      double* secs_per_edge = (double*)xmalloc(num_bfs_roots * sizeof(double));
      for (i = 0; i < num_bfs_roots; ++i) secs_per_edge[i] = bfs_times[i] / edge_counts[i];
      get_statistics(secs_per_edge, num_bfs_roots, stats);
      /* TEPS is the reciprocal of seconds-per-edge, so the order statistics
       * swap ends (min TEPS comes from max seconds-per-edge, and so on). */
      fprintf(stdout, "min_TEPS:                       %g\n", 1. / stats[s_maximum]);
      fprintf(stdout, "firstquartile_TEPS:             %g\n", 1. / stats[s_thirdquartile]);
      fprintf(stdout, "median_TEPS:                    %g\n", 1. / stats[s_median]);
      fprintf(stdout, "thirdquartile_TEPS:             %g\n", 1. / stats[s_firstquartile]);
      fprintf(stdout, "max_TEPS:                       %g\n", 1. / stats[s_minimum]);
      fprintf(stdout, "harmonic_mean_TEPS:             %g\n", 1. / stats[s_mean]);
      /* Formula from:
       * Title: The Standard Errors of the Geometric and Harmonic Means and
       *        Their Application to Index Numbers
       * Author(s): Nilan Norris
       * Source: The Annals of Mathematical Statistics, Vol. 11, No. 4 (Dec., 1940), pp. 445-448
       * Publisher(s): Institute of Mathematical Statistics
       * Stable URL: http://www.jstor.org/stable/2235723
       * (same source as in specification). */
      fprintf(stdout, "harmonic_stddev_TEPS:           %g\n", stats[s_std] / (stats[s_mean] * stats[s_mean] * sqrt(num_bfs_roots - 1)));
      free(secs_per_edge); secs_per_edge = NULL;
      get_statistics(validate_times, num_bfs_roots, stats);
      fprintf(stdout, "min_validate:                   %g\n", stats[s_minimum]);
      fprintf(stdout, "firstquartile_validate:         %g\n", stats[s_firstquartile]);
      fprintf(stdout, "median_validate:                %g\n", stats[s_median]);
      fprintf(stdout, "thirdquartile_validate:         %g\n", stats[s_thirdquartile]);
      fprintf(stdout, "max_validate:                   %g\n", stats[s_maximum]);
      fprintf(stdout, "mean_validate:                  %g\n", stats[s_mean]);
      fprintf(stdout, "stddev_validate:                %g\n", stats[s_std]);
#if 0
      for (i = 0; i < num_bfs_roots; ++i) {
        fprintf(stdout, "Run %3d:                        %g s, validation %g s\n", i + 1, bfs_times[i], validate_times[i]);
      }
#endif
    }
  }
  /* Freed on every rank and on the failed-validation path too; previously
   * edge_counts was released only by rank 0 after a successful run. */
  free(edge_counts);
  free(bfs_times);
  free(validate_times);

#endif
  cleanup_globals();
  MPI_Finalize();
  return 0;
}
Exemple #3
0
/* Sequential generator front end: builds a graph with make_graph() and writes
 * its edge list either as tab-separated text or as pairs of raw uint32_t.
 *
 * Command line: <prog> [-e edges_per_vertex] [-o outfile] [-s seed] [-b] log_numverts
 *   -e  edges per vertex (default 16)
 *   -o  output file (default stdout)
 *   -s  random seed (default derived from the current time)
 *   -b  binary output instead of text
 *
 * Returns 0 on success; exits with status 1 on I/O failure. */
int main(int argc, char* argv[]) {
  /* Default seed: scramble the wall clock.  The mixing is done on an unsigned
   * value because left-shifting a signed int into overflow is undefined. */
  struct timeval currentTime;
  gettimeofday(&currentTime, NULL);
  uint32_t mixed = (uint32_t)currentTime.tv_sec ^ (uint32_t)currentTime.tv_usec;
  mixed ^= mixed >> 12;
  mixed ^= mixed << 25;
  mixed ^= mixed >> 27;
  int seed = (int)mixed;

  FILE *fout;

  if (argc < 2 || argc > 10) {
      printError();
  }

  // define all the variables
  int log_numverts = -1;
  char * filename = "";
  long int numEdges;
  double start, time_taken, start_write, time_taken_write;
  int64_t nedges;
  packed_edge* result;
  int binary = 0; // set default to be not binary, normal tsv

  numEdges = 16;  // default 16
  fout = stdout;  // default the stdout

  int opt;
  while(optind < argc) {
    if ((opt = getopt(argc, argv, "+e:o:s:b")) != -1) {
      switch (opt) {
        case 'e':
            numEdges = atoi(optarg);
            break;
        case 'o':
            /* A repeated -o replaces the earlier target; close it so the
             * stream is not leaked. */
            if (fout != stdout) {
              fclose(fout);
            }
            filename = optarg;
            fout = fopen(optarg, "wb");
            if (fout == NULL) {
              fprintf(stderr, "%s -- ", optarg);
              perror("fopen for write failed");
              exit(1);
            }
            break;
        case 's':
            seed = atoi(optarg);
            break;
        case 'b':
            binary = 1;
            break;
        default: 
            printError();
            break;
        }
    } else {
      /* getopt stopped at a non-option: treat it as the positional scale. */
      if(argv[optind] == NULL) {
        printError();
      } else {
        log_numverts = atoi(argv[optind]); // In base 2
        optind++;
      }
    }
  }

  if( log_numverts < 0 ) {
    printError();
  }

  /* Binary records store each endpoint in 32 bits; refuse scales whose vertex
   * ids would be silently truncated. */
  if (binary != 0 && log_numverts > 32) {
    fprintf(stderr, "binary output supports at most 2^32 vertices (got 2^%d)\n", log_numverts);
    exit(1);
  }

  //Start of graph generation timing
  start = get_time();
  make_graph(log_numverts, (int64_t)numEdges << log_numverts, seed, seed, &nedges, &result);
  time_taken = get_time() - start;

  printf("For 2^%d\n", log_numverts);
  printf("\t%f seconds for making graph\n", time_taken);

  /* Both writers iterate over the edge count reported by make_graph with a
   * 64-bit index; an int index overflows once there are more than INT_MAX
   * edges. */
  if (binary == 0) {
  // print to the file
    start_write = get_time();
    for (int64_t i = 0; i < nedges; i++) {
      fprintf(fout, "%llu\t%llu\n",
              (unsigned long long)get_v0_from_edge(result + i),
              (unsigned long long)get_v1_from_edge(result + i));
    }
    time_taken_write = get_time() - start_write;
    printf("\t%f seconds for writing ascii version\n", time_taken_write);
  } else {
    // need to print binary
    start_write = get_time();
    for (int64_t i = 0; i < nedges; i++) {
      uint32_t from = (uint32_t)get_v0_from_edge(result + i);
      uint32_t to = (uint32_t)get_v1_from_edge(result + i);
      if (fwrite((const void*) & from,sizeof(uint32_t),1,fout) != 1 ||
          fwrite((const void*) & to,sizeof(uint32_t),1,fout) != 1) {
        fprintf(stderr, "%s -- ", filename);
        perror("fwrite failed");
        exit(1);
      }
    }
    time_taken_write = get_time() - start_write;
    printf("\t%f seconds for writing binary version\n", time_taken_write);
  }

  /* NOTE(review): assumes make_graph hands back a malloc-family buffer, as
   * the reference generator does -- confirm before relying on it. */
  free(result);

  int check_correctness;
  check_correctness = fclose(fout);
  if (check_correctness == EOF) {
    fprintf(stderr, "%s -- ", filename);
    perror("fclose failed");
    exit(1);
  }

  return 0;
}
/* Generates an edge list (R-MAT or Kronecker, depending on use_RMAT), dumps
 * the raw edge array to `dumpname` (or to stdout when no name was given) and,
 * when a dump name exists, also writes "<dumpname>.graph": one line per
 * non-self-loop edge holding its two endpoints shifted to 1-based ids.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE if an output cannot be opened or
 * written. */
int
main (int argc, char **argv)
{
  int fd;
  int64_t desired_nedge;
  if (sizeof (int64_t) < 8) {
    fprintf (stderr, "No 64-bit support.\n");
    return EXIT_FAILURE;
  }

  if (argc > 1)
    get_options (argc, argv);

  /* Shift in 64 bits; `1L` is only 32 bits wide on some platforms. */
  nvtx_scale = (int64_t)1 << SCALE;

  init_random ();

  desired_nedge = nvtx_scale * edgefactor;
  /* Catch a few possible overflows. */
  assert (desired_nedge >= nvtx_scale);
  assert (desired_nedge >= edgefactor);


  if (VERBOSE) fprintf (stderr, "Generating edge list...");
  if (use_RMAT) {
    nedge = desired_nedge;
    IJ = xmalloc_large_ext (nedge * sizeof (*IJ));
    rmat_edgelist (IJ, nedge, SCALE, A, B, C);
  } else {
    make_graph(SCALE, desired_nedge, userseed, userseed, &nedge, (struct packed_edge**)(&IJ));
  }
  if (VERBOSE) fprintf (stderr, " done.\n");

  if (dumpname)
    fd = open (dumpname, O_WRONLY|O_CREAT|O_TRUNC, 0666);
  else
    fd = 1;

  if (fd < 0) {
    fprintf (stderr, "Cannot open output file : %s\n",
	     (dumpname? dumpname : "stdout"));
    return EXIT_FAILURE;
  }

  /* Dump exactly the nedge elements that were allocated above.  The earlier
   * code asked write() for twice that many bytes, reading past the end of
   * the edge array.  write() may also be partial, so loop until done. */
  {
    const char *cursor = (const char *) IJ;
    size_t remaining = (size_t) nedge * sizeof (*IJ);
    while (remaining > 0) {
      ssize_t written = write (fd, cursor, remaining);
      if (written <= 0) {
	perror ("write");
	if (fd != 1)
	  close (fd);
	return EXIT_FAILURE;
      }
      cursor += written;
      remaining -= (size_t) written;
    }
  }
  if (fd != 1)
    close (fd);

  /* The text dump is named after dumpname; without one there is nothing to
   * derive a file name from (and strlen(NULL) would be undefined). */
  if (dumpname) {
    size_t buflen = strlen(dumpname) + strlen(".graph") + 1;
    char * graphname = (char *) malloc(buflen * sizeof(char));
    if (!graphname) {
      fprintf (stderr, "Out of memory building output file name.\n");
      return EXIT_FAILURE;
    }
    snprintf(graphname, buflen, "%s.graph", dumpname);

    FILE * file = fopen(graphname, "w");
    if(!file){
      fprintf (stderr, "Cannot open output file : %s\n", graphname);
      free(graphname);
      return EXIT_FAILURE;
    }

    for (int64_t k = 0; k < nedge; ++k) {
      const int64_t i = get_v0_from_edge(&IJ[k]);
      const int64_t j = get_v1_from_edge(&IJ[k]);
      if (i != j) /* self-loops are left out of the text dump */
        fprintf(file, "%" PRId64 " %" PRId64 "\n", i+1, j+1);
    }

    fclose(file);
    free(graphname);
  }

  return EXIT_SUCCESS;
}
Exemple #5
0
// Modified part
/* Writes the M edges stored at *result_ptr_in to the stream fout.
 *
 * Parameters:
 *   M             - number of edges to write; nothing is written if M <= 0.
 *   result_ptr_in - pointer to the edge array pointer; only read from.
 *   fout          - open, writable output stream.
 *   binary        - 0 selects text output, anything else selects binary.
 *
 * Text output is one "from<TAB>to<NEWLINE>" line per edge.  Each thread
 * formats into its own staging buffer and flushes it to fout inside a
 * critical section, so with GRAPH_GENERATOR_OMP the order of lines in the
 * file is not the order of edges in the array.
 *
 * Binary output is 2*M native-endian uint32_t values laid out as
 * from0, to0, from1, to1, ...
 *
 * In both modes the endpoints are converted to uint32_t, so ids that do
 * not fit in 32 bits are truncated.
 *
 * Any formatting, sizing or write failure prints a message to stderr and
 * terminates the process with exit(1).
 *
 * NOTE(review): assumes xmalloc() returns malloc-compatible memory (never
 * NULL, releasable with free()) -- confirm against its definition.
 */
void produce_graph(int64_t M, packed_edge** result_ptr_in, FILE *fout, int64_t binary) {
    /* Size in bytes of each thread's text staging buffer. */
    enum { TEXT_BUFFER_BYTES = 1 << 20 };

    if (M <= 0) {
        return;
    }

    if (binary == 0) {

#ifdef GRAPH_GENERATOR_OMP
        #pragma omp parallel
#endif

        {
            char* buff = (char*)xmalloc(TEXT_BUFFER_BYTES);
            int total_length = 0;

            /* A thread that receives no iterations still reaches the final
             * flush below, so the buffer must start as an empty string. */
            buff[0] = '\0';

#ifdef GRAPH_GENERATOR_OMP
            #pragma omp for
#endif

            for (int64_t i = 0; i < M; i++) {
                char temp[50];
                int temp_length;
                int check_correctness;

                uint32_t from = get_v0_from_edge(*result_ptr_in + i);
                uint32_t to = get_v1_from_edge(*result_ptr_in + i);
                temp_length = snprintf(temp, sizeof temp, "%u\t%u\n", (unsigned)from, (unsigned)to);

                if (temp_length < 0) {
                    fprintf(stderr, "snprintf error\n");
                    exit(1);
                }

                if (total_length + temp_length < TEXT_BUFFER_BYTES) {
                    /* Still room (including the terminator): append. */
                    check_correctness = snprintf(&(buff[total_length]), (size_t)(TEXT_BUFFER_BYTES - total_length), "%s", temp);
                    if (check_correctness < 0) {
                        fprintf(stderr, "snprintf error\n");
                        exit(1);
                    }
                    total_length += temp_length;
                } else {
                    /* Staging buffer is full: flush it, then restart it
                     * with the line that did not fit. */
#ifdef GRAPH_GENERATOR_OMP
                    #pragma omp critical
#endif
                    {
                        check_correctness = fprintf(fout, "%s", buff);
                        if (check_correctness < 0) {
                            fprintf(stderr, "fprintf error;\n");
                            exit(1);
                        }
                    }
                    check_correctness = snprintf(buff, TEXT_BUFFER_BYTES, "%s", temp);
                    if (check_correctness < 0) {
                        fprintf(stderr, "snprintf error;\n");
                        exit(1);
                    }
                    total_length = temp_length;
                }
            }

            /* Flush whatever is left in this thread's staging buffer. */
#ifdef GRAPH_GENERATOR_OMP
            #pragma omp critical
#endif
            {
                int check_correctness;
                check_correctness = fprintf(fout, "%s", buff);
                if (check_correctness < 0) {
                    fprintf(stderr, "fprintf error;\n");
                    exit(1);
                }
            }

            free(buff);
        }
    } else {
        /* Do the size arithmetic in size_t: 32-bit arithmetic wraps once
         * M reaches 2^29 (byte count) or 2^31 (element count). */
        if ((uint64_t)M > SIZE_MAX / (2 * sizeof(uint32_t))) {
            fprintf(stderr, "edge count too large for binary buffer\n");
            exit(1);
        }
        size_t element_count = (size_t)M * 2;
        uint32_t* buff = (uint32_t*)xmalloc(element_count * sizeof(uint32_t));

#ifdef GRAPH_GENERATOR_OMP
        #pragma omp parallel for
#endif
        for (int64_t i = 0; i < M; i++) {
            uint32_t from = get_v0_from_edge(*result_ptr_in + i);
            buff[2 * i] = from;
            uint32_t to = get_v1_from_edge(*result_ptr_in + i);
            buff[2 * i + 1] = to;
        }

        size_t check_correctness;
        check_correctness = fwrite(buff, sizeof(uint32_t), element_count, fout);
        free(buff);
        if (check_correctness != element_count) {
            fprintf(stderr, "fwrite error;\n");
            exit(1);
        }
    }
}