Example #1
template<typename GlobalOrdinal>
void
compute_imbalance(const int global_box[][2],
                  const int local_box[][2],
                  float& largest_imbalance,
                  float& std_dev,
                  YAML_Doc& doc,
                  bool record_in_doc)
{
  int numprocs = 1, myproc = 0;
#ifdef HAVE_MPI
  MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
  MPI_Comm_rank(MPI_COMM_WORLD, &myproc);
#endif

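  //Count the rows this processor owns, then get the global total and the
  //min/max per-processor counts (and which processors own them):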
  GlobalOrdinal local_nrows = get_num_ids<GlobalOrdinal>(local_box);
  GlobalOrdinal min_nrows = 0, max_nrows = 0, global_nrows = 0;
  int min_proc = myproc, max_proc = myproc;
  get_global_min_max(local_nrows, global_nrows, min_nrows, min_proc,
                     max_nrows, max_proc);

  float avg_nrows = global_nrows;
  avg_nrows /= numprocs;

  //largest_imbalance will be the difference between the min (or max)
  //rows-per-processor and avg_nrows, represented as a percentage:
  largest_imbalance = percentage_difference<float>(min_nrows, avg_nrows);

  float tmp = percentage_difference<float>(max_nrows, avg_nrows);
  if (tmp > largest_imbalance) largest_imbalance = tmp;

  std_dev = compute_std_dev_as_percentage<float>(local_nrows, avg_nrows);

  if (myproc == 0 && record_in_doc) {
    doc.add("Rows-per-proc Load Imbalance","");
    doc.get("Rows-per-proc Load Imbalance")->add("Largest (from avg, %)",largest_imbalance);
    doc.get("Rows-per-proc Load Imbalance")->add("Std Dev (%)",std_dev);
  }
}
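
The helpers this example relies on (get_num_ids, get_global_min_max, percentage_difference, compute_std_dev_as_percentage) are not part of the listing. As orientation only, here is a minimal sketch of what get_global_min_max and percentage_difference could look like in an MPI build; the names and call shapes are taken from the code above, but the bodies are assumptions (MPI_MINLOC/MPI_MAXLOC is just one way to recover the extreme values together with the owning ranks), and the real implementations may differ, e.g. they also have to cover the non-MPI case.

#include <mpi.h>
#include <cmath>

//Illustrative sketch only -- not the original implementation.
template<typename T>
void get_global_min_max(T local_val,
                        T& global_sum,
                        T& global_min, int& min_proc,
                        T& global_max, int& max_proc)
{
  int myproc = 0;
  MPI_Comm_rank(MPI_COMM_WORLD, &myproc);

  //Global sum of the per-processor values:
  double local_d = static_cast<double>(local_val);
  double sum_d = local_d;
  MPI_Allreduce(&local_d, &sum_d, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);

  //MPI_MINLOC/MPI_MAXLOC reduce a (value, rank) pair, yielding the extreme
  //value and the rank that owns it in one call:
  struct { double val; int rank; } in = { local_d, myproc }, out_min, out_max;
  MPI_Allreduce(&in, &out_min, 1, MPI_DOUBLE_INT, MPI_MINLOC, MPI_COMM_WORLD);
  MPI_Allreduce(&in, &out_max, 1, MPI_DOUBLE_INT, MPI_MAXLOC, MPI_COMM_WORLD);

  global_sum = static_cast<T>(sum_d);
  global_min = static_cast<T>(out_min.val);  min_proc = out_min.rank;
  global_max = static_cast<T>(out_max.val);  max_proc = out_max.rank;
}

//Illustrative sketch only: difference from the average, as a percentage.
template<typename T>
T percentage_difference(T value, T average)
{
  T diff = std::abs(value - average);
  if (std::abs(average) > T(1.e-5)) {
    diff = T(100) * diff / std::abs(average);
  }
  return diff;
}
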
Example #2
template<typename GlobalOrdinal>
void
add_imbalance(const int global_box[][2],
              int local_box[][2],
              float imbalance,
              YAML_Doc& doc)
{
  int numprocs = 1, myproc = 0;
#ifdef HAVE_MPI
  MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
  MPI_Comm_rank(MPI_COMM_WORLD, &myproc);
#endif

  if (numprocs == 1) {
    return;
  }

  float cur_imbalance = 0, cur_std_dev = 0;
  compute_imbalance<GlobalOrdinal>(global_box, local_box,
                                  cur_imbalance, cur_std_dev, doc, false);

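  //Grow the box owned by the most heavily loaded processor and shrink the box
  //owned by the least loaded one, one grid layer at a time, until the measured
  //imbalance reaches the requested value (or no further adjustment is possible).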
  while (cur_imbalance < imbalance) {
    GlobalOrdinal local_nrows = get_num_ids<GlobalOrdinal>(local_box);
    GlobalOrdinal min_nrows = 0, max_nrows = 0, global_nrows = 0;
    int min_proc = myproc, max_proc = myproc;
    get_global_min_max(local_nrows, global_nrows, min_nrows, min_proc,
                       max_nrows, max_proc);

    std::pair<int,int> grow(NONE,UPPER);
    int grow_axis_val = -1;
    std::pair<int,int> shrink(NONE,UPPER);
    int shrink_axis_val = -1;

    if (myproc == max_proc) {
      grow = decide_how_to_grow(global_box, local_box);
      if (grow.first != NONE) {
        grow_axis_val = local_box[grow.first][grow.second];
      }
    }
    if (myproc == min_proc) {
      shrink = decide_how_to_shrink(global_box, local_box);
      if (shrink.first != NONE) {
        shrink_axis_val = local_box[shrink.first][shrink.second];
      }
    }

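    //Pack the grow/shrink decision together with the local box extents; the
    //broadcasts below replace these with the choosing processor's data on
    //every rank: {axis, end, X0, X1, Y0, Y1, Z0, Z1}.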
    int grow_info[8] = {grow.first, grow.second,
                        local_box[X][0], local_box[X][1],
                        local_box[Y][0], local_box[Y][1],
                        local_box[Z][0], local_box[Z][1]};

    int shrink_info[8] = {shrink.first, shrink.second,
                        local_box[X][0], local_box[X][1],
                        local_box[Y][0], local_box[Y][1],
                        local_box[Z][0], local_box[Z][1]};
#ifdef HAVE_MPI
    MPI_Bcast(&grow_info[0], 8, MPI_INT, max_proc, MPI_COMM_WORLD);
    MPI_Bcast(&shrink_info[0], 8, MPI_INT, min_proc, MPI_COMM_WORLD);
#endif

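    //Decode the broadcast data: the coordinate of the face being moved sits at
    //index 2+axis*2+end, and grow_incr/shrink_incr give the direction the face
    //moves (grow pushes the max processor's face outward, shrink pulls the min
    //processor's face inward).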
    int grow_axis = grow_info[0];
    int grow_end = grow_info[1];
    int shrink_axis = shrink_info[0];
    int shrink_end = shrink_info[1];
    int grow_incr = 1;
    if (grow_end == LOWER) grow_incr = -1;
    int shrink_incr = -1;
    if (shrink_end == LOWER) shrink_incr = 1;
    if (grow_axis != NONE) grow_axis_val = grow_info[2+grow_axis*2+grow_end];
    if (shrink_axis != NONE) shrink_axis_val = shrink_info[2+shrink_axis*2+shrink_end];

    if (grow_axis == NONE && shrink_axis == NONE) break;

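    //Veto the move if any box that would give up a grid layer is already fewer
    //than 2 cells thick along that axis.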
    bool grow_status = (grow_axis != NONE);
    if (grow_axis != NONE) {
      if ((grow_incr ==  1 && local_box[grow_axis][0] == grow_axis_val) ||
          (grow_incr == -1 && local_box[grow_axis][1] == grow_axis_val)) {
        if (local_box[grow_axis][1] - local_box[grow_axis][0] < 2) {
          grow_status = false;
        }
      }
    }

    bool shrink_status = (shrink_axis != NONE);
    if (shrink_axis != NONE) {
      if ((shrink_incr ==  1 && local_box[shrink_axis][0] == shrink_axis_val) ||
          (shrink_incr == -1 && local_box[shrink_axis][1] == shrink_axis_val)) {
        if (local_box[shrink_axis][1] - local_box[shrink_axis][0] < 2) {
          shrink_status = false;
        }
      }
    }

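    //Combine the vetoes from every processor so that all ranks agree on
    //whether the grow and/or shrink actually goes ahead.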
#ifdef HAVE_MPI
    int statusints[2] = { grow_status ? 0 : 1, shrink_status ? 0 : 1 };
    int globalstatus[2] = { 0, 0 };
    MPI_Allreduce(&statusints, &globalstatus, 2, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
    grow_status = (globalstatus[0] == 0);
    shrink_status = (globalstatus[1] == 0);
#endif

    if (grow_status == false && shrink_status == false) break;

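    //Move the shared face: every box with a face on the shifting plane moves it
    //by the same increment, so the layer of cells next to that plane changes
    //owner and the partition stays free of gaps and overlaps.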
    if (grow_status && grow_axis != NONE) {
      if (local_box[grow_axis][0] == grow_axis_val) {
        local_box[grow_axis][0] += grow_incr;
      }

      if (local_box[grow_axis][1] == grow_axis_val) {
        local_box[grow_axis][1] += grow_incr;
      }
    }

    if (shrink_status && shrink_axis != NONE) {
      if (local_box[shrink_axis][0] == shrink_axis_val) {
        local_box[shrink_axis][0] += shrink_incr;
      }

      if (local_box[shrink_axis][1] == shrink_axis_val) {
        local_box[shrink_axis][1] += shrink_incr;
      }
    }

    compute_imbalance<GlobalOrdinal>(global_box, local_box,
                                    cur_imbalance, cur_std_dev, doc, false);
  }
}
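
The example also assumes several constants and helpers that are not shown (X, Y, Z, LOWER, UPPER, NONE, decide_how_to_grow, decide_how_to_shrink). The index arithmetic 2 + axis*2 + end only recovers the right slot of grow_info/shrink_info if X, Y, Z are 0, 1, 2 and LOWER, UPPER are 0, 1; NONE merely has to lie outside 0..2. The small standalone snippet below uses those deduced values (treat them as assumptions, not the original definitions) to demonstrate the packing and decoding:

#include <cassert>

//Values deduced from how the constants are used above; NONE is an assumption.
static const int X = 0, Y = 1, Z = 2;   //axis index into local_box[axis][end]
static const int LOWER = 0, UPPER = 1;  //which face of an axis
static const int NONE = -1;             //no axis selected

int main()
{
  int local_box[3][2] = { {0, 10}, {0, 20}, {5, 15} };

  //Packed exactly like grow_info/shrink_info: {axis, end, X0, X1, Y0, Y1, Z0, Z1}
  int info[8] = { Y, UPPER,
                  local_box[X][0], local_box[X][1],
                  local_box[Y][0], local_box[Y][1],
                  local_box[Z][0], local_box[Z][1] };

  int axis = info[0];
  int end  = info[1];

  //The coordinate of the selected face sits at index 2 + axis*2 + end, which
  //is how grow_axis_val/shrink_axis_val are recovered after the broadcast.
  assert(axis != NONE);
  assert(info[2 + axis*2 + end] == local_box[axis][end]);
  return 0;
}
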
Example #3
template<typename MatrixType>
size_t
compute_matrix_stats(const MatrixType& A, int myproc, int numprocs, YAML_Doc& ydoc)
{
  typedef typename MatrixType::GlobalOrdinalType GlobalOrdinal;
  typedef typename MatrixType::LocalOrdinalType LocalOrdinal;
  typedef typename MatrixType::ScalarType Scalar;

  GlobalOrdinal min_nrows = 0, max_nrows = 0, global_nrows = 0;
  int min_proc = 0, max_proc = 0;

  GlobalOrdinal local_nrows = A.rows.size();

  get_global_min_max(local_nrows, global_nrows, min_nrows, min_proc,
                     max_nrows, max_proc);

  //Gather stats on global, min/max matrix num-nonzeros:

  double local_nnz = A.num_nonzeros();
  double dglobal_nnz = 0, dmin_nnz = 0, dmax_nnz = 0;

  get_global_min_max(local_nnz, dglobal_nnz, dmin_nnz, min_proc,
                     dmax_nnz, max_proc);

  double avg_nrows = global_nrows;
  avg_nrows /= numprocs;
  double avg_nnz = dglobal_nnz;
  avg_nnz /= numprocs;

  double mem_overhead_MB = parallel_memory_overhead_MB(A);

  size_t global_nnz = static_cast<size_t>(std::ceil(dglobal_nnz));
  size_t min_nnz = static_cast<size_t>(std::ceil(dmin_nnz));
  size_t max_nnz = static_cast<size_t>(std::ceil(dmax_nnz));
  size_t global_num_rows = global_nrows;

  if (myproc == 0) {
    ydoc.add("Matrix attributes","");
    ydoc.get("Matrix attributes")->add("Global Nrows",global_num_rows);
    ydoc.get("Matrix attributes")->add("Global NNZ",global_nnz);

    //compute how much memory the matrix occupies:
    //num-bytes = sizeof(GlobalOrdinal)*global_nrows   for A.rows
    //          + sizeof(LocalOrdinal)*global_nrows    for A.rows_offsets
    //          + sizeof(GlobalOrdinal)*global_nnz     for A.packed_cols
    //          + sizeof(Scalar)*global_nnz            for A.packed_coefs

    double invGB = 1.0/(1024*1024*1024);
    double memGB = invGB*global_nrows*sizeof(GlobalOrdinal);
    memGB += invGB*global_nrows*sizeof(LocalOrdinal);
    memGB += invGB*global_nnz*sizeof(GlobalOrdinal);
    memGB += invGB*global_nnz*sizeof(Scalar);
    ydoc.get("Matrix attributes")->add("Global Memory (GB)",memGB);

    ydoc.get("Matrix attributes")->add("Pll Memory Overhead (MB)",mem_overhead_MB);

    size_t min_num_rows = min_nrows;
    size_t max_num_rows = max_nrows;
    ydoc.get("Matrix attributes")->add("Rows per proc MIN",min_num_rows);
    ydoc.get("Matrix attributes")->add("Rows per proc MAX",max_num_rows);
    ydoc.get("Matrix attributes")->add("Rows per proc AVG",avg_nrows);
    ydoc.get("Matrix attributes")->add("NNZ per proc MIN",min_nnz);
    ydoc.get("Matrix attributes")->add("NNZ per proc MAX",max_nnz);
    ydoc.get("Matrix attributes")->add("NNZ per proc AVG",avg_nnz);
  }

  return global_nnz;
}
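
As a quick sanity check of the memory estimate above, the standalone snippet below evaluates the same formula for an illustrative problem size; the type sizes and counts are assumptions, not values taken from the listing. With 10^6 rows, 27x10^6 nonzeros, 8-byte GlobalOrdinal and Scalar, and a 4-byte LocalOrdinal, the estimate comes out to roughly 0.41 GB.

#include <cstdio>

int main()
{
  //Illustrative assumptions; the real sizes come from MatrixType's typedefs.
  const double sizeof_GlobalOrdinal = 8;   //e.g. a 64-bit integer
  const double sizeof_LocalOrdinal  = 4;   //e.g. a 32-bit integer
  const double sizeof_Scalar        = 8;   //e.g. double

  const double global_nrows = 1.0e6;
  const double global_nnz   = 27.0e6;      //~27 nonzeros per row

  //Same formula as in compute_matrix_stats:
  //rows + row-offsets + packed column indices + packed coefficients.
  const double invGB = 1.0/(1024*1024*1024);
  double memGB = invGB*global_nrows*sizeof_GlobalOrdinal;
  memGB += invGB*global_nrows*sizeof_LocalOrdinal;
  memGB += invGB*global_nnz*sizeof_GlobalOrdinal;
  memGB += invGB*global_nnz*sizeof_Scalar;

  std::printf("Estimated matrix footprint: %.3f GB\n", memGB);  //about 0.414 GB
  return 0;
}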