コード例 #1
0
ファイル: test2level.c プロジェクト: LLNL/lwgrp
int split_bin_2level(
  int bins,
  int bin,
  const lwgrp_ring* lev1_ring,
  const lwgrp_logring* lev1_logring,
  const lwgrp_ring* lev2_ring,
  const lwgrp_logring* lev2_logring,
  lwgrp_ring* new_lev1_ring,
  lwgrp_logring* new_lev1_logring,
  lwgrp_ring* new_lev2_ring,
  lwgrp_logring* new_lev2_logring)
{
  int i;

  /* initialize new rings and logrings to empty groups,
   * we'll overwrite these if proc is really in a group */
  lwgrp_ring_set_null(new_lev1_ring);
  lwgrp_ring_set_null(new_lev2_ring);
  lwgrp_logring_build_from_ring(new_lev1_ring, new_lev1_logring);
  lwgrp_logring_build_from_ring(new_lev2_ring, new_lev2_logring);

  if (bins <= 0) {
    return 0;
  }

  /* get our rank within and the size of the parent communicator */
  int comm_size;
  int comm_rank = lev1_ring->comm_rank;
  MPI_Comm_size(lev1_ring->comm, &comm_size);

  /* allocate memory to execute collectives */
  int* reduce_inbuf   = (int*) malloc(bins * sizeof(int));
  int* reduce_outbuf  = (int*) malloc(bins * sizeof(int));
  int* scan_inbuf     = (int*) malloc(2 * bins * sizeof(int));
  int* scan_recvleft  = (int*) malloc(2 * bins * sizeof(int));
  int* scan_recvright = (int*) malloc(2 * bins * sizeof(int));

  /* intiaize all bins to MPI_PROC_NULL, except for our
   * bin in which case we list our rank within comm */
  for (i = 0; i < bins; i++) {
    /* initialize all bins to size(lev1), would like MPI_PROC_NULL,
     * but we use size instead so that reduce(min) does the right thing */
    reduce_inbuf[i] = comm_size;
  }
  if (bin >= 0) {
    reduce_inbuf[bin] = comm_rank;
  }

  /* reduce to node leader to find lowest rank in each bin */
  lwgrp_logring_reduce(
    reduce_inbuf, reduce_outbuf, bins, MPI_INT, MPI_MIN,
    0, lev1_ring, lev1_logring
  );

  /* create the scan type (a rank and a count pair) */
  MPI_Datatype scan_type;
  MPI_Type_contiguous(2, MPI_INT, &scan_type);
  MPI_Type_commit(&scan_type);

  /* double exscan across node leaders to
   * build info for new node leader chains */
  int lev1_rank = lev1_ring->group_rank;
  if (lev1_rank == 0) {
    /* prepare data for input to double scan, for each bin
     * record the lowest rank and a count of either 0 or 1 */
    for (i = 0; i < bins; i++) {
      if (reduce_outbuf[i] != comm_size) {
        scan_inbuf[i*2 + SCAN_RANK]  = reduce_outbuf[i];
        scan_inbuf[i*2 + SCAN_COUNT] = 1;
      } else {
        scan_inbuf[i*2 + SCAN_RANK]  = MPI_PROC_NULL;
        scan_inbuf[i*2 + SCAN_COUNT] = 0;
      }
    }

    /* create the scan operation */
    MPI_Op scan_op;
    int commutative = 0;
    MPI_Op_create(scan_chain, commutative, &scan_op);

    /* execute the double exclusive scan to get next rank and
     * count of ranks to either side for each bin */
    lwgrp_logring_double_exscan(
      scan_inbuf, scan_recvright, scan_inbuf, scan_recvleft,
      bins, scan_type, scan_op, lev2_ring, lev2_logring
    );

    /* if we're on the end of the level 2 group, need to initialize
     * the recv values */
    int lev2_rank = lev2_ring->group_rank;
    int lev2_size = lev2_ring->group_size;
    if (lev2_rank == 0) {
      /* we're on the left end of lev2 group, so we didn't get
       * anything from the left side */
      for (i = 0; i < bins; i++) {
        scan_recvleft[i*2 + SCAN_RANK]  = MPI_PROC_NULL;
        scan_recvleft[i*2 + SCAN_COUNT] = 0;
      }
    }
    if (lev2_rank == lev2_size-1) {
      /* we're on the right end of lev2 group, so we didn't get
       * anything from the right side */
      for (i = 0; i < bins; i++) {
        scan_recvright[i*2 + SCAN_RANK]  = MPI_PROC_NULL;
        scan_recvright[i*2 + SCAN_COUNT] = 0;
      }
    }

    /* free the scan op */
    MPI_Op_free(&scan_op);
  }

  /* broadcast scan results to local comm */
  lwgrp_logring_bcast(scan_recvleft,  bins, scan_type, 0, lev1_ring, lev1_logring);
  lwgrp_logring_bcast(scan_recvright, bins, scan_type, 0, lev1_ring, lev1_logring);

  /* free the scan type */
  MPI_Type_free(&scan_type);

  /* call bin_split on local chain */
  lwgrp_ring_split_bin_radix(bins, bin, lev1_ring, new_lev1_ring);
  lwgrp_logring_build_from_ring(new_lev1_ring, new_lev1_logring);

  /* for each valid bin, all rank 0 procs of new lev1 groups form new lev2 groups */
  if (bin >= 0) {
    int new_lev1_rank = new_lev1_ring->group_rank;
    if (new_lev1_rank == 0) {
      /* extract chain values from scan results */
      MPI_Comm comm = new_lev1_ring->comm;
      int left  = scan_recvleft[2*bin  + SCAN_RANK];
      int right = scan_recvright[2*bin + SCAN_RANK];
      int size  = scan_recvleft[2*bin + SCAN_COUNT] + scan_recvright[2*bin + SCAN_COUNT] + 1;
      int rank  = scan_recvleft[2*bin + SCAN_COUNT];

      /* build chain, then ring, then logring, and finally free chain */
      lwgrp_chain tmp_chain;
      lwgrp_chain_build_from_vals(comm, left, right, size, rank, &tmp_chain);
      lwgrp_ring_build_from_chain(&tmp_chain, new_lev2_ring);
      lwgrp_logring_build_from_ring(new_lev2_ring, new_lev2_logring);
      lwgrp_chain_free(&tmp_chain);
    }
  }

  /* free our temporary memory */
  free(scan_recvright);
  free(scan_recvleft);
  free(scan_inbuf);
  free(reduce_outbuf);
  free(reduce_inbuf);

  return 0;
}
コード例 #2
0
ファイル: lwgrp_ring_ops.c プロジェクト: tgamblin/lwgrp
/* given a specified number of bins, an index into those bins, and a
 * input group, create and return a new group consisting of all ranks
 * belonging to the same bin, runs in O(num_bins * log N) time */
int lwgrp_ring_split_bin(int num_bins, int my_bin, const lwgrp_ring* in, lwgrp_ring* out)
{
  /* With this function, we split the "in" group into up to "num_bins"
   * subgroups.  A process is grouped with all other processes that
   * specify the same value for "my_bin".  The descriptor for the new
   * group is returned in "out".  If my_bin is less than 0, an empty
   * (NULL) group is returned.
   *
   * Implementation:
   * We run two exclusive scans, one scanning from left to right, and
   * another scanning from right to left.  As the output of the
   * left-going scan, a process acquires the number of ranks to its
   * left that are in its bin as well as the rank of the process that
   * is immediately to its left that is also in its bin.  Similarly,
   * the right-going scan provides the process with the number of ranks
   * to the right and the rank of the process immediately to its right
   * that is in the same bin.  With this info, a process can determine
   * its rank and the number of ranks in its group, as well as, the
   * ranks of its left and right partners, which is sufficient to fully
   * define the "out" group. */
  int i;

  if (my_bin >= num_bins) {
    /* TODO: fail */
  }

  /* define some frequently used indicies into our arrays */
  int my_bin_index = 2 * my_bin;
  int rank_index   = 2 * num_bins;

  /* allocate space for our send and receive buffers */
  int elements = 2 * num_bins + 1;
  int* bins = (int*) lwgrp_malloc(4 * elements * sizeof(int), sizeof(int), __FILE__, __LINE__);
  if (bins == NULL) {
    /* TODO: fail */
  }

  /* set up pointers to our send and receive buffers */
  int* send_left_bins  = bins + (0 * elements);
  int* recv_left_bins  = bins + (1 * elements);
  int* send_right_bins = bins + (2 * elements);
  int* recv_right_bins = bins + (3 * elements);

  /* initialize our send buffers,
   * set all ranks to MPI_PROC_NULL and set all counts to 0 */
  for(i = 0; i < 2*num_bins; i += 2) {
    send_left_bins[i+INDEX_COUNT]    = 0;
    send_right_bins[i+INDEX_COUNT]   = 0;
    send_left_bins[i+INDEX_CLOSEST]  = MPI_PROC_NULL;
    send_right_bins[i+INDEX_CLOSEST] = MPI_PROC_NULL;
  }

  /* for the bin we are in, set the rank to our rank and set the count to 1 */
  if (my_bin >= 0) {
    send_left_bins[my_bin_index+INDEX_COUNT]    = 1;
    send_right_bins[my_bin_index+INDEX_COUNT]   = 1;
    send_left_bins[my_bin_index+INDEX_CLOSEST]  = in->comm_rank;
    send_right_bins[my_bin_index+INDEX_CLOSEST] = in->comm_rank;
  }

  /* execute double, inclusive scan, one going left-to-right,
   * and another right-to-left */
  MPI_Request request[4];
  MPI_Status  status[4];
  MPI_Comm comm  = in->comm;
  int comm_rank  = in->comm_rank;
  int left_rank  = in->comm_left;
  int right_rank = in->comm_right;
  int rank       = in->group_rank;
  int ranks      = in->group_size;
  int my_left    = MPI_PROC_NULL;
  int my_right   = MPI_PROC_NULL;
  int dist = 1;
  while (dist < ranks) {
    /* left-to-right shift:
     * inform rank to our right about the rank on our left,
     * recv data from left and send data to the right */
    send_right_bins[rank_index] = left_rank;
    MPI_Irecv(recv_left_bins,  elements, MPI_INT, left_rank,  LWGRP_MSG_TAG_0, comm, &request[0]);
    MPI_Isend(send_right_bins, elements, MPI_INT, right_rank, LWGRP_MSG_TAG_0, comm, &request[1]);

    /* right-to-left shift:
     * inform rank to our left about the rank on our right
     * recv data from right and send data to the left */
    send_left_bins[rank_index] = right_rank;
    MPI_Irecv(recv_right_bins, elements, MPI_INT, right_rank, LWGRP_MSG_TAG_0, comm, &request[2]);
    MPI_Isend(send_left_bins,  elements, MPI_INT, left_rank,  LWGRP_MSG_TAG_0, comm, &request[3]);

    /* wait for all communication to complete */
    MPI_Waitall(4, request, status);

    /* make note of the rightmost rank in our bin
     * to the left if we haven't already found one */
    if (my_left == MPI_PROC_NULL && my_bin >= 0) {
      my_left = recv_left_bins[my_bin_index+INDEX_CLOSEST];
    }

    /* make note of the leftmost rank in our bin to the
     * right if we haven't already found one */
    if (my_right == MPI_PROC_NULL && my_bin >= 0) {
      my_right = recv_right_bins[my_bin_index+INDEX_CLOSEST];
    }

    /* merge data from left into our right-going data */
    for(i = 0; i < 2*num_bins; i += 2) {
      /* if we haven't wrapped, add the counts for this bin */
      if (rank - dist >= 0) {
        send_right_bins[i+INDEX_COUNT] += recv_left_bins[i+INDEX_COUNT];
      }

      /* if we haven't already defined the rightmost rank for this bin,
       * set it to the value defined in the message from the left */
      if (send_right_bins[i+INDEX_CLOSEST] == MPI_PROC_NULL) {
        send_right_bins[i+INDEX_CLOSEST] = recv_left_bins[i+INDEX_CLOSEST];
      }
    }

    /* merge data from right into our left-going data */
    for(i = 0; i < 2*num_bins; i += 2) {
      /* if we haven't wrapped, add the counts for this bin */
      if (rank + dist < ranks) {
        send_left_bins[i+INDEX_COUNT] += recv_right_bins[i+INDEX_COUNT];
      }

      /* if we haven't already defined the leftmost rank for this bin,
       * set it to the value defined in the message from the left */
      if (send_left_bins[i+INDEX_CLOSEST] == MPI_PROC_NULL) {
        send_left_bins[i+INDEX_CLOSEST] = recv_right_bins[i+INDEX_CLOSEST];
      }
    }

    /* get next processes on the left and right sides */
    left_rank  = recv_left_bins[rank_index];
    right_rank = recv_right_bins[rank_index];
    dist <<= 1;
  }

  /* if we're the only rank, set our ourself as our left and right neighbor */
  if (ranks == 1) {
    my_left  = comm_rank;
    my_right = comm_rank;
  }

  if (my_bin >= 0) {
    /* get count of number of ranks in our bin to our left and right sides */
    int count_left  = send_right_bins[my_bin_index + INDEX_COUNT] - 1;
    int count_right = send_left_bins[my_bin_index + INDEX_COUNT]  - 1;

    /* the number of ranks to our left defines our rank, while we add
     * the number of ranks to our left with the number of ranks to our
     * right plus ourselves to get the total number of ranks in our bin */
    out->comm       = in->comm;
    out->comm_rank  = in->comm_rank;
    out->comm_left  = my_left;
    out->comm_right = my_right;
    out->group_rank = count_left;
    out->group_size = count_left + count_right + 1;
  } else {
    /* create an empty group */
    lwgrp_ring_set_null(out);
  }

  /* free memory */
  lwgrp_free(&bins);

  return LWGRP_SUCCESS; 
}