示例#1
0
/** General ARMCI global operation (reduction).  Collective on group.
  *
  * @param[in]    scope Scope in which to perform the GOP (only SCOPE_ALL is supported)
  * @param[inout] x     Vector of n doubles, contains input and will contain output.
  * @param[in]    n     Length of x
  * @param[in]    op    One of '+', '*', 'max', 'min', 'absmax', 'absmin'
  * @param[in]    type  Data type of x
  * @param[in]    group Group on which to perform the GOP
  */
void armci_msg_group_gop_scope(int scope, void *x, int n, char *op, int type, ARMCI_Group *group) {
  void        *out;
  MPI_Op       mpi_op;
  MPI_Datatype mpi_type;
  MPI_Comm     comm;
  int          mpi_type_size;

  if (scope == SCOPE_ALL || scope == SCOPE_MASTERS)
    comm = group->comm;
  else
    comm = MPI_COMM_SELF;

  if (op[0] == '+') {
    mpi_op = MPI_SUM;
  } else if (op[0] == '*') {
    mpi_op = MPI_PROD;
  } else if (strncmp(op, "max", 3) == 0) {
    mpi_op = MPI_MAX;
  } else if (strncmp(op, "min", 3) == 0) {
    mpi_op = MPI_MIN;
  } else if (strncmp(op, "or", 2) == 0) {
    mpi_op = MPI_BOR;
  } else if (strncmp(op, "absmax", 6) == 0) {
    mpi_op = MPI_ABSMAX_OP;
  } else if (strncmp(op, "absmin", 6) == 0) {
    mpi_op = MPI_ABSMIN_OP;
  } else {
    ARMCII_Error("unknown operation \'%s\'", op);
    return;
  }

  switch(type) {
    case ARMCI_INT:
      mpi_type = MPI_INT;
      break;
    case ARMCI_LONG:
      mpi_type = MPI_LONG;
      break;
    case ARMCI_LONG_LONG:
      mpi_type = MPI_LONG_LONG;
      break;
    case ARMCI_FLOAT:
      mpi_type = MPI_FLOAT;
      break;
    case ARMCI_DOUBLE:
      mpi_type = MPI_DOUBLE;
      break;
    default:
      ARMCII_Error("unknown type (%d)", type);
      return;
  }

  // ABS MAX/MIN are unary as well as binary.  We need to also apply abs in the
  // single processor case when reduce would normally just be a no-op.
  if (group->size == 1 && (mpi_op == MPI_ABSMAX_OP || mpi_op == MPI_ABSMIN_OP)) {
    ARMCII_Absv_op(x, x, &n, &mpi_type);
    return;
  }

  MPI_Type_size(mpi_type, &mpi_type_size);

  out = malloc(n*mpi_type_size);
  ARMCII_Assert(out != NULL);

  MPI_Allreduce(x, out, n, mpi_type, mpi_op, group->comm);

  ARMCI_Copy(out, x, n*mpi_type_size);
  free(out);
}
示例#2
0
/** General ARMCI global operation (reduction).  Collective on group.
  *
  * @param[in]    scope Scope in which to perform the GOP (only SCOPE_ALL is supported)
  * @param[inout] x     Vector of n data elements, contains input and will contain output.
  * @param[in]    n     Length of x
  * @param[in]    op    One of '+', '*', 'max', 'min', 'absmax', 'absmin'
  * @param[in]    type  Data type of x (e.g. ARMCI_INT, ...)
  * @param[in]    group Group on which to perform the GOP
  */
void armci_msg_group_gop_scope(int scope, void *x, int n, char *op, int type, ARMCI_Group *group) {
  void        *out, **x_buf;
  MPI_Op       mpi_op;
  MPI_Datatype mpi_type;
  MPI_Comm     comm;
  int          mpi_type_size;

  /* FIXME: scope argument presently ignored */
  if (scope == SCOPE_ALL || scope == SCOPE_MASTERS)
    comm = group->comm;
  else
    comm = MPI_COMM_SELF;

  if (op[0] == '+') {
    mpi_op = MPI_SUM;
  } else if (op[0] == '*') {
    mpi_op = MPI_PROD;
  } else if (strncmp(op, "max", 3) == 0) {
    mpi_op = MPI_MAX;
  } else if (strncmp(op, "min", 3) == 0) {
    mpi_op = MPI_MIN;
  } else if (strncmp(op, "or", 2) == 0) {
    mpi_op = MPI_BOR;
  } else if (strncmp(op, "absmax", 6) == 0) {
    mpi_op = ARMCI_MPI_ABSMAX_OP;
  } else if (strncmp(op, "absmin", 6) == 0) {
    mpi_op = ARMCI_MPI_ABSMIN_OP;
  /* The following were added ComEx/ARMCI in 2017. */
  /* https://github.com/GlobalArrays/ga/commit/14ef3cfa4ea3ffa7ee721c2a98685669359f7044 */
  /* && and || need to be tested before & and | to avoid the latter matching the former. */
  } else if ((strncmp(op, "land", 4) == 0) || (strncmp(op, "&&", 2) == 0)) {
    mpi_op = MPI_LAND;
  } else if ((strncmp(op, "lor", 3) == 0) || (strncmp(op, "||", 2) == 0)) {
    mpi_op = MPI_LOR;
  } else if ((strncmp(op, "band", 4) == 0) || (strncmp(op, "&", 1) == 0)) {
    mpi_op = MPI_BAND;
  } else if ((strncmp(op, "bor", 3) == 0) || (strncmp(op, "|", 1) == 0)) {
    mpi_op = MPI_BOR;
  } else {
    ARMCII_Error("unknown operation \'%s\'", op);
    return;
  }

  switch(type) {
    case ARMCI_INT:
      mpi_type = MPI_INT;
      break;
    case ARMCI_LONG:
      mpi_type = MPI_LONG;
      break;
    case ARMCI_LONG_LONG:
      mpi_type = MPI_LONG_LONG;
      break;
    case ARMCI_FLOAT:
      mpi_type = MPI_FLOAT;
      break;
    case ARMCI_DOUBLE:
      mpi_type = MPI_DOUBLE;
      break;
    default:
      ARMCII_Error("unknown type (%d)", type);
      return;
  }

  MPI_Type_size(mpi_type, &mpi_type_size);

  ARMCII_Buf_prepare_read_vec(&x, &x_buf, 1, n*mpi_type_size);

  // ABS MAX/MIN are unary as well as binary.  We need to also apply abs in the
  // single processor case when reduce would normally just be a no-op.
  if (group->size == 1 && (mpi_op == ARMCI_MPI_ABSMAX_OP || mpi_op == ARMCI_MPI_ABSMIN_OP)) {
    ARMCII_Absv_op(x_buf[0], x_buf[0], &n, &mpi_type);
  }

  else {
    out = malloc(n*mpi_type_size);
    ARMCII_Assert(out != NULL);

    MPI_Allreduce(x_buf[0], out, n, mpi_type, mpi_op, comm);

    ARMCI_Copy(out, x_buf[0], n*mpi_type_size);
    free(out);
  }

  ARMCII_Buf_finish_write_vec(&x, x_buf, 1, n*mpi_type_size);
}