Example #1
/** Prepare a set of buffers for use with an accumulate operation.  The
  * returned set of buffers is guaranteed to be in private space and scaled.
  * Copies will be made if needed; the result should be completed by a matching finish call.
  *
  * @param[in]  orig_bufs Original set of buffers.
  * @param[out] new_bufs_ptr Pointer that receives the set of private buffers.
  * @param[in]  count     Number of entries in the buffer list.
  * @param[in]  size      The size of the buffers (all are of the same size).
  * @param[in]  datatype  The type of the buffer.
  * @param[in]  scale     Scaling constant to apply to each buffer.
  * @return               Number of buffers that were moved.
  */
int ARMCII_Buf_prepare_acc_vec(void **orig_bufs, void ***new_bufs_ptr, int count, int size,
                               int datatype, void *scale) {

  void **new_bufs;
  int i, scaled, num_moved = 0;
  
  new_bufs = malloc(count*sizeof(void*));
  ARMCII_Assert(new_bufs != NULL);

  scaled = ARMCII_Buf_acc_is_scaled(datatype, scale);

  for (i = 0; i < count; i++) {
    gmr_t *mreg = NULL;

    // Check if the source buffer is within a shared region.
    if (ARMCII_GLOBAL_STATE.shr_buf_method != ARMCII_SHR_BUF_NOGUARD)
      mreg = gmr_lookup(orig_bufs[i], ARMCI_GROUP_WORLD.rank);

    if (scaled) {
      MPI_Alloc_mem(size, MPI_INFO_NULL, &new_bufs[i]);
      ARMCII_Assert(new_bufs[i] != NULL);

      // Lock if needed so we can directly access the buffer
      if (mreg != NULL)
        gmr_dla_lock(mreg);

      ARMCII_Buf_acc_scale(orig_bufs[i], new_bufs[i], size, datatype, scale);

      if (mreg != NULL)
        gmr_dla_unlock(mreg);
    } else {
      new_bufs[i] = orig_bufs[i];
    }

    if (mreg != NULL) {
      // If the buffer wasn't copied, we should copy it into a private buffer
      if (new_bufs[i] == orig_bufs[i]) {
        MPI_Alloc_mem(size, MPI_INFO_NULL, &new_bufs[i]);
        ARMCII_Assert(new_bufs[i] != NULL);

        gmr_dla_lock(mreg);
        ARMCI_Copy(orig_bufs[i], new_bufs[i], size);
        gmr_dla_unlock(mreg);
      }
    }

    if (new_bufs[i] != orig_bufs[i])
      num_moved++;
  }

  *new_bufs_ptr = new_bufs;
  
  return num_moved;
}
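
A hypothetical sketch of how a caller might drive this prepare step (internal code, so it assumes the library's own headers are in scope; the function name is illustrative and the cleanup is written inline rather than via a real finish routine):

/* Hypothetical caller (illustrative): scale `count` accumulate sources by
 * 2.0, then release whatever temporaries prepare created.  ARMCI_ACC_DBL is
 * the standard ARMCI accumulate type tag. */
void example_scaled_acc_sources(void **bufs, int count, int size) {
  double scale = 2.0;
  void **priv;
  int i;

  (void) ARMCII_Buf_prepare_acc_vec(bufs, &priv, count, size,
                                    ARMCI_ACC_DBL, &scale);

  /* ... accumulate from priv[i] instead of bufs[i] ... */

  for (i = 0; i < count; i++)
    if (priv[i] != bufs[i])
      MPI_Free_mem(priv[i]);     /* free only the copies                  */
  free(priv);                    /* prepare always allocates the list     */
}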
Example #2
/** One-sided put operation.
  *
  * @param[in] src    Source address (local)
  * @param[in] dst    Destination address (remote)
  * @param[in] size   Number of bytes to transfer
  * @param[in] target Process id to target
  * @return           0 on success, non-zero on failure
  */
int ARMCI_Put(void *src, void *dst, int size, int target) {
  gmr_t *src_mreg, *dst_mreg;

  src_mreg = gmr_lookup(src, ARMCI_GROUP_WORLD.rank);
  dst_mreg = gmr_lookup(dst, target);

  ARMCII_Assert_msg(dst_mreg != NULL, "Invalid remote pointer");

  /* Local operation */
  if (target == ARMCI_GROUP_WORLD.rank) {
    if (ARMCII_GLOBAL_STATE.shr_buf_method != ARMCII_SHR_BUF_NOGUARD) {
      gmr_dla_lock(dst_mreg);
      if (src_mreg) gmr_dla_lock(src_mreg);
    }

    ARMCI_Copy(src, dst, size);
    
    if (ARMCII_GLOBAL_STATE.shr_buf_method != ARMCII_SHR_BUF_NOGUARD) {
      gmr_dla_unlock(dst_mreg);
      if (src_mreg) gmr_dla_unlock(src_mreg);
    }
  }

  /* Origin buffer is private */
  else if (src_mreg == NULL || ARMCII_GLOBAL_STATE.shr_buf_method == ARMCII_SHR_BUF_NOGUARD) {
    gmr_lock(dst_mreg, target);
    gmr_put(dst_mreg, src, dst, size, target);
    gmr_unlock(dst_mreg, target);
  }

  /* COPY: Either the origin and target buffers are in the same window, and we
   * can't lock the same window twice (MPI semantics), or the user has
   * requested always-copy mode. */
  else {
    void *src_buf;

    MPI_Alloc_mem(size, MPI_INFO_NULL, &src_buf);
    ARMCII_Assert(src_buf != NULL);

    gmr_dla_lock(src_mreg);
    ARMCI_Copy(src, src_buf, size);
    gmr_dla_unlock(src_mreg);

    gmr_lock(dst_mreg, target);
    gmr_put(dst_mreg, src_buf, dst, size, target);
    gmr_unlock(dst_mreg, target);

    MPI_Free_mem(src_buf);
  }

  return 0;
}
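
A minimal usage sketch of the put path above (illustrative only; error checking omitted). It assumes the standard public entry points ARMCI_Init, ARMCI_Malloc, ARMCI_Fence, ARMCI_Barrier, and ARMCI_Free:

#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include <armci.h>

/* Each process puts its rank into the next process's slice of a
 * collectively allocated segment, then reads its own slice. */
int main(int argc, char **argv) {
  int rank, nproc, next, val;
  void **base;

  MPI_Init(&argc, &argv);
  ARMCI_Init();
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &nproc);

  base = malloc(nproc * sizeof(void*));
  ARMCI_Malloc(base, sizeof(int));      /* collective; base[p] is p's slice */

  next = (rank + 1) % nproc;
  val  = rank;
  ARMCI_Put(&val, base[next], sizeof(int), next);
  ARMCI_Fence(next);                    /* complete the put at the target   */
  ARMCI_Barrier();

  printf("%d: received %d\n", rank, *(int*)base[rank]);

  ARMCI_Free(base[rank]);
  free(base);
  ARMCI_Finalize();
  MPI_Finalize();
  return 0;
}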
Example #3
/** Declare the end of a local access epoch.
  *
  * \note MPI-2 does not allow multiple locks at once, so you can have only one
  * access epoch open at a time and cannot do put/get/acc while in an access
  * region.
  *
  * @param[in] ptr Pointer to the allocation that was accessed directly 
  */
void ARMCI_Access_end(void *ptr) {
  gmr_t *mreg;

  mreg = gmr_lookup(ptr, ARMCI_GROUP_WORLD.rank);
  ARMCII_Assert_msg(mreg != NULL, "Invalid remote pointer");

  gmr_dla_unlock(mreg);
}
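
A sketch of a complete direct local access epoch. It assumes ARMCI_Access_begin, the counterpart to the function above, is provided by this implementation; `slice` must be this process's portion of an ARMCI allocation:

#include <armci.h>

/* Zero the local slice of an ARMCI allocation inside an access epoch. */
void zero_local_slice(double *slice, int n) {
  int i;

  ARMCI_Access_begin(slice);   /* acquire the DLA lock on the region  */
  for (i = 0; i < n; i++)
    slice[i] = 0.0;            /* direct loads/stores are now legal   */
  ARMCI_Access_end(slice);     /* release; one-sided ops may resume   */
}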
Example #4
/** Finish a set of prepared buffers.  Will perform communication and copies as
  * needed to ensure results are in the original buffers.  Temporary space will be
  * freed.
  *
  * @param[in]  orig_bufs Original set of buffers.
  * @param[in]  new_bufs  Set of private buffers (consumed; temporaries are freed).
  * @param[in]  count     Number of entries in the buffer list.
  * @param[in]  size      The size of the buffers (all are of the same size).
  */
void ARMCII_Buf_finish_write_vec(void **orig_bufs, void **new_bufs, int count, int size) {
  if (ARMCII_GLOBAL_STATE.shr_buf_method != ARMCII_SHR_BUF_NOGUARD) {
    int i;

    for (i = 0; i < count; i++) {
      if (orig_bufs[i] != new_bufs[i]) {
        gmr_t *mreg = gmr_lookup(orig_bufs[i], ARMCI_GROUP_WORLD.rank);
        ARMCII_Assert(mreg != NULL);

        gmr_dla_lock(mreg);
        ARMCI_Copy(new_bufs[i], orig_bufs[i], size);
        // gmr_put(mreg, new_bufs[i], orig_bufs[i], size, ARMCI_GROUP_WORLD.rank);
        gmr_dla_unlock(mreg);

        MPI_Free_mem(new_bufs[i]);
      }
    }

    free(new_bufs);
  }
}
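
A hypothetical sketch of how the write-side pair fits together: private destination buffers are staged (here mirrored inline, since the matching "prepare write" routine is not shown in this listing), filled by communication, and then the finish call above copies them back into the shared originals and frees the temporaries:

/* Hypothetical caller (illustrative; not library code). */
void example_staged_write(void **dst_bufs, int count, int size) {
  if (ARMCII_GLOBAL_STATE.shr_buf_method != ARMCII_SHR_BUF_NOGUARD) {
    void **priv = malloc(count * sizeof(void*));
    int i;

    for (i = 0; i < count; i++) {
      gmr_t *mreg = gmr_lookup(dst_bufs[i], ARMCI_GROUP_WORLD.rank);

      if (mreg != NULL) {                       /* shared: stage privately */
        MPI_Alloc_mem(size, MPI_INFO_NULL, &priv[i]);
        ARMCII_Assert(priv[i] != NULL);
      } else {                                  /* private: use directly   */
        priv[i] = dst_bufs[i];
      }
    }

    /* ... perform gets/receives into priv[i] ... */

    ARMCII_Buf_finish_write_vec(dst_bufs, priv, count, size);
  }
  /* In NOGUARD mode the originals would be used directly. */
}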
Example #5
/** Prepare a set of buffers for use as the source of a put operation.  The
  * returned set of buffers is guaranteed to be in private space.  Copies will
  * be made if needed; the result should be completed by a matching finish call.
  *
  * @param[in]  orig_bufs Original set of buffers.
  * @param[out] new_bufs_ptr Pointer that receives the set of private buffers.
  * @param[in]  count     Number of entries in the buffer list.
  * @param[in]  size      The size of the buffers (all are of the same size).
  * @return               Number of buffers that were moved.
  */
int ARMCII_Buf_prepare_read_vec(void **orig_bufs, void ***new_bufs_ptr, int count, int size) {
  int num_moved = 0;

  if (ARMCII_GLOBAL_STATE.shr_buf_method != ARMCII_SHR_BUF_NOGUARD) {
    void **new_bufs = malloc(count*sizeof(void*));
    ARMCII_Assert(new_bufs != NULL);
    int i;

    for (i = 0; i < count; i++)
      new_bufs[i] = NULL;

    for (i = 0; i < count; i++) {
      // Check if the source buffer is within a shared region.  If so, copy it
      // into a private buffer.
      gmr_t *mreg = gmr_lookup(orig_bufs[i], ARMCI_GROUP_WORLD.rank);

      if (mreg != NULL) {
        MPI_Alloc_mem(size, MPI_INFO_NULL, &new_bufs[i]);
        ARMCII_Assert(new_bufs[i] != NULL);

        gmr_dla_lock(mreg);
        ARMCI_Copy(orig_bufs[i], new_bufs[i], size);
        // gmr_get(mreg, orig_bufs[i], new_bufs[i], size, ARMCI_GROUP_WORLD.rank);
        gmr_dla_unlock(mreg);

        num_moved++;
      } else {
        new_bufs[i] = orig_bufs[i];
      }
    }

    *new_bufs_ptr = new_bufs;
  }
  else {
    *new_bufs_ptr = orig_bufs;
  }
  
  return num_moved;
}
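
A hypothetical caller for the prepare step above (internal code; the function name is illustrative). The inline cleanup stands in for a matching "finish read" step: copies are freed, and in NOGUARD mode the original list comes back untouched and nothing is freed:

/* Guard the source buffers of a vector operation, then release temporaries. */
void example_guarded_sources(void **src_bufs, int count, int size) {
  void **priv;
  int i;

  (void) ARMCII_Buf_prepare_read_vec(src_bufs, &priv, count, size);

  /* ... read (e.g. put or send) from priv[i] instead of src_bufs[i] ... */

  if (priv != src_bufs) {          /* NOGUARD mode returns the originals */
    for (i = 0; i < count; i++)
      if (priv[i] != src_bufs[i])
        MPI_Free_mem(priv[i]);
    free(priv);
  }
}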
Example #6
/** Destroy/free a shared memory region.
  *
  * @param[in] mreg  Memory region to destroy (may be NULL when this process
  *                  passed NULL to ARMCI_Free).
  * @param[in] group Group on which to perform the free.
  */
void gmr_destroy(gmr_t *mreg, ARMCI_Group *group) {
  int   search_proc_in, search_proc_out, search_proc_out_grp;
  void *search_base = NULL;
  int   alloc_me, alloc_nproc;
  int   world_me, world_nproc;

  MPI_Comm_rank(group->comm, &alloc_me);
  MPI_Comm_size(group->comm, &alloc_nproc);
  MPI_Comm_rank(ARMCI_GROUP_WORLD.comm, &world_me);
  MPI_Comm_size(ARMCI_GROUP_WORLD.comm, &world_nproc);

  /* All-to-all exchange of a <base address, proc> pair.  This is so that we
   * can support passing NULL into ARMCI_Free() which is permitted when a
   * process allocates 0 bytes.  Unfortunately, in this case we still need to
   * identify the mem region and free it.
   */

  if (mreg == NULL)
    search_proc_in = -1;
  else {
    search_proc_in = world_me;
    search_base    = mreg->slices[world_me].base;
  }

  /* Collectively decide on who will provide the base address */
  MPI_Allreduce(&search_proc_in, &search_proc_out, 1, MPI_INT, MPI_MAX, group->comm);

  /* Everyone passed NULL.  Nothing to free. */
  if (search_proc_out < 0)
    return;

  /* Translate world rank to group rank */
  search_proc_out_grp = ARMCII_Translate_absolute_to_group(group, search_proc_out);

  /* Broadcast the base address */
  MPI_Bcast(&search_base, sizeof(void*), MPI_BYTE, search_proc_out_grp, group->comm);

  /* If we were passed NULL, look up the mem region using the <base, proc> pair */
  if (mreg == NULL)
    mreg = gmr_lookup(search_base, search_proc_out);

  /* If it's still not found, the user may have passed the wrong group */
  ARMCII_Assert_msg(mreg != NULL, "Could not locate the desired allocation");

  switch (mreg->lock_state) {
    case GMR_LOCK_UNLOCKED:
      break;
    case GMR_LOCK_DLA:
      ARMCII_Warning("Releasing direct local access before freeing shared allocation\n");
      gmr_dla_unlock(mreg);
      break;
    default:
      ARMCII_Error("Unable to free locked memory region (%d)\n", mreg->lock_state);
  }

  /* Remove from the list of mem regions */
  if (mreg->prev == NULL) {
    ARMCII_Assert(gmr_list == mreg);
    gmr_list = mreg->next;

    if (mreg->next != NULL)
      mreg->next->prev = NULL;

  } else {
    mreg->prev->next = mreg->next;
    if (mreg->next != NULL)
      mreg->next->prev = mreg->prev;
  }

  /* Destroy the window and free all buffers */
  MPI_Win_free(&mreg->window);

  if (mreg->slices[world_me].base != NULL)
    MPI_Free_mem(mreg->slices[world_me].base);

  free(mreg->slices);
  ARMCIX_Destroy_mutexes_hdl(mreg->rmw_mutex);

  free(mreg);
}
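
The NULL-handling above relies on a root-election idiom worth seeing in isolation: every process that holds the value contributes its rank, the rest contribute -1, and MPI_MAX picks one provider whose value is then broadcast. A standalone sketch (gmr_destroy additionally translates the elected world rank to a group rank; here the ranks are already relative to `comm`):

#include <mpi.h>

/* Elect one process that has `value` and broadcast it from that process.
 * Returns the provider's rank in `comm`, or -1 if no process had it. */
int elect_and_bcast(void **value, int have_value, MPI_Comm comm) {
  int me, in, out;

  MPI_Comm_rank(comm, &me);
  in = have_value ? me : -1;

  MPI_Allreduce(&in, &out, 1, MPI_INT, MPI_MAX, comm);
  if (out < 0)
    return -1;                                   /* nobody had the value */

  MPI_Bcast(value, sizeof(void*), MPI_BYTE, out, comm);
  return out;                                    /* rank of the provider */
}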
Example #7
/** Blocking operation that accumulates data from the local process into the
  * memory of the remote process.  The data transfer is strided and blocking.
  *
  * @param[in] datatype        Type of data to be transferred.
  * @param[in] scale           Pointer to the value that input data should be scaled by.
  * @param[in] src_ptr         Source starting address of the data block to accumulate.
  * @param[in] src_stride_ar   Source array of stride distances in bytes.
  * @param[in] dst_ptr         Destination starting address for the accumulate.
  * @param[in] dst_stride_ar   Destination array of stride distances in bytes.
  * @param[in] count           Block size in each dimension. count[0] should be the
  *                            number of bytes of contiguous data in the leading dimension.
  * @param[in] stride_levels   Number of stride levels.
  * @param[in] proc            Remote process ID (destination).
  *
  * @return                    Zero on success, error code otherwise.
  */
int PARMCI_AccS(int datatype, void *scale,
               void *src_ptr, int src_stride_ar[/*stride_levels*/],
               void *dst_ptr, int dst_stride_ar[/*stride_levels*/],
               int count[/*stride_levels+1*/], int stride_levels, int proc) {

  int err;

  if (ARMCII_GLOBAL_STATE.strided_method == ARMCII_STRIDED_DIRECT) {
    void         *src_buf = NULL;
    gmr_t *mreg, *gmr_loc = NULL;
    MPI_Datatype src_type, dst_type, mpi_datatype;
    int          scaled, mpi_datatype_size;

    ARMCII_Acc_type_translate(datatype, &mpi_datatype, &mpi_datatype_size);
    scaled = ARMCII_Buf_acc_is_scaled(datatype, scale);

    /* SCALE: copy and scale if requested */
    if (scaled) {
      armci_giov_t iov;
      int i, nelem;

      if (ARMCII_GLOBAL_STATE.shr_buf_method != ARMCII_SHR_BUF_NOGUARD)
        gmr_loc = gmr_lookup(src_ptr, ARMCI_GROUP_WORLD.rank);

      for (i = 1, nelem = count[0]/mpi_datatype_size; i < stride_levels+1; i++)
        nelem *= count[i];

      MPI_Alloc_mem(nelem*mpi_datatype_size, MPI_INFO_NULL, &src_buf);
      ARMCII_Assert(src_buf != NULL);

      if (gmr_loc != NULL) gmr_dla_lock(gmr_loc);

      /* Shoehorn the strided information into an IOV */
      ARMCII_Strided_to_iov(&iov, src_ptr, src_stride_ar, src_ptr, src_stride_ar, count, stride_levels);

      for (i = 0; i < iov.ptr_array_len; i++)
        ARMCII_Buf_acc_scale(iov.src_ptr_array[i], ((uint8_t*)src_buf) + i*iov.bytes, iov.bytes, datatype, scale);

      free(iov.src_ptr_array);
      free(iov.dst_ptr_array);

      if (gmr_loc != NULL) gmr_dla_unlock(gmr_loc);

      MPI_Type_contiguous(nelem, mpi_datatype, &src_type);
    }

    /* COPY: Guard shared buffers */
    else if (ARMCII_GLOBAL_STATE.shr_buf_method == ARMCII_SHR_BUF_COPY) {
      gmr_loc = gmr_lookup(src_ptr, ARMCI_GROUP_WORLD.rank);

      if (gmr_loc != NULL) {
        int i, nelem;

        for (i = 1, nelem = count[0]/mpi_datatype_size; i < stride_levels+1; i++)
          nelem *= count[i];

        MPI_Alloc_mem(nelem*mpi_datatype_size, MPI_INFO_NULL, &src_buf);
        ARMCII_Assert(src_buf != NULL);

        gmr_dla_lock(gmr_loc);
        armci_write_strided(src_ptr, stride_levels, src_stride_ar, count, src_buf);
        gmr_dla_unlock(gmr_loc);

        MPI_Type_contiguous(nelem, mpi_datatype, &src_type);
      }
    }

    /* NOGUARD: If src_buf hasn't been assigned a copy, the strided source
     * buffer will be used directly. */
    if (src_buf == NULL) {
      src_buf = src_ptr;
      ARMCII_Strided_to_dtype(src_stride_ar, count, stride_levels, mpi_datatype, &src_type);
    }

    ARMCII_Strided_to_dtype(dst_stride_ar, count, stride_levels, mpi_datatype, &dst_type);

    MPI_Type_commit(&src_type);
    MPI_Type_commit(&dst_type);

    int src_size, dst_size;

    MPI_Type_size(src_type, &src_size);
    MPI_Type_size(dst_type, &dst_size);

    ARMCII_Assert(src_size == dst_size);

    mreg = gmr_lookup(dst_ptr, proc);
    ARMCII_Assert_msg(mreg != NULL, "Invalid shared pointer");

    gmr_lock(mreg, proc);
    gmr_accumulate_typed(mreg, src_buf, 1, src_type, dst_ptr, 1, dst_type, proc);
    gmr_unlock(mreg, proc);

    MPI_Type_free(&src_type);
    MPI_Type_free(&dst_type);

    /* COPY/SCALE: Free temp buffer */
    if (src_buf != src_ptr)
      MPI_Free_mem(src_buf);

    err = 0;

  } else {
    armci_giov_t iov;

    ARMCII_Strided_to_iov(&iov, src_ptr, src_stride_ar, dst_ptr, dst_stride_ar, count, stride_levels);
    err = PARMCI_AccV(datatype, scale, &iov, 1, proc);

    free(iov.src_ptr_array);
    free(iov.dst_ptr_array);
  }

  return err;
}
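
A usage sketch for the strided accumulate (illustrative only): a 4-row by 8-double patch, scaled by 0.5, accumulated into a remote 2D array whose rows are `ld` doubles apart. It calls the public ARMCI_AccS entry point that the PARMCI_ symbol above backs:

#include <armci.h>

/* Accumulate a 4x8 patch of doubles into a remote 2D array. */
void acc_patch(double *loc_patch, double *rem_patch, int ld, int proc) {
  double scale = 0.5;
  int src_stride[1], dst_stride[1], count[2];

  count[0]      = (int)(8 * sizeof(double));   /* contiguous bytes per row */
  count[1]      = 4;                           /* number of rows           */
  src_stride[0] = (int)(ld * sizeof(double));  /* source row pitch, bytes  */
  dst_stride[0] = (int)(ld * sizeof(double));  /* dest row pitch, bytes    */

  ARMCI_AccS(ARMCI_ACC_DBL, &scale, loc_patch, src_stride,
             rem_patch, dst_stride, count, 1, proc);
}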
Example #8
/** Blocking operation that transfers data from the remote process to the
  * memory of the calling process.  The data transfer is strided and blocking.
  *
  * @param[in] src_ptr         Source starting address (remote) of the data block to get.
  * @param[in] src_stride_ar   Source array of stride distances in bytes.
  * @param[in] dst_ptr         Destination starting address (local) to place the data.
  * @param[in] dst_stride_ar   Destination array of stride distances in bytes.
  * @param[in] count           Block size in each dimension. count[0] should be the
  *                            number of bytes of contiguous data in the leading dimension.
  * @param[in] stride_levels   Number of stride levels.
  * @param[in] proc            Remote process ID (source).
  *
  * @return                    Zero on success, error code otherwise.
  */
int PARMCI_GetS(void *src_ptr, int src_stride_ar[/*stride_levels*/],
               void *dst_ptr, int dst_stride_ar[/*stride_levels*/], 
               int count[/*stride_levels+1*/], int stride_levels, int proc) {

  int err;

  if (ARMCII_GLOBAL_STATE.strided_method == ARMCII_STRIDED_DIRECT) {
    void         *dst_buf = NULL;
    gmr_t *mreg, *gmr_loc = NULL;
    MPI_Datatype src_type, dst_type;

    /* COPY: Guard shared buffers */
    if (ARMCII_GLOBAL_STATE.shr_buf_method == ARMCII_SHR_BUF_COPY) {
      gmr_loc = gmr_lookup(dst_ptr, ARMCI_GROUP_WORLD.rank);

      if (gmr_loc != NULL) {
        int i, size;

        for (i = 1, size = count[0]; i < stride_levels+1; i++)
          size *= count[i];

        MPI_Alloc_mem(size, MPI_INFO_NULL, &dst_buf);
        ARMCII_Assert(dst_buf != NULL);

        MPI_Type_contiguous(size, MPI_BYTE, &dst_type);
      }
    }

    /* NOGUARD: If dst_buf hasn't been assigned a copy, the strided destination
     * buffer will be used directly. */
    if (dst_buf == NULL) {
      dst_buf = dst_ptr;
      ARMCII_Strided_to_dtype(dst_stride_ar, count, stride_levels, MPI_BYTE, &dst_type);
    }

    ARMCII_Strided_to_dtype(src_stride_ar, count, stride_levels, MPI_BYTE, &src_type);

    MPI_Type_commit(&src_type);
    MPI_Type_commit(&dst_type);

    mreg = gmr_lookup(src_ptr, proc);
    ARMCII_Assert_msg(mreg != NULL, "Invalid shared pointer");

    gmr_lock(mreg, proc);
    gmr_get_typed(mreg, src_ptr, 1, src_type, dst_buf, 1, dst_type, proc);
    gmr_unlock(mreg, proc);

    /* COPY: Finish the transfer */
    if (dst_buf != dst_ptr) {
      gmr_dla_lock(gmr_loc);
      armci_read_strided(dst_ptr, stride_levels, dst_stride_ar, count, dst_buf);
      gmr_dla_unlock(gmr_loc);
      MPI_Free_mem(dst_buf);
    }

    MPI_Type_free(&src_type);
    MPI_Type_free(&dst_type);

    err = 0;

  } else {
    armci_giov_t iov;

    ARMCII_Strided_to_iov(&iov, src_ptr, src_stride_ar, dst_ptr, dst_stride_ar, count, stride_levels);
    err = PARMCI_GetV(&iov, 1, proc);

    free(iov.src_ptr_array);
    free(iov.dst_ptr_array);
  }

  return err;
}
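
The matching usage sketch for the strided get (illustrative only), fetching the same 4x8 double patch shape through the public ARMCI_GetS entry point:

#include <armci.h>

/* Fetch a 4x8 patch of doubles from a remote 2D array into a local one. */
void get_patch(double *rem_patch, double *loc_patch, int ld, int proc) {
  int src_stride[1], dst_stride[1], count[2];

  count[0]      = (int)(8 * sizeof(double));   /* contiguous bytes per row */
  count[1]      = 4;                           /* number of rows           */
  src_stride[0] = (int)(ld * sizeof(double));  /* source row pitch, bytes  */
  dst_stride[0] = (int)(ld * sizeof(double));  /* dest row pitch, bytes    */

  ARMCI_GetS(rem_patch, src_stride, loc_patch, dst_stride, count, 1, proc);
}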
Example #9
/** One-sided accumulate operation.
  *
  * @param[in] datatype ARMCI data type for the accumulate operation (see armci.h)
  * @param[in] scale    Pointer for a scalar of type datatype that will be used to
  *                     scale values in the source buffer
  * @param[in] src      Source address (local)
  * @param[in] dst      Destination address (remote)
  * @param[in] bytes    Number of bytes to transfer
  * @param[in] proc     Process id to target
  * @return             0 on success, non-zero on failure
  */
int ARMCI_Acc(int datatype, void *scale, void *src, void *dst, int bytes, int proc) {
  void  *src_buf;
  int    count, type_size, scaled, src_is_locked = 0;
  MPI_Datatype type;
  gmr_t *src_mreg, *dst_mreg;

  src_mreg = gmr_lookup(src, ARMCI_GROUP_WORLD.rank);
  dst_mreg = gmr_lookup(dst, proc);

  ARMCII_Assert_msg(dst_mreg != NULL, "Invalid remote pointer");

  /* Prepare the input data: Apply scaling if needed and acquire the DLA lock if
   * needed.  We hold the DLA lock if (src_buf == src && src_mreg != NULL). */

  scaled = ARMCII_Buf_acc_is_scaled(datatype, scale);

  if (src_mreg && ARMCII_GLOBAL_STATE.shr_buf_method != ARMCII_SHR_BUF_NOGUARD) {
    gmr_dla_lock(src_mreg);
    src_is_locked = 1;
  }

  if (scaled) {
    MPI_Alloc_mem(bytes, MPI_INFO_NULL, &src_buf);
    ARMCII_Assert(src_buf != NULL);
    ARMCII_Buf_acc_scale(src, src_buf, bytes, datatype, scale);
  } else {
    src_buf = src;
  }

  /* Check if we need to copy: user requested it or same mem region */
  if (   (src_buf == src) /* buf_prepare didn't make a copy */
      && (ARMCII_GLOBAL_STATE.shr_buf_method == ARMCII_SHR_BUF_COPY || src_mreg == dst_mreg) )
  {
    MPI_Alloc_mem(bytes, MPI_INFO_NULL, &src_buf);
    ARMCII_Assert(src_buf != NULL);
    ARMCI_Copy(src, src_buf, bytes);
  }

  /* Unlock early if src_buf is a copy */
  if (src_buf != src && src_is_locked) {
    gmr_dla_unlock(src_mreg);
    src_is_locked = 0;
  }

  ARMCII_Acc_type_translate(datatype, &type, &type_size);
  count = bytes/type_size;

  ARMCII_Assert_msg(bytes % type_size == 0, 
      "Transfer size is not a multiple of the datatype size");

  /* TODO: Support a local accumulate operation more efficiently */

  gmr_lock(dst_mreg, proc);
  gmr_accumulate(dst_mreg, src_buf, dst, count, type, proc);
  gmr_unlock(dst_mreg, proc);

  if (src_is_locked) {
    gmr_dla_unlock(src_mreg);
    src_is_locked = 0;
  }

  if (src_buf != src)
    MPI_Free_mem(src_buf);

  return 0;
}
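
A minimal usage sketch for the contiguous accumulate defined above (illustrative only): dst[i] += 2.0 * src[i] for n doubles living on process `proc`:

#include <armci.h>

/* Scaled accumulate of n doubles into a remote buffer. */
void acc_scaled_doubles(double *src, double *dst, int n, int proc) {
  double scale = 2.0;

  ARMCI_Acc(ARMCI_ACC_DBL, &scale, src, dst, n * (int)sizeof(double), proc);
}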