Ejemplo n.º 1
0
/** Blocking operation that transfers data from the remote process to the
  * memory of the calling process.  The data transfer is strided and blocking.
  *
  * @param[in] src_ptr         Source starting address of the data block to put.
  * @param[in] src_stride_arr  Source array of stride distances in bytes.
  * @param[in] dst_ptr         Destination starting address to put data.
  * @param[in] dst_stride_ar   Destination array of stride distances in bytes.
  * @param[in] count           Block size in each dimension. count[0] should be the
  *                            number of bytes of contiguous data in leading dimension.
  * @param[in] stride_levels   The level of strides.
  * @param[in] proc            Remote process ID (destination).
  *
  * @return                    Zero on success, error code otherwise.
  */
int PARMCI_GetS(void *src_ptr, int src_stride_ar[/*stride_levels*/],
               void *dst_ptr, int dst_stride_ar[/*stride_levels*/], 
               int count[/*stride_levels+1*/], int stride_levels, int proc) {

  int err;

  if (ARMCII_GLOBAL_STATE.strided_method == ARMCII_STRIDED_DIRECT) {
    void         *dst_buf = NULL;
    gmr_t *mreg, *gmr_loc = NULL;
    MPI_Datatype src_type, dst_type;

    /* COPY: Guard shared buffers */
    if (ARMCII_GLOBAL_STATE.shr_buf_method == ARMCII_SHR_BUF_COPY) {
      gmr_loc = gmr_lookup(dst_ptr, ARMCI_GROUP_WORLD.rank);

      if (gmr_loc != NULL) {
        int i, size;

        for (i = 1, size = count[0]; i < stride_levels+1; i++)
          size *= count[i];

        MPI_Alloc_mem(size, MPI_INFO_NULL, &dst_buf);
        ARMCII_Assert(dst_buf != NULL);

        MPI_Type_contiguous(size, MPI_BYTE, &dst_type);
      }
    }

    /* NOGUARD: If dst_buf hasn't been assigned to a copy, the strided source
     * buffer is going to be used directly. */
    if (dst_buf == NULL) { 
        dst_buf = dst_ptr;
        ARMCII_Strided_to_dtype(dst_stride_ar, count, stride_levels, MPI_BYTE, &dst_type);
    }

    ARMCII_Strided_to_dtype(src_stride_ar, count, stride_levels, MPI_BYTE, &src_type);

    MPI_Type_commit(&src_type);
    MPI_Type_commit(&dst_type);

    mreg = gmr_lookup(src_ptr, proc);
    ARMCII_Assert_msg(mreg != NULL, "Invalid shared pointer");

    gmr_lock(mreg, proc);
    gmr_get_typed(mreg, src_ptr, 1, src_type, dst_buf, 1, dst_type, proc);
    gmr_unlock(mreg, proc);

    /* COPY: Finish the transfer */
    if (dst_buf != dst_ptr) {
      gmr_dla_lock(gmr_loc);
      armci_read_strided(dst_ptr, stride_levels, dst_stride_ar, count, dst_buf);
      gmr_dla_unlock(gmr_loc);
      MPI_Free_mem(dst_buf);
    }

    MPI_Type_free(&src_type);
    MPI_Type_free(&dst_type);

    err = 0;

  } else {
    armci_giov_t iov;

    ARMCII_Strided_to_iov(&iov, src_ptr, src_stride_ar, dst_ptr, dst_stride_ar, count, stride_levels);
    err = PARMCI_GetV(&iov, 1, proc);

    free(iov.src_ptr_array);
    free(iov.dst_ptr_array);
  }

  return err;
}
Ejemplo n.º 2
0
/** One-sided get operation.  Destination buffer must be private.
  *
  * @param[in] mreg   Memory region
  * @param[in] src    Source address (remote)
  * @param[in] dst    Destination address (local)
  * @param[in] size   Number of bytes to transfer
  * @param[in] proc   Absolute process id of target process
  * @return           0 on success, non-zero on failure
  */
int gmr_get(gmr_t *mreg, void *src, void *dst, int size, int proc) {
  ARMCII_Assert_msg(dst != NULL, "Invalid local address");
  return gmr_get_typed(mreg, src, size, MPI_BYTE, dst, size, MPI_BYTE, proc);
}
Ejemplo n.º 3
0
/** Optimized implementation of the ARMCI IOV operation that uses an MPI
  * datatype to achieve a one-sided gather/scatter.  Does not use MPI_BOTTOM.
  */
int ARMCII_Iov_op_datatype_no_bottom(enum ARMCII_Op_e op, void **src, void **dst, int count, int elem_count,
    MPI_Datatype type, int proc) {

    gmr_t *mreg;
    MPI_Datatype  type_loc, type_rem;
    MPI_Aint      disp_loc[count];
    int           disp_rem[count];
    int           block_len[count];
    void         *dst_win_base;
    int           dst_win_size, i, type_size;
    void        **buf_rem, **buf_loc;
    MPI_Aint      base_rem;
    MPI_Aint      base_loc;
    void         *base_loc_ptr;

    switch(op) {
      case ARMCII_OP_ACC:
      case ARMCII_OP_PUT:
        buf_rem = dst;
        buf_loc = src;
        break;
      case ARMCII_OP_GET:
        buf_rem = src;
        buf_loc = dst;
        break;
      default:
        ARMCII_Error("unknown operation (%d)", op);
        return 1;
    }

    MPI_Type_size(type, &type_size);

    mreg = gmr_lookup(buf_rem[0], proc);
    ARMCII_Assert_msg(mreg != NULL, "Invalid remote pointer");

    dst_win_base = mreg->slices[proc].base;
    dst_win_size = mreg->slices[proc].size;

    MPI_Get_address(dst_win_base, &base_rem);

    /* Pick a base address for the start of the origin's datatype */
    base_loc_ptr = buf_loc[0];
    MPI_Get_address(base_loc_ptr, &base_loc);

    for (i = 0; i < count; i++) {
      MPI_Aint target_rem, target_loc;
      MPI_Get_address(buf_loc[i], &target_loc);
      MPI_Get_address(buf_rem[i], &target_rem);
      disp_loc[i]  =  target_loc - base_loc;
      disp_rem[i]  = (target_rem - base_rem)/type_size;
      block_len[i] = elem_count;

      ARMCII_Assert_msg((target_rem - base_rem) % type_size == 0, "Transfer size is not a multiple of type size");
      ARMCII_Assert_msg(disp_rem[i] >= 0 && disp_rem[i] < dst_win_size, "Invalid remote pointer");
      ARMCII_Assert_msg(((uint8_t*)buf_rem[i]) + block_len[i] <= ((uint8_t*)dst_win_base) + dst_win_size, "Transfer exceeds buffer length");
    }

    MPI_Type_create_hindexed(count, block_len, disp_loc, type, &type_loc);
    MPI_Type_create_indexed_block(count, elem_count, disp_rem, type, &type_rem);
    //MPI_Type_indexed(count, block_len, disp_rem, type, &type_rem);

    MPI_Type_commit(&type_loc);
    MPI_Type_commit(&type_rem);

    gmr_lock(mreg, proc);

    switch(op) {
      case ARMCII_OP_ACC:
        gmr_accumulate_typed(mreg, base_loc_ptr, 1, type_loc, MPI_BOTTOM, 1, type_rem, proc);
        break;
      case ARMCII_OP_PUT:
        gmr_put_typed(mreg, base_loc_ptr, 1, type_loc, MPI_BOTTOM, 1, type_rem, proc);
        break;
      case ARMCII_OP_GET:
        gmr_get_typed(mreg, MPI_BOTTOM, 1, type_rem, base_loc_ptr, 1, type_loc, proc);
        break;
      default:
        ARMCII_Error("unknown operation (%d)", op);
        return 1;
    }

    gmr_unlock(mreg, proc);

    MPI_Type_free(&type_loc);
    MPI_Type_free(&type_rem);

    return 0;
}