int ARMCI_AccV(int op, void *scale, armci_giov_t *darr, int len, int proc) { int ret; armci_profile_start_vector(darr, len, proc, ARMCI_PROF_ACCV); ret = PARMCI_AccV(op, scale, darr, len, proc); armci_profile_stop_vector(ARMCI_PROF_ACCV); return ret; }
int ARMCI_AccV(int op, void *scale, armci_giov_t * darr, int len, int proc) { int rval; static double stime, etime; stime = TIME(); rval = PARMCI_AccV(op, scale, darr, len, proc); etime = TIME(); ARMCI_AccV_t += etime - stime; return rval; }
int ARMCI_AccV(int datatype, void *scale, armci_giov_t *iov, int iov_len, int proc) { return PARMCI_AccV(datatype, scale, iov, iov_len, proc); }
/** Blocking operation that accumulates data from the local process into the * memory of the remote process. The data transfer is strided and blocking. * * @param[in] datatype Type of data to be transferred. * @param[in] scale Pointer to the value that input data should be scaled by. * @param[in] src_ptr Source starting address of the data block to put. * @param[in] src_stride_arr Source array of stride distances in bytes. * @param[in] dst_ptr Destination starting address to put data. * @param[in] dst_stride_ar Destination array of stride distances in bytes. * @param[in] count Block size in each dimension. count[0] should be the * number of bytes of contiguous data in leading dimension. * @param[in] stride_levels The level of strides. * @param[in] proc Remote process ID (destination). * * @return Zero on success, error code otherwise. */ int PARMCI_AccS(int datatype, void *scale, void *src_ptr, int src_stride_ar[/*stride_levels*/], void *dst_ptr, int dst_stride_ar[/*stride_levels*/], int count[/*stride_levels+1*/], int stride_levels, int proc) { int err; if (ARMCII_GLOBAL_STATE.strided_method == ARMCII_STRIDED_DIRECT) { void *src_buf = NULL; gmr_t *mreg, *gmr_loc = NULL; MPI_Datatype src_type, dst_type, mpi_datatype; int scaled, mpi_datatype_size; ARMCII_Acc_type_translate(datatype, &mpi_datatype, &mpi_datatype_size); scaled = ARMCII_Buf_acc_is_scaled(datatype, scale); /* SCALE: copy and scale if requested */ if (scaled) { armci_giov_t iov; int i, nelem; if (ARMCII_GLOBAL_STATE.shr_buf_method != ARMCII_SHR_BUF_NOGUARD) gmr_loc = gmr_lookup(src_ptr, ARMCI_GROUP_WORLD.rank); for (i = 1, nelem = count[0]/mpi_datatype_size; i < stride_levels+1; i++) nelem *= count[i]; MPI_Alloc_mem(nelem*mpi_datatype_size, MPI_INFO_NULL, &src_buf); ARMCII_Assert(src_buf != NULL); if (gmr_loc != NULL) gmr_dla_lock(gmr_loc); /* Shoehorn the strided information into an IOV */ ARMCII_Strided_to_iov(&iov, src_ptr, src_stride_ar, src_ptr, src_stride_ar, count, stride_levels); for (i = 0; i < iov.ptr_array_len; i++) ARMCII_Buf_acc_scale(iov.src_ptr_array[i], ((uint8_t*)src_buf) + i*iov.bytes, iov.bytes, datatype, scale); free(iov.src_ptr_array); free(iov.dst_ptr_array); if (gmr_loc != NULL) gmr_dla_unlock(gmr_loc); MPI_Type_contiguous(nelem, mpi_datatype, &src_type); } /* COPY: Guard shared buffers */ else if (ARMCII_GLOBAL_STATE.shr_buf_method == ARMCII_SHR_BUF_COPY) { gmr_loc = gmr_lookup(src_ptr, ARMCI_GROUP_WORLD.rank); if (gmr_loc != NULL) { int i, nelem; for (i = 1, nelem = count[0]/mpi_datatype_size; i < stride_levels+1; i++) nelem *= count[i]; MPI_Alloc_mem(nelem*mpi_datatype_size, MPI_INFO_NULL, &src_buf); ARMCII_Assert(src_buf != NULL); gmr_dla_lock(gmr_loc); armci_write_strided(src_ptr, stride_levels, src_stride_ar, count, src_buf); gmr_dla_unlock(gmr_loc); MPI_Type_contiguous(nelem, mpi_datatype, &src_type); } } /* NOGUARD: If src_buf hasn't been assigned to a copy, the strided source * buffer is going to be used directly. */ if (src_buf == NULL) { src_buf = src_ptr; ARMCII_Strided_to_dtype(src_stride_ar, count, stride_levels, mpi_datatype, &src_type); } ARMCII_Strided_to_dtype(dst_stride_ar, count, stride_levels, mpi_datatype, &dst_type); MPI_Type_commit(&src_type); MPI_Type_commit(&dst_type); int src_size, dst_size; MPI_Type_size(src_type, &src_size); MPI_Type_size(dst_type, &dst_size); ARMCII_Assert(src_size == dst_size); mreg = gmr_lookup(dst_ptr, proc); ARMCII_Assert_msg(mreg != NULL, "Invalid shared pointer"); gmr_lock(mreg, proc); gmr_accumulate_typed(mreg, src_buf, 1, src_type, dst_ptr, 1, dst_type, proc); gmr_unlock(mreg, proc); MPI_Type_free(&src_type); MPI_Type_free(&dst_type); /* COPY/SCALE: Free temp buffer */ if (src_buf != src_ptr) MPI_Free_mem(src_buf); err = 0; } else { armci_giov_t iov; ARMCII_Strided_to_iov(&iov, src_ptr, src_stride_ar, dst_ptr, dst_stride_ar, count, stride_levels); err = PARMCI_AccV(datatype, scale, &iov, 1, proc); free(iov.src_ptr_array); free(iov.dst_ptr_array); } return err; }
int PARMCI_NbAccV(int datatype, void *scale, armci_giov_t *iov, int iov_len, int proc, armci_hdl_t* handle) { return PARMCI_AccV(datatype, scale, iov, iov_len, proc); }