/** Profiled entry point for the vector put operation.
 *
 * Brackets the underlying PARMCI_PutV call with the vector-profiling
 * start/stop hooks so the transfer is accounted under ARMCI_PROF_PUTV.
 *
 * @param[in] darr Array of I/O vector descriptors.
 * @param[in] len  Number of descriptors in darr.
 * @param[in] proc Remote process ID (destination).
 * @return Result of the underlying PARMCI_PutV call.
 */
int ARMCI_PutV(armci_giov_t *darr, int len, int proc)
{
  int rc;

  armci_profile_start_vector(darr, len, proc, ARMCI_PROF_PUTV);
  rc = PARMCI_PutV(darr, len, proc);
  armci_profile_stop_vector(ARMCI_PROF_PUTV);

  return rc;
}
/** Timing wrapper for the vector put operation.
 *
 * Measures the wall-clock time spent in PARMCI_PutV and accumulates it
 * into the global ARMCI_PutV_t counter.
 *
 * Fix: the timestamps were declared `static`, which made this wrapper
 * non-reentrant (concurrent or recursive calls would clobber each
 * other's start time) while providing no benefit. Plain automatic
 * locals are used instead.
 *
 * @param[in] darr Array of I/O vector descriptors.
 * @param[in] len  Number of descriptors in darr.
 * @param[in] proc Remote process ID (destination).
 * @return Result of the underlying PARMCI_PutV call.
 */
int ARMCI_PutV(armci_giov_t * darr, int len, int proc)
{
  int rval;
  double stime, etime; /* per-call timestamps; must not be shared state */

  stime = TIME();
  rval = PARMCI_PutV(darr, len, proc);
  etime = TIME();

  ARMCI_PutV_t += etime - stime;
  return rval;
}
/** Vector put: delegate directly to the profiling-free PARMCI
 * implementation without any additional bookkeeping.
 *
 * @param[in] iov     Array of I/O vector descriptors.
 * @param[in] iov_len Number of descriptors in iov.
 * @param[in] proc    Remote process ID (destination).
 * @return Result of the underlying PARMCI_PutV call.
 */
int ARMCI_PutV(armci_giov_t *iov, int iov_len, int proc)
{
  int rc;
  rc = PARMCI_PutV(iov, iov_len, proc);
  return rc;
}
/** Blocking operation that transfers data from the calling process to the
 * memory of the remote process. The data transfer is strided and blocking.
 *
 * @param[in] src_ptr       Source starting address of the data block to put.
 * @param[in] src_stride_ar Source array of stride distances in bytes.
 * @param[in] dst_ptr       Destination starting address to put data.
 * @param[in] dst_stride_ar Destination array of stride distances in bytes.
 * @param[in] count         Block size in each dimension. count[0] should be the
 *                          number of bytes of contiguous data in leading dimension.
 * @param[in] stride_levels The level of strides.
 * @param[in] proc          Remote process ID (destination).
 *
 * @return Zero on success, error code otherwise.
 */
int PARMCI_PutS(void *src_ptr, int src_stride_ar[/*stride_levels*/],
                void *dst_ptr, int dst_stride_ar[/*stride_levels*/],
                int count[/*stride_levels+1*/], int stride_levels, int proc)
{
  int err;

  if (ARMCII_GLOBAL_STATE.strided_method == ARMCII_STRIDED_DIRECT) {
    /* Direct method: describe both sides with MPI derived datatypes and
     * perform a single typed one-sided put. */
    void *src_buf = NULL;
    gmr_t *mreg, *gmr_loc = NULL;
    MPI_Datatype src_type, dst_type;

    /* COPY: Guard shared buffers.  If the source lies inside a registered
     * (shared) region, pack it into a private contiguous bounce buffer
     * under the DLA lock so the origin buffer is not accessed while it may
     * be the target of concurrent one-sided operations. */
    if (ARMCII_GLOBAL_STATE.shr_buf_method == ARMCII_SHR_BUF_COPY) {
      gmr_loc = gmr_lookup(src_ptr, ARMCI_GROUP_WORLD.rank);

      if (gmr_loc != NULL) {
        int i, size;

        /* Total payload in bytes: count[0] (contiguous leading dimension)
         * times the block counts of every higher stride level. */
        for (i = 1, size = count[0]; i < stride_levels+1; i++)
          size *= count[i];

        MPI_Alloc_mem(size, MPI_INFO_NULL, &src_buf);
        ARMCII_Assert(src_buf != NULL);

        gmr_dla_lock(gmr_loc);
        armci_write_strided(src_ptr, stride_levels, src_stride_ar, count, src_buf);
        gmr_dla_unlock(gmr_loc);

        /* The packed copy is contiguous, so a simple contiguous type
         * describes the source side. */
        MPI_Type_contiguous(size, MPI_BYTE, &src_type);
      }
    }

    /* NOGUARD: If src_buf hasn't been assigned to a copy, the strided source
     * buffer is going to be used directly.
     */
    if (src_buf == NULL) {
      src_buf = src_ptr;
      ARMCII_Strided_to_dtype(src_stride_ar, count, stride_levels, MPI_BYTE, &src_type);
    }

    /* Destination is always described by a strided datatype. */
    ARMCII_Strided_to_dtype(dst_stride_ar, count, stride_levels, MPI_BYTE, &dst_type);

    MPI_Type_commit(&src_type);
    MPI_Type_commit(&dst_type);

    /* Locate the registered memory region that contains dst_ptr on proc. */
    mreg = gmr_lookup(dst_ptr, proc);
    ARMCII_Assert_msg(mreg != NULL, "Invalid shared pointer");

    gmr_lock(mreg, proc);
    gmr_put_typed(mreg, src_buf, 1, src_type, dst_ptr, 1, dst_type, proc);
    gmr_unlock(mreg, proc);

    MPI_Type_free(&src_type);
    MPI_Type_free(&dst_type);

    /* COPY: Free temporary buffer (only allocated on the guarded path). */
    if (src_buf != src_ptr)
      MPI_Free_mem(src_buf);

    err = 0;

  } else {
    /* IOV fallback: expand the strided description into an explicit
     * I/O vector and reuse the vector put path. */
    armci_giov_t iov;

    ARMCII_Strided_to_iov(&iov, src_ptr, src_stride_ar, dst_ptr, dst_stride_ar, count, stride_levels);
    err = PARMCI_PutV(&iov, 1, proc);

    /* ARMCII_Strided_to_iov allocates these arrays; this function owns
     * and releases them. */
    free(iov.src_ptr_array);
    free(iov.dst_ptr_array);
  }

  return err;
}
/* Flush the aggregated requests associated with nb_handle's (tag, proc)
 * pair: issue the queued put/get vector operation, then reset the
 * aggregation buffer.  If condition==UNSET the handle is also detached
 * from the aggregation lists. */
void armci_agg_complete(armci_ihdl_t nb_handle, int condition) {
  int i, index=0, rc;

  /* get the buffer index for this handle; scan the in-use list from the
   * tail for a buffer matching both the handle's tag and target process */
  for(i=ulist.size-1; i>=0; i--) {
    index = ulist.index[i];
    if(aggr[index]->tag == nb_handle->tag &&
       aggr[index]->proc == nb_handle->proc)
      break;
  }
  if(i<0) return; /* implies this handle has no requests at all */

#if 0
  printf("%d: Aggregation Complete to remote process %d (%d:%d requests)\n",
         armci_me, nb_handle->proc, index, aggr[index]->request_len);
#endif

  /* complete the data transfer. NOTE: in LAPI, Non-blocking calls
     (followed by wait) performs better than blocking put/get */
  if(aggr[index]->request_len) {
    switch(nb_handle->op) {
#ifdef LAPI
      armci_hdl_t usr_hdl;
      case PUT:
        ARMCI_INIT_HANDLE(&usr_hdl);
        if((rc=PARMCI_NbPutV(aggr[index]->darr, aggr[index]->request_len,
                             nb_handle->proc, (armci_hdl_t*)&usr_hdl)))
          ARMCI_Error("armci_agg_complete: nbputv failed",rc);
        PARMCI_Wait((armci_hdl_t*)&usr_hdl);
        break;
      case GET:
        ARMCI_INIT_HANDLE(&usr_hdl);
        if((rc=PARMCI_NbGetV(aggr[index]->darr, aggr[index]->request_len,
                             nb_handle->proc, (armci_hdl_t*)&usr_hdl)))
          ARMCI_Error("armci_agg_complete: nbgetv failed",rc);
        PARMCI_Wait((armci_hdl_t*)&usr_hdl);
        break;
#else
      case PUT:
        if((rc=PARMCI_PutV(aggr[index]->darr, aggr[index]->request_len,
                           nb_handle->proc)))
          ARMCI_Error("armci_agg_complete: putv failed",rc);
        break;
      case GET:
        if((rc=PARMCI_GetV(aggr[index]->darr, aggr[index]->request_len,
                           nb_handle->proc)))
          ARMCI_Error("armci_agg_complete: getv failed",rc);
        break;
#endif
    }
  }

  /* setting request length to zero, as the requests are completed;
   * reset the buffer's write position to the end (buffers fill downward) */
  aggr[index]->request_len = 0;
  aggr[index]->ptr_array_len = 0;
  aggr[index]->buf_pos_end = _MAX_AGG_BUFSIZE;

  /* If armci_agg_complete() is called from PARMCI_Wait(), then unset nb_handle */
  if(condition==UNSET) {
    nb_handle->proc = -1;
    _armci_agg_update_lists(index);
  }
}
/** Non-blocking vector put, implemented as a blocking fallback.
 *
 * The transfer is completed immediately via PARMCI_PutV; the request
 * handle receives no deferred work to track.
 *
 * @param[in] iov     Array of I/O vector descriptors.
 * @param[in] iov_len Number of descriptors in iov.
 * @param[in] proc    Remote process ID (destination).
 * @param[in] handle  Request handle (unused by this implementation).
 * @return Result of the underlying PARMCI_PutV call.
 */
int PARMCI_NbPutV(armci_giov_t *iov, int iov_len, int proc, armci_hdl_t* handle)
{
  int rc;
  rc = PARMCI_PutV(iov, iov_len, proc);
  return rc;
}