/*
 * MPIDI_CH3_Request_destroy - release what a request still holds and hand
 * the request object back to MPID_Request_mem.
 *
 * req - the request to destroy.  It is dereferenced unconditionally, so it
 *       must not be NULL.
 *
 * When MPICH_DBG_OUTPUT is defined the handle kind and the reference count
 * are checked first; a handle that is not an MPID_REQUEST, or a non-zero
 * ref_count, is reported through MPID_Abort().
 */
void MPIDI_CH3_Request_destroy(MPID_Request * req)
{
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_REQUEST_DESTROY);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_REQUEST_DESTROY);
    MPIU_DBG_MSG_P(CH3_CHANNEL,VERBOSE,
                   "freeing request, handle=0x%08x", req->handle);

#ifdef MPICH_DBG_OUTPUT
    /* Debug-only sanity check: the handle must identify a request object. */
    if (HANDLE_GET_MPI_KIND(req->handle) != MPID_REQUEST)
    {
        int bad_handle_errno;

        bad_handle_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL,
                                                FCNAME, __LINE__,
                                                MPI_ERR_OTHER,
                                                "**invalid_handle",
                                                "**invalid_handle %d",
                                                req->handle);
        MPID_Abort(MPIR_Process.comm_world, bad_handle_errno, -1, NULL);
    }

    /* XXX DJG FIXME should we be checking this? */
    /* Debug-only sanity check: nobody may still hold a reference. */
    if (req->ref_count != 0)
    {
        int bad_refcount_errno;

        bad_refcount_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL,
                                                  FCNAME, __LINE__,
                                                  MPI_ERR_OTHER,
                                                  "**invalid_refcount",
                                                  "**invalid_refcount %d",
                                                  req->ref_count);
        MPID_Abort(MPIR_Process.comm_world, bad_refcount_errno, -1, NULL);
    }
#endif

    /* FIXME: We need a better way to handle these so that we
       do not always need to initialize these fields and check them
       when we destroy a request */
    /* FIXME: We need a way to call these routines ONLY when the
       related ref count has become zero. */

    /* Each optional attachment is released only if it is present. */
    if (req->comm)
    {
        MPIR_Comm_release(req->comm, 0);
    }

    if (req->greq_fns)
    {
        MPIU_Free(req->greq_fns);
    }

    if (req->dev.datatype_ptr)
    {
        MPID_Datatype_release(req->dev.datatype_ptr);
    }

    if (req->dev.segment_ptr)
    {
        MPID_Segment_free(req->dev.segment_ptr);
    }

    if (MPIDI_Request_get_srbuf_flag(req))
    {
        MPIDI_CH3U_SRBuf_free(req);
    }

    /* Finally give the request object itself back to its pool. */
    MPIU_Handle_obj_free(&MPID_Request_mem, req);

    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_REQUEST_DESTROY);
}
/*
 * MPIDI_CH3U_Request_load_recv_iov - fill rreq->dev.iov for the next chunk
 * of incoming data and choose the OnDataAvail handler to run once that
 * chunk has been received.
 *
 * rreq - receive request whose dev.segment_*, dev.tmpbuf*, dev.iov* and
 *        dev.OnDataAvail fields are read and updated.
 *
 * Three situations are handled:
 *   1. segment_first < segment_size, the request is not an (get-)accumulate
 *      receive and the SRBuf flag is set: a single IOV entry is pointed at
 *      the SRBuf, after the tmpbuf_off bytes already stored there.
 *   2. segment_first < segment_size otherwise: MPID_Segment_unpack_vector()
 *      builds an IOV.  If the resulting IOV is too sparse (see
 *      MPIDI_IOV_DENSITY_MIN) a SRBuf is allocated and this function calls
 *      itself so that case 1 fills the IOV.
 *   3. segment_first >= segment_size: the remaining
 *      recv_data_sz - segment_first bytes are read into the SRBuf
 *      (allocated here if necessary).
 *
 * Returns MPI_SUCCESS, or the error code created when the SRBuf cannot be
 * allocated (that code is also stored in rreq->status.MPI_ERROR).  A
 * datatype mismatch (unpack_vector yields an empty IOV) is recorded in
 * rreq->status.MPI_ERROR only; the return value is then that of the
 * recursive call.
 */
int MPIDI_CH3U_Request_load_recv_iov(MPID_Request * const rreq)
{
    MPI_Aint last;
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_REQUEST_LOAD_RECV_IOV);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_REQUEST_LOAD_RECV_IOV);

    if (rreq->dev.segment_first < rreq->dev.segment_size)
    {
        /* still reading data that needs to go into the user buffer */

        if (MPIDI_Request_get_type(rreq) != MPIDI_REQUEST_TYPE_ACCUM_RECV &&
            MPIDI_Request_get_type(rreq) != MPIDI_REQUEST_TYPE_GET_ACCUM_RECV &&
            MPIDI_Request_get_srbuf_flag(rreq))
        {
            MPIDI_msg_sz_t data_sz;
            MPIDI_msg_sz_t tmpbuf_sz;

            /* Once a SRBuf is in use, we continue to use it since a small
               amount of data may already be present at the beginning
               of the buffer.  This data is left over from the previous
               unpack, most likely a result of alignment issues.  NOTE: we
               could force the use of the SRBuf only
               when (rreq->dev.tmpbuf_off > 0)... */

            data_sz = rreq->dev.segment_size - rreq->dev.segment_first -
                rreq->dev.tmpbuf_off;
            MPIU_Assert(data_sz > 0);

            /* Never ask for more than the free space left in the SRBuf. */
            tmpbuf_sz = rreq->dev.tmpbuf_sz - rreq->dev.tmpbuf_off;
            if (data_sz > tmpbuf_sz)
            {
                data_sz = tmpbuf_sz;
            }

            rreq->dev.iov[0].MPID_IOV_BUF =
                (MPID_IOV_BUF_CAST)((char *) rreq->dev.tmpbuf +
                                    rreq->dev.tmpbuf_off);
            rreq->dev.iov[0].MPID_IOV_LEN = data_sz;
            rreq->dev.iov_offset = 0;
            rreq->dev.iov_count = 1;

            MPIU_Assert(rreq->dev.segment_first + data_sz +
                        rreq->dev.tmpbuf_off <= rreq->dev.recv_data_sz);
            if (rreq->dev.segment_first + data_sz +
                rreq->dev.tmpbuf_off == rreq->dev.recv_data_sz)
            {
                MPIU_DBG_MSG(CH3_CHANNEL,VERBOSE,
                    "updating rreq to read the remaining data into the SRBuf");
                rreq->dev.OnDataAvail = MPIDI_CH3_ReqHandler_UnpackSRBufComplete;
            }
            else
            {
                MPIU_DBG_MSG(CH3_CHANNEL,VERBOSE,
                    "updating rreq to read more data into the SRBuf");
                rreq->dev.OnDataAvail = MPIDI_CH3_ReqHandler_UnpackSRBufReloadIOV;
            }
            goto fn_exit;
        }

        /* No SRBuf in use: try to receive straight into the user buffer. */
        last = rreq->dev.segment_size;
        rreq->dev.iov_count = MPID_IOV_LIMIT;
        rreq->dev.iov_offset = 0;
        MPIU_DBG_MSG_FMT(CH3_CHANNEL,VERBOSE,(MPIU_DBG_FDEST,
            "pre-upv: first=" MPIDI_MSG_SZ_FMT ", last=" MPIDI_MSG_SZ_FMT ", iov_n=%d",
            rreq->dev.segment_first, last, rreq->dev.iov_count));
        MPIU_Assert(rreq->dev.segment_first < last);
        MPIU_Assert(last > 0);
        MPID_Segment_unpack_vector(rreq->dev.segment_ptr,
                                   rreq->dev.segment_first,
                                   &last, &rreq->dev.iov[0],
                                   &rreq->dev.iov_count);
        MPIU_DBG_MSG_FMT(CH3_CHANNEL,VERBOSE,(MPIU_DBG_FDEST,
            "post-upv: first=" MPIDI_MSG_SZ_FMT ", last=" MPIDI_MSG_SZ_FMT ", iov_n=%d, iov_offset=%lld",
            rreq->dev.segment_first, last, rreq->dev.iov_count,
            (long long)rreq->dev.iov_offset));
        MPIU_Assert(rreq->dev.iov_count >= 0 &&
                    rreq->dev.iov_count <= MPID_IOV_LIMIT);

        /* --BEGIN ERROR HANDLING-- */
        if (rreq->dev.iov_count == 0)
        {
            /* If the data can't be unpacked, then we have a mis-match
               between the datatype and the amount of data received.
               Adjust the segment info so that the remaining data is
               received and thrown away. */
            rreq->status.MPI_ERROR = MPIR_Err_create_code(MPI_SUCCESS,
                       MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_TYPE,
                       "**dtypemismatch", 0);
            MPIR_STATUS_SET_COUNT(rreq->status, rreq->dev.segment_first);
            /* With segment_size == segment_first the recursive call takes
               the "receive and toss" branch at the bottom. */
            rreq->dev.segment_size = rreq->dev.segment_first;
            mpi_errno = MPIDI_CH3U_Request_load_recv_iov(rreq);
            goto fn_exit;
        }
        else
        {
            MPIU_Assert(rreq->dev.iov_offset < rreq->dev.iov_count);
        }
        /* --END ERROR HANDLING-- */

        if (last == rreq->dev.recv_data_sz)
        {
            MPIU_DBG_MSG(CH3_CHANNEL,VERBOSE,
                "updating rreq to read the remaining data directly into the user buffer");
            /* Eventually, use OnFinal for this instead */
            rreq->dev.OnDataAvail = rreq->dev.OnFinal;
        }
        else if (MPIDI_Request_get_type(rreq) == MPIDI_REQUEST_TYPE_ACCUM_RECV ||
                 MPIDI_Request_get_type(rreq) == MPIDI_REQUEST_TYPE_GET_ACCUM_RECV ||
                 (last == rreq->dev.segment_size ||
                  (last - rreq->dev.segment_first) / rreq->dev.iov_count >= MPIDI_IOV_DENSITY_MIN))
        {
            MPIU_DBG_MSG(CH3_CHANNEL,VERBOSE,
                "updating rreq to read more data directly into the user buffer");
            rreq->dev.segment_first = last;
            rreq->dev.OnDataAvail = MPIDI_CH3_ReqHandler_ReloadIOV;
        }
        else
        {
            /* Too little data would have been received using an IOV.
               We will start receiving data into a SRBuf and unpacking it
               later. */
            MPIU_Assert(MPIDI_Request_get_srbuf_flag(rreq) == FALSE);

            MPIDI_CH3U_SRBuf_alloc(rreq,
                rreq->dev.segment_size - rreq->dev.segment_first);
            rreq->dev.tmpbuf_off = 0;
            /* --BEGIN ERROR HANDLING-- */
            if (rreq->dev.tmpbuf_sz == 0)
            {
                /* FIXME - we should drain the data off the pipe here, but we
                   don't have a buffer to drain it into.  should this be
                   a fatal error? */
                MPIU_DBG_MSG(CH3_CHANNEL,VERBOSE,"SRBuf allocation failure");
                /* The size difference is a message-size quantity (printed
                   with MPIDI_MSG_SZ_FMT elsewhere in this function), so it
                   is converted explicitly to match the "%d" conversion. */
                mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL,
                    FCNAME, __LINE__, MPI_ERR_OTHER, "**nomem",
                    "**nomem %d",
                    (int)(rreq->dev.segment_size - rreq->dev.segment_first));
                rreq->status.MPI_ERROR = mpi_errno;
                goto fn_exit;
            }
            /* --END ERROR HANDLING-- */

            /* fill in the IOV using a recursive call */
            mpi_errno = MPIDI_CH3U_Request_load_recv_iov(rreq);
        }
    }
    else
    {
        /* receive and toss any extra data that does not fit in the user's
           buffer */
        MPIDI_msg_sz_t data_sz;

        data_sz = rreq->dev.recv_data_sz - rreq->dev.segment_first;
        if (!MPIDI_Request_get_srbuf_flag(rreq))
        {
            MPIDI_CH3U_SRBuf_alloc(rreq, data_sz);
            /* --BEGIN ERROR HANDLING-- */
            if (rreq->dev.tmpbuf_sz == 0)
            {
                MPIU_DBG_MSG(CH3_CHANNEL,TYPICAL,"SRBuf allocation failure");
                mpi_errno = MPIR_Err_create_code(MPI_SUCCESS,
                               MPIR_ERR_FATAL, FCNAME, __LINE__, MPI_ERR_OTHER,
                               "**nomem", 0);
                rreq->status.MPI_ERROR = mpi_errno;
                goto fn_exit;
            }
            /* --END ERROR HANDLING-- */
        }

        if (data_sz <= rreq->dev.tmpbuf_sz)
        {
            MPIU_DBG_MSG(CH3_CHANNEL,VERBOSE,
                "updating rreq to read overflow data into the SRBuf and complete");
            rreq->dev.iov[0].MPID_IOV_LEN = data_sz;
            MPIU_Assert(MPIDI_Request_get_type(rreq) == MPIDI_REQUEST_TYPE_RECV);
            /* Eventually, use OnFinal for this instead */
            rreq->dev.OnDataAvail = rreq->dev.OnFinal;
        }
        else
        {
            MPIU_DBG_MSG(CH3_CHANNEL,VERBOSE,
                "updating rreq to read overflow data into the SRBuf and reload IOV");
            rreq->dev.iov[0].MPID_IOV_LEN = rreq->dev.tmpbuf_sz;
            rreq->dev.segment_first += rreq->dev.tmpbuf_sz;
            rreq->dev.OnDataAvail = MPIDI_CH3_ReqHandler_ReloadIOV;
        }

        /* NOTE(review): unlike the two branches above, this path does not
           reset rreq->dev.iov_offset -- confirm callers do not rely on it
           being zero here. */
        rreq->dev.iov[0].MPID_IOV_BUF = (MPID_IOV_BUF_CAST)rreq->dev.tmpbuf;
        rreq->dev.iov_count = 1;
    }

  fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_REQUEST_LOAD_RECV_IOV);
    return mpi_errno;
}
/*
 * MPIDI_CH3U_Request_load_send_iov - fill an IOV with the next chunk of
 * data to send for sreq and choose the OnDataAvail handler to run once that
 * chunk has been sent.
 *
 * sreq  - send request; dev.segment_ptr must be non-NULL and
 *         dev.segment_first must be below dev.segment_size (both asserted).
 * iov   - IOV array to fill.
 * iov_n - in: number of entries available in iov (asserted to be in
 *         1..MPID_IOV_LIMIT); out: number of entries actually filled.
 *
 * MPID_Segment_pack_vector() is tried first.  If it covers the rest of the
 * segment, or the average entry size reaches MPIDI_IOV_DENSITY_MIN, the IOV
 * is used as is.  Otherwise the entries are copied into the request's SRBuf
 * (allocated here if the SRBuf flag is not set), more data is packed behind
 * them with MPID_Segment_pack(), and a single IOV entry describing the
 * SRBuf is returned.
 *
 * Returns MPI_SUCCESS, or the error code created when the SRBuf cannot be
 * allocated (also stored in sreq->status.MPI_ERROR).
 */
int MPIDI_CH3U_Request_load_send_iov(MPID_Request * const sreq,
                                     MPID_IOV * const iov, int * const iov_n)
{
    MPI_Aint last;
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_REQUEST_LOAD_SEND_IOV);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_REQUEST_LOAD_SEND_IOV);
    MPIU_Assert(sreq->dev.segment_ptr != NULL);

    last = sreq->dev.segment_size;
    MPIU_DBG_MSG_FMT(CH3_CHANNEL,VERBOSE,(MPIU_DBG_FDEST,
        "pre-pv: first=" MPIDI_MSG_SZ_FMT ", last=" MPIDI_MSG_SZ_FMT ", iov_n=%d",
        sreq->dev.segment_first, last, *iov_n));
    MPIU_Assert(sreq->dev.segment_first < last);
    MPIU_Assert(last > 0);
    MPIU_Assert(*iov_n > 0 && *iov_n <= MPID_IOV_LIMIT);
    MPID_Segment_pack_vector(sreq->dev.segment_ptr, sreq->dev.segment_first,
                             &last, iov, iov_n);
    MPIU_DBG_MSG_FMT(CH3_CHANNEL,VERBOSE,(MPIU_DBG_FDEST,
        "post-pv: first=" MPIDI_MSG_SZ_FMT ", last=" MPIDI_MSG_SZ_FMT ", iov_n=%d",
        sreq->dev.segment_first, last, *iov_n));
    MPIU_Assert(*iov_n > 0 && *iov_n <= MPID_IOV_LIMIT);

    if (last == sreq->dev.segment_size)
    {
        MPIU_DBG_MSG(CH3_CHANNEL,VERBOSE,"remaining data loaded into IOV");
        sreq->dev.OnDataAvail = sreq->dev.OnFinal;
    }
    else if ((last - sreq->dev.segment_first) / *iov_n >= MPIDI_IOV_DENSITY_MIN)
    {
        MPIU_DBG_MSG(CH3_CHANNEL,VERBOSE,"more data loaded into IOV");
        sreq->dev.segment_first = last;
        sreq->dev.OnDataAvail = MPIDI_CH3_ReqHandler_SendReloadIOV;
    }
    else
    {
        MPIDI_msg_sz_t data_sz;
        int i, iov_data_copied;

        MPIU_DBG_MSG(CH3_CHANNEL,VERBOSE,"low density. using SRBuf.");

        /* Everything from the current position to the end of the segment,
           including the bytes the IOV above already describes. */
        data_sz = sreq->dev.segment_size - sreq->dev.segment_first;
        if (!MPIDI_Request_get_srbuf_flag(sreq))
        {
            MPIDI_CH3U_SRBuf_alloc(sreq, data_sz);
            /* --BEGIN ERROR HANDLING-- */
            if (sreq->dev.tmpbuf_sz == 0)
            {
                MPIU_DBG_MSG(CH3_CHANNEL,TYPICAL,"SRBuf allocation failure");
                /* data_sz is a MPIDI_msg_sz_t; convert it explicitly so the
                   variadic argument matches the "%d" conversion. */
                mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL,
                                FCNAME, __LINE__, MPI_ERR_OTHER, "**nomem",
                                "**nomem %d", (int) data_sz);
                sreq->status.MPI_ERROR = mpi_errno;
                goto fn_exit;
            }
            /* --END ERROR HANDLING-- */
        }

        /* Gather the sparse IOV entries at the front of the SRBuf. */
        iov_data_copied = 0;
        for (i = 0; i < *iov_n; i++)
        {
            MPIU_Memcpy((char*) sreq->dev.tmpbuf + iov_data_copied,
                        iov[i].MPID_IOV_BUF, iov[i].MPID_IOV_LEN);
            iov_data_copied += iov[i].MPID_IOV_LEN;
        }
        sreq->dev.segment_first = last;

        /* Pack as much additional data as fits behind the copied bytes.
           NOTE(review): data_sz was computed before segment_first was
           advanced, so it still counts the iov_data_copied bytes; the test
           below is therefore stricter than "the remaining data fits" --
           confirm this is intended. */
        last = (data_sz <= sreq->dev.tmpbuf_sz - iov_data_copied) ?
            sreq->dev.segment_size :
            sreq->dev.segment_first + sreq->dev.tmpbuf_sz - iov_data_copied;
        MPIU_DBG_MSG_FMT(CH3_CHANNEL,VERBOSE,(MPIU_DBG_FDEST,
            "pre-pack: first=" MPIDI_MSG_SZ_FMT ", last=" MPIDI_MSG_SZ_FMT,
            sreq->dev.segment_first, last));
        MPID_Segment_pack(sreq->dev.segment_ptr, sreq->dev.segment_first,
                          &last, (char*) sreq->dev.tmpbuf + iov_data_copied);
        MPIU_DBG_MSG_FMT(CH3_CHANNEL,VERBOSE,(MPIU_DBG_FDEST,
            "post-pack: first=" MPIDI_MSG_SZ_FMT ", last=" MPIDI_MSG_SZ_FMT,
            sreq->dev.segment_first, last));

        /* Hand back one entry covering the copied plus the packed bytes. */
        iov[0].MPID_IOV_BUF = (MPID_IOV_BUF_CAST)sreq->dev.tmpbuf;
        iov[0].MPID_IOV_LEN = last - sreq->dev.segment_first + iov_data_copied;
        *iov_n = 1;

        if (last == sreq->dev.segment_size)
        {
            MPIU_DBG_MSG(CH3_CHANNEL,VERBOSE,"remaining data packed into SRBuf");
            sreq->dev.OnDataAvail = sreq->dev.OnFinal;
        }
        else
        {
            MPIU_DBG_MSG(CH3_CHANNEL,VERBOSE,"more data packed into SRBuf");
            sreq->dev.segment_first = last;
            sreq->dev.OnDataAvail = MPIDI_CH3_ReqHandler_SendReloadIOV;
        }
    }

  fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_REQUEST_LOAD_SEND_IOV);
    return mpi_errno;
}
/*
 * MPID_nem_send_iov - send a header plus scattered data by first copying
 * the data entries into the request's SRBuf and then issuing one contiguous
 * send through vc->ch.iSendContig().
 *
 * vc       - virtual connection to send on.
 * sreq_ptr - in/out: *sreq_ptr may be NULL, in which case a send request is
 *            created here (reference count set to 2); on success *sreq_ptr
 *            is set to the request that was used.
 * iov      - iov[0] is the header; iov[1..n_iov-1] are the data entries.
 * n_iov    - total number of entries in iov, header included.
 *
 * Returns MPI_SUCCESS or an MPI error code (SRBuf allocation failure, or
 * whatever iSendContig reports).
 *
 * NOTE(review): on both error paths *sreq_ptr is left untouched, so a
 * request created here is not handed back to the caller -- confirm whether
 * it should be released.
 */
int MPID_nem_send_iov(MPIDI_VC_t *vc, MPIR_Request **sreq_ptr, MPL_IOV *iov, int n_iov)
{
    int mpi_errno = MPI_SUCCESS;
    intptr_t payload_len;
    int k;
    int copied;
    MPIR_Request *sreq = *sreq_ptr;
    MPL_IOV *payload_iov = &iov[1]; /* the data entries, header skipped */
    int payload_cnt = n_iov - 1;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_SEND_IOV);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_SEND_IOV);

    if (sreq == NULL)
    {
        /* the caller supplied no request: create one */
        sreq = MPIR_Request_create(MPIR_REQUEST_KIND__UNDEFINED);
        MPIR_Assert(sreq != NULL);
        MPIR_Object_set_ref(sreq, 2);
        sreq->kind = MPIR_REQUEST_KIND__SEND;
        sreq->dev.OnDataAvail = 0;
    }

    /* total number of data bytes, header excluded */
    payload_len = 0;
    k = 0;
    while (k < payload_cnt)
    {
        payload_len += payload_iov[k].MPL_IOV_LEN;
        ++k;
    }

    if (!MPIDI_Request_get_srbuf_flag(sreq))
    {
        MPIDI_CH3U_SRBuf_alloc(sreq, payload_len);
        /* --BEGIN ERROR HANDLING-- */
        if (sreq->dev.tmpbuf_sz == 0)
        {
            MPL_DBG_MSG(MPIDI_CH3_DBG_CHANNEL,TYPICAL,"SRBuf allocation failure");
            mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL,
                                             FCNAME, __LINE__, MPI_ERR_OTHER,
                                             "**nomem", 0);
            sreq->status.MPI_ERROR = mpi_errno;
            goto fn_exit;
        }
        /* --END ERROR HANDLING-- */
    }

    /* the whole payload has to fit into the SRBuf */
    MPIR_Assert(sreq->dev.tmpbuf_sz >= payload_len);

    /* pack the data entries back to back into the SRBuf */
    copied = 0;
    k = 0;
    while (k < payload_cnt)
    {
        MPIR_Memcpy((char*) sreq->dev.tmpbuf + copied,
                    payload_iov[k].MPL_IOV_BUF, payload_iov[k].MPL_IOV_LEN);
        copied += payload_iov[k].MPL_IOV_LEN;
        ++k;
    }

    /* header from iov[0], data from the SRBuf */
    mpi_errno = vc->ch.iSendContig(vc, sreq,
                                   iov[0].MPL_IOV_BUF, iov[0].MPL_IOV_LEN,
                                   sreq->dev.tmpbuf, payload_len);
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);

    *sreq_ptr = sreq;

 fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_SEND_IOV);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}