int MPIDI_CH3_Rendezvous_unpack_data(MPIDI_VC_t *vc, MPID_Request *rreq) { /* If we are using datatype, then need to unpack data from tmpbuf */ int iter = 0; int copied = 0; int mpi_errno = MPI_SUCCESS; int complete; uintptr_t buf = (uintptr_t) rreq->mrail.rndv_buf; MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_RNDV_UNPACK_DATA); MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_RNDV_UNPACK_DATA); for (iter=0; iter < rreq->dev.iov_count; ++iter) { MPIU_Memcpy(rreq->dev.iov[iter].MPID_IOV_BUF, (void *) buf, rreq->dev.iov[iter].MPID_IOV_LEN); buf += rreq->dev.iov[iter].MPID_IOV_LEN; copied += rreq->dev.iov[iter].MPID_IOV_LEN; } MPIDI_CH3I_Request_adjust_iov(rreq, copied); while (rreq->dev.OnDataAvail == MPIDI_CH3_ReqHandler_UnpackSRBufReloadIOV || rreq->dev.OnDataAvail == MPIDI_CH3_ReqHandler_ReloadIOV) { /* XXX: dev.ca should only be CA_COMPLETE? */ /* end of XXX */ mpi_errno = MPIDI_CH3U_Handle_recv_req(vc, rreq, &complete); /* --BEGIN ERROR HANDLING-- */ if (mpi_errno != MPI_SUCCESS || complete == TRUE) { mpi_errno = MPIR_Err_create_code( mpi_errno, MPIR_ERR_FATAL, FCNAME, __LINE__, MPI_ERR_OTHER, "**fail", 0); goto fn_exit; } copied = 0; for (iter = 0; iter < rreq->dev.iov_count; ++iter) { MPIU_Memcpy(rreq->dev.iov[iter].MPID_IOV_BUF, (void *) buf, rreq->dev.iov[iter].MPID_IOV_LEN); buf += rreq->dev.iov[iter].MPID_IOV_LEN; copied += rreq->dev.iov[iter].MPID_IOV_LEN; } MPIDI_CH3I_Request_adjust_iov(rreq, copied); } fn_exit: MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_RNDV_UNPACK_DATA); return mpi_errno; }
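/* Hedged sketch (not MPICH code): the unpack loop above scatters one
 * contiguous rendezvous buffer across the request's IOV entries.  The
 * standalone helper below shows the same copy pattern with plain libc
 * types; scatter_from_contig() and its parameters are illustrative
 * names, not part of any MPICH API. */
#include <string.h>
#include <stddef.h>
#include <sys/uio.h>

/* Copy up to 'src_len' bytes from a contiguous buffer into 'iov', entry
 * by entry, and return the number of bytes actually placed. */
static size_t scatter_from_contig(const char *src, size_t src_len,
                                  struct iovec *iov, int iov_count)
{
    size_t copied = 0;
    for (int i = 0; i < iov_count && copied < src_len; ++i) {
        size_t n = iov[i].iov_len;
        if (n > src_len - copied)
            n = src_len - copied;           /* last chunk may be short */
        memcpy(iov[i].iov_base, src + copied, n);
        copied += n;
    }
    return copied;
}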
void MPIDI_WinAtomicCB(pami_context_t context, void * cookie, const void * _hdr, size_t size, const void * sndbuf, size_t sndlen, pami_endpoint_t sender, pami_recv_t * recv) { MPIDI_AtomicHeader_t *ahdr = (MPIDI_AtomicHeader_t *) _hdr; MPID_assert (ahdr != NULL); MPID_assert (sizeof(MPIDI_AtomicHeader_t) == size); MPIDI_AtomicHeader_t ack_hdr = *ahdr; void *dest_addr = ahdr->remote_addr; int len; len = MPID_Datatype_get_basic_size (ahdr->datatype); if (ahdr->atomic_type == MPIDI_WIN_REQUEST_COMPARE_AND_SWAP) { //overwrite value with result in ack_hdr MPIU_Memcpy(ack_hdr.buf, dest_addr, len); if (MPIR_Compare_equal (&ahdr->test, dest_addr, ahdr->datatype)) MPIU_Memcpy(dest_addr, ahdr->buf, len); } else if (ahdr->atomic_type == MPIDI_WIN_REQUEST_FETCH_AND_OP) { //overwrite value with result MPIU_Memcpy(ack_hdr.buf, dest_addr, len); MPI_User_function *uop; int one = 1; uop = MPIR_OP_HDL_TO_FN(ahdr->op); if (ahdr->op == MPI_REPLACE) MPIU_Memcpy(dest_addr, ahdr->buf, len); else if (ahdr->op == MPI_NO_OP); else (*uop) ((void *)ahdr->buf, dest_addr, &one, &ahdr->datatype); } else MPID_abort(); pami_send_immediate_t params = { .dispatch = MPIDI_Protocols_WinAtomicAck, .dest = sender, .header = { .iov_base = &ack_hdr, .iov_len = sizeof(MPIDI_AtomicHeader_t), }, .data = { .iov_base = NULL, .iov_len = 0, }, .hints = {0},
void MPIDI_Datatype_get_contents_types(MPID_Datatype_contents *cp,
                                       MPI_Datatype *user_types)
{
    char *ptr;
    int align_sz = 8, epsilon;
    int struct_sz;

#ifdef HAVE_MAX_STRUCT_ALIGNMENT
    if (align_sz > HAVE_MAX_STRUCT_ALIGNMENT) {
        align_sz = HAVE_MAX_STRUCT_ALIGNMENT;
    }
#endif

    struct_sz = sizeof(MPID_Datatype_contents);

    /* pad the struct, types, and ints before we allocate.
     *
     * note: it's not necessary that we pad the aints,
     * because they are last in the region.
     */
    if ((epsilon = struct_sz % align_sz)) {
        struct_sz += align_sz - epsilon;
    }

    ptr = ((char *) cp) + struct_sz;
    MPIU_Memcpy(user_types, ptr, cp->nr_types * sizeof(MPI_Datatype));

    return;
}
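/* Hedged sketch (illustrative, not the MPICH layout code): the routine
 * above relies on the contents block being laid out as the struct, padded
 * up to 'align_sz', immediately followed by the MPI_Datatype array.  This
 * tiny helper shows the same "round the struct size up to a multiple of
 * the alignment" arithmetic; round_up_to() is a made-up name. */
#include <stddef.h>

static size_t round_up_to(size_t sz, size_t align)
{
    size_t epsilon = sz % align;
    return epsilon ? sz + (align - epsilon) : sz;
}

/* Example: with a 20-byte struct and 8-byte alignment, the types array
 * begins at offset round_up_to(20, 8) == 24 from the start of the block. */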
int MPID_nem_mxm_iStartContigMsg(MPIDI_VC_t * vc, void *hdr, MPIDI_msg_sz_t hdr_sz, void *data, MPIDI_msg_sz_t data_sz, MPID_Request ** sreq_ptr) { int mpi_errno = MPI_SUCCESS; MPID_Request *sreq = NULL; MPID_nem_mxm_vc_area *vc_area = NULL; MPID_nem_mxm_req_area *req_area = NULL; MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_MXM_ISTARTCONTIGMSG); MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_MXM_ISTARTCONTIGMSG); MPIU_Assert(hdr_sz <= sizeof(MPIDI_CH3_Pkt_t)); MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "mxm_iStartContigMsg"); MPIDI_DBG_Print_packet((MPIDI_CH3_Pkt_t *) hdr); /* create a request */ sreq = MPID_Request_create(); MPIU_Assert(sreq != NULL); MPIU_Object_set_ref(sreq, 2); MPIU_Memcpy(&(sreq->dev.pending_pkt), (char *) hdr, sizeof(MPIDI_CH3_Pkt_t)); sreq->kind = MPID_REQUEST_SEND; sreq->dev.OnDataAvail = NULL; sreq->dev.tmpbuf = NULL; _dbg_mxm_output(5, "iStartContigMsg ========> Sending ADI msg (to=%d type=%d) for req %p (data_size %d, %d) \n", vc->pg_rank, sreq->dev.pending_pkt.type, sreq, sizeof(MPIDI_CH3_Pkt_t), data_sz); vc_area = VC_BASE(vc); req_area = REQ_BASE(sreq); req_area->ctx = sreq; req_area->iov_buf = req_area->tmp_buf; req_area->iov_count = 1; req_area->iov_buf[0].ptr = (void *) &(sreq->dev.pending_pkt); req_area->iov_buf[0].length = sizeof(MPIDI_CH3_Pkt_t); if (data_sz) { req_area->iov_count = 2; req_area->iov_buf[1].ptr = (void *) data; req_area->iov_buf[1].length = data_sz; } vc_area->pending_sends += 1; sreq->ch.vc = vc; sreq->ch.noncontig = FALSE; mpi_errno = _mxm_isend(vc_area->mxm_ep, req_area, MXM_MPICH_ISEND_AM, mxm_obj->mxm_mq, mxm_obj->mxm_rank, MXM_MPICH_HID_ADI_MSG, 0, 0); if (mpi_errno) MPIU_ERR_POP(mpi_errno); fn_exit: *sreq_ptr = sreq; MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_MXM_ISTARTCONTIGMSG); return mpi_errno; fn_fail: goto fn_exit; }
int MPID_nem_mxm_iSendContig(MPIDI_VC_t * vc, MPID_Request * sreq, void *hdr, MPIDI_msg_sz_t hdr_sz, void *data, MPIDI_msg_sz_t data_sz) { int mpi_errno = MPI_SUCCESS; MPID_nem_mxm_vc_area *vc_area = NULL; MPID_nem_mxm_req_area *req_area = NULL; MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_MXM_ISENDCONTIGMSG); MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_MXM_ISENDCONTIGMSG); MPIU_Assert(hdr_sz <= sizeof(MPIDI_CH3_Pkt_t)); MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "mxm_iSendContig"); MPIDI_DBG_Print_packet((MPIDI_CH3_Pkt_t *) hdr); MPIU_Memcpy(&(sreq->dev.pending_pkt), (char *) hdr, sizeof(MPIDI_CH3_Pkt_t)); _dbg_mxm_output(5, "iSendContig ========> Sending ADI msg (to=%d type=%d) for req %p (data_size %d, %d) \n", vc->pg_rank, sreq->dev.pending_pkt.type, sreq, sizeof(MPIDI_CH3_Pkt_t), data_sz); vc_area = VC_BASE(vc); req_area = REQ_BASE(sreq); req_area->ctx = sreq; req_area->iov_buf = req_area->tmp_buf; req_area->iov_count = 0; req_area->iov_buf[req_area->iov_count].ptr = (void *) &(sreq->dev.pending_pkt); req_area->iov_buf[req_area->iov_count].length = sizeof(MPIDI_CH3_Pkt_t); (req_area->iov_count)++; if (sreq->dev.ext_hdr_sz != 0) { req_area->iov_buf[req_area->iov_count].ptr = (void *) (sreq->dev.ext_hdr_ptr); req_area->iov_buf[req_area->iov_count].length = sreq->dev.ext_hdr_sz; (req_area->iov_count)++; } if (data_sz) { req_area->iov_buf[req_area->iov_count].ptr = (void *) data; req_area->iov_buf[req_area->iov_count].length = data_sz; (req_area->iov_count)++; } vc_area->pending_sends += 1; sreq->ch.vc = vc; sreq->ch.noncontig = FALSE; mpi_errno = _mxm_isend(vc_area->mxm_ep, req_area, MXM_MPICH_ISEND_AM, mxm_obj->mxm_mq, mxm_obj->mxm_rank, MXM_MPICH_HID_ADI_MSG, 0, 0); if (mpi_errno) MPIR_ERR_POP(mpi_errno); fn_exit: MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_MXM_ISENDCONTIGMSG); return mpi_errno; fn_fail: goto fn_exit; }
int MPID_nem_mxm_SendNoncontig(MPIDI_VC_t * vc, MPID_Request * sreq, void *hdr, MPIDI_msg_sz_t hdr_sz) { int mpi_errno = MPI_SUCCESS; MPIDI_msg_sz_t last; MPID_nem_mxm_vc_area *vc_area = NULL; MPID_nem_mxm_req_area *req_area = NULL; MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_MXM_SENDNONCONTIGMSG); MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_MXM_SENDNONCONTIGMSG); MPIU_Assert(hdr_sz <= sizeof(MPIDI_CH3_Pkt_t)); MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "MPID_nem_mxm_iSendNoncontig"); MPIU_Memcpy(&(sreq->dev.pending_pkt), (char *) hdr, sizeof(MPIDI_CH3_Pkt_t)); _dbg_mxm_output(5, "SendNoncontig ========> Sending ADI msg (to=%d type=%d) for req %p (data_size %d, %d) \n", vc->pg_rank, sreq->dev.pending_pkt.type, sreq, sizeof(MPIDI_CH3_Pkt_t), sreq->dev.segment_size); vc_area = VC_BASE(vc); req_area = REQ_BASE(sreq); req_area->ctx = sreq; req_area->iov_buf = req_area->tmp_buf; req_area->iov_count = 1; req_area->iov_buf[0].ptr = (void *) &(sreq->dev.pending_pkt); req_area->iov_buf[0].length = sizeof(MPIDI_CH3_Pkt_t); MPIU_Assert(sreq->dev.segment_first == 0); last = sreq->dev.segment_size; if (last > 0) { sreq->dev.tmpbuf = MPIU_Malloc((size_t) sreq->dev.segment_size); MPIU_Assert(sreq->dev.tmpbuf); MPID_Segment_pack(sreq->dev.segment_ptr, sreq->dev.segment_first, &last, sreq->dev.tmpbuf); MPIU_Assert(last == sreq->dev.segment_size); req_area->iov_count = 2; req_area->iov_buf[1].ptr = sreq->dev.tmpbuf; req_area->iov_buf[1].length = last; } vc_area->pending_sends += 1; sreq->ch.vc = vc; sreq->ch.noncontig = TRUE; mpi_errno = _mxm_isend(vc_area->mxm_ep, req_area, MXM_MPICH_ISEND_AM, mxm_obj->mxm_mq, mxm_obj->mxm_rank, MXM_MPICH_HID_ADI_MSG, 0, 0); if (mpi_errno) MPIU_ERR_POP(mpi_errno); fn_exit: MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_MXM_SENDNONCONTIGMSG); return mpi_errno; fn_fail: goto fn_exit; }
static int handle_mprobe(const ptl_event_t *e) { int mpi_errno = MPI_SUCCESS; MPID_Request *const req = e->user_ptr; MPIU_CHKPMEM_DECL(1); MPIDI_STATE_DECL(MPID_STATE_HANDLE_PROBE); MPIDI_FUNC_ENTER(MPID_STATE_HANDLE_PROBE); if (e->ni_fail_type == PTL_NI_NO_MATCH) { REQ_PTL(req)->found = FALSE; goto finish_mprobe; } REQ_PTL(req)->found = TRUE; req->status.MPI_SOURCE = NPTL_MATCH_GET_RANK(e->match_bits); req->status.MPI_TAG = NPTL_MATCH_GET_TAG(e->match_bits); MPIR_STATUS_SET_COUNT(req->status, NPTL_HEADER_GET_LENGTH(e->hdr_data)); MPIDI_Request_set_sync_send_flag(req, e->hdr_data & NPTL_SSEND); MPIU_CHKPMEM_MALLOC(req->dev.tmpbuf, void *, e->mlength, mpi_errno, "tmpbuf"); MPIU_Memcpy((char *)req->dev.tmpbuf, e->start, e->mlength); req->dev.recv_data_sz = e->mlength; if (!(e->hdr_data & NPTL_LARGE)) { MPIDI_Request_set_msg_type(req, MPIDI_REQUEST_EAGER_MSG); } else { MPIU_Assert (e->mlength == PTL_LARGE_THRESHOLD); req->dev.match.parts.tag = req->status.MPI_TAG; req->dev.match.parts.context_id = NPTL_MATCH_GET_CTX(e->match_bits); req->dev.match.parts.rank = req->status.MPI_SOURCE; MPIDI_Request_set_msg_type(req, MPIDI_REQUEST_RNDV_MSG); } /* At this point we know the ME is unlinked. Invalidate the handle to prevent further accesses, e.g. an attempted cancel. */ REQ_PTL(req)->put_me = PTL_INVALID_HANDLE; req->dev.recv_pending_count = 1; finish_mprobe: mpi_errno = MPID_Request_complete(req); if (mpi_errno != MPI_SUCCESS) { MPIR_ERR_POP(mpi_errno); } fn_exit: MPIU_CHKPMEM_COMMIT(); MPIDI_FUNC_EXIT(MPID_STATE_HANDLE_PROBE); return mpi_errno; fn_fail: MPIU_CHKPMEM_REAP(); goto fn_exit; }
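/* Hedged sketch: handle_mprobe() above services the netmod side of the
 * MPI-3 matched-probe path.  For context, this is the standard,
 * application-level pattern it ultimately supports (standard MPI calls
 * only; buffer sizing via MPI_Get_count). */
#include <mpi.h>
#include <stdlib.h>

static void recv_with_mprobe(int source, int tag, MPI_Comm comm)
{
    MPI_Message msg;
    MPI_Status status;
    int count;

    /* Match (and dequeue) a message without receiving it yet. */
    MPI_Mprobe(source, tag, comm, &msg, &status);
    MPI_Get_count(&status, MPI_BYTE, &count);

    char *buf = malloc(count);
    /* Receive exactly the matched message, even in multithreaded code. */
    MPI_Mrecv(buf, count, MPI_BYTE, &msg, MPI_STATUS_IGNORE);
    free(buf);
}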
int MPIDI_nem_ib_fill_request(MPID_Request * req, vbuf * v, int header_size, int *nb)
{
    MPID_IOV *iov;
    int n_iov;
    int len_avail;
    void *data_buf;
    int i;

    len_avail = v->content_size - header_size;
    iov = (req == NULL) ? NULL : req->dev.iov;
    n_iov = (req == NULL) ? 0 : req->dev.iov_count;
    data_buf = (void *) ((uintptr_t) v->pheader + header_size);

    DEBUG_PRINT("[recv:fill request] total len %d, head len %d, n iov %d\n",
                v->content_size, header_size, n_iov);

    *nb = 0;
    for (i = req->dev.iov_offset; i < n_iov; i++) {
        if (len_avail >= (int) iov[i].MPID_IOV_LEN && iov[i].MPID_IOV_LEN != 0) {
            MPIU_Memcpy(iov[i].MPID_IOV_BUF, data_buf, iov[i].MPID_IOV_LEN);
            data_buf = (void *) ((uintptr_t) data_buf + iov[i].MPID_IOV_LEN);
            len_avail -= iov[i].MPID_IOV_LEN;
            *nb += iov[i].MPID_IOV_LEN;
        }
        else if (len_avail > 0) {
            MPIU_Memcpy(iov[i].MPID_IOV_BUF, data_buf, len_avail);
            *nb += len_avail;
            break;
        }
    }

    v->content_consumed = header_size + *nb;

    DEBUG_PRINT("[recv:fill request] about to return from request, nb %d\n", *nb);

    return MPI_SUCCESS;
}
static int handler_recv_dequeue_complete(const ptl_event_t *e) { int mpi_errno = MPI_SUCCESS; MPID_Request *const rreq = e->user_ptr; int is_contig; MPI_Aint last; MPI_Aint dt_true_lb; MPIDI_msg_sz_t data_sz; MPID_Datatype *dt_ptr ATTRIBUTE((unused)); MPIDI_STATE_DECL(MPID_STATE_HANDLER_RECV_DEQUEUE_COMPLETE); MPIDI_FUNC_ENTER(MPID_STATE_HANDLER_RECV_DEQUEUE_COMPLETE); MPIU_Assert(e->type == PTL_EVENT_PUT || e->type == PTL_EVENT_PUT_OVERFLOW); MPIDI_Datatype_get_info(rreq->dev.user_count, rreq->dev.datatype, is_contig, data_sz, dt_ptr, dt_true_lb); dequeue_req(e); if (e->type == PTL_EVENT_PUT_OVERFLOW) { /* unpack the data from unexpected buffer */ MPIU_DBG_MSG_D(CH3_CHANNEL, VERBOSE, "is_contig = %d", is_contig); if (is_contig) { MPIU_Memcpy((char *)rreq->dev.user_buf + dt_true_lb, e->start, e->mlength); } else { last = e->mlength; MPID_Segment_unpack(rreq->dev.segment_ptr, rreq->dev.segment_first, &last, e->start); if (last != e->mlength) MPIR_ERR_SET(rreq->status.MPI_ERROR, MPI_ERR_TYPE, "**dtypemismatch"); } } else { /* Data was placed directly into the user buffer, so datatype mismatch is harder to detect. We use a simple check ensuring the received bytes are a multiple of a single basic element. Currently, we do not detect mismatches with datatypes constructed of more than one basic type */ MPI_Datatype dt_basic_type; MPID_Datatype_get_basic_type(rreq->dev.datatype, dt_basic_type); if (dt_basic_type != MPI_DATATYPE_NULL && (e->mlength % MPID_Datatype_get_basic_size(dt_basic_type)) != 0) MPIR_ERR_SET(rreq->status.MPI_ERROR, MPI_ERR_TYPE, "**dtypemismatch"); } mpi_errno = handler_recv_complete(e); fn_exit: MPIDI_FUNC_EXIT(MPID_STATE_HANDLER_RECV_DEQUEUE_COMPLETE); return mpi_errno; fn_fail: goto fn_exit; }
int MPIDI_CH3U_Receive_data_unexpected(MPID_Request * rreq, char *buf, MPIDI_msg_sz_t *buflen, int *complete) { int mpi_errno = MPI_SUCCESS; MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_RECEIVE_DATA_UNEXPECTED); MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_RECEIVE_DATA_UNEXPECTED); /* FIXME: to improve performance, allocate temporary buffer from a specialized buffer pool. */ /* FIXME: to avoid memory exhaustion, integrate buffer pool management with flow control */ MPIU_DBG_MSG(CH3_OTHER,VERBOSE,"unexpected request allocated"); rreq->dev.tmpbuf = MPIU_Malloc(rreq->dev.recv_data_sz); if (!rreq->dev.tmpbuf) { MPIU_ERR_SETANDJUMP1(mpi_errno,MPI_ERR_OTHER,"**nomem","**nomem %d", rreq->dev.recv_data_sz); } rreq->dev.tmpbuf_sz = rreq->dev.recv_data_sz; /* if all of the data has already been received, copy it now, otherwise build an iov and let the channel copy it */ if (rreq->dev.recv_data_sz <= *buflen) { MPIU_Memcpy(rreq->dev.tmpbuf, buf, rreq->dev.recv_data_sz); *buflen = rreq->dev.recv_data_sz; rreq->dev.recv_pending_count = 1; *complete = TRUE; } else { rreq->dev.iov[0].MPID_IOV_BUF = (MPID_IOV_BUF_CAST)((char *)rreq->dev.tmpbuf); rreq->dev.iov[0].MPID_IOV_LEN = rreq->dev.recv_data_sz; rreq->dev.iov_count = 1; rreq->dev.recv_pending_count = 2; *buflen = 0; *complete = FALSE; } if (MPIDI_Request_get_msg_type(rreq) == MPIDI_REQUEST_EAGER_MSG) MPIR_T_ADD(RECVQ_STATISTICS, MPIDI_CH3I_unexpected_recvq_buffer_size, rreq->dev.tmpbuf_sz); rreq->dev.OnDataAvail = MPIDI_CH3_ReqHandler_UnpackUEBufComplete; fn_fail: MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_RECEIVE_DATA_UNEXPECTED); return mpi_errno; }
int MPIR_T_cvar_read_impl(MPI_T_cvar_handle handle, void *buf) { int mpi_errno = MPI_SUCCESS; struct MPIR_Param_t *p = handle->p; switch (p->default_val.type) { case MPIR_PARAM_TYPE_INT: { int *i_buf = buf; *i_buf = *(int *)p->val_p; } break; case MPIR_PARAM_TYPE_DOUBLE: { double *d_buf = buf; *d_buf = *(double *)p->val_p; } break; case MPIR_PARAM_TYPE_BOOLEAN: { int *i_buf = buf; *i_buf = *(int *)p->val_p; } break; case MPIR_PARAM_TYPE_STRING: if (*(char **)p->val_p == NULL) { char *c_buf = buf; c_buf[0] = '\0'; } else { MPIU_Strncpy(buf, *(char **)p->val_p, MPIR_PARAM_MAX_STRLEN); } break; case MPIR_PARAM_TYPE_RANGE: MPIU_Memcpy(buf, p->val_p, 2*sizeof(int)); break; default: /* FIXME the error handling code may not have been setup yet */ MPIU_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_INTERN, "**intern", "**intern %s", "unexpected parameter type"); break; } fn_exit: return mpi_errno; fn_fail: goto fn_exit; }
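/* Hedged sketch: MPIR_T_cvar_read_impl() above is what backs the public
 * MPI_T_cvar_read() call.  The fragment below is an ordinary MPI tools
 * interface client (standard MPI_T calls only) that reads one integer
 * control variable; 'idx' is assumed to be the index of an MPI_INT-typed
 * cvar discovered earlier with MPI_T_cvar_get_info(). */
#include <mpi.h>
#include <stdio.h>

static void read_int_cvar(int idx)
{
    MPI_T_cvar_handle handle;
    int count, val;

    /* Bind a handle to the control variable (no object for global scope). */
    if (MPI_T_cvar_handle_alloc(idx, NULL, &handle, &count) != MPI_SUCCESS)
        return;

    if (count == 1 && MPI_T_cvar_read(handle, &val) == MPI_SUCCESS)
        printf("cvar %d = %d\n", idx, val);

    MPI_T_cvar_handle_free(&handle);
}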
int MPIR_T_pvar_read_impl(MPI_T_pvar_session session, MPI_T_pvar_handle handle, void *buf)
{
    int mpi_errno = MPI_SUCCESS;

    /* the extra indirection through "info" might be too costly for some tools,
     * consider moving this value to or caching it in the handle itself */
    if (likely(handle->info->impl_kind == MPIR_T_PVAR_IMPL_SIMPLE)) {
        MPIU_Memcpy(buf, handle->handle_state, handle->count * handle->bytes);
    }
    else {
        MPIU_Assertp(FALSE); /* _IMPL_CB not yet implemented */
    }

fn_exit:
    return mpi_errno;
fn_fail:
    goto fn_exit;
}
static inline int
MPID_nem_gm_lmt_pre (struct iovec *iov, size_t n_iov, MPIDI_VC_t *remote_vc, struct iovec *cookie)
{
    int ret = 0;
    int i, j;
    struct iovec *iov_copy;

    for (i = 0; i < n_iov; ++i)
    {
        ret = MPID_nem_gm_register_mem (iov[i].iov_base, iov[i].iov_len);
        if (ret != 0)
        {
            ret = -1;
            goto error_exit;
        }
    }

    iov_copy = MPIU_Malloc (sizeof (struct iovec) * n_iov);
    if (iov_copy == 0)
    {
        ret = -1;
        goto error_exit;
    }

    MPIU_Memcpy (iov_copy, iov, sizeof (struct iovec) * n_iov);
    cookie->iov_base = iov_copy;
    cookie->iov_len = sizeof (struct iovec) * n_iov;
    return ret;

 error_exit:
    /* roll back: deregister every segment registered before the failure */
    for (j = i-1; j >= 0; --j)
    {
        MPID_nem_gm_deregister_mem (iov[j].iov_base, iov[j].iov_len);
    }
    return ret;
}
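/* Hedged sketch (illustrative, not the GM/MPICH API): the routine above
 * uses the classic "acquire in a loop, roll back everything acquired so
 * far on failure" pattern.  register_all() below shows the same shape
 * with invented acquire()/release() callbacks. */
#include <stddef.h>

static int register_all(void **bufs, size_t *lens, int n,
                        int (*acquire)(void *, size_t),
                        void (*release)(void *, size_t))
{
    int i;
    for (i = 0; i < n; ++i) {
        if (acquire(bufs[i], lens[i]) != 0) {
            /* undo in reverse order, only what actually succeeded */
            for (int j = i - 1; j >= 0; --j)
                release(bufs[j], lens[j]);
            return -1;
        }
    }
    return 0;
}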
int MPIDI_CH3U_Request_unpack_uebuf(MPID_Request * rreq) { int dt_contig; MPI_Aint dt_true_lb; MPIDI_msg_sz_t userbuf_sz; MPID_Datatype * dt_ptr; MPIDI_msg_sz_t unpack_sz; int mpi_errno = MPI_SUCCESS; MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_REQUEST_UNPACK_UEBUF); MPIDI_STATE_DECL(MPID_STATE_MEMCPY); MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_REQUEST_UNPACK_UEBUF); MPIDI_Datatype_get_info(rreq->dev.user_count, rreq->dev.datatype, dt_contig, userbuf_sz, dt_ptr, dt_true_lb); if (rreq->dev.recv_data_sz <= userbuf_sz) { unpack_sz = rreq->dev.recv_data_sz; } else { /* --BEGIN ERROR HANDLING-- */ MPIU_DBG_MSG_FMT(CH3_CHANNEL,VERBOSE,(MPIU_DBG_FDEST, "receive buffer overflow; message truncated, msg_sz=" MPIDI_MSG_SZ_FMT ", buf_sz=" MPIDI_MSG_SZ_FMT, rreq->dev.recv_data_sz, userbuf_sz)); unpack_sz = userbuf_sz; MPIR_STATUS_SET_COUNT(rreq->status, userbuf_sz); rreq->status.MPI_ERROR = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_TRUNCATE, "**truncate", "**truncate %d %d", rreq->dev.recv_data_sz, userbuf_sz); /* --END ERROR HANDLING-- */ } if (unpack_sz > 0) { if (dt_contig) { /* TODO - check that amount of data is consistent with datatype. In other words, if we were to use Segment_unpack() would last = unpack? If not we should return an error (unless configured with --enable-fast) */ MPIDI_FUNC_ENTER(MPID_STATE_MEMCPY); MPIU_Memcpy((char *)rreq->dev.user_buf + dt_true_lb, rreq->dev.tmpbuf, unpack_sz); MPIDI_FUNC_EXIT(MPID_STATE_MEMCPY); } else { MPID_Segment seg; MPI_Aint last; MPID_Segment_init(rreq->dev.user_buf, rreq->dev.user_count, rreq->dev.datatype, &seg, 0); last = unpack_sz; MPID_Segment_unpack(&seg, 0, &last, rreq->dev.tmpbuf); if (last != unpack_sz) { /* --BEGIN ERROR HANDLING-- */ /* received data was not entirely consumed by unpack() because too few bytes remained to fill the next basic datatype */ MPIR_STATUS_SET_COUNT(rreq->status, last); rreq->status.MPI_ERROR = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_TYPE, "**dtypemismatch", 0); /* --END ERROR HANDLING-- */ } } } MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_REQUEST_UNPACK_UEBUF); return mpi_errno; }
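/* Hedged sketch (not MPICH code): the truncation branch above clamps the
 * unpack length to the receive buffer and records an error instead of
 * overrunning the user buffer.  clamp_recv() restates that rule with
 * invented names. */
#include <stddef.h>

/* Returns the number of bytes that may safely be unpacked and sets
 * *truncated when the incoming message was larger than the buffer. */
static size_t clamp_recv(size_t incoming_sz, size_t userbuf_sz, int *truncated)
{
    if (incoming_sz <= userbuf_sz) {
        *truncated = 0;
        return incoming_sz;
    }
    *truncated = 1;          /* caller reports MPI_ERR_TRUNCATE */
    return userbuf_sz;
}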
int MPIDI_CH3U_Request_load_send_iov(MPID_Request * const sreq, MPID_IOV * const iov, int * const iov_n) { MPI_Aint last; int mpi_errno = MPI_SUCCESS; MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_REQUEST_LOAD_SEND_IOV); MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_REQUEST_LOAD_SEND_IOV); MPIU_Assert(sreq->dev.segment_ptr != NULL); last = sreq->dev.segment_size; MPIU_DBG_MSG_FMT(CH3_CHANNEL,VERBOSE,(MPIU_DBG_FDEST, "pre-pv: first=" MPIDI_MSG_SZ_FMT ", last=" MPIDI_MSG_SZ_FMT ", iov_n=%d", sreq->dev.segment_first, last, *iov_n)); MPIU_Assert(sreq->dev.segment_first < last); MPIU_Assert(last > 0); MPIU_Assert(*iov_n > 0 && *iov_n <= MPID_IOV_LIMIT); MPID_Segment_pack_vector(sreq->dev.segment_ptr, sreq->dev.segment_first, &last, iov, iov_n); MPIU_DBG_MSG_FMT(CH3_CHANNEL,VERBOSE,(MPIU_DBG_FDEST, "post-pv: first=" MPIDI_MSG_SZ_FMT ", last=" MPIDI_MSG_SZ_FMT ", iov_n=%d", sreq->dev.segment_first, last, *iov_n)); MPIU_Assert(*iov_n > 0 && *iov_n <= MPID_IOV_LIMIT); if (last == sreq->dev.segment_size) { MPIU_DBG_MSG(CH3_CHANNEL,VERBOSE,"remaining data loaded into IOV"); sreq->dev.OnDataAvail = sreq->dev.OnFinal; } else if ((last - sreq->dev.segment_first) / *iov_n >= MPIDI_IOV_DENSITY_MIN) { MPIU_DBG_MSG(CH3_CHANNEL,VERBOSE,"more data loaded into IOV"); sreq->dev.segment_first = last; sreq->dev.OnDataAvail = MPIDI_CH3_ReqHandler_SendReloadIOV; } else { MPIDI_msg_sz_t data_sz; int i, iov_data_copied; MPIU_DBG_MSG(CH3_CHANNEL,VERBOSE,"low density. using SRBuf."); data_sz = sreq->dev.segment_size - sreq->dev.segment_first; if (!MPIDI_Request_get_srbuf_flag(sreq)) { MPIDI_CH3U_SRBuf_alloc(sreq, data_sz); /* --BEGIN ERROR HANDLING-- */ if (sreq->dev.tmpbuf_sz == 0) { MPIU_DBG_MSG(CH3_CHANNEL,TYPICAL,"SRBuf allocation failure"); mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL, FCNAME, __LINE__, MPI_ERR_OTHER, "**nomem", "**nomem %d", data_sz); sreq->status.MPI_ERROR = mpi_errno; goto fn_exit; } /* --END ERROR HANDLING-- */ } iov_data_copied = 0; for (i = 0; i < *iov_n; i++) { MPIU_Memcpy((char*) sreq->dev.tmpbuf + iov_data_copied, iov[i].MPID_IOV_BUF, iov[i].MPID_IOV_LEN); iov_data_copied += iov[i].MPID_IOV_LEN; } sreq->dev.segment_first = last; last = (data_sz <= sreq->dev.tmpbuf_sz - iov_data_copied) ? sreq->dev.segment_size : sreq->dev.segment_first + sreq->dev.tmpbuf_sz - iov_data_copied; MPIU_DBG_MSG_FMT(CH3_CHANNEL,VERBOSE,(MPIU_DBG_FDEST, "pre-pack: first=" MPIDI_MSG_SZ_FMT ", last=" MPIDI_MSG_SZ_FMT, sreq->dev.segment_first, last)); MPID_Segment_pack(sreq->dev.segment_ptr, sreq->dev.segment_first, &last, (char*) sreq->dev.tmpbuf + iov_data_copied); MPIU_DBG_MSG_FMT(CH3_CHANNEL,VERBOSE,(MPIU_DBG_FDEST, "post-pack: first=" MPIDI_MSG_SZ_FMT ", last=" MPIDI_MSG_SZ_FMT, sreq->dev.segment_first, last)); iov[0].MPID_IOV_BUF = (MPID_IOV_BUF_CAST)sreq->dev.tmpbuf; iov[0].MPID_IOV_LEN = last - sreq->dev.segment_first + iov_data_copied; *iov_n = 1; if (last == sreq->dev.segment_size) { MPIU_DBG_MSG(CH3_CHANNEL,VERBOSE,"remaining data packed into SRBuf"); sreq->dev.OnDataAvail = sreq->dev.OnFinal; } else { MPIU_DBG_MSG(CH3_CHANNEL,VERBOSE,"more data packed into SRBuf"); sreq->dev.segment_first = last; sreq->dev.OnDataAvail = MPIDI_CH3_ReqHandler_SendReloadIOV; } } fn_exit: MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_REQUEST_LOAD_SEND_IOV); return mpi_errno; }
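/* Hedged sketch: the branch above chooses between (a) sending the packed
 * IOV as is and (b) copying into a send/receive bounce buffer, based on
 * the average number of bytes each IOV entry would carry.  The predicate
 * below captures that heuristic; DENSITY_MIN stands in for the
 * implementation's MPIDI_IOV_DENSITY_MIN and its value here is only an
 * assumption. */
#include <stddef.h>

#define DENSITY_MIN 16384   /* assumed threshold, bytes per IOV entry */

/* Nonzero when the IOV is "dense" enough to be worth sending directly. */
static int iov_is_dense(size_t bytes_loaded, int iov_n)
{
    return iov_n > 0 && (bytes_loaded / (size_t) iov_n) >= DENSITY_MIN;
}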
/** * FIXME: Ideally the header size should be determined by high level macros, * instead of hacking the message header at the device layer */ int MPIDI_CH3I_nem_ib_parse_header(MPIDI_VC_t * vc, vbuf * v, void **pkt, int *header_size) { void *vstart; MPIDI_nem_ib_pkt_comm_header *header; #ifdef CRC_CHECK unsigned long crc; #endif int mpi_errno = MPI_SUCCESS; int ret; DEBUG_PRINT("[parse header] vbuf address %p\n", v); vstart = v->pheader; header = (MPIDI_nem_ib_pkt_comm_header *)v->iheader; DEBUG_PRINT("[parse header] header type %d\n", header->type); /* set it to the header size by default */ *header_size = sizeof(MPIDI_CH3_Pkt_t); #ifdef CRC_CHECK crc = update_crc(1, (void *)((uintptr_t)header+sizeof *header), v->content_size - sizeof *header); if (crc != header->mrail.crc) { int rank; PMI_Get_rank(&rank); MPIU_Error_printf(stderr, "CRC mismatch, get %lx, should be %lx " "type %d, ocntent size %d\n", crc, header->mrail.crc, header->type, v->content_size); exit( EXIT_FAILURE ); } #endif switch (header->type) { /*header caching codes */ #ifndef MV2_DISABLE_HEADER_CACHING case (MPIDI_CH3_PKT_FAST_EAGER_SEND): case (MPIDI_CH3_PKT_FAST_EAGER_SEND_WITH_REQ): { /* since header caching do not have regular iheader, * revert back pre-adjust */ v->content_size += IB_PKT_HEADER_LENGTH; vstart -= IB_PKT_HEADER_LENGTH; v->pheader -= IB_PKT_HEADER_LENGTH; MPIDI_nem_ib_pkt_fast_eager *fast_header = vstart; MPIDI_CH3_Pkt_eager_send_t *eager_header = (MPIDI_CH3_Pkt_eager_send_t *) VC_FIELD(vc, connection)->rfp. cached_incoming; MPIDI_nem_ib_pkt_comm_header *eager_iheader = (MPIDI_nem_ib_pkt_comm_header *) VC_FIELD(vc, connection)->rfp. cached_incoming_iheader; if (MPIDI_CH3_PKT_FAST_EAGER_SEND == header->type) { *header_size = sizeof(MPIDI_nem_ib_pkt_fast_eager); } else { *header_size = sizeof(MPIDI_nem_ib_pkt_fast_eager_with_req); eager_header->sender_req_id = ((MPIDI_nem_ib_pkt_fast_eager_with_req *) vstart)->sender_req_id; } header = eager_iheader; DEBUG_PRINT("[receiver side] cached credit %d\n", eager_iheader->rdma_credit); eager_header->data_sz = fast_header->bytes_in_pkt; *pkt = (void *) eager_header; DEBUG_PRINT ("[recv: parse header] faster headersize returned %d\n", *header_size); } break; #endif case (MPIDI_CH3_PKT_EAGER_SEND): { DEBUG_PRINT("[recv: parse header] pkt eager send\n"); /* header caching codes */ #ifndef MV2_DISABLE_HEADER_CACHING if (v->padding != NORMAL_VBUF_FLAG && (v->content_size - sizeof(MPIDI_CH3_Pkt_t) <= MAX_SIZE_WITH_HEADER_CACHING) ) { /* Only cache header if the packet is from RdMA path * XXXX: what is R3_FLAG? 
*/ MPIU_Memcpy((VC_FIELD(vc, connection)->rfp.cached_incoming), vstart, sizeof(MPIDI_CH3_Pkt_eager_send_t)); MPIU_Memcpy((VC_FIELD(vc, connection)->rfp.cached_incoming_iheader), header, sizeof(MPIDI_nem_ib_pkt_comm_header)); } #endif *pkt = (MPIDI_CH3_Pkt_t *) vstart; *header_size = sizeof(MPIDI_CH3_Pkt_t); DEBUG_PRINT("[recv: parse header] headersize returned %d\n", *header_size); } break; case (MPIDI_CH3_PKT_RNDV_REQ_TO_SEND): case (MPIDI_CH3_PKT_RNDV_CLR_TO_SEND): case MPIDI_CH3_PKT_EAGER_SYNC_ACK: case MPIDI_NEM_PKT_LMT_RTS: case MPIDI_NEM_PKT_LMT_CTS: case MPIDI_NEM_PKT_LMT_DONE: case MPIDI_NEM_PKT_LMT_COOKIE: /* CKPT codes */ #ifdef CKPT case MPIDI_CH3_PKT_CM_SUSPEND: case MPIDI_CH3_PKT_CM_REACTIVATION_DONE: case MPIDI_CH3_PKT_CR_REMOTE_UPDATE: #endif { *pkt = vstart; } break; case MPIDI_CH3_PKT_CANCEL_SEND_REQ: { *pkt = vstart; /*Fix: Need to unregister and free the rndv buffer in get protocol.*/ } break; case MPIDI_CH3_PKT_CANCEL_SEND_RESP: { MPID_Request *req; *pkt = vstart; MPID_Request_get_ptr(((MPIDI_CH3_Pkt_cancel_send_resp_t *)(*pkt))->sender_req_id, req); if (req != NULL) { /* unregister and free the rndv buffer */ MPIDI_NEM_IB_RREQ_RNDV_FINISH(req); } } break; case (MPIDI_CH3_PKT_NOOP): { *pkt = v->iheader; } break; /* rfp codes */ case MPIDI_CH3_PKT_ADDRESS: { *pkt = v->iheader; MPIDI_nem_ib_recv_addr(vc, vstart); break; } case MPIDI_CH3_PKT_ADDRESS_REPLY: { *pkt = v->iheader; MPIDI_nem_ib_recv_addr_reply(vc, vstart); break; } case MPIDI_CH3_PKT_PACKETIZED_SEND_START: { *pkt = vstart; *header_size = sizeof(MPIDI_CH3_Pkt_packetized_send_start_t); break; } case MPIDI_CH3_PKT_PACKETIZED_SEND_DATA: { *header_size = sizeof(MPIDI_CH3_Pkt_packetized_send_data_t); *pkt = vstart; break; } case MPIDI_CH3_PKT_RNDV_R3_DATA: { *header_size = sizeof(MPIDI_CH3_Pkt_rndv_r3_data_t); *pkt = vstart; break; } case MPIDI_CH3_PKT_RNDV_R3_ACK: { *pkt = v->iheader; MPIDI_nem_ib_lmt_r3_recv_ack(vc, vstart); break; } #if defined(USE_EAGER_SHORT) case MPIDI_CH3_PKT_EAGERSHORT_SEND: #endif case MPIDI_CH3_PKT_EAGER_SYNC_SEND: case MPIDI_CH3_PKT_READY_SEND: { *pkt = vstart; break; } case MPIDI_CH3_PKT_PUT: { *pkt = vstart; break; } case MPIDI_CH3_PKT_GET: { *pkt = vstart; break; } case MPIDI_CH3_PKT_GET_RESP: /*15 */ { *pkt = vstart; break; } case MPIDI_CH3_PKT_ACCUMULATE: { *pkt = vstart; break; } case MPIDI_CH3_PKT_LOCK: { *pkt = vstart; break; } case MPIDI_CH3_PKT_LOCK_GRANTED: { *pkt = vstart; break; } case MPIDI_CH3_PKT_PT_RMA_DONE: { *pkt = vstart; break; } case MPIDI_CH3_PKT_LOCK_PUT_UNLOCK: { *pkt = vstart; break; } case MPIDI_CH3_PKT_LOCK_GET_UNLOCK: { *pkt = vstart; break; } case MPIDI_CH3_PKT_LOCK_ACCUM_UNLOCK: case MPIDI_CH3_PKT_ACCUM_IMMED: { *pkt = vstart; break; } case MPIDI_CH3_PKT_FLOW_CNTL_UPDATE: { *pkt = vstart; break; } case MPIDI_CH3_PKT_CLOSE: { *pkt = vstart; } break; default: { /* Header is corrupted if control has reached here in prototype */ /* */ MPIU_ERR_SETFATALANDJUMP2(mpi_errno, MPI_ERR_OTHER, "**fail", "**fail %s %d", "Control shouldn't reach here " "in prototype, header %d\n", header->type); } } DEBUG_PRINT("Before set credit, vc: %p, v->rail: %d, " "pkt: %p, pheader: %p\n", vc, v->rail, pkt, v->pheader); SET_CREDIT(header, VC_FIELD(vc, connection), (v->rail)); if (VC_FIELD(vc, connection)->srp.credits[v->rail].remote_credit > 0 && VC_FIELD(vc, connection)->srp.credits[v->rail].backlog.len > 0) { /* backlog send codes */ MRAILI_Backlog_send(vc, v->rail); } /* if any credits remain, schedule rendezvous progress */ if ((VC_FIELD(vc, 
connection)->srp.credits[v->rail].remote_credit > 0 /* rfp codes */ || (VC_FIELD(vc, connection)->rfp.ptail_RDMA_send != VC_FIELD(vc, connection)->rfp.phead_RDMA_send) ) && (VC_FIELD(vc, connection)->sreq_head != NULL)) { /* rndv codes */ #if 0 PUSH_FLOWLIST(vc); #endif } /* rfp codes */ if ((VC_FIELD(vc, connection)->rfp.RDMA_recv_buf == NULL) && /*(c->initialized) && */ num_rdma_buffer && !VC_FIELD(vc, connection)->rfp.rdma_failed) { if ((process_info.polling_group_size + rdma_pending_conn_request) < rdma_polling_set_limit) { VC_FIELD(vc, connection)->rfp.eager_start_cnt++; if (rdma_polling_set_threshold < VC_FIELD(vc, connection)->rfp.eager_start_cnt) { { ret = vbuf_fast_rdma_alloc(vc, 1); if (ret == MPI_SUCCESS) { vbuf_address_send(vc); rdma_pending_conn_request++; } else { VC_FIELD(vc, connection)->rfp.rdma_failed = 1; } goto fn_exit; } } } } fn_exit: return mpi_errno; fn_fail: goto fn_exit; }
int MPIR_Localcopy(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtype, void *recvbuf, MPI_Aint recvcount, MPI_Datatype recvtype) { int mpi_errno = MPI_SUCCESS; int sendtype_iscontig, recvtype_iscontig; MPI_Aint sendsize, recvsize, sdata_sz, rdata_sz, copy_sz; MPI_Aint true_extent, sendtype_true_lb, recvtype_true_lb; MPIU_CHKLMEM_DECL(1); MPID_MPI_STATE_DECL(MPID_STATE_MPIR_LOCALCOPY); MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_LOCALCOPY); MPID_Datatype_get_size_macro(sendtype, sendsize); MPID_Datatype_get_size_macro(recvtype, recvsize); sdata_sz = sendsize * sendcount; rdata_sz = recvsize * recvcount; /* if there is no data to copy, bail out */ if (!sdata_sz || !rdata_sz) goto fn_exit; #if defined(HAVE_ERROR_CHECKING) if (sdata_sz > rdata_sz) { MPIU_ERR_SET2(mpi_errno, MPI_ERR_TRUNCATE, "**truncate", "**truncate %d %d", sdata_sz, rdata_sz); copy_sz = rdata_sz; } else #endif /* HAVE_ERROR_CHECKING */ copy_sz = sdata_sz; /* Builtin types is the common case; optimize for it */ if ((HANDLE_GET_KIND(sendtype) == HANDLE_KIND_BUILTIN) && HANDLE_GET_KIND(recvtype) == HANDLE_KIND_BUILTIN) { MPIU_Memcpy(recvbuf, sendbuf, copy_sz); goto fn_exit; } MPIR_Datatype_iscontig(sendtype, &sendtype_iscontig); MPIR_Datatype_iscontig(recvtype, &recvtype_iscontig); MPIR_Type_get_true_extent_impl(sendtype, &sendtype_true_lb, &true_extent); MPIR_Type_get_true_extent_impl(recvtype, &recvtype_true_lb, &true_extent); if (sendtype_iscontig && recvtype_iscontig) { #if defined(HAVE_ERROR_CHECKING) MPIU_ERR_CHKMEMCPYANDJUMP(mpi_errno, ((char *)recvbuf + recvtype_true_lb), ((char *)sendbuf + sendtype_true_lb), copy_sz); #endif MPIU_Memcpy(((char *) recvbuf + recvtype_true_lb), ((char *) sendbuf + sendtype_true_lb), copy_sz); } else if (sendtype_iscontig) { MPID_Segment seg; MPI_Aint last; MPID_Segment_init(recvbuf, recvcount, recvtype, &seg, 0); last = copy_sz; MPID_Segment_unpack(&seg, 0, &last, (char*)sendbuf + sendtype_true_lb); MPIU_ERR_CHKANDJUMP(last != copy_sz, mpi_errno, MPI_ERR_TYPE, "**dtypemismatch"); } else if (recvtype_iscontig) { MPID_Segment seg; MPI_Aint last; MPID_Segment_init(sendbuf, sendcount, sendtype, &seg, 0); last = copy_sz; MPID_Segment_pack(&seg, 0, &last, (char*)recvbuf + recvtype_true_lb); MPIU_ERR_CHKANDJUMP(last != copy_sz, mpi_errno, MPI_ERR_TYPE, "**dtypemismatch"); } else { char * buf; MPIDI_msg_sz_t buf_off; MPID_Segment sseg; MPIDI_msg_sz_t sfirst; MPID_Segment rseg; MPIDI_msg_sz_t rfirst; MPIU_CHKLMEM_MALLOC(buf, char *, COPY_BUFFER_SZ, mpi_errno, "buf"); MPID_Segment_init(sendbuf, sendcount, sendtype, &sseg, 0); MPID_Segment_init(recvbuf, recvcount, recvtype, &rseg, 0); sfirst = 0; rfirst = 0; buf_off = 0; while (1) { MPI_Aint last; char * buf_end; if (copy_sz - sfirst > COPY_BUFFER_SZ - buf_off) { last = sfirst + (COPY_BUFFER_SZ - buf_off); } else { last = copy_sz; } MPID_Segment_pack(&sseg, sfirst, &last, buf + buf_off); MPIU_Assert(last > sfirst); buf_end = buf + buf_off + (last - sfirst); sfirst = last; MPID_Segment_unpack(&rseg, rfirst, &last, buf); MPIU_Assert(last > rfirst); rfirst = last; if (rfirst == copy_sz) { /* successful completion */ break; } /* if the send side finished, but the recv side couldn't unpack it, there's a datatype mismatch */ MPIU_ERR_CHKANDJUMP(sfirst == copy_sz, mpi_errno, MPI_ERR_TYPE, "**dtypemismatch"); /* if not all data was unpacked, copy it to the front of the buffer for next time */ buf_off = sfirst - rfirst; if (buf_off > 0) { memmove(buf, buf_end - buf_off, buf_off); } } } fn_exit: MPIU_CHKLMEM_FREEALL(); 
MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_LOCALCOPY); return mpi_errno; fn_fail: goto fn_exit; }
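/* Hedged sketch: for the non-contiguous case, MPIR_Localcopy above streams
 * data through a bounce buffer with Segment_pack/Segment_unpack.  A
 * user-level equivalent (standard MPI only, and simplified to a single
 * full-size staging buffer rather than a fixed-size one) looks like: */
#include <mpi.h>
#include <stdlib.h>

static void local_copy(const void *sendbuf, int scount, MPI_Datatype stype,
                       void *recvbuf, int rcount, MPI_Datatype rtype,
                       MPI_Comm comm)
{
    int packsize, pos = 0;
    MPI_Pack_size(scount, stype, comm, &packsize);

    void *stage = malloc(packsize);
    /* Pack the send description into contiguous bytes... */
    MPI_Pack(sendbuf, scount, stype, stage, packsize, &pos, comm);

    /* ...then unpack those bytes through the receive description. */
    pos = 0;
    MPI_Unpack(stage, packsize, &pos, recvbuf, rcount, rtype, comm);
    free(stage);
}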
int MPIDU_CH3U_GetSockInterfaceAddr( int myRank, char *ifname, int maxIfname, MPIDU_Sock_ifaddr_t *ifaddr ) { char *ifname_string; int mpi_errno = MPI_SUCCESS; int ifaddrFound = 0; if (dbg_ifname < 0) { int rc; rc = MPL_env2bool( "MPICH_DBG_IFNAME", &dbg_ifname ); if (rc != 1) dbg_ifname = 0; } /* Set "not found" for ifaddr */ ifaddr->len = 0; /* Check for the name supplied through an environment variable */ ifname_string = getenv("MPICH_INTERFACE_HOSTNAME"); if (!ifname_string) { /* See if there is a per-process name for the interfaces (e.g., the process manager only delievers the same values for the environment to each process */ char namebuf[1024]; MPL_snprintf( namebuf, sizeof(namebuf), "MPICH_INTERFACE_HOSTNAME_R%d", myRank ); ifname_string = getenv( namebuf ); if (dbg_ifname && ifname_string) { fprintf( stdout, "Found interface name %s from %s\n", ifname_string, namebuf ); fflush( stdout ); } } else if (dbg_ifname) { fprintf( stdout, "Found interface name %s from MPICH_INTERFACE_HOSTNAME\n", ifname_string ); fflush( stdout ); } if (!ifname_string) { int len; /* If we have nothing, then use the host name */ mpi_errno = MPID_Get_processor_name(ifname, maxIfname, &len ); if (mpi_errno) MPIR_ERR_POP(mpi_errno); ifname_string = ifname; /* If we didn't find a specific name, then try to get an IP address directly from the available interfaces, if that is supported on this platform. Otherwise, we'll drop into the next step that uses the ifname */ mpi_errno = MPIDI_CH3U_GetIPInterface( ifaddr, &ifaddrFound ); if (mpi_errno) MPIR_ERR_POP(mpi_errno); } else { /* Copy this name into the output name */ MPIU_Strncpy( ifname, ifname_string, maxIfname ); } /* If we don't have an IP address, try to get it from the name */ if (!ifaddrFound) { struct hostent *info; /* printf( "Name to check is %s\n", ifname_string ); fflush(stdout); */ info = gethostbyname( ifname_string ); if (info && info->h_addr_list) { /* Use the primary address */ ifaddr->len = info->h_length; ifaddr->type = info->h_addrtype; if (ifaddr->len > sizeof(ifaddr->ifaddr)) { /* If the address won't fit in the field, reset to no address */ ifaddr->len = 0; ifaddr->type = -1; } else { MPIU_Memcpy( ifaddr->ifaddr, info->h_addr_list[0], ifaddr->len ); } } } fn_exit: return mpi_errno; fn_fail: goto fn_exit; }
int MPIDI_CH3_Rendezvous_rget_recv_finish(MPIDI_VC_t * vc, MPID_Request * rreq) { int mpi_errno = MPI_SUCCESS; int complete; MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_RGET_RECV_FINISH); MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_RGET_RECV_FINISH); if (!MPIDI_CH3I_MRAIL_Finish_request(rreq)) { return MPI_SUCCESS; } if (rreq->mrail.rndv_buf_alloc == 1) { /* If we are using datatype, then need to unpack data from tmpbuf */ int iter = 0; int copied = 0; uintptr_t buf = (uintptr_t) rreq->mrail.rndv_buf; for (; iter < rreq->dev.iov_count; ++iter) { MPIU_Memcpy(rreq->dev.iov[iter].MPID_IOV_BUF, (void *) buf, rreq->dev.iov[iter].MPID_IOV_LEN); buf += rreq->dev.iov[iter].MPID_IOV_LEN; copied += rreq->dev.iov[iter].MPID_IOV_LEN; } MPIDI_CH3I_Request_adjust_iov(rreq, copied); while (rreq->dev.OnDataAvail == MPIDI_CH3_ReqHandler_UnpackSRBufReloadIOV || rreq->dev.OnDataAvail == MPIDI_CH3_ReqHandler_ReloadIOV) { /* XXX: dev.ca should only be CA_COMPLETE? */ /* end of XXX */ mpi_errno = MPIDI_CH3U_Handle_recv_req(vc, rreq, &complete); if (mpi_errno != MPI_SUCCESS || complete == TRUE) { mpi_errno = MPIR_Err_create_code( mpi_errno, MPIR_ERR_FATAL, FCNAME, __LINE__, MPI_ERR_OTHER, "**fail", 0); goto fn_exit; } copied = 0; for (iter = 0; iter < rreq->dev.iov_count; ++iter) { MPIU_Memcpy(rreq->dev.iov[iter].MPID_IOV_BUF, (void *) buf, rreq->dev.iov[iter].MPID_IOV_LEN); buf += rreq->dev.iov[iter].MPID_IOV_LEN; copied += rreq->dev.iov[iter].MPID_IOV_LEN; } MPIDI_CH3I_Request_adjust_iov(rreq, copied); } } else { rreq->mrail.rndv_buf = NULL; } #if defined(CKPT) MPIDI_CH3I_CR_req_dequeue(rreq); #endif /* defined(CKPT) */ MPIDI_CH3I_MRAILI_RREQ_RNDV_FINISH(rreq); mpi_errno = MPIDI_CH3U_Handle_recv_req(vc, rreq, &complete); if (mpi_errno != MPI_SUCCESS) { mpi_errno = MPIR_Err_create_code(mpi_errno, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, "**fail", 0); } if (complete) { vc->ch.recv_active = NULL; } else { mpi_errno = MPIR_Err_create_code(mpi_errno, MPIR_ERR_FATAL, FCNAME, __LINE__, MPI_ERR_OTHER, "**fail", 0); goto fn_exit; } fn_exit: MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_RGET_RECV_FINISH); return mpi_errno; }
void MPIDI_CH3_Rendezvous_r3_push(MPIDI_VC_t * vc, MPID_Request * sreq) { vbuf *buf; MPID_IOV iov[MPID_IOV_LIMIT + 1]; int n_iov; int msg_buffered = 0; int nb; int complete = 0; int seqnum; int finished = 0; int mpi_errno; int wait_for_rndv_r3_ack = 0; MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_RNDV_R3_PUSH); MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_RNDV_R3_PUSH); MPIDI_CH3_Pkt_rndv_r3_data_t pkt_head; MPIDI_Pkt_init(&pkt_head, MPIDI_CH3_PKT_RNDV_R3_DATA); iov[0].MPID_IOV_LEN = sizeof(MPIDI_CH3_Pkt_rndv_r3_data_t); iov[0].MPID_IOV_BUF = (void*) &pkt_head; pkt_head.receiver_req_id = sreq->mrail.partner_id; do { do { #ifndef DAPL_DEFAULT_PROVIDER /* stop sending more R3 data to avoid SRQ flooding at receiver */ if (MPIDI_CH3I_RDMA_Process.has_srq) { if (vc->ch.pending_r3_data >= rdma_max_r3_pending_data) { wait_for_rndv_r3_ack = 1; break; } } #endif MPIDI_VC_FAI_send_seqnum(vc, seqnum); MPIDI_Pkt_set_seqnum(&pkt_head, seqnum); MPIDI_Request_set_seqnum(sreq, seqnum); MPIU_Memcpy((void *) &iov[1], &sreq->dev.iov[sreq->dev.iov_offset], (sreq->dev.iov_count - sreq->dev.iov_offset) * sizeof(MPID_IOV)); n_iov = sreq->dev.iov_count - sreq->dev.iov_offset + 1; DEBUG_PRINT("iov count (sreq): %d, offset %d, len[1] %d\n", sreq->dev.iov_count, sreq->dev.iov_offset, sreq->dev.iov[0].MPID_IOV_LEN); { int i = 0; size_t total_len = 0; for (i = 0; i < n_iov; i++) { total_len += (iov[i].MPID_IOV_LEN); } mpi_errno = MPIDI_CH3I_MRAILI_Eager_send(vc, iov, n_iov, total_len, &nb, &buf); } DEBUG_PRINT("[istartmsgv] mpierr %d, nb %d\n", mpi_errno, nb); if (MPI_SUCCESS != mpi_errno && MPI_MRAIL_MSG_QUEUED != mpi_errno) { vc->ch.state = MPIDI_CH3I_VC_STATE_FAILED; sreq->status.MPI_ERROR = MPI_ERR_INTERN; MPIDI_CH3U_Request_complete(sreq); return; } else if (MPI_MRAIL_MSG_QUEUED == mpi_errno) { msg_buffered = 1; } nb -= sizeof(MPIDI_CH3_Pkt_rndv_r3_data_t); finished = MPIDI_CH3I_Request_adjust_iov(sreq, nb); DEBUG_PRINT("ajust iov finish: %d\n", finished); vc->ch.pending_r3_data += nb; } while (!finished/* && !msg_buffered*/); if (wait_for_rndv_r3_ack) { break; } if (finished && sreq->dev.OnDataAvail == MPIDI_CH3_ReqHandler_SendReloadIOV) { MPIDI_CH3U_Handle_send_req(vc, sreq, &complete); nb = 0; complete = 0; } else if (finished) { complete = 1; } } while (/* 1 != msg_buffered && */0 == complete); DEBUG_PRINT("exit loop with complete %d, msg_buffered %d wiat %d pending data:%d \n", complete, msg_buffered, wait_for_rndv_r3_ack, vc->ch.pending_r3_data); if (wait_for_rndv_r3_ack) { //|| 0 == complete && 1 == msg_buffered) { sreq->mrail.nearly_complete = 0; } else if (1 == msg_buffered) { buf->sreq = (void *) sreq; sreq->mrail.nearly_complete = 1; } else { buf->sreq = NULL; MPIDI_CH3U_Handle_send_req(vc, sreq, &complete); sreq->mrail.nearly_complete = 1; } if (sreq->mrail.nearly_complete) { DEBUG_PRINT("R3 PUSH completed\n"); } else { DEBUG_PRINT("Send Max R3 Pending Data. waiting for ACK\n"); } MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_RNDV_R3_PUSH); }
int MPIDI_CH3_Get_rndv_recv(MPIDI_VC_t * vc, MPID_Request * req) { int mpi_errno = MPI_SUCCESS; int complete; MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_RNDV_RECV); MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_RNDV_RECV); MPIU_Assert(req->mrail.protocol == VAPI_PROTOCOL_RPUT); #ifdef CKPT MPIDI_CH3I_CR_lock(); #endif if (req->mrail.rndv_buf_alloc == 1) { /* If we are using datatype, then need to unpack data from tmpbuf */ int iter = 0; uintptr_t buf = (uintptr_t) req->mrail.rndv_buf; for (iter=0; iter < req->dev.iov_count; ++iter) { MPIU_Memcpy(req->dev.iov[iter].MPID_IOV_BUF, (void *) buf, req->dev.iov[iter].MPID_IOV_LEN); buf += req->dev.iov[iter].MPID_IOV_LEN; } while (req->dev.OnDataAvail == MPIDI_CH3_ReqHandler_UnpackSRBufReloadIOV || req->dev.OnDataAvail == MPIDI_CH3_ReqHandler_ReloadIOV) { /* mpi_errno = MPIDI_CH3U_Request_load_recv_iov(req); */ mpi_errno = MPIDI_CH3U_Handle_recv_req(vc, req, &complete); if (mpi_errno != MPI_SUCCESS) { goto fn_exit; } for (iter = 0; iter < req->dev.iov_count; ++iter) { MPIU_Memcpy(req->dev.iov[iter].MPID_IOV_BUF, (void *) buf, req->dev.iov[iter].MPID_IOV_LEN); buf += req->dev.iov[iter].MPID_IOV_LEN; } } } else { req->mrail.rndv_buf = NULL; } MPIDI_CH3I_MRAILI_RREQ_RNDV_FINISH(req); mpi_errno = MPIDI_CH3U_Handle_recv_req(vc, req, &complete); if (mpi_errno != MPI_SUCCESS) { goto fn_exit; } MPIU_Assert(complete == TRUE); fn_exit: #if defined(CKPT) MPIDI_CH3I_CR_unlock(); #endif /* defined(CKPT) */ MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_RNDV_RECV); return mpi_errno; }
int MPIDI_CH3I_MRAIL_Prepare_rndv_transfer (MPID_Request * sreq, /* contains local info */ MPIDI_CH3I_MRAILI_Rndv_info_t * rndv) { if (rndv->protocol == VAPI_PROTOCOL_R3) { if (sreq->mrail.d_entry != NULL) { dreg_unregister (sreq->mrail.d_entry); sreq->mrail.d_entry = NULL; } if (1 == sreq->mrail.rndv_buf_alloc && NULL != sreq->mrail.rndv_buf) { MPIU_Free (sreq->mrail.rndv_buf); sreq->mrail.rndv_buf_alloc = 0; sreq->mrail.rndv_buf = NULL; } sreq->mrail.remote_addr = NULL; sreq->mrail.remote_handle.hndl = DAT_HANDLE_NULL; sreq->mrail.protocol = VAPI_PROTOCOL_R3; } else { sreq->mrail.remote_addr = rndv->buf_addr; sreq->mrail.remote_handle = rndv->memhandle; DEBUG_PRINT ("[add rndv list] addr %p, key %p\n", sreq->mrail.remote_addr, sreq->mrail.remote_handle.rkey); if (1 == sreq->mrail.rndv_buf_alloc) { int mpi_errno = MPI_SUCCESS; int i; aint_t buf; buf = (aint_t) sreq->mrail.rndv_buf; for (i = 0; i < sreq->dev.iov_count; i++) { MPIU_Memcpy ((void *) buf, sreq->dev.iov[i].MPID_IOV_BUF, sreq->dev.iov[i].MPID_IOV_LEN); buf += sreq->dev.iov[i].MPID_IOV_LEN; } /* TODO: Following part is a workaround to deal with datatype with large number * of segments. We check if the datatype has finished loading and reload if not. * May be better interface with upper layer should be considered*/ while (sreq->dev.OnDataAvail == MPIDI_CH3_ReqHandler_SendReloadIOV) { sreq->dev.iov_count = MPID_IOV_LIMIT; mpi_errno = MPIDI_CH3U_Request_load_send_iov (sreq, sreq->dev.iov, &sreq->dev. iov_count); /* --BEGIN ERROR HANDLING-- */ if (mpi_errno != MPI_SUCCESS) { udapl_error_abort (UDAPL_STATUS_ERR, "Reload iov error"); } for (i = 0; i < sreq->dev.iov_count; i++) { MPIU_Memcpy ((void *) buf, sreq->dev.iov[i].MPID_IOV_BUF, sreq->dev.iov[i].MPID_IOV_LEN); buf += sreq->dev.iov[i].MPID_IOV_LEN; } } } } return MPI_SUCCESS; }
static int handler_recv_dequeue_large(const ptl_event_t *e) { int mpi_errno = MPI_SUCCESS; MPID_Request *const rreq = e->user_ptr; MPIDI_VC_t *vc; MPID_nem_ptl_vc_area *vc_ptl; int ret; int dt_contig; MPIDI_msg_sz_t data_sz; MPID_Datatype *dt_ptr; MPI_Aint dt_true_lb; MPI_Aint last; MPIU_CHKPMEM_DECL(1); MPIDI_STATE_DECL(MPID_STATE_HANDLER_RECV_DEQUEUE_LARGE); MPIDI_FUNC_ENTER(MPID_STATE_HANDLER_RECV_DEQUEUE_LARGE); MPIU_Assert(e->type == PTL_EVENT_PUT || e->type == PTL_EVENT_PUT_OVERFLOW); MPIDI_Comm_get_vc(rreq->comm, NPTL_MATCH_GET_RANK(e->match_bits), &vc); vc_ptl = VC_PTL(vc); dequeue_req(e); MPIDI_Datatype_get_info(rreq->dev.user_count, rreq->dev.datatype, dt_contig, data_sz, dt_ptr, dt_true_lb); /* unpack data from unexpected buffer first */ if (e->type == PTL_EVENT_PUT_OVERFLOW) { if (dt_contig) { MPIU_Memcpy((char *)rreq->dev.user_buf + dt_true_lb, e->start, e->mlength); } else { last = e->mlength; MPID_Segment_unpack(rreq->dev.segment_ptr, 0, &last, e->start); MPIU_Assert(last == e->mlength); rreq->dev.segment_first = e->mlength; } } if (!(e->hdr_data & NPTL_LARGE)) { /* all data has already been received; we're done */ mpi_errno = handler_recv_complete(e); if (mpi_errno) MPIR_ERR_POP(mpi_errno); goto fn_exit; } MPIU_Assert (e->mlength == PTL_LARGE_THRESHOLD); /* we need to GET the rest of the data from the sender's buffer */ if (dt_contig) { big_get((char *)rreq->dev.user_buf + dt_true_lb + PTL_LARGE_THRESHOLD, data_sz - PTL_LARGE_THRESHOLD, vc, e->match_bits, rreq); goto fn_exit; } /* noncontig recv buffer */ last = rreq->dev.segment_size; rreq->dev.iov_count = MPL_IOV_LIMIT; MPID_Segment_pack_vector(rreq->dev.segment_ptr, rreq->dev.segment_first, &last, rreq->dev.iov, &rreq->dev.iov_count); if (last == rreq->dev.segment_size && rreq->dev.segment_size <= MPIDI_nem_ptl_ni_limits.max_msg_size + PTL_LARGE_THRESHOLD) { /* Rest of message fits in one IOV */ ptl_md_t md; md.start = rreq->dev.iov; md.length = rreq->dev.iov_count; md.options = PTL_IOVEC; md.eq_handle = MPIDI_nem_ptl_origin_eq; md.ct_handle = PTL_CT_NONE; ret = PtlMDBind(MPIDI_nem_ptl_ni, &md, &REQ_PTL(rreq)->md); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlmdbind", "**ptlmdbind %s", MPID_nem_ptl_strerror(ret)); REQ_PTL(rreq)->event_handler = handler_recv_complete; ret = MPID_nem_ptl_rptl_get(REQ_PTL(rreq)->md, 0, rreq->dev.segment_size - rreq->dev.segment_first, vc_ptl->id, vc_ptl->ptg, e->match_bits, 0, rreq); MPIR_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**ptlget", "**ptlget %s", MPID_nem_ptl_strerror(ret)); goto fn_exit; } /* message won't fit in a single IOV, allocate buffer and unpack when received */ /* FIXME: For now, allocate a single large buffer to hold entire message */ MPIU_CHKPMEM_MALLOC(REQ_PTL(rreq)->chunk_buffer[0], void *, data_sz - PTL_LARGE_THRESHOLD, mpi_errno, "chunk_buffer"); big_get(REQ_PTL(rreq)->chunk_buffer[0], data_sz - PTL_LARGE_THRESHOLD, vc, e->match_bits, rreq); fn_exit: MPIU_CHKPMEM_COMMIT(); fn_exit2: MPIDI_FUNC_EXIT(MPID_STATE_HANDLER_RECV_DEQUEUE_LARGE); return mpi_errno; fn_fail: MPIU_CHKPMEM_REAP(); goto fn_exit2; }
/*@ MPI_Dist_graph_create_adjacent - returns a handle to a new communicator to which the distributed graph topology information is attached. Input Parameters: + comm_old - input communicator (handle) . indegree - size of sources and sourceweights arrays (non-negative integer) . sources - ranks of processes for which the calling process is a destination (array of non-negative integers) . sourceweights - weights of the edges into the calling process (array of non-negative integers or MPI_UNWEIGHTED) . outdegree - size of destinations and destweights arrays (non-negative integer) . destinations - ranks of processes for which the calling process is a source (array of non-negative integers) . destweights - weights of the edges out of the calling process (array of non-negative integers or MPI_UNWEIGHTED) . info - hints on optimization and interpretation of weights (handle) - reorder - the ranks may be reordered (true) or not (false) (logical) Output Parameters: . comm_dist_graph - communicator with distributed graph topology (handle) .N ThreadSafe .N Fortran .N Errors .N MPI_SUCCESS .N MPI_ERR_ARG .N MPI_ERR_OTHER @*/ int MPI_Dist_graph_create_adjacent(MPI_Comm comm_old, int indegree, const int sources[], const int sourceweights[], int outdegree, const int destinations[], const int destweights[], MPI_Info info, int reorder, MPI_Comm *comm_dist_graph) { int mpi_errno = MPI_SUCCESS; MPID_Comm *comm_ptr = NULL; MPID_Comm *comm_dist_graph_ptr = NULL; MPIR_Topology *topo_ptr = NULL; MPIR_Dist_graph_topology *dist_graph_ptr = NULL; MPIU_CHKPMEM_DECL(5); MPID_MPI_STATE_DECL(MPID_STATE_MPI_DIST_GRAPH_CREATE_ADJACENT); MPIR_ERRTEST_INITIALIZED_ORDIE(); MPIU_THREAD_CS_ENTER(ALLFUNC,); MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_DIST_GRAPH_CREATE_ADJACENT); /* Validate parameters, especially handles needing to be converted */ # ifdef HAVE_ERROR_CHECKING { MPID_BEGIN_ERROR_CHECKS; { MPIR_ERRTEST_COMM(comm_old, mpi_errno); MPIR_ERRTEST_INFO_OR_NULL(info, mpi_errno); } MPID_END_ERROR_CHECKS; } # endif /* Convert MPI object handles to object pointers */ MPID_Comm_get_ptr(comm_old, comm_ptr); /* Validate parameters and objects (post conversion) */ # ifdef HAVE_ERROR_CHECKING { MPID_BEGIN_ERROR_CHECKS; { /* Validate comm_ptr */ MPID_Comm_valid_ptr( comm_ptr, mpi_errno, FALSE ); if (mpi_errno != MPI_SUCCESS) goto fn_fail; /* If comm_ptr is not valid, it will be reset to null */ if (comm_ptr) { MPIR_ERRTEST_COMM_INTRA(comm_ptr, mpi_errno); } MPIR_ERRTEST_ARGNEG(indegree, "indegree", mpi_errno); MPIR_ERRTEST_ARGNEG(outdegree, "outdegree", mpi_errno); if (indegree > 0) { MPIR_ERRTEST_ARGNULL(sources, "sources", mpi_errno); if (sourceweights == MPI_UNWEIGHTED && destweights != MPI_UNWEIGHTED) { MPIU_ERR_SET(mpi_errno, MPI_ERR_TOPOLOGY, "**unweightedboth"); goto fn_fail; } /* TODO check ranges for array elements too (**argarrayneg / **rankarray)*/ } if (outdegree > 0) { MPIR_ERRTEST_ARGNULL(destinations, "destinations", mpi_errno); if (destweights == MPI_UNWEIGHTED && sourceweights != MPI_UNWEIGHTED) { MPIU_ERR_SET(mpi_errno, MPI_ERR_TOPOLOGY, "**unweightedboth"); goto fn_fail; } } MPIR_ERRTEST_ARGNULL(comm_dist_graph, "comm_dist_graph", mpi_errno); } MPID_END_ERROR_CHECKS; } # endif /* HAVE_ERROR_CHECKING */ /* ... body of routine ... */ /* Implementation based on Torsten Hoefler's reference implementation * attached to MPI-2.2 ticket #33. */ *comm_dist_graph = MPI_COMM_NULL; /* following the spirit of the old topo interface, attributes do not * propagate to the new communicator (see MPI-2.1 pp. 
243 line 11) */ mpi_errno = MPIR_Comm_copy(comm_ptr, comm_ptr->local_size, &comm_dist_graph_ptr); if (mpi_errno) MPIU_ERR_POP(mpi_errno); /* Create the topology structure */ MPIU_CHKPMEM_MALLOC(topo_ptr, MPIR_Topology *, sizeof(MPIR_Topology), mpi_errno, "topo_ptr"); topo_ptr->kind = MPI_DIST_GRAPH; dist_graph_ptr = &topo_ptr->topo.dist_graph; dist_graph_ptr->indegree = indegree; dist_graph_ptr->in = NULL; dist_graph_ptr->in_weights = NULL; dist_graph_ptr->outdegree = outdegree; dist_graph_ptr->out = NULL; dist_graph_ptr->out_weights = NULL; dist_graph_ptr->is_weighted = (sourceweights != MPI_UNWEIGHTED); MPIU_CHKPMEM_MALLOC(dist_graph_ptr->in, int *, indegree*sizeof(int), mpi_errno, "dist_graph_ptr->in"); MPIU_CHKPMEM_MALLOC(dist_graph_ptr->out, int *, outdegree*sizeof(int), mpi_errno, "dist_graph_ptr->out"); MPIU_Memcpy(dist_graph_ptr->in, sources, indegree*sizeof(int)); MPIU_Memcpy(dist_graph_ptr->out, destinations, outdegree*sizeof(int)); if (dist_graph_ptr->is_weighted) { MPIU_CHKPMEM_MALLOC(dist_graph_ptr->in_weights, int *, indegree*sizeof(int), mpi_errno, "dist_graph_ptr->in_weights"); MPIU_CHKPMEM_MALLOC(dist_graph_ptr->out_weights, int *, outdegree*sizeof(int), mpi_errno, "dist_graph_ptr->out_weights"); MPIU_Memcpy(dist_graph_ptr->in_weights, sourceweights, indegree*sizeof(int)); MPIU_Memcpy(dist_graph_ptr->out_weights, destweights, outdegree*sizeof(int)); }
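/* Hedged sketch: a minimal caller of the routine documented above.  Each
 * rank declares its left neighbour as its only source and its right
 * neighbour as its only destination, i.e. a directed ring (standard MPI
 * calls only; both weight arrays are MPI_UNWEIGHTED, matching the error
 * check in the implementation). */
#include <mpi.h>

static MPI_Comm make_ring_topology(MPI_Comm comm)
{
    int rank, size;
    MPI_Comm ring;

    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &size);

    int src = (rank - 1 + size) % size;   /* who sends to me */
    int dst = (rank + 1) % size;          /* whom I send to  */

    MPI_Dist_graph_create_adjacent(comm, 1, &src, MPI_UNWEIGHTED,
                                   1, &dst, MPI_UNWEIGHTED,
                                   MPI_INFO_NULL, 0 /* no reorder */, &ring);
    return ring;
}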
static int GetSockInterfaceAddr(int myRank, char *ifname, int maxIfname, MPIDU_Sock_ifaddr_t *ifaddr) { const char *ifname_string; int mpi_errno = MPI_SUCCESS; int ifaddrFound = 0; MPIU_Assert(maxIfname); ifname[0] = '\0'; MPIR_ERR_CHKANDJUMP(MPIR_CVAR_CH3_INTERFACE_HOSTNAME && MPIR_CVAR_NEMESIS_TCP_NETWORK_IFACE, mpi_errno, MPI_ERR_OTHER, "**ifname_and_hostname"); /* Set "not found" for ifaddr */ ifaddr->len = 0; /* Check if user specified ethernet interface name, e.g., ib0, eth1 */ if (MPIR_CVAR_NEMESIS_TCP_NETWORK_IFACE) { int len; mpi_errno = MPIDI_Get_IP_for_iface(MPIR_CVAR_NEMESIS_TCP_NETWORK_IFACE, ifaddr, &ifaddrFound); MPIR_ERR_CHKANDJUMP1(mpi_errno || !ifaddrFound, mpi_errno, MPI_ERR_OTHER, "**iface_notfound", "**iface_notfound %s", MPIR_CVAR_NEMESIS_TCP_NETWORK_IFACE); MPIU_DBG_MSG_FMT(CH3_CONNECT, VERBOSE, (MPIU_DBG_FDEST, "ifaddrFound=TRUE ifaddr->type=%d ifaddr->len=%d ifaddr->ifaddr[0-3]=%d.%d.%d.%d", ifaddr->type, ifaddr->len, ifaddr->ifaddr[0], ifaddr->ifaddr[1], ifaddr->ifaddr[2], ifaddr->ifaddr[3])); /* In this case, ifname is only used for debugging purposes */ mpi_errno = MPID_Get_processor_name(ifname, maxIfname, &len ); if (mpi_errno) MPIR_ERR_POP(mpi_errno); goto fn_exit; } /* Check for a host name supplied through an environment variable */ ifname_string = MPIR_CVAR_CH3_INTERFACE_HOSTNAME; if (!ifname_string) { /* See if there is a per-process name for the interfaces (e.g., the process manager only delievers the same values for the environment to each process. There's no way to do this with the param interface, so we need to use getenv() here. */ char namebuf[1024]; MPL_snprintf( namebuf, sizeof(namebuf), "MPICH_INTERFACE_HOSTNAME_R%d", myRank ); ifname_string = getenv( namebuf ); if (DBG_IFNAME && ifname_string) { fprintf( stdout, "Found interface name %s from %s\n", ifname_string, namebuf ); fflush( stdout ); } } else if (DBG_IFNAME) { fprintf( stdout, "Found interface name %s from MPICH_INTERFACE_HOSTNAME\n", ifname_string ); fflush( stdout ); } if (!ifname_string) { int len; /* User did not specify a hostname. Look it up. */ mpi_errno = MPID_Get_processor_name(ifname, maxIfname, &len ); if (mpi_errno) MPIR_ERR_POP(mpi_errno); ifname_string = ifname; /* If we didn't find a specific name, then try to get an IP address directly from the available interfaces, if that is supported on this platform. Otherwise, we'll drop into the next step that uses the ifname */ mpi_errno = MPIDI_GetIPInterface( ifaddr, &ifaddrFound ); if (mpi_errno) MPIR_ERR_POP(mpi_errno); } else { /* Copy this name into the output name */ MPIU_Strncpy( ifname, ifname_string, maxIfname ); } /* If we don't have an IP address, try to get it from the name */ if (!ifaddrFound) { int i; struct hostent *info = NULL; for (i = 0; i < MPIR_CVAR_NEMESIS_TCP_HOST_LOOKUP_RETRIES; ++i) { info = gethostbyname( ifname_string ); if (info || h_errno != TRY_AGAIN) break; } MPIR_ERR_CHKANDJUMP2(!info || !info->h_addr_list, mpi_errno, MPI_ERR_OTHER, "**gethostbyname", "**gethostbyname %s %d", ifname_string, h_errno); /* Use the primary address */ ifaddr->len = info->h_length; ifaddr->type = info->h_addrtype; if (ifaddr->len > sizeof(ifaddr->ifaddr)) { /* If the address won't fit in the field, reset to no address */ ifaddr->len = 0; ifaddr->type = -1; MPIR_ERR_INTERNAL(mpi_errno, "Address too long to fit in field"); } else { MPIU_Memcpy( ifaddr->ifaddr, info->h_addr_list[0], ifaddr->len ); } } fn_exit: return mpi_errno; fn_fail: goto fn_exit; }
int MPIR_Bsend_isend(const void *buf, int count, MPI_Datatype dtype, int dest, int tag, MPID_Comm *comm_ptr, MPIR_Bsend_kind_t kind, MPID_Request **request ) { int mpi_errno = MPI_SUCCESS; MPIR_Bsend_data_t *p; MPIR_Bsend_msg_t *msg; int packsize, pass; /* Find a free segment and copy the data into it. If we could have, we would already have used tBsend to send the message with no copying. We may want to decide here whether we need to pack at all or if we can just use (a MPIU_Memcpy) of the buffer. */ /* We check the active buffer first. This helps avoid storage fragmentation */ mpi_errno = MPIR_Bsend_check_active(); if (mpi_errno) MPIU_ERR_POP(mpi_errno); if (dtype != MPI_PACKED) MPIR_Pack_size_impl( count, dtype, &packsize ); else packsize = count; MPIU_DBG_MSG_D(BSEND,TYPICAL,"looking for buffer of size %d", packsize); /* * Use two passes. Each pass is the same; between the two passes, * attempt to complete any active requests, and start any pending * ones. If the message can be initiated in the first pass, * do not perform the second pass. */ for (pass = 0; pass < 2; pass++) { p = MPIR_Bsend_find_buffer( packsize ); if (p) { MPIU_DBG_MSG_FMT(BSEND,TYPICAL,(MPIU_DBG_FDEST, "found buffer of size %d with address %p",packsize,p)); /* Found a segment */ msg = &p->msg; /* Pack the data into the buffer */ /* We may want to optimize for the special case of either primative or contiguous types, and just use MPIU_Memcpy and the provided datatype */ msg->count = 0; if (dtype != MPI_PACKED) { mpi_errno = MPIR_Pack_impl( buf, count, dtype, p->msg.msgbuf, packsize, &p->msg.count); if (mpi_errno) MPIU_ERR_POP(mpi_errno); } else { MPIU_Memcpy(p->msg.msgbuf, buf, count); p->msg.count = count; } /* Try to send the message. We must use MPID_Isend because this call must not block */ mpi_errno = MPID_Isend(msg->msgbuf, msg->count, MPI_PACKED, dest, tag, comm_ptr, MPID_CONTEXT_INTRA_PT2PT, &p->request ); MPIU_ERR_CHKINTERNAL(mpi_errno, mpi_errno, "Bsend internal error: isend returned err"); /* If the error is "request not available", we should put this on the pending list. This will depend on how we signal failure to send. */ if (p->request) { MPIU_DBG_MSG_FMT(BSEND,TYPICAL, (MPIU_DBG_FDEST,"saving request %p in %p",p->request,p)); /* An optimization is to check to see if the data has already been sent. The original code to do this was commented out and probably did not match the current request internals */ MPIR_Bsend_take_buffer( p, p->msg.count ); p->kind = kind; *request = p->request; } break; } /* If we found a buffer or we're in the seccond pass, then break. Note that the test on phere is redundant, as the code breaks out of the loop in the test above if a block p is found. */ if (p || pass == 1) break; MPIU_DBG_MSG(BSEND,TYPICAL,"Could not find storage, checking active"); /* Try to complete some pending bsends */ MPIR_Bsend_check_active( ); /* Give priority to any pending operations */ MPIR_Bsend_retry_pending( ); } if (!p) { /* Return error for no buffer space found */ /* Generate a traceback of the allocated space, explaining why packsize could not be found */ MPIU_DBG_MSG(BSEND,TYPICAL,"Could not find space; dumping arena" ); MPIU_DBG_STMT(BSEND,TYPICAL,MPIR_Bsend_dump()); MPIU_ERR_SETANDJUMP2(mpi_errno, MPI_ERR_BUFFER, "**bufbsend", "**bufbsend %d %d", packsize, BsendBuffer.buffer_size); } fn_exit: return mpi_errno; fn_fail: goto fn_exit; }
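/* Hedged sketch: MPIR_Bsend_isend() above carves each message out of the
 * user-attached buffer, which is why callers size that buffer with
 * MPI_Pack_size() plus MPI_BSEND_OVERHEAD per outstanding message.
 * Standard usage (standard MPI calls only): */
#include <mpi.h>
#include <stdlib.h>

static void bsend_one_int(int value, int dest, int tag, MPI_Comm comm)
{
    int packsize, bufsize;
    void *buffer, *detached;

    MPI_Pack_size(1, MPI_INT, comm, &packsize);
    bufsize = packsize + MPI_BSEND_OVERHEAD;    /* room for one bsend */

    buffer = malloc(bufsize);
    MPI_Buffer_attach(buffer, bufsize);

    MPI_Bsend(&value, 1, MPI_INT, dest, tag, comm);

    /* Detach waits for the buffered send to complete before returning. */
    MPI_Buffer_detach(&detached, &bufsize);
    free(detached);
}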
    /* create a request */
    MPIDI_Request_create_sreq(sreq, mpi_errno, goto fn_exit);
    MPIU_Assert(sreq != NULL);
    MPIDI_Request_set_type(sreq, MPIDI_REQUEST_TYPE_SEND);
    MPIDI_VC_FAI_send_seqnum(vc, seqnum);
    MPIDI_Request_set_seqnum(sreq, seqnum);
    if (HANDLE_GET_KIND(datatype) != HANDLE_KIND_BUILTIN) {
        MPID_Datatype_get_ptr(datatype, sreq->dev.datatype_ptr);
        MPID_Datatype_add_ref(sreq->dev.datatype_ptr);
    }
    sreq->partner_request = NULL;
    sreq->dev.OnDataAvail = NULL;
    sreq->dev.tmpbuf = NULL;
    sreq->ch.vc = vc;
    sreq->ch.noncontig = FALSE;

    _dbg_mxm_output(5, "isSend ========> Sending USER msg for req %p (context %d to %d tag %d size %d) \n",
                    sreq, comm->context_id + context_offset, rank, tag, data_sz);

    vc_area = VC_BASE(vc);
    req_area = REQ_BASE(sreq);

    req_area->ctx = sreq;
    req_area->iov_buf = req_area->tmp_buf;
    req_area->iov_count = 0;
    req_area->iov_buf[0].ptr = NULL;
    req_area->iov_buf[0].length = 0;

    if (data_sz) {
        if (dt_contig) {
            req_area->iov_count = 1;
            req_area->iov_buf[0].ptr = (char *) (buf) + dt_true_lb;
            req_area->iov_buf[0].length = data_sz;
        }
        else {
            MPIDI_msg_sz_t last;
            MPI_Aint packsize = 0;

            sreq->ch.noncontig = TRUE;
            sreq->dev.segment_ptr = MPID_Segment_alloc();
            MPIU_ERR_CHKANDJUMP1((sreq->dev.segment_ptr == NULL), mpi_errno, MPI_ERR_OTHER,
                                 "**nomem", "**nomem %s", "MPID_Segment_alloc");
            MPIR_Pack_size_impl(count, datatype, &packsize);

            last = data_sz;
            if (packsize > 0) {
                sreq->dev.tmpbuf = MPIU_Malloc((size_t) packsize);
                MPIU_Assert(sreq->dev.tmpbuf);
                MPID_Segment_init(buf, count, datatype, sreq->dev.segment_ptr, 0);
                MPID_Segment_pack(sreq->dev.segment_ptr, 0, &last, sreq->dev.tmpbuf);

                req_area->iov_count = 1;
                req_area->iov_buf[0].ptr = sreq->dev.tmpbuf;
                req_area->iov_buf[0].length = last;
            }
        }
    }

    vc_area->pending_sends += 1;

    mpi_errno = _mxm_isend(vc_area->mxm_ep, req_area, MXM_MPICH_ISEND_SYNC,
                           (mxm_mq_h) comm->dev.ch.netmod_priv, comm->rank, tag,
                           _mxm_tag_mpi2mxm(tag, comm->context_id + context_offset), 0);
    if (mpi_errno)
        MPIU_ERR_POP(mpi_errno);

    _dbg_mxm_out_req(sreq);

  fn_exit:
    *sreq_ptr = sreq;
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_MXM_ISSEND);
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}

static int _mxm_handle_sreq(MPID_Request * req)
{
    int complete = FALSE;
    int (*reqFn) (MPIDI_VC_t *, MPID_Request *, int *);
    MPID_nem_mxm_vc_area *vc_area = NULL;
    MPID_nem_mxm_req_area *req_area = NULL;

    vc_area = VC_BASE(req->ch.vc);
    req_area = REQ_BASE(req);

    _dbg_mxm_out_buf(req_area->iov_buf[0].ptr,
                     (req_area->iov_buf[0].length > 16 ? 16 : req_area->iov_buf[0].length));

    vc_area->pending_sends -= 1;
    if (((req->dev.datatype_ptr != NULL) && (req->dev.tmpbuf != NULL))) {
        MPIU_Free(req->dev.tmpbuf);
    }

    if (req_area->iov_count > MXM_MPICH_MAX_IOV) {
        MPIU_Free(req_area->iov_buf);
        req_area->iov_buf = req_area->tmp_buf;
        req_area->iov_count = 0;
    }

    reqFn = req->dev.OnDataAvail;
    if (!reqFn) {
        MPIDI_CH3U_Request_complete(req);
        MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, ".... complete");
    }
    else {
        MPIDI_VC_t *vc = req->ch.vc;

        reqFn(vc, req, &complete);
        if (!complete) {
            MPIU_Assert(complete == TRUE);
        }
    }

    return complete;
}

static void _mxm_send_completion_cb(void *context)
{
    MPID_Request *req = (MPID_Request *) context;
    MPID_nem_mxm_vc_area *vc_area = NULL;
    MPID_nem_mxm_req_area *req_area = NULL;

    MPIU_Assert(req);
    _dbg_mxm_out_req(req);

    vc_area = VC_BASE(req->ch.vc);
    req_area = REQ_BASE(req);

    _mxm_to_mpi_status(req_area->mxm_req->item.base.error, &req->status);

    list_enqueue(&vc_area->mxm_ep->free_queue, &req_area->mxm_req->queue);

    _dbg_mxm_output(5, "========> %s SEND req %p status %d\n",
                    (MPIR_STATUS_GET_CANCEL_BIT(req->status) ? "Canceling" : "Completing"),
                    req, req->status.MPI_ERROR);

    if (likely(!MPIR_STATUS_GET_CANCEL_BIT(req->status))) {
        _mxm_handle_sreq(req);
    }
}

static int _mxm_isend(MPID_nem_mxm_ep_t * ep, MPID_nem_mxm_req_area * req,
                      int type, mxm_mq_h mxm_mq, int mxm_rank, int id,
                      mxm_tag_t mxm_tag, int block)
{
    int mpi_errno = MPI_SUCCESS;
    mxm_error_t ret = MXM_OK;
    mxm_send_req_t *mxm_sreq;
    list_head_t *free_queue = NULL;

    MPIU_Assert(ep);
    MPIU_Assert(req);

    free_queue = &ep->free_queue;
    req->mxm_req = list_dequeue_mxm_req(free_queue);
    if (!req->mxm_req) {
        list_grow_mxm_req(free_queue);
        req->mxm_req = list_dequeue_mxm_req(free_queue);
        if (!req->mxm_req) {
            MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, "empty free queue");
            mpi_errno = MPI_ERR_OTHER;
            goto fn_fail;
        }
    }
    mxm_sreq = &(req->mxm_req->item.send);

    mxm_sreq->base.state = MXM_REQ_NEW;
    mxm_sreq->base.mq = mxm_mq;
    mxm_sreq->base.conn = ep->mxm_conn;
    mxm_sreq->base.completed_cb = _mxm_send_completion_cb;
    mxm_sreq->base.context = req->ctx;

    if (type == MXM_MPICH_ISEND_AM) {
        mxm_sreq->opcode = MXM_REQ_OP_AM;
        mxm_sreq->flags = 0;

        mxm_sreq->op.am.hid = id;
        mxm_sreq->op.am.imm_data = mxm_rank;
    }
    else if (type == MXM_MPICH_ISEND_SYNC) {
        mxm_sreq->opcode = MXM_REQ_OP_SEND_SYNC;
        mxm_sreq->flags = 0;

        mxm_sreq->op.send.tag = mxm_tag;
        mxm_sreq->op.send.imm_data = mxm_rank;
    }
    else {
        mxm_sreq->opcode = MXM_REQ_OP_SEND;
        mxm_sreq->flags = 0;

        mxm_sreq->op.send.tag = mxm_tag;
        mxm_sreq->op.send.imm_data = mxm_rank;
    }

    if (likely(req->iov_count == 1)) {
        mxm_sreq->base.data_type = MXM_REQ_DATA_BUFFER;
        mxm_sreq->base.data.buffer.ptr = req->iov_buf[0].ptr;
        mxm_sreq->base.data.buffer.length = req->iov_buf[0].length;
    }
    else {
        mxm_sreq->base.data_type = MXM_REQ_DATA_IOV;
        mxm_sreq->base.data.iov.vector = req->iov_buf;
        mxm_sreq->base.data.iov.count = req->iov_count;
    }

    ret = mxm_req_send(mxm_sreq);
    if (MXM_OK != ret) {
        list_enqueue(free_queue, &req->mxm_req->queue);
        mpi_errno = MPI_ERR_OTHER;
        goto fn_fail;
    }

    if (block)
        _mxm_req_wait(&mxm_sreq->base);

  fn_exit:
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}

#if 0
/* Consider using this function in case of non-contiguous data */
static int _mxm_process_sdtype(MPID_Request ** sreq_p, MPI_Datatype datatype,
                               MPID_Datatype * dt_ptr, MPIDI_msg_sz_t data_sz,
                               const void *buf, int count,
                               mxm_req_buffer_t ** iov_buf, int *iov_count)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_Request *sreq = *sreq_p;
    MPIDI_msg_sz_t last;
    MPID_IOV *iov;
    int n_iov = 0;
    int index;
    int size_to_copy = 0;

    sreq->dev.segment_ptr = MPID_Segment_alloc();
    MPIU_ERR_CHKANDJUMP1((sreq->dev.segment_ptr == NULL), mpi_errno, MPI_ERR_OTHER,
                         "**nomem", "**nomem %s", "MPID_Segment_alloc");

    MPID_Segment_init(buf, count, datatype, sreq->dev.segment_ptr, 0);
    sreq->dev.segment_first = 0;
    sreq->dev.segment_size = data_sz;

    last = sreq->dev.segment_size;
    MPID_Segment_count_contig_blocks(sreq->dev.segment_ptr, sreq->dev.segment_first,
                                     &last, (MPI_Aint *) & n_iov);
    MPIU_Assert(n_iov > 0);
    iov = MPIU_Malloc(n_iov * sizeof(*iov));
    MPIU_Assert(iov);

    last = sreq->dev.segment_size;
    MPID_Segment_pack_vector(sreq->dev.segment_ptr, sreq->dev.segment_first, &last, iov, &n_iov);
    MPIU_Assert(last == sreq->dev.segment_size);

#if defined(MXM_DEBUG) && (MXM_DEBUG > 0)
    _dbg_mxm_output(7, "Send Noncontiguous data vector %i entries (free slots : %i)\n",
                    n_iov, MXM_REQ_DATA_MAX_IOV);
    for (index = 0; index < n_iov; index++) {
        _dbg_mxm_output(7, "======= Recv iov[%i] = ptr : %p, len : %i \n",
                        index, iov[index].MPID_IOV_BUF, iov[index].MPID_IOV_LEN);
    }
#endif

    if (n_iov > MXM_MPICH_MAX_IOV) {
        *iov_buf = (mxm_req_buffer_t *) MPIU_Malloc(n_iov * sizeof(**iov_buf));
        MPIU_Assert(*iov_buf);
    }

    for (index = 0; index < n_iov; index++) {
        if (index < (MXM_REQ_DATA_MAX_IOV - 1)) {
            (*iov_buf)[index].ptr = iov[index].MPID_IOV_BUF;
            (*iov_buf)[index].length = iov[index].MPID_IOV_LEN;
        }
        else {
            size_to_copy += iov[index].MPID_IOV_LEN;
        }
    }

    if (size_to_copy == 0) {
        /* Everything fits within the transport's iov limit; use the vector as-is */
        sreq->dev.tmpbuf = NULL;
        sreq->dev.tmpbuf_sz = 0;
        *iov_count = n_iov;
    }
    else {
        /* Too many fragments: collapse the tail into one temporary buffer
           that becomes the last iov entry */
        int offset = 0;

        sreq->dev.tmpbuf = MPIU_Malloc(size_to_copy);
        sreq->dev.tmpbuf_sz = size_to_copy;
        MPIU_Assert(sreq->dev.tmpbuf);

        for (index = (MXM_REQ_DATA_MAX_IOV - 1); index < n_iov; index++) {
            MPIU_Memcpy((char *) (sreq->dev.tmpbuf) + offset,
                        iov[index].MPID_IOV_BUF, iov[index].MPID_IOV_LEN);
            offset += iov[index].MPID_IOV_LEN;
        }

        (*iov_buf)[MXM_REQ_DATA_MAX_IOV - 1].ptr = sreq->dev.tmpbuf;
        (*iov_buf)[MXM_REQ_DATA_MAX_IOV - 1].length = size_to_copy;
        *iov_count = MXM_REQ_DATA_MAX_IOV;
    }
    MPIU_Free(iov);

  fn_exit:
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
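/*
 * A minimal, stand-alone sketch of the iov-overflow pattern used by
 * _mxm_process_sdtype above: when a message fragments into more entries than
 * the transport's iov limit, keep the first (limit - 1) entries and copy the
 * remaining fragments into one staging buffer that becomes the last entry.
 * The names MAX_HW_IOV and compress_iov() are illustrative, not MPICH or MXM API.
 */
#include <string.h>
#include <stdlib.h>
#include <sys/uio.h>

#define MAX_HW_IOV 8

/* Returns the number of entries written to 'out' (<= MAX_HW_IOV), or -1 on
 * allocation failure; *staging receives a malloc'ed buffer holding the
 * collapsed tail, or NULL if no copy was needed. */
static int compress_iov(const struct iovec *in, int n_in,
                        struct iovec out[MAX_HW_IOV], void **staging)
{
    int i;
    size_t tail_len = 0;
    char *dst;

    *staging = NULL;
    if (n_in <= MAX_HW_IOV) {
        memcpy(out, in, n_in * sizeof(*in));
        return n_in;
    }

    /* The first MAX_HW_IOV - 1 fragments are referenced in place. */
    memcpy(out, in, (MAX_HW_IOV - 1) * sizeof(*in));

    /* Everything else is copied into one contiguous staging buffer. */
    for (i = MAX_HW_IOV - 1; i < n_in; i++)
        tail_len += in[i].iov_len;
    dst = malloc(tail_len);
    if (!dst)
        return -1;
    *staging = dst;
    for (i = MAX_HW_IOV - 1; i < n_in; i++) {
        memcpy(dst, in[i].iov_base, in[i].iov_len);
        dst += in[i].iov_len;
    }

    out[MAX_HW_IOV - 1].iov_base = *staging;
    out[MAX_HW_IOV - 1].iov_len = tail_len;
    return MAX_HW_IOV;
}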
int MPIR_Pack_impl(const void *inbuf, int incount, MPI_Datatype datatype,
                   void *outbuf, MPI_Aint outsize, MPI_Aint *position)
{
    int mpi_errno = MPI_SUCCESS;
    MPI_Aint first, last;
    MPID_Segment *segp;
    int contig;
    MPI_Aint dt_true_lb;
    MPI_Aint data_sz;

    if (incount == 0) {
        goto fn_exit;
    }

    /* Handle contig case quickly */
    if (HANDLE_GET_KIND(datatype) == HANDLE_KIND_BUILTIN) {
        contig = TRUE;
        dt_true_lb = 0;
        data_sz = incount * MPID_Datatype_get_basic_size(datatype);
    }
    else {
        MPID_Datatype *dt_ptr;
        MPID_Datatype_get_ptr(datatype, dt_ptr);
        contig = dt_ptr->is_contig;
        dt_true_lb = dt_ptr->true_lb;
        data_sz = incount * dt_ptr->size;
    }

    if (contig) {
        MPIU_Memcpy((char *) outbuf + *position, (char *) inbuf + dt_true_lb, data_sz);
        *position = (int) ((MPI_Aint) *position + data_sz);
        goto fn_exit;
    }

    /* non-contig case */

    /* TODO: CHECK RETURN VALUES?? */
    /* TODO: SHOULD THIS ALL BE IN A MPID_PACK??? */
    segp = MPID_Segment_alloc();
    MPIU_ERR_CHKANDJUMP1(segp == NULL, mpi_errno, MPI_ERR_OTHER, "**nomem", "**nomem %s", "MPID_Segment");

    mpi_errno = MPID_Segment_init(inbuf, incount, datatype, segp, 0);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

    /* NOTE: the use of buffer values and positions in MPI_Pack and in
     * MPID_Segment_pack are quite different.  See code or docs or something. */
    first = 0;
    last = SEGMENT_IGNORE_LAST;

    /* Ensure that pointer increment fits in a pointer */
    MPID_Ensure_Aint_fits_in_pointer((MPI_VOID_PTR_CAST_TO_MPI_AINT outbuf) + (MPI_Aint) *position);

    MPID_Segment_pack(segp, first, &last, (void *) ((char *) outbuf + *position));

    /* Ensure that calculation fits into an int datatype. */
    MPID_Ensure_Aint_fits_in_int((MPI_Aint) *position + last);

    *position = (int) ((MPI_Aint) *position + last);

    MPID_Segment_free(segp);

  fn_exit:
    return mpi_errno;
  fn_fail:
    goto fn_exit;
}
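/*
 * Caller-side view of the packing logic implemented above, using only the
 * public MPI interface.  The column datatype built here (a strided vector)
 * is just an arbitrary example of a non-contiguous type that exercises the
 * segment path rather than the fast MPIU_Memcpy path.
 */
#include <mpi.h>
#include <stdlib.h>

static void pack_example(MPI_Comm comm)
{
    double grid[10][10];
    MPI_Datatype column;
    int packsize = 0, position = 0;
    int i, j;
    void *packbuf;

    for (i = 0; i < 10; i++)
        for (j = 0; j < 10; j++)
            grid[i][j] = i + 0.1 * j;

    /* One column of a 10x10 row-major array: 10 doubles with stride 10 */
    MPI_Type_vector(10, 1, 10, MPI_DOUBLE, &column);
    MPI_Type_commit(&column);

    /* Ask how large the packed representation can be, then pack into it */
    MPI_Pack_size(1, column, comm, &packsize);
    packbuf = malloc(packsize);
    MPI_Pack(&grid[0][0], 1, column, packbuf, packsize, &position, comm);

    /* 'position' now holds the number of bytes actually written */
    MPI_Type_free(&column);
    free(packbuf);
}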
int MPIDI_CH3_Packetized_send(MPIDI_VC_t * vc, MPID_Request * sreq)
{
    MPIDI_CH3_Pkt_packetized_send_start_t send_start;
    MPIDI_CH3_Pkt_packetized_send_data_t pkt_head;
    MPID_IOV iov[MPID_IOV_LIMIT + 1];   /* slot 0 carries the packet header */

    vbuf *buf;
    int mpi_errno = MPI_SUCCESS;
    int n_iov;
    int msg_buffered = 0;
    int nb;
    int complete;
    int pkt_len;
    int seqnum;

    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_SENDV);
    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_SENDV);

    MPIU_DBG_PRINTF(("ch3_isendv\n"));
    MPIDI_DBG_PRINTF((50, FCNAME, "entering"));

    MPIDI_Pkt_init(&send_start, MPIDI_CH3_PKT_PACKETIZED_SEND_START);
    iov[0].MPID_IOV_LEN = sizeof(MPIDI_CH3_Pkt_packetized_send_start_t);
    iov[0].MPID_IOV_BUF = (void*) &send_start;
    MPIU_Memcpy(&iov[1], sreq->dev.iov, sreq->dev.iov_count * sizeof(MPID_IOV));
    n_iov = 1 + sreq->dev.iov_count;

    GET_SEQ_NUM(sreq->dev.iov[0].MPID_IOV_BUF, seqnum);
    if (-1 == seqnum) {
        MPIDI_VC_FAI_send_seqnum(vc, seqnum);
    }
    MPIDI_Pkt_set_seqnum(&send_start, seqnum);
    MPIDI_Request_set_seqnum(sreq, seqnum);

    send_start.origin_head_size = sreq->dev.iov[0].MPID_IOV_LEN;

    Calculate_IOV_len(iov, n_iov, pkt_len);

    mpi_errno = MPIDI_CH3I_MRAILI_Eager_send(vc, iov, n_iov, pkt_len, &nb, &buf);
    DEBUG_PRINT("[pkt send] mpierr %d, nb %d\n", mpi_errno, nb);

    if (MPI_SUCCESS != mpi_errno && MPI_MRAIL_MSG_QUEUED != mpi_errno) {
        vc->ch.state = MPIDI_CH3I_VC_STATE_FAILED;
        sreq->status.MPI_ERROR = MPI_ERR_INTERN;
        MPIDI_CH3U_Request_complete(sreq);
        goto fn_exit;
    }
    else if (MPI_MRAIL_MSG_QUEUED == mpi_errno) {
        msg_buffered = 1;
    }
    nb -= sizeof(MPIDI_CH3_Pkt_packetized_send_start_t);

    MPIDI_Pkt_init(&pkt_head, MPIDI_CH3_PKT_PACKETIZED_SEND_DATA);
    iov[0].MPID_IOV_LEN = sizeof(MPIDI_CH3_Pkt_packetized_send_data_t);
    iov[0].MPID_IOV_BUF = (void*) &pkt_head;

    do {
        while (!MPIDI_CH3I_Request_adjust_iov(sreq, nb)) {
            MPIDI_VC_FAI_send_seqnum(vc, seqnum);
            MPIDI_Pkt_set_seqnum(&pkt_head, seqnum);
            MPIDI_Request_set_seqnum(sreq, seqnum);

            MPIU_Memcpy((void *) &iov[1], &sreq->dev.iov[sreq->dev.iov_offset],
                        (sreq->dev.iov_count - sreq->dev.iov_offset) * sizeof(MPID_IOV));
            n_iov = sreq->dev.iov_count - sreq->dev.iov_offset + 1;

            Calculate_IOV_len(iov, n_iov, pkt_len);

            mpi_errno = MPIDI_CH3I_MRAILI_Eager_send(vc, iov, n_iov, pkt_len, &nb, &buf);
            DEBUG_PRINT("[istartmsgv] mpierr %d, nb %d\n", mpi_errno, nb);
            MPIU_Assert(NULL == buf->sreq);

            if (MPI_SUCCESS != mpi_errno && MPI_MRAIL_MSG_QUEUED != mpi_errno) {
                vc->ch.state = MPIDI_CH3I_VC_STATE_FAILED;
                sreq->status.MPI_ERROR = MPI_ERR_INTERN;
                MPIDI_CH3U_Request_complete(sreq);
                goto fn_exit;
            }
            else if (MPI_MRAIL_MSG_QUEUED == mpi_errno) {
                msg_buffered = 1;
            }
            nb -= sizeof(MPIDI_CH3_Pkt_packetized_send_data_t);
        }
        if (sreq->dev.OnDataAvail == MPIDI_CH3_ReqHandler_SendReloadIOV) {
            MPIDI_CH3U_Handle_send_req(vc, sreq, &complete);
            nb = 0;
            complete = 0;
        }
        else {
            complete = 1;
        }
    } while (!complete);

    if (msg_buffered) {
        mpi_errno = MPI_MRAIL_MSG_QUEUED;
        buf->sreq = (void *) sreq;
    }
    else {
        MPIDI_CH3U_Handle_send_req(vc, sreq, &complete);
    }

  fn_exit:
    MPIDI_DBG_PRINTF((50, FCNAME, "exiting"));
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_SENDV);
    return mpi_errno;
}
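/*
 * A minimal, stand-alone sketch of the partial-send bookkeeping that the loop
 * above relies on (conceptually what MPIDI_CH3I_Request_adjust_iov does):
 * given that 'nb' bytes of the current vector were accepted by the transport,
 * consume whole entries and trim the first partially sent one.  Returns 1 when
 * the whole vector has been consumed.  adjust_iov() is an illustrative helper,
 * not the MPICH function itself.
 */
#include <stddef.h>
#include <sys/uio.h>

static int adjust_iov(struct iovec *iov, int count, int *offset, size_t nb)
{
    while (*offset < count) {
        if (iov[*offset].iov_len <= nb) {
            /* This entry was sent completely; move to the next one. */
            nb -= iov[*offset].iov_len;
            (*offset)++;
        }
        else {
            /* Partially sent: advance the base pointer and shrink the length. */
            iov[*offset].iov_base = (char *) iov[*offset].iov_base + nb;
            iov[*offset].iov_len -= nb;
            return 0;       /* more data remains to be sent */
        }
    }
    return 1;               /* everything has been handed to the transport */
}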
static int MPIDI_CH3U_GetIPInterface( MPIDU_Sock_ifaddr_t *ifaddr, int *found )
{
    char *buf_ptr, *ptr;
    int buf_len, buf_len_prev;
    int fd;
    MPIDU_Sock_ifaddr_t myifaddr;
    int nfound = 0, foundLocalhost = 0;
    /* We predefine the LSB and MSB localhost addresses */
    unsigned int localhost = 0x0100007f;
#ifdef WORDS_BIGENDIAN
    unsigned int MSBlocalhost = 0x7f000001;
#endif

    if (dbg_ifname < 0) {
        int rc;
        rc = MPL_env2bool( "MPICH_DBG_IFNAME", &dbg_ifname );
        if (rc != 1) dbg_ifname = 0;
    }

    fd = socket(AF_INET, SOCK_DGRAM, 0);
    if (fd < 0) {
        fprintf( stderr, "Unable to open an AF_INET socket\n" );
        return 1;
    }

    /* Use MSB localhost if necessary */
#ifdef WORDS_BIGENDIAN
    localhost = MSBlocalhost;
#endif

    /*
     * Obtain the interface information from the operating system
     *
     * Note: much of this code is borrowed from W. Richard Stevens' book
     * entitled "UNIX Network Programming", Volume 1, Second Edition.  See
     * section 16.6 for details.
     */
    buf_len = NUM_IFREQS * sizeof(struct ifreq);
    buf_len_prev = 0;

    for(;;) {
        struct ifconf ifconf;
        int rc;

        buf_ptr = (char *) MPIU_Malloc(buf_len);
        if (buf_ptr == NULL) {
            fprintf( stderr, "Unable to allocate %d bytes\n", buf_len );
            return 1;
        }

        ifconf.ifc_buf = buf_ptr;
        ifconf.ifc_len = buf_len;

        rc = ioctl(fd, SIOCGIFCONF, &ifconf);
        if (rc < 0) {
            if (errno != EINVAL || buf_len_prev != 0) {
                fprintf( stderr, "Error from ioctl = %d\n", errno );
                perror(" Error is: ");
                return 1;
            }
        }
        else {
            if (ifconf.ifc_len == buf_len_prev) {
                buf_len = ifconf.ifc_len;
                break;
            }

            buf_len_prev = ifconf.ifc_len;
        }

        MPIU_Free(buf_ptr);
        buf_len += NUM_IFREQS * sizeof(struct ifreq);
    }

    /*
     * Now that we've got the interface information, we need to run through
     * the interfaces and check out the ip addresses.  If we find a unique
     * address that is not localhost (127.0.0.1), return that; otherwise
     * return nothing.
     */
    ptr = buf_ptr;

    while (ptr < buf_ptr + buf_len) {
        struct ifreq *ifreq;

        ifreq = (struct ifreq *) ptr;

        if (dbg_ifname) {
            fprintf( stdout, "%10s\t", ifreq->ifr_name );
            fflush(stdout);
        }

        if (ifreq->ifr_addr.sa_family == AF_INET) {
            struct in_addr addr;

            addr = ((struct sockaddr_in *) &(ifreq->ifr_addr))->sin_addr;
            if (dbg_ifname) {
                fprintf( stdout, "IPv4 address = %08x (%s)\n", addr.s_addr, inet_ntoa( addr ) );
            }

            if (addr.s_addr == localhost && dbg_ifname) {
                fprintf( stdout, "Found local host\n" );
            }
            /* Save localhost if we find it.  Let any new interface
               overwrite localhost.  However, if we find more than
               one non-localhost interface, then we'll choose none
               for the interfaces */
            if (addr.s_addr == localhost) {
                foundLocalhost = 1;
                if (nfound == 0) {
                    myifaddr.type = AF_INET;
                    myifaddr.len = 4;
                    MPIU_Memcpy( myifaddr.ifaddr, &addr.s_addr, 4 );
                }
            }
            else {
                nfound++;
                myifaddr.type = AF_INET;
                myifaddr.len = 4;
                MPIU_Memcpy( myifaddr.ifaddr, &addr.s_addr, 4 );
            }
        }
        else {
            if (dbg_ifname) {
                fprintf( stdout, "\n" );
            }
        }

        /*
         * Increment pointer to the next ifreq; some adjustment may be
         * required if the address is an IPv6 address
         */
        /* This is needed for Mac OS X */
#ifdef _SIZEOF_ADDR_IFREQ
        ptr += _SIZEOF_ADDR_IFREQ(*ifreq);
#else
        ptr += sizeof(struct ifreq);

#   if defined(AF_INET6)
        {
            if (ifreq->ifr_addr.sa_family == AF_INET6) {
                ptr += sizeof(struct sockaddr_in6) - sizeof(struct sockaddr);
            }
        }
#   endif
#endif
    }

    MPIU_Free(buf_ptr);
    close(fd);

    /* If we found a unique address, use that */
    if (nfound == 1 || (nfound == 0 && foundLocalhost == 1)) {
        *ifaddr = myifaddr;
        *found = 1;
    }
    else {
        *found = 0;
    }

    return 0;
}
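/*
 * A stand-alone sketch of the same selection policy using getifaddrs(3)
 * instead of the SIOCGIFCONF ioctl: prefer a unique non-loopback IPv4
 * address, fall back to loopback, and report "not found" when several
 * candidates exist.  This is an alternative shown for illustration only,
 * not the MPICH implementation; get_ipv4_interface() is a hypothetical name.
 */
#include <ifaddrs.h>
#include <net/if.h>
#include <netinet/in.h>
#include <string.h>

static int get_ipv4_interface(struct in_addr *out, int *found)
{
    struct ifaddrs *ifa_list, *ifa;
    struct in_addr candidate, loopback_addr;
    int ncandidates = 0, have_loopback = 0;

    *found = 0;
    if (getifaddrs(&ifa_list) != 0)
        return 1;

    for (ifa = ifa_list; ifa != NULL; ifa = ifa->ifa_next) {
        if (ifa->ifa_addr == NULL || ifa->ifa_addr->sa_family != AF_INET)
            continue;
        struct in_addr a = ((struct sockaddr_in *) ifa->ifa_addr)->sin_addr;
        if (ifa->ifa_flags & IFF_LOOPBACK) {
            have_loopback = 1;
            loopback_addr = a;
        }
        else {
            ncandidates++;
            candidate = a;
        }
    }
    freeifaddrs(ifa_list);

    /* Same policy as above: a single non-loopback address wins; loopback is
       used only if nothing else was found; ambiguity reports "not found". */
    if (ncandidates == 1) {
        *out = candidate;
        *found = 1;
    }
    else if (ncandidates == 0 && have_loopback) {
        *out = loopback_addr;
        *found = 1;
    }
    return 0;
}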