/*
 * MPID_nem_lmt_vmsplice_initiate_lmt - sender-side start of a vmsplice LMT.
 *
 * Makes sure a named FIFO exists for this VC (its path is cached in
 * vc->ch.lmt_copy_buf_handle and re-used by later calls), then sends the RTS
 * packet with the FIFO path, including the terminating NUL, as the cookie.
 *
 * vc   - virtual connection the RTS is sent on
 * pkt  - packet header, interpreted as an MPID_nem_pkt_lmt_rts_t
 * sreq - send request (not referenced by this function)
 *
 * Returns MPI_SUCCESS or an MPI error code.  When FIFO creation fails the
 * cached handle is left NULL, so a later call attempts the creation again
 * instead of advertising a path that was never created.
 */
int MPID_nem_lmt_vmsplice_initiate_lmt(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPID_Request *sreq)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_nem_pkt_lmt_rts_t * const rts_pkt = (MPID_nem_pkt_lmt_rts_t *)pkt;
    MPIDI_CH3I_VC *vc_ch = &vc->ch;
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_LMT_VMSPLICE_INITIATE_LMT);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_LMT_VMSPLICE_INITIATE_LMT);

    /* re-use the same pipe per-pair,per-sender */
    if (vc_ch->lmt_copy_buf_handle == NULL) {
        int err;
        int saved_errno;
        int name_len;
        char *pipe_name;

        pipe_name = tempnam(NULL, "lmt_");
        MPIR_ERR_CHKANDJUMP2(!pipe_name, mpi_errno, MPI_ERR_OTHER, "**tempnam", "**tempnam %d %s", errno, MPIU_Strerror(errno));

        /* Create the FIFO under the temporary name first; the handle is only
         * published once the FIFO really exists.  mkfifo() fails rather than
         * re-using a path that someone else created in the meantime. */
        err = mkfifo(pipe_name, 0660);
        saved_errno = errno;    /* snapshot before any later call can overwrite it */
        name_len = (int) strlen(pipe_name) + 1;
        if (err == 0) {
            vc_ch->lmt_copy_buf_handle = MPL_strdup(pipe_name);
        }

        /* tempnam() hands back memory that must be released with the real
         * free(), hence the #undef. */
        /* XXX DJG hack */
#undef free
        free(pipe_name);

        MPIR_ERR_CHKANDJUMP2(err < 0, mpi_errno, MPI_ERR_OTHER, "**mkfifo", "**mkfifo %d %s", saved_errno, MPIU_Strerror(saved_errno));
        /* NOTE(review): if the duplication fails, the FIFO created above
         * stays on the filesystem untracked. */
        MPIR_ERR_CHKANDJUMP2(!vc_ch->lmt_copy_buf_handle, mpi_errno, MPI_ERR_OTHER, "**nomem2", "**nomem2 %d %s", name_len, "lmt_copy_buf_handle");
    }

    /* can't start sending data yet, need full RTS/CTS handshake */
    MPID_nem_lmt_send_RTS(vc, rts_pkt, vc_ch->lmt_copy_buf_handle, strlen(vc_ch->lmt_copy_buf_handle)+1);

fn_fail:
fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_LMT_VMSPLICE_INITIATE_LMT);
    return mpi_errno;
}
/*
 * MPID_nem_lmt_dma_initiate_lmt - sender-side start of a DMA (knem) LMT.
 *
 * Allocates a knem_cookie_t into sreq->ch.s_cookie, passes it to
 * send_sreq_data() together with the request, and then sends the RTS packet
 * with that cookie as its payload.
 *
 * vc   - virtual connection the RTS is sent on
 * pkt  - packet header, interpreted as an MPID_nem_pkt_lmt_rts_t
 * sreq - send request; its ch.s_cookie field is set by this function
 *
 * Returns MPI_SUCCESS or an MPI error code.  On failure the cookie
 * allocation is reaped and sreq->ch.s_cookie is reset to NULL.
 */
int MPID_nem_lmt_dma_initiate_lmt(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, MPID_Request *sreq)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_nem_pkt_lmt_rts_t * const rts_pkt = (MPID_nem_pkt_lmt_rts_t *)pkt;
    MPIU_CHKPMEM_DECL(1);
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_LMT_DMA_INITIATE_LMT);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_LMT_DMA_INITIATE_LMT);

    MPIU_CHKPMEM_MALLOC(sreq->ch.s_cookie, knem_cookie_t *, sizeof(knem_cookie_t), mpi_errno, "s_cookie");

    mpi_errno = send_sreq_data(vc, sreq, sreq->ch.s_cookie);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

    MPID_nem_lmt_send_RTS(vc, rts_pkt, sreq->ch.s_cookie, sizeof(knem_cookie_t));

fn_exit:
    MPIU_CHKPMEM_COMMIT();
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_LMT_DMA_INITIATE_LMT);
    return mpi_errno;
fn_fail:
    MPIU_CHKPMEM_REAP();
    /* The reap releases the cookie registered above (per MPICH's CHKPMEM
     * macros), so do not leave a stale pointer behind in the request. */
    sreq->ch.s_cookie = NULL;
    goto fn_exit;
}
int MPID_nem_ib_lmt_initiate_lmt(struct MPIDI_VC *vc, union MPIDI_CH3_Pkt *rts_pkt, struct MPID_Request *req) { int mpi_errno = MPI_SUCCESS; int dt_contig; MPIDI_msg_sz_t data_sz; MPID_Datatype *dt_ptr; MPI_Aint dt_true_lb; #if 0 MPID_nem_ib_vc_area *vc_ib = VC_IB(vc); #endif MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_IB_LMT_INITIATE_LMT); MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_IB_LMT_INITIATE_LMT); dprintf("lmt_initiate_lmt,enter,%d->%d,req=%p\n", MPID_nem_ib_myrank, vc->pg_rank, req); /* obtain dt_true_lb */ /* see MPIDI_Datatype_get_info(in, in, out, out, out, out) (in src/mpid/ch3/include/mpidimpl.h) */ MPIDI_Datatype_get_info(req->dev.user_count, req->dev.datatype, dt_contig, data_sz, dt_ptr, dt_true_lb); /* FIXME: who frees s_cookie_buf? */ /* malloc memory area for cookie. auto variable is NG because isend does not copy payload */ MPID_nem_ib_lmt_cookie_t *s_cookie_buf = (MPID_nem_ib_lmt_cookie_t *) MPIU_Malloc(sizeof(MPID_nem_ib_lmt_cookie_t)); /* remember address to "free" when receiving DONE from receiver */ req->ch.s_cookie = s_cookie_buf; /* see MPIDI_CH3_PktHandler_RndvClrToSend (in src/mpid/ch3/src/ch3u_rndv.c) */ //assert(dt_true_lb == 0); void *write_from_buf; if (dt_contig) { write_from_buf = (void *) ((char *) req->dev.user_buf + dt_true_lb); } else { /* see MPIDI_CH3_EagerNoncontigSend (in ch3u_eager.c) */ req->dev.segment_ptr = MPID_Segment_alloc(); MPIU_ERR_CHKANDJUMP((req->dev.segment_ptr == NULL), mpi_errno, MPI_ERR_OTHER, "**outofmemory"); MPID_Segment_init(req->dev.user_buf, req->dev.user_count, req->dev.datatype, req->dev.segment_ptr, 0); req->dev.segment_first = 0; req->dev.segment_size = data_sz; MPIDI_msg_sz_t last; last = req->dev.segment_size; /* segment_size is byte offset */ MPIU_Assert(last > 0); REQ_FIELD(req, lmt_pack_buf) = MPIU_Malloc((size_t) req->dev.segment_size); MPIU_ERR_CHKANDJUMP(!REQ_FIELD(req, lmt_pack_buf), mpi_errno, MPI_ERR_OTHER, "**outofmemory"); MPID_Segment_pack(req->dev.segment_ptr, req->dev.segment_first, &last, (char 
*) (REQ_FIELD(req, lmt_pack_buf))); MPIU_Assert(last == req->dev.segment_size); write_from_buf = REQ_FIELD(req, lmt_pack_buf); } dprintf ("lmt_initate_lmt,dt_contig=%d,write_from_buf=%p,req->dev.user_buf=%p,REQ_FIELD(req, lmt_pack_buf)=%p\n", dt_contig, write_from_buf, req->dev.user_buf, REQ_FIELD(req, lmt_pack_buf)); #ifdef HAVE_LIBDCFA #else s_cookie_buf->addr = write_from_buf; #endif /* put sz, see MPID_nem_lmt_RndvSend (in src/mpid/ch3/channels/nemesis/src/mpid_nem_lmt.c) */ /* TODO remove sz field * pkt_RTS_handler (in src/mpid/ch3/channels/nemesis/src/mpid_nem_lmt.c) * rreq->ch.lmt_data_sz = rts_pkt->data_sz; */ //s_cookie_buf->sz = (uint32_t)((MPID_nem_pkt_lmt_rts_t*)rts_pkt)->data_sz; /* preserve and put tail, because tail magic is written on the tail of payload * because we don't want to add another SGE or RDMA command */ MPIU_Assert(((MPID_nem_pkt_lmt_rts_t *) rts_pkt)->data_sz == data_sz); s_cookie_buf->tail = *((uint8_t *) ((uint8_t *) write_from_buf + data_sz - sizeof(uint8_t))); /* prepare magic */ //*((uint32_t*)(write_from_buf + data_sz - sizeof(tailmagic_t))) = MPID_NEM_IB_COM_MAGIC; #if 0 /* moving to packet header */ /* embed RDMA-write-to buffer occupancy information */ dprintf("lmt_initiate_lmt,rsr_seq_num_tail=%d\n", vc_ib->ibcom->rsr_seq_num_tail); /* embed RDMA-write-to buffer occupancy information */ s_cookie_buf->seq_num_tail = vc_ib->ibcom->rsr_seq_num_tail; /* remember the last one sent */ vc_ib->ibcom->rsr_seq_num_tail_last_sent = vc_ib->ibcom->rsr_seq_num_tail; #endif int post_num; uint32_t max_msg_sz; MPID_nem_ib_vc_area *vc_ib = VC_IB(vc); MPID_nem_ib_com_get_info_conn(vc_ib->sc->fd, MPID_NEM_IB_COM_INFOKEY_PATTR_MAX_MSG_SZ, &max_msg_sz, sizeof(uint32_t)); /* Type of max_msg_sz is uint32_t. 
*/ post_num = (data_sz + (long) max_msg_sz - 1) / (long) max_msg_sz; s_cookie_buf->max_msg_sz = max_msg_sz; s_cookie_buf->seg_seq_num = 1; s_cookie_buf->seg_num = post_num; REQ_FIELD(req, buf.from) = write_from_buf; REQ_FIELD(req, data_sz) = data_sz; REQ_FIELD(req, seg_seq_num) = 1; // only send 1st-segment, even if there are some segments. REQ_FIELD(req, seg_num) = post_num; REQ_FIELD(req, max_msg_sz) = max_msg_sz; long length; if (post_num > 1) { length = max_msg_sz; } else { length = data_sz; } /* put IB rkey */ struct MPID_nem_ib_com_reg_mr_cache_entry_t *mr_cache = MPID_nem_ib_com_reg_mr_fetch(write_from_buf, length, 0, MPID_NEM_IB_COM_REG_MR_GLOBAL); MPIU_ERR_CHKANDJUMP(!mr_cache, mpi_errno, MPI_ERR_OTHER, "**MPID_nem_ib_com_reg_mr_fetch"); struct ibv_mr *mr = mr_cache->mr; REQ_FIELD(req, lmt_mr_cache) = (void *) mr_cache; #ifdef HAVE_LIBDCFA s_cookie_buf->addr = (void *) mr->host_addr; dprintf("lmt_initiate_lmt,s_cookie_buf->addr=%p\n", s_cookie_buf->addr); #endif s_cookie_buf->rkey = mr->rkey; dprintf("lmt_initiate_lmt,tail=%02x,mem-tail=%p,%02x,sz=%ld,raddr=%p,rkey=%08x\n", s_cookie_buf->tail, write_from_buf + data_sz - sizeof(uint8_t), *((uint8_t *) (write_from_buf + data_sz - sizeof(uint8_t))), data_sz, s_cookie_buf->addr, s_cookie_buf->rkey); /* send cookie. rts_pkt as the MPI-header, s_cookie_buf as the payload */ MPID_nem_lmt_send_RTS(vc, (MPID_nem_pkt_lmt_rts_t *) rts_pkt, s_cookie_buf, sizeof(MPID_nem_ib_lmt_cookie_t)); fn_exit: MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_IB_LMT_INITIATE_LMT); return mpi_errno; fn_fail: goto fn_exit; }