static void rxd_handle_connect_ack(struct rxd_ep *ep, struct ofi_ctrl_hdr *ctrl, struct rxd_rx_buf *rx_buf) { struct rxd_peer *peer; struct dlist_entry *match; struct rxd_tx_entry *tx_entry; FI_INFO(&rxd_prov, FI_LOG_EP_CTRL, "connect ack- msg_id: %" PRIu64 ", segno: %d\n", ctrl->msg_id, ctrl->seg_no); match = dlist_find_first_match(&ep->tx_entry_list, rxd_conn_msg_match, ctrl); if (!match) { FI_INFO(&rxd_prov, FI_LOG_EP_CTRL, "no matching connect\n"); goto out; } tx_entry = container_of(match, struct rxd_tx_entry, entry); peer = rxd_ep_getpeer_info(ep, tx_entry->peer); peer->state = CMAP_CONNECTED; peer->conn_data = ctrl->conn_id; dlist_remove(match); rxd_tx_entry_done(ep, tx_entry); out: rxd_ep_repost_buff(rx_buf); }
static void rxd_handle_dup_datastart(struct rxd_ep *ep, struct ofi_ctrl_hdr *ctrl, struct rxd_rx_buf *rx_buf) { struct dlist_entry *item; struct rxd_rx_entry *rx_entry; struct rxd_peer *peer; peer = rxd_ep_getpeer_info(ep, ctrl->conn_id); item = dlist_find_first_match(&ep->rx_entry_list, rxd_rx_entry_match, ctrl); if (!item) { /* for small (1-packet) messages we may have situation * when receiver completed operation and destroyed * rx_entry, but ack is lost (not delivered to sender). * in this case just send ack with zero window to * allow sender complete operation on sender side */ rxd_ep_reply_ack(ep, ctrl, ofi_ctrl_ack, 0, UINT64_MAX, peer->conn_data, ctrl->conn_id); return; } FI_INFO(&rxd_prov, FI_LOG_EP_CTRL, "duplicate start-data: msg_id: %" PRIu64 ", seg_no: %d\n", ctrl->msg_id, ctrl->seg_no); rx_entry = container_of(item, struct rxd_rx_entry, entry); rxd_ep_reply_ack(ep, ctrl, ofi_ctrl_ack, rx_entry->credits, rx_entry->key, peer->conn_data, ctrl->conn_id); return; }
/* fi_readmsg entry point: queue an RMA read request toward the peer.
 * If the peer's address has not been published yet, kick off a connect
 * message and ask the caller to retry (-FI_EAGAIN).  Returns 0 on
 * success or a negative fabric error code.
 */
ssize_t rxd_ep_readmsg(struct fid_ep *ep, const struct fi_msg_rma *msg,
		       uint64_t flags)
{
	struct rxd_tx_entry *tx_entry;
	struct rxd_ep *rxd_ep;
	struct rxd_peer *peer;
	uint64_t peer_addr;
	ssize_t ret;

	rxd_ep = container_of(ep, struct rxd_ep, ep);
	peer_addr = rxd_av_get_dg_addr(rxd_ep->av, msg->addr);
	peer = rxd_ep_getpeer_info(rxd_ep, peer_addr);

#if ENABLE_DEBUG
	if (msg->iov_count > RXD_IOV_LIMIT ||
	    msg->rma_iov_count > RXD_IOV_LIMIT)
		return -FI_EINVAL;
#endif

	rxd_ep_lock_if_required(rxd_ep);

	if (!peer->addr_published) {
		/* Not connected yet: start the handshake and retry later. */
		ret = rxd_ep_post_conn_msg(rxd_ep, peer, peer_addr);
		if (!ret)
			ret = -FI_EAGAIN;
		goto unlock;
	}

	tx_entry = rxd_tx_entry_acquire(rxd_ep, peer);
	if (!tx_entry) {
		ret = -FI_EAGAIN;
		goto unlock;
	}

	dlist_init(&tx_entry->pkt_list);
	tx_entry->op_type = RXD_TX_READ_REQ;
	tx_entry->read_req.msg = *msg;
	tx_entry->flags = flags;
	tx_entry->peer = peer_addr;
	rxd_ep_copy_msg_iov(msg->msg_iov, &tx_entry->read_req.dst_iov[0],
			    msg->iov_count);
	rxd_ep_copy_rma_iov(msg->rma_iov, &tx_entry->read_req.src_iov[0],
			    msg->rma_iov_count);

	ret = rxd_ep_post_start_msg(rxd_ep, peer, ofi_op_read_req, tx_entry);
	if (ret) {
		rxd_tx_entry_release(rxd_ep, tx_entry);
		goto unlock;
	}

	dlist_insert_tail(&tx_entry->entry, &rxd_ep->tx_entry_list);
unlock:
	rxd_ep_unlock_if_required(rxd_ep);
	return ret;
}
void rxd_handle_recv_comp(struct rxd_ep *ep, struct fi_cq_msg_entry *comp) { struct ofi_ctrl_hdr *ctrl; struct rxd_rx_buf *rx_buf; struct rxd_peer *peer; FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "got recv completion\n"); assert(rxd_reposted_bufs); rxd_reposted_bufs--; rx_buf = container_of(comp->op_context, struct rxd_rx_buf, context); ctrl = (struct ofi_ctrl_hdr *) rx_buf->buf; peer = rxd_ep_getpeer_info(ep, ctrl->conn_id); if (ctrl->version != OFI_CTRL_VERSION) { FI_WARN(&rxd_prov, FI_LOG_EP_CTRL, "ctrl version mismatch\n"); return; } switch (ctrl->type) { case ofi_ctrl_connreq: rxd_handle_conn_req(ep, ctrl, comp, rx_buf); break; case ofi_ctrl_ack: rxd_handle_ack(ep, ctrl, rx_buf); break; case ofi_ctrl_discard: rxd_handle_discard(ep, ctrl, rx_buf); break; case ofi_ctrl_connresp: rxd_handle_connect_ack(ep, ctrl, rx_buf); break; case ofi_ctrl_start_data: rxd_handle_start_data(ep, peer, ctrl, comp, rx_buf); break; case ofi_ctrl_data: rxd_handle_data(ep, peer, ctrl, comp, rx_buf); break; default: rxd_ep_repost_buff(rx_buf); FI_WARN(&rxd_prov, FI_LOG_EP_CTRL, "invalid ctrl type \n", ctrl->type); } rxd_check_waiting_rx(ep); }
/* fi_readmsg entry point (util_ep variant): allocate a read-request
 * tx_entry and start the transfer.  If the peer is not yet connected,
 * initiate the connection and return -FI_EAGAIN so the caller retries;
 * a connect already in flight (-FI_EALREADY) drives progress first.
 */
ssize_t rxd_ep_readmsg(struct fid_ep *ep, const struct fi_msg_rma *msg,
		       uint64_t flags)
{
	struct rxd_tx_entry *tx_entry;
	struct rxd_peer *peer;
	struct rxd_ep *rxd_ep;
	uint64_t peer_addr;
	ssize_t ret;

	rxd_ep = container_of(ep, struct rxd_ep, util_ep.ep_fid);
	peer_addr = rxd_av_dg_addr(rxd_ep_av(rxd_ep), msg->addr);
	peer = rxd_ep_getpeer_info(rxd_ep, peer_addr);

	fastlock_acquire(&rxd_ep->lock);

	if (peer->state != CMAP_CONNECTED) {
		ret = rxd_ep_connect(rxd_ep, peer, peer_addr);
		fastlock_release(&rxd_ep->lock);
		if (ret == -FI_EALREADY) {
			/* Connect in flight: drive progress, then retry. */
			rxd_ep->util_ep.progress(&rxd_ep->util_ep);
			ret = -FI_EAGAIN;
		}
		if (!ret)
			ret = -FI_EAGAIN;
		return ret;
	}

	tx_entry = rxd_tx_entry_alloc(rxd_ep, peer, peer_addr, flags,
				      RXD_TX_READ_REQ);
	if (!tx_entry) {
		ret = -FI_EAGAIN;
	} else {
		tx_entry->read_req.msg = *msg;
		memcpy(&tx_entry->read_req.dst_iov[0], msg->msg_iov,
		       sizeof(*msg->msg_iov) * msg->iov_count);
		memcpy(&tx_entry->read_req.src_iov[0], msg->rma_iov,
		       sizeof(*msg->rma_iov) * msg->rma_iov_count);

		ret = rxd_ep_start_xfer(rxd_ep, peer, ofi_op_read_req,
					tx_entry);
		if (ret)
			rxd_tx_entry_free(rxd_ep, tx_entry);
	}

	fastlock_release(&rxd_ep->lock);
	return ret;
}
int rxd_handle_conn_req(struct rxd_ep *ep, struct ofi_ctrl_hdr *ctrl, struct fi_cq_msg_entry *comp, struct rxd_rx_buf *rx_buf) { int ret; void *addr; size_t addrlen; uint64_t peer; struct rxd_pkt_data *pkt_data; struct rxd_peer *peer_info; rxd_ep_lock_if_required(ep); pkt_data = (struct rxd_pkt_data *) ctrl; addr = pkt_data->data; addrlen = ctrl->seg_size; ret = rxd_av_dg_reverse_lookup(ep->av, ctrl->rx_key, addr, addrlen, &peer); if (ret == -FI_ENODATA) { ret = rxd_av_insert_dg_av(ep->av, addr); assert(ret == 1); ret = rxd_av_dg_reverse_lookup(ep->av, ctrl->rx_key, addr, addrlen, &peer); assert(ret == 0); } peer_info = rxd_ep_getpeer_info(ep, peer); if (!peer_info->addr_published) { peer_info->addr_published = 1; peer_info->conn_initiated = 1; peer_info->conn_data = ctrl->conn_id; peer_info->exp_msg_id++; } rxd_ep_reply_ack(ep, ctrl, ofi_ctrl_connresp, 0, ctrl->conn_id, peer, peer); rxd_ep_repost_buff(rx_buf); rxd_ep_unlock_if_required(ep); return ret; }
static void rxd_handle_dup_datastart(struct rxd_ep *ep, struct ofi_ctrl_hdr *ctrl, struct rxd_rx_buf *rx_buf) { struct dlist_entry *item; struct rxd_rx_entry *rx_entry; struct rxd_peer *peer; item = dlist_find_first_match(&ep->rx_entry_list, rxd_rx_entry_match, ctrl); if (!item) return; FI_INFO(&rxd_prov, FI_LOG_EP_CTRL, "duplicate start-data: msg_id: %" PRIu64 ", seg_no: %d\n", ctrl->msg_id, ctrl->seg_no); rx_entry = container_of(item, struct rxd_rx_entry, entry); peer = rxd_ep_getpeer_info(ep, ctrl->conn_id); rxd_ep_reply_ack(ep, ctrl, ofi_ctrl_ack, rx_entry->window, rx_entry->key, peer->conn_data, ctrl->conn_id); return; }
static void rxd_handle_conn_req(struct rxd_ep *ep, struct ofi_ctrl_hdr *ctrl, struct fi_cq_msg_entry *comp, struct rxd_rx_buf *rx_buf) { struct rxd_pkt_data *pkt_data; struct rxd_peer *peer_info; fi_addr_t dg_fiaddr; void *addr; int ret; FI_INFO(&rxd_prov, FI_LOG_EP_DATA, "conn req - rx_key: %" PRIu64 "\n", ctrl->rx_key); pkt_data = (struct rxd_pkt_data *) ctrl; addr = pkt_data->data; if (ctrl->seg_size > RXD_MAX_DGRAM_ADDR) { FI_WARN(&rxd_prov, FI_LOG_EP_DATA, "addr too large\n"); goto repost; } ret = rxd_av_insert_dg_addr(rxd_ep_av(ep), ctrl->rx_key, addr, &dg_fiaddr); if (ret) { FI_WARN(&rxd_prov, FI_LOG_EP_DATA, "failed to insert peer address\n"); goto repost; } peer_info = rxd_ep_getpeer_info(ep, dg_fiaddr); if (peer_info->state != CMAP_CONNECTED) { peer_info->state = CMAP_CONNECTED; peer_info->conn_data = ctrl->conn_id; peer_info->exp_msg_id++; } rxd_ep_reply_ack(ep, ctrl, ofi_ctrl_connresp, 0, ctrl->conn_id, dg_fiaddr, dg_fiaddr); repost: rxd_ep_repost_buff(rx_buf); }
void rxd_handle_connect_ack(struct rxd_ep *ep, struct ofi_ctrl_hdr *ctrl, struct rxd_rx_buf *rx_buf) { struct rxd_peer *peer; struct dlist_entry *match; struct rxd_tx_entry *tx_entry; rxd_ep_lock_if_required(ep); match = dlist_find_first_match(&ep->tx_entry_list, rxd_conn_msg_match, ctrl); if (!match) goto out; tx_entry = container_of(match, struct rxd_tx_entry, entry); peer = rxd_ep_getpeer_info(ep, tx_entry->peer); peer->addr_published = 1; peer->conn_data = ctrl->conn_id; dlist_remove(match); rxd_tx_entry_done(ep, tx_entry); out: rxd_ep_repost_buff(rx_buf); rxd_ep_unlock_if_required(ep); }
void rxd_handle_recv_comp(struct rxd_cq *cq, struct fi_cq_msg_entry *comp, int is_unexpected) { struct rxd_ep *ep; struct ofi_ctrl_hdr *ctrl; struct rxd_rx_buf *rx_buf; struct rxd_peer *peer; FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "got recv completion\n"); rx_buf = container_of(comp->op_context, struct rxd_rx_buf, context); ctrl = (struct ofi_ctrl_hdr *) rx_buf->buf; ep = rx_buf->ep; peer = rxd_ep_getpeer_info(ep, ctrl->conn_id); if (ctrl->type != ofi_ctrl_ack && ctrl->type != ofi_ctrl_nack) { FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "got data pkt - msg_id:[%p - %d], type: %d on buf %p [unexp: %d]\n", ctrl->msg_id, ctrl->seg_no, ctrl->type, rx_buf, is_unexpected); } else { FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "got ack pkt - msg_id:[%p - %d], type: %d on buf %p [unexp: %d]\n", ctrl->msg_id, ctrl->seg_no, ctrl->type, rx_buf, is_unexpected); } if (ctrl->version != OFI_CTRL_VERSION) { FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "ctrl version mismatch\n"); return; } switch(ctrl->type) { case ofi_ctrl_connreq: rxd_handle_conn_req(ep, ctrl, comp, rx_buf); break; case ofi_ctrl_ack: rxd_handle_ack(ep, ctrl, rx_buf); break; case ofi_ctrl_nack: rxd_handle_nack(ep, ctrl, rx_buf); break; case ofi_ctrl_discard: rxd_handle_discard(ep, ctrl, rx_buf); break; case ofi_ctrl_connresp: rxd_handle_connect_ack(ep, ctrl, rx_buf); break; case ofi_ctrl_start_data: FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "start data msg for tx: %p\n", ctrl->msg_id); rxd_handle_start_data(ep, peer, ctrl, comp, rx_buf); break; case ofi_ctrl_data: FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "data msg for tx: %p, %d \n", ctrl->msg_id, ctrl->seg_no); rxd_handle_data(ep, peer, ctrl, comp, rx_buf); break; default: FI_WARN(&rxd_prov, FI_LOG_EP_CTRL, "invalid ctrl type \n", ctrl->type); } rxd_ep_lock_if_required(ep); rxd_check_waiting_rx(ep); rxd_ep_unlock_if_required(ep); return; }