示例#1
0
/*
 * Queue a gather send, described by `count` iovec entries, on a message
 * endpoint.
 *
 * Returns -FI_EAGAIN when the TX free-WQE list is empty, otherwise 0 once
 * the request has been appended to the endpoint's posted list.  `desc` and
 * `dest_addr` are not consulted by this function.
 *
 * NOTE(review): `count` is not validated here -- iov[0] is read
 * unconditionally and `count` is not compared against the capacity of
 * ms_iov.  Presumably callers guarantee a sane, non-zero count; confirm.
 */
ssize_t
usdf_msg_sendv(struct fid_ep *fep, const struct iovec *iov, void **desc,
                 size_t count, fi_addr_t dest_addr, void *context)
{
	struct usdf_ep *ep = ep_ftou(fep);
	struct usdf_tx *tx = ep->ep_tx;
	struct usdf_domain *dom = ep->ep_domain;
	struct usdf_msg_qe *qe;
	const struct iovec *src;
	uint64_t op_flags;
	size_t total = 0;
	size_t idx;

	/* no free work-queue entry: ask the caller to retry later */
	if (TAILQ_EMPTY(&tx->t.msg.tx_free_wqe))
		return -FI_EAGAIN;

	pthread_spin_lock(&dom->dom_progress_lock);

	/* claim the first free WQE */
	qe = TAILQ_FIRST(&tx->t.msg.tx_free_wqe);
	TAILQ_REMOVE(&tx->t.msg.tx_free_wqe, qe, ms_link);

	qe->ms_context = context;

	/* copy the caller's segment list and add up the message length */
	for (idx = 0; idx < count; idx++) {
		src = &iov[idx];
		qe->ms_iov[idx].iov_base = (void *)src->iov_base;
		qe->ms_iov[idx].iov_len = src->iov_len;
		total += src->iov_len;
	}
	qe->ms_last_iov = count - 1;
	qe->ms_length = total;
	qe->ms_resid = total;

	/* the transmit cursor starts at the beginning of the first segment */
	qe->ms_cur_iov = 0;
	qe->ms_cur_ptr = iov[0].iov_base;
	qe->ms_iov_resid = iov[0].iov_len;

	/* signal a completion if the EP default or the TX op_flags ask for it */
	op_flags = ep->ep_tx->tx_attr.op_flags;
	if (ep->ep_tx_dflt_signal_comp || (op_flags & FI_COMPLETION))
		qe->ms_signal_comp = 1;
	else
		qe->ms_signal_comp = 0;

	/* add send to EP, and add EP to TX list if not present */
	TAILQ_INSERT_TAIL(&ep->e.msg.ep_posted_wqe, qe, ms_link);
	usdf_msg_ep_ready(ep);

	pthread_spin_unlock(&dom->dom_progress_lock);

	/* drive progress outside the lock */
	usdf_domain_progress(dom);

	return 0;
}
示例#2
0
/*
 * Post an inject send of `len` bytes on a message endpoint.
 *
 * The payload is copied into the WQE's private inject buffer before this
 * function returns, and the WQE is set up so that both its segment list and
 * its transmit cursor refer to that private copy.  No completion is
 * signalled for the request and its context is NULL.
 *
 * Returns -EINVAL if `len` exceeds USDF_MSG_MAX_INJECT_SIZE, -FI_EAGAIN if
 * the TX free-WQE list is empty, and 0 once the send has been queued.
 * `dest_addr` is not consulted.
 */
ssize_t
usdf_msg_inject(struct fid_ep *fep, const void *buf, size_t len,
		fi_addr_t dest_addr)
{
	struct usdf_ep *ep;
	struct usdf_tx *tx;
	struct usdf_msg_qe *wqe;
	struct usdf_domain *udp;

	if (len > USDF_MSG_MAX_INJECT_SIZE) {
		USDF_WARN_SYS(EP_DATA,
				"cannot inject more than inject_size bytes\n");
		return -EINVAL;
	}

	ep = ep_ftou(fep);
	tx = ep->ep_tx;
	udp = ep->ep_domain;

	if (TAILQ_EMPTY(&tx->t.msg.tx_free_wqe)) {
		return -FI_EAGAIN;
	}

	pthread_spin_lock(&udp->dom_progress_lock);

	wqe = usdf_msg_get_tx_wqe(tx);

	wqe->ms_context = NULL;

	/* stage the payload in the WQE so the caller's buffer is not needed */
	memcpy(wqe->ms_inject_buf, buf, len);
	wqe->ms_iov[0].iov_base = wqe->ms_inject_buf;
	wqe->ms_iov[0].iov_len = len;
	wqe->ms_last_iov = 0;

	/*
	 * Start the transmit cursor at the staged copy, not at the caller's
	 * `buf`: the cursor must agree with ms_iov[0], and the caller's memory
	 * must not be read once this function has returned.
	 */
	wqe->ms_cur_iov = 0;
	wqe->ms_cur_ptr = wqe->ms_iov[0].iov_base;
	wqe->ms_iov_resid = len;
	wqe->ms_resid = len;
	wqe->ms_length = len;

	/* fi_inject() never signals a completion */
	wqe->ms_signal_comp = 0;

	/* add send to EP, and add EP to TX list if not present */
	TAILQ_INSERT_TAIL(&ep->e.msg.ep_posted_wqe, wqe, ms_link);
	usdf_msg_ep_ready(ep);

	pthread_spin_unlock(&udp->dom_progress_lock);

	usdf_domain_progress(udp);

	return 0;
}
示例#3
0
static inline void
usdf_msg_process_ack(struct usdf_ep *ep, uint16_t seq)
{
	struct usdf_cq_hard *hcq;
	struct usdf_msg_qe *wqe;
	struct usdf_tx *tx;
	uint16_t max_ack;
	unsigned credits;

	tx = ep->ep_tx;

	/* don't try to ACK what we don't think we've sent */
	max_ack = ep->e.msg.ep_next_tx_seq - 1;
	if (RUDP_SEQ_GT(seq, max_ack)) {
		seq = max_ack;
	}

	hcq = tx->t.msg.tx_hcq;
	while (!TAILQ_EMPTY(&ep->e.msg.ep_sent_wqe)) {
		wqe = TAILQ_FIRST(&ep->e.msg.ep_sent_wqe);
		if (RUDP_SEQ_LE(wqe->ms_last_seq, seq)) {
			TAILQ_REMOVE(&ep->e.msg.ep_sent_wqe, wqe, ms_link);
			USDF_DBG_SYS(EP_DATA, "send complete, signal_comp=%u\n", wqe->ms_signal_comp);
			if (wqe->ms_signal_comp)
				hcq->cqh_post(hcq, wqe->ms_context,
					      wqe->ms_length, FI_SUCCESS,
					      FI_MSG | FI_SEND);

			usdf_msg_put_tx_wqe(tx, wqe);
		} else {
			break;
		}
	}

	credits = RUDP_SEQ_DIFF(seq, ep->e.msg.ep_last_rx_ack);
	if (ep->e.msg.ep_seq_credits == 0 && credits > 0 &&
			!TAILQ_EMPTY(&ep->e.msg.ep_posted_wqe)) {
		usdf_msg_ep_ready(ep);
	}
	ep->e.msg.ep_seq_credits += credits;
	ep->e.msg.ep_last_rx_ack = seq;

	/* If all ACKed, cancel timer, else reset it */
	if (seq == max_ack) {
		usdf_timer_cancel(ep->ep_domain->dom_fabric,
				ep->e.msg.ep_ack_timer);
	} else {
		usdf_timer_reset(ep->ep_domain->dom_fabric,
			ep->e.msg.ep_ack_timer, USDF_RUDP_ACK_TIMEOUT);
	}
}
示例#4
0
/*
 * Queue a single-buffer send of `len` bytes starting at `buf` on a message
 * endpoint.
 *
 * Returns -FI_EAGAIN when the TX free-WQE list is empty, otherwise 0 once
 * the request has been appended to the endpoint's posted list.  `desc` and
 * `dest_addr` are not consulted by this function.  The buffer is referenced,
 * not copied.
 */
ssize_t
usdf_msg_send(struct fid_ep *fep, const void *buf, size_t len, void *desc,
		fi_addr_t dest_addr, void *context)
{
	struct usdf_ep *ep = ep_ftou(fep);
	struct usdf_tx *tx = ep->ep_tx;
	struct usdf_domain *dom = ep->ep_domain;
	struct usdf_msg_qe *qe;
	uint64_t op_flags;

	/* no free work-queue entry: ask the caller to retry later */
	if (TAILQ_EMPTY(&tx->t.msg.tx_free_wqe))
		return -FI_EAGAIN;

	pthread_spin_lock(&dom->dom_progress_lock);

	qe = usdf_msg_get_tx_wqe(tx);

	qe->ms_context = context;

	/* one segment covering the whole user buffer */
	qe->ms_iov[0].iov_base = (void *)buf;
	qe->ms_iov[0].iov_len = len;
	qe->ms_last_iov = 0;
	qe->ms_length = len;
	qe->ms_resid = len;

	/* transmit cursor at the start of that segment */
	qe->ms_cur_iov = 0;
	qe->ms_cur_ptr = buf;
	qe->ms_iov_resid = len;

	/* signal a completion if the EP default or the TX op_flags ask for it */
	op_flags = ep->ep_tx->tx_attr.op_flags;
	if (ep->ep_tx_dflt_signal_comp || (op_flags & FI_COMPLETION))
		qe->ms_signal_comp = 1;
	else
		qe->ms_signal_comp = 0;

	/* add send to EP, and add EP to TX list if not present */
	TAILQ_INSERT_TAIL(&ep->e.msg.ep_posted_wqe, qe, ms_link);
	usdf_msg_ep_ready(ep);

	pthread_spin_unlock(&dom->dom_progress_lock);

	/* drive progress outside the lock */
	usdf_domain_progress(dom);

	return 0;
}
示例#5
0
文件: usdf_msg.c 项目: ORNL/ompi
static inline void
usdf_msg_process_ack(struct usdf_ep *ep, uint16_t seq)
{
	struct usdf_cq_hard *hcq;
	struct usdf_msg_qe *wqe;
	uint16_t max_ack;
	unsigned credits;

	/* don't try to ACK what we don't think we've sent */
	max_ack = ep->e.msg.ep_next_tx_seq - 1;
	if (RUDP_SEQ_GT(seq, max_ack)) {
		seq = max_ack;
	}

	hcq = ep->ep_tx->t.msg.tx_hcq;
	while (!TAILQ_EMPTY(&ep->e.msg.ep_sent_wqe)) {
		wqe = TAILQ_FIRST(&ep->e.msg.ep_sent_wqe);
		if (RUDP_SEQ_LE(wqe->ms_last_seq, seq)) {
			TAILQ_REMOVE(&ep->e.msg.ep_sent_wqe, wqe, ms_link);
			hcq->cqh_post(hcq, wqe->ms_context, wqe->ms_length);

			TAILQ_INSERT_HEAD(&ep->ep_tx->t.msg.tx_free_wqe,
					wqe, ms_link);
		} else {
			break;
		}
	}

	credits = RUDP_SEQ_DIFF(seq, ep->e.msg.ep_last_rx_ack);
	if (ep->e.msg.ep_seq_credits == 0 && credits > 0 &&
			!TAILQ_EMPTY(&ep->e.msg.ep_posted_wqe)) {
		usdf_msg_ep_ready(ep);
	}
	ep->e.msg.ep_seq_credits += credits;
	ep->e.msg.ep_last_rx_ack = seq;

	/* If all ACKed, cancel timer, else reset it */
	if (seq == max_ack) {
		usdf_timer_cancel(ep->ep_domain->dom_fabric,
				ep->e.msg.ep_ack_timer);
	} else {
		usdf_timer_reset(ep->ep_domain->dom_fabric,
			ep->e.msg.ep_ack_timer, USDF_RUDP_ACK_TIMEOUT);
	}
}
示例#6
0
文件: usdf_msg.c 项目: ORNL/ompi
/*
 * Queue a single-buffer send of `len` bytes starting at `buf` on a message
 * endpoint.
 *
 * Returns -FI_EAGAIN when the TX free-WQE list is empty, otherwise 0 once
 * the request has been appended to the endpoint's posted list.  `desc` and
 * `dest_addr` are not consulted by this function.  The buffer is referenced,
 * not copied.
 */
ssize_t
usdf_msg_send(struct fid_ep *fep, const void *buf, size_t len, void *desc,
		fi_addr_t dest_addr, void *context)
{
	struct usdf_ep *ep = ep_ftou(fep);
	struct usdf_tx *tx = ep->ep_tx;
	struct usdf_domain *dom = ep->ep_domain;
	struct usdf_msg_qe *qe;

	/* no free work-queue entry: ask the caller to retry later */
	if (TAILQ_EMPTY(&tx->t.msg.tx_free_wqe))
		return -FI_EAGAIN;

	pthread_spin_lock(&dom->dom_progress_lock);

	/* claim the first free WQE */
	qe = TAILQ_FIRST(&tx->t.msg.tx_free_wqe);
	TAILQ_REMOVE(&tx->t.msg.tx_free_wqe, qe, ms_link);

	qe->ms_context = context;

	/* one segment covering the whole user buffer */
	qe->ms_iov[0].iov_base = (void *)buf;
	qe->ms_iov[0].iov_len = len;
	qe->ms_last_iov = 0;
	qe->ms_length = len;
	qe->ms_resid = len;

	/* transmit cursor at the start of that segment */
	qe->ms_cur_iov = 0;
	qe->ms_cur_ptr = buf;
	qe->ms_iov_resid = len;

	/* add send to EP, and add EP to TX list if not present */
	TAILQ_INSERT_TAIL(&ep->e.msg.ep_posted_wqe, qe, ms_link);
	usdf_msg_ep_ready(ep);

	pthread_spin_unlock(&dom->dom_progress_lock);

	/* drive progress outside the lock */
	usdf_domain_progress(dom);

	return 0;
}
示例#7
0
/*
 * Handle a NAK for sequence number `seq` on endpoint `ep`.
 *
 * NAKs for sequence numbers at or beyond ep_next_tx_seq are ignored.
 * Otherwise every WQE on the sent list is moved back to the front of the
 * posted list (preserving order), the first posted WQE is rewound so that
 * transmission restarts at `seq`, the sequence credits are reset to
 * USDF_RUDP_SEQ_CREDITS and the endpoint is marked ready to send.
 *
 * If there is no WQE left to retransmit (both lists empty) the NAK is
 * dropped.
 */
static inline void
usdf_process_nak(struct usdf_ep *ep, uint16_t seq)
{
	struct usdf_msg_qe *wqe;
	size_t rewind;

	/* Ignore NAKs of future packets */
	if (RUDP_SEQ_GE(seq, ep->e.msg.ep_next_tx_seq)) {
		return;
	}

	/*
	 * Move the WQEs on the sent list back to the posted list, taking
	 * them from the tail so their original order is kept.  We set
	 * ms_resid == 0 here because the final set to zero is optimized out
	 * of the fastpath.
	 */
	while (!TAILQ_EMPTY(&ep->e.msg.ep_sent_wqe)) {
		wqe = TAILQ_LAST(&ep->e.msg.ep_sent_wqe, usdf_msg_qe_head);
		TAILQ_REMOVE(&ep->e.msg.ep_sent_wqe, wqe, ms_link);
		wqe->ms_resid = 0;
		TAILQ_INSERT_HEAD(&ep->e.msg.ep_posted_wqe, wqe, ms_link);
	}

	/*
	 * Nothing outstanding and nothing posted: there is no WQE to rewind
	 * (e.g. a late NAK for data that has since been acknowledged), so
	 * drop the NAK rather than dereference a NULL list head.
	 */
	wqe = TAILQ_FIRST(&ep->e.msg.ep_posted_wqe);
	if (wqe == NULL) {
		return;
	}

	/*
	 * Work out how many sequence numbers to back up.  A fully-sent WQE
	 * (ms_resid == 0) is measured from its own last sequence; a partially
	 * sent one is measured from the next sequence to be transmitted.
	 */
	if (wqe->ms_resid == 0) {
		rewind = RUDP_SEQ_DIFF(wqe->ms_last_seq, seq) + 1;
	} else {
		rewind = RUDP_SEQ_DIFF(ep->e.msg.ep_next_tx_seq, seq);
	}
	if (rewind > 0) {
		/* reset WQE and endpoint to the old sequence # */
		ep->e.msg.ep_seq_credits = USDF_RUDP_SEQ_CREDITS;
		ep->e.msg.ep_next_tx_seq = seq;

		/* third argument: device MTU minus the RUDP packet header */
		usdf_msg_rewind_qe(wqe, rewind,
			ep->ep_domain->dom_fabric->fab_dev_attrs->uda_mtu -
			sizeof(struct rudp_pkt));

		usdf_msg_ep_ready(ep);
	}
}
示例#8
0
/*
 * Queue the send described by `msg` on a message endpoint.
 *
 * flags:
 *   - any bit outside USDF_MSG_SUPP_SENDMSG_FLAGS -> -FI_EOPNOTSUPP
 *   - FI_INJECT: the iovec contents are copied into the WQE's private
 *     inject buffer and sent as a single segment; the combined length must
 *     not exceed USDF_MSG_MAX_INJECT_SIZE (-FI_EINVAL otherwise)
 *   - FI_COMPLETION: request a completion for this send (a completion is
 *     also requested when the endpoint's default says so)
 *
 * Returns -FI_EAGAIN when the TX free-WQE list is empty and 0 once the
 * request has been appended to the endpoint's posted list.
 *
 * NOTE(review): msg->iov_count is not validated on the non-inject path; it
 * is presumably expected to be at least 1 and within the capacity of
 * ms_iov -- confirm against callers.
 */
ssize_t
usdf_msg_sendmsg(struct fid_ep *fep, const struct fi_msg *msg, uint64_t flags)
{
	size_t i;
	struct usdf_ep *ep;
	struct usdf_tx *tx;
	struct usdf_msg_qe *wqe;
	struct usdf_domain *udp;
	size_t tot_len;
	const struct iovec *iov;

	ep = ep_ftou(fep);
	tx = ep->ep_tx;
	udp = ep->ep_domain;
	iov = msg->msg_iov;

	if (flags & ~USDF_MSG_SUPP_SENDMSG_FLAGS) {
		USDF_DBG_SYS(EP_DATA,
				"one or more flags in %#" PRIx64 " not supported\n",
				flags);
		return -FI_EOPNOTSUPP;
	}

	/* check for inject overrun before acquiring lock and allocating wqe,
	 * easier to unwind this way */
	if (flags & FI_INJECT) {
		tot_len = 0;
		for (i = 0; i < msg->iov_count; ++i) {
			/*
			 * Compare against the remaining room instead of
			 * adding first, so a huge iov_len cannot wrap the
			 * running total back under the limit.
			 */
			if (iov[i].iov_len >
			    USDF_MSG_MAX_INJECT_SIZE - tot_len) {
				USDF_DBG_SYS(EP_DATA, "max inject len exceeded (%zu)\n",
						tot_len + iov[i].iov_len);
				return -FI_EINVAL;
			}
			tot_len += iov[i].iov_len;
		}
	}

	if (TAILQ_EMPTY(&tx->t.msg.tx_free_wqe)) {
		return -FI_EAGAIN;
	}

	pthread_spin_lock(&udp->dom_progress_lock);

	wqe = usdf_msg_get_tx_wqe(tx);

	wqe->ms_context = msg->context;
	if (flags & FI_INJECT) {
		/* gather all segments into the WQE's private buffer */
		tot_len = 0;
		for (i = 0; i < msg->iov_count; ++i) {
			assert(iov[i].iov_len <=
			       USDF_MSG_MAX_INJECT_SIZE - tot_len);
			memcpy(&wqe->ms_inject_buf[tot_len], iov[i].iov_base,
				iov[i].iov_len);
			tot_len += iov[i].iov_len;
		}
		wqe->ms_iov[0].iov_base = wqe->ms_inject_buf;
		wqe->ms_iov[0].iov_len = tot_len;
		wqe->ms_last_iov = 0;

	} else {
		/* reference the caller's segments in place */
		tot_len = 0;
		for (i = 0; i < msg->iov_count; ++i) {
			wqe->ms_iov[i].iov_base = (void *)iov[i].iov_base;
			wqe->ms_iov[i].iov_len = iov[i].iov_len;
			tot_len += iov[i].iov_len;
		}
		wqe->ms_last_iov = msg->iov_count - 1;
	}

	wqe->ms_cur_iov = 0;
	wqe->ms_resid = tot_len;
	wqe->ms_length = tot_len;

	/*
	 * Start the transmit cursor from the WQE's own first segment.  For a
	 * regular send that is the caller's iov[0]; for FI_INJECT it is the
	 * single gathered segment in the inject buffer, so the cursor and
	 * its residual match ms_iov[0] instead of the caller's memory.
	 */
	wqe->ms_cur_ptr = wqe->ms_iov[0].iov_base;
	wqe->ms_iov_resid = wqe->ms_iov[0].iov_len;

	wqe->ms_signal_comp = (ep->ep_tx_dflt_signal_comp ||
		(flags & FI_COMPLETION)) ? 1 : 0;

	/* add send to EP, and add EP to TX list if not present */
	TAILQ_INSERT_TAIL(&ep->e.msg.ep_posted_wqe, wqe, ms_link);
	usdf_msg_ep_ready(ep);

	pthread_spin_unlock(&udp->dom_progress_lock);

	usdf_domain_progress(udp);

	return 0;
}