Exemplo n.º 1
0
/* Scatter-gather datagram send (fi_sendv).
 *
 * Picks one of two paths: messages whose wire length (UDP header plus
 * payload) fits within the copy threshold are bounced through
 * _usdf_dgram_send_iov_copy(); larger messages are posted directly via
 * _usdf_dgram_send_iov().  Returns 0 on success or a negative fabric
 * error code.
 */
ssize_t
usdf_dgram_sendv(struct fid_ep *fep, const struct iovec *iov, void **desc,
		 size_t count, fi_addr_t dest_addr, void *context)
{
	struct usd_dest *dest;
	struct usdf_ep *ep;
	size_t len;

	ep = ep_ftou(fep);
	len = sizeof(struct usd_udp_hdr);
	/* dest_addr carries an encoded pointer to the destination object */
	dest = (struct usd_dest *)(uintptr_t) dest_addr;

	len += _usdf_iov_len(iov, count);

	/* The copy path coalesces the iov, so the iov-count limit below
	 * intentionally does not apply to it. */
	if (len <= USD_SEND_MAX_COPY) {
		return _usdf_dgram_send_iov_copy(ep, dest, iov, count, context,
						ep->ep_tx_completion);
	} else if (ep->e.dg.tx_op_flags & FI_INJECT) {
		/* FI_INJECT requires the copy path; a message larger than
		 * the copy threshold cannot be injected. */
		USDF_DBG_SYS(EP_DATA,
				"given inject length (%zu) exceeds max inject length (%d)\n",
				len, USD_SEND_MAX_COPY);
		return -FI_ENOSPC;
	}

	if (count > ep->e.dg.tx_iov_limit) {
		USDF_DBG_SYS(EP_DATA, "max iov count exceeded: %zu\n", count);
		return -FI_ENOSPC;
	}

	return _usdf_dgram_send_iov(ep, dest, iov, count, context,
					ep->ep_tx_completion);
}
Exemplo n.º 2
0
/* Register as a callback triggered by the socket becoming writeable. Write as
 * much data as can be written in a single write, and keep track of how much
 * data is left. If the data is not fully written, it will finish getting
 * written in another iteration of the progression.
 */
/* Register as a callback triggered by the socket becoming writeable. Write as
 * much data as can be written in a single write, and keep track of how much
 * data is left. If the data is not fully written, it will finish getting
 * written in another iteration of the progression.
 *
 * Returns FI_SUCCESS when progress was made (or the socket is simply not
 * writable yet), or a negative errno on hard write failure.
 */
static int usdf_pep_reject_async(void *vreq)
{
    struct usdf_connreq *crp;
    int ret;

    crp = vreq;

    do {
        ret = write(crp->cr_sockfd, crp->cr_ptr, crp->cr_resid);
    } while ((ret < 0) && (errno == EINTR));

    if (ret <= 0) {
        /* BUG FIX: previously a -1/EAGAIN result fell through and was
         * subtracted from cr_resid / added to cr_ptr, corrupting the
         * transfer state.  EAGAIN just means "try again next time the
         * socket is writable" -- report success without touching the
         * residual bookkeeping. */
        if (errno == EAGAIN)
            return FI_SUCCESS;

        USDF_DBG_SYS(EP_CTRL, "write failed: %s\n",
                     strerror(errno));
        usdf_cm_msg_connreq_failed(crp, -errno);
        return -errno;
    }

    /* account for the bytes actually written */
    crp->cr_resid -= ret;
    crp->cr_ptr += ret;

    /* everything sent: tear down the connection request */
    if (crp->cr_resid == 0)
        usdf_cm_msg_connreq_cleanup(crp);

    return FI_SUCCESS;
}
Exemplo n.º 3
0
/* fi_sendmsg for DGRAM endpoints.  Per-call flags are not honored by this
 * variant; the request is forwarded straight to usdf_dgram_sendv(). */
ssize_t
usdf_dgram_sendmsg(struct fid_ep *fep, const struct fi_msg *msg, uint64_t flags)
{
	USDF_DBG_SYS(EP_DATA, "flags ignored!");
	return usdf_dgram_sendv(fep, msg->msg_iov, msg->desc, msg->iov_count,
				(fi_addr_t)msg->addr, msg->context);
}
Exemplo n.º 4
0
/* Single-buffer datagram send (fi_send).
 *
 * Small messages (header + payload within the copy threshold) are sent
 * via the copy path; anything larger is posted zero-copy, unless the
 * endpoint is operating in FI_INJECT mode, in which case oversized
 * messages are rejected.
 */
ssize_t
usdf_dgram_send(struct fid_ep *fep, const void *buf, size_t len, void *desc,
		fi_addr_t dest_addr, void *context)
{
	struct usdf_ep *ep = ep_ftou(fep);
	struct usdf_dest *dest = (struct usdf_dest *)(uintptr_t) dest_addr;
	uint32_t sflags = ep->ep_tx_completion ? USD_SF_SIGNAL : 0;

	if (len + sizeof(struct usd_udp_hdr) <= USD_SEND_MAX_COPY)
		return usd_post_send_one_copy(ep->e.dg.ep_qp, &dest->ds_dest,
						buf, len, sflags,
						context);

	if (ep->e.dg.tx_op_flags & FI_INJECT) {
		/* inject requires the copy path */
		USDF_DBG_SYS(EP_DATA,
				"given inject length (%zu) exceeds max inject length (%d)\n",
				len + sizeof(struct usd_udp_hdr),
				USD_SEND_MAX_COPY);
		return -FI_ENOSPC;
	}

	return usd_post_send_one(ep->e.dg.ep_qp, &dest->ds_dest, buf, len,
				sflags, context);
}
Exemplo n.º 5
0
/* fi_sendmsg for DGRAM endpoints operating in FI_MSG_PREFIX mode.
 *
 * In prefix mode the caller reserves USDF_HDR_BUF_ENTRY bytes at the
 * front of iov[0] for the transport header.  NOTE(review): the length
 * math below assumes iov[0].iov_len >= USDF_HDR_BUF_ENTRY -- TODO
 * confirm callers guarantee that.
 */
ssize_t
usdf_dgram_prefix_sendmsg(struct fid_ep *fep, const struct fi_msg *msg,
	uint64_t flags)
{
	struct iovec send_iov[USDF_DGRAM_MAX_SGE];
	struct usd_dest *dest;
	struct usdf_ep *ep;
	uint8_t completion;
	size_t len;
	size_t padding;

	ep = ep_ftou(fep);
	dest = (struct usd_dest *)(uintptr_t) msg->addr;
	len = _usdf_iov_len(msg->msg_iov, msg->iov_count);
	/* signal a completion if the EP default says so or the caller asked */
	completion = ep->ep_tx_dflt_signal_comp || (flags & FI_COMPLETION);
	/* bytes of the prefix entry that are NOT the UDP header itself */
	padding = USDF_HDR_BUF_ENTRY - sizeof(struct usd_udp_hdr);

	if (msg->iov_count > ep->e.dg.tx_iov_limit) {
		USDF_DBG_SYS(EP_DATA, "max iov count exceeded: %zu\n",
				msg->iov_count);
		return -FI_ENOSPC;
	}

	if ((len - padding) <= USD_SEND_MAX_COPY) {
		/* _usdf_dgram_send_iov_copy isn't prefix aware and allocates
		 * its own prefix. reorganize iov[0] base to point to data and
		 * len to reflect data length.
		 */
		memcpy(send_iov, msg->msg_iov,
				sizeof(struct iovec) * msg->iov_count);
		send_iov[0].iov_base = ((char *) send_iov[0].iov_base +
				USDF_HDR_BUF_ENTRY);
		send_iov[0].iov_len -= USDF_HDR_BUF_ENTRY;

		return _usdf_dgram_send_iov_copy(ep, dest, send_iov,
				msg->iov_count, msg->context, completion);
	} else if (flags & FI_INJECT) {
		/* inject is restricted to the copy path above */
		USDF_DBG_SYS(EP_DATA,
				"given inject length (%zu) exceeds max inject length (%d)\n",
				len, USD_SEND_MAX_COPY);
		return -FI_ENOSPC;
	}

	return _usdf_dgram_send_iov_prefix(ep, dest, msg->msg_iov,
			msg->iov_count, msg->context, completion);
}
Exemplo n.º 6
0
/* fi_recvmsg for reliable MSG endpoints.
 *
 * Validates flags, takes a free receive queue entry (rqe), copies the
 * caller's iov into it, and posts it on the endpoint's posted-rqe list
 * under the domain progress lock.  Returns 0 on success, -FI_EAGAIN if
 * no rqe is free, or -FI_EOPNOTSUPP for unsupported flags.
 */
ssize_t
usdf_msg_recvmsg(struct fid_ep *fep, const struct fi_msg *msg, uint64_t flags)
{
	size_t i;
	struct usdf_ep *ep;
	struct usdf_rx *rx;
	struct usdf_msg_qe *rqe;
	struct usdf_domain *udp;
	size_t tot_len;
	const struct iovec *iov;

	ep = ep_ftou(fep);
	rx = ep->ep_rx;
	udp = ep->ep_domain;
	iov = msg->msg_iov;

	/* cheap availability check before taking the lock */
	if (TAILQ_EMPTY(&rx->r.msg.rx_free_rqe)) {
		return -FI_EAGAIN;
	}

	if (flags & ~USDF_MSG_SUPP_RECVMSG_FLAGS) {
		USDF_DBG_SYS(EP_DATA,
				"one or more flags in %#" PRIx64 " not supported\n",
				flags);
		return -FI_EOPNOTSUPP;
	}

	pthread_spin_lock(&udp->dom_progress_lock);

	rqe = usdf_msg_get_rx_rqe(rx);

	rqe->ms_context = msg->context;
	/* snapshot the caller's iov into the rqe */
	tot_len = 0;
	for (i = 0; i < msg->iov_count; ++i) {
		rqe->ms_iov[i].iov_base = (void *)iov[i].iov_base;
		rqe->ms_iov[i].iov_len = iov[i].iov_len;
		tot_len += iov[i].iov_len;
	}
	rqe->ms_last_iov = msg->iov_count - 1;

	/* initialize the cursor used while filling this rqe from packets */
	rqe->ms_cur_iov = 0;
	rqe->ms_resid = tot_len;
	rqe->ms_length = 0;
	rqe->ms_cur_ptr = iov[0].iov_base;
	rqe->ms_iov_resid = iov[0].iov_len;

	/* parses as (dflt || FI_COMPLETION) ? 1 : 0 -- ?: binds last */
	rqe->ms_signal_comp = ep->ep_rx_dflt_signal_comp ||
		(flags & FI_COMPLETION) ? 1 : 0;

	TAILQ_INSERT_TAIL(&rx->r.msg.rx_posted_rqe, rqe, ms_link);

	pthread_spin_unlock(&udp->dom_progress_lock);

	return 0;
}
Exemplo n.º 7
0
/* fi_sendv for DGRAM endpoints operating in FI_MSG_PREFIX mode.
 *
 * The caller reserves USDF_HDR_BUF_ENTRY bytes at the front of iov[0]
 * for the transport header.  NOTE(review): assumes iov[0].iov_len >=
 * USDF_HDR_BUF_ENTRY and that len >= padding -- TODO confirm callers.
 */
ssize_t
usdf_dgram_prefix_sendv(struct fid_ep *fep, const struct iovec *iov,
		void **desc, size_t count, fi_addr_t dest_addr, void *context)
{
	struct iovec send_iov[USDF_DGRAM_MAX_SGE];
	struct usd_dest *dest;
	struct usdf_ep *ep;
	size_t len;
	size_t padding;

	ep = ep_ftou(fep);
	dest = (struct usd_dest *)(uintptr_t) dest_addr;
	len = _usdf_iov_len(iov, count);
	/* bytes of the prefix entry that are NOT the UDP header itself */
	padding = USDF_HDR_BUF_ENTRY - sizeof(struct usd_udp_hdr);

	if (count > ep->e.dg.tx_iov_limit) {
		USDF_DBG_SYS(EP_DATA, "max iov count exceeded: %zu\n", count);
		return -FI_ENOSPC;
	}

	if ((len - padding) <= USD_SEND_MAX_COPY) {
		/* _usdf_dgram_send_iov_copy isn't prefix aware and allocates
		 * its own prefix. reorganize iov[0] base to point to data and
		 * len to reflect data length.
		 */
		memcpy(send_iov, iov, sizeof(struct iovec) * count);
		send_iov[0].iov_base = ((char *) send_iov[0].iov_base +
				USDF_HDR_BUF_ENTRY);
		send_iov[0].iov_len -= USDF_HDR_BUF_ENTRY;

		return _usdf_dgram_send_iov_copy(ep, dest, send_iov, count,
				context, ep->ep_tx_completion);
	} else if (ep->e.dg.tx_op_flags & FI_INJECT) {
		/* inject is restricted to the copy path above */
		USDF_DBG_SYS(EP_DATA,
				"given inject length (%zu) exceeds max inject length (%d)\n",
				len, USD_SEND_MAX_COPY);
		return -FI_ENOSPC;
	}

	return _usdf_dgram_send_iov_prefix(ep, dest, iov, count, context,
					   ep->ep_tx_completion);
}
Exemplo n.º 8
0
/* Process an incoming RUDP ACK for sequence number "seq".
 *
 * Retires every sent WQE whose last sequence number is covered by the
 * ACK (posting a TX completion if requested), replenishes the
 * endpoint's send-window credits, and manages the retransmit timer.
 * Called from the progress path; caller is assumed to hold the
 * appropriate progress lock (NOTE(review): not visible here -- verify).
 */
static inline void
usdf_msg_process_ack(struct usdf_ep *ep, uint16_t seq)
{
	struct usdf_cq_hard *hcq;
	struct usdf_msg_qe *wqe;
	struct usdf_tx *tx;
	uint16_t max_ack;
	unsigned credits;

	tx = ep->ep_tx;

	/* don't try to ACK what we don't think we've sent */
	max_ack = ep->e.msg.ep_next_tx_seq - 1;
	if (RUDP_SEQ_GT(seq, max_ack)) {
		seq = max_ack;
	}

	hcq = tx->t.msg.tx_hcq;
	/* retire fully-ACKed WQEs in order; stop at the first one that is
	 * not yet covered by this ACK */
	while (!TAILQ_EMPTY(&ep->e.msg.ep_sent_wqe)) {
		wqe = TAILQ_FIRST(&ep->e.msg.ep_sent_wqe);
		if (RUDP_SEQ_LE(wqe->ms_last_seq, seq)) {
			TAILQ_REMOVE(&ep->e.msg.ep_sent_wqe, wqe, ms_link);
			USDF_DBG_SYS(EP_DATA, "send complete, signal_comp=%u\n", wqe->ms_signal_comp);
			if (wqe->ms_signal_comp)
				hcq->cqh_post(hcq, wqe->ms_context,
					      wqe->ms_length, FI_SUCCESS,
					      FI_MSG | FI_SEND);

			usdf_msg_put_tx_wqe(tx, wqe);
		} else {
			break;
		}
	}

	/* newly freed window slots since the last ACK we saw */
	credits = RUDP_SEQ_DIFF(seq, ep->e.msg.ep_last_rx_ack);
	/* if the window was closed and work is pending, restart the EP */
	if (ep->e.msg.ep_seq_credits == 0 && credits > 0 &&
			!TAILQ_EMPTY(&ep->e.msg.ep_posted_wqe)) {
		usdf_msg_ep_ready(ep);
	}
	ep->e.msg.ep_seq_credits += credits;
	ep->e.msg.ep_last_rx_ack = seq;

	/* If all ACKed, cancel timer, else reset it */
	if (seq == max_ack) {
		usdf_timer_cancel(ep->ep_domain->dom_fabric,
				ep->e.msg.ep_ack_timer);
	} else {
		usdf_timer_reset(ep->ep_domain->dom_fabric,
			ep->e.msg.ep_ack_timer, USDF_RUDP_ACK_TIMEOUT);
	}
}
Exemplo n.º 9
0
ssize_t usdf_msg_rx_size_left(struct fid_ep *fep)
{
	struct usdf_ep *ep;
	struct usdf_rx *rx;

	USDF_DBG_SYS(EP_DATA, "\n");

	ep = ep_ftou(fep);
	rx = ep->ep_rx;
	if (rx == NULL)
		return -FI_EOPBADSTATE; /* EP not enabled */

	return rx->r.msg.rx_num_free_rqe;
}
Exemplo n.º 10
0
ssize_t usdf_msg_tx_size_left(struct fid_ep *fep)
{
	struct usdf_ep *ep;
	struct usdf_tx *tx;

	USDF_DBG_SYS(EP_DATA, "\n");

	ep = ep_ftou(fep);
	tx = ep->ep_tx;
	if (tx == NULL)
		return -FI_EOPBADSTATE; /* EP not enabled */

	return tx->t.msg.tx_num_free_wqe;
}
Exemplo n.º 11
0
/* Translate a usd completion type into the libfabric completion flags
 * reported to the user; unknown types log a warning and yield 0. */
static inline int usdf_cqe_to_flags(struct usd_completion *comp)
{
	if (comp->uc_type == USD_COMPTYPE_SEND)
		return (FI_MSG | FI_SEND);

	if (comp->uc_type == USD_COMPTYPE_RECV)
		return (FI_MSG | FI_RECV);

	USDF_DBG_SYS(CQ, "WARNING: unknown completion type! (%d)\n",
			comp->uc_type);
	return 0;
}
Exemplo n.º 12
0
/* Report remaining receive capacity for a MSG endpoint; the endpoint
 * must be enabled, otherwise -FI_EOPBADSTATE is returned. */
ssize_t usdf_msg_rx_size_left(struct fid_ep *fep)
{
	struct usdf_ep *ep;

	USDF_DBG_SYS(EP_DATA, "\n");

	ep = ep_ftou(fep);

	/* reject calls on endpoints that were never enabled */
	if (!(ep->flags & USDF_EP_ENABLED))
		return -FI_EOPBADSTATE;

	return ep->ep_rx->r.msg.rx_num_free_rqe;
}
Exemplo n.º 13
0
/* Report remaining send capacity for a MSG endpoint; the endpoint must
 * be enabled, otherwise -FI_EOPBADSTATE is returned. */
ssize_t usdf_msg_tx_size_left(struct fid_ep *fep)
{
	struct usdf_ep *ep;

	USDF_DBG_SYS(EP_DATA, "\n");

	ep = ep_ftou(fep);

	/* reject calls on endpoints that were never enabled */
	if (!(ep->flags & USDF_EP_ENABLED))
		return -FI_EOPBADSTATE;

	return ep->ep_tx->t.msg.tx_num_free_wqe;
}
Exemplo n.º 14
0
/* fi_sendmsg for DGRAM endpoints (flag-aware variant).
 *
 * Honors FI_COMPLETION and FI_INJECT from the per-call flags.  Small
 * messages use the copy path; larger ones are posted directly after
 * iov-count validation.
 */
ssize_t
usdf_dgram_sendmsg(struct fid_ep *fep, const struct fi_msg *msg, uint64_t flags)
{
	struct usd_dest *dest;
	struct usdf_ep *ep;
	uint8_t completion;
	size_t len;

	ep = ep_ftou(fep);
	len = sizeof(struct usd_udp_hdr);
	dest = (struct usd_dest *)(uintptr_t) msg->addr;
	/* signal a completion if the EP default says so or the caller asked */
	completion = ep->ep_tx_dflt_signal_comp || (flags & FI_COMPLETION);

	len += _usdf_iov_len(msg->msg_iov, msg->iov_count);

	/* copy path coalesces the iov, so no iov-count check is needed here */
	if (len <= USD_SEND_MAX_COPY) {
		return _usdf_dgram_send_iov_copy(ep, dest, msg->msg_iov,
							msg->iov_count,
							msg->context,
							completion);
	} else if (flags & FI_INJECT) {
		/* inject is restricted to the copy path above */
		USDF_DBG_SYS(EP_DATA,
				"given inject length (%zu) exceeds max inject length (%d)\n",
				len, USD_SEND_MAX_COPY);
		return -FI_ENOSPC;
	}

	if (msg->iov_count > ep->e.dg.tx_iov_limit) {
		USDF_DBG_SYS(EP_DATA, "max iov count exceeded: %zu\n",
				msg->iov_count);
		return -FI_ENOSPC;
	}

	return _usdf_dgram_send_iov(ep, dest, msg->msg_iov, msg->iov_count,
					msg->context, completion);
}
Exemplo n.º 15
0
/* Single-buffer send for DGRAM endpoints in FI_MSG_PREFIX mode.
 *
 * "buf" starts with a caller-reserved USDF_HDR_BUF_ENTRY-byte prefix;
 * the UDP header is built in place at the tail of that prefix and the
 * whole frame is handed to the queue pair without copying (unless the
 * EP is in FI_INJECT mode, which uses the copy path).
 */
ssize_t
usdf_dgram_prefix_send(struct fid_ep *fep, const void *buf, size_t len,
		void *desc, fi_addr_t dest_addr, void *context)
{
	struct usd_udp_hdr *hdr;
	struct usd_qp_impl *qp;
	struct usdf_dest *dest;
	struct usdf_ep *ep;
	struct usd_wq *wq;
	uint32_t last_post;
	uint32_t flags;
	size_t padding;

	ep = ep_ftou(fep);
	dest = (struct usdf_dest *)(uintptr_t) dest_addr;
	/* bytes of the prefix entry that are NOT the UDP header itself */
	padding = USDF_HDR_BUF_ENTRY - sizeof(struct usd_udp_hdr);
	flags = (ep->ep_tx_completion) ? USD_SF_SIGNAL : 0;

	if (ep->e.dg.tx_op_flags & FI_INJECT) {
		if ((len - padding) > USD_SEND_MAX_COPY) {
			USDF_DBG_SYS(EP_DATA,
					"given inject length (%zu) exceeds max inject length (%d)\n",
					len, USD_SEND_MAX_COPY);
			return -FI_ENOSPC;
		}

		/* BUG FIX: arithmetic on "const void *" is a GNU extension
		 * (invalid in standard C); cast to const char * first. */
		return usd_post_send_one_copy(ep->e.dg.ep_qp, &dest->ds_dest,
				(const char *) buf + USDF_HDR_BUF_ENTRY, len -
				USDF_HDR_BUF_ENTRY, flags,
				context);
	}

	qp = to_qpi(ep->e.dg.ep_qp);
	wq = &qp->uq_wq;

	/* build the UDP header in place inside the caller's prefix area */
	hdr = (struct usd_udp_hdr *) ((char *) buf + padding);
	memcpy(hdr, &dest->ds_dest.ds_dest.ds_udp.u_hdr, sizeof(*hdr));

	_usdf_adjust_prefix_hdr(hdr, qp, len, padding);

	last_post = _usd_post_send_one(wq, hdr, len - padding,
			ep->ep_tx_completion);

	/* record context/length so completion reports exclude the prefix */
	_usdf_adjust_post_info(wq, last_post, context, len - USDF_HDR_BUF_ENTRY);

	return FI_SUCCESS;
}
Exemplo n.º 16
0
/* Report remaining DGRAM send capacity, accounting for the fact that a
 * sendv may consume up to iov_limit+1 descriptors per operation. */
ssize_t usdf_dgram_tx_size_left(struct fid_ep *fep)
{
	struct usdf_ep *ep;
	unsigned desc_per_op;

	USDF_DBG_SYS(EP_DATA, "\n");

	if (fep == NULL)
		return -FI_EINVAL;

	ep = ep_ftou(fep);

	/* no QP yet => EP not enabled */
	if (ep->e.dg.ep_qp == NULL)
		return -FI_EOPBADSTATE;

	desc_per_op = ep->e.dg.tx_iov_limit + 1;
	return usd_get_send_credits(ep->e.dg.ep_qp) / desc_per_op;
}
Exemplo n.º 17
0
/* Report remaining DGRAM send capacity using the default SGE count
 * (see NOTE-SIZE-LEFT) rather than the per-EP iov limit. */
ssize_t usdf_dgram_tx_size_left(struct fid_ep *fep)
{
	struct usdf_ep *ep;

	USDF_DBG_SYS(EP_DATA, "\n");

	if (fep == NULL)
		return -FI_EINVAL;

	ep = ep_ftou(fep);

	/* no QP yet => EP not enabled */
	if (ep->e.dg.ep_qp == NULL)
		return -FI_EOPBADSTATE;

	/* see NOTE-SIZE-LEFT */
	return usd_get_send_credits(ep->e.dg.ep_qp) / (USDF_DGRAM_DFLT_SGE + 1);
}
Exemplo n.º 18
0
/* Given a connection request structure containing data, make a copy of the data
 * that can be accessed in error entries on the EQ. The return value is the size
 * of the data stored in the error entry. If the return value is a non-negative
 * value, then the function has suceeded and the size and output data can be
 * assumed to be valid. If the function fails, then the data will be NULL and
 * the size will be a negative error value.
 */
static int usdf_cm_generate_err_data(struct usdf_eq *eq,
		struct usdf_connreq *crp, void **data)
{
	struct usdf_err_data_entry *err_data_entry;
	struct usdf_connreq_msg *reqp;
	size_t entry_size;
	size_t data_size;

	if (!eq || !crp || !data) {
		USDF_DBG_SYS(EP_CTRL,
				"eq, crp, or data is NULL.\n");
		return -FI_EINVAL;
	}

	/* Initialize to NULL so data can't be used in the error case. */
	*data = NULL;

	reqp = (struct usdf_connreq_msg *) crp->cr_data;

	/* This is a normal case, maybe there was no data. */
	if (!reqp || !reqp->creq_datalen)
		return 0;

	/* NOTE(review): creq_datalen is assumed to already be in host byte
	 * order here -- verify against the caller. */
	data_size = reqp->creq_datalen;

	/* header struct plus the variable-length payload */
	entry_size = sizeof(*err_data_entry) + data_size;

	err_data_entry = calloc(1, entry_size);
	if (!err_data_entry) {
		USDF_WARN_SYS(EP_CTRL,
				"failed to allocate err data entry\n");
		return -FI_ENOMEM;
	}

	/* This data should be copied and owned by the provider. Keep
	 * track of it in the EQ, this will be freed in the next EQ read
	 * call after it has been read.
	 */
	memcpy(err_data_entry->err_data, reqp->creq_data, data_size);
	slist_insert_tail(&err_data_entry->entry, &eq->eq_err_data);

	*data = err_data_entry->err_data;

	return data_size;
}
Exemplo n.º 19
0
/* Report remaining send capacity for a DGRAM endpoint in prefix mode.
 * Returns -FI_EINVAL for a NULL fid and -FI_EOPBADSTATE if the EP has
 * no QP (i.e. it was never enabled). */
ssize_t usdf_dgram_prefix_tx_size_left(struct fid_ep *fep)
{
	struct usdf_ep *ep;

	USDF_DBG_SYS(EP_DATA, "\n");

	if (fep == NULL)
		return -FI_EINVAL;

	ep = ep_ftou(fep);

	if (ep->e.dg.ep_qp == NULL)
		return -FI_EOPBADSTATE; /* EP not enabled */

	/* prefix_sendv can post up to iov_limit descriptors per operation
	 * (the header lives in the caller's prefix, so no extra descriptor
	 * is needed)
	 */
	return (usd_get_send_credits(ep->e.dg.ep_qp) / ep->e.dg.tx_iov_limit);
}
Exemplo n.º 20
0
/* Report remaining receive capacity for a DGRAM endpoint in prefix
 * mode; uses the default SGE count (see NOTE-SIZE-LEFT). */
ssize_t usdf_dgram_prefix_rx_size_left(struct fid_ep *fep)
{
	struct usdf_ep *ep;

	USDF_DBG_SYS(EP_DATA, "\n");

	if (fep == NULL)
		return -FI_EINVAL;

	ep = ep_ftou(fep);

	/* no QP yet => EP not enabled */
	if (ep->e.dg.ep_qp == NULL)
		return -FI_EOPBADSTATE;

	/* prefix_recvv can post up to iov_limit descriptors per operation;
	 * also see NOTE-SIZE-LEFT */
	return (usd_get_recv_credits(ep->e.dg.ep_qp) / USDF_DGRAM_DFLT_SGE);
}
Exemplo n.º 21
0
/* Report a connection management related failure. Sometimes there is connection
 * event data that should be copied into the generated event. If the copy_data
 * parameter evaluates to true, then the data will be copied.
 *
 * If data is to be generated for the error entry, then the connection request
 * is assumed to have the data size in host order. If something fails during
 * processing of the error data, then the EQ entry will still be generated
 * without the error data.
 */
void usdf_cm_report_failure(struct usdf_connreq *crp, int error, bool copy_data)
{
	struct fi_eq_err_entry err = {0};
	struct usdf_pep *pep;
	struct usdf_ep *ep;
	struct usdf_eq *eq;
	fid_t fid;
	int ret;

	USDF_DBG_SYS(EP_CTRL, "error=%d (%s)\n", error, fi_strerror(error));

	pep = crp->cr_pep;
	ep = crp->cr_ep;

	/* Route the event to the EP's EQ if an EP exists, otherwise to the
	 * passive EP's EQ. */
	if (ep != NULL) {
		fid = ep_utofid(ep);
		eq = ep->ep_eq;
		/* drop this endpoint's entry from the domain peer table */
		ep->ep_domain->dom_peer_tab[ep->e.msg.ep_rem_peer_id] = NULL;
	} else {
		fid = pep_utofid(pep);
		eq = pep->pep_eq;
	}

	/* Try to generate the space necessary for the error data. If the
	 * function returns a number greater than or equal to 0, then it was a
	 * success. The return value is the size of the data.
	 */
	if (copy_data) {
		ret = usdf_cm_generate_err_data(eq, crp, &err.err_data);
		if (ret >= 0)
			err.err_data_size = ret;
	}

	err.fid = fid;
	err.err = -error;

	usdf_eq_write_internal(eq, 0, &err, sizeof(err), USDF_EVENT_FLAG_ERROR);

	usdf_cm_msg_connreq_cleanup(crp);
}
Exemplo n.º 22
0
/* Report remaining receive capacity for a DGRAM endpoint. */
ssize_t usdf_dgram_rx_size_left(struct fid_ep *fep)
{
	struct usdf_ep *ep;

	USDF_DBG_SYS(EP_DATA, "\n");

	if (fep == NULL)
		return -FI_EINVAL;

	ep = ep_ftou(fep);

	/* no QP yet => EP not enabled */
	if (ep->e.dg.ep_qp == NULL)
		return -FI_EOPBADSTATE;

	/* NOTE-SIZE-LEFT: divide by constant right now, rather than keeping
	 * track of the rx_attr->iov_limit value we gave to the user.  This
	 * sometimes under-reports the number of RX ops that could be posted,
	 * but it avoids touching a cache line that we don't otherwise need.
	 *
	 * sendv/recvv could potentially post iov_limit+1 descriptors
	 */
	return usd_get_recv_credits(ep->e.dg.ep_qp) / (USDF_DGRAM_DFLT_SGE + 1);
}
Exemplo n.º 23
0
/* fi_inject for DGRAM endpoints: the payload is copied immediately so
 * the caller's buffer is reusable on return, and no completion is ever
 * generated.  Rejects messages larger than the copy threshold. */
ssize_t
usdf_dgram_inject(struct fid_ep *fep, const void *buf, size_t len,
		  fi_addr_t dest_addr)
{
	struct usdf_ep *ep = ep_ftou(fep);
	struct usdf_dest *dest = (struct usdf_dest *)(uintptr_t) dest_addr;

	if (len + sizeof(struct usd_udp_hdr) > USD_SEND_MAX_COPY) {
		USDF_DBG_SYS(EP_DATA,
				"given inject length (%zu) exceeds max inject length (%d)\n",
				len + sizeof(struct usd_udp_hdr),
				USD_SEND_MAX_COPY);
		return -FI_ENOSPC;
	}

	/*
	 * fi_inject never generates a completion
	 */
	return usd_post_send_one_copy(ep->e.dg.ep_qp, &dest->ds_dest, buf, len,
					0, NULL);
}
Exemplo n.º 24
0
/* Open a passive endpoint used to listen for incoming MSG-endpoint
 * connection requests.
 *
 * Validates the caller's fi_info (EP type, caps, address format), sets
 * up a non-blocking listening TCP socket with SO_REUSEADDR, duplicates
 * the info, fills in a default source address when none was given, and
 * initializes the connection-request free list.  On success *pep_o is
 * set and 0 is returned; on failure a negative fabric error is returned
 * and all partially acquired resources are released.
 */
int
usdf_pep_open(struct fid_fabric *fabric, struct fi_info *info,
              struct fid_pep **pep_o, void *context)
{
    struct usdf_pep *pep;
    struct usdf_fabric *fp;
    struct sockaddr_in *sin;
    int ret;
    int optval;

    USDF_TRACE_SYS(EP_CTRL, "\n");

    if (!info) {
        USDF_DBG_SYS(EP_CTRL, "null fi_info struct is invalid\n");
        return -FI_EINVAL;
    }

    if (info->ep_attr->type != FI_EP_MSG) {
        return -FI_ENODEV;
    }

    if ((info->caps & ~USDF_MSG_CAPS) != 0) {
        return -FI_EBADF;
    }

    switch (info->addr_format) {
    case FI_SOCKADDR:
        if (((struct sockaddr *)info->src_addr)->sa_family != AF_INET) {
            USDF_WARN_SYS(EP_CTRL, "non-AF_INET src_addr specified\n");
            return -FI_EINVAL;
        }
        break;
    case FI_SOCKADDR_IN:
        break;
    default:
        USDF_WARN_SYS(EP_CTRL, "unknown/unsupported addr_format\n");
        return -FI_EINVAL;
    }

    if (info->src_addrlen &&
            info->src_addrlen != sizeof(struct sockaddr_in)) {
        USDF_WARN_SYS(EP_CTRL, "unexpected src_addrlen\n");
        return -FI_EINVAL;
    }

    fp = fab_ftou(fabric);

    pep = calloc(1, sizeof(*pep));
    if (pep == NULL) {
        return -FI_ENOMEM;
    }

    pep->pep_fid.fid.fclass = FI_CLASS_PEP;
    pep->pep_fid.fid.context = context;
    pep->pep_fid.fid.ops = &usdf_pep_ops;
    pep->pep_fid.ops = &usdf_pep_base_ops;
    pep->pep_fid.cm = &usdf_pep_cm_ops;
    pep->pep_fabric = fp;

    pep->pep_state = USDF_PEP_UNBOUND;
    pep->pep_sock = socket(AF_INET, SOCK_STREAM, 0);
    if (pep->pep_sock == -1) {
        ret = -errno;
        goto fail;
    }
    /* make the listening socket non-blocking for the progression loop */
    ret = fcntl(pep->pep_sock, F_GETFL, 0);
    if (ret == -1) {
        ret = -errno;
        goto fail;
    }
    ret = fcntl(pep->pep_sock, F_SETFL, ret | O_NONBLOCK);
    if (ret == -1) {
        ret = -errno;
        goto fail;
    }

    /* set SO_REUSEADDR to prevent annoying "Address already in use" errors
     * on successive runs of programs listening on a well known port */
    optval = 1;
    ret = setsockopt(pep->pep_sock, SOL_SOCKET, SO_REUSEADDR, &optval,
                     sizeof(optval));
    if (ret == -1) {
        ret = -errno;
        goto fail;
    }

    pep->pep_info = fi_dupinfo(info);
    if (!pep->pep_info) {
        ret = -FI_ENOMEM;
        goto fail;
    }

    if (info->src_addrlen == 0) {
        /* Copy the source address information from the device
         * attributes.
         */
        pep->pep_info->src_addrlen = sizeof(struct sockaddr_in);
        sin = calloc(1, pep->pep_info->src_addrlen);
        if (!sin) {
            USDF_WARN_SYS(EP_CTRL,
                          "calloc for src address failed\n");
            /* BUG FIX: previously fell through to "fail" with a stale
             * ret of 0, returning success on allocation failure */
            ret = -FI_ENOMEM;
            goto fail;
        }

        sin->sin_family = AF_INET;
        sin->sin_addr.s_addr = fp->fab_dev_attrs->uda_ipaddr_be;
        pep->pep_info->src_addr = sin;
    }

    memcpy(&pep->pep_src_addr, pep->pep_info->src_addr,
           pep->pep_info->src_addrlen);

    /* initialize connreq freelist */
    ret = pthread_spin_init(&pep->pep_cr_lock, PTHREAD_PROCESS_PRIVATE);
    if (ret != 0) {
        ret = -ret;
        goto fail;
    }
    TAILQ_INIT(&pep->pep_cr_free);
    TAILQ_INIT(&pep->pep_cr_pending);
    pep->pep_backlog = 10;
    ret = usdf_pep_grow_backlog(pep);
    if (ret != 0) {
        goto fail;
    }

    atomic_initialize(&pep->pep_refcnt, 0);
    atomic_inc(&fp->fab_refcnt);

    *pep_o = pep_utof(pep);
    return 0;

fail:
    if (pep != NULL) {
        usdf_pep_free_cr_lists(pep);
        if (pep->pep_sock != -1) {
            close(pep->pep_sock);
        }
        fi_freeinfo(pep->pep_info);
        free(pep);
    }
    return ret;
}
Exemplo n.º 25
0
/* fi_sendmsg for reliable MSG endpoints.
 *
 * Validates flags, optionally pre-checks the FI_INJECT length limit,
 * takes a free TX work queue entry (wqe) under the domain progress
 * lock, fills it either from the inject bounce buffer or from the
 * caller's iov, queues it on the endpoint, and kicks progress.
 * Returns 0, -FI_EAGAIN (no free wqe), -FI_EOPNOTSUPP (bad flags) or
 * -FI_EINVAL (inject too large).
 */
ssize_t
usdf_msg_sendmsg(struct fid_ep *fep, const struct fi_msg *msg, uint64_t flags)
{
	size_t i;
	struct usdf_ep *ep;
	struct usdf_tx *tx;
	struct usdf_msg_qe *wqe;
	struct usdf_domain *udp;
	size_t tot_len;
	const struct iovec *iov;

	ep = ep_ftou(fep);
	tx = ep->ep_tx;
	udp = ep->ep_domain;
	iov = msg->msg_iov;

	if (flags & ~USDF_MSG_SUPP_SENDMSG_FLAGS) {
		USDF_DBG_SYS(EP_DATA,
				"one or more flags in %#" PRIx64 " not supported\n",
				flags);
		return -FI_EOPNOTSUPP;
	}

	/* check for inject overrun before acquiring lock and allocating wqe,
	 * easier to unwind this way */
	if (flags & FI_INJECT) {
		iov = msg->msg_iov;
		tot_len = 0;
		for (i = 0; i < msg->iov_count; ++i) {
			tot_len += iov[i].iov_len;
			if (tot_len > USDF_MSG_MAX_INJECT_SIZE) {
				USDF_DBG_SYS(EP_DATA, "max inject len exceeded (%zu)\n",
						tot_len);
				return -FI_EINVAL;
			}
		}
	}

	/* cheap availability check before taking the lock */
	if (TAILQ_EMPTY(&tx->t.msg.tx_free_wqe)) {
		return -FI_EAGAIN;
	}

	pthread_spin_lock(&udp->dom_progress_lock);

	wqe = usdf_msg_get_tx_wqe(tx);

	wqe->ms_context = msg->context;
	if (flags & FI_INJECT) {
		/* coalesce the caller's iov into the wqe's bounce buffer so
		 * the caller may reuse its buffers immediately */
		tot_len = 0;
		for (i = 0; i < msg->iov_count; ++i) {
			assert(tot_len + iov[i].iov_len <= USDF_MSG_MAX_INJECT_SIZE);
			memcpy(&wqe->ms_inject_buf[tot_len], iov[i].iov_base,
				iov[i].iov_len);
			tot_len += iov[i].iov_len;
		}
		wqe->ms_iov[0].iov_base = wqe->ms_inject_buf;
		wqe->ms_iov[0].iov_len = tot_len;
		wqe->ms_last_iov = 0;

	} else {
		/* reference the caller's buffers directly */
		tot_len = 0;
		for (i = 0; i < msg->iov_count; ++i) {
			wqe->ms_iov[i].iov_base = (void *)iov[i].iov_base;
			wqe->ms_iov[i].iov_len = iov[i].iov_len;
			tot_len += iov[i].iov_len;
		}
		wqe->ms_last_iov = msg->iov_count - 1;
	}

	/* initialize the cursor used while transmitting this wqe */
	wqe->ms_cur_iov = 0;
	wqe->ms_resid = tot_len;
	wqe->ms_length = tot_len;
	wqe->ms_cur_ptr = iov[0].iov_base;
	wqe->ms_iov_resid = iov[0].iov_len;

	/* parses as (dflt || FI_COMPLETION) ? 1 : 0 -- ?: binds last */
	wqe->ms_signal_comp = ep->ep_tx_dflt_signal_comp ||
		(flags & FI_COMPLETION) ? 1 : 0;

	/* add send to EP, and add EP to TX list if not present */
	TAILQ_INSERT_TAIL(&ep->e.msg.ep_posted_wqe, wqe, ms_link);
	usdf_msg_ep_ready(ep);

	pthread_spin_unlock(&udp->dom_progress_lock);

	usdf_domain_progress(udp);

	return 0;
}
Exemplo n.º 26
0
int usdf_msg_fill_dom_attr(uint32_t version, const struct fi_info *hints,
			   struct fi_info *fi, struct usd_device_attrs *dap)
{
	int ret;
	struct fi_domain_attr defaults;

	defaults = msg_dflt_domain_attr;
	ret = usdf_domain_getname(version, dap, &defaults.name);
	if (ret < 0)
		return -FI_ENODATA;

	if (!hints || !hints->domain_attr)
		goto catch;

	/* how to handle fi_thread_fid, fi_thread_completion, etc?
	 */
	switch (hints->domain_attr->threading) {
	case FI_THREAD_UNSPEC:
	case FI_THREAD_ENDPOINT:
		break;
	default:
		return -FI_ENODATA;
	}

	/* how to handle fi_progress_manual?
	 */
	switch (hints->domain_attr->control_progress) {
	case FI_PROGRESS_UNSPEC:
	case FI_PROGRESS_AUTO:
		break;
	default:
		return -FI_ENODATA;
	}

	switch (hints->domain_attr->data_progress) {
	case FI_PROGRESS_UNSPEC:
	case FI_PROGRESS_MANUAL:
		break;
	default:
		return -FI_ENODATA;
	}

	switch (hints->domain_attr->resource_mgmt) {
	case FI_RM_UNSPEC:
	case FI_RM_DISABLED:
		break;
	default:
		return -FI_ENODATA;
	}

	switch (hints->domain_attr->caps) {
	case 0:
	case FI_REMOTE_COMM:
		break;
	default:
		USDF_WARN_SYS(DOMAIN,
			"invalid domain capabilities\n");
		return -FI_ENODATA;
	}

	if (usdf_check_mr_mode(version, hints, defaults.mr_mode))
		return -FI_ENODATA;

	if (hints->domain_attr->mr_cnt <= USDF_MSG_MR_CNT) {
		defaults.mr_cnt = hints->domain_attr->mr_cnt;
	} else {
		USDF_DBG_SYS(DOMAIN, "mr_count exceeded provider limit\n");
		return -FI_ENODATA;
	}

catch:
Exemplo n.º 27
0
/*
 * Handle a receive on a queue servicing a message endpoint
 *
 * Demultiplexes an inbound RUDP packet: control packets (ACK/NAK) are
 * handed to their handlers; data packets (FIRST/LAST) are sequence
 * checked and their payload scattered into the current receive queue
 * entry's iov.  The packet buffer is always reposted at the end.
 */
static inline void
usdf_msg_handle_recv(struct usdf_domain *udp, struct usd_completion *comp)
{
	struct rudp_pkt *pkt;
	struct usdf_msg_qe *rqe;
	struct usdf_ep *ep;
	struct usd_qp *qp;
	struct usdf_rx *rx;
	uint32_t peer_id;
	uint32_t opcode;
	uint8_t *rx_ptr;
	uint8_t *rqe_ptr;
	size_t cur_iov;
	size_t iov_resid;
	size_t ms_resid;
	size_t rxlen;
	size_t copylen;
	int ret;

	pkt = comp->uc_context;
	opcode = ntohs(pkt->msg.opcode);
	peer_id = ntohs(pkt->msg.src_peer_id);
	/* unknown peer: recover the rx via the QP so we can repost */
	if (peer_id > USDF_MAX_PEERS) {
		qp = comp->uc_qp;
		rx = qp->uq_context;
		goto dropit;
	}
	ep = udp->dom_peer_tab[peer_id];
	if (ep == NULL) {
		qp = comp->uc_qp;
		rx = qp->uq_context;
		goto dropit;
	}
	rx = ep->ep_rx;

	if (comp->uc_status != USD_COMPSTAT_SUCCESS)
		goto dropit;

	switch (opcode) {
	case RUDP_OP_ACK:
		usdf_msg_rx_ack(ep, pkt);
		goto dropit;
	case RUDP_OP_NAK:
		usdf_msg_rx_nak(ep, pkt);
		goto dropit;
	case RUDP_OP_FIRST:
	case RUDP_OP_LAST:
		break;
	default:
		USDF_DBG_SYS(EP_DATA,
				"encountered unexpected opcode %" PRIu32 "\n",
				opcode);
		goto dropit;
	}

	/* drop out-of-sequence packets */
	ret = usdf_msg_check_seq(ep, pkt);
	if (ret == -1) {
		goto dropit;
	}

	/* start a new message if none is in progress */
	rqe = ep->e.msg.ep_cur_recv;
	if (rqe == NULL) {
		if (TAILQ_EMPTY(&rx->r.msg.rx_posted_rqe)) {
			goto dropit;
		}
		rqe = TAILQ_FIRST(&rx->r.msg.rx_posted_rqe);
		TAILQ_REMOVE(&rx->r.msg.rx_posted_rqe, rqe, ms_link);
		ep->e.msg.ep_cur_recv = rqe;
	}

	/* payload starts right after the packet header */
	rx_ptr = (uint8_t *)(pkt + 1);
	rxlen = ntohs(pkt->msg.m.rc_data.length);
	rqe->ms_length += rxlen;
	/* work on local copies of the rqe cursor; stored back only if the
	 * message continues past this packet */
	rqe_ptr = (uint8_t *)rqe->ms_cur_ptr;
	iov_resid = rqe->ms_iov_resid;
	cur_iov = rqe->ms_cur_iov;
	ms_resid = rqe->ms_resid;
	while (rxlen > 0) {
		copylen = MIN(rxlen, iov_resid);
		memcpy(rqe_ptr, rx_ptr, copylen);
		rx_ptr += copylen;
		rxlen -= copylen;
		iov_resid -= copylen;
		ms_resid -= copylen;
		if (iov_resid == 0) {
			/* out of iov space: remaining rxlen is truncated */
			if (cur_iov == rqe->ms_last_iov) {
				break;
			}
			++cur_iov;
			rqe_ptr = rqe->ms_iov[cur_iov].iov_base;
			iov_resid = rqe->ms_iov[cur_iov].iov_len;
		} else {
			rqe_ptr += copylen;
		}
	}

	if (opcode & RUDP_OP_LAST) {
		/*
		* Normally we need to store back the updated values of
		* ms_resid, ms_cur_iov, ms_cur_ptr and ms_iov_resid. But
		* being the last step of the process, updating these
		* values are not necessary
		*/
		if (rxlen > 0) {
			USDF_DBG_SYS(EP_DATA, "message truncated by %zu bytes",
					rxlen);
			rqe->ms_length -= rxlen;
			usdf_msg_recv_complete(ep, rqe, FI_ETRUNC);
		} else {
			usdf_msg_recv_complete(ep, rqe, FI_SUCCESS);
		}

		ep->e.msg.ep_cur_recv = NULL;
	} else {
		/* message continues: persist the cursor for the next packet */
		rqe->ms_cur_ptr = rqe_ptr;
		rqe->ms_iov_resid = iov_resid;
		rqe->ms_cur_iov = cur_iov;
		rqe->ms_resid = ms_resid;
	}

dropit:
	/* repost buffer */
	_usdf_msg_post_recv(rx, pkt,
			rx->rx_domain->dom_fabric->fab_dev_attrs->uda_mtu);
}
Exemplo n.º 28
0
/* Open a usNIC domain on the given fabric.
 *
 * Validates the requested domain name, MR mode and source address
 * against the underlying device, opens the device, and initializes the
 * domain object (fid ops, progress lock, ready/hcq lists, duplicated
 * fi_info, RDC data).  On success *domain is set and 0 is returned; on
 * failure a negative fabric error is returned and all partially
 * acquired resources are released.
 */
int
usdf_domain_open(struct fid_fabric *fabric, struct fi_info *info,
	   struct fid_domain **domain, void *context)
{
	struct usdf_fabric *fp;
	struct usdf_domain *udp;
	struct sockaddr_in *sin;
	size_t addrlen;
	int ret;
#if ENABLE_DEBUG
	char requested[INET_ADDRSTRLEN], actual[INET_ADDRSTRLEN];
#endif

	USDF_TRACE_SYS(DOMAIN, "\n");
	sin = NULL;

	fp = fab_fidtou(fabric);

	if (info->domain_attr != NULL) {
		/* No versioning information available here. */
		if (!usdf_domain_checkname(0, fp->fab_dev_attrs,
					   info->domain_attr->name)) {
			USDF_WARN_SYS(DOMAIN, "domain name mismatch\n");
			return -FI_ENODATA;
		}

		if (ofi_check_mr_mode(fabric->api_version,
				      OFI_MR_BASIC_MAP | FI_MR_LOCAL,
				      info->domain_attr->mr_mode)) {
			/* the caller ignored our fi_getinfo results */
			USDF_WARN_SYS(DOMAIN, "MR mode (%d) not supported\n",
				      info->domain_attr->mr_mode);
			return -FI_ENODATA;
		}
	}

	udp = calloc(1, sizeof *udp);
	if (udp == NULL) {
		USDF_DBG("unable to alloc mem for domain\n");
		ret = -FI_ENOMEM;
		goto fail;
	}

	USDF_DBG("uda_devname=%s\n", fp->fab_dev_attrs->uda_devname);

	/*
	 * Make sure address format is good and matches this fabric
	 */
	switch (info->addr_format) {
	case FI_SOCKADDR:
		addrlen = sizeof(struct sockaddr);
		sin = info->src_addr;
		break;
	case FI_SOCKADDR_IN:
		addrlen = sizeof(struct sockaddr_in);
		sin = info->src_addr;
		break;
	case FI_ADDR_STR:
		sin = usdf_format_to_sin(info, info->src_addr);
		/* BUG FIX: usdf_format_to_sin() may fail (allocation or
		 * conversion); sin was previously dereferenced below with
		 * no check on this path */
		if (sin == NULL) {
			ret = -FI_ENOMEM;
			goto fail;
		}
		goto skip_size_check;
	default:
		ret = -FI_EINVAL;
		goto fail;
	}

	if (info->src_addrlen != addrlen) {
		ret =  -FI_EINVAL;
		goto fail;
	}

skip_size_check:
	if (sin->sin_family != AF_INET ||
	    sin->sin_addr.s_addr != fp->fab_dev_attrs->uda_ipaddr_be) {
		USDF_DBG_SYS(DOMAIN, "requested src_addr (%s) != fabric addr (%s)\n",
			inet_ntop(AF_INET, &sin->sin_addr.s_addr,
				requested, sizeof(requested)),
			inet_ntop(AF_INET, &fp->fab_dev_attrs->uda_ipaddr_be,
				actual, sizeof(actual)));

		ret = -FI_EINVAL;
		usdf_free_sin_if_needed(info, sin);
		goto fail;
	}
	usdf_free_sin_if_needed(info, sin);

	ret = usd_open(fp->fab_dev_attrs->uda_devname, &udp->dom_dev);
	if (ret != 0) {
		goto fail;
	}

	udp->dom_fid.fid.fclass = FI_CLASS_DOMAIN;
	udp->dom_fid.fid.context = context;
	udp->dom_fid.fid.ops = &usdf_fid_ops;
	udp->dom_fid.ops = &usdf_domain_ops;
	udp->dom_fid.mr = &usdf_domain_mr_ops;

	ret = pthread_spin_init(&udp->dom_progress_lock,
			PTHREAD_PROCESS_PRIVATE);
	if (ret != 0) {
		ret = -ret;
		goto fail;
	}
	TAILQ_INIT(&udp->dom_tx_ready);
	TAILQ_INIT(&udp->dom_hcq_list);

	udp->dom_info = fi_dupinfo(info);
	if (udp->dom_info == NULL) {
		ret = -FI_ENOMEM;
		goto fail;
	}
	/* the domain keeps no destination address */
	if (udp->dom_info->dest_addr != NULL) {
		free(udp->dom_info->dest_addr);
		udp->dom_info->dest_addr = NULL;
	}

	ret = usdf_dom_rdc_alloc_data(udp);
	if (ret != 0) {
		goto fail;
	}

	udp->dom_fabric = fp;
	LIST_INSERT_HEAD(&fp->fab_domain_list, udp, dom_link);
	ofi_atomic_initialize32(&udp->dom_refcnt, 0);
	ofi_atomic_inc32(&fp->fab_refcnt);

	*domain = &udp->dom_fid;
	return 0;

fail:
	if (udp != NULL) {
		if (udp->dom_info != NULL) {
			fi_freeinfo(udp->dom_info);
		}
		if (udp->dom_dev != NULL) {
			usd_close(udp->dom_dev);
		}
		usdf_dom_rdc_free_data(udp);
		free(udp);
	}
	return ret;
}