Example #1
static ssize_t
mrail_send_common(struct fid_ep *ep_fid, const struct iovec *iov, void **desc,
		  size_t count, size_t len, fi_addr_t dest_addr, uint64_t data,
		  void *context, uint64_t flags)
{
	struct mrail_ep *mrail_ep = container_of(ep_fid, struct mrail_ep,
						 util_ep.ep_fid.fid);
	struct mrail_peer_info *peer_info;
	struct iovec *iov_dest = alloca(sizeof(*iov_dest) * (count + 1));
	struct mrail_tx_buf *tx_buf;
	uint32_t i = mrail_get_tx_rail(mrail_ep);
	struct fi_msg msg;
	ssize_t ret;

	peer_info = ofi_av_get_addr(mrail_ep->util_ep.av, (int) dest_addr);

	ofi_ep_lock_acquire(&mrail_ep->util_ep);

	tx_buf = mrail_get_tx_buf(mrail_ep, context, peer_info->seq_no++,
				  ofi_op_msg, flags | FI_MSG);
	if (OFI_UNLIKELY(!tx_buf)) {
		ret = -FI_ENOMEM;
		goto err1;
	}
	mrail_copy_iov_hdr(&tx_buf->hdr, iov_dest, iov, count);

	msg.msg_iov 	= iov_dest;
	msg.desc    	= desc;
	msg.iov_count	= count + 1;
	msg.addr	= dest_addr;
	msg.context	= tx_buf;
	msg.data	= data;

	if (len < mrail_ep->rails[i].info->tx_attr->inject_size)
		flags |= FI_INJECT;

	FI_DBG(&mrail_prov, FI_LOG_EP_DATA, "Posting send of length: %" PRIu64
	       " dest_addr: 0x%" PRIx64 "  seq: %d on rail: %d\n",
	       len, dest_addr, peer_info->seq_no - 1, i);

	ret = fi_sendmsg(mrail_ep->rails[i].ep, &msg, flags);
	if (ret) {
		FI_WARN(&mrail_prov, FI_LOG_EP_DATA,
			"Unable to fi_sendmsg on rail: %" PRIu32 "\n", i);
		goto err2;
	} else if (!(flags & FI_COMPLETION)) {
		ofi_ep_tx_cntr_inc(&mrail_ep->util_ep);
	}
	ofi_ep_lock_release(&mrail_ep->util_ep);
	return ret;
err2:
	util_buf_release(mrail_ep->tx_buf_pool, tx_buf);
err1:
	peer_info->seq_no--;
	ofi_ep_lock_release(&mrail_ep->util_ep);
	return ret;
}
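The send path above prepends a protocol header to the caller's scatter-gather list: mrail_copy_iov_hdr() fills iov_dest so the header travels as entry 0 and the payload entries follow, which is why the message is posted with iov_count = count + 1. A minimal sketch of that prepend step, assuming the header simply occupies the first iovec slot (the real mrail_tx_buf header layout is not shown here), could look like:

#include <string.h>
#include <sys/uio.h>

/* Hypothetical sketch: put the wire header in slot 0 and copy the caller's
 * iovec entries into slots 1..count, producing a count + 1 entry list. */
static void sketch_copy_iov_hdr(void *hdr, size_t hdr_len,
				struct iovec *iov_dest,
				const struct iovec *iov, size_t count)
{
	iov_dest[0].iov_base = hdr;
	iov_dest[0].iov_len = hdr_len;
	memcpy(&iov_dest[1], iov, sizeof(*iov) * count);
}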
Example #2
static int
util_mr_cache_create(struct ofi_mr_cache *cache, const struct iovec *iov,
		     uint64_t access, struct ofi_mr_entry **entry)
{
	int ret;

	FI_DBG(cache->domain->prov, FI_LOG_MR, "create %p (len: %" PRIu64 ")\n",
	       iov->iov_base, iov->iov_len);

	util_mr_cache_process_events(cache);

	*entry = util_buf_alloc(cache->entry_pool);
	if (OFI_UNLIKELY(!*entry))
		return -FI_ENOMEM;

	(*entry)->iov = *iov;
	(*entry)->use_cnt = 1;

	ret = cache->add_region(cache, *entry);
	if (ret) {
		while (ret && ofi_mr_cache_flush(cache)) {
			ret = cache->add_region(cache, *entry);
		}
		if (ret) {
			assert(!ofi_mr_cache_flush(cache));
			util_buf_release(cache->entry_pool, *entry);
			return ret;
		}
	}

	cache->cached_size += iov->iov_len;
	if ((++cache->cached_cnt > cache->max_cached_cnt) ||
	    (cache->cached_size > cache->max_cached_size)) {
		(*entry)->cached = 0;
	} else {
		if (cache->mr_storage.insert(&cache->mr_storage,
					     &(*entry)->iov, *entry)) {
			ret = -FI_ENOMEM;
			goto err;
		}
		(*entry)->cached = 1;

		ret = ofi_monitor_subscribe(&cache->nq, iov->iov_base, iov->iov_len,
					    &(*entry)->subscription);
		if (ret)
			goto err;
		(*entry)->subscribed = 1;
	}

	return 0;

err:
	util_mr_free_entry(cache, *entry);
	return ret;
}
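The registration retry above is a small reclaim-and-retry loop: if add_region() fails, flush LRU entries and try again for as long as the flush keeps freeing something. Detached from the MR-cache types, the same idiom looks roughly like the sketch below (try_op and reclaim are placeholder callbacks, not OFI APIs):

#include <stdbool.h>

/* Generic reclaim-and-retry sketch: retry the failing operation as long as
 * reclaiming resources still makes progress. */
static int retry_while_reclaiming(int (*try_op)(void *arg),
				  bool (*reclaim)(void *arg), void *arg)
{
	int ret = try_op(arg);

	while (ret && reclaim(arg))
		ret = try_op(arg);

	return ret;
}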
Example #3
static inline void rxd_handle_send_comp(struct fi_cq_msg_entry *comp)
{
	struct rxd_pkt_meta *pkt_meta;
	pkt_meta = container_of(comp->op_context, struct rxd_pkt_meta, context);

	FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "Send completion for: %p\n", pkt_meta);
	rxd_ep_lock_if_required(pkt_meta->ep);
	RXD_PKT_MARK_LOCAL_ACK(pkt_meta);
	rxd_tx_pkt_release(pkt_meta);
	rxd_ep_unlock_if_required(pkt_meta->ep);
}
Example #4
struct mrail_recv *
mrail_match_recv_handle_unexp(struct mrail_recv_queue *recv_queue, uint64_t tag,
			      uint64_t addr, char *data, size_t len, void *context)
{
	struct dlist_entry *entry;
	struct mrail_unexp_msg_entry *unexp_msg_entry;
	struct mrail_match_attr match_attr = {
		.tag	= tag,
		.addr	= addr,
	};

	entry = dlist_remove_first_match(&recv_queue->recv_list,
					 recv_queue->match_recv, &match_attr);
	if (OFI_UNLIKELY(!entry)) {
		unexp_msg_entry = recv_queue->get_unexp_msg_entry(recv_queue,
								  context);
		if (!unexp_msg_entry) {
			FI_WARN(recv_queue->prov, FI_LOG_CQ,
				"Unable to get unexp_msg_entry!");
			assert(0);
			return NULL;
		}

		unexp_msg_entry->addr		= addr;
		unexp_msg_entry->tag		= tag;
		unexp_msg_entry->context	= context;
		memcpy(unexp_msg_entry->data, data, len);

		FI_DBG(recv_queue->prov, FI_LOG_CQ, "No matching recv found for"
		       " incoming msg with addr: 0x%" PRIx64 " tag: 0x%" PRIx64
		       "\n", unexp_msg_entry->addr, unexp_msg_entry->tag);

		FI_DBG(recv_queue->prov, FI_LOG_CQ, "Enqueueing unexp_msg_entry to "
		       "unexpected msg list\n");

		dlist_insert_tail(&unexp_msg_entry->entry,
				  &recv_queue->unexp_msg_list);
		return NULL;
	}
	return container_of(entry, struct mrail_recv, entry);
}
Example #5
int rxd_av_dg_reverse_lookup(struct rxd_av *av, uint64_t start_idx,
			      const void *addr, fi_addr_t *dg_fiaddr)
{
	uint8_t curr_addr[RXD_MAX_DGRAM_ADDR];
	size_t i, len;
	int ret;

	for (i = 0; i < (size_t) av->dg_av_used; i++) {
		len = sizeof curr_addr;
		ret = fi_av_lookup(av->dg_av, (i + start_idx) % av->dg_av_used,
				   curr_addr, &len);
		if (!ret && !memcmp(curr_addr, addr, len)) {
			*dg_fiaddr = (i + start_idx) % av->dg_av_used;
			FI_DBG(&rxd_prov, FI_LOG_AV, "found: %" PRIu64 "\n",
				*dg_fiaddr);
			return 0;
		}
	}
	FI_DBG(&rxd_prov, FI_LOG_AV, "addr not found\n");
	return -FI_ENODATA;
}
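The loop visits every datagram AV slot exactly once, but begins at start_idx and wraps with a modulo so the search starts near the most likely match. The visiting order by itself, with a plain counter standing in for the AV (purely illustrative, no OFI types):

#include <stdio.h>

int main(void)
{
	size_t n = 5, start_idx = 3, i;

	/* Mirrors the (i + start_idx) % av->dg_av_used order used above. */
	for (i = 0; i < n; i++)
		printf("visit slot %zu\n", (i + start_idx) % n);
	/* visits 3, 4, 0, 1, 2 */
	return 0;
}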
Example #6
int mrail_cq_write_recv_comp(struct mrail_ep *mrail_ep, struct mrail_hdr *hdr,
			     struct fi_cq_tagged_entry *comp,
			     struct mrail_recv *recv)
{
	FI_DBG(&mrail_prov, FI_LOG_CQ, "writing recv completion: length: %zu "
	       "tag: 0x%" PRIx64 "\n", comp->len - sizeof(struct mrail_pkt),
	       hdr->tag);
	return ofi_cq_write(mrail_ep->util_ep.rx_cq, recv->context,
			   recv->comp_flags |
			   (comp->flags & FI_REMOTE_CQ_DATA),
			   comp->len - sizeof(struct mrail_pkt),
			   NULL, comp->data, hdr->tag);
}
Example #7
struct rxd_trecv_entry *rxd_get_trecv_entry(struct rxd_ep *ep,
					      struct rxd_rx_entry *rx_entry)
{
	struct dlist_entry *match;
	struct rxd_trecv_entry *trecv_entry;

	match = dlist_find_first_match(&ep->trecv_list, &rxd_match_trecv_entry,
				       (void *)rx_entry);
	if (!match) {
		/*todo: queue the pkt */
		FI_DBG(&rxd_prov, FI_LOG_EP_CTRL,
		       "no matching trecv entry, tag: 0x%" PRIx64 "\n",
			rx_entry->op_hdr.tag);
		return NULL;
	}

	FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "matched - tag: 0x%" PRIx64 "\n",
	       rx_entry->op_hdr.tag);

	dlist_remove(match);
	trecv_entry = container_of(match, struct rxd_trecv_entry, entry);
	trecv_entry->rx_entry = rx_entry;
	return trecv_entry;
}
Example #8
static int
util_mr_cache_merge(struct ofi_mr_cache *cache, const struct fi_mr_attr *attr,
		    struct ofi_mr_entry *old_entry, struct ofi_mr_entry **entry)
{
	struct iovec iov, *old_iov;

	iov = *attr->mr_iov;
	do {
		FI_DBG(cache->domain->prov, FI_LOG_MR,
		       "merging %p (len: %" PRIu64 ") with %p (len: %" PRIu64 ")\n",
		       iov.iov_base, iov.iov_len,
		       old_entry->iov.iov_base, old_entry->iov.iov_len);
		old_iov = &old_entry->iov;

		iov.iov_len = ((uintptr_t)
			MAX(ofi_iov_end(&iov), ofi_iov_end(old_iov))) -
			((uintptr_t) MIN(iov.iov_base, old_iov->iov_base));
		iov.iov_base = MIN(iov.iov_base, old_iov->iov_base);
		FI_DBG(cache->domain->prov, FI_LOG_MR, "merged %p (len: %" PRIu64 ")\n",
		       iov.iov_base, iov.iov_len);

		if (old_entry->subscribed) {
			/* old entry will be removed as soon as `use_cnt == 0`.
			 * unsubscribe from the entry */
			ofi_monitor_unsubscribe(&old_entry->subscription);
			old_entry->subscribed = 0;
		}
		cache->mr_storage.erase(&cache->mr_storage, old_entry);
		old_entry->cached = 0;

		if (old_entry->use_cnt == 0) {
			dlist_remove_init(&old_entry->lru_entry);
			util_mr_free_entry(cache, old_entry); 
		}

	} while ((old_entry = cache->mr_storage.find(&cache->mr_storage, &iov)));

	return util_mr_cache_create(cache, &iov, attr->access, entry);
}
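Each merge step computes the union of two address ranges: the merged base is the lower of the two bases and the merged length reaches the higher of the two ends. A self-contained version of just that arithmetic, assuming an exclusive end (base + len) and using local helpers that stand in for ofi_iov_end and the MIN/MAX macros:

#include <stdint.h>
#include <sys/uio.h>

#define SK_MIN(a, b) ((a) < (b) ? (a) : (b))
#define SK_MAX(a, b) ((a) > (b) ? (a) : (b))

/* First byte past the end of the region described by iov. */
static uintptr_t sk_iov_end(const struct iovec *iov)
{
	return (uintptr_t) iov->iov_base + iov->iov_len;
}

/* Union of two (possibly overlapping) regions: lowest base, highest end. */
static struct iovec sk_iov_union(const struct iovec *a, const struct iovec *b)
{
	struct iovec merged;

	merged.iov_base = SK_MIN(a->iov_base, b->iov_base);
	merged.iov_len = SK_MAX(sk_iov_end(a), sk_iov_end(b)) -
			 (uintptr_t) merged.iov_base;
	return merged;
}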
Example #9
static void server_sock_accept(struct util_wait *wait,
			       struct tcpx_cm_context *cm_ctx)
{
	struct tcpx_conn_handle *handle;
	struct tcpx_pep *pep;
	SOCKET sock;
	int ret;

	FI_DBG(&tcpx_prov, FI_LOG_EP_CTRL, "Received Connreq\n");
	assert(cm_ctx->fid->fclass == FI_CLASS_PEP);
	pep = container_of(cm_ctx->fid, struct tcpx_pep,
			   util_pep.pep_fid.fid);

	sock = accept(pep->sock, NULL, 0);
	if (sock < 0) {
		FI_WARN(&tcpx_prov, FI_LOG_EP_CTRL,
			"accept error: %d\n", ofi_sockerr());
		return;
	}

	handle = calloc(1, sizeof(*handle));
	if (!handle) {
		FI_WARN(&tcpx_prov, FI_LOG_EP_CTRL,
			"cannot allocate memory \n");
		goto err1;
	}

	cm_ctx = calloc(1, sizeof(*cm_ctx));
	if (!cm_ctx)
		goto err2;

	handle->conn_fd = sock;
	handle->handle.fclass = FI_CLASS_CONNREQ;
	handle->pep = pep;
	cm_ctx->fid = &handle->handle;
	cm_ctx->type = SERVER_RECV_CONNREQ;

	ret = ofi_wait_fd_add(wait, sock, FI_EPOLL_IN,
			      tcpx_eq_wait_try_func,
			      NULL, (void *) cm_ctx);
	if (ret)
		goto err3;
	wait->signal(wait);
	return;
err3:
	free(cm_ctx);
err2:
	free(handle);
err1:
	ofi_close_socket(sock);
}
Example #10
void ofi_monitor_unsubscribe(struct ofi_subscription *subscription)
{
	FI_DBG(&core_prov, FI_LOG_MR,
	       "unsubscribing addr=%p len=%zu subscription=%p\n",
	       subscription->addr, subscription->len, subscription);
	subscription->nq->monitor->unsubscribe(subscription->nq->monitor,
					       subscription->addr,
					       subscription->len,
					       subscription);
	fastlock_acquire(&subscription->nq->lock);
	dlist_init(&subscription->entry);
	subscription->nq->refcnt--;
	fastlock_release(&subscription->nq->lock);
}
Example #11
static int rxm_ep_txrx_res_open(struct rxm_ep *rxm_ep)
{
	struct rxm_domain *rxm_domain;
	uint8_t local_mr;
	int ret;

	rxm_domain = container_of(rxm_ep->util_ep.domain, struct rxm_domain, util_domain);
	local_mr = rxm_ep->msg_info->mode & FI_LOCAL_MR ? 1 : 0;

	FI_DBG(&rxm_prov, FI_LOG_EP_CTRL, "MSG provider mode & FI_LOCAL_MR: %d\n",
			local_mr);

	ret = rxm_buf_pool_create(local_mr, rxm_ep->msg_info->tx_attr->size,
			sizeof(struct rxm_pkt), &rxm_ep->tx_pool, rxm_domain->msg_domain);
	if (ret)
		return ret;

	ret = rxm_buf_pool_create(local_mr, rxm_ep->msg_info->rx_attr->size,
			sizeof(struct rxm_rx_buf), &rxm_ep->rx_pool, rxm_domain->msg_domain);
	if (ret)
		goto err1;

	rxm_ep->txe_fs = rxm_txe_fs_create(rxm_ep->rxm_info->tx_attr->size);
	if (!rxm_ep->txe_fs) {
		ret = -FI_ENOMEM;
		goto err2;
	}

	ofi_key_idx_init(&rxm_ep->tx_key_idx, fi_size_bits(rxm_ep->rxm_info->tx_attr->size));

	ret = rxm_recv_queue_init(&rxm_ep->recv_queue, rxm_ep->rxm_info->rx_attr->size);
	if (ret)
		goto err3;

	ret = rxm_recv_queue_init(&rxm_ep->trecv_queue, rxm_ep->rxm_info->rx_attr->size);
	if (ret)
		goto err4;

	return 0;
err4:
	rxm_recv_queue_close(&rxm_ep->recv_queue);
err3:
	rxm_txe_fs_free(rxm_ep->txe_fs);
err2:
	util_buf_pool_destroy(rxm_ep->tx_pool);
err1:
	util_buf_pool_destroy(rxm_ep->rx_pool);
	return ret;
}
Example #12
static void rxd_handle_ack(struct rxd_ep *ep, struct ofi_ctrl_hdr *ctrl,
			   struct rxd_rx_buf *rx_buf)
{
	struct rxd_tx_entry *tx_entry;
	uint64_t idx;

	FI_DBG(&rxd_prov, FI_LOG_EP_CTRL,
	       "ack- msg_id: %" PRIu64 ", segno: %d, segsz: %d, buf: %p\n",
	       ctrl->msg_id, ctrl->seg_no, ctrl->seg_size, rx_buf);

	idx = ctrl->msg_id & RXD_TX_IDX_BITS;
	tx_entry = &ep->tx_entry_fs->buf[idx];
	if (tx_entry->msg_id != ctrl->msg_id)
		goto out;

	rxd_ep_free_acked_pkts(ep, tx_entry, ctrl->seg_no);
	if ((tx_entry->bytes_sent == tx_entry->op_hdr.size) &&
	    dlist_empty(&tx_entry->pkt_list)) {
		FI_DBG(&rxd_prov, FI_LOG_EP_CTRL,
			"reporting TX completion : %p\n", tx_entry);
		if (tx_entry->op_type != RXD_TX_READ_REQ) {
			rxd_cq_report_tx_comp(rxd_ep_tx_cq(ep), tx_entry);
			rxd_cntr_report_tx_comp(ep, tx_entry);
			rxd_tx_entry_free(ep, tx_entry);
		}
	} else {
		tx_entry->rx_key = ctrl->rx_key;
		/* do not allow the window size to shrink (on duplicate acks) */
		tx_entry->window = MAX(tx_entry->window, ctrl->seg_no + ctrl->seg_size);
		FI_DBG(&rxd_prov, FI_LOG_EP_CTRL,
		       "ack- msg_id: %" PRIu64 ", window: %d\n",
		       ctrl->msg_id, tx_entry->window);
	}
out:
	rxd_ep_repost_buff(rx_buf);
}
Example #13
static int verify_addr(struct ofi_util_mr * in_mr, struct fi_mr_attr * item, uint64_t in_access,
                                 uint64_t in_addr, ssize_t in_len)
{
    int i;
    uint64_t start, end;

    if (!in_addr) {
        FI_DBG(in_mr->prov, FI_LOG_MR, "verify_addr: input address is zero\n");
        return -FI_EINVAL;
    }

    if ((in_access & item->access) != in_access) {
        FI_DBG(in_mr->prov, FI_LOG_MR, "verify_addr: requested access is not valid\n");
        return -FI_EACCES;
    }

    for (i = 0; i < item->iov_count; i++) {
        start = (uintptr_t) item->mr_iov[i].iov_base;
        end = start + item->mr_iov[i].iov_len;
        if (start <= in_addr && end >= (in_addr + in_len))
            return 0;
    }

    return -FI_EACCES;
}
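With the loop fixed to recompute the bounds of each registered iov, the test is a plain containment check: [in_addr, in_addr + in_len) must fall entirely inside one region. A standalone version of that check with a couple of sample calls (hypothetical helper, not part of the provider):

#include <stdint.h>
#include <stdio.h>

/* Does [addr, addr + len) lie entirely within [start, start + region_len)? */
static int sk_range_contains(uint64_t start, uint64_t region_len,
                             uint64_t addr, uint64_t len)
{
    uint64_t end = start + region_len;
    return start <= addr && end >= addr + len;
}

int main(void)
{
    /* registered region covers [0x1000, 0x2000) */
    printf("%d\n", sk_range_contains(0x1000, 0x1000, 0x1800, 0x100)); /* 1 */
    printf("%d\n", sk_range_contains(0x1000, 0x1000, 0x1f00, 0x200)); /* 0 */
    return 0;
}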
Example #14
static void util_monitor_read_events(struct ofi_mem_monitor *monitor)
{
	struct ofi_subscription *subscription;

	do {
		subscription = monitor->get_event(monitor);
		if (!subscription) {
			FI_DBG(&core_prov, FI_LOG_MR,
			       "no more events to be read\n");
			break;
		}

		FI_DBG(&core_prov, FI_LOG_MR,
		       "found event, context=%p, addr=%p, len=%zu nq=%p\n",
		       subscription, subscription->addr,
		       subscription->len, subscription->nq);

		fastlock_acquire(&subscription->nq->lock);
		if (dlist_empty(&subscription->entry))
			dlist_insert_tail(&subscription->entry,
					   &subscription->nq->list);
		fastlock_release(&subscription->nq->lock);
	} while (1);
}
Example #15
static int rxd_cq_write_tagged(struct rxd_cq *cq,
				struct fi_cq_tagged_entry *cq_entry)
{
	struct fi_cq_tagged_entry *comp;
	if (ofi_cirque_isfull(cq->util_cq.cirq))
		return -FI_ENOSPC;

	FI_DBG(&rxd_prov, FI_LOG_EP_CTRL,
		"report completion, tag: 0x%" PRIx64 "\n", cq_entry->tag);

	comp = ofi_cirque_tail(cq->util_cq.cirq);
	*comp = *cq_entry;
	ofi_cirque_commit(cq->util_cq.cirq);
	return 0;
}
Example #16
void rxd_ep_check_unexp_msg_list(struct rxd_ep *ep, struct rxd_recv_entry *recv_entry)
{
	struct dlist_entry *match;
	struct rxd_rx_entry *rx_entry;
	struct rxd_pkt_data_start *pkt_start;

	FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "ep->num_unexp_msg: %d\n", ep->num_unexp_msg);
	match = dlist_remove_first_match(&ep->unexp_msg_list, &rxd_match_unexp_msg,
					 (void *) recv_entry);
	if (match) {
		FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "progressing unexp msg entry\n");
		dlist_remove(&recv_entry->entry);
		ep->num_unexp_msg--;

		rx_entry = container_of(match, struct rxd_rx_entry, unexp_entry);
		rx_entry->recv = recv_entry;

		pkt_start = (struct rxd_pkt_data_start *) rx_entry->unexp_buf->buf;
		rxd_ep_handle_data_msg(ep, rx_entry->peer_info, rx_entry, rx_entry->recv->iov,
				     rx_entry->recv->msg.iov_count, &pkt_start->ctrl,
				     pkt_start->data, rx_entry->unexp_buf);
		rxd_ep_repost_buff(rx_entry->unexp_buf);
	}
}
Example #17
int ofi_monitor_subscribe(struct ofi_mem_monitor *monitor,
			  const void *addr, size_t len)
{
	int ret;

	FI_DBG(&core_prov, FI_LOG_MR,
	       "subscribing addr=%p len=%zu\n", addr, len);

	ret = monitor->subscribe(monitor, addr, len);
	if (OFI_UNLIKELY(ret)) {
		FI_WARN(&core_prov, FI_LOG_MR,
			"Failed (ret = %d) to monitor addr=%p len=%zu\n",
			ret, addr, len);
	}
	return ret;
}
Example #18
void ofi_mr_cache_delete(struct ofi_mr_cache *cache, struct ofi_mr_entry *entry)
{
	FI_DBG(cache->domain->prov, FI_LOG_MR, "delete %p (len: %" PRIu64 ")\n",
	       entry->iov.iov_base, entry->iov.iov_len);
	cache->delete_cnt++;

	util_mr_cache_process_events(cache);

	if (--entry->use_cnt == 0) {
		if (entry->cached) {
			dlist_insert_tail(&entry->lru_entry, &cache->lru_list);
		} else {
			util_mr_free_entry(cache, entry);
		}
	}
}
Example #19
int udpx_setname(fid_t fid, void *addr, size_t addrlen)
{
	struct udpx_ep *ep;
	int ret;

	ep = container_of(fid, struct udpx_ep, util_ep.ep_fid.fid);
	FI_DBG(&udpx_prov, FI_LOG_EP_CTRL, "%s\n", ofi_hex_str(addr, addrlen));
	ret = bind(ep->sock, addr, addrlen);
	if (ret) {
		FI_WARN(&udpx_prov, FI_LOG_EP_CTRL, "bind %d (%s)\n",
			errno, strerror(errno));
		return -errno;
	}
	ep->is_bound = 1;
	return 0;
}
Example #20
static void client_send_connreq(struct util_wait *wait,
				struct tcpx_cm_context *cm_ctx)
{
	struct tcpx_ep *ep;
	struct fi_eq_err_entry err_entry;
	socklen_t len;
	int status, ret = FI_SUCCESS;

	FI_DBG(&tcpx_prov, FI_LOG_EP_CTRL, "client send connreq\n");
	assert(cm_ctx->fid->fclass == FI_CLASS_EP);

	ep = container_of(cm_ctx->fid, struct tcpx_ep, util_ep.ep_fid.fid);

	len = sizeof(status);
	ret = getsockopt(ep->conn_fd, SOL_SOCKET, SO_ERROR, (char *) &status, &len);
	if (ret < 0 || status) {
		FI_WARN(&tcpx_prov, FI_LOG_EP_CTRL, "connection failure\n");
		ret = (ret < 0)? -ofi_sockerr() : status;
		goto err;
	}

	ret = tx_cm_data(ep->conn_fd, ofi_ctrl_connreq, cm_ctx);
	if (ret)
		goto err;

	ret = ofi_wait_fd_del(wait, ep->conn_fd);
	if (ret)
		goto err;

	cm_ctx->type = CLIENT_RECV_CONNRESP;
	ret = ofi_wait_fd_add(wait, ep->conn_fd, FI_EPOLL_IN,
			      tcpx_eq_wait_try_func, NULL, cm_ctx);
	if (ret)
		goto err;

	wait->signal(wait);
	return;
err:
	memset(&err_entry, 0, sizeof err_entry);
	err_entry.fid = cm_ctx->fid;
	err_entry.context = cm_ctx->fid->context;
	err_entry.err = -ret;

	free(cm_ctx);
	fi_eq_write(&ep->util_ep.eq->eq_fid, FI_NOTIFY,
		    &err_entry, sizeof(err_entry), UTIL_FLAG_ERROR);
}
Example #21
void rxd_handle_recv_comp(struct rxd_ep *ep, struct fi_cq_msg_entry *comp)
{
	struct ofi_ctrl_hdr *ctrl;
	struct rxd_rx_buf *rx_buf;
	struct rxd_peer *peer;

	FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "got recv completion\n");

	assert(rxd_reposted_bufs);
	rxd_reposted_bufs--;

	rx_buf = container_of(comp->op_context, struct rxd_rx_buf, context);
	ctrl = (struct ofi_ctrl_hdr *) rx_buf->buf;
	peer = rxd_ep_getpeer_info(ep, ctrl->conn_id);

	if (ctrl->version != OFI_CTRL_VERSION) {
		FI_WARN(&rxd_prov, FI_LOG_EP_CTRL, "ctrl version mismatch\n");
		return;
	}

	switch (ctrl->type) {
	case ofi_ctrl_connreq:
		rxd_handle_conn_req(ep, ctrl, comp, rx_buf);
		break;
	case ofi_ctrl_ack:
		rxd_handle_ack(ep, ctrl, rx_buf);
		break;
	case ofi_ctrl_discard:
		rxd_handle_discard(ep, ctrl, rx_buf);
		break;
	case ofi_ctrl_connresp:
		rxd_handle_connect_ack(ep, ctrl, rx_buf);
		break;
	case ofi_ctrl_start_data:
		rxd_handle_start_data(ep, peer, ctrl, comp, rx_buf);
		break;
	case ofi_ctrl_data:
		rxd_handle_data(ep, peer, ctrl, comp, rx_buf);
		break;
	default:
		rxd_ep_repost_buff(rx_buf);
		FI_WARN(&rxd_prov, FI_LOG_EP_CTRL,
			"invalid ctrl type: %d\n", ctrl->type);
	}

	rxd_check_waiting_rx(ep);
}
Example #22
bool ofi_mr_cache_flush(struct ofi_mr_cache *cache)
{
	struct ofi_mr_entry *entry;

	if (dlist_empty(&cache->lru_list))
		return false;

	dlist_pop_front(&cache->lru_list, struct ofi_mr_entry,
			entry, lru_entry);
	dlist_init(&entry->lru_entry);
	FI_DBG(cache->domain->prov, FI_LOG_MR, "flush %p (len: %" PRIu64 ")\n",
	       entry->iov.iov_base, entry->iov.iov_len);

	util_mr_uncache_entry(cache, entry);
	util_mr_free_entry(cache, entry);
	return true;
}
Example #23
struct rxd_recv_entry *rxd_get_recv_entry(struct rxd_ep *ep, struct rxd_rx_entry *rx_entry)
{
	struct dlist_entry *match;
	struct rxd_recv_entry *recv_entry;

	match = dlist_find_first_match(&ep->recv_list, &rxd_match_recv_entry,
				       (void *) rx_entry);
	if (!match) {
		/*todo: queue the pkt */
		FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "no matching recv entry\n");
		return NULL;
	}

	dlist_remove(match);
	recv_entry = container_of(match, struct rxd_recv_entry, entry);
	return recv_entry;
}
Example #24
static void server_send_cm_accept(struct util_wait *wait,
				  struct tcpx_cm_context *cm_ctx)
{
	struct fi_eq_cm_entry cm_entry = {0};
	struct fi_eq_err_entry err_entry;
	struct tcpx_ep *ep;
	int ret;

	assert(cm_ctx->fid->fclass == FI_CLASS_EP);
	ep = container_of(cm_ctx->fid, struct tcpx_ep, util_ep.ep_fid.fid);

	ret = tx_cm_data(ep->conn_fd, ofi_ctrl_connresp, cm_ctx);
	if (ret)
		goto err;

	cm_entry.fid =  cm_ctx->fid;
	ret = (int) fi_eq_write(&ep->util_ep.eq->eq_fid, FI_CONNECTED,
				&cm_entry, sizeof(cm_entry), 0);
	if (ret < 0) {
		FI_WARN(&tcpx_prov, FI_LOG_EP_CTRL, "Error writing to EQ\n");
	}

	ret = ofi_wait_fd_del(wait, ep->conn_fd);
	if (ret) {
		FI_WARN(&tcpx_prov, FI_LOG_EP_CTRL,
			"Could not remove fd from wait\n");
		goto err;
	}

	ret = tcpx_ep_msg_xfer_enable(ep);
	if (ret)
		goto err;

	FI_DBG(&tcpx_prov, FI_LOG_EP_CTRL, "Connection Accept Successful\n");
	free(cm_ctx);
	return;
err:
	memset(&err_entry, 0, sizeof err_entry);
	err_entry.fid = cm_ctx->fid;
	err_entry.context = cm_ctx->fid->context;
	err_entry.err = -ret;

	free(cm_ctx);
	fi_eq_write(&ep->util_ep.eq->eq_fid, FI_NOTIFY,
		    &err_entry, sizeof(err_entry), UTIL_FLAG_ERROR);
}
Example #25
static void ofi_ini_dir(const char *dir)
{
	int n = 0;
	char *lib;
	void *dlhandle;
	struct dirent **liblist = NULL;
	struct fi_provider* (*inif)(void);

	n = scandir(dir, &liblist, lib_filter, NULL);
	if (n < 0)
		goto libdl_done;

	while (n--) {
		if (asprintf(&lib, "%s/%s", dir, liblist[n]->d_name) < 0) {
			FI_WARN(&core_prov, FI_LOG_CORE,
			       "asprintf failed to allocate memory\n");
			goto libdl_done;
		}
		FI_DBG(&core_prov, FI_LOG_CORE, "opening provider lib %s\n", lib);

		dlhandle = dlopen(lib, RTLD_NOW);
		free(liblist[n]);
		if (dlhandle == NULL) {
			FI_WARN(&core_prov, FI_LOG_CORE,
			       "dlopen(%s): %s\n", lib, dlerror());
			free(lib);
			continue;
		}
		free(lib);

		inif = dlsym(dlhandle, "fi_prov_ini");
		if (inif == NULL) {
			FI_WARN(&core_prov, FI_LOG_CORE, "dlsym: %s\n", dlerror());
			dlclose(dlhandle);
		} else {
			ofi_register_provider((inif)(), dlhandle);
		}
	}

libdl_done:
	while (n-- > 0)
		free(liblist[n]);
	free(liblist);
}
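ofi_ini_dir is the usual dlopen plugin loop: open each candidate shared object, resolve the well-known fi_prov_ini constructor, and register whatever it returns. The core of that loop for a single library path, reduced to a hedged standalone sketch (generic names, error handling trimmed, handle deliberately kept open so the loaded code stays mapped):

#include <dlfcn.h>
#include <stdio.h>

/* Hypothetical single-plugin loader mirroring the dlopen/dlsym steps above. */
static void *load_one_plugin(const char *path, const char *init_sym)
{
	void *handle = dlopen(path, RTLD_NOW);
	void *(*init)(void);

	if (!handle) {
		fprintf(stderr, "dlopen(%s): %s\n", path, dlerror());
		return NULL;
	}

	init = (void *(*)(void)) dlsym(handle, init_sym);
	if (!init) {
		fprintf(stderr, "dlsym(%s): %s\n", init_sym, dlerror());
		dlclose(handle);
		return NULL;
	}
	/* handle is intentionally not dlclose()d: the returned object's code
	 * must stay mapped, as in ofi_ini_dir above. */
	return init();
}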
Example #26
static void util_mr_free_entry(struct ofi_mr_cache *cache,
			       struct ofi_mr_entry *entry)
{
	FI_DBG(cache->domain->prov, FI_LOG_MR, "free %p (len: %" PRIu64 ")\n",
	       entry->iov.iov_base, entry->iov.iov_len);

	assert(!entry->cached);
	if (entry->subscribed) {
		ofi_monitor_unsubscribe(&entry->subscription);
		entry->subscribed = 0;
	}
	cache->delete_region(cache, entry);
	assert((cache->cached_cnt != 0) &&
	       (((ssize_t)cache->cached_size - (ssize_t)entry->iov.iov_len) >= 0));
	cache->cached_cnt--;
	cache->cached_size -= entry->iov.iov_len;

	util_buf_release(cache->entry_pool, entry);
}
Example #27
int rxm_info_to_core(uint32_t version, const struct fi_info *hints,
		     struct fi_info *core_info)
{
	int use_srx = 0;

	rxm_info_to_core_mr_modes(version, hints, core_info);

	core_info->mode |= FI_RX_CQ_DATA | FI_CONTEXT;

	if (hints) {
		core_info->caps = hints->caps & RXM_PASSTHRU_CAPS;
		if (hints->caps & (FI_ATOMIC | FI_TAGGED))
			core_info->caps |= FI_MSG | FI_SEND | FI_RECV;

		/* FI_RMA cap is needed for large message transfer protocol */
		if (core_info->caps & FI_MSG)
			core_info->caps |= FI_RMA | FI_READ | FI_REMOTE_READ;

		if (hints->domain_attr) {
			core_info->domain_attr->caps |= hints->domain_attr->caps;
			core_info->domain_attr->threading = hints->domain_attr->threading;
		}
		if (hints->tx_attr) {
			core_info->tx_attr->msg_order = hints->tx_attr->msg_order;
			core_info->tx_attr->comp_order = hints->tx_attr->comp_order;
		}
		if (hints->rx_attr) {
			core_info->rx_attr->msg_order = hints->rx_attr->msg_order;
			core_info->rx_attr->comp_order = hints->rx_attr->comp_order;
		}
	}
	core_info->ep_attr->type = FI_EP_MSG;
	if (!fi_param_get_bool(&rxm_prov, "use_srx", &use_srx) && use_srx) {
		FI_DBG(&rxm_prov, FI_LOG_FABRIC,
		       "Requesting shared receive context from core provider\n");
		core_info->ep_attr->rx_ctx_cnt = FI_SHARED_CONTEXT;
	}

	core_info->tx_attr->size = rxm_msg_tx_size;
	core_info->rx_attr->size = rxm_msg_rx_size;

	return 0;
}
Example #28
int ofi_wait_fd_add(struct util_wait *wait, int fd, uint32_t events,
		    ofi_wait_fd_try_func wait_try, void *arg, void *context)
{
	struct ofi_wait_fd_entry *fd_entry;
	struct dlist_entry *entry;
	struct util_wait_fd *wait_fd = container_of(wait, struct util_wait_fd,
						    util_wait);
	int ret = 0;

	fastlock_acquire(&wait_fd->lock);
	entry = dlist_find_first_match(&wait_fd->fd_list, ofi_wait_fd_match, &fd);
	if (entry) {
		FI_DBG(wait->prov, FI_LOG_EP_CTRL,
		       "Given fd (%d) already added to wait list - %p \n",
		       fd, wait_fd);
		fd_entry = container_of(entry, struct ofi_wait_fd_entry, entry);
		ofi_atomic_inc32(&fd_entry->ref);
		goto out;
	}

	ret = fi_epoll_add(wait_fd->epoll_fd, fd, events, context);
	if (ret) {
		FI_WARN(wait->prov, FI_LOG_FABRIC, "Unable to add fd to epoll\n");
		goto out;
	}

	fd_entry = calloc(1, sizeof *fd_entry);
	if (!fd_entry) {
		ret = -FI_ENOMEM;
		fi_epoll_del(wait_fd->epoll_fd, fd);
		goto out;
	}
	fd_entry->fd = fd;
	fd_entry->wait_try = wait_try;
	fd_entry->arg = arg;
	ofi_atomic_initialize32(&fd_entry->ref, 1);

	dlist_insert_tail(&fd_entry->entry, &wait_fd->fd_list);
out:
	fastlock_release(&wait_fd->lock);
	return ret;
}
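ofi_wait_fd_add deduplicates registrations under the wait lock: if the fd is already tracked it only bumps a reference count, otherwise it is added to the epoll set and a new tracking entry is created. The same find-or-create-with-refcount shape, reduced to a generic singly linked list (hypothetical types, no locking or epoll registration):

#include <stdlib.h>

struct sk_fd_entry {
	int fd;
	int ref;
	struct sk_fd_entry *next;
};

/* Find an existing entry for fd and take a reference, or create a new one. */
static struct sk_fd_entry *sk_fd_add(struct sk_fd_entry **list, int fd)
{
	struct sk_fd_entry *entry;

	for (entry = *list; entry; entry = entry->next) {
		if (entry->fd == fd) {
			entry->ref++;
			return entry;
		}
	}

	entry = calloc(1, sizeof(*entry));
	if (!entry)
		return NULL;

	entry->fd = fd;
	entry->ref = 1;
	entry->next = *list;
	*list = entry;
	return entry;
}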
Example #29
void rxd_handle_discard(struct rxd_ep *ep, struct ofi_ctrl_hdr *ctrl,
			struct rxd_rx_buf *rx_buf)
{
	uint64_t idx;
	struct rxd_tx_entry *tx_entry;

	rxd_ep_lock_if_required(ep);
	FI_DBG(&rxd_prov, FI_LOG_EP_CTRL, "got Reject: msg: %p - %d\n",
		ctrl->msg_id, ctrl->seg_no);

	idx = ctrl->msg_id & RXD_TX_IDX_BITS;
	tx_entry = &ep->tx_entry_fs->buf[idx];
	if (tx_entry->msg_id != ctrl->msg_id)
		goto out;

	rxd_tx_entry_discard(ep, tx_entry);
out:
	rxd_ep_repost_buff(rx_buf);
	rxd_ep_unlock_if_required(ep);
}
Example #30
/*
 * A discard means the receiving side dropped the transfer, so we abort
 * sending the rest of the data.  The completion is still reported to the
 * sender as successful.  This keeps short and long messages consistent,
 * since a short message would be entirely buffered at the receiver, with
 * no notification that the application later discarded it.
 */
static void rxd_handle_discard(struct rxd_ep *ep, struct ofi_ctrl_hdr *ctrl,
			       struct rxd_rx_buf *rx_buf)
{
	struct rxd_tx_entry *tx_entry;
	uint64_t idx;

	FI_DBG(&rxd_prov, FI_LOG_EP_CTRL,
	       "discard- msg_id: %" PRIu64 ", segno: %d\n",
	       ctrl->msg_id, ctrl->seg_no);

	idx = ctrl->msg_id & RXD_TX_IDX_BITS;
	tx_entry = &ep->tx_entry_fs->buf[idx];
	if (tx_entry->msg_id == ctrl->msg_id) {
		rxd_cq_report_tx_comp(rxd_ep_tx_cq(ep), tx_entry);
		rxd_cntr_report_tx_comp(ep, tx_entry);
		rxd_tx_entry_done(ep, tx_entry);
	}

	rxd_ep_repost_buff(rx_buf);
}