Example 1
0
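/*
 * Completes one segment of a SAR (segmented) send.  The first segment's
 * buffer is kept until the final segment completes, middle segments are
 * freed immediately, and the last segment writes the TX completion,
 * bumps the TX counter, and releases both the first and last buffers.
 */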
static inline int rxm_finish_sar_segment_send(struct rxm_ep *rxm_ep, struct rxm_tx_sar_buf *tx_buf)
{
	int ret = FI_SUCCESS;
	struct rxm_tx_sar_buf *first_tx_buf;

	switch (rxm_sar_get_seg_type(&tx_buf->pkt.ctrl_hdr)) {
	case RXM_SAR_SEG_FIRST:
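		/* Keep the first segment's buffer; it is freed once the last
		 * segment completes.
		 */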
		break;
	case RXM_SAR_SEG_MIDDLE:
		ofi_buf_free(tx_buf);
		break;
	case RXM_SAR_SEG_LAST:
		ret = rxm_cq_tx_comp_write(rxm_ep, ofi_tx_cq_flags(tx_buf->pkt.hdr.op),
					   tx_buf->app_context, tx_buf->flags);

		assert(ofi_tx_cq_flags(tx_buf->pkt.hdr.op) & FI_SEND);
		ofi_ep_tx_cntr_inc(&rxm_ep->util_ep);
		first_tx_buf = ofi_bufpool_get_ibuf(rxm_ep->
					buf_pools[RXM_BUF_POOL_TX_SAR].pool,
					tx_buf->pkt.ctrl_hdr.msg_id);
		ofi_buf_free(first_tx_buf);
		ofi_buf_free(tx_buf);
		break;
	}

	return ret;
}
Example 2
0
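/*
 * Completes an eager send: writes the TX completion for the application
 * context and increments the endpoint's TX counter.
 */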
static inline int rxm_finish_eager_send(struct rxm_ep *rxm_ep, struct rxm_tx_eager_buf *tx_buf)
{
	int ret = rxm_cq_tx_comp_write(rxm_ep, ofi_tx_cq_flags(tx_buf->pkt.hdr.op),
				       tx_buf->app_context, tx_buf->flags);

	assert(ofi_tx_cq_flags(tx_buf->pkt.hdr.op) & FI_SEND);
	ofi_ep_tx_cntr_inc(&rxm_ep->util_ep);

	return ret;
}
Example 3
0
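/*
 * Finalizes a rendezvous send: returns the rendezvous TX credit to the
 * connection, closes any memory registrations rxm created internally
 * (when the application did not supply local MRs), writes the TX
 * completion, increments the TX counter, and frees the TX buffer.
 */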
static int rxm_rndv_tx_finish(struct rxm_ep *rxm_ep, struct rxm_tx_rndv_buf *tx_buf)
{
	int ret;

	RXM_LOG_STATE_TX(FI_LOG_CQ, tx_buf, RXM_RNDV_FINISH);
	tx_buf->hdr.state = RXM_RNDV_FINISH;
	tx_buf->conn->rndv_tx_credits++;

	if (!rxm_ep->rxm_mr_local)
		rxm_ep_msg_mr_closev(tx_buf->mr, tx_buf->count);

	ret = rxm_cq_tx_comp_write(rxm_ep, ofi_tx_cq_flags(tx_buf->pkt.hdr.op),
				   tx_buf->app_context, tx_buf->flags);

	assert(ofi_tx_cq_flags(tx_buf->pkt.hdr.op) & FI_SEND);
	ofi_ep_tx_cntr_inc(&rxm_ep->util_ep);

	ofi_buf_free(tx_buf);

	return ret;
}
Example 4
0
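/*
 * Common path for shared-memory atomic operations.  Verifies the peer,
 * reserves space in the peer's command queue and the local TX CQ, and
 * formats the command inline or through an inject buffer depending on
 * the payload size.  Fetch and compare operations either hand off the
 * result iov to the responder or copy the result back directly before
 * the TX completion is written.
 */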
static ssize_t smr_generic_atomic(struct fid_ep *ep_fid,
			const struct fi_ioc *ioc, void **desc, size_t count,
			const struct fi_ioc *compare_ioc, void **compare_desc,
			size_t compare_count, struct fi_ioc *result_ioc,
			void **result_desc, size_t result_count,
			fi_addr_t addr, const struct fi_rma_ioc *rma_ioc,
			size_t rma_count, enum fi_datatype datatype,
			enum fi_op atomic_op, void *context, uint32_t op)
{
	struct smr_ep *ep;
	struct smr_domain *domain;
	struct smr_region *peer_smr;
	struct smr_inject_buf *tx_buf;
	struct smr_cmd *cmd;
	struct iovec iov[SMR_IOV_LIMIT];
	struct iovec compare_iov[SMR_IOV_LIMIT];
	struct iovec result_iov[SMR_IOV_LIMIT];
	int peer_id, err = 0;
	uint16_t flags = 0;
	ssize_t ret = 0;
	size_t msg_len, total_len;

	assert(count <= SMR_IOV_LIMIT);
	assert(result_count <= SMR_IOV_LIMIT);
	assert(compare_count <= SMR_IOV_LIMIT);
	assert(rma_count <= SMR_IOV_LIMIT);

	ep = container_of(ep_fid, struct smr_ep, util_ep.ep_fid.fid);
	domain = container_of(ep->util_ep.domain, struct smr_domain, util_domain);

	peer_id = (int) addr;
	ret = smr_verify_peer(ep, peer_id);
	if (ret)
		return ret;

	peer_smr = smr_peer_region(ep->region, peer_id);
	fastlock_acquire(&peer_smr->lock);
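	/* Every atomic posts two commands to the peer: the atomic itself and
	 * the RMA iov describing the target buffers, so require room for both.
	 */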
	if (peer_smr->cmd_cnt < 2) {
		ret = -FI_EAGAIN;
		goto unlock_region;
	}

	fastlock_acquire(&ep->util_ep.tx_cq->cq_lock);
	if (ofi_cirque_isfull(ep->util_ep.tx_cq->cirq)) {
		ret = -FI_EAGAIN;
		goto unlock_cq;
	}

	cmd = ofi_cirque_tail(smr_cmd_queue(peer_smr));
	msg_len = total_len = ofi_datatype_size(datatype) *
			      ofi_total_ioc_cnt(ioc, count);

	switch (op) {
	case ofi_op_atomic_compare:
		assert(compare_ioc);
		ofi_ioc_to_iov(compare_ioc, compare_iov, compare_count,
			       ofi_datatype_size(datatype));
		total_len *= 2;
		/* fall through */
	case ofi_op_atomic_fetch:
		assert(result_ioc);
		ofi_ioc_to_iov(result_ioc, result_iov, result_count,
			       ofi_datatype_size(datatype));
		if (!domain->fast_rma)
			flags |= SMR_RMA_REQ;
		/* fall through */
	case ofi_op_atomic:
		if (atomic_op != FI_ATOMIC_READ) {
			assert(ioc);
			ofi_ioc_to_iov(ioc, iov, count, ofi_datatype_size(datatype));
		} else {
			count = 0;
		}
		break;
	default:
		break;
	}

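	/* Small payloads travel inline in the command; larger ones are staged
	 * in an inject buffer from the peer's pool.
	 */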
	if (total_len <= SMR_MSG_DATA_LEN && !(flags & SMR_RMA_REQ)) {
		smr_format_inline_atomic(cmd, smr_peer_addr(ep->region)[peer_id].addr,
					 iov, count, compare_iov, compare_count,
					 op, datatype, atomic_op);
	} else if (total_len <= SMR_INJECT_SIZE) {
		tx_buf = smr_freestack_pop(smr_inject_pool(peer_smr));
		smr_format_inject_atomic(cmd, smr_peer_addr(ep->region)[peer_id].addr,
					 iov, count, result_iov, result_count,
					 compare_iov, compare_count, op, datatype,
					 atomic_op, peer_smr, tx_buf);
	} else {
		FI_WARN(&smr_prov, FI_LOG_EP_CTRL,
			"message too large\n");
		ret = -FI_EINVAL;
		goto unlock_cq;
	}
	cmd->msg.hdr.op_flags |= flags;

	ofi_cirque_commit(smr_cmd_queue(peer_smr));
	peer_smr->cmd_cnt--;

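	/* Fetch and compare operations return data: either post the result iov
	 * so the responder can write into it, or copy the result back now.
	 */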
	if (op != ofi_op_atomic) {
		if (flags & SMR_RMA_REQ) {
			smr_post_fetch_resp(ep, cmd,
				(const struct iovec *) result_iov,
				result_count);
			goto format_rma;
		}
		err = smr_fetch_result(ep, peer_smr, result_iov, result_count,
				       rma_ioc, rma_count, datatype, msg_len);
		if (err)
			FI_WARN(&smr_prov, FI_LOG_EP_CTRL,
				"unable to fetch results\n");
	}

	ret = ep->tx_comp(ep, context, ofi_tx_cq_flags(op), err);
	if (ret) {
		FI_WARN(&smr_prov, FI_LOG_EP_CTRL,
			"unable to process tx completion\n");
	}

format_rma:
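	/* Queue the second command carrying the RMA iov that describes the
	 * target buffers.
	 */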
	cmd = ofi_cirque_tail(smr_cmd_queue(peer_smr));
	smr_format_rma_ioc(cmd, rma_ioc, rma_count);
	ofi_cirque_commit(smr_cmd_queue(peer_smr));
	peer_smr->cmd_cnt--;
unlock_cq:
	fastlock_release(&ep->util_ep.tx_cq->cq_lock);
unlock_region:
	fastlock_release(&peer_smr->lock);
	return ret;
}
Example 5
0
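/*
 * Allocates and initializes a TX entry for an outgoing operation: copies
 * the iovecs, fills in the CQ entry, works out how much payload fits
 * inline in the first packet, derives the number of segments, and queues
 * the entry on the appropriate per-peer list.
 */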
struct rxd_x_entry *rxd_tx_entry_init(struct rxd_ep *ep, const struct iovec *iov,
				      size_t iov_count, const struct iovec *res_iov,
				      size_t res_count, size_t rma_count,
				      uint64_t data, uint64_t tag, void *context,
				      fi_addr_t addr, uint32_t op, uint32_t flags)
{
	struct rxd_x_entry *tx_entry;
	struct rxd_domain *rxd_domain = rxd_ep_domain(ep);
	size_t max_inline;

	tx_entry = rxd_get_tx_entry(ep, op);
	if (!tx_entry) {
		FI_WARN(&rxd_prov, FI_LOG_EP_CTRL, "could not get tx entry\n");
		return NULL;
	}

	tx_entry->op = op;
	tx_entry->peer = addr;
	tx_entry->flags = flags;
	tx_entry->bytes_done = 0;
	tx_entry->offset = 0;
	tx_entry->next_seg_no = 0;
	tx_entry->iov_count = iov_count;
	memcpy(&tx_entry->iov[0], iov, sizeof(*iov) * iov_count);
	if (res_count) {
		tx_entry->res_count = res_count;
		memcpy(&tx_entry->res_iov[0], res_iov, sizeof(*res_iov) * res_count);
	}

	tx_entry->cq_entry.op_context = context;
	tx_entry->cq_entry.len = ofi_total_iov_len(iov, iov_count);
	tx_entry->cq_entry.buf = iov[0].iov_base;
	tx_entry->cq_entry.flags = ofi_tx_cq_flags(op);
	tx_entry->cq_entry.tag = tag;

	tx_entry->pkt = NULL;

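	/* Work out how much payload fits inline in the first packet after the
	 * optional RMA iov, tag, and remote CQ data headers are accounted for.
	 */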
	max_inline = rxd_domain->max_inline_msg;
	if (tx_entry->cq_entry.flags & FI_RMA)
		max_inline -= sizeof(struct ofi_rma_iov) * rma_count;

	if (tx_entry->flags & RXD_TAG_HDR)
		max_inline -= sizeof(tx_entry->cq_entry.tag);
	if (tx_entry->flags & RXD_REMOTE_CQ_DATA) {
		max_inline -= sizeof(tx_entry->cq_entry.data);
		tx_entry->cq_entry.data = data;
	}

	if (rma_count > 1 || tx_entry->cq_entry.flags & FI_READ ||
	    tx_entry->cq_entry.len > max_inline)
		max_inline -= sizeof(struct rxd_sar_hdr);
	else
		tx_entry->flags |= RXD_INLINE;

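	/* Atomics and fully inline transfers use a single segment; read
	 * requests segment the whole length, while other operations carry the
	 * first max_inline bytes in the initial packet.
	 */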
	if (tx_entry->cq_entry.flags & FI_ATOMIC || tx_entry->cq_entry.len <= max_inline)
		tx_entry->num_segs = 1;
	else if (tx_entry->cq_entry.flags & FI_READ)
		tx_entry->num_segs = ofi_div_ceil(tx_entry->cq_entry.len,
						  rxd_domain->max_seg_sz);
	else
		tx_entry->num_segs = ofi_div_ceil(tx_entry->cq_entry.len - max_inline,
						  rxd_domain->max_seg_sz) + 1;

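	/* Operations that expect data back go on the peer's RMA list when the
	 * peer is resolved and the send window has room; everything else is
	 * queued on the regular TX list.
	 */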
	if ((tx_entry->op == RXD_READ_REQ || tx_entry->op == RXD_ATOMIC_FETCH ||
	     tx_entry->op == RXD_ATOMIC_COMPARE) &&
	    ep->peers[tx_entry->peer].unacked_cnt < ep->peers[tx_entry->peer].tx_window &&
	    ep->peers[tx_entry->peer].peer_addr != FI_ADDR_UNSPEC)
		dlist_insert_tail(&tx_entry->entry,
				  &ep->peers[tx_entry->peer].rma_rx_list);
	else
		dlist_insert_tail(&tx_entry->entry,
				  &ep->peers[tx_entry->peer].tx_list);

	return tx_entry;
}