Example 1
static int smr_fetch_result(struct smr_ep *ep, struct smr_region *peer_smr,
			    struct iovec *iov, size_t iov_count,
			    const struct fi_rma_ioc *rma_ioc, size_t rma_count,
			    enum fi_datatype datatype, size_t total_len)
{
	ssize_t ret;
	int i;
	struct iovec rma_iov[SMR_IOV_LIMIT];

	for (i = 0; i < rma_count; i++) {
		rma_iov[i].iov_base = (void *) rma_ioc[i].addr;
		rma_iov[i].iov_len = rma_ioc[i].count * ofi_datatype_size(datatype);
	}

	ret = process_vm_readv(peer_smr->pid, iov, iov_count,
			       rma_iov, rma_count, 0);
	if (ret != total_len) {
		if (ret < 0) {
			FI_WARN(&smr_prov, FI_LOG_EP_CTRL,
				"CMA write error\n");
			return -errno;
		} else {
			FI_WARN(&smr_prov, FI_LOG_EP_CTRL,
				"partial read occurred\n");
			return -FI_EIO;
		}
	}

	return 0;
}
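
For reference, process_vm_readv() is the Linux cross-memory-attach (CMA)
syscall behind smr_fetch_result(): it copies data described by another
process's iovec array directly into local iovecs, with no shared mapping.
A minimal, self-contained sketch (reading from the caller's own pid, purely
for illustration) that exercises the same error paths handled above:

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/uio.h>
#include <unistd.h>

int main(void)
{
	char src[] = "cross memory attach";
	char dst[sizeof(src)];
	struct iovec local = { .iov_base = dst, .iov_len = sizeof(dst) };
	struct iovec remote = { .iov_base = src, .iov_len = sizeof(src) };
	ssize_t ret;

	/* same call shape as above: pid, local iovs, remote iovs, flags */
	ret = process_vm_readv(getpid(), &local, 1, &remote, 1, 0);
	if (ret < 0)
		perror("process_vm_readv");	/* the -errno path above */
	else if ((size_t) ret != sizeof(src))
		fprintf(stderr, "partial read occurred\n"); /* the -FI_EIO path */
	else
		printf("read back: %s\n", dst);
	return 0;
}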
Example 2
int psmx_query_atomic(struct fid_domain *domain, enum fi_datatype datatype,
		      enum fi_op op, struct fi_atomic_attr *attr, uint64_t flags)
{
	int ret;
	size_t count;

	if (flags & FI_TAGGED)
		return -FI_EOPNOTSUPP;

	if (flags & FI_COMPARE_ATOMIC) {
		if (flags & FI_FETCH_ATOMIC)
			return -FI_EINVAL;
		ret = psmx_atomic_compwritevalid(NULL, datatype, op, &count);
	} else if (flags & FI_FETCH_ATOMIC) {
		ret = psmx_atomic_readwritevalid(NULL, datatype, op, &count);
	} else {
		ret = psmx_atomic_writevalid(NULL, datatype, op, &count);
	}

	if (attr && !ret) {
		attr->size = ofi_datatype_size(datatype);
		attr->count = count;
	}

	return ret;
}
Example 3
static int psmx_atomic_readwritevalid(struct fid_ep *ep,
				      enum fi_datatype datatype,
				      enum fi_op op, size_t *count)
{
	int chunk_size;

	if (datatype >= FI_DATATYPE_LAST)
		return -FI_EOPNOTSUPP;

	switch (op) {
	case FI_MIN:
	case FI_MAX:
	case FI_SUM:
	case FI_PROD:
	case FI_LOR:
	case FI_LAND:
	case FI_BOR:
	case FI_BAND:
	case FI_LXOR:
	case FI_BXOR:
	case FI_ATOMIC_READ:
	case FI_ATOMIC_WRITE:
		break;

	default:
		return -FI_EOPNOTSUPP;
	}

	if (count) {
		chunk_size = MIN(PSMX_AM_CHUNK_SIZE,
				 psmx_am_param.max_request_short);
		*count = chunk_size / ofi_datatype_size(datatype);
	}
	return 0;
}
Example 4
static int psmx_atomic_compwritevalid(struct fid_ep *ep,
				      enum fi_datatype datatype,
				      enum fi_op op, size_t *count)
{
	int chunk_size;

	if (datatype >= FI_DATATYPE_LAST)
		return -FI_EOPNOTSUPP;

	switch (op) {
	case FI_CSWAP:
	case FI_CSWAP_NE:
		break;

	case FI_CSWAP_LE:
	case FI_CSWAP_LT:
	case FI_CSWAP_GE:
	case FI_CSWAP_GT:
		if (datatype == FI_FLOAT_COMPLEX ||
		    datatype == FI_DOUBLE_COMPLEX ||
		    datatype == FI_LONG_DOUBLE_COMPLEX)
			return -FI_EOPNOTSUPP;
		break;

	case FI_MSWAP:
		if (datatype == FI_FLOAT_COMPLEX ||
		    datatype == FI_DOUBLE_COMPLEX ||
		    datatype == FI_LONG_DOUBLE_COMPLEX ||
		    datatype == FI_FLOAT ||
		    datatype == FI_DOUBLE ||
		    datatype == FI_LONG_DOUBLE)
			return -FI_EOPNOTSUPP;
		break;

	default:
		return -FI_EOPNOTSUPP;
	}

	if (count) {
		chunk_size = MIN(PSMX_AM_CHUNK_SIZE,
				 psmx_am_param.max_request_short);
		*count = chunk_size / (2 * ofi_datatype_size(datatype));
	}
	return 0;
}
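
The divide-by-two in the compare validator reflects the AM payload layout:
a compare-and-write carries the operand vector and the compare vector in the
same message, so each gets half the chunk. A standalone sketch of the
arithmetic, with a hypothetical 2 KiB chunk standing in for
MIN(PSMX_AM_CHUNK_SIZE, psmx_am_param.max_request_short):

#include <stdio.h>

#define CHUNK_SIZE 2048	/* hypothetical stand-in for the PSM chunk limit */

int main(void)
{
	size_t dtsize = 8;	/* e.g. FI_UINT64 */

	/* write/fetch atomics: the payload holds a single element vector */
	printf("max fetch count:   %zu\n", CHUNK_SIZE / dtsize);
	/* compare atomics: operand and compare data share the payload */
	printf("max compare count: %zu\n", CHUNK_SIZE / (2 * dtsize));
	return 0;
}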
Example 5
int smr_query_atomic(struct fid_domain *domain, enum fi_datatype datatype,
		     enum fi_op op, struct fi_atomic_attr *attr, uint64_t flags)
{
	int ret;
	size_t total_size;

	if (flags & FI_TAGGED) {
		FI_WARN(&smr_prov, FI_LOG_EP_CTRL,
			"tagged atomic op not supported\n");
		return -FI_EINVAL;
	}

	ret = ofi_atomic_valid(&smr_prov, datatype, op, flags);
	if (ret || !attr)
		return ret;

	attr->size = ofi_datatype_size(datatype);

	total_size = (flags & FI_COMPARE_ATOMIC) ? SMR_COMP_INJECT_SIZE :
		      SMR_INJECT_SIZE;
	attr->count = total_size / attr->size;

	return ret;
}
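
From the application side, provider hooks like psmx_query_atomic() and
smr_query_atomic() are reached through fi_query_atomic(). A hedged usage
sketch; the domain is assumed to have been opened with fi_domain() earlier:

#include <rdma/fi_atomic.h>

/* Sketch: ask whether FI_UINT64/FI_SUM is supported on this domain and how
 * many elements fit in a single atomic operation. */
static int query_sum_u64(struct fid_domain *domain, size_t *max_count)
{
	struct fi_atomic_attr attr;
	int ret;

	ret = fi_query_atomic(domain, FI_UINT64, FI_SUM, &attr, 0);
	if (ret)
		return ret;	/* e.g. -FI_EOPNOTSUPP */

	/* attr.size is the element size, attr.count the per-op element limit */
	*max_count = attr.count;
	return 0;
}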
Example 6
static ssize_t smr_atomic_inject(struct fid_ep *ep_fid, const void *buf,
			size_t count, fi_addr_t dest_addr, uint64_t addr,
			uint64_t key, enum fi_datatype datatype, enum fi_op op)
{
	struct smr_ep *ep;
	struct smr_region *peer_smr;
	struct smr_inject_buf *tx_buf;
	struct smr_cmd *cmd;
	struct iovec iov;
	struct fi_rma_ioc rma_ioc;
	int peer_id;
	ssize_t ret = 0;
	size_t total_len;

	assert(count <= SMR_INJECT_SIZE);

	ep = container_of(ep_fid, struct smr_ep, util_ep.ep_fid.fid);

	peer_id = (int) dest_addr;
	ret = smr_verify_peer(ep, peer_id);
	if (ret)
		return ret;

	peer_smr = smr_peer_region(ep->region, peer_id);
	fastlock_acquire(&peer_smr->lock);
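	/* an inject atomic consumes two command-queue entries (the atomic
	 * command plus a trailing rma_ioc command), so reserve both up front */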
	if (peer_smr->cmd_cnt < 2) {
		ret = -FI_EAGAIN;
		goto unlock_region;
	}

	cmd = ofi_cirque_tail(smr_cmd_queue(peer_smr));
	total_len = count * ofi_datatype_size(datatype);

	iov.iov_base = (void *) buf;
	iov.iov_len = total_len;

	rma_ioc.addr = addr;
	rma_ioc.count = count;
	rma_ioc.key = key;

	if (total_len <= SMR_MSG_DATA_LEN) {
		smr_format_inline_atomic(cmd, smr_peer_addr(ep->region)[peer_id].addr,
					 &iov, 1, NULL, 0, ofi_op_atomic,
					 datatype, op);
	} else if (total_len <= SMR_INJECT_SIZE) {
		tx_buf = smr_freestack_pop(smr_inject_pool(peer_smr));
		smr_format_inject_atomic(cmd, smr_peer_addr(ep->region)[peer_id].addr,
					 &iov, 1, NULL, 0, NULL, 0, ofi_op_atomic,
					 datatype, op, peer_smr, tx_buf);
	}

	ofi_cirque_commit(smr_cmd_queue(peer_smr));
	peer_smr->cmd_cnt--;
	cmd = ofi_cirque_tail(smr_cmd_queue(peer_smr));
	smr_format_rma_ioc(cmd, &rma_ioc, 1);
	ofi_cirque_commit(smr_cmd_queue(peer_smr));
	peer_smr->cmd_cnt--;

unlock_region:
	fastlock_release(&peer_smr->lock);
	return ret;
}
Example 7
static ssize_t smr_generic_atomic(struct fid_ep *ep_fid,
			const struct fi_ioc *ioc, void **desc, size_t count,
			const struct fi_ioc *compare_ioc, void **compare_desc,
			size_t compare_count, struct fi_ioc *result_ioc,
			void **result_desc, size_t result_count,
			fi_addr_t addr, const struct fi_rma_ioc *rma_ioc,
			size_t rma_count, enum fi_datatype datatype,
			enum fi_op atomic_op, void *context, uint32_t op)
{
	struct smr_ep *ep;
	struct smr_domain *domain;
	struct smr_region *peer_smr;
	struct smr_inject_buf *tx_buf;
	struct smr_cmd *cmd;
	struct iovec iov[SMR_IOV_LIMIT];
	struct iovec compare_iov[SMR_IOV_LIMIT];
	struct iovec result_iov[SMR_IOV_LIMIT];
	int peer_id, err = 0;
	uint16_t flags = 0;
	ssize_t ret = 0;
	size_t msg_len, total_len;

	assert(count <= SMR_IOV_LIMIT);
	assert(result_count <= SMR_IOV_LIMIT);
	assert(compare_count <= SMR_IOV_LIMIT);
	assert(rma_count <= SMR_IOV_LIMIT);

	ep = container_of(ep_fid, struct smr_ep, util_ep.ep_fid.fid);
	domain = container_of(ep->util_ep.domain, struct smr_domain, util_domain);

	peer_id = (int) addr;
	ret = smr_verify_peer(ep, peer_id);
	if (ret)
		return ret;

	peer_smr = smr_peer_region(ep->region, peer_id);
	fastlock_acquire(&peer_smr->lock);
	if (peer_smr->cmd_cnt < 2) {
		ret = -FI_EAGAIN;
		goto unlock_region;
	}

	fastlock_acquire(&ep->util_ep.tx_cq->cq_lock);
	if (ofi_cirque_isfull(ep->util_ep.tx_cq->cirq)) {
		ret = -FI_EAGAIN;
		goto unlock_cq;
	}

	cmd = ofi_cirque_tail(smr_cmd_queue(peer_smr));
	msg_len = total_len = ofi_datatype_size(datatype) *
			      ofi_total_ioc_cnt(ioc, count);
	
	switch (op) {
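	/* the cases below rely on fall-through: a compare atomic also sets up
	 * the result iov, and a fetch atomic also sets up the operand iov */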
	case ofi_op_atomic_compare:
		assert(compare_ioc);
		ofi_ioc_to_iov(compare_ioc, compare_iov, compare_count,
			       ofi_datatype_size(datatype));
		total_len *= 2;
		/* fall through */
	case ofi_op_atomic_fetch:
		assert(result_ioc);
		ofi_ioc_to_iov(result_ioc, result_iov, result_count,
			       ofi_datatype_size(datatype));
		if (!domain->fast_rma)
			flags |= SMR_RMA_REQ;
		/* fall through */
	case ofi_op_atomic:
		if (atomic_op != FI_ATOMIC_READ) {
			assert(ioc);
			ofi_ioc_to_iov(ioc, iov, count, ofi_datatype_size(datatype));
		} else {
			count = 0;
		}
		break;
	default:
		break;
	}

	if (total_len <= SMR_MSG_DATA_LEN && !(flags & SMR_RMA_REQ)) {
		smr_format_inline_atomic(cmd, smr_peer_addr(ep->region)[peer_id].addr,
					 iov, count, compare_iov, compare_count,
					 op, datatype, atomic_op);
	} else if (total_len <= SMR_INJECT_SIZE) {
		tx_buf = smr_freestack_pop(smr_inject_pool(peer_smr));
		smr_format_inject_atomic(cmd, smr_peer_addr(ep->region)[peer_id].addr,
					 iov, count, result_iov, result_count,
					 compare_iov, compare_count, op, datatype,
					 atomic_op, peer_smr, tx_buf);
	} else {
		FI_WARN(&smr_prov, FI_LOG_EP_CTRL,
			"message too large\n");
		ret = -FI_EINVAL;
		goto unlock_cq;
	}
	cmd->msg.hdr.op_flags |= flags;

	ofi_cirque_commit(smr_cmd_queue(peer_smr));
	peer_smr->cmd_cnt--;

	if (op != ofi_op_atomic) {
		if (flags & SMR_RMA_REQ) {
			smr_post_fetch_resp(ep, cmd,
				(const struct iovec *) result_iov,
				result_count);
			goto format_rma;
		}
		err = smr_fetch_result(ep, peer_smr, result_iov, result_count,
				       rma_ioc, rma_count, datatype, msg_len);
		if (err)
			FI_WARN(&smr_prov, FI_LOG_EP_CTRL,
				"unable to fetch results");
	}

	ret = ep->tx_comp(ep, context, ofi_tx_cq_flags(op), err);
	if (ret) {
		FI_WARN(&smr_prov, FI_LOG_EP_CTRL,
			"unable to process tx completion\n");
	}

format_rma:
	cmd = ofi_cirque_tail(smr_cmd_queue(peer_smr));
	smr_format_rma_ioc(cmd, rma_ioc, rma_count);
	ofi_cirque_commit(smr_cmd_queue(peer_smr));
	peer_smr->cmd_cnt--;
unlock_cq:
	fastlock_release(&ep->util_ep.tx_cq->cq_lock);
unlock_region:
	fastlock_release(&peer_smr->lock);
	return ret;
}
Example 8
ssize_t _psmx_atomic_readwrite(struct fid_ep *ep,
				const void *buf,
				size_t count, void *desc,
				void *result, void *result_desc,
				fi_addr_t dest_addr,
				uint64_t addr, uint64_t key,
				enum fi_datatype datatype,
				enum fi_op op, void *context,
				uint64_t flags)
{
	struct psmx_fid_ep *ep_priv;
	struct psmx_fid_av *av;
	struct psmx_epaddr_context *epaddr_context;
	struct psmx_am_request *req;
	psm_amarg_t args[8];
	int am_flags = PSM_AM_FLAG_ASYNC;
	int chunk_size, len;
	size_t idx;

	ep_priv = container_of(ep, struct psmx_fid_ep, ep);

	if (flags & FI_TRIGGER) {
		struct psmx_trigger *trigger;
		struct fi_triggered_context *ctxt = context;

		trigger = calloc(1, sizeof(*trigger));
		if (!trigger)
			return -FI_ENOMEM;

		trigger->op = PSMX_TRIGGERED_ATOMIC_READWRITE;
		trigger->cntr = container_of(ctxt->trigger.threshold.cntr,
					     struct psmx_fid_cntr, cntr);
		trigger->threshold = ctxt->trigger.threshold.threshold;
		trigger->atomic_readwrite.ep = ep;
		trigger->atomic_readwrite.buf = buf;
		trigger->atomic_readwrite.count = count;
		trigger->atomic_readwrite.desc = desc;
		trigger->atomic_readwrite.result = result;
		trigger->atomic_readwrite.result_desc = result_desc;
		trigger->atomic_readwrite.dest_addr = dest_addr;
		trigger->atomic_readwrite.addr = addr;
		trigger->atomic_readwrite.key = key;
		trigger->atomic_readwrite.datatype = datatype;
		trigger->atomic_readwrite.atomic_op = op;
		trigger->atomic_readwrite.context = context;
		trigger->atomic_readwrite.flags = flags & ~FI_TRIGGER;

		psmx_cntr_add_trigger(trigger->cntr, trigger);
		return 0;
	}

	if (!buf && op != FI_ATOMIC_READ)
		return -FI_EINVAL;

	if (datatype >= FI_DATATYPE_LAST)
		return -FI_EINVAL;

	if (op >= FI_ATOMIC_OP_LAST)
		return -FI_EINVAL;

	av = ep_priv->av;
	if (av && av->type == FI_AV_TABLE) {
		idx = dest_addr;
		if (idx >= av->last)
			return -FI_EINVAL;

		dest_addr = (fi_addr_t) av->psm_epaddrs[idx];
	} else if (!dest_addr) {
		return -FI_EINVAL;
	}

	epaddr_context = psm_epaddr_getctxt((void *)dest_addr);
	if (epaddr_context->epid == ep_priv->domain->psm_epid)
		return psmx_atomic_self(PSMX_AM_REQ_ATOMIC_READWRITE,
					ep_priv, buf, count, desc,
					NULL, NULL, result, result_desc,
					addr, key, datatype, op,
					context, flags);

	chunk_size = MIN(PSMX_AM_CHUNK_SIZE, psmx_am_param.max_request_short);
	len = ofi_datatype_size(datatype) * count;
	if (len > chunk_size)
		return -FI_EMSGSIZE;

	if ((flags & FI_INJECT) && op != FI_ATOMIC_READ) {
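		/* FI_INJECT: the caller may reuse the buffer as soon as we
		 * return, so stash a copy in the request allocation */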
		req = malloc(sizeof(*req) + len);
		if (!req)
			return -FI_ENOMEM;
		memset(req, 0, sizeof(*req));
		memcpy((uint8_t *)req+sizeof(*req), (void *)buf, len);
		buf = (uint8_t *)req + sizeof(*req);
	} else {
		req = calloc(1, sizeof(*req));
		if (!req)
			return -FI_ENOMEM;
	}

	req->no_event = (flags & PSMX_NO_COMPLETION) ||
			(ep_priv->send_selective_completion && !(flags & FI_COMPLETION));

	req->op = PSMX_AM_REQ_ATOMIC_READWRITE;
	req->atomic.buf = (void *)buf;
	req->atomic.len = len;
	req->atomic.addr = addr;
	req->atomic.key = key;
	req->atomic.context = context;
	req->atomic.result = result;
	req->ep = ep_priv;
	if (op == FI_ATOMIC_READ)
		req->cq_flags = FI_READ | FI_ATOMIC;
	else
		req->cq_flags = FI_WRITE | FI_ATOMIC;

	args[0].u32w0 = PSMX_AM_REQ_ATOMIC_READWRITE;
	args[0].u32w1 = count;
	args[1].u64 = (uint64_t)(uintptr_t)req;
	args[2].u64 = addr;
	args[3].u64 = key;
	args[4].u32w0 = datatype;
	args[4].u32w1 = op;
	psm_am_request_short((psm_epaddr_t) dest_addr,
				PSMX_AM_ATOMIC_HANDLER, args, 5,
				(void *)buf, (buf ? len : 0), am_flags, NULL, NULL);

	return 0;
}
Example 9
static int psmx_atomic_self(int am_cmd,
			    struct psmx_fid_ep *ep,
			    const void *buf,
			    size_t count, void *desc,
			    const void *compare, void *compare_desc,
			    void *result, void *result_desc,
			    uint64_t addr, uint64_t key,
			    enum fi_datatype datatype,
			    enum fi_op op, void *context,
			    uint64_t flags)
{
	struct psmx_fid_mr *mr;
	struct psmx_cq_event *event;
	struct psmx_fid_ep *target_ep;
	struct psmx_fid_cntr *cntr = NULL;
	struct psmx_fid_cntr *mr_cntr = NULL;
	void *tmp_buf;
	size_t len;
	int no_event;
	int err = 0;
	int op_error;
	int access;
	uint64_t cq_flags = 0;

	if (am_cmd == PSMX_AM_REQ_ATOMIC_WRITE)
		access = FI_REMOTE_WRITE;
	else
		access = FI_REMOTE_READ | FI_REMOTE_WRITE;

	len = ofi_datatype_size(datatype) * count;
	mr = psmx_mr_get(psmx_active_fabric->active_domain, key);
	op_error = mr ? psmx_mr_validate(mr, addr, len, access) : -FI_EINVAL;

	if (op_error)
		goto gen_local_event;

	addr += mr->offset;

	switch (am_cmd) {
	case PSMX_AM_REQ_ATOMIC_WRITE:
		err = psmx_atomic_do_write((void *)addr, (void *)buf,
					   (int)datatype, (int)op, (int)count);
		cq_flags = FI_WRITE | FI_ATOMIC;
		break;

	case PSMX_AM_REQ_ATOMIC_READWRITE:
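		/* if the result buffer aliases the operand buffer, stage the
		 * fetched data in a bounce buffer so the operand isn't
		 * clobbered mid-operation */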
		if (result != buf) {
			err = psmx_atomic_do_readwrite((void *)addr, (void *)buf,
						       (void *)result, (int)datatype,
						       (int)op, (int)count);
		} else {
			tmp_buf = malloc(len);
			if (tmp_buf) {
				memcpy(tmp_buf, result, len);
				err = psmx_atomic_do_readwrite((void *)addr, (void *)buf,
							       tmp_buf, (int)datatype,
							       (int)op, (int)count);
				memcpy(result, tmp_buf, len);
				free(tmp_buf);
			} else {
				err = -FI_ENOMEM;
			}
		}
		if (op == FI_ATOMIC_READ)
			cq_flags = FI_READ | FI_ATOMIC;
		else
			cq_flags = FI_WRITE | FI_ATOMIC;
		break;

	case PSMX_AM_REQ_ATOMIC_COMPWRITE:
		if (result != buf && result != compare) {
			err = psmx_atomic_do_compwrite((void *)addr, (void *)buf,
						       (void *)compare, (void *)result,
						       (int)datatype, (int)op, (int)count);
		} else {
			tmp_buf = malloc(len);
			if (tmp_buf) {
				memcpy(tmp_buf, result, len);
				err = psmx_atomic_do_compwrite((void *)addr, (void *)buf,
							       (void *)compare, tmp_buf,
							       (int)datatype, (int)op, (int)count);
				memcpy(result, tmp_buf, len);
				free(tmp_buf);
			} else {
				err = -FI_ENOMEM;
			}
		}
		cq_flags = FI_WRITE | FI_ATOMIC;
		break;
	}

	target_ep = mr->domain->atomics_ep;
	if (op == FI_ATOMIC_READ) {
		cntr = target_ep->remote_read_cntr;
	} else {
		cntr = target_ep->remote_write_cntr;
		mr_cntr = mr->cntr;
	}

	if (cntr)
		psmx_cntr_inc(cntr);

	if (mr_cntr && mr_cntr != cntr)
		psmx_cntr_inc(mr_cntr);

gen_local_event:
	no_event = ((flags & PSMX_NO_COMPLETION) ||
		    (ep->send_selective_completion && !(flags & FI_COMPLETION)));
	if (ep->send_cq && (!no_event || op_error)) {
		event = psmx_cq_create_event(
				ep->send_cq,
				context,
				(void *)buf,
				cq_flags,
				len,
				0, /* data */
				0, /* tag */
				0, /* olen */
				op_error);
		if (event)
			psmx_cq_enqueue_event(ep->send_cq, event);
		else
			err = -FI_ENOMEM;
	}

	switch (am_cmd) {
	case PSMX_AM_REQ_ATOMIC_WRITE:
		if (ep->write_cntr)
			psmx_cntr_inc(ep->write_cntr);
		break;
	case PSMX_AM_REQ_ATOMIC_READWRITE:
	case PSMX_AM_REQ_ATOMIC_COMPWRITE:
		if (ep->read_cntr)
			psmx_cntr_inc(ep->read_cntr);
		break;
	}

	return err;
}
Example 10
int psmx_am_atomic_handler(psm_am_token_t token, psm_epaddr_t epaddr,
			   psm_amarg_t *args, int nargs, void *src,
			   uint32_t len)
{
	psm_amarg_t rep_args[8];
	int count;
	uint8_t *addr;
	uint64_t key;
	int datatype, op;
	int err = 0;
	int op_error = 0;
	struct psmx_am_request *req;
	struct psmx_cq_event *event;
	struct psmx_fid_mr *mr;
	struct psmx_fid_ep *target_ep;
	struct psmx_fid_cntr *cntr = NULL;
	struct psmx_fid_cntr *mr_cntr = NULL;
	void *tmp_buf;

	switch (args[0].u32w0 & PSMX_AM_OP_MASK) {
	case PSMX_AM_REQ_ATOMIC_WRITE:
		count = args[0].u32w1;
		addr = (uint8_t *)(uintptr_t)args[2].u64;
		key = args[3].u64;
		datatype = args[4].u32w0;
		op = args[4].u32w1;
		assert(len == ofi_datatype_size(datatype) * count);

		mr = psmx_mr_get(psmx_active_fabric->active_domain, key);
		op_error = mr ?
			psmx_mr_validate(mr, (uint64_t)addr, len, FI_REMOTE_WRITE) :
			-FI_EINVAL;

		if (!op_error) {
			addr += mr->offset;
			psmx_atomic_do_write(addr, src, datatype, op, count);

			target_ep = mr->domain->atomics_ep;
			cntr = target_ep->remote_write_cntr;
			mr_cntr = mr->cntr;

			if (cntr)
				psmx_cntr_inc(cntr);

			if (mr_cntr && mr_cntr != cntr)
				psmx_cntr_inc(mr_cntr);
		}

		rep_args[0].u32w0 = PSMX_AM_REP_ATOMIC_WRITE;
		rep_args[0].u32w1 = op_error;
		rep_args[1].u64 = args[1].u64;
		err = psm_am_reply_short(token, PSMX_AM_ATOMIC_HANDLER,
				rep_args, 2, NULL, 0, 0,
				NULL, NULL);
		break;

	case PSMX_AM_REQ_ATOMIC_READWRITE:
		count = args[0].u32w1;
		addr = (uint8_t *)(uintptr_t)args[2].u64;
		key = args[3].u64;
		datatype = args[4].u32w0;
		op = args[4].u32w1;

		if (op == FI_ATOMIC_READ)
			len = ofi_datatype_size(datatype) * count;

		assert(len == ofi_datatype_size(datatype) * count);

		mr = psmx_mr_get(psmx_active_fabric->active_domain, key);
		op_error = mr ?
			psmx_mr_validate(mr, (uint64_t)addr, len, FI_REMOTE_READ|FI_REMOTE_WRITE) :
			-FI_EINVAL;

		if (!op_error) {
			addr += mr->offset;
			tmp_buf = malloc(len);
			if (tmp_buf)
				psmx_atomic_do_readwrite(addr, src, tmp_buf,
							 datatype, op, count);
			else
				op_error = -FI_ENOMEM;

			target_ep = mr->domain->atomics_ep;
			if (op == FI_ATOMIC_READ) {
				cntr = target_ep->remote_read_cntr;
			} else {
				cntr = target_ep->remote_write_cntr;
				mr_cntr = mr->cntr;
			}

			if (cntr)
				psmx_cntr_inc(cntr);

			if (mr_cntr && mr_cntr != cntr)
				psmx_cntr_inc(mr_cntr);
		} else {
			tmp_buf = NULL;
		}

		rep_args[0].u32w0 = PSMX_AM_REP_ATOMIC_READWRITE;
		rep_args[0].u32w1 = op_error;
		rep_args[1].u64 = args[1].u64;
		err = psm_am_reply_short(token, PSMX_AM_ATOMIC_HANDLER,
				rep_args, 2, tmp_buf, (tmp_buf?len:0), 0,
				psmx_am_atomic_completion, tmp_buf);
		break;

	case PSMX_AM_REQ_ATOMIC_COMPWRITE:
		count = args[0].u32w1;
		addr = (uint8_t *)(uintptr_t)args[2].u64;
		key = args[3].u64;
		datatype = args[4].u32w0;
		op = args[4].u32w1;
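		/* the AM payload carries the operand followed by the compare
		 * data back to back, so each half is len/2 */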
		len /= 2;
		assert(len == ofi_datatype_size(datatype) * count);

		mr = psmx_mr_get(psmx_active_fabric->active_domain, key);
		op_error = mr ?
			psmx_mr_validate(mr, (uint64_t)addr, len, FI_REMOTE_READ|FI_REMOTE_WRITE) :
			-FI_EINVAL;

		if (!op_error) {
			addr += mr->offset;
			tmp_buf = malloc(len);
			if (tmp_buf)
				psmx_atomic_do_compwrite(addr, src, (uint8_t *)src + len,
							 tmp_buf, datatype, op, count);
			else
				op_error = -FI_ENOMEM;

			target_ep = mr->domain->atomics_ep;
			cntr = target_ep->remote_write_cntr;
			mr_cntr = mr->cntr;

			if (cntr)
				psmx_cntr_inc(cntr);

			if (mr_cntr && mr_cntr != cntr)
				psmx_cntr_inc(mr_cntr);
		} else {
			tmp_buf = NULL;
		}

		rep_args[0].u32w0 = PSMX_AM_REP_ATOMIC_READWRITE;
		rep_args[0].u32w1 = op_error;
		rep_args[1].u64 = args[1].u64;
		err = psm_am_reply_short(token, PSMX_AM_ATOMIC_HANDLER,
				rep_args, 2, tmp_buf, (tmp_buf?len:0), 0,
				psmx_am_atomic_completion, tmp_buf);
		break;

	case PSMX_AM_REP_ATOMIC_WRITE:
		req = (struct psmx_am_request *)(uintptr_t)args[1].u64;
		op_error = (int)args[0].u32w1;
		assert(req->op == PSMX_AM_REQ_ATOMIC_WRITE);
		if (req->ep->send_cq && (!req->no_event || op_error)) {
			event = psmx_cq_create_event(
					req->ep->send_cq,
					req->atomic.context,
					req->atomic.buf,
					req->cq_flags,
					req->atomic.len,
					0, /* data */
					0, /* tag */
					0, /* olen */
					op_error);
			if (event)
				psmx_cq_enqueue_event(req->ep->send_cq, event);
			else
				err = -FI_ENOMEM;
		}

		if (req->ep->write_cntr)
			psmx_cntr_inc(req->ep->write_cntr);

		free(req);
		break;

	case PSMX_AM_REP_ATOMIC_READWRITE:
	case PSMX_AM_REP_ATOMIC_COMPWRITE:
		req = (struct psmx_am_request *)(uintptr_t)args[1].u64;
		op_error = (int)args[0].u32w1;
		assert(op_error || req->atomic.len == len);

		if (!op_error)
			memcpy(req->atomic.result, src, len);

		if (req->ep->send_cq && (!req->no_event || op_error)) {
			event = psmx_cq_create_event(
					req->ep->send_cq,
					req->atomic.context,
					req->atomic.buf,
					req->cq_flags,
					req->atomic.len,
					0, /* data */
					0, /* tag */
					0, /* olen */
					op_error);
			if (event)
				psmx_cq_enqueue_event(req->ep->send_cq, event);
			else
				err = -FI_ENOMEM;
		}

		if (req->ep->read_cntr)
			psmx_cntr_inc(req->ep->read_cntr);

		free(req);
		break;

	default:
		err = -FI_EINVAL;
	}
	return err;
}
Example 11
ssize_t _gnix_atomic(struct gnix_fid_ep *ep,
		     enum gnix_fab_req_type fr_type,
		     const struct fi_msg_atomic *msg,
		     const struct fi_ioc *comparev,
		     void **compare_desc,
		     size_t compare_count,
		     struct fi_ioc *resultv,
		     void **result_desc,
		     size_t result_count,
		     uint64_t flags)
{
	struct gnix_vc *vc;
	struct gnix_fab_req *req;
	struct gnix_fid_mem_desc *md = NULL;
	int rc, len;
	struct fid_mr *auto_mr = NULL;
	void *mdesc = NULL;
	uint64_t compare_operand = 0;
	void *loc_addr = NULL;
	int dt_len, dt_align;
	int connected;

	if (!(flags & FI_INJECT) && !ep->send_cq &&
	    (((fr_type == GNIX_FAB_RQ_AMO ||
	      fr_type == GNIX_FAB_RQ_NAMO_AX ||
	      fr_type == GNIX_FAB_RQ_NAMO_AX_S) &&
	      !ep->write_cntr) ||
	     ((fr_type == GNIX_FAB_RQ_FAMO ||
	      fr_type == GNIX_FAB_RQ_CAMO ||
	      fr_type == GNIX_FAB_RQ_NAMO_FAX ||
	      fr_type == GNIX_FAB_RQ_NAMO_FAX_S) &&
	      !ep->read_cntr))) {
		return -FI_ENOCQ;
	}

	if (!ep || !msg || !msg->msg_iov ||
	    msg->msg_iov[0].count != 1 ||
	    msg->iov_count != GNIX_MAX_ATOMIC_IOV_LIMIT ||
	    !msg->rma_iov)
		return -FI_EINVAL;

	/*
	 * see fi_atomic man page
	 */

	if ((msg->op != FI_ATOMIC_READ) &&
		!msg->msg_iov[0].addr)
		return -FI_EINVAL;

	if (flags & FI_TRIGGER) {
		struct fi_triggered_context *trigger_context =
				(struct fi_triggered_context *)msg->context;
		if ((trigger_context->event_type != FI_TRIGGER_THRESHOLD) ||
		    (flags & FI_INJECT)) {
			return -FI_EINVAL;
		}
	}

	if (fr_type == GNIX_FAB_RQ_CAMO) {
		if (!comparev || !comparev[0].addr || compare_count != 1)
			return -FI_EINVAL;

		compare_operand = *(uint64_t *)comparev[0].addr;
	}

	dt_len = ofi_datatype_size(msg->datatype);
	dt_align = dt_len - 1;
	len = dt_len * msg->msg_iov->count;

	if (msg->rma_iov->addr & dt_align) {
		GNIX_INFO(FI_LOG_EP_DATA,
			  "Invalid target alignment: %d (mask 0x%x)\n",
			  msg->rma_iov->addr, dt_align);
		return -FI_EINVAL;
	}

	/* need a memory descriptor for all fetching and comparison AMOs */
	if (fr_type == GNIX_FAB_RQ_FAMO ||
	    fr_type == GNIX_FAB_RQ_CAMO ||
	    fr_type == GNIX_FAB_RQ_NAMO_FAX ||
	    fr_type == GNIX_FAB_RQ_NAMO_FAX_S) {
		if (!resultv || !resultv[0].addr || result_count != 1)
			return -FI_EINVAL;

		loc_addr = resultv[0].addr;

		if ((uint64_t)loc_addr & dt_align) {
			GNIX_INFO(FI_LOG_EP_DATA,
				  "Invalid source alignment: %d (mask 0x%x)\n",
				  loc_addr, dt_align);
			return -FI_EINVAL;
		}

		if (!result_desc || !result_desc[0]) {
			rc = _gnix_mr_reg(&ep->domain->domain_fid.fid,
					 loc_addr, len, FI_READ | FI_WRITE,
					 0, 0, 0, &auto_mr,
					 NULL, ep->auth_key, GNIX_PROV_REG);
			if (rc != FI_SUCCESS) {
				GNIX_INFO(FI_LOG_EP_DATA,
					  "Failed to auto-register local buffer: %d\n",
					  rc);
				return rc;
			}
			flags |= FI_LOCAL_MR;
			mdesc = (void *)auto_mr;
			GNIX_INFO(FI_LOG_EP_DATA, "auto-reg MR: %p\n",
				  auto_mr);
		} else {
			mdesc = result_desc[0];
		}
	}

	/* setup fabric request */
	req = _gnix_fr_alloc(ep);
	if (!req) {
		GNIX_INFO(FI_LOG_EP_DATA, "_gnix_fr_alloc() failed\n");
		rc = -FI_ENOSPC;
		goto err_fr_alloc;
	}

	req->type = fr_type;
	req->gnix_ep = ep;
	req->user_context = msg->context;
	req->work_fn = _gnix_amo_post_req;

	if (mdesc) {
		md = container_of(mdesc, struct gnix_fid_mem_desc, mr_fid);
	}
	req->amo.loc_md = (void *)md;
	req->amo.loc_addr = (uint64_t)loc_addr;

	if ((fr_type == GNIX_FAB_RQ_NAMO_AX)   ||
	    (fr_type == GNIX_FAB_RQ_NAMO_FAX)  ||
	    (fr_type == GNIX_FAB_RQ_NAMO_AX_S) ||
	    (fr_type == GNIX_FAB_RQ_NAMO_FAX_S)) {
		req->amo.first_operand =
			*(uint64_t *)msg->msg_iov[0].addr;
		req->amo.second_operand =
			*((uint64_t *)(msg->msg_iov[0].addr) + 1);
	} else if (msg->op == FI_ATOMIC_READ) {
		req->amo.first_operand = 0xFFFFFFFFFFFFFFFF; /* operand to FAND */
	} else if (msg->op == FI_CSWAP) {
		req->amo.first_operand = compare_operand;
		req->amo.second_operand = *(uint64_t *)msg->msg_iov[0].addr;
	} else if (msg->op == FI_MSWAP) {
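		/* masked swap in and-xor form: (old & ~mask) ^ (buf & mask)
		 * keeps the unmasked bits of the old value and installs the
		 * masked bits of the operand */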
		req->amo.first_operand = ~compare_operand;
		req->amo.second_operand = *(uint64_t *)msg->msg_iov[0].addr;
		req->amo.second_operand &= compare_operand;
	} else {
		req->amo.first_operand = *(uint64_t *)msg->msg_iov[0].addr;
	}

	req->amo.rem_addr = msg->rma_iov->addr;
	req->amo.rem_mr_key = msg->rma_iov->key;
	req->amo.len = len;
	req->amo.imm = msg->data;
	req->amo.datatype = msg->datatype;
	req->amo.op = msg->op;
	req->flags = flags;

	/* Inject interfaces always suppress completions.  If
	 * SELECTIVE_COMPLETION is set, honor any setting.  Otherwise, always
	 * deliver a completion. */
	if ((flags & GNIX_SUPPRESS_COMPLETION) ||
	    (ep->send_selective_completion && !(flags & FI_COMPLETION))) {
		req->flags &= ~FI_COMPLETION;
	} else {
		req->flags |= FI_COMPLETION;
	}

	COND_ACQUIRE(ep->requires_lock, &ep->vc_lock);

	/* find VC for target */
	rc = _gnix_vc_ep_get_vc(ep, msg->addr, &vc);
	if (rc) {
		GNIX_INFO(FI_LOG_EP_DATA,
			  "_gnix_vc_ep_get_vc() failed, addr: %lx, rc:\n",
			  msg->addr, rc);
		goto err_get_vc;
	}

	req->vc = vc;

	rc = _gnix_vc_queue_tx_req(req);
	connected = (vc->conn_state == GNIX_VC_CONNECTED);

	COND_RELEASE(ep->requires_lock, &ep->vc_lock);

	/*
	 * If a new VC was allocated, progress CM before returning.
	 * If the VC is connected and there's a backlog, poke
	 * the NIC progress engine before returning.
	 */
	if (!connected) {
		_gnix_cm_nic_progress(ep->cm_nic);
	} else if (!dlist_empty(&vc->tx_queue)) {
		_gnix_nic_progress(vc->ep->nic);
	}

	return rc;

err_get_vc:
	COND_RELEASE(ep->requires_lock, &ep->vc_lock);
err_fr_alloc:
	if (auto_mr) {
		fi_close(&auto_mr->fid);
	}
	return rc;
}
Example 12
static ssize_t
rxm_ep_atomic_common(struct rxm_ep *rxm_ep, struct rxm_conn *rxm_conn,
		const struct fi_msg_atomic *msg, const struct fi_ioc *comparev,
		void **compare_desc, size_t compare_iov_count,
		struct fi_ioc *resultv, void **result_desc,
		size_t result_iov_count, uint32_t op, uint64_t flags)
{
	struct rxm_tx_atomic_buf *tx_buf;
	struct rxm_atomic_hdr *atomic_hdr;
	struct iovec buf_iov[RXM_IOV_LIMIT];
	struct iovec cmp_iov[RXM_IOV_LIMIT];
	size_t datatype_sz = ofi_datatype_size(msg->datatype);
	size_t buf_len = 0;
	size_t cmp_len = 0;
	size_t tot_len;
	ssize_t ret;

	assert(msg->iov_count <= RXM_IOV_LIMIT &&
	       msg->rma_iov_count <= RXM_IOV_LIMIT);

	if (flags & FI_REMOTE_CQ_DATA) {
		FI_WARN(&rxm_prov, FI_LOG_EP_DATA,
			"atomic with remote CQ data not supported\n");
		return -FI_EINVAL;
	}

	if (msg->op != FI_ATOMIC_READ) {
		assert(msg->msg_iov);
		ofi_ioc_to_iov(msg->msg_iov, buf_iov, msg->iov_count,
			       datatype_sz);
		buf_len = ofi_total_iov_len(buf_iov, msg->iov_count);
	}

	if (op == ofi_op_atomic_compare) {
		assert(comparev);
		ofi_ioc_to_iov(comparev, cmp_iov, compare_iov_count,
			       datatype_sz);
		cmp_len = ofi_total_iov_len(cmp_iov, compare_iov_count);
		assert(buf_len == cmp_len);
	}

	tot_len = buf_len + cmp_len + sizeof(struct rxm_atomic_hdr) +
			sizeof(struct rxm_pkt);
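	/* tot_len is the full wire message: the packet and atomic headers
	 * count against the eager limit alongside operand and compare data */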

	if (tot_len > rxm_eager_limit) {
		FI_WARN(&rxm_prov, FI_LOG_EP_DATA,
			"atomic data too large %zu\n", tot_len);
		return -FI_EINVAL;
	}

	ofi_ep_lock_acquire(&rxm_ep->util_ep);
	tx_buf = (struct rxm_tx_atomic_buf *)
		 rxm_tx_buf_alloc(rxm_ep, RXM_BUF_POOL_TX_ATOMIC);
	if (OFI_UNLIKELY(!tx_buf)) {
		FI_WARN(&rxm_prov, FI_LOG_EP_DATA,
			"Ran out of buffers from Atomic buffer pool\n");
		ret = -FI_EAGAIN;
		goto unlock;
	}

	rxm_ep_format_atomic_pkt_hdr(rxm_conn, tx_buf, tot_len, op,
				msg->datatype, msg->op, flags, msg->data,
				msg->rma_iov, msg->rma_iov_count);
	tx_buf->pkt.ctrl_hdr.msg_id = ofi_buf_index(tx_buf);
	tx_buf->app_context = msg->context;

	atomic_hdr = (struct rxm_atomic_hdr *) tx_buf->pkt.data;

	ofi_copy_from_iov(atomic_hdr->data, buf_len, buf_iov,
			  msg->iov_count, 0);
	if (cmp_len)
		ofi_copy_from_iov(atomic_hdr->data + buf_len, cmp_len,
				  cmp_iov, compare_iov_count, 0);

	tx_buf->result_iov_count = result_iov_count;
	if (resultv)
		ofi_ioc_to_iov(resultv, tx_buf->result_iov, result_iov_count,
			       datatype_sz);

	ret = rxm_ep_send_atomic_req(rxm_ep, rxm_conn, tx_buf, tot_len);
	if (ret)
		ofi_buf_free(tx_buf);
unlock:
	ofi_ep_lock_release(&rxm_ep->util_ep);
	return ret;
}
Example 13
static ssize_t
rxm_ep_atomic_writev(struct fid_ep *ep_fid, const struct fi_ioc *iov,
		     void **desc, size_t count, fi_addr_t dest_addr,
		     uint64_t addr, uint64_t key, enum fi_datatype datatype,
		     enum fi_op op, void *context)
{
	struct rxm_ep *rxm_ep = container_of(ep_fid, struct rxm_ep,
					     util_ep.ep_fid.fid);
	struct fi_rma_ioc rma_iov = {
		.addr = addr,
		.count = ofi_total_ioc_cnt(iov, count),
		.key = key,
	};
	struct fi_msg_atomic msg = {
		.msg_iov = iov,
		.desc = desc,
		.iov_count = count,
		.addr = dest_addr,
		.rma_iov = &rma_iov,
		.rma_iov_count = 1,
		.datatype = datatype,
		.op = op,
		.context = context,
		.data = 0,
	};

	return rxm_ep_generic_atomic_writemsg(rxm_ep, &msg, rxm_ep_tx_flags(rxm_ep));
}

static ssize_t
rxm_ep_atomic_write(struct fid_ep *ep_fid, const void *buf, size_t count,
		    void *desc, fi_addr_t dest_addr, uint64_t addr,
		    uint64_t key, enum fi_datatype datatype, enum fi_op op,
		    void *context)
{
	const struct fi_ioc iov = {
		.addr = (void *) buf,
		.count = count,
	};

	return rxm_ep_atomic_writev(ep_fid, &iov, &desc, 1, dest_addr, addr,
				    key, datatype, op, context);
}

static ssize_t
rxm_ep_atomic_inject(struct fid_ep *ep_fid, const void *buf, size_t count,
		     fi_addr_t dest_addr, uint64_t addr, uint64_t key,
		     enum fi_datatype datatype, enum fi_op op)
{
	struct rxm_ep *rxm_ep = container_of(ep_fid, struct rxm_ep,
					     util_ep.ep_fid.fid);
	struct fi_ioc msg_iov = {
		.addr = (void *) buf,
		.count = count,
	};
	struct fi_rma_ioc rma_iov = {
		.addr = addr,
		.count = count,
		.key = key,
	};
	struct fi_msg_atomic msg = {
		.msg_iov = &msg_iov,
		.desc = NULL,
		.iov_count = 1,
		.addr = dest_addr,
		.rma_iov = &rma_iov,
		.rma_iov_count = 1,
		.datatype = datatype,
		.op = op,
		.context = NULL,
		.data = 0,
	};

	return rxm_ep_generic_atomic_writemsg(rxm_ep, &msg, FI_INJECT);
}

static ssize_t
rxm_ep_generic_atomic_readwritemsg(struct rxm_ep *rxm_ep,
				   const struct fi_msg_atomic *msg,
				   struct fi_ioc *resultv, void **result_desc,
				   size_t result_count, uint64_t flags)
{
	int ret;
	struct rxm_conn *rxm_conn;

	ret = rxm_ep_prepare_tx(rxm_ep, msg->addr, &rxm_conn);
	if (OFI_UNLIKELY(ret))
		return ret;

	return rxm_ep_atomic_common(rxm_ep, rxm_conn, msg, NULL, NULL, 0,
				    resultv, result_desc, result_count,
				    ofi_op_atomic_fetch, flags);
}

static ssize_t
rxm_ep_atomic_readwritemsg(struct fid_ep *ep_fid,
			   const struct fi_msg_atomic *msg,
			   struct fi_ioc *resultv, void **result_desc,
			   size_t result_count, uint64_t flags)
{
	struct rxm_ep *rxm_ep = container_of(ep_fid, struct rxm_ep,
					     util_ep.ep_fid.fid);

	return rxm_ep_generic_atomic_readwritemsg(rxm_ep, msg,
			resultv, result_desc, result_count,
			flags | rxm_ep->util_ep.tx_msg_flags);
}

static ssize_t
rxm_ep_atomic_readwritev(struct fid_ep *ep_fid, const struct fi_ioc *iov,
		 void **desc, size_t count, struct fi_ioc *resultv,
		 void **result_desc, size_t result_count, fi_addr_t dest_addr,
		 uint64_t addr, uint64_t key, enum fi_datatype datatype,
		 enum fi_op op, void *context)
{
	struct rxm_ep *rxm_ep = container_of(ep_fid, struct rxm_ep,
					     util_ep.ep_fid.fid);
	struct fi_rma_ioc rma_iov = {
		.addr = addr,
		.count = ofi_total_ioc_cnt(iov, count),
		.key = key,
	};
	struct fi_msg_atomic msg = {
		.msg_iov = iov,
		.desc = desc,
		.iov_count = count,
		.addr = dest_addr,
		.rma_iov = &rma_iov,
		.rma_iov_count = 1,
		.datatype = datatype,
		.op = op,
		.context = context,
		.data = 0,
	};

	return rxm_ep_generic_atomic_readwritemsg(rxm_ep, &msg, resultv,
			result_desc, result_count, rxm_ep_tx_flags(rxm_ep));
}

static ssize_t
rxm_ep_atomic_readwrite(struct fid_ep *ep_fid, const void *buf, size_t count,
			void *desc, void *result, void *result_desc,
			fi_addr_t dest_addr, uint64_t addr, uint64_t key,
			enum fi_datatype datatype, enum fi_op op, void *context)
{
	struct fi_ioc iov = {
		.addr = (op == FI_ATOMIC_READ) ? NULL : (void *) buf,
		.count = count,
	};
	struct fi_ioc result_iov = {
		.addr = result,
		.count = count,
	};

	if (!buf && op != FI_ATOMIC_READ)
		return -FI_EINVAL;

	return rxm_ep_atomic_readwritev(ep_fid, &iov, &desc, 1, &result_iov,
					&result_desc, 1, dest_addr, addr, key,
					datatype, op, context);
}

static ssize_t
rxm_ep_generic_atomic_compwritemsg(struct rxm_ep *rxm_ep,
				   const struct fi_msg_atomic *msg,
				   const struct fi_ioc *comparev, void **compare_desc,
				   size_t compare_count, struct fi_ioc *resultv,
				   void **result_desc, size_t result_count,
				   uint64_t flags)
{
	int ret;
	struct rxm_conn *rxm_conn;

	ret = rxm_ep_prepare_tx(rxm_ep, msg->addr, &rxm_conn);
	if (OFI_UNLIKELY(ret))
		return ret;

	return rxm_ep_atomic_common(rxm_ep, rxm_conn, msg, comparev,
				    compare_desc, compare_count, resultv,
				    result_desc, result_count,
				    ofi_op_atomic_compare, flags);
}

static ssize_t
rxm_ep_atomic_compwritemsg(struct fid_ep *ep_fid,
			   const struct fi_msg_atomic *msg,
			   const struct fi_ioc *comparev, void **compare_desc,
			   size_t compare_count, struct fi_ioc *resultv,
			   void **result_desc, size_t result_count,
			   uint64_t flags)
{
	struct rxm_ep *rxm_ep = container_of(ep_fid, struct rxm_ep,
					     util_ep.ep_fid.fid);

	return rxm_ep_generic_atomic_compwritemsg(rxm_ep, msg, comparev,
				    compare_desc, compare_count, resultv,
				    result_desc, result_count,
				    flags | rxm_ep->util_ep.tx_msg_flags);
}

static ssize_t
rxm_ep_atomic_compwritev(struct fid_ep *ep_fid, const struct fi_ioc *iov,
		 void **desc, size_t count, const struct fi_ioc *comparev,
		 void **compare_desc, size_t compare_count,
		 struct fi_ioc *resultv, void **result_desc,
		 size_t result_count, fi_addr_t dest_addr, uint64_t addr,
		 uint64_t key, enum fi_datatype datatype, enum fi_op op,
		 void *context)
{
	struct rxm_ep *rxm_ep = container_of(ep_fid, struct rxm_ep,
					     util_ep.ep_fid.fid);
	struct fi_rma_ioc rma_iov = {
		.addr = addr,
		.count = ofi_total_ioc_cnt(iov, count),
		.key = key,
	};
	struct fi_msg_atomic msg = {
		.msg_iov = iov,
		.desc = desc,
		.iov_count = count,
		.addr = dest_addr,
		.rma_iov = &rma_iov,
		.rma_iov_count = 1,
		.datatype = datatype,
		.op = op,
		.context = context,
		.data = 0,
	};

	return rxm_ep_generic_atomic_compwritemsg(rxm_ep, &msg, comparev,
			compare_desc, compare_count, resultv, result_desc,
			result_count, rxm_ep_tx_flags(rxm_ep));
}

static ssize_t
rxm_ep_atomic_compwrite(struct fid_ep *ep_fid, const void *buf, size_t count,
			void *desc, const void *compare, void *compare_desc,
			void *result, void *result_desc, fi_addr_t dest_addr,
			uint64_t addr, uint64_t key, enum fi_datatype datatype,
			enum fi_op op, void *context)
{
	struct fi_ioc iov = {
		.addr = (void *) buf,
		.count = count,
	};
	struct fi_ioc resultv = {
		.addr = result,
		.count = count,
	};
	struct fi_ioc comparev = {
		.addr = (void *) compare,
		.count = count,
	};

	return rxm_ep_atomic_compwritev(ep_fid, &iov, &desc, 1,
					&comparev, &compare_desc, 1,
					&resultv, &result_desc, 1,
					dest_addr, addr, key,
					datatype, op, context);
}

int rxm_ep_query_atomic(struct fid_domain *domain, enum fi_datatype datatype,
			enum fi_op op, struct fi_atomic_attr *attr,
			uint64_t flags)
{
	struct rxm_domain *rxm_domain = container_of(domain,
						     struct rxm_domain,
						     util_domain.domain_fid);
	size_t tot_size;
	int ret;

	if (flags & FI_TAGGED) {
		FI_WARN(&rxm_prov, FI_LOG_EP_DATA,
			"tagged atomic op not supported\n");
		return -FI_EINVAL;
	}

	ret = ofi_atomic_valid(&rxm_prov, datatype, op, flags);
	if (ret || !attr)
		return ret;

	tot_size = flags & FI_COMPARE_ATOMIC ?
		rxm_domain->max_atomic_size / 2 : rxm_domain->max_atomic_size;
	attr->size = ofi_datatype_size(datatype);
	attr->count = tot_size / attr->size;

	return FI_SUCCESS;
}

static int rxm_ep_atomic_valid(struct fid_ep *ep_fid, enum fi_datatype datatype,
			       enum fi_op op, size_t *count)
{
	struct rxm_ep *rxm_ep = container_of(ep_fid, struct rxm_ep,
					     util_ep.ep_fid);
	struct fi_atomic_attr attr;
	int ret;

	ret = rxm_ep_query_atomic(&rxm_ep->util_ep.domain->domain_fid,
				  datatype, op, &attr, 0);
	if (!ret)
		*count = attr.count;

	return ret;
}

static int rxm_ep_atomic_fetch_valid(struct fid_ep *ep_fid,
				     enum fi_datatype datatype, enum fi_op op,
				     size_t *count)
{
	struct rxm_ep *rxm_ep = container_of(ep_fid, struct rxm_ep,
					     util_ep.ep_fid);
	struct fi_atomic_attr attr;
	int ret;

	ret = rxm_ep_query_atomic(&rxm_ep->util_ep.domain->domain_fid,
				  datatype, op, &attr, FI_FETCH_ATOMIC);
	if (!ret)
		*count = attr.count;

	return ret;
}

static int rxm_ep_atomic_cswap_valid(struct fid_ep *ep_fid,
				     enum fi_datatype datatype, enum fi_op op,
				     size_t *count)
{
	struct rxm_ep *rxm_ep = container_of(ep_fid, struct rxm_ep,
					     util_ep.ep_fid);
	struct fi_atomic_attr attr;
	int ret;

	ret = rxm_ep_query_atomic(&rxm_ep->util_ep.domain->domain_fid,
				  datatype, op, &attr, FI_COMPARE_ATOMIC);
	if (!ret)
		*count = attr.count;

	return ret;
}

struct fi_ops_atomic rxm_ops_atomic = {
	.size = sizeof(struct fi_ops_atomic),
	.write = rxm_ep_atomic_write,
	.writev = rxm_ep_atomic_writev,
	.writemsg = rxm_ep_atomic_writemsg,
	.inject = rxm_ep_atomic_inject,
	.readwrite = rxm_ep_atomic_readwrite,
	.readwritev = rxm_ep_atomic_readwritev,
	.readwritemsg = rxm_ep_atomic_readwritemsg,
	.compwrite = rxm_ep_atomic_compwrite,
	.compwritev = rxm_ep_atomic_compwritev,
	.compwritemsg = rxm_ep_atomic_compwritemsg,
	.writevalid = rxm_ep_atomic_valid,
	.readwritevalid = rxm_ep_atomic_fetch_valid,
	.compwritevalid = rxm_ep_atomic_cswap_valid,
};
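
For completeness, a hedged sketch of how an application reaches the
writevalid and write entries of an ops table like rxm_ops_atomic; the
endpoint, destination address, remote offset, and key are assumed to have
been set up elsewhere:

#include <rdma/fi_atomic.h>

/* Sketch: validate, then post a single FI_UINT64 FI_SUM atomic write.
 * 'val' must remain valid until the operation completes. */
static ssize_t add_u64(struct fid_ep *ep, fi_addr_t dest_addr,
		       uint64_t addr, uint64_t key, uint64_t *val)
{
	size_t max_count;
	int ret;

	/* dispatches to the provider's .writevalid (rxm_ep_atomic_valid) */
	ret = fi_atomicvalid(ep, FI_UINT64, FI_SUM, &max_count);
	if (ret)
		return ret;

	/* dispatches to the provider's .write (rxm_ep_atomic_write) */
	return fi_atomic(ep, val, 1, NULL, dest_addr, addr, key,
			 FI_UINT64, FI_SUM, NULL);
}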