示例#1
0
ssize_t _psmx_tagged_peek(struct fid_ep *ep, void *buf, size_t len,
			  void *desc, fi_addr_t src_addr,
			  uint64_t tag, uint64_t ignore,
			  void *context, uint64_t flags)
{
	struct psmx_fid_ep *ep_priv;
	psm_mq_status_t psm_status;
	uint64_t psm_tag, psm_tagsel;
	struct psmx_cq_event *event;
	int err;

	ep_priv = container_of(ep, struct psmx_fid_ep, ep);

	if (tag & ep_priv->domain->reserved_tag_bits) {
		FI_WARN(&psmx_prov, FI_LOG_EP_DATA,
			"using reserved tag bits."
			"tag=%lx. reserved_bits=%lx.\n", tag,
			ep_priv->domain->reserved_tag_bits);
	}

	psm_tag = tag & (~ep_priv->domain->reserved_tag_bits);
	psm_tagsel = (~ignore) | ep_priv->domain->reserved_tag_bits;

	if (flags & (FI_CLAIM | FI_DISCARD))
		return -FI_EOPNOTSUPP;

	err = psm_mq_iprobe(ep_priv->domain->psm_mq, psm_tag, psm_tagsel,
			    &psm_status);
	switch (err) {
	case PSM_OK:
		if (ep_priv->recv_cq) {
			event = psmx_cq_create_event(
					ep_priv->recv_cq,
					context,		/* op_context */
					NULL,			/* buf */
					flags|FI_RECV|FI_TAGGED,/* flags */
					psm_status.msg_length,	/* len */
					0,			/* data */
					psm_status.msg_tag,	/* tag */
					psm_status.msg_length,	/* olen */
					0);			/* err */
			if (event)
				psmx_cq_enqueue_event(ep_priv->recv_cq, event);
			else
				return -FI_ENOMEM;

			/* TODO: set message source to FI_ADDR_NOTAVAIL? */
		}
		return 0;

	case PSM_MQ_NO_COMPLETIONS:
		return -FI_ENOMSG;

	default:
		return psmx_errno(err);
	}
}
示例#2
0
static ssize_t psmx_ep_cancel(fid_t fid, void *context)
{
	struct psmx_fid_ep *ep;
	psm_mq_status_t status;
	struct fi_context *fi_context = context;
	uint64_t flags;
	struct psmx_cq_event *event;
	int err;

	ep = container_of(fid, struct psmx_fid_ep, ep.fid);
	if (!ep->domain)
		return -FI_EBADF;

	if (!fi_context)
		return -FI_EINVAL;

	switch (PSMX_CTXT_TYPE(fi_context)) {
	case PSMX_TRECV_CONTEXT:
		flags = FI_RECV | FI_TAGGED;
		break;
	case PSMX_RECV_CONTEXT:
	case PSMX_MULTI_RECV_CONTEXT:
		flags = FI_RECV | FI_MSG;
		break;
	default:
		return  -FI_EOPNOTSUPP;
	}

	err = psm_mq_cancel((psm_mq_req_t *)&PSMX_CTXT_REQ(fi_context));
	if (err == PSM_OK) {
		err = psm_mq_test((psm_mq_req_t *)&PSMX_CTXT_REQ(fi_context), &status);
		if (err == PSM_OK && ep->recv_cq) {
			event = psmx_cq_create_event(
					ep->recv_cq,
					status.context,
					NULL,	/* buf */
					flags,
					0,	/* len */
					0,	/* data */
					0,	/* tag */
					0	/* olen */,
					-FI_ECANCELED);
			if (event)
				psmx_cq_enqueue_event(ep->recv_cq, event);
			else
				return -FI_ENOMEM;
		}
	}

	return psmx_errno(err);
}
示例#3
0
int psmx_am_rma_handler(psm_am_token_t token, psm_epaddr_t epaddr,
			psm_amarg_t *args, int nargs, void *src, uint32_t len)
{
	psm_amarg_t rep_args[8];
	void *rma_addr;
	ssize_t rma_len;
	uint64_t key;
	int err = 0;
	int op_error = 0;
	int cmd, eom, has_data;
	struct psmx_am_request *req;
	struct psmx_cq_event *event;
	int chunk_size;
	uint64_t offset;
	struct psmx_fid_mr *mr;

	cmd = args[0].u32w0 & PSMX_AM_OP_MASK;
	eom = args[0].u32w0 & PSMX_AM_EOM;
	has_data = args[0].u32w0 & PSMX_AM_DATA;

	switch (cmd) {
	case PSMX_AM_REQ_WRITE:
		rma_len = args[0].u32w1;
		rma_addr = (void *)(uintptr_t)args[2].u64;
		key = args[3].u64;
		mr = psmx_mr_hash_get(key);
		op_error = mr ?
			psmx_mr_validate(mr, (uint64_t)rma_addr, len, FI_REMOTE_WRITE) :
			-EINVAL;
		if (!op_error) {
			rma_addr += mr->offset;
			memcpy(rma_addr, src, len);
			if (eom) {
				if (mr->cq) {
					/* TODO: report the addr/len of the whole write */
					event = psmx_cq_create_event(
							mr->cq,
							0, /* context */
							rma_addr,
							0, /* flags */
							rma_len,
							has_data ? args[4].u64 : 0,
							0, /* tag */
							0, /* olen */
							0);

					if (event)
						psmx_cq_enqueue_event(mr->cq, event);
					else
						err = -ENOMEM;
				}
				if (mr->cntr)
					psmx_cntr_inc(mr->cntr);
				if (mr->domain->rma_ep->remote_write_cntr)
					psmx_cntr_inc(mr->domain->rma_ep->remote_write_cntr);
			}
		}
		if (eom || op_error) {
			rep_args[0].u32w0 = PSMX_AM_REP_WRITE | eom;
			rep_args[0].u32w1 = op_error;
			rep_args[1].u64 = args[1].u64;
			err = psm_am_reply_short(token, PSMX_AM_RMA_HANDLER,
					rep_args, 2, NULL, 0, 0,
					NULL, NULL );
		}
		break;

	case PSMX_AM_REQ_WRITE_LONG:
		rma_len = args[0].u32w1;
		rma_addr = (void *)(uintptr_t)args[2].u64;
		key = args[3].u64;
		mr = psmx_mr_hash_get(key);
		op_error = mr ?
			psmx_mr_validate(mr, (uint64_t)rma_addr, len, FI_REMOTE_WRITE) :
			-EINVAL;
		if (op_error) {
			rep_args[0].u32w0 = PSMX_AM_REP_WRITE | eom;
			rep_args[0].u32w1 = op_error;
			rep_args[1].u64 = args[1].u64;
			err = psm_am_reply_short(token, PSMX_AM_RMA_HANDLER,
					rep_args, 2, NULL, 0, 0,
					NULL, NULL );
			break;
		}

		rma_addr += mr->offset;

		req = calloc(1, sizeof(*req));
		if (!req) {
			err = -ENOMEM;
		}
		else {
			req->op = args[0].u32w0;
			req->write.addr = (uint64_t)rma_addr;
			req->write.len = rma_len;
			req->write.key = key;
			req->write.context = (void *)args[4].u64;
			req->write.data = has_data ? args[5].u64 : 0;
			PSMX_CTXT_TYPE(&req->fi_context) = PSMX_REMOTE_WRITE_CONTEXT;
			PSMX_CTXT_USER(&req->fi_context) = mr;
			psmx_am_enqueue_rma(mr->domain, req);
		}
		break;

	case PSMX_AM_REQ_READ:
		rma_len = args[0].u32w1;
		rma_addr = (void *)(uintptr_t)args[2].u64;
		key = args[3].u64;
		offset = args[4].u64;
		mr = psmx_mr_hash_get(key);
		op_error = mr ?
			psmx_mr_validate(mr, (uint64_t)rma_addr, rma_len, FI_REMOTE_READ) :
			-EINVAL;
		if (!op_error) {
			rma_addr += mr->offset;
		}
		else {
			rma_addr = NULL;
			rma_len = 0;
		}

		chunk_size = MIN(PSMX_AM_CHUNK_SIZE, psmx_am_param.max_reply_short);
		assert(rma_len <= chunk_size);
		rep_args[0].u32w0 = PSMX_AM_REP_READ | eom;
		rep_args[0].u32w1 = op_error;
		rep_args[1].u64 = args[1].u64;
		rep_args[2].u64 = offset;
		err = psm_am_reply_short(token, PSMX_AM_RMA_HANDLER,
				rep_args, 3, rma_addr, rma_len, 0,
				NULL, NULL );

		if (eom && !op_error) {
			if (mr->domain->rma_ep->remote_read_cntr)
				psmx_cntr_inc(mr->domain->rma_ep->remote_read_cntr);
		}
		break;

	case PSMX_AM_REQ_READ_LONG:
		rma_len = args[0].u32w1;
		rma_addr = (void *)(uintptr_t)args[2].u64;
		key = args[3].u64;
		mr = psmx_mr_hash_get(key);
		op_error = mr ?
			psmx_mr_validate(mr, (uint64_t)rma_addr, len, FI_REMOTE_WRITE) :
			-EINVAL;
		if (op_error) {
			rep_args[0].u32w0 = PSMX_AM_REP_READ | eom;
			rep_args[0].u32w1 = op_error;
			rep_args[1].u64 = args[1].u64;
			rep_args[2].u64 = 0;
			err = psm_am_reply_short(token, PSMX_AM_RMA_HANDLER,
					rep_args, 3, NULL, 0, 0,
					NULL, NULL );
			break;
		}

		rma_addr += mr->offset;

		req = calloc(1, sizeof(*req));
		if (!req) {
			err = -ENOMEM;
		}
		else {
			req->op = args[0].u32w0;
			req->read.addr = (uint64_t)rma_addr;
			req->read.len = rma_len;
			req->read.key = key;
			req->read.context = (void *)args[4].u64;
			req->read.peer_addr = (void *)epaddr;
			PSMX_CTXT_TYPE(&req->fi_context) = PSMX_REMOTE_READ_CONTEXT;
			PSMX_CTXT_USER(&req->fi_context) = mr;
			psmx_am_enqueue_rma(mr->domain, req);
		}
		break;

	case PSMX_AM_REP_WRITE:
		req = (struct psmx_am_request *)(uintptr_t)args[1].u64;
		assert(req->op == PSMX_AM_REQ_WRITE);
		op_error = (int)args[0].u32w1;
		if (!req->error)
			req->error = op_error;
		if (eom) {
			if (req->ep->send_cq && !req->no_event) {
				event = psmx_cq_create_event(
						req->ep->send_cq,
						req->write.context,
						req->write.buf,
						0, /* flags */
						req->write.len,
						0, /* data */
						0, /* tag */
						0, /* olen */
						req->error);
				if (event)
					psmx_cq_enqueue_event(req->ep->send_cq, event);
				else
					err = -ENOMEM;
			}

			if (req->ep->write_cntr)
				psmx_cntr_inc(req->ep->write_cntr);

			free(req);
		}
		break;

	case PSMX_AM_REP_READ:
		req = (struct psmx_am_request *)(uintptr_t)args[1].u64;
		assert(req->op == PSMX_AM_REQ_READ);
		op_error = (int)args[0].u32w1;
		offset = args[2].u64;
		if (!req->error)
			req->error = op_error;
		if (!op_error) {
			memcpy(req->read.buf + offset, src, len);
			req->read.len_read += len;
		}
		if (eom) {
			if (req->ep->send_cq && !req->no_event) {
				event = psmx_cq_create_event(
						req->ep->send_cq,
						req->read.context,
						req->read.buf,
						0, /* flags */
						req->read.len_read,
						0, /* data */
						0, /* tag */
						req->read.len - req->read.len_read,
						req->error);
				if (event)
					psmx_cq_enqueue_event(req->ep->send_cq, event);
				else
					err = -ENOMEM;
			}

			if (req->ep->read_cntr)
				psmx_cntr_inc(req->ep->read_cntr);

			free(req);
		}
		break;

	default:
		err = -EINVAL;
	}
	return err;
}
示例#4
0
static ssize_t psmx_rma_self(int am_cmd,
			     struct psmx_fid_ep *ep,
			     void *buf, size_t len, void *desc,
			     uint64_t addr, uint64_t key,
			     void *context, uint64_t flags, uint64_t data)
{
	struct psmx_fid_mr *mr;
	struct psmx_cq_event *event;
	struct psmx_fid_cntr *cntr;
	int no_event;
	int err = 0;
	int op_error = 0;
	int access;
	void *dst, *src;

	switch (am_cmd) {
	case PSMX_AM_REQ_WRITE:
		access = FI_REMOTE_WRITE;
		break;
	case PSMX_AM_REQ_READ:
		access = FI_REMOTE_READ;
		break;
	default:
		return -EINVAL;
	}

	mr = psmx_mr_hash_get(key);
	op_error = mr ? psmx_mr_validate(mr, addr, len, access) : -EINVAL;

	if (!op_error) {
		addr += mr->offset;
		if (am_cmd == PSMX_AM_REQ_WRITE) {
			dst = (void *)addr;
			src = buf;
			cntr = mr->domain->rma_ep->remote_write_cntr;
		}
		else {
			dst = buf;
			src = (void *)addr;
			cntr = mr->domain->rma_ep->remote_read_cntr;
		}

		memcpy(dst, src, len);

		if (mr->cq && am_cmd == PSMX_AM_REQ_WRITE) {
			event = psmx_cq_create_event(
					mr->cq,
					0, /* context */
					(void *)addr,
					0, /* flags */
					len,
					flags & FI_REMOTE_CQ_DATA ? data : 0,
					0, /* tag */
					0, /* olen */
					0 /* err */);

			if (event)
				psmx_cq_enqueue_event(mr->cq, event);
			else
				err = -ENOMEM;
		}
		if (mr->cntr && am_cmd == PSMX_AM_REQ_WRITE)
			psmx_cntr_inc(mr->cntr);
		if (cntr)
			psmx_cntr_inc(cntr);
	}

	no_event = (flags & FI_INJECT) ||
		   (ep->send_cq_event_flag && !(flags & FI_EVENT));

	if (ep->send_cq && !no_event) {
		event = psmx_cq_create_event(
				ep->send_cq,
				context,
				(void *)buf,
				0, /* flags */
				len,
				0, /* data */
				0, /* tag */
				0, /* olen */
				op_error);
		if (event)
			psmx_cq_enqueue_event(ep->send_cq, event);
		else
			err = -ENOMEM;
	}

	switch (am_cmd) {
	case PSMX_AM_REQ_WRITE:
		if (ep->write_cntr)
			psmx_cntr_inc(ep->write_cntr);
		break;

	case PSMX_AM_REQ_READ:
		if (ep->read_cntr)
			psmx_cntr_inc(ep->read_cntr);
		break;
	}

	return err;
}
示例#5
0
static int psmx_atomic_self(int am_cmd,
			    struct psmx_fid_ep *ep,
			    const void *buf,
			    size_t count, void *desc,
			    const void *compare, void *compare_desc,
			    void *result, void *result_desc,
			    uint64_t addr, uint64_t key,
			    enum fi_datatype datatype,
			    enum fi_op op, void *context,
			    uint64_t flags)
{
	struct psmx_fid_mr *mr;
	struct psmx_cq_event *event;
	struct psmx_fid_ep *target_ep;
	struct psmx_fid_cntr *cntr = NULL;
	struct psmx_fid_cntr *mr_cntr = NULL;
	void *tmp_buf;
	size_t len;
	int no_event;
	int err = 0;
	int op_error;
	int access;
	uint64_t cq_flags = 0;

	if (am_cmd == PSMX_AM_REQ_ATOMIC_WRITE)
		access = FI_REMOTE_WRITE;
	else
		access = FI_REMOTE_READ | FI_REMOTE_WRITE;

	len = fi_datatype_size(datatype) * count;
	mr = psmx_mr_get(psmx_active_fabric->active_domain, key);
	op_error = mr ?  psmx_mr_validate(mr, addr, len, access) : -FI_EINVAL;

	if (op_error)
		goto gen_local_event;

	addr += mr->offset;

	switch (am_cmd) {
	case PSMX_AM_REQ_ATOMIC_WRITE:
		err = psmx_atomic_do_write((void *)addr, (void *)buf,
					   (int)datatype, (int)op, (int)count);
		cq_flags = FI_WRITE | FI_ATOMIC;
		break;

	case PSMX_AM_REQ_ATOMIC_READWRITE:
		if (result != buf) {
			err = psmx_atomic_do_readwrite((void *)addr, (void *)buf,
						       (void *)result, (int)datatype,
						       (int)op, (int)count);
		}
		else {
			tmp_buf = malloc(len);
			if (tmp_buf) {
				memcpy(tmp_buf, result, len);
				err = psmx_atomic_do_readwrite((void *)addr, (void *)buf,
							       tmp_buf, (int)datatype,
							       (int)op, (int)count);
				memcpy(result, tmp_buf, len);
				free(tmp_buf);
			}
			else {
				err = -FI_ENOMEM;
			}
		}
		if (op == FI_ATOMIC_READ)
			cq_flags = FI_READ | FI_ATOMIC;
		else
			cq_flags = FI_WRITE | FI_ATOMIC;
		break;

	case PSMX_AM_REQ_ATOMIC_COMPWRITE:
		if (result != buf && result != compare) {
			err = psmx_atomic_do_compwrite((void *)addr, (void *)buf,
						       (void *)compare, (void *)result,
						       (int)datatype, (int)op, (int)count);
		}
		else {
			tmp_buf = malloc(len);
			if (tmp_buf) {
				memcpy(tmp_buf, result, len);
				err = psmx_atomic_do_compwrite((void *)addr, (void *)buf,
							       (void *)compare, tmp_buf,
							       (int)datatype, (int)op, (int)count);
				memcpy(result, tmp_buf, len);
				free(tmp_buf);
			}
			else {
				err = -FI_ENOMEM;
			}
		}
		cq_flags = FI_WRITE | FI_ATOMIC;
		break;
	}

	target_ep = mr->domain->atomics_ep;
	if (op == FI_ATOMIC_READ) {
		cntr = target_ep->remote_read_cntr;
	}
	else {
		cntr = target_ep->remote_write_cntr;
		mr_cntr = mr->cntr;
	}

	if (cntr)
		psmx_cntr_inc(cntr);

	if (mr_cntr && mr_cntr != cntr)
		psmx_cntr_inc(mr_cntr);

gen_local_event:
	no_event = ((flags & PSMX_NO_COMPLETION) ||
		    (ep->send_selective_completion && !(flags & FI_COMPLETION)));
	if (ep->send_cq && !no_event) {
		event = psmx_cq_create_event(
				ep->send_cq,
				context,
				(void *)buf,
				cq_flags,
				len,
				0, /* data */
				0, /* tag */
				0, /* olen */
				op_error);
		if (event)
			psmx_cq_enqueue_event(ep->send_cq, event);
		else
			err = -FI_ENOMEM;
	}

	switch (am_cmd) {
	case PSMX_AM_REQ_ATOMIC_WRITE:
		if (ep->write_cntr)
			psmx_cntr_inc(ep->write_cntr);
		break;
	case PSMX_AM_REQ_ATOMIC_READWRITE:
	case PSMX_AM_REQ_ATOMIC_COMPWRITE:
		if (ep->read_cntr)
			psmx_cntr_inc(ep->read_cntr);
		break;
	}

	return err;
}
示例#6
0
int psmx_am_atomic_handler(psm_am_token_t token, psm_epaddr_t epaddr,
			   psm_amarg_t *args, int nargs, void *src,
			   uint32_t len)
#endif
{
	psm_amarg_t rep_args[8];
	int count;
	void *addr;
	uint64_t key;
	int datatype, op;
	int err = 0;
	int op_error = 0;
	struct psmx_am_request *req;
	struct psmx_cq_event *event;
	struct psmx_fid_mr *mr;
	struct psmx_fid_ep *target_ep;
	struct psmx_fid_cntr *cntr = NULL;
	struct psmx_fid_cntr *mr_cntr = NULL;
	void *tmp_buf;
#if (PSM_VERNO_MAJOR >= 2)
	psm_epaddr_t epaddr;

	psm_am_get_source(token, &epaddr);
#endif

	switch (args[0].u32w0 & PSMX_AM_OP_MASK) {
	case PSMX_AM_REQ_ATOMIC_WRITE:
		count = args[0].u32w1;
		addr = (void *)(uintptr_t)args[2].u64;
		key = args[3].u64;
		datatype = args[4].u32w0;
		op = args[4].u32w1;
		assert(len == fi_datatype_size(datatype) * count);

		mr = psmx_mr_get(psmx_active_fabric->active_domain, key);
		op_error = mr ?
			psmx_mr_validate(mr, (uint64_t)addr, len, FI_REMOTE_WRITE) :
			-FI_EINVAL;

		if (!op_error) {
			addr += mr->offset;
			psmx_atomic_do_write(addr, src, datatype, op, count);

			target_ep = mr->domain->atomics_ep;
			cntr = target_ep->remote_write_cntr;
			mr_cntr = mr->cntr;

			if (cntr)
				psmx_cntr_inc(cntr);

			if (mr_cntr && mr_cntr != cntr)
				psmx_cntr_inc(mr_cntr);
		}

		rep_args[0].u32w0 = PSMX_AM_REP_ATOMIC_WRITE;
		rep_args[0].u32w1 = op_error;
		rep_args[1].u64 = args[1].u64;
		err = psm_am_reply_short(token, PSMX_AM_ATOMIC_HANDLER,
				rep_args, 2, NULL, 0, 0,
				NULL, NULL );
		break;

	case PSMX_AM_REQ_ATOMIC_READWRITE:
		count = args[0].u32w1;
		addr = (void *)(uintptr_t)args[2].u64;
		key = args[3].u64;
		datatype = args[4].u32w0;
		op = args[4].u32w1;

		if (op == FI_ATOMIC_READ)
			len = fi_datatype_size(datatype) * count;

		assert(len == fi_datatype_size(datatype) * count);

		mr = psmx_mr_get(psmx_active_fabric->active_domain, key);
		op_error = mr ?
			psmx_mr_validate(mr, (uint64_t)addr, len, FI_REMOTE_READ|FI_REMOTE_WRITE) :
			-FI_EINVAL;

		if (!op_error) {
			addr += mr->offset;
			tmp_buf = malloc(len);
			if (tmp_buf)
				psmx_atomic_do_readwrite(addr, src, tmp_buf,
							 datatype, op, count);
			else
				op_error = -FI_ENOMEM;

			target_ep = mr->domain->atomics_ep;
			if (op == FI_ATOMIC_READ) {
				cntr = target_ep->remote_read_cntr;
			}
			else {
				cntr = target_ep->remote_write_cntr;
				mr_cntr = mr->cntr;
			}

			if (cntr)
				psmx_cntr_inc(cntr);

			if (mr_cntr && mr_cntr != cntr)
				psmx_cntr_inc(mr_cntr);
		}
		else {
			tmp_buf = NULL;
		}

		rep_args[0].u32w0 = PSMX_AM_REP_ATOMIC_READWRITE;
		rep_args[0].u32w1 = op_error;
		rep_args[1].u64 = args[1].u64;
		err = psm_am_reply_short(token, PSMX_AM_ATOMIC_HANDLER,
				rep_args, 2, tmp_buf, (tmp_buf?len:0), 0,
				psmx_am_atomic_completion, tmp_buf );
		break;

	case PSMX_AM_REQ_ATOMIC_COMPWRITE:
		count = args[0].u32w1;
		addr = (void *)(uintptr_t)args[2].u64;
		key = args[3].u64;
		datatype = args[4].u32w0;
		op = args[4].u32w1;
		len /= 2;
		assert(len == fi_datatype_size(datatype) * count);

		mr = psmx_mr_get(psmx_active_fabric->active_domain, key);
		op_error = mr ?
			psmx_mr_validate(mr, (uint64_t)addr, len, FI_REMOTE_READ|FI_REMOTE_WRITE) :
			-FI_EINVAL;

		if (!op_error) {
			addr += mr->offset;
			tmp_buf = malloc(len);
			if (tmp_buf)
				psmx_atomic_do_compwrite(addr, src, src + len,
							 tmp_buf, datatype, op, count);
			else
				op_error = -FI_ENOMEM;

			target_ep = mr->domain->atomics_ep;
			cntr = target_ep->remote_write_cntr;
			mr_cntr = mr->cntr;

			if (cntr)
				psmx_cntr_inc(cntr);

			if (mr_cntr && mr_cntr != cntr)
				psmx_cntr_inc(mr_cntr);
		}
		else {
			tmp_buf = NULL;
		}

		rep_args[0].u32w0 = PSMX_AM_REP_ATOMIC_READWRITE;
		rep_args[0].u32w1 = op_error;
		rep_args[1].u64 = args[1].u64;
		err = psm_am_reply_short(token, PSMX_AM_ATOMIC_HANDLER,
				rep_args, 2, tmp_buf, (tmp_buf?len:0), 0,
				psmx_am_atomic_completion, tmp_buf );
		break;

	case PSMX_AM_REP_ATOMIC_WRITE:
		req = (struct psmx_am_request *)(uintptr_t)args[1].u64;
		op_error = (int)args[0].u32w1;
		assert(req->op == PSMX_AM_REQ_ATOMIC_WRITE);
		if (req->ep->send_cq && !req->no_event) {
			event = psmx_cq_create_event(
					req->ep->send_cq,
					req->atomic.context,
					req->atomic.buf,
					req->cq_flags,
					req->atomic.len,
					0, /* data */
					0, /* tag */
					0, /* olen */
					op_error);
			if (event)
				psmx_cq_enqueue_event(req->ep->send_cq, event);
			else
				err = -FI_ENOMEM;
		}

		if (req->ep->write_cntr)
			psmx_cntr_inc(req->ep->write_cntr);

		free(req);
		break;

	case PSMX_AM_REP_ATOMIC_READWRITE:
	case PSMX_AM_REP_ATOMIC_COMPWRITE:
		req = (struct psmx_am_request *)(uintptr_t)args[1].u64;
		op_error = (int)args[0].u32w1;
		assert(op_error || req->atomic.len == len);

		if (!op_error)
			memcpy(req->atomic.result, src, len);

		if (req->ep->send_cq && !req->no_event) {
			event = psmx_cq_create_event(
					req->ep->send_cq,
					req->atomic.context,
					req->atomic.buf,
					req->cq_flags,
					req->atomic.len,
					0, /* data */
					0, /* tag */
					0, /* olen */
					op_error);
			if (event)
				psmx_cq_enqueue_event(req->ep->send_cq, event);
			else
				err = -FI_ENOMEM;
		}

		if (req->ep->read_cntr)
			psmx_cntr_inc(req->ep->read_cntr);

		free(req);
		break;

	default:
		err = -FI_EINVAL;
	}
	return err;
}
示例#7
0
ssize_t _psmx_tagged_send(struct fid_ep *ep, const void *buf, size_t len,
			  void *desc, fi_addr_t dest_addr, uint64_t tag,
			  void *context, uint64_t flags)
#endif
{
	struct psmx_fid_ep *ep_priv;
	struct psmx_fid_av *av;
	psm_epaddr_t psm_epaddr;
	psm_mq_req_t psm_req;
	uint64_t psm_tag;
#if (PSM_VERNO_MAJOR >= 2)
	psm_mq_tag_t psm_tag2;
#endif
	struct fi_context *fi_context;
	int err;
	size_t idx;
	int no_completion = 0;
	struct psmx_cq_event *event;

	ep_priv = container_of(ep, struct psmx_fid_ep, ep);

	if (flags & FI_TRIGGER) {
		struct psmx_trigger *trigger;
		struct fi_triggered_context *ctxt = context;

		trigger = calloc(1, sizeof(*trigger));
		if (!trigger)
			return -FI_ENOMEM;

		trigger->op = PSMX_TRIGGERED_TSEND;
		trigger->cntr = container_of(ctxt->trigger.threshold.cntr,
					     struct psmx_fid_cntr, cntr);
		trigger->threshold = ctxt->trigger.threshold.threshold;
		trigger->tsend.ep = ep;
		trigger->tsend.buf = buf;
		trigger->tsend.len = len;
		trigger->tsend.desc = desc;
		trigger->tsend.dest_addr = dest_addr;
		trigger->tsend.tag = tag;
		trigger->tsend.context = context;
		trigger->tsend.flags = flags & ~FI_TRIGGER;
#if (PSM_VERNO_MAJOR >= 2)
		trigger->tsend.data = data;
#endif

		psmx_cntr_add_trigger(trigger->cntr, trigger);
		return 0;
	}

	if (tag & ep_priv->domain->reserved_tag_bits) {
		FI_WARN(&psmx_prov, FI_LOG_EP_DATA,
			"using reserved tag bits."
			"tag=%lx. reserved_bits=%lx.\n", tag,
			ep_priv->domain->reserved_tag_bits);
	}

	av = ep_priv->av;
	if (av && av->type == FI_AV_TABLE) {
		idx = (size_t)dest_addr;
		if (idx >= av->last)
			return -FI_EINVAL;

		psm_epaddr = av->psm_epaddrs[idx];
	}
	else  {
		psm_epaddr = (psm_epaddr_t) dest_addr;
	}

	psm_tag = tag & (~ep_priv->domain->reserved_tag_bits);
#if (PSM_VERNO_MAJOR >= 2)
	PSMX_SET_TAG(psm_tag2, psm_tag, data);
#endif

	if ((flags & PSMX_NO_COMPLETION) ||
	    (ep_priv->send_selective_completion && !(flags & FI_COMPLETION)))
		no_completion = 1;

	if (flags & FI_INJECT) {
		if (len > PSMX_INJECT_SIZE)
			return -FI_EMSGSIZE;

#if (PSM_VERNO_MAJOR >= 2)
		err = psm_mq_send2(ep_priv->domain->psm_mq, psm_epaddr, 0,
				   &psm_tag2, buf, len);
#else
		err = psm_mq_send(ep_priv->domain->psm_mq, psm_epaddr, 0,
				  psm_tag, buf, len);
#endif

		if (err != PSM_OK)
			return psmx_errno(err);

		if (ep_priv->send_cntr)
			psmx_cntr_inc(ep_priv->send_cntr);

		if (ep_priv->send_cq && !no_completion) {
			event = psmx_cq_create_event(
					ep_priv->send_cq,
					context, (void *)buf, flags, len,
#if (PSM_VERNO_MAJOR >= 2)
					(uint64_t) data, psm_tag,
#else
					0 /* data */, psm_tag,
#endif
					0 /* olen */,
					0 /* err */);

			if (event)
				psmx_cq_enqueue_event(ep_priv->send_cq, event);
			else
				return -FI_ENOMEM;
		}

		return 0;
	}

	if (no_completion && !context) {
		fi_context = &ep_priv->nocomp_send_context;
	}
	else {
		if (!context)
			return -FI_EINVAL;

		fi_context = context;
		PSMX_CTXT_TYPE(fi_context) = PSMX_TSEND_CONTEXT;
		PSMX_CTXT_USER(fi_context) = (void *)buf;
		PSMX_CTXT_EP(fi_context) = ep_priv;
	}

#if (PSM_VERNO_MAJOR >= 2)
	err = psm_mq_isend2(ep_priv->domain->psm_mq, psm_epaddr, 0,
				&psm_tag2, buf, len, (void*)fi_context, &psm_req);
#else
	err = psm_mq_isend(ep_priv->domain->psm_mq, psm_epaddr, 0,
				psm_tag, buf, len, (void*)fi_context, &psm_req);
#endif

	if (err != PSM_OK)
		return psmx_errno(err);

	if (fi_context == context)
		PSMX_CTXT_REQ(fi_context) = psm_req;

	return 0;
}
示例#8
0
ssize_t _psmx_tagged_peek(struct fid_ep *ep, void *buf, size_t len,
			  void *desc, fi_addr_t src_addr,
			  uint64_t tag, uint64_t ignore,
			  void *context, uint64_t flags)
{
	struct psmx_fid_ep *ep_priv;
#if (PSM_VERNO_MAJOR >= 2)
	psm_mq_status2_t psm_status2;
	psm_mq_tag_t psm_tag2, psm_tagsel2;
	psm_mq_req_t req;
	struct psmx_fid_av *av;
	size_t idx;
	psm_epaddr_t psm_src_addr;
#else
	psm_mq_status_t psm_status;
#endif
	uint64_t psm_tag, psm_tagsel;
	struct psmx_cq_event *event;
	int err;

	ep_priv = container_of(ep, struct psmx_fid_ep, ep);

	if (tag & ep_priv->domain->reserved_tag_bits) {
		FI_WARN(&psmx_prov, FI_LOG_EP_DATA,
			"using reserved tag bits."
			"tag=%lx. reserved_bits=%lx.\n", tag,
			ep_priv->domain->reserved_tag_bits);
	}

	psm_tag = tag & (~ep_priv->domain->reserved_tag_bits);
	psm_tagsel = (~ignore) | ep_priv->domain->reserved_tag_bits;

#if (PSM_VERNO_MAJOR >= 2)
	if (src_addr != FI_ADDR_UNSPEC) {
		av = ep_priv->av;
		if (av && av->type == FI_AV_TABLE) {
			idx = (size_t)src_addr;
			if (idx >= av->last)
				return -FI_EINVAL;

			psm_src_addr = av->psm_epaddrs[idx];
		}
		else {
			psm_src_addr = (psm_epaddr_t)src_addr;
		}
	}
	else {
		psm_src_addr = NULL;
	}

	PSMX_SET_TAG(psm_tag2, psm_tag, 0);
	PSMX_SET_TAG(psm_tagsel2, psm_tagsel, 0);

	if (flags & (FI_CLAIM | FI_DISCARD))
		err = psm_mq_improbe2(ep_priv->domain->psm_mq,
				      psm_src_addr,
				      &psm_tag2, &psm_tagsel2,
				      &req, &psm_status2);
	else
		err = psm_mq_iprobe2(ep_priv->domain->psm_mq,
				     psm_src_addr,
				     &psm_tag2, &psm_tagsel2,
				     &psm_status2);
#else
	if (flags & (FI_CLAIM | FI_DISCARD))
		return -FI_EOPNOTSUPP;

	err = psm_mq_iprobe(ep_priv->domain->psm_mq, psm_tag, psm_tagsel,
			    &psm_status);
#endif
	switch (err) {
	case PSM_OK:
		if (ep_priv->recv_cq) {
#if (PSM_VERNO_MAJOR >= 2)
			if ((flags & FI_CLAIM) && context)
				PSMX_CTXT_REQ((struct fi_context *)context) = req;

			tag = psm_status2.msg_tag.tag0 | (((uint64_t)psm_status2.msg_tag.tag1) << 32);
			len = psm_status2.msg_length;
			src_addr = (fi_addr_t)psm_status2.msg_peer;
#else
			tag = psm_status.msg_tag;
			len = psm_status.msg_length;
			src_addr = 0;
#endif
			event = psmx_cq_create_event(
					ep_priv->recv_cq,
					context,		/* op_context */
					NULL,			/* buf */
					flags|FI_RECV|FI_TAGGED,/* flags */
					len,			/* len */
					0,			/* data */
					tag,			/* tag */
					len,			/* olen */
					0);			/* err */

			if (!event)
				return -FI_ENOMEM;

			event->source = src_addr;
			psmx_cq_enqueue_event(ep_priv->recv_cq, event);
		}
		return 0;

	case PSM_MQ_NO_COMPLETIONS:
		return -FI_ENOMSG;

	default:
		return psmx_errno(err);
	}
}
示例#9
0
static int psmx_atomic_self(int am_cmd,
			    struct psmx_fid_ep *ep,
			    const void *buf,
			    size_t count, void *desc,
			    const void *compare, void *compare_desc,
			    void *result, void *result_desc,
			    uint64_t addr, uint64_t key,
			    enum fi_datatype datatype,
			    enum fi_op op, void *context,
			    uint64_t flags)
{
	struct psmx_fid_mr *mr;
	struct psmx_cq_event *event;
	struct psmx_fid_ep *target_ep;
	size_t len;
	int no_event;
	int err = 0;
	int op_error;
	int access;

	if (am_cmd == PSMX_AM_REQ_ATOMIC_WRITE)
		access = FI_REMOTE_WRITE;
	else
		access = FI_REMOTE_READ | FI_REMOTE_WRITE;

	len = fi_datatype_size(datatype) * count;
	mr = psmx_mr_hash_get(key);
	op_error = mr ?  psmx_mr_validate(mr, addr, len, access) : -EINVAL;

	if (op_error)
		goto gen_local_event;

	addr += mr->offset;

	switch (am_cmd) {
	case PSMX_AM_REQ_ATOMIC_WRITE:
		err = psmx_atomic_do_write((void *)addr, (void *)buf,
					   (int)datatype, (int)op, (int)count);
		break;

	case PSMX_AM_REQ_ATOMIC_READWRITE:
		err = psmx_atomic_do_readwrite((void *)addr, (void *)buf,
					       (void *)result, (int)datatype,
					       (int)op, (int)count);
		break;

	case PSMX_AM_REQ_ATOMIC_COMPWRITE:
		err = psmx_atomic_do_compwrite((void *)addr, (void *)buf,
					       (void *)compare, (void *)result,
					       (int)datatype, (int)op, (int)count);
		break;
	}

	if (op != FI_ATOMIC_READ) {
		if (mr->cq) {
			event = psmx_cq_create_event(
					mr->cq,
					0, /* context */
					(void *)addr,
					0, /* flags */
					len,
					0, /* data */
					0, /* tag */
					0, /* olen */
					0 /* err */);

			if (event)
				psmx_cq_enqueue_event(mr->cq, event);
			else
				err = -ENOMEM;
		}
		if (mr->cntr)
			psmx_cntr_inc(mr->cntr);
	}

	target_ep = mr->domain->atomics_ep;
	if (op == FI_ATOMIC_WRITE) {
		if (target_ep->remote_write_cntr)
			psmx_cntr_inc(target_ep->remote_write_cntr);
	}
	else if (op == FI_ATOMIC_READ) {
		if (target_ep->remote_read_cntr)
			psmx_cntr_inc(target_ep->remote_read_cntr);
	}
	else {
		if (target_ep->remote_write_cntr)
			psmx_cntr_inc(target_ep->remote_write_cntr);
		if (am_cmd != PSMX_AM_REQ_ATOMIC_WRITE &&
		    target_ep->remote_read_cntr &&
		    target_ep->remote_read_cntr != target_ep->remote_write_cntr)
			psmx_cntr_inc(target_ep->remote_read_cntr);
	}

gen_local_event:
	no_event = ((flags & FI_INJECT) ||
		    (ep->send_cq_event_flag && !(flags & FI_EVENT)));
	if (ep->send_cq && !no_event) {
		event = psmx_cq_create_event(
				ep->send_cq,
				context,
				(void *)buf,
				0, /* flags */
				len,
				0, /* data */
				0, /* tag */
				0, /* olen */
				op_error);
		if (event)
			psmx_cq_enqueue_event(ep->send_cq, event);
		else
			err = -ENOMEM;
	}

	switch (am_cmd) {
	case PSMX_AM_REQ_ATOMIC_WRITE:
		if (ep->write_cntr)
			psmx_cntr_inc(ep->write_cntr);
		break;
	case PSMX_AM_REQ_ATOMIC_READWRITE:
	case PSMX_AM_REQ_ATOMIC_COMPWRITE:
		if (ep->read_cntr)
			psmx_cntr_inc(ep->read_cntr);
		break;
	}

	return err;
}
示例#10
0
static ssize_t psmx_rma_self(int am_cmd,
			     struct psmx_fid_ep *ep,
			     void *buf, size_t len, void *desc,
			     uint64_t addr, uint64_t key,
			     void *context, uint64_t flags, uint64_t data)
{
	struct psmx_fid_mr *mr;
	struct psmx_cq_event *event;
	struct psmx_fid_cntr *cntr;
	struct psmx_fid_cntr *mr_cntr = NULL;
	struct psmx_fid_cq *cq = NULL;
	int no_event;
	int err = 0;
	int op_error = 0;
	int access;
	void *dst, *src;
	uint64_t cq_flags;

	switch (am_cmd) {
	case PSMX_AM_REQ_WRITE:
		access = FI_REMOTE_WRITE;
		cq_flags = FI_WRITE | FI_RMA;
		break;
	case PSMX_AM_REQ_READ:
		access = FI_REMOTE_READ;
		cq_flags = FI_READ | FI_RMA;
		break;
	default:
		return -FI_EINVAL;
	}

	mr = psmx_mr_get(psmx_active_fabric->active_domain, key);
	op_error = mr ? psmx_mr_validate(mr, addr, len, access) : -FI_EINVAL;

	if (!op_error) {
		addr += mr->offset;
		if (am_cmd == PSMX_AM_REQ_WRITE) {
			dst = (void *)addr;
			src = buf;
			cntr = mr->domain->rma_ep->remote_write_cntr;
			if (flags & FI_REMOTE_CQ_DATA)
				cq = mr->domain->rma_ep->recv_cq;
			if (mr->cntr != cntr)
				mr_cntr = mr->cntr;
		}
		else {
			dst = buf;
			src = (void *)addr;
			cntr = mr->domain->rma_ep->remote_read_cntr;
		}

		memcpy(dst, src, len);

		if (cq) {
			event = psmx_cq_create_event(
					cq,
					0, /* context */
					(void *)addr,
					FI_REMOTE_WRITE | FI_RMA | FI_REMOTE_CQ_DATA,
					len,
					data,
					0, /* tag */
					0, /* olen */
					0 /* err */);

			if (event)
				psmx_cq_enqueue_event(cq, event);
			else
				err = -FI_ENOMEM;
		}

		if (cntr)
			psmx_cntr_inc(cntr);

		if (mr_cntr)
			psmx_cntr_inc(mr_cntr);
	}

	no_event = (flags & PSMX_NO_COMPLETION) ||
		   (ep->send_selective_completion && !(flags & FI_COMPLETION));

	if (ep->send_cq && !no_event) {
		event = psmx_cq_create_event(
				ep->send_cq,
				context,
				(void *)buf,
				cq_flags,
				len,
				0, /* data */
				0, /* tag */
				0, /* olen */
				op_error);
		if (event)
			psmx_cq_enqueue_event(ep->send_cq, event);
		else
			err = -FI_ENOMEM;
	}

	switch (am_cmd) {
	case PSMX_AM_REQ_WRITE:
		if (ep->write_cntr)
			psmx_cntr_inc(ep->write_cntr);
		break;

	case PSMX_AM_REQ_READ:
		if (ep->read_cntr)
			psmx_cntr_inc(ep->read_cntr);
		break;
	}

	return err;
}
示例#11
0
static ssize_t _psmx_recv2(struct fid_ep *ep, void *buf, size_t len,
			   void *desc, fi_addr_t src_addr,
			   void *context, uint64_t flags)
{
	psm_amarg_t args[8];
	struct psmx_fid_ep *ep_priv;
	struct psmx_fid_av *av;
	struct psmx_am_request *req;
	struct psmx_unexp *unexp;
	struct psmx_cq_event *event;
	int recv_done;
	int err = 0;
	size_t idx;

        ep_priv = container_of(ep, struct psmx_fid_ep, ep);

        if ((ep_priv->caps & FI_DIRECTED_RECV) && src_addr != FI_ADDR_UNSPEC) {
		av = ep_priv->av;
		if (av && av->type == FI_AV_TABLE) {
			idx = (size_t)src_addr;
			if (idx >= av->last)
				return -FI_EINVAL;

			src_addr = (fi_addr_t)av->psm_epaddrs[idx];
		}
	}
	else {
		src_addr = 0;
	}

	req = calloc(1, sizeof(*req));
	if (!req)
		return -FI_ENOMEM;

	req->op = PSMX_AM_REQ_SEND;
	req->recv.buf = (void *)buf;
	req->recv.len = len;
	req->recv.context = context;
	req->recv.src_addr = (void *)src_addr;
	req->ep = ep_priv;
	req->cq_flags = FI_RECV | FI_MSG;

	if (ep_priv->recv_selective_completion && !(flags & FI_COMPLETION))
		req->no_event = 1;

	unexp = psmx_am_search_and_dequeue_unexp(ep_priv->domain,
						 (const void *)src_addr);
	if (!unexp) {
		psmx_am_enqueue_recv(ep_priv->domain, req);
		return 0;
	}

	req->recv.len_received = MIN(req->recv.len, unexp->len_received);
	memcpy(req->recv.buf, unexp->buf, req->recv.len_received);

	recv_done = (req->recv.len_received >= req->recv.len);

	if (unexp->done) {
		recv_done = 1;
	}
	else {
		args[0].u32w0 = PSMX_AM_REP_SEND;
		args[0].u32w1 = 0;
		args[1].u64 = unexp->sender_context;
		args[2].u64 = recv_done ? 0 : (uint64_t)(uintptr_t)req;
		err = psm_am_request_short(unexp->sender_addr,
					   PSMX_AM_MSG_HANDLER,
					   args, 3, NULL, 0, 0,
					   NULL, NULL );
	}

	free(unexp);

	if (recv_done) {
		if (req->ep->recv_cq && !req->no_event) {
			event = psmx_cq_create_event(
					req->ep->recv_cq,
					req->recv.context,
					req->recv.buf,
					req->cq_flags,
					req->recv.len_received,
					0, /* data */
					0, /* tag */
					req->recv.len - req->recv.len_received,
					0 /* err */);
			if (event)
				psmx_cq_enqueue_event(req->ep->recv_cq, event);
			else
				err = -FI_ENOMEM;
		}

		if (req->ep->recv_cntr)
			psmx_cntr_inc(req->ep->recv_cntr);

		free(req);
	}

	return err;
}
示例#12
0
int psmx_am_msg_handler(psm_am_token_t token, psm_epaddr_t epaddr,
			psm_amarg_t *args, int nargs, void *src, uint32_t len)
#endif
{
        psm_amarg_t rep_args[8];
        struct psmx_am_request *req;
        struct psmx_cq_event *event;
	struct psmx_epaddr_context *epaddr_context;
	struct psmx_fid_domain *domain;
	int copy_len;
	uint64_t offset;
	int cmd, eom;
	int err = 0;
	int op_error = 0;
	struct psmx_unexp *unexp;
#if (PSM_VERNO_MAJOR >= 2)
	psm_epaddr_t epaddr;

	psm_am_get_source(token, &epaddr);
#endif

	epaddr_context = psm_epaddr_getctxt(epaddr);
	if (!epaddr_context) {
		FI_WARN(&psmx_prov, FI_LOG_EP_DATA,
			"NULL context for epaddr %p\n", epaddr);
		return -FI_EIO;
	}

	domain = epaddr_context->domain;

	cmd = args[0].u32w0 & PSMX_AM_OP_MASK;
	eom = args[0].u32w0 & PSMX_AM_EOM;

	switch (cmd) {
	case PSMX_AM_REQ_SEND:
		assert(len == args[0].u32w1);
                offset = args[3].u64;
		if (offset == 0) {
			/* this is the first packet */
			req = psmx_am_search_and_dequeue_recv(domain, (const void *)epaddr);
			if (req) {
				copy_len = MIN(len, req->recv.len);
				memcpy(req->recv.buf, src, len);
				req->recv.len_received += copy_len;
			}
			else {
				unexp = malloc(sizeof(*unexp) + len);
				if (!unexp) {
					op_error = -FI_ENOSPC;
				}
				else  {
					memcpy(unexp->buf, src, len);
					unexp->sender_addr = epaddr;
					unexp->sender_context = args[1].u64;
					unexp->len_received = len;
					unexp->done = !!eom;
					unexp->list_entry.next = NULL;
					psmx_am_enqueue_unexp(domain, unexp);

					if (!eom) {
						/* stop here. will reply when recv is posted */
						break;
					}
				}
			}

			if (!op_error && !eom) {
				/* reply w/ recv req to be used for following packets */
				rep_args[0].u32w0 = PSMX_AM_REP_SEND;
				rep_args[0].u32w1 = 0;
				rep_args[1].u64 = args[1].u64;
				rep_args[2].u64 = (uint64_t)(uintptr_t)req;
				err = psm_am_reply_short(token, PSMX_AM_MSG_HANDLER,
						rep_args, 3, NULL, 0, 0,
						NULL, NULL );
			}
		}
		else {
			req = (struct psmx_am_request *)(uintptr_t)args[2].u64;
			if (req) {
				copy_len = MIN(req->recv.len + offset, len);
				memcpy(req->recv.buf + offset, src, copy_len);
				req->recv.len_received += copy_len;
			}
			else {
				FI_WARN(&psmx_prov, FI_LOG_EP_DATA,
					"NULL recv_req in follow-up packets.\n");
				op_error = -FI_ENOMSG;
			}
		}

		if (eom && req) {
			if (req->ep->recv_cq && !req->no_event) {
				event = psmx_cq_create_event(
						req->ep->recv_cq,
						req->recv.context,
						req->recv.buf,
						req->cq_flags,
						req->recv.len_received,
						0, /* data */
						0, /* tag */
						req->recv.len - req->recv.len_received,
						0 /* err */);
				if (event)
					psmx_cq_enqueue_event(req->ep->recv_cq, event);
				else
					err = -FI_ENOMEM;
			}

			if (req->ep->recv_cntr)
				psmx_cntr_inc(req->ep->recv_cntr);

			free(req);
		}

		if (eom || op_error) {
			rep_args[0].u32w0 = PSMX_AM_REP_SEND;
			rep_args[0].u32w1 = op_error;
			rep_args[1].u64 = args[1].u64;
			rep_args[2].u64 = 0; /* done */
			err = psm_am_reply_short(token, PSMX_AM_MSG_HANDLER,
					rep_args, 3, NULL, 0, 0,
					NULL, NULL );
		}
                break;

	case PSMX_AM_REP_SEND:
		req = (struct psmx_am_request *)(uintptr_t)args[1].u64;
		op_error = (int)args[0].u32w1;
		assert(req->op == PSMX_AM_REQ_SEND);

		if (args[2].u64) { /* more to send */
			req->send.peer_context = (void *)(uintptr_t)args[2].u64;

			/* psm_am_request_short() can't be called inside the handler.
			 * put the request into a queue and process it later.
			 */
			psmx_am_enqueue_send(req->ep->domain, req);
		}
		else { /* done */
			if (req->ep->send_cq && !req->no_event) {
				event = psmx_cq_create_event(
						req->ep->send_cq,
						req->send.context,
						req->send.buf,
						req->cq_flags,
						req->send.len,
						0, /* data */
						0, /* tag */
						0, /* olen */
						op_error);
				if (event)
					psmx_cq_enqueue_event(req->ep->send_cq, event);
				else
					err = -FI_ENOMEM;
			}

			if (req->ep->send_cntr)
				psmx_cntr_inc(req->ep->send_cntr);

			free(req);
		}
		break;

	default:
		err = -FI_EINVAL;
	}

	return err;
}
示例#13
0
int psmx_am_msg_handler(psm_am_token_t token, psm_epaddr_t epaddr,
			psm_amarg_t *args, int nargs, void *src, uint32_t len)
{
        psm_amarg_t rep_args[8];
        struct psmx_am_request *req;
        struct psmx_cq_event *event;
	struct psmx_epaddr_context *epaddr_context;
	struct psmx_fid_domain *domain;
	int msg_len;
	int copy_len;
	uint64_t offset;
	int cmd, eom;
	int err = 0;
	int op_error = 0;
	struct psmx_unexp *unexp;

	epaddr_context = psm_epaddr_getctxt(epaddr);
	if (!epaddr_context) {
		fprintf(stderr, "%s: NULL context for epaddr %p\n", __func__, epaddr);
		return -EIO;
	}

	domain = epaddr_context->domain;

	cmd = args[0].u32w0 & PSMX_AM_OP_MASK;
	eom = args[0].u32w0 & PSMX_AM_EOM;

	switch (cmd) {
	case PSMX_AM_REQ_SEND:
		msg_len = args[0].u32w1;
                offset = args[3].u64;
                assert(len == msg_len);
		if (offset == 0) {
			/* this is the first packet */
			req = psmx_am_search_and_dequeue_recv(domain, (const void *)epaddr);
			if (req) {
				copy_len = MIN(len, req->recv.len);
				memcpy(req->recv.buf, src, len);
				req->recv.len_received += copy_len;
			}
			else {
				unexp = malloc(sizeof(*unexp) + len);
				if (!unexp) {
					op_error = -ENOBUFS;
				}
				else  {
					memcpy(unexp->buf, src, len);
					unexp->sender_addr = epaddr;
					unexp->sender_context = args[1].u64;
					unexp->len_received = len;
					unexp->done = !!eom;
					unexp->next = NULL;
					psmx_unexp_enqueue(unexp);

					if (!eom) {
						/* stop here. will reply when recv is posted */
						break;
					}
				}
			}

			if (!op_error && !eom) {
				/* reply w/ recv req to be used for following packets */
				rep_args[0].u32w0 = PSMX_AM_REP_SEND;
				rep_args[0].u32w1 = 0;
				rep_args[1].u64 = args[1].u64;
				rep_args[2].u64 = (uint64_t)(uintptr_t)req;
				err = psm_am_reply_short(token, PSMX_AM_MSG_HANDLER,
						rep_args, 3, NULL, 0, 0,
						NULL, NULL );
			}
		}
		else {
			req = (struct psmx_am_request *)(uintptr_t)args[2].u64;
			if (req) {
				copy_len = MIN(req->recv.len + offset, len);
				memcpy(req->recv.buf + offset, src, copy_len);
				req->recv.len_received += copy_len;
			}
			else {
				fprintf(stderr, "%s: NULL recv_req in follow-up packets.\n", __func__);
				op_error = -EBADMSG;
			}
		}

		if (eom && req) {
			if (req->ep->recv_cq && !req->no_event) {
				event = psmx_cq_create_event(
						req->ep->recv_cq,
						req->recv.context,
						req->recv.buf,
						0, /* flags */
						req->recv.len_received,
						0, /* data */
						0, /* tag */
						req->recv.len - req->recv.len_received,
						0 /* err */);
				if (event)
					psmx_cq_enqueue_event(req->ep->recv_cq, event);
				else
					err = -ENOMEM;
			}

			if (req->ep->recv_cntr)
				psmx_cntr_inc(req->ep->recv_cntr);

			free(req);
		}

		if (eom || op_error) {
			rep_args[0].u32w0 = PSMX_AM_REP_SEND;
			rep_args[0].u32w1 = op_error;
			rep_args[1].u64 = args[1].u64;
			rep_args[2].u64 = 0; /* done */
			err = psm_am_reply_short(token, PSMX_AM_MSG_HANDLER,
					rep_args, 3, NULL, 0, 0,
					NULL, NULL );
		}
                break;

	case PSMX_AM_REP_SEND:
		req = (struct psmx_am_request *)(uintptr_t)args[1].u64;
		op_error = (int)args[0].u32w1;
		assert(req->op == PSMX_AM_REQ_SEND);

		if (args[2].u64) { /* more to send */
			req->send.peer_context = (void *)(uintptr_t)args[2].u64;

#if PSMX_AM_USE_SEND_QUEUE
			/* psm_am_request_short() can't be called inside the handler.
			 * put the request into a queue and process it later.
			 */
			psmx_am_enqueue_send(req->ep->domain, req);
			if (req->ep->domain->progress_thread)
				pthread_cond_signal(&req->ep->domain->progress_cond);
#else
			req->send.peer_ready = 1;
#endif
		}
		else { /* done */
			if (req->ep->send_cq && !req->no_event) {
				event = psmx_cq_create_event(
						req->ep->send_cq,
						req->send.context,
						req->send.buf,
						0, /* flags */
						req->send.len,
						0, /* data */
						0, /* tag */
						0, /* olen */
						op_error);
				if (event)
					psmx_cq_enqueue_event(req->ep->send_cq, event);
				else
					err = -ENOMEM;
			}

			if (req->ep->send_cntr)
				psmx_cntr_inc(req->ep->send_cntr);

			if (req->state == PSMX_AM_STATE_QUEUED)
				req->state = PSMX_AM_STATE_DONE;
			else
				free(req);
		}
		break;

	default:
		err = -EINVAL;
	}

	return err;
}