Example #1
/*
 * rpmem_fip_init_memory -- (internal) initialize common memory resources
 */
static int
rpmem_fip_init_memory(struct rpmem_fip *fip)
{
	ASSERTne(Pagesize, 0);
	int ret;

	/*
	 * Register local memory space. The local memory is used as the
	 * source of WRITE operations in the rpmem_fip_persist function,
	 * thus the FI_WRITE access flag.
	 */
	ret = fi_mr_reg(fip->domain, fip->laddr, fip->size,
			FI_WRITE, 0, 0, 0, &fip->mr, NULL);
	if (ret) {
		RPMEM_FI_ERR(ret, "registering memory");
		return ret;
	}

	/* get local memory descriptor */
	fip->mr_desc = fi_mr_desc(fip->mr);

	/* allocate buffer for read operation */
	ASSERT(IS_PAGE_ALIGNED(RPMEM_RD_BUFF_SIZE));
	errno = posix_memalign((void **)&fip->rd_buff, Pagesize,
			RPMEM_RD_BUFF_SIZE);
	if (errno) {
		RPMEM_LOG(ERR, "!allocating read buffer");
		ret = -1;
		goto err_malloc_rd_buff;
	}

	/*
	 * Register the buffer for the read operation. The buffer is
	 * the local target of an RMA READ, thus the FI_REMOTE_WRITE
	 * access flag.
	 */
	ret = fi_mr_reg(fip->domain, fip->rd_buff,
			RPMEM_RD_BUFF_SIZE, FI_REMOTE_WRITE,
			0, 0, 0, &fip->rd_mr, NULL);
	if (ret) {
		RPMEM_FI_ERR(ret, "registering read buffer");
		goto err_rd_mr;
	}

	/* get read buffer local memory descriptor */
	fip->rd_mr_desc = fi_mr_desc(fip->rd_mr);

	return 0;
err_rd_mr:
	free(fip->rd_buff);
err_malloc_rd_buff:
	RPMEM_FI_CLOSE(fip->mr, "unregistering memory");
	return ret;
}
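
The teardown path is not shown in this example. Below is a minimal sketch of the matching cleanup, assuming it simply reverses the two registrations above (the name rpmem_fip_fini_memory is an assumption, not taken from the source):

/*
 * rpmem_fip_fini_memory -- (sketch) release the resources acquired by
 * rpmem_fip_init_memory, in reverse order of acquisition
 */
static void
rpmem_fip_fini_memory(struct rpmem_fip *fip)
{
	/* unregister and free the read buffer first, then the pool memory */
	RPMEM_FI_CLOSE(fip->rd_mr, "unregistering read buffer");
	free(fip->rd_buff);
	RPMEM_FI_CLOSE(fip->mr, "unregistering memory");
}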
Example #2
/*
 * rpmem_fip_persist_gpspm -- (internal) perform persist operation for GPSPM
 */
static int
rpmem_fip_persist_gpspm(struct rpmem_fip *fip, size_t offset,
	size_t len, unsigned lane)
{
	int ret;
	struct rpmem_fip_plane_gpspm *lanep = &fip->lanes.gpspm[lane];

	ret = rpmem_fip_lane_wait(&lanep->lane, FI_SEND);
	if (unlikely(ret)) {
		RPMEM_LOG(ERR, "waiting for SEND buffer");
		return ret;
	}

	RPMEM_ASSERT(!rpmem_fip_lane_busy(&lanep->lane));

	rpmem_fip_lane_begin(&lanep->lane, FI_SEND | FI_RECV);

	void *laddr = (void *)((uintptr_t)fip->laddr + offset);
	uint64_t raddr = fip->raddr + offset;
	struct rpmem_msg_persist *msg;

	/* WRITE for requested memory region */
	ret = rpmem_fip_writemsg(fip->ep, &lanep->write, laddr, len, raddr);
	if (unlikely(ret)) {
		RPMEM_FI_ERR(ret, "RMA write");
		return ret;
	}

	/* SEND persist message */
	msg = rpmem_fip_msg_get_pmsg(&lanep->send);
	msg->lane = lane;
	msg->addr = raddr;
	msg->size = len;

	ret = rpmem_fip_sendmsg(fip->ep, &lanep->send);
	if (unlikely(ret)) {
		RPMEM_FI_ERR(ret, "MSG send");
		return ret;
	}

	/* wait for persist operation completion */
	ret = rpmem_fip_lane_wait(&lanep->lane, FI_RECV);
	if (unlikely(ret)) {
		RPMEM_LOG(ERR, "persist operation failed");
		return ret;
	}

	return ret;
}
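
For context, here is a hedged sketch of how such a persist variant might be dispatched through the ops table seen in the other examples (fip->ops->lanes_post and fip->ops->process appear below); the persist callback name and the bounds check are assumptions:

/*
 * rpmem_fip_persist -- (sketch) validate the lane and dispatch to the
 * method-specific persist handler; the 'persist' member of the ops
 * table is an assumed callback, by analogy with lanes_post and process
 */
int
rpmem_fip_persist(struct rpmem_fip *fip, size_t offset, size_t len,
	unsigned lane)
{
	if (unlikely(lane >= fip->nlanes)) {
		RPMEM_LOG(ERR, "invalid lane number -- %u", lane);
		return -1;
	}

	return fip->ops->persist(fip, offset, len, lane);
}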
Example #3
/*
 * rpmem_fip_init_fabric_res -- (internal) initialize common fabric resources
 */
static int
rpmem_fip_init_fabric_res(struct rpmem_fip *fip)
{
	int ret;
	ret = fi_fabric(fip->fi->fabric_attr, &fip->fabric, NULL);
	if (ret) {
		RPMEM_FI_ERR(ret, "opening fabric domain");
		goto err_fi_fabric;
	}

	ret = fi_domain(fip->fabric, fip->fi, &fip->domain, NULL);
	if (ret) {
		RPMEM_FI_ERR(ret, "opening fabric access domain");
		goto err_fi_domain;
	}

	struct fi_eq_attr eq_attr = {
		.size = 0, /* use default value */
		.flags = 0,
		.wait_obj = FI_WAIT_UNSPEC,
		.signaling_vector = 0,
		.wait_set = NULL,
	};

	ret = fi_eq_open(fip->fabric, &eq_attr, &fip->eq, NULL);
	if (ret) {
		RPMEM_FI_ERR(ret, "opening event queue");
		goto err_eq_open;
	}

	return 0;
err_eq_open:
	RPMEM_FI_CLOSE(fip->domain, "closing fabric access domain");
err_fi_domain:
	RPMEM_FI_CLOSE(fip->fabric, "closing fabric domain");
err_fi_fabric:
	return ret;
}

/*
 * rpmem_fip_fini_fabric_res -- (internal) deinitialize common fabric resources
 */
static void
rpmem_fip_fini_fabric_res(struct rpmem_fip *fip)
{
	RPMEM_FI_CLOSE(fip->eq, "closing event queue");
	RPMEM_FI_CLOSE(fip->domain, "closing fabric access domain");
	RPMEM_FI_CLOSE(fip->fabric, "closing fabric domain");
}
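
Taken together with the previous examples, a plausible initialization order is: get the fabric info first, then open the fabric resources that depend on it, then register memory. A hedged sketch of such a combined init path (the wrapper name is an assumption):

/* sketch of a combined init path; the function name is an assumption */
static int
rpmem_fip_init_common(struct rpmem_fip *fip, const char *node,
	const char *service, enum rpmem_provider provider)
{
	int ret;

	ret = rpmem_fip_getinfo(fip, node, service, provider);
	if (ret)
		return ret;

	ret = rpmem_fip_init_fabric_res(fip);
	if (ret)
		return ret;

	/* fi_mr_reg requires fip->domain opened by init_fabric_res */
	ret = rpmem_fip_init_memory(fip);
	if (ret) {
		rpmem_fip_fini_fabric_res(fip);
		return ret;
	}

	return 0;
}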
Example #4
/*
 * rpmem_fip_getinfo -- (internal) get fabric interface information
 */
static int
rpmem_fip_getinfo(struct rpmem_fip *fip, const char *node, const char *service,
	enum rpmem_provider provider)
{
	int ret = -1;
	struct fi_info *hints = rpmem_fip_get_hints(provider);
	if (!hints) {
		RPMEM_LOG(ERR, "!getting fabric interface information hints");
		goto err_hints;
	}

	ret = fi_getinfo(RPMEM_FIVERSION, node, service,
			0, hints, &fip->fi);
	if (ret) {
		RPMEM_FI_ERR(ret, "getting fabric interface information");
		goto err_fi_getinfo;
	}

	rpmem_fip_print_info(fip->fi);

	/* fall through to free the hints */
err_fi_getinfo:
	fi_freeinfo(hints);
err_hints:
	return ret;
}
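
rpmem_fip_get_hints is not shown in this listing. A minimal sketch of what it could look like using standard libfabric calls; the capability set, address format, and provider mapping below are assumptions, not the library's actual choices:

/* sketch only -- the real hints may differ */
static struct fi_info *
rpmem_fip_get_hints(enum rpmem_provider provider)
{
	struct fi_info *hints = fi_allocinfo();
	if (!hints)
		return NULL;

	/* connection-oriented endpoint with messaging and RMA */
	hints->ep_attr->type = FI_EP_MSG;
	hints->caps = FI_MSG | FI_RMA;
	hints->addr_format = FI_SOCKADDR;

	/* provider name mapping is an assumption */
	if (provider == RPMEM_PROV_LIBFABRIC_VERBS)
		hints->fabric_attr->prov_name = strdup("verbs");

	return hints;
}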
Example #5
/*
 * rpmem_fip_close -- close connection to remote peer
 */
int
rpmem_fip_close(struct rpmem_fip *fip)
{
	int ret;
	int lret = 0;

	ret = fi_shutdown(fip->ep, 0);
	if (ret) {
		RPMEM_FI_ERR(ret, "disconnecting endpoint");
		lret = ret;
	}

	struct fi_eq_cm_entry entry;
	ret = rpmem_fip_read_eq(fip->eq, &entry, FI_SHUTDOWN,
			&fip->ep->fid, -1);
	if (ret)
		lret = ret;

	ret = rpmem_fip_fini_ep(fip);
	if (ret)
		lret = ret;

	ret = rpmem_fip_fini_cq(fip);
	if (ret)
		lret = ret;

	return lret;
}
Example #6
/*
 * rpmem_fip_gpspm_post_resp -- (internal) post persist response message buffer
 */
static inline int
rpmem_fip_gpspm_post_resp(struct rpmem_fip *fip,
	struct rpmem_fip_msg *resp)
{
	int ret = rpmem_fip_recvmsg(fip->ep, resp);
	if (unlikely(ret)) {
		RPMEM_FI_ERR(ret, "posting GPSPM recv buffer");
		return ret;
	}

	return 0;
}
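
This helper is presumably invoked once per lane when the initial receive buffers are posted. A hedged sketch of such a lanes_post callback (the function name is an assumption; fip->recv comes from the GPSPM lane-initialization example below):

/* sketch: post one GPSPM response buffer per lane */
static int
rpmem_fip_post_lanes_gpspm(struct rpmem_fip *fip)
{
	int ret = 0;

	for (unsigned i = 0; i < fip->nlanes; i++) {
		ret = rpmem_fip_gpspm_post_resp(fip, &fip->recv[i]);
		if (ret)
			break;
	}

	return ret;
}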
Example #7
/*
 * rpmem_fip_persist_apm -- (internal) perform persist operation for APM
 */
static int
rpmem_fip_persist_apm(struct rpmem_fip *fip, size_t offset,
	size_t len, unsigned lane)
{
	struct rpmem_fip_plane_apm *lanep = &fip->lanes.apm[lane];

	RPMEM_ASSERT(!rpmem_fip_lane_busy(&lanep->lane));

	rpmem_fip_lane_begin(&lanep->lane, FI_READ);

	int ret;
	void *laddr = (void *)((uintptr_t)fip->laddr + offset);
	uint64_t raddr = fip->raddr + offset;

	/* WRITE for requested memory region */
	ret = rpmem_fip_writemsg(fip->ep, &lanep->write, laddr, len, raddr);
	if (unlikely(ret)) {
		RPMEM_FI_ERR(ret, "RMA write");
		return ret;
	}

	/* READ to read-after-write buffer */
	ret = rpmem_fip_readmsg(fip->ep, &lanep->read, &fip->raw_buff,
			sizeof(fip->raw_buff), raddr);
	if (unlikely(ret)) {
		RPMEM_FI_ERR(ret, "RMA read");
		return ret;
	}

	/* wait for READ completion */
	ret = rpmem_fip_lane_wait(&lanep->lane, FI_READ);
	if (unlikely(ret)) {
		RPMEM_LOG(ERR, "waiting for READ completion failed");
		return ret;
	}

	return ret;
}
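
The GPSPM and APM persist variants share one signature, which suggests dispatch through a per-method function table. A hedged sketch of how it could be wired up; the struct layout, table name, and the RPMEM_PM_* indices are assumptions based on the fip->ops->persist/process/lanes_post calls in these examples:

/* sketch of a per-method ops table; layout and names are assumptions */
static struct rpmem_fip_ops rpmem_fip_method_ops[] = {
	[RPMEM_PM_GPSPM] = {
		.persist = rpmem_fip_persist_gpspm,
		.process = rpmem_fip_process_gpspm,
		.lanes_init = rpmem_fip_init_lanes_gpspm,
	},
	[RPMEM_PM_APM] = {
		.persist = rpmem_fip_persist_apm,
		.process = rpmem_fip_process_apm,
		.lanes_init = rpmem_fip_init_lanes_apm,
	},
};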
Example #8
/*
 * rpmem_fip_connect -- connect to remote peer
 */
int
rpmem_fip_connect(struct rpmem_fip *fip)
{
	int ret;
	struct fi_eq_cm_entry entry;

	ret = rpmem_fip_init_cq(fip);
	if (ret)
		goto err_init_cq;

	ret = rpmem_fip_init_ep(fip);
	if (ret)
		goto err_init_ep;

	ret = fip->ops->lanes_post(fip);
	if (ret)
		goto err_lanes_post;

	ret = fi_connect(fip->ep, fip->fi->dest_addr, NULL, 0);
	if (ret) {
		RPMEM_FI_ERR(ret, "initiating connection request");
		goto err_fi_connect;
	}

	ret = rpmem_fip_read_eq(fip->eq, &entry, FI_CONNECTED,
			&fip->ep->fid, -1);
	if (ret)
		goto err_fi_eq_read;

	return 0;
err_fi_eq_read:
err_fi_connect:
err_lanes_post:
	rpmem_fip_fini_ep(fip);
err_init_ep:
	rpmem_fip_fini_cq(fip);
err_init_cq:
	return ret;
}
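
Usage-wise, rpmem_fip_connect pairs with the rpmem_fip_close shown earlier. A minimal, hedged calling sequence (the wrapper function is illustrative only):

/* sketch: typical connect/use/close sequence for an rpmem_fip object */
static int
rpmem_fip_example_session(struct rpmem_fip *fip)
{
	int ret = rpmem_fip_connect(fip);
	if (ret)
		return ret;

	/* ... issue persist operations over the lanes ... */

	return rpmem_fip_close(fip);
}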
Example #9
/*
 * rpmem_fip_close -- close connection to remote peer
 */
int
rpmem_fip_close(struct rpmem_fip *fip)
{
	int ret;
	int lret = 0;

	ret = fi_shutdown(fip->ep, 0);
	if (ret) {
		RPMEM_FI_ERR(ret, "disconnecting endpoint");
		lret = ret;
	}

	ret = rpmem_fip_fini_ep(fip);
	if (ret)
		lret = ret;

	ret = rpmem_fip_fini_cq(fip);
	if (ret)
		lret = ret;

	return lret;
}
Example #10
/*
 * rpmem_fip_process_gpspm -- (internal) process completion queue entry for
 * GPSPM
 */
static int
rpmem_fip_process_gpspm(struct rpmem_fip *fip, void *context, uint64_t flags)
{
	if (flags & FI_RECV) {
		/* RECV completion */
		struct rpmem_fip_msg *resp = context;
		struct rpmem_msg_persist_resp *msg_resp =
			rpmem_fip_msg_get_pres(resp);
		VALGRIND_DO_MAKE_MEM_DEFINED(msg_resp, sizeof(*msg_resp));

		if (unlikely(msg_resp->lane >= fip->nlanes)) {
			RPMEM_LOG(ERR, "lane number received (%lu) is greater "
					"than maximum lane number (%u)",
					msg_resp->lane, fip->nlanes - 1);
			return -1;
		}

		struct rpmem_fip_lane *lanep =
			&fip->lanes.gpspm[msg_resp->lane].lane;

		/* post RECV buffer immediately */
		int ret = rpmem_fip_gpspm_post_resp(fip, resp);
		if (unlikely(ret))
			RPMEM_LOG(ERR, "posting GPSPM recv buffer failed");

		rpmem_fip_lane_sigret(lanep, flags, ret);

		return ret;
	}

	struct rpmem_fip_lane *lanep = context;

	/* SEND completion */
	rpmem_fip_lane_signal(lanep, flags);

	return 0;
}
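
By analogy, the APM counterpart can be much simpler: APM only waits for the FI_READ completion, and the context is the lane itself. A hedged sketch (the function name is an assumption):

/* sketch: APM completion handler; the name is an assumption */
static int
rpmem_fip_process_apm(struct rpmem_fip *fip, void *context, uint64_t flags)
{
	/* the only expected completion is the READ posted by persist */
	struct rpmem_fip_lane *lanep = context;

	rpmem_fip_lane_signal(lanep, flags);

	return 0;
}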
Example #11
/*
 * rpmem_fip_process -- (internal) process completion events
 */
static int
rpmem_fip_process(struct rpmem_fip *fip)
{
	ssize_t sret;
	struct fi_cq_err_entry err;
	const char *str_err;
	int ret;
	struct fi_cq_msg_entry *cq_entries;

	cq_entries = malloc(fip->cq_size * sizeof(*cq_entries));
	if (!cq_entries) {
		RPMEM_LOG(ERR, "!allocating completion queue buffer");
		return -1;
	}

	while (!fip->closing) {
		sret = fi_cq_sread(fip->cq, cq_entries, fip->cq_size,
				NULL, RPMEM_FIP_CQ_WAIT_MS);

		if (unlikely(fip->closing))
			break;

		if (unlikely(sret == -FI_EAGAIN))
			continue;

		if (unlikely(sret < 0)) {
			ret = (int)sret;
			goto err_cq_read;
		}

		for (ssize_t i = 0; i < sret; i++) {
			struct fi_cq_msg_entry *comp = &cq_entries[i];

			/*
			 * If the context is NULL it most likely means we
			 * got an unexpected CQ entry. The CQ is configured
			 * with FI_SELECTIVE_COMPLETION, so every inbound or
			 * outbound operation must be issued with the
			 * FI_COMPLETION flag and a non-NULL context.
			 */
			RPMEM_ASSERT(comp->op_context);

			/* read operation */
			if (unlikely(comp->op_context == &fip->rd_lane)) {
				rpmem_fip_lane_signal(&fip->rd_lane.lane,
						FI_READ);
				continue;
			}

			/* persist operation */
			ret = fip->ops->process(fip, comp->op_context,
					comp->flags);
			if (unlikely(ret)) {
				RPMEM_LOG(ERR, "persist operation failed");
				goto err;
			}
		}
	}

	free(cq_entries);
	return 0;
err_cq_read:
	sret = fi_cq_readerr(fip->cq, &err, 0);
	if (sret < 0) {
		RPMEM_FI_ERR((int)sret, "error reading from completion queue: "
			"cannot read the error entry from the completion "
			"queue");
		goto err;
	}

	str_err = fi_cq_strerror(fip->cq, err.prov_errno, NULL, NULL, 0);
	RPMEM_LOG(ERR, "error reading from completion queue: %s", str_err);
err:
	rpmem_fip_signal_all(fip, ret);
	free(cq_entries);
	return ret;
}
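
Since rpmem_fip_process blocks in fi_cq_sread until fip->closing is set, it is naturally run on a dedicated thread. A hedged sketch using plain pthreads; the wrapper names and the process_thread field are assumptions:

#include <pthread.h>

/* sketch: run the completion-processing loop on its own thread */
static void *
rpmem_fip_process_thread(void *arg)
{
	struct rpmem_fip *fip = arg;

	/* returns when fip->closing is set or on error */
	rpmem_fip_process(fip);

	return NULL;
}

static int
rpmem_fip_process_start(struct rpmem_fip *fip)
{
	/* fip->process_thread is an assumed field */
	return pthread_create(&fip->process_thread, NULL,
			rpmem_fip_process_thread, fip);
}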
Example #12
/*
 * rpmem_fip_init_lanes_gpspm -- (internal) initialize lanes for GPSPM
 */
static int
rpmem_fip_init_lanes_gpspm(struct rpmem_fip *fip)
{
	int ret = 0;

	/* allocate GPSPM lanes */
	fip->lanes.gpspm = calloc(1, fip->nlanes * sizeof(*fip->lanes.gpspm));
	if (!fip->lanes.gpspm) {
		RPMEM_LOG(ERR, "!allocating GPSPM lanes");
		ret = -1;
		goto err_malloc_lanes;
	}

	/* allocate persist messages buffer */
	size_t msg_size = fip->nlanes * sizeof(struct rpmem_msg_persist);
	fip->pmsg = malloc(msg_size);
	if (!fip->pmsg) {
		RPMEM_LOG(ERR, "!allocating messages buffer");
		ret = -1;
		goto err_malloc_pmsg;
	}

	/*
	 * Register the persist messages buffer. The persist messages
	 * are sent to the daemon, thus the FI_SEND access flag.
	 */
	ret = fi_mr_reg(fip->domain, fip->pmsg, msg_size, FI_SEND,
			0, 0, 0, &fip->pmsg_mr, NULL);
	if (ret) {
		RPMEM_FI_ERR(ret, "registering messages buffer");
		goto err_fi_mr_reg_pmsg;
	}

	/* get persist messages buffer local descriptor */
	fip->pmsg_mr_desc = fi_mr_desc(fip->pmsg_mr);

	/* allocate persist response messages buffer */
	size_t msg_resp_size = fip->nlanes *
				sizeof(struct rpmem_msg_persist_resp);
	fip->pres = malloc(msg_resp_size);
	if (!fip->pres) {
		RPMEM_LOG(ERR, "!allocating messages response buffer");
		ret = -1;
		goto err_malloc_pres;
	}

	/*
	 * Register the persist response messages buffer. The persist
	 * response messages are received from the daemon, thus the
	 * FI_RECV access flag.
	 */
	ret = fi_mr_reg(fip->domain, fip->pres, msg_resp_size, FI_RECV,
			0, 0, 0, &fip->pres_mr, NULL);
	if (ret) {
		RPMEM_FI_ERR(ret, "registering messages response buffer");
		goto err_fi_mr_reg_pres;
	}

	/* get persist response messages buffer local descriptor */
	fip->pres_mr_desc = fi_mr_desc(fip->pres_mr);

	/* allocate RECV structures for fi_recvmsg(3) */
	fip->recv = malloc(fip->nlanes * sizeof(*fip->recv));
	if (!fip->recv) {
		RPMEM_LOG(ERR, "!allocating response message iov buffer");
		ret = -1;
		goto err_malloc_recv;
	}

	/*
	 * Initialize all structures required for the WRITE, SEND and
	 * RECV operations.
	 *
	 * If a completion is required, the FI_COMPLETION flag and an
	 * appropriate context must be used.
	 *
	 * In GPSPM only the RECV and SEND completions are required.
	 *
	 * For RECV the context is the RECV operation structure passed
	 * to the fi_recvmsg(3) function call.
	 *
	 * For SEND the context is the lane structure.
	 *
	 * The received buffer contains a lane id, which is used to look
	 * up the lane that must be signaled when the operation completes.
	 */
	unsigned i;
	for (i = 0; i < fip->nlanes; i++) {
		ret = rpmem_fip_lane_init(&fip->lanes.gpspm[i].lane);
		if (ret)
			goto err_lane_init;

		/* WRITE */
		rpmem_fip_rma_init(&fip->lanes.gpspm[i].write,
				fip->mr_desc, 0,
				fip->rkey,
				&fip->lanes.gpspm[i],
				0);

		/* SEND */
		rpmem_fip_msg_init(&fip->lanes.gpspm[i].send,
				fip->pmsg_mr_desc, 0,
				&fip->lanes.gpspm[i],
				&fip->pmsg[i],
				sizeof(fip->pmsg[i]),
				FI_COMPLETION);

		/* RECV */
		rpmem_fip_msg_init(&fip->recv[i],
				fip->pres_mr_desc, 0,
				&fip->recv[i],
				&fip->pres[i],
				sizeof(fip->pres[i]),
				FI_COMPLETION);
	}

	return 0;
err_lane_init:
	for (unsigned j = 0; j < i; j++)
		rpmem_fip_lane_fini(&fip->lanes.gpspm[j].lane);
	free(fip->recv);
err_malloc_recv:
	RPMEM_FI_CLOSE(fip->pres_mr, "unregistering messages "
			"response buffer");
err_fi_mr_reg_pres:
	free(fip->pres);
err_malloc_pres:
	RPMEM_FI_CLOSE(fip->pmsg_mr, "unregistering messages buffer");
err_fi_mr_reg_pmsg:
	free(fip->pmsg);
err_malloc_pmsg:
	free(fip->lanes.gpspm);
err_malloc_lanes:
	return ret;
}
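
The matching teardown is not shown in the listing. Assuming it reverses the allocations and registrations above (the function name is an assumption), a minimal sketch:

/* sketch: release GPSPM lane resources in reverse order of creation */
static void
rpmem_fip_fini_lanes_gpspm(struct rpmem_fip *fip)
{
	for (unsigned i = 0; i < fip->nlanes; i++)
		rpmem_fip_lane_fini(&fip->lanes.gpspm[i].lane);

	free(fip->recv);
	RPMEM_FI_CLOSE(fip->pres_mr, "unregistering messages "
			"response buffer");
	free(fip->pres);
	RPMEM_FI_CLOSE(fip->pmsg_mr, "unregistering messages buffer");
	free(fip->pmsg);
	free(fip->lanes.gpspm);
}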
Example #13
/*
 * rpmem_fip_init_lanes_apm -- (internal) initialize lanes for APM
 */
static int
rpmem_fip_init_lanes_apm(struct rpmem_fip *fip)
{
	int ret;

	/* allocate APM lanes */
	fip->lanes.apm = calloc(1, fip->nlanes * sizeof(*fip->lanes.apm));
	if (!fip->lanes.apm) {
		RPMEM_LOG(ERR, "!allocating APM lanes");
		goto err_malloc_lanes;
	}

	/* register read-after-write buffer */
	ret = fi_mr_reg(fip->domain, &fip->raw_buff, sizeof(fip->raw_buff),
			FI_REMOTE_WRITE, 0, 0, 0, &fip->raw_mr, NULL);
	if (ret) {
		RPMEM_FI_ERR(ret, "registering APM read buffer");
		goto err_fi_raw_mr;
	}

	/* get read-after-write buffer local descriptor */
	fip->raw_mr_desc = fi_mr_desc(fip->raw_mr);

	/*
	 * Initialize all structures required for the WRITE and READ
	 * operations.
	 *
	 * If a completion is required, the FI_COMPLETION flag and an
	 * appropriate context must be used.
	 *
	 * In APM only the READ completion is required.
	 * The context is the lane structure.
	 */
	unsigned i;
	for (i = 0; i < fip->nlanes; i++) {
		ret = rpmem_fip_lane_init(&fip->lanes.apm[i].lane);
		if (ret)
			goto err_lane_init;

		/* WRITE */
		rpmem_fip_rma_init(&fip->lanes.apm[i].write,
				fip->mr_desc, 0,
				fip->rkey,
				&fip->lanes.apm[i],
				0);

		/* READ */
		rpmem_fip_rma_init(&fip->lanes.apm[i].read,
				fip->raw_mr_desc, 0,
				fip->rkey,
				&fip->lanes.apm[i],
				FI_COMPLETION);
	}

	return 0;
err_lane_init:
	for (unsigned j = 0; j < i; j++)
		rpmem_fip_lane_fini(&fip->lanes.apm[j].lane);
	RPMEM_FI_CLOSE(fip->raw_mr, "unregistering APM read buffer");
err_fi_raw_mr:
	free(fip->lanes.apm);
err_malloc_lanes:
	return -1;
}
Example #14
/*
 * rpmem_fip_init_cq -- (internal) initialize completion queue(s)
 */
static int
rpmem_fip_init_cq(struct rpmem_fip *fip)
{
	int ret;

	struct fi_cq_attr cq_attr = {
		.size = fip->cq_size,
		.flags = 0,
		.format = FI_CQ_FORMAT_MSG,
		.wait_obj = FI_WAIT_UNSPEC,
		.signaling_vector = 0,
		.wait_cond = FI_CQ_COND_NONE,
		.wait_set = NULL,
	};

	ret = fi_cq_open(fip->domain, &cq_attr, &fip->cq, NULL);
	if (ret) {
		RPMEM_FI_ERR(ret, "opening completion queue");
		goto err_cq_open;
	}

	return 0;
err_cq_open:
	return ret;
}

/*
 * rpmem_fip_fini_cq -- (internal) deinitialize completion queue(s)
 */
static int
rpmem_fip_fini_cq(struct rpmem_fip *fip)
{
	return RPMEM_FI_CLOSE(fip->cq, "closing completion queue");
}

/*
 * rpmem_fip_init_ep -- (internal) initialize endpoint
 */
static int
rpmem_fip_init_ep(struct rpmem_fip *fip)
{
	int ret;

	/* create an endpoint */
	ret = fi_endpoint(fip->domain, fip->fi, &fip->ep, NULL);
	if (ret) {
		RPMEM_FI_ERR(ret, "allocating endpoint");
		goto err_endpoint;
	}

	/*
	 * Bind an event queue to an endpoint to get
	 * connection-related events for the endpoint.
	 */
	ret = fi_ep_bind(fip->ep, &fip->eq->fid, 0);
	if (ret) {
		RPMEM_FI_ERR(ret, "binding event queue to endpoint");
		goto err_ep_bind_eq;
	}

	/*
	 * Bind a completion queue to the endpoint to get completion
	 * events for the specified inbound/outbound operations.
	 *
	 * FI_SELECTIVE_COMPLETION means every inbound/outbound operation
	 * must explicitly specify, using the FI_COMPLETION flag, whether
	 * a completion event should be generated.
	 *
	 * Which completion events are delivered depends on the
	 * persistency method in use and is configured during lane
	 * initialization for that method.
	 */
	ret = fi_ep_bind(fip->ep, &fip->cq->fid,
			FI_RECV | FI_TRANSMIT | FI_SELECTIVE_COMPLETION);
	if (ret) {
		RPMEM_FI_ERR(ret, "binding completion queue to endpoint");
		goto err_ep_bind_cq;
	}

	/*
	 * Enable the endpoint so that inbound/outbound operations
	 * can be posted as required.
	 */
	ret = fi_enable(fip->ep);
	if (ret) {
		RPMEM_FI_ERR(ret, "activating endpoint");
		goto err_fi_enable;
	}

	return 0;
err_fi_enable:
err_ep_bind_cq:
err_ep_bind_eq:
err_endpoint:
	return ret;
}
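
rpmem_fip_fini_ep is referenced by the close and connect examples above but not shown. Assuming it mirrors its init counterpart the same way rpmem_fip_fini_cq does, a minimal sketch:

/* sketch: close the endpoint before closing the CQ/EQ it is bound to */
static int
rpmem_fip_fini_ep(struct rpmem_fip *fip)
{
	return RPMEM_FI_CLOSE(fip->ep, "closing endpoint");
}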