示例#1
0
int sock_verify_info(struct fi_info *hints)
{
	uint64_t caps;
	enum fi_ep_type ep_type;
	int ret;
	struct sock_domain *domain;
	struct sock_fabric *fabric;

	if (!hints)
		return 0;

	ep_type = hints->ep_attr ? hints->ep_attr->type : FI_EP_UNSPEC;
	switch (ep_type) {
	case FI_EP_UNSPEC:
	case FI_EP_MSG:
		caps = SOCK_EP_MSG_CAP;
		ret = sock_msg_verify_ep_attr(hints->ep_attr,
					      hints->tx_attr,
					      hints->rx_attr);
		break;
	case FI_EP_DGRAM:
		caps = SOCK_EP_DGRAM_CAP;
		ret = sock_dgram_verify_ep_attr(hints->ep_attr,
						hints->tx_attr,
						hints->rx_attr);
		break;
	case FI_EP_RDM:
		caps = SOCK_EP_RDM_CAP;
		ret = sock_rdm_verify_ep_attr(hints->ep_attr,
					      hints->tx_attr,
					      hints->rx_attr);
		break;
	default:
		ret = -FI_ENODATA;
	}
	if (ret)
		return ret;

	if ((caps | hints->caps) != caps) {
		SOCK_LOG_DBG("Unsupported capabilities\n");
		return -FI_ENODATA;
	}

	switch (hints->addr_format) {
	case FI_FORMAT_UNSPEC:
	case FI_SOCKADDR:
	case FI_SOCKADDR_IN:
		break;
	default:
		return -FI_ENODATA;
	}

	if (hints->domain_attr && hints->domain_attr->domain) {
		domain = container_of(hints->domain_attr->domain,
				      struct sock_domain, dom_fid);
		if (!sock_dom_check_list(domain)) {
			SOCK_LOG_DBG("no matching domain\n");
			return -FI_ENODATA;
		}
	}
示例#2
0
static void *_sock_conn_listen(void *arg)
{
	int conn_fd, ret;
	char tmp;
	socklen_t addr_size;
	struct sockaddr_in remote;
	struct pollfd poll_fds[2];

	struct sock_ep_attr *ep_attr = (struct sock_ep_attr *)arg;
	struct sock_conn_listener *listener = &ep_attr->listener;
	struct sock_conn_map *map = &ep_attr->cmap;

	poll_fds[0].fd = listener->sock;
	poll_fds[1].fd = listener->signal_fds[1];
	poll_fds[0].events = poll_fds[1].events = POLLIN;
	listener->is_ready = 1;

	while (listener->do_listen) {
		if (poll(poll_fds, 2, -1) > 0) {
			if (poll_fds[1].revents & POLLIN) {
				ret = ofi_read_socket(listener->signal_fds[1], &tmp, 1);
				if (ret != 1) {
					SOCK_LOG_ERROR("Invalid signal\n");
					goto err;
				}
				continue;
			}
		} else {
			goto err;
		}

		addr_size = sizeof(remote);
		conn_fd = accept(listener->sock, (struct sockaddr *) &remote,
					&addr_size);
		SOCK_LOG_DBG("CONN: accepted conn-req: %d\n", conn_fd);
		if (conn_fd < 0) {
			SOCK_LOG_ERROR("failed to accept: %s\n", strerror(errno));
			goto err;
		}

		SOCK_LOG_DBG("ACCEPT: %s, %d\n", inet_ntoa(remote.sin_addr),
				ntohs(remote.sin_port));

		fastlock_acquire(&map->lock);
		sock_conn_map_insert(ep_attr, &remote, conn_fd, 1);
		fastlock_release(&map->lock);
		sock_pe_signal(ep_attr->domain->pe);
	}

err:
	ofi_close_socket(listener->sock);
	SOCK_LOG_DBG("Listener thread exited\n");
	return NULL;
}
示例#3
0
ssize_t sock_conn_send_src_addr(struct sock_ep_attr *ep_attr, struct sock_tx_ctx *tx_ctx,
				struct sock_conn *conn)
{
	int ret;
	uint64_t total_len;
	struct sock_op tx_op = { 0 };

	tx_op.op = SOCK_OP_CONN_MSG;
	SOCK_LOG_DBG("New conn msg on TX: %p using conn: %p\n", tx_ctx, conn);

	total_len = 0;
	tx_op.src_iov_len = sizeof(struct sockaddr_in);
	total_len = tx_op.src_iov_len + sizeof(struct sock_op_send);

	sock_tx_ctx_start(tx_ctx);
	if (ofi_rbavail(&tx_ctx->rb) < total_len) {
		ret = -FI_EAGAIN;
		goto err;
	}

	sock_tx_ctx_write_op_send(tx_ctx, &tx_op, 0, (uintptr_t) NULL, 0, 0,
				   ep_attr, conn);
	sock_tx_ctx_write(tx_ctx, ep_attr->src_addr, sizeof(struct sockaddr_in));
	sock_tx_ctx_commit(tx_ctx);
	conn->address_published = 1;
	return 0;

err:
	sock_tx_ctx_abort(tx_ctx);
	return ret;
}
struct sock_rx_entry *sock_rx_new_buffered_entry(struct sock_rx_ctx *rx_ctx,
						 size_t len)
{
	struct sock_rx_entry *rx_entry;

	if (rx_ctx->buffered_len + len >= rx_ctx->attr.total_buffered_recv) {
		SOCK_LOG_ERROR("Exceeded buffered recv limit\n");
	}

	rx_entry = calloc(1, sizeof(*rx_entry) + len);
	if (!rx_entry)
		return NULL;

	SOCK_LOG_DBG("New buffered entry:%p len: %lu, ctx: %p\n", 
		       rx_entry, len, rx_ctx);

	rx_entry->is_busy = 1;
	rx_entry->is_buffered = 1;
	rx_entry->rx_op.dest_iov_len = 1;
	rx_entry->iov[0].iov.len = len;
	rx_entry->iov[0].iov.addr = (uintptr_t) (rx_entry + 1);
	rx_entry->total_len = len;
	
	rx_ctx->buffered_len += len;
	dlist_insert_tail(&rx_entry->entry, &rx_ctx->rx_buffered_list);
	rx_entry->is_busy = 1;
	rx_entry->is_tagged = 0;

	return rx_entry;
}
示例#5
0
ssize_t sock_comm_send(struct sock_pe_entry *pe_entry,
		       const void *buf, size_t len)
{
	ssize_t ret, used;

	if (len > pe_entry->cache_sz) {
		used = ofi_rbused(&pe_entry->comm_buf);
		if (used == sock_comm_flush(pe_entry)) {
			return sock_comm_send_socket(pe_entry->conn, buf, len);
		} else {
			return 0;
		}
	}

	if (ofi_rbavail(&pe_entry->comm_buf) < len) {
		ret = sock_comm_flush(pe_entry);
		if (ret <= 0)
			return 0;
	}

	ret = MIN(ofi_rbavail(&pe_entry->comm_buf), len);
	ofi_rbwrite(&pe_entry->comm_buf, buf, ret);
	ofi_rbcommit(&pe_entry->comm_buf);
	SOCK_LOG_DBG("buffered %lu\n", ret);
	return ret;
}
示例#6
0
ssize_t sock_comm_peek(struct sock_conn *conn, void *buf, size_t len)
{
	ssize_t ret;
	ret = ofi_recv_socket(conn->sock_fd, buf, len, MSG_PEEK);
	if (ret == 0) {
		conn->connected = 0;
		SOCK_LOG_DBG("Disconnected\n");
		return ret;
	}

	if (ret < 0) {
		SOCK_LOG_DBG("peek %s\n", strerror(ofi_sockerr()));
		ret = 0;
	}

	if (ret > 0)
		SOCK_LOG_DBG("peek from network: %lu\n", ret);
	return ret;
}
示例#7
0
fi_addr_t sock_av_lookup_key(struct sock_av *av, int key)
{
	int i;
	struct sock_av_addr *av_addr;
	struct sock_conn_map *cmap;

	cmap = av->cmap;
	for (i = 0; i < av->table_hdr->stored; i++) {
		av_addr = &av->table[i];
		if (sock_compare_addr(&cmap->table[key].addr, 
				      (struct sockaddr_in*)&av_addr->addr)) {
			SOCK_LOG_DBG("LOOKUP: (%d->%d)\n", key, i);
			return i;
		}
	}
	
	SOCK_LOG_DBG("Reverse-LOOKUP failed: %d, %s:%d\n", key,
		       inet_ntoa(cmap->table[key].addr.sin_addr),
		       ntohs(cmap->table[key].addr.sin_port));
	return FI_ADDR_NOTAVAIL;
}
示例#8
0
static ssize_t sock_comm_recv_socket(struct sock_conn *conn,
			      void *buf, size_t len)
{
	ssize_t ret;
	ret = ofi_recv_socket(conn->sock_fd, buf, len, 0);
	if (ret == 0) {
		conn->connected = 0;
		SOCK_LOG_DBG("Disconnected: %s:%d\n",
			     inet_ntoa(conn->addr.sin_addr),
			     ntohs(conn->addr.sin_port));
		return ret;
	}

	if (ret < 0) {
		SOCK_LOG_DBG("read %s\n", strerror(ofi_sockerr()));
		ret = 0;
	}

	if (ret > 0)
		SOCK_LOG_DBG("read from network: %lu\n", ret);
	return ret;
}
示例#9
0
int sock_av_get_addr_index(struct sock_av *av, struct sockaddr_in *addr)
{
	int i;
	struct sock_av_addr *av_addr;

	for (i = 0; i < av->table_hdr->stored; i++) {
		av_addr = &av->table[i];
		if (ofi_equals_sockaddr(addr, (struct sockaddr_in *)&av_addr->addr))
			return i;
	}
	SOCK_LOG_DBG("failed to get index in AV\n");
	return -1;
}
示例#10
0
static ssize_t sock_eq_sread(struct fid_eq *eq, uint32_t *event, void *buf, size_t len,
		      int timeout, uint64_t flags)
{
	int ret;
	struct sock_eq *sock_eq;
	struct dlist_entry *list;
	struct sock_eq_entry *entry;

	sock_eq = container_of(eq, struct sock_eq, eq);
	sock_eq_clean_err_data_list(sock_eq, 0);
	if (!dlistfd_empty(&sock_eq->err_list)) {
		return -FI_EAVAIL;
	}
	
	if (dlistfd_empty(&sock_eq->list)) {
		if(!timeout) {
			SOCK_LOG_DBG("Nothing to read from eq!\n");
			return -FI_EAGAIN;
		}
		ret = dlistfd_wait_avail(&sock_eq->list, timeout);
		if (!dlistfd_empty(&sock_eq->err_list)) {
			return -FI_EAVAIL;
		}
		if (ret <= 0)
			return (ret == 0 || ret == -FI_ETIMEDOUT) ? 
				-FI_EAGAIN : ret;
	}

	fastlock_acquire(&sock_eq->lock);
	list = sock_eq->list.list.next;
	entry = container_of(list, struct sock_eq_entry, entry);

	if (entry->len > len) {
		ret = -FI_ETOOSMALL;
		goto out;
	}

	ret = entry->len;
	*event = entry->type;
	memcpy(buf, entry->event, entry->len);

	if (!(flags & FI_PEEK)) {
		dlistfd_remove(list, &sock_eq->list);
		free(entry);
	}

out:
	fastlock_release(&sock_eq->lock);
	return (ret == 0 || ret == -FI_ETIMEDOUT) ? -FI_EAGAIN : ret;
}
示例#11
0
static ssize_t sock_comm_send_socket(struct sock_conn *conn,
				     const void *buf, size_t len)
{
	ssize_t ret;

	ret = ofi_send_socket(conn->sock_fd, buf, len, MSG_NOSIGNAL);
	if (ret < 0) {
		if (OFI_SOCK_TRY_SND_RCV_AGAIN(ofi_sockerr())) {
			ret = 0;
		} else if (ofi_sockerr() == EPIPE) {
			conn->connected = 0;
			SOCK_LOG_DBG("Disconnected: %s:%d\n",
				     inet_ntoa(conn->addr.sin_addr),
				     ntohs(conn->addr.sin_port));
		} else {
			SOCK_LOG_DBG("write error: %s\n",
				     strerror(ofi_sockerr()));
		}
	}
	if (ret > 0)
		SOCK_LOG_DBG("wrote to network: %lu\n", ret);
	return ret;
}
示例#12
0
ssize_t sock_comm_recv(struct sock_pe_entry *pe_entry, void *buf, size_t len)
{
	ssize_t read_len;
	if (ofi_rbempty(&pe_entry->comm_buf)) {
		if (len <= pe_entry->cache_sz) {
			sock_comm_recv_buffer(pe_entry);
		} else {
			return sock_comm_recv_socket(pe_entry->conn, buf, len);
		}
	}

	read_len = MIN(len, ofi_rbused(&pe_entry->comm_buf));
	ofi_rbread(&pe_entry->comm_buf, buf, read_len);
	SOCK_LOG_DBG("read from buffer: %lu\n", read_len);
	return read_len;
}
示例#13
0
/* FIXME: pool of rx_entry */
struct sock_rx_entry *sock_rx_new_entry(struct sock_rx_ctx *rx_ctx)
{
	struct sock_rx_entry *rx_entry;

	rx_entry = calloc(1, sizeof(*rx_entry));
	if (!rx_entry)
		return NULL;
	
	rx_entry->is_tagged = 0;
	SOCK_LOG_DBG("New rx_entry: %p, ctx: %p\n", rx_entry, rx_ctx);
	dlist_init(&rx_entry->entry);

	fastlock_acquire(&rx_ctx->lock);
	rx_ctx->num_left--;
	fastlock_release(&rx_ctx->lock);
	
	return rx_entry;
}
示例#14
0
static int sock_rdm_verify_rx_attr(const struct fi_rx_attr *attr)
{
	if (!attr)
		return 0;

	if ((attr->caps | SOCK_EP_RDM_CAP) != SOCK_EP_RDM_CAP) {
		SOCK_LOG_DBG("Unsupported RDM rx caps\n");
		return -FI_ENODATA;
	}

	if ((attr->msg_order | SOCK_EP_MSG_ORDER) != SOCK_EP_MSG_ORDER) {
		SOCK_LOG_DBG("Unsuported rx message order\n");
		return -FI_ENODATA;
	}

	if ((attr->comp_order | SOCK_EP_COMP_ORDER) != SOCK_EP_COMP_ORDER) {
		SOCK_LOG_DBG("Unsuported rx completion order\n");
		return -FI_ENODATA;
	}

	if (attr->total_buffered_recv > sock_rdm_rx_attr.total_buffered_recv) {
		SOCK_LOG_DBG("Buffered receive size too large\n");
		return -FI_ENODATA;
	}

	if (attr->size > sock_rdm_rx_attr.size) {
		SOCK_LOG_DBG("Rx size too large\n");
		return -FI_ENODATA;
	}

	if (attr->iov_limit > sock_rdm_rx_attr.iov_limit) {
		SOCK_LOG_DBG("Rx iov limit too large\n");
		return -FI_ENODATA;
	}

	return 0;
}
示例#15
0
static int sock_rdm_verify_tx_attr(const struct fi_tx_attr *attr)
{
	if (!attr)
		return 0;

	if ((attr->caps | SOCK_EP_RDM_CAP) != SOCK_EP_RDM_CAP) {
		SOCK_LOG_DBG("Unsupported RDM tx caps\n");
		return -FI_ENODATA;
	}

	if ((attr->msg_order | SOCK_EP_MSG_ORDER) != SOCK_EP_MSG_ORDER) {
		SOCK_LOG_DBG("Unsupported tx message order\n");
		return -FI_ENODATA;
	}

	if (attr->inject_size > sock_rdm_tx_attr.inject_size) {
		SOCK_LOG_DBG("Inject size too large\n");
		return -FI_ENODATA;
	}

	if (attr->size > sock_rdm_tx_attr.size) {
		SOCK_LOG_DBG("Tx size too large\n");
		return -FI_ENODATA;
	}

	if (attr->iov_limit > sock_rdm_tx_attr.iov_limit) {
		SOCK_LOG_DBG("Tx iov limit too large\n");
		return -FI_ENODATA;
	}

	if (attr->rma_iov_limit > sock_rdm_tx_attr.rma_iov_limit) {
		SOCK_LOG_DBG("RMA iov limit too large\n");
		return -FI_ENODATA;
	}

	return 0;
}
示例#16
0
static int sock_ep_close(struct fid *fid)
{
	struct sock_ep *sock_ep;
	char c = 0;

	switch (fid->fclass) {
	case FI_CLASS_EP:
		sock_ep = container_of(fid, struct sock_ep, ep.fid);
		break;

	case FI_CLASS_SEP:
		sock_ep = container_of(fid, struct sock_ep, ep.fid);
		break;

	default:
		return -FI_EINVAL;
	}

	if (sock_ep->is_alias) {
		ofi_atomic_dec32(&sock_ep->attr->ref);
		return 0;
	}
	if (ofi_atomic_get32(&sock_ep->attr->ref) ||
	    ofi_atomic_get32(&sock_ep->attr->num_rx_ctx) ||
	    ofi_atomic_get32(&sock_ep->attr->num_tx_ctx))
		return -FI_EBUSY;

	if (sock_ep->attr->ep_type == FI_EP_MSG) {
		sock_ep->attr->cm.do_listen = 0;
		if (ofi_write_socket(sock_ep->attr->cm.signal_fds[0], &c, 1) != 1)
			SOCK_LOG_DBG("Failed to signal\n");

		if (sock_ep->attr->cm.listener_thread &&
			pthread_join(sock_ep->attr->cm.listener_thread, NULL)) {
			SOCK_LOG_ERROR("pthread join failed (%d)\n",
				       ofi_syserr());
		}
		ofi_close_socket(sock_ep->attr->cm.signal_fds[0]);
		ofi_close_socket(sock_ep->attr->cm.signal_fds[1]);
	} else {
		if (sock_ep->attr->av)
			ofi_atomic_dec32(&sock_ep->attr->av->ref);
	}
	if (sock_ep->attr->av) {
		fastlock_acquire(&sock_ep->attr->av->list_lock);
		fid_list_remove(&sock_ep->attr->av->ep_list,
				&sock_ep->attr->lock, &sock_ep->ep.fid);
		fastlock_release(&sock_ep->attr->av->list_lock);
	}

	pthread_mutex_lock(&sock_ep->attr->domain->pe->list_lock);
	if (sock_ep->attr->tx_shared) {
		fastlock_acquire(&sock_ep->attr->tx_ctx->lock);
		dlist_remove(&sock_ep->attr->tx_ctx_entry);
		fastlock_release(&sock_ep->attr->tx_ctx->lock);
	}

	if (sock_ep->attr->rx_shared) {
		fastlock_acquire(&sock_ep->attr->rx_ctx->lock);
		dlist_remove(&sock_ep->attr->rx_ctx_entry);
		fastlock_release(&sock_ep->attr->rx_ctx->lock);
	}
	pthread_mutex_unlock(&sock_ep->attr->domain->pe->list_lock);

	if (sock_ep->attr->conn_handle.do_listen) {
		fastlock_acquire(&sock_ep->attr->domain->conn_listener.signal_lock);
		fi_epoll_del(sock_ep->attr->domain->conn_listener.emap,
		             sock_ep->attr->conn_handle.sock);
		fastlock_release(&sock_ep->attr->domain->conn_listener.signal_lock);
		ofi_close_socket(sock_ep->attr->conn_handle.sock);
		sock_ep->attr->conn_handle.do_listen = 0;
	}

	fastlock_destroy(&sock_ep->attr->cm.lock);

	if (sock_ep->attr->eq) {
		fastlock_acquire(&sock_ep->attr->eq->lock);
		sock_ep_clear_eq_list(&sock_ep->attr->eq->list,
				      &sock_ep->ep);
		/* Any err_data if present would be freed by
		 * sock_eq_clean_err_data_list when EQ is closed */
		sock_ep_clear_eq_list(&sock_ep->attr->eq->err_list,
				      &sock_ep->ep);
		fastlock_release(&sock_ep->attr->eq->lock);
	}

	if (sock_ep->attr->fclass != FI_CLASS_SEP) {
		if (!sock_ep->attr->tx_shared)
			sock_pe_remove_tx_ctx(sock_ep->attr->tx_array[0]);

		sock_tx_ctx_close(sock_ep->attr->tx_array[0]);
		sock_tx_ctx_free(sock_ep->attr->tx_array[0]);
	}

	if (sock_ep->attr->fclass != FI_CLASS_SEP) {
		if (!sock_ep->attr->rx_shared)
			sock_pe_remove_rx_ctx(sock_ep->attr->rx_array[0]);

		sock_rx_ctx_close(sock_ep->attr->rx_array[0]);
		sock_rx_ctx_free(sock_ep->attr->rx_array[0]);
	}

	free(sock_ep->attr->tx_array);
	free(sock_ep->attr->rx_array);

	if (sock_ep->attr->src_addr)
		free(sock_ep->attr->src_addr);
	if (sock_ep->attr->dest_addr)
		free(sock_ep->attr->dest_addr);

	fastlock_acquire(&sock_ep->attr->domain->pe->lock);
	ofi_idm_reset(&sock_ep->attr->av_idm);
	sock_conn_map_destroy(sock_ep->attr);
	fastlock_release(&sock_ep->attr->domain->pe->lock);

	ofi_atomic_dec32(&sock_ep->attr->domain->ref);
	fastlock_destroy(&sock_ep->attr->lock);
	free(sock_ep->attr);
	free(sock_ep);
	return 0;
}
示例#17
0
static int sock_check_table_in(struct sock_av *_av, struct sockaddr_in *addr,
			       fi_addr_t *fi_addr, int count, uint64_t flags,
			       void *context, int index)
{
	void *new_addr;
	int i, j, ret = 0;
	char sa_ip[INET_ADDRSTRLEN];
	struct sock_av_addr *av_addr;
	size_t new_count, table_sz, old_sz;

	if ((_av->attr.flags & FI_EVENT) && !_av->eq)
		return -FI_ENOEQ;

	if (_av->attr.flags & FI_READ) {
		for (i = 0; i < count; i++) {
			for (j = 0; j < _av->table_hdr->stored; j++) {

				if (!sock_av_is_valid_address(&addr[i])) {
					if (fi_addr)
						fi_addr[i] = FI_ADDR_NOTAVAIL;
					sock_av_report_error(_av, context, i,
								FI_EINVAL);
					continue;
				}

				av_addr = &_av->table[j];
				if (memcmp(&av_addr->addr, &addr[i],
					   sizeof(struct sockaddr_in)) == 0) {
					SOCK_LOG_DBG("Found addr in shared av\n");
					if (fi_addr)
						fi_addr[i] = (fi_addr_t)j;
					ret++;
				}
			}
		}
		sock_av_report_success(_av, context, ret, flags);
		return (_av->attr.flags & FI_EVENT) ? 0 : ret;
	}

	for (i = 0, ret = 0; i < count; i++) {
		if (_av->table_hdr->stored == _av->table_hdr->size) {
			if (_av->table_hdr->req_sz) {
				if (fi_addr)
					fi_addr[i] = FI_ADDR_NOTAVAIL;
				sock_av_report_error(_av, context, i, FI_ENOSPC);
				SOCK_LOG_ERROR("Cannot insert to AV table\n");
				continue;
			} else {
				new_count = _av->table_hdr->size * 2;
				table_sz = SOCK_AV_TABLE_SZ(new_count, _av->attr.name);
				old_sz = SOCK_AV_TABLE_SZ(_av->table_hdr->size, _av->attr.name);

				if (_av->attr.name) {
					new_addr = sock_mremap(_av->table_hdr,
							       old_sz, table_sz);
					if (new_addr == MAP_FAILED) {
						if (fi_addr)
							fi_addr[i] = FI_ADDR_NOTAVAIL;
						sock_av_report_error(_av,
							context, i, FI_ENOMEM);
						continue;
					}
					_av->idx_arr[_av->table_hdr->stored] = _av->table_hdr->stored;
				} else {
					new_addr = realloc(_av->table_hdr,
								table_sz);
					if (!new_addr) {
						if (fi_addr)
							fi_addr[i] = FI_ADDR_NOTAVAIL;
						sock_av_report_error(_av,
							context, i, FI_ENOMEM);
						continue;
					}
				}
				_av->table_hdr = new_addr;
				_av->table_hdr->size = new_count;
				sock_update_av_table(_av, new_count);
			}
		}

		if (!sock_av_is_valid_address(&addr[i])) {
			if (fi_addr)
				fi_addr[i] = FI_ADDR_NOTAVAIL;
			sock_av_report_error(_av, context, i, FI_EINVAL);
			continue;
		}

		av_addr = &_av->table[_av->table_hdr->stored];
		memcpy(sa_ip, inet_ntoa((&addr[i])->sin_addr), INET_ADDRSTRLEN);
		SOCK_LOG_DBG("AV-INSERT:dst_addr: family: %d, IP is %s, port: %d\n",
			      ((struct sockaddr_in *)&addr[i])->sin_family,
				sa_ip, ntohs(((struct sockaddr_in *)&addr[i])->sin_port));

		memcpy(&av_addr->addr, &addr[i], sizeof(struct sockaddr_in));
		if (fi_addr)
			fi_addr[i] = (fi_addr_t)_av->table_hdr->stored;

		av_addr->valid = 1;
		_av->table_hdr->stored++;
		ret++;
	}
	sock_av_report_success(_av, context, ret, flags);
	return (_av->attr.flags & FI_EVENT) ? 0 : ret;
}
示例#18
0
int sock_av_open(struct fid_domain *domain, struct fi_av_attr *attr,
		 struct fid_av **av, void *context)
{
	int ret = 0;
	struct sock_domain *dom;
	struct sock_av *_av;
	size_t table_sz;

	if (!attr || sock_verify_av_attr(attr))
		return -FI_EINVAL;

	if (attr->type == FI_AV_UNSPEC)
		attr->type = FI_AV_TABLE;

	dom = container_of(domain, struct sock_domain, dom_fid);
	if (dom->attr.av_type != FI_AV_UNSPEC &&
	    dom->attr.av_type != attr->type)
		return -FI_EINVAL;

	_av = calloc(1, sizeof(*_av));
	if (!_av)
		return -FI_ENOMEM;

	_av->attr = *attr;
	_av->attr.count = (attr->count) ? attr->count : sock_av_def_sz;
	table_sz = SOCK_AV_TABLE_SZ(_av->attr.count, attr->name);

	if (attr->name) {
		ret = ofi_shm_map(&_av->shm, attr->name, table_sz,
				attr->flags & FI_READ, (void**)&_av->table_hdr);

		if (ret || _av->table_hdr == MAP_FAILED) {
			SOCK_LOG_ERROR("map failed\n");
			ret = -FI_EINVAL;
			goto err;
		}

		_av->idx_arr = (uint64_t *)(_av->table_hdr + 1);
		_av->attr.map_addr = _av->idx_arr;
		attr->map_addr = _av->attr.map_addr;
		SOCK_LOG_DBG("Updating map_addr: %p\n", _av->attr.map_addr);

		if (attr->flags & FI_READ) {
			if (_av->table_hdr->size != _av->attr.count) {
				ret = -FI_EINVAL;
				goto err2;
			}
		} else {
			_av->table_hdr->size = _av->attr.count;
			_av->table_hdr->stored = 0;
		}
		_av->shared = 1;
	} else {
		_av->table_hdr = calloc(1, table_sz);
		if (!_av->table_hdr) {
			ret = -FI_ENOMEM;
			goto err;
		}
		_av->table_hdr->size = _av->attr.count;
		_av->table_hdr->req_sz = attr->count;
	}
	sock_update_av_table(_av, _av->attr.count);

	_av->av_fid.fid.fclass = FI_CLASS_AV;
	_av->av_fid.fid.context = context;
	_av->av_fid.fid.ops = &sock_av_fi_ops;

	switch (attr->type) {
	case FI_AV_MAP:
		_av->av_fid.ops = &sock_am_ops;
		break;
	case FI_AV_TABLE:
		_av->av_fid.ops = &sock_at_ops;
		break;
	default:
		ret = -FI_EINVAL;
		goto err2;
	}

	atomic_initialize(&_av->ref, 0);
	atomic_inc(&dom->ref);
	_av->domain = dom;
	switch (dom->info.addr_format) {
	case FI_SOCKADDR_IN:
		_av->addrlen = sizeof(struct sockaddr_in);
		break;
	default:
		SOCK_LOG_ERROR("Invalid address format: only IPv4 supported\n");
		ret = -FI_EINVAL;
		goto err2;
	}
	_av->rx_ctx_bits = attr->rx_ctx_bits;
	_av->mask = attr->rx_ctx_bits ?
		((uint64_t)1 << (64 - attr->rx_ctx_bits)) - 1 : ~0;
	*av = &_av->av_fid;
	return 0;

err2:
	if(attr->name) {
		ofi_shm_unmap(&_av->shm);
	} else {
		if(_av->table_hdr && _av->table_hdr != MAP_FAILED)
			free(_av->table_hdr);
	}
err:
	free(_av);
	return ret;
}
示例#19
0
void sock_rx_release_entry(struct sock_rx_entry *rx_entry)
{
	SOCK_LOG_DBG("Releasing rx_entry: %p\n", rx_entry);
	free(rx_entry);
}
示例#20
0
static int sock_ep_close(struct fid *fid)
{
	struct sock_ep *sock_ep;
	char c = 0;

	switch (fid->fclass) {
	case FI_CLASS_EP:
		sock_ep = container_of(fid, struct sock_ep, ep.fid);
		break;

	case FI_CLASS_SEP:
		sock_ep = container_of(fid, struct sock_ep, ep.fid);
		break;

	default:
		return -FI_EINVAL;
	}

	if (sock_ep->is_alias) {
		atomic_dec(&sock_ep->attr->ref);
		return 0;
	}
	if (atomic_get(&sock_ep->attr->ref) || atomic_get(&sock_ep->attr->num_rx_ctx) ||
	    atomic_get(&sock_ep->attr->num_tx_ctx))
		return -FI_EBUSY;

	if (sock_ep->attr->ep_type == FI_EP_MSG) {
		sock_ep->attr->cm.do_listen = 0;
		if (ofi_write_socket(sock_ep->attr->cm.signal_fds[0], &c, 1) != 1)
			SOCK_LOG_DBG("Failed to signal\n");

		if (sock_ep->attr->cm.listener_thread &&
		    pthread_join(sock_ep->attr->cm.listener_thread, NULL)) {
			SOCK_LOG_ERROR("pthread join failed (%d)\n", errno);
		}
		ofi_close_socket(sock_ep->attr->cm.signal_fds[0]);
		ofi_close_socket(sock_ep->attr->cm.signal_fds[1]);
	} else {
		if (sock_ep->attr->av)
			atomic_dec(&sock_ep->attr->av->ref);
	}

	pthread_mutex_lock(&sock_ep->attr->domain->pe->list_lock);
	if (sock_ep->attr->tx_shared) {
		fastlock_acquire(&sock_ep->attr->tx_ctx->lock);
		dlist_remove(&sock_ep->attr->tx_ctx_entry);
		fastlock_release(&sock_ep->attr->tx_ctx->lock);
	}

	if (sock_ep->attr->rx_shared) {
		fastlock_acquire(&sock_ep->attr->rx_ctx->lock);
		dlist_remove(&sock_ep->attr->rx_ctx_entry);
		fastlock_release(&sock_ep->attr->rx_ctx->lock);
	}
	pthread_mutex_unlock(&sock_ep->attr->domain->pe->list_lock);

	if (sock_ep->attr->listener.do_listen) {
		sock_ep->attr->listener.do_listen = 0;
		if (ofi_write_socket(sock_ep->attr->listener.signal_fds[0], &c, 1) != 1)
			SOCK_LOG_DBG("Failed to signal\n");

		if (sock_ep->attr->listener.listener_thread &&
		     pthread_join(sock_ep->attr->listener.listener_thread, NULL)) {
			SOCK_LOG_ERROR("pthread join failed (%d)\n", errno);
		}

		ofi_close_socket(sock_ep->attr->listener.signal_fds[0]);
		ofi_close_socket(sock_ep->attr->listener.signal_fds[1]);
	}

	fastlock_destroy(&sock_ep->attr->cm.lock);

	if (sock_ep->attr->fclass != FI_CLASS_SEP) {
		if (!sock_ep->attr->tx_shared)
			sock_pe_remove_tx_ctx(sock_ep->attr->tx_array[0]);

		sock_tx_ctx_close(sock_ep->attr->tx_array[0]);
		sock_tx_ctx_free(sock_ep->attr->tx_array[0]);
	}

	if (sock_ep->attr->fclass != FI_CLASS_SEP) {
		if (!sock_ep->attr->rx_shared)
			sock_pe_remove_rx_ctx(sock_ep->attr->rx_array[0]);

		sock_rx_ctx_close(sock_ep->attr->rx_array[0]);
		sock_rx_ctx_free(sock_ep->attr->rx_array[0]);
	}

	idm_reset(&sock_ep->attr->conn_idm);
	idm_reset(&sock_ep->attr->av_idm);

	free(sock_ep->attr->tx_array);
	free(sock_ep->attr->rx_array);

	if (sock_ep->attr->src_addr)
		free(sock_ep->attr->src_addr);
	if (sock_ep->attr->dest_addr)
		free(sock_ep->attr->dest_addr);

	sock_conn_map_destroy(&sock_ep->attr->cmap);
	atomic_dec(&sock_ep->attr->domain->ref);
	fastlock_destroy(&sock_ep->attr->lock);
	free(sock_ep->attr);
	free(sock_ep);
	return 0;
}
示例#21
0
int sock_verify_domain_attr(struct fi_domain_attr *attr)
{
	if (!attr)
		return 0;

	if (attr->name) {
		if (strcmp(attr->name, sock_dom_name))
			return -FI_ENODATA;
	}

	switch (attr->threading) {
	case FI_THREAD_UNSPEC:
	case FI_THREAD_SAFE:
	case FI_THREAD_FID:
	case FI_THREAD_DOMAIN:
	case FI_THREAD_COMPLETION:
	case FI_THREAD_ENDPOINT:
		break;
	default:
		SOCK_LOG_DBG("Invalid threading model!\n");
		return -FI_ENODATA;
	}

	switch (attr->control_progress) {
	case FI_PROGRESS_UNSPEC:
	case FI_PROGRESS_AUTO:
	case FI_PROGRESS_MANUAL:
		break;

	default:
		SOCK_LOG_DBG("Control progress mode not supported!\n");
		return -FI_ENODATA;
	}

	switch (attr->data_progress) {
	case FI_PROGRESS_UNSPEC:
	case FI_PROGRESS_AUTO:
	case FI_PROGRESS_MANUAL:
		break;

	default:
		SOCK_LOG_DBG("Data progress mode not supported!\n");
		return -FI_ENODATA;
	}

	switch (attr->resource_mgmt) {
	case FI_RM_UNSPEC:
	case FI_RM_DISABLED:
	case FI_RM_ENABLED:
		break;

	default:
		SOCK_LOG_DBG("Resource mgmt not supported!\n");
		return -FI_ENODATA;
	}

	switch (attr->av_type) {
	case FI_AV_UNSPEC:
	case FI_AV_MAP:
	case FI_AV_TABLE:
		break;

	default:
		SOCK_LOG_DBG("AV type not supported!\n");
		return -FI_ENODATA;
	}

	switch (attr->mr_mode) {
	case FI_MR_UNSPEC:
	case FI_MR_BASIC:
	case FI_MR_SCALABLE:
		break;
	default:
		SOCK_LOG_DBG("MR mode not supported\n");
		return -FI_ENODATA;
	}

	if (attr->mr_key_size > sock_domain_attr.mr_key_size)
		return -FI_ENODATA;

	if (attr->cq_data_size > sock_domain_attr.cq_data_size)
		return -FI_ENODATA;

	if (attr->cq_cnt > sock_domain_attr.cq_cnt)
		return -FI_ENODATA;

	if (attr->ep_cnt > sock_domain_attr.ep_cnt)
		return -FI_ENODATA;

	if (attr->max_ep_tx_ctx > sock_domain_attr.max_ep_tx_ctx)
		return -FI_ENODATA;

	if (attr->max_ep_rx_ctx > sock_domain_attr.max_ep_rx_ctx)
		return -FI_ENODATA;

	return 0;
}
示例#22
0
ssize_t sock_ep_sendmsg(struct fid_ep *ep, const struct fi_msg *msg,
                        uint64_t flags)
{
    int ret, i;
    uint64_t total_len;
    struct sock_op tx_op;
    union sock_iov tx_iov;
    struct sock_conn *conn;
    struct sock_tx_ctx *tx_ctx;
    struct sock_ep *sock_ep;

    switch (ep->fid.fclass) {
    case FI_CLASS_EP:
        sock_ep = container_of(ep, struct sock_ep, ep);
        tx_ctx = sock_ep->tx_ctx;
        break;
    case FI_CLASS_TX_CTX:
        tx_ctx = container_of(ep, struct sock_tx_ctx, fid.ctx);
        sock_ep = tx_ctx->ep;
        break;
    default:
        SOCK_LOG_ERROR("Invalid EP type\n");
        return -FI_EINVAL;
    }

#if ENABLE_DEBUG
    if (msg->iov_count > SOCK_EP_MAX_IOV_LIMIT)
        return -FI_EINVAL;
#endif

    if (!tx_ctx->enabled)
        return -FI_EOPBADSTATE;

    if (sock_drop_packet(sock_ep))
        return 0;

    ret = sock_ep_get_conn(sock_ep, tx_ctx, msg->addr, &conn);
    if (ret)
        return ret;

    SOCK_LOG_DBG("New sendmsg on TX: %p using conn: %p\n",
                 tx_ctx, conn);

    SOCK_EP_SET_TX_OP_FLAGS(flags);
    if (flags & SOCK_USE_OP_FLAGS)
        flags |= tx_ctx->attr.op_flags;

    if (sock_ep_is_send_cq_low(&tx_ctx->comp, flags)) {
        SOCK_LOG_ERROR("CQ size low\n");
        return -FI_EAGAIN;
    }

    if (flags & FI_TRIGGER) {
        ret = sock_queue_msg_op(ep, msg, flags, SOCK_OP_SEND);
        if (ret != 1)
            return ret;
    }

    memset(&tx_op, 0, sizeof(struct sock_op));
    tx_op.op = SOCK_OP_SEND;

    total_len = 0;
    if (flags & FI_INJECT) {
        for (i = 0; i < msg->iov_count; i++)
            total_len += msg->msg_iov[i].iov_len;

        if (total_len > SOCK_EP_MAX_INJECT_SZ) {
            ret = -FI_EINVAL;
            goto err;
        }
        tx_op.src_iov_len = total_len;
    } else {
        tx_op.src_iov_len = msg->iov_count;
        total_len = msg->iov_count * sizeof(union sock_iov);
    }

    total_len += sizeof(struct sock_op_send);

    if (flags & FI_REMOTE_CQ_DATA)
        total_len += sizeof(uint64_t);

    sock_tx_ctx_start(tx_ctx);
    if (rbavail(&tx_ctx->rb) < total_len) {
        ret = -FI_EAGAIN;
        goto err;
    }

    sock_tx_ctx_write_op_send(tx_ctx, &tx_op, flags, (uintptr_t) msg->context,
                              msg->addr, (uintptr_t) msg->msg_iov[0].iov_base,
                              sock_ep, conn);

    if (flags & FI_REMOTE_CQ_DATA)
        sock_tx_ctx_write(tx_ctx, &msg->data, sizeof(msg->data));

    if (flags & FI_INJECT) {
        for (i = 0; i < msg->iov_count; i++) {
            sock_tx_ctx_write(tx_ctx, msg->msg_iov[i].iov_base,
                              msg->msg_iov[i].iov_len);
        }
    } else {
        for (i = 0; i < msg->iov_count; i++) {
            tx_iov.iov.addr = (uintptr_t) msg->msg_iov[i].iov_base;
            tx_iov.iov.len = msg->msg_iov[i].iov_len;
            sock_tx_ctx_write(tx_ctx, &tx_iov, sizeof(tx_iov));
        }
    }

    sock_tx_ctx_commit(tx_ctx);
    return 0;

err:
    sock_tx_ctx_abort(tx_ctx);
    return ret;
}
示例#23
0
int sock_av_open(struct fid_domain *domain, struct fi_av_attr *attr,
		 struct fid_av **av, void *context)
{
	int ret = 0;
	struct sock_domain *dom;
	struct sock_av *_av;
	size_t table_sz, i;
	uint64_t flags = O_RDWR;

	if (!attr || sock_verify_av_attr(attr))
		return -FI_EINVAL;

	dom = container_of(domain, struct sock_domain, dom_fid);
	if (dom->attr.av_type != FI_AV_UNSPEC && attr &&
	    dom->attr.av_type != attr->type)
		return -FI_EINVAL;

	_av = calloc(1, sizeof(*_av));
	if (!_av)
		return -FI_ENOMEM;

	_av->attr = *attr;
	_av->attr.count = (attr->count) ? attr->count : sock_av_def_sz;

	table_sz = sizeof(struct sock_av_table_hdr) +
		_av->attr.count * sizeof(struct sock_av_addr);

	if (attr->name) {
		_av->name = strdup(attr->name);
		if (!_av->name) {
			ret = -FI_ENOMEM;
			goto err1;
		}
		if (!(attr->flags & FI_READ))
			flags |= O_CREAT;

		for (i = 0; i < strlen(_av->name); i++)
			if (_av->name[i] == ' ')
				_av->name[i] = '_';

		SOCK_LOG_DBG("Creating shm segment :%s (size: %lu)\n",
			      _av->name, table_sz);

		_av->shared_fd = shm_open(_av->name, flags, S_IRUSR | S_IWUSR);
		if (_av->shared_fd < 0) {
			SOCK_LOG_ERROR("shm_open failed\n");
			ret = -FI_EINVAL;
			goto err2;
		}

		if (ftruncate(_av->shared_fd, table_sz) == -1) {
			SOCK_LOG_ERROR("ftruncate failed\n");
			shm_unlink(_av->name);
			ret = -FI_EINVAL;
			goto err2;
		}

		_av->table_hdr = mmap(NULL, table_sz, PROT_READ | PROT_WRITE,
				      MAP_SHARED, _av->shared_fd, 0);
		if (attr->flags & FI_READ) {
			if (_av->table_hdr->size != _av->attr.count) {
				ret = -FI_EINVAL;
				goto err2;
			}
		} else {
			_av->table_hdr->size = _av->attr.count;
			_av->table_hdr->stored = 0;
		}

		if (_av->table_hdr == MAP_FAILED) {
			SOCK_LOG_ERROR("mmap failed\n");
			shm_unlink(_av->name);
			ret = -FI_EINVAL;
			goto err2;
		}
	} else {
		_av->table_hdr = calloc(1, table_sz);
		if (!_av->table_hdr) {
			ret = -FI_ENOMEM;
			goto err3;
		}
		_av->table_hdr->size = _av->attr.count;
		_av->table_hdr->req_sz = attr->count;
	}

	_av->table = (struct sock_av_addr *)((char *)_av->table_hdr +
					    sizeof(struct sock_av_table_hdr));
	_av->av_fid.fid.fclass = FI_CLASS_AV;
	_av->av_fid.fid.context = context;
	_av->av_fid.fid.ops = &sock_av_fi_ops;

	switch (attr->type) {
	case FI_AV_MAP:
		_av->av_fid.ops = &sock_am_ops;
		break;
	case FI_AV_TABLE:
		_av->av_fid.ops = &sock_at_ops;
		break;
	default:
		ret = -FI_EINVAL;
		goto err3;
	}

	atomic_initialize(&_av->ref, 0);
	atomic_inc(&dom->ref);
	_av->domain = dom;
	switch (dom->info.addr_format) {
	case FI_SOCKADDR_IN:
		_av->addrlen = sizeof(struct sockaddr_in);
		break;
	default:
		SOCK_LOG_ERROR("Invalid address format: only IPv4 supported\n");
		ret = -FI_EINVAL;
		goto err3;
	}
	_av->rx_ctx_bits = attr->rx_ctx_bits;
	_av->mask = attr->rx_ctx_bits ?
		((uint64_t)1 << (64 - attr->rx_ctx_bits)) - 1 : ~0;
	*av = &_av->av_fid;
	return 0;

err3:
	free(_av->table_hdr);
err2:
	free(_av->name);
err1:
	free(_av);

	return ret;
}
示例#24
0
struct sock_conn *sock_ep_connect(struct sock_ep *ep, fi_addr_t index)
{
	int conn_fd = -1, ret;
	int do_retry = sock_conn_retry;
	struct sock_conn *conn, *new_conn;
	uint16_t idx;
	struct sockaddr_in *addr;
	socklen_t lon;
	int valopt = 0;
	struct pollfd poll_fd;

	if (ep->ep_type == FI_EP_MSG) {
		idx = 0;
		addr = ep->dest_addr;
	} else {
		idx = index & ep->av->mask;
		addr = (struct sockaddr_in *)&ep->av->table[idx].addr;
	}

do_connect:
	fastlock_acquire(&ep->cmap.lock);
	conn = sock_ep_lookup_conn(ep, index, addr);
	fastlock_release(&ep->cmap.lock);

	if (conn != SOCK_CM_CONN_IN_PROGRESS)
		return conn;

	conn_fd = socket(AF_INET, SOCK_STREAM, 0);
	if (conn_fd == -1) {
		SOCK_LOG_ERROR("failed to create conn_fd, errno: %d\n", errno);
		errno = FI_EOTHER;
		return NULL;
	}

	ret = fd_set_nonblock(conn_fd);
	if (ret) {
		SOCK_LOG_ERROR("failed to set conn_fd nonblocking, errno: %d\n", errno);
		errno = FI_EOTHER;
		close(conn_fd);
                return NULL;
	}

	SOCK_LOG_DBG("Connecting to: %s:%d\n", inet_ntoa(addr->sin_addr),
			ntohs(addr->sin_port));
	SOCK_LOG_DBG("Connecting using address:%s\n",
			inet_ntoa(ep->src_addr->sin_addr));

	ret = connect(conn_fd, (struct sockaddr *) addr, sizeof *addr);
	if (ret < 0) {
		if (errno == EINPROGRESS) {
			poll_fd.fd = conn_fd;
			poll_fd.events = POLLOUT;

			ret = poll(&poll_fd, 1, 15 * 1000);
			if (ret < 0) {
				SOCK_LOG_DBG("poll failed\n");
				goto retry;
			}

			lon = sizeof(int);
			ret = getsockopt(conn_fd, SOL_SOCKET, SO_ERROR, (void*)(&valopt), &lon);
			if (ret < 0) {
				SOCK_LOG_DBG("getsockopt failed: %d, %d\n", ret, conn_fd);
				goto retry;
			}

			if (valopt) {
				SOCK_LOG_DBG("Error in connection() %d - %s - %d\n", valopt, strerror(valopt), conn_fd);
				SOCK_LOG_DBG("Connecting to: %s:%d\n", inet_ntoa(addr->sin_addr),
						ntohs(addr->sin_port));
				SOCK_LOG_DBG("Connecting using address:%s\n",
				inet_ntoa(ep->src_addr->sin_addr));
				goto retry;
			}
			goto out;
		} else {
			SOCK_LOG_DBG("Timeout or error() - %s: %d\n", strerror(errno), conn_fd);
			SOCK_LOG_DBG("Connecting to: %s:%d\n", inet_ntoa(addr->sin_addr),
					ntohs(addr->sin_port));
			SOCK_LOG_DBG("Connecting using address:%s\n",
					inet_ntoa(ep->src_addr->sin_addr));
			goto retry;
		}
	} else {
		goto out;
	}

retry:
	do_retry--;
	sleep(10);
	if (!do_retry)
		goto err;

	if (conn_fd != -1) {
		close(conn_fd);
		conn_fd = -1;
	}

	SOCK_LOG_ERROR("Connect error, retrying - %s - %d\n", strerror(errno), conn_fd);
	SOCK_LOG_DBG("Connecting to: %s:%d\n", inet_ntoa(addr->sin_addr),
			ntohs(addr->sin_port));
	SOCK_LOG_DBG("Connecting using address:%s\n",
			inet_ntoa(ep->src_addr->sin_addr));
        goto do_connect;

out:
	fastlock_acquire(&ep->cmap.lock);
	new_conn = sock_conn_map_insert(ep, addr, conn_fd, 0);
	new_conn->av_index = (ep->ep_type == FI_EP_MSG) ? FI_ADDR_NOTAVAIL : (fi_addr_t) idx;
	conn = idm_lookup(&ep->av_idm, index);
	if (conn == SOCK_CM_CONN_IN_PROGRESS) {
		idm_set(&ep->av_idm, index, new_conn);
		conn = new_conn;
	}
	fastlock_release(&ep->cmap.lock);
	return conn;

err:
	close(conn_fd);
	return NULL;
}
示例#25
0
static ssize_t sock_ep_recv(struct fid_ep *ep, void *buf, size_t len,
				void *desc, fi_addr_t src_addr, void *context)
{
	struct iovec msg_iov = {
		.iov_base = buf,
		.iov_len = len,
	};
	struct fi_msg msg = {
		.msg_iov = &msg_iov,
		.desc = &desc,
		.iov_count = 1,
		.addr = src_addr,
		.context = context,
		.data = 0,
	};

	return sock_ep_recvmsg(ep, &msg, SOCK_USE_OP_FLAGS);
}

static ssize_t sock_ep_recvv(struct fid_ep *ep, const struct iovec *iov,
		       void **desc, size_t count, fi_addr_t src_addr,
		       void *context)
{
	struct fi_msg msg = {
		.msg_iov = iov,
		.desc = desc,
		.iov_count = count,
		.addr = src_addr,
		.context = context,
		.data = 0,
	};

	return sock_ep_recvmsg(ep, &msg, SOCK_USE_OP_FLAGS);
}

ssize_t sock_ep_sendmsg(struct fid_ep *ep, const struct fi_msg *msg,
			uint64_t flags)
{
	int ret;
	size_t i;
	uint64_t total_len, op_flags;
	struct sock_op tx_op;
	union sock_iov tx_iov;
	struct sock_conn *conn;
	struct sock_tx_ctx *tx_ctx;
	struct sock_ep *sock_ep;
	struct sock_ep_attr *ep_attr;

	switch (ep->fid.fclass) {
	case FI_CLASS_EP:
		sock_ep = container_of(ep, struct sock_ep, ep);
		ep_attr = sock_ep->attr;
		tx_ctx = sock_ep->attr->tx_ctx->use_shared ?
			sock_ep->attr->tx_ctx->stx_ctx : sock_ep->attr->tx_ctx;
		op_flags = sock_ep->tx_attr.op_flags;
		break;
	case FI_CLASS_TX_CTX:
		tx_ctx = container_of(ep, struct sock_tx_ctx, fid.ctx);
		ep_attr = tx_ctx->ep_attr;
		op_flags = tx_ctx->attr.op_flags;
		break;
	default:
		SOCK_LOG_ERROR("Invalid EP type\n");
		return -FI_EINVAL;
	}

#if ENABLE_DEBUG
	if (msg->iov_count > SOCK_EP_MAX_IOV_LIMIT)
		return -FI_EINVAL;
#endif

	if (!tx_ctx->enabled)
		return -FI_EOPBADSTATE;

	if (sock_drop_packet(ep_attr))
		return 0;

	ret = sock_ep_get_conn(ep_attr, tx_ctx, msg->addr, &conn);
	if (ret)
		return ret;

	SOCK_LOG_DBG("New sendmsg on TX: %p using conn: %p\n",
		      tx_ctx, conn);

	SOCK_EP_SET_TX_OP_FLAGS(flags);
	if (flags & SOCK_USE_OP_FLAGS)
		flags |= op_flags;

	if (flags & FI_TRIGGER) {
		ret = sock_queue_msg_op(ep, msg, flags, FI_OP_SEND);
		if (ret != 1)
			return ret;
	}

	memset(&tx_op, 0, sizeof(struct sock_op));
	tx_op.op = SOCK_OP_SEND;

	total_len = 0;
	if (flags & FI_INJECT) {
		for (i = 0; i < msg->iov_count; i++)
			total_len += msg->msg_iov[i].iov_len;

		if (total_len > SOCK_EP_MAX_INJECT_SZ)
			return -FI_EINVAL;

		tx_op.src_iov_len = total_len;
	} else {
		tx_op.src_iov_len = msg->iov_count;
		total_len = msg->iov_count * sizeof(union sock_iov);
	}

	total_len += sizeof(struct sock_op_send);

	if (flags & FI_REMOTE_CQ_DATA)
		total_len += sizeof(uint64_t);

	sock_tx_ctx_start(tx_ctx);
	if (ofi_rbavail(&tx_ctx->rb) < total_len) {
		ret = -FI_EAGAIN;
		goto err;
	}

	sock_tx_ctx_write_op_send(tx_ctx, &tx_op, flags, (uintptr_t) msg->context,
				  msg->addr, (uintptr_t) ((msg->iov_count > 0) ?
				  msg->msg_iov[0].iov_base : NULL),
				  ep_attr, conn);

	if (flags & FI_REMOTE_CQ_DATA)
		sock_tx_ctx_write(tx_ctx, &msg->data, sizeof(msg->data));

	if (flags & FI_INJECT) {
		for (i = 0; i < msg->iov_count; i++) {
			sock_tx_ctx_write(tx_ctx, msg->msg_iov[i].iov_base,
					  msg->msg_iov[i].iov_len);
		}
	} else {
		for (i = 0; i < msg->iov_count; i++) {
			tx_iov.iov.addr = (uintptr_t) msg->msg_iov[i].iov_base;
			tx_iov.iov.len = msg->msg_iov[i].iov_len;
			sock_tx_ctx_write(tx_ctx, &tx_iov, sizeof(tx_iov));
		}
	}

	sock_tx_ctx_commit(tx_ctx);
	return 0;

err:
	sock_tx_ctx_abort(tx_ctx);
	return ret;
}

static ssize_t sock_ep_send(struct fid_ep *ep, const void *buf, size_t len,
		      void *desc, fi_addr_t dest_addr, void *context)
{
	struct iovec msg_iov = {
		.iov_base = (void *)buf,
		.iov_len = len,
	};
	struct fi_msg msg = {
		.msg_iov = &msg_iov,
		.desc = &desc,
		.iov_count = 1,
		.addr = dest_addr,
		.context = context,
		.data = 0,
	};

	return sock_ep_sendmsg(ep, &msg, SOCK_USE_OP_FLAGS);
}

static ssize_t sock_ep_sendv(struct fid_ep *ep, const struct iovec *iov,
		       void **desc, size_t count, fi_addr_t dest_addr,
		       void *context)
{
	struct fi_msg msg = {
		.msg_iov = iov,
		.desc = desc,
		.iov_count = count,
		.addr = dest_addr,
		.context = context,
		.data = 0,
	};

	return sock_ep_sendmsg(ep, &msg, SOCK_USE_OP_FLAGS);
}

static ssize_t sock_ep_senddata(struct fid_ep *ep, const void *buf, size_t len,
			  void *desc, uint64_t data, fi_addr_t dest_addr,
			  void *context)
{
	struct iovec msg_iov = {
		.iov_base = (void *)buf,
		.iov_len = len,
	};
	struct fi_msg msg = {
		.msg_iov = &msg_iov,
		.desc = desc,
		.iov_count = 1,
		.addr = dest_addr,
		.context = context,
		.data = data,
	};

	return sock_ep_sendmsg(ep, &msg, FI_REMOTE_CQ_DATA | SOCK_USE_OP_FLAGS);
}

static ssize_t sock_ep_inject(struct fid_ep *ep, const void *buf, size_t len,
			fi_addr_t dest_addr)
{
	struct iovec msg_iov = {
		.iov_base = (void *)buf,
		.iov_len = len,
	};
	struct fi_msg msg = {
		.msg_iov = &msg_iov,
		.desc = NULL,
		.iov_count = 1,
		.addr = dest_addr,
		.context = NULL,
		.data = 0,
	};

	return sock_ep_sendmsg(ep, &msg, FI_INJECT |
			       SOCK_NO_COMPLETION | SOCK_USE_OP_FLAGS);
}

static ssize_t sock_ep_injectdata(struct fid_ep *ep, const void *buf,
				size_t len, uint64_t data, fi_addr_t dest_addr)
{
	struct iovec msg_iov = {
		.iov_base = (void *)buf,
		.iov_len = len,
	};
	struct fi_msg msg = {
		.msg_iov = &msg_iov,
		.desc = NULL,
		.iov_count = 1,
		.addr = dest_addr,
		.context = NULL,
		.data = data,
	};

	return sock_ep_sendmsg(ep, &msg, FI_REMOTE_CQ_DATA | FI_INJECT |
			       SOCK_NO_COMPLETION | SOCK_USE_OP_FLAGS);
}

struct fi_ops_msg sock_ep_msg_ops = {
	.size = sizeof(struct fi_ops_msg),
	.recv = sock_ep_recv,
	.recvv = sock_ep_recvv,
	.recvmsg = sock_ep_recvmsg,
	.send = sock_ep_send,
	.sendv = sock_ep_sendv,
	.sendmsg = sock_ep_sendmsg,
	.inject = sock_ep_inject,
	.senddata = sock_ep_senddata,
	.injectdata = sock_ep_injectdata
};

ssize_t sock_ep_trecvmsg(struct fid_ep *ep,
			 const struct fi_msg_tagged *msg, uint64_t flags)
{
	int ret;
	size_t i;
	struct sock_rx_ctx *rx_ctx;
	struct sock_rx_entry *rx_entry;
	struct sock_ep *sock_ep;
	uint64_t op_flags;

	switch (ep->fid.fclass) {
	case FI_CLASS_EP:
		sock_ep = container_of(ep, struct sock_ep, ep);
		rx_ctx = sock_ep->attr->rx_ctx;
		op_flags = sock_ep->rx_attr.op_flags;
		break;
	case FI_CLASS_RX_CTX:
	case FI_CLASS_SRX_CTX:
		rx_ctx = container_of(ep, struct sock_rx_ctx, ctx);
		op_flags = rx_ctx->attr.op_flags;
		break;
	default:
		SOCK_LOG_ERROR("Invalid ep type\n");
		return -FI_EINVAL;
	}

#if ENABLE_DEBUG
	if (msg->iov_count > SOCK_EP_MAX_IOV_LIMIT)
		return -FI_EINVAL;
#endif

	if (!rx_ctx->enabled)
		return -FI_EOPBADSTATE;

	if (flags & SOCK_USE_OP_FLAGS)
		flags |= op_flags;
	flags &= ~FI_MULTI_RECV;

	if (flags & FI_TRIGGER) {
		ret = sock_queue_tmsg_op(ep, msg, flags, FI_OP_TRECV);
		if (ret != 1)
			return ret;
	}

	if (flags & FI_PEEK) {
		return sock_rx_peek_recv(rx_ctx, msg->addr,
					 msg->tag, msg->ignore,
					 msg->context, flags, 1);
	} else if (flags & FI_CLAIM) {
		return sock_rx_claim_recv(rx_ctx, msg->context, flags,
					  msg->tag, msg->ignore, 1,
					  msg->msg_iov, msg->iov_count);
	}

	fastlock_acquire(&rx_ctx->lock);
	rx_entry = sock_rx_new_entry(rx_ctx);
	fastlock_release(&rx_ctx->lock);
	if (!rx_entry)
		return -FI_ENOMEM;

	rx_entry->rx_op.op = SOCK_OP_TRECV;
	rx_entry->rx_op.dest_iov_len = msg->iov_count;

	rx_entry->flags = flags;
	rx_entry->context = (uintptr_t) msg->context;
	rx_entry->addr = (rx_ctx->attr.caps & FI_DIRECTED_RECV) ?
			 msg->addr : FI_ADDR_UNSPEC;
	rx_entry->data = msg->data;
	rx_entry->tag = msg->tag;
	rx_entry->ignore = msg->ignore;
	rx_entry->is_tagged = 1;

	for (i = 0; i < msg->iov_count; i++) {
		rx_entry->iov[i].iov.addr = (uintptr_t) msg->msg_iov[i].iov_base;
		rx_entry->iov[i].iov.len = msg->msg_iov[i].iov_len;
		rx_entry->total_len += rx_entry->iov[i].iov.len;
	}

	fastlock_acquire(&rx_ctx->lock);
	SOCK_LOG_DBG("New rx_entry: %p (ctx: %p)\n", rx_entry, rx_ctx);
	dlist_insert_tail(&rx_entry->entry, &rx_ctx->rx_entry_list);
	fastlock_release(&rx_ctx->lock);
	return 0;
}

static ssize_t sock_ep_trecv(struct fid_ep *ep, void *buf, size_t len,
			void *desc, fi_addr_t src_addr, uint64_t tag,
			uint64_t ignore, void *context)
{
	struct iovec msg_iov = {
		.iov_base = buf,
		.iov_len = len,
	};
	struct fi_msg_tagged msg = {
		.msg_iov = &msg_iov,
		.desc = &desc,
		.iov_count = 1,
		.addr = src_addr,
		.context = context,
		.tag = tag,
		.ignore = ignore,
		.data = 0,
	};

	return sock_ep_trecvmsg(ep, &msg, SOCK_USE_OP_FLAGS);
}

static ssize_t sock_ep_trecvv(struct fid_ep *ep, const struct iovec *iov,
			       void **desc, size_t count, fi_addr_t src_addr,
			       uint64_t tag, uint64_t ignore, void *context)
{
	struct fi_msg_tagged msg = {
		.msg_iov = iov,
		.desc = desc,
		.iov_count = count,
		.addr = src_addr,
		.context = context,
		.tag = tag,
		.ignore = ignore,
		.data = 0,
	};

	return sock_ep_trecvmsg(ep, &msg, SOCK_USE_OP_FLAGS);
}

ssize_t sock_ep_tsendmsg(struct fid_ep *ep,
			 const struct fi_msg_tagged *msg, uint64_t flags)
{
	int ret;
	size_t i;
	uint64_t total_len, op_flags;
	struct sock_op tx_op;
	union sock_iov tx_iov;
	struct sock_conn *conn;
	struct sock_tx_ctx *tx_ctx;
	struct sock_ep *sock_ep;
	struct sock_ep_attr *ep_attr;

	switch (ep->fid.fclass) {
	case FI_CLASS_EP:
		sock_ep = container_of(ep, struct sock_ep, ep);
		tx_ctx = sock_ep->attr->tx_ctx->use_shared ?
			sock_ep->attr->tx_ctx->stx_ctx : sock_ep->attr->tx_ctx;
		ep_attr = sock_ep->attr;
		op_flags = sock_ep->tx_attr.op_flags;
		break;
	case FI_CLASS_TX_CTX:
		tx_ctx = container_of(ep, struct sock_tx_ctx, fid.ctx);
		ep_attr = tx_ctx->ep_attr;
		op_flags = tx_ctx->attr.op_flags;
		break;
	default:
		SOCK_LOG_ERROR("Invalid EP type\n");
		return -FI_EINVAL;
	}

#if ENABLE_DEBUG
	if (msg->iov_count > SOCK_EP_MAX_IOV_LIMIT)
		return -FI_EINVAL;
#endif

	if (!tx_ctx->enabled)
		return -FI_EOPBADSTATE;

	if (sock_drop_packet(ep_attr))
		return 0;

	ret = sock_ep_get_conn(ep_attr, tx_ctx, msg->addr, &conn);
	if (ret)
		return ret;

	SOCK_EP_SET_TX_OP_FLAGS(flags);
	if (flags & SOCK_USE_OP_FLAGS)
		flags |= op_flags;

	if (flags & FI_TRIGGER) {
		ret = sock_queue_tmsg_op(ep, msg, flags, FI_OP_TSEND);
		if (ret != 1)
			return ret;
	}

	memset(&tx_op, 0, sizeof(tx_op));
	tx_op.op = SOCK_OP_TSEND;

	total_len = 0;
	if (flags & FI_INJECT) {
		for (i = 0; i < msg->iov_count; i++)
			total_len += msg->msg_iov[i].iov_len;

		tx_op.src_iov_len = total_len;
		if (total_len > SOCK_EP_MAX_INJECT_SZ)
			return -FI_EINVAL;
	} else {
		total_len = msg->iov_count * sizeof(union sock_iov);
		tx_op.src_iov_len = msg->iov_count;
	}

	total_len += sizeof(struct sock_op_tsend);
	if (flags & FI_REMOTE_CQ_DATA)
		total_len += sizeof(uint64_t);

	sock_tx_ctx_start(tx_ctx);
	if (ofi_rbavail(&tx_ctx->rb) < total_len) {
		ret = -FI_EAGAIN;
		goto err;
	}

	sock_tx_ctx_write_op_tsend(tx_ctx, &tx_op, flags,
				   (uintptr_t) msg->context, msg->addr,
				   (uintptr_t) ((msg->iov_count > 0) ?
				    msg->msg_iov[0].iov_base : NULL),
				    ep_attr, conn, msg->tag);

	if (flags & FI_REMOTE_CQ_DATA)
		sock_tx_ctx_write(tx_ctx, &msg->data, sizeof(msg->data));

	if (flags & FI_INJECT) {
		for (i = 0; i < msg->iov_count; i++) {
			sock_tx_ctx_write(tx_ctx, msg->msg_iov[i].iov_base,
					  msg->msg_iov[i].iov_len);
		}
	} else {
		for (i = 0; i < msg->iov_count; i++) {
			tx_iov.iov.addr = (uintptr_t) msg->msg_iov[i].iov_base;
			tx_iov.iov.len = msg->msg_iov[i].iov_len;
			sock_tx_ctx_write(tx_ctx, &tx_iov, sizeof(tx_iov));
		}
	}

	sock_tx_ctx_commit(tx_ctx);
	return 0;

err:
	sock_tx_ctx_abort(tx_ctx);
	return ret;
}

static ssize_t sock_ep_tsend(struct fid_ep *ep, const void *buf, size_t len,
			void *desc, fi_addr_t dest_addr, uint64_t tag,
			void *context)
{
	struct iovec msg_iov = {
		.iov_base = (void *)buf,
		.iov_len = len,
	};
	struct fi_msg_tagged msg = {
		.msg_iov = &msg_iov,
		.desc = &desc,
		.iov_count = 1,
		.addr = dest_addr,
		.tag = tag,
		.ignore = 0,
		.context = context,
		.data = 0,
	};

	return sock_ep_tsendmsg(ep, &msg, SOCK_USE_OP_FLAGS);
}

static ssize_t sock_ep_tsendv(struct fid_ep *ep, const struct iovec *iov,
			       void **desc, size_t count, fi_addr_t dest_addr,
			       uint64_t tag, void *context)
{
	struct fi_msg_tagged msg = {
		.msg_iov = iov,
		.desc = desc,
		.iov_count = count,
		.addr = dest_addr,
		.tag = tag,
		.ignore = 0,
		.context = context,
		.data = 0,
	};

	return sock_ep_tsendmsg(ep, &msg, SOCK_USE_OP_FLAGS);
}

static ssize_t sock_ep_tsenddata(struct fid_ep *ep, const void *buf, size_t len,
				void *desc, uint64_t data, fi_addr_t dest_addr,
				uint64_t tag, void *context)
{
	struct iovec msg_iov = {
		.iov_base = (void *)buf,
		.iov_len = len,
	};
	struct fi_msg_tagged msg = {
		.msg_iov = &msg_iov,
		.desc = desc,
		.iov_count = 1,
		.addr = dest_addr,
		.tag = tag,
		.ignore = 0,
		.context = context,
		.data = data,
	};

	return sock_ep_tsendmsg(ep, &msg, FI_REMOTE_CQ_DATA | SOCK_USE_OP_FLAGS);
}

static ssize_t sock_ep_tinject(struct fid_ep *ep, const void *buf, size_t len,
				fi_addr_t dest_addr, uint64_t tag)
{
	struct iovec msg_iov = {
		.iov_base = (void *)buf,
		.iov_len = len,
	};
	struct fi_msg_tagged msg = {
		.msg_iov = &msg_iov,
		.desc = NULL,
		.iov_count = 1,
		.addr = dest_addr,
		.tag = tag,
		.ignore = 0,
		.context = NULL,
		.data = 0,
	};

	return sock_ep_tsendmsg(ep, &msg, FI_INJECT |
				SOCK_NO_COMPLETION | SOCK_USE_OP_FLAGS);
}

static ssize_t	sock_ep_tinjectdata(struct fid_ep *ep, const void *buf,
				size_t len, uint64_t data, fi_addr_t dest_addr,
				uint64_t tag)
{
	struct iovec msg_iov = {
		.iov_base = (void *)buf,
		.iov_len = len,
	};
	struct fi_msg_tagged msg = {
		.msg_iov = &msg_iov,
		.desc = NULL,
		.iov_count = 1,
		.addr = dest_addr,
		.tag = tag,
		.ignore = 0,
		.context = NULL,
		.data = data,
	};

	return sock_ep_tsendmsg(ep, &msg, FI_REMOTE_CQ_DATA | FI_INJECT |
				SOCK_NO_COMPLETION | SOCK_USE_OP_FLAGS);
}


struct fi_ops_tagged sock_ep_tagged = {
	.size = sizeof(struct fi_ops_tagged),
	.recv = sock_ep_trecv,
	.recvv = sock_ep_trecvv,
	.recvmsg = sock_ep_trecvmsg,
	.send = sock_ep_tsend,
	.sendv = sock_ep_tsendv,
	.sendmsg = sock_ep_tsendmsg,
	.inject = sock_ep_tinject,
	.senddata = sock_ep_tsenddata,
	.injectdata = sock_ep_tinjectdata,
};
示例#26
0
struct sock_conn *sock_ep_connect(struct sock_ep_attr *ep_attr, fi_addr_t index)
{
	int conn_fd = -1, ret;
	int do_retry = sock_conn_retry;
	struct sock_conn *conn, *new_conn;
	struct sockaddr_in addr;
	socklen_t lon;
	int valopt = 0;
	struct pollfd poll_fd;

	if (ep_attr->ep_type == FI_EP_MSG) {
		/* Need to check that destination address has been
		   passed to endpoint */
		assert(ep_attr->dest_addr);
		addr = *ep_attr->dest_addr;
		addr.sin_port = htons(ep_attr->msg_dest_port);
	} else {
		addr = *((struct sockaddr_in *)&ep_attr->av->table[index].addr);
	}

do_connect:
	fastlock_acquire(&ep_attr->cmap.lock);
	conn = sock_ep_lookup_conn(ep_attr, index, &addr);
	fastlock_release(&ep_attr->cmap.lock);

	if (conn != SOCK_CM_CONN_IN_PROGRESS)
		return conn;

	conn_fd = ofi_socket(AF_INET, SOCK_STREAM, 0);
	if (conn_fd == -1) {
		SOCK_LOG_ERROR("failed to create conn_fd, errno: %d\n", errno);
		errno = FI_EOTHER;
		return NULL;
	}

	ret = fd_set_nonblock(conn_fd);
	if (ret) {
		SOCK_LOG_ERROR("failed to set conn_fd nonblocking, errno: %d\n", errno);
		errno = FI_EOTHER;
		ofi_close_socket(conn_fd);
		return NULL;
	}

	SOCK_LOG_DBG("Connecting to: %s:%d\n", inet_ntoa(addr.sin_addr),
			ntohs(addr.sin_port));
	SOCK_LOG_DBG("Connecting using address:%s\n",
			inet_ntoa(ep_attr->src_addr->sin_addr));

	ret = connect(conn_fd, (struct sockaddr *) &addr, sizeof addr);
	if (ret < 0) {
		if (ofi_sockerr() == EINPROGRESS) {
			poll_fd.fd = conn_fd;
			poll_fd.events = POLLOUT;

			ret = poll(&poll_fd, 1, 15 * 1000);
			if (ret < 0) {
				SOCK_LOG_DBG("poll failed\n");
				goto retry;
			}

			lon = sizeof(int);
			ret = getsockopt(conn_fd, SOL_SOCKET, SO_ERROR, (void*)(&valopt), &lon);
			if (ret < 0) {
				SOCK_LOG_DBG("getsockopt failed: %d, %d\n", ret, conn_fd);
				goto retry;
			}

			if (valopt) {
				SOCK_LOG_DBG("Error in connection() %d - %s - %d\n", valopt, strerror(valopt), conn_fd);
				SOCK_LOG_DBG("Connecting to: %s:%d\n", inet_ntoa(addr.sin_addr),
						ntohs(addr.sin_port));
				SOCK_LOG_DBG("Connecting using address:%s\n",
				inet_ntoa(ep_attr->src_addr->sin_addr));
				goto retry;
			}
			goto out;
		} else {
			SOCK_LOG_DBG("Timeout or error() - %s: %d\n", strerror(errno), conn_fd);
			SOCK_LOG_DBG("Connecting to: %s:%d\n", inet_ntoa(addr.sin_addr),
					ntohs(addr.sin_port));
			SOCK_LOG_DBG("Connecting using address:%s\n",
					inet_ntoa(ep_attr->src_addr->sin_addr));
			goto retry;
		}
	} else {
		goto out;
	}

retry:
	do_retry--;
	sleep(10);
	if (!do_retry)
		goto err;

	if (conn_fd != -1) {
		ofi_close_socket(conn_fd);
		conn_fd = -1;
	}

	SOCK_LOG_ERROR("Connect error, retrying - %s - %d\n", strerror(errno), conn_fd);
	SOCK_LOG_DBG("Connecting to: %s:%d\n", inet_ntoa(addr.sin_addr),
			ntohs(addr.sin_port));
	SOCK_LOG_DBG("Connecting using address:%s\n",
			inet_ntoa(ep_attr->src_addr->sin_addr));
        goto do_connect;

out:
	fastlock_acquire(&ep_attr->cmap.lock);
	new_conn = sock_conn_map_insert(ep_attr, &addr, conn_fd, 0);
	if (!new_conn) {
		fastlock_release(&ep_attr->cmap.lock);
		goto err;
	}
	new_conn->av_index = (ep_attr->ep_type == FI_EP_MSG) ? FI_ADDR_NOTAVAIL : index;
	conn = ofi_idm_lookup(&ep_attr->av_idm, index);
	if (conn == SOCK_CM_CONN_IN_PROGRESS) {
		if (ofi_idm_set(&ep_attr->av_idm, index, new_conn) < 0)
			SOCK_LOG_ERROR("ofi_idm_set failed\n");
		conn = new_conn;
	}
	fastlock_release(&ep_attr->cmap.lock);
	return conn;

err:
	ofi_close_socket(conn_fd);
	return NULL;
}
示例#27
0
int sock_conn_listen(struct sock_ep_attr *ep_attr)
{
	struct addrinfo *s_res = NULL, *p;
	struct addrinfo hints = { 0 };
	int listen_fd = 0, ret;
	socklen_t addr_size;
	struct sockaddr_in addr;
	struct sock_conn_listener *listener = &ep_attr->listener;
	char service[NI_MAXSERV] = {0};
	char *port;
	char ipaddr[24];

	hints.ai_family = AF_INET;
	hints.ai_socktype = SOCK_STREAM;
	hints.ai_flags = AI_PASSIVE;

	memcpy(&addr, ep_attr->src_addr, sizeof(addr));
	if (getnameinfo((void *)ep_attr->src_addr, sizeof(*ep_attr->src_addr),
			NULL, 0, listener->service,
			sizeof(listener->service), NI_NUMERICSERV)) {
		SOCK_LOG_ERROR("could not resolve src_addr\n");
		return -FI_EINVAL;
	}

	if (ep_attr->ep_type == FI_EP_MSG) {
		memset(listener->service, 0, NI_MAXSERV);
		port = NULL;
		addr.sin_port = 0;
	} else
		port = listener->service;

	inet_ntop(addr.sin_family, &addr.sin_addr, ipaddr, sizeof(ipaddr));
	ret = getaddrinfo(ipaddr, port, &hints, &s_res);
	if (ret) {
		SOCK_LOG_ERROR("no available AF_INET address, service %s, %s\n",
			       listener->service, gai_strerror(ret));
		return -FI_EINVAL;
	}

	SOCK_LOG_DBG("Binding listener thread to port: %s\n", listener->service);
	for (p = s_res; p; p = p->ai_next) {
		listen_fd = ofi_socket(p->ai_family, p->ai_socktype, p->ai_protocol);
		if (listen_fd >= 0) {
			sock_set_sockopts(listen_fd);

			if (!bind(listen_fd, s_res->ai_addr, s_res->ai_addrlen))
				break;
			ofi_close_socket(listen_fd);
			listen_fd = -1;
		}
	}
	freeaddrinfo(s_res);

	if (listen_fd < 0) {
		SOCK_LOG_ERROR("failed to listen to port: %s\n",
			       listener->service);
		goto err;
	}

	if (atoi(listener->service) == 0) {
		addr_size = sizeof(addr);
		if (getsockname(listen_fd, (struct sockaddr *) &addr, &addr_size))
			goto err;
		snprintf(listener->service, sizeof listener->service, "%d",
			 ntohs(addr.sin_port));
		SOCK_LOG_DBG("Bound to port: %s - %d\n", listener->service, getpid());
		ep_attr->msg_src_port = ntohs(addr.sin_port);
	}

	if (ep_attr->src_addr->sin_addr.s_addr == 0) {
		snprintf(service, sizeof service, "%s", listener->service);
		ret = sock_get_src_addr_from_hostname(ep_attr->src_addr, service);
		if (ret)
			goto err;
	}

	if (listen(listen_fd, sock_cm_def_map_sz)) {
		SOCK_LOG_ERROR("failed to listen socket: %s\n", strerror(errno));
		goto err;
	}

	if (((struct sockaddr_in *) (ep_attr->src_addr))->sin_port == 0) {
		((struct sockaddr_in *) (ep_attr->src_addr))->sin_port =
			htons(atoi(listener->service));
	}

	listener->sock = listen_fd;
	if (socketpair(AF_UNIX, SOCK_STREAM, 0, listener->signal_fds) < 0)
		goto err;

	listener->do_listen = 1;

	fd_set_nonblock(listener->signal_fds[1]);
	if (pthread_create(&listener->listener_thread, 0,
			   _sock_conn_listen, ep_attr)) {
		SOCK_LOG_ERROR("failed to create conn listener thread\n");
		goto err;
	} while (!*((volatile int*)&listener->is_ready));
	return 0;
err:
	if (listen_fd >= 0)
		ofi_close_socket(listen_fd);
	return -FI_EINVAL;
}
示例#28
0
ssize_t sock_ep_recvmsg(struct fid_ep *ep, const struct fi_msg *msg,
			uint64_t flags)
{
	int ret;
	size_t i;
	struct sock_rx_ctx *rx_ctx;
	struct sock_rx_entry *rx_entry;
	struct sock_ep *sock_ep;
	uint64_t op_flags;

	switch (ep->fid.fclass) {
	case FI_CLASS_EP:
		sock_ep = container_of(ep, struct sock_ep, ep);
		rx_ctx = sock_ep->attr->rx_ctx;
		op_flags = sock_ep->rx_attr.op_flags;
		break;
	case FI_CLASS_RX_CTX:
	case FI_CLASS_SRX_CTX:
		rx_ctx = container_of(ep, struct sock_rx_ctx, ctx);
		op_flags = rx_ctx->attr.op_flags;
		break;
	default:
		SOCK_LOG_ERROR("Invalid ep type\n");
		return -FI_EINVAL;
	}

#if ENABLE_DEBUG
	if (msg->iov_count > SOCK_EP_MAX_IOV_LIMIT)
		return -FI_EINVAL;
#endif

	if (!rx_ctx->enabled)
		return -FI_EOPBADSTATE;

	if (flags & SOCK_USE_OP_FLAGS)
		flags |= op_flags;

	if (flags & FI_TRIGGER) {
		ret = sock_queue_msg_op(ep, msg, flags, FI_OP_RECV);
		if (ret != 1)
			return ret;
	}

	if (flags & FI_PEEK) {
		return sock_rx_peek_recv(rx_ctx, msg->addr, 0L, ~0ULL,
					 msg->context, flags, 0);
	} else if (flags & FI_CLAIM) {
		return sock_rx_claim_recv(rx_ctx, msg->context, flags,
					  0L, ~0ULL, 0,
					  msg->msg_iov, msg->iov_count);
	}

	fastlock_acquire(&rx_ctx->lock);
	rx_entry = sock_rx_new_entry(rx_ctx);
	fastlock_release(&rx_ctx->lock);
	if (!rx_entry)
		return -FI_ENOMEM;

	rx_entry->rx_op.op = SOCK_OP_RECV;
	rx_entry->rx_op.dest_iov_len = msg->iov_count;

	rx_entry->flags = flags;
	rx_entry->context = (uintptr_t) msg->context;
	rx_entry->addr = (rx_ctx->attr.caps & FI_DIRECTED_RECV) ?
			 msg->addr : FI_ADDR_UNSPEC;
	rx_entry->data = msg->data;
	rx_entry->ignore = ~0ULL;
	rx_entry->is_tagged = 0;

	for (i = 0; i < msg->iov_count; i++) {
		rx_entry->iov[i].iov.addr = (uintptr_t) msg->msg_iov[i].iov_base;
		rx_entry->iov[i].iov.len = msg->msg_iov[i].iov_len;
		rx_entry->total_len += rx_entry->iov[i].iov.len;
	}

	SOCK_LOG_DBG("New rx_entry: %p (ctx: %p)\n", rx_entry, rx_ctx);
	fastlock_acquire(&rx_ctx->lock);
	dlist_insert_tail(&rx_entry->entry, &rx_ctx->rx_entry_list);
	fastlock_release(&rx_ctx->lock);
	return 0;
}
示例#29
0
int sock_rdm_verify_ep_attr(struct fi_ep_attr *ep_attr,
			    struct fi_tx_attr *tx_attr,
			    struct fi_rx_attr *rx_attr)
{
	int ret;

	if (ep_attr) {
		switch (ep_attr->protocol) {
		case FI_PROTO_UNSPEC:
		case FI_PROTO_SOCK_TCP:
			break;
		default:
			SOCK_LOG_DBG("Unsupported protocol\n");
			return -FI_ENODATA;
		}

		if (ep_attr->protocol_version != sock_rdm_ep_attr.protocol_version) {
			SOCK_LOG_DBG("Invalid protocol version\n");
			return -FI_ENODATA;
		}

		if (ep_attr->max_msg_size > sock_rdm_ep_attr.max_msg_size) {
			SOCK_LOG_DBG("Message size too large\n");
			return -FI_ENODATA;
		}

		if (ep_attr->msg_prefix_size > sock_rdm_ep_attr.msg_prefix_size) {
			SOCK_LOG_DBG("Msg prefix size not supported\n");
			return -FI_ENODATA;
		}

		if (ep_attr->max_order_raw_size >
		   sock_rdm_ep_attr.max_order_raw_size) {
			SOCK_LOG_DBG("RAW order size too large\n");
			return -FI_ENODATA;
		}

		if (ep_attr->max_order_war_size >
		   sock_rdm_ep_attr.max_order_war_size) {
			SOCK_LOG_DBG("WAR order size too large\n");
			return -FI_ENODATA;
		}

		if (ep_attr->max_order_waw_size >
		   sock_rdm_ep_attr.max_order_waw_size) {
			SOCK_LOG_DBG("WAW order size too large\n");
			return -FI_ENODATA;
		}

		if ((ep_attr->tx_ctx_cnt > SOCK_EP_MAX_TX_CNT) &&
		    ep_attr->tx_ctx_cnt != FI_SHARED_CONTEXT)
			return -FI_ENODATA;

		if ((ep_attr->rx_ctx_cnt > SOCK_EP_MAX_RX_CNT) &&
		    ep_attr->rx_ctx_cnt != FI_SHARED_CONTEXT)
			return -FI_ENODATA;
	}

	ret = sock_rdm_verify_tx_attr(tx_attr);
	if (ret)
		return ret;

	ret = sock_rdm_verify_rx_attr(rx_attr);
	if (ret)
		return ret;

	return 0;
}