Esempio n. 1
0
ssize_t sock_ep_tx_atomic(struct fid_ep *ep,
			  const struct fi_msg_atomic *msg,
			  const struct fi_ioc *comparev, void **compare_desc,
			  size_t compare_count, struct fi_ioc *resultv,
			  void **result_desc, size_t result_count, uint64_t flags)
{
	int i, ret;
	size_t datatype_sz;
	struct sock_op tx_op;
	union sock_iov tx_iov;
	struct sock_conn *conn;
	struct sock_tx_ctx *tx_ctx;
	uint64_t total_len, src_len, dst_len;
	struct sock_ep *sock_ep;

	switch (ep->fid.fclass) {
	case FI_CLASS_EP:
		sock_ep = container_of(ep, struct sock_ep, ep);
		tx_ctx = sock_ep->tx_ctx;
		break;
	case FI_CLASS_TX_CTX:
		tx_ctx = container_of(ep, struct sock_tx_ctx, fid.ctx);
		sock_ep = tx_ctx->ep;
		break;
	default:
		SOCK_LOG_ERROR("Invalid EP type\n");
		return -FI_EINVAL;
	}

	if (msg->iov_count > SOCK_EP_MAX_IOV_LIMIT ||
	    msg->rma_iov_count > SOCK_EP_MAX_IOV_LIMIT)
		return -FI_EINVAL;

	if (!tx_ctx->enabled)
		return -FI_EOPBADSTATE;

	if (sock_ep->connected) {
		conn = sock_ep_lookup_conn(sock_ep);
	} else {
		conn = sock_av_lookup_addr(sock_ep, tx_ctx->av, msg->addr);
		if (!conn) {
			SOCK_LOG_ERROR("Address lookup failed\n");
			return -errno;
		}
	}

	if (!conn)
		return -FI_EAGAIN;

	SOCK_EP_SET_TX_OP_FLAGS(flags);
	if (flags & SOCK_USE_OP_FLAGS)
		flags |= tx_ctx->attr.op_flags;

	if (msg->op == FI_ATOMIC_READ) {
		flags &= ~FI_INJECT;
	}

	if (sock_ep_is_send_cq_low(&tx_ctx->comp, flags)) {
		SOCK_LOG_ERROR("CQ size low\n");
		return -FI_EAGAIN;
	}

	if (flags & FI_TRIGGER) {
		ret = sock_queue_atomic_op(ep, msg, comparev, compare_count,
					resultv, result_count, flags,
					SOCK_OP_ATOMIC);
		if (ret != 1)
			return ret;
	}

	src_len = 0;
	datatype_sz = fi_datatype_size(msg->datatype);
	if (flags & FI_INJECT) {
		for (i = 0; i < msg->iov_count; i++)
			src_len += (msg->msg_iov[i].count * datatype_sz);

		if (src_len > SOCK_EP_MAX_INJECT_SZ)
			return -FI_EINVAL;

		total_len = src_len;
	} else {
		total_len = msg->iov_count * sizeof(union sock_iov);
	}

	total_len += (sizeof(struct sock_op_send) +
		      (msg->rma_iov_count * sizeof(union sock_iov)) +
		      (result_count * sizeof(union sock_iov)));

	sock_tx_ctx_start(tx_ctx);
	if (rbfdavail(&tx_ctx->rbfd) < total_len) {
		ret = -FI_EAGAIN;
		goto err;
	}

	memset(&tx_op, 0, sizeof(tx_op));
	tx_op.op = SOCK_OP_ATOMIC;
	tx_op.dest_iov_len = msg->rma_iov_count;
	tx_op.atomic.op = msg->op;
	tx_op.atomic.datatype = msg->datatype;
	tx_op.atomic.res_iov_len = result_count;
	tx_op.atomic.cmp_iov_len = compare_count;

	if (flags & FI_INJECT)
		tx_op.src_iov_len = src_len;
	else
		tx_op.src_iov_len = msg->iov_count;

	sock_tx_ctx_write_op_send(tx_ctx, &tx_op, flags,
		(uintptr_t) msg->context, msg->addr,
		(uintptr_t) msg->msg_iov[0].addr, sock_ep, conn);

	if (flags & FI_REMOTE_CQ_DATA)
		sock_tx_ctx_write(tx_ctx, &msg->data, sizeof(uint64_t));

	src_len = 0;
	if (flags & FI_INJECT) {
		for (i = 0; i < msg->iov_count; i++) {
			sock_tx_ctx_write(tx_ctx, msg->msg_iov[i].addr,
					  msg->msg_iov[i].count * datatype_sz);
			src_len += (msg->msg_iov[i].count * datatype_sz);
		}
	} else {
		for (i = 0; i < msg->iov_count; i++) {
			tx_iov.ioc.addr = (uintptr_t) msg->msg_iov[i].addr;
			tx_iov.ioc.count = msg->msg_iov[i].count;
			sock_tx_ctx_write(tx_ctx, &tx_iov, sizeof(tx_iov));
			src_len += (tx_iov.ioc.count * datatype_sz);
		}
	}

#ifdef ENABLE_DEBUG
	if (src_len > SOCK_EP_MAX_ATOMIC_SZ) {
		ret = -FI_EINVAL;
		goto err;
	}
#endif

	dst_len = 0;
	for (i = 0; i < msg->rma_iov_count; i++) {
		tx_iov.ioc.addr = msg->rma_iov[i].addr;
		tx_iov.ioc.key = msg->rma_iov[i].key;
		tx_iov.ioc.count = msg->rma_iov[i].count;
		sock_tx_ctx_write(tx_ctx, &tx_iov, sizeof(tx_iov));
		dst_len += (tx_iov.ioc.count * datatype_sz);
	}

	if (msg->iov_count && dst_len != src_len) {
		SOCK_LOG_ERROR("Buffer length mismatch\n");
		ret = -FI_EINVAL;
		goto err;
	} else {
		src_len = dst_len;
	}

	dst_len = 0;
	for (i = 0; i < result_count; i++) {
		tx_iov.ioc.addr = (uintptr_t) resultv[i].addr;
		tx_iov.ioc.count = resultv[i].count;
		sock_tx_ctx_write(tx_ctx, &tx_iov, sizeof(tx_iov));
		dst_len += (tx_iov.ioc.count * datatype_sz);
	}

#ifdef ENABLE_DEBUG
	if (result_count && (dst_len != src_len)) {
		SOCK_LOG_ERROR("Buffer length mismatch\n");
		ret = -FI_EINVAL;
		goto err;
	}
#endif

	dst_len = 0;
	for (i = 0; i < compare_count; i++) {
		tx_iov.ioc.addr = (uintptr_t) comparev[i].addr;
		tx_iov.ioc.count = comparev[i].count;
		sock_tx_ctx_write(tx_ctx, &tx_iov, sizeof(tx_iov));
		dst_len += (tx_iov.ioc.count * datatype_sz);
	}

#ifdef ENABLE_DEBUG
	if (compare_count && (dst_len != src_len)) {
		SOCK_LOG_ERROR("Buffer length mismatch\n");
		ret = -FI_EINVAL;
		goto err;
	}
#endif

	sock_tx_ctx_commit(tx_ctx);
	return 0;

err:
	sock_tx_ctx_abort(tx_ctx);
	return ret;
}
Esempio n. 2
0
struct sock_conn *sock_ep_connect(struct sock_ep_attr *ep_attr, fi_addr_t index)
{
	int conn_fd = -1, ret;
	int do_retry = sock_conn_retry;
	struct sock_conn *conn, *new_conn;
	struct sockaddr_in addr;
	socklen_t lon;
	int valopt = 0;
	struct pollfd poll_fd;

	if (ep_attr->ep_type == FI_EP_MSG) {
		/* Need to check that destination address has been
		   passed to endpoint */
		assert(ep_attr->dest_addr);
		addr = *ep_attr->dest_addr;
		addr.sin_port = htons(ep_attr->msg_dest_port);
	} else {
		addr = *((struct sockaddr_in *)&ep_attr->av->table[index].addr);
	}

do_connect:
	fastlock_acquire(&ep_attr->cmap.lock);
	conn = sock_ep_lookup_conn(ep_attr, index, &addr);
	fastlock_release(&ep_attr->cmap.lock);

	if (conn != SOCK_CM_CONN_IN_PROGRESS)
		return conn;

	conn_fd = ofi_socket(AF_INET, SOCK_STREAM, 0);
	if (conn_fd == -1) {
		SOCK_LOG_ERROR("failed to create conn_fd, errno: %d\n", errno);
		errno = FI_EOTHER;
		return NULL;
	}

	ret = fd_set_nonblock(conn_fd);
	if (ret) {
		SOCK_LOG_ERROR("failed to set conn_fd nonblocking, errno: %d\n", errno);
		errno = FI_EOTHER;
		ofi_close_socket(conn_fd);
		return NULL;
	}

	SOCK_LOG_DBG("Connecting to: %s:%d\n", inet_ntoa(addr.sin_addr),
			ntohs(addr.sin_port));
	SOCK_LOG_DBG("Connecting using address:%s\n",
			inet_ntoa(ep_attr->src_addr->sin_addr));

	ret = connect(conn_fd, (struct sockaddr *) &addr, sizeof addr);
	if (ret < 0) {
		if (ofi_sockerr() == EINPROGRESS) {
			poll_fd.fd = conn_fd;
			poll_fd.events = POLLOUT;

			ret = poll(&poll_fd, 1, 15 * 1000);
			if (ret < 0) {
				SOCK_LOG_DBG("poll failed\n");
				goto retry;
			}

			lon = sizeof(int);
			ret = getsockopt(conn_fd, SOL_SOCKET, SO_ERROR, (void*)(&valopt), &lon);
			if (ret < 0) {
				SOCK_LOG_DBG("getsockopt failed: %d, %d\n", ret, conn_fd);
				goto retry;
			}

			if (valopt) {
				SOCK_LOG_DBG("Error in connection() %d - %s - %d\n", valopt, strerror(valopt), conn_fd);
				SOCK_LOG_DBG("Connecting to: %s:%d\n", inet_ntoa(addr.sin_addr),
						ntohs(addr.sin_port));
				SOCK_LOG_DBG("Connecting using address:%s\n",
				inet_ntoa(ep_attr->src_addr->sin_addr));
				goto retry;
			}
			goto out;
		} else {
			SOCK_LOG_DBG("Timeout or error() - %s: %d\n", strerror(errno), conn_fd);
			SOCK_LOG_DBG("Connecting to: %s:%d\n", inet_ntoa(addr.sin_addr),
					ntohs(addr.sin_port));
			SOCK_LOG_DBG("Connecting using address:%s\n",
					inet_ntoa(ep_attr->src_addr->sin_addr));
			goto retry;
		}
	} else {
		goto out;
	}

retry:
	do_retry--;
	sleep(10);
	if (!do_retry)
		goto err;

	if (conn_fd != -1) {
		ofi_close_socket(conn_fd);
		conn_fd = -1;
	}

	SOCK_LOG_ERROR("Connect error, retrying - %s - %d\n", strerror(errno), conn_fd);
	SOCK_LOG_DBG("Connecting to: %s:%d\n", inet_ntoa(addr.sin_addr),
			ntohs(addr.sin_port));
	SOCK_LOG_DBG("Connecting using address:%s\n",
			inet_ntoa(ep_attr->src_addr->sin_addr));
        goto do_connect;

out:
	fastlock_acquire(&ep_attr->cmap.lock);
	new_conn = sock_conn_map_insert(ep_attr, &addr, conn_fd, 0);
	if (!new_conn) {
		fastlock_release(&ep_attr->cmap.lock);
		goto err;
	}
	new_conn->av_index = (ep_attr->ep_type == FI_EP_MSG) ? FI_ADDR_NOTAVAIL : index;
	conn = ofi_idm_lookup(&ep_attr->av_idm, index);
	if (conn == SOCK_CM_CONN_IN_PROGRESS) {
		if (ofi_idm_set(&ep_attr->av_idm, index, new_conn) < 0)
			SOCK_LOG_ERROR("ofi_idm_set failed\n");
		conn = new_conn;
	}
	fastlock_release(&ep_attr->cmap.lock);
	return conn;

err:
	ofi_close_socket(conn_fd);
	return NULL;
}
Esempio n. 3
0
ssize_t sock_ep_rma_readmsg(struct fid_ep *ep, const struct fi_msg_rma *msg,
			    uint64_t flags)
{
	int ret, i;
	struct sock_op tx_op;
	union sock_iov tx_iov;
	struct sock_conn *conn;
	struct sock_tx_ctx *tx_ctx;
	uint64_t total_len, src_len, dst_len;
	struct sock_ep *sock_ep;

	switch (ep->fid.fclass) {
	case FI_CLASS_EP:
		sock_ep = container_of(ep, struct sock_ep, ep);
		tx_ctx = sock_ep->tx_ctx;
		break;

	case FI_CLASS_TX_CTX:
		tx_ctx = container_of(ep, struct sock_tx_ctx, fid.ctx);
		sock_ep = tx_ctx->ep;
		break;

	default:
		SOCK_LOG_ERROR("Invalid EP type\n");
		return -FI_EINVAL;
	}

#if ENABLE_DEBUG
	if (msg->iov_count > SOCK_EP_MAX_IOV_LIMIT ||
		msg->rma_iov_count > SOCK_EP_MAX_IOV_LIMIT)
		return -FI_EINVAL;
#endif

	if (!tx_ctx->enabled)
		return -FI_EOPBADSTATE;

	if (sock_ep->connected) {
		conn = sock_ep_lookup_conn(sock_ep);
	} else {
		conn = sock_av_lookup_addr(sock_ep, tx_ctx->av, msg->addr);
		if (!conn) {
			SOCK_LOG_ERROR("Address lookup failed\n");
			return -errno;
		}
	}

	if (!conn)
		return -FI_EAGAIN;

	SOCK_EP_SET_TX_OP_FLAGS(flags);
	if (flags & SOCK_USE_OP_FLAGS)
		flags |= tx_ctx->attr.op_flags;

	if (sock_ep_is_send_cq_low(&tx_ctx->comp, flags)) {
		SOCK_LOG_ERROR("CQ size low\n");
		return -FI_EAGAIN;
	}

	if (flags & FI_TRIGGER) {
		ret = sock_queue_rma_op(ep, msg, flags, SOCK_OP_READ);
		if (ret != 1)
			return ret;
	}

	total_len = sizeof(struct sock_op_send) +
		(msg->iov_count * sizeof(union sock_iov)) +
		(msg->rma_iov_count * sizeof(union sock_iov));

	sock_tx_ctx_start(tx_ctx);
	if (rbfdavail(&tx_ctx->rbfd) < total_len) {
		ret = -FI_EAGAIN;
		goto err;
	}

	memset(&tx_op, 0, sizeof(struct sock_op));
	tx_op.op = SOCK_OP_READ;
	tx_op.src_iov_len = msg->rma_iov_count;
	tx_op.dest_iov_len = msg->iov_count;

	sock_tx_ctx_write_op_send(tx_ctx, &tx_op, flags,
			(uintptr_t) msg->context, msg->addr,
			(uintptr_t) msg->msg_iov[0].iov_base,
			sock_ep, conn);

	src_len = 0;
	for (i = 0; i < msg->rma_iov_count; i++) {
		tx_iov.iov.addr = msg->rma_iov[i].addr;
		tx_iov.iov.key = msg->rma_iov[i].key;
		tx_iov.iov.len = msg->rma_iov[i].len;
		sock_tx_ctx_write(tx_ctx, &tx_iov, sizeof(tx_iov));
		src_len += tx_iov.iov.len;
	}

	dst_len = 0;
	for (i = 0; i < msg->iov_count; i++) {
		tx_iov.iov.addr = (uintptr_t) msg->msg_iov[i].iov_base;
		tx_iov.iov.len = msg->msg_iov[i].iov_len;
		sock_tx_ctx_write(tx_ctx, &tx_iov, sizeof(tx_iov));
		dst_len += tx_iov.iov.len;
	}

#if ENABLE_DEBUG
	if (dst_len != src_len) {
		SOCK_LOG_ERROR("Buffer length mismatch\n");
		ret = -FI_EINVAL;
		goto err;
	}
#endif

	sock_tx_ctx_commit(tx_ctx);
	return 0;

err:
	sock_tx_ctx_abort(tx_ctx);
	return ret;
}
Esempio n. 4
0
static ssize_t sock_ep_tx_atomic(struct fid_ep *ep, 
				  const struct fi_msg_atomic *msg, 
				  const struct fi_ioc *comparev, void **compare_desc, 
				  size_t compare_count, struct fi_ioc *resultv, 
				  void **result_desc, size_t result_count, uint64_t flags)
{
	int i, ret;
	size_t datatype_sz;
	struct sock_op tx_op;
	union sock_iov tx_iov;
	struct sock_conn *conn;
	struct sock_tx_ctx *tx_ctx;
	uint64_t total_len, src_len, dst_len;
	struct sock_ep *sock_ep;

	switch (ep->fid.fclass) {
	case FI_CLASS_EP:
		sock_ep = container_of(ep, struct sock_ep, ep);
		tx_ctx = sock_ep->tx_ctx;
		break;
	case FI_CLASS_TX_CTX:
		tx_ctx = container_of(ep, struct sock_tx_ctx, fid.ctx);
		sock_ep = tx_ctx->ep;
		break;
	default:
		SOCK_LOG_ERROR("Invalid EP type\n");
		return -FI_EINVAL;
	}

	assert(tx_ctx->enabled && 
	       msg->iov_count <= SOCK_EP_MAX_IOV_LIMIT &&
	       msg->rma_iov_count <= SOCK_EP_MAX_IOV_LIMIT);
	
	if (sock_ep->connected) {
		conn = sock_ep_lookup_conn(sock_ep);
	} else {
		conn = sock_av_lookup_addr(sock_ep, tx_ctx->av, msg->addr);
	}

	if (!conn)
		return -FI_EAGAIN;

	src_len = 0;
	datatype_sz = fi_datatype_size(msg->datatype);
	if (flags & FI_INJECT) {
		for (i=0; i< msg->iov_count; i++) {
			src_len += (msg->msg_iov[i].count * datatype_sz);
		}
		assert(src_len <= SOCK_EP_MAX_INJECT_SZ);
		total_len = src_len;
	} else {
		total_len = msg->iov_count * sizeof(union sock_iov);
	}

	total_len += (sizeof(tx_op) +
		      (msg->rma_iov_count * sizeof(union sock_iov)) +
		      (result_count * sizeof (union sock_iov)));
	
	sock_tx_ctx_start(tx_ctx);
	if (rbfdavail(&tx_ctx->rbfd) < total_len) {
		ret = -FI_EAGAIN;
		goto err;
	}

	flags |= tx_ctx->attr.op_flags;
	memset(&tx_op, 0, sizeof(tx_op));
	tx_op.op = SOCK_OP_ATOMIC;
	tx_op.dest_iov_len = msg->rma_iov_count;
	tx_op.atomic.op = msg->op;
	tx_op.atomic.datatype = msg->datatype;
	tx_op.atomic.res_iov_len = result_count;
	tx_op.atomic.cmp_iov_len = compare_count;

	if (flags & FI_INJECT)
		tx_op.src_iov_len = src_len;
	else 
		tx_op.src_iov_len = msg->iov_count;

	sock_tx_ctx_write_op_send(tx_ctx, &tx_op, flags, (uintptr_t) msg->context,
			msg->addr, (uintptr_t) msg->msg_iov[0].addr, sock_ep, conn);

	if (flags & FI_REMOTE_CQ_DATA) {
		sock_tx_ctx_write(tx_ctx, &msg->data, sizeof(uint64_t));
	}
	
	src_len = 0;
	if (flags & FI_INJECT) {
		for (i=0; i< msg->iov_count; i++) {
			sock_tx_ctx_write(tx_ctx, msg->msg_iov[i].addr,
					  msg->msg_iov[i].count * datatype_sz);
			src_len += (msg->msg_iov[i].count * datatype_sz);
		}
	} else {
		for (i = 0; i< msg->iov_count; i++) {
			tx_iov.ioc.addr = (uintptr_t) msg->msg_iov[i].addr;
			tx_iov.ioc.count = msg->msg_iov[i].count;
			tx_iov.ioc.key = (uintptr_t) msg->desc[i];
			sock_tx_ctx_write(tx_ctx, &tx_iov, sizeof(tx_iov));
			src_len += (tx_iov.ioc.count * datatype_sz);
		}
	}
	assert(src_len <= SOCK_EP_MAX_ATOMIC_SZ);

	dst_len = 0;
	for (i = 0; i< msg->rma_iov_count; i++) {
		tx_iov.ioc.addr = msg->rma_iov[i].addr;
		tx_iov.ioc.key = msg->rma_iov[i].key;
		tx_iov.ioc.count = msg->rma_iov[i].count;
		sock_tx_ctx_write(tx_ctx, &tx_iov, sizeof(tx_iov));
		dst_len += (tx_iov.ioc.count * datatype_sz);
	}
	
	if (dst_len != src_len) {
		SOCK_LOG_ERROR("Buffer length mismatch\n");
		ret = -FI_EINVAL;
		goto err;
	}

	dst_len = 0;
	for (i = 0; i< result_count; i++) {
		tx_iov.ioc.addr = (uintptr_t) resultv[i].addr;
		tx_iov.ioc.count = resultv[i].count;
		sock_tx_ctx_write(tx_ctx, &tx_iov, sizeof(tx_iov));
		dst_len += (tx_iov.ioc.count * datatype_sz);
	}

	if (result_count && (dst_len != src_len)) {
		SOCK_LOG_ERROR("Buffer length mismatch\n");
		ret = -FI_EINVAL;
		goto err;
	}

	dst_len = 0;
	for (i = 0; i< compare_count; i++) {
		tx_iov.ioc.addr = (uintptr_t) comparev[i].addr;
		tx_iov.ioc.count = comparev[i].count;
		sock_tx_ctx_write(tx_ctx, &tx_iov, sizeof(tx_iov));
		dst_len += (tx_iov.ioc.count * datatype_sz);
	}

	if (compare_count && (dst_len != src_len)) {
		SOCK_LOG_ERROR("Buffer length mismatch\n");
		ret = -FI_EINVAL;
		goto err;
	}
	
	sock_tx_ctx_commit(tx_ctx);
	return 0;

err:
	SOCK_LOG_INFO("Not enough space for TX entry, try again\n");
	sock_tx_ctx_abort(tx_ctx);
	return ret;
}
Esempio n. 5
0
struct sock_conn *sock_ep_connect(struct sock_ep *ep, fi_addr_t index)
{
	int conn_fd = -1, ret;
	int do_retry = sock_conn_retry;
	struct sock_conn *conn, *new_conn;
	uint16_t idx;
	struct sockaddr_in *addr;
	socklen_t lon;
	int valopt = 0;
	struct pollfd poll_fd;

	if (ep->ep_type == FI_EP_MSG) {
		idx = 0;
		addr = ep->dest_addr;
	} else {
		idx = index & ep->av->mask;
		addr = (struct sockaddr_in *)&ep->av->table[idx].addr;
	}

do_connect:
	fastlock_acquire(&ep->cmap.lock);
	conn = sock_ep_lookup_conn(ep, index, addr);
	fastlock_release(&ep->cmap.lock);

	if (conn != SOCK_CM_CONN_IN_PROGRESS)
		return conn;

	conn_fd = socket(AF_INET, SOCK_STREAM, 0);
	if (conn_fd == -1) {
		SOCK_LOG_ERROR("failed to create conn_fd, errno: %d\n", errno);
		errno = FI_EOTHER;
		return NULL;
	}

	ret = fd_set_nonblock(conn_fd);
	if (ret) {
		SOCK_LOG_ERROR("failed to set conn_fd nonblocking, errno: %d\n", errno);
		errno = FI_EOTHER;
		close(conn_fd);
                return NULL;
	}

	SOCK_LOG_DBG("Connecting to: %s:%d\n", inet_ntoa(addr->sin_addr),
			ntohs(addr->sin_port));
	SOCK_LOG_DBG("Connecting using address:%s\n",
			inet_ntoa(ep->src_addr->sin_addr));

	ret = connect(conn_fd, (struct sockaddr *) addr, sizeof *addr);
	if (ret < 0) {
		if (errno == EINPROGRESS) {
			poll_fd.fd = conn_fd;
			poll_fd.events = POLLOUT;

			ret = poll(&poll_fd, 1, 15 * 1000);
			if (ret < 0) {
				SOCK_LOG_DBG("poll failed\n");
				goto retry;
			}

			lon = sizeof(int);
			ret = getsockopt(conn_fd, SOL_SOCKET, SO_ERROR, (void*)(&valopt), &lon);
			if (ret < 0) {
				SOCK_LOG_DBG("getsockopt failed: %d, %d\n", ret, conn_fd);
				goto retry;
			}

			if (valopt) {
				SOCK_LOG_DBG("Error in connection() %d - %s - %d\n", valopt, strerror(valopt), conn_fd);
				SOCK_LOG_DBG("Connecting to: %s:%d\n", inet_ntoa(addr->sin_addr),
						ntohs(addr->sin_port));
				SOCK_LOG_DBG("Connecting using address:%s\n",
				inet_ntoa(ep->src_addr->sin_addr));
				goto retry;
			}
			goto out;
		} else {
			SOCK_LOG_DBG("Timeout or error() - %s: %d\n", strerror(errno), conn_fd);
			SOCK_LOG_DBG("Connecting to: %s:%d\n", inet_ntoa(addr->sin_addr),
					ntohs(addr->sin_port));
			SOCK_LOG_DBG("Connecting using address:%s\n",
					inet_ntoa(ep->src_addr->sin_addr));
			goto retry;
		}
	} else {
		goto out;
	}

retry:
	do_retry--;
	sleep(10);
	if (!do_retry)
		goto err;

	if (conn_fd != -1) {
		close(conn_fd);
		conn_fd = -1;
	}

	SOCK_LOG_ERROR("Connect error, retrying - %s - %d\n", strerror(errno), conn_fd);
	SOCK_LOG_DBG("Connecting to: %s:%d\n", inet_ntoa(addr->sin_addr),
			ntohs(addr->sin_port));
	SOCK_LOG_DBG("Connecting using address:%s\n",
			inet_ntoa(ep->src_addr->sin_addr));
        goto do_connect;

out:
	fastlock_acquire(&ep->cmap.lock);
	new_conn = sock_conn_map_insert(ep, addr, conn_fd, 0);
	new_conn->av_index = (ep->ep_type == FI_EP_MSG) ? FI_ADDR_NOTAVAIL : (fi_addr_t) idx;
	conn = idm_lookup(&ep->av_idm, index);
	if (conn == SOCK_CM_CONN_IN_PROGRESS) {
		idm_set(&ep->av_idm, index, new_conn);
		conn = new_conn;
	}
	fastlock_release(&ep->cmap.lock);
	return conn;

err:
	close(conn_fd);
	return NULL;
}