Beispiel #1
0
void Acceptor::listen(std::string const& hostname, std::string const& port) {
  auto res = create_addr_info(hostname, port);
  ibv_qp_init_attr init_attr;
  memset(&init_attr, 0, sizeof(init_attr));
  init_attr.cap.max_send_wr = m_credits;
  init_attr.cap.max_recv_wr = m_credits;
  init_attr.cap.max_send_sge = 1;
  init_attr.cap.max_recv_sge = 1;
  init_attr.cap.max_inline_data = 0;
  init_attr.sq_sig_all = 1;
  init_attr.qp_type = IBV_QPT_RC;
  int ret = rdma_create_ep(&m_cm_id, res, NULL, &init_attr);
  destroy_addr_info(res);
  if (ret) {
    rdma_destroy_ep(m_cm_id);
    throw exception::acceptor::generic_error(
        "Error on rdma_create_ep: " + std::string(strerror(errno)));
  }

  if (rdma_listen(m_cm_id, 128)) {
    rdma_destroy_ep(m_cm_id);
    throw exception::acceptor::generic_error(
        "Error on rdma_listen: " + std::string(strerror(errno)));
  }
}
Beispiel #2
0
std::unique_ptr<Socket> Acceptor::accept() {
  rdma_cm_id* new_cm_id;
  if (rdma_get_request(m_cm_id, &new_cm_id)) {
    throw exception::acceptor::generic_error(
        "Error on rdma_get_request: " + std::string(strerror(errno)));
  }

  rdma_conn_param conn_param;
  memset(&conn_param, 0, sizeof(rdma_conn_param));
  conn_param.rnr_retry_count = m_rnr_retry_count;
  if (rdma_accept(new_cm_id, &conn_param)) {
    rdma_destroy_ep(new_cm_id);
    throw exception::acceptor::generic_error(
        "Error on rdma_accept: " + std::string(strerror(errno)));
  }

  ibv_qp_attr attr;
  memset(&attr, 0, sizeof(ibv_qp_attr));
  attr.min_rnr_timer = m_min_rtr_timer;
  int flags = IBV_QP_MIN_RNR_TIMER;
  if (ibv_modify_qp(new_cm_id->qp, &attr, flags)) {
    rdma_destroy_ep(new_cm_id);
    throw exception::acceptor::generic_error(
        "Error on ibv_modify_qp: " + std::string(strerror(errno)));
  }

  std::unique_ptr<Socket> socket_ptr(new Socket(new_cm_id, m_credits));
  return socket_ptr;
}
int fi_ibv_getinfo(uint32_t version, const char *node, const char *service,
		   uint64_t flags, struct fi_info *hints, struct fi_info **info)
{
	struct rdma_cm_id *id;
	struct rdma_addrinfo *rai;
	int ret;

	ret = fi_ibv_init_info();
	if (ret)
		goto out;

	ret = fi_ibv_create_ep(node, service, flags, hints, &rai, &id);
	if (ret)
		goto out;

	if (id->verbs) {
		ret = fi_ibv_get_matching_info(ibv_get_device_name(id->verbs->device),
					       hints, rai, info);
	} else {
		ret = fi_ibv_get_matching_info(NULL, hints, rai, info);
	}

	rdma_destroy_ep(id);
	rdma_freeaddrinfo(rai);
out:
	if (!ret || ret == -FI_ENOMEM)
		return ret;
	else
		return -FI_ENODATA;
}
static void fi_ibv_free_msg_ep(struct fi_ibv_msg_ep *ep)
{
	if (ep->id)
		rdma_destroy_ep(ep->id);
	fi_freeinfo(ep->info);
	free(ep);
}
Beispiel #5
0
RDMACMSocket::~RDMACMSocket() {
    
    rdma_disconnect(this->client_id);
    rdma_dereg_mr(this->verbs_mr);
    rdma_destroy_ep(this->client_id);
    this->verbs_buf.free();
}
Beispiel #6
0
void RDMACMSocket::post_recv(const Buffer& buf) {
    
    if (rdma_post_recv(this->client_id, buf.addr, buf.addr, buf.size, this->verbs_mr) < 0) {
	rdma_dereg_mr(this->verbs_mr);
	this->verbs_buf.free();
	rdma_destroy_ep(this->client_id);
	perror("rdma_post_recv");
	exit(1);
    }
}
Beispiel #7
0
static int __fi_pep_close(fid_t fid)
{
	struct __fid_pep *pep;

	pep = container_of(fid, struct __fid_pep, pep_fid.fid);
	if (pep->id)
		rdma_destroy_ep(pep->id);

	free(pep);
	return 0;
}
Beispiel #8
0
void
kiro_server_stop (KiroServer *self)
{
    g_return_if_fail (self != NULL);
    KiroServerPrivate *priv = KIRO_SERVER_GET_PRIVATE (self);

    if (!priv->base)
        return;

    //Shut down event listening
    priv->close_signal = TRUE;
    g_debug ("Event handling stopped");

    g_list_foreach (priv->clients, disconnect_client, NULL);
    g_list_free (priv->clients);

    // Stop the main loop and clear its memory
    g_main_loop_quit (priv->main_loop);
    g_main_loop_unref (priv->main_loop);
    priv->main_loop = NULL;

    // Ask the main thread to join (It probably already has, but we do it
    // anyways. Just in case!)
    g_thread_join (priv->main_thread);
    priv->main_thread = NULL;

    // We don't need the connection management IO channel container any more.
    // Unref and thus free it.
    g_io_channel_unref (priv->conn_ec);
    priv->conn_ec = NULL;
    priv->close_signal = FALSE;

    // kiro_destroy_connection would try to call rdma_disconnect on the given
    // connection. But the server never 'connects' to anywhere, so this would
    // cause a crash. We need to destroy the enpoint manually without disconnect
    struct kiro_connection_context *ctx = (struct kiro_connection_context *) (priv->base->context);
    kiro_destroy_connection_context (&ctx);
    rdma_destroy_ep (priv->base);
    priv->base = NULL;

    rdma_destroy_event_channel (priv->ec);
    priv->ec = NULL;
    g_message ("Server stopped successfully");
}
int fi_ibv_create_ep(const char *node, const char *service,
		     uint64_t flags, const struct fi_info *hints,
		     struct rdma_addrinfo **rai, struct rdma_cm_id **id)
{
	struct rdma_addrinfo *_rai;
	struct sockaddr *local_addr;
	int ret;

	ret = fi_ibv_get_rdma_rai(node, service, flags, hints, &_rai);
	if (ret) {
		return ret;
	}

	ret = rdma_create_ep(id, _rai, NULL, NULL);
	if (ret) {
		VERBS_INFO_ERRNO(FI_LOG_FABRIC, "rdma_create_ep", errno);
		ret = -errno;
		goto err1;
	}
	if (rai && !_rai->ai_src_addr) {
		local_addr = rdma_get_local_addr(*id);
		_rai->ai_src_len = fi_ibv_sockaddr_len(local_addr);
		if (!(_rai->ai_src_addr = malloc(_rai->ai_src_len))) {
			ret = -FI_ENOMEM;
			goto err2;
		}
		memcpy(_rai->ai_src_addr, local_addr, _rai->ai_src_len);
	}

	if (rai) {
		*rai = _rai;
	} else {
		rdma_freeaddrinfo(_rai);
	}

	return ret;
err2:
	rdma_destroy_ep(*id);
err1:
	rdma_freeaddrinfo(_rai);

	return ret;
}
Beispiel #10
0
RDMACMSocket* RDMACMSocket::connect(const HostAndPort& hp) {
    
    struct rdma_cm_id* client_id = NULL; 

    struct rdma_addrinfo hints, *res;
    memset(&hints, 0, sizeof(hints));
    hints.ai_port_space = RDMA_PS_TCP;
    res = NULL;

    char* hostname = const_cast<char*>(hp.hostname);
    char* port_str = const_cast<char*>(hp.port_str);
    
    if (rdma_getaddrinfo(hostname, port_str, &hints, &res) < 0) {
	perror("rdma_getaddrinfo");
	exit(1);
    }
    
    struct ibv_qp_init_attr attr;
    memset(&attr, 0, sizeof(attr));
    attr.cap.max_send_wr = PACKET_WINDOW_SIZE; 
    attr.cap.max_recv_wr = PACKET_WINDOW_SIZE; 
    attr.cap.max_send_sge = 1;
    attr.cap.max_recv_sge = 1;
    attr.cap.max_inline_data = 0;
    attr.sq_sig_all = 1;
    
    if (rdma_create_ep(&client_id, res, NULL, &attr) < 0) {
	rdma_freeaddrinfo(res);
	perror("rdma_create_ep");
	exit(1);
    }

    rdma_freeaddrinfo(res);
    
    if (rdma_connect(client_id, NULL) < 0) {
        rdma_destroy_ep(client_id);
	perror("rdma_connect");
	exit(1);
    }
    
    return new RDMACMSocket(client_id);
}
Beispiel #11
0
static int fi_ibv_msg_ep_setname(fid_t ep_fid, void *addr, size_t addrlen)
{
	struct fi_ibv_msg_ep *ep;
	void *save_addr;
	struct rdma_cm_id *id;
	int ret;

	ep = container_of(ep_fid, struct fi_ibv_msg_ep, ep_fid);

	if (addrlen != ep->info->src_addrlen) {
		FI_INFO(&fi_ibv_prov, FI_LOG_EP_CTRL,"addrlen expected: %d, got: %d.\n",
				ep->info->src_addrlen, addrlen);
		return -FI_EINVAL;
	}

	save_addr = ep->info->src_addr;

	ep->info->src_addr = malloc(ep->info->src_addrlen);
	if (!ep->info->src_addr) {
		ret = -FI_ENOMEM;
		goto err1;
	}

	memcpy(ep->info->src_addr, addr, ep->info->src_addrlen);

	ret = fi_ibv_create_ep(NULL, NULL, 0, ep->info, NULL, &id);
	if (ret)
		goto err2;

	if (ep->id)
		rdma_destroy_ep(ep->id);

	ep->id = id;
	free(save_addr);

	return 0;
err2:
	free(ep->info->src_addr);
err1:
	ep->info->src_addr = save_addr;
	return ret;
}
Beispiel #12
0
void RDMACMSocket::setup_verbs_buf() {
        
    this->verbs_buf = Buffer::allocate(PACKET_SIZE * PACKET_WINDOW_SIZE * 2);

    this->verbs_mr = rdma_reg_msgs(this->client_id, this->verbs_buf.addr, this->verbs_buf.size);
    if (this->verbs_mr == NULL) {
	this->verbs_buf.free();
        rdma_destroy_ep(this->client_id);
	perror("rdma_reg_msgs");
	exit(1);
    }

    char* send_buf_begin = this->verbs_buf.addr + PACKET_SIZE * PACKET_WINDOW_SIZE;    
    for (int i = 0; i < PACKET_WINDOW_SIZE; ++i) {
	Buffer recv_buf(this->verbs_buf.addr + i * PACKET_SIZE, PACKET_SIZE);
	post_recv(recv_buf);
	Buffer send_buf(send_buf_begin + i * PACKET_SIZE, PACKET_SIZE);
       	send_bufs.push_back(send_buf);
    }
}
Beispiel #13
0
static int fi_ibv_copy_ifaddr(const char *name, const char *service, uint64_t flags,
		struct fi_info *info)
{
	struct rdma_addrinfo *rai;
	struct fi_info *fi;
	struct rdma_cm_id *id;
	int ret;

	ret = fi_ibv_get_rdma_rai(name, service, flags, NULL, &rai);
	if (ret) {
		FI_WARN(&fi_ibv_prov, FI_LOG_FABRIC,
				"rdma_getaddrinfo failed for name:%s\n", name);
		return ret;
	}
	ret = rdma_create_ep(&id, rai, NULL, NULL);
	if (!ret) {
		for (fi = info; fi; fi = fi->next)
			if (!strncmp(id->verbs->device->name, fi->domain_attr->name,
						strlen(id->verbs->device->name)))
				break;
		if (!fi) {
			FI_WARN(&fi_ibv_prov, FI_LOG_FABRIC,
					"No matching fi_info for device: "
					"%s with address: %s\n",
					id->verbs->device->name, name);
		} else {
			if (fi->src_addr) {
				free(fi->src_addr);
				fi->src_addr = NULL;
			}
			fi_ibv_rai_to_fi(rai, fi);
		}
		rdma_destroy_ep(id);
	}
	rdma_freeaddrinfo(rai);
	return 0;
}
Beispiel #14
0
void fi_ibv_destroy_ep(struct rdma_addrinfo *rai, struct rdma_cm_id **id)
{
	rdma_freeaddrinfo(rai);
	rdma_destroy_ep(*id);
}
Beispiel #15
0
/* Builds a list of interfaces that correspond to active verbs devices */
static int fi_ibv_getifaddrs(struct dlist_entry *verbs_devs)
{
	struct ifaddrs *ifaddr, *ifa;
	char name[INET6_ADDRSTRLEN];
	struct rdma_addrinfo *rai;
	struct rdma_cm_id *id;
	const char *ret_ptr;
	int ret, num_verbs_ifs = 0;

	char *iface = NULL;
	size_t iface_len = 0;
	int exact_match = 0;

	ret = getifaddrs(&ifaddr);
	if (ret) {
	       VERBS_WARN(FI_LOG_FABRIC,
			  "Unable to get interface addresses\n");
		return ret;
	}

	/* select best iface name based on user's input */
	if (fi_param_get_str(&fi_ibv_prov, "iface", &iface) == FI_SUCCESS) {
		iface_len = strlen(iface);
		if (iface_len > IFNAMSIZ) {
			VERBS_INFO(FI_LOG_EP_CTRL,
				   "Too long iface name: %s, max: %d\n",
				   iface, IFNAMSIZ);
			return -FI_EINVAL;
		}
		for (ifa = ifaddr; ifa && !exact_match; ifa = ifa->ifa_next)
			exact_match = !strcmp(ifa->ifa_name, iface);
	}

	for (ifa = ifaddr; ifa; ifa = ifa->ifa_next) {
		if (!ifa->ifa_addr || !(ifa->ifa_flags & IFF_UP) ||
				!strcmp(ifa->ifa_name, "lo"))
			continue;

		if(iface) {
			if(exact_match) {
				if(strcmp(ifa->ifa_name, iface))
					continue;
			} else {
				if(strncmp(ifa->ifa_name, iface, iface_len))
					continue;
			}
		}

		switch (ifa->ifa_addr->sa_family) {
		case AF_INET:
			ret_ptr = inet_ntop(AF_INET, &ofi_sin_addr(ifa->ifa_addr),
				name, INET6_ADDRSTRLEN);
			break;
		case AF_INET6:
			ret_ptr = inet_ntop(AF_INET6, &ofi_sin6_addr(ifa->ifa_addr),
				name, INET6_ADDRSTRLEN);
			break;
		default:
			continue;
		}
		if (!ret_ptr) {
			VERBS_WARN(FI_LOG_FABRIC,
				   "inet_ntop failed: %s(%d)\n",
				   strerror(errno), errno);
			goto err1;
		}

		ret = fi_ibv_create_ep(name, NULL, FI_NUMERICHOST | FI_SOURCE,
				NULL, &rai, &id);
		if (ret)
			continue;

		ret = fi_ibv_add_rai(verbs_devs, id, rai);
		if (ret)
			goto err2;

		VERBS_DBG(FI_LOG_FABRIC, "Found active interface for verbs device: "
			  "%s with address: %s\n",
			  ibv_get_device_name(id->verbs->device), name);

		rdma_destroy_ep(id);

		num_verbs_ifs++;
	}
	freeifaddrs(ifaddr);
	return num_verbs_ifs ? 0 : -FI_ENODATA;
err2:
	rdma_destroy_ep(id);
err1:
	fi_ibv_verbs_devs_free(verbs_devs);
	freeifaddrs(ifaddr);
	return ret;
}
Beispiel #16
0
static int run(void)
{
    struct rdma_addrinfo hints, *res;
    struct ibv_qp_init_attr attr;
    struct ibv_wc wc;
    int ret;

    memset(&hints, 0, sizeof hints);
    hints.ai_port_space = RDMA_PS_TCP;
    ret = rdma_getaddrinfo(server, port, &hints, &res);
    if (ret) {
        printf("rdma_getaddrinfo %d\n", errno);
        return ret;
    }

    memset(&attr, 0, sizeof attr);
    attr.cap.max_send_wr = attr.cap.max_recv_wr = 1;
    attr.cap.max_send_sge = attr.cap.max_recv_sge = 1;
    attr.cap.max_inline_data = 16;
    attr.qp_context = id;
    attr.sq_sig_all = 1;
    ret = rdma_create_ep(&id, res, NULL, &attr);
    rdma_freeaddrinfo(res);
    if (ret) {
        printf("rdma_create_ep %d\n", errno);
        return ret;
    }

    mr = rdma_reg_msgs(id, recv_msg, 16);
    if (!mr) {
        printf("rdma_reg_msgs %d\n", errno);
        return ret;
    }

    ret = rdma_post_recv(id, NULL, recv_msg, 16, mr);
    if (ret) {
        printf("rdma_post_recv %d\n", errno);
        return ret;
    }

    ret = rdma_connect(id, NULL);
    if (ret) {
        printf("rdma_connect %d\n", errno);
        return ret;
    }

    s = get_dtime();
    ret = rdma_post_send(id, NULL, send_msg, 16, NULL, IBV_SEND_INLINE);
    if (ret) {
        printf("rdma_post_send %d\n", errno);
        return ret;
    }
    e = get_dtime();
    ret = rdma_get_recv_comp(id, &wc);
    if (ret <= 0) {
        printf("rdma_get_recv_comp %d\n", ret);
        return ret;
    }

    printf("time %f\n", e - s);

    rdma_disconnect(id);
    rdma_dereg_mr(mr);
    rdma_destroy_ep(id);
    return 0;
}
Beispiel #17
0
static int xrc_test(void)
{
	struct rdma_cm_id *conn_id, *lookup_id;
	struct ibv_qp_init_attr attr;
	struct rdma_conn_param param;
	struct rdma_cm_event *event;
	struct ibv_wc wc;
	int ret;

	conn_id = xrc_listen_recv();
	if (!conn_id)
		return -1;

	ret = xrc_create_srq_listen(rdma_get_local_addr(conn_id),
				    sizeof(struct sockaddr_storage));
	if (ret)
		return -1;

	memset(&attr, 0, sizeof attr);
	attr.qp_type = IBV_QPT_XRC_RECV;
	attr.ext.xrc_recv.xrcd = srq_id->srq->ext.xrc.xrcd;
	ret = rdma_create_qp(conn_id, NULL, &attr);
	if (ret) {
		printf("Unable to create xrc recv qp %d\n", errno);
		return ret;
	}

	ret = rdma_accept(conn_id, NULL);
	if (ret) {
		printf("rdma_accept failed for xrc recv qp %d\n", errno);
		return ret;
	}

	ret = rdma_get_request(srq_id, &lookup_id);
	if (ret) {
		printf("rdma_get_request %d\n", errno);
		return ret;
	}

	mr = rdma_reg_msgs(srq_id, recv_msg, sizeof recv_msg);
	if (!mr) {
		printf("ibv_reg_msgs %d\n", errno);
		return ret;
	}

	ret = rdma_post_recv(srq_id, NULL, recv_msg, sizeof recv_msg, mr);
	if (ret) {
		printf("rdma_post_recv %d\n", errno);
		return ret;
	}

	memset(&param, 0, sizeof param);
	param.qp_num = srq_id->srq->ext.xrc.srq_num;
	ret = rdma_accept(lookup_id, &param);
	if (ret) {
		printf("rdma_accept failed for srqn lookup %d\n", errno);
		return ret;
	}

	rdma_destroy_id(lookup_id);

	ret = rdma_get_recv_comp(srq_id, &wc);
	if (ret <= 0) {
		printf("rdma_get_recv_comp %d\n", ret);
		return ret;
	}

	ret = rdma_get_cm_event(conn_id->channel, &event);
	if (ret || event->event != RDMA_CM_EVENT_DISCONNECTED) {
		printf("Failed to get disconnect event\n");
		return -1;
	}

	rdma_ack_cm_event(event);
	rdma_disconnect(conn_id);
	rdma_destroy_ep(conn_id);
	rdma_dereg_mr(mr);
	rdma_destroy_ep(srq_id);
	rdma_destroy_ep(listen_id);
	return 0;
}
Beispiel #18
0
static int rc_test(void)
{
	struct rdma_addrinfo hints, *res;
	struct ibv_qp_init_attr attr;
	struct ibv_wc wc;
	int ret;

	memset(&hints, 0, sizeof hints);
	hints.ai_flags = RAI_PASSIVE;
	hints.ai_port_space = RDMA_PS_TCP;
	ret = rdma_getaddrinfo(NULL, port, &hints, &res);
	if (ret) {
		printf("rdma_getaddrinfo %d\n", errno);
		return ret;
	}

	memset(&attr, 0, sizeof attr);
	attr.cap.max_send_wr = attr.cap.max_recv_wr = 1;
	attr.cap.max_send_sge = attr.cap.max_recv_sge = 1;
	attr.cap.max_inline_data = sizeof send_msg;
	attr.sq_sig_all = 1;
	ret = rdma_create_ep(&listen_id, res, NULL, &attr);
	rdma_freeaddrinfo(res);
	if (ret) {
		printf("rdma_create_ep %d\n", errno);
		return ret;
	}

	ret = rdma_listen(listen_id, 0);
	if (ret) {
		printf("rdma_listen %d\n", errno);
		return ret;
	}

	ret = rdma_get_request(listen_id, &id);
	if (ret) {
		printf("rdma_get_request %d\n", errno);
		return ret;
	}

	mr = rdma_reg_msgs(id, recv_msg, sizeof recv_msg);
	if (!mr) {
		printf("rdma_reg_msgs %d\n", errno);
		return ret;
	}

	ret = rdma_post_recv(id, NULL, recv_msg, sizeof recv_msg, mr);
	if (ret) {
		printf("rdma_post_recv %d\n", errno);
		return ret;
	}

	ret = rdma_accept(id, NULL);
	if (ret) {
		printf("rdma_accept %d\n", errno);
		return ret;
	}

	ret = rdma_get_recv_comp(id, &wc);
	if (ret <= 0) {
		printf("rdma_get_recv_comp %d\n", ret);
		return ret;
	}

	ret = rdma_post_send(id, NULL, send_msg, sizeof send_msg, NULL, IBV_SEND_INLINE);
	if (ret) {
		printf("rdma_post_send %d\n", errno);
		return ret;
	}

	ret = rdma_get_send_comp(id, &wc);
	if (ret <= 0) {
		printf("rdma_get_send_comp %d\n", ret);
		return ret;
	}

	rdma_disconnect(id);
	rdma_dereg_mr(mr);
	rdma_destroy_ep(id);
	rdma_destroy_ep(listen_id);
	return 0;
}
Beispiel #19
0
static gboolean
process_rdma_event (GIOChannel *source, GIOCondition condition, gpointer data)
{
    // Right now, we don't need 'source'
    // Tell the compiler to ignore it by (void)-ing it
    (void) source;

    if (!G_TRYLOCK (rdma_handling)) {
        g_debug ("RDMA handling will wait for the next dispatch.");
        return TRUE;
    }

    g_debug ("Got message on condition: %i", condition);
    void *payload = ((GList *)data)->data;
    struct kiro_client_connection *cc = (struct kiro_client_connection *)payload;
    struct ibv_wc wc;

    gint num_comp = ibv_poll_cq (cc->conn->recv_cq, 1, &wc);
    if (!num_comp) {
        g_critical ("RDMA event handling was triggered, but there is no completion on the queue");
        goto end_rmda_eh;
    }
    if (num_comp < 0) {
        g_critical ("Failure getting receive completion event from the queue: %s", strerror (errno));
        goto end_rmda_eh;
    }
    g_debug ("Got %i receive events from the queue", num_comp);
    void *cq_ctx;
    struct ibv_cq *cq;
    int err = ibv_get_cq_event (cc->conn->recv_cq_channel, &cq, &cq_ctx);
    if (!err)
        ibv_ack_cq_events (cq, 1);

    struct kiro_connection_context *ctx = (struct kiro_connection_context *)cc->conn->context;
    guint type = ((struct kiro_ctrl_msg *)ctx->cf_mr_recv->mem)->msg_type;
    g_debug ("Received a message from Client %u of type %u", cc->id, type);

    switch (type) {
        case KIRO_PING:
        {
            struct kiro_ctrl_msg *msg = (struct kiro_ctrl_msg *) (ctx->cf_mr_send->mem);
            msg->msg_type = KIRO_PONG;

            if (!send_msg (cc->conn, ctx->cf_mr_send)) {
                g_warning ("Failure while trying to post PONG send: %s", strerror (errno));
                goto done;
            }
            break;
        }
        case KIRO_ACK_RDMA:
        {
            g_debug ("ACK received");
            if (G_TRYLOCK (realloc_timeout)) {
                g_debug ("Client %i has ACKed the reallocation request", cc->id);
                GList *client = g_list_find (realloc_list, (gpointer)cc);
                if (client) {
                    realloc_list = g_list_remove_link (realloc_list, client);
                    if (cc->backup_mri->mr)
                        ibv_dereg_mr (cc->backup_mri->mr);
                    g_free (cc->backup_mri);
                    cc->backup_mri = NULL;
                    g_debug ("Client %i removed from realloc_list", cc->id);
                }
                G_UNLOCK (realloc_timeout);
            }
            break;
        }
        default:
            g_debug ("Message Type is unknow. Ignoring...");
    }

done:
    //Post a generic receive in order to stay responsive to any messages from
    //the client
    if (rdma_post_recv (cc->conn, cc->conn, ctx->cf_mr_recv->mem, ctx->cf_mr_recv->size, ctx->cf_mr_recv->mr)) {
        //TODO: Connection teardown in an event handler routine? Not a good
        //idea...
        g_critical ("Posting generic receive for event handling failed: %s", strerror (errno));
        kiro_destroy_connection_context (&ctx);
        rdma_destroy_ep (cc->conn);
        goto end_rmda_eh;
    }

    ibv_req_notify_cq (cc->conn->recv_cq, 0); // Make the respective Queue push events onto the channel

    g_debug ("Finished RDMA event handling");

end_rmda_eh:
    G_UNLOCK (rdma_handling);
    return TRUE;
}
Beispiel #20
0
int
kiro_server_start (KiroServer *self, const char *address, const char *port, void *mem, size_t mem_size)
{
    g_return_val_if_fail (self != NULL, -1);
    KiroServerPrivate *priv = KIRO_SERVER_GET_PRIVATE (self);

    if (priv->base) {
        g_debug ("Server already started.");
        return -1;
    }

    if (!mem || mem_size == 0) {
        g_warning ("Invalid memory given to provide.");
        return -1;
    }

    struct rdma_addrinfo hints, *res_addrinfo;
    memset (&hints, 0, sizeof (hints));
    hints.ai_port_space = RDMA_PS_IB;
    hints.ai_flags = RAI_PASSIVE;

    char *addr_c = g_strdup (address);
    char *port_c = g_strdup (port);

    int rtn = rdma_getaddrinfo (addr_c, port_c, &hints, &res_addrinfo);
    g_free (addr_c);
    g_free (port_c);

    if (rtn) {
        g_critical ("Failed to create address information: %s", strerror (errno));
        return -1;
    }

    struct ibv_qp_init_attr qp_attr;
    memset (&qp_attr, 0, sizeof (qp_attr));
    qp_attr.cap.max_send_wr = 10;
    qp_attr.cap.max_recv_wr = 10;
    qp_attr.cap.max_send_sge = 1;
    qp_attr.cap.max_recv_sge = 1;
    qp_attr.qp_context = priv->base;
    qp_attr.sq_sig_all = 1;

    if (rdma_create_ep (& (priv->base), res_addrinfo, NULL, &qp_attr)) {
        g_critical ("Endpoint creation failed: %s", strerror (errno));
        g_free (res_addrinfo);
        return -1;
    }
    g_free (res_addrinfo); // No longer needed

    g_debug ("Endpoint created");
    char *addr_local = NULL;
    struct sockaddr *src_addr = rdma_get_local_addr (priv->base);

    if (!src_addr) {
        addr_local = "NONE";
    }
    else {
        addr_local = inet_ntoa (((struct sockaddr_in *)src_addr)->sin_addr);
        /*
        if(src_addr->sa_family == AF_INET)
            addr_local = &(((struct sockaddr_in*)src_addr)->sin_addr);
        else
            addr_local = &(((struct sockaddr_in6*)src_addr)->sin6_addr);
        */
    }

    g_message ("Server bound to address %s:%s", addr_local, port);

    if (rdma_listen (priv->base, 0)) {
        g_critical ("Failed to put server into listening state: %s", strerror (errno));
        rdma_destroy_ep (priv->base);
        return -1;
    }

    priv->mem = mem;
    priv->mem_size = mem_size;
    priv->ec = rdma_create_event_channel();

    if (rdma_migrate_id (priv->base, priv->ec)) {
        g_critical ("Was unable to migrate connection to new Event Channel: %s", strerror (errno));
        rdma_destroy_ep (priv->base);
        return -1;
    }

    priv->main_loop = g_main_loop_new (NULL, FALSE);
    priv->conn_ec = g_io_channel_unix_new (priv->ec->fd);
    g_io_add_watch (priv->conn_ec, G_IO_IN | G_IO_PRI, process_cm_event, (gpointer)priv);
    priv->main_thread = g_thread_new ("KIRO Server main loop", start_server_main_loop, priv->main_loop);

    // We gave control to the main_loop (with add_watch) and don't need our ref
    // any longer
    g_io_channel_unref (priv->conn_ec);


    g_message ("Enpoint listening");
    return 0;
}
Beispiel #21
0
Acceptor::~Acceptor() {
  rdma_destroy_ep(m_cm_id);
}
Beispiel #22
0
static int
fi_ibv_mr_reg(struct fid *fid, const void *buf, size_t len,
	   uint64_t access, uint64_t offset, uint64_t requested_key,
	   uint64_t flags, struct fid_mr **mr, void *context)
{
	struct fi_ibv_mem_desc *md;
	int fi_ibv_access = 0;
	struct fid_domain *domain;

	if (flags)
		return -FI_EBADFLAGS;

	if (fid->fclass != FI_CLASS_DOMAIN) {
		return -FI_EINVAL;
	}
	domain = container_of(fid, struct fid_domain, fid);

	md = calloc(1, sizeof *md);
	if (!md)
		return -FI_ENOMEM;

	md->domain = container_of(domain, struct fi_ibv_domain, domain_fid);
	md->mr_fid.fid.fclass = FI_CLASS_MR;
	md->mr_fid.fid.context = context;
	md->mr_fid.fid.ops = &fi_ibv_mr_ops;

	/* Enable local write access by default for FI_EP_RDM which hides local
	 * registration requirements. This allows to avoid buffering or double
	 * registration */
	if (!(md->domain->info->caps & FI_LOCAL_MR))
		fi_ibv_access |= IBV_ACCESS_LOCAL_WRITE;

	/* Local read access to an MR is enabled by default in verbs */

	if (access & FI_RECV)
		fi_ibv_access |= IBV_ACCESS_LOCAL_WRITE;

	/* iWARP spec requires Remote Write access for an MR that is used
	 * as a data sink for a Remote Read */
	if (access & FI_READ) {
		fi_ibv_access |= IBV_ACCESS_LOCAL_WRITE;
		if (md->domain->verbs->device->transport_type == IBV_TRANSPORT_IWARP)
			fi_ibv_access |= IBV_ACCESS_REMOTE_WRITE;
	}

	if (access & FI_WRITE)
		fi_ibv_access |= IBV_ACCESS_LOCAL_WRITE;

	if (access & FI_REMOTE_READ)
		fi_ibv_access |= IBV_ACCESS_REMOTE_READ;

	/* Verbs requires Local Write access too for Remote Write access */
	if (access & FI_REMOTE_WRITE)
		fi_ibv_access |= IBV_ACCESS_LOCAL_WRITE |
			IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_ATOMIC;

	md->mr = ibv_reg_mr(md->domain->pd, (void *) buf, len, fi_ibv_access);
	if (!md->mr)
		goto err;

	md->mr_fid.mem_desc = (void *) (uintptr_t) md->mr->lkey;
	md->mr_fid.key = md->mr->rkey;
	*mr = &md->mr_fid;
	if(md->domain->eq && (md->domain->eq_flags & FI_REG_MR)) {
		struct fi_eq_entry entry = {
			.fid = &md->mr_fid.fid,
			.context = context
		};
		fi_ibv_eq_write_event(md->domain->eq, FI_MR_COMPLETE,
			 	      &entry, sizeof(entry));
	}
	return 0;

err:
	free(md);
	return -errno;
}

static int fi_ibv_mr_regv(struct fid *fid, const struct iovec * iov,
		size_t count, uint64_t access, uint64_t offset, uint64_t requested_key,
		uint64_t flags, struct fid_mr **mr, void *context)
{
	if (count > VERBS_MR_IOV_LIMIT) {
		VERBS_WARN(FI_LOG_FABRIC,
			   "iov count > %d not supported\n",
			   VERBS_MR_IOV_LIMIT);
		return -FI_EINVAL;
	}
	return fi_ibv_mr_reg(fid, iov->iov_base, iov->iov_len, access, offset,
			requested_key, flags, mr, context);
}

static int fi_ibv_mr_regattr(struct fid *fid, const struct fi_mr_attr *attr,
		uint64_t flags, struct fid_mr **mr)
{
	return fi_ibv_mr_regv(fid, attr->mr_iov, attr->iov_count, attr->access,
			0, attr->requested_key, flags, mr, attr->context);
}

static int fi_ibv_domain_bind(struct fid *fid, struct fid *bfid, uint64_t flags)
{
	struct fi_ibv_domain *domain;
	struct fi_ibv_eq *eq;

	domain = container_of(fid, struct fi_ibv_domain, domain_fid.fid);

	switch (bfid->fclass) {
	case FI_CLASS_EQ:
		eq = container_of(bfid, struct fi_ibv_eq, eq_fid);
		domain->eq = eq;
		domain->eq_flags = flags;
		break;
	default:
		return -EINVAL;
	}

	return 0;
}

static int fi_ibv_domain_close(fid_t fid)
{
	struct fi_ibv_domain *domain;
	int ret;

	domain = container_of(fid, struct fi_ibv_domain, domain_fid.fid);

	if (domain->rdm) {
		rdma_destroy_ep(domain->rdm_cm->listener);
		free(domain->rdm_cm);
	}

	if (domain->pd) {
		ret = ibv_dealloc_pd(domain->pd);
		if (ret)
			return -ret;
		domain->pd = NULL;
	}

	fi_freeinfo(domain->info);
	free(domain);
	return 0;
}

static int fi_ibv_open_device_by_name(struct fi_ibv_domain *domain, const char *name)
{
	struct ibv_context **dev_list;
	int i, ret = -FI_ENODEV;

	if (!name)
		return -FI_EINVAL;

	dev_list = rdma_get_devices(NULL);
	if (!dev_list)
		return -errno;

	for (i = 0; dev_list[i] && ret; i++) {
		if (domain->rdm) {
			ret = strncmp(name, ibv_get_device_name(dev_list[i]->device),
				      strlen(name) - strlen(verbs_rdm_domain.suffix));

		} else {
			ret = strcmp(name, ibv_get_device_name(dev_list[i]->device));
		}

		if (!ret)
			domain->verbs = dev_list[i];
	}
	rdma_free_devices(dev_list);
	return ret;
}

static struct fi_ops fi_ibv_fid_ops = {
	.size = sizeof(struct fi_ops),
	.close = fi_ibv_domain_close,
	.bind = fi_ibv_domain_bind,
	.control = fi_no_control,
	.ops_open = fi_no_ops_open,
};

static struct fi_ops_mr fi_ibv_domain_mr_ops = {
	.size = sizeof(struct fi_ops_mr),
	.reg = fi_ibv_mr_reg,
	.regv = fi_ibv_mr_regv,
	.regattr = fi_ibv_mr_regattr,
};

static struct fi_ops_domain fi_ibv_domain_ops = {
	.size = sizeof(struct fi_ops_domain),
	.av_open = fi_no_av_open,
	.cq_open = fi_ibv_cq_open,
	.endpoint = fi_ibv_open_ep,
	.scalable_ep = fi_no_scalable_ep,
	.cntr_open = fi_no_cntr_open,
	.poll_open = fi_no_poll_open,
	.stx_ctx = fi_no_stx_context,
	.srx_ctx = fi_ibv_srq_context,
};

static struct fi_ops_domain fi_ibv_rdm_domain_ops = {
	.size = sizeof(struct fi_ops_domain),
	.av_open = fi_ibv_rdm_av_open,
	.cq_open = fi_ibv_rdm_cq_open,
	.endpoint = fi_ibv_rdm_open_ep,
	.scalable_ep = fi_no_scalable_ep,
	.cntr_open = fi_rbv_rdm_cntr_open,
	.poll_open = fi_no_poll_open,
	.stx_ctx = fi_no_stx_context,
	.srx_ctx = fi_no_srx_context,
};

static int
fi_ibv_domain(struct fid_fabric *fabric, struct fi_info *info,
	   struct fid_domain **domain, void *context)
{
	struct fi_ibv_domain *_domain;
	struct fi_ibv_fabric *fab;
	struct fi_info *fi;
	int ret;

	fi = fi_ibv_get_verbs_info(info->domain_attr->name);
	if (!fi)
		return -FI_EINVAL;

	fab = container_of(fabric, struct fi_ibv_fabric, util_fabric.fabric_fid);
	ret = ofi_check_domain_attr(&fi_ibv_prov, fabric->api_version,
				    fi->domain_attr, info->domain_attr);
	if (ret)
		return ret;

	_domain = calloc(1, sizeof *_domain);
	if (!_domain)
		return -FI_ENOMEM;

	_domain->info = fi_dupinfo(info);
	if (!_domain->info)
		goto err1;

	_domain->rdm = FI_IBV_EP_TYPE_IS_RDM(info);
	if (_domain->rdm) {
		_domain->rdm_cm = calloc(1, sizeof(*_domain->rdm_cm));
		if (!_domain->rdm_cm) {
			ret = -FI_ENOMEM;
			goto err2;
		}
	}
	ret = fi_ibv_open_device_by_name(_domain, info->domain_attr->name);
	if (ret)
		goto err2;

	_domain->pd = ibv_alloc_pd(_domain->verbs);
	if (!_domain->pd) {
		ret = -errno;
		goto err2;
	}

	_domain->domain_fid.fid.fclass = FI_CLASS_DOMAIN;
	_domain->domain_fid.fid.context = context;
	_domain->domain_fid.fid.ops = &fi_ibv_fid_ops;
	_domain->domain_fid.mr = &fi_ibv_domain_mr_ops;
	if (_domain->rdm) {
		_domain->domain_fid.ops = &fi_ibv_rdm_domain_ops;

		_domain->rdm_cm->ec = rdma_create_event_channel();

		if (!_domain->rdm_cm->ec) {
			VERBS_INFO(FI_LOG_EP_CTRL,
				"Failed to create listener event channel: %s\n",
				strerror(errno));
			ret = -FI_EOTHER;
			goto err2;
		}

		if (fi_fd_nonblock(_domain->rdm_cm->ec->fd) != 0) {
			VERBS_INFO_ERRNO(FI_LOG_EP_CTRL, "fcntl", errno);
			ret = -FI_EOTHER;
			goto err3;
		}

		if (rdma_create_id(_domain->rdm_cm->ec,
				   &_domain->rdm_cm->listener, NULL, RDMA_PS_TCP))
		{
			VERBS_INFO(FI_LOG_EP_CTRL, "Failed to create cm listener: %s\n",
				   strerror(errno));
			ret = -FI_EOTHER;
			goto err3;
		}
		_domain->rdm_cm->is_bound = 0;
	} else {
		_domain->domain_fid.ops = &fi_ibv_domain_ops;
	}
	_domain->fab = fab;

	*domain = &_domain->domain_fid;
	return 0;
err3:
	if (_domain->rdm)
		rdma_destroy_event_channel(_domain->rdm_cm->ec);
err2:
	if (_domain->rdm)
		free(_domain->rdm_cm);
	fi_freeinfo(_domain->info);
err1:
	free(_domain);
	return ret;
}

static int fi_ibv_trywait(struct fid_fabric *fabric, struct fid **fids, int count)
{
	struct fi_ibv_cq *cq;
	int ret, i;

	for (i = 0; i < count; i++) {
		switch (fids[i]->fclass) {
		case FI_CLASS_CQ:
			cq = container_of(fids[i], struct fi_ibv_cq, cq_fid.fid);
			ret = cq->trywait(fids[i]);
			if (ret)
				return ret;
			break;
		case FI_CLASS_EQ:
			/* We are always ready to wait on an EQ since
			 * rdmacm EQ is based on an fd */
			continue;
		case FI_CLASS_CNTR:
		case FI_CLASS_WAIT:
			return -FI_ENOSYS;
		default:
			return -FI_EINVAL;
		}

	}
	return FI_SUCCESS;
}

static int fi_ibv_fabric_close(fid_t fid)
{
	struct fi_ibv_fabric *fab;
	int ret;

	fab = container_of(fid, struct fi_ibv_fabric, util_fabric.fabric_fid.fid);
	ret = ofi_fabric_close(&fab->util_fabric);
	if (ret)
		return ret;
	free(fab);

	return 0;
}

static struct fi_ops fi_ibv_fi_ops = {
	.size = sizeof(struct fi_ops),
	.close = fi_ibv_fabric_close,
	.bind = fi_no_bind,
	.control = fi_no_control,
	.ops_open = fi_no_ops_open,
};

static struct fi_ops_fabric fi_ibv_ops_fabric = {
	.size = sizeof(struct fi_ops_fabric),
	.domain = fi_ibv_domain,
	.passive_ep = fi_ibv_passive_ep,
	.eq_open = fi_ibv_eq_open,
	.wait_open = fi_no_wait_open,
	.trywait = fi_ibv_trywait
};

int fi_ibv_fabric(struct fi_fabric_attr *attr, struct fid_fabric **fabric,
		  void *context)
{
	struct fi_ibv_fabric *fab;
	struct fi_info *info;
	int ret;

	ret = fi_ibv_init_info();
	if (ret)
		return ret;

	fab = calloc(1, sizeof(*fab));
	if (!fab)
		return -FI_ENOMEM;

	for (info = verbs_info; info; info = info->next) {
		ret = ofi_fabric_init(&fi_ibv_prov, info->fabric_attr, attr,
				      &fab->util_fabric, context);
		if (ret != -FI_ENODATA)
			break;
	}
	if (ret) {
		free(fab);
		return ret;
	}

	*fabric = &fab->util_fabric.fabric_fid;
	(*fabric)->fid.ops = &fi_ibv_fi_ops;
	(*fabric)->ops = &fi_ibv_ops_fabric;

	return 0;
}