Example #1
static void build_verbs(IbvConnection *conn, struct ibv_context *verbs)
{
    conn->ibvctx = verbs;
    TEST_Z(conn->pd = ibv_alloc_pd(conn->ibvctx));
    TEST_Z(conn->comp_channel = ibv_create_comp_channel(conn->ibvctx));
    TEST_Z(conn->cq = ibv_create_cq(conn->ibvctx, 10, NULL, conn->comp_channel, 0)); /* cqe=10 is arbitrary */
    TEST_NZ(ibv_req_notify_cq(conn->cq, 0));

    TEST_NZ(pthread_create(&conn->cq_poller_thread, NULL, poll_cq, conn));
}
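The poll_cq() routine handed to pthread_create above is not shown in this example. A minimal sketch of such a thread body, assuming the IbvConnection fields used here and a hypothetical on_completion() handler:

void on_completion(struct ibv_wc *wc); /* hypothetical handler, defined elsewhere */

static void *poll_cq(void *arg)
{
    IbvConnection *conn = (IbvConnection *)arg;
    struct ibv_cq *cq;
    struct ibv_wc wc;
    void *ev_ctx;

    while (1) {
        /* Sleep until the completion channel signals a CQ event. */
        if (ibv_get_cq_event(conn->comp_channel, &cq, &ev_ctx))
            break;
        ibv_ack_cq_events(cq, 1);
        /* Re-arm notification before draining so no completion is missed. */
        if (ibv_req_notify_cq(cq, 0))
            break;
        while (ibv_poll_cq(cq, 1, &wc) > 0)
            on_completion(&wc); /* hypothetical per-completion handler */
    }
    return NULL;
}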
Example #2
CompletionChannel::CompletionChannel(ibv_context* context)
        : mChannel(ibv_create_comp_channel(context)) {
    if (mChannel == nullptr) {
        throw std::system_error(errno, std::generic_category());
    }
    LOG_TRACE("Created completion channel");
}
Example #3
static CcPtr make_cc(CtxPtr ctx) {
    auto ptr = ibv_create_comp_channel(ctx.get());
    if(!ptr) {
        throw std::runtime_error("cannot create completion channel");
    }
    return CcPtr(ptr, ibv_destroy_comp_channel);
}
Example #4
RDMAAdapter::RDMAAdapter()
    : context_(open_default_device()),
      pd_(alloc_protection_domain(context_)) {
  channel_ = ibv_create_comp_channel(context_);
  CHECK(channel_) << "Failed to create completion channel";
  cq_ = ibv_create_cq(context_, MAX_CONCURRENT_WRITES * 2, NULL, channel_, 0);
  CHECK(cq_) << "Failed to create completion queue";
  CHECK(!ibv_req_notify_cq(cq_, 0)) << "Failed to request CQ notification";

  StartInternalThread();
}
Example #5
File: rping.c Project: hkimura/pib
static int rping_setup_qp(struct rping_cb *cb, struct rdma_cm_id *cm_id)
{
	int ret;

	cb->pd = ibv_alloc_pd(cm_id->verbs);
	if (!cb->pd) {
		fprintf(stderr, "ibv_alloc_pd failed\n");
		return errno;
	}
	DEBUG_LOG("created pd %p\n", cb->pd);

	cb->channel = ibv_create_comp_channel(cm_id->verbs);
	if (!cb->channel) {
		fprintf(stderr, "ibv_create_comp_channel failed\n");
		ret = errno;
		goto err1;
	}
	DEBUG_LOG("created channel %p\n", cb->channel);

	cb->cq = ibv_create_cq(cm_id->verbs, RPING_SQ_DEPTH * 2, cb,
				cb->channel, 0);
	if (!cb->cq) {
		fprintf(stderr, "ibv_create_cq failed\n");
		ret = errno;
		goto err2;
	}
	DEBUG_LOG("created cq %p\n", cb->cq);

	ret = ibv_req_notify_cq(cb->cq, 0);
	if (ret) {
		fprintf(stderr, "ibv_create_cq failed\n");
		ret = errno;
		goto err3;
	}

	ret = rping_create_qp(cb);
	if (ret) {
		perror("rdma_create_qp");
		goto err3;
	}
	DEBUG_LOG("created qp %p\n", cb->qp);
	return 0;

err3:
	ibv_destroy_cq(cb->cq);
err2:
	ibv_destroy_comp_channel(cb->channel);
err1:
	ibv_dealloc_pd(cb->pd);
	return ret;
}
Example #6
void build_context(struct ibv_context *verbs)
{
    if (s_ctx) {
        if (s_ctx->ctx != verbs) {
            die("cannot handle events in more than one context.");
        }
        return;
    }

    s_ctx = (rdma_ctx_t *)malloc(sizeof(rdma_ctx_t));

    s_ctx->ctx = verbs;
    TEST_Z(s_ctx->pd = ibv_alloc_pd(s_ctx->ctx));
    TEST_Z(s_ctx->comp_channel = ibv_create_comp_channel(s_ctx->ctx));
    TEST_Z(s_ctx->cq = ibv_create_cq(s_ctx->ctx, 10, NULL, s_ctx->comp_channel, 0)); /* cqe=10 is arbitrary */

    TEST_NZ(ibv_req_notify_cq(s_ctx->cq, 0));
}
Example #7
static void build_context(struct ibv_context *verbs)
{
  if (s_ctx) {
    if (s_ctx->ctx != verbs)
      die("cannot handle events in more than one context.");

    return;
  }

  s_ctx = (struct context *)malloc(sizeof(struct context));

  s_ctx->ctx = verbs;

  TEST_Z(s_ctx->pd = ibv_alloc_pd(s_ctx->ctx));
  TEST_Z(s_ctx->comp_channel = ibv_create_comp_channel(s_ctx->ctx));
  TEST_Z(s_ctx->cq = ibv_create_cq(s_ctx->ctx, 10, NULL, s_ctx->comp_channel, 0)); /* cqe=10 is arbitrary */
  TEST_NZ(ibv_req_notify_cq(s_ctx->cq, 0));

  //  TEST_NZ(pthread_create(&s_ctx->cq_poller_thread, NULL, poll_cq, NULL));
}
Example #8
void Connector::build_context(struct ibv_context* verb_)
{
  if (s_ctx_ && s_ctx_->ctx_ != verb_) {
    log_(ERROR, "cannot handle events in more than one context.")
    exit(EXIT_FAILURE);
  }
  
  s_ctx_ = (struct context*)malloc(sizeof(struct context) );
  
  s_ctx_->ctx_ = verb_;
  
  TEST_Z(s_ctx_->pd_ = ibv_alloc_pd(s_ctx_->ctx_) );
  TEST_Z(s_ctx_->comp_channel_ = ibv_create_comp_channel(s_ctx_->ctx_) );
  TEST_Z(s_ctx_->cq_ = ibv_create_cq(s_ctx_->ctx_, MAX_QP__CQ_LENGTH, NULL, s_ctx_->comp_channel_, 0) );
  TEST_NZ(ibv_req_notify_cq(s_ctx_->cq_, 0) )
  // TODO
  // TEST_NZ(pthread_create(pthread_v.back(), NULL, &Connector::bst_poll_cq, (void*)(this) ) )
  pthread_v.push_back(new pthread_t() );
  wrap_Connector* wrap_ = new wrap_Connector(this, s_ctx_);
  TEST_NZ(pthread_create(pthread_v.back(), NULL, call_poll_cq_w_wrap, wrap_) )
}
Example #9
static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size,
					    int tx_depth, int port,struct user_parameters *user_parm) {
	struct pingpong_context *ctx;
	struct ibv_device_attr device_attr;

	ctx = malloc(sizeof *ctx);
	if (!ctx)
		return NULL;

	ctx->size     = size;
	ctx->tx_depth = tx_depth;
	/* in case of UD need space for the GRH */
	if (user_parm->connection_type==UD) {
		ctx->buf = memalign(page_size, ( size + 40 ) * 2);
		if (!ctx->buf) {
			fprintf(stderr, "Couldn't allocate work buf.\n");
			return NULL;
		}
		memset(ctx->buf, 0, ( size + 40 ) * 2);
	} else {
		ctx->buf = memalign(page_size, size * 2);
		if (!ctx->buf) {
			fprintf(stderr, "Couldn't allocate work buf.\n");
			return NULL;
		}
		memset(ctx->buf, 0, size * 2);
	}

	ctx->post_buf = (char*)ctx->buf + (size - 1);
	ctx->poll_buf = (char*)ctx->buf + (2 * size - 1);

	ctx->context = ibv_open_device(ib_dev);
	if (!ctx->context) {
		fprintf(stderr, "Couldn't get context for %s\n",
			ibv_get_device_name(ib_dev));
		return NULL;
	}
	if (user_parm->mtu == 0) {/*user did not ask for specific mtu */
		if (ibv_query_device(ctx->context, &device_attr)) {
			fprintf(stderr, "Failed to query device props");
			return NULL;
		}
		if (device_attr.vendor_part_id == 23108 || user_parm->gid_index > -1) {
			user_parm->mtu = 1024;
		} else {
			user_parm->mtu = 2048;
		}
	}
    if (user_parm->use_event) {
		ctx->channel = ibv_create_comp_channel(ctx->context);
		if (!ctx->channel) {
			fprintf(stderr, "Couldn't create completion channel\n");
			return NULL;
		}
	} else
		ctx->channel = NULL;
	ctx->pd = ibv_alloc_pd(ctx->context);
	if (!ctx->pd) {
		fprintf(stderr, "Couldn't allocate PD\n");
		return NULL;
	}
	if (user_parm->connection_type==UD) {
		ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, (size + 40 ) * 2,
				     IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_LOCAL_WRITE);
		if (!ctx->mr) {
			fprintf(stderr, "Couldn't allocate MR\n");
			return NULL;
		}
	} else {
		ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, size * 2,
				     IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_LOCAL_WRITE);
		if (!ctx->mr) {
			fprintf(stderr, "Couldn't allocate MR\n");
			return NULL;
		}
	}

	ctx->scq = ibv_create_cq(ctx->context, tx_depth, NULL, ctx->channel, 0);
	if (!ctx->scq) {
		fprintf(stderr, "Couldn't create CQ\n");
		return NULL;
	}
	ctx->rcq = ibv_create_cq(ctx->context, tx_depth, NULL, ctx->channel, 0);
	if (!ctx->rcq) {
		fprintf(stderr, "Couldn't create Recieve CQ\n");
		return NULL;
	}
	{
		struct ibv_qp_init_attr attr;
		memset(&attr, 0, sizeof(struct ibv_qp_init_attr));
		attr.send_cq = ctx->scq;
		attr.recv_cq = ctx->rcq;
		attr.cap.max_send_wr  = tx_depth;
		/* Work around: driver doesn't support
		 * recv_wr = 0 */
		attr.cap.max_recv_wr  = tx_depth;
		attr.cap.max_send_sge = 1;
		attr.cap.max_recv_sge = 1;
		attr.cap.max_inline_data = user_parm->inline_size;
		switch (user_parm->connection_type) {
		case RC :
			attr.qp_type = IBV_QPT_RC;
			break;
		case UC :
			attr.qp_type = IBV_QPT_UC;
			break;
		case UD :
			attr.qp_type = IBV_QPT_UD;
			break;
		default:
			fprintf(stderr, "Unknown connection type %d \n",user_parm->connection_type);
			return NULL;
		}
		attr.sq_sig_all = 0;
		ctx->qp = ibv_create_qp(ctx->pd, &attr);
		if (!ctx->qp) {
			fprintf(stderr, "Couldn't create QP\n");
			return NULL;
		}
	}

	{
		struct ibv_qp_attr attr;
		memset(&attr, 0, sizeof(struct ibv_qp_init_attr));
		attr.qp_state        = IBV_QPS_INIT;
		attr.pkey_index      = 0;
		attr.port_num        = port;
		if (user_parm->connection_type==UD) {
			attr.qkey            = 0x11111111;
		} else {
			attr.qp_access_flags = IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_LOCAL_WRITE;
		}

		if (user_parm->connection_type==UD) {
			if (ibv_modify_qp(ctx->qp, &attr,
					  IBV_QP_STATE              |
					  IBV_QP_PKEY_INDEX         |
					  IBV_QP_PORT               |
					  IBV_QP_QKEY)) {
				fprintf(stderr, "Failed to modify UD QP to INIT\n");
				return NULL;
			}

			if (user_parm->use_mcg) {
				union ibv_gid gid;
				uint8_t mcg_gid[16] = MCG_GID;

				/* use the local QP number as part of the mcg */
				mcg_gid[11] = (user_parm->servername) ? 0 : 1;
				*(uint32_t *)(&mcg_gid[12]) = ctx->qp->qp_num;
				memcpy(gid.raw, mcg_gid, 16);

				if (ibv_attach_mcast(ctx->qp, &gid, MCG_LID)) {
					fprintf(stderr, "Couldn't attach QP to mcg\n");
					return NULL;
				}
			}
		} else if (ibv_modify_qp(ctx->qp, &attr,
					 IBV_QP_STATE              |
					 IBV_QP_PKEY_INDEX         |
					 IBV_QP_PORT               |
					 IBV_QP_ACCESS_FLAGS)) {
			fprintf(stderr, "Failed to modify QP to INIT\n");
			return NULL;
		}
	}
	//send                        
	ctx->wr.wr_id      = PINGPONG_SEND_WRID;
	ctx->wr.sg_list    = &ctx->list;
	ctx->wr.num_sge    = 1;
	ctx->wr.opcode     = IBV_WR_SEND;
	ctx->wr.next       = NULL;
	// receive
	ctx->rwr.wr_id      = PINGPONG_RECV_WRID;
	ctx->rwr.sg_list    = &ctx->recv_list;
	ctx->rwr.num_sge    = 1;
	ctx->rwr.next       = NULL;
	return ctx;
}
Example #10
static int fio_rdmaio_setup_qp(struct thread_data *td)
{
	struct rdmaio_data *rd = td->io_ops->data;
	struct ibv_qp_init_attr init_attr;
	int qp_depth = td->o.iodepth * 2;	/* 2 times of io depth */

	if (rd->is_client == 0)
		rd->pd = ibv_alloc_pd(rd->child_cm_id->verbs);
	else
		rd->pd = ibv_alloc_pd(rd->cm_id->verbs);

	if (rd->pd == NULL) {
		log_err("fio: ibv_alloc_pd fail\n");
		return 1;
	}

	if (rd->is_client == 0)
		rd->channel = ibv_create_comp_channel(rd->child_cm_id->verbs);
	else
		rd->channel = ibv_create_comp_channel(rd->cm_id->verbs);
	if (rd->channel == NULL) {
		log_err("fio: ibv_create_comp_channel fail\n");
		goto err1;
	}

	if (qp_depth < 16)
		qp_depth = 16;

	if (rd->is_client == 0)
		rd->cq = ibv_create_cq(rd->child_cm_id->verbs,
				       qp_depth, rd, rd->channel, 0);
	else
		rd->cq = ibv_create_cq(rd->cm_id->verbs,
				       qp_depth, rd, rd->channel, 0);
	if (rd->cq == NULL) {
		log_err("fio: ibv_create_cq failed\n");
		goto err2;
	}

	if (ibv_req_notify_cq(rd->cq, 0) != 0) {
		log_err("fio: ibv_create_cq failed\n");
		goto err3;
	}

	/* create queue pair */
	memset(&init_attr, 0, sizeof(init_attr));
	init_attr.cap.max_send_wr = qp_depth;
	init_attr.cap.max_recv_wr = qp_depth;
	init_attr.cap.max_recv_sge = 1;
	init_attr.cap.max_send_sge = 1;
	init_attr.qp_type = IBV_QPT_RC;
	init_attr.send_cq = rd->cq;
	init_attr.recv_cq = rd->cq;

	if (rd->is_client == 0) {
		if (rdma_create_qp(rd->child_cm_id, rd->pd, &init_attr) != 0) {
			log_err("fio: rdma_create_qp failed\n");
			goto err3;
		}
		rd->qp = rd->child_cm_id->qp;
	} else {
		if (rdma_create_qp(rd->cm_id, rd->pd, &init_attr) != 0) {
			log_err("fio: rdma_create_qp failed\n");
			goto err3;
		}
		rd->qp = rd->cm_id->qp;
	}

	return 0;

err3:
	ibv_destroy_cq(rd->cq);
err2:
	ibv_destroy_comp_channel(rd->channel);
err1:
	ibv_dealloc_pd(rd->pd);

	return 1;
}
Example #11
int rdma_backend_init(RdmaBackendDev *backend_dev, PCIDevice *pdev,
                      RdmaDeviceResources *rdma_dev_res,
                      const char *backend_device_name, uint8_t port_num,
                      struct ibv_device_attr *dev_attr, CharBackend *mad_chr_be)
{
    int i;
    int ret = 0;
    int num_ibv_devices;
    struct ibv_device **dev_list;

    memset(backend_dev, 0, sizeof(*backend_dev));

    backend_dev->dev = pdev;
    backend_dev->port_num = port_num;
    backend_dev->rdma_dev_res = rdma_dev_res;

    rdma_backend_register_comp_handler(dummy_comp_handler);

    dev_list = ibv_get_device_list(&num_ibv_devices);
    if (!dev_list) {
        rdma_error_report("Failed to get IB devices list");
        return -EIO;
    }

    if (num_ibv_devices == 0) {
        rdma_error_report("No IB devices were found");
        ret = -ENXIO;
        goto out_free_dev_list;
    }

    if (backend_device_name) {
        for (i = 0; dev_list[i]; ++i) {
            if (!strcmp(ibv_get_device_name(dev_list[i]),
                        backend_device_name)) {
                break;
            }
        }

        backend_dev->ib_dev = dev_list[i];
        if (!backend_dev->ib_dev) {
            rdma_error_report("Failed to find IB device %s",
                              backend_device_name);
            ret = -EIO;
            goto out_free_dev_list;
        }
    } else {
        backend_dev->ib_dev = *dev_list;
    }

    rdma_info_report("uverb device %s", backend_dev->ib_dev->dev_name);

    backend_dev->context = ibv_open_device(backend_dev->ib_dev);
    if (!backend_dev->context) {
        rdma_error_report("Failed to open IB device %s",
                          ibv_get_device_name(backend_dev->ib_dev));
        ret = -EIO;
        goto out;
    }

    backend_dev->channel = ibv_create_comp_channel(backend_dev->context);
    if (!backend_dev->channel) {
        rdma_error_report("Failed to create IB communication channel");
        ret = -EIO;
        goto out_close_device;
    }

    ret = init_device_caps(backend_dev, dev_attr);
    if (ret) {
        rdma_error_report("Failed to initialize device capabilities");
        ret = -EIO;
        goto out_destroy_comm_channel;
    }


    ret = mad_init(backend_dev, mad_chr_be);
    if (ret) {
        rdma_error_report("Failed to initialize mad");
        ret = -EIO;
        goto out_destroy_comm_channel;
    }

    backend_dev->comp_thread.run = false;
    backend_dev->comp_thread.is_running = false;

    ah_cache_init();

    goto out_free_dev_list;

out_destroy_comm_channel:
    ibv_destroy_comp_channel(backend_dev->channel);

out_close_device:
    ibv_close_device(backend_dev->context);

out_free_dev_list:
    ibv_free_device_list(dev_list);

out:
    return ret;
}
Example #12
/**
 * @param rx_headroom   Headroom requested by the user.
 * @param rx_priv_len   Length of transport private data to reserve (0 if unused)
 * @param rx_hdr_len    Length of transport network header.
 * @param mss           Maximal segment size (transport limit).
 */
UCS_CLASS_INIT_FUNC(uct_ib_iface_t, uct_ib_iface_ops_t *ops, uct_md_h md,
                    uct_worker_h worker, const uct_iface_params_t *params,
                    unsigned rx_priv_len, unsigned rx_hdr_len, unsigned tx_cq_len,
                    size_t mss, const uct_ib_iface_config_t *config)
{
    uct_ib_device_t *dev = &ucs_derived_of(md, uct_ib_md_t)->dev;
    ucs_status_t status;
    uint8_t port_num;

    UCS_CLASS_CALL_SUPER_INIT(uct_base_iface_t, &ops->super, md, worker,
                              &config->super UCS_STATS_ARG(dev->stats));

    status = uct_ib_device_find_port(dev, params->dev_name, &port_num);
    if (status != UCS_OK) {
        goto err;
    }

    self->ops                      = ops;

    self->config.rx_payload_offset = sizeof(uct_ib_iface_recv_desc_t) +
                                     ucs_max(sizeof(uct_am_recv_desc_t) +
                                             params->rx_headroom,
                                             rx_priv_len + rx_hdr_len);
    self->config.rx_hdr_offset     = self->config.rx_payload_offset - rx_hdr_len;
    self->config.rx_headroom_offset= self->config.rx_payload_offset -
                                     params->rx_headroom;
    self->config.seg_size          = ucs_min(mss, config->super.max_bcopy);
    self->config.tx_max_poll       = config->tx.max_poll;
    self->config.rx_max_poll       = config->rx.max_poll;
    self->config.rx_max_batch      = ucs_min(config->rx.max_batch,
                                             config->rx.queue_len / 4);
    self->config.port_num          = port_num;
    self->config.sl                = config->sl;
    self->config.gid_index         = config->gid_index;

    status = uct_ib_iface_init_pkey(self, config);
    if (status != UCS_OK) {
        goto err;
    }

    status = uct_ib_device_query_gid(dev, self->config.port_num,
                                     self->config.gid_index, &self->gid);
    if (status != UCS_OK) {
        goto err;
    }

    status = uct_ib_iface_init_lmc(self, config);
    if (status != UCS_OK) {
        goto err;
    }

    self->comp_channel = ibv_create_comp_channel(dev->ibv_context);
    if (self->comp_channel == NULL) {
        ucs_error("ibv_create_comp_channel() failed: %m");
        status = UCS_ERR_IO_ERROR;
        goto err_free_path_bits;
    }

    status = ucs_sys_fcntl_modfl(self->comp_channel->fd, O_NONBLOCK, 0);
    if (status != UCS_OK) {
        goto err_destroy_comp_channel;
    }

    status = uct_ib_iface_create_cq(self, tx_cq_len, 0, &self->send_cq);
    if (status != UCS_OK) {
        goto err_destroy_comp_channel;
    }

    status = uct_ib_iface_create_cq(self, config->rx.queue_len, config->rx.inl,
                                    &self->recv_cq);
    if (status != UCS_OK) {
        goto err_destroy_send_cq;
    }

    /* Address scope and size */
    if (config->addr_type == UCT_IB_IFACE_ADDRESS_TYPE_AUTO) {
        if (IBV_PORT_IS_LINK_LAYER_ETHERNET(uct_ib_iface_port_attr(self))) {
            self->addr_type = UCT_IB_ADDRESS_TYPE_ETH;
        } else {
            self->addr_type = uct_ib_address_scope(self->gid.global.subnet_prefix);
        }
    } else {
        ucs_assert(config->addr_type < UCT_IB_ADDRESS_TYPE_LAST);
        self->addr_type = config->addr_type;
    }

    self->addr_size  = uct_ib_address_size(self->addr_type);

    ucs_debug("created uct_ib_iface_t headroom_ofs %d payload_ofs %d hdr_ofs %d data_sz %d",
              self->config.rx_headroom_offset, self->config.rx_payload_offset,
              self->config.rx_hdr_offset, self->config.seg_size);

    return UCS_OK;

err_destroy_send_cq:
    ibv_destroy_cq(self->send_cq);
err_destroy_comp_channel:
    ibv_destroy_comp_channel(self->comp_channel);
err_free_path_bits:
    ucs_free(self->path_bits);
err:
    return status;
}
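Example #12 only makes the channel fd non-blocking (O_NONBLOCK) and leaves event handling to the caller's progress loop. A minimal sketch, not UCX code, of driving such a non-blocking completion channel from a poll(2)-based loop with a hypothetical handle_wc() callback:

#include <poll.h>
#include <infiniband/verbs.h>

void handle_wc(struct ibv_wc *wc); /* hypothetical completion handler */

static int wait_and_drain(struct ibv_comp_channel *ch, int timeout_ms)
{
    struct pollfd pfd;
    struct ibv_cq *cq;
    struct ibv_wc wc;
    void *cq_ctx;
    int n;

    pfd.fd = ch->fd;
    pfd.events = POLLIN;
    n = poll(&pfd, 1, timeout_ms);
    if (n <= 0)
        return n;                            /* timeout or poll() error */
    if (ibv_get_cq_event(ch, &cq, &cq_ctx))  /* fd is readable, so this returns at once */
        return -1;
    ibv_ack_cq_events(cq, 1);
    if (ibv_req_notify_cq(cq, 0))            /* re-arm before draining */
        return -1;
    while (ibv_poll_cq(cq, 1, &wc) > 0)
        handle_wc(&wc);
    return 1;
}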
Example #13
static int
rdmasniff_activate(pcap_t *handle)
{
	struct pcap_rdmasniff *priv = handle->priv;
	struct ibv_qp_init_attr qp_init_attr;
	struct ibv_qp_attr qp_attr;
	struct ibv_flow_attr flow_attr;
	struct ibv_port_attr port_attr;
	int i;

	priv->context = ibv_open_device(priv->rdma_device);
	if (!priv->context) {
		pcap_snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
			      "Failed to open device %s", handle->opt.device);
		goto error;
	}

	priv->pd = ibv_alloc_pd(priv->context);
	if (!priv->pd) {
		pcap_snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
			      "Failed to alloc PD for device %s", handle->opt.device);
		goto error;
	}

	priv->channel = ibv_create_comp_channel(priv->context);
	if (!priv->channel) {
		pcap_snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
			      "Failed to create comp channel for device %s", handle->opt.device);
		goto error;
	}

	priv->cq = ibv_create_cq(priv->context, RDMASNIFF_NUM_RECEIVES,
				 NULL, priv->channel, 0);
	if (!priv->cq) {
		pcap_snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
			      "Failed to create CQ for device %s", handle->opt.device);
		goto error;
	}

	ibv_req_notify_cq(priv->cq, 0);

	memset(&qp_init_attr, 0, sizeof qp_init_attr);
	qp_init_attr.send_cq = qp_init_attr.recv_cq = priv->cq;
	qp_init_attr.cap.max_recv_wr = RDMASNIFF_NUM_RECEIVES;
	qp_init_attr.cap.max_recv_sge = 1;
	qp_init_attr.qp_type = IBV_QPT_RAW_PACKET;
	priv->qp = ibv_create_qp(priv->pd, &qp_init_attr);
	if (!priv->qp) {
		pcap_snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
			      "Failed to create QP for device %s", handle->opt.device);
		goto error;
	}

	memset(&qp_attr, 0, sizeof qp_attr);
	qp_attr.qp_state = IBV_QPS_INIT;
	qp_attr.port_num = priv->port_num;
	if (ibv_modify_qp(priv->qp, &qp_attr, IBV_QP_STATE | IBV_QP_PORT)) {
		pcap_snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
			      "Failed to modify QP to INIT for device %s", handle->opt.device);
		goto error;
	}

	memset(&qp_attr, 0, sizeof qp_attr);
	qp_attr.qp_state = IBV_QPS_RTR;
	if (ibv_modify_qp(priv->qp, &qp_attr, IBV_QP_STATE)) {
		pcap_snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
			      "Failed to modify QP to RTR for device %s", handle->opt.device);
		goto error;
	}

	memset(&flow_attr, 0, sizeof flow_attr);
	flow_attr.type = IBV_FLOW_ATTR_SNIFFER;
	flow_attr.size = sizeof flow_attr;
	flow_attr.port = priv->port_num;
	priv->flow = ibv_create_flow(priv->qp, &flow_attr);
	if (!priv->flow) {
		pcap_snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
			      "Failed to create flow for device %s", handle->opt.device);
		goto error;
	}

	handle->bufsize = RDMASNIFF_NUM_RECEIVES * RDMASNIFF_RECEIVE_SIZE;
	handle->buffer = malloc(handle->bufsize);
	if (!handle->buffer) {
		pcap_snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
			      "Failed to allocate receive buffer for device %s", handle->opt.device);
		goto error;
	}

	priv->oneshot_buffer = malloc(RDMASNIFF_RECEIVE_SIZE);
	if (!priv->oneshot_buffer) {
		pcap_snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
			      "Failed to allocate oneshot buffer for device %s", handle->opt.device);
		goto error;
	}

	priv->mr = ibv_reg_mr(priv->pd, handle->buffer, handle->bufsize, IBV_ACCESS_LOCAL_WRITE);
	if (!priv->mr) {
		pcap_snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
			      "Failed to register MR for device %s", handle->opt.device);
		goto error;
	}


	for (i = 0; i < RDMASNIFF_NUM_RECEIVES; ++i) {
		rdmasniff_post_recv(handle, i);
	}

	if (!ibv_query_port(priv->context, priv->port_num, &port_attr) &&
	    port_attr.link_layer == IBV_LINK_LAYER_INFINIBAND) {
		handle->linktype = DLT_INFINIBAND;
	} else {
		handle->linktype = DLT_EN10MB;
	}

	if (handle->snapshot <= 0 || handle->snapshot > RDMASNIFF_RECEIVE_SIZE)
		handle->snapshot = RDMASNIFF_RECEIVE_SIZE;

	handle->offset = 0;
	handle->read_op = rdmasniff_read;
	handle->stats_op = rdmasniff_stats;
	handle->cleanup_op = rdmasniff_cleanup;
	handle->setfilter_op = install_bpf_program;
	handle->setdirection_op = NULL;
	handle->set_datalink_op = NULL;
	handle->getnonblock_op = pcap_getnonblock_fd;
	handle->setnonblock_op = pcap_setnonblock_fd;
	handle->oneshot_callback = rdmasniff_oneshot;
	handle->selectable_fd = priv->channel->fd;

	return 0;

error:
	if (priv->mr) {
		ibv_dereg_mr(priv->mr);
	}

	if (priv->flow) {
		ibv_destroy_flow(priv->flow);
	}

	if (priv->qp) {
		ibv_destroy_qp(priv->qp);
	}

	if (priv->cq) {
		ibv_destroy_cq(priv->cq);
	}

	if (priv->channel) {
		ibv_destroy_comp_channel(priv->channel);
	}

	if (priv->pd) {
		ibv_dealloc_pd(priv->pd);
	}

	if (priv->context) {
		ibv_close_device(priv->context);
	}

	if (priv->oneshot_buffer) {
		free(priv->oneshot_buffer);
	}

	return PCAP_ERROR;
}
Example #14
static inline int mca_oob_ud_device_setup (mca_oob_ud_device_t *device,
                                           struct ibv_device *ib_device)
{
    int rc, port_num;
    struct ibv_device_attr dev_attr;

    OPAL_OUTPUT_VERBOSE((5, mca_oob_base_output, "%s oob:ud:device_setup attempting to setup ib device %p",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (void *) ib_device));

    device->ib_context = ibv_open_device (ib_device);
    if (NULL == device->ib_context) {
        OPAL_OUTPUT_VERBOSE((5, mca_oob_base_output, "%s oob:ud:device_setup error opening device. errno = %d",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), errno));
        return ORTE_ERROR;
    }

    rc = ibv_query_device (device->ib_context, &dev_attr); 
    if (0 != rc) {
        OPAL_OUTPUT_VERBOSE((5, mca_oob_base_output, "%s oob:ud:device_setup error querying device. errno = %d",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), errno));
        return ORTE_ERROR;
    }

    device->ib_channel = ibv_create_comp_channel (device->ib_context);
    if (NULL == device->ib_channel) {
        OPAL_OUTPUT_VERBOSE((5, mca_oob_base_output, "%s oob:ud:device_setup error completing completion channel."
                             "errno = %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), errno));
        return ORTE_ERROR;
    }

    device->ib_pd = ibv_alloc_pd (device->ib_context);
    if (NULL == device->ib_pd) {
        OPAL_OUTPUT_VERBOSE((5, mca_oob_base_output, "%s oob:ud:device_setup error allocating protection domain."
                             "errno = %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), errno));
        return ORTE_ERROR;
    }

    for (port_num = 1 ; port_num <= dev_attr.phys_port_cnt ; ++port_num) {
        mca_oob_ud_port_t *port = OBJ_NEW(mca_oob_ud_port_t);

        if (NULL == port) {
            opal_output (0, "oob:ud:device_setup malloc failure. errno = %d", errno);
            return ORTE_ERR_OUT_OF_RESOURCE;
        }

        port->device = device;
        port->port_num = port_num;

        rc = mca_oob_ud_port_setup (port);
        if (ORTE_SUCCESS != rc) {
            OBJ_RELEASE(port);
            continue;
        }

        opal_list_append (&device->ports, (opal_list_item_t *) port);

	break;
    }

    if (0 == opal_list_get_size(&device->ports)) {
        OPAL_OUTPUT_VERBOSE((5, mca_oob_base_output, "%s oob:ud:device_setup could not init device. no usable "
                             "ports present", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
        return ORTE_ERROR;
    }

    return ORTE_SUCCESS;
}
Example #15
/**
 * \brief Create an RDMA transport server
 *
 * \param cmid The CM id passed up in the connect event
 * \param q_depth A hint from the client on the depth of it's SQ/RQ
 * \param msize The max message size
 * \returns A pointer to the newly allocated transport
 */
Nptrans *
np_rdmatrans_create(struct rdma_cm_id *cmid, int q_depth, int msize)
{
	int i, ret;
	u8 *p;
	struct Nptrans *trans;
	struct Rdmatrans *rdma;
	struct ibv_qp_init_attr qp_attr;
	struct rdma_conn_param cparam;

	rdma = calloc(1, sizeof *rdma);
	if (!rdma)
		goto error;

	ret = pthread_mutex_init(&rdma->lock, NULL);
	if (ret)
		goto error;

	ret = pthread_cond_init(&rdma->cond, NULL);
	if (ret)
		goto error;

	rdma->connected = 0;
	rdma->cm_id = cmid;
	rdma->context = cmid->verbs;
	rdma->q_depth = q_depth;
	rdma->msize = msize + sizeof(Rdmactx);

	rdma->pd = ibv_alloc_pd(rdma->context);
	if (!rdma->pd)
		goto error;

	/* Create receive buffer space and register it */
	rdma->rcv_buf = malloc(rdma->msize * q_depth);
	if (!rdma->rcv_buf)
		goto error;

	rdma->rcv_mr = ibv_reg_mr(rdma->pd, rdma->rcv_buf, rdma->msize * q_depth,
				  IBV_ACCESS_LOCAL_WRITE);
	if (!rdma->rcv_mr)
		goto error;

	/* Create send buffer space and register it */
	rdma->snd_buf = malloc(rdma->msize * q_depth);
	if (!rdma->snd_buf)
		goto error;

	rdma->next_buf = 0;
	rdma->snd_mr = ibv_reg_mr(rdma->pd, rdma->snd_buf, rdma->msize * q_depth, 0);
	if (!rdma->snd_mr)
		goto error;

	rdma->ch = ibv_create_comp_channel(rdma->context);
	if (!rdma->ch)
		goto error;

	rdma->fd = rdma->ch->fd;
	rdma->cq = ibv_create_cq(rdma->context, 2*q_depth, rdma, rdma->ch, 0);
	if (!rdma->cq)
		goto error;

	ibv_req_notify_cq(rdma->cq, 0);

	/* Create the QP */
	memset(&qp_attr, 0, sizeof qp_attr);
	qp_attr.send_cq = rdma->cq;
	qp_attr.recv_cq = rdma->cq;
	qp_attr.cap.max_send_wr = q_depth;
	qp_attr.cap.max_recv_wr = q_depth;
	qp_attr.cap.max_send_sge = 1;
	qp_attr.cap.max_recv_sge = 1;
	qp_attr.cap.max_inline_data = 64;
	qp_attr.qp_type = IBV_QPT_RC;
	ret = rdma_create_qp(rdma->cm_id, rdma->pd, &qp_attr);
	if (ret)
		goto error;
	rdma->qp = rdma->cm_id->qp;

	p = rdma->rcv_buf;
	for (i = 0; i < q_depth; i++)
		rdma_post_recv(rdma, (Rdmactx *)(p + i*rdma->msize));

	trans = np_trans_create(rdma, rdma_trans_recv,
				      rdma_trans_send,
				      rdma_trans_destroy);
	if (!trans)
		goto error;
	rdma->trans = trans;

	memset(&cparam, 0, sizeof(cparam));
	cparam.responder_resources = 1;
	cparam.initiator_depth = 1;
	cparam.private_data = NULL;
	cparam.private_data_len = 0;
	ret = rdma_accept(cmid, &cparam);
	if (ret) {
		np_uerror(ret);
		goto error;
	}

	rdma->connected = 1;
	return trans;

 error:
	if (rdma)
		rdma_trans_destroy(rdma);

	rdma_reject(cmid, NULL, 0);
	return NULL;
}
Example #16
static struct ibv_comp_channel *
mlx5_glue_create_comp_channel(struct ibv_context *context)
{
	return ibv_create_comp_channel(context);
}
Example #17
/**
 * @param rx_headroom   Headroom requested by the user.
 * @param rx_priv_len   Length of transport private data to reserve (0 if unused)
 * @param rx_hdr_len    Length of transport network header.
 * @param mss           Maximal segment size (transport limit).
 */
UCS_CLASS_INIT_FUNC(uct_ib_iface_t, uct_ib_iface_ops_t *ops, uct_md_h md,
                    uct_worker_h worker, const char *dev_name, unsigned rx_headroom,
                    unsigned rx_priv_len, unsigned rx_hdr_len, unsigned tx_cq_len,
                    size_t mss, uct_ib_iface_config_t *config)
{
    uct_ib_device_t *dev = &ucs_derived_of(md, uct_ib_md_t)->dev;
    ucs_status_t status;
    uint8_t port_num;

    UCS_CLASS_CALL_SUPER_INIT(uct_base_iface_t, &ops->super, md, worker,
                              &config->super UCS_STATS_ARG(dev->stats));

    status = uct_ib_device_find_port(dev, dev_name, &port_num);
    if (status != UCS_OK) {
        goto err;
    }

    self->port_num                 = port_num;
    self->sl                       = config->sl;
    self->config.rx_payload_offset = sizeof(uct_ib_iface_recv_desc_t) +
                                     ucs_max(sizeof(uct_am_recv_desc_t) + rx_headroom,
                                             rx_priv_len + rx_hdr_len);
    self->config.rx_hdr_offset     = self->config.rx_payload_offset - rx_hdr_len;
    self->config.rx_headroom_offset= self->config.rx_payload_offset - rx_headroom;
    self->config.seg_size          = ucs_min(mss, config->super.max_bcopy);
    self->config.tx_max_poll       = config->tx.max_poll;
    self->config.rx_max_poll       = config->rx.max_poll;
    self->config.rx_max_batch      = ucs_min(config->rx.max_batch,
                                     config->rx.queue_len / 4);
    self->ops                      = ops;

    status = uct_ib_iface_init_pkey(self, config);
    if (status != UCS_OK) {
        goto err;
    }

    status = uct_ib_iface_init_gid(self, config);
    if (status != UCS_OK) {
        goto err;
    }

    status = uct_ib_iface_init_lmc(self, config);
    if (status != UCS_OK) {
        goto err;
    }

    self->comp_channel = ibv_create_comp_channel(dev->ibv_context);
    if (self->comp_channel == NULL) {
        ucs_error("Failed to create completion channel: %m");
        status = UCS_ERR_IO_ERROR;
        goto err_free_path_bits;
    }

    status = ucs_sys_fcntl_modfl(self->comp_channel->fd, O_NONBLOCK, 0);
    if (status != UCS_OK) {
        goto err_destroy_comp_channel;
    }

    /* TODO inline scatter for send SQ */
    self->send_cq = ibv_create_cq(dev->ibv_context, tx_cq_len,
                                  NULL, self->comp_channel, 0);
    if (self->send_cq == NULL) {
        ucs_error("Failed to create send cq: %m");
        status = UCS_ERR_IO_ERROR;
        goto err_destroy_comp_channel;
    }

    if (config->rx.inl > 32 /*UCT_IB_MLX5_CQE64_MAX_INL*/) {
        ibv_exp_setenv(dev->ibv_context, "MLX5_CQE_SIZE", "128", 1);
    }

    self->recv_cq = ibv_create_cq(dev->ibv_context, config->rx.queue_len,
                                  NULL, self->comp_channel, 0);
    ibv_exp_setenv(dev->ibv_context, "MLX5_CQE_SIZE", "64", 1);

    if (self->recv_cq == NULL) {
        ucs_error("Failed to create recv cq: %m");
        status = UCS_ERR_IO_ERROR;
        goto err_destroy_send_cq;
    }

    if (!uct_ib_device_is_port_ib(dev, self->port_num)) {
        ucs_error("Unsupported link layer");
        status = UCS_ERR_UNSUPPORTED;
        goto err_destroy_recv_cq;
    }

    /* Address scope and size */
    self->addr_scope = uct_ib_address_scope(self->gid.global.subnet_prefix);
    self->addr_size  = uct_ib_address_size(self->addr_scope);

    ucs_debug("created uct_ib_iface_t headroom_ofs %d payload_ofs %d hdr_ofs %d data_sz %d",
              self->config.rx_headroom_offset, self->config.rx_payload_offset,
              self->config.rx_hdr_offset, self->config.seg_size);

    return UCS_OK;

err_destroy_recv_cq:
    ibv_destroy_cq(self->recv_cq);
err_destroy_send_cq:
    ibv_destroy_cq(self->send_cq);
err_destroy_comp_channel:
    ibv_destroy_comp_channel(self->comp_channel);
err_free_path_bits:
    ucs_free(self->path_bits);
err:
    return status;
}
Example #18
struct xfer_context *xfer_rdma_init_ctx(void *ptr, struct xfer_data *data)
{
        struct xfer_context *ctx;
        struct rdma_cm_id *cm_id = NULL;

        ctx = malloc(sizeof *ctx);
        if (!ctx)
                return NULL;

        ctx->tx_depth = data->tx_depth;

        if (data->use_cma) {
                cm_id = (struct rdma_cm_id *)ptr;
                ctx->context = cm_id->verbs;
                if (!ctx->context) {
                        fprintf(stderr, "%d:%s: Unbound cm_id!!\n", pid,
				__func__);
                        return NULL;
                }

        } else {
		// use alternative to CMA here
        }

        ctx->pd = ibv_alloc_pd(ctx->context);
        if (!ctx->pd) {
                fprintf(stderr, "%d:%s: Couldn't allocate PD\n", pid, __func__);
                return NULL;
        }

	// setup the message buffers
	ctx->send_msg = malloc(sizeof(struct message));
	ctx->recv_msg = malloc(sizeof(struct message));

	ctx->recv_mr = ibv_reg_mr(ctx->pd, ctx->recv_msg, sizeof(struct message),
				  IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_LOCAL_WRITE);
	if (!ctx->recv_mr) {
		fprintf(stderr, "%d:%s: Couldn't allocate MR\n", pid, __func__);
		return NULL;
	}
	
	ctx->send_mr = ibv_reg_mr(ctx->pd, ctx->send_msg, sizeof(struct message),
				  IBV_ACCESS_LOCAL_WRITE);
        if (!ctx->send_mr) {
                fprintf(stderr, "%d:%s: Couldn't allocate MR\n", pid, __func__);
                return NULL;
        }

        ctx->ch = ibv_create_comp_channel(ctx->context);
        if (!ctx->ch) {
                fprintf(stderr, "%d:%s: Couldn't create comp channel\n", pid,
			__func__);
                return NULL;
        }

	ctx->cq = ibv_create_cq(ctx->context, ctx->tx_depth+1, ctx, ctx->ch, 0);
        if (!ctx->cq) {
                fprintf(stderr, "%d:%s: Couldn't create CQ\n", pid, __func__);
                return NULL;
        }

	if (ibv_req_notify_cq(ctx->cq, 0)) {
		fprintf(stderr, "%d:%s: Couldn't request CQ notification\n", 
			pid, __func__);
		return NULL;
	}

        struct ibv_qp_init_attr attr = {
		.qp_context = ctx,
                .send_cq = ctx->cq,
                .recv_cq = ctx->cq,
                .cap     = {
                        .max_send_wr  = ctx->tx_depth+1,
                        .max_recv_wr  = ctx->tx_depth+1,
                        .max_send_sge = 1,
                        .max_recv_sge = 1,
                        .max_inline_data = 0
                },
                .qp_type = IBV_QPT_RC,
		.sq_sig_all = 1,
		.srq = NULL
        };

	if (data->use_cma) {
                if (rdma_create_qp(cm_id, ctx->pd, &attr)) {
                        fprintf(stderr, "%d:%s: Couldn't create QP\n", pid, __func__);
                        return NULL;
                }
                ctx->qp = cm_id->qp;
		ctx->cm_id = cm_id;
		// arm the QP
		__xfer_rdma_post_recv(ctx);
                return ctx;
        } else {
		// use an alternative to CMA here
		ctx = NULL;
		return ctx;
	}
}
Example #19
/*****************************************
* Function: resources_create
*****************************************/
static int resources_create(
	struct resources *res)
{
	struct ibv_qp_init_attr qp_init_attr;
	struct ibv_device *ib_dev = NULL;
	size_t size;
	int i;
	int mr_flags = 0;
	int cq_size = 0;
	int num_devices;
	int rc;

	/* if client side */
	if (config.server_name) {
		res->sock = sock_client_connect(config.server_name, config.tcp_port);
		if (res->sock < 0) {
			fprintf(stderr, "failed to establish TCP connection to server %s, port %d\n", 
				config.server_name, config.tcp_port);
			return -1;
		}
	} else {
		fprintf(stdout, "waiting on port %d for TCP connection\n", config.tcp_port);

		res->sock = sock_daemon_connect(config.tcp_port);
		if (res->sock < 0) {
			fprintf(stderr, "failed to establish TCP connection with client on port %d\n", 
				config.tcp_port);
			return -1;
		}
	}

	fprintf(stdout, "TCP connection was established\n");

	fprintf(stdout, "searching for IB devices in host\n");

	/* get device names in the system */
	res->dev_list = ibv_get_device_list(&num_devices);
	if (!res->dev_list) {
		fprintf(stderr, "failed to get IB devices list\n");
		return 1;
	}

	/* if there isn't any IB device in host */
	if (!num_devices) {
		fprintf(stderr, "found %d device(s)\n", num_devices);
		return 1;
	}

	fprintf(stdout, "found %d device(s)\n", num_devices);

	/* search for the specific device we want to work with */
	for (i = 0; i < num_devices; i ++) {
		if (!strcmp(ibv_get_device_name(res->dev_list[i]), config.dev_name)) {
			ib_dev = res->dev_list[i];
			break;
		}
	}

	/* if the device wasn't found in host */
	if (!ib_dev) {
		fprintf(stderr, "IB device %s wasn't found\n", config.dev_name);
		return 1;
	}

	/* get device handle */
	res->ib_ctx = ibv_open_device(ib_dev);
	if (!res->ib_ctx) {
		fprintf(stderr, "failed to open device %s\n", config.dev_name);
		return 1;
	}

	/* query port properties  */
	if (ibv_query_port(res->ib_ctx, config.ib_port, &res->port_attr)) {
		fprintf(stderr, "ibv_query_port on port %u failed\n", config.ib_port);
		return 1;
	}

	/* allocate Protection Domain */
	res->pd = ibv_alloc_pd(res->ib_ctx);
	if (!res->pd) {
		fprintf(stderr, "ibv_alloc_pd failed\n");
		return 1;
	}

	res->comp_channel = ibv_create_comp_channel(res->ib_ctx);
	if (!res->comp_channel) {
		fprintf(stderr, "ibv_create_comp_channel failed\n");
		return 1;
	}

	/* each side will send only one WR, so Completion Queue with 1 entry is enough */
	cq_size = 1;
	res->cq = ibv_create_cq(res->ib_ctx, cq_size, NULL, res->comp_channel, 0);
	if (!res->cq) {
		fprintf(stderr, "failed to create CQ with %u entries\n", cq_size);
		return 1;
	}

	/* Arm the CQ before any completion is expected (to prevent races) */
	rc = ibv_req_notify_cq(res->cq, 0);
	if (rc) {
		fprintf(stderr, "failed to arm the CQ\n");
		return 1;
	}
	fprintf(stdout, "CQ was armed\n");

	/* allocate the memory buffer that will hold the data */
	size = MSG_SIZE;
	res->buf = malloc(size);
	if (!res->buf) {
		fprintf(stderr, "failed to malloc %Zu bytes to memory buffer\n", size);
		return 1;
	}

	/* only in the daemon side put the message in the memory buffer */
	if (!config.server_name) {
		strcpy(res->buf, MSG);
		fprintf(stdout, "going to send the message: '%s'\n", res->buf);
	} else
		memset(res->buf, 0, size);

	/* register this memory buffer */
	mr_flags = (config.server_name) ? IBV_ACCESS_LOCAL_WRITE : 0;
	res->mr = ibv_reg_mr(res->pd, res->buf, size, mr_flags);
	if (!res->mr) {
		fprintf(stderr, "ibv_reg_mr failed with mr_flags=0x%x\n", mr_flags);
		return 1;
	}

	fprintf(stdout, "MR was registered with addr=%p, lkey=0x%x, rkey=0x%x, flags=0x%x\n",
			      res->buf, res->mr->lkey, res->mr->rkey, mr_flags);


	/* create the Queue Pair */
	memset(&qp_init_attr, 0, sizeof(qp_init_attr));

	qp_init_attr.qp_type    = IBV_QPT_RC;
	qp_init_attr.sq_sig_all = 1;
	qp_init_attr.send_cq    = res->cq;
	qp_init_attr.recv_cq    = res->cq;
	qp_init_attr.cap.max_send_wr  = 1;
	qp_init_attr.cap.max_recv_wr  = 1;
	qp_init_attr.cap.max_send_sge = 1;
	qp_init_attr.cap.max_recv_sge = 1;

	res->qp = ibv_create_qp(res->pd, &qp_init_attr);
	if (!res->qp) {
		fprintf(stderr, "failed to create QP\n");
		return 1;
	}
	fprintf(stdout, "QP was created, QP number=0x%x\n", res->qp->qp_num);

	return 0;
}
Example #20
static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev,int size,
										    struct perftest_parameters *user_parm) {

	struct pingpong_context *ctx;

	ALLOCATE(ctx,struct pingpong_context,1);
	
	ctx->size     = size;
	ctx->tx_depth = user_parm->tx_depth;

	ctx->buf = memalign(page_size, BUFF_SIZE(size));
	if (!ctx->buf) {
		fprintf(stderr, "Couldn't allocate work buf.\n");
		return NULL;
	}

	memset(ctx->buf, 0, BUFF_SIZE(size));

	ctx->context = ibv_open_device(ib_dev);
	if (!ctx->context) {
		fprintf(stderr, "Couldn't get context for %s\n",ibv_get_device_name(ib_dev));
		return NULL;
	}

	// Finds the link type and configure the HCA accordingly.
	if (ctx_set_link_layer(ctx->context,user_parm)) {
		fprintf(stderr, " Couldn't set the link layer\n");
		return NULL;
	}

	// Configure the Link MTU according to the user or the active mtu.
	if (ctx_set_mtu(ctx->context,user_parm)) {
		fprintf(stderr, "Couldn't set the link layer\n");
		return NULL;
	}

	if (user_parm->use_event) {
		ctx->channel = ibv_create_comp_channel(ctx->context);
		if (!ctx->channel) {
			fprintf(stderr, "Couldn't create completion channel\n");
			return NULL;
		}
	} else
		ctx->channel = NULL;

	ctx->pd = ibv_alloc_pd(ctx->context);
	if (!ctx->pd) {
		fprintf(stderr, "Couldn't allocate PD\n");
		return NULL;
	}

	ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf,BUFF_SIZE(size),IBV_ACCESS_REMOTE_WRITE | 
														   IBV_ACCESS_LOCAL_WRITE  | 
														   IBV_ACCESS_REMOTE_READ);
	if (!ctx->mr) {
		fprintf(stderr, "Couldn't allocate MR\n");
		return NULL;
	}

	// Creates the CQ according to ctx_cq_create in perftest_resources.
	ctx->cq = ctx_cq_create(ctx->context,ctx->channel,user_parm);
	if (!ctx->cq) {
		fprintf(stderr, "Couldn't create CQ\n");
		return NULL;
	}

	ctx->qp = ctx_qp_create(ctx->pd,ctx->cq,ctx->cq,user_parm);
	if (!ctx->qp)  {
		fprintf(stderr, "Couldn't create QP\n");
		return NULL;
	}

	if (ctx_modify_qp_to_init(ctx->qp,user_parm)) {
		fprintf(stderr, "Failed to modify QP to INIT\n");
		return NULL;
	}
	return ctx;
}
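ctx_cq_create() from perftest_resources is not included in this listing. As a rough sketch only (not the real perftest code), it boils down to an ibv_create_cq() call sized from the user parameters and attached to the completion channel only when user_parm->use_event is set:

static struct ibv_cq *ctx_cq_create_sketch(struct ibv_context *context,
                                           struct ibv_comp_channel *channel,
                                           struct perftest_parameters *user_parm)
{
	/* channel is NULL unless event-driven completions were requested */
	return ibv_create_cq(context, user_parm->tx_depth, NULL, channel, 0);
}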
Example #21
int main(int argc, char *argv[]) {
	struct pdata rep_pdata;

	struct rdma_event_channel *cm_channel;
	struct rdma_cm_id *listen_id;
	struct rdma_cm_id *cm_id;
	struct rdma_cm_event *event;
	struct rdma_conn_param conn_param = { };

	struct ibv_pd *pd;
	struct ibv_comp_channel *comp_chan;
	struct ibv_cq *cq;
	struct ibv_cq *evt_cq;
	struct ibv_mr *mr;
	struct ibv_qp_init_attr qp_attr = { };
	struct ibv_sge sge;
	struct ibv_send_wr send_wr = { };
	struct ibv_send_wr *bad_send_wr;
	struct ibv_recv_wr recv_wr = { };
	struct ibv_recv_wr *bad_recv_wr;
	struct ibv_wc wc;
	void *cq_context;

	struct sockaddr_in sin;

	uint32_t *buf;

	int err;

	/* Set up RDMA CM structures */

	cm_channel = rdma_create_event_channel();
	if (!cm_channel)
		return 1;

	err = rdma_create_id(cm_channel, &listen_id, NULL, RDMA_PS_TCP);
	if (err)
		return err;

	sin.sin_family = AF_INET;
	sin.sin_port = htons(20079);
	sin.sin_addr.s_addr = INADDR_ANY;

	/* Bind to local port and listen for connection request */

	err = rdma_bind_addr(listen_id, (struct sockaddr *) &sin);
	if (err)
		return 1;


	err = rdma_listen(listen_id, 1);
	if (err)
		return 1;

	err = rdma_get_cm_event(cm_channel, &event);
	if (err)
		return err;
	printf("after get_cm_event\n");

	if (event->event != RDMA_CM_EVENT_CONNECT_REQUEST)
		return 1;

	cm_id = event->id;

	rdma_ack_cm_event(event);

	/* Create verbs objects now that we know which device to use */

	pd = ibv_alloc_pd(cm_id->verbs);
	if (!pd)
		return 1;

	comp_chan = ibv_create_comp_channel(cm_id->verbs);
	if (!comp_chan)
		return 1;

	cq = ibv_create_cq(cm_id->verbs, 2, NULL, comp_chan, 0);
	if (!cq)
		return 1;

	if (ibv_req_notify_cq(cq, 0))
		return 1;

	buf = calloc(2, sizeof(uint32_t));
	if (!buf)
		return 1;

	mr = ibv_reg_mr(pd, buf, 2 * sizeof(uint32_t),
			IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ
					| IBV_ACCESS_REMOTE_WRITE);
	if (!mr)
		return 1;

	qp_attr.cap.max_send_wr = 1;
	qp_attr.cap.max_send_sge = 1;
	qp_attr.cap.max_recv_wr = 1;
	qp_attr.cap.max_recv_sge = 1;

	qp_attr.send_cq = cq;
	qp_attr.recv_cq = cq;

	qp_attr.qp_type = IBV_QPT_RC;

	err = rdma_create_qp(cm_id, pd, &qp_attr);
	if (err)
		return err;

	/* Post receive before accepting connection */

	sge.addr = (uintptr_t) buf + sizeof(uint32_t);
	sge.length = sizeof(uint32_t);
	sge.lkey = mr->lkey;

	recv_wr.sg_list = &sge;
	recv_wr.num_sge = 1;

	if (ibv_post_recv(cm_id->qp, &recv_wr, &bad_recv_wr))
		return 1;

	rep_pdata.buf_va = htonll((uintptr_t) buf);
	rep_pdata.buf_rkey = htonl(mr->rkey);

	conn_param.responder_resources = 1;
	conn_param.private_data = &rep_pdata;
	conn_param.private_data_len = sizeof rep_pdata;

	/* Accept connection */
	printf("before accept\n");
	err = rdma_accept(cm_id, &conn_param);
	if (err)
		return 1;
	printf("after accept\n");
	err = rdma_get_cm_event(cm_channel, &event);
	if (err)
		return err;

	if (event->event != RDMA_CM_EVENT_ESTABLISHED)
		return 1;

	rdma_ack_cm_event(event);

	/* Wait for receive completion */

	if (ibv_get_cq_event(comp_chan, &evt_cq, &cq_context))
		return 1;

	if (ibv_req_notify_cq(cq, 0))
		return 1;

	if (ibv_poll_cq(cq, 1, &wc) < 1)
		return 1;

	if (wc.status != IBV_WC_SUCCESS)
		return 1;

	/* Add two integers and send reply back */

	buf[0] = htonl(ntohl(buf[0]) + ntohl(buf[1]));

	sge.addr = (uintptr_t) buf;
	sge.length = sizeof(uint32_t);
	sge.lkey = mr->lkey;

	send_wr.opcode = IBV_WR_SEND;
	send_wr.send_flags = IBV_SEND_SIGNALED;
	send_wr.sg_list = &sge;
	send_wr.num_sge = 1;

	if (ibv_post_send(cm_id->qp, &send_wr, &bad_send_wr))
		return 1;

	/* Wait for send completion */

	if (ibv_get_cq_event(comp_chan, &evt_cq, &cq_context))
		return 1;

	if (ibv_poll_cq(cq, 1, &wc) < 1)
		return 1;

	if (wc.status != IBV_WC_SUCCESS)
		return 1;

	printf("before ack cq 2\n");
	ibv_ack_cq_events(cq, 2);

	return 0;
}
Example #22
void network_init() {
    /* Set up RDMA CM structures */

    cm_channel = rdma_create_event_channel();
    assert(cm_channel);

    err = rdma_create_id(cm_channel, &cm_id, 0, RDMA_PS_TCP);
    assert(err == 0);

    /* Resolve server address and route */

    n = getaddrinfo(server_ip, server_port_string, &hints, &res); 
    assert(n >= 0);

    for (t = res; t; t = t->ai_next) {
        err = rdma_resolve_addr(cm_id, 0, t->ai_addr, RESOLVE_TIMEOUT_MS);
        if (!err) break;
    }
    assert(err == 0);

    err = rdma_get_cm_event(cm_channel, &event);
    assert(err == 0);
    assert(event->event == RDMA_CM_EVENT_ADDR_RESOLVED);

    rdma_ack_cm_event(event);

    err = rdma_resolve_route(cm_id, RESOLVE_TIMEOUT_MS);
    assert(err == 0);

    err = rdma_get_cm_event(cm_channel, &event);
    assert(err == 0);
    assert(event->event == RDMA_CM_EVENT_ROUTE_RESOLVED);
    rdma_ack_cm_event(event);

    /* Create verbs objects now that we know which device to use */

    pd = ibv_alloc_pd(cm_id->verbs);
    assert(pd);

    comp_chan = ibv_create_comp_channel(cm_id->verbs);
    assert(comp_chan);

    cq = ibv_create_cq(cm_id->verbs, 10, 0, comp_chan, 0);
    assert(cq);

    err = ibv_req_notify_cq(cq, 0);
    assert(err == 0);

    mr_data = ibv_reg_mr(pd, data, BUFFER_SIZE, 
                         IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE);
    assert(mr_data);

    mr_ack_buffer = ibv_reg_mr(pd, &ack_buffer, sizeof(ack_buffer),
                               IBV_ACCESS_LOCAL_WRITE);
    assert(mr_ack_buffer);

    qp_attr.cap.max_send_wr = 10;
    qp_attr.cap.max_send_sge = 10;
    qp_attr.cap.max_recv_wr = 10;
    qp_attr.cap.max_recv_sge = 10;
    qp_attr.send_cq = cq;
    qp_attr.recv_cq = cq;
    qp_attr.qp_type = IBV_QPT_RC;

    err = rdma_create_qp(cm_id, pd, &qp_attr);
    assert(err == 0);

    /* Post receive for data before connecting */

    sge_data.addr = (uintptr_t)data;
    sge_data.length = BUFFER_SIZE;
    sge_data.lkey = mr_data->lkey;

    recv_wr.sg_list = &sge_data;
    recv_wr.num_sge = 1;

    err = ibv_post_recv(cm_id->qp, &recv_wr, &bad_recv_wr);
    assert(err == 0);

    /* Construct connection params */

    client_pdata.data_va = htonll((uintptr_t)data);
    client_pdata.data_rkey = htonl(mr_data->rkey);

    conn_param.private_data = &client_pdata;
    conn_param.private_data_len = sizeof(client_pdata);
    conn_param.initiator_depth = 1;
    conn_param.retry_count = 7;

    /* Connect to server */

    err = rdma_connect(cm_id, &conn_param);
    assert(err == 0);

    err = rdma_get_cm_event(cm_channel, &event);
    assert(err == 0);
    assert(event->event == RDMA_CM_EVENT_ESTABLISHED);

    memcpy(&server_pdata, event->param.conn.private_data, sizeof(server_pdata));
    rdma_ack_cm_event(event);

    printf("My index == %d\n", server_pdata.index);

    /* Construct connection params */

    client_pdata.index = server_pdata.index;
    client_pdata.ack_buffer_va = server_pdata.ack_buffer_va;
    client_pdata.ack_buffer_rkey = server_pdata.ack_buffer_rkey;
}
Example #23
static int ibw_setup_cq_qp(struct ibw_conn *conn)
{
	struct ibw_ctx_priv *pctx = talloc_get_type(conn->ctx->internal, struct ibw_ctx_priv);
	struct ibw_conn_priv *pconn = talloc_get_type(conn->internal, struct ibw_conn_priv);
	struct ibv_qp_init_attr init_attr;
	struct ibv_qp_attr attr;
	int rc;

	DEBUG(DEBUG_DEBUG, ("ibw_setup_cq_qp(cmid: %p)\n", pconn->cm_id));

	/* init verbs */
	pconn->verbs_channel = ibv_create_comp_channel(pconn->cm_id->verbs);
	if (!pconn->verbs_channel) {
		sprintf(ibw_lasterr, "ibv_create_comp_channel failed %d\n", errno);
		return -1;
	}
	DEBUG(DEBUG_DEBUG, ("created channel %p\n", pconn->verbs_channel));

	pconn->verbs_channel_event = tevent_add_fd(pctx->ectx, NULL, /* not pconn or conn */
		pconn->verbs_channel->fd, TEVENT_FD_READ, ibw_event_handler_verbs, conn);

	pconn->pd = ibv_alloc_pd(pconn->cm_id->verbs);
	if (!pconn->pd) {
		sprintf(ibw_lasterr, "ibv_alloc_pd failed %d\n", errno);
		return -1;
	}
	DEBUG(DEBUG_DEBUG, ("created pd %p\n", pconn->pd));

	/* init mr */
	if (ibw_init_memory(conn))
		return -1;

	/* init cq */
	pconn->cq = ibv_create_cq(pconn->cm_id->verbs,
		pctx->opts.max_recv_wr + pctx->opts.max_send_wr,
		conn, pconn->verbs_channel, 0);
	if (pconn->cq==NULL) {
		sprintf(ibw_lasterr, "ibv_create_cq failed\n");
		return -1;
	}

	rc = ibv_req_notify_cq(pconn->cq, 0);
	if (rc) {
		sprintf(ibw_lasterr, "ibv_req_notify_cq failed with %d\n", rc);
		return rc;
	}

	/* init qp */
	memset(&init_attr, 0, sizeof(init_attr));
	init_attr.cap.max_send_wr = pctx->opts.max_send_wr;
	init_attr.cap.max_recv_wr = pctx->opts.max_recv_wr;
	init_attr.cap.max_recv_sge = 1;
	init_attr.cap.max_send_sge = 1;
	init_attr.qp_type = IBV_QPT_RC;
	init_attr.send_cq = pconn->cq;
	init_attr.recv_cq = pconn->cq;

	rc = rdma_create_qp(pconn->cm_id, pconn->pd, &init_attr);
	if (rc) {
		sprintf(ibw_lasterr, "rdma_create_qp failed with %d\n", rc);
		return rc;
	}
	/* else the result is in pconn->cm_id->qp */

	rc = ibv_query_qp(pconn->cm_id->qp, &attr, IBV_QP_PATH_MTU, &init_attr);
	if (rc) {
		sprintf(ibw_lasterr, "ibv_query_qp failed with %d\n", rc);
		return rc;
	}

	return ibw_fill_cq(conn);
}
Example #24
/**
 * the first step in original MPID_nem_ib_setup_conn() function
 * open hca, create ptags  and create cqs
 */
int MPID_nem_ib_open_ports()
{
    int mpi_errno = MPI_SUCCESS;

    /* Infiniband Verb Structures */
    struct ibv_port_attr    port_attr;
    struct ibv_device_attr  dev_attr;

    int nHca; /* , curRank, rail_index ; */

    MPIDI_STATE_DECL(MPID_STATE_MPIDI_OPEN_HCA);
    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_OPEN_HCA);

    for (nHca = 0; nHca < ib_hca_num_hcas; nHca++) {
        if (ibv_query_device(hca_list[nHca].nic_context, &dev_attr)) {
            MPIU_ERR_SETFATALANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**fail",
                    "**fail %s", "Error getting HCA attributes");
        }

        /* detecting active ports */
        if (rdma_default_port < 0 || ib_hca_num_ports > 1) {
            int nPort;
            int k = 0;
            for (nPort = 1; nPort <= RDMA_DEFAULT_MAX_PORTS; nPort ++) {
                if ((! ibv_query_port(hca_list[nHca].nic_context, nPort, &port_attr)) &&
                            port_attr.state == IBV_PORT_ACTIVE &&
                            (port_attr.lid || (!port_attr.lid && use_iboeth))) {
                    if (use_iboeth) {
                        if (ibv_query_gid(hca_list[nHca].nic_context,
                                        nPort, 0, &hca_list[nHca].gids[k])) {
                            /* new error information function needed */
                            MPIU_ERR_SETFATALANDJUMP1(mpi_errno, MPI_ERR_OTHER,
                                    "**fail", "Failed to retrieve gid on rank %d", process_info.rank);
                        }
                        DEBUG_PRINT("[%d] %s(%d): Getting gid[%d][%d] for"
                                " port %d subnet_prefix = %llx,"
                                " intf_id = %llx\r\n",
                                process_info.rank, __FUNCTION__, __LINE__, nHca, k, k,
                                hca_list[nHca].gids[k].global.subnet_prefix,
                                hca_list[nHca].gids[k].global.interface_id);
                    } else {
                        hca_list[nHca].lids[k]    = port_attr.lid;
                    }
                    hca_list[nHca].ports[k++] = nPort;

                    if (check_attrs(&port_attr, &dev_attr)) {
                        MPIU_ERR_SETFATALANDJUMP1(mpi_errno, MPI_ERR_OTHER,
                                "**fail", "**fail %s",
                                "Attributes failed sanity check");
                    }
                }
            }
            if (k < ib_hca_num_ports) {
                MPIU_ERR_SETFATALANDJUMP1(mpi_errno, MPI_ERR_OTHER,
                        "**activeports", "**activeports %d", ib_hca_num_ports);
            }
        } else {
            if(ibv_query_port(hca_list[nHca].nic_context,
                        rdma_default_port, &port_attr)
                || (!port_attr.lid && !use_iboeth)
                || (port_attr.state != IBV_PORT_ACTIVE)) {
                MPIU_ERR_SETFATALANDJUMP1(mpi_errno, MPI_ERR_OTHER,
                        "**portquery", "**portquery %d", rdma_default_port);
            }

            hca_list[nHca].ports[0] = rdma_default_port;

            if (use_iboeth) {
                if (ibv_query_gid(hca_list[nHca].nic_context, 0, 0, &hca_list[nHca].gids[0])) {
                    /* new error function needed */
                    MPIU_ERR_SETFATALANDJUMP1(mpi_errno, MPI_ERR_OTHER,
                            "**fail", "Failed to retrieve gid on rank %d", process_info.rank);
                }

                if (check_attrs(&port_attr, &dev_attr)) {
                    MPIU_ERR_SETFATALANDJUMP1(mpi_errno, MPI_ERR_OTHER,
                            "**fail", "**fail %s", "Attributes failed sanity check");
                }
            } else {
                hca_list[nHca].lids[0]  = port_attr.lid;
            }
        }

        if (rdma_use_blocking) {
            hca_list[nHca].comp_channel = ibv_create_comp_channel(hca_list[nHca].nic_context);

            if (!hca_list[nHca].comp_channel) {
                MPIU_ERR_SETFATALANDSTMT1(mpi_errno, MPI_ERR_OTHER, goto fn_fail,
                        "**fail", "**fail %s", "cannot create completion channel");
            }

            hca_list[nHca].send_cq_hndl = NULL;
            hca_list[nHca].recv_cq_hndl = NULL;
            hca_list[nHca].cq_hndl = ibv_create_cq(hca_list[nHca].nic_context,
                    rdma_default_max_cq_size, NULL, hca_list[nHca].comp_channel, 0);
            if (!hca_list[nHca].cq_hndl) {
                MPIU_ERR_SETFATALANDSTMT1(mpi_errno, MPI_ERR_OTHER, goto fn_fail,
                        "**fail", "**fail %s", "cannot create cq");
            }

            if (ibv_req_notify_cq(hca_list[nHca].cq_hndl, 0)) {
                MPIU_ERR_SETFATALANDSTMT1(mpi_errno, MPI_ERR_OTHER, goto fn_fail,
                        "**fail", "**fail %s", "cannot request cq notification");
            }
Example #25
/***************************************************************************//**
 * Description 
 * Init rdma global resources
 *
 ******************************************************************************/
static struct thread_context*
init_rdma_thread_resources() {

    struct thread_context *ctx = calloc(1, sizeof(struct thread_context));

    ctx->qp_hash = hashtable_create(1024);

    int num_device;
    if ( !(ctx->device_ctx_list = rdma_get_devices(&num_device)) ) {
        perror("rdma_get_devices()");
        return NULL;
    }
    ctx->device_ctx = *ctx->device_ctx_list;
    if (verbose) {
        printf("Get device: %d\n", num_device); 
    }

    if ( !(ctx->pd = ibv_alloc_pd(ctx->device_ctx)) ) {
        perror("ibv_alloc_pd()");
        return NULL;
    }

    if ( !(ctx->comp_channel = ibv_create_comp_channel(ctx->device_ctx)) ) {
        perror("ibv_create_comp_channel()");
        return NULL;
    }

    struct ibv_srq_init_attr srq_init_attr;
    srq_init_attr.srq_context = NULL;
    srq_init_attr.attr.max_sge = 16;
    srq_init_attr.attr.max_wr = srq_size;
    srq_init_attr.attr.srq_limit = srq_size; /* low-watermark for the IBV_EVENT_SRQ_LIMIT_REACHED event; only takes effect once the SRQ is armed via ibv_modify_srq */

    if ( !(ctx->srq = ibv_create_srq(ctx->pd, &srq_init_attr)) ) {
        perror("ibv_create_srq()");
        return NULL;
    }

    if ( !(ctx->send_cq = ibv_create_cq(ctx->device_ctx, 
                    cq_size, NULL, ctx->comp_channel, 0)) ) {
        perror("ibv_create_cq()");
        return NULL;
    }

    if (0 != ibv_req_notify_cq(ctx->send_cq, 0)) {
        perror("ibv_reg_notify_cq()");
        return NULL;
    }

    if ( !(ctx->recv_cq = ibv_create_cq(ctx->device_ctx, 
                    cq_size, NULL, ctx->comp_channel, 0)) ) {
        perror("ibv_create_cq()");
        return NULL;
    }

    if (0 != ibv_req_notify_cq(ctx->recv_cq, 0)) {
        perror("ibv_reg_notify_cq()");
        return NULL;
    }

    ctx->rsize = BUFF_SIZE;
    ctx->rbuf_list = calloc(buff_per_thread, sizeof(char *));
    ctx->rmr_list = calloc(buff_per_thread, sizeof(struct ibv_mr*));
    ctx->poll_wc = calloc(poll_wc_size, sizeof(struct ibv_wc));

    int i = 0;
    for (i = 0; i < buff_per_thread; ++i) {
        ctx->rbuf_list[i] = malloc(ctx->rsize);
        if (ctx->rbuf_list[i] == 0) {
            break;
        }
    }
    if (i != buff_per_thread) {
        int j = 0;
        for (j = 0; j < i; ++j) {
            free(ctx->rbuf_list[j]);
        }
        free(ctx->rbuf_list);
        ctx->rbuf_list = 0;
    }
    if (!ctx->rmr_list || !ctx->rbuf_list) {
        fprintf(stderr, "out of ctxmory in init_rdma_thread_resources()\n");
        return NULL;
    }

    struct ibv_recv_wr *bad = NULL;
    struct ibv_sge sge;
    struct ibv_recv_wr rwr;
    for (i = 0; i < buff_per_thread; ++i) {
        ctx->rmr_list[i] = ibv_reg_mr(ctx->pd, ctx->rbuf_list[i], ctx->rsize, IBV_ACCESS_LOCAL_WRITE);
        if (!ctx->rmr_list[i]) {
            perror("ibv_reg_mr()");
            return NULL;
        }

        sge.addr = (uintptr_t)ctx->rbuf_list[i];
        sge.length = ctx->rsize;
        sge.lkey = ctx->rmr_list[i]->lkey;

        rwr.wr_id = (uintptr_t)ctx->rmr_list[i];
        rwr.next = NULL;
        rwr.sg_list = &sge;
        rwr.num_sge = 1;

        if (0 != ibv_post_srq_recv(ctx->srq, &rwr, &bad)) {
            perror("ibv_post_srq_recv()");
            return NULL;
        }
    }

    return ctx;
}
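A hedged sketch of the consumer side for the per-thread context built above: block on ctx->comp_channel, ack and re-arm whichever CQ fired, then batch-poll into ctx->poll_wc. handle_recv() is a hypothetical handler (it would also repost the buffer to the SRQ); error handling is abbreviated.

/* Sketch only: one turn of the per-thread completion loop. */
static int poll_thread_once(struct thread_context *ctx)
{
    struct ibv_cq *ev_cq;
    void *ev_ctx;
    int n, i;

    if (ibv_get_cq_event(ctx->comp_channel, &ev_cq, &ev_ctx))
        return -1;
    ibv_ack_cq_events(ev_cq, 1);

    if (ibv_req_notify_cq(ev_cq, 0))    /* re-arm before polling */
        return -1;

    n = ibv_poll_cq(ev_cq, poll_wc_size, ctx->poll_wc);
    for (i = 0; i < n; ++i) {
        if (ctx->poll_wc[i].status != IBV_WC_SUCCESS)
            return -1;
        if (ev_cq == ctx->recv_cq)
            handle_recv(ctx, &ctx->poll_wc[i]);
    }
    return n;
}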
Example #26
0
struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size,
                                     int rx_depth, int port, int use_event,
                                     enum pp_wr_calc_op   calc_op,
                                     enum pp_wr_data_type calc_data_type,
                                     char *calc_operands_str)
{
    struct pingpong_context *ctx;
    int rc;

    ctx = malloc(sizeof *ctx);
    if (!ctx)
        return NULL;
    memset(ctx, 0, sizeof *ctx);

    ctx->size	= size;
    ctx->rx_depth	= rx_depth;

    ctx->calc_op.opcode	= IBV_EXP_CALC_OP_NUMBER;
    ctx->calc_op.data_type	= IBV_EXP_CALC_DATA_TYPE_NUMBER;
    ctx->calc_op.data_size	= IBV_EXP_CALC_DATA_SIZE_NUMBER;

    ctx->buf = memalign(page_size, size);
    if (!ctx->buf) {
        fprintf(stderr, "Couldn't allocate work buf.\n");
        goto clean_ctx;
    }

    memset(ctx->buf, 0, size);

    ctx->net_buf = memalign(page_size, size);
    if (!ctx->net_buf) {
        fprintf(stderr, "Couldn't allocate work buf.\n");
        goto clean_buffer;
    }
    memset(ctx->net_buf, 0, size);

    ctx->context = ibv_open_device(ib_dev);
    if (!ctx->context) {
        fprintf(stderr, "Couldn't get context for %s\n",
                ibv_get_device_name(ib_dev));
        goto clean_net_buf;
    }

    if (use_event) {
        ctx->channel = ibv_create_comp_channel(ctx->context);
        if (!ctx->channel) {
            fprintf(stderr, "Couldn't create completion channel\n");
            goto clean_device;
        }
    } else
        ctx->channel = NULL;

    ctx->pd = ibv_alloc_pd(ctx->context);
    if (!ctx->pd) {
        fprintf(stderr, "Couldn't allocate PD\n");
        goto clean_comp_channel;
    }

    ctx->mr = ibv_reg_mr(ctx->pd, ctx->net_buf, size, IBV_ACCESS_LOCAL_WRITE);
    if (!ctx->mr) {
        fprintf(stderr, "Couldn't register MR\n");
        goto clean_pd;
    }

    if (calc_op != PP_CALC_INVALID) {
        int op_per_gather, num_op, max_num_op;

        ctx->calc_op.opcode	= IBV_EXP_CALC_OP_NUMBER;
        ctx->calc_op.data_type	= IBV_EXP_CALC_DATA_TYPE_NUMBER;
        ctx->calc_op.data_size	= IBV_EXP_CALC_DATA_SIZE_NUMBER;

        num_op = pp_parse_calc_to_gather(calc_operands_str, calc_op, calc_data_type,
                                         &ctx->calc_op, ctx->context, ctx->buf, ctx->net_buf);
        if (num_op < 0) {
            fprintf(stderr, "-E- failed parsing calc operators\n");
            goto clean_mr;
        }

        rc = pp_query_calc_cap(ctx->context,
                               ctx->calc_op.opcode,
                               ctx->calc_op.data_type,
                               ctx->calc_op.data_size,
                               &op_per_gather, &max_num_op);
        if (rc) {
            fprintf(stderr, "-E- operation not supported on %s. valid ops are:\n",
                    ibv_get_device_name(ib_dev));

            pp_print_dev_calc_ops(ctx->context);
            goto clean_mr;
        }

        if (pp_prepare_sg_list(op_per_gather, num_op, ctx->mr->lkey, &ctx->calc_op, ctx->net_buf)) {
            fprintf(stderr, "-failed to prepare the sg list\n");
            goto clean_mr;
        }
    }

    ctx->cq = ibv_create_cq(ctx->context, rx_depth + 1, NULL,
                            ctx->channel, 0);
    if (!ctx->cq) {
        fprintf(stderr, "Couldn't create CQ\n");
        goto clean_mr;
    }

    {
        struct ibv_exp_qp_init_attr attr = {
            .send_cq = ctx->cq,
            .recv_cq = ctx->cq,
            .cap	 = {
                .max_send_wr  = 16,
                .max_recv_wr  = rx_depth,
                .max_send_sge = 16,
                .max_recv_sge = 16
            },
            .qp_type = IBV_QPT_RC,
            .pd = ctx->pd
        };

        attr.comp_mask |= IBV_EXP_QP_INIT_ATTR_CREATE_FLAGS | IBV_EXP_QP_INIT_ATTR_PD;
        attr.exp_create_flags = IBV_EXP_QP_CREATE_CROSS_CHANNEL;
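        /* The CROSS_CHANNEL create flag above (experimental verbs) marks this QP
         * for CORE-Direct style operation: its WQEs can be enabled, and its CQ
         * waited on, by SEND_ENABLE / CQE_WAIT requests posted to a management QP. */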

        ctx->qp = ibv_exp_create_qp(ctx->context, &attr);
        if (!ctx->qp)  {
            fprintf(stderr, "Couldn't create QP\n");
            goto clean_cq;
        }
    }

    {
        struct ibv_qp_attr attr = {
            .qp_state		= IBV_QPS_INIT,
            .pkey_index		= 0,
            .port_num		= port,
            .qp_access_flags	= 0
        };

        if (ibv_modify_qp(ctx->qp, &attr,
                          IBV_QP_STATE		|
                          IBV_QP_PKEY_INDEX	|
                          IBV_QP_PORT		|
                          IBV_QP_ACCESS_FLAGS)) {
            fprintf(stderr, "Failed to modify QP to INIT\n");
            goto clean_qp;
        }

    }

    ctx->mcq = ibv_create_cq(ctx->context, rx_depth + 1, NULL,
                             ctx->channel, 0);
    if (!ctx->mcq) {
        fprintf(stderr, "Couldn't create CQ for MQP\n");
        goto clean_qp;
    }

    {
        struct ibv_exp_qp_init_attr mattr = {
            .send_cq = ctx->mcq,
            .recv_cq = ctx->mcq,
            .cap	 = {
                .max_send_wr  = 1,
                .max_recv_wr  = rx_depth,
                .max_send_sge = 16,
                .max_recv_sge = 16
            },
            .qp_type = IBV_QPT_RC,
            .pd = ctx->pd
        };

        mattr.comp_mask |= IBV_EXP_QP_INIT_ATTR_CREATE_FLAGS | IBV_EXP_QP_INIT_ATTR_PD;
        mattr.exp_create_flags = IBV_EXP_QP_CREATE_CROSS_CHANNEL;

        ctx->mqp = ibv_exp_create_qp(ctx->context, &mattr);
        if (!ctx->mqp)  {
            fprintf(stderr, "Couldn't create MQP\n");
            goto clean_mcq;
        }
    }

    {
        struct ibv_qp_attr mattr = {
            .qp_state		= IBV_QPS_INIT,
            .pkey_index		= 0,
            .port_num		= port,
            .qp_access_flags	= 0
        };

        if (ibv_modify_qp(ctx->mqp, &mattr,
                          IBV_QP_STATE		|
                          IBV_QP_PKEY_INDEX	|
                          IBV_QP_PORT		|
                          IBV_QP_ACCESS_FLAGS)) {
            fprintf(stderr, "Failed to modify MQP to INIT\n");
            goto clean_mqp;
        }
    }

    return ctx;

clean_mqp:
    ibv_destroy_qp(ctx->mqp);

clean_mcq:
    ibv_destroy_cq(ctx->mcq);

clean_qp:
    ibv_destroy_qp(ctx->qp);

clean_cq:
    ibv_destroy_cq(ctx->cq);

clean_mr:
    ibv_dereg_mr(ctx->mr);

clean_pd:
    ibv_dealloc_pd(ctx->pd);

clean_comp_channel:
    if (ctx->channel)
        ibv_destroy_comp_channel(ctx->channel);

clean_device:
    ibv_close_device(ctx->context);

clean_net_buf:
    free(ctx->net_buf);

clean_buffer:
    free(ctx->buf);

clean_ctx:
    free(ctx);

    return NULL;
}

int pp_close_ctx(struct pingpong_context *ctx)
{
    if (ibv_destroy_qp(ctx->qp)) {
        fprintf(stderr, "Couldn't destroy QP\n");
        return 1;
    }


    if (ibv_destroy_qp(ctx->mqp)) {
        fprintf(stderr, "Couldn't destroy MQP\n");
        return 1;
    }


    if (ibv_destroy_cq(ctx->cq)) {
        fprintf(stderr, "Couldn't destroy CQ\n");
        return 1;
    }

    if (ibv_destroy_cq(ctx->mcq)) {
        fprintf(stderr, "Couldn't destroy MCQ\n");
        return 1;
    }

    if (ibv_dereg_mr(ctx->mr)) {
        fprintf(stderr, "Couldn't deregister MR\n");
        return 1;
    }

    if (ibv_dealloc_pd(ctx->pd)) {
        fprintf(stderr, "Couldn't deallocate PD\n");
        return 1;
    }

    if (ctx->channel) {
        if (ibv_destroy_comp_channel(ctx->channel)) {
            fprintf(stderr, "Couldn't destroy completion channel\n");
            return 1;
        }
    }

    if (ibv_close_device(ctx->context)) {
        fprintf(stderr, "Couldn't release context\n");
        return 1;
    }

    free(ctx->buf);
    free(ctx->net_buf);
    free(ctx);

    return 0;
}

static int pp_post_recv(struct pingpong_context *ctx, int n)
{
    int rc;

    struct ibv_sge list = {
        .addr	= (uintptr_t) ctx->net_buf,
        .length = ctx->size,
        .lkey	= ctx->mr->lkey
    };
    struct ibv_recv_wr wr = {
        .wr_id		= PP_RECV_WRID,
        .sg_list	= &list,
        .num_sge	= 1,
    };
    struct ibv_recv_wr *bad_wr;
    int i;

    for (i = 0; i < n; ++i) {
        rc = ibv_post_recv(ctx->qp, &wr, &bad_wr);
        if (rc)
            return rc;
    }

    return i;
}

static int pp_post_send(struct pingpong_context *ctx)
{
    int ret;

    struct ibv_sge list = {
        .addr	= (uintptr_t) ctx->net_buf,
        .length = ctx->size,
        .lkey	= ctx->mr->lkey
    };
    struct ibv_exp_send_wr wr = {
        .wr_id		= PP_SEND_WRID,
        .sg_list	= &list,
        .num_sge	= 1,
        .exp_opcode	= IBV_EXP_WR_SEND,
        .exp_send_flags	= IBV_EXP_SEND_SIGNALED,
    };
    struct ibv_exp_send_wr *bad_wr;
    /* If this is a calc operation - set the required params in the wr */
    if (ctx->calc_op.opcode != IBV_EXP_CALC_OP_NUMBER) {
        wr.exp_opcode  = IBV_EXP_WR_SEND;
        wr.exp_send_flags |= IBV_EXP_SEND_WITH_CALC;
        wr.sg_list = ctx->calc_op.gather_list;
        wr.num_sge = ctx->calc_op.gather_list_size;

        wr.op.calc.calc_op   = ctx->calc_op.opcode;
        wr.op.calc.data_type = ctx->calc_op.data_type;
        wr.op.calc.data_size = ctx->calc_op.data_size;

    }

    ret = ibv_exp_post_send(ctx->qp, &wr, &bad_wr);

    return ret;
}

int pp_post_ext_wqe(struct pingpong_context *ctx, enum ibv_exp_wr_opcode op)
{
    int ret;
    struct ibv_exp_send_wr wr = {
        .wr_id		= PP_CQE_WAIT,
        .sg_list	= NULL,
        .num_sge	= 0,
        .exp_opcode	= op,
        .exp_send_flags	= IBV_EXP_SEND_SIGNALED,
    };
    struct ibv_exp_send_wr *bad_wr;

    switch (op) {
    case IBV_EXP_WR_RECV_ENABLE:
    case IBV_EXP_WR_SEND_ENABLE:

        wr.task.wqe_enable.qp = ctx->qp;
        wr.task.wqe_enable.wqe_count = 0;

        wr.exp_send_flags |= IBV_EXP_SEND_WAIT_EN_LAST;

        break;

    case IBV_EXP_WR_CQE_WAIT:
        wr.task.cqe_wait.cq = ctx->cq;
        wr.task.cqe_wait.cq_count = 1;

        wr.exp_send_flags |=  IBV_EXP_SEND_WAIT_EN_LAST;

        break;

    default:
        fprintf(stderr, "-E- unsupported m_wqe opcode %d\n", op);
        return -1;
    }

    ret = ibv_exp_post_send(ctx->mqp, &wr, &bad_wr);

    return ret;
}

int pp_poll_mcq(struct ibv_cq *cq, int num_cqe)
{
    int ne;
    int i;
    struct ibv_wc wc[2];

    if (num_cqe > 2) {
        fprintf(stderr, "-E- max num cqe exceeded\n");
        return -1;
    }

    do {
        ne = ibv_poll_cq(cq, num_cqe, wc);
        if (ne < 0) {
            fprintf(stderr, "poll CQ failed %d\n", ne);
            return 1;
        }
    } while (ne < 1);

    for (i = 0; i < ne; ++i) {
        if (wc[i].status != IBV_WC_SUCCESS) {
            fprintf(stderr, "Failed %s status %s (%d)\n",
                    wr_id_str[(int)wc[i].wr_id],
                    ibv_wc_status_str(wc[i].status),
                    wc[i].status);
            return 1;
        }

        if ((int) wc[i].wr_id != PP_CQE_WAIT) {
            fprintf(stderr, "invalid wr_id %" PRIx64 "\n", wc[i].wr_id);
            return -1;
        }
    }

    return 0;
}

static int pp_calc_verify(struct pingpong_context *ctx,
                          enum pp_wr_data_type calc_data_type,
                          enum pp_wr_calc_op calc_opcode)
{
    uint64_t *op1 = &(ctx->last_result);
    uint64_t *op2 = (uint64_t *)ctx->buf + 2;
    uint64_t *res = (uint64_t *)ctx->buf;

    return !EXEC_VERIFY(calc_data_type, calc_opcode, 1, op1, op2, res);
}

static int pp_update_last_result(struct pingpong_context *ctx,
                                 enum pp_wr_data_type calc_data_type,
                                 enum pp_wr_calc_op calc_opcode)
{
    /* EXEC_VERIFY dereferences its result parameter, so give it real storage */
    uint64_t scratch;
    uint64_t *dummy = &scratch;

    uint64_t *op1 = (uint64_t *)ctx->buf;
    uint64_t *op2 = (uint64_t *)ctx->buf + 2;
    uint64_t res = (uint64_t)EXEC_VERIFY(calc_data_type, calc_opcode, 0, op1, op2, dummy);

    ctx->last_result = res;
    return 0;
}


static void usage(const char *argv0)
{
    printf("Usage:\n");
    printf("  %s				start a server and wait for connection\n", argv0);
    printf("  %s <host>			connect to server at <host>\n", argv0);
    printf("\n");
    printf("Options:\n");
    printf("  -p, --port=<port>		listen on/connect to port <port> (default 18515)\n");
    printf("  -d, --ib-dev=<dev>		use IB device <dev> (default first device found)\n");
    printf("  -i, --ib-port=<port>		use port <port> of IB device (default 1)\n");
    printf("  -s, --size=<size>		size of message to exchange (default 4096 minimum 16)\n");
    printf("  -m, --mtu=<size>		path MTU (default 1024)\n");
    printf("  -r, --rx-depth=<dep>		number of receives to post at a time (default 500)\n");
    printf("  -n, --iters=<iters>		number of exchanges (default 1000)\n");
    printf("  -l, --sl=<sl>			service level value\n");
    printf("  -e, --events			sleep on CQ events (default poll)\n");
    printf("  -c, --calc=<operation>	calc operation\n");
    printf("  -t, --op_type=<type>		calc operands type\n");
    printf("  -o, --operands=<o1,o2,...>	comma separated list of operands\n");
    printf("  -w, --wait_cq=cqn		wait for entries on cq\n");
    printf("  -v, --verbose			print verbose information\n");
    printf("  -V, --verify			verify calc operations\n");
}
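As a usage note, the cross-channel helpers above are meant to be chained: the management QP can wait for data-CQ completions on the host's behalf. A minimal sketch using only functions defined in this example (the full calc/verify flow is omitted):

/* Hedged sketch: post one send on the data QP, queue a CQE_WAIT on the
 * management QP against ctx->cq, and block until the wait WR completes. */
static int wait_send_via_mqp(struct pingpong_context *ctx)
{
    if (pp_post_send(ctx))
        return 1;
    if (pp_post_ext_wqe(ctx, IBV_EXP_WR_CQE_WAIT))
        return 1;
    return pp_poll_mcq(ctx->mcq, 1);
}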
Example #27
0
int main(int argc, char *argv[])
{
    struct ibv_pd		       *pd1, *pd2;
    struct ibv_comp_channel	       *comp_chan1, *comp_chan2;
    struct ibv_cq		       *cq1, *cq2;
    struct ibv_cq		       *evt_cq = NULL;
    struct ibv_mr		       *mr1, *mr2;
    struct ibv_qp_init_attr		qp_attr1 = { }, qp_attr2 = {};
    struct ibv_sge			sge;
    struct ibv_send_wr		send_wr = { };
    struct ibv_send_wr	       *bad_send_wr = NULL;
    struct ibv_wc			wc;
    struct ibv_qp			*qp1, *qp2;
    void			       *cq_context = NULL;
    union ibv_gid			gid1, gid2;

    int				n;

    uint8_t			       *buf1, *buf2;

    int				err;
    int 				num_devices;
    struct ibv_context	*	verbs1, *verbs2;
    struct ibv_device ** dev_list = ibv_get_device_list(&num_devices);
    struct ibv_device_attr		dev_attr;
    int use = 0;
    int port = 1;
    int x = 0;
    unsigned long mb = 0;
    unsigned long bytes = 0;
    unsigned long save_diff = 0;
    struct timeval start, stop, diff;
    int iterations = 0;

    struct rusage usage;
    struct timeval ustart, uend;
    struct timeval sstart, send;
    struct timeval tstart, tend;

    DPRINTF("There are %d devices\n", num_devices);

    for(x = 0; x < num_devices; x++) {
        printf("Device: %d, %s\n", x, ibv_get_device_name(dev_list[use]));
    }

    if(num_devices == 0 || dev_list == NULL) {
        printf("No devices found\n");
        return 1;
    }

    if(argc < 2) {
        printf("Which RDMA device to use? 0, 1, 2, 3...\n");
        return 1;
    }

    use = atoi(argv[1]);

    DPRINTF("Using device %d\n", use);

    verbs1 = ibv_open_device(dev_list[use]);

    if(verbs1 == NULL) {
        printf("Failed to open device!\n");
        return 1;
    }

    DPRINTF("Device open %s\n", ibv_get_device_name(dev_list[use]));

    verbs2 = ibv_open_device(dev_list[use]);

    if(verbs2 == NULL) {
        printf("Failed to open device again!\n");
        return 1;
    }

    if(ibv_query_device(verbs1, &dev_attr)) {
        printf("Failed to query device attributes.\n");
        return 1;
    }

    printf("Device open: %d, %s which has %d ports\n", x, ibv_get_device_name(dev_list[use]), dev_attr.phys_port_cnt);

    if(argc < 3) {
        printf("Which port on the device to use? 1, 2, 3...\n");
        return 1;
    }

    port = atoi(argv[2]);

    if(port <= 0) {
        printf("Port #%d invalid, must start with 1, 2, 3, ...\n", port);
        return 1;
    }

    printf("Using port %d\n", port);

    if(argc < 4) {
        printf("How many iterations to perform?\n");
        return 1;
    }

    iterations = atoi(argv[3]);
    printf("Will perform %d iterations\n", iterations);

    pd1 = ibv_alloc_pd(verbs1);
    if (!pd1)
        return 1;

    if(argc < 5) {
        printf("How many megabytes to allocate? (This will be allocated twice. Once for source, once for destination.)\n");
        return 1;
    }

    mb = atoi(argv[4]);

    if(mb <= 0) {
        printf("Megabytes %lu invalid\n", mb);
        return 1;
    }

    DPRINTF("protection domain1 allocated\n");

    pd2 = ibv_alloc_pd(verbs2);
    if (!pd2)
        return 1;

    DPRINTF("protection domain2 allocated\n");

    comp_chan1 = ibv_create_comp_channel(verbs1);
    if (!comp_chan1)
        return 1;

    DPRINTF("completion chan1 created\n");

    comp_chan2 = ibv_create_comp_channel(verbs2);
    if (!comp_chan2)
        return 1;

    DPRINTF("completion chan2 created\n");

    cq1 = ibv_create_cq(verbs1, 2, NULL, comp_chan1, 0);
    if (!cq1)
        return 1;

    DPRINTF("CQ1 created\n");

    cq2 = ibv_create_cq(verbs2, 2, NULL, comp_chan2, 0);
    if (!cq2)
        return 1;

    DPRINTF("CQ2 created\n");

    bytes = mb * 1024UL * 1024UL;

    buf1 = malloc(bytes);
    if (!buf1)
        return 1;

    buf2 = malloc(bytes);
    if (!buf2)
        return 1;

    printf("Populating %lu MB memory.\n", mb * 2);

    for(unsigned long off = 0; off < bytes; off++) {
        buf1[off] = 123;
    }

    buf1[bytes - 1] = 123;

    mr1 = ibv_reg_mr(pd1, buf1, bytes, IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ);
    if (!mr1) {
        printf("Failed to register memory.\n");
        return 1;
    }

    mr2 = ibv_reg_mr(pd2, buf2, bytes, IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ);
    if (!mr2) {
        printf("Failed to register memory.\n");
        return 1;
    }

    DPRINTF("memory registered.\n");

    qp_attr1.cap.max_send_wr	 = 10;
    qp_attr1.cap.max_send_sge = 10;
    qp_attr1.cap.max_recv_wr	 = 10;
    qp_attr1.cap.max_recv_sge = 10;
    qp_attr1.sq_sig_all = 1;

    qp_attr1.send_cq		 = cq1;
    qp_attr1.recv_cq		 = cq1;

    qp_attr1.qp_type		 = IBV_QPT_RC;

    qp1 = ibv_create_qp(pd1, &qp_attr1);
    if (!qp1) {
        printf("failed to create queue pair #1\n");
        return 1;
    }

    DPRINTF("queue pair1 created\n");

    qp_attr2.cap.max_send_wr	 = 10;
    qp_attr2.cap.max_send_sge = 10;
    qp_attr2.cap.max_recv_wr	 = 10;
    qp_attr2.cap.max_recv_sge = 10;
    qp_attr2.sq_sig_all = 1;

    qp_attr2.send_cq		 = cq2;
    qp_attr2.recv_cq		 = cq2;

    qp_attr2.qp_type		 = IBV_QPT_RC;


    qp2 = ibv_create_qp(pd2, &qp_attr2);
    if (!qp2) {
        printf("failed to create queue pair #2\n");
        return 1;
    }

    DPRINTF("queue pair2 created\n");

    struct ibv_qp_attr attr1 = {
        .qp_state = IBV_QPS_INIT,
        .pkey_index = 0,
        .port_num = port,
        .qp_access_flags = IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_LOCAL_WRITE,
    };

    if(ibv_modify_qp(qp1, &attr1,
                     IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT | IBV_QP_ACCESS_FLAGS)) {
        printf("verbs 1 Failed to go to init\n");
        return 1;
    }

    DPRINTF("verbs1 to init\n");

    struct ibv_qp_attr attr2 = {
        .qp_state = IBV_QPS_INIT,
        .pkey_index = 0,
        .port_num = port,
        .qp_access_flags = IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_LOCAL_WRITE,
    };

    if(ibv_modify_qp(qp2, &attr2,
                     IBV_QP_STATE |
                     IBV_QP_PKEY_INDEX |
                     IBV_QP_PORT |
                     IBV_QP_ACCESS_FLAGS)) {
        printf("verbs 2 Failed to go to init\n");
        return 1;
    }

    DPRINTF("verbs2 to init\n");

    struct ibv_port_attr port1, port2;
    uint64_t psn1 = lrand48() & 0xffffff;
    uint64_t psn2 = lrand48() & 0xffffff;
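    /* psn1 is qp1's send PSN (and thus qp2's expected receive PSN);
     * psn2 is the reverse. PSNs are 24 bits wide, hence the 0xffffff mask. */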

    if(ibv_query_port(verbs1, port, &port1))
        return 1;

    DPRINTF("got port1 information\n");

    if(ibv_query_port(verbs2, port, &port2))
        return 1;

    DPRINTF("got port2 information\n");

    if(ibv_query_gid(verbs1, port, 0, &gid1))
        return 1;
    DPRINTF("got gid1 information\n");

    if(ibv_query_gid(verbs2, port, 0, &gid2))
        return 1;

    DPRINTF("got gid2 information\n");

    struct ibv_qp_attr next2 = {
        .qp_state = IBV_QPS_RTR,
        .path_mtu = IBV_MTU_1024,
        .dest_qp_num = qp2->qp_num,
        .rq_psn = psn2,
        .max_dest_rd_atomic = 5,
        .min_rnr_timer = 12,
        .ah_attr = {
            .is_global = 0,
            .dlid = port2.lid,
            .sl = 0,
            .src_path_bits = 0,
            .port_num = port,
        }
    };

    if(gid2.global.interface_id) {
        next2.ah_attr.is_global = 1;
        next2.ah_attr.grh.hop_limit = 1;
        next2.ah_attr.grh.dgid = gid2;
        next2.ah_attr.grh.sgid_index = 0;
    }

    struct ibv_qp_attr next1 = {
        .qp_state = IBV_QPS_RTR,
        .path_mtu = IBV_MTU_1024,
        .dest_qp_num = qp1->qp_num,
        .rq_psn = psn1,
        .max_dest_rd_atomic = 1,
        .min_rnr_timer = 12,
        .ah_attr = {
            .is_global = 0,
            .dlid = port1.lid,
            .sl = 0,
            .src_path_bits = 0,
            .port_num = port,
        }
    };

    if(gid1.global.interface_id) {
        next1.ah_attr.is_global = 1;
        next1.ah_attr.grh.hop_limit = 1;
        next1.ah_attr.grh.dgid = gid1;
        next1.ah_attr.grh.sgid_index = 0;
    }

    if(ibv_modify_qp(qp2, &next1,
                     IBV_QP_STATE |
                     IBV_QP_AV |
                     IBV_QP_PATH_MTU |
                     IBV_QP_DEST_QPN |
                     IBV_QP_RQ_PSN |
                     IBV_QP_MAX_DEST_RD_ATOMIC |
                     IBV_QP_MIN_RNR_TIMER)) {
        printf("Failed to modify verbs2 to ready\n");
        return 1;
    }

    DPRINTF("verbs2 RTR\n");

    if(ibv_modify_qp(qp1, &next2,
                     IBV_QP_STATE |
                     IBV_QP_AV |
                     IBV_QP_PATH_MTU |
                     IBV_QP_DEST_QPN |
                     IBV_QP_RQ_PSN |
                     IBV_QP_MAX_DEST_RD_ATOMIC |
                     IBV_QP_MIN_RNR_TIMER)) {
        printf("Failed to modify verbs1 to ready\n");
        return 1;
    }

    DPRINTF("verbs1 RTR\n");

    next2.qp_state = IBV_QPS_RTS;
    next2.timeout = 14;
    next2.retry_cnt = 7;
    next2.rnr_retry = 7;
    next2.sq_psn = psn1;
    next2.max_rd_atomic = 1;

    if(ibv_modify_qp(qp1, &next2,
                     IBV_QP_STATE |
                     IBV_QP_TIMEOUT |
                     IBV_QP_RETRY_CNT |
                     IBV_QP_RNR_RETRY |
                     IBV_QP_SQ_PSN |
                     IBV_QP_MAX_QP_RD_ATOMIC)) {
        printf("Failed again to modify verbs1 to ready\n");
        return 1;
    }

    DPRINTF("verbs1 RTS\n");

    next1.qp_state = IBV_QPS_RTS;
    next1.timeout = 14;
    next1.retry_cnt = 7;
    next1.rnr_retry = 7;
    next1.sq_psn = psn2;
    next1.max_rd_atomic = 1;

    if(ibv_modify_qp(qp2, &next1,
                     IBV_QP_STATE |
                     IBV_QP_TIMEOUT |
                     IBV_QP_RETRY_CNT |
                     IBV_QP_RNR_RETRY |
                     IBV_QP_SQ_PSN |
                     IBV_QP_MAX_QP_RD_ATOMIC)) {
        printf("Failed again to modify verbs2 to ready\n");
        return 1;
    }

    DPRINTF("verbs2 RTS\n");

    printf("Performing RDMA first.\n");
    iterations = atoi(argv[3]);

    getrusage(RUSAGE_SELF, &usage);
    ustart = usage.ru_utime;
    sstart = usage.ru_stime;

    gettimeofday(&tstart, NULL);

    while(iterations-- > 0) {
        sge.addr   = (uintptr_t) buf1;
        sge.length = bytes;
        sge.lkey   = mr1->lkey;

        send_wr.wr_id		    = 1;
        send_wr.opcode		    = IBV_WR_RDMA_WRITE;
        send_wr.sg_list		    = &sge;
        send_wr.num_sge		    = 1;
        send_wr.send_flags          = IBV_SEND_SIGNALED;
        send_wr.wr.rdma.rkey 	    = mr2->rkey;
        send_wr.wr.rdma.remote_addr = (uint64_t) buf2;

        DPRINTF("Iterations left: %d\n", iterations);
        if (ibv_req_notify_cq(cq1, 0))
            return 1;

        DPRINTF("Submitting local RDMA\n");
        gettimeofday(&start, NULL);
        if (ibv_post_send(qp1, &send_wr, &bad_send_wr))
            return 1;

        DPRINTF("RDMA posted %p %p\n", &send_wr, bad_send_wr);

        DPRINTF("blocking...\n");
        if(ibv_get_cq_event(comp_chan1, &evt_cq, &cq_context)) {
            printf("failed to get CQ event\n");
            return 1;
        }
        gettimeofday(&stop, NULL);
        timersub(&stop, &start, &diff);

        DPRINTF("RDMA took: %lu us\n", diff.tv_usec);

        ibv_ack_cq_events(evt_cq, 1);

        DPRINTF("got event\n");

        n = ibv_poll_cq(cq1, 1, &wc);
        if (n > 0) {
            DPRINTF("return from poll: %lu\n", wc.wr_id);
            if (wc.status != IBV_WC_SUCCESS) {
                printf("poll failed %s\n", ibv_wc_status_str(wc.status));
                return 1;
            }

            if (wc.wr_id == 1) {
                DPRINTF("Finished %d bytes %d %d\n", n, buf1[bytes - 1], buf2[bytes - 1]);
            } else {
                printf("didn't find completion\n");
            }
        }

        if (n < 0) {
            printf("poll returned error\n");
            return 1;
        }

        DPRINTF("Poll returned %d bytes %d %d\n", n, buf1[0], buf2[0]);

    }

    gettimeofday(&tend, NULL);

    getrusage(RUSAGE_SELF, &usage);
    uend = usage.ru_utime;
    send = usage.ru_stime;

    save_diff = 0;
    timersub(&uend, &ustart, &diff);
    save_diff += diff.tv_usec;
    printf("User CPU time: %lu us\n", diff.tv_usec);
    timersub(&send, &sstart, &diff);
    save_diff += diff.tv_usec;
    printf("System CPU time: %lu us\n", diff.tv_usec);
    timersub(&tend, &tstart, &diff);
    printf("Sleeping time: %lu us\n", diff.tv_usec - save_diff);
    printf("Wall clock CPU time: %lu us\n", diff.tv_usec);

    iterations = atoi(argv[3]);

    printf("Now using the CPU instead....\n");

    getrusage(RUSAGE_SELF, &usage);
    ustart = usage.ru_utime;
    sstart = usage.ru_stime;

    gettimeofday(&tstart, NULL);

    while(iterations-- > 0) {
        DPRINTF("Repeating without RDMA...\n");

        gettimeofday(&start, NULL);

        memcpy(buf2, buf1, bytes);

        gettimeofday(&stop, NULL);
        timersub(&stop, &start, &diff);
        DPRINTF("Regular copy too took: %lu us\n", diff.tv_usec);
    }

    gettimeofday(&tend, NULL);

    getrusage(RUSAGE_SELF, &usage);
    uend = usage.ru_utime;
    send = usage.ru_stime;

    save_diff = 0;
    timersub(&uend, &ustart, &diff);
    save_diff += diff.tv_usec;
    printf("User CPU time: %lu us\n", diff.tv_usec);
    timersub(&send, &sstart, &diff);
    save_diff += diff.tv_usec;
    printf("System CPU time: %lu us\n", diff.tv_usec);
    timersub(&tend, &tstart, &diff);
    printf("Sleeping time: %lu us\n", diff.tv_usec - save_diff);
    printf("Wall clock CPU time: %lu us\n", diff.tv_usec);
    return 0;
}
Example #28
0
static int pp_init_ctx(char *ib_devname)
{
	struct ibv_srq_init_attr_ex attr;
	struct ibv_xrcd_init_attr xrcd_attr;
	struct ibv_port_attr port_attr;

	ctx.recv_qp = calloc(ctx.num_clients, sizeof *ctx.recv_qp);
	ctx.send_qp = calloc(ctx.num_clients, sizeof *ctx.send_qp);
	ctx.rem_dest = calloc(ctx.num_clients, sizeof *ctx.rem_dest);
	if (!ctx.recv_qp || !ctx.send_qp || !ctx.rem_dest)
		return 1;

	if (open_device(ib_devname)) {
		fprintf(stderr, "Failed to open device\n");
		return 1;
	}

	if (pp_get_port_info(ctx.context, ctx.ib_port, &port_attr)) {
		fprintf(stderr, "Failed to get port info\n");
		return 1;
	}

	ctx.lid = port_attr.lid;
	if (port_attr.link_layer != IBV_LINK_LAYER_ETHERNET && !ctx.lid) {
		fprintf(stderr, "Couldn't get local LID\n");
		return 1;
	}

	ctx.buf = memalign(page_size, ctx.size);
	if (!ctx.buf) {
		fprintf(stderr, "Couldn't allocate work buf.\n");
		return 1;
	}

	memset(ctx.buf, 0, ctx.size);

	if (ctx.use_event) {
		ctx.channel = ibv_create_comp_channel(ctx.context);
		if (!ctx.channel) {
			fprintf(stderr, "Couldn't create completion channel\n");
			return 1;
		}
	}

	ctx.pd = ibv_alloc_pd(ctx.context);
	if (!ctx.pd) {
		fprintf(stderr, "Couldn't allocate PD\n");
		return 1;
	}

	ctx.mr = ibv_reg_mr(ctx.pd, ctx.buf, ctx.size, IBV_ACCESS_LOCAL_WRITE);
	if (!ctx.mr) {
		fprintf(stderr, "Couldn't register MR\n");
		return 1;
	}

	ctx.fd = open("/tmp/xrc_domain", O_RDONLY | O_CREAT, S_IRUSR | S_IRGRP);
	if (ctx.fd < 0) {
		fprintf(stderr,
			"Couldn't create the file for the XRC Domain "
			"but not stopping %d\n", errno);
		ctx.fd = -1;
	}

	memset(&xrcd_attr, 0, sizeof xrcd_attr);
	xrcd_attr.comp_mask = IBV_XRCD_INIT_ATTR_FD | IBV_XRCD_INIT_ATTR_OFLAGS;
	xrcd_attr.fd = ctx.fd;
	xrcd_attr.oflags = O_CREAT;
	ctx.xrcd = ibv_open_xrcd(ctx.context, &xrcd_attr);
	if (!ctx.xrcd) {
		fprintf(stderr, "Couldn't Open the XRC Domain %d\n", errno);
		return 1;
	}

	ctx.recv_cq = ibv_create_cq(ctx.context, ctx.num_clients, &ctx.recv_cq,
				    ctx.channel, 0);
	if (!ctx.recv_cq) {
		fprintf(stderr, "Couldn't create recv CQ\n");
		return 1;
	}

	if (ctx.use_event) {
		if (ibv_req_notify_cq(ctx.recv_cq, 0)) {
			fprintf(stderr, "Couldn't request CQ notification\n");
			return 1;
		}
	}

	ctx.send_cq = ibv_create_cq(ctx.context, ctx.num_clients, NULL, NULL, 0);
	if (!ctx.send_cq) {
		fprintf(stderr, "Couldn't create send CQ\n");
		return 1;
	}

	memset(&attr, 0, sizeof attr);
	attr.attr.max_wr = ctx.num_clients;
	attr.attr.max_sge = 1;
	attr.comp_mask = IBV_SRQ_INIT_ATTR_TYPE | IBV_SRQ_INIT_ATTR_XRCD |
			 IBV_SRQ_INIT_ATTR_CQ | IBV_SRQ_INIT_ATTR_PD;
	attr.srq_type = IBV_SRQT_XRC;
	attr.xrcd = ctx.xrcd;
	attr.cq = ctx.recv_cq;
	attr.pd = ctx.pd;

	ctx.srq = ibv_create_srq_ex(ctx.context, &attr);
	if (!ctx.srq)  {
		fprintf(stderr, "Couldn't create SRQ\n");
		return 1;
	}

	if (create_qps())
		return 1;

	return 0;
}
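The example relies on create_qps(), which is not included in this snippet. A hedged sketch of what one per-client XRC send/receive QP pair might look like with the extended-QP API; the attribute choices here are assumptions, not the original code:

/* Sketch only: build ctx.recv_qp[i] / ctx.send_qp[i] from the resources
 * prepared in pp_init_ctx(). Connection setup (INIT/RTR/RTS) is omitted. */
static int create_one_qp_pair(int i)
{
	struct ibv_qp_init_attr_ex init = {
		.qp_type   = IBV_QPT_XRC_RECV,
		.comp_mask = IBV_QP_INIT_ATTR_XRCD,
		.xrcd      = ctx.xrcd,
	};

	ctx.recv_qp[i] = ibv_create_qp_ex(ctx.context, &init);
	if (!ctx.recv_qp[i])
		return 1;

	memset(&init, 0, sizeof init);
	init.qp_type          = IBV_QPT_XRC_SEND;
	init.send_cq          = ctx.send_cq;
	init.cap.max_send_wr  = 1;
	init.cap.max_send_sge = 1;
	init.comp_mask        = IBV_QP_INIT_ATTR_PD;
	init.pd               = ctx.pd;

	ctx.send_qp[i] = ibv_create_qp_ex(ctx.context, &init);
	if (!ctx.send_qp[i])
		return 1;

	return 0;
}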