コード例 #1
1
/*****************************************
* Function: poll_completion
*****************************************/
static int poll_completion(
	struct resources *res)
{
	struct ibv_wc wc;
	void *ev_ctx;
	struct ibv_cq *ev_cq;
	int rc;


	fprintf(stdout, "waiting for completion event\n");

	/* Wait for the completion event */
	if (ibv_get_cq_event(res->comp_channel, &ev_cq, &ev_ctx)) {
		fprintf(stderr, "failed to get cq_event\n");
		return 1;
	}

	fprintf(stdout, "got completion event\n");

	/* Ack the event */
	ibv_ack_cq_events(ev_cq, 1);

	/* Request notification upon the next completion event */
	rc = ibv_req_notify_cq(ev_cq, 0);
	if (rc) {
		fprintf(stderr, "Couldn't request CQ notification\n");
		return 1;
	}

	/* in a real program, the user should empty the CQ before waiting for the next completion event */

	/* poll the completion that causes thew event (if exists) */
	rc = ibv_poll_cq(res->cq, 1, &wc);
	if (rc < 0) {
		fprintf(stderr, "poll CQ failed\n");
		return 1;
	}

	/* check if the CQ is empty (there can be an event event when the CQ is empty, this can happen 
	   when more than one completion(s) are being created. Here we create only one completion 
	   so empty CQ means there is an error) */
	if (rc == 0) {
		fprintf(stderr, "completion wasn't found in the CQ after timeout\n");
		return 1;
	}

	fprintf(stdout, "completion was found in CQ with status 0x%x\n", wc.status);

	/* check the completion status (here we don't care about the completion opcode */
	if (wc.status != IBV_WC_SUCCESS) {
		fprintf(stderr, "got bad completion with status: 0x%x, vendor syndrome: 0x%x\n", 
			wc.status, wc.vendor_err);
		return 1;
	}

	return 0;
}
コード例 #2
0
ファイル: ib_iface.c プロジェクト: francois-wellenreiter/ucx
/**
 * Drain pending events from the interface completion channel and, when none
 * were pending, arm the CQs selected by wakeup->events.
 *
 * Every event obtained with ibv_get_cq_event() is acknowledged on the CQ it
 * belongs to before the function returns, including on the error path.
 *
 * @param wakeup  Wakeup handle; its iface is used as a uct_ib_iface_t.
 *
 * @return UCS_ERR_IO_ERROR if reading the channel stopped with an errno other
 *         than EAGAIN, UCS_ERR_BUSY if at least one event was drained (the
 *         CQs are then not re-armed), the failing status of an arm operation,
 *         or UCS_OK.
 */
ucs_status_t uct_ib_iface_wakeup_arm(uct_wakeup_h wakeup)
{
    int res, get_event_errno, send_cq_count = 0, recv_cq_count = 0;
    ucs_status_t status;
    struct ibv_cq *cq;
    void *cq_context;
    uct_ib_iface_t *iface = ucs_derived_of(wakeup->iface, uct_ib_iface_t);

    /* Read events until the call fails, counting them per CQ */
    do {
        res = ibv_get_cq_event(iface->comp_channel, &cq, &cq_context);
        if (0 == res) {
            if (iface->send_cq == cq) {
                ++send_cq_count;
            }
            if (iface->recv_cq == cq) {
                ++recv_cq_count;
            }
        }
    } while (res == 0);

    /* Capture errno of the failing read before any other call can change it */
    get_event_errno = errno;

    /* Acknowledge before looking at the error: events that were already
     * retrieved must be acked even when the channel read ultimately failed */
    if (send_cq_count > 0) {
        ibv_ack_cq_events(iface->send_cq, send_cq_count);
    }

    if (recv_cq_count > 0) {
        ibv_ack_cq_events(iface->recv_cq, recv_cq_count);
    }

    if (get_event_errno != EAGAIN) {
        return UCS_ERR_IO_ERROR;
    }

    /* avoid re-arming the interface if any events exists */
    if ((send_cq_count > 0) || (recv_cq_count > 0)) {
        return UCS_ERR_BUSY;
    }

    if (wakeup->events & UCT_WAKEUP_TX_COMPLETION) {
        status = iface->ops->arm_tx_cq(iface);
        if (status != UCS_OK) {
            return status;
        }
    }

    if (wakeup->events & (UCT_WAKEUP_RX_AM | UCT_WAKEUP_RX_SIGNALED_AM)) {
        status = iface->ops->arm_rx_cq(iface, 0);
        if (status != UCS_OK) {
            return status;
        }
    }

    return UCS_OK;
}
コード例 #3
0
ファイル: rdma_client.c プロジェクト: hxmhuang/CFIO2
/*
 * Completion thread body: never leaves its loop.
 *
 * While `paused` is clear, waits for an event on s_ctx->comp_channel,
 * acknowledges it, re-arms the CQ and hands every polled work completion to
 * on_completion(). While `paused` is set, sleeps on resume_cond instead.
 *
 * ctx is only used as storage for the CQ context returned by
 * ibv_get_cq_event().
 */
void * poll_cq(void *ctx)
{
    struct ibv_cq *cq;
    struct ibv_wc wc;

    while (1) {
        if (!paused) {
            TEST_NZ(ibv_get_cq_event(s_ctx->comp_channel, &cq, &ctx));
            ibv_ack_cq_events(cq, 1);
            TEST_NZ(ibv_req_notify_cq(cq, 0));

            /* ibv_poll_cq() returns a negative value on failure, in which
             * case wc is not filled in; only dispatch real completions. */
            while (ibv_poll_cq(cq, 1, &wc) > 0) {
                on_completion(&wc);
            }
        } else {
            /* The outer loop re-checks `paused` after every wakeup */
            pthread_mutex_lock(&mutex);
            pthread_cond_wait(&resume_cond, &mutex);
            pthread_mutex_unlock(&mutex);
        }
    }

    return NULL;
}
コード例 #4
0
/*
 * Post a signaled SEND carrying client_pdata.index and wait for its
 * completion. Failures of the verbs calls are checked with assert(); a
 * completion with a non-success status only produces a warning.
 */
void send_ack() {
    /* Payload: the client index, sent out of ack_buffer */
    ack_buffer = client_pdata.index;

    sge_send.lkey = mr_ack_buffer->lkey;
    sge_send.length = sizeof(ack_buffer);
    sge_send.addr = (uintptr_t)&ack_buffer;

    send_wr.num_sge = 1;
    send_wr.sg_list = &sge_send;
    send_wr.send_flags = IBV_SEND_SIGNALED;
    send_wr.opcode = IBV_WR_SEND;
    send_wr.wr_id = 1;

    err = ibv_post_send(cm_id->qp, &send_wr, &bad_send_wr);
    assert(err == 0);

    /* Block until the completion channel reports an event, then ack it */
    err = ibv_get_cq_event(comp_chan, &evt_cq, &cq_context);
    assert(err == 0);

    ibv_ack_cq_events(evt_cq, 1);

    /* Re-arm notification before fetching the completion */
    err = ibv_req_notify_cq(cq, 0);
    assert(err == 0);

    n = ibv_poll_cq(cq, 1, &wc);
    assert(n >= 1); 

    if (wc.status == IBV_WC_SUCCESS) {
        return;
    }
    printf("Warning: Client %d send ack failed\n", client_pdata.index);
}
コード例 #5
0
ファイル: rping.c プロジェクト: hkimura/pib
/*
 * CQ event thread: waits for events on cb->channel, re-arms cb->cq, runs
 * rping_cq_event_handler() and acknowledges the event.
 *
 * arg is the struct rping_cb of the connection. The thread terminates via
 * pthread_exit(NULL) on any failure or when the handler returns non-zero.
 * Every event that was successfully retrieved is acknowledged before the
 * thread exits.
 */
static void *cq_thread(void *arg)
{
	struct rping_cb *cb = arg;
	struct ibv_cq *ev_cq;
	void *ev_ctx;
	int ret;
	
	DEBUG_LOG("cq_thread started.\n");

	while (1) {	
		pthread_testcancel();

		ret = ibv_get_cq_event(cb->channel, &ev_cq, &ev_ctx);
		if (ret) {
			fprintf(stderr, "Failed to get cq event!\n");
			pthread_exit(NULL);
		}
		if (ev_cq != cb->cq) {
			fprintf(stderr, "Unknown CQ!\n");
			/* The event was delivered, so it still has to be
			 * acked on the CQ that raised it */
			ibv_ack_cq_events(ev_cq, 1);
			pthread_exit(NULL);
		}
		ret = ibv_req_notify_cq(cb->cq, 0);
		if (ret) {
			fprintf(stderr, "Failed to set notify!\n");
			/* Do not leave the received event unacknowledged */
			ibv_ack_cq_events(cb->cq, 1);
			pthread_exit(NULL);
		}
		ret = rping_cq_event_handler(cb);
		ibv_ack_cq_events(cb->cq, 1);
		if (ret)
			pthread_exit(NULL);
	}
}
コード例 #6
0
ファイル: rdma_backend.c プロジェクト: OSLL/qemu-xtensa
/*
 * Completion handler thread for an RDMA backend device.
 *
 * Switches the completion channel FD to non-blocking mode, then, while
 * comp_thread.run is set, polls the FD with a THR_POLL_TO timeout (timeouts
 * are counted in stats.poll_cq_ppoll_to). When the poll reports activity it
 * fetches the CQ event, re-arms the CQ, processes completions through
 * rdma_poll_cq() and acknowledges the event.
 *
 * arg is the RdmaBackendDev. Returns NULL if the FD could not be switched to
 * non-blocking mode; otherwise the thread ends through qemu_thread_exit(0).
 */
static void *comp_handler_thread(void *arg)
{
    RdmaBackendDev *backend_dev = (RdmaBackendDev *)arg;
    int rc;
    struct ibv_cq *ev_cq;
    void *ev_ctx;
    int flags;
    GPollFD pfds[1];

    /* Change to non-blocking mode */
    flags = fcntl(backend_dev->channel->fd, F_GETFL);
    if (flags < 0) {
        /* Without the current flags, F_SETFL would be fed garbage */
        rdma_error_report("Failed to change backend channel FD to non-blocking");
        return NULL;
    }
    rc = fcntl(backend_dev->channel->fd, F_SETFL, flags | O_NONBLOCK);
    if (rc < 0) {
        rdma_error_report("Failed to change backend channel FD to non-blocking");
        return NULL;
    }

    pfds[0].fd = backend_dev->channel->fd;
    pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR;

    backend_dev->comp_thread.is_running = true;

    while (backend_dev->comp_thread.run) {
        /* Wait for FD activity, waking up periodically to re-check run */
        do {
            rc = qemu_poll_ns(pfds, 1, THR_POLL_TO * (int64_t)SCALE_MS);
            if (!rc) {
                backend_dev->rdma_dev_res->stats.poll_cq_ppoll_to++;
            }
        } while (!rc && backend_dev->comp_thread.run);

        if (backend_dev->comp_thread.run) {
            rc = ibv_get_cq_event(backend_dev->channel, &ev_cq, &ev_ctx);
            if (unlikely(rc)) {
                rdma_error_report("ibv_get_cq_event fail, rc=%d, errno=%d", rc,
                                  errno);
                continue;
            }

            /* A re-arm failure is reported but completions are still polled */
            rc = ibv_req_notify_cq(ev_cq, 0);
            if (unlikely(rc)) {
                rdma_error_report("ibv_req_notify_cq fail, rc=%d, errno=%d", rc,
                                  errno);
            }

            backend_dev->rdma_dev_res->stats.poll_cq_from_bk++;
            rdma_poll_cq(backend_dev->rdma_dev_res, ev_cq);

            ibv_ack_cq_events(ev_cq, 1);
        }
    }

    backend_dev->comp_thread.is_running = false;

    qemu_thread_exit(0);

    return NULL;
}
コード例 #7
0
ファイル: rdma-common.c プロジェクト: kento/Samples
/*
 * Endless completion loop: wait for an event on s_ctx->comp_channel,
 * acknowledge it, re-arm the CQ and pass every polled work completion to
 * on_completion(). ctx only receives the CQ context from ibv_get_cq_event().
 */
void * poll_cq2(void *ctx)
{
  struct ibv_cq *cq;
  struct ibv_wc wc;
  while (1) {
    TEST_NZ(ibv_get_cq_event(s_ctx->comp_channel, &cq, &ctx));
    ibv_ack_cq_events(cq, 1);
    TEST_NZ(ibv_req_notify_cq(cq, 0));
    /* A negative result means the poll failed and wc was not filled in,
     * so only positive results are dispatched. */
    while (ibv_poll_cq(cq, 1, &wc) > 0) {
      on_completion(&wc);
    }
  }
  return NULL;
}
コード例 #8
0
void ParallelRenderingClientIBVerbs::run()
{

    struct ibv_wc wc;
    struct ibv_cq *ev_cq;
    void *ev_ctx;

    while (keepRunning)
    {

        lock.lock();
        int ne;

        do
        {
            ne = ibv_poll_cq(ctx->cq, 1, &wc);
            if (ne > 0)
            {
                if (ibv_get_cq_event(ctx->ch, &ev_cq, &ev_ctx))
                {
                    fprintf(stderr, "Failed to get cq event!\n");
                    return;
                }
                if (ev_cq != ctx->cq)
                {
                    fprintf(stderr, "Unkown CQ!\n");
                    return;
                }
                ibv_ack_cq_events(ctx->cq, 1);
                ibv_req_notify_cq(ctx->cq, 0);
            }
            microSleep(100);
        } while (ne == 0);

        if (ne < 0)
        {
            fprintf(stderr, "poll CQ failed %d\n", ne);
            return;
        }
        if (wc.status != IBV_WC_SUCCESS)
        {
            fprintf(stderr, "Completion with error at client\n");
            fprintf(stderr, "Failed status %d: wr_id %d\n",
                    wc.status, (int)wc.wr_id);
            return;
        }
    }
}
コード例 #9
0
/*
 * Wait for one completion on `cq`: block on comp_chan for an event,
 * acknowledge it, re-arm the CQ and poll a single work completion.
 *
 * Returns 0 if a completion with IBV_WC_SUCCESS was polled, 1 on any
 * failure (event retrieval, re-arm, empty/failed poll, bad status).
 */
int wait_receive_data() {
    err = ibv_get_cq_event(comp_chan, &evt_cq, &cq_context);
    if (err) {
        return 1;
    }

    /* The retrieved event must be acknowledged */
    ibv_ack_cq_events(evt_cq, 1);

    err = ibv_req_notify_cq(cq, 0);
    if (err) {
        return 1;
    }

    n = ibv_poll_cq(cq, 1, &wc);
    if (n <= 0) {
        return 1;
    }

    return (wc.status != IBV_WC_SUCCESS) ? 1 : 0;
}
コード例 #10
0
ファイル: async_progress.c プロジェクト: grondo/mvapich-cce
/*
 * Asynchronous progress thread: for every event on viadev.comp_channel it
 * signals the parent thread with SIGUSR1, acknowledges the event and re-arms
 * viadev.cq_hndl (with the second argument of ibv_req_notify_cq set to 1).
 * The loop never ends on its own; the thread is expected to be cancelled.
 */
static void async_completion_thread()
{
    struct ibv_comp_channel *channel;
    struct ibv_cq *event_cq;
    void *event_ctx;
    int rc;

    /* This thread should be in a cancel enabled state */
    pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);

    channel = viadev.comp_channel;

    for (;;) {
        pthread_testcancel();

        /* Asynchronous cancellation only while waiting for the event */
        pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);

        /* Retry the wait as long as it is interrupted by a signal */
        for (;;) {
            rc = ibv_get_cq_event(channel, &event_cq, &event_ctx);

            if (rc && errno != EINTR) {
                error_abort_all(IBV_RETURN_ERR,
                        "Failed to get cq event: %d\n", rc);
            }

            if (!rc || errno != EINTR) {
                break;
            }
        }

        pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, NULL);

        if (event_cq != viadev.cq_hndl) {
            error_abort_all(GEN_ASSERT_ERR, "Event in unknown CQ\n");
        }

        /* Wake the parent thread so it can make progress */
        pthread_kill(parent_threadId, SIGUSR1);

        ibv_ack_cq_events(viadev.cq_hndl, 1);

        /* Two consecutive cancellation checks, as in the original flow */
        pthread_testcancel();
        pthread_testcancel();

        if (ibv_req_notify_cq(viadev.cq_hndl, 1)) {
            error_abort_all(IBV_RETURN_ERR,
                    "Couldn't request for CQ notification\n");
        }
    }
}
コード例 #11
0
ファイル: rdma_thread.c プロジェクト: li-ch/rdma-examples
/*
 * Fetch one work completion for a thread context.
 *
 * t_ctx   - thread context; supplies the send/recv CQs and channels.
 * wc      - output work completion.
 * is_send - non-zero selects the send CQ/channel, zero the receive ones.
 *
 * In event mode (user_param->use_event) the function waits for a channel
 * event, acknowledges it, re-arms the CQ and polls once. Otherwise it
 * busy-polls until a completion is available.
 *
 * Returns 0 on success and 1 on failure.
 * NOTE(review): in event mode a poll that returns 0 completions still
 * yields return value 0 with *wc untouched - confirm callers cope with that.
 */
static int get_thread_wc(struct thread_context_t *t_ctx, struct ibv_wc *wc, int is_send)
{
	struct ibv_cq           *cq;
	struct ibv_comp_channel *comp_channel;
	struct rdma_resource_t *rdma_resource;
	struct user_param_t *user_param;
	void *ectx;
	int rc = 0;

	rdma_resource = t_ctx->rdma_resource;
	user_param    = &(rdma_resource->user_param);

	if (is_send) {
		cq = t_ctx->send_cq;
		comp_channel = t_ctx->send_comp_channel;
	} else {
		cq = t_ctx->recv_cq;
		comp_channel = t_ctx->recv_comp_channel;
	}

	if (user_param->use_event) {
		/* cq is overwritten with the CQ that raised the event */
		rc = ibv_get_cq_event(comp_channel, &cq, &ectx);
		if (rc != 0) {
			ERROR("Failed to do ibv_get_cq_event.\n");
			return 1;
		}

		ibv_ack_cq_events(cq, 1);

		rc = ibv_req_notify_cq(cq, 0);
		if (rc != 0) {
			/* Report the call that actually failed */
			ERROR("Failed to do ibv_req_notify_cq.\n");
			return 1;
		}
	}

	do {
		rc = ibv_poll_cq(cq, 1, wc);
		if (rc < 0) {
			ERROR("Failed to poll CQ.\n");
			return 1;
		}
	} while (!user_param->use_event && (rc == 0)); /* TODO: needs a timeout */

	return 0;
}
コード例 #12
0
ファイル: rdma.cpp プロジェクト: Aravindreddy986/CaffeOnSpark
/**
 * Event loop of the adapter's internal thread.
 *
 * Running the completion loop on a dedicated thread lets management messages
 * (such as buffer connection) be handled right away even when the user is not
 * polling; without it, buffer constructors would block forever.
 *
 * This fits deep learning traffic, which consists of few, large messages. For
 * many short messages it would be better to put management and data messages
 * on separate queue pairs so user threads can wait or poll on the data queue
 * pair themselves.
 */
void RDMAAdapter::InternalThreadEntry() {
  while (!must_stop()) {
    ibv_cq* cq;
    void* cq_context;

    // Wait for an event, make sure it is ours, ack it and re-arm the CQ
    CHECK(!ibv_get_cq_event(channel_, &cq, &cq_context));
    CHECK(cq == cq_);
    ibv_ack_cq_events(cq, 1);
    CHECK(!ibv_req_notify_cq(cq_, 0));

    // Fetch up to two completions per concurrent write in one call
    int ne = ibv_poll_cq(cq_,
                         MAX_CONCURRENT_WRITES * 2,
                         static_cast<ibv_wc*>(wc_));
    CHECK_GE(ne, 0);

    for (int i = 0; i < ne; ++i) {
      CHECK(wc_[i].status == IBV_WC_SUCCESS) << "Failed status \n"
                                             << ibv_wc_status_str(wc_[i].status)
                                             << " " << wc_[i].status << " "
                                             << static_cast<int>(wc_[i].wr_id)
                                             << " "<< wc_[i].vendor_err;

      if (wc_[i].opcode == IBV_WC_RECV_RDMA_WITH_IMM) {
        // RDMA write with immediate: the wr_id carries the channel and the
        // immediate value selects the buffer
        RDMAChannel* channel = reinterpret_cast<RDMAChannel*>(wc_[i].wr_id);
        channel->recv();
        int id = wc_[i].imm_data;
        if (id < CTRL_ID_OFFSET) {
          // Plain data buffer
          received_.push(channel->buffers_[id]);
        } else {
          // Control signal: ids are shifted by CTRL_ID_OFFSET
          ctrl_received_.push(channel->buffers_[id - CTRL_ID_OFFSET]);
        }
      } else if (wc_[i].opcode & IBV_WC_RECV) {
        // Any other receive completion is a buffer connection message
        RDMAChannel* channel = reinterpret_cast<RDMAChannel*>(wc_[i].wr_id);
        int id = wc_[i].imm_data;
        channel->memory_regions_queue_.push(channel->memory_regions_[id]);
        CHECK(id == channel->memory_regions_received_++);
        CHECK(!ibv_dereg_mr(channel->region_regions_[id]));
      }
    }
  }
}
コード例 #13
0
ファイル: rdma_client.c プロジェクト: hxmhuang/CFIO2
/*
 * Process completion events until request_stack_size drops to zero.
 *
 * Each iteration waits for an event on s_ctx->comp_channel, acknowledges it,
 * re-arms the CQ and feeds every polled work completion to on_completion().
 * ctx only receives the CQ context returned by ibv_get_cq_event().
 */
inline void cfio_rdma_client_wait(void *ctx)
{
    struct ibv_cq *cq;
    struct ibv_wc wc;

    while (request_stack_size) {
        TEST_NZ(ibv_get_cq_event(s_ctx->comp_channel, &cq, &ctx));
        ibv_ack_cq_events(cq, 1);
        TEST_NZ(ibv_req_notify_cq(cq, 0));

        /* Negative results signal a poll failure with wc left unfilled;
         * dispatch only when a completion was actually returned. */
        while (ibv_poll_cq(cq, 1, &wc) > 0) {
            on_completion(&wc);
        }
    }
}
コード例 #14
0
ファイル: ibv_rdma.cpp プロジェクト: Daweek/Original_DSCUDA
/*
 * Endless completion loop for one IbvConnection.
 *
 * ctx is the IbvConnection whose comp_channel is waited on; the same
 * variable is afterwards reused to receive the CQ context from
 * ibv_get_cq_event(). Every polled work completion is passed to
 * OnCompletionHandler.
 */
void *
poll_cq(void *ctx)
{
    struct ibv_cq *cq;
    struct ibv_wc wc;
    IbvConnection *conn = (IbvConnection *)ctx;

    while (1) {
        TEST_NZ(ibv_get_cq_event(conn->comp_channel, &cq, &ctx));
        ibv_ack_cq_events(cq, 1);
        TEST_NZ(ibv_req_notify_cq(cq, 0));

        /* ibv_poll_cq() reports failure with a negative value and leaves wc
         * unfilled, so only positive results reach the handler. */
        while (ibv_poll_cq(cq, 1, &wc) > 0) {
            (OnCompletionHandler)(&wc);
        }
    }

    return NULL;
}
コード例 #15
0
ファイル: ib_iface.c プロジェクト: xinzhao3/ucx
/**
 * Drain pending events from the interface completion channel and arm the
 * CQs selected by wakeup->events.
 *
 * Each event is acknowledged immediately on the CQ that ibv_get_cq_event()
 * reported for it, so events coming from different CQs are never credited
 * to the wrong one and nothing stays unacknowledged on the error path.
 *
 * @param wakeup  Wakeup handle; its iface is used as a uct_ib_iface_t.
 *
 * @return UCS_ERR_IO_ERROR if reading the channel stopped with an errno other
 *         than EAGAIN, the failing status of an arm operation, or UCS_OK.
 */
ucs_status_t uct_ib_iface_wakeup_arm(uct_wakeup_h wakeup)
{
    int res;
    ucs_status_t status;
    struct ibv_cq *cq;
    void *cq_context;
    uct_ib_iface_t *iface = ucs_derived_of(wakeup->iface, uct_ib_iface_t);

    do {
        res = ibv_get_cq_event(iface->comp_channel, &cq, &cq_context);
        if (res == 0) {
            /* cq is only valid after a successful call */
            ibv_ack_cq_events(cq, 1);
        }
    } while (res == 0);

    /* The last call in the loop is always the failing ibv_get_cq_event(),
     * so errno still describes why draining stopped */
    if (errno != EAGAIN) {
        return UCS_ERR_IO_ERROR;
    }

    if (wakeup->events & UCT_WAKEUP_TX_COMPLETION) {
        status = iface->ops->arm_tx_cq(iface);
        if (status != UCS_OK) {
            return status;
        }
    }

    if (wakeup->events & (UCT_WAKEUP_RX_AM | UCT_WAKEUP_RX_SIGNALED_AM)) {
        status = iface->ops->arm_rx_cq(iface, 0);
        if (status != UCS_OK) {
            return status;
        }
    }

    return UCS_OK;
}
コード例 #16
0
ファイル: rdma.c プロジェクト: DebashisGanguly/FIOBenchmark
/*
 * Wait for completions matching 'opcode'.
 *
 * If rd->cq_event_num shows completions left over from an earlier call, one
 * of them is consumed and 0 is returned. Otherwise the function waits on the
 * completion channel, re-arms the CQ and runs cq_event_handler() until it
 * returns a non-zero value.
 *
 * Returns -1 on error, otherwise the non-zero value returned by
 * cq_event_handler().
 *
 * Every channel event obtained with ibv_get_cq_event() is acknowledged
 * exactly once: the number acked is the number of events retrieved, not the
 * handler's return value.
 */
static int rdma_poll_wait(struct thread_data *td, enum ibv_wc_opcode opcode)
{
	struct rdmaio_data *rd = td->io_ops->data;
	struct ibv_cq *ev_cq;
	void *ev_ctx;
	unsigned int nr_cq_events = 0;	/* events received on rd->cq, not yet acked */
	int ret;

	if (rd->cq_event_num > 0) {	/* previous left */
		rd->cq_event_num--;
		return 0;
	}

again:
	if (ibv_get_cq_event(rd->channel, &ev_cq, &ev_ctx) != 0) {
		log_err("fio: Failed to get cq event!\n");
		ret = -1;
		goto out_ack;
	}
	if (ev_cq != rd->cq) {
		log_err("fio: Unknown CQ!\n");
		/* the event belongs to the foreign CQ; ack it there */
		ibv_ack_cq_events(ev_cq, 1);
		ret = -1;
		goto out_ack;
	}
	nr_cq_events++;

	if (ibv_req_notify_cq(rd->cq, 0) != 0) {
		log_err("fio: Failed to set notify!\n");
		ret = -1;
		goto out_ack;
	}

	ret = cq_event_handler(td, opcode);
	if (ret == 0)
		goto again;

	rd->cq_event_num--;

out_ack:
	if (nr_cq_events > 0)
		ibv_ack_cq_events(rd->cq, nr_cq_events);

	return ret;
}
コード例 #17
0
ファイル: rdma-common.c プロジェクト: kento/Samples
/*
 * Completion loop built around the file-level tmp_cq.
 *
 * Each iteration first drains the CQ seen on the previous event (if any)
 * through on_completion(), then waits for the next event on
 * s_ctx->comp_channel, acknowledges it and re-arms the CQ.
 *
 * The loop ends, and NULL is returned, when ibv_get_cq_event() fails; in
 * that case no event was delivered, so nothing is acknowledged. ctx is not
 * used.
 */
void * poll_cq(void *ctx)
{
  void* tmp_ctx;
  struct ibv_wc wc;
  int nument = 1;
  tmp_cq = NULL;

  while(1) {
    if (tmp_cq != NULL) {
      /* Negative results mean the poll failed and wc is not valid */
      while (ibv_poll_cq(tmp_cq, nument, &wc) > 0) {
        on_completion(&wc);
      }
    }

    if (ibv_get_cq_event(s_ctx->comp_channel, &tmp_cq, &tmp_ctx)) {
      /* No event was returned: acking here would touch a NULL or stale CQ */
      break;
    }
    ibv_ack_cq_events(tmp_cq, 1);

    /* A re-arm failure is deliberately not fatal: the next iteration still
     * drains whatever is already queued on the CQ. */
    (void)ibv_req_notify_cq(tmp_cq, 0);
  }
  return NULL;
}
コード例 #18
0
ファイル: client1.c プロジェクト: xiansl/mytests
void  poll_cq(void *ctx)
{
  struct ibv_cq *cq;
  struct ibv_wc wc;
  int ne;

    TEST_NZ(ibv_get_cq_event(s_ctx->comp_channel, &cq, &ctx));//block by default
    ibv_ack_cq_events(cq, 1);
    TEST_NZ(ibv_req_notify_cq(cq, 0));

    do {
        ne = ibv_poll_cq(cq, 1, &wc);
        if(ne < 0){
            printf("fail to poll completion from the CQ. ret = %d\n", ne);
            return;
        }
        else if(ne == 0)
            continue;
        else
            on_completion(&wc);
    } while (ne == 0);

  return;
}
コード例 #19
0
ファイル: server.c プロジェクト: WANG-lp/RDMA
int main(int argc, char *argv[]) {
	struct pdata rep_pdata;

	struct rdma_event_channel *cm_channel;
	struct rdma_cm_id *listen_id;
	struct rdma_cm_id *cm_id;
	struct rdma_cm_event *event;
	struct rdma_conn_param conn_param = { };

	struct ibv_pd *pd;
	struct ibv_comp_channel *comp_chan;
	struct ibv_cq *cq;
	struct ibv_cq *evt_cq;
	struct ibv_mr *mr;
	struct ibv_qp_init_attr qp_attr = { };
	struct ibv_sge sge;
	struct ibv_send_wr send_wr = { };
	struct ibv_send_wr *bad_send_wr;
	struct ibv_recv_wr recv_wr = { };
	struct ibv_recv_wr *bad_recv_wr;
	struct ibv_wc wc;
	void *cq_context;

	struct sockaddr_in sin;

	uint32_t *buf;

	int err;

	/* Set up RDMA CM structures */

	cm_channel = rdma_create_event_channel();
	if (!cm_channel)
		return 1;

	err = rdma_create_id(cm_channel, &listen_id, NULL, RDMA_PS_TCP);
	if (err)
		return err;

	sin.sin_family = AF_INET;
	sin.sin_port = htons(20079);
	sin.sin_addr.s_addr = INADDR_ANY;

	/* Bind to local port and listen for connection request */

	err = rdma_bind_addr(listen_id, (struct sockaddr *) &sin);
	if (err)
		return 1;


	err = rdma_listen(listen_id, 1);
	if (err)
		return 1;

	err = rdma_get_cm_event(cm_channel, &event);
	if (err)
		return err;
	printf("after get_cm_event\n");

	if (event->event != RDMA_CM_EVENT_CONNECT_REQUEST)
		return 1;

	cm_id = event->id;

	rdma_ack_cm_event(event);

	/* Create verbs objects now that we know which device to use */

	pd = ibv_alloc_pd(cm_id->verbs);
	if (!pd)
		return 1;

	comp_chan = ibv_create_comp_channel(cm_id->verbs);
	if (!comp_chan)
		return 1;

	cq = ibv_create_cq(cm_id->verbs, 2, NULL, comp_chan, 0);
	if (!cq)
		return 1;

	if (ibv_req_notify_cq(cq, 0))
		return 1;

	buf = calloc(2, sizeof(uint32_t));
	if (!buf)
		return 1;

	mr = ibv_reg_mr(pd, buf, 2 * sizeof(uint32_t),
			IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ
					| IBV_ACCESS_REMOTE_WRITE);
	if (!mr)
		return 1;

	qp_attr.cap.max_send_wr = 1;
	qp_attr.cap.max_send_sge = 1;
	qp_attr.cap.max_recv_wr = 1;
	qp_attr.cap.max_recv_sge = 1;

	qp_attr.send_cq = cq;
	qp_attr.recv_cq = cq;

	qp_attr.qp_type = IBV_QPT_RC;

	err = rdma_create_qp(cm_id, pd, &qp_attr);
	if (err)
		return err;

	/* Post receive before accepting connection */

	sge.addr = (uintptr_t) buf + sizeof(uint32_t);
	sge.length = sizeof(uint32_t);
	sge.lkey = mr->lkey;

	recv_wr.sg_list = &sge;
	recv_wr.num_sge = 1;

	if (ibv_post_recv(cm_id->qp, &recv_wr, &bad_recv_wr))
		return 1;

	rep_pdata.buf_va = htonll((uintptr_t) buf);
	rep_pdata.buf_rkey = htonl(mr->rkey);

	conn_param.responder_resources = 1;
	conn_param.private_data = &rep_pdata;
	conn_param.private_data_len = sizeof rep_pdata;

	/* Accept connection */
	printf("before accept\n");
	err = rdma_accept(cm_id, &conn_param);
	if (err)
		return 1;
	printf("after accept\n");
	err = rdma_get_cm_event(cm_channel, &event);
	if (err)
		return err;

	if (event->event != RDMA_CM_EVENT_ESTABLISHED)
		return 1;

	rdma_ack_cm_event(event);

	/* Wait for receive completion */

	if (ibv_get_cq_event(comp_chan, &evt_cq, &cq_context))
		return 1;

	if (ibv_req_notify_cq(cq, 0))
		return 1;

	if (ibv_poll_cq(cq, 1, &wc) < 1)
		return 1;

	if (wc.status != IBV_WC_SUCCESS)
		return 1;

	/* Add two integers and send reply back */

	buf[0] = htonl(ntohl(buf[0]) + ntohl(buf[1]));

	sge.addr = (uintptr_t) buf;
	sge.length = sizeof(uint32_t);
	sge.lkey = mr->lkey;

	send_wr.opcode = IBV_WR_SEND;
	send_wr.send_flags = IBV_SEND_SIGNALED;
	send_wr.sg_list = &sge;
	send_wr.num_sge = 1;

	if (ibv_post_send(cm_id->qp, &send_wr, &bad_send_wr))
		return 1;

	/* Wait for send completion */

	if (ibv_get_cq_event(comp_chan, &evt_cq, &cq_context))
		return 1;

	if (ibv_poll_cq(cq, 1, &wc) < 1)
		return 1;

	if (wc.status != IBV_WC_SUCCESS)
		return 1;

	printf("before ack cq 2\n");
	ibv_ack_cq_events(cq, 2);

	return 0;
}
コード例 #20
0
ファイル: GPI2_IB_PASSIVE.c プロジェクト: jakalx/GPI-2
/*
 * Passive receive: post one receive on the passive SRQ and wait, up to
 * timeout_ms, for a message to arrive in the local segment.
 *
 * segment_id_local - local segment that receives the data.
 * offset_local     - offset inside the segment (NOTIFY_OFFSET is added).
 * rem_rank         - output: index of the passive QP whose qp_num matches
 *                    the completion, or 0xffff if none matches.
 * size             - number of bytes to receive.
 * timeout_ms       - timeout passed to the lock and to select().
 *
 * Returns GASPI_SUCCESS, GASPI_TIMEOUT if select() timed out, or
 * GASPI_ERROR. The lockPR lock is released on every return path.
 * NOTE(review): the result of lock_gaspi_tout() is not checked here -
 * confirm whether it can fail to acquire the lock on timeout.
 */
gaspi_return_t
pgaspi_passive_receive (const gaspi_segment_id_t segment_id_local,
		       const gaspi_offset_t offset_local,
		       gaspi_rank_t * const rem_rank, const gaspi_size_t size,
		       const gaspi_timeout_t timeout_ms)
{

#ifdef DEBUG  
  if (glb_gaspi_ctx_ib.rrmd[segment_id_local] == NULL)
    {
      gaspi_printf("Debug: Invalid local segment (gaspi_passive_receive)\n");    
      return GASPI_ERROR;
    }
  
  if( rem_rank == NULL)
    {
      gaspi_printf("Debug: Invalid pointer parameter: rem_rank (gaspi_passive_receive)\n");    
      return GASPI_ERROR;
    }
  
  if( offset_local > glb_gaspi_ctx_ib.rrmd[segment_id_local][glb_gaspi_ctx.rank].size)
    {
      gaspi_printf("Debug: Invalid offsets (gaspi_passive_receive)\n");    
      return GASPI_ERROR;
    }
    
  if( size < 1 || size > GASPI_MAX_TSIZE_P )
    {
      gaspi_printf("Debug: Invalid size (gaspi_passive_receive)\n");    
      return GASPI_ERROR;
    }
#endif

  struct ibv_recv_wr *bad_wr;
  struct ibv_wc wc_recv;
  struct ibv_sge rlist;
  struct ibv_recv_wr rwr;
  struct ibv_cq *ev_cq;
  void *ev_ctx;
  int i;
  fd_set rfds;
  struct timeval tout;


  lock_gaspi_tout (&glb_gaspi_ctx.lockPR, timeout_ms);

  /* Describe the destination inside the local segment */
  rlist.addr =
    (uintptr_t) (glb_gaspi_ctx_ib.
		 rrmd[segment_id_local][glb_gaspi_ctx.rank].addr +
		 NOTIFY_OFFSET + offset_local);
  rlist.length = size;
  rlist.lkey =
    glb_gaspi_ctx_ib.rrmd[segment_id_local][glb_gaspi_ctx.rank].mr->lkey;
  rwr.wr_id = glb_gaspi_ctx.rank;
  rwr.sg_list = &rlist;
  rwr.num_sge = 1;
  rwr.next = NULL;

  if (ibv_post_srq_recv (glb_gaspi_ctx_ib.srqP, &rwr, &bad_wr))
    {
      unlock_gaspi (&glb_gaspi_ctx.lockPR);
      return GASPI_ERROR;
    }

  /* Wait on the completion channel FD so the timeout can be honoured */
  FD_ZERO (&rfds);
  FD_SET (glb_gaspi_ctx_ib.channelP->fd, &rfds);

  const long ts = (timeout_ms / 1000);
  const long tus = (timeout_ms - ts * 1000) * 1000;

  tout.tv_sec = ts;
  tout.tv_usec = tus;

  const int selret = select (FD_SETSIZE, &rfds, NULL, NULL, &tout);
  if (selret < 0)
    {
      unlock_gaspi (&glb_gaspi_ctx.lockPR);
      return GASPI_ERROR;
    }
  else if (selret == 0)
    {
      unlock_gaspi (&glb_gaspi_ctx.lockPR);
      return GASPI_TIMEOUT;
    }

  if (ibv_get_cq_event (glb_gaspi_ctx_ib.channelP, &ev_cq, &ev_ctx))
    {
      unlock_gaspi (&glb_gaspi_ctx.lockPR);
      return GASPI_ERROR;
    }

  ibv_ack_cq_events (ev_cq, 1);

  if (ev_cq != glb_gaspi_ctx_ib.rcqP)
    {
      unlock_gaspi (&glb_gaspi_ctx.lockPR);
      return GASPI_ERROR;
    }

  if (ibv_req_notify_cq (glb_gaspi_ctx_ib.rcqP, 0))
    {
      unlock_gaspi (&glb_gaspi_ctx.lockPR);
      return GASPI_ERROR;
    }

  /* Spin until the completion behind the event shows up (or polling fails) */
  int ne = 0;
  do
    {
      ne = ibv_poll_cq (glb_gaspi_ctx_ib.rcqP, 1, &wc_recv);
    }
  while (ne == 0);

  if (ne < 0)
    {
      /* The poll itself failed: wc_recv was never filled in, so its wr_id
         must not be used as an index */
      unlock_gaspi (&glb_gaspi_ctx.lockPR);
      return GASPI_ERROR;
    }

  if (wc_recv.status != IBV_WC_SUCCESS)
    {
      /* Flag the passive QP entry identified by the completion's wr_id */
      glb_gaspi_ctx.qp_state_vec[GASPI_PASSIVE_QP][wc_recv.wr_id] = 1;
      unlock_gaspi (&glb_gaspi_ctx.lockPR);
      return GASPI_ERROR;
    }

  /* Map the completion's QP number back to a rank index */
  *rem_rank = 0xffff;
  for (i = 0; i < glb_gaspi_ctx.tnc; i++)
    {
      if (glb_gaspi_ctx_ib.qpP[i]->qp_num == wc_recv.qp_num)
	{
	  *rem_rank = i;
	  break;
	}
    }


  unlock_gaspi (&glb_gaspi_ctx.lockPR);
  return GASPI_SUCCESS;

}
コード例 #21
0
ファイル: rdma-client.c プロジェクト: kento/ibrdma
//static void* poll_cq(struct RDMA_communicator* comm)
static void* poll_cq(struct poll_cq_args* args)
{
  struct ibv_cq *cq;
  struct ibv_wc wc;
  struct connection *conn;
  struct RDMA_communicator *comm;
  //  struct RDMA_message *msg;
  double s, e;
  char* ip;

  struct control_msg cmsg;
  void* ctx;
  char* buff; 
  uint64_t buff_size;
  int tag;

  uint64_t mr_size=0;
  uint64_t sent_size=0;
  char* send_base_addr;

  int* flag = args->flag;
  int mr_index;

  //for (i = 0; i < RDMA_BUF_NUM_C; i++){ rdma_msg_mr[i] = NULL;}
  
  comm = args->comm;
  buff = args->msg->buff;
  send_base_addr = args->msg->buff;
  buff_size= args->msg->size;
  tag= args->msg->tag;

  cmsg.type=MR_INIT;
  cmsg.data1.buff_size=buff_size;
  send_control_msg(comm->cm_id->context, &cmsg);
  //  fprintf(stderr, "RDMA lib: SEND: INIT: tag=%d\n", tag);
  post_receives(comm->cm_id->context);
  s = get_dtime();
  while (1) {
    if (ibv_get_cq_event(s_ctx->comp_channel, &cq, &ctx)) {
      fprintf(stderr, "RDMA lib: SEND: ERROR: get cq event  failed @ %s:%d", __FILE__, __LINE__);
      exit(1);
    }
    ibv_ack_cq_events(cq, 1);
    if (ibv_req_notify_cq(cq, 0)) {
      fprintf(stderr, "RDMA lib: SEND: ERROR: request notification failed @ %s:%d", __FILE__, __LINE__);
      exit(1);
    }

    while (ibv_poll_cq(cq, 1, &wc)){
      conn = (struct connection *)(uintptr_t)wc.wr_id;
      debug(printf("Control MSG from: %lu\n", (uintptr_t)conn->id), 1);
      if (wc.status != IBV_WC_SUCCESS) {
        die("RDMA lib: SEND: ERROR: on_completion: status is not IBV_WC_SUCCESS.");
      }

      if (wc.opcode == IBV_WC_RECV) {
        switch (conn->recv_msg->type)
          {
          case MR_INIT_ACK:
	    debug(printf("Recived: Type=%d\n",  conn->recv_msg->type), 1);
	    for (mr_index = 0; mr_index < RDMA_BUF_NUM_C; mr_index++) {
	      debug(printf("Recived: Type=%d\n",  conn->recv_msg->type), 1);
	      if (sent_size == buff_size) {
		/*sent all data*/
		cmsg.type=MR_FIN;
		cmsg.data1.tag=tag;
		send_control_msg(conn, &cmsg);
		//		fprintf(stderr,"Yahoooooooooo !!\n");
		post_receives(conn);
		debug(printf("RDMA lib: SEND: Recieved MR_INIT_ACK: for tag=%d\n",  tag), 1);
	      } else {
		debug(printf("RDMA lib: SEND: Recieved MR_INIT_ACK: for tag=%d\n",  tag), 1);
		/*not sent all data yet*/
		if (sent_size + rdma_buf_size > buff_size) {
		  mr_size = buff_size - sent_size;
		} else {
		  mr_size = rdma_buf_size;
		}
		debug(printf("mr_size=%lu\n", mr_size),1);
		//	      printf("%s\n", send_base_addr);
		//	      register_rdma_region(conn, send_base_addr, mr_size);
		
		register_rdma_msg_mr(mr_index, send_base_addr, mr_size);
		send_base_addr += mr_size;
		sent_size += mr_size;
		
		cmsg.type=MR_CHUNK;
		cmsg.data1.mr_size=mr_size;
		memcpy(&cmsg.data.mr, rdma_msg_mr[mr_index], sizeof(struct ibv_mr));
		//	      cmsg.data.mr = conn->rdma_msg_mr;
		send_control_msg(conn, &cmsg);
		//		fprintf(stderr, "RDMA lib: SEND: CHUNK: tag=%d\n", tag);
		post_receives(conn);
	      }
	    }
            break;
          case MR_CHUNK_ACK:

	    if (sent_size == buff_size) {
              /*sent all data*/
	      cmsg.type=MR_FIN;
	      cmsg.data1.tag=tag;
	      debug(printf("RDMA lib: SEND: Recieved MR_CHUNK_ACK => FIN: for tag=%d\n",  tag), 1);
	    } else {
              /*not sent all data yet*/
	      debug(printf("RDMA lib: SEND: Recieved MR_CHUNK_ACK: for tag=%d\n",  tag), 1);
	      if (sent_size + rdma_buf_size > buff_size) {
		mr_size = buff_size - sent_size;
	      } else {
		mr_size = rdma_buf_size;
	      }
	      debug(printf("mr_size=%lu\n", mr_size),1);
	      //	      printf("%s\n", send_base_addr);
	      //	      register_rdma_region(conn, send_base_addr, mr_size);
	      //	      mr_index = (mr_index+ 1) % RDMA_BUF_NUM_C;
	      mr_index = (mr_index+ 1) % RDMA_BUF_NUM_C;
	      debug(printf("mr_index=%d\n", mr_index),1);
	      register_rdma_msg_mr(mr_index, send_base_addr, mr_size);
	      send_base_addr += mr_size;

	      sent_size += mr_size;
	      cmsg.type=MR_CHUNK;
	      cmsg.data1.mr_size=mr_size;
	      memcpy(&cmsg.data.mr, rdma_msg_mr[mr_index], sizeof(struct ibv_mr));
	      //	      cmsg.data.mr = conn->rdma_msg_mr;
	    }
	    send_control_msg(conn, &cmsg);
	    //	    fprintf(stderr, "RDMA lib: SEND: CHUNK2: tag=%d, slid=%lu\n", tag, (uintptr_t)wc.slid);
	    post_receives(conn);
            break;
          case MR_FIN_ACK:
            debug(printf("Recived: Type=%d\n",  conn->recv_msg->type),1);
	    *flag = 1;
	    // rdma_disconnect(comm->cm_id);
	    // rdma_disconnect(conn->id);
	    //exit(0);
	    e = get_dtime();
	    free(args->msg);
	    free(args);
	    //	    fprintf(stderr, "RDMA lib: SEND: FIN_ACK: tag=%d\n", tag);
	    //ip = get_ip_addr("ib0");
	    //	    printf("RDMA lib: SEND: %s: send time= %f secs, send size= %lu MB, throughput = %f MB/s\n", ip, e - s, buff_size/1000000, buff_size/(e - s)/1000000.0);
	    return NULL;
          default:
            debug(printf("Unknown TYPE"), 1);
	    return NULL;
          }
      } else if (wc.opcode == IBV_WC_SEND) {
	//	fprintf(stderr, "RDMA lib: SENT: DONE: tag=%d\n", tag);
	debug(printf("RDMA lib: SEND: Sent: TYPE=%d, tag=%d\n", conn->send_msg->type, tag),1);
      } else {
	  die("unknow opecode.");
      }
    }
  }
  return NULL;
}
コード例 #22
0
ファイル: rpcdwyane.c プロジェクト: carriercomm/DPDK-Graph
/*
 * RDMA Read a buffer from the remote address.
 *
 * conn - connection whose QP is used for the read.
 * cl   - chunk describing the local destination (address, lkey, length)
 *        and the remote source (address, rkey).
 *
 * Posts a signaled RDMA READ tagged with wr_id IBV_WR_RDMA_READ and
 * busy-polls the QP's send CQ until its completion arrives.
 *
 * Returns RDMA_SUCCESS when a successful completion with the expected wr_id
 * was polled, RDMA_FAILED otherwise (including cl == NULL).
 *
 * The completion is obtained by polling only; no completion-channel event
 * is retrieved here, so no event is acknowledged either.
 */
rdma_stat
dw_read(CONN *conn, struct clist *cl)
{
	int 						total_msg_size;
	int						err;

	struct ibv_sge			sge;
	struct ibv_send_wr		send_wr = { };
	struct ibv_send_wr	       *bad_send_wr;
	struct ibv_wc				wc;
	
	if (cl == NULL) {
		return (RDMA_FAILED);
	}

	total_msg_size = 0;
		
	sge.addr 		= cl->u.c_daddr3;
	sge.lkey 		= cl->c_dmemhandle.mrc_lmr; /* lkey */
	sge.length 	= cl->c_len;
	
	total_msg_size += cl->c_len;

	PRINTF_INFO("the total_msg_size is %d\n", total_msg_size);
	/* The opcode value doubles as wr_id so the completion can be matched */
	send_wr.wr_id 		= IBV_WR_RDMA_READ;
	send_wr.opcode     	= IBV_WR_RDMA_READ;
	send_wr.send_flags 	= IBV_SEND_SIGNALED;
	send_wr.sg_list    		= &sge;
	send_wr.num_sge    	= 1;
	send_wr.next			= NULL;

	send_wr.wr.rdma.rkey			= cl->c_smemhandle.mrc_rmr;
	send_wr.wr.rdma.remote_addr 	= cl->w.c_saddr;
		
	if (ibv_post_send(conn->cm_id->qp, &send_wr, &bad_send_wr)){
		PRINTF_ERR("err while ibv_post_send\n");
		return RDMA_FAILED;
	}
	
	/* Busy-wait until the completion shows up or polling fails */
	while((err = ibv_poll_cq(conn->cm_id->qp->send_cq, 1, &wc)) == 0){
	}

	if(err < 0){
		PRINTF_ERR("err occure while ibv_poll_cq in %s\n", __func__);
		return RDMA_FAILED;
	}
		
	if (wc.status != IBV_WC_SUCCESS){
		PRINTF_ERR("err %d in %s\n", wc.status, __func__);
		return RDMA_FAILED;
	}

	/* The completion must be the read posted above */
	if(wc.wr_id!= IBV_WR_RDMA_READ){
		PRINTF_ERR("wc.opcode is %d\n", wc.opcode);
		return RDMA_FAILED;
	}

	PRINTF_INFO("read ok\n");
	return (RDMA_SUCCESS);
	
}
コード例 #23
0
/*
 * Wrapper taking a struct ibv_cq_1_0: forwards the acknowledgement of
 * `nevents` completion events to ibv_ack_cq_events() on cq->real_cq.
 */
void __ibv_ack_cq_events_1_0(struct ibv_cq_1_0 *cq, unsigned int nevents)
{
	ibv_ack_cq_events(cq->real_cq, nevents);
}
コード例 #24
0
ファイル: mpxy_in.c プロジェクト: Cai900205/test
/* Proxy-in service - RX thread
 *
 *  <- Work request in (RW_imm - WR idata), remote initiated RW
 *  <- Work completion in (RW_imm - WC idata), local initiated RW
 *
 * Consumes one completion-channel event for m_cq (if one can be read),
 * then drains the CQ in batches of up to mcm_wrc_max completions.  For
 * every successful IBV_WC_RECV_RDMA_WITH_IMM completion the immediate
 * data is decoded, handed to m_pi_rcv_event() and an empty receive is
 * re-posted on the owning QP.
 *
 * m_cq   - completion queue wrapper to service.
 * events - incremented by one when the function stops after processing
 *          mcm_wrc_max or more completions, to report pending work.
 */
void m_rcv_event(struct mcm_cq *m_cq, int *events)
{
	struct ibv_wc wc[mcm_wrc_max];
	struct ibv_cq *ib_cq;
	struct mcm_qp *m_qp;
	void *cq_ctx;
	int i, wc_cnt, ret, notify = 0;

	ret = ibv_get_cq_event(m_cq->ib_ch, &ib_cq, &cq_ctx);
	if (ret == 0)
		ibv_ack_cq_events(m_cq->ib_cq, 1);

	wc_cnt = 0;
retry:
	if (wc_cnt >= mcm_wrc_max) {
		if (wc[0].status == 0)
			mlog(0x10," m_cq %p processed max %d, exit\n", m_cq, wc_cnt);
		*events += 1;  /* pending */
		return;
	}

	ret = ibv_poll_cq(m_cq->ib_cq, mcm_wrc_max, wc);
	if (ret <= 0) {
		/*
		 * CQ looked empty: re-arm notification once, then poll one
		 * more time to catch completions that raced with the re-arm.
		 */
		if (!ret && !notify) {
			ibv_req_notify_cq(m_cq->ib_cq, 0);
			notify = 1;
			goto retry;
		}
		return;
	}
	notify = 0;
	wc_cnt += ret;

	for (i = 0; i < ret; i++) {
		/* wr_id carries the mcm_qp pointer set when the recv was posted */
		m_qp = (struct mcm_qp *)wc[i].wr_id;

		mlog(0x40," wr_id[%d of %d] m_qp %p\n", i+1, ret, m_qp);
		mlog(0x40," ib_wc: st %d, vn %x idata %x  op %x wr_id %llx\n",
			wc[i].status, wc[i].vendor_err, ntohl(wc[i].imm_data),
			wc[i].opcode, (unsigned long long)wc[i].wr_id);

		if (wc[i].status != IBV_WC_SUCCESS) {
			/* NOTE(review): ib_qp2 is dereferenced unchecked here,
			 * while the re-post path below tolerates a NULL QP. */
			if (wc[i].status != IBV_WC_WR_FLUSH_ERR)
				mlog(0," DTO ERR: st %d, vn %x idata %x qstate 0x%x\n",
					wc[i].status, wc[i].vendor_err,
					ntohl(wc[i].imm_data), m_qp->ib_qp2->state);
			continue;
		}
		if (m_qp->cm && (m_qp->cm->state == MCM_DISCONNECTED)) {
			mlog(1," WARN: RX data on DISC m_qp %p qp1 %p qp2 %p %s\n",
				m_qp, m_qp->ib_qp1, m_qp->ib_qp2,
				mcm_state_str(m_qp->cm->state));
			continue;
		}

		if (wc[i].opcode == IBV_WC_RECV_RDMA_WITH_IMM) {
			struct ibv_recv_wr r_wr, *r_err;
			wrc_idata_t  wrc;
			struct ibv_qp *ib_qp;
			int err;

			wrc.id = WRC_ID_DATA(ntohl(wc[i].imm_data));
			wrc.type = WRC_TYPE_DATA(ntohl(wc[i].imm_data));
			wrc.flags = WRC_FLAGS_DATA(ntohl(wc[i].imm_data));

			/* process WR or WC */
			m_pi_rcv_event(m_qp, &wrc);

			/* re-post message */
			r_wr.next = NULL;
			r_wr.sg_list = NULL;
			r_wr.num_sge = 0;
			r_wr.wr_id = (uint64_t)(uintptr_t) m_qp;

			/* MXS -> MSS or HST, PI service will be on QP1 */
			if (MXS_EP(&m_qp->smd->md->addr) &&
			   (MSS_EP(&m_qp->cm->msg.daddr1) || HST_EP(&m_qp->cm->msg.daddr1)))
				ib_qp = m_qp->ib_qp1;
			else
				ib_qp = m_qp->ib_qp2;

			if (ib_qp) {
				/*
				 * ibv_post_recv() reports failure through its
				 * return value (an errno code), so log that
				 * value and describe the QP actually used.
				 */
				err = ibv_post_recv(ib_qp, &r_wr, &r_err);
				if (err) {
					mlog(0,"ERR: qp %p (%s) qpn %x ibv_post_recv ret = %d %s\n",
						m_qp,
						(ib_qp == m_qp->ib_qp1) ? "QP1":"QP2",
						ib_qp->qp_num,
						err, strerror(err));
				}
			}
			MCNTR(m_qp->smd->md, MCM_QP_RECV);

		} else {
			mlog(0,"ERR: unexpected WC opcode = %d on m_qp %p\n", wc[i].opcode, m_qp);
		}
	}
	goto retry;
}
コード例 #25
0
ファイル: local.c プロジェクト: xiansl/mytests
/* Length of a timeval interval in microseconds, whole seconds included. */
static unsigned long tv_to_usec(const struct timeval *tv)
{
    return (unsigned long) tv->tv_sec * 1000000UL + (unsigned long) tv->tv_usec;
}

/*
 * Print user, system, sleeping and wall-clock time for the interval
 * delimited by the two rusage snapshots and the two wall-clock stamps.
 */
static void print_time_report(const struct rusage *before,
                              const struct rusage *after,
                              const struct timeval *wall_start,
                              const struct timeval *wall_end)
{
    struct timeval diff;
    unsigned long user_us, sys_us, cpu_us, wall_us;

    timersub(&after->ru_utime, &before->ru_utime, &diff);
    user_us = tv_to_usec(&diff);
    printf("User CPU time: %lu us\n", user_us);

    timersub(&after->ru_stime, &before->ru_stime, &diff);
    sys_us = tv_to_usec(&diff);
    printf("System CPU time: %lu us\n", sys_us);

    timersub(wall_end, wall_start, &diff);
    wall_us = tv_to_usec(&diff);
    cpu_us = user_us + sys_us;

    /* Clamp instead of letting the unsigned subtraction wrap around. */
    printf("Sleeping time: %lu us\n", wall_us > cpu_us ? wall_us - cpu_us : 0UL);
    printf("Wall clock CPU time: %lu us\n", wall_us);
}

/*
 * Local RDMA-versus-memcpy benchmark.
 *
 * Usage: <prog> <device index> <port> <iterations> <megabytes>
 *
 * Opens the chosen device twice, builds two RC queue pairs connected to
 * each other through the given port, then copies a buffer of the
 * requested size <iterations> times with RDMA WRITE and again with
 * memcpy(), printing CPU and wall-clock time for each run.
 *
 * Missing arguments are reported one at a time, after printing whatever
 * information (device list, port count) helps choose the next one.
 *
 * Returns 0 on success and 1 on any argument or verbs failure.
 */
int main(int argc, char *argv[])
{
    struct ibv_pd              *pd1, *pd2;
    struct ibv_comp_channel    *comp_chan1, *comp_chan2;
    struct ibv_cq              *cq1, *cq2;
    struct ibv_cq              *evt_cq = NULL;
    struct ibv_mr              *mr1, *mr2;
    struct ibv_qp_init_attr     qp_attr1 = { 0 }, qp_attr2 = { 0 };
    struct ibv_sge              sge;
    struct ibv_send_wr          send_wr = { 0 };
    struct ibv_send_wr         *bad_send_wr = NULL;
    struct ibv_wc               wc;
    struct ibv_qp              *qp1, *qp2;
    void                       *cq_context = NULL;
    union ibv_gid               gid1, gid2;

    int                         n;
    uint8_t                    *buf1, *buf2;

    int                         num_devices;
    struct ibv_context         *verbs1, *verbs2;
    struct ibv_device         **dev_list = ibv_get_device_list(&num_devices);
    struct ibv_device_attr      dev_attr;
    int use = 0;
    int port = 1;
    int x = 0;
    int mb_arg = 0;
    unsigned long mb = 0;
    unsigned long bytes = 0;
    struct timeval start, stop, diff;
    int total_iterations = 0;
    int iterations = 0;

    struct rusage usage_start, usage_end;
    struct timeval tstart, tend;

    /* Validate the list before touching it. */
    if(dev_list == NULL || num_devices == 0) {
        printf("No devices found\n");
        return 1;
    }

    DPRINTF("There are %d devices\n", num_devices);

    for(x = 0; x < num_devices; x++) {
        printf("Device: %d, %s\n", x, ibv_get_device_name(dev_list[x]));
    }

    if(argc < 2) {
        printf("Which RDMA device to use? 0, 1, 2, 3...\n");
        return 1;
    }

    use = atoi(argv[1]);

    if(use < 0 || use >= num_devices) {
        printf("Device #%d invalid, must be between 0 and %d\n", use, num_devices - 1);
        return 1;
    }

    DPRINTF("Using device %d\n", use);

    verbs1 = ibv_open_device(dev_list[use]);

    if(verbs1 == NULL) {
        printf("Failed to open device!\n");
        return 1;
    }

    DPRINTF("Device open %s\n", ibv_get_device_name(dev_list[use]));

    verbs2 = ibv_open_device(dev_list[use]);

    if(verbs2 == NULL) {
        printf("Failed to open device again!\n");
        return 1;
    }

    if(ibv_query_device(verbs1, &dev_attr)) {
        printf("Failed to query device attributes.\n");
        return 1;
    }

    printf("Device open: %d, %s which has %d ports\n", use, ibv_get_device_name(dev_list[use]), dev_attr.phys_port_cnt);

    if(argc < 3) {
        printf("Which port on the device to use? 1, 2, 3...\n");
        return 1;
    }

    port = atoi(argv[2]);

    if(port <= 0 || port > dev_attr.phys_port_cnt) {
        printf("Port #%d invalid, must start with 1, 2, 3, ...\n", port);
        return 1;
    }

    printf("Using port %d\n", port);

    if(argc < 4) {
        printf("How many iterations to perform?\n");
        return 1;
    }

    total_iterations = atoi(argv[3]);
    printf("Will perform %d iterations\n", total_iterations);

    pd1 = ibv_alloc_pd(verbs1);
    if (!pd1)
        return 1;

    if(argc < 5) {
        printf("How many megabytes to allocate? (This will be allocated twice. Once for source, once for destination.)\n");
        return 1;
    }

    /* Parse as signed first so that a negative argument is rejected
     * instead of wrapping to a huge unsigned size. */
    mb_arg = atoi(argv[4]);

    if(mb_arg <= 0) {
        printf("Megabytes %d invalid\n", mb_arg);
        return 1;
    }

    mb = (unsigned long) mb_arg;
    bytes = mb * 1024UL * 1024UL;

    /* The whole buffer goes out as one SGE whose length field is 32-bit. */
    if(bytes > 0xffffffffUL) {
        printf("Megabytes %lu too large for a single RDMA work request\n", mb);
        return 1;
    }

    DPRINTF("protection domain1 allocated\n");

    pd2 = ibv_alloc_pd(verbs2);
    if (!pd2)
        return 1;

    DPRINTF("protection domain2 allocated\n");

    comp_chan1 = ibv_create_comp_channel(verbs1);
    if (!comp_chan1)
        return 1;

    DPRINTF("completion chan1 created\n");

    comp_chan2 = ibv_create_comp_channel(verbs2);
    if (!comp_chan2)
        return 1;

    DPRINTF("completion chan2 created\n");

    cq1 = ibv_create_cq(verbs1, 2, NULL, comp_chan1, 0);
    if (!cq1)
        return 1;

    DPRINTF("CQ1 created\n");

    cq2 = ibv_create_cq(verbs2, 2, NULL, comp_chan2, 0);
    if (!cq2)
        return 1;

    DPRINTF("CQ2 created\n");

    buf1 = malloc(bytes);
    if (!buf1)
        return 1;

    buf2 = malloc(bytes);
    if (!buf2)
        return 1;

    printf("Populating %lu MB memory.\n", mb * 2);

    /* Fill the source buffer with a recognisable pattern. */
    memset(buf1, 123, bytes);

    mr1 = ibv_reg_mr(pd1, buf1, bytes, IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ);
    if (!mr1) {
        printf("Failed to register memory.\n");
        return 1;
    }

    mr2 = ibv_reg_mr(pd2, buf2, bytes, IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ);
    if (!mr2) {
        printf("Failed to register memory.\n");
        return 1;
    }

    DPRINTF("memory registered.\n");

    qp_attr1.cap.max_send_wr  = 10;
    qp_attr1.cap.max_send_sge = 10;
    qp_attr1.cap.max_recv_wr  = 10;
    qp_attr1.cap.max_recv_sge = 10;
    qp_attr1.sq_sig_all       = 1;
    qp_attr1.send_cq          = cq1;
    qp_attr1.recv_cq          = cq1;
    qp_attr1.qp_type          = IBV_QPT_RC;

    qp1 = ibv_create_qp(pd1, &qp_attr1);
    if (!qp1) {
        printf("failed to create queue pair #1\n");
        return 1;
    }

    DPRINTF("queue pair1 created\n");

    qp_attr2.cap.max_send_wr  = 10;
    qp_attr2.cap.max_send_sge = 10;
    qp_attr2.cap.max_recv_wr  = 10;
    qp_attr2.cap.max_recv_sge = 10;
    qp_attr2.sq_sig_all       = 1;
    qp_attr2.send_cq          = cq2;
    qp_attr2.recv_cq          = cq2;
    qp_attr2.qp_type          = IBV_QPT_RC;

    qp2 = ibv_create_qp(pd2, &qp_attr2);
    if (!qp2) {
        printf("failed to create queue pair #2\n");
        return 1;
    }

    DPRINTF("queue pair2 created\n");

    /* RESET -> INIT for both queue pairs. */
    struct ibv_qp_attr attr1 = {
        .qp_state = IBV_QPS_INIT,
        .pkey_index = 0,
        .port_num = port,
        .qp_access_flags = IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_LOCAL_WRITE,
    };

    if(ibv_modify_qp(qp1, &attr1,
                     IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT | IBV_QP_ACCESS_FLAGS)) {
        printf("verbs 1 Failed to go to init\n");
        return 1;
    }

    DPRINTF("verbs1 to init\n");

    struct ibv_qp_attr attr2 = {
        .qp_state = IBV_QPS_INIT,
        .pkey_index = 0,
        .port_num = port,
        .qp_access_flags = IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_LOCAL_WRITE,
    };

    if(ibv_modify_qp(qp2, &attr2,
                     IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT | IBV_QP_ACCESS_FLAGS)) {
        printf("verbs 2 Failed to go to init\n");
        return 1;
    }

    DPRINTF("verbs2 to init\n");

    struct ibv_port_attr port1, port2;
    uint64_t psn1 = lrand48() & 0xffffff;
    uint64_t psn2 = lrand48() & 0xffffff;

    if(ibv_query_port(verbs1, port, &port1))
        return 1;

    DPRINTF("got port1 information\n");

    if(ibv_query_port(verbs2, port, &port2))
        return 1;

    DPRINTF("got port2 information\n");

    /* Query the GID of the port actually in use, not a fixed port 1. */
    if(ibv_query_gid(verbs1, port, 0, &gid1))
        return 1;
    DPRINTF("got gid1 information\n");

    if(ibv_query_gid(verbs2, port, 0, &gid2))
        return 1;

    DPRINTF("got gid2 information\n");

    /*
     * next2 describes the remote side "2" and is applied to qp1;
     * next1 describes the remote side "1" and is applied to qp2.
     */
    struct ibv_qp_attr next2 = {
        .qp_state = IBV_QPS_RTR,
        .path_mtu = IBV_MTU_1024,
        .dest_qp_num = qp2->qp_num,
        .rq_psn = psn2,
        .max_dest_rd_atomic = 5,
        .min_rnr_timer = 12,
        .ah_attr = {
            .is_global = 0,
            .dlid = port2.lid,
            .sl = 0,
            .src_path_bits = 0,
            .port_num = port,
        }
    };

    if(gid2.global.interface_id) {
        next2.ah_attr.is_global = 1;
        next2.ah_attr.grh.hop_limit = 1;
        next2.ah_attr.grh.dgid = gid2;
        next2.ah_attr.grh.sgid_index = 0;
    }

    struct ibv_qp_attr next1 = {
        .qp_state = IBV_QPS_RTR,
        .path_mtu = IBV_MTU_1024,
        .dest_qp_num = qp1->qp_num,
        .rq_psn = psn1,
        .max_dest_rd_atomic = 1,
        .min_rnr_timer = 12,
        .ah_attr = {
            .is_global = 0,
            .dlid = port1.lid,
            .sl = 0,
            .src_path_bits = 0,
            .port_num = port,
        }
    };

    if(gid1.global.interface_id) {
        next1.ah_attr.is_global = 1;
        next1.ah_attr.grh.hop_limit = 1;
        next1.ah_attr.grh.dgid = gid1;
        next1.ah_attr.grh.sgid_index = 0;
    }

    /* INIT -> RTR */
    if(ibv_modify_qp(qp2, &next1,
                     IBV_QP_STATE |
                     IBV_QP_AV |
                     IBV_QP_PATH_MTU |
                     IBV_QP_DEST_QPN |
                     IBV_QP_RQ_PSN |
                     IBV_QP_MAX_DEST_RD_ATOMIC |
                     IBV_QP_MIN_RNR_TIMER)) {
        printf("Failed to modify verbs2 to ready\n");
        return 1;
    }

    DPRINTF("verbs2 RTR\n");

    if(ibv_modify_qp(qp1, &next2,
                     IBV_QP_STATE |
                     IBV_QP_AV |
                     IBV_QP_PATH_MTU |
                     IBV_QP_DEST_QPN |
                     IBV_QP_RQ_PSN |
                     IBV_QP_MAX_DEST_RD_ATOMIC |
                     IBV_QP_MIN_RNR_TIMER)) {
        printf("Failed to modify verbs1 to ready\n");
        return 1;
    }

    DPRINTF("verbs1 RTR\n");

    /* RTR -> RTS */
    next2.qp_state = IBV_QPS_RTS;
    next2.timeout = 14;
    next2.retry_cnt = 7;
    next2.rnr_retry = 7;
    next2.sq_psn = psn1;
    next2.max_rd_atomic = 1;

    if(ibv_modify_qp(qp1, &next2,
                     IBV_QP_STATE |
                     IBV_QP_TIMEOUT |
                     IBV_QP_RETRY_CNT |
                     IBV_QP_RNR_RETRY |
                     IBV_QP_SQ_PSN |
                     IBV_QP_MAX_QP_RD_ATOMIC)) {
        printf("Failed again to modify verbs1 to ready\n");
        return 1;
    }

    DPRINTF("verbs1 RTS\n");

    next1.qp_state = IBV_QPS_RTS;
    next1.timeout = 14;
    next1.retry_cnt = 7;
    next1.rnr_retry = 7;
    next1.sq_psn = psn2;
    next1.max_rd_atomic = 1;

    if(ibv_modify_qp(qp2, &next1,
                     IBV_QP_STATE |
                     IBV_QP_TIMEOUT |
                     IBV_QP_RETRY_CNT |
                     IBV_QP_RNR_RETRY |
                     IBV_QP_SQ_PSN |
                     IBV_QP_MAX_QP_RD_ATOMIC)) {
        printf("Failed again to modify verbs2 to ready\n");
        return 1;
    }

    DPRINTF("verbs2 RTS\n");

    printf("Performing RDMA first.\n");

    /* The work request is identical for every iteration; build it once. */
    sge.addr   = (uintptr_t) buf1;
    sge.length = bytes;
    sge.lkey   = mr1->lkey;

    send_wr.wr_id               = 1;
    send_wr.opcode              = IBV_WR_RDMA_WRITE;
    send_wr.sg_list             = &sge;
    send_wr.num_sge             = 1;
    send_wr.send_flags          = IBV_SEND_SIGNALED;
    send_wr.wr.rdma.rkey        = mr2->rkey;
    send_wr.wr.rdma.remote_addr = (uintptr_t) buf2;

    iterations = total_iterations;

    getrusage(RUSAGE_SELF, &usage_start);
    gettimeofday(&tstart, NULL);

    while(iterations-- > 0) {
        DPRINTF("Iterations left: %d\n", iterations);

        /* Arm the CQ before posting so the completion raises an event. */
        if (ibv_req_notify_cq(cq1, 0))
            return 1;

        DPRINTF("Submitting local RDMA\n");
        gettimeofday(&start, NULL);
        if (ibv_post_send(qp1, &send_wr, &bad_send_wr))
            return 1;

        DPRINTF("RDMA posted %p %p\n", &send_wr, bad_send_wr);

        DPRINTF("blocking...\n");
        if(ibv_get_cq_event(comp_chan1, &evt_cq, &cq_context)) {
            printf("failed to get CQ event\n");
            return 1;
        }
        gettimeofday(&stop, NULL);
        timersub(&stop, &start, &diff);

        DPRINTF("RDMA took: %lu us\n", tv_to_usec(&diff));

        ibv_ack_cq_events(evt_cq, 1);

        DPRINTF("got event\n");

        n = ibv_poll_cq(cq1, 1, &wc);
        if (n < 0) {
            printf("poll returned error\n");
            return 1;
        }

        if (n > 0) {
            DPRINTF("return from poll: %lu\n", (unsigned long) wc.wr_id);
            if (wc.status != IBV_WC_SUCCESS) {
                printf("poll failed %s\n", ibv_wc_status_str(wc.status));
                return 1;
            }

            if (wc.wr_id == 1) {
                DPRINTF("Finished %d bytes %d %d\n", n, buf1[bytes - 1], buf2[bytes - 1]);
            } else {
                printf("didn't find completion\n");
            }
        }

        DPRINTF("Poll returned %d bytes %d %d\n", n, buf1[0], buf2[0]);
    }

    gettimeofday(&tend, NULL);
    getrusage(RUSAGE_SELF, &usage_end);

    print_time_report(&usage_start, &usage_end, &tstart, &tend);

    printf("Now using the CPU instead....\n");

    iterations = total_iterations;

    getrusage(RUSAGE_SELF, &usage_start);
    gettimeofday(&tstart, NULL);

    while(iterations-- > 0) {
        DPRINTF("Repeating without RDMA...\n");

        gettimeofday(&start, NULL);

        memcpy(buf2, buf1, bytes);

        gettimeofday(&stop, NULL);
        timersub(&stop, &start, &diff);
        DPRINTF("Regular copy too took: %lu us\n", tv_to_usec(&diff));
    }

    gettimeofday(&tend, NULL);
    getrusage(RUSAGE_SELF, &usage_end);

    print_time_report(&usage_start, &usage_end, &tstart, &tend);
    return 0;
}
コード例 #26
0
ファイル: rdma-client.c プロジェクト: kento/ibrdma
/*
 * Completion-queue worker that drives the transfer of one message.
 *
 * Sends an MR_INIT control message announcing the buffer size, then
 * loops on completion events.  Every MR_INIT_ACK / MR_CHUNK_ACK received
 * triggers registration of the next chunk (at most RDMA_BUF_SIZE_C
 * bytes) and an MR_CHUNK message, or an MR_FIN message once the whole
 * buffer has been announced.  An MR_FIN_ACK sets *args->flag to 1 and
 * ends the worker.
 *
 * args - comm: communicator whose cm_id context is used for the initial
 *        exchange; msg: buffer, size and tag to send; flag: completion
 *        flag written on MR_FIN_ACK.
 *
 * Returns NULL (on MR_FIN_ACK or on an unknown control message type).
 * Failed verbs calls and bad completions terminate through TEST_NZ/die.
 */
static void* poll_cq(struct poll_cq_args* args)
{
  struct ibv_cq *cq;
  struct ibv_wc wc;
  struct connection *conn;
  struct RDMA_communicator *comm = args->comm;
  struct control_msg cmsg;
  void* ctx;
  int polled;

  int* flag = args->flag;
  char* send_base_addr = args->msg->buff;
  uint64_t buff_size = args->msg->size;
  int tag = args->msg->tag;

  uint64_t mr_size = 0;
  uint64_t sent_size = 0;

  cmsg.type = MR_INIT;
  cmsg.data1.buff_size = buff_size;
  send_control_msg(comm->cm_id->context, &cmsg);
  post_receives(comm->cm_id->context);

  while (1) {
    TEST_NZ(ibv_get_cq_event(s_ctx->comp_channel, &cq, &ctx));
    ibv_ack_cq_events(cq, 1);
    TEST_NZ(ibv_req_notify_cq(cq, 0));

    /* Only a positive return carries a completion; a negative one is a
     * poll failure and leaves wc unset. */
    while ((polled = ibv_poll_cq(cq, 1, &wc)) > 0) {
      conn = (struct connection *)(uintptr_t)wc.wr_id;

      if (wc.status != IBV_WC_SUCCESS) {
        die("on_completion: status is not IBV_WC_SUCCESS.");
      }

      if (wc.opcode == IBV_WC_RECV) {
        switch (conn->recv_msg->type)
          {
          case MR_INIT_ACK:
          case MR_CHUNK_ACK:
            debug(printf("Recived: Type=%d\n",  conn->recv_msg->type), 1);
            if (sent_size == buff_size) {
              /* sent all data */
              cmsg.type = MR_FIN;
              cmsg.data1.tag = tag;
            } else {
              /* not sent all data yet: the last chunk may be short */
              if (sent_size + RDMA_BUF_SIZE_C > buff_size) {
                mr_size = buff_size - sent_size;
              } else {
                mr_size = RDMA_BUF_SIZE_C;
              }
              debug(printf("mr_size=%lu\n", (unsigned long)mr_size),1);
              register_rdma_region(conn, send_base_addr, mr_size);
              send_base_addr += mr_size;
              sent_size += mr_size;

              cmsg.type = MR_CHUNK;
              cmsg.data1.mr_size = mr_size;
              memcpy(&cmsg.data.mr, conn->rdma_msg_mr, sizeof(struct ibv_mr));
            }
            break;
          case MR_FIN_ACK:
            debug(printf("Recived: Type=%d\n",  conn->recv_msg->type),1);
            *flag = 1;
            return NULL;
          default:
            debug(printf("Unknown TYPE"), 1);
            return NULL;
          }
        send_control_msg(conn, &cmsg);
        post_receives(conn);
      } else if (wc.opcode == IBV_WC_SEND) {
        debug(printf("Sent: TYPE=%d\n", conn->send_msg->type),1);
      } else {
        die("unknow opecode.");
      }
    }

    if (polled < 0) {
      die("poll_cq: ibv_poll_cq failed.");
    }
  }
  return NULL;
}
コード例 #27
0
ファイル: ibwrapper.c プロジェクト: DanilKorotenko/samba
/*
 * tevent fd handler for the connection's verbs completion channel.
 *
 * Reads one CQ event from pconn->verbs_channel, re-arms notification on
 * the connection's CQ and then drains the CQ one completion at a time,
 * dispatching send and receive completions to ibw_wc_send() /
 * ibw_wc_recv().
 *
 * private_data - the struct ibw_conn this handler was registered for.
 *
 * On any failure the message is left in ibw_lasterr and logged, and, if
 * the connection is not already in IBWC_ERROR, it is moved to that state
 * and the context's connstate_func callback is invoked.
 */
static void ibw_event_handler_verbs(struct tevent_context *ev,
	struct tevent_fd *fde, uint16_t flags, void *private_data)
{
	struct ibw_conn	*conn = talloc_get_type(private_data, struct ibw_conn);
	struct ibw_conn_priv *pconn = talloc_get_type(conn->internal, struct ibw_conn_priv);
	struct ibw_ctx_priv *pctx = talloc_get_type(conn->ctx->internal, struct ibw_ctx_priv);

	struct ibv_wc wc;
	int rc;
	struct ibv_cq *ev_cq;
	void          *ev_ctx;

	DEBUG(DEBUG_DEBUG, ("ibw_event_handler_verbs(%u)\n", (uint32_t)flags));

	/* TODO: check whether if it's good to have more channels here... */
	rc = ibv_get_cq_event(pconn->verbs_channel, &ev_cq, &ev_ctx);
	if (rc) {
		sprintf(ibw_lasterr, "Failed to get cq_event with %d\n", rc);
		goto error;
	}

	/*
	 * Acknowledge exactly the event just received, on the CQ that
	 * produced it.  Doing it here keeps every later exit path from
	 * acking an event that was never obtained or that belongs to a
	 * different CQ.
	 */
	ibv_ack_cq_events(ev_cq, 1);

	if (ev_cq != pconn->cq) {
		sprintf(ibw_lasterr, "ev_cq(%p) != pconn->cq(%p)\n", ev_cq, pconn->cq);
		goto error;
	}
	rc = ibv_req_notify_cq(pconn->cq, 0);
	if (rc) {
		sprintf(ibw_lasterr, "Couldn't request CQ notification (%d)\n", rc);
		goto error;
	}

	while((rc=ibv_poll_cq(pconn->cq, 1, &wc))==1) {
		if (wc.status) {
			sprintf(ibw_lasterr, "cq completion failed status=%d, opcode=%d, rc=%d\n",
				wc.status, wc.opcode, rc);
			goto error;
		}

		switch(wc.opcode) {
		case IBV_WC_SEND:
			DEBUG(DEBUG_DEBUG, ("send completion\n"));
			if (ibw_wc_send(conn, &wc))
				goto error;
			break;

		case IBV_WC_RDMA_WRITE:
			DEBUG(DEBUG_DEBUG, ("rdma write completion\n"));
			break;

		case IBV_WC_RDMA_READ:
			DEBUG(DEBUG_DEBUG, ("rdma read completion\n"));
			break;

		case IBV_WC_RECV:
			DEBUG(DEBUG_DEBUG, ("recv completion\n"));
			if (ibw_wc_recv(conn, &wc))
				goto error;
			break;

		default:
			sprintf(ibw_lasterr, "unknown completion %d\n", wc.opcode);
			goto error;
		}
	}
	/* The loop ends with 0 when the CQ is empty; anything else is an error. */
	if (rc!=0) {
		sprintf(ibw_lasterr, "ibv_poll_cq error %d\n", rc);
		goto error;
	}

	return;
error:
	/* Never use the message itself as the format string. */
	DEBUG(DEBUG_ERR, ("%s", ibw_lasterr));

	if (conn->state!=IBWC_ERROR) {
		conn->state = IBWC_ERROR;
		pctx->connstate_func(NULL, conn);
	}
}
コード例 #28
0
ファイル: rpcdwyane.c プロジェクト: carriercomm/DPDK-Graph
/*
 * RDMA Write a chain of local buffers to the remote address.
 *
 * Gathers the entries of the cl chain into one scatter/gather list,
 * posts a single signaled IBV_WR_RDMA_WRITE work request on conn's queue
 * pair and busy-polls the QP's send CQ for its completion.
 *
 * conn - connection whose cm_id->qp is used for the transfer.
 * cl   - head of the chunk chain.  Each entry supplies a local segment
 *        (w.c_saddr3, c_smemhandle.mrc_lmr, c_len); the remote target
 *        (u.c_daddr, c_dmemhandle.mrc_rmr) is taken from the head only.
 *        At most DW_WRITE_MAX_SGE entries are supported.
 *
 * Returns RDMA_SUCCESS when cl is NULL (nothing to write) or the write
 * completed successfully; RDMA_FAILED when the chain is too long, the
 * post or poll fails, the completion carries an error status, or the
 * completion does not belong to this request.
 */
rdma_stat
dw_write(CONN *conn, struct clist *cl)
{
	enum { DW_WRITE_MAX_SGE = 2 };

	struct clist	*clp;
	int	err;
	int	nds;

	u_int32_t		total_msg_size;

	struct ibv_sge			sgl[DW_WRITE_MAX_SGE];
	struct ibv_send_wr		send_wr = { };
	struct ibv_send_wr		   *bad_send_wr;
	struct ibv_wc				wc;

	if (cl == NULL)
		return (RDMA_SUCCESS);

	nds = 0;
	total_msg_size = 0;
	for (clp = cl; clp != NULL; clp = clp->c_next) {
		if (nds >= DW_WRITE_MAX_SGE) {
			/*
			 * Chains longer than the SGE array are not supported
			 * yet; refuse them instead of writing past sgl[].
			 */
			PRINTF_ERR("nds >= 2.In %s %d\n", __func__,__LINE__);
			return (RDMA_FAILED);
		}
		sgl[nds].addr = clp->w.c_saddr3;
		sgl[nds].lkey = clp->c_smemhandle.mrc_lmr; /* lkey */
		sgl[nds].length = clp->c_len;
		total_msg_size += clp->c_len;
		PRINTF_INFO("the length of seg%d is %d\n", nds, clp->c_len);
		nds++;
	}

	/* One conversion per argument: the old format had a dangling %d. */
	PRINTF_INFO("the total_msg_size is %u\n", total_msg_size);

	/* The opcode value doubles as the wr_id tag checked on completion. */
	send_wr.wr_id		= IBV_WR_RDMA_WRITE;
	send_wr.opcode		= IBV_WR_RDMA_WRITE;
	send_wr.send_flags	= IBV_SEND_SIGNALED;
	send_wr.sg_list		= sgl;
	send_wr.num_sge		= nds;
	send_wr.next		= NULL;

	send_wr.wr.rdma.rkey		= cl->c_dmemhandle.mrc_rmr;
	send_wr.wr.rdma.remote_addr	= cl->u.c_daddr;

	PRINTF_INFO("nds is %d in %s\n", nds, __func__);

	if (ibv_post_send(conn->cm_id->qp, &send_wr, &bad_send_wr)) {
		PRINTF_ERR("err while ibv_post_send\n");
		return RDMA_FAILED;
	}

	/* Spin until one completion is available (0 means "CQ empty"). */
	do {
		err = ibv_poll_cq(conn->cm_id->qp->send_cq, 1, &wc);
	} while (err == 0);

	if (err < 0) {
		PRINTF_ERR("err occure while ibv_poll_cq in %s\n", __func__);
		return RDMA_FAILED;
	}

	if (wc.status != IBV_WC_SUCCESS) {
		PRINTF_ERR("err %d in %s\n", wc.status, __func__);
		return RDMA_FAILED;
	}

	/*
	 * No ibv_ack_cq_events() here: the completion was obtained by
	 * polling, not through ibv_get_cq_event(), so there is no event to
	 * acknowledge.  Acking events that were never received corrupts the
	 * CQ's acked-event count and can make ibv_destroy_cq() block.
	 */

	if (wc.wr_id != IBV_WR_RDMA_WRITE) {
		PRINTF_ERR("wc.wr_id != msgid .In %s: %d\n",__func__,__LINE__);
		return RDMA_FAILED;
	}
	PRINTF_INFO("write ok\n");
	return (RDMA_SUCCESS);
}
コード例 #29
0
ファイル: rdma.c プロジェクト: DebashisGanguly/FIOBenchmark
static int fio_rdmaio_getevents(struct thread_data *td, unsigned int min,
				unsigned int max, const struct timespec *t)
{
	struct rdmaio_data *rd = td->io_ops->data;
	enum ibv_wc_opcode comp_opcode;
	struct ibv_cq *ev_cq;
	void *ev_ctx;
	int ret, r = 0;
	comp_opcode = IBV_WC_RDMA_WRITE;

	switch (rd->rdma_protocol) {
	case FIO_RDMA_MEM_WRITE:
		comp_opcode = IBV_WC_RDMA_WRITE;
		break;
	case FIO_RDMA_MEM_READ:
		comp_opcode = IBV_WC_RDMA_READ;
		break;
	case FIO_RDMA_CHA_SEND:
		comp_opcode = IBV_WC_SEND;
		break;
	case FIO_RDMA_CHA_RECV:
		comp_opcode = IBV_WC_RECV;
		break;
	default:
		log_err("fio: unknown rdma protocol - %d\n", rd->rdma_protocol);
		break;
	}

	if (rd->cq_event_num > 0) {	/* previous left */
		rd->cq_event_num--;
		return 0;
	}

again:
	if (ibv_get_cq_event(rd->channel, &ev_cq, &ev_ctx) != 0) {
		log_err("fio: Failed to get cq event!\n");
		return -1;
	}
	if (ev_cq != rd->cq) {
		log_err("fio: Unknown CQ!\n");
		return -1;
	}
	if (ibv_req_notify_cq(rd->cq, 0) != 0) {
		log_err("fio: Failed to set notify!\n");
		return -1;
	}

	ret = cq_event_handler(td, comp_opcode);
	if (ret < 1)
		goto again;

	ibv_ack_cq_events(rd->cq, ret);

	r += ret;
	if (r < min)
		goto again;

	rd->cq_event_num -= r;

	return r;
}
コード例 #30
0
ファイル: rdmatrans.c プロジェクト: doughdemon/diod
/*
 * Receive callback of the RDMA transport: deliver up to msize bytes of
 * received data in a freshly allocated Npfcall.
 *
 * fcp   - out: set to the allocated fcall on success.
 * msize - capacity requested from np_alloc_fcall() and upper bound on
 *         the number of bytes copied.
 * a     - the Rdmatrans instance.
 *
 * If a received context is already queued on rdma->rfirst its unread
 * bytes are copied out (the context is re-posted once fully consumed).
 * Otherwise the function blocks on the completion channel and processes
 * completions: receives are appended to the queue and then delivered,
 * send completions release their context and signal rdma->cond.
 *
 * Returns 0 with *fcp set on success, -1 after np_uerror() on failure
 * or once a non-successful completion has been seen.
 *
 * NOTE(review): fc is not released on the -1 paths; confirm who owns it.
 */
static int
rdma_trans_recv(Npfcall **fcp, u32 msize, void *a)
{
	int n, ret, closing = 0;
	struct ibv_cq *cq;
	struct ibv_wc wc;
	void *context;
	Rdmatrans *rdma = (Rdmatrans *)a;
	Rdmactx *ctx;
	Npfcall *fc = NULL;

	if (!(fc = np_alloc_fcall (msize))) {
		np_uerror(ENOMEM);
		return -1;
	}
	pthread_mutex_lock(&rdma->lock);
again:
	/* rdma->lock is held whenever control reaches this label. */
	if (rdma->rfirst) {
		ctx = rdma->rfirst;

		n = ctx->len - ctx->pos;
		if (n > msize)
			n = msize;

		memmove(fc->pkt, ctx->buf + ctx->pos, n);
		ctx->pos += n;
		if (ctx->pos == ctx->len) {
			/* fully consumed: unlink and hand back to the RQ */
			rdma->rfirst = ctx->next;
			if (ctx == rdma->rlast)
				rdma->rlast = NULL;

			rdma_post_recv(rdma, ctx);
		}

		pthread_mutex_unlock(&rdma->lock);
		fc->size = n;
		*fcp = fc;
		return 0;
	}

	pthread_mutex_unlock(&rdma->lock);

poll:
	ret = ibv_get_cq_event(rdma->ch, &cq, &context);
	if (ret) {
		/*
		 * ibv_get_cq_event() only returns -1 on failure; np_uerror()
		 * takes an errno code (cf. ENOMEM above), so pass errno.
		 */
		np_uerror(errno);
		return -1;
	}
	ibv_ack_cq_events(rdma->cq, 1);

	ibv_req_notify_cq(cq, 0);
	while ((ret = ibv_poll_cq(rdma->cq, 1, &wc)) > 0) {
		/* Check if it's a flush */
		if (wc.status != IBV_WC_SUCCESS) {
			closing = 1;
			continue;
		}

		if (wc.opcode == IBV_WC_RECV) {
			ctx = (Rdmactx *) wc.wr_id;
			pthread_mutex_lock(&rdma->lock);
			ctx->used = 0;
			ctx->len = wc.byte_len;
			ctx->pos = 0;
			/* append to the tail of the received-context queue */
			if (rdma->rlast)
				rdma->rlast->next = ctx;
			else
				rdma->rfirst = ctx;

			rdma->rlast = ctx;
			ctx->next = NULL;
			/* deliver it; the lock stays held across the jump */
			goto again;
		} else if (wc.opcode == IBV_WC_SEND) {
			ctx = (Rdmactx *) wc.wr_id;
			pthread_mutex_lock(&rdma->lock);
			ctx->used = 0;
			pthread_cond_signal(&rdma->cond);
			pthread_mutex_unlock(&rdma->lock);
		}
	}

	/* CQ drained without error and no flush seen: wait for more. */
	if (!ret && !closing)
		goto poll;

	/* NOTE(review): ret is 0 (closing) or a negative poll result here,
	 * not an errno code; confirm what np_uerror() should receive. */
	np_uerror(ret);
	return -1;
}