Ejemplo n.º 1
0
/*
 * Post a signaled RDMA WRITE of `length` bytes taken `offset` bytes into the
 * connection's local RDMA region, targeting the same offset inside the
 * peer's advertised memory region.
 *
 * offset: byte offset applied to both the local and the remote address.
 * conn:   connection providing the QP, the local MR and the peer MR info.
 * length: number of bytes to write; must not be negative.
 *
 * The request is validated before any work request is built. The process
 * exits with status 1 if the range does not fit into RDMA_BUFFER_SIZE or if
 * length is negative.
 */
static void
kickoff_rdma_with_offset(uintptr_t offset, IbvConnection *conn, int length)
{
    struct ibv_send_wr wr, *bad_wr = NULL;
    struct ibv_sge sge;

    /* A negative length would wrap to a huge value once stored in the
     * unsigned sge.length field and could also slip past the range check. */
    if (length < 0) {
        WARN(0, "kickoff_rdma_with_offset: negative length (=%d).\n", length);
        exit(1);
    }

    /* offset + length is a uintptr_t expression, so it must not be printed
     * with %d; cast both values to a type with a matching conversion. */
    if (RDMA_BUFFER_SIZE < (offset + length)) {
        WARN(0, "kickoff_rdma_with_offset: offset + length (=%llu) exceeds RDMA_BUFFER_SIZE (=%llu).\n",
             (unsigned long long)(offset + length),
             (unsigned long long)RDMA_BUFFER_SIZE);
        exit(1);
    }

    memset(&wr, 0, sizeof(wr));

    wr.wr_id = (uintptr_t)conn;
    wr.opcode = IBV_WR_RDMA_WRITE;
    wr.sg_list = &sge;
    wr.num_sge = 1;
    wr.send_flags = IBV_SEND_SIGNALED;
    wr.wr.rdma.remote_addr = (uintptr_t)conn->peer_mr.addr + offset;
    wr.wr.rdma.rkey = conn->peer_mr.rkey;

    sge.addr = (uintptr_t)conn->rdma_local_region + offset;
    sge.length = length;
    sge.lkey = conn->rdma_local_mr->lkey;

    TEST_NZ(ibv_post_send(conn->qp, &wr, &bad_wr));
}
Ejemplo n.º 2
0
void _rdma_write_offset(void *context, void* buf, uint64_t offset)
{
  struct connection *conn = (struct connection *)context;
  struct ibv_send_wr wr, *bad_wr = NULL;
  struct ibv_sge sge;
  //printf("7\n");
  memset(&wr, 0, sizeof(wr));

  wr.wr_id = (uintptr_t)conn;
  wr.opcode = IBV_WR_RDMA_WRITE;
  wr.sg_list = &sge;
  wr.num_sge = 1;
  wr.send_flags = IBV_SEND_SIGNALED;
  wr.wr.rdma.remote_addr = (uintptr_t)conn->peer_mr.addr + offset;
  wr.wr.rdma.rkey = conn->peer_mr.rkey;

  sge.addr = (uintptr_t)conn->rdma_remote_region;
  sge.length = RDMA_BUFFER_SIZE;
  sge.lkey = conn->rdma_remote_mr->lkey;

  time_stamp(2);
  sem_wait(&write_ops);
  memcpy(conn->rdma_remote_region, (char*)buf, RDMA_BUFFER_SIZE);
  TEST_NZ(ibv_post_send(conn->qp, &wr, &bad_wr));

}
Ejemplo n.º 3
0
static int post_sends(struct cmatest_node *node, int signal_flag)
{
	struct ibv_send_wr send_wr, *bad_send_wr;
	struct ibv_sge sge;
	int i, ret = 0;

	if (!node->connected || !message_count)
		return 0;

	send_wr.next = NULL;
	send_wr.sg_list = &sge;
	send_wr.num_sge = 1;
	send_wr.opcode = IBV_WR_SEND_WITH_IMM;
	send_wr.send_flags = signal_flag;
	send_wr.wr_id = (unsigned long)node;
	send_wr.imm_data = htonl(node->cma_id->qp->qp_num);

	send_wr.wr.ud.ah = node->ah;
	send_wr.wr.ud.remote_qpn = node->remote_qpn;
	send_wr.wr.ud.remote_qkey = node->remote_qkey;

	sge.length = message_size;
	sge.lkey = node->mr->lkey;
	sge.addr = (uintptr_t) node->mem;

	for (i = 0; i < message_count && !ret; i++) {
		ret = ibv_post_send(node->cma_id->qp, &send_wr, &bad_send_wr);
		if (ret)
			printf("failed to post sends: %d\n", ret);
	}
	return ret;
}
Ejemplo n.º 4
0
static int post_sends(struct cmatest_node *node)
{
	struct ibv_send_wr send_wr, *bad_send_wr;
	struct ibv_sge sge;
	int i, ret = 0;

	if (!node->connected || !message_count)
		return 0;

	send_wr.next = NULL;
	send_wr.sg_list = &sge;
	send_wr.num_sge = 1;
	send_wr.opcode = IBV_WR_SEND;
	send_wr.send_flags = 0;
	send_wr.wr_id = (unsigned long)node;

	sge.length = message_size;
	sge.lkey = node->mr->lkey;
	sge.addr = (uintptr_t) node->mem;

	for (i = 0; i < message_count && !ret; i++) {
		ret = ibv_post_send(node->cma_id->qp, &send_wr, &bad_send_wr);
		if (ret) 
			printf("failed to post sends: %d\n", ret);
	}
	return ret;
}
Ejemplo n.º 5
0
/*
 * Post the send descriptor already stored in `frag` on the endpoint queue
 * pair selected by the fragment's base.order field.
 *
 * A send WQE and a get token are reserved first; each reservation made so
 * far is released again before an error is returned.
 *
 * Returns OPAL_SUCCESS once ibv_post_send() accepts the request,
 * OPAL_ERR_OUT_OF_RESOURCE when no WQE or no get token is available, and
 * OPAL_ERROR when ibv_post_send() fails. The btl argument is not used here.
 */
int mca_btl_openib_get_internal (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *ep,
                                 mca_btl_openib_get_frag_t *frag)
{
    int qp = to_base_frag(frag)->base.order;
    struct ibv_send_wr *bad_wr;

    /* check for a send wqe */
    if (qp_get_wqe(ep, qp) < 0) {
        qp_put_wqe(ep, qp);
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    /* check for a get token */
    if (OPAL_THREAD_ADD32(&ep->get_tokens,-1) < 0) {
        /* undo both the WQE reservation and the token decrement */
        qp_put_wqe(ep, qp);
        OPAL_THREAD_ADD32(&ep->get_tokens,1);
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    qp_inflight_wqe_to_frag(ep, qp, to_com_frag(frag));
    qp_reset_signal_count(ep, qp);

    if (ibv_post_send(ep->qps[qp].qp->lcl_qp, &frag->sr_desc, &bad_wr)) {
        /* NOTE(review): the WQE and token are returned here, but the effect of
         * qp_inflight_wqe_to_frag() above is not visibly undone - confirm. */
        qp_put_wqe(ep, qp);
        OPAL_THREAD_ADD32(&ep->get_tokens,1);
        return OPAL_ERROR;
    }

    return OPAL_SUCCESS;
}
Ejemplo n.º 6
0
void write_remote(struct connection * conn, uint32_t len){

    uint32_t size =len&(~(1U<<31));
    snprintf(conn->send_region, send_buffer_size, "message from active/client side with pid %d", getpid());
    struct ibv_send_wr wr, *bad_wr = NULL; 
    struct ibv_sge sge;

    memset(&wr,0,sizeof(wr));

    wr.wr_id = (uintptr_t)conn;
    wr.opcode = IBV_WR_RDMA_WRITE_WITH_IMM;
    wr.send_flags = IBV_SEND_SIGNALED;

    wr.imm_data = htonl(len);
    wr.wr.rdma.remote_addr = (uintptr_t)conn->peer_addr;
    wr.wr.rdma.rkey = conn->peer_rkey;

    if (size>0){
        wr.sg_list = &sge;
        wr.num_sge = 1;
    sge.addr = (uintptr_t)conn->send_region;
    sge.length = size;
    sge.lkey = conn->send_region_mr->lkey;
    }
    TEST_NZ(ibv_post_send(conn->qp, &wr, &bad_wr));


}
Ejemplo n.º 7
0
/**
 * Sends a buffer's memory region descriptor so that it can be mapped to its
 * remote end.
 *
 * The ibv_mr structure itself is temporarily registered, sent with `id` as
 * immediate data, and deregistered once the matching completion has been
 * polled from write_cq_. Completions polled for other work requests while
 * waiting are discarded.
 *
 * @param mr  memory region whose descriptor is transmitted
 * @param id  value placed in the work request's imm_data field
 */
void RDMAChannel::SendMR(ibv_mr* mr, int id) {
  // Map the memory region itself so that it can be sent
  ibv_mr* init = ibv_reg_mr(adapter_.pd_, mr, sizeof(ibv_mr),
                            IBV_ACCESS_LOCAL_WRITE);
  // ibv_reg_mr returns NULL on failure; fail loudly instead of
  // dereferencing a null pointer below.
  CHECK(init) << "ibv_reg_mr failed";

  struct ibv_sge list;
  list.addr = (uint64_t) mr;
  list.length = sizeof(ibv_mr);
  list.lkey = init->lkey;

  struct ibv_send_wr wr;
  caffe_memset(sizeof(wr), 0, &wr);
  wr.wr_id = (uint64_t) init;
  wr.sg_list = &list;
  wr.num_sge = 1;
  wr.opcode = IBV_WR_SEND_WITH_IMM;
  wr.send_flags = IBV_SEND_SIGNALED;
  wr.imm_data = id;

  struct ibv_send_wr *bad_wr;
  CHECK(!ibv_post_send(qp_, &wr, &bad_wr));

  // Busy-poll until the completion for this work request shows up.
  for (;;) {
    ibv_wc wc;
    int ne = ibv_poll_cq(write_cq_, 1, &wc);
    CHECK_GE(ne, 0);
    if (ne && wc.wr_id == (uint64_t) init) {
      // A completion with an error status means the descriptor never
      // reached the peer; do not report that as success.
      CHECK(wc.status == IBV_WC_SUCCESS);
      break;
    }
  }
  CHECK(!ibv_dereg_mr(init));
}
/**
 * Copies the current message into the connection's send buffer and posts it
 * as a signaled SEND on the connection's queue pair.
 *
 * @param context  the Connection to send on (passed as void *)
 * @return always 0; posting failures are handled by TEST_NZ
 */
int Process::post_send(void *context){

	Connection *conn = (Connection *) context;

	// Validate the connection before its first dereference; previously
	// conn->identifier was printed ahead of these checks.
	assert(conn != nullptr);
	assert(conn->identifier != nullptr);

	std::cout<<"SEND LOCATION CONN -> ID"<<conn->identifier<<"\n";
	std::cout<<"SEND POINTER ID"<<listener<<std::endl<<std::flush;

	struct ibv_send_wr wr, *bad_wr = nullptr;
	struct ibv_sge sge;

	// The address of an object is never null, so the former
	// assert(&message != nullptr) checks were dropped as tautologies.
	calc_message_numerical(&message);
	assert(conn->send_region != nullptr);
	memcpy(conn->send_region, message.x, message.size*sizeof(char));
	printf("connected. posting send...\n");

	memsetzero(&wr);

	wr.wr_id = (uintptr_t)conn;
	wr.opcode = IBV_WR_SEND;
	wr.sg_list = &sge;
	wr.num_sge = 1;
	wr.send_flags = IBV_SEND_SIGNALED;

	sge.addr = (uintptr_t)conn->send_region;
	sge.length = message.size;
	sge.lkey = conn->send_memory_region->lkey;

	TEST_NZ(ibv_post_send(conn->queue_pair, &wr, &bad_wr));

	return 0;
}
/*
 * Send the client index as an acknowledgement message and wait for the send
 * to complete.
 *
 * Works entirely on file-level state (ack_buffer, sge_send, send_wr, cm_id,
 * comp_chan, cq, wc, ...). A failed completion status only produces a
 * warning on stdout; failing verbs calls trip the asserts.
 */
void send_ack() {
    /* Payload is the client index, described by a single SGE that uses
     * mr_ack_buffer's lkey. */
    ack_buffer = client_pdata.index;
    sge_send.lkey = mr_ack_buffer->lkey;
    sge_send.length = sizeof(ack_buffer);
    sge_send.addr = (uintptr_t)&ack_buffer;

    /* One signaled SEND carrying that SGE. */
    send_wr.opcode = IBV_WR_SEND;
    send_wr.send_flags = IBV_SEND_SIGNALED;
    send_wr.wr_id = 1;
    send_wr.num_sge = 1;
    send_wr.sg_list = &sge_send;

    err = ibv_post_send(cm_id->qp, &send_wr, &bad_send_wr);
    assert(err == 0);

    /* Block on the completion channel, acknowledge the event and re-arm
     * notification before draining the completion itself. */
    err = ibv_get_cq_event(comp_chan, &evt_cq, &cq_context);
    assert(err == 0);

    ibv_ack_cq_events(evt_cq, 1);

    err = ibv_req_notify_cq(cq, 0);
    assert(err == 0);

    n = ibv_poll_cq(cq, 1, &wc);
    assert(n >= 1);
    if (wc.status != IBV_WC_SUCCESS) {
        printf("Warning: Client %d send ack failed\n", client_pdata.index);
    }
}
int on_connection(void *context)
{
  struct connection *conn = (struct connection *)context;
  struct ibv_send_wr wr, *bad_wr = NULL;
  struct ibv_sge sge;

  snprintf(conn->send_region, BUFFER_SIZE, "message from active/client side with pid %d", getpid());

  printf("connected. posting send...\n");

  memset(&wr, 0, sizeof(wr));

  wr.wr_id = (uintptr_t)conn;
  wr.opcode = IBV_WR_SEND;
  wr.sg_list = &sge;
  wr.num_sge = 1;
  wr.send_flags = IBV_SEND_SIGNALED;

  sge.addr = (uintptr_t)conn->send_region;
  sge.length = BUFFER_SIZE;
  sge.lkey = conn->send_mr->lkey;

  TEST_NZ(ibv_post_send(conn->qp, &wr, &bad_wr));

  return 0;
}
Ejemplo n.º 11
0
/*
 * Post a signaled RDMA WRITE of `length` bytes from the local data region
 * (starting at local_offset) into the peer's data region (starting at
 * remote_offset) on the global rdma_conn connection.
 *
 * remote_offset: byte offset into the peer data MR; must be >= 0.
 * length:        number of bytes to write; must be >= 0.
 * local_offset:  byte offset into conn->data_region.
 *
 * Calls die() when the remote range falls outside DATA_REGION_SIZE.
 * request_stack_size is incremented before the request is posted.
 */
void cfio_rdma_client_write_data(
        int remote_offset, 
        int length, 
        int local_offset)
{
    // rdma_debug("write data ...");
    /* A negative length would become a huge unsigned sge.length, and
     * remote_offset + length could overflow int; compare in a wider type. */
    if (remote_offset < 0 || length < 0 ||
        (long long)remote_offset + (long long)length > (long long)DATA_REGION_SIZE) {
        die("RDMA out of region");
    }

    struct ibv_send_wr wr, *bad_wr = NULL;
    struct ibv_sge sge;

    memset(&wr, 0, sizeof(wr));

    rdma_conn_t *conn = rdma_conn;
    wr.wr_id = (uintptr_t)(conn);
    wr.opcode = IBV_WR_RDMA_WRITE;
    wr.sg_list = &sge;
    wr.num_sge = 1;
    wr.send_flags = IBV_SEND_SIGNALED;
    wr.wr.rdma.remote_addr = (uintptr_t)((char *)conn->peer_data_mr.addr + remote_offset); 
    wr.wr.rdma.rkey = conn->peer_data_mr.rkey;

    /* NOTE(review): local_offset is not range-checked here; presumably the
     * caller guarantees it lies inside the registered data region. */
    sge.addr = (uintptr_t)(conn->data_region + local_offset);
    sge.length = length;
    sge.lkey = conn->data_mr->lkey;

    ++ request_stack_size;
    TEST_NZ(ibv_post_send(conn->qp, &wr, &bad_wr));
}
Ejemplo n.º 12
0
/*
 * Post a signaled RDMA WRITE for a CUDA event: event->size bytes from the
 * host pointer of the local segment to the remote segment of event->rank,
 * on queue `queue`.
 *
 * Returns 0 on success and sets event->ib_use to 1. Returns -1 if
 * ibv_post_send() fails, after marking the queue pair state for that
 * queue/rank as GASPI_STATE_CORRUPT.
 */
static int
_gaspi_event_send(gaspi_cuda_event *event, int queue)
{
  /* NOTE(review): swr is not zero-initialized; only the fields assigned
   * below are defined when it is posted. */
  struct ibv_send_wr swr;
  struct ibv_sge slist;
  struct ibv_send_wr *bad_wr;

  swr.wr.rdma.rkey = glb_gaspi_ctx.rrmd[event->segment_remote][event->rank].rkey;
  swr.sg_list    = &slist;
  swr.num_sge    = 1;
  swr.wr_id      = event->rank;
  swr.opcode     = IBV_WR_RDMA_WRITE;
  swr.send_flags = IBV_SEND_SIGNALED;
  swr.next       = NULL;

  /* NOTE(review): the local address is taken from the entry indexed by
   * event->rank while the lkey below comes from the entry indexed by
   * glb_gaspi_ctx.rank - confirm this asymmetry is intended. */
  slist.addr = (uintptr_t) (char*)(glb_gaspi_ctx.rrmd[event->segment_local][event->rank].host_ptr + NOTIFY_OFFSET + event->offset_local);

  slist.length = event->size;
  slist.lkey = ((struct ibv_mr *)glb_gaspi_ctx.rrmd[event->segment_local][glb_gaspi_ctx.rank].host_mr)->lkey;

  /* Remote segments with a CUDA device id >= 0 are addressed without the
   * NOTIFY_OFFSET displacement. */
  if(glb_gaspi_ctx.rrmd[event->segment_remote][event->rank].cudaDevId >= 0)
    swr.wr.rdma.remote_addr = (glb_gaspi_ctx.rrmd[event->segment_remote][event->rank].addr + event->offset_remote);
  else
    swr.wr.rdma.remote_addr = (glb_gaspi_ctx.rrmd[event->segment_remote][event->rank].addr + NOTIFY_OFFSET + event->offset_remote);

  if(ibv_post_send(glb_gaspi_ctx_ib.qpC[queue][event->rank], &swr, &bad_wr))
  {
    glb_gaspi_ctx.qp_state_vec[queue][event->rank] = GASPI_STATE_CORRUPT;
    return -1;
  }

  event->ib_use = 1;

  return 0;
}
Ejemplo n.º 13
0
/*
 * Finalize the flags and wr_id of `wr`, log it, and post it on the
 * endpoint's TX queue pair. Aborts via ucs_fatal() if ibv_post_send() fails.
 *
 * iface:       interface the endpoint belongs to.
 * ep:          endpoint whose txqp is used.
 * wr:          work request to post; send_flags and wr_id are overwritten.
 * send_flags:  requested flags; when IBV_SEND_SIGNALED is absent the result
 *              of uct_rc_iface_tx_moderation() is OR-ed in, and for RDMA
 *              READ requests the result of uct_rc_ep_atomic_fence() is
 *              OR-ed in.
 * max_log_sge: forwarded to uct_ib_log_post_send().
 */
static UCS_F_ALWAYS_INLINE void
uct_rc_verbs_ep_post_send(uct_rc_verbs_iface_t* iface, uct_rc_verbs_ep_t* ep,
                          struct ibv_send_wr *wr, int send_flags, int max_log_sge)
{
    struct ibv_send_wr *rejected_wr;
    int signaled;
    int status;

    uct_rc_txqp_check(&ep->super.txqp);

    if ((send_flags & IBV_SEND_SIGNALED) == 0) {
        send_flags |= uct_rc_iface_tx_moderation(&iface->super, &ep->super.txqp,
                                                 IBV_SEND_SIGNALED);
    }
    if (IBV_WR_RDMA_READ == wr->opcode) {
        send_flags |= uct_rc_ep_atomic_fence(&iface->super, &ep->fi,
                                             IBV_SEND_FENCE);
    }

    /* Flags are final from here on. */
    signaled       = send_flags & IBV_SEND_SIGNALED;
    wr->send_flags = send_flags;
    wr->wr_id      = uct_rc_txqp_unsignaled(&ep->super.txqp);

    uct_ib_log_post_send(&iface->super.super, ep->super.txqp.qp, wr, max_log_sge,
                         (wr->opcode == IBV_WR_SEND) ? uct_rc_ep_packet_dump : NULL);

    status = ibv_post_send(ep->super.txqp.qp, wr, &rejected_wr);
    if (status != 0) {
        ucs_fatal("ibv_post_send() returned %d (%m)", status);
    }

    uct_rc_verbs_txqp_posted(&ep->super.txqp, &ep->txcnt, &iface->super, signaled);
}
Ejemplo n.º 14
0
/*
 * Finalize the flags and wr_id of `wr`, log and instrument it, and post it
 * on the endpoint's TX queue pair. Aborts via ucs_fatal() if
 * ibv_post_send() fails.
 *
 * iface:      interface the endpoint belongs to.
 * ep:         endpoint whose txqp is used.
 * wr:         work request to post; send_flags and wr_id are overwritten.
 * send_flags: requested flags; when IBV_SEND_SIGNALED is absent the result
 *             of uct_rc_iface_tx_moderation() is OR-ed in.
 */
static UCS_F_ALWAYS_INLINE void
uct_rc_verbs_ep_post_send(uct_rc_verbs_iface_t* iface, uct_rc_verbs_ep_t* ep,
                          struct ibv_send_wr *wr, int send_flags)
{
    struct ibv_send_wr *rejected_wr;
    int signaled;
    int status;

    uct_rc_txqp_check(&ep->super.txqp);

    if ((send_flags & IBV_SEND_SIGNALED) == 0) {
        send_flags |= uct_rc_iface_tx_moderation(&iface->super, &ep->super.txqp,
                                                 IBV_SEND_SIGNALED);
    }

    /* Flags are final from here on. */
    signaled       = send_flags & IBV_SEND_SIGNALED;
    wr->send_flags = send_flags;
    wr->wr_id      = uct_rc_txqp_unsignaled(&ep->super.txqp);

    uct_ib_log_post_send(&iface->super.super, ep->super.txqp.qp, wr,
                         (wr->opcode == IBV_WR_SEND) ? uct_rc_ep_am_packet_dump : NULL);

    UCT_IB_INSTRUMENT_RECORD_SEND_WR_LEN("uct_rc_verbs_ep_post_send", wr);

    status = ibv_post_send(ep->super.txqp.qp, wr, &rejected_wr);
    if (status != 0) {
        ucs_fatal("ibv_post_send() returned %d (%m)", status);
    }

    uct_rc_verbs_txqp_posted(&ep->super.txqp, &ep->txcnt, &iface->super, signaled);
}
Ejemplo n.º 15
0
/*
 * Send this side's connection parameters (my_dest) to the peer as one
 * signaled SEND on comm->rdma_ctx->qp[0] and busy-poll the send CQ until
 * the send completes.
 *
 * With HAVE_ENDIAN the integer fields are converted to big-endian before
 * being copied into the registered buffer; otherwise the structure is
 * copied as is.
 *
 * Returns 0 on success, 1 if posting, polling or the completion failed.
 */
static int rdma_write_keys(struct pingpong_dest *my_dest,
		struct perftest_comm *comm)
{
	struct ibv_send_wr wr;
	struct ibv_send_wr *bad_wr;
	struct ibv_sge list;
	struct ibv_wc wc;
	int ne;

	#ifdef HAVE_ENDIAN
	int i;
	struct pingpong_dest m_my_dest;

	m_my_dest.lid 		= htobe32(my_dest->lid);
	m_my_dest.out_reads 	= htobe32(my_dest->out_reads);
	m_my_dest.qpn 		= htobe32(my_dest->qpn);
	m_my_dest.psn 		= htobe32(my_dest->psn);
	m_my_dest.rkey 		= htobe32(my_dest->rkey);
	m_my_dest.srqn		= htobe32(my_dest->srqn);
	m_my_dest.gid_index	= htobe32(my_dest->gid_index);
	m_my_dest.vaddr		= htobe64(my_dest->vaddr);

	for(i=0; i<16; i++) {
		m_my_dest.gid.raw[i] = my_dest->gid.raw[i];
	}

	memcpy(comm->rdma_ctx->buf, &m_my_dest, sizeof(struct pingpong_dest));
	#else
	/* my_dest already is a pointer to the structure; taking its address
	 * would copy bytes starting at the pointer variable itself. */
	memcpy(comm->rdma_ctx->buf, my_dest, sizeof(struct pingpong_dest));
	#endif
	list.addr   = (uintptr_t)comm->rdma_ctx->buf;
	list.length = sizeof(struct pingpong_dest);
	list.lkey   = comm->rdma_ctx->mr->lkey;


	wr.wr_id      = SYNC_SPEC_ID;
	wr.sg_list    = &list;
	wr.num_sge    = 1;
	wr.opcode     = IBV_WR_SEND;
	wr.send_flags = IBV_SEND_SIGNALED;
	wr.next       = NULL;

	if (ibv_post_send(comm->rdma_ctx->qp[0],&wr,&bad_wr)) {
		fprintf(stderr, "Function ibv_post_send failed\n");
		return 1;
	}

	do {
		ne = ibv_poll_cq(comm->rdma_ctx->send_cq, 1,&wc);
	} while (ne == 0);

	/* A negative result means polling failed and wc was never filled in. */
	if (ne < 0) {
		fprintf(stderr, " poll CQ failed %d\n", ne);
		return 1;
	}

	if (wc.status || wc.opcode != IBV_WC_SEND || wc.wr_id != SYNC_SPEC_ID) {
		fprintf(stderr, " Bad wc status %d\n",(int)wc.status);
		return 1;
	}

	return 0;
}
Ejemplo n.º 16
0
/*
 * Issue an RDMA READ described by `descriptor` on the endpoint's
 * low-priority queue pair.
 *
 * If no send WQE or no get token is available, the reservation is undone,
 * the fragment is appended to endpoint->pending_get_frags under the
 * endpoint lock, and OMPI_SUCCESS is returned. Otherwise the
 * work request is filled from the descriptor's source/destination segments
 * and posted, and receive buffers are re-posted on both queue pairs.
 *
 * Returns ORTE_SUCCESS / ORTE_ERROR according to ibv_post_send() when the
 * request is posted immediately.
 * NOTE(review): the deferred paths return OMPI_SUCCESS while the posting
 * path returns ORTE_* codes - confirm these constants are interchangeable.
 */
int mca_btl_openib_get( mca_btl_base_module_t* btl,
                    mca_btl_base_endpoint_t* endpoint,
                    mca_btl_base_descriptor_t* descriptor)
{
    int rc;
    struct ibv_send_wr* bad_wr; 
    mca_btl_openib_frag_t* frag = (mca_btl_openib_frag_t*) descriptor; 
    mca_btl_openib_module_t* openib_btl = (mca_btl_openib_module_t*) btl;
    frag->endpoint = endpoint;
    frag->wr_desc.sr_desc.opcode = IBV_WR_RDMA_READ; 

    /* check for a send wqe */
    if (OPAL_THREAD_ADD32(&endpoint->sd_wqe[BTL_OPENIB_LP_QP],-1) < 0) {

        /* give the WQE back and queue the fragment for later */
        OPAL_THREAD_ADD32(&endpoint->sd_wqe[BTL_OPENIB_LP_QP],1);
        OPAL_THREAD_LOCK(&endpoint->endpoint_lock);
        opal_list_append(&endpoint->pending_get_frags, (opal_list_item_t*)frag);
        OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock);
        return OMPI_SUCCESS;

    /* check for a get token */
    } else if(OPAL_THREAD_ADD32(&endpoint->get_tokens,-1) < 0) {

        /* give back both the WQE and the token, then queue the fragment */
        OPAL_THREAD_ADD32(&endpoint->sd_wqe[BTL_OPENIB_LP_QP],1);
        OPAL_THREAD_ADD32(&endpoint->get_tokens,1);
        OPAL_THREAD_LOCK(&endpoint->endpoint_lock);
        opal_list_append(&endpoint->pending_get_frags, (opal_list_item_t*)frag);
        OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock);
        return OMPI_SUCCESS;

    } else { 
    
        /* remote side = descriptor source, local side = descriptor destination */
        frag->wr_desc.sr_desc.send_flags = IBV_SEND_SIGNALED; 
        frag->wr_desc.sr_desc.wr.rdma.remote_addr = frag->base.des_src->seg_addr.lval; 
        frag->wr_desc.sr_desc.wr.rdma.rkey = frag->base.des_src->seg_key.key32[0]; 
        frag->sg_entry.addr = (unsigned long) frag->base.des_dst->seg_addr.pval; 
        frag->sg_entry.length  = frag->base.des_dst->seg_len; 
        
        if(ibv_post_send(endpoint->lcl_qp[BTL_OPENIB_LP_QP], 
                         &frag->wr_desc.sr_desc, 
                         &bad_wr)){ 
            BTL_ERROR(("error posting send request errno (%d) says %s", errno, strerror(errno))); 
            rc = ORTE_ERROR;
        }  else {
            rc = ORTE_SUCCESS;
        }
        
        /* re-post one receive on each priority level, via the shared
         * receive queue when the component is configured to use one */
        if(mca_btl_openib_component.use_srq) { 
            mca_btl_openib_post_srr(openib_btl, 1, BTL_OPENIB_HP_QP);
            mca_btl_openib_post_srr(openib_btl, 1, BTL_OPENIB_LP_QP);
        } else { 
            btl_openib_endpoint_post_rr(endpoint, 1, BTL_OPENIB_HP_QP);
            btl_openib_endpoint_post_rr(endpoint, 1, BTL_OPENIB_LP_QP);
        }
    }
    return rc; 
}
Ejemplo n.º 17
0
/*
 * Work-completion handler driving the connection's send/receive state
 * counters.
 *
 * The connection is recovered from wc->wr_id. A receive completion advances
 * recv_state and, for MSG_MR messages, stores the peer's memory region,
 * re-posts a receive and (if nothing was sent yet) sends our own MR. Any
 * other completion advances send_state.
 *
 * Once both sides have exchanged MRs, an RDMA WRITE or READ (depending on
 * s_mode) of RDMA_BUFFER_SIZE bytes is posted, followed by a MSG_DONE
 * message. Once both DONE states are reached the peer buffer is printed and
 * the connection is disconnected.
 *
 * Calls die() if the completion status is not IBV_WC_SUCCESS.
 */
void on_completion(struct ibv_wc *wc)
{
  struct connection *conn = (struct connection *)(uintptr_t)wc->wr_id;
  printf("== STATE: send=%d / recv=%d ==\n", conn->send_state, conn->recv_state);
  if (wc->status != IBV_WC_SUCCESS)
    die("on_completion: status is not IBV_WC_SUCCESS.");

  /* receive completions are identified by the IBV_WC_RECV bit in the opcode */
  if (wc->opcode & IBV_WC_RECV) {
    conn->recv_state++;
    printf("RECV: Recieved: TYPE=%d\n", conn->recv_msg->type);
    if (conn->recv_msg->type == MSG_MR) {
      memcpy(&conn->peer_mr, &conn->recv_msg->data.mr, sizeof(conn->peer_mr));
      post_receives(conn); /* only rearm for MSG_MR */
      if (conn->send_state == SS_INIT) /* received peer's MR before sending ours, so send ours back */
        send_mr(conn);
    }

  } else {
    conn->send_state++;
    printf("SEND: Sent out: TYPE=%d\n", conn->send_msg->type);
  }

  /* both MRs exchanged: start the actual RDMA transfer */
  if (conn->send_state == SS_MR_SENT && conn->recv_state == RS_MR_RECV) {
    struct ibv_send_wr wr, *bad_wr = NULL;
    struct ibv_sge sge;

    if (s_mode == M_WRITE)
      printf(" -> received MSG_MR. writing message to remote memory...\n");
    else
      printf(" -> received MSG_MR. reading message from remote memory...\n");

    memset(&wr, 0, sizeof(wr));

    wr.wr_id = (uintptr_t)conn;
    wr.opcode = (s_mode == M_WRITE) ? IBV_WR_RDMA_WRITE : IBV_WR_RDMA_READ;
    wr.sg_list = &sge;
    wr.num_sge = 1;
    wr.send_flags = IBV_SEND_SIGNALED;
    wr.wr.rdma.remote_addr = (uintptr_t)conn->peer_mr.addr;
    wr.wr.rdma.rkey = conn->peer_mr.rkey;

    sge.addr = (uintptr_t)conn->rdma_local_region;
    sge.length = RDMA_BUFFER_SIZE;
    sge.lkey = conn->rdma_local_mr->lkey;

    TEST_NZ(ibv_post_send(conn->qp, &wr, &bad_wr));
    printf("PSEND: Posted send request: MSG=%s\n", conn->rdma_local_region);

    /* tell the peer that our RDMA operation has been issued */
    conn->send_msg->type = MSG_DONE;
    send_message(conn);

  } else if (conn->send_state == SS_DONE_SENT && conn->recv_state == RS_DONE_RECV) {
    printf(" -> remote buffer: %s\n", get_peer_message_region(conn));
    rdma_disconnect(conn->id);
  }
}
Ejemplo n.º 18
0
// All data transfers must go through this function.
//
// Fills the shared send descriptor (sr_desc / sr_sg_entry) for an RDMA
// write, an RDMA read or an atomic compare-and-swap and posts it on the
// queue pair of process `dest`.
//
//   src, dst        source and destination addresses; which one is local
//                   depends on `type` (for a read, src is the remote side)
//   dest            index of the target queue pair in conn.qp
//   len             transfer length for reads and writes
//   lkey, rkey      local and remote memory keys
//   type            IBV_WR_RDMA_WRITE, IBV_WR_RDMA_READ or
//                   IBV_WR_ATOMIC_CMP_AND_SWP
//   lock_or_unlock  OPENIB_LOCK or OPENIB_UNLOCK; only used for the atomic
//
// A failing post is reported on stderr; the outstanding counter is
// incremented regardless.
void prepare_and_post_send_desc(void *src, void *dst,
        int dest, int len, int lkey, int rkey, int type, int lock_or_unlock)
{
    struct ibv_send_wr *rejected_wr;

    sr_desc.wr_id = 0;
    sr_desc.next = NULL;
    sr_desc.opcode = type;
    sr_desc.send_flags = IBV_SEND_SIGNALED;
    sr_desc.num_sge = 1;

    switch (type) {
    case IBV_WR_RDMA_WRITE:
        sr_sg_entry.addr = (uintptr_t) (src);
        sr_sg_entry.length = len;
        sr_desc.wr.rdma.remote_addr = (uintptr_t) (dst);
        sr_desc.wr.rdma.rkey = rkey;
        break;

    case IBV_WR_RDMA_READ:
        sr_sg_entry.addr = (uintptr_t) (dst);
        sr_sg_entry.length = len;
        sr_desc.wr.rdma.remote_addr = (uintptr_t) (src);
        sr_desc.wr.rdma.rkey = rkey;
        break;

    case IBV_WR_ATOMIC_CMP_AND_SWP:
        sr_sg_entry.addr = (uintptr_t) (src);
        sr_sg_entry.length = sizeof(long);
        sr_desc.wr.atomic.remote_addr = (uintptr_t) (dst);
        sr_desc.wr.atomic.rkey = rkey;

        // Lock: swap 0 -> rank + 1.  Unlock: swap rank + 1 -> 0.
        if (lock_or_unlock == OPENIB_LOCK) {
            sr_desc.wr.atomic.compare_add = 0;
            sr_desc.wr.atomic.swap = l_state.rank + 1;
        }
        else if (lock_or_unlock == OPENIB_UNLOCK){
            sr_desc.wr.atomic.compare_add = l_state.rank + 1;
            sr_desc.wr.atomic.swap = 0;
        }
        else {
            assert(0);
        }
        break;

    default:
        break;
    }

    sr_sg_entry.lkey = lkey;
    sr_desc.sg_list = &(sr_sg_entry);

    if(ibv_post_send(conn.qp[dest], &sr_desc, &rejected_wr)) {
        fprintf(stderr,"[%d] Error posting send\n",
                me);
        fflush(stderr);
    }

    // Increment outstanding and check whether we need to make progress 
    increment_outstanding();
}
Ejemplo n.º 19
0
/*
 * Close hook of the RDMA engine: notify the server (client side, memory
 * read/write protocols only), disconnect, and release the verbs and
 * connection-manager resources held in the engine data.
 *
 * Returns 0 on success, 1 if the closing notification could not be posted.
 * The file argument `f` is not used.
 */
static int fio_rdmaio_close_file(struct thread_data *td, struct fio_file *f)
{
	struct rdmaio_data *rd = td->io_ops->data;
	struct ibv_send_wr *bad_wr;

	/* unregister rdma buffer */

	/*
	 * Client sends notification to the server side
	 */
	/* refer to: http://linux.die.net/man/7/rdma_cm */
	if ((rd->is_client == 1) && ((rd->rdma_protocol == FIO_RDMA_MEM_WRITE)
				     || (rd->rdma_protocol ==
					 FIO_RDMA_MEM_READ))) {
		/* the notification is the send request already stored in rd->sq_wr */
		if (ibv_post_send(rd->qp, &rd->sq_wr, &bad_wr) != 0) {
			log_err("fio: ibv_post_send fail");
			return 1;
		}

		dprint(FD_IO, "fio: close information sent success\n");
		rdma_poll_wait(td, IBV_WC_SEND);
	}

	/* the client disconnects its own id, the server the accepted child id */
	if (rd->is_client == 1)
		rdma_disconnect(rd->cm_id);
	else {
		rdma_disconnect(rd->child_cm_id);
#if 0
		rdma_disconnect(rd->cm_id);
#endif
	}

#if 0
	if (get_next_channel_event(td, rd->cm_channel, RDMA_CM_EVENT_DISCONNECTED) != 0) {
		log_err("fio: wait for RDMA_CM_EVENT_DISCONNECTED\n");
		return 1;
	}
#endif

	/* NOTE(review): return values of the destroy/dealloc calls below are
	 * ignored. */
	ibv_destroy_cq(rd->cq);
	ibv_destroy_qp(rd->qp);

	if (rd->is_client == 1)
		rdma_destroy_id(rd->cm_id);
	else {
		rdma_destroy_id(rd->child_cm_id);
		rdma_destroy_id(rd->cm_id);
	}

	ibv_destroy_comp_channel(rd->channel);
	ibv_dealloc_pd(rd->pd);

	return 0;
}
Ejemplo n.º 20
0
/*
 * Drain the queue pair's extended backlog queue while send credits remain.
 *
 * Each descriptor taken from the head of the backlog consumes send credits
 * and is then either posted directly with ibv_post_send() or, when no send
 * WQE is available or the extended send queue is non-empty, handed to
 * mvdev_ext_sendq_queue(). A failing post aborts via error_abort_all().
 */
void mvdev_ext_backlogq_send(mv_qp * qp)
{
    mv_sdescriptor *d;
    struct ibv_send_wr *sr;
    struct ibv_send_wr *bad_wr;
    int i;

    while (qp->send_credits_remaining > 0 && qp->ext_backlogq_head) {
        d = qp->ext_backlogq_head;

        /* find how many desc are chained */
        i = 1;
        sr = &(d->sr);
        while(sr->next) {
            sr = sr->next;
            i++;
        }
        /* only unchained descriptors are expected here */
        assert(i == 1);

        if(qp->send_credits_remaining >= i) {
            /* unlink d from the head of the backlog queue */
            qp->ext_backlogq_head = d->next_extsendq;
            if (d == qp->ext_backlogq_tail) {
                qp->ext_backlogq_tail = NULL;
            }
            d->next_extsendq = NULL;

            mvdev.connections[((mv_sbuf *)d->parent)->rank].queued--;

            /* reset the credit counter now  -- so we don't lose credits in
             * the backlogq */
            if(MVDEV_RPUT_FLAG == ((mv_sbuf *)d->parent)->flag) {
                D_PRINT("unqueing RPUT\n");
            } else {
                PACKET_SET_CREDITS(((mv_sbuf *)d->parent), (&(mvdev.connections[((mv_sbuf *) d->parent)->rank])));
            }

            D_PRINT("at %d, dropping to %d, queued: %d\n", qp->send_credits_remaining,
                    qp->send_credits_remaining - i, mvdev.connections[((mv_sbuf *)d->parent)->rank].queued);
            qp->send_credits_remaining -= i;

            /* keep ordering: if anything is already waiting in the extended
             * send queue, or no WQE is free, queue behind it */
            if((qp->send_wqes_avail - i) < 0 || (NULL != qp->ext_sendq_head)) {
                mvdev_ext_sendq_queue(qp, d);
            } else {
                if(ibv_post_send(qp->qp, &(d->sr), &bad_wr)) {
                    error_abort_all(IBV_RETURN_ERR,"Error posting to RC QP (%d)\n", qp->send_wqes_avail);
                }
                qp->send_wqes_avail -= i;
            }
        } else {
            break;
        }
    }
}
Ejemplo n.º 21
0
/*
 * Post a signaled RDMA READ of `size` bytes from a remote segment of
 * `rank` into a local segment, on queue `queue`.
 *
 * Local and remote addresses are the segment base plus NOTIFY_OFFSET plus
 * the given offset; with GPI2_CUDA, segments whose cudaDevId is >= 0 are
 * addressed without NOTIFY_OFFSET.
 *
 * Returns GASPI_SUCCESS, or GASPI_ERROR after marking the queue pair state
 * for that queue/rank as GASPI_STATE_CORRUPT when ibv_post_send() fails.
 */
gaspi_return_t
pgaspi_dev_read (const gaspi_segment_id_t segment_id_local,
		 const gaspi_offset_t offset_local, const gaspi_rank_t rank,
		 const gaspi_segment_id_t segment_id_remote,
		 const gaspi_offset_t offset_remote, const unsigned int size,
		 const gaspi_queue_id_t queue)
{
  struct ibv_send_wr *bad_wr;
  struct ibv_sge slist;
  /* NOTE(review): swr is not zero-initialized; only the fields assigned
   * below are defined when it is posted. */
  struct ibv_send_wr swr;

  /* The `else` of the CUDA branch binds to the unconditional assignment
   * after #endif. */
#ifdef GPI2_CUDA
  if(glb_gaspi_ctx.rrmd[segment_id_local][glb_gaspi_ctx.rank].cudaDevId >= 0)
    slist.addr =
      (uintptr_t) (glb_gaspi_ctx_ib.
		   rrmd[segment_id_local][glb_gaspi_ctx.rank].addr +
		   offset_local);
  else
#endif 
    slist.addr =
      (uintptr_t) (glb_gaspi_ctx.rrmd[segment_id_local][glb_gaspi_ctx.rank].addr +
		   NOTIFY_OFFSET + offset_local);
  slist.length = size;
  slist.lkey = ((struct ibv_mr *)glb_gaspi_ctx.rrmd[segment_id_local][glb_gaspi_ctx.rank].mr)->lkey;
  
#ifdef GPI2_CUDA
  if(glb_gaspi_ctx.rrmd[segment_id_remote][rank].cudaDevId >= 0)
    swr.wr.rdma.remote_addr =(glb_gaspi_ctx.rrmd[segment_id_remote][rank].addr +
			      offset_remote);
  else
#endif
    
    swr.wr.rdma.remote_addr =
      (glb_gaspi_ctx.rrmd[segment_id_remote][rank].addr + NOTIFY_OFFSET +
       offset_remote);
  
  swr.wr.rdma.rkey = glb_gaspi_ctx.rrmd[segment_id_remote][rank].rkey;
  swr.sg_list = &slist;
  swr.num_sge = 1;
  swr.wr_id = rank;
  swr.opcode = IBV_WR_RDMA_READ;
  swr.send_flags = IBV_SEND_SIGNALED;// | IBV_SEND_FENCE;
  swr.next = NULL;

  if (ibv_post_send (glb_gaspi_ctx_ib.qpC[queue][rank], &swr, &bad_wr))
    {
      glb_gaspi_ctx.qp_state_vec[queue][rank] = GASPI_STATE_CORRUPT;

      return GASPI_ERROR;
    }

  return GASPI_SUCCESS;
}
Ejemplo n.º 22
0
Archivo: ibv.c Proyecto: carriercomm/ix
/* SendData == Post a 'send' request to the (send)command queue
 *
 * The opcode is chosen from p->prot.commtype (plain send, send with
 * immediate, RDMA write, RDMA write with immediate). For the RDMA variants
 * the remote address is remote_address advanced by the current offset of
 * p->s_ptr inside p->s_buff. p->bufflen bytes starting at p->s_ptr are sent.
 *
 * An unknown commtype terminates the process; a failing post is only
 * reported on stderr.
 */
void SendData(ArgStruct *p)
{
  int                ret;       /* Return code */
  struct ibv_send_wr sr;        /* Send request */
  struct ibv_send_wr *bad_wr;	/* Handle to any incomplete wr returned by ibv*/
  struct ibv_sge     sg_entry;  /* Scatter/Gather list - holds buff addr */

  /* Fill in send request struct */

  /* sr lives on the stack: give the fields that are handed to the provider
   * (and, for the *_WITH_IMM opcodes, sent to the peer) a defined value
   * instead of stack garbage. */
  sr.wr_id = 0;
  sr.imm_data = 0;

    /* Set the send request's opcode based on run-time options */
  if(p->prot.commtype == NP_COMM_SENDRECV) {
     sr.opcode = IBV_WR_SEND;
     LOGPRINTF(("Doing regular send"));
  } else if(p->prot.commtype == NP_COMM_SENDRECV_WITH_IMM) {
     sr.opcode = IBV_WR_SEND_WITH_IMM;
     LOGPRINTF(("Doing regular send with imm"));
  } else if(p->prot.commtype == NP_COMM_RDMAWRITE) {
     sr.opcode = IBV_WR_RDMA_WRITE;	/* if RDMA, need to give more info */
     sr.wr.rdma.remote_addr = (uintptr_t)(((char *)remote_address) + (p->s_ptr - p->s_buff));
     sr.wr.rdma.rkey = remote_key;
     /* remote_addr is a 64-bit integer; %p requires a void pointer */
     LOGPRINTF(("Doing RDMA write (raddr=%p)", (void *)(uintptr_t)sr.wr.rdma.remote_addr));
  } else if(p->prot.commtype == NP_COMM_RDMAWRITE_WITH_IMM) {
     sr.opcode = IBV_WR_RDMA_WRITE_WITH_IMM;	/* more info if RDMA */
     sr.wr.rdma.remote_addr = (uintptr_t)(((char *)remote_address) + (p->s_ptr - p->s_buff));
     sr.wr.rdma.rkey = remote_key;
     LOGPRINTF(("Doing RDMA write with imm (raddr=%p)", (void *)(uintptr_t)sr.wr.rdma.remote_addr));
  } else {
     fprintf(stderr, "Error, invalid communication type in SendData\n");
     exit(-1);
  }
  
  sr.send_flags = 0;	/* This needed due to a bug in Mellanox HW rel a-0 */

  sr.num_sge = 1;		    /* # entries in this request */
  sr.sg_list = &sg_entry;	    /* the list of other requests */
  sr.next = NULL;		    /* the next request in the list */

  sg_entry.lkey = s_mr_hndl->lkey;  /* Local memory region key */
  sg_entry.length = p->bufflen;	   /* buffer's size */
  sg_entry.addr = (uintptr_t)p->s_ptr;	/* buffer's location */


  
  /* Post the send request to the (send)command queue */

  /* ibv_post_send(...) is handled in same fashion ibv_post_recv(..) */
  ret = ibv_post_send(qp_hndl, &sr, &bad_wr);
  if(ret) {
    fprintf(stderr, "Error posting send request\n");
  } else {
    LOGPRINTF(("Posted send request"));
  }

}
Ejemplo n.º 23
0
/*
 * Fill the interface's wr_bcp work request for `ep` with no extra send
 * flags, run the TX hook on the header at sge[0], post the request on the
 * interface QP and log it. The post result is only checked through
 * ucs_assertv().
 */
static inline void uct_ud_verbs_iface_tx_data(uct_ud_verbs_iface_t *iface, uct_ud_verbs_ep_t *ep)
{
    struct ibv_send_wr *rejected_wr;
    int UCS_V_UNUSED ret;

    uct_ud_verbs_iface_fill_tx_wr(iface, ep, &iface->tx.wr_bcp, 0);
    UCT_UD_EP_HOOK_CALL_TX(&ep->super, (uct_ud_neth_t *)iface->tx.sge[0].addr);

    ret = ibv_post_send(iface->super.qp, &iface->tx.wr_bcp, &rejected_wr);
    ucs_assertv(ret == 0, "ibv_post_send() returned %d (%m)", ret);

    uct_ib_log_post_send(iface->super.qp, &iface->tx.wr_bcp, NULL);
}
Ejemplo n.º 24
0
/*
 * Build one signaled RDMA work request per buffer handle, chain them and
 * post the whole chain with a single ibv_post_send() call on the QP of the
 * first handle's context.
 *
 * handles: array of hcount buffer handles; each handle's opcode field is
 *          set to `opcode`.
 * hcount:  number of handles; nothing is posted when it is <= 0.
 * opcode:  verbs opcode stored in every work request.
 *
 * Returns 0 on success (or when there is nothing to post) and -1 if an
 * allocation or ibv_post_send() fails. The temporary work requests and
 * scatter/gather entries are always released before returning.
 */
static int __xfer_rdma_do_rdma(struct xfer_rdma_buf_handle_t **handles, int hcount, int opcode)
{
	struct xfer_context *ctx;
	struct ibv_send_wr *head = NULL;
	struct ibv_send_wr *tail = NULL;
	struct ibv_send_wr *bad_wr = NULL;
	int i;
	int ret = 0;

	/* handles[0] is dereferenced below, so an empty request must stop here */
	if (handles == NULL || hcount <= 0)
		return 0;

	ctx = handles[0]->ctx;

	for (i = 0; i < hcount; i++) {
		struct ibv_send_wr *curr_wr = calloc(1, sizeof(*curr_wr));
		struct ibv_sge *sge = calloc(1, sizeof(*sge));

		if (curr_wr == NULL || sge == NULL) {
			fprintf(stderr, "%d:%s: out of memory\n", pid, __func__);
			free(sge);
			free(curr_wr);
			ret = -1;
			goto out_free;
		}

		sge->addr = (uintptr_t) handles[i]->buf;
		sge->length = handles[i]->local_size;
		sge->lkey = handles[i]->local_mr->lkey;

		curr_wr->wr.rdma.remote_addr = (uintptr_t) handles[i]->remote_mr->addr;
		curr_wr->wr.rdma.rkey = handles[i]->remote_mr->rkey;
		curr_wr->wr_id      = handles[i]->id;
		curr_wr->sg_list    = sge;
		curr_wr->num_sge    = 1;
		curr_wr->opcode     = opcode;
		curr_wr->send_flags = IBV_SEND_SIGNALED;
		curr_wr->imm_data   = 0;
		curr_wr->next       = NULL;

		/* append to the chain; the previous tail must point at the new
		 * request, otherwise only the first one would ever be posted */
		if (tail != NULL)
			tail->next = curr_wr;
		else
			head = curr_wr;
		tail = curr_wr;

		handles[i]->opcode = opcode;
	}

	if (ibv_post_send(ctx->qp, head, &bad_wr)) {
		fprintf(stderr, "%d:%s: ibv_post_send failed\n", pid, __func__);
		perror("ibv_post_send");
		ret = -1;
	}

out_free:
	/* free every request in the chain exactly once */
	while (head != NULL) {
		struct ibv_send_wr *next = head->next;

		free(head->sg_list);
		free(head);
		head = next;
	}

	return ret;
}
Ejemplo n.º 25
0
/*
 * Send `length` bytes from `buffer` through the interface's wr_inl work
 * request with IBV_SEND_INLINE set: the payload is described by sge[1], the
 * TX hook runs on the header at sge[0], and the request is posted on the
 * interface QP and logged. The post result is only checked through
 * ucs_assertv().
 */
static inline void uct_ud_verbs_iface_tx_inl(uct_ud_verbs_iface_t *iface, uct_ud_verbs_ep_t *ep, const void *buffer, unsigned length)
{
    struct ibv_send_wr *rejected_wr;
    int UCS_V_UNUSED ret;

    /* payload segment */
    iface->tx.sge[1].length = length;
    iface->tx.sge[1].addr   = (uintptr_t)buffer;

    uct_ud_verbs_iface_fill_tx_wr(iface, ep, &iface->tx.wr_inl, IBV_SEND_INLINE);
    UCT_UD_EP_HOOK_CALL_TX(&ep->super, (uct_ud_neth_t *)iface->tx.sge[0].addr);

    ret = ibv_post_send(iface->super.qp, &iface->tx.wr_inl, &rejected_wr);
    ucs_assertv(ret == 0, "ibv_post_send() returned %d (%m)", ret);

    uct_ib_log_post_send(iface->super.qp, &iface->tx.wr_inl, NULL);
}
Ejemplo n.º 26
0
static int fio_rdmaio_connect(struct thread_data *td, struct fio_file *f)
{
	struct rdmaio_data *rd = td->io_ops->data;
	struct rdma_conn_param conn_param;
	struct ibv_send_wr *bad_wr;

	memset(&conn_param, 0, sizeof(conn_param));
	conn_param.responder_resources = 1;
	conn_param.initiator_depth = 1;
	conn_param.retry_count = 10;

	if (rdma_connect(rd->cm_id, &conn_param) != 0) {
		log_err("fio: rdma_connect fail\n");
		return 1;
	}

	if (get_next_channel_event
	    (td, rd->cm_channel, RDMA_CM_EVENT_ESTABLISHED) != 0) {
		log_err("fio: wait for RDMA_CM_EVENT_ESTABLISHED\n");
		return 1;
	}

	/* send task request */
	rd->send_buf.mode = htonl(rd->rdma_protocol);
	rd->send_buf.nr = htonl(td->o.iodepth);

	if (ibv_post_send(rd->qp, &rd->sq_wr, &bad_wr) != 0) {
		log_err("fio: ibv_post_send fail");
		return 1;
	}

	if (rdma_poll_wait(td, IBV_WC_SEND) < 0)
		return 1;

	/* wait for remote MR info from server side */
	if (rdma_poll_wait(td, IBV_WC_RECV) < 0)
		return 1;

	/* In SEND/RECV test, it's a good practice to setup the iodepth of
	 * of the RECV side deeper than that of the SEND side to
	 * avoid RNR (receiver not ready) error. The
	 * SEND side may send so many unsolicited message before
	 * RECV side commits sufficient recv buffers into recv queue.
	 * This may lead to RNR error. Here, SEND side pauses for a while
	 * during which RECV side commits sufficient recv buffers.
	 */
	usleep(500000);

	return 0;
}
Ejemplo n.º 27
0
/*
 * Transport send hook: copy the call's packet into a free send slot and
 * post it as a signaled SEND.
 *
 * fc: call whose pkt/size are transmitted.
 * a:  the Rdmatrans instance (passed as void *).
 *
 * Blocks on rdma->cond until a slot in snd_buf is free. Returns fc->size on
 * success. If ibv_post_send() fails, the slot is released again, the error
 * is recorded with np_uerror() and -1 is returned.
 */
static int
rdma_trans_send(Npfcall *fc, void *a)
{
	int i, n;
	Rdmatrans *rdma;
	struct ibv_sge sge;
	struct ibv_send_wr wr, *bad_wr;
	Rdmactx *wctx;

	rdma = a;
	pthread_mutex_lock(&rdma->lock);

again:
	/* slots are laid out back to back, msize bytes apart */
	for(i = 0, wctx = (Rdmactx *) rdma->snd_buf; i < rdma->q_depth;
			i++, wctx = (Rdmactx *) ((char *) wctx + rdma->msize))
		if (!wctx->used)
			break;

	if (i >= rdma->q_depth) {
		/* wait for a slot */
		pthread_cond_wait(&rdma->cond, &rdma->lock);
		goto again;
	}

	wctx->wc_op = IBV_WC_SEND;
	wctx->rdma = rdma;
	wctx->used = 1;
	wctx->len = fc->size;
	wctx->pos = 0;
	memmove(wctx->buf, fc->pkt, fc->size);
	pthread_mutex_unlock(&rdma->lock);

	sge.addr = (uintptr_t) wctx->buf;
	sge.length = fc->size;
	sge.lkey = rdma->snd_mr->lkey;
	wr.next = NULL;
	wr.wr_id = (u64)(unsigned long)wctx;
	wr.opcode = IBV_WR_SEND;
	wr.send_flags = IBV_SEND_SIGNALED;
	wr.sg_list = &sge;
	wr.num_sge = 1;
	n = ibv_post_send(rdma->qp, &wr, &bad_wr);
	if (n) {
		/* Nothing was queued, so no completion will ever arrive for this
		 * slot: hand it back and wake a sender that may be waiting. */
		pthread_mutex_lock(&rdma->lock);
		wctx->used = 0;
		pthread_cond_signal(&rdma->cond);
		pthread_mutex_unlock(&rdma->lock);

		np_uerror(n);
		return -1;
	}

	return fc->size;
}
Ejemplo n.º 28
0
/*
 * Write a notification value into the notification slot `notification_id`
 * of a remote segment of `rank`, using a signaled, inlined RDMA WRITE on
 * queue `queue`.
 *
 * The value is first stored in the matching slot of the local notification
 * source buffer (glb_gaspi_ctx.nsrc.buf), which serves as the source of the
 * write.
 *
 * Returns GASPI_SUCCESS, or GASPI_ERROR after marking the queue pair state
 * for that queue/rank as GASPI_STATE_CORRUPT when ibv_post_send() fails.
 */
gaspi_return_t
pgaspi_dev_notify (const gaspi_segment_id_t segment_id_remote,
		   const gaspi_rank_t rank,
		   const gaspi_notification_id_t notification_id,
		   const gaspi_notification_t notification_value,
		   const gaspi_queue_id_t queue)
{
 
  struct ibv_send_wr *bad_wr;
  struct ibv_sge slistN;
  /* NOTE(review): swrN is not zero-initialized; only the fields assigned
   * below are defined when it is posted. */
  struct ibv_send_wr swrN;

  slistN.addr = (uintptr_t) (glb_gaspi_ctx.nsrc.buf + notification_id * sizeof(gaspi_notification_t));

  /* stage the value in the local source slot */
  *((unsigned int *) slistN.addr) = notification_value;

  slistN.length = sizeof(gaspi_notification_t);
  slistN.lkey = ((struct ibv_mr *) glb_gaspi_ctx.nsrc.mr)->lkey;

  /* With GPI2_CUDA, segments whose cudaDevId is >= 0 are notified through
   * their host_addr/host_rkey; the `else` binds to the block after #endif. */
#ifdef GPI2_CUDA
  if( glb_gaspi_ctx.rrmd[segment_id_remote][rank].cudaDevId >= 0)
    {
      swrN.wr.rdma.remote_addr = (glb_gaspi_ctx.rrmd[segment_id_remote][rank].host_addr + notification_id * sizeof(gaspi_notification_t));
      swrN.wr.rdma.rkey = glb_gaspi_ctx.rrmd[segment_id_remote][rank].host_rkey;
    }
  else
#endif
    {
      swrN.wr.rdma.remote_addr = (glb_gaspi_ctx.rrmd[segment_id_remote][rank].addr + notification_id * sizeof(gaspi_notification_t)); 
      swrN.wr.rdma.rkey = glb_gaspi_ctx.rrmd[segment_id_remote][rank].rkey;
    }
  
  swrN.sg_list = &slistN;
  swrN.num_sge = 1;
  swrN.wr_id = rank;
  swrN.opcode = IBV_WR_RDMA_WRITE;
  swrN.send_flags = IBV_SEND_SIGNALED | IBV_SEND_INLINE;
  swrN.next = NULL;

  if (ibv_post_send (glb_gaspi_ctx_ib.qpC[queue][rank], &swrN, &bad_wr))
    {
      glb_gaspi_ctx.qp_state_vec[queue][rank] = GASPI_STATE_CORRUPT;

      return GASPI_ERROR;
    }

  return GASPI_SUCCESS;

}
Ejemplo n.º 29
0
/*
 * Send a prepared skb through the interface's wr_skb work request: sge[0]
 * is pointed at the skb's network header, the request is filled with the
 * given flags, posted on the interface QP and logged, and the count of
 * available TX entries is decremented. The post result is only checked
 * through ucs_assertv().
 */
static inline void 
uct_ud_verbs_ep_tx_skb(uct_ud_verbs_iface_t *iface,
                          uct_ud_verbs_ep_t *ep, uct_ud_send_skb_t *skb, unsigned flags)
{
    struct ibv_send_wr *rejected_wr;
    int UCS_V_UNUSED ret;

    /* describe the skb with the first scatter/gather entry */
    iface->tx.sge[0].addr   = (uintptr_t)skb->neth;
    iface->tx.sge[0].length = skb->len;
    iface->tx.sge[0].lkey   = skb->lkey;

    uct_ud_verbs_iface_fill_tx_wr(iface, ep, &iface->tx.wr_skb, flags);
    UCT_UD_EP_HOOK_CALL_TX(&ep->super, (uct_ud_neth_t *)iface->tx.sge[0].addr);

    ret = ibv_post_send(iface->super.qp, &iface->tx.wr_skb, &rejected_wr);
    ucs_assertv(ret == 0, "ibv_post_send() returned %d (%m)", ret);

    uct_ib_log_post_send(&iface->super.super, iface->super.qp, &iface->tx.wr_skb, NULL);
    --iface->super.tx.available;
}
Ejemplo n.º 30
0
/*
 * Send the number of this side's first queue pair to the remote UD queue
 * pair (both as payload and as immediate data) and busy-poll the send CQ
 * until the send completes.
 *
 * Returns 0 on success, 1 if posting, polling or the completion failed.
 */
static int send_qp_num_for_ah(struct pingpong_context *ctx,
		struct perftest_parameters *user_param)
{
	struct ibv_send_wr wr;
	struct ibv_send_wr *bad_wr;
	struct ibv_sge list;
	struct ibv_wc wc;
	int ne;

	memcpy(ctx->buf,&ctx->qp[0]->qp_num,sizeof(uint32_t));

	list.addr   = (uintptr_t)ctx->buf;
	list.length = sizeof(uint32_t);
	list.lkey   = ctx->mr->lkey;

	wr.wr_id      = 0;
	wr.sg_list    = &list;
	wr.num_sge    = 1;
	wr.opcode     = IBV_WR_SEND_WITH_IMM;
	wr.send_flags = IBV_SEND_SIGNALED;
	wr.next       = NULL;
	wr.imm_data   = htonl(ctx->qp[0]->qp_num);

	wr.wr.ud.ah = ctx->ah[0];
	wr.wr.ud.remote_qpn  = user_param->rem_ud_qpn;
	wr.wr.ud.remote_qkey = user_param->rem_ud_qkey;


	if (ibv_post_send(ctx->qp[0],&wr,&bad_wr)) {
		fprintf(stderr, "Function ibv_post_send failed\n");
		return 1;
	}

	do {
		ne = ibv_poll_cq(ctx->send_cq, 1,&wc);
	} while (ne == 0);

	/* A negative result means polling failed and wc was never filled in. */
	if (ne < 0) {
		fprintf(stderr, " poll CQ failed %d\n", ne);
		return 1;
	}

	if (wc.status || wc.opcode != IBV_WC_SEND || wc.wr_id != 0) {
		fprintf(stderr, " Couldn't post send my QP number %d\n",(int)wc.status);
		return 1;
	}

	return 0;

}