/*
 * Post a signaled RDMA WRITE copying `length` bytes from the local RDMA
 * buffer at `offset` to the same offset in the peer's buffer.
 * Aborts the process if the range would exceed RDMA_BUFFER_SIZE or if
 * ibv_post_send() fails (via TEST_NZ).
 */
static void kickoff_rdma_with_offset(uintptr_t offset, IbvConnection *conn, int length)
{
    struct ibv_send_wr wr, *bad_wr = NULL;
    struct ibv_sge sge;

    /* Validate the range before building the work request. Written as a
     * subtraction so offset + length cannot overflow, and with matching
     * format specifiers (the original passed a uintptr_t sum to %d, which
     * is undefined behavior in the variadic call). */
    if (length < 0 || offset > (uintptr_t)RDMA_BUFFER_SIZE ||
        (uintptr_t)length > (uintptr_t)RDMA_BUFFER_SIZE - offset) {
        WARN(0, "kickoff_rdma_with_offset: offset + length (=%llu) exceeds RDMA_BUFFER_SIZE (=%d).\n",
             (unsigned long long)offset + (unsigned long long)length,
             RDMA_BUFFER_SIZE);
        exit(1);
    }

    memset(&wr, 0, sizeof(wr));
    wr.wr_id = (uintptr_t)conn;          /* echoed back in the completion */
    wr.opcode = IBV_WR_RDMA_WRITE;
    wr.sg_list = &sge;
    wr.num_sge = 1;
    wr.send_flags = IBV_SEND_SIGNALED;   /* request a CQE for this WR */
    wr.wr.rdma.remote_addr = (uintptr_t)conn->peer_mr.addr + offset;
    wr.wr.rdma.rkey = conn->peer_mr.rkey;

    sge.addr = (uintptr_t)conn->rdma_local_region + offset;
    sge.length = length;
    sge.lkey = conn->rdma_local_mr->lkey;

    TEST_NZ(ibv_post_send(conn->qp, &wr, &bad_wr));
}
/*
 * Post a signaled RDMA WRITE of the entire staging region to the peer.
 *
 * Copies RDMA_BUFFER_SIZE bytes from `buf` into the registered staging
 * region and writes them to the peer's memory at `offset` past peer_mr.addr.
 * NOTE(review): always transfers RDMA_BUFFER_SIZE bytes regardless of how
 * much of `buf` is meaningful — callers must supply a buffer at least that
 * large; confirm this is intended.
 * Blocks on the `write_ops` semaphore before touching the staging region,
 * presumably to bound outstanding writes — verify against whatever posts
 * to that semaphore (likely the completion handler).
 */
void _rdma_write_offset(void *context, void* buf, uint64_t offset) {
    struct connection *conn = (struct connection *)context;
    struct ibv_send_wr wr, *bad_wr = NULL;
    struct ibv_sge sge;

    memset(&wr, 0, sizeof(wr));
    wr.wr_id = (uintptr_t)conn;            /* returned in the CQE */
    wr.opcode = IBV_WR_RDMA_WRITE;
    wr.sg_list = &sge;
    wr.num_sge = 1;
    wr.send_flags = IBV_SEND_SIGNALED;     /* request a completion */
    wr.wr.rdma.remote_addr = (uintptr_t)conn->peer_mr.addr + offset;
    wr.wr.rdma.rkey = conn->peer_mr.rkey;

    sge.addr = (uintptr_t)conn->rdma_remote_region;
    sge.length = RDMA_BUFFER_SIZE;
    sge.lkey = conn->rdma_remote_mr->lkey;

    time_stamp(2);                         /* timing instrumentation point */
    sem_wait(&write_ops);                  /* wait for a write slot */
    memcpy(conn->rdma_remote_region, (char*)buf, RDMA_BUFFER_SIZE);
    TEST_NZ(ibv_post_send(conn->qp, &wr, &bad_wr));
}
static int post_sends(struct cmatest_node *node, int signal_flag) { struct ibv_send_wr send_wr, *bad_send_wr; struct ibv_sge sge; int i, ret = 0; if (!node->connected || !message_count) return 0; send_wr.next = NULL; send_wr.sg_list = &sge; send_wr.num_sge = 1; send_wr.opcode = IBV_WR_SEND_WITH_IMM; send_wr.send_flags = signal_flag; send_wr.wr_id = (unsigned long)node; send_wr.imm_data = htonl(node->cma_id->qp->qp_num); send_wr.wr.ud.ah = node->ah; send_wr.wr.ud.remote_qpn = node->remote_qpn; send_wr.wr.ud.remote_qkey = node->remote_qkey; sge.length = message_size; sge.lkey = node->mr->lkey; sge.addr = (uintptr_t) node->mem; for (i = 0; i < message_count && !ret; i++) { ret = ibv_post_send(node->cma_id->qp, &send_wr, &bad_send_wr); if (ret) printf("failed to post sends: %d\n", ret); } return ret; }
static int post_sends(struct cmatest_node *node) { struct ibv_send_wr send_wr, *bad_send_wr; struct ibv_sge sge; int i, ret = 0; if (!node->connected || !message_count) return 0; send_wr.next = NULL; send_wr.sg_list = &sge; send_wr.num_sge = 1; send_wr.opcode = IBV_WR_SEND; send_wr.send_flags = 0; send_wr.wr_id = (unsigned long)node; sge.length = message_size; sge.lkey = node->mr->lkey; sge.addr = (uintptr_t) node->mem; for (i = 0; i < message_count && !ret; i++) { ret = ibv_post_send(node->cma_id->qp, &send_wr, &bad_send_wr); if (ret) printf("failed to post sends: %d\n", ret); } return ret; }
/*
 * Post the RDMA READ described by `frag` on the QP selected by the
 * fragment's order field.
 *
 * Reserves one send WQE and one "get" token before posting; every failure
 * path returns both so the counters stay balanced.
 * Returns OPAL_SUCCESS, OPAL_ERR_OUT_OF_RESOURCE (no WQE/token), or
 * OPAL_ERROR (posting failed).
 */
int mca_btl_openib_get_internal (mca_btl_base_module_t *btl,
                                 struct mca_btl_base_endpoint_t *ep,
                                 mca_btl_openib_get_frag_t *frag)
{
    int qp = to_base_frag(frag)->base.order;   /* QP index from fragment order */
    struct ibv_send_wr *bad_wr;

    /* check for a send wqe */
    if (qp_get_wqe(ep, qp) < 0) {
        qp_put_wqe(ep, qp);                    /* undo the failed reservation */
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    /* check for a get token */
    if (OPAL_THREAD_ADD32(&ep->get_tokens,-1) < 0) {
        qp_put_wqe(ep, qp);
        OPAL_THREAD_ADD32(&ep->get_tokens,1);  /* restore the token count */
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    qp_inflight_wqe_to_frag(ep, qp, to_com_frag(frag));
    qp_reset_signal_count(ep, qp);

    if (ibv_post_send(ep->qps[qp].qp->lcl_qp, &frag->sr_desc, &bad_wr)) {
        /* posting failed: give back both the WQE and the token */
        qp_put_wqe(ep, qp);
        OPAL_THREAD_ADD32(&ep->get_tokens,1);
        return OPAL_ERROR;
    }

    return OPAL_SUCCESS;
}
/*
 * RDMA-write the send_region to the peer, delivering `len` (big-endian) as
 * immediate data. Bit 31 of `len` is treated as a flag and masked off to get
 * the payload length; a zero payload posts a zero-length write that carries
 * only the immediate value.
 */
void write_remote(struct connection * conn, uint32_t len){
    uint32_t payload = len & ~(1U << 31);   /* strip the flag bit */
    struct ibv_send_wr wr, *bad_wr = NULL;
    struct ibv_sge sge;

    snprintf(conn->send_region, send_buffer_size,
             "message from active/client side with pid %d", getpid());

    memset(&wr, 0, sizeof(wr));
    wr.wr_id = (uintptr_t)conn;
    wr.opcode = IBV_WR_RDMA_WRITE_WITH_IMM;
    wr.send_flags = IBV_SEND_SIGNALED;
    wr.imm_data = htonl(len);               /* full value, flag bit included */
    wr.wr.rdma.remote_addr = (uintptr_t)conn->peer_addr;
    wr.wr.rdma.rkey = conn->peer_rkey;

    if (payload > 0) {
        sge.addr = (uintptr_t)conn->send_region;
        sge.length = payload;
        sge.lkey = conn->send_region_mr->lkey;
        wr.sg_list = &sge;
        wr.num_sge = 1;
    }

    TEST_NZ(ibv_post_send(conn->qp, &wr, &bad_wr));
}
/**
 * Sends a buffer's memory region so that it can be mapped at its remote end.
 *
 * The ibv_mr descriptor itself is registered as a temporary memory region,
 * sent with `id` as immediate data, and the temporary registration is
 * released once the send completes.
 */
void RDMAChannel::SendMR(ibv_mr* mr, int id) {
  // Map the memory region itself so that it can be sent
  ibv_mr* init =
      ibv_reg_mr(adapter_.pd_, mr, sizeof(ibv_mr), IBV_ACCESS_LOCAL_WRITE);
  // The original dereferenced init->lkey without checking; ibv_reg_mr
  // returns NULL on failure.
  CHECK(init) << "ibv_reg_mr failed in SendMR";

  struct ibv_sge list;
  list.addr = (uint64_t) mr;
  list.length = sizeof(ibv_mr);
  list.lkey = init->lkey;

  struct ibv_send_wr wr;
  caffe_memset(sizeof(wr), 0, &wr);
  wr.wr_id = (uint64_t) init;  // lets the poll loop identify this send
  wr.sg_list = &list;
  wr.num_sge = 1;
  wr.opcode = IBV_WR_SEND_WITH_IMM;
  wr.send_flags = IBV_SEND_SIGNALED;
  wr.imm_data = id;

  struct ibv_send_wr *bad_wr;
  CHECK(!ibv_post_send(qp_, &wr, &bad_wr));

  // Busy-poll the send CQ until our WR completes.
  for (;;) {
    ibv_wc wc;
    int ne = ibv_poll_cq(write_cq_, 1, &wc);
    CHECK_GE(ne, 0);
    if (ne && wc.wr_id == (uint64_t) init) {
      break;
    }
  }
  CHECK(!ibv_dereg_mr(init));
}
int Process::post_send(void *context){ Connection *conn = (Connection *) context; std::cout<<"SEND LOCATION CONN -> ID"<<conn->identifier<<"\n"; std::cout<<"SEND POINTER ID"<<listener<<std::endl<<std::flush; assert(conn != nullptr); assert(conn->identifier != nullptr); struct ibv_send_wr wr, *bad_wr = nullptr; struct ibv_sge sge; assert(&message != nullptr); calc_message_numerical(&message); assert(&message != nullptr); assert(conn->send_region != nullptr); memcpy(conn->send_region, message.x, message.size*sizeof(char)); printf("connected. posting send...\n"); memsetzero(&wr); wr.wr_id = (uintptr_t)conn; wr.opcode = IBV_WR_SEND; wr.sg_list = &sge; wr.num_sge = 1; wr.send_flags = IBV_SEND_SIGNALED; sge.addr = (uintptr_t)conn->send_region; sge.length = message.size; sge.lkey = conn->send_memory_region->lkey; TEST_NZ(ibv_post_send(conn->queue_pair, &wr, &bad_wr)); return 0; }
/*
 * Send the client's index back as an acknowledgement and block until the
 * send completes.
 *
 * NOTE(review): operates entirely on file-scope state (ack_buffer, sge_send,
 * send_wr, bad_send_wr, cm_id, comp_chan, cq, evt_cq, cq_context, wc, err,
 * n, client_pdata, mr_ack_buffer), so it is not reentrant — confirm callers
 * serialize access.
 */
void send_ack() {
    /* Send ack */
    ack_buffer = client_pdata.index;

    sge_send.addr = (uintptr_t)&ack_buffer;
    sge_send.length = sizeof(ack_buffer);
    sge_send.lkey = mr_ack_buffer->lkey;

    send_wr.wr_id = 1;
    send_wr.opcode = IBV_WR_SEND;
    send_wr.send_flags = IBV_SEND_SIGNALED;
    send_wr.sg_list = &sge_send;
    send_wr.num_sge = 1;

    err = ibv_post_send(cm_id->qp, &send_wr, &bad_send_wr);
    assert(err == 0);

    /* Wait send completion */
    err = ibv_get_cq_event(comp_chan, &evt_cq, &cq_context);  /* block for CQ event */
    assert(err == 0);
    ibv_ack_cq_events(evt_cq, 1);
    err = ibv_req_notify_cq(cq, 0);   /* re-arm notification for next event */
    assert(err == 0);

    n = ibv_poll_cq(cq, 1, &wc);
    assert(n >= 1);
    if (wc.status != IBV_WC_SUCCESS)
        printf("Warning: Client %d send ack failed\n", client_pdata.index);
}
/*
 * Connection-established callback: write a greeting into the send region and
 * post a single signaled SEND covering the whole BUFFER_SIZE region.
 * Always returns 0; aborts via TEST_NZ if posting fails.
 */
int on_connection(void *context) {
  struct connection *conn = (struct connection *)context;
  struct ibv_sge sge;
  struct ibv_send_wr wr, *bad_wr = NULL;

  snprintf(conn->send_region, BUFFER_SIZE,
           "message from active/client side with pid %d", getpid());

  printf("connected. posting send...\n");

  /* single SGE over the registered send region */
  sge.addr = (uintptr_t)conn->send_region;
  sge.length = BUFFER_SIZE;
  sge.lkey = conn->send_mr->lkey;

  memset(&wr, 0, sizeof(wr));
  wr.wr_id = (uintptr_t)conn;
  wr.opcode = IBV_WR_SEND;
  wr.send_flags = IBV_SEND_SIGNALED;
  wr.sg_list = &sge;
  wr.num_sge = 1;

  TEST_NZ(ibv_post_send(conn->qp, &wr, &bad_wr));
  return 0;
}
void cfio_rdma_client_write_data( int remote_offset, int length, int local_offset) { // rdma_debug("write data ..."); if (remote_offset < 0 || remote_offset + length > DATA_REGION_SIZE) { die("RDMA out of region"); } struct ibv_send_wr wr, *bad_wr = NULL; struct ibv_sge sge; memset(&wr, 0, sizeof(wr)); rdma_conn_t *conn = rdma_conn; wr.wr_id = (uintptr_t)(conn); wr.opcode = IBV_WR_RDMA_WRITE; wr.sg_list = &sge; wr.num_sge = 1; wr.send_flags = IBV_SEND_SIGNALED; wr.wr.rdma.remote_addr = (uintptr_t)((char *)conn->peer_data_mr.addr + remote_offset); wr.wr.rdma.rkey = conn->peer_data_mr.rkey; sge.addr = (uintptr_t)(conn->data_region + local_offset); sge.length = length; sge.lkey = conn->data_mr->lkey; ++ request_stack_size; TEST_NZ(ibv_post_send(conn->qp, &wr, &bad_wr)); }
/*
 * Post the RDMA WRITE that flushes a CUDA-staged transfer described by
 * `event` to the remote segment.
 *
 * The local source is the host-side staging area of the local segment
 * (host_ptr + NOTIFY_OFFSET + offset_local). GPU-resident remote segments
 * are addressed directly; host-resident ones skip past the NOTIFY_OFFSET
 * notification area.
 *
 * NOTE(review): slist.lkey indexes rrmd[segment_local] with
 * glb_gaspi_ctx.rank while slist.addr indexes it with event->rank — verify
 * this asymmetry is intended (it is only correct if the two are equal here).
 *
 * Returns 0 on success; on posting failure marks the queue corrupt and
 * returns -1.
 */
static int _gaspi_event_send(gaspi_cuda_event *event, int queue)
{
  struct ibv_send_wr swr;
  struct ibv_sge slist;
  struct ibv_send_wr *bad_wr;

  swr.wr.rdma.rkey = glb_gaspi_ctx.rrmd[event->segment_remote][event->rank].rkey;
  swr.sg_list = &slist;
  swr.num_sge = 1;
  swr.wr_id = event->rank;            /* completion maps back to the rank */
  swr.opcode = IBV_WR_RDMA_WRITE;
  swr.send_flags = IBV_SEND_SIGNALED;
  swr.next = NULL;

  slist.addr = (uintptr_t) (char*)(glb_gaspi_ctx.rrmd[event->segment_local][event->rank].host_ptr
                                   + NOTIFY_OFFSET + event->offset_local);
  slist.length = event->size;
  slist.lkey = ((struct ibv_mr *)glb_gaspi_ctx.rrmd[event->segment_local][glb_gaspi_ctx.rank].host_mr)->lkey;

  if(glb_gaspi_ctx.rrmd[event->segment_remote][event->rank].cudaDevId >= 0)
    swr.wr.rdma.remote_addr = (glb_gaspi_ctx.rrmd[event->segment_remote][event->rank].addr
                               + event->offset_remote);
  else
    swr.wr.rdma.remote_addr = (glb_gaspi_ctx.rrmd[event->segment_remote][event->rank].addr
                               + NOTIFY_OFFSET + event->offset_remote);

  if(ibv_post_send(glb_gaspi_ctx_ib.qpC[queue][event->rank], &swr, &bad_wr))
    {
      glb_gaspi_ctx.qp_state_vec[queue][event->rank] = GASPI_STATE_CORRUPT;
      return -1;
    }

  event->ib_use = 1;   /* mark the event as having an IB op in flight */
  return 0;
}
/*
 * Common post path for RC verbs sends on this endpoint.
 *
 * Applies TX moderation (only request a signaled completion periodically
 * unless the caller already asked for one), adds IBV_SEND_FENCE before RDMA
 * READs when required for atomic ordering, logs the WR, posts it, and
 * updates the endpoint's TX bookkeeping. Aborts the process if
 * ibv_post_send() fails.
 */
static UCS_F_ALWAYS_INLINE void
uct_rc_verbs_ep_post_send(uct_rc_verbs_iface_t* iface, uct_rc_verbs_ep_t* ep,
                          struct ibv_send_wr *wr, int send_flags, int max_log_sge)
{
    struct ibv_send_wr *bad_wr;
    int ret;

    uct_rc_txqp_check(&ep->super.txqp);

    /* TX moderation: if the caller did not force a CQE, let the txqp decide
     * whether this WR should be signaled. */
    if (!(send_flags & IBV_SEND_SIGNALED)) {
        send_flags |= uct_rc_iface_tx_moderation(&iface->super, &ep->super.txqp,
                                                 IBV_SEND_SIGNALED);
    }
    /* RDMA READ may need a fence to order against preceding atomics. */
    if (wr->opcode == IBV_WR_RDMA_READ) {
        send_flags |= uct_rc_ep_atomic_fence(&iface->super, &ep->fi,
                                             IBV_SEND_FENCE);
    }

    wr->send_flags = send_flags;
    wr->wr_id = uct_rc_txqp_unsignaled(&ep->super.txqp);  /* unsignaled-WR count */

    uct_ib_log_post_send(&iface->super.super, ep->super.txqp.qp, wr, max_log_sge,
                         (wr->opcode == IBV_WR_SEND) ? uct_rc_ep_packet_dump : NULL);

    ret = ibv_post_send(ep->super.txqp.qp, wr, &bad_wr);
    if (ret != 0) {
        ucs_fatal("ibv_post_send() returned %d (%m)", ret);
    }

    uct_rc_verbs_txqp_posted(&ep->super.txqp, &ep->txcnt,
                             &iface->super, send_flags & IBV_SEND_SIGNALED);
}
/*
 * Common post path for RC verbs sends (variant without fence handling).
 *
 * Applies TX moderation (request a signaled completion only periodically
 * unless the caller forced one), records the unsignaled-WR count in wr_id,
 * logs and instruments the WR, posts it, and updates TX bookkeeping.
 * Aborts the process if ibv_post_send() fails.
 */
static UCS_F_ALWAYS_INLINE void
uct_rc_verbs_ep_post_send(uct_rc_verbs_iface_t* iface, uct_rc_verbs_ep_t* ep,
                          struct ibv_send_wr *wr, int send_flags)
{
    struct ibv_send_wr *bad_wr;
    int ret;

    uct_rc_txqp_check(&ep->super.txqp);

    if (!(send_flags & IBV_SEND_SIGNALED)) {
        /* let moderation decide whether this WR generates a CQE */
        send_flags |= uct_rc_iface_tx_moderation(&iface->super, &ep->super.txqp,
                                                 IBV_SEND_SIGNALED);
    }

    wr->send_flags = send_flags;
    wr->wr_id = uct_rc_txqp_unsignaled(&ep->super.txqp);

    uct_ib_log_post_send(&iface->super.super, ep->super.txqp.qp, wr,
                         (wr->opcode == IBV_WR_SEND) ? uct_rc_ep_am_packet_dump : NULL);
    UCT_IB_INSTRUMENT_RECORD_SEND_WR_LEN("uct_rc_verbs_ep_post_send", wr);

    ret = ibv_post_send(ep->super.txqp.qp, wr, &bad_wr);
    if (ret != 0) {
        ucs_fatal("ibv_post_send() returned %d (%m)", ret);
    }

    uct_rc_verbs_txqp_posted(&ep->super.txqp, &ep->txcnt,
                             &iface->super, send_flags & IBV_SEND_SIGNALED);
}
static int rdma_write_keys(struct pingpong_dest *my_dest, struct perftest_comm *comm) { struct ibv_send_wr wr; struct ibv_send_wr *bad_wr; struct ibv_sge list; struct ibv_wc wc; int ne; #ifdef HAVE_ENDIAN int i; struct pingpong_dest m_my_dest; m_my_dest.lid = htobe32(my_dest->lid); m_my_dest.out_reads = htobe32(my_dest->out_reads); m_my_dest.qpn = htobe32(my_dest->qpn); m_my_dest.psn = htobe32(my_dest->psn); m_my_dest.rkey = htobe32(my_dest->rkey); m_my_dest.srqn = htobe32(my_dest->srqn); m_my_dest.gid_index = htobe32(my_dest->gid_index); m_my_dest.vaddr = htobe64(my_dest->vaddr); for(i=0; i<16; i++) { m_my_dest.gid.raw[i] = my_dest->gid.raw[i]; } memcpy(comm->rdma_ctx->buf, &m_my_dest, sizeof(struct pingpong_dest)); #else memcpy(comm->rdma_ctx->buf, &my_dest, sizeof(struct pingpong_dest)); #endif list.addr = (uintptr_t)comm->rdma_ctx->buf; list.length = sizeof(struct pingpong_dest); list.lkey = comm->rdma_ctx->mr->lkey; wr.wr_id = SYNC_SPEC_ID; wr.sg_list = &list; wr.num_sge = 1; wr.opcode = IBV_WR_SEND; wr.send_flags = IBV_SEND_SIGNALED; wr.next = NULL; if (ibv_post_send(comm->rdma_ctx->qp[0],&wr,&bad_wr)) { fprintf(stderr, "Function ibv_post_send failed\n"); return 1; } do { ne = ibv_poll_cq(comm->rdma_ctx->send_cq, 1,&wc); } while (ne == 0); if (wc.status || wc.opcode != IBV_WC_SEND || wc.wr_id != SYNC_SPEC_ID) { fprintf(stderr, " Bad wc status %d\n",(int)wc.status); return 1; } return 0; }
/*
 * Initiate an RDMA READ (get) of the remote segment described by
 * `descriptor` into the local fragment, over the low-priority QP.
 *
 * If either the send-WQE counter or the get-token counter would go
 * negative, the counters are restored and the fragment is queued on the
 * endpoint's pending_get_frags list for a later retry (still reported as
 * OMPI_SUCCESS). Otherwise the READ is posted and receive buffers are
 * replenished on both priority QPs.
 */
int mca_btl_openib_get( mca_btl_base_module_t* btl,
    mca_btl_base_endpoint_t* endpoint,
    mca_btl_base_descriptor_t* descriptor)
{
    int rc;
    struct ibv_send_wr* bad_wr;
    mca_btl_openib_frag_t* frag = (mca_btl_openib_frag_t*) descriptor;
    mca_btl_openib_module_t* openib_btl = (mca_btl_openib_module_t*) btl;

    frag->endpoint = endpoint;
    frag->wr_desc.sr_desc.opcode = IBV_WR_RDMA_READ;

    /* check for a send wqe */
    if (OPAL_THREAD_ADD32(&endpoint->sd_wqe[BTL_OPENIB_LP_QP],-1) < 0) {
        OPAL_THREAD_ADD32(&endpoint->sd_wqe[BTL_OPENIB_LP_QP],1);  /* restore */
        OPAL_THREAD_LOCK(&endpoint->endpoint_lock);
        opal_list_append(&endpoint->pending_get_frags, (opal_list_item_t*)frag);
        OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock);
        return OMPI_SUCCESS;

    /* check for a get token */
    } else if(OPAL_THREAD_ADD32(&endpoint->get_tokens,-1) < 0) {
        /* restore both counters before deferring */
        OPAL_THREAD_ADD32(&endpoint->sd_wqe[BTL_OPENIB_LP_QP],1);
        OPAL_THREAD_ADD32(&endpoint->get_tokens,1);
        OPAL_THREAD_LOCK(&endpoint->endpoint_lock);
        opal_list_append(&endpoint->pending_get_frags, (opal_list_item_t*)frag);
        OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock);
        return OMPI_SUCCESS;
    } else {
        /* both resources reserved: describe the READ and post it */
        frag->wr_desc.sr_desc.send_flags = IBV_SEND_SIGNALED;
        frag->wr_desc.sr_desc.wr.rdma.remote_addr = frag->base.des_src->seg_addr.lval;
        frag->wr_desc.sr_desc.wr.rdma.rkey = frag->base.des_src->seg_key.key32[0];
        frag->sg_entry.addr = (unsigned long) frag->base.des_dst->seg_addr.pval;
        frag->sg_entry.length = frag->base.des_dst->seg_len;

        if(ibv_post_send(endpoint->lcl_qp[BTL_OPENIB_LP_QP],
                    &frag->wr_desc.sr_desc, &bad_wr)){
            BTL_ERROR(("error posting send request errno (%d) says %s",
                        errno, strerror(errno)));
            rc = ORTE_ERROR;
        } else {
            rc = ORTE_SUCCESS;
        }

        /* replenish receive resources (shared or per-endpoint RQ) */
        if(mca_btl_openib_component.use_srq) {
            mca_btl_openib_post_srr(openib_btl, 1, BTL_OPENIB_HP_QP);
            mca_btl_openib_post_srr(openib_btl, 1, BTL_OPENIB_LP_QP);
        } else {
            btl_openib_endpoint_post_rr(endpoint, 1, BTL_OPENIB_HP_QP);
            btl_openib_endpoint_post_rr(endpoint, 1, BTL_OPENIB_LP_QP);
        }
    }
    return rc;
}
/*
 * Completion handler driving the MR-exchange state machine.
 *
 * Flow (per connection): both sides exchange MSG_MR messages describing
 * their RDMA buffers; once this side has both sent and received an MR
 * (SS_MR_SENT / RS_MR_RECV), it issues the one-sided RDMA WRITE or READ
 * (depending on s_mode) followed by a MSG_DONE message. When both DONE
 * messages have passed (SS_DONE_SENT / RS_DONE_RECV), the remote buffer is
 * printed and the connection is torn down.
 */
void on_completion(struct ibv_wc *wc)
{
    struct connection *conn = (struct connection *)(uintptr_t)wc->wr_id;

    printf("== STATE: send=%d / recv=%d ==\n", conn->send_state, conn->recv_state);

    if (wc->status != IBV_WC_SUCCESS)
        die("on_completion: status is not IBV_WC_SUCCESS.");

    if (wc->opcode & IBV_WC_RECV) {
        conn->recv_state++;
        printf("RECV: Recieved: TYPE=%d\n", conn->recv_msg->type);

        if (conn->recv_msg->type == MSG_MR) {
            /* stash the peer's memory-region descriptor for the RDMA op */
            memcpy(&conn->peer_mr, &conn->recv_msg->data.mr, sizeof(conn->peer_mr));
            post_receives(conn); /* only rearm for MSG_MR */

            if (conn->send_state == SS_INIT) /* received peer's MR before sending ours, so send ours back */
                send_mr(conn);
        }
    } else {
        conn->send_state++;
        printf("SEND: Sent out: TYPE=%d\n", conn->send_msg->type);
    }

    if (conn->send_state == SS_MR_SENT && conn->recv_state == RS_MR_RECV) {
        struct ibv_send_wr wr, *bad_wr = NULL;
        struct ibv_sge sge;

        if (s_mode == M_WRITE)
            printf(" -> received MSG_MR. writing message to remote memory...\n");
        else
            printf(" -> received MSG_MR. reading message from remote memory...\n");

        /* both MRs exchanged: issue the one-sided RDMA operation */
        memset(&wr, 0, sizeof(wr));
        wr.wr_id = (uintptr_t)conn;
        wr.opcode = (s_mode == M_WRITE) ? IBV_WR_RDMA_WRITE : IBV_WR_RDMA_READ;
        wr.sg_list = &sge;
        wr.num_sge = 1;
        wr.send_flags = IBV_SEND_SIGNALED;
        wr.wr.rdma.remote_addr = (uintptr_t)conn->peer_mr.addr;
        wr.wr.rdma.rkey = conn->peer_mr.rkey;

        sge.addr = (uintptr_t)conn->rdma_local_region;
        sge.length = RDMA_BUFFER_SIZE;
        sge.lkey = conn->rdma_local_mr->lkey;

        TEST_NZ(ibv_post_send(conn->qp, &wr, &bad_wr));
        printf("PSEND: Posted send request: MSG=%s\n", conn->rdma_local_region);

        /* tell the peer we are done */
        conn->send_msg->type = MSG_DONE;
        send_message(conn);
    } else if (conn->send_state == SS_DONE_SENT && conn->recv_state == RS_DONE_RECV) {
        printf(" -> remote buffer: %s\n", get_peer_message_region(conn));
        rdma_disconnect(conn->id);
    }
}
// All data transfers must go through this function void prepare_and_post_send_desc(void *src, void *dst, int dest, int len, int lkey, int rkey, int type, int lock_or_unlock) { sr_desc.send_flags = IBV_SEND_SIGNALED; sr_desc.next = NULL; sr_desc.opcode = type; sr_desc.wr_id = 0; sr_desc.num_sge = 1; if(IBV_WR_RDMA_WRITE == type) { sr_desc.wr.rdma.remote_addr = (uintptr_t) (dst); sr_sg_entry.addr = (uintptr_t) (src); sr_sg_entry.length = len; sr_desc.wr.rdma.rkey = rkey; } if (IBV_WR_RDMA_READ == type) { sr_desc.wr.rdma.remote_addr = (uintptr_t) (src); sr_sg_entry.addr = (uintptr_t) (dst); sr_sg_entry.length = len; sr_desc.wr.rdma.rkey = rkey; } if (IBV_WR_ATOMIC_CMP_AND_SWP == type) { sr_desc.wr.atomic.remote_addr = (uintptr_t) (dst); sr_desc.wr.atomic.rkey = rkey; sr_sg_entry.addr = (uintptr_t) (src); sr_sg_entry.length = sizeof(long); if (lock_or_unlock == OPENIB_LOCK) { sr_desc.wr.atomic.compare_add = 0; sr_desc.wr.atomic.swap = l_state.rank + 1; } else if (lock_or_unlock == OPENIB_UNLOCK){ sr_desc.wr.atomic.compare_add = l_state.rank + 1; sr_desc.wr.atomic.swap = 0; } else { assert(0); } } sr_sg_entry.lkey = lkey; sr_desc.sg_list = &(sr_sg_entry); struct ibv_send_wr *bad_wr; if(ibv_post_send(conn.qp[dest], &sr_desc, &bad_wr)) { fprintf(stderr,"[%d] Error posting send\n", me); fflush(stderr); } // Increment outstanding and check whether we need to make progress increment_outstanding(); }
/*
 * fio engine hook: close an RDMA file/connection.
 *
 * A client using a one-sided protocol (MEM_WRITE / MEM_READ) first posts the
 * pre-built notification SEND so the server knows no more I/O is coming,
 * then both sides disconnect and every verbs/rdmacm resource created at
 * setup is destroyed in reverse order.
 * Returns 0 on success, 1 if the notification could not be posted.
 */
static int fio_rdmaio_close_file(struct thread_data *td, struct fio_file *f)
{
    struct rdmaio_data *rd = td->io_ops->data;
    struct ibv_send_wr *bad_wr;

    /* unregister rdma buffer */

    /*
     * Client sends notification to the server side
     */
    /* refer to: http://linux.die.net/man/7/rdma_cm */
    if ((rd->is_client == 1) && ((rd->rdma_protocol == FIO_RDMA_MEM_WRITE)
                || (rd->rdma_protocol == FIO_RDMA_MEM_READ))) {
        if (ibv_post_send(rd->qp, &rd->sq_wr, &bad_wr) != 0) {
            log_err("fio: ibv_post_send fail");
            return 1;
        }

        dprint(FD_IO, "fio: close information sent success\n");
        rdma_poll_wait(td, IBV_WC_SEND);   /* wait for the SEND to complete */
    }

    if (rd->is_client == 1)
        rdma_disconnect(rd->cm_id);
    else {
        rdma_disconnect(rd->child_cm_id);  /* server uses the accepted id */
#if 0
        rdma_disconnect(rd->cm_id);
#endif
    }

#if 0
    if (get_next_channel_event(td, rd->cm_channel, RDMA_CM_EVENT_DISCONNECTED) != 0) {
        log_err("fio: wait for RDMA_CM_EVENT_DISCONNECTED\n");
        return 1;
    }
#endif

    /* tear down verbs objects in reverse creation order */
    ibv_destroy_cq(rd->cq);
    ibv_destroy_qp(rd->qp);

    if (rd->is_client == 1)
        rdma_destroy_id(rd->cm_id);
    else {
        rdma_destroy_id(rd->child_cm_id);
        rdma_destroy_id(rd->cm_id);
    }

    ibv_destroy_comp_channel(rd->channel);
    ibv_dealloc_pd(rd->pd);

    return 0;
}
/*
 * Drain the extended backlog queue of `qp` while flow-control send credits
 * remain.
 *
 * Each backlogged descriptor may in principle be a chain of WRs; a chain is
 * only dequeued when enough credits remain to cover it entirely (the assert
 * documents that chains are length 1 in practice). Each dequeued descriptor
 * is either posted directly or, when send WQEs are exhausted or the extended
 * send queue is already backed up, re-queued there instead.
 */
void mvdev_ext_backlogq_send(mv_qp * qp)
{
    mv_sdescriptor *d;
    struct ibv_send_wr *sr;
    struct ibv_send_wr *bad_wr;
    int i;

    while (qp->send_credits_remaining > 0 && qp->ext_backlogq_head) {
        d = qp->ext_backlogq_head;

        /* find how many desc are chained */
        i = 1;
        sr = &(d->sr);
        while(sr->next) {
            sr = sr->next;
            i++;
        }
        assert(i == 1);

        if(qp->send_credits_remaining >= i) {
            /* unlink the descriptor from the backlog queue */
            qp->ext_backlogq_head = d->next_extsendq;
            if (d == qp->ext_backlogq_tail) {
                qp->ext_backlogq_tail = NULL;
            }
            d->next_extsendq = NULL;

            mvdev.connections[((mv_sbuf *)d->parent)->rank].queued--;

            /* reset the credit counter now -- so we don't lose credits in
             * the backlogq */
            if(MVDEV_RPUT_FLAG == ((mv_sbuf *)d->parent)->flag) {
                D_PRINT("unqueing RPUT\n");
            } else {
                PACKET_SET_CREDITS(((mv_sbuf *)d->parent),
                        (&(mvdev.connections[((mv_sbuf *) d->parent)->rank])));
            }

            D_PRINT("at %d, dropping to %d, queued: %d\n",
                    qp->send_credits_remaining, qp->send_credits_remaining - i,
                    mvdev.connections[((mv_sbuf *)d->parent)->rank].queued);

            qp->send_credits_remaining -= i;

            if((qp->send_wqes_avail - i) < 0 || (NULL != qp->ext_sendq_head)) {
                /* no WQE room (or sends already queued): defer to ext sendq */
                mvdev_ext_sendq_queue(qp, d);
            } else {
                if(ibv_post_send(qp->qp, &(d->sr), &bad_wr)) {
                    error_abort_all(IBV_RETURN_ERR,"Error posting to RC QP (%d)\n",
                            qp->send_wqes_avail);
                }
                qp->send_wqes_avail -= i;
            }
        } else {
            break;   /* not enough credits for the whole chain */
        }
    }
}
/*
 * Post a one-sided RDMA READ of `size` bytes from a remote segment into a
 * local segment on the given queue.
 *
 * Host-resident segments keep their data area after NOTIFY_OFFSET (the
 * notification slots), so the offset is shifted; GPU segments (GPI2_CUDA)
 * are addressed directly.
 * NOTE(review): the CUDA branch reads the local address from
 * glb_gaspi_ctx_ib.rrmd while the host branch uses glb_gaspi_ctx.rrmd —
 * confirm the two tables are intentionally distinct here.
 * On posting failure the queue is marked corrupt and GASPI_ERROR returned.
 */
gaspi_return_t
pgaspi_dev_read (const gaspi_segment_id_t segment_id_local,
                 const gaspi_offset_t offset_local,
                 const gaspi_rank_t rank,
                 const gaspi_segment_id_t segment_id_remote,
                 const gaspi_offset_t offset_remote,
                 const unsigned int size,
                 const gaspi_queue_id_t queue)
{
  struct ibv_send_wr *bad_wr;
  struct ibv_sge slist;
  struct ibv_send_wr swr;

#ifdef GPI2_CUDA
  /* GPU segment: no notification area in front of the data */
  if(glb_gaspi_ctx.rrmd[segment_id_local][glb_gaspi_ctx.rank].cudaDevId >= 0)
    slist.addr = (uintptr_t) (glb_gaspi_ctx_ib. rrmd[segment_id_local][glb_gaspi_ctx.rank].addr + offset_local);
  else
#endif
    slist.addr = (uintptr_t) (glb_gaspi_ctx.rrmd[segment_id_local][glb_gaspi_ctx.rank].addr + NOTIFY_OFFSET + offset_local);

  slist.length = size;
  slist.lkey = ((struct ibv_mr *)glb_gaspi_ctx.rrmd[segment_id_local][glb_gaspi_ctx.rank].mr)->lkey;

#ifdef GPI2_CUDA
  if(glb_gaspi_ctx.rrmd[segment_id_remote][rank].cudaDevId >= 0)
    swr.wr.rdma.remote_addr =(glb_gaspi_ctx.rrmd[segment_id_remote][rank].addr + offset_remote);
  else
#endif
    swr.wr.rdma.remote_addr = (glb_gaspi_ctx.rrmd[segment_id_remote][rank].addr + NOTIFY_OFFSET + offset_remote);

  swr.wr.rdma.rkey = glb_gaspi_ctx.rrmd[segment_id_remote][rank].rkey;
  swr.sg_list = &slist;
  swr.num_sge = 1;
  swr.wr_id = rank;                   /* completion maps back to the rank */
  swr.opcode = IBV_WR_RDMA_READ;
  swr.send_flags = IBV_SEND_SIGNALED;// | IBV_SEND_FENCE;
  swr.next = NULL;

  if (ibv_post_send (glb_gaspi_ctx_ib.qpC[queue][rank], &swr, &bad_wr))
    {
      glb_gaspi_ctx.qp_state_vec[queue][rank] = GASPI_STATE_CORRUPT;
      return GASPI_ERROR;
    }

  return GASPI_SUCCESS;
}
/* SendData == Post a 'send' request to the (send)command queue */
void SendData(ArgStruct *p)
{
    int ret;                     /* Return code */
    struct ibv_send_wr sr;       /* Send request */
    struct ibv_send_wr *bad_wr;  /* Handle to any incomplete wr returned by ibv */
    struct ibv_sge sg_entry;     /* Scatter/Gather list - holds buff addr */

    /* Zero the request so fields a given opcode does not set have a defined
     * value — notably imm_data, which the original left as stack garbage
     * for the *_WITH_IMM opcodes. */
    memset(&sr, 0, sizeof(sr));

    /* Set the send request's opcode based on run-time options */
    if (p->prot.commtype == NP_COMM_SENDRECV) {
        sr.opcode = IBV_WR_SEND;
        LOGPRINTF(("Doing regular send"));
    } else if (p->prot.commtype == NP_COMM_SENDRECV_WITH_IMM) {
        sr.opcode = IBV_WR_SEND_WITH_IMM;
        LOGPRINTF(("Doing regular send with imm"));
    } else if (p->prot.commtype == NP_COMM_RDMAWRITE) {
        sr.opcode = IBV_WR_RDMA_WRITE;
        /* if RDMA, need to give more info */
        sr.wr.rdma.remote_addr =
            (uintptr_t)(((char *)remote_address) + (p->s_ptr - p->s_buff));
        sr.wr.rdma.rkey = remote_key;
        /* cast for %p: remote_addr is a 64-bit integer, not a pointer */
        LOGPRINTF(("Doing RDMA write (raddr=%p)",
                   (void *)(uintptr_t)sr.wr.rdma.remote_addr));
    } else if (p->prot.commtype == NP_COMM_RDMAWRITE_WITH_IMM) {
        sr.opcode = IBV_WR_RDMA_WRITE_WITH_IMM;
        /* more info if RDMA */
        sr.wr.rdma.remote_addr =
            (uintptr_t)(((char *)remote_address) + (p->s_ptr - p->s_buff));
        sr.wr.rdma.rkey = remote_key;
        LOGPRINTF(("Doing RDMA write with imm (raddr=%p)",
                   (void *)(uintptr_t)sr.wr.rdma.remote_addr));
    } else {
        fprintf(stderr, "Error, invalid communication type in SendData\n");
        exit(-1);
    }

    sr.send_flags = 0;       /* This needed due to a bug in Mellanox HW rel a-0 */
    sr.num_sge = 1;          /* # entries in this request */
    sr.sg_list = &sg_entry;  /* the list of other requests */
    sr.next = NULL;          /* the next request in the list */

    sg_entry.lkey = s_mr_hndl->lkey;     /* Local memory region key */
    sg_entry.length = p->bufflen;        /* buffer's size */
    sg_entry.addr = (uintptr_t)p->s_ptr; /* buffer's location */

    /* Post the send request to the (send)command queue */
    ret = ibv_post_send(qp_hndl, &sr, &bad_wr);
    if (ret) {
        fprintf(stderr, "Error posting send request\n");
    } else {
        LOGPRINTF(("Posted send request"));
    }
}
/*
 * Post the pre-built "bcopy" TX work request for `ep`.
 *
 * The WR template (iface->tx.wr_bcp) is filled with per-send fields, the TX
 * hook is invoked with the network header at sge[0], and the WR is posted.
 * Posting failure is only checked via ucs_assertv.
 */
static inline void
uct_ud_verbs_iface_tx_data(uct_ud_verbs_iface_t *iface, uct_ud_verbs_ep_t *ep)
{
    int UCS_V_UNUSED ret;
    struct ibv_send_wr *bad_wr;

    uct_ud_verbs_iface_fill_tx_wr(iface, ep, &iface->tx.wr_bcp, 0);
    UCT_UD_EP_HOOK_CALL_TX(&ep->super, (uct_ud_neth_t *)iface->tx.sge[0].addr);
    ret = ibv_post_send(iface->super.qp, &iface->tx.wr_bcp, &bad_wr);
    ucs_assertv(ret == 0, "ibv_post_send() returned %d (%m)", ret);
    uct_ib_log_post_send(iface->super.qp, &iface->tx.wr_bcp, NULL);
}
static int __xfer_rdma_do_rdma(struct xfer_rdma_buf_handle_t **handles, int hcount, int opcode) { struct xfer_context *ctx = handles[0]->ctx; struct ibv_sge *sge; struct ibv_send_wr *wr; struct ibv_send_wr *curr_wr; struct ibv_send_wr *bad_wr; int i; int ret = 0; for (i=0; i < hcount; i++) { curr_wr = malloc(sizeof(struct ibv_send_wr)); sge = malloc(sizeof(struct ibv_sge)); sge->addr = (uintptr_t) handles[i]->buf; sge->length = handles[i]->local_size; sge->lkey = handles[i]->local_mr->lkey; curr_wr->wr.rdma.remote_addr = (uintptr_t) handles[i]->remote_mr->addr; curr_wr->wr.rdma.rkey = handles[i]->remote_mr->rkey; curr_wr->wr_id = handles[i]->id; curr_wr->sg_list = sge; curr_wr->num_sge = 1; curr_wr->opcode = opcode; curr_wr->send_flags = IBV_SEND_SIGNALED; curr_wr->imm_data = 0; if (i == 0) wr = curr_wr; if (i == hcount-1) curr_wr->next = NULL; else curr_wr = curr_wr->next; handles[i]->opcode = opcode; } if (ibv_post_send(ctx->qp, wr, &bad_wr)) { fprintf(stderr, "%d:%s: ibv_post_send failed\n", pid, __func__); perror("ibv_post_send"); ret = -1; } // free the wr for (i = 0; i < hcount; i++) { free(curr_wr->sg_list); free(curr_wr); } return ret; }
/*
 * Post an inline UD send: sge[0] carries the pre-filled network header,
 * sge[1] is pointed at the caller's payload, and the tx.wr_inl template is
 * posted with IBV_SEND_INLINE so the HCA copies the data at post time (the
 * caller's buffer may be reused immediately). Posting failure is only
 * checked via ucs_assertv.
 */
static inline void
uct_ud_verbs_iface_tx_inl(uct_ud_verbs_iface_t *iface, uct_ud_verbs_ep_t *ep,
                          const void *buffer, unsigned length)
{
    int UCS_V_UNUSED ret;
    struct ibv_send_wr *bad_wr;

    iface->tx.sge[1].addr = (uintptr_t)buffer;
    iface->tx.sge[1].length = length;
    uct_ud_verbs_iface_fill_tx_wr(iface, ep, &iface->tx.wr_inl, IBV_SEND_INLINE);
    UCT_UD_EP_HOOK_CALL_TX(&ep->super, (uct_ud_neth_t *)iface->tx.sge[0].addr);
    ret = ibv_post_send(iface->super.qp, &iface->tx.wr_inl, &bad_wr);
    ucs_assertv(ret == 0, "ibv_post_send() returned %d (%m)", ret);
    uct_ib_log_post_send(iface->super.qp, &iface->tx.wr_inl, NULL);
}
/*
 * fio engine hook: connect the client's rdma_cm id and send the initial
 * task request (protocol mode + iodepth) to the server.
 * Returns 0 on success, 1 on any failure.
 */
static int fio_rdmaio_connect(struct thread_data *td, struct fio_file *f)
{
    struct rdmaio_data *rd = td->io_ops->data;
    struct rdma_conn_param conn_param;
    struct ibv_send_wr *bad_wr;

    memset(&conn_param, 0, sizeof(conn_param));
    conn_param.responder_resources = 1;
    conn_param.initiator_depth = 1;
    conn_param.retry_count = 10;

    if (rdma_connect(rd->cm_id, &conn_param) != 0) {
        log_err("fio: rdma_connect fail\n");
        return 1;
    }

    /* block until the CM reports the connection is up */
    if (get_next_channel_event
            (td, rd->cm_channel, RDMA_CM_EVENT_ESTABLISHED) != 0) {
        log_err("fio: wait for RDMA_CM_EVENT_ESTABLISHED\n");
        return 1;
    }

    /* send task request */
    rd->send_buf.mode = htonl(rd->rdma_protocol);
    rd->send_buf.nr = htonl(td->o.iodepth);

    if (ibv_post_send(rd->qp, &rd->sq_wr, &bad_wr) != 0) {
        log_err("fio: ibv_post_send fail");
        return 1;
    }

    if (rdma_poll_wait(td, IBV_WC_SEND) < 0)
        return 1;

    /* wait for remote MR info from server side */
    if (rdma_poll_wait(td, IBV_WC_RECV) < 0)
        return 1;

    /* In SEND/RECV test, it's a good practice to setup the iodepth of
     * of the RECV side deeper than that of the SEND side to
     * avoid RNR (receiver not ready) error. The
     * SEND side may send so many unsolicited message before
     * RECV side commits sufficient recv buffers into recv queue.
     * This may lead to RNR error. Here, SEND side pauses for a while
     * during which RECV side commits sufficient recv buffers.
     */
    usleep(500000);

    return 0;
}
/*
 * Send one 9P message over the RDMA transport.
 *
 * Scans the send-context ring for a free slot (sleeping on the condition
 * variable when all q_depth slots are in use), copies the packet into the
 * slot's registered buffer, and posts a signaled SEND.
 * Returns fc->size on success, -1 on posting failure (errno via np_uerror).
 */
static int rdma_trans_send(Npfcall *fc, void *a)
{
    int i, n;
    Rdmatrans *rdma;
    struct ibv_sge sge;
    struct ibv_send_wr wr, *bad_wr;
    Rdmactx *wctx;

    rdma = a;
    pthread_mutex_lock(&rdma->lock);
again:
    /* linear scan of the send-context ring for an unused slot */
    for (i = 0, wctx = (Rdmactx *) rdma->snd_buf;
         i < rdma->q_depth;
         i++, wctx = (Rdmactx *) ((char *) wctx + rdma->msize))
        if (!wctx->used)
            break;
    if (i >= rdma->q_depth) {
        /* wait for a slot */
        pthread_cond_wait(&rdma->cond, &rdma->lock);
        goto again;
    }

    wctx->wc_op = IBV_WC_SEND;
    wctx->rdma = rdma;
    wctx->used = 1;
    wctx->len = fc->size;
    wctx->pos = 0;
    memmove(wctx->buf, fc->pkt, fc->size);
    pthread_mutex_unlock(&rdma->lock);

    sge.addr = (uintptr_t) wctx->buf;
    sge.length = fc->size;
    sge.lkey = rdma->snd_mr->lkey;

    wr.next = NULL;
    wr.wr_id = (u64)(unsigned long)wctx;   /* slot recovered on completion */
    wr.opcode = IBV_WR_SEND;
    wr.send_flags = IBV_SEND_SIGNALED;
    wr.sg_list = &sge;
    wr.num_sge = 1;

    n = ibv_post_send(rdma->qp, &wr, &bad_wr);
    if (n) {
        /* BUG FIX: release the slot on failure — the original left
         * wctx->used set, permanently leaking one ring entry per failed
         * post and never waking waiters. */
        pthread_mutex_lock(&rdma->lock);
        wctx->used = 0;
        pthread_cond_signal(&rdma->cond);
        pthread_mutex_unlock(&rdma->lock);
        np_uerror(n);
        return -1;
    }
    return fc->size;
}
/*
 * Write `notification_value` into remote notification slot
 * `notification_id` of the given segment/rank via an inlined RDMA WRITE.
 *
 * The value is first staged in the local notification source buffer (nsrc)
 * and sent with IBV_SEND_INLINE, so the slot may be reused as soon as the
 * post returns. For GPU-resident remote segments (GPI2_CUDA) the host-side
 * shadow address/rkey are used instead.
 * On posting failure the queue is marked corrupt and GASPI_ERROR returned.
 */
gaspi_return_t
pgaspi_dev_notify (const gaspi_segment_id_t segment_id_remote,
                   const gaspi_rank_t rank,
                   const gaspi_notification_id_t notification_id,
                   const gaspi_notification_t notification_value,
                   const gaspi_queue_id_t queue)
{
  struct ibv_send_wr *bad_wr;
  struct ibv_sge slistN;
  struct ibv_send_wr swrN;

  slistN.addr = (uintptr_t) (glb_gaspi_ctx.nsrc.buf + notification_id * sizeof(gaspi_notification_t));
  /* stage the value in the local notification source slot */
  *((unsigned int *) slistN.addr) = notification_value;

  slistN.length = sizeof(gaspi_notification_t);
  slistN.lkey = ((struct ibv_mr *) glb_gaspi_ctx.nsrc.mr)->lkey;

#ifdef GPI2_CUDA
  if( glb_gaspi_ctx.rrmd[segment_id_remote][rank].cudaDevId >= 0)
    {
      /* GPU segment: notifications live in the host-side shadow buffer */
      swrN.wr.rdma.remote_addr = (glb_gaspi_ctx.rrmd[segment_id_remote][rank].host_addr + notification_id * sizeof(gaspi_notification_t));
      swrN.wr.rdma.rkey = glb_gaspi_ctx.rrmd[segment_id_remote][rank].host_rkey;
    }
  else
#endif
    {
      swrN.wr.rdma.remote_addr = (glb_gaspi_ctx.rrmd[segment_id_remote][rank].addr + notification_id * sizeof(gaspi_notification_t));
      swrN.wr.rdma.rkey = glb_gaspi_ctx.rrmd[segment_id_remote][rank].rkey;
    }

  swrN.sg_list = &slistN;
  swrN.num_sge = 1;
  swrN.wr_id = rank;                    /* completion maps back to the rank */
  swrN.opcode = IBV_WR_RDMA_WRITE;
  swrN.send_flags = IBV_SEND_SIGNALED | IBV_SEND_INLINE;  /* small payload, copied at post */
  swrN.next = NULL;

  if (ibv_post_send (glb_gaspi_ctx_ib.qpC[queue][rank], &swrN, &bad_wr))
    {
      glb_gaspi_ctx.qp_state_vec[queue][rank] = GASPI_STATE_CORRUPT;
      return GASPI_ERROR;
    }

  return GASPI_SUCCESS;
}
/*
 * Post a UD send of a pre-built send skb.
 *
 * sge[0] is pointed at the skb's buffer (header at skb->neth, length
 * skb->len, key skb->lkey), the tx.wr_skb template is filled with the
 * caller's flags and posted, and the iface TX-available counter is
 * decremented. Posting failure is only checked via ucs_assertv.
 */
static inline void
uct_ud_verbs_ep_tx_skb(uct_ud_verbs_iface_t *iface, uct_ud_verbs_ep_t *ep,
                       uct_ud_send_skb_t *skb, unsigned flags)
{
    int UCS_V_UNUSED ret;
    struct ibv_send_wr *bad_wr;

    iface->tx.sge[0].lkey = skb->lkey;
    iface->tx.sge[0].length = skb->len;
    iface->tx.sge[0].addr = (uintptr_t)skb->neth;
    uct_ud_verbs_iface_fill_tx_wr(iface, ep, &iface->tx.wr_skb, flags);
    UCT_UD_EP_HOOK_CALL_TX(&ep->super, (uct_ud_neth_t *)iface->tx.sge[0].addr);
    ret = ibv_post_send(iface->super.qp, &iface->tx.wr_skb, &bad_wr);
    ucs_assertv(ret == 0, "ibv_post_send() returned %d (%m)", ret);
    uct_ib_log_post_send(&iface->super.super, iface->super.qp,
                         &iface->tx.wr_skb, NULL);
    --iface->super.tx.available;   /* one TX resource consumed */
}
static int send_qp_num_for_ah(struct pingpong_context *ctx, struct perftest_parameters *user_param) { struct ibv_send_wr wr; struct ibv_send_wr *bad_wr; struct ibv_sge list; struct ibv_wc wc; int ne; memcpy(ctx->buf,&ctx->qp[0]->qp_num,sizeof(uint32_t)); list.addr = (uintptr_t)ctx->buf; list.length = sizeof(uint32_t); list.lkey = ctx->mr->lkey; wr.wr_id = 0; wr.sg_list = &list; wr.num_sge = 1; wr.opcode = IBV_WR_SEND_WITH_IMM; wr.send_flags = IBV_SEND_SIGNALED; wr.next = NULL; wr.imm_data = htonl(ctx->qp[0]->qp_num); wr.wr.ud.ah = ctx->ah[0]; wr.wr.ud.remote_qpn = user_param->rem_ud_qpn; wr.wr.ud.remote_qkey = user_param->rem_ud_qkey; if (ibv_post_send(ctx->qp[0],&wr,&bad_wr)) { fprintf(stderr, "Function ibv_post_send failed\n"); return 1; } do { ne = ibv_poll_cq(ctx->send_cq, 1,&wc); } while (ne == 0); if (wc.status || wc.opcode != IBV_WC_SEND || wc.wr_id != 0) { fprintf(stderr, " Couldn't post send my QP number %d\n",(int)wc.status); return 1; } return 0; }