static int ucma_init_ud_qp3(struct cma_id_private *id_priv, struct ibv_qp *qp) { struct ibv_qp_attr qp_attr; int ret; ret = ucma_find_pkey(id_priv->cma_dev, id_priv->id.port_num, id_priv->id.route.addr.addr.ibaddr.pkey, &qp_attr.pkey_index); if (ret) return ret; qp_attr.port_num = id_priv->id.port_num; qp_attr.qp_state = IBV_QPS_INIT; qp_attr.qkey = RDMA_UDP_QKEY; ret = ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE | IBV_QP_QKEY | IBV_QP_PKEY_INDEX | IBV_QP_PORT); if (ret) return ret; qp_attr.qp_state = IBV_QPS_RTR; ret = ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE); if (ret) return ret; qp_attr.qp_state = IBV_QPS_RTS; qp_attr.sq_psn = 0; return ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE | IBV_QP_SQ_PSN); }
// Transition all UD QPs to RTS int modify_dgram_qp_to_rts(struct ctrl_blk *ctx) { int i; for(i = 0; i < ctx->num_local_dgram_qps; i++) { struct ibv_qp_attr dgram_attr = { .qp_state = IBV_QPS_RTR, }; if (ibv_modify_qp(ctx->dgram_qp[i], &dgram_attr, IBV_QP_STATE)) { fprintf(stderr, "Failed to modify dgram QP to RTR\n"); return 1; } dgram_attr.qp_state = IBV_QPS_RTS; dgram_attr.sq_psn = ctx->local_dgram_qp_attrs[i].psn; if(ibv_modify_qp(ctx->dgram_qp[i], &dgram_attr, IBV_QP_STATE|IBV_QP_SQ_PSN)) { fprintf(stderr, "Failed to modify dgram QP to RTS\n"); return 1; } } return 0; }
static int ucma_init_ud_qp(struct cma_id_private *id_priv, struct ibv_qp *qp) { struct ibv_qp_attr qp_attr; int qp_attr_mask, ret; if (abi_ver == 3) return ucma_init_ud_qp3(id_priv, qp); qp_attr.qp_state = IBV_QPS_INIT; ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); if (ret) return ret; ret = ibv_modify_qp(qp, &qp_attr, qp_attr_mask); if (ret) return ret; qp_attr.qp_state = IBV_QPS_RTR; ret = ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE); if (ret) return ret; qp_attr.qp_state = IBV_QPS_RTS; qp_attr.sq_psn = 0; return ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE | IBV_QP_SQ_PSN); }
static int ucma_modify_qp_rtr(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) { struct ibv_qp_attr qp_attr; int qp_attr_mask, ret; if (!id->qp) return ERR(EINVAL); /* Need to update QP attributes from default values. */ qp_attr.qp_state = IBV_QPS_INIT; ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask); if (ret) return ret; ret = ibv_modify_qp(id->qp, &qp_attr, qp_attr_mask); if (ret) return ret; qp_attr.qp_state = IBV_QPS_RTR; ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask); if (ret) return ret; if (conn_param) qp_attr.max_dest_rd_atomic = conn_param->responder_resources; return ibv_modify_qp(id->qp, &qp_attr, qp_attr_mask); }
// Transition connected QP indexed qp_i through RTR and RTS stages int connect_ctx(struct ctrl_blk *ctx, int my_psn, struct qp_attr dest, int qp_i) { struct ibv_qp_attr conn_attr = { .qp_state = IBV_QPS_RTR, .path_mtu = IBV_MTU_4096, .dest_qp_num = dest.qpn, .rq_psn = dest.psn, .ah_attr = { .is_global = (is_roce() == 1) ? 1 : 0, .dlid = (is_roce() == 1) ? 0 : dest.lid, .sl = 0, .src_path_bits = 0, .port_num = IB_PHYS_PORT } }; if(is_roce()) { conn_attr.ah_attr.grh.dgid.global.interface_id = dest.gid_global_interface_id; conn_attr.ah_attr.grh.dgid.global.subnet_prefix = dest.gid_global_subnet_prefix; conn_attr.ah_attr.grh.sgid_index = 0; conn_attr.ah_attr.grh.hop_limit = 1; } int rtr_flags = IBV_QP_STATE | IBV_QP_AV | IBV_QP_PATH_MTU | IBV_QP_DEST_QPN | IBV_QP_RQ_PSN; if(!USE_UC) { conn_attr.max_dest_rd_atomic = 16; conn_attr.min_rnr_timer = 12; rtr_flags |= IBV_QP_MAX_DEST_RD_ATOMIC | IBV_QP_MIN_RNR_TIMER; } if (ibv_modify_qp(ctx->conn_qp[qp_i], &conn_attr, rtr_flags)) { fprintf(stderr, "Failed to modify QP to RTR\n"); return 1; } memset(&conn_attr, 0, sizeof(conn_attr)); conn_attr.qp_state = IBV_QPS_RTS; conn_attr.sq_psn = my_psn; int rts_flags = IBV_QP_STATE | IBV_QP_SQ_PSN; if(!USE_UC) { conn_attr.timeout = 14; conn_attr.retry_cnt = 7; conn_attr.rnr_retry = 7; conn_attr.max_rd_atomic = 16; rts_flags |= IBV_QP_TIMEOUT | IBV_QP_RETRY_CNT | IBV_QP_RNR_RETRY | IBV_QP_MAX_QP_RD_ATOMIC; } if (ibv_modify_qp(ctx->conn_qp[qp_i], &conn_attr, rts_flags)) { fprintf(stderr, "Failed to modify QP to RTS\n"); return 1; } return 0; }
static int pp_connect_ctx(struct pingpong_context *ctx,int my_psn, struct pingpong_dest *dest,int my_reads, struct perftest_parameters *user_parm) { struct ibv_qp_attr attr; memset(&attr, 0, sizeof(struct ibv_qp_attr)); attr.qp_state = IBV_QPS_RTR; attr.path_mtu = user_parm->curr_mtu; attr.dest_qp_num = dest->qpn; attr.rq_psn = dest->psn; attr.ah_attr.dlid = dest->lid; attr.max_dest_rd_atomic = my_reads; attr.min_rnr_timer = 12; if (user_parm->gid_index < 0) { attr.ah_attr.is_global = 0; attr.ah_attr.sl = user_parm->sl; } else { attr.ah_attr.is_global = 1; attr.ah_attr.grh.dgid = dest->gid; attr.ah_attr.grh.sgid_index = user_parm->gid_index; attr.ah_attr.grh.hop_limit = 1; attr.ah_attr.sl = 0; } attr.ah_attr.src_path_bits = 0; attr.ah_attr.port_num = user_parm->ib_port; if (ibv_modify_qp(ctx->qp[0], &attr, IBV_QP_STATE | IBV_QP_AV | IBV_QP_PATH_MTU | IBV_QP_DEST_QPN | IBV_QP_RQ_PSN | IBV_QP_MIN_RNR_TIMER | IBV_QP_MAX_DEST_RD_ATOMIC)) { fprintf(stderr, "Failed to modify RC QP to RTR\n"); return 1; } attr.timeout = user_parm->qp_timeout; attr.retry_cnt = 7; attr.rnr_retry = 7; attr.qp_state = IBV_QPS_RTS; attr.max_rd_atomic = dest->out_reads; attr.sq_psn = my_psn; if (ibv_modify_qp(ctx->qp[0], &attr, IBV_QP_STATE | IBV_QP_SQ_PSN | IBV_QP_TIMEOUT | IBV_QP_RETRY_CNT | IBV_QP_RNR_RETRY | IBV_QP_MAX_QP_RD_ATOMIC)) { fprintf(stderr, "Failed to modify RC QP to RTS\n"); return 1; } return 0; }
static int pp_connect_ctx(struct pingpong_context *ctx, struct ibv_qp *qp, int port, int my_psn, enum ibv_mtu mtu, int sl, struct pingpong_dest *dest) { struct ibv_qp_attr attr = { .qp_state = IBV_QPS_RTR, .path_mtu = mtu, .dest_qp_num = dest->qpn, .rq_psn = dest->psn, .max_dest_rd_atomic = 1, .min_rnr_timer = 12, .ah_attr = { .is_global = 0, .dlid = dest->lid, .sl = sl, .src_path_bits = 0, .port_num = port } }; if (ibv_modify_qp(qp, &attr, IBV_QP_STATE | IBV_QP_AV | IBV_QP_PATH_MTU | IBV_QP_DEST_QPN | IBV_QP_RQ_PSN | IBV_QP_MAX_DEST_RD_ATOMIC | IBV_QP_MIN_RNR_TIMER)) { fprintf(stderr, "Failed to modify QP to RTR\n"); return 1; } attr.qp_state = IBV_QPS_RTS; attr.timeout = 14; attr.retry_cnt = 7; attr.rnr_retry = 7; attr.sq_psn = my_psn; attr.max_rd_atomic = 1; if (ibv_modify_qp(qp, &attr, IBV_QP_STATE | IBV_QP_TIMEOUT | IBV_QP_RETRY_CNT | IBV_QP_RNR_RETRY | IBV_QP_SQ_PSN | IBV_QP_MAX_QP_RD_ATOMIC)) { fprintf(stderr, "Failed to modify QP to RTS\n"); return 1; } return 0; }
int connect_qp(void) { struct ibv_qp_attr attr; int i; struct ibv_qp_init_attr init_attr; enum ibv_qp_attr_mask attr_mask; memset(&attr, 0 , sizeof attr); for(i = 0; i < nprocs; i++){ attr.qp_state = IBV_QPS_RTR; attr.path_mtu = IBV_MTU_2048; attr.dest_qp_num = rbuf.qp_num[i]; attr.rq_psn = 0; attr.max_dest_rd_atomic = 10; attr.min_rnr_timer = 20; attr.ah_attr.is_global = 0; attr.ah_attr.dlid = rbuf.lid[i]; attr.ah_attr.sl = 0; attr.ah_attr.src_path_bits = 0; attr.ah_attr.port_num = 1; attr.ah_attr.static_rate = 0; if (ibv_modify_qp(conn.qp[i], &attr, IBV_QP_STATE | IBV_QP_AV | IBV_QP_PATH_MTU | IBV_QP_DEST_QPN | IBV_QP_RQ_PSN | IBV_QP_MAX_DEST_RD_ATOMIC | IBV_QP_MIN_RNR_TIMER)) { fprintf(stderr, "Failed to modify QP to RTR\n"); return 1; } attr.qp_state = IBV_QPS_RTS; attr.timeout = 14; attr.retry_cnt = 7; attr.rnr_retry = 7; attr.sq_psn = 0; attr.max_rd_atomic = 0; if (ibv_modify_qp(conn.qp[i], &attr, IBV_QP_STATE | IBV_QP_TIMEOUT | IBV_QP_RETRY_CNT | IBV_QP_RNR_RETRY | IBV_QP_SQ_PSN | IBV_QP_MAX_QP_RD_ATOMIC)) { fprintf(stderr, "Failed to modify QP to RTS\n"); return 1; } } return 0; }
static void rdma_trans_destroy(void *a) { Rdmatrans *rdma; struct ibv_qp_attr attr; rdma = a; if (rdma->connected) rdma_disconnect(rdma->cm_id); if (rdma->qp) { attr.qp_state = IBV_QPS_ERR; ibv_modify_qp(rdma->qp, &attr, IBV_QP_STATE); ibv_destroy_qp(rdma->qp); } if (rdma->cq) ibv_destroy_cq(rdma->cq); if (rdma->ch) ibv_destroy_comp_channel(rdma->ch); if (rdma->snd_mr) ibv_dereg_mr(rdma->snd_mr); if (rdma->snd_buf) free(rdma->snd_buf); if (rdma->rcv_mr) ibv_dereg_mr(rdma->rcv_mr); if (rdma->rcv_buf) free(rdma->rcv_buf); if (rdma->pd) ibv_dealloc_pd(rdma->pd); if (rdma->cm_id) rdma_destroy_id(rdma->cm_id); }
static int modify_qp_to_rts(struct ibv_qp *qp) { struct ibv_qp_attr attr; int flags; int rc; /* do the following QP transition: RTR -> RTS */ memset(&attr, 0, sizeof(attr)); attr.qp_state = IBV_QPS_RTS; attr.timeout = 0x12; attr.retry_cnt = 6; attr.rnr_retry = 0; attr.sq_psn = 0; attr.max_rd_atomic = 0; flags = IBV_QP_STATE | IBV_QP_TIMEOUT | IBV_QP_RETRY_CNT | IBV_QP_RNR_RETRY | IBV_QP_SQ_PSN | IBV_QP_MAX_QP_RD_ATOMIC; rc = ibv_modify_qp(qp, &attr, flags); if (rc) { fprintf(stderr, "failed to modify QP state to RTS\n"); return rc; } return 0; }
std::unique_ptr<Socket> Acceptor::accept() { rdma_cm_id* new_cm_id; if (rdma_get_request(m_cm_id, &new_cm_id)) { throw exception::acceptor::generic_error( "Error on rdma_get_request: " + std::string(strerror(errno))); } rdma_conn_param conn_param; memset(&conn_param, 0, sizeof(rdma_conn_param)); conn_param.rnr_retry_count = m_rnr_retry_count; if (rdma_accept(new_cm_id, &conn_param)) { rdma_destroy_ep(new_cm_id); throw exception::acceptor::generic_error( "Error on rdma_accept: " + std::string(strerror(errno))); } ibv_qp_attr attr; memset(&attr, 0, sizeof(ibv_qp_attr)); attr.min_rnr_timer = m_min_rtr_timer; int flags = IBV_QP_MIN_RNR_TIMER; if (ibv_modify_qp(new_cm_id->qp, &attr, flags)) { rdma_destroy_ep(new_cm_id); throw exception::acceptor::generic_error( "Error on ibv_modify_qp: " + std::string(strerror(errno))); } std::unique_ptr<Socket> socket_ptr(new Socket(new_cm_id, m_credits)); return socket_ptr; }
// Builds a channel on top of `adapter`: creates a private completion queue
// for sends, a reliable-connected QP (receives complete on the adapter's
// CQ), moves the QP to INIT on port 1, records the local address
// (lid / qpn / random 24-bit psn) in self_, registers the receive memory
// regions and posts the two initial receives.
RDMAChannel::RDMAChannel(const RDMAAdapter& adapter)
    : adapter_(adapter),
      buffers_(),
      memory_regions_(MAX_BUFFERS),
      region_regions_(MAX_BUFFERS),
      memory_regions_received_() {
  // Create write completion queue
  write_cq_ = ibv_create_cq(adapter_.context_, 1, NULL, NULL, 0);
  CHECK(write_cq_) << "Failed to create completion queue";

  // Create queue pair
  struct ibv_qp_init_attr create_attr;
  caffe_memset(sizeof(ibv_qp_init_attr), 0, &create_attr);
  create_attr.qp_type = IBV_QPT_RC;
  create_attr.send_cq = write_cq_;
  create_attr.recv_cq = adapter.cq_;
  create_attr.cap.max_send_wr = RDMAAdapter::MAX_CONCURRENT_WRITES;
  create_attr.cap.max_recv_wr = RDMAAdapter::MAX_CONCURRENT_WRITES;
  create_attr.cap.max_send_sge = 1;
  create_attr.cap.max_recv_sge = 1;
  qp_ = ibv_create_qp(adapter.pd_, &create_attr);
  CHECK(qp_) << "Failed to create queue pair";

  // Init queue pair
  struct ibv_qp_attr init_attr;
  caffe_memset(sizeof(ibv_qp_attr), 0, &init_attr);
  init_attr.qp_state = IBV_QPS_INIT;
  init_attr.port_num = 1;
  init_attr.pkey_index = 0;
  init_attr.qp_access_flags = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE;
  const int init_mask =
      IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT | IBV_QP_ACCESS_FLAGS;
  CHECK(!ibv_modify_qp(qp_, &init_attr, init_mask)) << "Failed to set QP to INIT";

  // Local address
  struct ibv_port_attr port_attr;
  CHECK(!ibv_query_port(adapter.context_, (uint8_t) 1, &port_attr))
      << "Query port";
  self_.lid = port_attr.lid;
  self_.qpn = qp_->qp_num;
  self_.psn = caffe_rng_rand() & 0xffffff;

  for (int slot = 0; slot < MAX_BUFFERS; ++slot) {
    RecvMR(slot);
  }

  // Create initial recv request for data.
  recv();
  // Create initial recv request for ctrl signals.
  recv();
}
static int modify_qp_to_init(struct ibv_qp *qp) { struct ibv_qp_attr attr; int flags; int rc; /* do the following QP transition: RESET -> INIT */ memset(&attr, 0, sizeof(attr)); attr.qp_state = IBV_QPS_INIT; attr.port_num = config.ib_port; attr.pkey_index = 0; /* we don't do any RDMA operation, so remote operation is not permitted */ attr.qp_access_flags = 0; flags = IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT | IBV_QP_ACCESS_FLAGS; rc = ibv_modify_qp(qp, &attr, flags); if (rc) { fprintf(stderr, "failed to modify QP state to INIT\n"); return rc; } return 0; }
static int modify_qp_to_rtr( struct ibv_qp *qp, uint32_t remote_qpn, uint16_t dlid) { struct ibv_qp_attr attr; int flags; int rc; /* do the following QP transition: INIT -> RTR */ memset(&attr, 0, sizeof(attr)); attr.qp_state = IBV_QPS_RTR; attr.path_mtu = IBV_MTU_256; attr.dest_qp_num = remote_qpn; attr.rq_psn = 0; attr.max_dest_rd_atomic = 0; attr.min_rnr_timer = 0x12; attr.ah_attr.is_global = 0; attr.ah_attr.dlid = dlid; attr.ah_attr.sl = 0; attr.ah_attr.src_path_bits = 0; attr.ah_attr.port_num = config.ib_port; flags = IBV_QP_STATE | IBV_QP_AV | IBV_QP_PATH_MTU | IBV_QP_DEST_QPN | IBV_QP_RQ_PSN | IBV_QP_MAX_DEST_RD_ATOMIC | IBV_QP_MIN_RNR_TIMER; rc = ibv_modify_qp(qp, &attr, flags); if (rc) { fprintf(stderr, "failed to modify QP state to RTR\n"); return rc; } return 0; }
void RDMAChannel::Connect(const string& address) { Address peer; uint8_t* bytes = reinterpret_cast<uint8_t*>(&peer); size_t size = sizeof(Address); CHECK_EQ(address.size(), size * 2); hex(address, bytes); struct ibv_qp_attr attr; caffe_memset(sizeof(ibv_qp_attr), 0, &attr); attr.qp_state = IBV_QPS_RTR; attr.path_mtu = IBV_MTU_4096; attr.dest_qp_num = peer.qpn; attr.rq_psn = peer.psn; attr.max_dest_rd_atomic = 1; attr.min_rnr_timer = 12; attr.ah_attr.is_global = 0; attr.ah_attr.dlid = peer.lid; attr.ah_attr.sl = 0; attr.ah_attr.src_path_bits = 0; attr.ah_attr.port_num = 1; int r; CHECK(!(r = ibv_modify_qp(qp_, &attr, IBV_QP_STATE | IBV_QP_AV | IBV_QP_PATH_MTU | IBV_QP_DEST_QPN | IBV_QP_RQ_PSN | IBV_QP_MAX_DEST_RD_ATOMIC | IBV_QP_MIN_RNR_TIMER))) << "QP to Ready to Receive " << r; caffe_memset(sizeof(ibv_qp_attr), 0, &attr); attr.qp_state = IBV_QPS_RTS; attr.sq_psn = self_.psn; attr.timeout = 14; attr.retry_cnt = 7; attr.rnr_retry = 7; /* infinite */ attr.max_rd_atomic = 1; CHECK(!(r = ibv_modify_qp(qp_, &attr, IBV_QP_STATE | IBV_QP_TIMEOUT | IBV_QP_RETRY_CNT | IBV_QP_RNR_RETRY | IBV_QP_SQ_PSN | IBV_QP_MAX_QP_RD_ATOMIC))) << "QP to Ready to Send " << r; }
/*
 * Walk a UD QP through INIT, RTR and RTS.
 *
 * INIT uses P_Key index 0, port 1 and Q_Key 0; RTS uses si->sq_psn as the
 * send PSN. Any ibv_modify_qp() failure is reported through
 * error_abort_all().
 */
void MV_Transition_UD_QP(mv_qp_setup_information *si, struct ibv_qp * qp)
{
    struct ibv_qp_attr attr;

    /* INIT */
    memset(&attr, 0, sizeof(struct ibv_qp_attr));
    attr.qp_state = IBV_QPS_INIT;
    attr.qkey = 0;
    attr.port_num = 1;
    attr.pkey_index = 0;
    if (ibv_modify_qp(qp, &attr,
                      IBV_QP_STATE | IBV_QP_PKEY_INDEX |
                      IBV_QP_PORT | IBV_QP_QKEY) != 0) {
        error_abort_all(IBV_RETURN_ERR, "Failed to modify QP to INIT");
    }

    /* RTR: state change only, from a freshly zeroed struct */
    memset(&attr, 0, sizeof(struct ibv_qp_attr));
    attr.qp_state = IBV_QPS_RTR;
    if (ibv_modify_qp(qp, &attr, IBV_QP_STATE) != 0) {
        error_abort_all(IBV_RETURN_ERR, "Failed to modify QP to RTR");
    }

    /* RTS */
    memset(&attr, 0, sizeof(struct ibv_qp_attr));
    attr.qp_state = IBV_QPS_RTS;
    attr.sq_psn = si->sq_psn;
    if (ibv_modify_qp(qp, &attr, IBV_QP_STATE | IBV_QP_SQ_PSN) != 0) {
        error_abort_all(IBV_RETURN_ERR, "Failed to modify QP to RTS");
    }
}
/*
 * Create an RC QP from the CQs, SRQ, PD and capacities in @si and move it
 * to INIT on port mvparams.default_port with remote read/write access.
 *
 * Returns the new QP and increments mvdev.rc_connections. Failures are
 * reported through error_abort_all(); NULL is returned if that call
 * returns.
 */
struct ibv_qp * MV_Create_RC_QP(mv_qp_setup_information *si)
{
    struct ibv_qp_init_attr create_attr;
    struct ibv_qp_attr init_attr;
    struct ibv_qp *new_qp = NULL;

    /* create */
    memset(&create_attr, 0, sizeof(struct ibv_qp_init_attr));
    create_attr.srq = si->srq;
    D_PRINT("SRQ at create qp: %p\n", create_attr.srq);
    create_attr.qp_type = IBV_QPT_RC;
    create_attr.send_cq = si->send_cq;
    create_attr.recv_cq = si->recv_cq;
    create_attr.cap.max_send_wr = si->cap.max_send_wr;
    create_attr.cap.max_recv_wr = si->cap.max_recv_wr;
    create_attr.cap.max_send_sge = si->cap.max_send_sge;
    create_attr.cap.max_recv_sge = si->cap.max_recv_sge;
    create_attr.cap.max_inline_data = si->cap.max_inline_data;

    new_qp = ibv_create_qp(si->pd, &create_attr);
    if (new_qp == NULL) {
        error_abort_all(IBV_RETURN_ERR, "Couldn't create RC QP");
        return NULL;
    }

    /* init */
    memset(&init_attr, 0, sizeof(struct ibv_qp_attr));
    init_attr.qp_state = IBV_QPS_INIT;
    init_attr.port_num = mvparams.default_port;
    init_attr.qp_access_flags = IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ;
    /*
     * NOTE(review): the effective P_Key index has always been 0 here; an
     * earlier assignment from mvparams.pkey_ix was immediately overwritten.
     * Confirm whether the configured index was meant to be used.
     */
    init_attr.pkey_index = 0;

    if (ibv_modify_qp(new_qp, &init_attr,
                      IBV_QP_STATE | IBV_QP_PKEY_INDEX |
                      IBV_QP_PORT | IBV_QP_ACCESS_FLAGS) != 0) {
        error_abort_all(IBV_RETURN_ERR, "Failed to modify RC QP to INIT");
        return NULL;
    }

    mvdev.rc_connections++;
    return new_qp;
}
static int ucma_modify_qp_err(struct rdma_cm_id *id) { struct ibv_qp_attr qp_attr; if (!id->qp) return 0; qp_attr.qp_state = IBV_QPS_ERR; return ibv_modify_qp(id->qp, &qp_attr, IBV_QP_STATE); }
int create_qp(void) { struct ibv_qp_attr qp_attr; int i; memset(&qp_attr, 0, sizeof qp_attr); struct ibv_qp_init_attr attr; memset(&attr, 0, sizeof attr); attr.send_cq = hca.cq; attr.recv_cq = hca.cq; attr.cap.max_send_wr = opts.send_wr; attr.cap.max_recv_wr = opts.recv_wr; attr.cap.max_send_sge = opts.num_sge; attr.cap.max_recv_sge = opts.num_sge; attr.cap.max_inline_data = 1; attr.qp_type = IBV_QPT_RC; // Create a connection to yourself for(i = 0; i < nprocs; i++) { conn.qp[i] = ibv_create_qp(hca.pd, &attr); if(!conn.qp[i]) { fprintf(stderr,"Couldn't create QP\n"); return 1; } conn.qp_num[i] = conn.qp[i]->qp_num; qp_attr.qp_state = IBV_QPS_INIT; qp_attr.pkey_index = 0; qp_attr.port_num = 1; qp_attr.qp_access_flags = IBV_ACCESS_LOCAL_WRITE| IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_ATOMIC; if(ibv_modify_qp(conn.qp[i], &qp_attr, IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT | IBV_QP_ACCESS_FLAGS)) { fprintf(stderr,"Could not modify QP to INIT\n"); return 1; } #ifdef DEBUG fprintf(stdout,"[%d] Created QP %d, LID %d\n", me, conn.qp_num[i], conn.lid[i]); fflush(stdout); #endif } return 0; }
static int ucma_modify_qp_rts(struct rdma_cm_id *id) { struct ibv_qp_attr qp_attr; int qp_attr_mask, ret; qp_attr.qp_state = IBV_QPS_RTS; ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask); if (ret) return ret; return ibv_modify_qp(id->qp, &qp_attr, qp_attr_mask); }
/*
 * Flush and recycle the UD QP of a pool entry.
 *
 * The QP is moved to the error state so that outstanding work requests
 * complete, @num_to_flush completions are drained from the entry's CQ,
 * the QP is reset, and finally MV_Transition_UD_QP() brings it back up
 * using mvdev.rndv_si.
 *
 * @rqp           pool entry whose ud_qp / ud_cq are operated on
 * @num_to_flush  number of completions to pull before resetting
 *
 * Errors are reported through error_abort_all().
 */
void mvdev_flush_qp(mv_qp_pool_entry *rqp, int num_to_flush)
{
    struct ibv_qp_attr qp_attr;
    struct ibv_wc wc;
    int ne;

    memset(&qp_attr, 0, sizeof(qp_attr));
    qp_attr.qp_state = IBV_QPS_ERR;

    /* need to transition to the error state so we can flush
     * all the posted buffers */
    if(ibv_modify_qp(rqp->ud_qp, &qp_attr, IBV_QP_STATE)) {
        error_abort_all(IBV_RETURN_ERR, "Error changing to the err state\n");
    }

    /* pull failed completions */
    {
        int total_pulled = 0;
        do {
            ne = ibv_poll_cq(rqp->ud_cq, 1, &wc);
            if (ne < 0) {
                /* A negative count signals a CQ read failure; adding it to
                 * the total would make this loop spin forever. */
                error_abort_all(IBV_RETURN_ERR,
                        "Error polling CQ while flushing QP");
                return;
            }
            total_pulled += ne;
        } while(total_pulled < num_to_flush);
    }

    {
        struct ibv_qp_attr attr;
        memset(&attr, 0, sizeof(struct ibv_qp_attr));
        attr.qp_state = IBV_QPS_RESET;

        if (ibv_modify_qp(rqp->ud_qp, &attr, IBV_QP_STATE)) {
            error_abort_all(IBV_RETURN_ERR, "Failed to modify QP to RESET");
        }
    }

    /* now we need to re-transition it back to the RTS phase */
    MV_Transition_UD_QP(&mvdev.rndv_si, rqp->ud_qp);
}
int mv2_ud_qp_transition(struct ibv_qp *qp) { struct ibv_qp_attr attr; memset(&attr, 0, sizeof(struct ibv_qp_attr)); attr.qp_state = IBV_QPS_INIT; attr.pkey_index = 0; attr.port_num = 1; attr.qkey = 0; if (ibv_modify_qp(qp, &attr, IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT | IBV_QP_QKEY)) { fprintf(stderr,"Failed to modify QP to INIT\n"); return 1; } memset(&attr, 0, sizeof(struct ibv_qp_attr)); attr.qp_state = IBV_QPS_RTR; if (ibv_modify_qp(qp, &attr, IBV_QP_STATE)) { fprintf(stderr, "Failed to modify QP to RTR\n"); return 1; } memset(&attr, 0, sizeof(struct ibv_qp_attr)); attr.qp_state = IBV_QPS_RTS; attr.sq_psn = rdma_default_psn; if (ibv_modify_qp(qp, &attr, IBV_QP_STATE | IBV_QP_SQ_PSN)) { fprintf(stderr, "Failed to modify QP to RTS\n"); return 1; } return 0; }
static int ucma_init_conn_qp(struct cma_id_private *id_priv, struct ibv_qp *qp) { struct ibv_qp_attr qp_attr; int qp_attr_mask, ret; if (abi_ver == 3) return ucma_init_conn_qp3(id_priv, qp); qp_attr.qp_state = IBV_QPS_INIT; ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); if (ret) return ret; return ibv_modify_qp(qp, &qp_attr, qp_attr_mask); }
static int ucma_init_conn_qp3(struct cma_id_private *id_priv, struct ibv_qp *qp) { struct ibv_qp_attr qp_attr; int ret; ret = ucma_find_pkey(id_priv->cma_dev, id_priv->id.port_num, id_priv->id.route.addr.addr.ibaddr.pkey, &qp_attr.pkey_index); if (ret) return ret; qp_attr.port_num = id_priv->id.port_num; qp_attr.qp_state = IBV_QPS_INIT; qp_attr.qp_access_flags = 0; return ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE | IBV_QP_ACCESS_FLAGS | IBV_QP_PKEY_INDEX | IBV_QP_PORT); }
/* * move_to_rtr */ static int move_to_rtr(struct ibv_qp *qp, unsigned int port_num, uint16_t remote_lid, /* remote peer's LID */ uint32_t remote_qpn) /* remote peer's QPN */ { struct ibv_qp_attr attr = { .qp_state = IBV_QPS_RTR, .path_mtu = psoib_path_mtu, .dest_qp_num = remote_qpn, .rq_psn = 0, /* Packet sequence number */ .max_dest_rd_atomic = 1, /* Maximum number of oust. RDMA read/atomic as target */ .min_rnr_timer = 12, /* Minimum RNR NAK timer (old = 0) */ .ah_attr = { .is_global = 0, /* old av.grh_flag ? */ .dlid = remote_lid, .sl = 0, /* Service level bits ??? */ .src_path_bits = 0, .port_num = port_num } }; if (ibv_modify_qp(qp, &attr, IBV_QP_STATE | IBV_QP_AV | IBV_QP_PATH_MTU | IBV_QP_DEST_QPN | IBV_QP_RQ_PSN | IBV_QP_MAX_DEST_RD_ATOMIC | IBV_QP_MIN_RNR_TIMER)) goto err_ibv_modify_qp; return 0; /* --- */ err_ibv_modify_qp: psoib_err_errno("ibv_modify_qp() move to RTR failed", errno); return -1; }
/* Load new dlid to the QP */
/*
 * Queries the current attributes of @qp, lets apm_update_attr() (next LMC
 * path) or apm_update_port() (next port) fill in the alternate-path fields
 * and mask, and applies them with ibv_modify_qp().
 *
 * @qp  queue pair to update
 * @ep  endpoint owning the QP; must not be NULL
 *
 * Failures are logged with BTL_ERROR; nothing is returned to the caller.
 * If the query fails or no alternate path is left, the QP is not modified.
 */
void mca_btl_openib_load_apm(struct ibv_qp *qp, mca_btl_openib_endpoint_t *ep)
{
    struct ibv_qp_init_attr qp_init_attr;
    struct ibv_qp_attr attr;
    enum ibv_qp_attr_mask mask = 0;
    struct mca_btl_openib_module_t *btl;
    int rc;

    BTL_VERBOSE(("APM: Loading alternative path"));
    assert (NULL != ep);
    btl = ep->endpoint_btl;

    if (ibv_query_qp(qp, &attr, mask, &qp_init_attr)) {
        BTL_ERROR(("Failed to ibv_query_qp, qp num: %d", qp->qp_num));
        /* attr was not filled in; nothing below can be trusted. */
        return;
    }

    if (mca_btl_openib_component.apm_lmc &&
            attr.ah_attr.src_path_bits - btl->src_path_bits < mca_btl_openib_component.apm_lmc) {
        BTL_VERBOSE(("APM LMC: src: %d btl_src: %d lmc_max: %d",
                    attr.ah_attr.src_path_bits,
                    btl->src_path_bits,
                    mca_btl_openib_component.apm_lmc));
        apm_update_attr(&attr, &mask);
    } else {
        if (mca_btl_openib_component.apm_ports) {
            /* Try to migrate to next port */
            if (OPAL_SUCCESS != apm_update_port(ep, &attr, &mask))
                return;
        } else {
            BTL_ERROR(("Failed to load alternative path, all %d were used",
                        attr.ah_attr.src_path_bits - btl->src_path_bits));
            /* mask is still empty, so there is nothing to apply. */
            return;
        }
    }

    /* ibv_modify_qp() returns the error code directly. */
    rc = ibv_modify_qp(qp, &attr, mask);
    if (rc)
        BTL_ERROR(("Failed to ibv_modify_qp, qp num: %d, errno says: %s (%d)",
                    qp->qp_num, strerror(rc), rc));
}
/*
 * Move a backend QP to the INIT state on backend_dev->port_num with
 * P_Key index 0.
 *
 * For RC QPs the access-flags attribute is included in the mask (its
 * value stays at the zero-initialised default); for UD QPs @qkey is
 * programmed. Any other @qp_type is rejected.
 *
 * Returns 0 on success, -EIO for an unsupported type or when
 * ibv_modify_qp() fails.
 */
int rdma_backend_qp_state_init(RdmaBackendDev *backend_dev, RdmaBackendQP *qp,
                               uint8_t qp_type, uint32_t qkey)
{
    struct ibv_qp_attr attr = {};
    int attr_mask = IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT;
    int rc;

    attr.qp_state = IBV_QPS_INIT;
    attr.pkey_index = 0;
    attr.port_num = backend_dev->port_num;

    if (qp_type == IBV_QPT_RC) {
        attr_mask |= IBV_QP_ACCESS_FLAGS;
        trace_rdma_backend_rc_qp_state_init(qp->ibqp->qp_num);
    } else if (qp_type == IBV_QPT_UD) {
        attr.qkey = qkey;
        attr_mask |= IBV_QP_QKEY;
        trace_rdma_backend_ud_qp_state_init(qp->ibqp->qp_num, qkey);
    } else {
        rdma_error_report("Unsupported QP type %d", qp_type);
        return -EIO;
    }

    rc = ibv_modify_qp(qp->ibqp, &attr, attr_mask);
    if (rc == 0) {
        return 0;
    }

    rdma_error_report("ibv_modify_qp fail, rc=%d, errno=%d", rc, errno);
    return -EIO;
}
void on_completion(struct ibv_wc *wc) { struct connection *conn = (struct connection *)(uintptr_t)wc->wr_id; if (wc->status != IBV_WC_SUCCESS) die("on_completion: status is not IBV_WC_SUCCESS."); if (wc->opcode & IBV_WC_RECV) { conn->recv_state++; if (conn->recv_msg->type == MSG_MR) { memcpy(&conn->peer_mr, &conn->recv_msg->data.mr, sizeof(conn->peer_mr)); post_receives(conn); /* only rearm for MSG_MR */ if (conn->send_state == SS_INIT) /* received peer's MR before sending ours, so send ours back */ send_mr(conn); } } else { conn->send_state++; printf("send completed successfully.\n"); } if (conn->send_state == SS_MR_SENT && conn->recv_state == RS_MR_RECV) { struct ibv_send_wr wr, *bad_wr = NULL; struct ibv_sge sge; if (s_mode == M_WRITE) printf("received MSG_MR. writing message to remote memory...\n"); else printf("received MSG_MR. reading message from remote memory...\n"); memset(&wr, 0, sizeof(wr)); wr.wr_id = (uintptr_t)conn; wr.opcode = (s_mode == M_WRITE) ? IBV_WR_RDMA_WRITE : IBV_WR_RDMA_READ; wr.sg_list = &sge; wr.num_sge = 1; wr.send_flags = IBV_SEND_SIGNALED; wr.wr.rdma.remote_addr = (uintptr_t)conn->peer_mr.addr; wr.wr.rdma.rkey = conn->peer_mr.rkey; sge.addr = (uintptr_t)conn->rdma_local_region; sge.length = RDMA_BUFFER_SIZE; sge.lkey = conn->rdma_local_mr->lkey; /* CODE TO MESS UP PSN */ srand48(getpid()); struct ibv_qp_attr attr; attr.sq_psn = lrand48() & 0xffffff; attr.rq_psn = lrand48() & 0xffffff; if (ibv_modify_qp(conn->qp, &attr, IBV_QP_RQ_PSN | IBV_QP_SQ_PSN)) { fprintf(stderr, "Failed to set the PSN."); return; } /* END CODE TO MESS UP PSN */ TEST_NZ(ibv_post_send(conn->qp, &wr, &bad_wr)); conn->send_msg->type = MSG_DONE; send_message(conn); } else if (conn->send_state == SS_DONE_SENT && conn->recv_state == RS_DONE_RECV) { printf("remote buffer: %s\n", get_peer_message_region(conn)); rdma_disconnect(conn->id); } }
/* Send qp connect */
/*
 * Move the endpoint's local send QP (qps[0]) to RTR and then RTS using the
 * remote parameters in @rem_info.
 *
 * @endpoint  endpoint whose qps[0] local QP / PSN are used
 * @rem_info  remote QP number, PSN, LID and MTU
 *
 * Returns OMPI_SUCCESS on success, OMPI_ERROR if the path-record SL lookup
 * (when ENABLE_DYNAMIC_SL is built in and enabled) or either
 * ibv_modify_qp() call fails.
 */
static int xoob_send_qp_connect(mca_btl_openib_endpoint_t *endpoint, mca_btl_openib_rem_info_t *rem_info)
{
    struct ibv_qp* qp;
    struct ibv_qp_attr attr;
    uint32_t psn;
    int ret;

    mca_btl_openib_module_t* openib_btl =
        (mca_btl_openib_module_t*)endpoint->endpoint_btl;

    BTL_VERBOSE(("Connecting Send QP\n"));
    assert(NULL != endpoint->qps);
    qp = endpoint->qps[0].qp->lcl_qp;
    psn = endpoint->qps[0].qp->lcl_psn;

    memset(&attr, 0, sizeof(attr));
    attr.qp_state           = IBV_QPS_RTR;
    /* Use the smaller of the local and remote MTU. Both sides of the
     * comparison now read the same rem_info that supplies every other
     * remote parameter below. */
    attr.path_mtu = (openib_btl->device->mtu < rem_info->rem_mtu) ?
        openib_btl->device->mtu : rem_info->rem_mtu;
    attr.dest_qp_num        = rem_info->rem_qps->rem_qp_num;
    attr.rq_psn             = rem_info->rem_qps->rem_psn;
    attr.max_dest_rd_atomic = mca_btl_openib_component.ib_max_rdma_dst_ops;
    attr.min_rnr_timer      = mca_btl_openib_component.ib_min_rnr_timer;
    attr.ah_attr.is_global     = 0;
    attr.ah_attr.dlid          = rem_info->rem_lid;
    attr.ah_attr.src_path_bits = openib_btl->src_path_bits;
    attr.ah_attr.port_num      = openib_btl->port_num;
    attr.ah_attr.static_rate   = 0;
    attr.ah_attr.sl            = mca_btl_openib_component.ib_service_level;

#if (ENABLE_DYNAMIC_SL)
    /* if user enabled dynamic SL, get it from PathRecord */
    if (0 != mca_btl_openib_component.ib_path_record_service_level) {
        int rc = btl_openib_connect_get_pathrecord_sl(qp->context,
                                                      attr.ah_attr.port_num,
                                                      openib_btl->lid,
                                                      attr.ah_attr.dlid);
        if (OMPI_ERROR == rc) {
            return OMPI_ERROR;
        }
        attr.ah_attr.sl = rc;
    }
#endif

    if (mca_btl_openib_component.verbose) {
        BTL_VERBOSE(("Set MTU to IBV value %d (%s bytes)", attr.path_mtu,
                    (attr.path_mtu == IBV_MTU_256) ? "256" :
                    (attr.path_mtu == IBV_MTU_512) ? "512" :
                    (attr.path_mtu == IBV_MTU_1024) ? "1024" :
                    (attr.path_mtu == IBV_MTU_2048) ? "2048" :
                    (attr.path_mtu == IBV_MTU_4096) ? "4096" :
                    "unknown (!)"));
    }

    ret = ibv_modify_qp(qp, &attr,
                IBV_QP_STATE              |
                IBV_QP_AV                 |
                IBV_QP_PATH_MTU           |
                IBV_QP_DEST_QPN           |
                IBV_QP_RQ_PSN             |
                IBV_QP_MAX_DEST_RD_ATOMIC |
                IBV_QP_MIN_RNR_TIMER);
    if (ret) {
        BTL_ERROR(("Error modifying QP[%x] to IBV_QPS_RTR errno says: %s [%d]",
                    qp->qp_num, strerror(ret), ret));
        return OMPI_ERROR;
    }

    attr.qp_state       = IBV_QPS_RTS;
    attr.timeout        = mca_btl_openib_component.ib_timeout;
    attr.retry_cnt      = mca_btl_openib_component.ib_retry_count;
    attr.rnr_retry      = mca_btl_openib_component.ib_rnr_retry;
    attr.sq_psn         = psn;
    attr.max_rd_atomic  = mca_btl_openib_component.ib_max_rdma_dst_ops;
    ret = ibv_modify_qp(qp, &attr,
            IBV_QP_STATE              |
            IBV_QP_TIMEOUT            |
            IBV_QP_RETRY_CNT          |
            IBV_QP_RNR_RETRY          |
            IBV_QP_SQ_PSN             |
            IBV_QP_MAX_QP_RD_ATOMIC);
    if (ret) {
        BTL_ERROR(("Error modifying QP[%x] to IBV_QPS_RTS errno says: %s [%d]",
                    qp->qp_num, strerror(ret), ret));
        return OMPI_ERROR;
    }

    return OMPI_SUCCESS;
}
/* Create XRC send qp */
/*
 * Creates the endpoint's XRC send QP (qps[0]) on the low-priority CQ,
 * records the usable inline size, moves the QP to INIT, picks a random
 * 24-bit local PSN and posts receive buffers.
 *
 * Returns OMPI_ERROR if QP creation or the INIT transition fails,
 * otherwise the result of mca_btl_openib_endpoint_post_recvs().
 */
static int xoob_send_qp_create (mca_btl_base_endpoint_t* endpoint)
{
    int prio = BTL_OPENIB_LP_CQ; /* all send completions go to low prio CQ */
    uint32_t send_wr;
    struct ibv_qp **qp;
    uint32_t *psn;
    struct ibv_qp_init_attr qp_init_attr;
    struct ibv_qp_attr attr;
    int ret;
    size_t req_inline;

    mca_btl_openib_module_t *openib_btl =
        (mca_btl_openib_module_t*)endpoint->endpoint_btl;

    /* Prepare QP structs */
    BTL_VERBOSE(("Creating Send QP\n"));
    qp = &endpoint->qps[0].qp->lcl_qp;
    psn = &endpoint->qps[0].qp->lcl_psn;
    /* reserve additional wr for eager rdma credit management */
    send_wr = endpoint->ib_addr->qp->sd_wqe +
        (mca_btl_openib_component.use_eager_rdma ?
         mca_btl_openib_component.max_eager_rdma : 0);
    memset(&qp_init_attr, 0, sizeof(struct ibv_qp_init_attr));
    memset(&attr, 0, sizeof(struct ibv_qp_attr));

    qp_init_attr.send_cq = qp_init_attr.recv_cq = openib_btl->device->ib_cq[prio];

    /* no need recv queue; receives are posted to srq */
    qp_init_attr.cap.max_recv_wr = 0;
    qp_init_attr.cap.max_send_wr = send_wr;
    qp_init_attr.cap.max_inline_data = req_inline =
        openib_btl->device->max_inline_data;
    qp_init_attr.cap.max_send_sge = 1;
    /* this one is ignored by driver */
    qp_init_attr.cap.max_recv_sge = 1; /* we do not use SG list */
    qp_init_attr.qp_type = IBV_QPT_XRC;
    qp_init_attr.xrc_domain = openib_btl->device->xrc_domain;
    *qp = ibv_create_qp(openib_btl->device->ib_pd, &qp_init_attr);
    if (NULL == *qp) {
        opal_show_help("help-mpi-btl-openib-cpc-base.txt",
                       "ibv_create_qp failed", true,
                       ompi_process_info.nodename,
                       ibv_get_device_name(openib_btl->device->ib_dev),
                       "Reliable connected (XRC)");
        return OMPI_ERROR;
    }

    /* The capability read back after creation may be smaller than what
     * was requested; remember the smaller value and tell the user. */
    if (qp_init_attr.cap.max_inline_data < req_inline) {
        endpoint->qps[0].ib_inline_max = qp_init_attr.cap.max_inline_data;
        /* The third argument is the want_error_header flag; without it
         * nodename was consumed as the flag and every format argument
         * shifted by one position. */
        opal_show_help("help-mpi-btl-openib-cpc-base.txt",
                       "inline truncated", true,
                       ompi_process_info.nodename,
                       ibv_get_device_name(openib_btl->device->ib_dev),
                       openib_btl->port_num,
                       req_inline, qp_init_attr.cap.max_inline_data);
    } else {
        endpoint->qps[0].ib_inline_max = req_inline;
    }

    attr.qp_state = IBV_QPS_INIT;
    attr.pkey_index = openib_btl->pkey_index;
    attr.port_num = openib_btl->port_num;
    attr.qp_access_flags = IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ;
    ret = ibv_modify_qp(*qp, &attr,
                      IBV_QP_STATE |
                      IBV_QP_PKEY_INDEX |
                      IBV_QP_PORT |
                      IBV_QP_ACCESS_FLAGS );
    if (ret) {
        BTL_ERROR(("Error modifying QP[%x] to IBV_QPS_INIT errno says: %s [%d]",
                    (*qp)->qp_num, strerror(ret), ret));
        return OMPI_ERROR;
    }

    /* Setup meta data on the endpoint */
    *psn = lrand48() & 0xffffff;

    /* Now that all the qp's are created locally, post some receive
       buffers, setup credits, etc. */
    return mca_btl_openib_endpoint_post_recvs(endpoint);
}