static int set_up_connection(struct pingpong_context *ctx,
                             struct perftest_parameters *user_parm,
                             struct pingpong_dest *my_dest)
{
    int use_i = user_parm->gid_index;
    int port  = user_parm->ib_port;

    if (use_i != -1) {
        if (ibv_query_gid(ctx->context, port, use_i, &my_dest->gid)) {
            return -1;
        }
    }

    my_dest->lid       = ctx_get_local_lid(ctx->context, user_parm->ib_port);
    my_dest->out_reads = ctx_set_out_reads(ctx->context, user_parm->out_reads);
    my_dest->qpn       = ctx->qp->qp_num;
    my_dest->psn       = lrand48() & 0xffffff;
    my_dest->rkey      = ctx->mr->rkey;
    my_dest->vaddr     = (uintptr_t)ctx->buf;

    /* We do not fail the test upon LID 0 in an RDMAoE/Ethernet configuration. */
    if (use_i < 0) {
        if (!my_dest->lid) {
            fprintf(stderr, " Local lid 0x0 detected. Is an SM running? \n");
            fprintf(stderr, " If you're running RDMAoE you must use GIDs\n");
            return -1;
        }
    }

    return 0;
}
static int set_mcast_group(struct pingpong_context *ctx,
                           struct perftest_parameters *user_param,
                           struct mcast_parameters *mcg_params)
{
    struct ibv_port_attr port_attr;

    if (ibv_query_gid(ctx->context, user_param->ib_port,
                      user_param->gid_index, &mcg_params->port_gid)) {
        return 1;
    }

    if (ibv_query_pkey(ctx->context, user_param->ib_port,
                       DEF_PKEY_IDX, &mcg_params->pkey)) {
        return 1;
    }

    if (ibv_query_port(ctx->context, user_param->ib_port, &port_attr)) {
        return 1;
    }

    mcg_params->sm_lid  = port_attr.sm_lid;
    mcg_params->sm_sl   = port_attr.sm_sl;
    mcg_params->ib_port = user_param->ib_port;

    if (!strcmp(link_layer_str(user_param->link_type), "IB")) {
        /* Request the SM to create and register the multicast group. */
        if (join_multicast_group(SUBN_ADM_METHOD_SET, mcg_params)) {
            fprintf(stderr, "Couldn't Register the Mcast group on the SM\n");
            return 1;
        }
    }

    return 0;
}
static int set_up_connection(struct pingpong_context *ctx,
                             struct perftest_parameters *user_parm,
                             struct pingpong_dest *my_dest)
{
    int i;
    union ibv_gid temp_gid;

    if (user_parm->gid_index != -1) {
        if (ibv_query_gid(ctx->context, user_parm->ib_port,
                          user_parm->gid_index, &temp_gid)) {
            return -1;
        }
    }

    for (i = 0; i < user_parm->num_of_qps; i++) {
        my_dest[i].lid  = ctx_get_local_lid(ctx->context, user_parm->ib_port);
        my_dest[i].qpn  = ctx->qp[i]->qp_num;
        my_dest[i].psn  = lrand48() & 0xffffff;
        my_dest[i].rkey = ctx->mr->rkey;

        /* Each QP advertises its own receive buffer address. */
        my_dest[i].vaddr = (uintptr_t)ctx->buf +
                           (user_parm->num_of_qps + i) * BUFF_SIZE(ctx->size);
        memcpy(my_dest[i].gid.raw, temp_gid.raw, 16);

        /* We do not fail the test upon LID 0 when running over RoCE. */
        if (user_parm->gid_index < 0) {
            if (!my_dest[i].lid) {
                fprintf(stderr, " Local lid 0x0 detected. Is an SM running? \n");
                return -1;
            }
        }
    }

    return 0;
}
static int ibv_find_gid_index(struct ibv_context *context, uint8_t port_num,
                              union ibv_gid *gid)
{
    union ibv_gid sgid;
    int i = 0, ret;

    do {
        ret = ibv_query_gid(context, port_num, i++, &sgid);
    } while (!ret && memcmp(&sgid, gid, sizeof *gid));

    return ret ? ret : i - 1;
}
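/*
 * Editor's note: the scanner above walks the port's GID table with
 * ibv_query_gid() until it either fails or finds a matching entry, and
 * returns the matching table index. A typical consumer is RoCE address
 * handle setup, where ah_attr.grh.sgid_index must name the local GID.
 * The sketch below is illustrative only: resolve_av() is a hypothetical
 * helper, it reuses ibv_find_gid_index() from above, and it assumes a
 * failing ibv_query_gid() reports a negative value. Needs <string.h> and
 * <infiniband/verbs.h>.
 */
static int resolve_av(struct ibv_context *ctx, uint8_t port_num,
                      union ibv_gid *local_gid, union ibv_gid *remote_gid,
                      struct ibv_ah_attr *ah_attr)
{
    int sgid_index = ibv_find_gid_index(ctx, port_num, local_gid);

    if (sgid_index < 0)
        return -1;                          /* local GID not found in the table */

    memset(ah_attr, 0, sizeof(*ah_attr));
    ah_attr->is_global      = 1;            /* RoCE traffic always carries a GRH */
    ah_attr->port_num       = port_num;
    ah_attr->grh.dgid       = *remote_gid;  /* destination GID */
    ah_attr->grh.sgid_index = sgid_index;   /* index resolved by the scan above */
    ah_attr->grh.hop_limit  = 1;
    return 0;
}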
static int ibv_find_gid_index(struct ibv_context *context, uint8_t port_num,
                              union ibv_gid *gid)
{
    union ibv_gid sgid;
    int i = 0, ret;

    fprintf(stderr, "%s:%s:%d\n", __func__, __FILE__, __LINE__);

    do {
        ret = ibv_query_gid(context, port_num, i++, &sgid);
    } while (!ret && memcmp(&sgid, gid, sizeof *gid));

    return ret ? ret : i - 1;
}
static int set_mcast_group(struct pingpong_context *ctx,
                           struct perftest_parameters *user_param,
                           struct mcast_parameters *mcg_params)
{
    int i;
    struct ibv_port_attr port_attr;

    if (ibv_query_gid(ctx->context, user_param->ib_port,
                      user_param->gid_index, &mcg_params->port_gid)) {
        return 1;
    }

    if (ibv_query_pkey(ctx->context, user_param->ib_port,
                       DEF_PKEY_IDX, &mcg_params->pkey)) {
        return 1;
    }

    if (ibv_query_port(ctx->context, user_param->ib_port, &port_attr)) {
        return 1;
    }

    mcg_params->sm_lid    = port_attr.sm_lid;
    mcg_params->sm_sl     = port_attr.sm_sl;
    mcg_params->ib_port   = user_param->ib_port;
    mcg_params->user_mgid = user_param->user_mgid;

    set_multicast_gid(mcg_params, ctx->qp[0]->qp_num, (int)user_param->machine);

    if (!strcmp(link_layer_str(user_param->link_type), "IB")) {
        /* Request the SM to create and register the multicast group. */
        if (join_multicast_group(SUBN_ADM_METHOD_SET, mcg_params)) {
            fprintf(stderr, " Failed to Join Mcast request\n");
            return 1;
        }
    }

    for (i = 0; i < user_param->num_of_qps; i++) {
        if (ibv_attach_mcast(ctx->qp[i], &mcg_params->mgid, mcg_params->mlid)) {
            fprintf(stderr, "Couldn't attach QP to MultiCast group\n");
            return 1;
        }
    }

    mcg_params->mcast_state |= MCAST_IS_ATTACHED;

    return 0;
}
int rdma_backend_get_gid_index(RdmaBackendDev *backend_dev, union ibv_gid *gid)
{
    union ibv_gid sgid;
    int ret;
    int i = 0;

    do {
        ret = ibv_query_gid(backend_dev->context, backend_dev->port_num, i,
                            &sgid);
        i++;
    } while (!ret && (memcmp(&sgid, gid, sizeof(*gid))));

    trace_rdma_backend_get_gid_index(be64_to_cpu(gid->global.subnet_prefix),
                                     be64_to_cpu(gid->global.interface_id),
                                     i - 1);

    return ret ? ret : i - 1;
}
static int send_local_dest(int sockfd, int index)
{
    char msg[MSG_SIZE];
    char gid[33];
    uint32_t srq_num;
    union ibv_gid local_gid;

    if (ctx.gidx >= 0) {
        if (ibv_query_gid(ctx.context, ctx.ib_port, ctx.gidx, &local_gid)) {
            fprintf(stderr, "can't read sgid of index %d\n", ctx.gidx);
            return -1;
        }
    } else {
        memset(&local_gid, 0, sizeof(local_gid));
    }

    ctx.rem_dest[index].recv_psn = lrand48() & 0xffffff;

    if (ibv_get_srq_num(ctx.srq, &srq_num)) {
        fprintf(stderr, "Couldn't get SRQ num\n");
        return -1;
    }

    inet_ntop(AF_INET6, &local_gid, gid, sizeof(gid));
    printf(ADDR_FORMAT, "local", ctx.lid, ctx.recv_qp[index]->qp_num,
           ctx.send_qp[index]->qp_num, ctx.rem_dest[index].recv_psn,
           srq_num, gid);

    gid_to_wire_gid(&local_gid, gid);
    sprintf(msg, MSG_FORMAT, ctx.lid, ctx.recv_qp[index]->qp_num,
            ctx.send_qp[index]->qp_num, ctx.rem_dest[index].recv_psn,
            srq_num, gid);

    if (write(sockfd, msg, MSG_SIZE) != MSG_SIZE) {
        fprintf(stderr, "Couldn't send local address\n");
        return -1;
    }

    return 0;
}
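/*
 * Editor's note: send_local_dest() above relies on gid_to_wire_gid() to
 * serialize the local GID into the text message exchanged over the socket.
 * That helper is not part of this excerpt; the pair below is a sketch of one
 * common implementation, in the style of the libibverbs pingpong examples
 * (names and exact encoding assumed): the 16 raw GID bytes are rendered as a
 * fixed 32-character hex string so they travel safely inside a printable
 * message. Needs <stdio.h>, <string.h>, <arpa/inet.h>, <infiniband/verbs.h>.
 */
static void gid_to_wire_gid(const union ibv_gid *gid, char wgid[])
{
    uint32_t tmp[4];
    int i;

    /* Serialize the 16 raw GID bytes as 32 hex characters, 8 per 32-bit word. */
    memcpy(tmp, gid, sizeof(tmp));
    for (i = 0; i < 4; ++i)
        sprintf(&wgid[i * 8], "%08x", htonl(tmp[i]));
}

static void wire_gid_to_gid(const char *wgid, union ibv_gid *gid)
{
    char chunk[9];
    unsigned int v;
    uint32_t tmp[4];
    int i;

    /* Parse the 32 hex characters back into the 16 raw GID bytes. */
    chunk[8] = '\0';
    for (i = 0; i < 4; ++i) {
        memcpy(chunk, wgid + i * 8, 8);
        sscanf(chunk, "%x", &v);
        tmp[i] = ntohl(v);
    }
    memcpy(gid, tmp, sizeof(tmp));
}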
static ucs_status_t uct_ib_iface_init_gid(uct_ib_iface_t *iface,
                                          uct_ib_iface_config_t *config)
{
    uct_ib_device_t *dev = uct_ib_iface_device(iface);
    int ret;

    ret = ibv_query_gid(dev->ibv_context, iface->port_num, config->gid_index,
                        &iface->gid);
    if (ret != 0) {
        ucs_error("ibv_query_gid(index=%d) failed: %m", config->gid_index);
        return UCS_ERR_INVALID_PARAM;
    }

    if ((iface->gid.global.interface_id == 0) &&
        (iface->gid.global.subnet_prefix == 0)) {
        ucs_error("Invalid gid[%d] on %s:%d", config->gid_index,
                  uct_ib_device_name(dev), iface->port_num);
        return UCS_ERR_INVALID_ADDR;
    }

    return UCS_OK;
}
static int send_set_up_connection(struct pingpong_context *ctx,
                                  struct perftest_parameters *user_param,
                                  struct pingpong_dest *my_dest,
                                  struct mcast_parameters *mcg_params,
                                  struct perftest_comm *comm)
{
    int i;
    union ibv_gid temp_gid;

    srand48(getpid() * time(NULL));

    if (user_param->gid_index != -1) {
        if (ibv_query_gid(ctx->context, user_param->ib_port,
                          user_param->gid_index, &temp_gid)) {
            return -1;
        }
    }

    for (i = 0; i < user_param->num_of_qps; i++) {
        if (user_param->use_mcg) {
            if (set_mcast_group(ctx, user_param, mcg_params)) {
                return 1;
            }
            my_dest[i].gid = mcg_params->mgid;
            my_dest[i].lid = mcg_params->mlid;
            my_dest[i].qpn = QPNUM_MCAST;
        } else {
            memcpy(my_dest[i].gid.raw, temp_gid.raw, 16);
            my_dest[i].lid = ctx_get_local_lid(ctx->context, user_param->ib_port);
            my_dest[i].qpn = ctx->qp[i]->qp_num;
        }
        my_dest[i].psn = lrand48() & 0xffffff;

        /* We do not fail the test upon LID 0 when running over RoCE. */
        if (user_param->gid_index < 0) {
            if (!my_dest[i].lid) {
                fprintf(stderr, " Local lid 0x0 detected, without any use of gid. Is an SM running?\n");
                return -1;
            }
        }

#ifdef HAVE_XRCD
        if (user_param->use_xrc || user_param->connection_type == DC) {
            if (ibv_get_srq_num(ctx->srq, &(my_dest[i].srqn))) {
                fprintf(stderr, "Couldn't get SRQ number\n");
                return 1;
            }
        }
#endif

#ifdef HAVE_DC
        if (user_param->connection_type == DC) {
            if (ibv_get_srq_num(ctx->srq, &(my_dest[i].srqn))) {
                fprintf(stderr, "Couldn't get SRQ number\n");
                return 1;
            }
        }
#endif
    }

    return 0;
}
int main(int argc, char *argv[])
{
    struct ibv_pd *pd1, *pd2;
    struct ibv_comp_channel *comp_chan1, *comp_chan2;
    struct ibv_cq *cq1, *cq2;
    struct ibv_cq *evt_cq = NULL;
    struct ibv_mr *mr1, *mr2;
    struct ibv_qp_init_attr qp_attr1 = {}, qp_attr2 = {};
    struct ibv_sge sge;
    struct ibv_send_wr send_wr = {};
    struct ibv_send_wr *bad_send_wr = NULL;
    struct ibv_wc wc;
    struct ibv_qp *qp1, *qp2;
    void *cq_context = NULL;
    union ibv_gid gid1, gid2;
    int n;
    uint8_t *buf1, *buf2;
    int err;
    int num_devices;
    struct ibv_context *verbs1, *verbs2;
    struct ibv_device **dev_list = ibv_get_device_list(&num_devices);
    struct ibv_device_attr dev_attr;
    int use = 0;
    int port = 1;
    int x = 0;
    unsigned long mb = 0;
    unsigned long bytes = 0;
    unsigned long save_diff = 0;
    struct timeval start, stop, diff;
    int iterations = 0;
    struct rusage usage;
    struct timeval ustart, uend;
    struct timeval sstart, send;
    struct timeval tstart, tend;

    DPRINTF("There are %d devices\n", num_devices);

    if (num_devices == 0 || dev_list == NULL) {
        printf("No devices found\n");
        return 1;
    }

    for (x = 0; x < num_devices; x++) {
        printf("Device: %d, %s\n", x, ibv_get_device_name(dev_list[x]));
    }

    if (argc < 2) {
        printf("Which RDMA device to use? 0, 1, 2, 3...\n");
        return 1;
    }

    use = atoi(argv[1]);
    DPRINTF("Using device %d\n", use);

    verbs1 = ibv_open_device(dev_list[use]);
    if (verbs1 == NULL) {
        printf("Failed to open device!\n");
        return 1;
    }
    DPRINTF("Device open %s\n", ibv_get_device_name(dev_list[use]));

    verbs2 = ibv_open_device(dev_list[use]);
    if (verbs2 == NULL) {
        printf("Failed to open device again!\n");
        return 1;
    }

    if (ibv_query_device(verbs1, &dev_attr)) {
        printf("Failed to query device attributes.\n");
        return 1;
    }

    printf("Device open: %d, %s which has %d ports\n", use,
           ibv_get_device_name(dev_list[use]), dev_attr.phys_port_cnt);

    if (argc < 3) {
        printf("Which port on the device to use? 1, 2, 3...\n");
        return 1;
    }

    port = atoi(argv[2]);
    if (port <= 0) {
        printf("Port #%d invalid, must start with 1, 2, 3, ...\n", port);
        return 1;
    }
    printf("Using port %d\n", port);

    if (argc < 4) {
        printf("How many iterations to perform?\n");
        return 1;
    }

    iterations = atoi(argv[3]);
    printf("Will perform %d iterations\n", iterations);

    pd1 = ibv_alloc_pd(verbs1);
    if (!pd1)
        return 1;

    if (argc < 5) {
        printf("How many megabytes to allocate? (This will be allocated twice. "
               "Once for source, once for destination.)\n");
        return 1;
    }

    mb = atoi(argv[4]);
    if (mb <= 0) {
        printf("Megabytes %lu invalid\n", mb);
        return 1;
    }

    DPRINTF("protection domain1 allocated\n");

    pd2 = ibv_alloc_pd(verbs2);
    if (!pd2)
        return 1;
    DPRINTF("protection domain2 allocated\n");

    comp_chan1 = ibv_create_comp_channel(verbs1);
    if (!comp_chan1)
        return 1;
    DPRINTF("completion chan1 created\n");

    comp_chan2 = ibv_create_comp_channel(verbs2);
    if (!comp_chan2)
        return 1;
    DPRINTF("completion chan2 created\n");

    cq1 = ibv_create_cq(verbs1, 2, NULL, comp_chan1, 0);
    if (!cq1)
        return 1;
    DPRINTF("CQ1 created\n");

    cq2 = ibv_create_cq(verbs2, 2, NULL, comp_chan2, 0);
    if (!cq2)
        return 1;
    DPRINTF("CQ2 created\n");

    bytes = mb * 1024UL * 1024UL;

    buf1 = malloc(bytes);
    if (!buf1)
        return 1;
    buf2 = malloc(bytes);
    if (!buf2)
        return 1;

    printf("Populating %lu MB memory.\n", mb * 2);
    for (x = 0; x < bytes; x++) {
        buf1[x] = 123;
    }
    buf1[bytes - 1] = 123;

    mr1 = ibv_reg_mr(pd1, buf1, bytes, IBV_ACCESS_LOCAL_WRITE |
                     IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ);
    if (!mr1) {
        printf("Failed to register memory.\n");
        return 1;
    }

    mr2 = ibv_reg_mr(pd2, buf2, bytes, IBV_ACCESS_LOCAL_WRITE |
                     IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ);
    if (!mr2) {
        printf("Failed to register memory.\n");
        return 1;
    }
    DPRINTF("memory registered.\n");

    qp_attr1.cap.max_send_wr = 10;
    qp_attr1.cap.max_send_sge = 10;
    qp_attr1.cap.max_recv_wr = 10;
    qp_attr1.cap.max_recv_sge = 10;
    qp_attr1.sq_sig_all = 1;
    qp_attr1.send_cq = cq1;
    qp_attr1.recv_cq = cq1;
    qp_attr1.qp_type = IBV_QPT_RC;

    qp1 = ibv_create_qp(pd1, &qp_attr1);
    if (!qp1) {
        printf("failed to create queue pair #1\n");
        return 1;
    }
    DPRINTF("queue pair1 created\n");

    qp_attr2.cap.max_send_wr = 10;
    qp_attr2.cap.max_send_sge = 10;
    qp_attr2.cap.max_recv_wr = 10;
    qp_attr2.cap.max_recv_sge = 10;
    qp_attr2.sq_sig_all = 1;
    qp_attr2.send_cq = cq2;
    qp_attr2.recv_cq = cq2;
    qp_attr2.qp_type = IBV_QPT_RC;

    qp2 = ibv_create_qp(pd2, &qp_attr2);
    if (!qp2) {
        printf("failed to create queue pair #2\n");
        return 1;
    }
    DPRINTF("queue pair2 created\n");

    struct ibv_qp_attr attr1 = {
        .qp_state        = IBV_QPS_INIT,
        .pkey_index      = 0,
        .port_num        = port,
        .qp_access_flags = IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ |
                           IBV_ACCESS_LOCAL_WRITE,
    };

    if (ibv_modify_qp(qp1, &attr1, IBV_QP_STATE | IBV_QP_PKEY_INDEX |
                      IBV_QP_PORT | IBV_QP_ACCESS_FLAGS)) {
        printf("verbs 1 Failed to go to init\n");
        return 1;
    }
    DPRINTF("verbs1 to init\n");

    struct ibv_qp_attr attr2 = {
        .qp_state        = IBV_QPS_INIT,
        .pkey_index      = 0,
        .port_num        = port,
        .qp_access_flags = IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ |
                           IBV_ACCESS_LOCAL_WRITE,
    };

    if (ibv_modify_qp(qp2, &attr2, IBV_QP_STATE | IBV_QP_PKEY_INDEX |
                      IBV_QP_PORT | IBV_QP_ACCESS_FLAGS)) {
        printf("verbs 2 Failed to go to init\n");
        return 1;
    }
    DPRINTF("verbs2 to init\n");

    struct ibv_port_attr port1, port2;
    uint64_t psn1 = lrand48() & 0xffffff;
    uint64_t psn2 = lrand48() & 0xffffff;

    if (ibv_query_port(verbs1, port, &port1))
        return 1;
    DPRINTF("got port1 information\n");

    if (ibv_query_port(verbs2, port, &port2))
        return 1;
    DPRINTF("got port2 information\n");

    if (ibv_query_gid(verbs1, port, 0, &gid1))
        return 1;
    DPRINTF("got gid1 information\n");

    if (ibv_query_gid(verbs2, port, 0, &gid2))
        return 1;
    DPRINTF("got gid2 information\n");

    struct ibv_qp_attr next2 = {
        .qp_state           = IBV_QPS_RTR,
        .path_mtu           = IBV_MTU_1024,
        .dest_qp_num        = qp2->qp_num,
        .rq_psn             = psn2,
        .max_dest_rd_atomic = 5,
        .min_rnr_timer      = 12,
        .ah_attr            = {
            .is_global     = 0,
            .dlid          = port2.lid,
            .sl            = 0,
            .src_path_bits = 0,
            .port_num      = port,
        }
    };
    if (gid2.global.interface_id) {
        next2.ah_attr.is_global = 1;
        next2.ah_attr.grh.hop_limit = 1;
        next2.ah_attr.grh.dgid = gid2;
        next2.ah_attr.grh.sgid_index = 0;
    }

    struct ibv_qp_attr next1 = {
        .qp_state           = IBV_QPS_RTR,
        .path_mtu           = IBV_MTU_1024,
        .dest_qp_num        = qp1->qp_num,
        .rq_psn             = psn1,
        .max_dest_rd_atomic = 1,
        .min_rnr_timer      = 12,
        .ah_attr            = {
            .is_global     = 0,
            .dlid          = port1.lid,
            .sl            = 0,
            .src_path_bits = 0,
            .port_num      = port,
        }
    };

    if (gid1.global.interface_id) {
        next1.ah_attr.is_global = 1;
        next1.ah_attr.grh.hop_limit = 1;
        next1.ah_attr.grh.dgid = gid1;
        next1.ah_attr.grh.sgid_index = 0;
    }

    if (ibv_modify_qp(qp2, &next1, IBV_QP_STATE | IBV_QP_AV | IBV_QP_PATH_MTU |
                      IBV_QP_DEST_QPN | IBV_QP_RQ_PSN | IBV_QP_MAX_DEST_RD_ATOMIC |
                      IBV_QP_MIN_RNR_TIMER)) {
        printf("Failed to modify verbs2 to ready\n");
        return 1;
    }
    DPRINTF("verbs2 RTR\n");

    if (ibv_modify_qp(qp1, &next2, IBV_QP_STATE | IBV_QP_AV | IBV_QP_PATH_MTU |
                      IBV_QP_DEST_QPN | IBV_QP_RQ_PSN | IBV_QP_MAX_DEST_RD_ATOMIC |
                      IBV_QP_MIN_RNR_TIMER)) {
        printf("Failed to modify verbs1 to ready\n");
        return 1;
    }
    DPRINTF("verbs1 RTR\n");

    next2.qp_state = IBV_QPS_RTS;
    next2.timeout = 14;
    next2.retry_cnt = 7;
    next2.rnr_retry = 7;
    next2.sq_psn = psn1;
    next2.max_rd_atomic = 1;

    if (ibv_modify_qp(qp1, &next2, IBV_QP_STATE | IBV_QP_TIMEOUT |
                      IBV_QP_RETRY_CNT | IBV_QP_RNR_RETRY | IBV_QP_SQ_PSN |
                      IBV_QP_MAX_QP_RD_ATOMIC)) {
        printf("Failed again to modify verbs1 to ready\n");
        return 1;
    }
    DPRINTF("verbs1 RTS\n");

    next1.qp_state = IBV_QPS_RTS;
    next1.timeout = 14;
    next1.retry_cnt = 7;
    next1.rnr_retry = 7;
    next1.sq_psn = psn2;
    next1.max_rd_atomic = 1;

    if (ibv_modify_qp(qp2, &next1, IBV_QP_STATE | IBV_QP_TIMEOUT |
                      IBV_QP_RETRY_CNT | IBV_QP_RNR_RETRY | IBV_QP_SQ_PSN |
                      IBV_QP_MAX_QP_RD_ATOMIC)) {
        printf("Failed again to modify verbs2 to ready\n");
        return 1;
    }
    DPRINTF("verbs2 RTS\n");

    printf("Performing RDMA first.\n");

    iterations = atoi(argv[3]);
    getrusage(RUSAGE_SELF, &usage);
    ustart = usage.ru_utime;
    sstart = usage.ru_stime;
    gettimeofday(&tstart, NULL);

    while (iterations-- > 0) {
        sge.addr = (uintptr_t)buf1;
        sge.length = bytes;
        sge.lkey = mr1->lkey;

        send_wr.wr_id = 1;
        send_wr.opcode = IBV_WR_RDMA_WRITE;
        send_wr.sg_list = &sge;
        send_wr.num_sge = 1;
        send_wr.send_flags = IBV_SEND_SIGNALED;
        send_wr.wr.rdma.rkey = mr2->rkey;
        send_wr.wr.rdma.remote_addr = (uint64_t)buf2;

        DPRINTF("Iterations left: %d\n", iterations);

        if (ibv_req_notify_cq(cq1, 0))
            return 1;

        DPRINTF("Submitting local RDMA\n");
        gettimeofday(&start, NULL);

        if (ibv_post_send(qp1, &send_wr, &bad_send_wr))
            return 1;
        DPRINTF("RDMA posted %p %p\n", &send_wr, bad_send_wr);

        DPRINTF("blocking...\n");
        if (ibv_get_cq_event(comp_chan1, &evt_cq, &cq_context)) {
            printf("failed to get CQ event\n");
            return 1;
        }

        gettimeofday(&stop, NULL);
        timersub(&stop, &start, &diff);
        DPRINTF("RDMA took: %lu us\n", diff.tv_usec);

        ibv_ack_cq_events(evt_cq, 1);
        DPRINTF("got event\n");

        n = ibv_poll_cq(cq1, 1, &wc);
        if (n > 0) {
            DPRINTF("return from poll: %lu\n", wc.wr_id);
            if (wc.status != IBV_WC_SUCCESS) {
                printf("poll failed %s\n", ibv_wc_status_str(wc.status));
                return 1;
            }
            if (wc.wr_id == 1) {
                DPRINTF("Finished %d bytes %d %d\n", n, buf1[bytes - 1],
                        buf2[bytes - 1]);
            } else {
                printf("didn't find completion\n");
            }
        }
        if (n < 0) {
            printf("poll returned error\n");
            return 1;
        }
        DPRINTF("Poll returned %d bytes %d %d\n", n, buf1[0], buf2[0]);
    }

    gettimeofday(&tend, NULL);
    getrusage(RUSAGE_SELF, &usage);
    uend = usage.ru_utime;
    send = usage.ru_stime;

    save_diff = 0;
    timersub(&uend, &ustart, &diff);
    save_diff += diff.tv_usec;
    printf("User CPU time: %lu us\n", diff.tv_usec);
    timersub(&send, &sstart, &diff);
    save_diff += diff.tv_usec;
    printf("System CPU time: %lu us\n", diff.tv_usec);
    timersub(&tend, &tstart, &diff);
    printf("Sleeping time: %lu us\n", diff.tv_usec - save_diff);
    printf("Wall clock time: %lu us\n", diff.tv_usec);

    iterations = atoi(argv[3]);
    printf("Now using the CPU instead....\n");

    getrusage(RUSAGE_SELF, &usage);
    ustart = usage.ru_utime;
    sstart = usage.ru_stime;
    gettimeofday(&tstart, NULL);

    while (iterations-- > 0) {
        DPRINTF("Repeating without RDMA...\n");
        gettimeofday(&start, NULL);
        memcpy(buf2, buf1, bytes);
        gettimeofday(&stop, NULL);
        timersub(&stop, &start, &diff);
        DPRINTF("Regular copy took: %lu us\n", diff.tv_usec);
    }

    gettimeofday(&tend, NULL);
    getrusage(RUSAGE_SELF, &usage);
    uend = usage.ru_utime;
    send = usage.ru_stime;

    save_diff = 0;
    timersub(&uend, &ustart, &diff);
    save_diff += diff.tv_usec;
    printf("User CPU time: %lu us\n", diff.tv_usec);
    timersub(&send, &sstart, &diff);
    save_diff += diff.tv_usec;
    printf("System CPU time: %lu us\n", diff.tv_usec);
    timersub(&tend, &tstart, &diff);
    printf("Sleeping time: %lu us\n", diff.tv_usec - save_diff);
    printf("Wall clock time: %lu us\n", diff.tv_usec);

    return 0;
}
int __ibv_query_gid_1_0(struct ibv_context_1_0 *context, uint8_t port_num,
                        int index, union ibv_gid *gid)
{
    return ibv_query_gid(context->real_context, port_num, index, gid);
}
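/*
 * Editor's note: the 1.0 compatibility wrapper above just forwards to the
 * current ibv_query_gid() entry point. For reference, a minimal standalone
 * caller looks like the sketch below (an assumed example, not part of any of
 * the projects quoted here): it queries GID index 0 on port 1 of the first
 * device and prints it in IPv6 text form, the same inet_ntop() trick used by
 * send_local_dest() further up. Link with -libverbs.
 */
#include <stdio.h>
#include <arpa/inet.h>
#include <infiniband/verbs.h>

int main(void)
{
    int num_devices;
    struct ibv_device **dev_list = ibv_get_device_list(&num_devices);
    if (!dev_list || num_devices == 0) {
        fprintf(stderr, "no RDMA devices found\n");
        return 1;
    }

    struct ibv_context *ctx = ibv_open_device(dev_list[0]);
    if (!ctx) {
        fprintf(stderr, "ibv_open_device failed\n");
        return 1;
    }

    union ibv_gid gid;
    if (ibv_query_gid(ctx, 1, 0, &gid)) {            /* port 1, GID index 0 */
        fprintf(stderr, "ibv_query_gid failed\n");
        return 1;
    }

    char str[INET6_ADDRSTRLEN];
    inet_ntop(AF_INET6, gid.raw, str, sizeof(str));  /* a GID is 16 raw bytes */
    printf("port 1 gid[0] = %s\n", str);

    ibv_close_device(ctx);
    ibv_free_device_list(dev_list);
    return 0;
}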
static int __ibv_exp_query_gid_attr(struct ibv_context *context, uint8_t port_num,
                                    unsigned int index,
                                    struct ibv_exp_gid_attr *attr)
{
    char *dir_path;
    char name[32];
    char buff[41];
    DIR *dir;

    if (attr->comp_mask & ~(IBV_EXP_QUERY_GID_ATTR_RESERVED - 1))
        return ENOTSUP;

    if (attr->comp_mask & IBV_EXP_QUERY_GID_ATTR_TYPE) {
        snprintf(name, sizeof(name), "ports/%d/gid_attrs/types/%d",
                 port_num, index);
        if (ibv_read_sysfs_file(context->device->ibdev_path, name,
                                buff, sizeof(buff)) <= 0) {
            if (errno == EINVAL) {
                /* In IB, this file doesn't exist and we return -EINVAL. */
                attr->type = IBV_EXP_IB_ROCE_V1_GID_TYPE;
                goto query_gid;
            }

            if (asprintf(&dir_path, "%s/%s", context->device->ibdev_path,
                         "ports/1/gid_attrs/") < 0)
                return ENOMEM;
            dir = opendir(dir_path);
            free(dir_path);
            if (!dir) {
                if (errno == ENOENT)
                    /* Assuming that if gid_attrs doesn't exist, we have an
                     * old kernel and all GIDs are IB/RoCE v1 */
                    attr->type = IBV_EXP_IB_ROCE_V1_GID_TYPE;
                else
                    return errno;
            } else {
                closedir(dir);
                return EINVAL;
            }
        } else {
            if (!strcmp(buff, "IB/RoCE V1"))
                attr->type = IBV_EXP_IB_ROCE_V1_GID_TYPE;
            else if (!strcmp(buff, "RoCE V2"))
                attr->type = IBV_EXP_ROCE_V2_GID_TYPE;
            else if (!strcmp(buff, "RoCE V1.5"))
                attr->type = IBV_EXP_ROCE_V1_5_GID_TYPE;
            else
                return EINVAL;
        }
    }

query_gid:
    if (attr->comp_mask & IBV_EXP_QUERY_GID_ATTR_GID) {
        if (ibv_query_gid(context, port_num, index, &attr->gid))
            return ENOENT;
    }

    return 0;
}
int __ibv_query_gid_1_0(struct ibv_context_1_0 *context, uint8_t port_num,
                        int index, union ibv_gid *gid)
{
    fprintf(stderr, "%s:%s:%d \n", __func__, __FILE__, __LINE__);

    return ibv_query_gid(context->real_context, port_num, index, gid);
}
/**
 * the first step in original MPID_nem_ib_setup_conn() function
 * open hca, create ptags and create cqs
 */
int MPID_nem_ib_open_ports()
{
    int mpi_errno = MPI_SUCCESS;

    /* Infiniband Verb Structures */
    struct ibv_port_attr port_attr;
    struct ibv_device_attr dev_attr;
    int nHca; /* , curRank, rail_index ; */

    MPIDI_STATE_DECL(MPID_STATE_MPIDI_OPEN_HCA);
    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_OPEN_HCA);

    for (nHca = 0; nHca < ib_hca_num_hcas; nHca++) {
        if (ibv_query_device(hca_list[nHca].nic_context, &dev_attr)) {
            MPIU_ERR_SETFATALANDJUMP1(mpi_errno, MPI_ERR_OTHER,
                    "**fail", "**fail %s", "Error getting HCA attributes");
        }

        /* detecting active ports */
        if (rdma_default_port < 0 || ib_hca_num_ports > 1) {
            int nPort;
            int k = 0;

            for (nPort = 1; nPort <= RDMA_DEFAULT_MAX_PORTS; nPort++) {
                if ((!ibv_query_port(hca_list[nHca].nic_context, nPort, &port_attr))
                        && port_attr.state == IBV_PORT_ACTIVE
                        && (port_attr.lid || (!port_attr.lid && use_iboeth))) {
                    if (use_iboeth) {
                        if (ibv_query_gid(hca_list[nHca].nic_context, nPort, 0,
                                          &hca_list[nHca].gids[k])) {
                            /* new error information function needed */
                            MPIU_ERR_SETFATALANDJUMP1(mpi_errno, MPI_ERR_OTHER,
                                    "**fail", "Failed to retrieve gid on rank %d",
                                    process_info.rank);
                        }
                        DEBUG_PRINT("[%d] %s(%d): Getting gid[%d][%d] for"
                                    " port %d subnet_prefix = %llx,"
                                    " intf_id = %llx\r\n",
                                    process_info.rank, __FUNCTION__, __LINE__,
                                    nHca, k, k,
                                    hca_list[nHca].gids[k].global.subnet_prefix,
                                    hca_list[nHca].gids[k].global.interface_id);
                    } else {
                        hca_list[nHca].lids[k] = port_attr.lid;
                    }
                    hca_list[nHca].ports[k++] = nPort;

                    if (check_attrs(&port_attr, &dev_attr)) {
                        MPIU_ERR_SETFATALANDJUMP1(mpi_errno, MPI_ERR_OTHER,
                                "**fail", "**fail %s",
                                "Attributes failed sanity check");
                    }
                }
            }

            if (k < ib_hca_num_ports) {
                MPIU_ERR_SETFATALANDJUMP1(mpi_errno, MPI_ERR_OTHER,
                        "**activeports", "**activeports %d", ib_hca_num_ports);
            }
        } else {
            if (ibv_query_port(hca_list[nHca].nic_context, rdma_default_port, &port_attr)
                    || (!port_attr.lid && !use_iboeth)
                    || (port_attr.state != IBV_PORT_ACTIVE)) {
                MPIU_ERR_SETFATALANDJUMP1(mpi_errno, MPI_ERR_OTHER,
                        "**portquery", "**portquery %d", rdma_default_port);
            }

            hca_list[nHca].ports[0] = rdma_default_port;

            if (use_iboeth) {
                if (ibv_query_gid(hca_list[nHca].nic_context, 0, 0,
                                  &hca_list[nHca].gids[0])) {
                    /* new error function needed */
                    MPIU_ERR_SETFATALANDJUMP1(mpi_errno, MPI_ERR_OTHER,
                            "**fail", "Failed to retrieve gid on rank %d",
                            process_info.rank);
                }
                if (check_attrs(&port_attr, &dev_attr)) {
                    MPIU_ERR_SETFATALANDJUMP1(mpi_errno, MPI_ERR_OTHER,
                            "**fail", "**fail %s",
                            "Attributes failed sanity check");
                }
            } else {
                hca_list[nHca].lids[0] = port_attr.lid;
            }
        }

        if (rdma_use_blocking) {
            hca_list[nHca].comp_channel =
                ibv_create_comp_channel(hca_list[nHca].nic_context);
            if (!hca_list[nHca].comp_channel) {
                MPIU_ERR_SETFATALANDSTMT1(mpi_errno, MPI_ERR_OTHER, goto fn_fail,
                        "**fail", "**fail %s", "cannot create completion channel");
            }

            hca_list[nHca].send_cq_hndl = NULL;
            hca_list[nHca].recv_cq_hndl = NULL;
            hca_list[nHca].cq_hndl = ibv_create_cq(hca_list[nHca].nic_context,
                                                   rdma_default_max_cq_size, NULL,
                                                   hca_list[nHca].comp_channel, 0);
            if (!hca_list[nHca].cq_hndl) {
                MPIU_ERR_SETFATALANDSTMT1(mpi_errno, MPI_ERR_OTHER, goto fn_fail,
                        "**fail", "**fail %s", "cannot create cq");
            }

            if (ibv_req_notify_cq(hca_list[nHca].cq_hndl, 0)) {
                MPIU_ERR_SETFATALANDSTMT1(mpi_errno, MPI_ERR_OTHER, goto fn_fail,
                        "**fail", "**fail %s", "cannot request cq notification");
            }
static int pp_open_port(struct pingpong_context *ctx, const char *servername,
                        int ib_port, int port, struct pingpong_dest *rem_dest,
                        struct user_parameters *user_parm)
{
    char addr_fmt[] = "%8s address: LID %#04x QPN %#06x PSN %#06x\n";
    struct pingpong_dest my_dest;
    int sockfd;
    int rc;
    union ibv_gid gid;

    /* Create a connection between client and server.
     * We do it by exchanging data over a TCP socket connection. */

    my_dest.lid = pp_get_local_lid(ctx, ib_port);
    my_dest.qpn = ctx->qp->qp_num;
    my_dest.psn = lrand48() & 0xffffff;

    /* We do not fail the test upon LID 0 in an RDMAoE/Ethernet configuration. */
    if (user_parm->gid_index < 0) {
        if (!my_dest.lid) {
            fprintf(stderr, "Local lid 0x0 detected. Is an SM running?\n");
            return -1;
        }
    }

    if (user_parm->gid_index != -1) {
        int err = 0;
        err = ibv_query_gid(ctx->context, ib_port, user_parm->gid_index, &gid);
        if (err) {
            return -1;
        }
        ctx->dgid = gid;
    }

    my_dest.dgid  = gid;
    my_dest.rkey  = ctx->mr->rkey;
    my_dest.vaddr = (uintptr_t)ctx->buf + ctx->size;

    printf(addr_fmt, "local", my_dest.lid, my_dest.qpn, my_dest.psn);
    if (user_parm->gid_index > -1) {
        printf(" GID: %02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:"
               "%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x\n",
               my_dest.dgid.raw[0], my_dest.dgid.raw[1], my_dest.dgid.raw[2],
               my_dest.dgid.raw[3], my_dest.dgid.raw[4], my_dest.dgid.raw[5],
               my_dest.dgid.raw[6], my_dest.dgid.raw[7], my_dest.dgid.raw[8],
               my_dest.dgid.raw[9], my_dest.dgid.raw[10], my_dest.dgid.raw[11],
               my_dest.dgid.raw[12], my_dest.dgid.raw[13], my_dest.dgid.raw[14],
               my_dest.dgid.raw[15]);
    }

    sockfd = servername ? pp_client_connect(servername, port) :
                          pp_server_connect(port);
    if (sockfd < 0) {
        printf("pp_connect_sock(%s,%d) failed (%d)!\n", servername, port, sockfd);
        return sockfd;
    }

    rc = servername ? pp_client_exch_dest(sockfd, &my_dest, rem_dest, user_parm) :
                      pp_server_exch_dest(sockfd, &my_dest, rem_dest, user_parm);
    if (rc)
        return rc;

    printf(addr_fmt, "remote", rem_dest->lid, rem_dest->qpn, rem_dest->psn,
           rem_dest->rkey, rem_dest->vaddr);
    if (user_parm->gid_index > -1) {
        printf(" GID: %02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:"
               "%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x\n",
               rem_dest->dgid.raw[0], rem_dest->dgid.raw[1], rem_dest->dgid.raw[2],
               rem_dest->dgid.raw[3], rem_dest->dgid.raw[4], rem_dest->dgid.raw[5],
               rem_dest->dgid.raw[6], rem_dest->dgid.raw[7], rem_dest->dgid.raw[8],
               rem_dest->dgid.raw[9], rem_dest->dgid.raw[10], rem_dest->dgid.raw[11],
               rem_dest->dgid.raw[12], rem_dest->dgid.raw[13], rem_dest->dgid.raw[14],
               rem_dest->dgid.raw[15]);
    }

    if ((rc = pp_connect_ctx(ctx, ib_port, my_dest.psn, rem_dest, user_parm)))
        return rc;

    /* An additional handshake is required *after* moving the QP to RTR.
     * Arbitrarily reuse exch_dest for this purpose. */
    rc = servername ? pp_client_exch_dest(sockfd, &my_dest, rem_dest, user_parm) :
                      pp_server_exch_dest(sockfd, &my_dest, rem_dest, user_parm);
    if (rc)
        return rc;

    if (write(sockfd, "done", sizeof "done") != sizeof "done") {
        perror("write");
        fprintf(stderr, "Couldn't write to socket\n");
        return 1;
    }

    close(sockfd);
    return 0;
}
static int fi_ibv_alloc_info(struct ibv_context *ctx, struct fi_info **info,
                             const struct verbs_ep_domain *ep_dom)
{
    struct fi_info *fi;
    union ibv_gid gid;
    size_t name_len;
    int ret;
    int param;

    if (!(fi = fi_allocinfo()))
        return -FI_ENOMEM;

    fi->caps = ep_dom->caps;
    fi->handle = NULL;
    if (ep_dom->type == FI_EP_RDM) {
        fi->mode = VERBS_RDM_MODE;
        *(fi->tx_attr) = verbs_rdm_tx_attr;
    } else {
        fi->mode = VERBS_MODE;
        *(fi->tx_attr) = verbs_tx_attr;
    }

    *(fi->rx_attr)     = (ep_dom->type == FI_EP_RDM) ? verbs_rdm_rx_attr : verbs_rx_attr;
    *(fi->ep_attr)     = verbs_ep_attr;
    *(fi->domain_attr) = verbs_domain_attr;
    *(fi->fabric_attr) = verbs_fabric_attr;

    fi->ep_attr->type  = ep_dom->type;
    fi->tx_attr->caps  = ep_dom->caps;
    fi->rx_attr->caps  = ep_dom->caps;

    ret = fi_ibv_get_device_attrs(ctx, fi);
    if (ret)
        goto err;

    if (ep_dom->type == FI_EP_RDM) {
        fi->tx_attr->inject_size = FI_IBV_RDM_DFLT_BUFFERED_SSIZE;
        fi->tx_attr->iov_limit = 1;
        fi->tx_attr->rma_iov_limit = 1;
        if (!fi_param_get_int(&fi_ibv_prov, "rdm_buffer_size", &param)) {
            if (param > sizeof(struct fi_ibv_rdm_tagged_rndv_header)) {
                fi->tx_attr->inject_size = param;
            } else {
                FI_INFO(&fi_ibv_prov, FI_LOG_CORE,
                        "rdm_buffer_size too small, should be greater than %zu\n",
                        sizeof(struct fi_ibv_rdm_tagged_rndv_header));
                ret = -FI_EINVAL;
                goto err;
            }
        }
    }

    switch (ctx->device->transport_type) {
    case IBV_TRANSPORT_IB:
        if (ibv_query_gid(ctx, 1, 0, &gid)) {
            VERBS_INFO_ERRNO(FI_LOG_FABRIC, "ibv_query_gid", errno);
            ret = -errno;
            goto err;
        }

        name_len = strlen(VERBS_IB_PREFIX) + INET6_ADDRSTRLEN;
        if (!(fi->fabric_attr->name = calloc(1, name_len + 1))) {
            ret = -FI_ENOMEM;
            goto err;
        }

        snprintf(fi->fabric_attr->name, name_len, VERBS_IB_PREFIX "%lx",
                 gid.global.subnet_prefix);

        fi->ep_attr->protocol = (ep_dom == &verbs_msg_domain) ?
                                FI_PROTO_RDMA_CM_IB_RC : FI_PROTO_IB_RDM;
        break;
    case IBV_TRANSPORT_IWARP:
        fi->fabric_attr->name = strdup(VERBS_IWARP_FABRIC);
        if (!fi->fabric_attr->name) {
            ret = -FI_ENOMEM;
            goto err;
        }
        if (ep_dom == &verbs_msg_domain) {
            fi->ep_attr->protocol = FI_PROTO_IWARP;
            fi->tx_attr->op_flags = VERBS_TX_OP_FLAGS_IWARP;
        } else {
            fi->ep_attr->protocol = FI_PROTO_IWARP_RDM;
            fi->tx_attr->op_flags = VERBS_TX_OP_FLAGS_IWARP_RDM;
        }
        break;
    default:
        FI_INFO(&fi_ibv_prov, FI_LOG_CORE, "Unknown transport type\n");
        ret = -FI_ENODATA;
        goto err;
    }

    name_len = strlen(ctx->device->name) + strlen(ep_dom->suffix);
    fi->domain_attr->name = malloc(name_len + 1);
    if (!fi->domain_attr->name) {
        ret = -FI_ENOMEM;
        goto err;
    }

    snprintf(fi->domain_attr->name, name_len + 1, "%s%s",
             ctx->device->name, ep_dom->suffix);
    fi->domain_attr->name[name_len] = '\0';

    *info = fi;
    return 0;
err:
    fi_freeinfo(fi);
    return ret;
}
int set_up_connection(struct pingpong_context *ctx,
                      struct perftest_parameters *user_param,
                      struct pingpong_dest *my_dest)
{
    int num_of_qps = user_param->num_of_qps;
    int num_of_qps_per_port = user_param->num_of_qps / 2;
    int i;
    int is_ipv4;
    union ibv_gid temp_gid;
    union ibv_gid temp_gid2;
    struct ibv_port_attr attr;

    srand48(getpid() * time(NULL));

    /* In XRC with bidirectional traffic there are send QPs and recv QPs;
     * the actual number of send/recv QPs is num_of_qps / 2. */
    if ((user_param->connection_type == DC || user_param->use_xrc) &&
        (user_param->duplex || user_param->tst == LAT)) {
        num_of_qps /= 2;
        num_of_qps_per_port = num_of_qps / 2;
    }

    if (user_param->gid_index != -1) {
        if (ibv_query_port(ctx->context, user_param->ib_port, &attr))
            return 0;

        if (user_param->use_gid_user) {
            if (ibv_query_gid(ctx->context, user_param->ib_port,
                              user_param->gid_index, &temp_gid)) {
                return -1;
            }
        } else {
            for (i = 0; i < attr.gid_tbl_len; i++) {
                if (ibv_query_gid(ctx->context, user_param->ib_port, i, &temp_gid)) {
                    return -1;
                }
                is_ipv4 = ipv6_addr_v4mapped((struct in6_addr *)temp_gid.raw);
                if ((user_param->ipv6 && !is_ipv4) || (!user_param->ipv6 && is_ipv4)) {
                    user_param->gid_index = i;
                    break;
                }
            }
        }
    }

    if (user_param->dualport == ON) {
        if (user_param->gid_index2 != -1) {
            if (ibv_query_port(ctx->context, user_param->ib_port2, &attr))
                return 0;

            if (user_param->use_gid_user) {
                if (ibv_query_gid(ctx->context, user_param->ib_port2,
                                  user_param->gid_index, &temp_gid2))
                    return -1;
            } else {
                for (i = 0; i < attr.gid_tbl_len; i++) {
                    if (ibv_query_gid(ctx->context, user_param->ib_port2, i, &temp_gid2)) {
                        return -1;
                    }
                    is_ipv4 = ipv6_addr_v4mapped((struct in6_addr *)temp_gid2.raw);
                    if ((user_param->ipv6 && !is_ipv4) || (!user_param->ipv6 && is_ipv4)) {
                        user_param->gid_index2 = i;
                        break;
                    }
                }
            }
        }
    }

    for (i = 0; i < user_param->num_of_qps; i++) {
        if (user_param->dualport == ON) {
            /* The first half of the QPs are for ib_port and the second half
             * are for ib_port2. In XRC with bidirectional traffic, the first
             * half of the QPs are xrc_send QPs and the second half are
             * xrc_recv QPs; the first half of the send/recv QPs are for
             * ib_port1 and the second half are for ib_port2. */
            if (i % num_of_qps < num_of_qps_per_port) {
                my_dest[i].lid = ctx_get_local_lid(ctx->context, user_param->ib_port);
                my_dest[i].gid_index = user_param->gid_index;
            } else {
                my_dest[i].lid = ctx_get_local_lid(ctx->context, user_param->ib_port2);
                my_dest[i].gid_index = user_param->gid_index2;
            }
        } else {
            /* single-port case */
            my_dest[i].lid = ctx_get_local_lid(ctx->context, user_param->ib_port);
            my_dest[i].gid_index = user_param->gid_index;
        }

        my_dest[i].qpn  = ctx->qp[i]->qp_num;
        my_dest[i].psn  = lrand48() & 0xffffff;
        my_dest[i].rkey = ctx->mr->rkey;

        /* Each QP advertises its own receive buffer address. */
        my_dest[i].out_reads = user_param->out_reads;
        my_dest[i].vaddr = (uintptr_t)ctx->buf +
                           (user_param->num_of_qps + i) * BUFF_SIZE(ctx->size, ctx->cycle_buffer);

        if (user_param->dualport == ON) {
            if (i % num_of_qps < num_of_qps_per_port)
                memcpy(my_dest[i].gid.raw, temp_gid.raw, 16);
            else
                memcpy(my_dest[i].gid.raw, temp_gid2.raw, 16);
        } else {
            memcpy(my_dest[i].gid.raw, temp_gid.raw, 16);
        }

        /* We do not fail the test upon LID 0 when running over RoCE.
        if ((user_param->gid_index < 0) ||
            ((user_param->gid_index2 < 0) && (user_param->dualport == ON))) {
            if (!my_dest[i].lid) {
                fprintf(stderr, " Local lid 0x0 detected. Is an SM running?\n");
                return -1;
            }
        }
        */
    }

#ifdef HAVE_XRCD
    if (user_param->use_xrc) {
        for (i = 0; i < user_param->num_of_qps; i++) {
            if (ibv_get_srq_num(ctx->srq, &(my_dest[i].srqn))) {
                fprintf(stderr, "Couldn't get SRQ number\n");
                return 1;
            }
        }
    }
#endif

#ifdef HAVE_DC
    if (user_param->machine == SERVER || user_param->duplex || user_param->tst == LAT) {
        if (user_param->connection_type == DC) {
            for (i = 0; i < user_param->num_of_qps; i++) {
                if (ibv_get_srq_num(ctx->srq, &(my_dest[i].srqn))) {
                    fprintf(stderr, "Couldn't get SRQ number\n");
                    return 1;
                }
            }
        }
    }
#endif

    return 0;
}