/* * Create SRQ and listen for XRC SRQN lookup request. */ static int xrc_create_srq_listen(struct sockaddr *addr, socklen_t addr_len) { struct rdma_addrinfo rai; struct sockaddr_storage ss; struct ibv_srq_init_attr attr; int ret; memset(&rai, 0, sizeof rai); rai.ai_flags = RAI_PASSIVE; rai.ai_family = addr->sa_family; rai.ai_qp_type = IBV_QPT_UD; /* for now */ rai.ai_port_space = RDMA_PS_IB; memcpy(&ss, addr, addr_len); rai.ai_src_len = addr_len; rai.ai_src_addr = (struct sockaddr *) &ss; ((struct sockaddr_in *) &ss)->sin_port = htons((short) atoi(port) + 1); ret = rdma_create_ep(&srq_id, &rai, NULL, NULL); if (ret) { printf("rdma_create_ep srq ep %d\n", errno); return ret; } if (!srq_id->verbs) { printf("rdma_create_ep failed to bind to device.\n"); printf("XRC tests cannot use loopback addressing\n"); return -1; } memset(&attr, 0, sizeof attr); attr.attr.max_wr = 1; attr.attr.max_sge = 1; attr.srq_type = IBV_SRQT_XRC; attr.ext.xrc.xrcd = ibv_open_xrcd(srq_id->verbs, -1, 0); if (!attr.ext.xrc.xrcd) { printf("Unable to open xrcd\n"); return -1; } ret = rdma_create_srq(srq_id, NULL, &attr); if (ret) { printf("Unable to create srq %d\n", errno); return ret; } ret = rdma_listen(srq_id, 0); if (ret) { printf("rdma_listen srq id %d\n", errno); return ret; } return 0; }
/* This func. opens XRC domain */ int mca_btl_openib_open_xrc_domain(struct mca_btl_openib_device_t *device) { int len; char *xrc_file_name; const char *dev_name; #if OPAL_HAVE_CONNECTX_XRC_DOMAINS struct ibv_xrcd_init_attr xrcd_attr; #endif dev_name = ibv_get_device_name(device->ib_dev); len = asprintf(&xrc_file_name, "%s"OPAL_PATH_SEP"openib_xrc_domain_%s", opal_process_info.job_session_dir, dev_name); if (0 > len) { BTL_ERROR(("Failed to allocate memomry for XRC file name: %s\n", strerror(errno))); return OPAL_ERROR; } device->xrc_fd = open(xrc_file_name, O_CREAT, S_IWUSR|S_IRUSR); if (0 > device->xrc_fd) { BTL_ERROR(("Failed to open XRC domain file %s, errno says %s\n", xrc_file_name,strerror(errno))); free(xrc_file_name); return OPAL_ERROR; } #if OPAL_HAVE_CONNECTX_XRC_DOMAINS memset(&xrcd_attr, 0, sizeof xrcd_attr); xrcd_attr.comp_mask = IBV_XRCD_INIT_ATTR_FD | IBV_XRCD_INIT_ATTR_OFLAGS; xrcd_attr.fd = device->xrc_fd; xrcd_attr.oflags = O_CREAT; device->xrcd = ibv_open_xrcd(device->ib_dev_context, &xrcd_attr); if (NULL == device->xrcd) { #else device->xrc_domain = ibv_open_xrc_domain(device->ib_dev_context, device->xrc_fd, O_CREAT); if (NULL == device->xrc_domain) { #endif BTL_ERROR(("Failed to open XRC domain\n")); close(device->xrc_fd); free(xrc_file_name); return OPAL_ERROR; } return OPAL_SUCCESS; } /* This func. closes XRC domain */ int mca_btl_openib_close_xrc_domain(struct mca_btl_openib_device_t *device) { #if OPAL_HAVE_CONNECTX_XRC_DOMAINS if (NULL == device->xrcd) { #else if (NULL == device->xrc_domain) { #endif /* No XRC domain, just exit */ return OPAL_SUCCESS; } #if OPAL_HAVE_CONNECTX_XRC_DOMAINS if (ibv_close_xrcd(device->xrcd)) { #else if (ibv_close_xrc_domain(device->xrc_domain)) { #endif BTL_ERROR(("Failed to close XRC domain, errno %d says %s\n", device->xrc_fd, strerror(errno))); return OPAL_ERROR; } /* do we need to check exit status */ if (close(device->xrc_fd)) { BTL_ERROR(("Failed to close XRC file descriptor, errno %d says %s\n", device->xrc_fd, strerror(errno))); return OPAL_ERROR; } return OPAL_SUCCESS; } static void ib_address_constructor(ib_address_t *ib_addr) { ib_addr->key = NULL; ib_addr->subnet_id = 0; ib_addr->lid = 0; ib_addr->status = MCA_BTL_IB_ADDR_CLOSED; ib_addr->qp = NULL; OBJ_CONSTRUCT(&ib_addr->addr_lock, opal_mutex_t); OBJ_CONSTRUCT(&ib_addr->pending_ep, opal_list_t); } static void ib_address_destructor(ib_address_t *ib_addr) { if (NULL != ib_addr->key) { free(ib_addr->key); } OBJ_DESTRUCT(&ib_addr->addr_lock); OBJ_DESTRUCT(&ib_addr->pending_ep); } static int ib_address_init(ib_address_t *ib_addr, uint16_t lid, uint64_t s_id, opal_jobid_t ep_jobid) { ib_addr->key = malloc(SIZE_OF3(s_id, lid, ep_jobid)); if (NULL == ib_addr->key) { BTL_ERROR(("Failed to allocate memory for key\n")); return OPAL_ERROR; } memset(ib_addr->key, 0, SIZE_OF3(s_id, lid, ep_jobid)); /* creating the key = lid + s_id + ep_jobid */ memcpy(ib_addr->key, &lid, sizeof(lid)); memcpy((void*)((char*)ib_addr->key + sizeof(lid)), &s_id, sizeof(s_id)); memcpy((void*)((char*)ib_addr->key + sizeof(lid) + sizeof(s_id)), &ep_jobid, sizeof(ep_jobid)); /* caching lid and subnet id */ ib_addr->subnet_id = s_id; ib_addr->lid = lid; return OPAL_SUCCESS; } /* Create new entry in hash table for subnet_id and lid, * update the endpoint pointer. * Before call to this function you need to protect with */ int mca_btl_openib_ib_address_add_new (uint16_t lid, uint64_t s_id, opal_jobid_t ep_jobid, mca_btl_openib_endpoint_t *ep) { void *tmp; int ret = OPAL_SUCCESS; struct ib_address_t *ib_addr = OBJ_NEW(ib_address_t); ret = ib_address_init(ib_addr, lid, s_id, ep_jobid); if (OPAL_SUCCESS != ret ) { BTL_ERROR(("XRC Internal error. Failed to init ib_addr\n")); OBJ_DESTRUCT(ib_addr); return ret; } /* is it already in the table ?*/ OPAL_THREAD_LOCK(&mca_btl_openib_component.ib_lock); if (OPAL_SUCCESS != opal_hash_table_get_value_ptr(&mca_btl_openib_component.ib_addr_table, ib_addr->key, SIZE_OF3(s_id, lid, ep_jobid), &tmp)) { /* It is new one, lets put it on the table */ ret = opal_hash_table_set_value_ptr(&mca_btl_openib_component.ib_addr_table, ib_addr->key, SIZE_OF3(s_id, lid, ep_jobid), (void*)ib_addr); if (OPAL_SUCCESS != ret) { BTL_ERROR(("XRC Internal error." " Failed to add element to mca_btl_openib_component.ib_addr_table\n")); OPAL_THREAD_UNLOCK(&mca_btl_openib_component.ib_lock); OBJ_DESTRUCT(ib_addr); return ret; } /* update the endpoint with pointer to ib address */ ep->ib_addr = ib_addr; } else { /* so we have this one in the table, just add the pointer to the endpoint */ ep->ib_addr = (ib_address_t *)tmp; assert(lid == ep->ib_addr->lid && s_id == ep->ib_addr->subnet_id); OBJ_DESTRUCT(ib_addr); } OPAL_THREAD_UNLOCK(&mca_btl_openib_component.ib_lock); return ret; }
static int pp_init_ctx(char *ib_devname) { struct ibv_srq_init_attr_ex attr; struct ibv_xrcd_init_attr xrcd_attr; struct ibv_port_attr port_attr; ctx.recv_qp = calloc(ctx.num_clients, sizeof *ctx.recv_qp); ctx.send_qp = calloc(ctx.num_clients, sizeof *ctx.send_qp); ctx.rem_dest = calloc(ctx.num_clients, sizeof *ctx.rem_dest); if (!ctx.recv_qp || !ctx.send_qp || !ctx.rem_dest) return 1; if (open_device(ib_devname)) { fprintf(stderr, "Failed to open device\n"); return 1; } if (pp_get_port_info(ctx.context, ctx.ib_port, &port_attr)) { fprintf(stderr, "Failed to get port info\n"); return 1; } ctx.lid = port_attr.lid; if (port_attr.link_layer != IBV_LINK_LAYER_ETHERNET && !ctx.lid) { fprintf(stderr, "Couldn't get local LID\n"); return 1; } ctx.buf = memalign(page_size, ctx.size); if (!ctx.buf) { fprintf(stderr, "Couldn't allocate work buf.\n"); return 1; } memset(ctx.buf, 0, ctx.size); if (ctx.use_event) { ctx.channel = ibv_create_comp_channel(ctx.context); if (!ctx.channel) { fprintf(stderr, "Couldn't create completion channel\n"); return 1; } } ctx.pd = ibv_alloc_pd(ctx.context); if (!ctx.pd) { fprintf(stderr, "Couldn't allocate PD\n"); return 1; } ctx.mr = ibv_reg_mr(ctx.pd, ctx.buf, ctx.size, IBV_ACCESS_LOCAL_WRITE); if (!ctx.mr) { fprintf(stderr, "Couldn't register MR\n"); return 1; } ctx.fd = open("/tmp/xrc_domain", O_RDONLY | O_CREAT, S_IRUSR | S_IRGRP); if (ctx.fd < 0) { fprintf(stderr, "Couldn't create the file for the XRC Domain " "but not stopping %d\n", errno); ctx.fd = -1; } memset(&xrcd_attr, 0, sizeof xrcd_attr); xrcd_attr.comp_mask = IBV_XRCD_INIT_ATTR_FD | IBV_XRCD_INIT_ATTR_OFLAGS; xrcd_attr.fd = ctx.fd; xrcd_attr.oflags = O_CREAT; ctx.xrcd = ibv_open_xrcd(ctx.context, &xrcd_attr); if (!ctx.xrcd) { fprintf(stderr, "Couldn't Open the XRC Domain %d\n", errno); return 1; } ctx.recv_cq = ibv_create_cq(ctx.context, ctx.num_clients, &ctx.recv_cq, ctx.channel, 0); if (!ctx.recv_cq) { fprintf(stderr, "Couldn't create recv CQ\n"); return 1; } if (ctx.use_event) { if (ibv_req_notify_cq(ctx.recv_cq, 0)) { fprintf(stderr, "Couldn't request CQ notification\n"); return 1; } } ctx.send_cq = ibv_create_cq(ctx.context, ctx.num_clients, NULL, NULL, 0); if (!ctx.send_cq) { fprintf(stderr, "Couldn't create send CQ\n"); return 1; } memset(&attr, 0, sizeof attr); attr.attr.max_wr = ctx.num_clients; attr.attr.max_sge = 1; attr.comp_mask = IBV_SRQ_INIT_ATTR_TYPE | IBV_SRQ_INIT_ATTR_XRCD | IBV_SRQ_INIT_ATTR_CQ | IBV_SRQ_INIT_ATTR_PD; attr.srq_type = IBV_SRQT_XRC; attr.xrcd = ctx.xrcd; attr.cq = ctx.recv_cq; attr.pd = ctx.pd; ctx.srq = ibv_create_srq_ex(ctx.context, &attr); if (!ctx.srq) { fprintf(stderr, "Couldn't create SRQ\n"); return 1; } if (create_qps()) return 1; return 0; }