예제 #1
0
파일: psoib.c 프로젝트: JonBau/pscom
static
void psoib_scan_hca_ports(struct ibv_device *ib_dev)
{
    struct ibv_context *ctx;
    struct ibv_device_attr device_attr;
    int rc;
    unsigned port_cnt;
    unsigned port;
    const char *dev_name;

    dev_name =ibv_get_device_name(ib_dev);
    if (!dev_name) dev_name = "unknown";

    ctx = ibv_open_device(ib_dev);
    if (!ctx) goto err_open_dev;

    rc = ibv_query_device(ctx, &device_attr);
    if (!rc) {
	port_cnt = device_attr.phys_port_cnt;
	if (port_cnt > 128) port_cnt = 128;
    } else {
	// Query failed. Assume 2 ports.
	port_cnt = 2;
    }

    for (port = 1; port <= port_cnt; port++) {
	struct ibv_port_attr port_attr;
	enum ibv_port_state port_state;
	const char *marker;

	rc = ibv_query_port(ctx, port, &port_attr);
	port_state = !rc ? port_attr.state : 999 /* unknown */;

	marker = "";
	if (port_state == IBV_PORT_ACTIVE &&
	    (!psoib_hca || !strcmp(dev_name, psoib_hca)) &&
	    (!psoib_port || psoib_port == port)) {
	    // use this port for the communication:

	    if (!psoib_hca) psoib_hca = strdup(dev_name);
	    if (!psoib_port) psoib_port = port;
	    marker = "*";
	}

	psoib_dprint(3, "IB port <%s:%u>: %s%s",
		     dev_name, port, port_state_str(port_state), marker);
    }

    if (ctx) ibv_close_device(ctx);

err_open_dev:
    return;
}
예제 #2
0
int rdma_backend_query_port(RdmaBackendDev *backend_dev,
                            struct ibv_port_attr *port_attr)
{
    int rc;

    rc = ibv_query_port(backend_dev->context, backend_dev->port_num, port_attr);
    if (rc) {
        rdma_error_report("ibv_query_port fail, rc=%d, errno=%d", rc, errno);
        return -EIO;
    }

    return 0;
}
예제 #3
0
static int fi_ibv_get_device_attrs(struct ibv_context *ctx, struct fi_info *info)
{
	struct ibv_device_attr device_attr;
	struct ibv_port_attr port_attr;
	int ret = 0;

	ret = ibv_query_device(ctx, &device_attr);
	if (ret) {
		VERBS_INFO_ERRNO(FI_LOG_FABRIC, "ibv_query_device", errno);
		return -errno;
	}

	info->domain_attr->cq_cnt 		= device_attr.max_cq;
	info->domain_attr->ep_cnt 		= device_attr.max_qp;
	info->domain_attr->tx_ctx_cnt 		= MIN(info->domain_attr->tx_ctx_cnt, device_attr.max_qp);
	info->domain_attr->rx_ctx_cnt 		= MIN(info->domain_attr->rx_ctx_cnt, device_attr.max_qp);
	info->domain_attr->max_ep_tx_ctx 	= MIN(info->domain_attr->tx_ctx_cnt, device_attr.max_qp);
	info->domain_attr->max_ep_rx_ctx 	= MIN(info->domain_attr->rx_ctx_cnt, device_attr.max_qp);
	info->domain_attr->max_ep_srx_ctx	= device_attr.max_qp;
	info->domain_attr->mr_cnt		= device_attr.max_mr;

	if (info->ep_attr->type == FI_EP_RDM)
		info->domain_attr->cntr_cnt	= device_attr.max_qp * 4;

	info->tx_attr->size 			= device_attr.max_qp_wr;
	info->tx_attr->iov_limit 		= device_attr.max_sge;
	info->tx_attr->rma_iov_limit		= device_attr.max_sge;

	info->rx_attr->size 			= device_attr.max_srq_wr ?
						  MIN(device_attr.max_qp_wr,
						      device_attr.max_srq_wr) :
						      device_attr.max_qp_wr;
	info->rx_attr->iov_limit 		= MIN(device_attr.max_sge,
						      device_attr.max_srq_sge);

	ret = fi_ibv_get_qp_cap(ctx, info);
	if (ret)
		return ret;

	ret = ibv_query_port(ctx, 1, &port_attr);
	if (ret) {
		VERBS_INFO_ERRNO(FI_LOG_FABRIC, "ibv_query_port", errno);
		return -errno;
	}

	info->ep_attr->max_msg_size 		= port_attr.max_msg_sz;
	info->ep_attr->max_order_raw_size 	= port_attr.max_msg_sz;
	info->ep_attr->max_order_waw_size	= port_attr.max_msg_sz;

	return 0;
}
예제 #4
0
static uint8_t set_link_layer(struct ibv_context *context, uint8_t ib_port) {
  struct ibv_port_attr port_attr;

  if (ibv_query_port(context,ib_port,&port_attr)) {
    perror("Unable to query port attributes\n");
    return LINK_FAILURE;
  }
  if (port_attr.state != IBV_PORT_ACTIVE) {
    perror("Port number invalid state \n");
    return LINK_FAILURE;
  }
  if (port_attr.link_layer != IBV_LINK_LAYER_ETHERNET) {
    perror("Unable to determine link layer \n"); return LINK_FAILURE;
  }
  return port_attr.link_layer;
}
예제 #5
0
파일: mckey.c 프로젝트: lzyang1995/DARE
static int verify_test_params(struct cmatest_node *node)
{
	struct ibv_port_attr port_attr;
	int ret;

	ret = ibv_query_port(node->cma_id->verbs, node->cma_id->port_num,
			     &port_attr);
	if (ret)
		return ret;

	if (message_count && message_size > (1 << (port_attr.active_mtu + 7))) {
		printf("mckey: message_size %d is larger than active mtu %d\n",
		       message_size, 1 << (port_attr.active_mtu + 7));
		return -EINVAL;
	}

	return 0;
}
예제 #6
0
파일: verbs.c 프로젝트: kento/libpibverbs
//uint16_t pc_init(struct ibv_pd* pd, struct pc_ibv_co *input_co, int device_num)
uint16_t pc_init(struct pc_ibv_co *input_co, int device_num)
{
  struct ibv_device **dev_list;
  struct ibv_device *dev;
  struct ibv_context *ctx;
  struct ibv_port_attr pattr;
  struct pc_hca *hca;
  uint16_t lid;
  int num_of_hcas;

  dev_list  = ibv_get_device_list(&num_of_hcas);
  if (num_of_hcas <= device_num) {
    fprintf(stderr, "Invalide devic_num: %d @%s:%d\n", device_num, __FILE__, __LINE__);
    exit(1);
  }
  dev = dev_list[device_num];
  ctx = ibv_open_device(dev);
  ibv_query_port(ctx, 1, &pattr);
  lid = pattr.lid;

  if (init_hcas_q == 0) {
    lq_init(&hcas_q);
    lq_init(&ctx_lid_q);
    init_hcas_q = 1;
  } 
  /*remove*/
  else 
      {
      return -1; 
    }
  /*remove*/

  hca = (struct pc_hca*) malloc(sizeof(struct pc_hca));
  hca->lid = lid;
  hca->co = input_co;
  hca->co->pdg_num = 0;
  hca->co->pdg_size = 0;
  lq_enq(&hcas_q, hca);

  lq_init(&pc_q);
  ibv_free_device_list(dev_list);
  return lid;
}
예제 #7
0
파일: send_lat.c 프로젝트: Cai900205/test
static int set_mcast_group(struct pingpong_context *ctx,
						   struct perftest_parameters *user_param,
						   struct mcast_parameters *mcg_params) {

	int i;
	struct ibv_port_attr port_attr;

	if (ibv_query_gid(ctx->context,user_param->ib_port,user_param->gid_index,&mcg_params->port_gid)) {
			return 1;
	}

	if (ibv_query_pkey(ctx->context,user_param->ib_port,DEF_PKEY_IDX,&mcg_params->pkey)) {
		return 1;
	}

	if (ibv_query_port(ctx->context,user_param->ib_port,&port_attr)) {
		return 1;
	}
	mcg_params->sm_lid  = port_attr.sm_lid;
	mcg_params->sm_sl   = port_attr.sm_sl;
	mcg_params->ib_port = user_param->ib_port;
	mcg_params->user_mgid = user_param->user_mgid;
	set_multicast_gid(mcg_params,ctx->qp[0]->qp_num,(int)user_param->machine);

	if (!strcmp(link_layer_str(user_param->link_type),"IB")) {
		// Request for Mcast group create registery in SM.
		if (join_multicast_group(SUBN_ADM_METHOD_SET,mcg_params)) {
			fprintf(stderr," Failed to Join Mcast request\n");
			return 1;
		}
	}

	for (i=0; i < user_param->num_of_qps; i++) {

		if (ibv_attach_mcast(ctx->qp[i],&mcg_params->mgid,mcg_params->mlid)) {
			fprintf(stderr, "Couldn't attach QP to MultiCast group");
			return 1;
		}
	}
	mcg_params->mcast_state |= MCAST_IS_ATTACHED;

	return 0;
}
예제 #8
0
파일: verbs.c 프로젝트: kento/libpibverbs
void __pc_ibv_poll_cq(struct ibv_cq *cq, int num_entries, struct ibv_wc *wc)
{

  struct pc_length *len;

  if (init_hcas_q == 0) {
    return;
  }
  if (wc->opcode == IBV_WC_RDMA_WRITE) {
      lq_init_it(&pc_q);

      while ((len = (struct pc_length*)lq_next(&pc_q)) != NULL) {
	struct pc_hca *hca;
	uint16_t lid;
	if (len->ctx == cq->context) {
	  lid = get_lid(cq->context);
	  hca = get_pc_hca(lid);

	  pthread_mutex_lock(&pc_mutex);
	  hca->co->pdg_num--;
	  hca->co->pdg_size = hca->co->pdg_size - len->length;
	  pthread_mutex_unlock(&pc_mutex);

	  lq_remove(&pc_q, len);
	  free(len);
	  lq_fin_it(&pc_q);

#ifdef DEBUG
	  	  fprintf(stderr, "[ %f ] post_cq   : pdg_num: %lu , pdg_size: %lu \n", get_dtime(), hca->co->pdg_num, hca->co->pdg_size);
#ifdef DETAIL
	  	  struct ibv_port_attr pattr;
	  	  ibv_query_port(cq->context, 1, &pattr);
	  	  fprintf(stderr, "\tcp:%p, context:%p(more_ops:%p, abi_compat:%p, num_comp_vectors:%d, slid:%u), :\n", cq, cq->context, cq->context->more_ops, cq->context->abi_compat, cq->context->num_comp_vectors, pattr.lid);
	  	  fprintf(stderr, "\twr_id:%lu, status:%d, wc->opcode:%d, vendor_err:%u, byte_len:%u, imm_data:%u, qp_num:%p src_qp:%p, wc_flags:%d,  pkey_index:%u, slid:%u, sl:%u, dlid_path_bits:%u, num_entries:%d \n", wc->wr_id,  wc->status, wc->opcode, wc->vendor_err, wc->byte_len, wc->imm_data, wc->qp_num, wc->src_qp, wc->wc_flags, wc->pkey_index, wc->slid, wc->sl, wc->dlid_path_bits, num_entries);
#endif
#endif
	  return;
	}
      }
      lq_fin_it(&pc_q);
  }
  return;
}
static inline int mca_oob_ud_port_setup (mca_oob_ud_port_t *port)
{
    int rc;
    struct ibv_port_attr port_attr;

    rc = ibv_query_port (port->device->ib_context, port->port_num, &port_attr);
    if (0 != rc || IBV_PORT_ACTIVE != port_attr.state || 0 == port_attr.lid) {
        /* skip this port */
        return ORTE_ERROR;
    }

    port->lid = port_attr.lid;
    port->mtu = port_attr.active_mtu > IBV_MTU_4096 ? 2048 : port_mtus[port_attr.active_mtu];

    OPAL_OUTPUT_VERBOSE((5, mca_oob_base_output, "%s oob:ud:port_setup found port: num = %u,"
                         "lid = %u, mtu = %u", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         port->port_num, port->lid, port->mtu));

    return rc;
}
예제 #10
0
파일: verbs.c 프로젝트: kento/libpibverbs
static uint16_t get_lid(struct ibv_context *ctx) 
{
  struct ibv_port_attr pattr;
  struct ctx_lid *cl;

  lq_init_it(&ctx_lid_q);
  while ((cl = (struct ctx_lid*)lq_next(&ctx_lid_q)) != NULL) {
    if (cl->ctx == ctx) {
      lq_fin_it(&ctx_lid_q);
      return cl->lid;
    }
  }
  lq_fin_it(&ctx_lid_q);
  
  ibv_query_port(ctx, 1, &pattr);
  cl = (struct ctx_lid*)malloc(sizeof(struct ctx_lid));
  cl->ctx = ctx;
  cl->lid = pattr.lid;
  lq_enq(&ctx_lid_q, cl);
  return pattr.lid;
}
예제 #11
0
파일: openib.c 프로젝트: bcernohous/ga
int get_lid(void)
{
    struct ibv_port_attr port_attr[MAX_PORTS];
    int i, j, count;
    int active_port_search_count;

    int active_port_found = 0;

    for (j = 1; j <= opts.num_ports; j++) {
        if (!ibv_query_port(hca.context, j, &port_attr[j - 1]) 
                && (port_attr[j - 1].state == IBV_PORT_ACTIVE)) {
            active_port_found = 1;
            for (i = 0; i < nprocs; i++)
                conn.lid[i] = port_attr[j - 1].lid;
            return 0;
        } 
    }

    assert(active_port_found);
    
    return 1;
}
예제 #12
0
/*
 * Find a list of ibv_ports matching a set of criteria.
 */
opal_list_t *opal_common_verbs_find_ports(const char *if_include,
                                          const char *if_exclude,
                                          int flags,
                                          int stream)
{
    int32_t num_devs;
    struct ibv_device **devices;
    struct ibv_device *device;
    struct ibv_context *device_context;
    struct ibv_device_attr device_attr;
    struct ibv_port_attr port_attr;
    char **if_include_list = NULL, **if_exclude_list = NULL, **if_sanity_list = NULL;
    opal_common_verbs_device_item_t *di;
    opal_common_verbs_port_item_t *pi;
    int rc;
    uint32_t j;
    opal_list_t *port_list = NULL;
    bool want;

    /* Sanity check the include/exclude params */
    if (NULL != if_include && NULL != if_exclude) {
        return NULL;
    }

    /* Query all the IBV devices on the machine.  Use an ompi
       compatibility function, because how to get this list changed
       over the history of the IBV API. */
    devices = opal_ibv_get_device_list(&num_devs);
    if (0 == num_devs) {
        opal_output_verbose(5, stream, "no verbs interfaces found");
        return NULL;
    }

    opal_output_verbose(5, stream, "found %d verbs interface%s",
                        num_devs, (num_devs != 1) ? "s" : "");

    /* Allocate a list to fill */
    port_list = OBJ_NEW(opal_list_t);
    if (NULL == port_list) {
        return NULL;
    }

    if (NULL != if_include) {
        opal_output_verbose(5, stream, "finding verbs interfaces, including %s",
                            if_include);
        if_include_list = opal_argv_split(if_include, ',');
        if_sanity_list = opal_argv_copy(if_include_list);
    } else if (NULL != if_exclude) {
        opal_output_verbose(5, stream, "finding verbs interfaces, excluding %s",
                            if_exclude);
        if_exclude_list = opal_argv_split(if_exclude, ',');
        if_sanity_list = opal_argv_copy(if_exclude_list);
    }

    /* Now loop through all the devices.  Get the attributes for each
       port on each device to see if they match our selection
       criteria. */
    for (int32_t i = 0; (int32_t) i < num_devs; ++i) {
        /* See if this device is on the include/exclude sanity check
           list.  If it is, remove it from the sanity check list
           (i.e., we should end up with an empty list at the end if
           all entries in the sanity check list exist) */
        device = devices[i];
        check_sanity(&if_sanity_list, ibv_get_device_name(device), -1);

        opal_output_verbose(5, stream, "examining verbs interface: %s",
                            ibv_get_device_name(device));

        device_context = ibv_open_device(device);
        if (NULL == device_context) {
            opal_show_help("help-opal-common-verbs.txt",
                           "ibv_open_device fail", true,
                           opal_proc_local_get()->proc_hostname,
                           ibv_get_device_name(device),
                           errno, strerror(errno));
            goto err_free_port_list;
        }

        if (ibv_query_device(device_context, &device_attr)){
            opal_show_help("help-opal-common-verbs.txt",
                           "ibv_query_device fail", true,
                           opal_proc_local_get()->proc_hostname,
                           ibv_get_device_name(device),
                           errno, strerror(errno));
            goto err_free_port_list;
        }

        /* Now that we have the attributes of this device, remove all
           ports of this device from the sanity check list.  Note that
           IBV ports are indexed from 1, not 0. */
        for (j = 1; j <= device_attr.phys_port_cnt; j++) {
            check_sanity(&if_sanity_list, ibv_get_device_name(device), j);
        }

        /* Check the device-specific flags to see if we want this
           device */
        want = false;

        if (flags & OPAL_COMMON_VERBS_FLAGS_TRANSPORT_IB &&
            IBV_TRANSPORT_IB == device->transport_type) {
            opal_output_verbose(5, stream, "verbs interface %s has right type (IB)",
                                ibv_get_device_name(device));
            want = true;
        }
        if (flags & OPAL_COMMON_VERBS_FLAGS_TRANSPORT_IWARP &&
            IBV_TRANSPORT_IWARP == device->transport_type) {
            opal_output_verbose(5, stream, "verbs interface %s has right type (IWARP)",
                                ibv_get_device_name(device));
            want = true;
        }

        /* Check for RC or UD QP support */
        if (flags & OPAL_COMMON_VERBS_FLAGS_RC) {
            rc = opal_common_verbs_qp_test(device_context, flags);
            if (OPAL_SUCCESS == rc) {
                want = true;
                opal_output_verbose(5, stream,
                                    "verbs interface %s supports RC QPs",
                                    ibv_get_device_name(device));
            } else {
                opal_output_verbose(5, stream,
                                    "verbs interface %s failed to make RC QP",
                                    ibv_get_device_name(device));
            }
        }
        if (flags & OPAL_COMMON_VERBS_FLAGS_UD) {
            rc = opal_common_verbs_qp_test(device_context, flags);
            if (OPAL_SUCCESS == rc) {
                want = true;
                opal_output_verbose(5, stream,
                                    "verbs interface %s supports UD QPs",
                                    ibv_get_device_name(device));
            } else if (OPAL_ERR_TYPE_MISMATCH == rc) {
                opal_output_verbose(5, stream,
                                    "verbs interface %s made an RC QP! we don't want RC-capable devices",
                                    ibv_get_device_name(device));
            } else {
                opal_output_verbose(5, stream,
                                    "verbs interface %s failed to make UD QP",
                                    ibv_get_device_name(device));
            }
        }

        /* If we didn't want it, go to the next device */
        if (!want) {
            continue;
        }

        /* Make a device_item_t to hold the device information */
        di = OBJ_NEW(opal_common_verbs_device_item_t);
        if (NULL == di) {
            goto err_free_port_list;
        }
        di->device = device;
        di->context = device_context;
        di->device_attr = device_attr;
        di->device_name = strdup(ibv_get_device_name(device));

        /* Note IBV ports are 1 based (not 0 based) */
        for (j = 1; j <= device_attr.phys_port_cnt; j++) {

            /* If we don't want this port (based on if_include /
               if_exclude lists), skip it */
            if (!want_this_port(if_include_list, if_exclude_list, di, j)) {
                opal_output_verbose(5, stream, "verbs interface %s:%d: rejected by include/exclude",
                                    ibv_get_device_name(device), j);
                continue;
            }

            /* Query the port */
            if (ibv_query_port(device_context, (uint8_t) j, &port_attr)) {
                opal_show_help("help-opal-common-verbs.txt",
                               "ibv_query_port fail", true,
                               opal_proc_local_get()->proc_hostname,
                               ibv_get_device_name(device),
                               errno, strerror(errno));
                goto err_free_port_list;
            }

            /* We definitely only want ACTIVE ports */
            if (IBV_PORT_ACTIVE != port_attr.state) {
                opal_output_verbose(5, stream, "verbs interface %s:%d: not ACTIVE",
                                    ibv_get_device_name(device), j);
                continue;
            }

            /* Check the port-specific flags to see if we want this
               port */
            want = false;
            if (0 == flags) {
                want = true;
            }

            if ((flags & (OPAL_COMMON_VERBS_FLAGS_LINK_LAYER_IB |
                          OPAL_COMMON_VERBS_FLAGS_LINK_LAYER_ETHERNET)) ==
                 (OPAL_COMMON_VERBS_FLAGS_LINK_LAYER_IB |
                  OPAL_COMMON_VERBS_FLAGS_LINK_LAYER_ETHERNET)) {
                /* If they specified both link layers, then we want this port */
                want = true;
            } else if ((flags & (OPAL_COMMON_VERBS_FLAGS_LINK_LAYER_IB |
                                 OPAL_COMMON_VERBS_FLAGS_LINK_LAYER_ETHERNET)) == 0) {
                /* If they specified neither link layer, then we want this port */
                want = true;
            }
#if HAVE_DECL_IBV_LINK_LAYER_ETHERNET
            else if (flags & OPAL_COMMON_VERBS_FLAGS_LINK_LAYER_IB) {
                if (IBV_LINK_LAYER_INFINIBAND == port_attr.link_layer) {
                    want = true;
                } else {
                    opal_output_verbose(5, stream, "verbs interface %s:%d has wrong link layer (has %s, want IB)",
                                        ibv_get_device_name(device), j,
                                        link_layer_to_str(port_attr.link_layer));
                }
            } else if (flags & OPAL_COMMON_VERBS_FLAGS_LINK_LAYER_ETHERNET) {
                if (IBV_LINK_LAYER_ETHERNET == port_attr.link_layer) {
                    want = true;
                } else {
                    opal_output_verbose(5, stream, "verbs interface %s:%d has wrong link layer (has %s, want Ethernet)",
                                        ibv_get_device_name(device), j,
                                        link_layer_to_str(port_attr.link_layer));
                }
            }
#endif

            if (!want) {
                continue;
            }

            /* If we got this far, we want the port.  Make an item for it. */
            pi = OBJ_NEW(opal_common_verbs_port_item_t);
            if (NULL == pi) {
                goto err_free_port_list;
            }
            pi->device = di;
            pi->port_num = j;
            pi->port_attr = port_attr;
            OBJ_RETAIN(di);

            /* Add the port item to the list */
            opal_list_append(port_list, &pi->super);
            opal_output_verbose(5, stream, "found acceptable verbs interface %s:%d",
                                ibv_get_device_name(device), j);
        }

        /* We're done with the device; if some ports are using it, its
           ref count will be > 0, and therefore the device won't be
           deleted here. */
        OBJ_RELEASE(di);
    }

    /* Sanity check that the devices specified in the if_include /
       if_exclude lists actually existed.  If this is true, then the
       sanity list will now be empty.  If there are still items left
       on the list, then they didn't exist.  Bad.  Print a warning (if
       the warning is not disabled). */
    if (0 != opal_argv_count(if_sanity_list)) {
        if (opal_common_verbs_warn_nonexistent_if) {
            char *str = opal_argv_join(if_sanity_list, ',');
            opal_show_help("help-opal-common-verbs.txt", "nonexistent port",
                           true, opal_proc_local_get()->proc_hostname,
                           ((NULL != if_include) ? "in" : "ex"), str);
            free(str);

            /* Only warn once per process */
            opal_common_verbs_warn_nonexistent_if = false;
        }
    }
    if (NULL != if_sanity_list) {
        opal_argv_free(if_sanity_list);
    }

    opal_argv_free(if_include_list);
    opal_argv_free(if_exclude_list);

    /* All done! */
    opal_ibv_free_device_list(devices);
    return port_list;

 err_free_port_list:
    OPAL_LIST_RELEASE(port_list);
    opal_ibv_free_device_list(devices);

    if (NULL != if_sanity_list) {
        opal_argv_free(if_sanity_list);
    }

    opal_argv_free(if_include_list);
    opal_argv_free(if_exclude_list);

    return NULL;
}
예제 #13
0
파일: pingpong.c 프로젝트: wangzhiNJU/workd
int pp_get_port_info(struct ibv_context *context, int port,
		     struct ibv_port_attr *attr)
{
	return ibv_query_port(context, port, attr);
}
예제 #14
0
static int resources_create(struct resources *res)
{	
	struct ibv_device       **dev_list = NULL;
	struct ibv_qp_init_attr qp_init_attr;
	struct ibv_device 	*ib_dev = NULL;
	size_t 			size;
	int 			i;
	int 			mr_flags = 0;
	int 			cq_size = 0;
	int 			num_devices;

	/* if client side */
	if (config.server_name) {
		res->sock = sock_client_connect(config.server_name, config.tcp_port);
		if (res->sock < 0) {
			fprintf(stderr, "failed to establish TCP connection to server %s, port %d\n", 
				config.server_name, config.tcp_port);
			return -1;
		}
	} else {
		fprintf(stdout, "waiting on port %d for TCP connection\n", config.tcp_port);

		res->sock = sock_daemon_connect(config.tcp_port);
		if (res->sock < 0) {
			fprintf(stderr, "failed to establish TCP connection with client on port %d\n", 
				config.tcp_port);
			return -1;
		}
	}

	fprintf(stdout, "TCP connection was established\n");

	fprintf(stdout, "searching for IB devices in host\n");

	/* get device names in the system */
	dev_list = ibv_get_device_list(&num_devices);
	if (!dev_list) {
		fprintf(stderr, "failed to get IB devices list\n");
		return 1;
	}

	/* if there isn't any IB device in host */
	if (!num_devices) {
		fprintf(stderr, "found %d device(s)\n", num_devices);
		return 1;
	}

	fprintf(stdout, "found %d device(s)\n", num_devices);

	/* search for the specific device we want to work with */
	for (i = 0; i < num_devices; i ++) {
		if (!config.dev_name) {
			config.dev_name = strdup(ibv_get_device_name(dev_list[i])); 
			fprintf(stdout, "device not specified, using first one found: %s\n", config.dev_name);
		}
		if (!strcmp(ibv_get_device_name(dev_list[i]), config.dev_name)) {
			ib_dev = dev_list[i];
			break;
		}
	}

	/* if the device wasn't found in host */
	if (!ib_dev) {
		fprintf(stderr, "IB device %s wasn't found\n", config.dev_name);
		return 1;
	}

	/* get device handle */
	res->ib_ctx = ibv_open_device(ib_dev);
	if (!res->ib_ctx) {
		fprintf(stderr, "failed to open device %s\n", config.dev_name);
		return 1;
	}

	/* We are now done with device list, free it */
	ibv_free_device_list(dev_list);
	dev_list = NULL;
	ib_dev = NULL;

	/* query port properties  */
	if (ibv_query_port(res->ib_ctx, config.ib_port, &res->port_attr)) {
		fprintf(stderr, "ibv_query_port on port %u failed\n", config.ib_port);
		return 1;
	}

	/* allocate Protection Domain */
	res->pd = ibv_alloc_pd(res->ib_ctx);
	if (!res->pd) {
		fprintf(stderr, "ibv_alloc_pd failed\n");
		return 1;
	}

	/* each side will send only one WR, so Completion Queue with 1 entry is enough */
	cq_size = 1;
	res->cq = ibv_create_cq(res->ib_ctx, cq_size, NULL, NULL, 0);
	if (!res->cq) {
		fprintf(stderr, "failed to create CQ with %u entries\n", cq_size);
		return 1;
	}

	/* allocate the memory buffer that will hold the data */
	size = MSG_SIZE;
	res->buf = malloc(size);
	if (!res->buf) {
		fprintf(stderr, "failed to malloc %Zu bytes to memory buffer\n", size);
		return 1;
	}

	/* only in the daemon side put the message in the memory buffer */
	if (!config.server_name) {
		strcpy(res->buf, MSG);
		fprintf(stdout, "going to send the message: '%s'\n", res->buf);
	} else
		memset(res->buf, 0, size);

	/* register this memory buffer */
	mr_flags = (config.server_name) ? IBV_ACCESS_LOCAL_WRITE : 0;
	res->mr = ibv_reg_mr(res->pd, res->buf, size, mr_flags);
	if (!res->mr) {
		fprintf(stderr, "ibv_reg_mr failed with mr_flags=0x%x\n", mr_flags);
		return 1;
	}

	fprintf(stdout, "MR was registered with addr=%p, lkey=0x%x, rkey=0x%x, flags=0x%x\n",
			      res->buf, res->mr->lkey, res->mr->rkey, mr_flags);


	/* create the Queue Pair */
	memset(&qp_init_attr, 0, sizeof(qp_init_attr));

	qp_init_attr.qp_type    	= IBV_QPT_RC;
	qp_init_attr.sq_sig_all 	= 1;
	qp_init_attr.send_cq    	= res->cq;
	qp_init_attr.recv_cq    	= res->cq;
	qp_init_attr.cap.max_send_wr  	= 1;
	qp_init_attr.cap.max_recv_wr  	= 1;
	qp_init_attr.cap.max_send_sge 	= 1;
	qp_init_attr.cap.max_recv_sge 	= 1;

	res->qp = ibv_create_qp(res->pd, &qp_init_attr);
	if (!res->qp) {
		fprintf(stderr, "failed to create QP\n");
		return 1;
	}
	fprintf(stdout, "QP was created, QP number=0x%x\n", res->qp->qp_num);

	return 0;
}
예제 #15
0
파일: ib_hca.c 프로젝트: hpc/mvapich2-cce
/**
 * the first step in original MPID_nem_ib_setup_conn() function
 * open hca, create ptags  and create cqs
 */
int MPID_nem_ib_open_ports()
{
    int mpi_errno = MPI_SUCCESS;

    /* Infiniband Verb Structures */
    struct ibv_port_attr    port_attr;
    struct ibv_device_attr  dev_attr;

    int nHca; /* , curRank, rail_index ; */

    MPIDI_STATE_DECL(MPID_STATE_MPIDI_OPEN_HCA);
    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_OPEN_HCA);

    for (nHca = 0; nHca < ib_hca_num_hcas; nHca++) {
        if (ibv_query_device(hca_list[nHca].nic_context, &dev_attr)) {
            MPIU_ERR_SETFATALANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**fail",
                    "**fail %s", "Error getting HCA attributes");
        }

        /* detecting active ports */
        if (rdma_default_port < 0 || ib_hca_num_ports > 1) {
            int nPort;
            int k = 0;
            for (nPort = 1; nPort <= RDMA_DEFAULT_MAX_PORTS; nPort ++) {
                if ((! ibv_query_port(hca_list[nHca].nic_context, nPort, &port_attr)) &&
                            port_attr.state == IBV_PORT_ACTIVE &&
                            (port_attr.lid || (!port_attr.lid && use_iboeth))) {
                    if (use_iboeth) {
                        if (ibv_query_gid(hca_list[nHca].nic_context,
                                        nPort, 0, &hca_list[nHca].gids[k])) {
                            /* new error information function needed */
                            MPIU_ERR_SETFATALANDJUMP1(mpi_errno, MPI_ERR_OTHER,
                                    "**fail", "Failed to retrieve gid on rank %d", process_info.rank);
                        }
                        DEBUG_PRINT("[%d] %s(%d): Getting gid[%d][%d] for"
                                " port %d subnet_prefix = %llx,"
                                " intf_id = %llx\r\n",
                                process_info.rank, __FUNCTION__, __LINE__, nHca, k, k,
                                hca_list[nHca].gids[k].global.subnet_prefix,
                                hca_list[nHca].gids[k].global.interface_id);
                    } else {
                        hca_list[nHca].lids[k]    = port_attr.lid;
                    }
                    hca_list[nHca].ports[k++] = nPort;

                    if (check_attrs(&port_attr, &dev_attr)) {
                        MPIU_ERR_SETFATALANDJUMP1(mpi_errno, MPI_ERR_OTHER,
                                "**fail", "**fail %s",
                                "Attributes failed sanity check");
                    }
                }
            }
            if (k < ib_hca_num_ports) {
                MPIU_ERR_SETFATALANDJUMP1(mpi_errno, MPI_ERR_OTHER,
                        "**activeports", "**activeports %d", ib_hca_num_ports);
            }
        } else {
            if(ibv_query_port(hca_list[nHca].nic_context,
                        rdma_default_port, &port_attr)
                || (!port_attr.lid && !use_iboeth)
                || (port_attr.state != IBV_PORT_ACTIVE)) {
                MPIU_ERR_SETFATALANDJUMP1(mpi_errno, MPI_ERR_OTHER,
                        "**portquery", "**portquery %d", rdma_default_port);
            }

            hca_list[nHca].ports[0] = rdma_default_port;

            if (use_iboeth) {
                if (ibv_query_gid(hca_list[nHca].nic_context, 0, 0, &hca_list[nHca].gids[0])) {
                    /* new error function needed */
                    MPIU_ERR_SETFATALANDJUMP1(mpi_errno, MPI_ERR_OTHER,
                            "**fail", "Failed to retrieve gid on rank %d", process_info.rank);
                }

                if (check_attrs(&port_attr, &dev_attr)) {
                    MPIU_ERR_SETFATALANDJUMP1(mpi_errno, MPI_ERR_OTHER,
                            "**fail", "**fail %s", "Attributes failed sanity check");
                }
            } else {
                hca_list[nHca].lids[0]  = port_attr.lid;
            }
        }

        if (rdma_use_blocking) {
            hca_list[nHca].comp_channel = ibv_create_comp_channel(hca_list[nHca].nic_context);

            if (!hca_list[nHca].comp_channel) {
                MPIU_ERR_SETFATALANDSTMT1(mpi_errno, MPI_ERR_OTHER, goto fn_fail,
                        "**fail", "**fail %s", "cannot create completion channel");
            }

            hca_list[nHca].send_cq_hndl = NULL;
            hca_list[nHca].recv_cq_hndl = NULL;
            hca_list[nHca].cq_hndl = ibv_create_cq(hca_list[nHca].nic_context,
                    rdma_default_max_cq_size, NULL, hca_list[nHca].comp_channel, 0);
            if (!hca_list[nHca].cq_hndl) {
                MPIU_ERR_SETFATALANDSTMT1(mpi_errno, MPI_ERR_OTHER, goto fn_fail,
                        "**fail", "**fail %s", "cannot create cq");
            }

            if (ibv_req_notify_cq(hca_list[nHca].cq_hndl, 0)) {
                MPIU_ERR_SETFATALANDSTMT1(mpi_errno, MPI_ERR_OTHER, goto fn_fail,
                        "**fail", "**fail %s", "cannot request cq notification");
            }
예제 #16
0
static int
rdmasniff_activate(pcap_t *handle)
{
	struct pcap_rdmasniff *priv = handle->priv;
	struct ibv_qp_init_attr qp_init_attr;
	struct ibv_qp_attr qp_attr;
	struct ibv_flow_attr flow_attr;
	struct ibv_port_attr port_attr;
	int i;

	priv->context = ibv_open_device(priv->rdma_device);
	if (!priv->context) {
		pcap_snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
			      "Failed to open device %s", handle->opt.device);
		goto error;
	}

	priv->pd = ibv_alloc_pd(priv->context);
	if (!priv->pd) {
		pcap_snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
			      "Failed to alloc PD for device %s", handle->opt.device);
		goto error;
	}

	priv->channel = ibv_create_comp_channel(priv->context);
	if (!priv->channel) {
		pcap_snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
			      "Failed to create comp channel for device %s", handle->opt.device);
		goto error;
	}

	priv->cq = ibv_create_cq(priv->context, RDMASNIFF_NUM_RECEIVES,
				 NULL, priv->channel, 0);
	if (!priv->cq) {
		pcap_snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
			      "Failed to create CQ for device %s", handle->opt.device);
		goto error;
	}

	ibv_req_notify_cq(priv->cq, 0);

	memset(&qp_init_attr, 0, sizeof qp_init_attr);
	qp_init_attr.send_cq = qp_init_attr.recv_cq = priv->cq;
	qp_init_attr.cap.max_recv_wr = RDMASNIFF_NUM_RECEIVES;
	qp_init_attr.cap.max_recv_sge = 1;
	qp_init_attr.qp_type = IBV_QPT_RAW_PACKET;
	priv->qp = ibv_create_qp(priv->pd, &qp_init_attr);
	if (!priv->qp) {
		pcap_snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
			      "Failed to create QP for device %s", handle->opt.device);
		goto error;
	}

	memset(&qp_attr, 0, sizeof qp_attr);
	qp_attr.qp_state = IBV_QPS_INIT;
	qp_attr.port_num = priv->port_num;
	if (ibv_modify_qp(priv->qp, &qp_attr, IBV_QP_STATE | IBV_QP_PORT)) {
		pcap_snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
			      "Failed to modify QP to INIT for device %s", handle->opt.device);
		goto error;
	}

	memset(&qp_attr, 0, sizeof qp_attr);
	qp_attr.qp_state = IBV_QPS_RTR;
	if (ibv_modify_qp(priv->qp, &qp_attr, IBV_QP_STATE)) {
		pcap_snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
			      "Failed to modify QP to RTR for device %s", handle->opt.device);
		goto error;
	}

	memset(&flow_attr, 0, sizeof flow_attr);
	flow_attr.type = IBV_FLOW_ATTR_SNIFFER;
	flow_attr.size = sizeof flow_attr;
	flow_attr.port = priv->port_num;
	priv->flow = ibv_create_flow(priv->qp, &flow_attr);
	if (!priv->flow) {
		pcap_snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
			      "Failed to create flow for device %s", handle->opt.device);
		goto error;
	}

	handle->bufsize = RDMASNIFF_NUM_RECEIVES * RDMASNIFF_RECEIVE_SIZE;
	handle->buffer = malloc(handle->bufsize);
	if (!handle->buffer) {
		pcap_snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
			      "Failed to allocate receive buffer for device %s", handle->opt.device);
		goto error;
	}

	priv->oneshot_buffer = malloc(RDMASNIFF_RECEIVE_SIZE);
	if (!priv->oneshot_buffer) {
		pcap_snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
			      "Failed to allocate oneshot buffer for device %s", handle->opt.device);
		goto error;
	}

	priv->mr = ibv_reg_mr(priv->pd, handle->buffer, handle->bufsize, IBV_ACCESS_LOCAL_WRITE);
	if (!priv->mr) {
		pcap_snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
			      "Failed to register MR for device %s", handle->opt.device);
		goto error;
	}


	for (i = 0; i < RDMASNIFF_NUM_RECEIVES; ++i) {
		rdmasniff_post_recv(handle, i);
	}

	if (!ibv_query_port(priv->context, priv->port_num, &port_attr) &&
	    port_attr.link_layer == IBV_LINK_LAYER_INFINIBAND) {
		handle->linktype = DLT_INFINIBAND;
	} else {
		handle->linktype = DLT_EN10MB;
	}

	if (handle->snapshot <= 0 || handle->snapshot > RDMASNIFF_RECEIVE_SIZE)
		handle->snapshot = RDMASNIFF_RECEIVE_SIZE;

	handle->offset = 0;
	handle->read_op = rdmasniff_read;
	handle->stats_op = rdmasniff_stats;
	handle->cleanup_op = rdmasniff_cleanup;
	handle->setfilter_op = install_bpf_program;
	handle->setdirection_op = NULL;
	handle->set_datalink_op = NULL;
	handle->getnonblock_op = pcap_getnonblock_fd;
	handle->setnonblock_op = pcap_setnonblock_fd;
	handle->oneshot_callback = rdmasniff_oneshot;
	handle->selectable_fd = priv->channel->fd;

	return 0;

error:
	if (priv->mr) {
		ibv_dereg_mr(priv->mr);
	}

	if (priv->flow) {
		ibv_destroy_flow(priv->flow);
	}

	if (priv->qp) {
		ibv_destroy_qp(priv->qp);
	}

	if (priv->cq) {
		ibv_destroy_cq(priv->cq);
	}

	if (priv->channel) {
		ibv_destroy_comp_channel(priv->channel);
	}

	if (priv->pd) {
		ibv_dealloc_pd(priv->pd);
	}

	if (priv->context) {
		ibv_close_device(priv->context);
	}

	if (priv->oneshot_buffer) {
		free(priv->oneshot_buffer);
	}

	return PCAP_ERROR;
}
예제 #17
0
파일: local.c 프로젝트: xiansl/mytests
int main(int argc, char *argv[])
{
    struct ibv_pd		       *pd1, *pd2;
    struct ibv_comp_channel	       *comp_chan1, *comp_chan2;
    struct ibv_cq		       *cq1, *cq2;
    struct ibv_cq		       *evt_cq = NULL;
    struct ibv_mr		       *mr1, *mr2;
    struct ibv_qp_init_attr		qp_attr1 = { }, qp_attr2 = {};
    struct ibv_sge			sge;
    struct ibv_send_wr		send_wr = { };
    struct ibv_send_wr	       *bad_send_wr = NULL;
    struct ibv_wc			wc;
    struct ibv_qp			*qp1, *qp2;
    void			       *cq_context = NULL;
    union ibv_gid			gid1, gid2;

    int				n;

    uint8_t			       *buf1, *buf2;

    int				err;
    int 				num_devices;
    struct ibv_context	*	verbs1, *verbs2;
    struct ibv_device ** dev_list = ibv_get_device_list(&num_devices);
    struct ibv_device_attr		dev_attr;
    int use = 0;
    int port = 1;
    int x = 0;
    unsigned long mb = 0;
    unsigned long bytes = 0;
    unsigned long save_diff = 0;
    struct timeval start, stop, diff;
    int iterations = 0;

    struct rusage usage;
    struct timeval ustart, uend;
    struct timeval sstart, send;
    struct timeval tstart, tend;

    DPRINTF("There are %d devices\n", num_devices);

    for(x = 0; x < num_devices; x++) {
        printf("Device: %d, %s\n", x, ibv_get_device_name(dev_list[use]));
    }

    if(num_devices == 0 || dev_list == NULL) {
        printf("No devices found\n");
        return 1;
    }

    if(argc < 2) {
        printf("Which RDMA device to use? 0, 1, 2, 3...\n");
        return 1;
    }

    use = atoi(argv[1]);

    DPRINTF("Using device %d\n", use);

    verbs1 = ibv_open_device(dev_list[use]);

    if(verbs1 == NULL) {
        printf("Failed to open device!\n");
        return 1;
    }

    DPRINTF("Device open %s\n", ibv_get_device_name(dev_list[use]));

    verbs2 = ibv_open_device(dev_list[use]);

    if(verbs2 == NULL) {
        printf("Failed to open device again!\n");
        return 1;
    }

    if(ibv_query_device(verbs1, &dev_attr)) {
        printf("Failed to query device attributes.\n");
        return 1;
    }

    printf("Device open: %d, %s which has %d ports\n", x, ibv_get_device_name(dev_list[use]), dev_attr.phys_port_cnt);

    if(argc < 3) {
        printf("Which port on the device to use? 1, 2, 3...\n");
        return 1;
    }

    port = atoi(argv[2]);

    if(port <= 0) {
        printf("Port #%d invalid, must start with 1, 2, 3, ...\n", port);
        return 1;
    }

    printf("Using port %d\n", port);

    if(argc < 4) {
        printf("How many iterations to perform?\n");
        return 1;
    }

    iterations = atoi(argv[3]);
    printf("Will perform %d iterations\n", iterations);

    pd1 = ibv_alloc_pd(verbs1);
    if (!pd1)
        return 1;

    if(argc < 5) {
        printf("How many megabytes to allocate? (This will be allocated twice. Once for source, once for destination.)\n");
        return 1;
    }

    mb = atoi(argv[4]);

    if(mb <= 0) {
        printf("Megabytes %lu invalid\n", mb);
        return 1;
    }

    DPRINTF("protection domain1 allocated\n");

    pd2 = ibv_alloc_pd(verbs2);
    if (!pd2)
        return 1;

    DPRINTF("protection domain2 allocated\n");

    comp_chan1 = ibv_create_comp_channel(verbs1);
    if (!comp_chan1)
        return 1;

    DPRINTF("completion chan1 created\n");

    comp_chan2 = ibv_create_comp_channel(verbs2);
    if (!comp_chan2)
        return 1;

    DPRINTF("completion chan2 created\n");

    cq1 = ibv_create_cq(verbs1, 2, NULL, comp_chan1, 0);
    if (!cq1)
        return 1;

    DPRINTF("CQ1 created\n");

    cq2 = ibv_create_cq(verbs2, 2, NULL, comp_chan2, 0);
    if (!cq2)
        return 1;

    DPRINTF("CQ2 created\n");

    bytes = mb * 1024UL * 1024UL;

    buf1 = malloc(bytes);
    if (!buf1)
        return 1;

    buf2 = malloc(bytes);
    if (!buf2)
        return 1;

    printf("Populating %lu MB memory.\n", mb * 2);

    for(x = 0; x < bytes; x++) {
        buf1[x] = 123;
    }

    buf1[bytes - 1] = 123;

    mr1 = ibv_reg_mr(pd1, buf1, bytes, IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ);
    if (!mr1) {
        printf("Failed to register memory.\n");
        return 1;
    }

    mr2 = ibv_reg_mr(pd2, buf2, bytes, IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ);
    if (!mr2) {
        printf("Failed to register memory.\n");
        return 1;
    }

    DPRINTF("memory registered.\n");

    qp_attr1.cap.max_send_wr	 = 10;
    qp_attr1.cap.max_send_sge = 10;
    qp_attr1.cap.max_recv_wr	 = 10;
    qp_attr1.cap.max_recv_sge = 10;
    qp_attr1.sq_sig_all = 1;

    qp_attr1.send_cq		 = cq1;
    qp_attr1.recv_cq		 = cq1;

    qp_attr1.qp_type		 = IBV_QPT_RC;

    qp1 = ibv_create_qp(pd1, &qp_attr1);
    if (!qp1) {
        printf("failed to create queue pair #1\n");
        return 1;
    }

    DPRINTF("queue pair1 created\n");

    qp_attr2.cap.max_send_wr	 = 10;
    qp_attr2.cap.max_send_sge = 10;
    qp_attr2.cap.max_recv_wr	 = 10;
    qp_attr2.cap.max_recv_sge = 10;
    qp_attr2.sq_sig_all = 1;

    qp_attr2.send_cq		 = cq2;
    qp_attr2.recv_cq		 = cq2;

    qp_attr2.qp_type		 = IBV_QPT_RC;


    qp2 = ibv_create_qp(pd2, &qp_attr2);
    if (!qp2) {
        printf("failed to create queue pair #2\n");
        return 1;
    }

    DPRINTF("queue pair2 created\n");

    struct ibv_qp_attr attr1 = {
        .qp_state = IBV_QPS_INIT,
        .pkey_index = 0,
        .port_num = port,
        .qp_access_flags = IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_LOCAL_WRITE,
    };

    if(ibv_modify_qp(qp1, &attr1,
                     IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT | IBV_QP_ACCESS_FLAGS)) {
        printf("verbs 1 Failed to go to init\n");
        return 1;
    }

    DPRINTF("verbs1 to init\n");

    struct ibv_qp_attr attr2 = {
        .qp_state = IBV_QPS_INIT,
        .pkey_index = 0,
        .port_num = port,
        .qp_access_flags = IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_LOCAL_WRITE,
    };

    if(ibv_modify_qp(qp2, &attr2,
                     IBV_QP_STATE |
                     IBV_QP_PKEY_INDEX |
                     IBV_QP_PORT |
                     IBV_QP_ACCESS_FLAGS)) {
        printf("verbs 2 Failed to go to init\n");
        return 1;
    }

    DPRINTF("verbs2 to init\n");

    //struct ibv_gid gid1, gid2;
    struct ibv_port_attr port1, port2;
    uint64_t psn1 = lrand48() & 0xffffff;
    uint64_t psn2 = lrand48() & 0xffffff;

    if(ibv_query_port(verbs1, port, &port1))
        return 1;

    DPRINTF("got port1 information\n");

    if(ibv_query_port(verbs2, port, &port2))
        return 1;

    DPRINTF("got port2 information\n");

    if(ibv_query_gid(verbs1, 1, 0, &gid1))
        return 1;
    DPRINTF("got gid1 information\n");

    if(ibv_query_gid(verbs2, 1, 0, &gid2))
        return 1;

    DPRINTF("got gid2 information\n");

    struct ibv_qp_attr next2 = {
        .qp_state = IBV_QPS_RTR,
        .path_mtu = IBV_MTU_1024,
        .dest_qp_num = qp2->qp_num,
        .rq_psn = psn2,
        .max_dest_rd_atomic = 5,
        .min_rnr_timer = 12,
        .ah_attr = {
            .is_global = 0,
            .dlid = port2.lid,
            .sl = 0,
            .src_path_bits = 0,
            .port_num = port,
        }
    };

    if(gid2.global.interface_id) {
        next2.ah_attr.is_global = 1;
        next2.ah_attr.grh.hop_limit = 1;
        next2.ah_attr.grh.dgid = gid2;
        next2.ah_attr.grh.sgid_index = 0;
    }

    struct ibv_qp_attr next1 = {
        .qp_state = IBV_QPS_RTR,
        .path_mtu = IBV_MTU_1024,
        .dest_qp_num = qp1->qp_num,
        .rq_psn = psn1,
        .max_dest_rd_atomic = 1,
        .min_rnr_timer = 12,
        .ah_attr = {
            .is_global = 0,
            .dlid = port1.lid,
            .sl = 0,
            .src_path_bits = 0,
            .port_num = port,
        }
    };

    if(gid1.global.interface_id) {
        next1.ah_attr.is_global = 1;
        next1.ah_attr.grh.hop_limit = 1;
        next1.ah_attr.grh.dgid = gid1;
        next1.ah_attr.grh.sgid_index = 0;
    }

    if(ibv_modify_qp(qp2, &next1,
                     IBV_QP_STATE |
                     IBV_QP_AV |
                     IBV_QP_PATH_MTU |
                     IBV_QP_DEST_QPN |
                     IBV_QP_RQ_PSN |
                     IBV_QP_MAX_DEST_RD_ATOMIC |
                     IBV_QP_MIN_RNR_TIMER)) {
        printf("Failed to modify verbs2 to ready\n");
        return 1;
    }

    DPRINTF("verbs2 RTR\n");

    if(ibv_modify_qp(qp1, &next2,
                     IBV_QP_STATE |
                     IBV_QP_AV |
                     IBV_QP_PATH_MTU |
                     IBV_QP_DEST_QPN |
                     IBV_QP_RQ_PSN |
                     IBV_QP_MAX_DEST_RD_ATOMIC |
                     IBV_QP_MIN_RNR_TIMER)) {
        printf("Failed to modify verbs1 to ready\n");
        return 1;
    }

    DPRINTF("verbs1 RTR\n");

    next2.qp_state = IBV_QPS_RTS;
    next2.timeout = 14;
    next2.retry_cnt = 7;
    next2.rnr_retry = 7;
    next2.sq_psn = psn1;
    next2.max_rd_atomic = 1;

    if(ibv_modify_qp(qp1, &next2,
                     IBV_QP_STATE |
                     IBV_QP_TIMEOUT |
                     IBV_QP_RETRY_CNT |
                     IBV_QP_RNR_RETRY |
                     IBV_QP_SQ_PSN |
                     IBV_QP_MAX_QP_RD_ATOMIC)) {
        printf("Failed again to modify verbs1 to ready\n");
        return 1;
    }

    DPRINTF("verbs1 RTS\n");

    next1.qp_state = IBV_QPS_RTS;
    next1.timeout = 14;
    next1.retry_cnt = 7;
    next1.rnr_retry = 7;
    next1.sq_psn = psn2;
    next1.max_rd_atomic = 1;

    if(ibv_modify_qp(qp2, &next1,
                     IBV_QP_STATE |
                     IBV_QP_TIMEOUT |
                     IBV_QP_RETRY_CNT |
                     IBV_QP_RNR_RETRY |
                     IBV_QP_SQ_PSN |
                     IBV_QP_MAX_QP_RD_ATOMIC)) {
        printf("Failed again to modify verbs2 to ready\n");
        return 1;
    }

    DPRINTF("verbs2 RTS\n");

    printf("Performing RDMA first.\n");
    iterations = atoi(argv[3]);

    getrusage(RUSAGE_SELF, &usage);
    ustart = usage.ru_utime;
    sstart = usage.ru_stime;

    gettimeofday(&tstart, NULL);

    while(iterations-- > 0) {
        sge.addr   = (uintptr_t) buf1;
        sge.length = bytes;
        sge.lkey   = mr1->lkey;

        send_wr.wr_id		    = 1;
        send_wr.opcode		    = IBV_WR_RDMA_WRITE;
        send_wr.sg_list		    = &sge;
        send_wr.num_sge		    = 1;
        send_wr.send_flags          = IBV_SEND_SIGNALED;
        send_wr.wr.rdma.rkey 	    = mr2->rkey;
        send_wr.wr.rdma.remote_addr = (uint64_t) buf2;

        DPRINTF("Iterations left: %d\n", iterations);
        if (ibv_req_notify_cq(cq1, 0))
            return 1;

        DPRINTF("Submitting local RDMA\n");
        gettimeofday(&start, NULL);
        if (ibv_post_send(qp1, &send_wr, &bad_send_wr))
            return 1;

        DPRINTF("RDMA posted %p %p\n", &send_wr, bad_send_wr);

        DPRINTF("blocking...\n");
        if(ibv_get_cq_event(comp_chan1, &evt_cq, &cq_context)) {
            printf("failed to get CQ event\n");
            return 1;
        }
        gettimeofday(&stop, NULL);
        timersub(&stop, &start, &diff);

        DPRINTF("RDMA took: %lu us\n", diff.tv_usec);

        ibv_ack_cq_events(evt_cq, 1);

        DPRINTF("got event\n");

        n = ibv_poll_cq(cq1, 1, &wc);
        if (n > 0) {
            DPRINTF("return from poll: %lu\n", wc.wr_id);
            if (wc.status != IBV_WC_SUCCESS) {
                printf("poll failed %s\n", ibv_wc_status_str(wc.status));
                return 1;
            }

            if (wc.wr_id == 1) {
                DPRINTF("Finished %d bytes %d %d\n", n, buf1[bytes - 1], buf2[bytes - 1]);
            } else {
                printf("didn't find completion\n");
            }
        }

        if (n < 0) {
            printf("poll returned error\n");
            return 1;
        }

        DPRINTF("Poll returned %d bytes %d %d\n", n, buf1[0], buf2[0]);

    }

    gettimeofday(&tend, NULL);

    getrusage(RUSAGE_SELF, &usage);
    uend = usage.ru_utime;
    send = usage.ru_stime;

    save_diff = 0;
    timersub(&uend, &ustart, &diff);
    save_diff += diff.tv_usec;
    printf("User CPU time: %lu us\n", diff.tv_usec);
    timersub(&send, &sstart, &diff);
    save_diff += diff.tv_usec;
    printf("System CPU time: %lu us\n", diff.tv_usec);
    timersub(&tend, &tstart, &diff);
    printf("Sleeping time: %lu us\n", diff.tv_usec - save_diff);
    printf("Wall clock CPU time: %lu us\n", diff.tv_usec);

    iterations = atoi(argv[3]);

    printf("Now using the CPU instead....\n");

    getrusage(RUSAGE_SELF, &usage);
    ustart = usage.ru_utime;
    sstart = usage.ru_stime;

    gettimeofday(&tstart, NULL);

    while(iterations-- > 0) {
        DPRINTF("Repeating without RDMA...\n");

        gettimeofday(&start, NULL);

        memcpy(buf2, buf1, bytes);

        gettimeofday(&stop, NULL);
        timersub(&stop, &start, &diff);
        DPRINTF("Regular copy too took: %lu us\n", diff.tv_usec);
    }

    gettimeofday(&tend, NULL);

    getrusage(RUSAGE_SELF, &usage);
    uend = usage.ru_utime;
    send = usage.ru_stime;

    save_diff = 0;
    timersub(&uend, &ustart, &diff);
    save_diff += diff.tv_usec;
    printf("User CPU time: %lu us\n", diff.tv_usec);
    timersub(&send, &sstart, &diff);
    save_diff += diff.tv_usec;
    printf("System CPU time: %lu us\n", diff.tv_usec);
    timersub(&tend, &tstart, &diff);
    printf("Sleeping time: %lu us\n", diff.tv_usec - save_diff);
    printf("Wall clock CPU time: %lu us\n", diff.tv_usec);
    return 0;
}
예제 #18
0
파일: resource.c 프로젝트: tomykaira/IB
int
resource_create(resource_t *res, int ib_port, int myrank)
{
    struct ibv_device		**dev_list = NULL;
    struct ibv_qp_init_attr	qp_init_attr;
    struct ibv_device		*ib_dev = NULL;
    char	*dev_name = NULL;
    size_t	size;
    int		i;
    int		mr_flags = 0;
    int		cq_size = 0;
    int		dev_numm;
    int		rc = 0;

    /* Init structure */
    memset(res, 0, sizeof(resource_t));
    /* Get the device list */
    dev_list = ibv_get_device_list(&dev_numm);
    if(!dev_list) {
	fprintf(stderr, "[%d] failed to get IB devices list\n", myrank);
	return 1;
    }
    // if no device
    if(!dev_numm) {
	fprintf(stderr, "[%d] No IB device is found\n", myrank);
	rc = 1;
	goto err_exit;
    }
    DEBUG { printf("[%d] found %d IB device(s)\n", myrank, dev_numm); }
    /* Open the requested device */
    for(i = 0; i < dev_numm; i ++){
	dev_name = strdup(ibv_get_device_name(dev_list[i]));
	DEBUG { printf("[%d] IB device name: %s\n", myrank, dev_name); }
	ib_dev = dev_list[i];
	break;
    }
    if (!ib_dev){
	fprintf(stderr, "[%d] IB device %s wasn't found\n", myrank, dev_name);
	rc = 1;
	goto err_exit;
    }
    res->ib_ctx = ibv_open_device(ib_dev);
    DEBUG { printf("[%d] IB context = %lx\n", myrank, (uintptr_t)res->ib_ctx); }
    if(!res->ib_ctx){
	fprintf(stderr, "[%d] failed to open device %s\n", myrank, dev_name);
	rc = 1;
	goto err_exit;
    }
    // free device list
    ibv_free_device_list(dev_list);
    dev_list = NULL;
    ib_dev = NULL;
    // query prot properties
    if(ibv_query_port(res->ib_ctx, ib_port, &res->port_attr)){
	fprintf(stderr, "[%d] ibv_query_port on port %u failed\n", myrank, ib_port);
	rc = 1;
	goto err_exit;
    }

    /* Create a PD */
    res->pd = ibv_alloc_pd(res->ib_ctx);
    if (!res->pd){
	fprintf(stderr, "[%d] ibv_alloc_pd failed\n", myrank);
	rc = 1;
	goto err_exit;
    }

    /* Create send/recv CQ
     *  inputs:
     *		device handle
     *		CQ capacity
     *  Output:
     *		CQ handle
     */
    res->scq = ibv_create_cq(res->ib_ctx, MAX_CQ_CAPACITY, NULL, NULL, 0);
    res->rcq = ibv_create_cq(res->ib_ctx, MAX_CQ_CAPACITY, NULL, NULL, 0);
    if (!res->scq){
	fprintf(stderr, "[%d] failed to create SCQ with %u entries\n", myrank, cq_size);
	rc = 1;
	goto err_exit;
    }
    if (!res->rcq){
	fprintf(stderr, "[%d] failed to create SCQ with %u entries\n", myrank, cq_size);
	rc = 1;
	goto err_exit;
    }

    /* Allocate fix buffer */
    size = MAX_FIX_BUF_SIZE;
    res->buf_size = size;
    res->buf = (char *)malloc(size * sizeof(char));
    if (!res->buf ){
	fprintf(stderr, "[%d] failed to malloc %Zu bytes to memory buffer\n", myrank, size);
	rc = 1;
	goto err_exit;
    }
    memset(res->buf, 0 , size);

    /* Memory Region
     *	inputs:
     *		device handle
     *		PD
     *		Virtual Addr(addr of MR)
     *		Access Ctrl: LocalWrite, RemoteRead, RemoteWrite, RemoteAtomicOp, MemWindowBinding
     *	outputs:
     *		MR handle
     *		L_Key
     *		R_Key
     */
    res->mr_list = malloc(sizeof(struct ibv_mr*) * MAX_MR_NUM);
    res->mr_size = 1;

    mr_flags = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ |
		IBV_ACCESS_REMOTE_WRITE ;
    res->mr_list[0] = ibv_reg_mr(res->pd, res->buf, size, mr_flags);
    if (!res->mr_list[0]){
	fprintf(stderr, "[%d] ibv_reg_mr failed with mr_flags=0x%x\n", myrank, mr_flags);
	rc = 1;
	goto err_exit;
    }
    DEBUG { printf("[%d] fixed MR was registered with addr=%p, lkey=0x%x, rkey=0x%x, flags=0x%x\n", myrank, res->buf, res->mr_list[0]->lkey, res->mr_list[0]->rkey, mr_flags); }

    /* Create QP */
    // inputs:
    //	PD
    //	CQs for SQ,RQ
    //	capacity of SQ,RQ
    // Outputs:
    //	QP handle
    memset(&qp_init_attr, 0, sizeof(qp_init_attr));
    qp_init_attr.qp_type = IBV_QPT_RC;
    qp_init_attr.sq_sig_all = 1;
    qp_init_attr.send_cq = res->scq;
    qp_init_attr.recv_cq = res->rcq;
    // max SR/RR num in SQ/RQ
    qp_init_attr.cap.max_send_wr = MAX_SQ_CAPACITY ;
    qp_init_attr.cap.max_recv_wr = MAX_RQ_CAPACITY;
    // max SGE num
    qp_init_attr.cap.max_send_sge = MAX_SGE_CAPACITY;
    qp_init_attr.cap.max_recv_sge = MAX_SGE_CAPACITY;
    qp_init_attr.cap.max_inline_data = 256;

    res->qp = ibv_create_qp(res->pd, &qp_init_attr);
    if (!res->qp){
	fprintf(stderr, "failed to create QP\n");
	rc = 1;
	goto err_exit;
    }
    DEBUG { printf("[%d] QP was created, QP number=0x%x\n", myrank, res->qp->qp_num); }

    /* EXIT */
err_exit:
    if(rc){
	/* Error encountered, cleanup */
	if(res->qp){
	    ibv_destroy_qp(res->qp);
	    res->qp = NULL;
	}
	if(res->mr_list && res->mr_size > 0){
	    int i;
	    for(i=0; i<res->mr_size; i++){
		ibv_dereg_mr(res->mr_list[i]);
		res->mr_list[i] = NULL;
	    }
	    free(res->mr_list);
	}
	if(res->buf){
	    free(res->buf);
	    res->buf = NULL;
	}
	if(res->scq){
	    ibv_destroy_cq(res->scq);
	    res->scq = NULL;
	}
	if(res->rcq){
	    ibv_destroy_cq(res->rcq);
	    res->rcq = NULL;
	}
	if(res->comp_ch){
	    ibv_destroy_comp_channel(res->comp_ch);
	    res->comp_ch = NULL;
	}
	if(res->pd){
	    ibv_dealloc_pd(res->pd);
	    res->pd = NULL;
	}
	if (res->ib_ctx) {
	    ibv_close_device(res->ib_ctx);
	    res->ib_ctx = NULL;
	}
	if (dev_list) {
	    ibv_free_device_list(dev_list);
	    dev_list = NULL;
	}
    }
    return rc;
}
예제 #19
0
int __ibv_query_port_1_0(struct ibv_context_1_0 *context, uint8_t port_num,
			 struct ibv_port_attr *port_attr)
{
	return ibv_query_port(context->real_context, port_num, port_attr);
}
예제 #20
0
static int init_device(struct ibv_context *context_arg,
                       struct mca_btl_openib_sa_qp_cache *cache,
                       uint32_t port_num)
{
    struct ibv_ah_attr aattr;
    struct ibv_port_attr pattr;
    int rc;

    cache->context = ibv_open_device(context_arg->device);
    if (NULL == cache->context) {
        BTL_ERROR(("error obtaining device context for %s errno says %s",
                    ibv_get_device_name(context_arg->device), strerror(errno)));
        return OPAL_ERROR;
    }
    cache->device_name = strdup(ibv_get_device_name(cache->context->device));
    cache->port_num = port_num;

    /* init all sl_values to be SL_NOT_PRESENT */
    memset(&cache->sl_values, SL_NOT_PRESENT, sizeof(cache->sl_values));

    cache->next = sa_qp_cache;
    sa_qp_cache = cache;

    /* allocate the protection domain for the device */
    cache->pd = ibv_alloc_pd(cache->context);
    if (NULL == cache->pd) {
        BTL_ERROR(("error allocating protection domain for %s errno says %s",
                    ibv_get_device_name(context_arg->device), strerror(errno)));
        return OPAL_ERROR;
    }

    /* register memory region */
    cache->mr = ibv_reg_mr(cache->pd, cache->send_recv_buffer,
            sizeof(cache->send_recv_buffer),
            IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_LOCAL_WRITE);
    if (NULL == cache->mr) {
        BTL_ERROR(("error registering memory region, errno says %s", strerror(errno)));
        return OPAL_ERROR;
    }

    /* init the ud qp */
    rc = init_ud_qp(context_arg, cache);
    if (OPAL_ERROR == rc) {
        return OPAL_ERROR;
    }

    rc = ibv_query_port(cache->context, cache->port_num, &pattr);
    if (rc) {
        BTL_ERROR(("error getting port attributes for device %s "
                    "port number %d errno says %s",
                    ibv_get_device_name(context_arg->device),
                    cache->port_num, strerror(errno)));
        return OPAL_ERROR;
    }

    /* create address handle  */
    memset(&aattr, 0, sizeof(aattr));
    aattr.dlid = pattr.sm_lid;
    aattr.sl = pattr.sm_sl;
    aattr.port_num = cache->port_num;
    cache->ah = ibv_create_ah(cache->pd, &aattr);
    if (NULL == cache->ah) {
        BTL_ERROR(("error creating address handle: %s", strerror(errno)));
        return OPAL_ERROR;
    }

    memset(&(cache->rwr), 0, sizeof(cache->rwr));
    cache->rwr.num_sge = 1;
    cache->rwr.sg_list = &(cache->rsge);
    memset(&(cache->rsge), 0, sizeof(cache->rsge));
    cache->rsge.addr = (uint64_t)(void *)
        (cache->send_recv_buffer + MAD_BLOCK_SIZE);
    cache->rsge.length = MAD_BLOCK_SIZE + 40;
    cache->rsge.lkey = cache->mr->lkey;

    return 0;
}
예제 #21
0
int set_up_connection(struct pingpong_context *ctx,
		struct perftest_parameters *user_param,
		struct pingpong_dest *my_dest)
{
	int num_of_qps = user_param->num_of_qps;
	int num_of_qps_per_port = user_param->num_of_qps / 2;

	int i;
	int is_ipv4;

	union ibv_gid temp_gid;
	union ibv_gid temp_gid2;
	struct ibv_port_attr attr;

	srand48(getpid() * time(NULL));

	/*in xrc with bidirectional,
	there are send qps and recv qps. the actual number of send/recv qps
	is num_of_qps / 2.
	*/
	if ( (user_param->connection_type == DC || user_param->use_xrc) && (user_param->duplex || user_param->tst == LAT)) {
		num_of_qps /= 2;
		num_of_qps_per_port = num_of_qps / 2;
	}

	if (user_param->gid_index != -1) {
		if (ibv_query_port(ctx->context,user_param->ib_port,&attr))
			return 0;

		if (user_param->use_gid_user) {
			if (ibv_query_gid(ctx->context,user_param->ib_port,user_param->gid_index,&temp_gid)) {
				return -1;
			}
		} else {
			for (i=0 ; i < attr.gid_tbl_len; i++) {
				if (ibv_query_gid(ctx->context,user_param->ib_port,i,&temp_gid)) {	
					return -1;
				}
				is_ipv4 = ipv6_addr_v4mapped((struct in6_addr *)temp_gid.raw);
				if ((user_param->ipv6 && !is_ipv4) || (!user_param->ipv6 && is_ipv4)) {
					user_param->gid_index = i;
					break;
				}
			}
		}
	}

	if (user_param->dualport==ON) {
		if (user_param->gid_index2 != -1) {
			if (ibv_query_port(ctx->context,user_param->ib_port2,&attr))
				return 0;

			if (user_param->use_gid_user) {
				if (ibv_query_gid(ctx->context,user_param->ib_port2,user_param->gid_index,&temp_gid2))
					return -1;
			} else {
				for (i=0 ; i < attr.gid_tbl_len; i++) {
					if (ibv_query_gid(ctx->context,user_param->ib_port2,i,&temp_gid2)) {
						return -1;
					}
					is_ipv4 = ipv6_addr_v4mapped((struct in6_addr *)temp_gid2.raw);
					if ((user_param->ipv6 && !is_ipv4) || (!user_param->ipv6 && is_ipv4)) {
						user_param->gid_index = i;
						break;
					}
				}
			}
		}
	}

	for (i = 0; i < user_param->num_of_qps; i++) {

		if (user_param->dualport == ON) {
			/*first half of qps are for ib_port and second half are for ib_port2
			in xrc with bidirectional, the first half of qps are xrc_send qps and
			the second half are xrc_recv qps. the first half of the send/recv qps
			are for ib_port1 and the second half are for ib_port2
			*/
			if (i % num_of_qps < num_of_qps_per_port) {
				my_dest[i].lid   = ctx_get_local_lid(ctx->context,user_param->ib_port);
				my_dest[i].gid_index = user_param->gid_index;
			} else {
				my_dest[i].lid   = ctx_get_local_lid(ctx->context,user_param->ib_port2);
				my_dest[i].gid_index = user_param->gid_index2;
			}
			/*single-port case*/
		} else {
			my_dest[i].lid   = ctx_get_local_lid(ctx->context,user_param->ib_port);
			my_dest[i].gid_index = user_param->gid_index;
		}

		my_dest[i].qpn   = ctx->qp[i]->qp_num;
		my_dest[i].psn   = lrand48() & 0xffffff;
		my_dest[i].rkey  = ctx->mr->rkey;

		/* Each qp gives his receive buffer address.*/
		my_dest[i].out_reads = user_param->out_reads;
		my_dest[i].vaddr = (uintptr_t)ctx->buf + (user_param->num_of_qps + i)*BUFF_SIZE(ctx->size,ctx->cycle_buffer);

		if (user_param->dualport==ON) {

			if (i % num_of_qps < num_of_qps_per_port)
				memcpy(my_dest[i].gid.raw,temp_gid.raw ,16);

			else
				memcpy(my_dest[i].gid.raw,temp_gid2.raw ,16);
		} else {
			memcpy(my_dest[i].gid.raw,temp_gid.raw ,16);
		}

		/*
		We do not fail test upon lid above RoCE.
		if ( (user_param->gid_index < 0) ||  ((user_param->gid_index2 < 0) && (user_param->dualport == ON))  ){
			if (!my_dest[i].lid) {
				fprintf(stderr," Local lid 0x0 detected. Is an SM running? \n");
				return -1;
			}
		}
		*/
	}

	#ifdef HAVE_XRCD
	if (user_param->use_xrc) {
		for (i=0; i < user_param->num_of_qps; i++) {
			if (ibv_get_srq_num(ctx->srq,&(my_dest[i].srqn))) {
				fprintf(stderr, "Couldn't get SRQ number\n");
				return 1;
			}
		}
	}
	#endif

	#ifdef HAVE_DC
	if(user_param->machine == SERVER || user_param->duplex || user_param->tst == LAT) {
		if (user_param->connection_type == DC) {
			for (i=0; i < user_param->num_of_qps; i++) {
				if (ibv_get_srq_num(ctx->srq, &(my_dest[i].srqn))) {
					fprintf(stderr, "Couldn't get SRQ number\n");
					return 1;
				}
			}
		}
	}
	#endif
	return 0;
}
예제 #22
0
int __ibv_query_port_1_0(struct ibv_context_1_0 *context, uint8_t port_num,
			 struct ibv_port_attr *port_attr)
{  fprintf(stderr, "%s:%s:%d \n", __func__, __FILE__, __LINE__);
	return ibv_query_port(context->real_context, port_num, port_attr);
}
예제 #23
0
파일: mlx5_glue.c 프로젝트: cjdoucette/dpdk
static int
mlx5_glue_query_port(struct ibv_context *context, uint8_t port_num,
		     struct ibv_port_attr *port_attr)
{
	return ibv_query_port(context, port_num, port_attr);
}
예제 #24
0
파일: opasadb_path.c 프로젝트: 01org/opa-ff
/*
 * Opens the interface to the opp module.
 * Must be called before any use of the path functions.
 *
 * device		The verbs context for the HFI.
 *				Can be acquired via op_path_find_hfi.
 *
 * port_num		The port to use for sending queries.
 *
 * This information is used for querying pkeys and 
 * calculating timeouts.
 *
 * Returns a pointer to the op_path context on success, or returns NULL 
 * and sets errno if the device could not be opened.
 */
void *
op_path_open(struct ibv_device *device, int p)
{
	int         i, err;
	struct op_path_context *context;

	if (!device) {
		errno=ENXIO;
		return NULL;
	}

	context = malloc(sizeof(struct op_path_context));
	if (!context) {
		errno=ENOMEM;
		return NULL;
	}
	memset(context,0,sizeof(struct op_path_context));

	context->ibv_context = ibv_open_device(device);
	if (!context->ibv_context) {
		errno=ENODEV;
		goto open_device_failed;
	}

	context->port_num = p;

	context->reader = op_ppath_allocate_reader();
	if (!context->reader) {
		errno=ENOMEM;
		goto alloc_reader_failed;
	}

	err = op_ppath_create_reader(context->reader);
	if (err) {
		errno=err;
		goto create_reader_failed;
	}

	if ((err=ibv_query_device(context->ibv_context,
                         &(context->device_attr)))) {
		errno=EFAULT;
		goto query_attr_failed;
	}

	if ((err=ibv_query_port(context->ibv_context,
                       context->port_num,
                       &(context->port_attr)))) {
		errno=EFAULT;
		goto query_attr_failed;
	}

	context->pkey_table = malloc(context->device_attr.max_pkeys* sizeof(int));     
	if (!context->pkey_table) {
		errno= ENOMEM;
		goto query_attr_failed;
	}
	memset(context->pkey_table,0,context->device_attr.max_pkeys* sizeof(int));

	for (i = 0, err = 0; !err && i<context->device_attr.max_pkeys; i++) {
		err = ibv_query_pkey(context->ibv_context, context->port_num, i,
                             &(context->pkey_table[i]));
		if (err) {
			errno=EFAULT;
			goto query_pkey_failed;
		}
	}

	return context;

query_pkey_failed:
	free(context->pkey_table);
query_attr_failed:
	op_ppath_close_reader(context->reader);
create_reader_failed:
	free(context->reader);
alloc_reader_failed:
	ibv_close_device(context->ibv_context);
open_device_failed:
	free(context);
	return NULL;
}
예제 #25
0
파일: mlx5.c 프로젝트: goby/dpdk
/**
 * DPDK callback to register a PCI device.
 *
 * This function creates an Ethernet device for each port of a given
 * PCI device.
 *
 * @param[in] pci_drv
 *   PCI driver structure (mlx5_driver).
 * @param[in] pci_dev
 *   PCI device information.
 *
 * @return
 *   0 on success, negative errno value on failure.
 */
static int
mlx5_pci_devinit(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
{
	struct ibv_device **list;
	struct ibv_device *ibv_dev;
	int err = 0;
	struct ibv_context *attr_ctx = NULL;
	struct ibv_device_attr device_attr;
	unsigned int vf;
	int idx;
	int i;

	(void)pci_drv;
	assert(pci_drv == &mlx5_driver.pci_drv);
	/* Get mlx5_dev[] index. */
	idx = mlx5_dev_idx(&pci_dev->addr);
	if (idx == -1) {
		ERROR("this driver cannot support any more adapters");
		return -ENOMEM;
	}
	DEBUG("using driver device index %d", idx);

	/* Save PCI address. */
	mlx5_dev[idx].pci_addr = pci_dev->addr;
	list = ibv_get_device_list(&i);
	if (list == NULL) {
		assert(errno);
		if (errno == ENOSYS) {
			WARN("cannot list devices, is ib_uverbs loaded?");
			return 0;
		}
		return -errno;
	}
	assert(i >= 0);
	/*
	 * For each listed device, check related sysfs entry against
	 * the provided PCI ID.
	 */
	while (i != 0) {
		struct rte_pci_addr pci_addr;

		--i;
		DEBUG("checking device \"%s\"", list[i]->name);
		if (mlx5_ibv_device_to_pci_addr(list[i], &pci_addr))
			continue;
		if ((pci_dev->addr.domain != pci_addr.domain) ||
		    (pci_dev->addr.bus != pci_addr.bus) ||
		    (pci_dev->addr.devid != pci_addr.devid) ||
		    (pci_dev->addr.function != pci_addr.function))
			continue;
		vf = ((pci_dev->id.device_id ==
		       PCI_DEVICE_ID_MELLANOX_CONNECTX4VF) ||
		      (pci_dev->id.device_id ==
		       PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF));
		INFO("PCI information matches, using device \"%s\" (VF: %s)",
		     list[i]->name, (vf ? "true" : "false"));
		attr_ctx = ibv_open_device(list[i]);
		err = errno;
		break;
	}
	if (attr_ctx == NULL) {
		ibv_free_device_list(list);
		switch (err) {
		case 0:
			WARN("cannot access device, is mlx5_ib loaded?");
			return 0;
		case EINVAL:
			WARN("cannot use device, are drivers up to date?");
			return 0;
		}
		assert(err > 0);
		return -err;
	}
	ibv_dev = list[i];

	DEBUG("device opened");
	if (ibv_query_device(attr_ctx, &device_attr))
		goto error;
	INFO("%u port(s) detected", device_attr.phys_port_cnt);

	for (i = 0; i < device_attr.phys_port_cnt; i++) {
		uint32_t port = i + 1; /* ports are indexed from one */
		uint32_t test = (1 << i);
		struct ibv_context *ctx = NULL;
		struct ibv_port_attr port_attr;
		struct ibv_pd *pd = NULL;
		struct priv *priv = NULL;
		struct rte_eth_dev *eth_dev;
#ifdef HAVE_EXP_QUERY_DEVICE
		struct ibv_exp_device_attr exp_device_attr;
#endif /* HAVE_EXP_QUERY_DEVICE */
		struct ether_addr mac;

#ifdef HAVE_EXP_QUERY_DEVICE
		exp_device_attr.comp_mask =
			IBV_EXP_DEVICE_ATTR_EXP_CAP_FLAGS |
			IBV_EXP_DEVICE_ATTR_RX_HASH;
#endif /* HAVE_EXP_QUERY_DEVICE */

		DEBUG("using port %u (%08" PRIx32 ")", port, test);

		ctx = ibv_open_device(ibv_dev);
		if (ctx == NULL)
			goto port_error;

		/* Check port status. */
		err = ibv_query_port(ctx, port, &port_attr);
		if (err) {
			ERROR("port query failed: %s", strerror(err));
			goto port_error;
		}
		if (port_attr.state != IBV_PORT_ACTIVE)
			DEBUG("port %d is not active: \"%s\" (%d)",
			      port, ibv_port_state_str(port_attr.state),
			      port_attr.state);

		/* Allocate protection domain. */
		pd = ibv_alloc_pd(ctx);
		if (pd == NULL) {
			ERROR("PD allocation failure");
			err = ENOMEM;
			goto port_error;
		}

		mlx5_dev[idx].ports |= test;

		/* from rte_ethdev.c */
		priv = rte_zmalloc("ethdev private structure",
				   sizeof(*priv),
				   RTE_CACHE_LINE_SIZE);
		if (priv == NULL) {
			ERROR("priv allocation failure");
			err = ENOMEM;
			goto port_error;
		}

		priv->ctx = ctx;
		priv->device_attr = device_attr;
		priv->port = port;
		priv->pd = pd;
		priv->mtu = ETHER_MTU;
#ifdef HAVE_EXP_QUERY_DEVICE
		if (ibv_exp_query_device(ctx, &exp_device_attr)) {
			ERROR("ibv_exp_query_device() failed");
			goto port_error;
		}

		priv->hw_csum =
			((exp_device_attr.exp_device_cap_flags &
			  IBV_EXP_DEVICE_RX_CSUM_TCP_UDP_PKT) &&
			 (exp_device_attr.exp_device_cap_flags &
			  IBV_EXP_DEVICE_RX_CSUM_IP_PKT));
		DEBUG("checksum offloading is %ssupported",
		      (priv->hw_csum ? "" : "not "));

		priv->hw_csum_l2tun = !!(exp_device_attr.exp_device_cap_flags &
					 IBV_EXP_DEVICE_VXLAN_SUPPORT);
		DEBUG("L2 tunnel checksum offloads are %ssupported",
		      (priv->hw_csum_l2tun ? "" : "not "));

		priv->ind_table_max_size = exp_device_attr.rx_hash_caps.max_rwq_indirection_table_size;
		DEBUG("maximum RX indirection table size is %u",
		      priv->ind_table_max_size);

#else /* HAVE_EXP_QUERY_DEVICE */
		priv->ind_table_max_size = RSS_INDIRECTION_TABLE_SIZE;
#endif /* HAVE_EXP_QUERY_DEVICE */

		priv->vf = vf;
		/* Allocate and register default RSS hash keys. */
		priv->rss_conf = rte_calloc(__func__, hash_rxq_init_n,
					    sizeof((*priv->rss_conf)[0]), 0);
		if (priv->rss_conf == NULL) {
			err = ENOMEM;
			goto port_error;
		}
		err = rss_hash_rss_conf_new_key(priv,
						rss_hash_default_key,
						rss_hash_default_key_len,
						ETH_RSS_PROTO_MASK);
		if (err)
			goto port_error;
		/* Configure the first MAC address by default. */
		if (priv_get_mac(priv, &mac.addr_bytes)) {
			ERROR("cannot get MAC address, is mlx5_en loaded?"
			      " (errno: %s)", strerror(errno));
			goto port_error;
		}
		INFO("port %u MAC address is %02x:%02x:%02x:%02x:%02x:%02x",
		     priv->port,
		     mac.addr_bytes[0], mac.addr_bytes[1],
		     mac.addr_bytes[2], mac.addr_bytes[3],
		     mac.addr_bytes[4], mac.addr_bytes[5]);
		/* Register MAC and broadcast addresses. */
		claim_zero(priv_mac_addr_add(priv, 0,
					     (const uint8_t (*)[ETHER_ADDR_LEN])
					     mac.addr_bytes));
		claim_zero(priv_mac_addr_add(priv, (RTE_DIM(priv->mac) - 1),
					     &(const uint8_t [ETHER_ADDR_LEN])
					     { "\xff\xff\xff\xff\xff\xff" }));
#ifndef NDEBUG
		{
			char ifname[IF_NAMESIZE];

			if (priv_get_ifname(priv, &ifname) == 0)
				DEBUG("port %u ifname is \"%s\"",
				      priv->port, ifname);
			else
				DEBUG("port %u ifname is unknown", priv->port);
		}
#endif
		/* Get actual MTU if possible. */
		priv_get_mtu(priv, &priv->mtu);
		DEBUG("port %u MTU is %u", priv->port, priv->mtu);

		/* from rte_ethdev.c */
		{
			char name[RTE_ETH_NAME_MAX_LEN];

			snprintf(name, sizeof(name), "%s port %u",
				 ibv_get_device_name(ibv_dev), port);
			eth_dev = rte_eth_dev_allocate(name, RTE_ETH_DEV_PCI);
		}
		if (eth_dev == NULL) {
			ERROR("can not allocate rte ethdev");
			err = ENOMEM;
			goto port_error;
		}

		eth_dev->data->dev_private = priv;
		eth_dev->pci_dev = pci_dev;
		eth_dev->driver = &mlx5_driver;
		eth_dev->data->rx_mbuf_alloc_failed = 0;
		eth_dev->data->mtu = ETHER_MTU;

		priv->dev = eth_dev;
		eth_dev->dev_ops = &mlx5_dev_ops;
		eth_dev->data->mac_addrs = priv->mac;
		TAILQ_INIT(&eth_dev->link_intr_cbs);

		/* Bring Ethernet device up. */
		DEBUG("forcing Ethernet interface up");
		priv_set_flags(priv, ~IFF_UP, IFF_UP);
		continue;

port_error:
		rte_free(priv->rss_conf);
		rte_free(priv);
		if (pd)
			claim_zero(ibv_dealloc_pd(pd));
		if (ctx)
			claim_zero(ibv_close_device(ctx));
		break;
	}
예제 #26
0
파일: ibv.c 프로젝트: carriercomm/ix
/* Initialize the actual IB device */
int initIB(ArgStruct *p)
{
  int i, j, ret;
  char *tmp;
  int num_devices = 0;
  struct ibv_device **hca_list, **filtered_hca_list;
  struct ibv_device_attr hca_attr;
#if !HAVE_IBV_DEVICE_LIST
  struct dlist *hca_dlist; 
  struct ibv_device* hca_device; 
#endif

  /* Find all the devices on this host */
#if HAVE_IBV_DEVICE_LIST
  hca_list = ibv_get_device_list(&num_devices);
#else
  hca_dlist = ibv_get_devices();
  dlist_start(hca_dlist); 
  dlist_for_each_data(hca_dlist, hca_device, struct ibv_device)
    ++num_devices;
#endif

  /* If we didn't find any, return an error */
  if (0 == num_devices) {
      fprintf(stderr, "Couldn't find any IBV devices\n");
      return -1;
  }
  
#if !HAVE_IBV_DEVICE_LIST
  /* If we have the old version (ibv_get_devices()), convert it to
     the new form */
  hca_list = (struct ibv_device**) malloc(num_devices * 
                                          sizeof(struct ibv_device*));
  if (NULL == hca_list) {
      fprintf(stderr, "%s:%s:%d: malloc failed\n", __FILE__,
              __func__, __LINE__);
      return -1;
  }
  
  i = 0; 
  dlist_start(hca_dlist); 
  dlist_for_each_data(hca_dlist, hca_device, struct ibv_device)
      hca_list[i++] = hca_device;
#endif    

  /* Possible values for p->prot.device_and_port:

     1. <device>:<port> -- use only this device and only this port
     2. <device> -- use the first active port on this device
     3. :<port> -- use only this port, but on any device

     <device> names are matched exactly.
  */

  /* If a device name was specified on the command line, see if we can
     find it */
  tmp = NULL;
  port_num = -1;
  filtered_hca_list = hca_list;
  if (NULL != p->prot.device_and_port) {
      /* If there's a : in the string, then we have a port */
      tmp = strchr(p->prot.device_and_port, ':');
      if (NULL != tmp) {
          *tmp = '\0';
          ++tmp;
          port_num = atoi(tmp);
      }
      LOGPRINTF(("Pre-filter: looking for target device \"%s\", port %d",
                 p->prot.device_and_port, port_num));

      /* If the length of the device string left is >0, then there's a
         device specification */
      if (strlen(p->prot.device_and_port) > 0) {
          int found = 0;

          /* Loop through all the devices and find a matching
             name */
          for (i = 0; i < num_devices; ++i) {
              LOGPRINTF(("Pre-filter: found device: %s",
                         ibv_get_device_name(hca_list[i])));
              if (0 == strcmp(p->prot.device_and_port, 
                              ibv_get_device_name(hca_list[i]))) {
                  LOGPRINTF(("Pre-filter: found target device: %s (%d of %d)",
                             p->prot.device_and_port, i, num_devices));
                  filtered_hca_list = &(hca_list[i]);
                  num_devices = 1;
                  found = 1;
                  break;
              }
          }

          /* If we didn't find it, abort */
          if (!found) {
              fprintf(stderr, "Unable to find device \"%s\", aborting\n",
                      p->prot.device_and_port);
              return -1;
          }
      }
  }

  /* Traverse the filtered HCA list and find a good port */
  for (hca = NULL, i = 0; NULL == hca && i < num_devices; ++i) {

      /* Get a ibv_context from the ibv_device  */
      ctx = ibv_open_device(filtered_hca_list[i]);
      if (!ctx) {
          fprintf(stderr, "Couldn't create IBV context\n");
          return -1;
      } else {
          LOGPRINTF(("Found HCA %s",
                     ibv_get_device_name(filtered_hca_list[i])));
      }
      
      /* Get the device attributes */
      if (0 != ibv_query_device(ctx, &hca_attr)) {
          fprintf(stderr, "Could not get device context for %s, aborting\n",
                  ibv_get_device_name(hca));
          return -1;
      }

      for (j = 1; j <= hca_attr.phys_port_cnt; ++j) {
          /* If a specific port was asked for, *only* look at that port */
          if (port_num >= 0 && port_num != j) {
              continue;
          }
          LOGPRINTF(("Checking %s:%d...", 
                     ibv_get_device_name(filtered_hca_list[i]), j));

          /* Query this port and see if it's active */
          if (0 != ibv_query_port(ctx, j, &hca_port)) {
              fprintf(stderr, "Unable to query port %s:%d, aborting\n",
                      ibv_get_device_name(filtered_hca_list[i]), j);
              return -1;
          }

          /* If this port is active, we have a winner! */
          if (IBV_PORT_ACTIVE == hca_port.state) {
              LOGPRINTF(("%s:%d is ACTIVE", 
                         ibv_get_device_name(filtered_hca_list[i]), j));
              port_num = j;
              hca = filtered_hca_list[i];
              break;
          }
      }

      /* If we found one, we're done */
      if (hca) {
          break;
      }

      /* Otherwise, close the device (ignore any errors) */
      ibv_close_device(ctx);
      ctx = NULL;
  }

  /* If we didn't find a good device/port combo, abort */
  if (NULL == hca) {
      fprintf(stderr, "Could not find an active device and port, aborting\n");
      return -1;
  }

  /* free up the other devices in the event we would have multiple ib
     devices. if this isnt done, the device pointers will still be
     around in space somewhere -> bad */

#if HAVE_IBV_DEVICE_LIST
  ibv_free_device_list(hca_list); 
#endif
  
  /* Get HCA properties */
  
  lid = hca_port.lid;		/* local id, used to ref back to the device */
  LOGPRINTF(("  lid = %d", lid));


  /* Allocate Protection Domain */
	/* need a Protection domain to handle/register memory over the card */
  pd_hndl = ibv_alloc_pd(ctx);	
  if(!pd_hndl) {
    fprintf(stderr, "Error allocating PD\n");
    return -1;
  } else {
    LOGPRINTF(("Allocated Protection Domain"));
  }


  /* Create send completion queue */
  
  num_cqe = 30000; /* Requested number of completion q elements */
  s_cq_hndl = ibv_create_cq(ctx, num_cqe, NULL, NULL, 0);
  if(!s_cq_hndl) {
    fprintf(stderr, "Error creating send CQ\n");
    return -1;
  } else {
    act_num_cqe = s_cq_hndl->cqe;
    LOGPRINTF(("Created Send Completion Queue with %d elements", act_num_cqe));
  }


  /* Create recv completion queue */
  
  num_cqe = 20000; /* Requested number of completion q elements */
  r_cq_hndl = ibv_create_cq(ctx, num_cqe, NULL, NULL, 0);
  if(!r_cq_hndl) {
    fprintf(stderr, "Error creating send CQ\n");
    return -1;
  } else {
    act_num_cqe = r_cq_hndl->cqe;
    LOGPRINTF(("Created Recv Completion Queue with %d elements", act_num_cqe));
  }


  /* Placeholder for MR */
	/* We dont actually setup the Memory Regions here, instead
	 * this is done in the 'MyMalloc(..)' helper function.
	 * You could however, set them up here.
	 */

  /* Create Queue Pair */
    /* To setup a Queue Pair, the following qp initial attributes must be
     * specified and passed to the create_qp(..) function:
     * max send/recv write requests.  (max_recv/send_wr)
     * max scatter/gather entries. (max_recv/send_sge)
     * Command queues to associate the qp with.  (recv/send_cq)
     * Signalling type:  1-> signal all events.  0-> dont, event handler will
     *   deal with this.
     * QP type.  (RC=reliable connection, UC=unreliable.. etc.) defined 
     *   in the verbs header.
     */
  memset(&qp_init_attr, 0, sizeof(struct ibv_qp_init_attr)); 
  qp_init_attr.cap.max_recv_wr    = max_wq; /* Max outstanding WR on RQ      */
  qp_init_attr.cap.max_send_wr    = max_wq; /* Max outstanding WR on SQ      */
  qp_init_attr.cap.max_recv_sge   = 1; /* Max scatter/gather entries on RQ */
  qp_init_attr.cap.max_send_sge   = 1; /* Max scatter/gather entries on SQ */
  qp_init_attr.recv_cq            = r_cq_hndl; /* CQ handle for RQ         */
  qp_init_attr.send_cq            = s_cq_hndl; /* CQ handle for SQ         */
  qp_init_attr.sq_sig_all         = 0; /* Signalling type */
  qp_init_attr.qp_type            = IBV_QPT_RC; /* Transmission type         */

  /* ibv_create_qp( ibv_pd *pd, ibv_qp_init_attr * attr) */  
  qp_hndl = ibv_create_qp(pd_hndl, &qp_init_attr);
  if(!qp_hndl) {
    fprintf(stderr, "Error creating Queue Pair: %s\n", strerror(errno));
    return -1;
  } else {
    LOGPRINTF(("Created Queue Pair"));
  }

    /* Using the tcp connection, exchange necesary data needed to map
     *  the remote memory:
     *  (local: lid, qp_hndl->qp_num ), (remote: d_lid, d_qp_num)
     */

  /* Exchange lid and qp_num with other node */
  
  if( write(p->commfd, &lid, sizeof(lid) ) != sizeof(lid) ) {
    fprintf(stderr, "Failed to send lid over socket\n");
    return -1;
  }
  if( write(p->commfd, &qp_hndl->qp_num, sizeof(qp_hndl->qp_num) ) != sizeof(qp_hndl->qp_num) ) {
    fprintf(stderr, "Failed to send qpnum over socket\n");
    return -1;
  }
  if( read(p->commfd, &d_lid, sizeof(d_lid) ) != sizeof(d_lid) ) {
    fprintf(stderr, "Failed to read lid from socket\n");
    return -1;
  }
  if( read(p->commfd, &d_qp_num, sizeof(d_qp_num) ) != sizeof(d_qp_num) ) {
    fprintf(stderr, "Failed to read qpnum from socket\n");
    return -1;
  }
  
  LOGPRINTF(("Local: lid=%d qp_num=%d Remote: lid=%d qp_num=%d",
             lid, qp_hndl->qp_num, d_lid, d_qp_num));
    /* Further setup must be done to finalize the QP 'connection'.
     * First set the State of the qp to initialization by making a seperate
     * ibv_qp_attr* variable, giving it the initial values, and calling
     * ibv_qp_modify(..) to merge these settings into the QP.
     */
/* NOTE: According to openIB, ib_mthca's QP modify does not set alternate path
 *  fields in QP context, so you'll have to do this manually if necessary
 */

    /* Bring up Queue Pair */
  
  /******* INIT state ******/

  /* qp_attr is seperately allocated per qp/connection */
  qp_attr.qp_state = IBV_QPS_INIT;
  qp_attr.pkey_index = 0;
  qp_attr.port_num = port_num;
  qp_attr.qp_access_flags = IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ;
  /* merge the qp_attributes into the queue pair */
  ret = ibv_modify_qp(qp_hndl, &qp_attr,
		      IBV_QP_STATE              |
		      IBV_QP_PKEY_INDEX         |
		      IBV_QP_PORT               |
		      IBV_QP_ACCESS_FLAGS);
  if(ret) {
    fprintf(stderr, "Error modifying QP to INIT\n");
    return -1;
  }

  LOGPRINTF(("Modified QP to INIT"));

/* To enable the Queue Pair to finally receive data, it must be 
 * put into the 'RTR' (Ready-To-Receive) state.  The Queue Pair will NOT
 * function properly until it has been setup, and manually put through
 * the init and rtr states.
 */
  
  /******* RTR (Ready-To-Receive) state *******/

  qp_attr.qp_state = IBV_QPS_RTR;
  qp_attr.max_dest_rd_atomic = 1;
  qp_attr.dest_qp_num = d_qp_num;
  qp_attr.ah_attr.sl = 0;
  qp_attr.ah_attr.is_global = 0;
  qp_attr.ah_attr.dlid = d_lid;
  qp_attr.ah_attr.static_rate = 0;
  qp_attr.ah_attr.src_path_bits = 0;
  qp_attr.ah_attr.port_num = port_num;
  qp_attr.path_mtu = p->prot.ib_mtu;
  qp_attr.rq_psn = 0;
  qp_attr.pkey_index = 0;
  qp_attr.min_rnr_timer = 5;
  /* merge these settings into the qp */
  ret = ibv_modify_qp(qp_hndl, &qp_attr,
		      IBV_QP_STATE              |
		      IBV_QP_AV                 |
		      IBV_QP_PATH_MTU           |
		      IBV_QP_DEST_QPN           |
		      IBV_QP_RQ_PSN             |
		      IBV_QP_MAX_DEST_RD_ATOMIC |
		      IBV_QP_MIN_RNR_TIMER);

  if(ret) {
    fprintf(stderr, "Error modifying QP to RTR\n");
    return -1;
  }

  LOGPRINTF(("Modified QP to RTR"));

  /* Sync before going to RTS state */
  Sync(p);

  /* In the same manner, 'enable' sending on the queue pair */
  
  /******* RTS (Ready-to-Send) state *******/

  qp_attr.qp_state = IBV_QPS_RTS;
  qp_attr.sq_psn = 0;
  qp_attr.timeout = 31;
  qp_attr.retry_cnt = 1;
  qp_attr.rnr_retry = 1;
  qp_attr.max_rd_atomic = 1;

  ret = ibv_modify_qp(qp_hndl, &qp_attr,
		      IBV_QP_STATE              |
		      IBV_QP_TIMEOUT            |
		      IBV_QP_RETRY_CNT          |
		      IBV_QP_RNR_RETRY          |
		      IBV_QP_SQ_PSN             |
		      IBV_QP_MAX_QP_RD_ATOMIC);

  if(ret) {
    fprintf(stderr, "Error modifying QP to RTS\n");
    return -1;
  }
  
  LOGPRINTF(("Modified QP to RTS"));

  /* If using event completion, request the initial notification */
  /* This spawns a seperate thread to do the event handling and
   * notification.
   * NOTE:  This may have problems in systems with Weak Memory Consistency
   * since there are no mutex(*) calls to preserve coherancy??
   */ 
  if( p->prot.comptype == NP_COMP_EVENT ) {
    if (pthread_create(&thread, NULL, EventThread, NULL)) {
      fprintf(stderr, "Couldn't start event thread\n");
      return -1;
    }
    ibv_req_notify_cq(r_cq_hndl, 0);	/* request completion notification  */
  }					/* for the receive cq.  2nd argument 
					   specifies if ONLY 'solicited'
					   completions will be 'noticed' */
  
 
  return 0; /* if we get here, the connection is setup correctly */
}