Ejemplo n.º 1
0
/*
 * Record the verbs device name for entries of nd_read_info_arr.
 *
 * For every device reported by ibv_get_device_list(), the first entry of
 * nd_read_info_arr that is marked valid and carries the same GUID gets a
 * copy of the device name (terminating NUL included) in ofuv_name, and
 * its ofuv_name_valid flag is set to B_TRUE.
 *
 * If the device list cannot be obtained, a message is written to stderr
 * and the table is left untouched.
 */
static void
update_read_info_hwnames()
{
	struct ibv_device **devices;
	int ndev, d;

	devices = ibv_get_device_list(&ndev);
	if (devices == NULL) {
		fprintf(stderr, "No IB devices found\n");
		return;
	}

	for (d = 0; d < ndev; d++) {
		uint64_t guid =
		    (uint64_t)ntohll(ibv_get_device_guid(devices[d]));
		char *name = (char *)ibv_get_device_name(devices[d]);
		size_t name_sz = strlen(name) + 1;	/* include the NUL */
		int k = 0;

		/* Stop at the first valid entry whose GUID matches. */
		while (k < nd_read_info_cnt) {
			if (nd_read_info_arr[k].info_valid == B_TRUE &&
			    nd_read_info_arr[k].guid == guid) {
				memcpy(nd_read_info_arr[k].ofuv_name,
				    name, name_sz);
				nd_read_info_arr[k].ofuv_name_valid = B_TRUE;
				break;
			}
			k++;
		}
	}

	ibv_free_device_list(devices);
}
Ejemplo n.º 2
0
/*
 * Print the name, GUID and a few memory-registration limits of every
 * RDMA device reported by libibverbs.
 *
 * The device count is taken from ibv_get_device_list() instead of being
 * hard-coded.  A device that cannot be opened or queried is reported on
 * stderr and skipped.
 *
 * Returns 0 on success, -1 if the device list cannot be obtained.
 */
static int print_device_info(void) {
  struct ibv_device **ibv_devs;
  int num_devs = 0;
  int i;

  ibv_devs = ibv_get_device_list(&num_devs);
  if (!ibv_devs) {
    fprintf(stderr, "Failed to get IB devices list\n");
    return -1;
  }

  for (i = 0; i < num_devs; i++) {
    struct ibv_context *ibv_contxt;
    struct ibv_device_attr device_attr;
    const char *dev_name = ibv_get_device_name(ibv_devs[i]);
    uint64_t dev_guid = ibv_get_device_guid(ibv_devs[i]);

    /* Explicit casts keep the format specifiers and arguments in sync. */
    printf("%s (%llu):\n", dev_name, (unsigned long long)dev_guid);

    ibv_contxt = ibv_open_device(ibv_devs[i]);
    if (!ibv_contxt) {
      fprintf(stderr, "Couldn't open device %s\n", dev_name);
      continue;
    }

    if (ibv_query_device(ibv_contxt, &device_attr)) {
      fprintf(stderr, "Couldn't query device %s\n", dev_name);
    } else {
      printf("      Record           : %d\n", i);
      printf("         max_mr_size   : %llu\n",
             (unsigned long long)device_attr.max_mr_size);
      printf("         max_mr        : %lld\n",
             (long long)device_attr.max_mr);
    }

    ibv_close_device(ibv_contxt);
  }

  ibv_free_device_list(ibv_devs);
  return 0;
}
Ejemplo n.º 3
0
/*
 * Open the verbs device named ib_devname (or the first device in the
 * list when ib_devname is NULL) and store the context in ctx.context.
 *
 * The device list is released on every path, including the error paths.
 *
 * Returns 0 on success, -1 on failure (a message is printed to stderr).
 */
static int open_device(char *ib_devname)
{
	struct ibv_device **dev_list;
	int rc = -1;
	int i = 0;

	dev_list = ibv_get_device_list(NULL);
	if (!dev_list) {
		fprintf(stderr, "Failed to get IB devices list\n");
		return -1;
	}

	if (ib_devname) {
		/* The list is NULL-terminated; stop on the first name match. */
		for (; dev_list[i]; ++i) {
			if (!strcmp(ibv_get_device_name(dev_list[i]), ib_devname))
				break;
		}
	}
	if (!dev_list[i]) {
		fprintf(stderr, "IB device %s not found\n",
			ib_devname ? ib_devname : "");
		goto out;
	}

	ctx.context = ibv_open_device(dev_list[i]);
	if (!ctx.context) {
		fprintf(stderr, "Couldn't get context for %s\n",
			ibv_get_device_name(dev_list[i]));
		goto out;
	}

	rc = 0;
out:
	ibv_free_device_list(dev_list);
	return rc;
}
Ejemplo n.º 4
0
/*
 * Look up the verbs device called device_name and open it.
 *
 * Returns the opened context, or NULL when the device list cannot be
 * obtained, no device has that name, or the device fails to open.
 * The device list is always released before returning.
 */
static struct ibv_context *get_device_context(const char *device_name)
{
	struct ibv_device **devs;
	struct ibv_context *opened = NULL;
	int ndevs;
	int idx;

	devs = ibv_get_device_list(&ndevs);
	if (devs == NULL) {
		fprintf(stderr, "Error, ibv_get_device_list() failed\n");
		return NULL;
	}

	for (idx = 0; idx < ndevs; idx++) {
		struct ibv_device *cand = devs[idx];

		/* skip devices whose name does not match */
		if (strcmp(ibv_get_device_name(cand), device_name) != 0)
			continue;

		/* only the first matching device is tried */
		opened = ibv_open_device(cand);
		if (opened != NULL)
			printf("The device '%s' was detected\n", device_name);
		else
			fprintf(stderr, "Error, failed to open the device '%s'\n",
				ibv_get_device_name(cand));
		break;
	}

	ibv_free_device_list(devs);

	return opened;
}
Ejemplo n.º 5
0
/*
 * Add every RDMA device known to libibverbs to devlistp.
 *
 * Returns 0 on success (including when there are no devices or the list
 * cannot be obtained), -1 if add_dev() fails for a device.
 */
int
rdmasniff_findalldevs(pcap_if_list_t *devlistp, char *err_str)
{
	struct ibv_device **dev_list;
	int numdev;
	int i;
	int ret = 0;

	dev_list = ibv_get_device_list(&numdev);
	if (!dev_list) {
		return 0;
	}

	/*
	 * An empty list is still an allocated list; with numdev == 0 the
	 * loop is skipped and the list is freed below instead of leaked.
	 */
	for (i = 0; i < numdev; ++i) {
		/*
		 * XXX - do the notions of "up", "running", or
		 * "connected" apply here?
		 */
		if (!add_dev(devlistp, dev_list[i]->name, 0, "RDMA sniffer", err_str)) {
			ret = -1;
			break;
		}
	}

	ibv_free_device_list(dev_list);
	return ret;
}
Ejemplo n.º 6
0
/*
 * Run psofed_scan_hca_ports() on every non-NULL device in the verbs
 * device list, then fall back to port 1 if psofed_port is still unset
 * and log the port in use.
 *
 * A failure to obtain the device list only skips the scan; the fallback
 * and the log line are executed either way.
 */
static
void psofed_scan_all_ports(void)
{
	struct ibv_device **devs;
	int ndevs;
	int k;

	devs = ibv_get_device_list(&ndevs);
	if (devs) {
		for (k = 0; k < ndevs; k++) {
			if (devs[k])
				psofed_scan_hca_ports(devs[k]);
		}
		ibv_free_device_list(devs);
	}

	if (!psofed_port) psofed_port = 1;
	psofed_dprint(2, "using port <%s>", port_name(psofed_hca, psofed_port));
}
Ejemplo n.º 7
0
// Fetch the verbs device list and wrap it in a DevicesPtr that releases
// it through ibv_free_device_list.
// Throws std::runtime_error when the list cannot be obtained.
static DevicesPtr get_devices() {
    int count;
    auto list = ibv_get_device_list(&count);
    if (list == nullptr) {
        throw std::runtime_error("cannot get devices");
    }
    return DevicesPtr(list, ibv_free_device_list);
}
/*
 * Initialize the oob/ud component: set up the first usable verbs device,
 * append it to mca_oob_ud_component.ud_devices and initialize the module.
 *
 * *priority is always set to 0.
 *
 * Returns &mca_oob_ud_module on success, NULL when no device is usable,
 * allocation fails, or module initialization fails.  The verbs device
 * list is released on every path once it has been obtained.
 */
static mca_oob_t *mca_oob_ud_component_init(int *priority)
{
    struct ibv_device **devices;
    int num_devices, i, rc;

    /* set the priority so that we will select this component
     * only if someone directs to do so
     */
    *priority = 0;

    opal_hash_table_init (&mca_oob_ud_component.ud_peers, 1024);

    devices = ibv_get_device_list (&num_devices);
    if (NULL == devices || 0 == num_devices) {
        OPAL_OUTPUT_VERBOSE((5, mca_oob_base_output, "%s oob:ud:component_init no devices found",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
        /* an empty list is still an allocated list */
        if (NULL != devices) {
            ibv_free_device_list (devices);
        }
        return NULL;
    }

    for (i = 0 ; i < num_devices ; ++i) {
        mca_oob_ud_device_t *device = OBJ_NEW(mca_oob_ud_device_t);

        if (NULL == device) {
            opal_output (0, "oob:ud:component_init malloc failure. errno = %d",
                         errno);
            ibv_free_device_list (devices);
            return NULL;
        }

        rc = mca_oob_ud_device_setup (device, devices[i]);
        if (ORTE_SUCCESS != rc) {
            /* this device is unusable; try the next one */
            OBJ_RELEASE(device);
            continue;
        }

        opal_list_append (&mca_oob_ud_component.ud_devices,
                          (opal_list_item_t *) device);

        /* NTH: support only 1 device for now */
        break;
    }

    ibv_free_device_list (devices);

    if (0 == opal_list_get_size (&mca_oob_ud_component.ud_devices)) {
        OPAL_OUTPUT_VERBOSE((5, mca_oob_base_output, "%s oob:ud:component_init no usable devices found.",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
        return NULL;
    }

    /* have to call the module init here so we can test for available qpair */
    if (ORTE_SUCCESS != mca_oob_ud_module_init()) {
        return NULL;
    }

    return &mca_oob_ud_module;
}
Ejemplo n.º 9
0
/*
 * Test program: for each verbs device, ask hwloc for its cpuset and its
 * OS-device object, print what was found and assert that the OS device
 * is an OpenFabrics object whose name equals the verbs device name.
 *
 * Always returns 0, including when the device list cannot be obtained.
 */
int main(void)
{
  hwloc_topology_t topo;
  struct ibv_device **devices;
  int ndev, idx;

  devices = ibv_get_device_list(&ndev);
  if (devices == NULL) {
    fprintf(stderr, "ibv_get_device_list failed\n");
    return 0;
  }
  printf("ibv_get_device_list found %d devices\n", ndev);

  /* Keep the important PCI and OS devices in the topology. */
  hwloc_topology_init(&topo);
  hwloc_topology_set_type_filter(topo, HWLOC_OBJ_PCI_DEVICE, HWLOC_TYPE_FILTER_KEEP_IMPORTANT);
  hwloc_topology_set_type_filter(topo, HWLOC_OBJ_OS_DEVICE, HWLOC_TYPE_FILTER_KEEP_IMPORTANT);
  hwloc_topology_load(topo);

  for (idx = 0; idx < ndev; idx++) {
    struct ibv_device *cur = devices[idx];
    hwloc_bitmap_t cpus = hwloc_bitmap_alloc();
    int status = hwloc_ibv_get_device_cpuset(topo, cur, cpus);

    if (status >= 0) {
      char *text = NULL;
      hwloc_obj_t osdev;

      hwloc_bitmap_asprintf(&text, cpus);
      printf("got cpuset %s for device %d (%s)\n",
             text, idx, ibv_get_device_name(cur));
      free(text);

      osdev = hwloc_ibv_get_device_osdev(topo, cur);
      if (osdev) {
        assert(osdev->type == HWLOC_OBJ_OS_DEVICE);
        printf("found OS object subtype %u lindex %u name %s\n",
               (unsigned) osdev->attr->osdev.type, osdev->logical_index, osdev->name);
        assert(osdev->attr->osdev.type == HWLOC_OBJ_OSDEV_OPENFABRICS);
        /* the OS device must carry the verbs device name */
        if (strcmp(ibv_get_device_name(cur), osdev->name) != 0)
          assert(0);
      }
    } else {
      printf("failed to get cpuset for device %d (%s)\n",
             idx, ibv_get_device_name(cur));
    }

    hwloc_bitmap_free(cpus);
  }

  hwloc_topology_destroy(topo);

  ibv_free_device_list(devices);

  return 0;
}
Ejemplo n.º 10
0
/*
 * Find a verbs device by HCA name.
 *
 * With in_hca_name == NULL the first entry of the device list is
 * returned; otherwise the first device whose name equals in_hca_name.
 * Returns NULL (after reporting through psofed_err_errno) when the list
 * cannot be obtained, is empty, or holds no device of that name.
 *
 * NOTE(review): the device list is freed before the pointer is handed
 * back; the libibverbs documentation says pointers to devices that were
 * not opened become invalid at that point -- confirm how callers use the
 * returned pointer.
 */
static
struct ibv_device *psofed_get_dev_by_hca_name(const char *in_hca_name)
{
	struct ibv_device **devs;
	struct ibv_device *found = NULL;
	int ndevs;

	devs = ibv_get_device_list(&ndevs);
	if (!devs) {
		psofed_err_errno("ibv_get_devices() failed : No IB dev found", errno);
		return 0;
	}

	if (in_hca_name) {
		int k;

		/* walk the list until a name matches or a NULL entry is hit */
		for (k = 0; k < ndevs && devs[k]; k++) {
			if (!strcmp(ibv_get_device_name(devs[k]), in_hca_name)) {
				found = devs[k];
				break;
			}
		}

		if (!found) {
			static char err_str[50];
			snprintf(err_str, sizeof(err_str), "IB device \"%s\"", in_hca_name);
			psofed_err_errno(err_str, ENODEV);
		}
	} else {
		/* no name requested: take whatever comes first */
		found = devs[0];
		if (!found)
			psofed_err_errno("ibv_get_devices() failed : IB dev list empty", errno);
	}

	ibv_free_device_list(devs);
	return found;
}
Ejemplo n.º 11
0
// Open the first InfiniBand device in the verbs device list and return
// its context.  Every failure is reported through CHECK.
//
// The device list is released once the device has been opened; the
// libibverbs documentation states that an opened device stays valid
// after the list is freed.
ibv_context* open_default_device() {
  ibv_device** dev_list;
  ibv_device* ib_dev;
  dev_list = ibv_get_device_list(NULL);
  CHECK(dev_list) << "No InfiniBand device found";
  ib_dev = dev_list[0];
  CHECK(ib_dev) << "No InfiniBand device found";
  ibv_context* context = ibv_open_device(ib_dev);
  CHECK(context) << "Open context failed for " << ibv_get_device_name(ib_dev);
  // The list was previously leaked on every call.
  ibv_free_device_list(dev_list);
  return context;
}
Ejemplo n.º 12
0
/*
 * Convenience function. Given the name of an HFI, opens it and
 * returns the verbs context; the matching ibv_device is stored
 * through 'device'.
 *
 * HFI can be identified by name ("mthfi0") or by number
 * "1", "2", et cetera. A NULL or empty name, the number 0, or a
 * number larger than the number of devices selects the first device.
 *
 * The device list is fetched once and cached in the file-scope
 * 'dev_list' / 'num_devices'; it is not released here.
 *
 * Returns NULL and sets errno on failure:
 *   EFAULT - the device list could not be obtained, the named HFI
 *            is not in the list, or the device could not be opened;
 *   ENODEV - the selected list slot is empty.
 * On failure *device is set to NULL (when 'device' is non-NULL).
 *
 * OPENS THE HFI! Use ibv_close_device() to release it.
 */
struct ibv_context *
op_path_find_hfi(const char *name,
			    struct ibv_device **device)
{
	struct ibv_device	*ibv_dev = NULL;
	struct ibv_context	*context = NULL;
	int i;

	/* Give the out-parameter a defined value on every error path. */
	if (device)
		*device = NULL;

	if (!dev_list) {
		dev_list = ibv_get_device_list(&num_devices);
	}
	if (!dev_list) {
		errno = EFAULT;
		return NULL;
	}

	if (name == NULL || name[0]=='\0') {
		i=0;
	} else if (isdigit((unsigned char)name[0])) {
		/*
		 * 1-based device number. Do the range check in unsigned long
		 * before narrowing to int, so "0" and oversized numbers fall
		 * back to the first device without relying on integer wrap.
		 */
		unsigned long n = strtoul(name,NULL,0);

		if (n == 0 || n > (unsigned long)num_devices)
			i = 0;
		else
			i = (int)(n - 1);
	} else {
		for (i=0; i < num_devices; i++) {
			if (!strcmp(ibv_get_device_name(dev_list[i]), name))
				break;
		}
		if (i >= num_devices) {
			errno = EFAULT;
			return NULL;
		}
	}
	/* With an empty list this is the NULL terminator. */
	ibv_dev = dev_list[i];
	if (!ibv_dev) {
		errno = ENODEV;
		return NULL;
	}

	/*
	 * Opens the verbs interface to the HFI.
	 * Note that this will increment the usage counter for that HFI.
	 */
	context = ibv_open_device(ibv_dev);
	if (!context) {
		errno = EFAULT;
		return NULL;
	}

	if (device)
		*device = ibv_dev;

	return context;
}
Ejemplo n.º 13
0
/*
 * Create a pcap handle for an RDMA device.
 *
 * 'device' is "<name>" or "<name>:<port>"; a missing port, or one that
 * parses to 0, means port 1 (in the latter case the whole string,
 * colon included, is used as the device name).
 *
 * *is_ours is set to 1 only when a verbs device with that name exists.
 * Returns the new handle, or NULL when there is no such device or
 * pcap_create_common() fails.
 *
 * NOTE(review): priv->rdma_device keeps a pointer taken from a list
 * that is freed before returning; the libibverbs documentation says
 * unopened device pointers become invalid then -- confirm against the
 * activate routine.
 */
pcap_t *
rdmasniff_create(const char *device, char *ebuf, int *is_ours)
{
	struct pcap_rdmasniff *priv;
	struct ibv_device **dev_list;
	int numdev;
	size_t namelen;
	const char *port;
	unsigned long port_num;
	int i;
	pcap_t *p = NULL;

	*is_ours = 0;

	dev_list = ibv_get_device_list(&numdev);
	if (!dev_list) {
		return NULL;
	}
	if (!numdev) {
		/* an empty list is still allocated; do not leak it */
		ibv_free_device_list(dev_list);
		return NULL;
	}

	namelen = strlen(device);

	port = strchr(device, ':');
	if (port) {
		port_num = strtoul(port + 1, NULL, 10);
		if (port_num > 0) {
			/* compare only the part before the colon */
			namelen = port - device;
		} else {
			port_num = 1;
		}
	} else {
		port_num = 1;
	}

	for (i = 0; i < numdev; ++i) {
		if (strlen(dev_list[i]->name) == namelen &&
		    !strncmp(device, dev_list[i]->name, namelen)) {
			*is_ours = 1;

			p = pcap_create_common(ebuf, sizeof (struct pcap_rdmasniff));
			if (p) {
				p->activate_op = rdmasniff_activate;
				priv = p->priv;
				priv->rdma_device = dev_list[i];
				priv->port_num = port_num;
			}
			break;
		}
	}

	ibv_free_device_list(dev_list);
	return p;
}
Ejemplo n.º 14
0
//uint16_t pc_init(struct ibv_pd* pd, struct pc_ibv_co *input_co, int device_num)
/*
 * Query the LID of port 1 on verbs device number device_num and, on
 * the first call only, set up the HCA queues and enqueue a pc_hca
 * record that holds the LID and input_co.
 *
 * Returns the LID on the first call.  On later calls (init_hcas_q is
 * already set) returns (uint16_t)-1 and changes no queue.
 *
 * An invalid device number, a failure to open or query the device, or
 * an allocation failure terminates the process with exit(1).  The
 * verbs context is only needed for the port query and is closed
 * afterwards; the device list is freed on every returning path.
 */
uint16_t pc_init(struct pc_ibv_co *input_co, int device_num)
{
  struct ibv_device **dev_list;
  struct ibv_context *ctx;
  struct ibv_port_attr pattr;
  struct pc_hca *hca;
  uint16_t lid;
  int num_of_hcas = 0;

  dev_list  = ibv_get_device_list(&num_of_hcas);
  if (dev_list == NULL || device_num < 0 || num_of_hcas <= device_num) {
    fprintf(stderr, "Invalide devic_num: %d @%s:%d\n", device_num, __FILE__, __LINE__);
    exit(1);
  }

  ctx = ibv_open_device(dev_list[device_num]);
  if (ctx == NULL) {
    fprintf(stderr, "ibv_open_device failed for device %d @%s:%d\n", device_num, __FILE__, __LINE__);
    exit(1);
  }
  if (ibv_query_port(ctx, 1, &pattr)) {
    fprintf(stderr, "ibv_query_port failed for device %d @%s:%d\n", device_num, __FILE__, __LINE__);
    exit(1);
  }
  lid = pattr.lid;

  /* Nothing below uses the context or the list any more. */
  ibv_close_device(ctx);
  ibv_free_device_list(dev_list);

  if (init_hcas_q == 0) {
    lq_init(&hcas_q);
    lq_init(&ctx_lid_q);
    init_hcas_q = 1;
  }
  /*remove*/
  else
    {
      return -1;
    }
  /*remove*/

  hca = malloc(sizeof *hca);
  if (hca == NULL) {
    fprintf(stderr, "malloc failed @%s:%d\n", __FILE__, __LINE__);
    exit(1);
  }
  hca->lid = lid;
  hca->co = input_co;
  hca->co->pdg_num = 0;
  hca->co->pdg_size = 0;
  lq_enq(&hcas_q, hca);

  lq_init(&pc_q);
  return lid;
}
Ejemplo n.º 15
0
/*
 * Return the verbs device named ib_devname, or the first device in the
 * list when ib_devname is NULL.  Returns NULL (after printing a message
 * to stderr) when the list cannot be obtained or no device matches.
 *
 * The device list is intentionally not freed here: the returned pointer
 * refers to an entry of that list, and the libibverbs documentation says
 * unopened devices become invalid once the list is released.
 */
static struct ibv_device *pp_find_dev(const char *ib_devname) {
	struct ibv_device **dev_list;
	struct ibv_device *ib_dev = NULL;

	dev_list = ibv_get_device_list(NULL);
	if (!dev_list) {
		/* previously dereferenced unconditionally */
		fprintf(stderr, "Failed to get IB devices list\n");
		return NULL;
	}

	if (!ib_devname) {
		ib_dev = dev_list[0];
		if (!ib_dev)
			fprintf(stderr, "No IB devices found\n");
	} else {
		/* the list is NULL-terminated */
		for (; (ib_dev = *dev_list); ++dev_list)
			if (!strcmp(ibv_get_device_name(ib_dev), ib_devname))
				break;
		if (!ib_dev)
			fprintf(stderr, "IB device %s not found\n", ib_devname);
	}
	return ib_dev;
}
Ejemplo n.º 16
0
Archivo: lsdev.c Proyecto: joerns/ibmsg
/*
 * List every verbs device: name, dev_name, dev_path, ibdev_path and the
 * value returned by ibv_get_device_guid() in hexadecimal, tab-separated.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE if the device list cannot be
 * obtained.
 */
int
main(int argc, char** argv)
{
	struct ibv_device** devices;
	int num_devices = 0;
	int i;

	(void)argc;
	(void)argv;

	devices = ibv_get_device_list(&num_devices);
	if (!devices) {
		perror("ibv_get_device_list");
		return EXIT_FAILURE;
	}

	for(i=0; i<num_devices; i++)
	{
		uint64_t guid = ibv_get_device_guid(devices[i]);
		/* cast so the specifier is correct where long is 32 bits */
		printf("%s \t%s \t%s \t%s \t0x%llx\n",
		       devices[i]->name,
		       devices[i]->dev_name,
		       devices[i]->dev_path,
		       devices[i]->ibdev_path,
		       (unsigned long long)guid);
	}

	ibv_free_device_list(devices);

	return EXIT_SUCCESS;
}
Ejemplo n.º 17
0
/*
 * USNIC plugs into the verbs framework, but is not a usable device.
 * Probe the device list by hand and fail gracefully when nothing can be
 * opened, so that the lower libraries (libibverbs and librdmacm) do not
 * report error messages to stderr.
 *
 * Returns 1 as soon as one device in the list can be opened (it is
 * closed again immediately), 0 if the list cannot be obtained or no
 * device opens.
 */
static int fi_ibv_have_device(void)
{
	struct ibv_device **list, **cur;
	int found = 0;

	list = ibv_get_device_list(NULL);
	if (list == NULL)
		return 0;

	/* the list is NULL-terminated; stop at the first device that opens */
	for (cur = list; *cur != NULL && !found; cur++) {
		struct ibv_context *probe = ibv_open_device(*cur);

		if (probe != NULL) {
			ibv_close_device(probe);
			found = 1;
		}
	}

	ibv_free_device_list(list);
	return found;
}
Ejemplo n.º 18
0
/** ========================================================================= */
/*
 * Open the verbs device whose name matches port->hfi_name, store the
 * context in port->verbs_ctx and initialize port->lock.
 *
 * Returns 0 on success, EIO when the device list cannot be obtained,
 * the device is not found, it cannot be opened, or sem_init() fails
 * (in which case the context is closed again).
 */
static int open_verbs_ctx(struct oib_port *port)
{
	int i;
	int num_devices = 0;
	struct  ibv_device **dev_list;

	dev_list = ibv_get_device_list(&num_devices);
	if (dev_list == NULL) {
		/* previously fell through with num_devices uninitialized */
		OUTPUT_ERROR("failed to get verbs device list\n");
		return EIO;
	}

	for (i = 0; i < num_devices; ++i)
		if (dev_list[i] != NULL
		    && (strncmp(dev_list[i]->name, port->hfi_name, sizeof(dev_list[i]->name)) == 0) )
			break;

	if (i >= num_devices) {
		ibv_free_device_list(dev_list);
		OUTPUT_ERROR("failed to find verbs device\n");
		return EIO;
	}

	port->verbs_ctx = ibv_open_device(dev_list[i]);

	ibv_free_device_list(dev_list);

	if (port->verbs_ctx == NULL)
	{
		OUTPUT_ERROR("failed to open verbs device\n");
		return EIO;
	}

	if (sem_init(&port->lock,0,1) != 0)
	{
		ibv_close_device(port->verbs_ctx);
		OUTPUT_ERROR("failed to init registry lock\n");
		return EIO;
	}

	return 0;
}
Ejemplo n.º 19
0
/*
 * Print a two-column table (device name, node GUID in hexadecimal) of
 * every verbs device.
 *
 * Returns 0 on success, 1 if the device list cannot be obtained.
 */
int main(int argc, char *argv[])
{
	struct ibv_device **list;
	int count;
	int idx = 0;

	list = ibv_get_device_list(&count);
	if (list == NULL) {
		perror("Failed to get IB devices list");
		return 1;
	}

	/* header and underline */
	printf("    %-16s\t   node GUID\n", "device");
	printf("    %-16s\t----------------\n", "------");

	while (idx < count) {
		struct ibv_device *dev = list[idx++];
		const char *name = ibv_get_device_name(dev);
		unsigned long long guid =
			(unsigned long long) ntohll(ibv_get_device_guid(dev));

		printf("    %-16s\t%016llx\n", name, guid);
	}

	ibv_free_device_list(list);

	return 0;
}
Ejemplo n.º 20
0
/*
 * Open the first verbs device and allocate a protection domain on it,
 * storing device, context and PD in the global 'hca'.
 *
 * Returns 0 on success, 1 on failure (a message is printed to stderr).
 *
 * The device list is released once the device has been opened; the
 * libibverbs documentation states that an opened device stays valid
 * after the list is freed.  When the open fails, hca.ib_dev is reset to
 * NULL so it does not point into the freed list.
 */
int open_hca(void)
{
    struct ibv_device **dev_list;
    int num_hcas = 0;

    dev_list = ibv_get_device_list(&num_hcas);
    if(!dev_list) {
        fprintf(stderr,"Failed to get IB devices list\n");
        return 1;
    }
    if(num_hcas == 0 || !dev_list[0]) {
        fprintf(stderr,"No IB devices found\n");
        ibv_free_device_list(dev_list);
        return 1;
    }

    // Assume that the first device has an ACTIVE port
    // if it does not we do not handle this situation for now

    hca.ib_dev = dev_list[0];

    hca.context = ibv_open_device(hca.ib_dev);

    if(!hca.context) {
        fprintf(stderr,"Couldn't get context %s\n",
                ibv_get_device_name(hca.ib_dev));
        hca.ib_dev = NULL;
        ibv_free_device_list(dev_list);
        return 1;
    }

    ibv_free_device_list(dev_list);

    hca.pd = ibv_alloc_pd(hca.context);

    /* No assert here: it made the error path below unreachable. */
    if(!hca.pd) {
        fprintf(stderr,"Couldn't get pd %s\n",
                ibv_get_device_name(hca.ib_dev));
        return 1;
    }

    return 0;
}
Ejemplo n.º 21
0
/**
 * Initialize the HCAs
 * Look at rdma_open_hca() & rdma_iba_hca_init_noqp() in
 * mvapich2/trunk/src/mpid/ch3/channels/mrail/src/gen2/rdma_iba_priv.c
 *
 * Store all the HCA info in mv2_nem_dev_info_t->hca[hca_num]
 *
 * For each of the first ib_hca_num_hcas devices this opens the device,
 * allocates a protection domain and records the HCA type in hca_list.
 * ib_hca_num_hcas is lowered to the number of devices found when the
 * user asked for more.
 *
 * Output:
 *         hca_list: fill it with the HCAs information
 *
 * Returns mpi_errno, which starts out as MPI_SUCCESS. The device list
 * is freed at fn_exit on every path that reaches that label.
 *
 * \see hca_list
 */
int MPID_nem_ib_init_hca()
{
    int mpi_errno = MPI_SUCCESS;

    MPIDI_STATE_DECL(MPID_STATE_MPIDI_INIT_HCA);
    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_INIT_HCA);


    struct ibv_device *ib_dev    = NULL;
    struct ibv_device **dev_list = NULL;
    int nHca;
    int num_devices = 0;

#ifdef CRC_CHECK
    gen_crc_table();
#endif
    /* Start from an all-zero table. */
    memset( hca_list, 0, sizeof(hca_list) );

    /* Get the list of devices */
    /* The MPIU_ERR_* macros below presumably set mpi_errno and jump to
     * fn_fail -- their definitions are not visible here. */
    dev_list = ibv_get_device_list(&num_devices);
    if (dev_list==NULL) {
        MPIU_ERR_SETFATALANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**fail",
	            "**fail %s", "No IB device found");
    }

    if (umad_init() < 0)
        MPIU_ERR_SETFATALANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**fail",
	            "**fail %s", "Can't init UMAD library");

    /* Runtime checks */
    /* The clamp below only takes effect when the assertion does not fire. */
    MPIU_Assert( num_devices<=MAX_NUM_HCAS );
    if ( num_devices> MAX_NUM_HCAS) {
        MPIU_Error_printf( "WARNING: found %d IB devices, the maximum is %d (MAX_NUM_HCAS). ",
        		num_devices, MAX_NUM_HCAS);
        num_devices = MAX_NUM_HCAS;
    }

    if ( ib_hca_num_hcas > num_devices) {
    	MPIU_Error_printf( "WARNING: user requested %d IB devices, the available number is %d. ",
        		ib_hca_num_hcas, num_devices);
        ib_hca_num_hcas = num_devices;
    }

    MPIU_DBG_MSG_P( CH3_CHANNEL, VERBOSE, "[HCA] Found %d HCAs\n", num_devices);
    MPIU_DBG_MSG_P( CH3_CHANNEL, VERBOSE, "[HCA] User requested %d\n", ib_hca_num_hcas);


    /* Retrieve information for each found device */
    for (nHca = 0; nHca < ib_hca_num_hcas; nHca++) {

    	/* Check for user choice */
        /* NOTE(review): as written, the by-name search in the else branch
         * runs only when rdma_iba_hca is non-empty, compares equal to
         * RDMA_IBA_NULL_HCA and a single HCA is requested; the second
         * test looks inverted relative to the comments -- confirm. */
        if( (rdma_iba_hca[0]==0) || (strncmp(rdma_iba_hca, RDMA_IBA_NULL_HCA, 32)!=0) || (ib_hca_num_hcas > 1)) {
            /* User hasn't specified any HCA name, or the number of HCAs is greater then 1 */
            ib_dev = dev_list[nHca];

        } else {
            /* User specified a HCA, try to look for it */
            int dev_count;

            dev_count = 0;
            /* Walk the NULL-terminated list; ib_dev keeps its previous
             * value when no name matches. */
            while(dev_list[dev_count]) {
                if(!strncmp(ibv_get_device_name(dev_list[dev_count]), rdma_iba_hca, 32)) {
                    ib_dev = dev_list[dev_count];
                    break;
                }
                dev_count++;
            }
        }

        /* Check if device has been identified */
        hca_list[nHca].ib_dev = ib_dev;
        if (!ib_dev) {
	        MPIU_ERR_SETFATALANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**fail",
		            "**fail %s", "No IB device found");
        }

        MPIU_DBG_MSG_P( CH3_CHANNEL, VERBOSE, "[HCA] HCA device %d found\n", nHca);



        /* Open the device, then allocate a protection domain on it. */
        hca_list[nHca].nic_context = ibv_open_device(ib_dev);
        if (hca_list[nHca].nic_context==NULL) {
	        MPIU_ERR_SETFATALANDJUMP2(mpi_errno, MPI_ERR_OTHER, "**fail",
		            "%s %d", "Failed to open HCA number", nHca);
        }

        hca_list[nHca].ptag = ibv_alloc_pd(hca_list[nHca].nic_context);
        if (!hca_list[nHca].ptag) {
            MPIU_ERR_SETFATALANDJUMP2(mpi_errno, MPI_ERR_OTHER,
                    "**fail", "%s%d", "Failed to alloc pd number ", nHca);
        }


        /* Set the hca type */
    #if defined(RDMA_CM)
        if (process_info.use_iwarp_mode) {
    	    if ((mpi_errno = rdma_cm_get_hca_type(process_info.use_iwarp_mode, &process_info.hca_type)) != MPI_SUCCESS)
    	    {
    		    MPIU_ERR_POP(mpi_errno);
    	    }

    	    if (process_info.hca_type == CHELSIO_T3)
    	    {
    		    process_info.use_iwarp_mode = 1;
    	    }
        }
        else
    #endif /* defined(RDMA_CM) */

        /* With RDMA_CM defined, the next statement is the body of the
         * 'else' above; without it, it always runs. */
		mpi_errno = get_hca_type(hca_list[nHca].ib_dev, hca_list[nHca].nic_context, &hca_list[nHca].hca_type);
        if (mpi_errno != MPI_SUCCESS)
        {
        	fprintf(stderr, "[%s, %d] Error in get_hca_type", __FILE__, __LINE__ );
            MPIU_ERR_POP(mpi_errno);
        }

    }



    /* NOTE(review): when the loop above runs to completion, nHca equals
     * ib_hca_num_hcas, so hca_list[nHca] below is the entry after the
     * last one filled in (still zeroed by the memset) -- confirm that
     * hca_list[0].ib_dev was not intended. */
    if (!strncmp(rdma_iba_hca, RDMA_IBA_NULL_HCA, 32) &&
        (ib_hca_num_hcas==1) && (num_devices > nHca) &&
        (rdma_find_active_port(hca_list[0].nic_context, hca_list[nHca].ib_dev)==-1)) {
        /* Trac #376 - There are multiple rdma capable devices (num_devices) in
         * the system. The user has asked us to use ANY (!strncmp) ONE device
         * (rdma_num_hcas), and the first device does not have an active port. So
         * try to find some other device with an active port.
         */
    	int j;
        /* NOTE(review): each iteration overwrites hca_list[0].nic_context
         * without closing the previous context, and the loop keeps going
         * after a device with an active port has been found -- confirm
         * whether a close and a break are missing. */
        for (j = 0; dev_list[j]; j++) {
            ib_dev = dev_list[j];
            if (ib_dev) {
            	hca_list[0].nic_context = ibv_open_device(ib_dev);
                if (!hca_list[0].nic_context) {
                    /* Go to next device */
                    continue;
                }
                if (rdma_find_active_port(hca_list[0].nic_context, ib_dev)!=-1) {
                	hca_list[0].ib_dev = ib_dev;
                	hca_list[0].ptag = ibv_alloc_pd(hca_list[0].nic_context);
                    if (!hca_list[0].ptag) {
                        MPIU_ERR_SETFATALANDJUMP2(mpi_errno, MPI_ERR_OTHER,
                             "**fail", "%s%d", "Failed to alloc pd number ", nHca);
                    }
                }
            }
        }
    }

fn_exit:
    /* Clean up before exit */
	if (dev_list!=NULL)
	  ibv_free_device_list(dev_list);

    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_INIT_HCA);
    return mpi_errno;
fn_fail:
    /* Failures share the cleanup at fn_exit. */
    goto fn_exit;
}
Ejemplo n.º 22
0
/*
 * Back map segment 's' with 'size' bytes registered through verbs.
 *
 * Steps, each skipped once 'rc' holds an error:
 *   1. get the verbs device list;
 *   2. open a device: the one named by mca_memheap_base_param_hca_name
 *      when that is set, otherwise the first device that opens;
 *   3. query the device attributes;
 *   4. allocate a protection domain;
 *   5. register the memory region(s) and fill in 's'.
 *
 * All verbs state is kept in the function-static 'memheap_device',
 * which is zeroed on entry and stored in s->context on success.  This
 * function does not release that state on failure.
 *
 * Returns OSHMEM_SUCCESS or an OSHMEM_ERR_* code.  Contexts opened for
 * devices that turn out not to be the requested HCA are closed again,
 * and a requested HCA that is not present yields
 * OSHMEM_ERR_NOT_SUPPORTED instead of falling back to the last device
 * in the list.
 */
static int _ibv_attach(map_segment_t *s, size_t size)
{
    int rc = OSHMEM_SUCCESS;
    static openib_device_t memheap_device;
    openib_device_t *device = &memheap_device;
    int num_devs = 0;

    assert(s);

    memset(device, 0, sizeof(*device));

#ifdef HAVE_IBV_GET_DEVICE_LIST
    device->ib_devs = ibv_get_device_list(&num_devs);
#else
#error unsupported ibv_get_device_list in infiniband/verbs.h
#endif

    if (num_devs == 0 || !device->ib_devs)
    {
        rc = OSHMEM_ERR_NOT_SUPPORTED;
    }

    /* Open device */
    if (!rc)
    {
        int i = 0;
        int selected = 0;

        if (num_devs > 1)
        {
            if (NULL == mca_memheap_base_param_hca_name)
            {
                MEMHEAP_VERBOSE(5, "found %d HCAs, choosing the first", num_devs);
            }
            else
            {
                MEMHEAP_VERBOSE(5, "found %d HCAs, searching for %s", num_devs, mca_memheap_base_param_hca_name);
            }
        }

        for (i = 0; i < num_devs; i++)
        {
            device->ib_dev = device->ib_devs[i];

            device->ib_dev_context = ibv_open_device(device->ib_dev);
            if (NULL == device->ib_dev_context)
            {
                MEMHEAP_ERROR("error obtaining device context for %s errno says %d: %s",
                        ibv_get_device_name(device->ib_dev), errno, strerror(errno));
                rc = OSHMEM_ERR_RESOURCE_BUSY;
                continue;
            }

            if (NULL != mca_memheap_base_param_hca_name &&
                0 != strcmp(mca_memheap_base_param_hca_name, ibv_get_device_name(device->ib_dev)))
            {
                /* Not the requested HCA: release it and keep looking. */
                ibv_close_device(device->ib_dev_context);
                device->ib_dev_context = NULL;
                continue;
            }

            /* Never hand a NULL pointer to %s. */
            MEMHEAP_VERBOSE(5, "mca_memheap_base_param_hca_name = %s, selected %s as %d of %d",
                    (NULL != mca_memheap_base_param_hca_name) ? mca_memheap_base_param_hca_name : "(null)",
                    ibv_get_device_name(device->ib_dev), i, num_devs);
            rc = OSHMEM_SUCCESS;
            selected = 1;
            break;
        }

        /*
         * Every device opened but none carried the requested name.
         * (If an open failed, rc already holds OSHMEM_ERR_RESOURCE_BUSY.)
         */
        if (!selected && OSHMEM_SUCCESS == rc)
        {
            MEMHEAP_ERROR("requested HCA %s was not found among %d device(s)",
                    mca_memheap_base_param_hca_name, num_devs);
            device->ib_dev = NULL;
            rc = OSHMEM_ERR_NOT_SUPPORTED;
        }
    }

    /* Obtain device attributes */
    if (!rc)
    {
        if (ibv_query_device(device->ib_dev_context, &device->ib_dev_attr))
        {
            MEMHEAP_ERROR("error obtaining device attributes for %s errno says %d: %s",
                    ibv_get_device_name(device->ib_dev), errno, strerror(errno));
            rc = OSHMEM_ERR_RESOURCE_BUSY;
        }
        else
        {
            MEMHEAP_VERBOSE(5, "ibv device %s",
                    ibv_get_device_name(device->ib_dev));
        }
    }

    /* Allocate the protection domain for the device */
    if (!rc)
    {
        device->ib_pd = ibv_alloc_pd(device->ib_dev_context);
        if (NULL == device->ib_pd)
        {
            MEMHEAP_ERROR("error allocating protection domain for %s errno says %d: %s",
                    ibv_get_device_name(device->ib_dev), errno, strerror(errno));
            rc = OSHMEM_ERR_RESOURCE_BUSY;
        }
    }

    /* Allocate memory */
    if (!rc)
    {
        void *addr = NULL;
        struct ibv_mr *ib_mr = NULL;
        int access_flag = IBV_ACCESS_LOCAL_WRITE |
        IBV_ACCESS_REMOTE_WRITE |
        IBV_ACCESS_REMOTE_READ;

        OBJ_CONSTRUCT(&device->ib_mr_array, opal_value_array_t);
        opal_value_array_init(&device->ib_mr_array, sizeof(struct ibv_mr *));

#if defined(MPAGE_ENABLE) && (MPAGE_ENABLE > 0)
        access_flag |= IBV_ACCESS_ALLOCATE_MR |
        IBV_ACCESS_SHARED_MR_USER_READ |
        IBV_ACCESS_SHARED_MR_USER_WRITE;
#endif /* MPAGE_ENABLE */

        /* 'addr' is NULL at this point. */
        ib_mr = ibv_reg_mr(device->ib_pd, addr, size, access_flag);
        if (NULL == ib_mr)
        {
            MEMHEAP_ERROR("error to ibv_reg_mr() %llu bytes errno says %d: %s",
                    (unsigned long long)size, errno, strerror(errno));
            rc = OSHMEM_ERR_OUT_OF_RESOURCE;
        }
        else
        {
            device->ib_mr_shared = ib_mr;
            opal_value_array_append_item(&device->ib_mr_array, &ib_mr);
        }

#if defined(MPAGE_ENABLE) && (MPAGE_ENABLE > 0)
        if (!rc)
        {
            access_flag = IBV_ACCESS_LOCAL_WRITE |
            IBV_ACCESS_REMOTE_WRITE |
            IBV_ACCESS_REMOTE_READ|
            IBV_ACCESS_NO_RDMA;

            /* Re-register the shared region at the memheap base address. */
            addr = (void *)mca_memheap_base_start_address;
            ib_mr = ibv_reg_shared_mr(device->ib_mr_shared->handle,
                    device->ib_pd, addr, access_flag);
            if (NULL == ib_mr)
            {
                MEMHEAP_ERROR("error to ibv_reg_shared_mr() %llu bytes errno says %d: %s",
                        (unsigned long long)size, errno, strerror(errno));
                rc = OSHMEM_ERR_OUT_OF_RESOURCE;
            }
            else
            {
                opal_value_array_append_item(&device->ib_mr_array, &ib_mr);
            }
        }
#endif /* MPAGE_ENABLE */

        if (!rc)
        {
            assert(size == device->ib_mr_shared->length);

            /* ib_mr is the most recently registered region. */
            s->type = MAP_SEGMENT_ALLOC_IBV;
            s->shmid = device->ib_mr_shared->handle;
            s->start = ib_mr->addr;
            s->size = size;
            s->end = (void*)((uintptr_t)s->start + s->size);
            s->context = &memheap_device;
        }
    }

    return rc;
}
Ejemplo n.º 23
0
/*
 * Create every resource the test needs and store it in 'res':
 * the TCP socket (client or daemon side, depending on
 * config.server_name), the verbs context of the selected device, a
 * protection domain, a one-entry completion queue, the message buffer
 * with its memory region, and an RC queue pair.
 *
 * When config.dev_name is NULL it is set to a strdup() of the first
 * device's name.
 *
 * Returns 0 on success; -1 if the TCP connection fails, 1 on any later
 * failure.  Resources already stored in 'res' are not released here on
 * failure.  The local verbs device list is released on every path once
 * it has been obtained.
 */
static int resources_create(struct resources *res)
{
	struct ibv_device       **dev_list = NULL;
	struct ibv_qp_init_attr qp_init_attr;
	struct ibv_device 	*ib_dev = NULL;
	size_t 			size;
	int 			i;
	int 			mr_flags = 0;
	int 			cq_size = 0;
	int 			num_devices;

	/* if client side */
	if (config.server_name) {
		res->sock = sock_client_connect(config.server_name, config.tcp_port);
		if (res->sock < 0) {
			fprintf(stderr, "failed to establish TCP connection to server %s, port %d\n",
				config.server_name, config.tcp_port);
			return -1;
		}
	} else {
		fprintf(stdout, "waiting on port %d for TCP connection\n", config.tcp_port);

		res->sock = sock_daemon_connect(config.tcp_port);
		if (res->sock < 0) {
			fprintf(stderr, "failed to establish TCP connection with client on port %d\n",
				config.tcp_port);
			return -1;
		}
	}

	fprintf(stdout, "TCP connection was established\n");

	fprintf(stdout, "searching for IB devices in host\n");

	/* get device names in the system */
	dev_list = ibv_get_device_list(&num_devices);
	if (!dev_list) {
		fprintf(stderr, "failed to get IB devices list\n");
		return 1;
	}

	/* if there isn't any IB device in host */
	if (!num_devices) {
		fprintf(stderr, "found %d device(s)\n", num_devices);
		ibv_free_device_list(dev_list);
		return 1;
	}

	fprintf(stdout, "found %d device(s)\n", num_devices);

	/* search for the specific device we want to work with */
	for (i = 0; i < num_devices; i ++) {
		if (!config.dev_name) {
			config.dev_name = strdup(ibv_get_device_name(dev_list[i]));
			if (!config.dev_name) {
				fprintf(stderr, "failed to allocate memory for the device name\n");
				ibv_free_device_list(dev_list);
				return 1;
			}
			fprintf(stdout, "device not specified, using first one found: %s\n", config.dev_name);
		}
		if (!strcmp(ibv_get_device_name(dev_list[i]), config.dev_name)) {
			ib_dev = dev_list[i];
			break;
		}
	}

	/* if the device wasn't found in host */
	if (!ib_dev) {
		fprintf(stderr, "IB device %s wasn't found\n", config.dev_name);
		ibv_free_device_list(dev_list);
		return 1;
	}

	/* get device handle */
	res->ib_ctx = ibv_open_device(ib_dev);
	if (!res->ib_ctx) {
		fprintf(stderr, "failed to open device %s\n", config.dev_name);
		ibv_free_device_list(dev_list);
		return 1;
	}

	/* We are now done with device list, free it */
	ibv_free_device_list(dev_list);
	dev_list = NULL;
	ib_dev = NULL;

	/* query port properties  */
	if (ibv_query_port(res->ib_ctx, config.ib_port, &res->port_attr)) {
		fprintf(stderr, "ibv_query_port on port %u failed\n", config.ib_port);
		return 1;
	}

	/* allocate Protection Domain */
	res->pd = ibv_alloc_pd(res->ib_ctx);
	if (!res->pd) {
		fprintf(stderr, "ibv_alloc_pd failed\n");
		return 1;
	}

	/* each side will send only one WR, so Completion Queue with 1 entry is enough */
	cq_size = 1;
	res->cq = ibv_create_cq(res->ib_ctx, cq_size, NULL, NULL, 0);
	if (!res->cq) {
		fprintf(stderr, "failed to create CQ with %u entries\n", cq_size);
		return 1;
	}

	/* allocate the memory buffer that will hold the data */
	size = MSG_SIZE;
	res->buf = malloc(size);
	if (!res->buf) {
		/* %zu is the standard size_t conversion (%Zu is a legacy glibc spelling) */
		fprintf(stderr, "failed to malloc %zu bytes to memory buffer\n", size);
		return 1;
	}

	/* only in the daemon side put the message in the memory buffer */
	if (!config.server_name) {
		strcpy(res->buf, MSG);
		fprintf(stdout, "going to send the message: '%s'\n", res->buf);
	} else
		memset(res->buf, 0, size);

	/* register this memory buffer; only the client side asks for local write access */
	mr_flags = (config.server_name) ? IBV_ACCESS_LOCAL_WRITE : 0;
	res->mr = ibv_reg_mr(res->pd, res->buf, size, mr_flags);
	if (!res->mr) {
		fprintf(stderr, "ibv_reg_mr failed with mr_flags=0x%x\n", mr_flags);
		return 1;
	}

	fprintf(stdout, "MR was registered with addr=%p, lkey=0x%x, rkey=0x%x, flags=0x%x\n",
			      res->buf, res->mr->lkey, res->mr->rkey, mr_flags);


	/* create the Queue Pair */
	memset(&qp_init_attr, 0, sizeof(qp_init_attr));

	qp_init_attr.qp_type    	= IBV_QPT_RC;
	qp_init_attr.sq_sig_all 	= 1;
	qp_init_attr.send_cq    	= res->cq;
	qp_init_attr.recv_cq    	= res->cq;
	qp_init_attr.cap.max_send_wr  	= 1;
	qp_init_attr.cap.max_recv_wr  	= 1;
	qp_init_attr.cap.max_send_sge 	= 1;
	qp_init_attr.cap.max_recv_sge 	= 1;

	res->qp = ibv_create_qp(res->pd, &qp_init_attr);
	if (!res->qp) {
		fprintf(stderr, "failed to create QP\n");
		return 1;
	}
	fprintf(stdout, "QP was created, QP number=0x%x\n", res->qp->qp_num);

	return 0;
}
Ejemplo n.º 24
0
/*
 * Initialize an RDMA backend device on top of a host IB (uverbs) device.
 *
 * @backend_dev:          backend state to initialize; it is zeroed first
 * @pdev:                 PCI device stored in backend_dev->dev
 * @rdma_dev_res:         device resources stored in backend_dev->rdma_dev_res
 * @backend_device_name:  name of the host IB device to use, or NULL to use
 *                        the first device in the list
 * @port_num:             port number stored in backend_dev->port_num
 * @dev_attr:             passed through to init_device_caps()
 * @mad_chr_be:           passed through to mad_init()
 *
 * Returns 0 on success, or a negative errno value (-EIO / -ENXIO) on failure.
 * The IB device list is released on every path once it has been obtained.
 */
int rdma_backend_init(RdmaBackendDev *backend_dev, PCIDevice *pdev,
                      RdmaDeviceResources *rdma_dev_res,
                      const char *backend_device_name, uint8_t port_num,
                      struct ibv_device_attr *dev_attr, CharBackend *mad_chr_be)
{
    int i;
    int ret = 0;
    int num_ibv_devices;
    struct ibv_device **dev_list;

    memset(backend_dev, 0, sizeof(*backend_dev));

    backend_dev->dev = pdev;
    backend_dev->port_num = port_num;
    backend_dev->rdma_dev_res = rdma_dev_res;

    rdma_backend_register_comp_handler(dummy_comp_handler);

    dev_list = ibv_get_device_list(&num_ibv_devices);
    if (!dev_list) {
        rdma_error_report("Failed to get IB devices list");
        return -EIO;
    }

    if (num_ibv_devices == 0) {
        rdma_error_report("No IB devices were found");
        ret = -ENXIO;
        goto out_free_dev_list;
    }

    if (backend_device_name) {
        /* The list is NULL-terminated, so a miss leaves dev_list[i] NULL. */
        for (i = 0; dev_list[i]; ++i) {
            if (!strcmp(ibv_get_device_name(dev_list[i]),
                        backend_device_name)) {
                break;
            }
        }

        backend_dev->ib_dev = dev_list[i];
        if (!backend_dev->ib_dev) {
            rdma_error_report("Failed to find IB device %s",
                              backend_device_name);
            ret = -EIO;
            goto out_free_dev_list;
        }
    } else {
        /* No name requested: fall back to the first listed device. */
        backend_dev->ib_dev = *dev_list;
    }

    rdma_info_report("uverb device %s", backend_dev->ib_dev->dev_name);

    backend_dev->context = ibv_open_device(backend_dev->ib_dev);
    if (!backend_dev->context) {
        rdma_error_report("Failed to open IB device %s",
                          ibv_get_device_name(backend_dev->ib_dev));
        ret = -EIO;
        /* The device list is still held here and must be released. */
        goto out_free_dev_list;
    }

    backend_dev->channel = ibv_create_comp_channel(backend_dev->context);
    if (!backend_dev->channel) {
        rdma_error_report("Failed to create IB communication channel");
        ret = -EIO;
        goto out_close_device;
    }

    ret = init_device_caps(backend_dev, dev_attr);
    if (ret) {
        rdma_error_report("Failed to initialize device capabilities");
        ret = -EIO;
        goto out_destroy_comm_channel;
    }

    ret = mad_init(backend_dev, mad_chr_be);
    if (ret) {
        rdma_error_report("Failed to initialize mad");
        ret = -EIO;
        goto out_destroy_comm_channel;
    }

    backend_dev->comp_thread.run = false;
    backend_dev->comp_thread.is_running = false;

    ah_cache_init();

    /* Success: keep context and channel, release only the device list. */
    goto out_free_dev_list;

out_destroy_comm_channel:
    ibv_destroy_comp_channel(backend_dev->channel);

out_close_device:
    ibv_close_device(backend_dev->context);

out_free_dev_list:
    ibv_free_device_list(dev_list);

    return ret;
}
Ejemplo n.º 25
0
/* Convert a struct timeval into a single count of microseconds. */
static unsigned long tv_to_usec(const struct timeval *tv)
{
    return (unsigned long) tv->tv_sec * 1000000UL + (unsigned long) tv->tv_usec;
}

/*
 * Print the user CPU, system CPU, "sleeping" (wall minus CPU) and wall-clock
 * durations of one benchmark phase, using both the seconds and microseconds
 * parts of every interval.
 */
static void print_phase_times(const struct rusage *before,
                              const struct rusage *after,
                              const struct timeval *wall_start,
                              const struct timeval *wall_end)
{
    struct timeval diff;
    unsigned long user_us, sys_us, cpu_us, wall_us;

    timersub(&after->ru_utime, &before->ru_utime, &diff);
    user_us = tv_to_usec(&diff);
    printf("User CPU time: %lu us\n", user_us);

    timersub(&after->ru_stime, &before->ru_stime, &diff);
    sys_us = tv_to_usec(&diff);
    printf("System CPU time: %lu us\n", sys_us);

    timersub(wall_end, wall_start, &diff);
    wall_us = tv_to_usec(&diff);
    cpu_us = user_us + sys_us;

    /* Clamp at zero: CPU time may exceed wall time by measurement jitter. */
    printf("Sleeping time: %lu us\n", wall_us > cpu_us ? wall_us - cpu_us : 0UL);
    printf("Wall clock CPU time: %lu us\n", wall_us);
}

/*
 * Loopback RDMA-write benchmark.
 *
 * Usage: prog <device index> <port> <iterations> <megabytes>
 *
 * Opens the selected device twice, builds two RC queue pairs connected to
 * each other, and copies a buffer of <megabytes> MB from buf1 to buf2
 * <iterations> times with RDMA writes. The same copy is then repeated with
 * memcpy() and the timings of both phases are printed.
 *
 * Arguments are checked progressively, so running with fewer arguments
 * prints what is known so far and asks for the next one.
 *
 * Returns 0 on success and 1 on any failure. Resources are not released
 * explicitly on exit.
 */
int main(int argc, char *argv[])
{
    struct ibv_pd *pd1, *pd2;
    struct ibv_comp_channel *comp_chan1, *comp_chan2;
    struct ibv_cq *cq1, *cq2;
    struct ibv_cq *evt_cq = NULL;
    struct ibv_mr *mr1, *mr2;
    struct ibv_qp_init_attr qp_attr1 = { }, qp_attr2 = {};
    struct ibv_sge sge;
    struct ibv_send_wr send_wr = { };
    struct ibv_send_wr *bad_send_wr = NULL;
    struct ibv_wc wc;
    struct ibv_qp *qp1, *qp2;
    void *cq_context = NULL;
    union ibv_gid gid1, gid2;

    int n;

    uint8_t *buf1, *buf2;

    int num_devices = 0;
    struct ibv_context *verbs1, *verbs2;
    struct ibv_device **dev_list = ibv_get_device_list(&num_devices);
    struct ibv_device_attr dev_attr;
    int use = 0;
    int port = 1;
    int x = 0;
    long mb_arg = 0;
    unsigned long mb = 0;
    unsigned long bytes = 0;
    struct timeval start, stop, diff;
    int total_iterations = 0;
    int iterations = 0;

    struct rusage usage_start, usage_end;
    struct timeval tstart, tend;

    /* Validate the list before touching it. */
    if (dev_list == NULL || num_devices == 0) {
        printf("No devices found\n");
        return 1;
    }

    DPRINTF("There are %d devices\n", num_devices);

    for (x = 0; x < num_devices; x++) {
        printf("Device: %d, %s\n", x, ibv_get_device_name(dev_list[x]));
    }

    if (argc < 2) {
        printf("Which RDMA device to use? 0, 1, 2, 3...\n");
        return 1;
    }

    use = atoi(argv[1]);

    /* Reject indices outside the list instead of reading past its end. */
    if (use < 0 || use >= num_devices) {
        printf("Device #%d invalid, must be between 0 and %d\n",
               use, num_devices - 1);
        return 1;
    }

    DPRINTF("Using device %d\n", use);

    verbs1 = ibv_open_device(dev_list[use]);

    if (verbs1 == NULL) {
        printf("Failed to open device!\n");
        return 1;
    }

    DPRINTF("Device open %s\n", ibv_get_device_name(dev_list[use]));

    verbs2 = ibv_open_device(dev_list[use]);

    if (verbs2 == NULL) {
        printf("Failed to open device again!\n");
        return 1;
    }

    if (ibv_query_device(verbs1, &dev_attr)) {
        printf("Failed to query device attributes.\n");
        return 1;
    }

    printf("Device open: %d, %s which has %d ports\n", use,
           ibv_get_device_name(dev_list[use]), dev_attr.phys_port_cnt);

    if (argc < 3) {
        printf("Which port on the device to use? 1, 2, 3...\n");
        return 1;
    }

    port = atoi(argv[2]);

    if (port <= 0) {
        printf("Port #%d invalid, must start with 1, 2, 3, ...\n", port);
        return 1;
    }

    /* The port is later stored in 8-bit fields; refuse values the device lacks. */
    if (port > dev_attr.phys_port_cnt) {
        printf("Port #%d invalid, device has %d ports\n",
               port, dev_attr.phys_port_cnt);
        return 1;
    }

    printf("Using port %d\n", port);

    if (argc < 4) {
        printf("How many iterations to perform?\n");
        return 1;
    }

    total_iterations = atoi(argv[3]);
    printf("Will perform %d iterations\n", total_iterations);

    pd1 = ibv_alloc_pd(verbs1);
    if (!pd1)
        return 1;

    if (argc < 5) {
        printf("How many megabytes to allocate? (This will be allocated twice. Once for source, once for destination.)\n");
        return 1;
    }

    /* Parse as signed so that negative input is actually rejected. */
    mb_arg = atol(argv[4]);

    if (mb_arg <= 0) {
        printf("Megabytes %ld invalid\n", mb_arg);
        return 1;
    }

    mb = (unsigned long) mb_arg;

    /* Guard the MB -> bytes conversion against unsigned long overflow. */
    if (mb > (~0UL >> 20)) {
        printf("Megabytes %lu invalid\n", mb);
        return 1;
    }

    DPRINTF("protection domain1 allocated\n");

    pd2 = ibv_alloc_pd(verbs2);
    if (!pd2)
        return 1;

    DPRINTF("protection domain2 allocated\n");

    comp_chan1 = ibv_create_comp_channel(verbs1);
    if (!comp_chan1)
        return 1;

    DPRINTF("completion chan1 created\n");

    comp_chan2 = ibv_create_comp_channel(verbs2);
    if (!comp_chan2)
        return 1;

    DPRINTF("completion chan2 created\n");

    cq1 = ibv_create_cq(verbs1, 2, NULL, comp_chan1, 0);
    if (!cq1)
        return 1;

    DPRINTF("CQ1 created\n");

    cq2 = ibv_create_cq(verbs2, 2, NULL, comp_chan2, 0);
    if (!cq2)
        return 1;

    DPRINTF("CQ2 created\n");

    bytes = mb * 1024UL * 1024UL;

    buf1 = malloc(bytes);
    if (!buf1)
        return 1;

    buf2 = malloc(bytes);
    if (!buf2)
        return 1;

    printf("Populating %lu MB memory.\n", mb * 2);

    /* memset avoids an int index overflowing on buffers of 2 GiB or more. */
    memset(buf1, 123, bytes);

    mr1 = ibv_reg_mr(pd1, buf1, bytes, IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ);
    if (!mr1) {
        printf("Failed to register memory.\n");
        return 1;
    }

    mr2 = ibv_reg_mr(pd2, buf2, bytes, IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ);
    if (!mr2) {
        printf("Failed to register memory.\n");
        return 1;
    }

    DPRINTF("memory registered.\n");

    qp_attr1.cap.max_send_wr = 10;
    qp_attr1.cap.max_send_sge = 10;
    qp_attr1.cap.max_recv_wr = 10;
    qp_attr1.cap.max_recv_sge = 10;
    qp_attr1.sq_sig_all = 1;

    qp_attr1.send_cq = cq1;
    qp_attr1.recv_cq = cq1;

    qp_attr1.qp_type = IBV_QPT_RC;

    qp1 = ibv_create_qp(pd1, &qp_attr1);
    if (!qp1) {
        printf("failed to create queue pair #1\n");
        return 1;
    }

    DPRINTF("queue pair1 created\n");

    qp_attr2.cap.max_send_wr = 10;
    qp_attr2.cap.max_send_sge = 10;
    qp_attr2.cap.max_recv_wr = 10;
    qp_attr2.cap.max_recv_sge = 10;
    qp_attr2.sq_sig_all = 1;

    qp_attr2.send_cq = cq2;
    qp_attr2.recv_cq = cq2;

    qp_attr2.qp_type = IBV_QPT_RC;

    qp2 = ibv_create_qp(pd2, &qp_attr2);
    if (!qp2) {
        printf("failed to create queue pair #2\n");
        return 1;
    }

    DPRINTF("queue pair2 created\n");

    /* RESET -> INIT for both queue pairs. */
    struct ibv_qp_attr attr1 = {
        .qp_state = IBV_QPS_INIT,
        .pkey_index = 0,
        .port_num = port,
        .qp_access_flags = IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_LOCAL_WRITE,
    };

    if (ibv_modify_qp(qp1, &attr1,
                      IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT | IBV_QP_ACCESS_FLAGS)) {
        printf("verbs 1 Failed to go to init\n");
        return 1;
    }

    DPRINTF("verbs1 to init\n");

    struct ibv_qp_attr attr2 = {
        .qp_state = IBV_QPS_INIT,
        .pkey_index = 0,
        .port_num = port,
        .qp_access_flags = IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_LOCAL_WRITE,
    };

    if (ibv_modify_qp(qp2, &attr2,
                      IBV_QP_STATE |
                      IBV_QP_PKEY_INDEX |
                      IBV_QP_PORT |
                      IBV_QP_ACCESS_FLAGS)) {
        printf("verbs 2 Failed to go to init\n");
        return 1;
    }

    DPRINTF("verbs2 to init\n");

    struct ibv_port_attr port1, port2;
    uint64_t psn1 = lrand48() & 0xffffff;
    uint64_t psn2 = lrand48() & 0xffffff;

    if (ibv_query_port(verbs1, port, &port1))
        return 1;

    DPRINTF("got port1 information\n");

    if (ibv_query_port(verbs2, port, &port2))
        return 1;

    DPRINTF("got port2 information\n");

    /* Query the GID of the port actually in use, not a hard-coded port 1. */
    if (ibv_query_gid(verbs1, port, 0, &gid1))
        return 1;
    DPRINTF("got gid1 information\n");

    if (ibv_query_gid(verbs2, port, 0, &gid2))
        return 1;

    DPRINTF("got gid2 information\n");

    /* next2 describes QP2 as the remote peer; it is applied to QP1. */
    struct ibv_qp_attr next2 = {
        .qp_state = IBV_QPS_RTR,
        .path_mtu = IBV_MTU_1024,
        .dest_qp_num = qp2->qp_num,
        .rq_psn = psn2,
        .max_dest_rd_atomic = 5,
        .min_rnr_timer = 12,
        .ah_attr = {
            .is_global = 0,
            .dlid = port2.lid,
            .sl = 0,
            .src_path_bits = 0,
            .port_num = port,
        }
    };

    if (gid2.global.interface_id) {
        next2.ah_attr.is_global = 1;
        next2.ah_attr.grh.hop_limit = 1;
        next2.ah_attr.grh.dgid = gid2;
        next2.ah_attr.grh.sgid_index = 0;
    }

    /* next1 describes QP1 as the remote peer; it is applied to QP2. */
    struct ibv_qp_attr next1 = {
        .qp_state = IBV_QPS_RTR,
        .path_mtu = IBV_MTU_1024,
        .dest_qp_num = qp1->qp_num,
        .rq_psn = psn1,
        .max_dest_rd_atomic = 1,
        .min_rnr_timer = 12,
        .ah_attr = {
            .is_global = 0,
            .dlid = port1.lid,
            .sl = 0,
            .src_path_bits = 0,
            .port_num = port,
        }
    };

    if (gid1.global.interface_id) {
        next1.ah_attr.is_global = 1;
        next1.ah_attr.grh.hop_limit = 1;
        next1.ah_attr.grh.dgid = gid1;
        next1.ah_attr.grh.sgid_index = 0;
    }

    if (ibv_modify_qp(qp2, &next1,
                      IBV_QP_STATE |
                      IBV_QP_AV |
                      IBV_QP_PATH_MTU |
                      IBV_QP_DEST_QPN |
                      IBV_QP_RQ_PSN |
                      IBV_QP_MAX_DEST_RD_ATOMIC |
                      IBV_QP_MIN_RNR_TIMER)) {
        printf("Failed to modify verbs2 to ready\n");
        return 1;
    }

    DPRINTF("verbs2 RTR\n");

    if (ibv_modify_qp(qp1, &next2,
                      IBV_QP_STATE |
                      IBV_QP_AV |
                      IBV_QP_PATH_MTU |
                      IBV_QP_DEST_QPN |
                      IBV_QP_RQ_PSN |
                      IBV_QP_MAX_DEST_RD_ATOMIC |
                      IBV_QP_MIN_RNR_TIMER)) {
        printf("Failed to modify verbs1 to ready\n");
        return 1;
    }

    DPRINTF("verbs1 RTR\n");

    next2.qp_state = IBV_QPS_RTS;
    next2.timeout = 14;
    next2.retry_cnt = 7;
    next2.rnr_retry = 7;
    next2.sq_psn = psn1;
    next2.max_rd_atomic = 1;

    if (ibv_modify_qp(qp1, &next2,
                      IBV_QP_STATE |
                      IBV_QP_TIMEOUT |
                      IBV_QP_RETRY_CNT |
                      IBV_QP_RNR_RETRY |
                      IBV_QP_SQ_PSN |
                      IBV_QP_MAX_QP_RD_ATOMIC)) {
        printf("Failed again to modify verbs1 to ready\n");
        return 1;
    }

    DPRINTF("verbs1 RTS\n");

    next1.qp_state = IBV_QPS_RTS;
    next1.timeout = 14;
    next1.retry_cnt = 7;
    next1.rnr_retry = 7;
    next1.sq_psn = psn2;
    next1.max_rd_atomic = 1;

    if (ibv_modify_qp(qp2, &next1,
                      IBV_QP_STATE |
                      IBV_QP_TIMEOUT |
                      IBV_QP_RETRY_CNT |
                      IBV_QP_RNR_RETRY |
                      IBV_QP_SQ_PSN |
                      IBV_QP_MAX_QP_RD_ATOMIC)) {
        printf("Failed again to modify verbs2 to ready\n");
        return 1;
    }

    DPRINTF("verbs2 RTS\n");

    /* Phase 1: copy buf1 -> buf2 with RDMA writes. */
    printf("Performing RDMA first.\n");
    iterations = total_iterations;

    getrusage(RUSAGE_SELF, &usage_start);
    gettimeofday(&tstart, NULL);

    while (iterations-- > 0) {
        sge.addr   = (uintptr_t) buf1;
        sge.length = bytes;
        sge.lkey   = mr1->lkey;

        send_wr.wr_id               = 1;
        send_wr.opcode              = IBV_WR_RDMA_WRITE;
        send_wr.sg_list             = &sge;
        send_wr.num_sge             = 1;
        send_wr.send_flags          = IBV_SEND_SIGNALED;
        send_wr.wr.rdma.rkey        = mr2->rkey;
        send_wr.wr.rdma.remote_addr = (uintptr_t) buf2;

        DPRINTF("Iterations left: %d\n", iterations);

        /* Arm the CQ before posting so the completion raises an event. */
        if (ibv_req_notify_cq(cq1, 0))
            return 1;

        DPRINTF("Submitting local RDMA\n");
        gettimeofday(&start, NULL);
        if (ibv_post_send(qp1, &send_wr, &bad_send_wr))
            return 1;

        DPRINTF("RDMA posted %p %p\n", &send_wr, bad_send_wr);

        DPRINTF("blocking...\n");
        if (ibv_get_cq_event(comp_chan1, &evt_cq, &cq_context)) {
            printf("failed to get CQ event\n");
            return 1;
        }
        gettimeofday(&stop, NULL);
        timersub(&stop, &start, &diff);

        DPRINTF("RDMA took: %lu us\n", tv_to_usec(&diff));

        ibv_ack_cq_events(evt_cq, 1);

        DPRINTF("got event\n");

        n = ibv_poll_cq(cq1, 1, &wc);
        if (n > 0) {
            DPRINTF("return from poll: %lu\n", wc.wr_id);
            if (wc.status != IBV_WC_SUCCESS) {
                printf("poll failed %s\n", ibv_wc_status_str(wc.status));
                return 1;
            }

            if (wc.wr_id == 1) {
                DPRINTF("Finished %d bytes %d %d\n", n, buf1[bytes - 1], buf2[bytes - 1]);
            } else {
                printf("didn't find completion\n");
            }
        }

        if (n < 0) {
            printf("poll returned error\n");
            return 1;
        }

        DPRINTF("Poll returned %d bytes %d %d\n", n, buf1[0], buf2[0]);
    }

    gettimeofday(&tend, NULL);
    getrusage(RUSAGE_SELF, &usage_end);

    print_phase_times(&usage_start, &usage_end, &tstart, &tend);

    /* Phase 2: the same copy done by the CPU, for comparison. */
    iterations = total_iterations;

    printf("Now using the CPU instead....\n");

    getrusage(RUSAGE_SELF, &usage_start);
    gettimeofday(&tstart, NULL);

    while (iterations-- > 0) {
        DPRINTF("Repeating without RDMA...\n");

        gettimeofday(&start, NULL);

        memcpy(buf2, buf1, bytes);

        gettimeofday(&stop, NULL);
        timersub(&stop, &start, &diff);
        DPRINTF("Regular copy too took: %lu us\n", tv_to_usec(&diff));
    }

    gettimeofday(&tend, NULL);
    getrusage(RUSAGE_SELF, &usage_end);

    print_phase_times(&usage_start, &usage_end, &tstart, &tend);
    return 0;
}
Ejemplo n.º 26
0
/* ////////////////////////////////////////////////////////////////////////// */
/*
 * Create a verbs-backed memory segment of `size` bytes and describe it in
 * ds_buf.
 *
 * Resets ds_buf and the module-level memheap_device, selects an IB device
 * (the one named by mca_sshmem_verbs_component.hca_name, or the first one
 * listed), opens it, queries its attributes, allocates a protection domain
 * and registers the memory region(s). On success the type, id, base address,
 * size and end address of the segment are stored in ds_buf.
 *
 * file_name is not used by this implementation.
 *
 * Returns OSHMEM_SUCCESS, or an OSHMEM_ERR_* code on failure.
 */
static int
segment_create(map_segment_t *ds_buf,
               const char *file_name,
               size_t size)
{
    openib_device_t *dev = &memheap_device;
    int status = OSHMEM_SUCCESS;
    int dev_count = 0;
    int idx;
    void *reg_addr = NULL;
    struct ibv_mr *mr = NULL;
    uint64_t access = IBV_ACCESS_LOCAL_WRITE |
                      IBV_ACCESS_REMOTE_WRITE |
                      IBV_ACCESS_REMOTE_READ;
    uint64_t exp_access = 0;

    assert(ds_buf);

    /* Start from a clean segment descriptor and a clean device record. */
    shmem_ds_reset(ds_buf);
    memset(dev, 0, sizeof(*dev));

#ifdef HAVE_IBV_GET_DEVICE_LIST
    dev->ib_devs = ibv_get_device_list(&dev_count);
#else
#error unsupported ibv_get_device_list in infiniband/verbs.h
#endif

    if (!dev->ib_devs || 0 == dev_count) {
        return OSHMEM_ERR_NOT_SUPPORTED;
    }

    /* Select the device: first one by default, otherwise match by name. */
    if (NULL == mca_sshmem_verbs_component.hca_name) {
        dev->ib_dev = dev->ib_devs[0];
    } else {
        idx = 0;
        while (idx < dev_count) {
            const char *candidate = ibv_get_device_name(dev->ib_devs[idx]);

            if (0 == strcmp(mca_sshmem_verbs_component.hca_name, candidate)) {
                dev->ib_dev = dev->ib_devs[idx];
                break;
            }
            idx++;
        }
    }

    if (NULL == dev->ib_dev) {
        OPAL_OUTPUT_VERBOSE(
            (5, oshmem_sshmem_base_framework.framework_output,
            "error getting device says %d: %s",
            errno, strerror(errno))
            );
        return OSHMEM_ERR_NOT_FOUND;
    }

    /* Open the device */
    dev->ib_dev_context = ibv_open_device(dev->ib_dev);
    if (NULL == dev->ib_dev_context) {
        OPAL_OUTPUT_VERBOSE(
            (5, oshmem_sshmem_base_framework.framework_output,
            "error obtaining device context for %s errno says %d: %s",
            ibv_get_device_name(dev->ib_dev), errno, strerror(errno))
            );
        return OSHMEM_ERR_RESOURCE_BUSY;
    }

    /* Query the device attributes */
    if (0 != ibv_query_device(dev->ib_dev_context, &dev->ib_dev_attr)) {
        OPAL_OUTPUT_VERBOSE(
            (5, oshmem_sshmem_base_framework.framework_output,
            "error obtaining device attributes for %s errno says %d: %s",
            ibv_get_device_name(dev->ib_dev), errno, strerror(errno))
            );
        return OSHMEM_ERR_RESOURCE_BUSY;
    }

    /* Protection domain for the device */
    dev->ib_pd = ibv_alloc_pd(dev->ib_dev_context);
    if (NULL == dev->ib_pd) {
        OPAL_OUTPUT_VERBOSE(
            (5, oshmem_sshmem_base_framework.framework_output,
            "error allocating protection domain for %s errno says %d: %s",
            ibv_get_device_name(dev->ib_dev), errno, strerror(errno))
            );
        return OSHMEM_ERR_RESOURCE_BUSY;
    }

    /* Memory registration: the array holds every MR registered below. */
    OBJ_CONSTRUCT(&dev->ib_mr_array, opal_value_array_t);
    opal_value_array_init(&dev->ib_mr_array, sizeof(struct ibv_mr *));

#if (MPAGE_ENABLE > 0)
    exp_access = IBV_EXP_ACCESS_ALLOCATE_MR |
                 IBV_EXP_ACCESS_SHARED_MR_USER_READ |
                 IBV_EXP_ACCESS_SHARED_MR_USER_WRITE;
#endif /* MPAGE_ENABLE */

    struct ibv_exp_reg_mr_in reg_in = {dev->ib_pd, reg_addr, size, access|exp_access, 0};

#if MPAGE_HAVE_IBV_EXP_REG_MR_CREATE_FLAGS
    /* Without shared MR support, register at the base start address. */
    if (0 == mca_sshmem_verbs_component.has_shared_mr) {
        reg_in.addr = (void *)mca_sshmem_base_start_address;
        reg_in.comp_mask    = IBV_EXP_REG_MR_CREATE_FLAGS;
        reg_in.create_flags = IBV_EXP_REG_MR_CREATE_CONTIG;
        reg_in.exp_access   = access;
    }
#endif

    mr = ibv_exp_reg_mr(&reg_in);
    if (NULL != mr) {
        dev->ib_mr_shared = mr;
        opal_value_array_append_item(&dev->ib_mr_array, &mr);
    } else {
        OPAL_OUTPUT_VERBOSE(
            (5, oshmem_sshmem_base_framework.framework_output,
                "error to ibv_exp_reg_mr() %llu bytes errno says %d: %s",
                (unsigned long long)size, errno, strerror(errno))
            );
        status = OSHMEM_ERR_OUT_OF_RESOURCE;
    }

#if (MPAGE_ENABLE > 0)
    if (!status && mca_sshmem_verbs_component.has_shared_mr) {
        void *shared_addr = (void *)mca_sshmem_base_start_address;
        struct ibv_exp_reg_shared_mr_in shared_in;

        access = IBV_ACCESS_LOCAL_WRITE |
                 IBV_ACCESS_REMOTE_WRITE |
                 IBV_ACCESS_REMOTE_READ|
                 IBV_EXP_ACCESS_NO_RDMA;

        mca_sshmem_verbs_fill_shared_mr(&shared_in, dev->ib_pd, dev->ib_mr_shared->handle, shared_addr, access);
        mr = ibv_exp_reg_shared_mr(&shared_in);
        if (NULL != mr) {
            opal_value_array_append_item(&dev->ib_mr_array, &mr);
        } else {
            OPAL_OUTPUT_VERBOSE(
                (5, oshmem_sshmem_base_framework.framework_output,
                    "error to ibv_reg_shared_mr() %llu bytes errno says %d: %s has_shared_mr: %d",
                    (unsigned long long)size, errno, strerror(errno),
                    mca_sshmem_verbs_component.has_shared_mr
                    )
                );
            status = OSHMEM_ERR_OUT_OF_RESOURCE;
        }
    }
#endif /* MPAGE_ENABLE */

    /* Publish the segment description only when everything succeeded. */
    if (OSHMEM_SUCCESS == status) {
        OPAL_OUTPUT_VERBOSE(
            (70, oshmem_sshmem_base_framework.framework_output,
            "ibv device %s shared_mr: %d",
            ibv_get_device_name(dev->ib_dev),
            mca_sshmem_verbs_component.has_shared_mr)
            );

        if (!mca_sshmem_verbs_component.has_shared_mr) {
            ds_buf->type = MAP_SEGMENT_ALLOC_IBV_NOSHMR;
            ds_buf->seg_id = MAP_SEGMENT_SHM_INVALID;
        } else {
            assert(size == dev->ib_mr_shared->length);
            ds_buf->type = MAP_SEGMENT_ALLOC_IBV;
            ds_buf->seg_id = dev->ib_mr_shared->handle;
        }
        ds_buf->super.va_base = mr->addr;
        ds_buf->seg_size = size;
        ds_buf->super.va_end = (void*)((uintptr_t)ds_buf->super.va_base + ds_buf->seg_size);
    }

    OPAL_OUTPUT_VERBOSE(
          (70, oshmem_sshmem_base_framework.framework_output,
           "%s: %s: create %s "
           "(id: %d, addr: %p size: %lu, name: %s)\n",
           mca_sshmem_verbs_component.super.base_version.mca_type_name,
           mca_sshmem_verbs_component.super.base_version.mca_component_name,
           (status ? "failure" : "successful"),
           ds_buf->seg_id, ds_buf->super.va_base, (unsigned long)ds_buf->seg_size, ds_buf->seg_name)
      );

    return status;
}
/* ////////////////////////////////////////////////////////////////////////// */
/*
 * Probe whether the verbs sshmem component can be used at run time.
 *
 * Opens an IB device (the one named by mca_sshmem_verbs_component.hca_name,
 * or the first one listed), allocates a protection domain and registers a
 * one-page test memory region. When built with MPAGE_ENABLE > 0 it also
 * tries a shared MR registration and records the outcome in
 * mca_sshmem_verbs_component.has_shared_mr.
 *
 * On success *priority and *module are set from the component; otherwise
 * they are left at 0 / NULL. Every resource acquired here is released before
 * returning, on both the success and the failure paths.
 *
 * hint is not used by this implementation.
 *
 * Returns OSHMEM_SUCCESS, or an OSHMEM_ERR_* code on failure.
 */
static int
verbs_runtime_query(mca_base_module_t **module,
                    int *priority,
                    const char *hint)
{
    int rc = OSHMEM_SUCCESS;
    openib_device_t my_device;
    openib_device_t *device = &my_device;
    int num_devs = 0;
    int i = 0;
    /* Set once ib_mr_array has been constructed, so cleanup can destruct it
     * even when no MR was ever appended. */
    int mr_array_constructed = 0;
    void *addr = NULL;
    size_t size = getpagesize();
    struct ibv_mr *ib_mr = NULL;
    uint64_t access_flag = IBV_ACCESS_LOCAL_WRITE |
                      IBV_ACCESS_REMOTE_WRITE |
                      IBV_ACCESS_REMOTE_READ;
    uint64_t exp_access_flag = 0;

    *priority = 0;
    *module = NULL;

    memset(device, 0, sizeof(*device));

#ifdef HAVE_IBV_GET_DEVICE_LIST
    device->ib_devs = ibv_get_device_list(&num_devs);
#else
    #error unsupported ibv_get_device_list in infiniband/verbs.h
#endif

    if (num_devs == 0 || !device->ib_devs) {
        /* Go through cleanup: a non-NULL but empty list must still be freed. */
        rc = OSHMEM_ERR_NOT_SUPPORTED;
        goto out;
    }

    /* Open device */
    if (NULL != mca_sshmem_verbs_component.hca_name) {
        for (i = 0; i < num_devs; i++) {
            if (0 == strcmp(mca_sshmem_verbs_component.hca_name, ibv_get_device_name(device->ib_devs[i]))) {
                device->ib_dev = device->ib_devs[i];
                break;
            }
        }
    } else {
        device->ib_dev = device->ib_devs[0];
    }

    if (NULL == device->ib_dev) {
        rc = OSHMEM_ERR_NOT_FOUND;
        goto out;
    }

    if (NULL == (device->ib_dev_context = ibv_open_device(device->ib_dev))) {
        rc = OSHMEM_ERR_RESOURCE_BUSY;
        goto out;
    }

    /* Obtain device attributes */
    if (ibv_query_device(device->ib_dev_context, &device->ib_dev_attr)) {
        rc = OSHMEM_ERR_RESOURCE_BUSY;
        goto out;
    }

    /* Allocate the protection domain for the device */
    device->ib_pd = ibv_alloc_pd(device->ib_dev_context);
    if (NULL == device->ib_pd) {
        rc = OSHMEM_ERR_RESOURCE_BUSY;
        goto out;
    }

    /* Allocate memory */
    OBJ_CONSTRUCT(&device->ib_mr_array, opal_value_array_t);
    opal_value_array_init(&device->ib_mr_array, sizeof(struct ibv_mr *));
    mr_array_constructed = 1;

#if defined(MPAGE_ENABLE) && (MPAGE_ENABLE > 0)
    exp_access_flag = IBV_EXP_ACCESS_ALLOCATE_MR  |
                      IBV_EXP_ACCESS_SHARED_MR_USER_READ |
                      IBV_EXP_ACCESS_SHARED_MR_USER_WRITE;
#endif /* MPAGE_ENABLE */

    struct ibv_exp_reg_mr_in in = {device->ib_pd, addr, size, access_flag|exp_access_flag, 0};
    ib_mr = ibv_exp_reg_mr(&in);
    if (NULL == ib_mr) {
        rc = OSHMEM_ERR_OUT_OF_RESOURCE;
    } else {
        device->ib_mr_shared = ib_mr;
        opal_value_array_append_item(&device->ib_mr_array, &ib_mr);
    }

#if defined(MPAGE_ENABLE) && (MPAGE_ENABLE > 0)
    if (!rc) {
        struct ibv_exp_reg_shared_mr_in in_smr;

        access_flag = IBV_ACCESS_LOCAL_WRITE |
                      IBV_ACCESS_REMOTE_WRITE |
                      IBV_ACCESS_REMOTE_READ|
                      IBV_EXP_ACCESS_NO_RDMA;

        addr = (void *)mca_sshmem_base_start_address;
        mca_sshmem_verbs_fill_shared_mr(&in_smr, device->ib_pd, device->ib_mr_shared->handle,  addr, access_flag);
        ib_mr = ibv_exp_reg_shared_mr(&in_smr);
        /* A failed shared registration is not an error: it only records
         * that shared MRs are unavailable. */
        if (NULL == ib_mr) {
            mca_sshmem_verbs_component.has_shared_mr = 0;
        } else {
            opal_value_array_append_item(&device->ib_mr_array, &ib_mr);
            mca_sshmem_verbs_component.has_shared_mr = 1;
        }
    }
#endif /* MPAGE_ENABLE */

    /* all is well - rainbows and butterflies */
    if (!rc) {
        *priority = mca_sshmem_verbs_component.priority;
        *module = (mca_base_module_t *)&mca_sshmem_verbs_module.super;
    }

out:
    /* Release everything in reverse order of acquisition. */
    if (mr_array_constructed) {
        struct ibv_mr **array =
            OPAL_VALUE_ARRAY_GET_BASE(&device->ib_mr_array, struct ibv_mr *);

        /* Always deregister the head element, then drop it from the array. */
        while (opal_value_array_get_size(&device->ib_mr_array) > 0) {
            ibv_dereg_mr(array[0]);
            opal_value_array_remove_item(&device->ib_mr_array, 0);
        }

        device->ib_mr_shared = NULL;
        OBJ_DESTRUCT(&device->ib_mr_array);
    }

    if (device->ib_pd) {
        ibv_dealloc_pd(device->ib_pd);
        device->ib_pd = NULL;
    }

    if (device->ib_dev_context) {
        ibv_close_device(device->ib_dev_context);
        device->ib_dev_context = NULL;
    }

    if (device->ib_devs) {
        ibv_free_device_list(device->ib_devs);
        device->ib_devs = NULL;
    }

    return rc;
}
Ejemplo n.º 28
0
/*
 * Create the IB resources used by one rank and store them in *res.
 *
 * Opens the first IB device in the list, queries port ib_port, allocates a
 * protection domain, a send CQ and a receive CQ, a fixed-size buffer with
 * its memory region, and an RC queue pair.
 *
 * res      resource structure to fill; it is zeroed first
 * ib_port  port number passed to ibv_query_port()
 * myrank   rank of the caller, used only to tag log messages
 *
 * Returns 0 on success and 1 on failure. On failure every resource created
 * so far is released and the corresponding fields of *res are reset.
 */
int
resource_create(resource_t *res, int ib_port, int myrank)
{
    struct ibv_device		**dev_list = NULL;
    struct ibv_qp_init_attr	qp_init_attr;
    struct ibv_device		*ib_dev = NULL;
    const char	*dev_name = NULL;
    size_t	size;
    int		i;
    int		mr_flags = 0;
    int		dev_numm = 0;
    int		rc = 0;

    /* Init structure */
    memset(res, 0, sizeof(resource_t));
    /* Get the device list */
    dev_list = ibv_get_device_list(&dev_numm);
    if(!dev_list) {
	fprintf(stderr, "[%d] failed to get IB devices list\n", myrank);
	return 1;
    }
    // if no device
    if(!dev_numm) {
	fprintf(stderr, "[%d] No IB device is found\n", myrank);
	rc = 1;
	goto err_exit;
    }
    DEBUG { printf("[%d] found %d IB device(s)\n", myrank, dev_numm); }
    /*
     * Open the first listed device. The name is borrowed from the device
     * entry (no copy to free) and is only used while dev_list is alive.
     */
    ib_dev = dev_list[0];
    dev_name = ibv_get_device_name(ib_dev);
    DEBUG { printf("[%d] IB device name: %s\n", myrank, dev_name ? dev_name : "(unknown)"); }
    if (!ib_dev){
	fprintf(stderr, "[%d] IB device %s wasn't found\n", myrank, dev_name ? dev_name : "(unknown)");
	rc = 1;
	goto err_exit;
    }
    res->ib_ctx = ibv_open_device(ib_dev);
    DEBUG { printf("[%d] IB context = %lx\n", myrank, (unsigned long)(uintptr_t)res->ib_ctx); }
    if(!res->ib_ctx){
	fprintf(stderr, "[%d] failed to open device %s\n", myrank, dev_name ? dev_name : "(unknown)");
	rc = 1;
	goto err_exit;
    }
    // free device list; dev_name points into it and must not be used below
    ibv_free_device_list(dev_list);
    dev_list = NULL;
    ib_dev = NULL;
    dev_name = NULL;
    // query port properties
    if(ibv_query_port(res->ib_ctx, ib_port, &res->port_attr)){
	fprintf(stderr, "[%d] ibv_query_port on port %u failed\n", myrank, ib_port);
	rc = 1;
	goto err_exit;
    }

    /* Create a PD */
    res->pd = ibv_alloc_pd(res->ib_ctx);
    if (!res->pd){
	fprintf(stderr, "[%d] ibv_alloc_pd failed\n", myrank);
	rc = 1;
	goto err_exit;
    }

    /* Create send/recv CQ
     *  inputs:
     *		device handle
     *		CQ capacity
     *  Output:
     *		CQ handle
     */
    res->scq = ibv_create_cq(res->ib_ctx, MAX_CQ_CAPACITY, NULL, NULL, 0);
    if (!res->scq){
	fprintf(stderr, "[%d] failed to create SCQ with %u entries\n", myrank, (unsigned)MAX_CQ_CAPACITY);
	rc = 1;
	goto err_exit;
    }
    res->rcq = ibv_create_cq(res->ib_ctx, MAX_CQ_CAPACITY, NULL, NULL, 0);
    if (!res->rcq){
	fprintf(stderr, "[%d] failed to create RCQ with %u entries\n", myrank, (unsigned)MAX_CQ_CAPACITY);
	rc = 1;
	goto err_exit;
    }

    /* Allocate fix buffer */
    size = MAX_FIX_BUF_SIZE;
    res->buf_size = size;
    res->buf = (char *)malloc(size * sizeof(char));
    if (!res->buf ){
	fprintf(stderr, "[%d] failed to malloc %zu bytes to memory buffer\n", myrank, size);
	rc = 1;
	goto err_exit;
    }
    memset(res->buf, 0 , size);

    /* Memory Region
     *	inputs:
     *		device handle
     *		PD
     *		Virtual Addr(addr of MR)
     *		Access Ctrl: LocalWrite, RemoteRead, RemoteWrite, RemoteAtomicOp, MemWindowBinding
     *	outputs:
     *		MR handle
     *		L_Key
     *		R_Key
     */
    res->mr_list = malloc(sizeof(struct ibv_mr*) * MAX_MR_NUM);
    if (!res->mr_list){
	fprintf(stderr, "[%d] failed to malloc the MR list\n", myrank);
	rc = 1;
	goto err_exit;
    }
    /* No MR is registered yet; mr_size only counts valid entries. */
    res->mr_size = 0;

    mr_flags = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ |
		IBV_ACCESS_REMOTE_WRITE ;
    res->mr_list[0] = ibv_reg_mr(res->pd, res->buf, size, mr_flags);
    if (!res->mr_list[0]){
	fprintf(stderr, "[%d] ibv_reg_mr failed with mr_flags=0x%x\n", myrank, mr_flags);
	rc = 1;
	goto err_exit;
    }
    res->mr_size = 1;
    DEBUG { printf("[%d] fixed MR was registered with addr=%p, lkey=0x%x, rkey=0x%x, flags=0x%x\n", myrank, res->buf, res->mr_list[0]->lkey, res->mr_list[0]->rkey, mr_flags); }

    /* Create QP */
    // inputs:
    //	PD
    //	CQs for SQ,RQ
    //	capacity of SQ,RQ
    // Outputs:
    //	QP handle
    memset(&qp_init_attr, 0, sizeof(qp_init_attr));
    qp_init_attr.qp_type = IBV_QPT_RC;
    qp_init_attr.sq_sig_all = 1;
    qp_init_attr.send_cq = res->scq;
    qp_init_attr.recv_cq = res->rcq;
    // max SR/RR num in SQ/RQ
    qp_init_attr.cap.max_send_wr = MAX_SQ_CAPACITY ;
    qp_init_attr.cap.max_recv_wr = MAX_RQ_CAPACITY;
    // max SGE num
    qp_init_attr.cap.max_send_sge = MAX_SGE_CAPACITY;
    qp_init_attr.cap.max_recv_sge = MAX_SGE_CAPACITY;
    qp_init_attr.cap.max_inline_data = 256;

    res->qp = ibv_create_qp(res->pd, &qp_init_attr);
    if (!res->qp){
	fprintf(stderr, "failed to create QP\n");
	rc = 1;
	goto err_exit;
    }
    DEBUG { printf("[%d] QP was created, QP number=0x%x\n", myrank, res->qp->qp_num); }

    /* EXIT */
err_exit:
    if(rc){
	/* Error encountered, cleanup in reverse order of creation */
	if(res->qp){
	    ibv_destroy_qp(res->qp);
	    res->qp = NULL;
	}
	if(res->mr_list){
	    /* Only entries below mr_size were registered successfully. */
	    for(i=0; i<res->mr_size; i++){
		if(res->mr_list[i]){
		    ibv_dereg_mr(res->mr_list[i]);
		    res->mr_list[i] = NULL;
		}
	    }
	    free(res->mr_list);
	    res->mr_list = NULL;
	    res->mr_size = 0;
	}
	if(res->buf){
	    free(res->buf);
	    res->buf = NULL;
	}
	if(res->scq){
	    ibv_destroy_cq(res->scq);
	    res->scq = NULL;
	}
	if(res->rcq){
	    ibv_destroy_cq(res->rcq);
	    res->rcq = NULL;
	}
	if(res->comp_ch){
	    ibv_destroy_comp_channel(res->comp_ch);
	    res->comp_ch = NULL;
	}
	if(res->pd){
	    ibv_dealloc_pd(res->pd);
	    res->pd = NULL;
	}
	if (res->ib_ctx) {
	    ibv_close_device(res->ib_ctx);
	    res->ib_ctx = NULL;
	}
	if (dev_list) {
	    ibv_free_device_list(dev_list);
	    dev_list = NULL;
	}
    }
    return rc;
}
Ejemplo n.º 29
0
Archivo: mlx5.c Proyecto: goby/dpdk
/**
 * DPDK callback to register a PCI device.
 *
 * This function creates an Ethernet device for each port of a given
 * PCI device.
 *
 * @param[in] pci_drv
 *   PCI driver structure (mlx5_driver).
 * @param[in] pci_dev
 *   PCI device information.
 *
 * @return
 *   0 on success, negative errno value on failure.
 */
static int
mlx5_pci_devinit(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
{
	struct ibv_device **list;
	struct ibv_device *ibv_dev;
	int err = 0;
	struct ibv_context *attr_ctx = NULL;
	struct ibv_device_attr device_attr;
	unsigned int vf;
	int idx;
	int i;

	(void)pci_drv;
	assert(pci_drv == &mlx5_driver.pci_drv);
	/* Get mlx5_dev[] index. */
	idx = mlx5_dev_idx(&pci_dev->addr);
	if (idx == -1) {
		ERROR("this driver cannot support any more adapters");
		return -ENOMEM;
	}
	DEBUG("using driver device index %d", idx);

	/* Save PCI address. */
	mlx5_dev[idx].pci_addr = pci_dev->addr;
	list = ibv_get_device_list(&i);
	if (list == NULL) {
		assert(errno);
		if (errno == ENOSYS) {
			WARN("cannot list devices, is ib_uverbs loaded?");
			return 0;
		}
		return -errno;
	}
	assert(i >= 0);
	/*
	 * For each listed device, check related sysfs entry against
	 * the provided PCI ID.
	 */
	while (i != 0) {
		struct rte_pci_addr pci_addr;

		--i;
		DEBUG("checking device \"%s\"", list[i]->name);
		if (mlx5_ibv_device_to_pci_addr(list[i], &pci_addr))
			continue;
		if ((pci_dev->addr.domain != pci_addr.domain) ||
		    (pci_dev->addr.bus != pci_addr.bus) ||
		    (pci_dev->addr.devid != pci_addr.devid) ||
		    (pci_dev->addr.function != pci_addr.function))
			continue;
		vf = ((pci_dev->id.device_id ==
		       PCI_DEVICE_ID_MELLANOX_CONNECTX4VF) ||
		      (pci_dev->id.device_id ==
		       PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF));
		INFO("PCI information matches, using device \"%s\" (VF: %s)",
		     list[i]->name, (vf ? "true" : "false"));
		attr_ctx = ibv_open_device(list[i]);
		/* Save errno right away, later calls may overwrite it. */
		err = errno;
		break;
	}
	if (attr_ctx == NULL) {
		ibv_free_device_list(list);
		/* err is still 0 when no listed device matched the PCI ID. */
		switch (err) {
		case 0:
			WARN("cannot access device, is mlx5_ib loaded?");
			return 0;
		case EINVAL:
			WARN("cannot use device, are drivers up to date?");
			return 0;
		}
		assert(err > 0);
		return -err;
	}
	/* The search loop stopped with i on the matching entry. */
	ibv_dev = list[i];

	DEBUG("device opened");
	if (ibv_query_device(attr_ctx, &device_attr))
		goto error;
	INFO("%u port(s) detected", device_attr.phys_port_cnt);

	for (i = 0; i < device_attr.phys_port_cnt; i++) {
		uint32_t port = i + 1; /* ports are indexed from one */
		uint32_t test = (1 << i);
		struct ibv_context *ctx = NULL;
		struct ibv_port_attr port_attr;
		struct ibv_pd *pd = NULL;
		struct priv *priv = NULL;
		struct rte_eth_dev *eth_dev;
#ifdef HAVE_EXP_QUERY_DEVICE
		struct ibv_exp_device_attr exp_device_attr;
#endif /* HAVE_EXP_QUERY_DEVICE */
		struct ether_addr mac;

#ifdef HAVE_EXP_QUERY_DEVICE
		exp_device_attr.comp_mask =
			IBV_EXP_DEVICE_ATTR_EXP_CAP_FLAGS |
			IBV_EXP_DEVICE_ATTR_RX_HASH;
#endif /* HAVE_EXP_QUERY_DEVICE */

		DEBUG("using port %u (%08" PRIx32 ")", port, test);

		/* Each port gets its own device context. */
		ctx = ibv_open_device(ibv_dev);
		if (ctx == NULL)
			goto port_error;

		/* Check port status. */
		err = ibv_query_port(ctx, port, &port_attr);
		if (err) {
			ERROR("port query failed: %s", strerror(err));
			goto port_error;
		}
		if (port_attr.state != IBV_PORT_ACTIVE)
			DEBUG("port %d is not active: \"%s\" (%d)",
			      port, ibv_port_state_str(port_attr.state),
			      port_attr.state);

		/* Allocate protection domain. */
		pd = ibv_alloc_pd(ctx);
		if (pd == NULL) {
			ERROR("PD allocation failure");
			err = ENOMEM;
			goto port_error;
		}

		mlx5_dev[idx].ports |= test;

		/* from rte_ethdev.c */
		priv = rte_zmalloc("ethdev private structure",
				   sizeof(*priv),
				   RTE_CACHE_LINE_SIZE);
		if (priv == NULL) {
			ERROR("priv allocation failure");
			err = ENOMEM;
			goto port_error;
		}

		priv->ctx = ctx;
		priv->device_attr = device_attr;
		priv->port = port;
		priv->pd = pd;
		priv->mtu = ETHER_MTU;
#ifdef HAVE_EXP_QUERY_DEVICE
		if (ibv_exp_query_device(ctx, &exp_device_attr)) {
			ERROR("ibv_exp_query_device() failed");
			goto port_error;
		}

		priv->hw_csum =
			((exp_device_attr.exp_device_cap_flags &
			  IBV_EXP_DEVICE_RX_CSUM_TCP_UDP_PKT) &&
			 (exp_device_attr.exp_device_cap_flags &
			  IBV_EXP_DEVICE_RX_CSUM_IP_PKT));
		DEBUG("checksum offloading is %ssupported",
		      (priv->hw_csum ? "" : "not "));

		priv->hw_csum_l2tun = !!(exp_device_attr.exp_device_cap_flags &
					 IBV_EXP_DEVICE_VXLAN_SUPPORT);
		DEBUG("L2 tunnel checksum offloads are %ssupported",
		      (priv->hw_csum_l2tun ? "" : "not "));

		priv->ind_table_max_size = exp_device_attr.rx_hash_caps.max_rwq_indirection_table_size;
		DEBUG("maximum RX indirection table size is %u",
		      priv->ind_table_max_size);

#else /* HAVE_EXP_QUERY_DEVICE */
		priv->ind_table_max_size = RSS_INDIRECTION_TABLE_SIZE;
#endif /* HAVE_EXP_QUERY_DEVICE */

		priv->vf = vf;
		/* Allocate and register default RSS hash keys. */
		priv->rss_conf = rte_calloc(__func__, hash_rxq_init_n,
					    sizeof((*priv->rss_conf)[0]), 0);
		if (priv->rss_conf == NULL) {
			err = ENOMEM;
			goto port_error;
		}
		err = rss_hash_rss_conf_new_key(priv,
						rss_hash_default_key,
						rss_hash_default_key_len,
						ETH_RSS_PROTO_MASK);
		if (err)
			goto port_error;
		/* Configure the first MAC address by default. */
		if (priv_get_mac(priv, &mac.addr_bytes)) {
			ERROR("cannot get MAC address, is mlx5_en loaded?"
			      " (errno: %s)", strerror(errno));
			goto port_error;
		}
		INFO("port %u MAC address is %02x:%02x:%02x:%02x:%02x:%02x",
		     priv->port,
		     mac.addr_bytes[0], mac.addr_bytes[1],
		     mac.addr_bytes[2], mac.addr_bytes[3],
		     mac.addr_bytes[4], mac.addr_bytes[5]);
		/* Register MAC and broadcast addresses. */
		claim_zero(priv_mac_addr_add(priv, 0,
					     (const uint8_t (*)[ETHER_ADDR_LEN])
					     mac.addr_bytes));
		claim_zero(priv_mac_addr_add(priv, (RTE_DIM(priv->mac) - 1),
					     &(const uint8_t [ETHER_ADDR_LEN])
					     { "\xff\xff\xff\xff\xff\xff" }));
#ifndef NDEBUG
		{
			char ifname[IF_NAMESIZE];

			if (priv_get_ifname(priv, &ifname) == 0)
				DEBUG("port %u ifname is \"%s\"",
				      priv->port, ifname);
			else
				DEBUG("port %u ifname is unknown", priv->port);
		}
#endif
		/* Get actual MTU if possible. */
		priv_get_mtu(priv, &priv->mtu);
		DEBUG("port %u MTU is %u", priv->port, priv->mtu);

		/* from rte_ethdev.c */
		{
			char name[RTE_ETH_NAME_MAX_LEN];

			snprintf(name, sizeof(name), "%s port %u",
				 ibv_get_device_name(ibv_dev), port);
			eth_dev = rte_eth_dev_allocate(name, RTE_ETH_DEV_PCI);
		}
		if (eth_dev == NULL) {
			ERROR("can not allocate rte ethdev");
			err = ENOMEM;
			goto port_error;
		}

		eth_dev->data->dev_private = priv;
		eth_dev->pci_dev = pci_dev;
		eth_dev->driver = &mlx5_driver;
		eth_dev->data->rx_mbuf_alloc_failed = 0;
		eth_dev->data->mtu = ETHER_MTU;

		priv->dev = eth_dev;
		eth_dev->dev_ops = &mlx5_dev_ops;
		eth_dev->data->mac_addrs = priv->mac;
		TAILQ_INIT(&eth_dev->link_intr_cbs);

		/* Bring Ethernet device up. */
		DEBUG("forcing Ethernet interface up");
		priv_set_flags(priv, ~IFF_UP, IFF_UP);
		continue;

port_error:
		/*
		 * priv is still NULL when the failure occurred before or
		 * during its allocation (device open, port query, PD
		 * allocation, rte_zmalloc()), so it must not be dereferenced
		 * to reach rss_conf in that case.
		 */
		if (priv != NULL) {
			rte_free(priv->rss_conf);
			rte_free(priv);
		}
		if (pd)
			claim_zero(ibv_dealloc_pd(pd));
		if (ctx)
			claim_zero(ibv_close_device(ctx));
		/* Stop at the first port that fails to initialize. */
		break;
	}
Ejemplo n.º 30
0
/*-----------------------------------------------------------------------------------*/
/*
 * Set up the raw-packet verbs resources behind a network interface.
 *
 * Works on the struct ibvif stored in netif->state: assigns a fixed MAC
 * address, opens an IB device, allocates a protection domain, registers the
 * send and receive buffers taken from netif->prot_thread, creates the send
 * and receive completion queues and an IBV_QPT_RAW_PACKET queue pair, moves
 * that QP to the INIT state and finally calls ibv_attach_device().
 *
 * Every failure is reported with perror() and terminates the process with
 * exit(1); the function only returns on success.
 */
static void
low_level_init(struct netif *netif)
{
  struct ibvif *ibvif;
  int num_of_device, flags = IBV_ACCESS_LOCAL_WRITE;
  struct ibv_qp_init_attr attr;
  struct ibv_qp_attr qp_attr;
  uint8_t port_num = 1;
  int    qp_flags;
  struct ibv_device **ib_dev_list;
  struct ibv_exp_cq_init_attr cq_attr;
  /* Index of the device to open within the list returned by libibverbs. */
  const int dev_index = 1;

  ibvif = (struct ibvif *)netif->state;

  /* Assign a fixed, hard-coded MAC address to the interface. */
  ibvif->ethaddr->addr[0] = 0x00;
  ibvif->ethaddr->addr[1] = 0x02;
  ibvif->ethaddr->addr[2] = 0xc9;
  ibvif->ethaddr->addr[3] = 0xa4;
  ibvif->ethaddr->addr[4] = 0x59;
  ibvif->ethaddr->addr[5] = 0x41;

  ibvif->buf_size = ALIGN_TO_PAGE_SIZE(PBUF_POOL_SIZE * TCP_MAX_PACKET_SIZE);

  /* Do things needed for using Raw Packet Verbs */

  ib_dev_list = ibv_get_device_list(&num_of_device);
  /*
   * Check the list pointer before trusting the count, and make sure the
   * entry that is actually opened below exists: the list is NULL-terminated,
   * so with too few devices ib_dev_list[dev_index] would be the terminator
   * (or out of bounds).
   */
  if (!ib_dev_list || num_of_device <= dev_index || !ib_dev_list[dev_index]) {
    perror("IBV no device found\n");
    exit(1);
  }

  ibvif->context = ibv_open_device(ib_dev_list[dev_index]);
  if (!ibvif->context) {
    perror("IBV can't open device\n");
    exit(1);
  }

  /* The opened context stays usable after the list is released. */
  ibv_free_device_list(ib_dev_list);

  if (set_link_layer(ibvif->context, 1) == LINK_FAILURE) {
    perror("IBV can't set link layer\n");
    exit(1);
  }

  ibvif->pd = ibv_alloc_pd(ibvif->context);
  if (!ibvif->pd) {
    perror("IBV can't allocate PD\n");
    exit(1);
  }

  /* Packet buffers are provided by the protocol thread, not allocated here. */
  ibvif->recv_buf     = netif->prot_thread->pbuf_rx_handle.buf;
  ibvif->send_buf     = netif->prot_thread->pbuf_tx_handle.buf;
  ibvif->send_size    = TCP_MAX_PACKET_SIZE;
  ibvif->rx_depth     = PBUF_POOL_SIZE;
  ibvif->tx_depth     = PBUF_POOL_SIZE;

  ibvif->send_mr = ibv_reg_mr(ibvif->pd, ibvif->send_buf, ibvif->buf_size, flags);
  if (!ibvif->send_mr) {
    perror("IBV error reg send mr\n");
    exit(1);
  }

  ibvif->recv_mr = ibv_reg_mr(ibvif->pd, ibvif->recv_buf, ibvif->buf_size, flags);
  if (!ibvif->recv_mr) {
    perror("IBV error reg recv mr\n");
    exit(1);
  }

  ibvif->send_cq = ibv_create_cq(ibvif->context, ibvif->tx_depth, NULL, NULL, 0);
  if (!ibvif->send_cq) {
    perror("IBV can't create send cq\n");
    exit(1);
  }

  /* Zero the whole attribute struct so no field is passed uninitialized. */
  memset(&cq_attr, 0, sizeof(cq_attr));
  cq_attr.flags = IBV_EXP_CQ_TIMESTAMP;
  cq_attr.comp_mask = IBV_EXP_CQ_INIT_ATTR_FLAGS;
  ibvif->recv_cq = ibv_exp_create_cq(ibvif->context, ibvif->rx_depth, NULL, NULL, 0, &cq_attr);
  if (!ibvif->recv_cq) {
    perror("IBV can't create recv cq\n");
    exit(1);
  }

  memset(&attr, 0, sizeof(struct ibv_qp_init_attr));
  attr.send_cq = ibvif->send_cq;
  attr.recv_cq = ibvif->recv_cq;
  attr.cap.max_send_wr = ibvif->tx_depth;
  attr.cap.max_send_sge = 1;
  attr.cap.max_recv_wr = ibvif->rx_depth;
  attr.cap.max_recv_sge = 1;
  attr.qp_type = IBV_QPT_RAW_PACKET;

  ibvif->qp = ibv_create_qp(ibvif->pd, &attr);
  if (!ibvif->qp) {
    perror("IBV can't create QP\n");
    exit(1);
  }

  /* Only the state and port attributes are applied for the INIT transition. */
  qp_flags = IBV_QP_STATE | IBV_QP_PORT;
  memset(&qp_attr, 0, sizeof(struct ibv_qp_attr));
  qp_attr.qp_state = IBV_QPS_INIT;
  qp_attr.pkey_index = 0;
  qp_attr.port_num = port_num;
  qp_attr.qp_access_flags = 0;

  if (ibv_modify_qp(ibvif->qp, &qp_attr, qp_flags)) {
    perror("IBV can't set qp to init\n");
    exit(1);
  }
  ibv_attach_device(netif);
}