/**
 * Cleanup and exit: release every driver resource and reset global state.
 */
void unvme_cleanup()
{
    INFO_FN();

    // Tear down sessions one at a time until the list is empty.
    while (unvme_dev.ses != NULL) unvme_session_delete(unvme_dev.ses->prev);

    // Release the NVMe and VFIO contexts if they were created.
    if (unvme_dev.nvmedev != NULL) nvme_delete(unvme_dev.nvmedev);
    if (unvme_dev.vfiodev != NULL) vfio_delete(unvme_dev.vfiodev);

    log_close();

    // Wipe the global device state so a later re-open starts clean.
    memset(&unvme_dev, 0, sizeof(unvme_dev));
}
/** * Disable MSIX interrupt. * @param vdev device context * @return 0 if ok else -1. */ int vfio_msix_disable(vfio_device_t* vdev) { vfio_dev_t* dev = (vfio_dev_t*)vdev; if (dev->msix_nvec == 0) return 0; struct vfio_irq_set irq_set = { .argsz = sizeof(irq_set), .flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER, .index = VFIO_PCI_MSIX_IRQ_INDEX, .start = 0, .count = 0, }; if (ioctl(dev->fd, VFIO_DEVICE_SET_IRQS, &irq_set)) { ERROR("Unable to disable MSI-X interrupt"); return -1; } dev->msix_nvec = 0; return 0; } /** * Create a VFIO device context. * @param pci PCI device id (as BB:DD.F format) * @return device context or NULL if failure. */ vfio_device_t* vfio_create(int pci) { // map PCI to vfio device number char pciname[64]; char path[128]; int i; sprintf(pciname, "0000:%02x:%02x.%x", pci >> 16, (pci >> 8) & 0xff, pci & 0xff); sprintf(path, "/sys/bus/pci/devices/%s/driver", pciname); if ((i = readlink(path, path, sizeof(path))) < 0) { ERROR("unknown PCI device %s", pciname); return NULL; } path[i] = 0; if (!strstr(path, "/vfio-pci")) { ERROR("device %s not bound to vfio driver", pciname); return NULL; } sprintf(path, "/sys/bus/pci/devices/%s/iommu_group", pciname); if ((i = readlink(path, path, sizeof(path))) < 0) { ERROR("No iommu_group associated with device %s", pciname); return NULL; } path[i] = 0; int vfid = atoi(strrchr(path, '/') + 1); struct vfio_group_status group_status = { .argsz = sizeof(group_status) }; struct vfio_iommu_type1_info iommu_info = { .argsz = sizeof(iommu_info) }; struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) }; // allocate and initialize device context vfio_dev_t* dev = zalloc(sizeof(*dev)); dev->pci = pci; dev->iova = VFIO_IOVA; if (pthread_spin_init(&dev->lock, PTHREAD_PROCESS_PRIVATE)) return NULL; // map vfio context if ((dev->contfd = open("/dev/vfio/vfio", O_RDWR)) < 0) { ERROR("open /dev/vfio/vfio"); goto error; } if (ioctl(dev->contfd, VFIO_GET_API_VERSION) != VFIO_API_VERSION) { ERROR("ioctl 
VFIO_GET_API_VERSION"); goto error; } if (ioctl(dev->contfd, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU) == 0) { ERROR("ioctl VFIO_CHECK_EXTENSION"); goto error; } sprintf(path, "/dev/vfio/%d", vfid); if ((dev->groupfd = open(path, O_RDWR)) < 0) { ERROR("open %s failed", path); goto error; } if (ioctl(dev->groupfd, VFIO_GROUP_GET_STATUS, &group_status) < 0) { ERROR("ioctl VFIO_GROUP_GET_STATUS"); goto error; } if (!(group_status.flags & VFIO_GROUP_FLAGS_VIABLE)) { ERROR("group not viable %#x", group_status.flags); goto error; } if (ioctl(dev->groupfd, VFIO_GROUP_SET_CONTAINER, &dev->contfd) < 0) { ERROR("ioctl VFIO_GROUP_SET_CONTAINER"); goto error; } if (ioctl(dev->contfd, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU) < 0) { ERROR("ioctl VFIO_SET_IOMMU"); goto error; } if (ioctl(dev->contfd, VFIO_IOMMU_GET_INFO, &iommu_info) < 0) { ERROR("ioctl VFIO_IOMMU_GET_INFO"); goto error; } dev->fd = ioctl(dev->groupfd, VFIO_GROUP_GET_DEVICE_FD, pciname); if (dev->fd < 0) { ERROR("ioctl VFIO_GROUP_GET_DEVICE_FD"); goto error; } if (ioctl(dev->fd, VFIO_DEVICE_GET_INFO, &dev_info) < 0) { ERROR("ioctl VFIO_DEVICE_GET_INFO"); } DEBUG_FN("%x: flags=%u regions=%u irqs=%u", pci, dev_info.flags, dev_info.num_regions, dev_info.num_irqs); for (i = 0; i < dev_info.num_regions; i++) { struct vfio_region_info reg = { .argsz = sizeof(reg), .index = i }; if (ioctl(dev->fd, VFIO_DEVICE_GET_REGION_INFO, ®)) continue; DEBUG_FN("%x: region=%d flags=%#x resv=%u off=%#llx size=%#llx", pci, reg.index, reg.flags, reg.resv, reg.offset, reg.size); if (i == VFIO_PCI_CONFIG_REGION_INDEX) { __u8 config[256]; if (vfio_read(dev, config, sizeof(config), reg.offset)) goto error; HEX_DUMP(config, sizeof(config)); __u16* vendor = (__u16*)(config + PCI_VENDOR_ID); __u16* cmd = (__u16*)(config + PCI_COMMAND); if (*vendor == 0xffff) { ERROR("device in bad state"); goto error; } *cmd |= PCI_COMMAND_MASTER|PCI_COMMAND_MEMORY|PCI_COMMAND_INTX_DISABLE; if (vfio_write(dev, cmd, sizeof(*cmd), reg.offset + PCI_COMMAND)) goto error; 
if (vfio_read(dev, cmd, sizeof(*cmd), reg.offset + PCI_COMMAND)) goto error; // read MSIX table size __u8 cap = config[PCI_CAPABILITY_LIST]; while (cap) { if (config[cap] == PCI_CAP_ID_MSIX) { __u16* msix_flags = (__u16*)(config + cap + PCI_MSIX_FLAGS); dev->msix_size = (*msix_flags & PCI_MSIX_FLAGS_QSIZE) + 1; break; } cap = config[cap+1]; } DEBUG_FN("%x: vendor=%#x cmd=%#x msix=%d device=%#x rev=%d", pci, *vendor, *cmd, dev->msix_size, (__u16*)(config + PCI_DEVICE_ID), config[PCI_REVISION_ID]); } } for (i = 0; i < dev_info.num_irqs; i++) { struct vfio_irq_info irq = { .argsz = sizeof(irq), .index = i }; if (ioctl(dev->fd, VFIO_DEVICE_GET_IRQ_INFO, &irq)) continue; DEBUG_FN("%x: irq=%s count=%d flags=%#x", pci, vfio_irq_names[i], irq.count, irq.flags); if (i == VFIO_PCI_MSIX_IRQ_INDEX && irq.count != dev->msix_size) { ERROR("VFIO_DEVICE_GET_IRQ_INFO MSIX count %d != %d", irq.count, dev->msix_size); } } return (vfio_device_t*)dev; error: vfio_delete((vfio_device_t*)dev); return NULL; } /** * Delete a VFIO device context. * @param vdev device context */ void vfio_delete(vfio_device_t* vdev) { if (!vdev) return; vfio_dev_t* dev = (vfio_dev_t*)vdev; DEBUG_FN("%x", dev->pci); // free all memory associated with the device while (dev->memlist) vfio_mem_free(dev->memlist); if (dev->fd) { close(dev->fd); dev->fd = 0; } if (dev->contfd) { close(dev->contfd); dev->contfd = 0; } if (dev->groupfd) { close(dev->groupfd); dev->groupfd = 0; } pthread_spin_destroy(&dev->lock); free(dev); } /** * Map a premapped buffer and return a DMA buffer. * @param vdev device context * @param size allocation size * @param pmb premapped buffer * @return 0 if ok else -1. */ vfio_dma_t* vfio_dma_map(vfio_device_t* vdev, size_t size, void* pmb) { vfio_mem_t* mem = vfio_mem_alloc(vdev, 1, size, pmb); return mem ? &mem->dma : NULL; } /** * Free a DMA buffer (without unmapping dma->buf). * @param dma memory pointer * @return 0 if ok else -1. 
*/
int vfio_dma_unmap(vfio_dma_t* dma)
{
    // dma->id holds the owning vfio_mem_t; release it (buffer itself untouched)
    vfio_mem_t* mem = (vfio_mem_t*)dma->id;
    return vfio_mem_free(mem);
}

/**
 * Allocate and return a DMA buffer.
 * @param vdev device context
 * @param size allocation size
 * @return DMA buffer or NULL if failure.
 */
vfio_dma_t* vfio_dma_alloc(vfio_device_t* vdev, size_t size)
{
    // single-page-aligned allocation with no premapped buffer
    vfio_mem_t* mem = vfio_mem_alloc(vdev, 1, size, 0);
    if (mem == NULL) return NULL;
    return &mem->dma;
}

/**
 * Free a DMA buffer.
 * @param dma memory pointer
 * @return 0 if ok else -1.
 */
int vfio_dma_free(vfio_dma_t* dma)
{
    // dma->id holds the owning vfio_mem_t; free both mapping and memory
    vfio_mem_t* mem = (vfio_mem_t*)dma->id;
    return vfio_mem_free(mem);
}