static void setup_interrupt(IVShmemState *s, int vector, Error **errp) { EventNotifier *n = &s->peers[s->vm_id].eventfds[vector]; bool with_irqfd = kvm_msi_via_irqfd_enabled() && ivshmem_has_feature(s, IVSHMEM_MSI); PCIDevice *pdev = PCI_DEVICE(s); Error *err = NULL; IVSHMEM_DPRINTF("setting up interrupt for vector: %d\n", vector); if (!with_irqfd) { IVSHMEM_DPRINTF("with eventfd\n"); watch_vector_notifier(s, n, vector); } else if (msix_enabled(pdev)) { IVSHMEM_DPRINTF("with irqfd\n"); ivshmem_add_kvm_msi_virq(s, vector, &err); if (err) { error_propagate(errp, err); return; } if (!msix_is_masked(pdev, vector)) { kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, s->msi_vectors[vector].virq); /* TODO handle error */ } } else { /* it will be delayed until msix is enabled, in write_config */ IVSHMEM_DPRINTF("with irqfd, delayed until msix enabled\n"); } }
/*
 * Initialize MSI-X for the device: create the vector table, expose it
 * through BAR 1, mark every vector in use, and allocate the per-vector
 * eventfd bookkeeping table.  Exits the process on init failure.
 */
static void ivshmem_setup_msi(IVShmemState * s)
{
    int vec;

    /* allocate the MSI-X vectors */
    if (msix_init(&s->dev, s->vectors, 1, 0) != 0) {
        IVSHMEM_DPRINTF("msix initialization failed\n");
        exit(1);
    }

    pci_register_bar(&s->dev, 1, msix_bar_size(&s->dev),
                     PCI_BASE_ADDRESS_SPACE_MEMORY, msix_mmio_map);
    IVSHMEM_DPRINTF("msix initialized (%d vectors)\n", s->vectors);

    /* 'activate' the vectors */
    for (vec = 0; vec < s->vectors; vec++) {
        msix_vector_use(&s->dev, vec);
    }

    /* allocate Qemu char devices for receiving interrupts */
    s->eventfd_table = qemu_mallocz(s->vectors * sizeof(EventfdEntry));
}
/*
 * Wire up interrupt delivery for @vector of this device.
 *
 * Without KVM irqfd support, a chardev-backed eventfd handler is
 * created.  With irqfd and guest-enabled MSI-X, the notifier is routed
 * through a KVM MSI virq.  Otherwise routing is deferred until the
 * guest enables MSI-X (done in write_config).
 */
static void setup_interrupt(IVShmemState *s, int vector)
{
    EventNotifier *n = &s->peers[s->vm_id].eventfds[vector];
    bool with_irqfd = kvm_msi_via_irqfd_enabled() &&
        ivshmem_has_feature(s, IVSHMEM_MSI);
    PCIDevice *pdev = PCI_DEVICE(s);

    IVSHMEM_DPRINTF("setting up interrupt for vector: %d\n", vector);

    if (!with_irqfd) {
        /* fixed: debug messages below were missing their trailing \n */
        IVSHMEM_DPRINTF("with eventfd\n");
        s->eventfd_chr[vector] = create_eventfd_chr_device(s, n, vector);
    } else if (msix_enabled(pdev)) {
        IVSHMEM_DPRINTF("with irqfd\n");
        if (ivshmem_add_kvm_msi_virq(s, vector) < 0) {
            return;
        }

        if (!msix_is_masked(pdev, vector)) {
            kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL,
                                               s->msi_vectors[vector].virq);
        }
    } else {
        /* it will be delayed until msix is enabled, in write_config */
        IVSHMEM_DPRINTF("with irqfd, delayed until msix enabled\n");
    }
}
/*
 * Initialize MSI-X in its own exclusive BAR, allocate the per-vector
 * eventfd table, and mark all vectors in use.  Exits on init failure.
 */
static void ivshmem_setup_msi(IVShmemState * s)
{
    int ret = msix_init_exclusive_bar(&s->dev, s->vectors, 1);

    if (ret) {
        IVSHMEM_DPRINTF("msix initialization failed\n");
        exit(1);
    }

    IVSHMEM_DPRINTF("msix initialized (%d vectors)\n", s->vectors);

    /* allocate QEMU char devices for receiving interrupts */
    s->eventfd_table = g_malloc0(s->vectors * sizeof(EventfdEntry));

    ivshmem_use_msix(s);
}
/*
 * MMIO read handler for the register BAR.
 * INTRMASK/INTRSTATUS return the respective registers, IVPOSITION
 * returns this guest's peer ID; anything else reads as zero.
 */
static uint64_t ivshmem_io_read(void *opaque, hwaddr addr, unsigned size)
{
    IVShmemState *s = opaque;

    switch (addr) {
    case INTRMASK:
        return ivshmem_IntrMask_read(s);

    case INTRSTATUS:
        return ivshmem_IntrStatus_read(s);

    case IVPOSITION:
        return s->vm_id;

    default:
        IVSHMEM_DPRINTF("why are we reading " TARGET_FMT_plx "\n", addr);
        return 0;
    }
}
/* Guest write to the interrupt status register; re-evaluate INTx. */
static void ivshmem_IntrStatus_write(IVShmemState *s, uint32_t val)
{
    IVSHMEM_DPRINTF("IntrStatus write(w) val = 0x%04x\n", val);

    s->intrstatus = val;
    /* status changed, so the INTx line may need to change too */
    ivshmem_update_irq(s);
}
/*
 * Handle a "connect" message from the ivshmem server: peer @posn has
 * sent eventfd @fd for its next interrupt vector.
 *
 * Takes ownership of @fd.  On error (too many eventfds for the
 * configured vector count) sets @errp and closes @fd.
 */
static void process_msg_connect(IVShmemState *s, uint16_t posn, int fd,
                                Error **errp)
{
    Peer *peer = &s->peers[posn];
    int vector;

    /*
     * The N-th connect message for this peer comes with the file
     * descriptor for vector N-1. Count messages to find the vector.
     */
    if (peer->nb_eventfds >= s->vectors) {
        error_setg(errp, "Too many eventfd received, device has %d vectors",
                   s->vectors);
        close(fd);
        return;
    }
    vector = peer->nb_eventfds++;

    IVSHMEM_DPRINTF("eventfds[%d][%d] = %d\n", posn, vector, fd);
    /* wrap the raw fd in an EventNotifier; the notifier now owns it */
    event_notifier_init_fd(&peer->eventfds[vector], fd);
    fcntl_setfl(fd, O_NONBLOCK); /* msix/irqfd poll non block */

    if (posn == s->vm_id) {
        /* this eventfd belongs to one of our own vectors: wire it up */
        setup_interrupt(s, vector, errp);
        /* TODO do we need to handle the error? */
    }

    if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) {
        ivshmem_add_eventfd(s, posn, vector);
    }
}
static void process_msg(IVShmemState *s, int64_t msg, int fd, Error **errp) { IVSHMEM_DPRINTF("posn is %" PRId64 ", fd is %d\n", msg, fd); if (msg < -1 || msg > IVSHMEM_MAX_PEERS) { error_setg(errp, "server sent invalid message %" PRId64, msg); close(fd); return; } if (msg == -1) { process_msg_shmem(s, fd, errp); return; } if (msg >= s->nb_peers) { resize_peers(s, msg + 1); } if (fd >= 0) { process_msg_connect(s, msg, fd, errp); } else { process_msg_disconnect(s, msg, errp); } }
/*
 * Reassemble a fixed-size datum of @len bytes from possibly-partial
 * chardev reads.
 *
 * @buf/@size is the freshly received chunk.  If, together with any
 * bytes already buffered in s->incoming_fifo, a full datum is
 * available, copy it into @data and return true; otherwise stash the
 * chunk in the fifo and return false.  Bytes left over after
 * completing a datum are pushed back for the next call.
 */
static bool fifo_update_and_get(IVShmemState *s, const uint8_t *buf, int size,
                                void *data, size_t len)
{
    const uint8_t *p;
    uint32_t num;

    assert(len <= sizeof(int64_t)); /* limitation of the fifo */

    /* fast path: nothing buffered and the chunk is exactly one datum */
    if (fifo8_is_empty(&s->incoming_fifo) && size == len) {
        memcpy(data, buf, size);
        return true;
    }

    IVSHMEM_DPRINTF("short read of %d bytes\n", size);

    /* buffer as much of the chunk as fits (fifo holds one datum max) */
    num = MIN(size, sizeof(int64_t) - fifo8_num_used(&s->incoming_fifo));
    fifo8_push_all(&s->incoming_fifo, buf, num);

    if (fifo8_num_used(&s->incoming_fifo) < len) {
        /* still incomplete; the whole chunk must have been consumed */
        assert(num == 0);
        return false;
    }

    /* datum complete: pop it out, then re-buffer any surplus bytes */
    size -= num;
    buf += num;
    p = fifo8_pop_buf(&s->incoming_fifo, len, &num);
    assert(num == len);

    memcpy(data, p, len);

    if (size > 0) {
        fifo8_push_all(&s->incoming_fifo, buf, size);
    }

    return true;
}
/*
 * Grow the peer table so it holds at least @new_min_size entries,
 * zero-initializing the eventfd bookkeeping of each new slot.
 * Returns 0 on success (including "already big enough"), -1 when the
 * requested size is out of the allowed range.
 */
static int resize_peers(IVShmemState *s, int new_min_size)
{
    int old_size, i;

    /* limit number of max peers */
    if (new_min_size <= 0 || new_min_size > IVSHMEM_MAX_PEERS) {
        return -1;
    }
    /* nothing to do if we already have enough slots */
    if (new_min_size <= s->nb_peers) {
        return 0;
    }

    old_size = s->nb_peers;
    s->nb_peers = new_min_size;

    IVSHMEM_DPRINTF("bumping storage to %d peers\n", s->nb_peers);

    s->peers = g_realloc(s->peers, s->nb_peers * sizeof(Peer));

    for (i = old_size; i < s->nb_peers; i++) {
        s->peers[i].eventfds = g_new0(EventNotifier, s->vectors);
        s->peers[i].nb_eventfds = 0;
    }

    return 0;
}
/*
 * Restore device state from a legacy (version 0) migration stream.
 * 'peer' role devices carry no migratable state and are rejected.
 * Returns 0 on success, negative errno on failure.
 */
static int ivshmem_load_old(QEMUFile *f, void *opaque, int version_id)
{
    IVShmemState *s = opaque;
    PCIDevice *pdev = PCI_DEVICE(s);
    int ret;

    IVSHMEM_DPRINTF("ivshmem_load_old\n");

    /* only the legacy format version is acceptable here */
    if (version_id != 0) {
        return -EINVAL;
    }

    if (s->role_val == IVSHMEM_PEER) {
        error_report("'peer' devices are not migratable");
        return -EINVAL;
    }

    ret = pci_device_load(pdev, f);
    if (ret) {
        return ret;
    }

    if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
        /* MSI-X state lives in the msix core; re-arm our vectors */
        msix_load(pdev, f);
        ivshmem_use_msix(s);
    } else {
        /* legacy INTx state: raw status and mask registers */
        s->intrstatus = qemu_get_be32(f);
        s->intrmask = qemu_get_be32(f);
    }

    return 0;
}
/*
 * MMIO read handler for the register BAR.  IVPOSITION reports this
 * guest's VM ID only once the shared memory has been mapped
 * (s->shm_fd valid); -1 means "not ready yet".
 */
static uint64_t ivshmem_io_read(void *opaque, target_phys_addr_t addr,
                                unsigned size)
{
    IVShmemState *s = opaque;

    switch (addr) {
    case INTRMASK:
        return ivshmem_IntrMask_read(s);

    case INTRSTATUS:
        return ivshmem_IntrStatus_read(s);

    case IVPOSITION:
        /* return my VM ID if the memory is mapped */
        return (s->shm_fd > 0) ? (uint32_t)s->vm_id : (uint32_t)-1;

    default:
        IVSHMEM_DPRINTF("why are we reading " TARGET_FMT_plx "\n", addr);
        return 0;
    }
}
/*
 * Grow the peer table so that @new_min_size is usable as an index
 * (hence the +1 on the allocation size).  New slots get NULL eventfd
 * arrays.  Returns 0 on success or when no growth is needed, -1 on
 * out-of-range input or would-be integer overflow.
 */
static int increase_dynamic_storage(IVShmemState *s, int new_min_size)
{
    int i, old_nb_alloc;

    /* check for integer overflow */
    if (new_min_size >= INT_MAX / sizeof(Peer) - 1 || new_min_size <= 0) {
        return -1;
    }

    old_nb_alloc = s->nb_peers;

    /* already large enough? */
    if (new_min_size < s->nb_peers) {
        return 0;
    }

    /* +1 because new_min_size is used as the last array index */
    s->nb_peers = new_min_size + 1;

    IVSHMEM_DPRINTF("bumping storage to %d guests\n", s->nb_peers);

    s->peers = g_realloc(s->peers, s->nb_peers * sizeof(Peer));

    /* zero out new pointers */
    for (i = old_nb_alloc; i < s->nb_peers; i++) {
        s->peers[i].eventfds = NULL;
        s->peers[i].nb_eventfds = 0;
    }

    return 0;
}
/* Guest read of the interrupt mask register. */
static uint32_t ivshmem_IntrMask_read(IVShmemState *s)
{
    uint32_t mask = s->intrmask;

    IVSHMEM_DPRINTF("intrmask read(w) val = 0x%04x\n", mask);
    return mask;
}
/*
 * Recompute the INTx line from status & mask.
 *
 * Only devices actually using INTx are touched.  Per-variant config
 * space looks like this:
 *   Device variant     Interrupt  Interrupt Pin  MSI-X cap.
 *   ivshmem-plain         none            0         no
 *   ivshmem-doorbell     MSI-X            1        yes(1)
 *   ivshmem,msi=off       INTx            1         no
 *   ivshmem,msi=on       MSI-X          1(2)       yes(1)
 *   (1) if guest enabled MSI-X
 *   (2) the device lies
 * So: bail out when the device has the MSI feature, or when no
 * interrupt pin is advertised.
 */
static void ivshmem_update_irq(IVShmemState *s)
{
    PCIDevice *d = PCI_DEVICE(s);
    uint32_t isr = s->intrstatus & s->intrmask;

    if (ivshmem_has_feature(s, IVSHMEM_MSI) ||
        !d->config[PCI_INTERRUPT_PIN]) {
        return;
    }

    /* don't print ISR resets */
    if (isr) {
        IVSHMEM_DPRINTF("Set IRQ to %d (%04x %04x)\n",
                        isr ? 1 : 0, s->intrstatus, s->intrmask);
    }

    pci_set_irq(d, isr != 0);
}
/*
 * Restore device state from a legacy (version 0) migration stream.
 * Runs the common pre-load checks first.  Returns 0 on success,
 * negative errno on failure.
 */
static int ivshmem_load_old(QEMUFile *f, void *opaque, int version_id)
{
    IVShmemState *s = opaque;
    PCIDevice *pdev = PCI_DEVICE(s);
    int ret;

    IVSHMEM_DPRINTF("ivshmem_load_old\n");

    /* only the legacy format version is acceptable here */
    if (version_id != 0) {
        return -EINVAL;
    }

    ret = ivshmem_pre_load(s);
    if (ret) {
        return ret;
    }

    ret = pci_device_load(pdev, f);
    if (ret) {
        return ret;
    }

    if (ivshmem_has_feature(s, IVSHMEM_MSI)) {
        /* MSI-X state lives in the msix core; re-arm our vectors */
        msix_load(pdev, f);
        ivshmem_msix_vector_use(s);
    } else {
        /* legacy INTx state: raw status and mask registers */
        s->intrstatus = qemu_get_be32(f);
        s->intrmask = qemu_get_be32(f);
    }

    return 0;
}
/*
 * Restore device state from a migration stream.  'peer' role devices
 * carry no migratable state and are rejected.  Returns 0 on success,
 * negative errno on failure.
 */
static int ivshmem_load(QEMUFile *f, void *opaque, int version_id)
{
    IVShmemState *proxy = opaque;
    int ret;

    IVSHMEM_DPRINTF("ivshmem_load\n");

    if (version_id > 0) {
        return -EINVAL;
    }

    if (proxy->role_val == IVSHMEM_PEER) {
        /*
         * Consistency fix: use error_report() like the sibling
         * ivshmem_load_old() instead of a raw fprintf(stderr, ...).
         */
        error_report("ivshmem: 'peer' devices are not migratable");
        return -EINVAL;
    }

    ret = pci_device_load(&proxy->dev, f);
    if (ret) {
        return ret;
    }

    if (ivshmem_has_feature(proxy, IVSHMEM_MSI)) {
        /* MSI-X state lives in the msix core; re-arm our vectors */
        msix_load(&proxy->dev, f);
        ivshmem_use_msix(proxy);
    } else {
        /* legacy INTx state: raw status and mask registers */
        proxy->intrstatus = qemu_get_be32(f);
        proxy->intrmask = qemu_get_be32(f);
    }

    return 0;
}
/*
 * Chardev read callback emulating irqfd: an eventfd kick arrives as
 * chardev input and is converted into an MSI-X notification for the
 * vector recorded in the EventfdEntry.
 */
static void fake_irqfd(void *opaque, const uint8_t *buf, int size)
{
    EventfdEntry *entry = opaque;
    PCIDevice *pdev = entry->pdev;

    IVSHMEM_DPRINTF("interrupt on vector %p %d\n", pdev, entry->vector);
    msix_notify(pdev, entry->vector);
}
/* Guest write to the interrupt mask register; re-evaluate the IRQ. */
static void ivshmem_IntrMask_write(IVShmemState *s, uint32_t val)
{
    IVSHMEM_DPRINTF("IntrMask write(w) val = 0x%04x\n", val);

    s->intrmask = val;
    /* mask changed, so the interrupt line may need to change too */
    ivshmem_update_irq(s, val);
}
/*
 * Chardev read callback for interrupt delivery without MSI: the first
 * received byte is written into the interrupt status register.
 */
static void ivshmem_receive(void *opaque, const uint8_t *buf, int size)
{
    IVShmemState *s = opaque;
    uint8_t status = *buf;

    ivshmem_IntrStatus_write(s, status);
    IVSHMEM_DPRINTF("ivshmem_receive 0x%02x\n", status);
}
/*
 * Chardev read callback emulating irqfd: the MSIVector's position in
 * s->msi_vectors identifies the vector, which is then raised via
 * MSI-X.
 */
static void fake_irqfd(void *opaque, const uint8_t *buf, int size)
{
    MSIVector *entry = opaque;
    PCIDevice *pdev = entry->pdev;
    IVShmemState *s = IVSHMEM(pdev);
    /* vector index = offset of this entry in the vector array */
    ptrdiff_t vector = entry - s->msi_vectors;

    IVSHMEM_DPRINTF("interrupt on vector %p %d\n", pdev, (int)vector);
    msix_notify(pdev, (int)vector);
}
/*
 * MMIO write handler for the register BAR.
 *
 * DOORBELL writes encode the destination VM ID in bits 31:16 and the
 * vector in bits 7:0; a valid doorbell rings the destination's
 * eventfd.
 */
static void ivshmem_io_writel(void *opaque, target_phys_addr_t addr,
                              uint32_t val)
{
    IVShmemState *s = opaque;

    uint64_t write_one = 1;
    uint16_t dest = val >> 16;
    uint16_t vector = val & 0xff;

    addr &= 0xfc;

    IVSHMEM_DPRINTF("writing to addr " TARGET_FMT_plx "\n", addr);
    switch (addr) {
    case INTRMASK:
        ivshmem_IntrMask_write(s, val);
        break;

    case INTRSTATUS:
        ivshmem_IntrStatus_write(s, val);
        break;

    case DOORBELL:
        /*
         * check that dest VM ID is reasonable; dest is uint16_t, so
         * the former "dest < 0" test was always false and is dropped
         */
        if (dest > s->max_peer) {
            IVSHMEM_DPRINTF("Invalid destination VM ID (%d)\n", dest);
            break;
        }

        /*
         * check doorbell range; vector is unsigned, so the former
         * "vector >= 0" test was always true and is dropped
         */
        if (vector < s->peers[dest].nb_eventfds) {
            IVSHMEM_DPRINTF("Writing %" PRId64 " to VM %d on vector %d\n",
                            write_one, dest, vector);
            if (write(s->peers[dest].eventfds[vector],
                      &(write_one), 8) != 8) {
                IVSHMEM_DPRINTF("error writing to eventfd\n");
            }
        }
        break;

    default:
        IVSHMEM_DPRINTF("Invalid VM Doorbell VM %d\n", dest);
    }
}
static void process_msg_disconnect(IVShmemState *s, uint16_t posn, Error **errp) { IVSHMEM_DPRINTF("posn %d has gone away\n", posn); if (posn >= s->nb_peers || posn == s->vm_id) { error_setg(errp, "invalid peer %d", posn); return; } close_peer_eventfds(s, posn); }
/*
 * MMIO write handler for the register BAR.
 *
 * DOORBELL writes encode the destination peer ID in bits 31:16 and
 * the vector in bits 7:0; a valid doorbell sets the destination's
 * event notifier.
 */
static void ivshmem_io_write(void *opaque, hwaddr addr,
                             uint64_t val, unsigned size)
{
    IVShmemState *s = opaque;
    uint16_t dest = val >> 16;
    uint16_t vector = val & 0xff;

    addr &= 0xfc;

    IVSHMEM_DPRINTF("writing to addr " TARGET_FMT_plx "\n", addr);
    switch (addr) {
    case INTRMASK:
        ivshmem_IntrMask_write(s, val);
        break;

    case INTRSTATUS:
        ivshmem_IntrStatus_write(s, val);
        break;

    case DOORBELL:
        /* check that dest VM ID is reasonable */
        if (dest >= s->nb_peers) {
            IVSHMEM_DPRINTF("Invalid destination VM ID (%d)\n", dest);
            break;
        }

        /* check doorbell range */
        if (vector >= s->peers[dest].nb_eventfds) {
            IVSHMEM_DPRINTF("Invalid destination vector %d on VM %d\n",
                            vector, dest);
            break;
        }

        IVSHMEM_DPRINTF("Notifying VM %d on vector %d\n", dest, vector);
        event_notifier_set(&s->peers[dest].eventfds[vector]);
        break;

    default:
        IVSHMEM_DPRINTF("Unhandled write " TARGET_FMT_plx "\n", addr);
    }
}
/*
 * Release the KVM MSI virq associated with @vector, if one is still
 * active (pdev != NULL marks an in-use vector).
 */
static void ivshmem_remove_kvm_msi_virq(IVShmemState *s, int vector)
{
    IVSHMEM_DPRINTF("ivshmem_remove_kvm_msi_virq vector:%d\n", vector);

    if (s->msi_vectors[vector].pdev == NULL) {
        /* it was cleaned when masked in the frontend. */
        return;
    }

    kvm_irqchip_release_virq(kvm_state, s->msi_vectors[vector].virq);
    s->msi_vectors[vector].pdev = NULL;
}
/* accessing registers - based on rtl8139 */
/* Drive the INTx line from the current status & mask. */
static void ivshmem_update_irq(IVShmemState *s, int val)
{
    int isr = (s->intrstatus & s->intrmask) & 0xffffffff;

    /* don't print ISR resets */
    if (isr) {
        IVSHMEM_DPRINTF("Set IRQ to %d (%04x %04x)\n",
                        isr ? 1 : 0, s->intrstatus, s->intrmask);
    }

    qemu_set_irq(s->dev.irq[0], (isr != 0));
}
/* Select the MSI-X vectors used by device.
 * ivshmem maps events to vectors statically, so
 * we just enable all vectors on init and after reset. */
static void ivshmem_use_msix(IVShmemState * s)
{
    PCIDevice *d = PCI_DEVICE(s);
    int vec;

    IVSHMEM_DPRINTF("%s, msix present: %d\n", __func__, msix_present(d));
    if (!msix_present(d)) {
        return;
    }

    /* claim every vector; the mapping never changes at runtime */
    for (vec = 0; vec < s->vectors; vec++) {
        msix_vector_use(d, vec);
    }
}
/*
 * Initialize MSI-X in its own exclusive BAR, allocate the per-vector
 * bookkeeping, and mark all vectors in use.  Returns 0 on success,
 * -1 when MSI-X initialization fails.
 */
static int ivshmem_setup_msi(IVShmemState * s)
{
    if (msix_init_exclusive_bar(PCI_DEVICE(s), s->vectors, 1) != 0) {
        return -1;
    }

    IVSHMEM_DPRINTF("msix initialized (%d vectors)\n", s->vectors);

    /* allocate QEMU char devices for receiving interrupts */
    s->msi_vectors = g_malloc0(s->vectors * sizeof(MSIVector));

    ivshmem_use_msix(s);
    return 0;
}
/* accessing registers - based on rtl8139 */
/* Drive the PCI interrupt line from the current status & mask. */
static void ivshmem_update_irq(IVShmemState *s, int val)
{
    PCIDevice *d = PCI_DEVICE(s);
    int isr = (s->intrstatus & s->intrmask) & 0xffffffff;

    /* don't print ISR resets */
    if (isr) {
        IVSHMEM_DPRINTF("Set IRQ to %d (%04x %04x)\n",
                        isr ? 1 : 0, s->intrstatus, s->intrmask);
    }

    pci_set_irq(d, (isr != 0));
}
/*
 * MSI-X mask notifier: the guest masked @vector, so detach its event
 * notifier from the KVM irqfd path.  Failure is reported but not
 * fatal.
 */
static void ivshmem_vector_mask(PCIDevice *dev, unsigned vector)
{
    IVShmemState *s = IVSHMEM_COMMON(dev);
    EventNotifier *n = &s->peers[s->vm_id].eventfds[vector];
    int ret;

    IVSHMEM_DPRINTF("vector mask %p %d\n", dev, vector);

    ret = kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, n,
                                                s->msi_vectors[vector].virq);
    if (ret != 0) {
        error_report("remove_irqfd_notifier_gsi failed");
    }
}