Example #1
0
int
qemuSetupGlobalCpuCgroup(virDomainObjPtr vm)
{
    qemuDomainObjPrivatePtr priv = vm->privateData;
    unsigned long long period = vm->def->cputune.global_period;
    long long quota = vm->def->cputune.global_quota;
    char *mem_mask = NULL;
    virDomainNumatuneMemMode mem_mode;

    if ((period || quota) &&
        !virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU)) {
        virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                       _("cgroup cpu is required for scheduler tuning"));
        return -1;
    }

    /*
     * If CPU cgroup controller is not initialized here, then we need
     * neither period nor quota settings.  And if CPUSET controller is
     * not initialized either, then there's nothing to do anyway.
     */
    if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU) &&
        !virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET))
        return 0;


    if (virDomainNumatuneGetMode(vm->def->numa, -1, &mem_mode) == 0 &&
        mem_mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT &&
        virDomainNumatuneMaybeFormatNodeset(vm->def->numa,
                                            priv->autoNodeset,
                                            &mem_mask, -1) < 0)
        goto cleanup;

    if (period || quota) {
        if (qemuSetupCgroupVcpuBW(priv->cgroup, period, quota) < 0)
            goto cleanup;
    }

    VIR_FREE(mem_mask);

    return 0;

 cleanup:
    VIR_FREE(mem_mask);

    return -1;
}
static int
CVE_2013_6456_libvirt1_1_0_lxcDomainDetachDeviceHostdevMiscLive(virDomainObjPtr vm,
                                     virDomainDeviceDefPtr dev)
{
    virLXCDomainObjPrivatePtr priv = vm->privateData;
    virDomainHostdevDefPtr def = NULL;
    int i, ret = -1;
    char *dst = NULL;

    if (!priv->initpid) {
        virReportError(VIR_ERR_OPERATION_INVALID, "%s",
                       _("Cannot attach disk until init PID is known"));
        goto cleanup;
    }

    if ((i = virDomainHostdevFind(vm->def,
                                  dev->data.hostdev,
                                  &def)) < 0) {
        virReportError(VIR_ERR_OPERATION_FAILED,
                       _("hostdev %s not found"),
                       dev->data.hostdev->source.caps.u.misc.chardev);
        goto cleanup;
    }

    if (virAsprintf(&dst, "/proc/%llu/root/%s",
                    (unsigned long long)priv->initpid,
                    def->source.caps.u.misc.chardev) < 0) {
        virReportOOMError();
        goto cleanup;
    }

    if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES)) {
        virReportError(VIR_ERR_OPERATION_INVALID, "%s",
                       _("devices cgroup isn't mounted"));
        goto cleanup;
    }

    VIR_DEBUG("Unlinking %s", dst);
    if (unlink(dst) < 0 && errno != ENOENT) {
        virDomainAuditHostdev(vm, def, "detach", false);
        virReportSystemError(errno,
                             _("Unable to remove device %s"), dst);
        goto cleanup;
    }
    virDomainAuditHostdev(vm, def, "detach", true);

    if (virCgroupDenyDevicePath(priv->cgroup, def->source.caps.u.misc.chardev, VIR_CGROUP_DEVICE_RWM) != 0)
        VIR_WARN("cannot deny device %s for domain %s",
                 def->source.caps.u.misc.chardev, vm->def->name);

    virDomainHostdevRemove(vm->def, i);
    virDomainHostdevDefFree(def);

    ret = 0;

cleanup:
    VIR_FREE(dst);
    return ret;
}
Example #3
0
int
qemuTeardownImageCgroup(virDomainObjPtr vm,
                        virStorageSourcePtr src)
{
    qemuDomainObjPrivatePtr priv = vm->privateData;
    int perms = VIR_CGROUP_DEVICE_RWM;
    size_t i;
    int ret;

    if (!virCgroupHasController(priv->cgroup,
                                VIR_CGROUP_CONTROLLER_DEVICES))
        return 0;

    if (!src->path || !virStorageSourceIsLocalStorage(src)) {
        VIR_DEBUG("Not updating cgroups for disk path '%s', type: %s",
                  NULLSTR(src->path), virStorageTypeToString(src->type));
        return 0;
    }

    if (virFileExists(DEVICE_MAPPER_CONTROL_PATH)) {
        for (i = 0; i < vm->def->ndisks; i++) {
            virStorageSourcePtr diskSrc = vm->def->disks[i]->src;

            if (src == diskSrc)
                continue;

            if (virStoragePRDefIsManaged(diskSrc->pr))
                break;
        }

        if (i == vm->def->ndisks) {
            VIR_DEBUG("Disabling device mapper control");
            ret = virCgroupDenyDevicePath(priv->cgroup,
                                          DEVICE_MAPPER_CONTROL_PATH, perms, true);
            virDomainAuditCgroupPath(vm, priv->cgroup, "deny",
                                     DEVICE_MAPPER_CONTROL_PATH,
                                     virCgroupGetDevicePermsString(perms), ret);
            if (ret < 0)
                return ret;
        }
    }

    VIR_DEBUG("Deny path %s", src->path);

    ret = virCgroupDenyDevicePath(priv->cgroup, src->path, perms, true);

    virDomainAuditCgroupPath(vm, priv->cgroup, "deny", src->path,
                             virCgroupGetDevicePermsString(perms), ret);

    /* If you're looking for a counter part to
     * qemuSetupImagePathCgroup you're at the right place.
     * However, we can't just blindly deny all the device mapper
     * targets of src->path because they might still be used by
     * another disk in domain. Just like we are not removing
     * disks from namespace. */

    return ret;
}
Example #4
0
static int
qemuSetupImagePathCgroup(virDomainObjPtr vm,
                         const char *path,
                         bool readonly)
{
    qemuDomainObjPrivatePtr priv = vm->privateData;
    int perms = VIR_CGROUP_DEVICE_READ;
    char **targetPaths = NULL;
    size_t i;
    int rv;
    int ret = -1;

    if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES))
        return 0;

    if (!readonly)
        perms |= VIR_CGROUP_DEVICE_WRITE;

    VIR_DEBUG("Allow path %s, perms: %s",
              path, virCgroupGetDevicePermsString(perms));

    rv = virCgroupAllowDevicePath(priv->cgroup, path, perms, true);

    virDomainAuditCgroupPath(vm, priv->cgroup, "allow", path,
                             virCgroupGetDevicePermsString(perms),
                             rv);
    if (rv < 0)
        goto cleanup;

    if (rv > 0) {
        /* @path is neither character device nor block device. */
        ret = 0;
        goto cleanup;
    }

    if (virDevMapperGetTargets(path, &targetPaths) < 0 &&
        errno != ENOSYS && errno != EBADF) {
        virReportSystemError(errno,
                             _("Unable to get devmapper targets for %s"),
                             path);
        goto cleanup;
    }

    for (i = 0; targetPaths && targetPaths[i]; i++) {
        rv = virCgroupAllowDevicePath(priv->cgroup, targetPaths[i], perms, false);

        virDomainAuditCgroupPath(vm, priv->cgroup, "allow", targetPaths[i],
                                 virCgroupGetDevicePermsString(perms),
                                 rv);
        if (rv < 0)
            goto cleanup;
    }

    ret = 0;
 cleanup:
    virStringListFree(targetPaths);
    return ret;
}
Example #5
0
int
qemuTeardownHostdevCgroup(virDomainObjPtr vm,
                       virDomainHostdevDefPtr dev)
{
    int ret = -1;
    qemuDomainObjPrivatePtr priv = vm->privateData;
    virDomainHostdevSubsysPCIPtr pcisrc = &dev->source.subsys.u.pci;
    virPCIDevicePtr pci = NULL;
    char *path = NULL;

    /* currently this only does something for PCI devices using vfio
     * for device assignment, but it is called for *all* hostdev
     * devices.
     */

    if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES))
        return 0;

    if (dev->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS) {

        switch (dev->source.subsys.type) {
        case VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI:
            if (pcisrc->backend == VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO) {
                int rv;

                pci = virPCIDeviceNew(pcisrc->addr.domain,
                                      pcisrc->addr.bus,
                                      pcisrc->addr.slot,
                                      pcisrc->addr.function);
                if (!pci)
                    goto cleanup;

                if (!(path = virPCIDeviceGetIOMMUGroupDev(pci)))
                    goto cleanup;

                VIR_DEBUG("Cgroup deny %s for PCI device assignment", path);
                rv = virCgroupDenyDevicePath(priv->cgroup, path,
                                             VIR_CGROUP_DEVICE_RWM);
                virDomainAuditCgroupPath(vm, priv->cgroup,
                                         "deny", path, "rwm", rv == 0);
                if (rv < 0)
                    goto cleanup;
            }
            break;
        case VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_USB:
            /* nothing to tear down for USB */
            break;
        default:
            break;
        }
    }

    ret = 0;
 cleanup:
    virPCIDeviceFree(pci);
    VIR_FREE(path);
    return ret;
}
Example #6
0
int
qemuSetupDiskCgroup(virDomainObjPtr vm,
                    virDomainDiskDefPtr disk)
{
    qemuDomainObjPrivatePtr priv = vm->privateData;

    if (!virCgroupHasController(priv->cgroup,
                                VIR_CGROUP_CONTROLLER_DEVICES))
        return 0;

    return virDomainDiskDefForeachPath(disk, true, qemuSetupDiskPathAllow, vm);
}
Example #7
0
static int
qemuSetupBlkioCgroup(virDomainObjPtr vm)
{
    qemuDomainObjPrivatePtr priv = vm->privateData;
    size_t i;

    if (!virCgroupHasController(priv->cgroup,
                                VIR_CGROUP_CONTROLLER_BLKIO)) {
        if (vm->def->blkio.weight || vm->def->blkio.ndevices) {
            virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                           _("Block I/O tuning is not available on this host"));
            return -1;
        } else {
            return 0;
        }
    }

    if (vm->def->blkio.weight != 0 &&
        virCgroupSetBlkioWeight(priv->cgroup, vm->def->blkio.weight) < 0)
        return -1;

    if (vm->def->blkio.ndevices) {
        for (i = 0; i < vm->def->blkio.ndevices; i++) {
            virBlkioDevicePtr dev = &vm->def->blkio.devices[i];
            if (dev->weight &&
                (virCgroupSetBlkioDeviceWeight(priv->cgroup, dev->path,
                                               dev->weight) < 0))
                return -1;

            if (dev->riops &&
                (virCgroupSetBlkioDeviceReadIops(priv->cgroup, dev->path,
                                                 dev->riops) < 0))
                return -1;

            if (dev->wiops &&
                (virCgroupSetBlkioDeviceWriteIops(priv->cgroup, dev->path,
                                                  dev->wiops) < 0))
                return -1;

            if (dev->rbps &&
                (virCgroupSetBlkioDeviceReadBps(priv->cgroup, dev->path,
                                                dev->rbps) < 0))
                return -1;

            if (dev->wbps &&
                (virCgroupSetBlkioDeviceWriteBps(priv->cgroup, dev->path,
                                                 dev->wbps) < 0))
                return -1;
        }
    }

    return 0;
}
Example #8
0
static int
qemuSetImageCgroupInternal(virDomainObjPtr vm,
                           virStorageSourcePtr src,
                           bool deny,
                           bool forceReadonly)
{
    qemuDomainObjPrivatePtr priv = vm->privateData;
    int perms = VIR_CGROUP_DEVICE_READ;
    int ret;

    if (!virCgroupHasController(priv->cgroup,
                                VIR_CGROUP_CONTROLLER_DEVICES))
        return 0;

    if (!src->path || !virStorageSourceIsLocalStorage(src)) {
        VIR_DEBUG("Not updating cgroups for disk path '%s', type: %s",
                  NULLSTR(src->path), virStorageTypeToString(src->type));
        return 0;
    }

    if (deny) {
        perms |= VIR_CGROUP_DEVICE_WRITE | VIR_CGROUP_DEVICE_MKNOD;

        VIR_DEBUG("Deny path %s", src->path);

        ret = virCgroupDenyDevicePath(priv->cgroup, src->path, perms);
    } else {
        if (!src->readonly && !forceReadonly)
            perms |= VIR_CGROUP_DEVICE_WRITE;

        VIR_DEBUG("Allow path %s, perms: %s",
                  src->path, virCgroupGetDevicePermsString(perms));

        ret = virCgroupAllowDevicePath(priv->cgroup, src->path, perms);
    }

    virDomainAuditCgroupPath(vm, priv->cgroup,
                             deny ? "deny" : "allow",
                             src->path,
                             virCgroupGetDevicePermsString(perms),
                             ret == 0);

    /* Get this for root squash NFS */
    if (ret < 0 &&
        virLastErrorIsSystemErrno(EACCES)) {
        VIR_DEBUG("Ignoring EACCES for %s", src->path);
        virResetLastError();
        ret = 0;
    }

    return ret;
}
Example #9
0
static int
qemuSetupCpusetCgroup(virDomainObjPtr vm)
{
    qemuDomainObjPrivatePtr priv = vm->privateData;

    if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET))
        return 0;

    if (virCgroupSetCpusetMemoryMigrate(priv->cgroup, true) < 0)
        return -1;

    return 0;
}
Example #10
0
static int
qemuSetupSEVCgroup(virDomainObjPtr vm)
{
    qemuDomainObjPrivatePtr priv = vm->privateData;
    int ret;

    if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES))
        return 0;

    ret = virCgroupAllowDevicePath(priv->cgroup, "/dev/sev",
                                   VIR_CGROUP_DEVICE_RW, false);
    virDomainAuditCgroupPath(vm, priv->cgroup, "allow", "/dev/sev",
                             "rw", ret);
    return ret;
}
Example #11
0
static int
qemuSetupGraphicsCgroup(virDomainObjPtr vm,
                        virDomainGraphicsDefPtr gfx)
{
    qemuDomainObjPrivatePtr priv = vm->privateData;
    const char *rendernode = virDomainGraphicsGetRenderNode(gfx);
    int ret;

    if (!rendernode ||
        !virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES))
        return 0;

    ret = virCgroupAllowDevicePath(priv->cgroup, rendernode,
                                   VIR_CGROUP_DEVICE_RW, false);
    virDomainAuditCgroupPath(vm, priv->cgroup, "allow", rendernode,
                             "rw", ret);
    return ret;
}
Example #12
0
static int
qemuSetupCpuCgroup(virQEMUDriverPtr driver,
                   virDomainObjPtr vm)
{
    qemuDomainObjPrivatePtr priv = vm->privateData;
    virObjectEventPtr event = NULL;
    virTypedParameterPtr eventParams = NULL;
    int eventNparams = 0;
    int eventMaxparams = 0;

    if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU)) {
       if (vm->def->cputune.sharesSpecified) {
           virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                          _("CPU tuning is not available on this host"));
           return -1;
       } else {
           return 0;
       }
    }

    if (vm->def->cputune.sharesSpecified) {
        unsigned long long val;
        if (virCgroupSetCpuShares(priv->cgroup, vm->def->cputune.shares) < 0)
            return -1;

        if (virCgroupGetCpuShares(priv->cgroup, &val) < 0)
            return -1;
        if (vm->def->cputune.shares != val) {
            vm->def->cputune.shares = val;
            if (virTypedParamsAddULLong(&eventParams, &eventNparams,
                                        &eventMaxparams,
                                        VIR_DOMAIN_TUNABLE_CPU_CPU_SHARES,
                                        val) < 0)
                return -1;

            event = virDomainEventTunableNewFromObj(vm, eventParams, eventNparams);
        }

        if (event)
            qemuDomainEventQueue(driver, event);
    }

    return 0;
}
Example #13
0
int
qemuTeardownMemoryDevicesCgroup(virDomainObjPtr vm,
                                virDomainMemoryDefPtr mem)
{
    qemuDomainObjPrivatePtr priv = vm->privateData;
    int rv;

    if (mem->model != VIR_DOMAIN_MEMORY_MODEL_NVDIMM)
        return 0;

    if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES))
        return 0;

    rv = virCgroupDenyDevicePath(priv->cgroup, mem->nvdimmPath,
                                 VIR_CGROUP_DEVICE_RWM, false);
    virDomainAuditCgroupPath(vm, priv->cgroup,
                             "deny", mem->nvdimmPath, "rwm", rv);
    return rv;
}
Example #14
0
int
qemuTeardownHostdevCgroup(virDomainObjPtr vm,
                       virDomainHostdevDefPtr dev)
{
    qemuDomainObjPrivatePtr priv = vm->privateData;
    char **path = NULL;
    size_t i, npaths = 0;
    int rv, ret = -1;

    /* currently this only does something for PCI devices using vfio
     * for device assignment, but it is called for *all* hostdev
     * devices.
     */

    if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES))
        return 0;

    if (dev->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS &&
        dev->source.subsys.type == VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI &&
        dev->source.subsys.u.pci.backend == VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO &&
        qemuDomainGetHostdevPath(vm->def, dev, true,
                                 &npaths, &path, NULL) < 0)
        goto cleanup;

    for (i = 0; i < npaths; i++) {
        VIR_DEBUG("Cgroup deny %s", path[i]);
        rv = virCgroupDenyDevicePath(priv->cgroup, path[i],
                                     VIR_CGROUP_DEVICE_RWM, false);
        virDomainAuditCgroupPath(vm, priv->cgroup,
                                 "deny", path[i], "rwm", rv);
        if (rv < 0)
            goto cleanup;
    }

    ret = 0;
 cleanup:
    for (i = 0; i < npaths; i++)
        VIR_FREE(path[i]);
    VIR_FREE(path);
    return ret;
}
Example #15
0
static int
qemuSetupCpuCgroup(virDomainObjPtr vm)
{
    qemuDomainObjPrivatePtr priv = vm->privateData;

    if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU)) {
       if (vm->def->cputune.shares) {
           virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                          _("CPU tuning is not available on this host"));
           return -1;
       } else {
           return 0;
       }
    }

    if (vm->def->cputune.shares &&
        virCgroupSetCpuShares(priv->cgroup, vm->def->cputune.shares) < 0)
        return -1;

    return 0;
}
Example #16
0
int
qemuSetupMemoryDevicesCgroup(virDomainObjPtr vm,
                             virDomainMemoryDefPtr mem)
{
    qemuDomainObjPrivatePtr priv = vm->privateData;
    int rv;

    if (mem->model != VIR_DOMAIN_MEMORY_MODEL_NVDIMM)
        return 0;

    if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES))
        return 0;

    VIR_DEBUG("Setting devices Cgroup for NVDIMM device: %s", mem->nvdimmPath);
    rv = virCgroupAllowDevicePath(priv->cgroup, mem->nvdimmPath,
                                  VIR_CGROUP_DEVICE_RW, false);
    virDomainAuditCgroupPath(vm, priv->cgroup, "allow",
                             mem->nvdimmPath, "rw", rv);

    return rv;
}
Example #17
0
int
qemuTeardownInputCgroup(virDomainObjPtr vm,
                        virDomainInputDefPtr dev)
{
    qemuDomainObjPrivatePtr priv = vm->privateData;
    int ret = 0;

    if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES))
        return 0;

    switch (dev->type) {
    case VIR_DOMAIN_INPUT_TYPE_PASSTHROUGH:
        VIR_DEBUG("Process path '%s' for input device", dev->source.evdev);
        ret = virCgroupDenyDevicePath(priv->cgroup, dev->source.evdev,
                                      VIR_CGROUP_DEVICE_RWM, false);
        virDomainAuditCgroupPath(vm, priv->cgroup, "deny", dev->source.evdev, "rwm", ret);
        break;
    }

    return ret;
}
Example #18
0
/**
 * qemuCgroupEmulatorAllNodesAllow:
 * @cgroup: domain cgroup pointer
 * @retData: filled with structure used to roll back the operation
 *
 * Allows all NUMA nodes for the qemu emulator thread temporarily. This is
 * necessary when hotplugging cpus since it requires memory allocated in the
 * DMA region. Afterwards the operation can be reverted by
 * qemuCgroupEmulatorAllNodesRestore.
 *
 * Returns 0 on success -1 on error
 */
int
qemuCgroupEmulatorAllNodesAllow(virCgroupPtr cgroup,
                                qemuCgroupEmulatorAllNodesDataPtr *retData)
{
    qemuCgroupEmulatorAllNodesDataPtr data = NULL;
    char *all_nodes_str = NULL;
    virBitmapPtr all_nodes = NULL;
    int ret = -1;

    if (!virNumaIsAvailable() ||
        !virCgroupHasController(cgroup, VIR_CGROUP_CONTROLLER_CPUSET))
        return 0;

    if (!(all_nodes = virNumaGetHostMemoryNodeset()))
        goto cleanup;

    if (!(all_nodes_str = virBitmapFormat(all_nodes)))
        goto cleanup;

    if (VIR_ALLOC(data) < 0)
        goto cleanup;

    if (virCgroupNewThread(cgroup, VIR_CGROUP_THREAD_EMULATOR, 0,
                           false, &data->emulatorCgroup) < 0)
        goto cleanup;

    if (virCgroupGetCpusetMems(data->emulatorCgroup, &data->emulatorMemMask) < 0 ||
        virCgroupSetCpusetMems(data->emulatorCgroup, all_nodes_str) < 0)
        goto cleanup;

    VIR_STEAL_PTR(*retData, data);
    ret = 0;

 cleanup:
    VIR_FREE(all_nodes_str);
    virBitmapFree(all_nodes);
    qemuCgroupEmulatorAllNodesDataFree(data);

    return ret;
}
Example #19
0
static int
qemuTeardownChrSourceCgroup(virDomainObjPtr vm,
                            virDomainChrSourceDefPtr source)
{
    qemuDomainObjPrivatePtr priv = vm->privateData;
    int ret;

    if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES))
        return 0;

    if (source->type != VIR_DOMAIN_CHR_TYPE_DEV)
        return 0;

    VIR_DEBUG("Process path '%s' for device", source->data.file.path);

    ret = virCgroupDenyDevicePath(priv->cgroup, source->data.file.path,
                                  VIR_CGROUP_DEVICE_RW, false);
    virDomainAuditCgroupPath(vm, priv->cgroup, "deny",
                             source->data.file.path, "rw", ret);

    return ret;
}
Example #20
0
int
qemuSetupCgroupForEmulator(virQEMUDriverPtr driver,
                           virDomainObjPtr vm,
                           virBitmapPtr nodemask)
{
    virBitmapPtr cpumask = NULL;
    virBitmapPtr cpumap = NULL;
    virCgroupPtr cgroup_emulator = NULL;
    virDomainDefPtr def = vm->def;
    qemuDomainObjPrivatePtr priv = vm->privateData;
    unsigned long long period = vm->def->cputune.emulator_period;
    long long quota = vm->def->cputune.emulator_quota;

    if ((period || quota) &&
        !virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU)) {
        virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                       _("cgroup cpu is required for scheduler tuning"));
        return -1;
    }

    if (priv->cgroup == NULL)
        return 0; /* Not supported, so claim success */

    if (virCgroupNewEmulator(priv->cgroup, true, &cgroup_emulator) < 0)
        goto cleanup;

    if (virCgroupMoveTask(priv->cgroup, cgroup_emulator) < 0)
        goto cleanup;

    if (def->placement_mode == VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO) {
        if (!(cpumap = qemuPrepareCpumap(driver, nodemask)))
            goto cleanup;
        cpumask = cpumap;
    } else if (def->cputune.emulatorpin) {
        cpumask = def->cputune.emulatorpin->cpumask;
    } else if (def->cpumask) {
        cpumask = def->cpumask;
    }

    if (cpumask) {
        if (virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET) &&
            qemuSetupCgroupEmulatorPin(cgroup_emulator, cpumask) < 0)
            goto cleanup;
    }

    if (period || quota) {
        if (virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU) &&
            qemuSetupCgroupVcpuBW(cgroup_emulator, period,
                                  quota) < 0)
            goto cleanup;
    }

    virCgroupFree(&cgroup_emulator);
    virBitmapFree(cpumap);
    return 0;

 cleanup:
    virBitmapFree(cpumap);

    if (cgroup_emulator) {
        virCgroupRemove(cgroup_emulator);
        virCgroupFree(&cgroup_emulator);
    }

    return -1;
}
Example #21
0
/**
 * virLXCProcessStart:
 * @conn: pointer to connection
 * @driver: pointer to driver structure
 * @vm: pointer to virtual machine structure
 * @autoDestroy: mark the domain for auto destruction
 * @reason: reason for switching vm to running state
 *
 * Starts a vm
 *
 * Returns 0 on success or -1 in case of error
 */
int virLXCProcessStart(virConnectPtr conn,
                       virLXCDriverPtr  driver,
                       virDomainObjPtr vm,
                       unsigned int nfiles, int *files,
                       bool autoDestroy,
                       virDomainRunningReason reason)
{
    int rc = -1, r;
    size_t nttyFDs = 0;
    int *ttyFDs = NULL;
    size_t i;
    char *logfile = NULL;
    int logfd = -1;
    size_t nveths = 0;
    char **veths = NULL;
    int handshakefds[2] = { -1, -1 };
    off_t pos = -1;
    char ebuf[1024];
    char *timestamp;
    virCommandPtr cmd = NULL;
    virLXCDomainObjPrivatePtr priv = vm->privateData;
    virCapsPtr caps = NULL;
    virErrorPtr err = NULL;
    virLXCDriverConfigPtr cfg = virLXCDriverGetConfig(driver);
    virCgroupPtr selfcgroup;
    int status;

    if (virCgroupNewSelf(&selfcgroup) < 0)
        return -1;

    if (!virCgroupHasController(selfcgroup,
                                VIR_CGROUP_CONTROLLER_CPUACCT)) {
        virCgroupFree(&selfcgroup);
        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                       _("Unable to find 'cpuacct' cgroups controller mount"));
        return -1;
    }
    if (!virCgroupHasController(selfcgroup,
                                VIR_CGROUP_CONTROLLER_DEVICES)) {
        virCgroupFree(&selfcgroup);
        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                       _("Unable to find 'devices' cgroups controller mount"));
        return -1;
    }
    if (!virCgroupHasController(selfcgroup,
                                VIR_CGROUP_CONTROLLER_MEMORY)) {
        virCgroupFree(&selfcgroup);
        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                       _("Unable to find 'memory' cgroups controller mount"));
        return -1;
    }
    virCgroupFree(&selfcgroup);

    if (virFileMakePath(cfg->logDir) < 0) {
        virReportSystemError(errno,
                             _("Cannot create log directory '%s'"),
                             cfg->logDir);
        return -1;
    }

    if (!vm->def->resource) {
        virDomainResourceDefPtr res;

        if (VIR_ALLOC(res) < 0)
            goto cleanup;

        if (VIR_STRDUP(res->partition, "/machine") < 0) {
            VIR_FREE(res);
            goto cleanup;
        }

        vm->def->resource = res;
    }

    if (virAsprintf(&logfile, "%s/%s.log",
                    cfg->logDir, vm->def->name) < 0)
        return -1;

    if (!(caps = virLXCDriverGetCapabilities(driver, false)))
        goto cleanup;

    /* Do this up front, so any part of the startup process can add
     * runtime state to vm->def that won't be persisted. This let's us
     * report implicit runtime defaults in the XML, like vnc listen/socket
     */
    VIR_DEBUG("Setting current domain def as transient");
    if (virDomainObjSetDefTransient(caps, driver->xmlopt, vm, true) < 0)
        goto cleanup;

    /* Run an early hook to set-up missing devices */
    if (virHookPresent(VIR_HOOK_DRIVER_LXC)) {
        char *xml = virDomainDefFormat(vm->def, 0);
        int hookret;

        hookret = virHookCall(VIR_HOOK_DRIVER_LXC, vm->def->name,
                              VIR_HOOK_LXC_OP_PREPARE, VIR_HOOK_SUBOP_BEGIN,
                              NULL, xml, NULL);
        VIR_FREE(xml);

        /*
         * If the script raised an error abort the launch
         */
        if (hookret < 0)
            goto cleanup;
    }

    if (virLXCProcessEnsureRootFS(vm) < 0)
        goto cleanup;

    /* Must be run before security labelling */
    VIR_DEBUG("Preparing host devices");
    if (virLXCPrepareHostDevices(driver, vm->def) < 0)
        goto cleanup;

    /* Here we open all the PTYs we need on the host OS side.
     * The LXC controller will open the guest OS side PTYs
     * and forward I/O between them.
     */
    nttyFDs = vm->def->nconsoles;
    if (VIR_ALLOC_N(ttyFDs, nttyFDs) < 0)
        goto cleanup;
    for (i = 0; i < vm->def->nconsoles; i++)
        ttyFDs[i] = -1;

    /* If you are using a SecurityDriver with dynamic labelling,
       then generate a security label for isolation */
    VIR_DEBUG("Generating domain security label (if required)");
    if (vm->def->nseclabels &&
        vm->def->seclabels[0]->type == VIR_DOMAIN_SECLABEL_DEFAULT)
        vm->def->seclabels[0]->type = VIR_DOMAIN_SECLABEL_NONE;

    if (virSecurityManagerGenLabel(driver->securityManager, vm->def) < 0) {
        virDomainAuditSecurityLabel(vm, false);
        goto cleanup;
    }
    virDomainAuditSecurityLabel(vm, true);

    VIR_DEBUG("Setting domain security labels");
    if (virSecurityManagerSetAllLabel(driver->securityManager,
                                      vm->def, NULL) < 0)
        goto cleanup;

    for (i = 0; i < vm->def->nconsoles; i++) {
        char *ttyPath;
        if (vm->def->consoles[i]->source.type != VIR_DOMAIN_CHR_TYPE_PTY) {
            virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                           _("Only PTY console types are supported"));
            goto cleanup;
        }

        if (virFileOpenTty(&ttyFDs[i], &ttyPath, 1) < 0) {
            virReportSystemError(errno, "%s",
                                 _("Failed to allocate tty"));
            goto cleanup;
        }

        VIR_FREE(vm->def->consoles[i]->source.data.file.path);
        vm->def->consoles[i]->source.data.file.path = ttyPath;

        VIR_FREE(vm->def->consoles[i]->info.alias);
        if (virAsprintf(&vm->def->consoles[i]->info.alias, "console%zu", i) < 0)
            goto cleanup;
    }

    if (virLXCProcessSetupInterfaces(conn, vm->def, &nveths, &veths) < 0)
        goto cleanup;

    /* Save the configuration for the controller */
    if (virDomainSaveConfig(cfg->stateDir, vm->def) < 0)
        goto cleanup;

    if ((logfd = open(logfile, O_WRONLY | O_APPEND | O_CREAT,
             S_IRUSR|S_IWUSR)) < 0) {
        virReportSystemError(errno,
                             _("Failed to open '%s'"),
                             logfile);
        goto cleanup;
    }

    if (pipe(handshakefds) < 0) {
        virReportSystemError(errno, "%s",
                             _("Unable to create pipe"));
        goto cleanup;
    }

    if (!(cmd = virLXCProcessBuildControllerCmd(driver,
                                                vm,
                                                nveths, veths,
                                                ttyFDs, nttyFDs,
                                                files, nfiles,
                                                handshakefds[1])))
        goto cleanup;
    virCommandSetOutputFD(cmd, &logfd);
    virCommandSetErrorFD(cmd, &logfd);

    /* now that we know it is about to start call the hook if present */
    if (virHookPresent(VIR_HOOK_DRIVER_LXC)) {
        char *xml = virDomainDefFormat(vm->def, 0);
        int hookret;

        hookret = virHookCall(VIR_HOOK_DRIVER_LXC, vm->def->name,
                              VIR_HOOK_LXC_OP_START, VIR_HOOK_SUBOP_BEGIN,
                              NULL, xml, NULL);
        VIR_FREE(xml);

        /*
         * If the script raised an error abort the launch
         */
        if (hookret < 0)
            goto cleanup;
    }

    /* Log timestamp */
    if ((timestamp = virTimeStringNow()) == NULL)
        goto cleanup;
    if (safewrite(logfd, timestamp, strlen(timestamp)) < 0 ||
        safewrite(logfd, START_POSTFIX, strlen(START_POSTFIX)) < 0) {
        VIR_WARN("Unable to write timestamp to logfile: %s",
                 virStrerror(errno, ebuf, sizeof(ebuf)));
    }
    VIR_FREE(timestamp);

    /* Log generated command line */
    virCommandWriteArgLog(cmd, logfd);
    if ((pos = lseek(logfd, 0, SEEK_END)) < 0)
        VIR_WARN("Unable to seek to end of logfile: %s",
                 virStrerror(errno, ebuf, sizeof(ebuf)));

    virCommandRawStatus(cmd);
    if (virCommandRun(cmd, &status) < 0)
        goto cleanup;

    if (status != 0) {
        if (virLXCProcessReadLogOutput(vm, logfile, pos, ebuf,
                                       sizeof(ebuf)) <= 0) {
            if (WIFEXITED(status))
                snprintf(ebuf, sizeof(ebuf), _("unexpected exit status %d"),
                         WEXITSTATUS(status));
            else
                snprintf(ebuf, sizeof(ebuf), "%s", _("terminated abnormally"));
        }
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("guest failed to start: %s"), ebuf);
        goto cleanup;
    }


    if (VIR_CLOSE(handshakefds[1]) < 0) {
        virReportSystemError(errno, "%s", _("could not close handshake fd"));
        goto cleanup;
    }

    /* Connect to the controller as a client *first* because
     * this will block until the child has written their
     * pid file out to disk & created their cgroup */
    if (!(priv->monitor = virLXCProcessConnectMonitor(driver, vm))) {
        /* Intentionally overwrite the real monitor error message,
         * since a better one is almost always found in the logs
         */
        if (virLXCProcessReadLogOutput(vm, logfile, pos, ebuf, sizeof(ebuf)) > 0) {
            virResetLastError();
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("guest failed to start: %s"), ebuf);
        }
        goto cleanup;
    }

    /* And get its pid */
    if ((r = virPidFileRead(cfg->stateDir, vm->def->name, &vm->pid)) < 0) {
        if (virLXCProcessReadLogOutput(vm, logfile, pos, ebuf, sizeof(ebuf)) > 0)
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("guest failed to start: %s"), ebuf);
        else
            virReportSystemError(-r,
                                 _("Failed to read pid file %s/%s.pid"),
                                 cfg->stateDir, vm->def->name);
        goto cleanup;
    }

    if (virCgroupNewDetectMachine(vm->def->name, "lxc", vm->pid,
                                  vm->def->resource ?
                                  vm->def->resource->partition :
                                  NULL,
                                  -1, &priv->cgroup) < 0)
        goto error;

    if (!priv->cgroup) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("No valid cgroup for machine %s"),
                       vm->def->name);
        goto error;
    }

    priv->stopReason = VIR_DOMAIN_EVENT_STOPPED_FAILED;
    priv->wantReboot = false;
    vm->def->id = vm->pid;
    virDomainObjSetState(vm, VIR_DOMAIN_RUNNING, reason);
    priv->doneStopEvent = false;

    if (virAtomicIntInc(&driver->nactive) == 1 && driver->inhibitCallback)
        driver->inhibitCallback(true, driver->inhibitOpaque);

    if (lxcContainerWaitForContinue(handshakefds[0]) < 0) {
        char out[1024];

        if (!(virLXCProcessReadLogOutput(vm, logfile, pos, out, 1024) < 0)) {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("guest failed to start: %s"), out);
        }

        goto error;
    }

    if (autoDestroy &&
        virCloseCallbacksSet(driver->closeCallbacks, vm,
                             conn, lxcProcessAutoDestroy) < 0)
        goto error;

    if (virDomainObjSetDefTransient(caps, driver->xmlopt,
                                    vm, false) < 0)
        goto error;

    /* Write domain status to disk.
     *
     * XXX: Earlier we wrote the plain "live" domain XML to this
     * location for the benefit of libvirt_lxc. We're now overwriting
     * it with the live status XML instead. This is a (currently
     * harmless) inconsistency we should fix one day */
    if (virDomainSaveStatus(driver->xmlopt, cfg->stateDir, vm) < 0)
        goto error;

    /* finally we can call the 'started' hook script if any */
    if (virHookPresent(VIR_HOOK_DRIVER_LXC)) {
        char *xml = virDomainDefFormat(vm->def, 0);
        int hookret;

        hookret = virHookCall(VIR_HOOK_DRIVER_LXC, vm->def->name,
                              VIR_HOOK_LXC_OP_STARTED, VIR_HOOK_SUBOP_BEGIN,
                              NULL, xml, NULL);
        VIR_FREE(xml);

        /*
         * If the script raised an error abort the launch
         */
        if (hookret < 0)
            goto error;
    }

    rc = 0;

 cleanup:
    if (rc != 0 && !err)
        err = virSaveLastError();
    virCommandFree(cmd);
    if (VIR_CLOSE(logfd) < 0) {
        virReportSystemError(errno, "%s", _("could not close logfile"));
        rc = -1;
    }
    for (i = 0; i < nveths; i++) {
        if (rc != 0 && veths[i])
            ignore_value(virNetDevVethDelete(veths[i]));
        VIR_FREE(veths[i]);
    }
    if (rc != 0) {
        if (vm->newDef) {
            virDomainDefFree(vm->newDef);
            vm->newDef = NULL;
        }
        if (priv->monitor) {
            virObjectUnref(priv->monitor);
            priv->monitor = NULL;
        }
        virDomainConfVMNWFilterTeardown(vm);

        virSecurityManagerRestoreAllLabel(driver->securityManager,
                                          vm->def, false);
        virSecurityManagerReleaseLabel(driver->securityManager, vm->def);
        /* Clear out dynamically assigned labels */
        if (vm->def->nseclabels &&
            vm->def->seclabels[0]->type == VIR_DOMAIN_SECLABEL_DYNAMIC) {
            VIR_FREE(vm->def->seclabels[0]->model);
            VIR_FREE(vm->def->seclabels[0]->label);
            VIR_FREE(vm->def->seclabels[0]->imagelabel);
        }
    }
    for (i = 0; i < nttyFDs; i++)
        VIR_FORCE_CLOSE(ttyFDs[i]);
    VIR_FREE(ttyFDs);
    VIR_FORCE_CLOSE(handshakefds[0]);
    VIR_FORCE_CLOSE(handshakefds[1]);
    VIR_FREE(logfile);
    virObjectUnref(cfg);
    virObjectUnref(caps);

    if (err) {
        virSetError(err);
        virFreeError(err);
    }

    return rc;

 error:
    err = virSaveLastError();
    virLXCProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_FAILED);
    goto cleanup;
}
Example #22
0
int
qemuSetupCgroupForVcpu(virDomainObjPtr vm)
{
    virCgroupPtr cgroup_vcpu = NULL;
    qemuDomainObjPrivatePtr priv = vm->privateData;
    virDomainDefPtr def = vm->def;
    size_t i, j;
    unsigned long long period = vm->def->cputune.period;
    long long quota = vm->def->cputune.quota;
    char *mem_mask = NULL;
    virDomainNumatuneMemMode mem_mode;

    if ((period || quota) &&
        !virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU)) {
        virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                       _("cgroup cpu is required for scheduler tuning"));
        return -1;
    }

    /*
     * If CPU cgroup controller is not initialized here, then we need
     * neither period nor quota settings.  And if CPUSET controller is
     * not initialized either, then there's nothing to do anyway.
     */
    if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU) &&
        !virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET))
        return 0;

    /* We are trying to setup cgroups for CPU pinning, which can also be done
     * with virProcessSetAffinity, thus the lack of cgroups is not fatal here.
     */
    if (priv->cgroup == NULL)
        return 0;

    if (priv->nvcpupids == 0 || priv->vcpupids[0] == vm->pid) {
        /* If we don't know VCPU<->PID mapping or all vcpu runs in the same
         * thread, we cannot control each vcpu.
         */
        return 0;
    }

    if (virDomainNumatuneGetMode(vm->def->numa, -1, &mem_mode) == 0 &&
        mem_mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT &&
        virDomainNumatuneMaybeFormatNodeset(vm->def->numa,
                                            priv->autoNodeset,
                                            &mem_mask, -1) < 0)
        goto cleanup;

    for (i = 0; i < priv->nvcpupids; i++) {
        virCgroupFree(&cgroup_vcpu);
        if (virCgroupNewThread(priv->cgroup, VIR_CGROUP_THREAD_VCPU, i,
                               true, &cgroup_vcpu) < 0)
            goto cleanup;

        /* move the thread for vcpu to sub dir */
        if (virCgroupAddTask(cgroup_vcpu, priv->vcpupids[i]) < 0)
            goto cleanup;

        if (period || quota) {
            if (qemuSetupCgroupVcpuBW(cgroup_vcpu, period, quota) < 0)
                goto cleanup;
        }

        /* Set vcpupin in cgroup if vcpupin xml is provided */
        if (virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) {
            virBitmapPtr cpumap = NULL;

            if (mem_mask &&
                virCgroupSetCpusetMems(cgroup_vcpu, mem_mask) < 0)
                goto cleanup;

            /* try to use the default cpu maps */
            if (vm->def->placement_mode == VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO)
                cpumap = priv->autoCpuset;
            else
                cpumap = vm->def->cpumask;

            /* lookup a more specific pinning info */
            for (j = 0; j < def->cputune.nvcpupin; j++) {
                if (def->cputune.vcpupin[j]->id == i) {
                    cpumap = def->cputune.vcpupin[j]->cpumask;
                    break;
                }
            }

            if (!cpumap)
                continue;

            if (qemuSetupCgroupCpusetCpus(cgroup_vcpu, cpumap) < 0)
                goto cleanup;
        }
    }
    virCgroupFree(&cgroup_vcpu);
    VIR_FREE(mem_mask);

    return 0;

 cleanup:
    if (cgroup_vcpu) {
        virCgroupRemove(cgroup_vcpu);
        virCgroupFree(&cgroup_vcpu);
    }
    VIR_FREE(mem_mask);

    return -1;
}
Example #23
0
int
qemuSetupHostdevCGroup(virDomainObjPtr vm,
                       virDomainHostdevDefPtr dev)
{
    int ret = -1;
    qemuDomainObjPrivatePtr priv = vm->privateData;
    virPCIDevicePtr pci = NULL;
    virUSBDevicePtr usb = NULL;
    virSCSIDevicePtr scsi = NULL;
    char *path = NULL;

    /* currently this only does something for PCI devices using vfio
     * for device assignment, but it is called for *all* hostdev
     * devices.
     */

    if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES))
        return 0;

    if (dev->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS) {

        switch (dev->source.subsys.type) {
        case VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI:
            if (dev->source.subsys.u.pci.backend
                == VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO) {
                int rv;

                pci = virPCIDeviceNew(dev->source.subsys.u.pci.addr.domain,
                                      dev->source.subsys.u.pci.addr.bus,
                                      dev->source.subsys.u.pci.addr.slot,
                                      dev->source.subsys.u.pci.addr.function);
                if (!pci)
                    goto cleanup;

                if (!(path = virPCIDeviceGetIOMMUGroupDev(pci)))
                    goto cleanup;

                VIR_DEBUG("Cgroup allow %s for PCI device assignment", path);
                rv = virCgroupAllowDevicePath(priv->cgroup, path,
                                              VIR_CGROUP_DEVICE_RW);
                virDomainAuditCgroupPath(vm, priv->cgroup,
                                         "allow", path, "rw", rv == 0);
                if (rv < 0)
                    goto cleanup;
            }
            break;

        case VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_USB:
            /* NB: hostdev->missing wasn't previously checked in the
             * case of hotplug, only when starting a domain. Now it is
             * always checked, and the cgroup setup skipped if true.
             */
            if (dev->missing)
                break;
            if ((usb = virUSBDeviceNew(dev->source.subsys.u.usb.bus,
                                       dev->source.subsys.u.usb.device,
                                       NULL)) == NULL) {
                goto cleanup;
            }

            /* oddly, qemuSetupHostUSBDeviceCgroup doesn't ever
             * reference the usb object we just created
             */
            if (virUSBDeviceFileIterate(usb, qemuSetupHostUSBDeviceCgroup,
                                        vm) < 0) {
                goto cleanup;
            }
            break;

        case VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_SCSI:
            if ((scsi = virSCSIDeviceNew(NULL,
                                         dev->source.subsys.u.scsi.adapter,
                                         dev->source.subsys.u.scsi.bus,
                                         dev->source.subsys.u.scsi.target,
                                         dev->source.subsys.u.scsi.unit,
                                         dev->readonly,
                                         dev->shareable)) == NULL)
                goto cleanup;

            if (virSCSIDeviceFileIterate(scsi,
                                         qemuSetupHostSCSIDeviceCgroup,
                                         vm) < 0)
                goto cleanup;

        default:
            break;
        }
    }

    ret = 0;
 cleanup:
    virPCIDeviceFree(pci);
    virUSBDeviceFree(usb);
    virSCSIDeviceFree(scsi);
    VIR_FREE(path);
    return ret;
}
Example #24
0
static int
qemuSetupCpusetCgroup(virDomainObjPtr vm,
                      virBitmapPtr nodemask,
                      virCapsPtr caps)
{
    qemuDomainObjPrivatePtr priv = vm->privateData;
    char *mem_mask = NULL;
    char *cpu_mask = NULL;
    int ret = -1;

    if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET))
        return 0;

    if ((vm->def->numatune.memory.nodemask ||
         (vm->def->numatune.memory.placement_mode ==
          VIR_NUMA_TUNE_MEM_PLACEMENT_MODE_AUTO)) &&
        vm->def->numatune.memory.mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT) {

        if (vm->def->numatune.memory.placement_mode ==
            VIR_NUMA_TUNE_MEM_PLACEMENT_MODE_AUTO)
            mem_mask = virBitmapFormat(nodemask);
        else
            mem_mask = virBitmapFormat(vm->def->numatune.memory.nodemask);

        if (!mem_mask) {
            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                           _("failed to convert memory nodemask"));
            goto cleanup;
        }

        if (virCgroupSetCpusetMems(priv->cgroup, mem_mask) < 0)
            goto cleanup;
    }

    if (vm->def->cpumask ||
        (vm->def->placement_mode == VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO)) {

        if (vm->def->placement_mode == VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO) {
            virBitmapPtr cpumap;
            if (!(cpumap = virCapabilitiesGetCpusForNodemask(caps, nodemask)))
                goto cleanup;
            cpu_mask = virBitmapFormat(cpumap);
            virBitmapFree(cpumap);
        } else {
            cpu_mask = virBitmapFormat(vm->def->cpumask);
        }

        if (!cpu_mask) {
            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                           _("failed to convert cpu mask"));
            goto cleanup;
        }

        if (virCgroupSetCpusetCpus(priv->cgroup, cpu_mask) < 0)
            goto cleanup;
    }

    ret = 0;
 cleanup:
    VIR_FREE(mem_mask);
    VIR_FREE(cpu_mask);
    return ret;
}
Example #25
0
int
qemuSetupCgroupForVcpu(virDomainObjPtr vm)
{
    virCgroupPtr cgroup_vcpu = NULL;
    qemuDomainObjPrivatePtr priv = vm->privateData;
    virDomainDefPtr def = vm->def;
    size_t i, j;
    unsigned long long period = vm->def->cputune.period;
    long long quota = vm->def->cputune.quota;

    if ((period || quota) &&
        !virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU)) {
        virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                       _("cgroup cpu is required for scheduler tuning"));
        return -1;
    }

    /* We are trying to setup cgroups for CPU pinning, which can also be done
     * with virProcessInfoSetAffinity, thus the lack of cgroups is not fatal
     * here.
     */
    if (priv->cgroup == NULL)
        return 0;

    if (priv->nvcpupids == 0 || priv->vcpupids[0] == vm->pid) {
        /* If we don't know VCPU<->PID mapping or all vcpu runs in the same
         * thread, we cannot control each vcpu.
         */
        VIR_WARN("Unable to get vcpus' pids.");
        return 0;
    }

    for (i = 0; i < priv->nvcpupids; i++) {
        if (virCgroupNewVcpu(priv->cgroup, i, true, &cgroup_vcpu) < 0)
            goto cleanup;

        /* move the thread for vcpu to sub dir */
        if (virCgroupAddTask(cgroup_vcpu, priv->vcpupids[i]) < 0)
            goto cleanup;

        if (period || quota) {
            if (qemuSetupCgroupVcpuBW(cgroup_vcpu, period, quota) < 0)
                goto cleanup;
        }

        /* Set vcpupin in cgroup if vcpupin xml is provided */
        if (virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) {
            /* find the right CPU to pin, otherwise
             * qemuSetupCgroupVcpuPin will fail. */
            for (j = 0; j < def->cputune.nvcpupin; j++) {
                if (def->cputune.vcpupin[j]->vcpuid != i)
                    continue;

                if (qemuSetupCgroupVcpuPin(cgroup_vcpu,
                                           def->cputune.vcpupin,
                                           def->cputune.nvcpupin,
                                           i) < 0)
                    goto cleanup;

                break;
            }
        }

        virCgroupFree(&cgroup_vcpu);
    }

    return 0;

 cleanup:
    if (cgroup_vcpu) {
        virCgroupRemove(cgroup_vcpu);
        virCgroupFree(&cgroup_vcpu);
    }

    return -1;
}
Example #26
0
static int
qemuSetupDevicesCgroup(virQEMUDriverPtr driver,
                       virDomainObjPtr vm)
{
    qemuDomainObjPrivatePtr priv = vm->privateData;
    virQEMUDriverConfigPtr cfg = NULL;
    const char *const *deviceACL = NULL;
    int rv = -1;
    int ret = -1;
    size_t i;

    if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES))
        return 0;

    rv = virCgroupDenyAllDevices(priv->cgroup);
    virDomainAuditCgroup(vm, priv->cgroup, "deny", "all", rv == 0);
    if (rv < 0) {
        if (virLastErrorIsSystemErrno(EPERM)) {
            virResetLastError();
            VIR_WARN("Group devices ACL is not accessible, disabling whitelisting");
            return 0;
        }

        goto cleanup;
    }

    for (i = 0; i < vm->def->ndisks; i++) {
        if (qemuSetupDiskCgroup(vm, vm->def->disks[i]) < 0)
            goto cleanup;
    }

    rv = virCgroupAllowDeviceMajor(priv->cgroup, 'c', DEVICE_PTY_MAJOR,
                                   VIR_CGROUP_DEVICE_RW);
    virDomainAuditCgroupMajor(vm, priv->cgroup, "allow", DEVICE_PTY_MAJOR,
                              "pty", "rw", rv == 0);
    if (rv < 0)
        goto cleanup;

    cfg = virQEMUDriverGetConfig(driver);
    deviceACL = cfg->cgroupDeviceACL ?
                (const char *const *)cfg->cgroupDeviceACL :
                defaultDeviceACL;

    if (vm->def->nsounds &&
        ((!vm->def->ngraphics && cfg->nogfxAllowHostAudio) ||
         (vm->def->graphics &&
          ((vm->def->graphics[0]->type == VIR_DOMAIN_GRAPHICS_TYPE_VNC &&
           cfg->vncAllowHostAudio) ||
           (vm->def->graphics[0]->type == VIR_DOMAIN_GRAPHICS_TYPE_SDL))))) {
        rv = virCgroupAllowDeviceMajor(priv->cgroup, 'c', DEVICE_SND_MAJOR,
                                       VIR_CGROUP_DEVICE_RW);
        virDomainAuditCgroupMajor(vm, priv->cgroup, "allow", DEVICE_SND_MAJOR,
                                  "sound", "rw", rv == 0);
        if (rv < 0)
            goto cleanup;
    }

    for (i = 0; deviceACL[i] != NULL; i++) {
        if (!virFileExists(deviceACL[i])) {
            VIR_DEBUG("Ignoring non-existent device %s", deviceACL[i]);
            continue;
        }

        rv = virCgroupAllowDevicePath(priv->cgroup, deviceACL[i],
                                      VIR_CGROUP_DEVICE_RW);
        virDomainAuditCgroupPath(vm, priv->cgroup, "allow", deviceACL[i], "rw", rv == 0);
        if (rv < 0 &&
            !virLastErrorIsSystemErrno(ENOENT))
            goto cleanup;
    }

    if (virDomainChrDefForeach(vm->def,
                               true,
                               qemuSetupChardevCgroup,
                               vm) < 0)
        goto cleanup;

    if (vm->def->tpm &&
        (qemuSetupTPMCgroup(vm->def,
                            vm->def->tpm,
                            vm) < 0))
        goto cleanup;

    for (i = 0; i < vm->def->nhostdevs; i++) {
        if (qemuSetupHostdevCGroup(vm, vm->def->hostdevs[i]) < 0)
            goto cleanup;
    }

    for (i = 0; i < vm->def->nrngs; i++) {
        if (vm->def->rngs[i]->backend == VIR_DOMAIN_RNG_BACKEND_RANDOM) {
            VIR_DEBUG("Setting Cgroup ACL for RNG device");
            rv = virCgroupAllowDevicePath(priv->cgroup,
                                          vm->def->rngs[i]->source.file,
                                          VIR_CGROUP_DEVICE_RW);
            virDomainAuditCgroupPath(vm, priv->cgroup, "allow",
                                     vm->def->rngs[i]->source.file,
                                     "rw", rv == 0);
            if (rv < 0 &&
                !virLastErrorIsSystemErrno(ENOENT))
                goto cleanup;
        }
    }

    ret = 0;
 cleanup:
    virObjectUnref(cfg);
    return ret;
}
Example #27
0
int
qemuSetupHostdevCGroup(virDomainObjPtr vm,
                       virDomainHostdevDefPtr dev)
{
    int ret = -1;
    qemuDomainObjPrivatePtr priv = vm->privateData;
    virDomainHostdevSubsysUSBPtr usbsrc = &dev->source.subsys.u.usb;
    virDomainHostdevSubsysPCIPtr pcisrc = &dev->source.subsys.u.pci;
    virDomainHostdevSubsysSCSIPtr scsisrc = &dev->source.subsys.u.scsi;
    virPCIDevicePtr pci = NULL;
    virUSBDevicePtr usb = NULL;
    virSCSIDevicePtr scsi = NULL;
    char *path = NULL;

    /* currently this only does something for PCI devices using vfio
     * for device assignment, but it is called for *all* hostdev
     * devices.
     */

    if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES))
        return 0;

    if (dev->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS) {

        switch (dev->source.subsys.type) {
        case VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI:
            if (pcisrc->backend == VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO) {
                int rv;

                pci = virPCIDeviceNew(pcisrc->addr.domain,
                                      pcisrc->addr.bus,
                                      pcisrc->addr.slot,
                                      pcisrc->addr.function);
                if (!pci)
                    goto cleanup;

                if (!(path = virPCIDeviceGetIOMMUGroupDev(pci)))
                    goto cleanup;

                VIR_DEBUG("Cgroup allow %s for PCI device assignment", path);
                rv = virCgroupAllowDevicePath(priv->cgroup, path,
                                              VIR_CGROUP_DEVICE_RW);
                virDomainAuditCgroupPath(vm, priv->cgroup,
                                         "allow", path, "rw", rv == 0);
                if (rv < 0)
                    goto cleanup;
            }
            break;

        case VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_USB:
            /* NB: hostdev->missing wasn't previously checked in the
             * case of hotplug, only when starting a domain. Now it is
             * always checked, and the cgroup setup skipped if true.
             */
            if (dev->missing)
                break;
            if ((usb = virUSBDeviceNew(usbsrc->bus, usbsrc->device,
                                       NULL)) == NULL) {
                goto cleanup;
            }

            /* oddly, qemuSetupHostUSBDeviceCgroup doesn't ever
             * reference the usb object we just created
             */
            if (virUSBDeviceFileIterate(usb, qemuSetupHostUSBDeviceCgroup,
                                        vm) < 0) {
                goto cleanup;
            }
            break;

        case VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_SCSI: {
            if (scsisrc->protocol ==
                VIR_DOMAIN_HOSTDEV_SCSI_PROTOCOL_TYPE_ISCSI) {
                virDomainHostdevSubsysSCSIiSCSIPtr iscsisrc = &scsisrc->u.iscsi;
                /* Follow qemuSetupDiskCgroup() and qemuSetImageCgroupInternal()
                 * which does nothing for non local storage
                 */
                VIR_DEBUG("Not updating cgroups for hostdev iSCSI path '%s'",
                          iscsisrc->path);
            } else {
                virDomainHostdevSubsysSCSIHostPtr scsihostsrc =
                    &scsisrc->u.host;
                if ((scsi = virSCSIDeviceNew(NULL,
                                             scsihostsrc->adapter,
                                             scsihostsrc->bus,
                                             scsihostsrc->target,
                                             scsihostsrc->unit,
                                             dev->readonly,
                                             dev->shareable)) == NULL)
                    goto cleanup;

                if (virSCSIDeviceFileIterate(scsi,
                                             qemuSetupHostSCSIDeviceCgroup,
                                             vm) < 0)
                    goto cleanup;
            }
            break;
        }

        default:
            break;
        }
    }

    ret = 0;
 cleanup:
    virPCIDeviceFree(pci);
    virUSBDeviceFree(usb);
    virSCSIDeviceFree(scsi);
    VIR_FREE(path);
    return ret;
}
Example #28
0
int
qemuSetupCgroupForIOThreads(virDomainObjPtr vm)
{
    virCgroupPtr cgroup_iothread = NULL;
    qemuDomainObjPrivatePtr priv = vm->privateData;
    virDomainDefPtr def = vm->def;
    size_t i;
    unsigned long long period = vm->def->cputune.period;
    long long quota = vm->def->cputune.quota;
    char *mem_mask = NULL;
    virDomainNumatuneMemMode mem_mode;

    if ((period || quota) &&
        !virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU)) {
        virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                       _("cgroup cpu is required for scheduler tuning"));
        return -1;
    }

    /*
     * If CPU cgroup controller is not initialized here, then we need
     * neither period nor quota settings.  And if CPUSET controller is
     * not initialized either, then there's nothing to do anyway.
     */
    if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU) &&
        !virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET))
        return 0;

    /* We are trying to setup cgroups for CPU pinning, which can also be done
     * with virProcessSetAffinity, thus the lack of cgroups is not fatal here.
     */
    if (priv->cgroup == NULL)
        return 0;

    if (virDomainNumatuneGetMode(vm->def->numa, -1, &mem_mode) == 0 &&
        mem_mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT &&
        virDomainNumatuneMaybeFormatNodeset(vm->def->numa,
                                            priv->autoNodeset,
                                            &mem_mask, -1) < 0)
        goto cleanup;

    for (i = 0; i < def->niothreadids; i++) {
        /* IOThreads are numbered 1..n, although the array is 0..n-1,
         * so we will account for that here
         */
        if (virCgroupNewThread(priv->cgroup, VIR_CGROUP_THREAD_IOTHREAD,
                               def->iothreadids[i]->iothread_id,
                               true, &cgroup_iothread) < 0)
            goto cleanup;

        /* move the thread for iothread to sub dir */
        if (virCgroupAddTask(cgroup_iothread,
                             def->iothreadids[i]->thread_id) < 0)
            goto cleanup;

        if (period || quota) {
            if (qemuSetupCgroupVcpuBW(cgroup_iothread, period, quota) < 0)
                goto cleanup;
        }

        /* Set iothreadpin in cgroup if iothreadpin xml is provided */
        if (virCgroupHasController(priv->cgroup,
                                   VIR_CGROUP_CONTROLLER_CPUSET)) {
            virBitmapPtr cpumask = NULL;

            if (mem_mask &&
                virCgroupSetCpusetMems(cgroup_iothread, mem_mask) < 0)
                goto cleanup;

            if (def->iothreadids[i]->cpumask)
                cpumask = def->iothreadids[i]->cpumask;
            else if (def->placement_mode == VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO)
                cpumask = priv->autoCpuset;
            else
                cpumask = def->cpumask;

            if (cpumask &&
                qemuSetupCgroupCpusetCpus(cgroup_iothread, cpumask) < 0)
                goto cleanup;
        }

        virCgroupFree(&cgroup_iothread);
    }
    VIR_FREE(mem_mask);

    return 0;

 cleanup:
    if (cgroup_iothread) {
        virCgroupRemove(cgroup_iothread);
        virCgroupFree(&cgroup_iothread);
    }
    VIR_FREE(mem_mask);

    return -1;
}
Example #29
0
int
qemuSetupCgroupForEmulator(virDomainObjPtr vm)
{
    virBitmapPtr cpumask = NULL;
    virCgroupPtr cgroup_emulator = NULL;
    virDomainDefPtr def = vm->def;
    qemuDomainObjPrivatePtr priv = vm->privateData;
    unsigned long long period = vm->def->cputune.emulator_period;
    long long quota = vm->def->cputune.emulator_quota;

    if ((period || quota) &&
        !virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU)) {
        virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                       _("cgroup cpu is required for scheduler tuning"));
        return -1;
    }

    /*
     * If CPU cgroup controller is not initialized here, then we need
     * neither period nor quota settings.  And if CPUSET controller is
     * not initialized either, then there's nothing to do anyway.
     */
    if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU) &&
        !virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET))
        return 0;

    if (priv->cgroup == NULL)
        return 0; /* Not supported, so claim success */

    if (virCgroupNewThread(priv->cgroup, VIR_CGROUP_THREAD_EMULATOR, 0,
                           true, &cgroup_emulator) < 0)
        goto cleanup;

    if (virCgroupMoveTask(priv->cgroup, cgroup_emulator) < 0)
        goto cleanup;

    if (def->cputune.emulatorpin)
        cpumask = def->cputune.emulatorpin;
    else if (def->placement_mode == VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO)
        cpumask = priv->autoCpuset;
    else if (def->cpumask)
        cpumask = def->cpumask;

    if (cpumask) {
        if (virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET) &&
            qemuSetupCgroupCpusetCpus(cgroup_emulator, cpumask) < 0)
            goto cleanup;
    }

    if (period || quota) {
        if (virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU) &&
            qemuSetupCgroupVcpuBW(cgroup_emulator, period,
                                  quota) < 0)
            goto cleanup;
    }

    virCgroupFree(&cgroup_emulator);
    return 0;

 cleanup:
    if (cgroup_emulator) {
        virCgroupRemove(cgroup_emulator);
        virCgroupFree(&cgroup_emulator);
    }

    return -1;
}
Example #30
0
static void
qemuRestoreCgroupState(virDomainObjPtr vm)
{
    char *mem_mask = NULL;
    char *nodeset = NULL;
    int empty = -1;
    qemuDomainObjPrivatePtr priv = vm->privateData;
    size_t i = 0;
    virBitmapPtr all_nodes;
    virCgroupPtr cgroup_temp = NULL;

    if (!virNumaIsAvailable() ||
        !virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET))
        return;

    if (!(all_nodes = virNumaGetHostMemoryNodeset()))
        goto error;

    if (!(mem_mask = virBitmapFormat(all_nodes)))
        goto error;

    if ((empty = virCgroupHasEmptyTasks(priv->cgroup,
                                        VIR_CGROUP_CONTROLLER_CPUSET)) <= 0)
        goto error;

    if (virCgroupSetCpusetMems(priv->cgroup, mem_mask) < 0)
        goto error;

    for (i = 0; i < virDomainDefGetVcpusMax(vm->def); i++) {
        virDomainVcpuDefPtr vcpu = virDomainDefGetVcpu(vm->def, i);

        if (!vcpu->online)
            continue;

        if (virCgroupNewThread(priv->cgroup, VIR_CGROUP_THREAD_VCPU, i,
                               false, &cgroup_temp) < 0 ||
            virCgroupSetCpusetMemoryMigrate(cgroup_temp, true) < 0 ||
            virCgroupGetCpusetMems(cgroup_temp, &nodeset) < 0 ||
            virCgroupSetCpusetMems(cgroup_temp, nodeset) < 0)
            goto cleanup;

        VIR_FREE(nodeset);
        virCgroupFree(&cgroup_temp);
    }

    for (i = 0; i < vm->def->niothreadids; i++) {
        if (virCgroupNewThread(priv->cgroup, VIR_CGROUP_THREAD_IOTHREAD,
                               vm->def->iothreadids[i]->iothread_id,
                               false, &cgroup_temp) < 0 ||
            virCgroupSetCpusetMemoryMigrate(cgroup_temp, true) < 0 ||
            virCgroupGetCpusetMems(cgroup_temp, &nodeset) < 0 ||
            virCgroupSetCpusetMems(cgroup_temp, nodeset) < 0)
            goto cleanup;

        VIR_FREE(nodeset);
        virCgroupFree(&cgroup_temp);
    }

    if (virCgroupNewThread(priv->cgroup, VIR_CGROUP_THREAD_EMULATOR, 0,
                           false, &cgroup_temp) < 0 ||
        virCgroupSetCpusetMemoryMigrate(cgroup_temp, true) < 0 ||
        virCgroupGetCpusetMems(cgroup_temp, &nodeset) < 0 ||
        virCgroupSetCpusetMems(cgroup_temp, nodeset) < 0)
        goto cleanup;

 cleanup:
    VIR_FREE(mem_mask);
    VIR_FREE(nodeset);
    virBitmapFree(all_nodes);
    virCgroupFree(&cgroup_temp);
    return;

 error:
    virResetLastError();
    VIR_DEBUG("Couldn't restore cgroups to meaningful state");
    goto cleanup;
}