static int lxcControllerRun(virDomainDefPtr def, unsigned int nveths, char **veths, int monitor, int client, int appPty) { int rc = -1; int control[2] = { -1, -1}; int containerPty = -1; char *containerPtyPath = NULL; pid_t container = -1; virDomainFSDefPtr root; char *devpts = NULL; char *devptmx = NULL; if (socketpair(PF_UNIX, SOCK_STREAM, 0, control) < 0) { virReportSystemError(errno, "%s", _("sockpair failed")); goto cleanup; } root = virDomainGetRootFilesystem(def); if (lxcSetContainerResources(def) < 0) goto cleanup; /* * If doing a chroot style setup, we need to prepare * a private /dev/pts for the child now, which they * will later move into position. * * This is complex because 'virsh console' needs to * use /dev/pts from the host OS, and the guest OS * needs to use /dev/pts from the guest. * * This means that we (libvirt_lxc) need to see and * use both /dev/pts instances. We're running in the * host OS context though and don't want to expose * the guest OS /dev/pts there. * * Thus we call unshare(CLONE_NS) so that we can see * the guest's new /dev/pts, without it becoming * visible to the host OS. We also put the root FS * into slave mode, just in case it was currently * marked as shared */ if (root) { VIR_DEBUG0("Setting up private /dev/pts"); if (unshare(CLONE_NEWNS) < 0) { virReportSystemError(errno, "%s", _("Cannot unshare mount namespace")); goto cleanup; } if (mount("", "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) { virReportSystemError(errno, "%s", _("Failed to switch root mount into slave mode")); goto cleanup; } if (virAsprintf(&devpts, "%s/dev/pts", root->src) < 0 || virAsprintf(&devptmx, "%s/dev/pts/ptmx", root->src) < 0) { virReportOOMError(); goto cleanup; } if (virFileMakePath(devpts) != 0) { virReportSystemError(errno, _("Failed to make path %s"), devpts); goto cleanup; } VIR_DEBUG("Mouting 'devpts' on %s", devpts); if (mount("devpts", devpts, "devpts", 0, "newinstance,ptmxmode=0666,mode=0620,gid=5") < 0) { virReportSystemError(errno, _("Failed to mount devpts on %s"), devpts); goto cleanup; } if (access(devptmx, R_OK) < 0) { VIR_WARN0("Kernel does not support private devpts, using shared devpts"); VIR_FREE(devptmx); } } if (devptmx) { VIR_DEBUG("Opening tty on private %s", devptmx); if (virFileOpenTtyAt(devptmx, &containerPty, &containerPtyPath, 0) < 0) { virReportSystemError(errno, "%s", _("Failed to allocate tty")); goto cleanup; } } else { VIR_DEBUG0("Opening tty on shared /dev/ptmx"); if (virFileOpenTty(&containerPty, &containerPtyPath, 0) < 0) { virReportSystemError(errno, "%s", _("Failed to allocate tty")); goto cleanup; } } if (lxcSetPersonality(def) < 0) goto cleanup; if ((container = lxcContainerStart(def, nveths, veths, control[1], containerPtyPath)) < 0) goto cleanup; VIR_FORCE_CLOSE(control[1]); if (lxcControllerMoveInterfaces(nveths, veths, container) < 0) goto cleanup; if (lxcContainerSendContinue(control[0]) < 0) goto cleanup; /* Now the container is running, there's no need for us to keep any elevated capabilities */ if (lxcControllerClearCapabilities() < 0) goto cleanup; rc = lxcControllerMain(monitor, client, appPty, containerPty, container); cleanup: VIR_FREE(devptmx); VIR_FREE(devpts); VIR_FORCE_CLOSE(control[0]); VIR_FORCE_CLOSE(control[1]); VIR_FREE(containerPtyPath); VIR_FORCE_CLOSE(containerPty); if (container > 1) { int status; kill(container, SIGTERM); if (!(waitpid(container, &status, WNOHANG) == 0 && WIFEXITED(status))) kill(container, SIGKILL); waitpid(container, NULL, 0); } return rc; }
/** * virLXCProcessStart: * @conn: pointer to connection * @driver: pointer to driver structure * @vm: pointer to virtual machine structure * @autoDestroy: mark the domain for auto destruction * @reason: reason for switching vm to running state * * Starts a vm * * Returns 0 on success or -1 in case of error */ int virLXCProcessStart(virConnectPtr conn, virLXCDriverPtr driver, virDomainObjPtr vm, unsigned int nfiles, int *files, bool autoDestroy, virDomainRunningReason reason) { int rc = -1, r; size_t nttyFDs = 0; int *ttyFDs = NULL; size_t i; char *logfile = NULL; int logfd = -1; size_t nveths = 0; char **veths = NULL; int handshakefds[2] = { -1, -1 }; off_t pos = -1; char ebuf[1024]; char *timestamp; virCommandPtr cmd = NULL; virLXCDomainObjPrivatePtr priv = vm->privateData; virCapsPtr caps = NULL; virErrorPtr err = NULL; virLXCDriverConfigPtr cfg = virLXCDriverGetConfig(driver); virCgroupPtr selfcgroup; int status; if (virCgroupNewSelf(&selfcgroup) < 0) return -1; if (!virCgroupHasController(selfcgroup, VIR_CGROUP_CONTROLLER_CPUACCT)) { virCgroupFree(&selfcgroup); virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("Unable to find 'cpuacct' cgroups controller mount")); return -1; } if (!virCgroupHasController(selfcgroup, VIR_CGROUP_CONTROLLER_DEVICES)) { virCgroupFree(&selfcgroup); virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("Unable to find 'devices' cgroups controller mount")); return -1; } if (!virCgroupHasController(selfcgroup, VIR_CGROUP_CONTROLLER_MEMORY)) { virCgroupFree(&selfcgroup); virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("Unable to find 'memory' cgroups controller mount")); return -1; } virCgroupFree(&selfcgroup); if (virFileMakePath(cfg->logDir) < 0) { virReportSystemError(errno, _("Cannot create log directory '%s'"), cfg->logDir); return -1; } if (!vm->def->resource) { virDomainResourceDefPtr res; if (VIR_ALLOC(res) < 0) goto cleanup; if (VIR_STRDUP(res->partition, "/machine") < 0) { VIR_FREE(res); goto cleanup; } vm->def->resource = res; } if (virAsprintf(&logfile, "%s/%s.log", cfg->logDir, vm->def->name) < 0) return -1; if (!(caps = virLXCDriverGetCapabilities(driver, false))) goto cleanup; /* Do this up front, so any part of the startup process can add * runtime state to vm->def that won't be persisted. This let's us * report implicit runtime defaults in the XML, like vnc listen/socket */ VIR_DEBUG("Setting current domain def as transient"); if (virDomainObjSetDefTransient(caps, driver->xmlopt, vm, true) < 0) goto cleanup; /* Run an early hook to set-up missing devices */ if (virHookPresent(VIR_HOOK_DRIVER_LXC)) { char *xml = virDomainDefFormat(vm->def, 0); int hookret; hookret = virHookCall(VIR_HOOK_DRIVER_LXC, vm->def->name, VIR_HOOK_LXC_OP_PREPARE, VIR_HOOK_SUBOP_BEGIN, NULL, xml, NULL); VIR_FREE(xml); /* * If the script raised an error abort the launch */ if (hookret < 0) goto cleanup; } if (virLXCProcessEnsureRootFS(vm) < 0) goto cleanup; /* Must be run before security labelling */ VIR_DEBUG("Preparing host devices"); if (virLXCPrepareHostDevices(driver, vm->def) < 0) goto cleanup; /* Here we open all the PTYs we need on the host OS side. * The LXC controller will open the guest OS side PTYs * and forward I/O between them. */ nttyFDs = vm->def->nconsoles; if (VIR_ALLOC_N(ttyFDs, nttyFDs) < 0) goto cleanup; for (i = 0; i < vm->def->nconsoles; i++) ttyFDs[i] = -1; /* If you are using a SecurityDriver with dynamic labelling, then generate a security label for isolation */ VIR_DEBUG("Generating domain security label (if required)"); if (vm->def->nseclabels && vm->def->seclabels[0]->type == VIR_DOMAIN_SECLABEL_DEFAULT) vm->def->seclabels[0]->type = VIR_DOMAIN_SECLABEL_NONE; if (virSecurityManagerGenLabel(driver->securityManager, vm->def) < 0) { virDomainAuditSecurityLabel(vm, false); goto cleanup; } virDomainAuditSecurityLabel(vm, true); VIR_DEBUG("Setting domain security labels"); if (virSecurityManagerSetAllLabel(driver->securityManager, vm->def, NULL) < 0) goto cleanup; for (i = 0; i < vm->def->nconsoles; i++) { char *ttyPath; if (vm->def->consoles[i]->source.type != VIR_DOMAIN_CHR_TYPE_PTY) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", _("Only PTY console types are supported")); goto cleanup; } if (virFileOpenTty(&ttyFDs[i], &ttyPath, 1) < 0) { virReportSystemError(errno, "%s", _("Failed to allocate tty")); goto cleanup; } VIR_FREE(vm->def->consoles[i]->source.data.file.path); vm->def->consoles[i]->source.data.file.path = ttyPath; VIR_FREE(vm->def->consoles[i]->info.alias); if (virAsprintf(&vm->def->consoles[i]->info.alias, "console%zu", i) < 0) goto cleanup; } if (virLXCProcessSetupInterfaces(conn, vm->def, &nveths, &veths) < 0) goto cleanup; /* Save the configuration for the controller */ if (virDomainSaveConfig(cfg->stateDir, vm->def) < 0) goto cleanup; if ((logfd = open(logfile, O_WRONLY | O_APPEND | O_CREAT, S_IRUSR|S_IWUSR)) < 0) { virReportSystemError(errno, _("Failed to open '%s'"), logfile); goto cleanup; } if (pipe(handshakefds) < 0) { virReportSystemError(errno, "%s", _("Unable to create pipe")); goto cleanup; } if (!(cmd = virLXCProcessBuildControllerCmd(driver, vm, nveths, veths, ttyFDs, nttyFDs, files, nfiles, handshakefds[1]))) goto cleanup; virCommandSetOutputFD(cmd, &logfd); virCommandSetErrorFD(cmd, &logfd); /* now that we know it is about to start call the hook if present */ if (virHookPresent(VIR_HOOK_DRIVER_LXC)) { char *xml = virDomainDefFormat(vm->def, 0); int hookret; hookret = virHookCall(VIR_HOOK_DRIVER_LXC, vm->def->name, VIR_HOOK_LXC_OP_START, VIR_HOOK_SUBOP_BEGIN, NULL, xml, NULL); VIR_FREE(xml); /* * If the script raised an error abort the launch */ if (hookret < 0) goto cleanup; } /* Log timestamp */ if ((timestamp = virTimeStringNow()) == NULL) goto cleanup; if (safewrite(logfd, timestamp, strlen(timestamp)) < 0 || safewrite(logfd, START_POSTFIX, strlen(START_POSTFIX)) < 0) { VIR_WARN("Unable to write timestamp to logfile: %s", virStrerror(errno, ebuf, sizeof(ebuf))); } VIR_FREE(timestamp); /* Log generated command line */ virCommandWriteArgLog(cmd, logfd); if ((pos = lseek(logfd, 0, SEEK_END)) < 0) VIR_WARN("Unable to seek to end of logfile: %s", virStrerror(errno, ebuf, sizeof(ebuf))); virCommandRawStatus(cmd); if (virCommandRun(cmd, &status) < 0) goto cleanup; if (status != 0) { if (virLXCProcessReadLogOutput(vm, logfile, pos, ebuf, sizeof(ebuf)) <= 0) { if (WIFEXITED(status)) snprintf(ebuf, sizeof(ebuf), _("unexpected exit status %d"), WEXITSTATUS(status)); else snprintf(ebuf, sizeof(ebuf), "%s", _("terminated abnormally")); } virReportError(VIR_ERR_INTERNAL_ERROR, _("guest failed to start: %s"), ebuf); goto cleanup; } if (VIR_CLOSE(handshakefds[1]) < 0) { virReportSystemError(errno, "%s", _("could not close handshake fd")); goto cleanup; } /* Connect to the controller as a client *first* because * this will block until the child has written their * pid file out to disk & created their cgroup */ if (!(priv->monitor = virLXCProcessConnectMonitor(driver, vm))) { /* Intentionally overwrite the real monitor error message, * since a better one is almost always found in the logs */ if (virLXCProcessReadLogOutput(vm, logfile, pos, ebuf, sizeof(ebuf)) > 0) { virResetLastError(); virReportError(VIR_ERR_INTERNAL_ERROR, _("guest failed to start: %s"), ebuf); } goto cleanup; } /* And get its pid */ if ((r = virPidFileRead(cfg->stateDir, vm->def->name, &vm->pid)) < 0) { if (virLXCProcessReadLogOutput(vm, logfile, pos, ebuf, sizeof(ebuf)) > 0) virReportError(VIR_ERR_INTERNAL_ERROR, _("guest failed to start: %s"), ebuf); else virReportSystemError(-r, _("Failed to read pid file %s/%s.pid"), cfg->stateDir, vm->def->name); goto cleanup; } if (virCgroupNewDetectMachine(vm->def->name, "lxc", vm->pid, vm->def->resource ? vm->def->resource->partition : NULL, -1, &priv->cgroup) < 0) goto error; if (!priv->cgroup) { virReportError(VIR_ERR_INTERNAL_ERROR, _("No valid cgroup for machine %s"), vm->def->name); goto error; } priv->stopReason = VIR_DOMAIN_EVENT_STOPPED_FAILED; priv->wantReboot = false; vm->def->id = vm->pid; virDomainObjSetState(vm, VIR_DOMAIN_RUNNING, reason); priv->doneStopEvent = false; if (virAtomicIntInc(&driver->nactive) == 1 && driver->inhibitCallback) driver->inhibitCallback(true, driver->inhibitOpaque); if (lxcContainerWaitForContinue(handshakefds[0]) < 0) { char out[1024]; if (!(virLXCProcessReadLogOutput(vm, logfile, pos, out, 1024) < 0)) { virReportError(VIR_ERR_INTERNAL_ERROR, _("guest failed to start: %s"), out); } goto error; } if (autoDestroy && virCloseCallbacksSet(driver->closeCallbacks, vm, conn, lxcProcessAutoDestroy) < 0) goto error; if (virDomainObjSetDefTransient(caps, driver->xmlopt, vm, false) < 0) goto error; /* Write domain status to disk. * * XXX: Earlier we wrote the plain "live" domain XML to this * location for the benefit of libvirt_lxc. We're now overwriting * it with the live status XML instead. This is a (currently * harmless) inconsistency we should fix one day */ if (virDomainSaveStatus(driver->xmlopt, cfg->stateDir, vm) < 0) goto error; /* finally we can call the 'started' hook script if any */ if (virHookPresent(VIR_HOOK_DRIVER_LXC)) { char *xml = virDomainDefFormat(vm->def, 0); int hookret; hookret = virHookCall(VIR_HOOK_DRIVER_LXC, vm->def->name, VIR_HOOK_LXC_OP_STARTED, VIR_HOOK_SUBOP_BEGIN, NULL, xml, NULL); VIR_FREE(xml); /* * If the script raised an error abort the launch */ if (hookret < 0) goto error; } rc = 0; cleanup: if (rc != 0 && !err) err = virSaveLastError(); virCommandFree(cmd); if (VIR_CLOSE(logfd) < 0) { virReportSystemError(errno, "%s", _("could not close logfile")); rc = -1; } for (i = 0; i < nveths; i++) { if (rc != 0 && veths[i]) ignore_value(virNetDevVethDelete(veths[i])); VIR_FREE(veths[i]); } if (rc != 0) { if (vm->newDef) { virDomainDefFree(vm->newDef); vm->newDef = NULL; } if (priv->monitor) { virObjectUnref(priv->monitor); priv->monitor = NULL; } virDomainConfVMNWFilterTeardown(vm); virSecurityManagerRestoreAllLabel(driver->securityManager, vm->def, false); virSecurityManagerReleaseLabel(driver->securityManager, vm->def); /* Clear out dynamically assigned labels */ if (vm->def->nseclabels && vm->def->seclabels[0]->type == VIR_DOMAIN_SECLABEL_DYNAMIC) { VIR_FREE(vm->def->seclabels[0]->model); VIR_FREE(vm->def->seclabels[0]->label); VIR_FREE(vm->def->seclabels[0]->imagelabel); } } for (i = 0; i < nttyFDs; i++) VIR_FORCE_CLOSE(ttyFDs[i]); VIR_FREE(ttyFDs); VIR_FORCE_CLOSE(handshakefds[0]); VIR_FORCE_CLOSE(handshakefds[1]); VIR_FREE(logfile); virObjectUnref(cfg); virObjectUnref(caps); if (err) { virSetError(err); virFreeError(err); } return rc; error: err = virSaveLastError(); virLXCProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_FAILED); goto cleanup; }
static int lxcControllerRun(virDomainDefPtr def, unsigned int nveths, char **veths, int monitor, int client, int *ttyFDs, size_t nttyFDs, int handshakefd) { int rc = -1; int control[2] = { -1, -1}; int containerhandshake[2] = { -1, -1 }; int *containerTtyFDs = NULL; char **containerTtyPaths = NULL; pid_t container = -1; virDomainFSDefPtr root; char *devpts = NULL; char *devptmx = NULL; size_t nloopDevs = 0; int *loopDevs = NULL; size_t i; if (VIR_ALLOC_N(containerTtyFDs, nttyFDs) < 0) { virReportOOMError(); goto cleanup; } if (VIR_ALLOC_N(containerTtyPaths, nttyFDs) < 0) { virReportOOMError(); goto cleanup; } if (socketpair(PF_UNIX, SOCK_STREAM, 0, control) < 0) { virReportSystemError(errno, "%s", _("sockpair failed")); goto cleanup; } if (socketpair(PF_UNIX, SOCK_STREAM, 0, containerhandshake) < 0) { virReportSystemError(errno, "%s", _("socketpair failed")); goto cleanup; } if (lxcSetupLoopDevices(def, &nloopDevs, &loopDevs) < 0) goto cleanup; root = virDomainGetRootFilesystem(def); if (lxcSetContainerResources(def) < 0) goto cleanup; /* * If doing a chroot style setup, we need to prepare * a private /dev/pts for the child now, which they * will later move into position. * * This is complex because 'virsh console' needs to * use /dev/pts from the host OS, and the guest OS * needs to use /dev/pts from the guest. * * This means that we (libvirt_lxc) need to see and * use both /dev/pts instances. We're running in the * host OS context though and don't want to expose * the guest OS /dev/pts there. * * Thus we call unshare(CLONE_NS) so that we can see * the guest's new /dev/pts, without it becoming * visible to the host OS. We also put the root FS * into slave mode, just in case it was currently * marked as shared */ if (root) { VIR_DEBUG("Setting up private /dev/pts"); if (!virFileExists(root->src)) { virReportSystemError(errno, _("root source %s does not exist"), root->src); goto cleanup; } if (unshare(CLONE_NEWNS) < 0) { virReportSystemError(errno, "%s", _("Cannot unshare mount namespace")); goto cleanup; } if (mount("", "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) { virReportSystemError(errno, "%s", _("Failed to switch root mount into slave mode")); goto cleanup; } if (virAsprintf(&devpts, "%s/dev/pts", root->src) < 0 || virAsprintf(&devptmx, "%s/dev/pts/ptmx", root->src) < 0) { virReportOOMError(); goto cleanup; } if (virFileMakePath(devpts) < 0) { virReportSystemError(errno, _("Failed to make path %s"), devpts); goto cleanup; } /* XXX should we support gid=X for X!=5 for distros which use * a different gid for tty? */ VIR_DEBUG("Mounting 'devpts' on %s", devpts); if (mount("devpts", devpts, "devpts", 0, "newinstance,ptmxmode=0666,mode=0620,gid=5") < 0) { virReportSystemError(errno, _("Failed to mount devpts on %s"), devpts); goto cleanup; } if (access(devptmx, R_OK) < 0) { VIR_WARN("Kernel does not support private devpts, using shared devpts"); VIR_FREE(devptmx); } } else { if (nttyFDs != -1) { lxcError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", _("Expected exactly one TTY fd")); goto cleanup; } } for (i = 0 ; i < nttyFDs ; i++) { if (devptmx) { VIR_DEBUG("Opening tty on private %s", devptmx); if (lxcCreateTty(devptmx, &containerTtyFDs[i], &containerTtyPaths[i]) < 0) { virReportSystemError(errno, "%s", _("Failed to allocate tty")); goto cleanup; } } else { VIR_DEBUG("Opening tty on shared /dev/ptmx"); if (virFileOpenTty(&containerTtyFDs[i], &containerTtyPaths[i], 0) < 0) { virReportSystemError(errno, "%s", _("Failed to allocate tty")); goto cleanup; } } } if (lxcSetPersonality(def) < 0) goto cleanup; if ((container = lxcContainerStart(def, nveths, veths, control[1], containerhandshake[1], containerTtyPaths, nttyFDs)) < 0) goto cleanup; VIR_FORCE_CLOSE(control[1]); VIR_FORCE_CLOSE(containerhandshake[1]); if (lxcControllerMoveInterfaces(nveths, veths, container) < 0) goto cleanup; if (lxcContainerSendContinue(control[0]) < 0) { virReportSystemError(errno, "%s", _("Unable to send container continue message")); goto cleanup; } if (lxcContainerWaitForContinue(containerhandshake[0]) < 0) { virReportSystemError(errno, "%s", _("error receiving signal from container")); goto cleanup; } /* Now the container is fully setup... */ /* ...we can close the loop devices... */ for (i = 0 ; i < nloopDevs ; i++) VIR_FORCE_CLOSE(loopDevs[i]); /* ...and reduce our privileges */ if (lxcControllerClearCapabilities() < 0) goto cleanup; if (lxcContainerSendContinue(handshakefd) < 0) { virReportSystemError(errno, "%s", _("error sending continue signal to parent")); goto cleanup; } VIR_FORCE_CLOSE(handshakefd); if (virSetBlocking(monitor, false) < 0 || virSetBlocking(client, false) < 0) { virReportSystemError(errno, "%s", _("Unable to set file descriptor non blocking")); goto cleanup; } for (i = 0 ; i < nttyFDs ; i++) { if (virSetBlocking(ttyFDs[i], false) < 0 || virSetBlocking(containerTtyFDs[i], false) < 0) { virReportSystemError(errno, "%s", _("Unable to set file descriptor non blocking")); goto cleanup; } } rc = lxcControllerMain(monitor, client, ttyFDs, containerTtyFDs, nttyFDs, container); monitor = client = -1; cleanup: VIR_FREE(devptmx); VIR_FREE(devpts); VIR_FORCE_CLOSE(control[0]); VIR_FORCE_CLOSE(control[1]); VIR_FORCE_CLOSE(handshakefd); VIR_FORCE_CLOSE(containerhandshake[0]); VIR_FORCE_CLOSE(containerhandshake[1]); for (i = 0 ; i < nttyFDs ; i++) VIR_FREE(containerTtyPaths[i]); VIR_FREE(containerTtyPaths); for (i = 0 ; i < nttyFDs ; i++) VIR_FORCE_CLOSE(containerTtyFDs[i]); VIR_FREE(containerTtyFDs); for (i = 0 ; i < nloopDevs ; i++) VIR_FORCE_CLOSE(loopDevs[i]); VIR_FREE(loopDevs); virPidAbort(container); return rc; }