/* Launch the appliance using the hypervisor binary named by g->hv
 * (referred to as "vmlinux" throughout this function).
 *
 * Outline of what the code below does:
 *   1. Checks uml_supported() and that at least one drive was added.
 *   2. Generates a random umid, builds/locates the appliance and, if
 *      there is an appliance drive, creates a COW overlay for it.
 *   3. Creates the daemon socketpair (dsv) and, unless g->direct_mode
 *      is set, the console socketpair (csv).
 *   4. Builds the complete argv in 'cmdline' before forking.
 *   5. Forks the vmlinux child, and optionally a recovery process.
 *   6. In the parent, wraps the sockets in g->conn and waits for the
 *      daemon to send GUESTFS_LAUNCH_FLAG.
 *
 * Parameters:
 *   g     - the handle.
 *   datav - backend private data; used as 'struct backend_uml_data *'
 *           (fields umid, pid and recoverypid are written here).
 *   arg   - not referenced anywhere in this function.
 *
 * Returns 0 on success.  Returns -1 on error; on the paths that go
 * through cleanup0, g->state is reset to CONFIG and g->conn (if set)
 * is freed.  The early 'return -1' statements before any resources
 * are created do not touch g->state.
 */
static int
launch_uml (guestfs_h *g, void *datav, const char *arg)
{
  struct backend_uml_data *data = datav;
  CLEANUP_FREE_STRINGSBUF DECLARE_STRINGSBUF (cmdline);
  /* Parent-side ends of the socketpairs, once ownership has been
   * moved out of csv[0] / dsv[0].  -1 means "not owned here".
   */
  int console_sock = -1, daemon_sock = -1;
  int r;
  /* csv = console socketpair (only created when !g->direct_mode),
   * dsv = daemon socketpair.  Index 0 stays in the parent, index 1
   * is used by the vmlinux child.
   */
  int csv[2], dsv[2];
  CLEANUP_FREE char *kernel = NULL, *dtb = NULL, *initrd = NULL, *appliance = NULL;
  int has_appliance_drive;
  CLEANUP_FREE char *appliance_cow = NULL;
  uint32_t size;
  CLEANUP_FREE void *buf = NULL;
  struct drive *drv;
  size_t i;
  struct hv_param *hp;
  char *term = getenv ("TERM");

  if (!uml_supported (g))
    return -1;

  if (!g->nr_drives) {
    error (g, _("you must call guestfs_add_drive before guestfs_launch"));
    return -1;
  }

  /* Assign a random unique ID to this run. */
  if (guestfs___random_string (data->umid, UML_UMID_LEN) == -1) {
    perrorf (g, "guestfs___random_string");
    return -1;
  }

  /* Locate and/or build the appliance. */
  if (guestfs___build_appliance (g, &kernel, &dtb, &initrd, &appliance) == -1)
    return -1;
  has_appliance_drive = appliance != NULL;

  /* Create COW overlays for the appliance.  Note that the documented
   * syntax ubd0=cow,orig does not work since kernel 3.3.  See:
   * http://thread.gmane.org/gmane.linux.uml.devel/13556
   */
  if (has_appliance_drive) {
    appliance_cow = make_cow_overlay (g, appliance);
    if (!appliance_cow)
      goto cleanup0;
  }

  /* The socket that the daemon will talk to us on. */
  if (socketpair (AF_LOCAL, SOCK_STREAM|SOCK_CLOEXEC, 0, dsv) == -1) {
    perrorf (g, "socketpair");
    goto cleanup0;
  }

  /* The console socket. */
  if (!g->direct_mode) {
    if (socketpair (AF_LOCAL, SOCK_STREAM|SOCK_CLOEXEC, 0, csv) == -1) {
      perrorf (g, "socketpair");
      /* cleanup0 does not know about dsv[], so close it here. */
      close (dsv[0]);
      close (dsv[1]);
      goto cleanup0;
    }
  }

  /* Construct the vmlinux command line.  We have to do this before
   * forking, because after fork we are not allowed to use
   * non-signal-safe functions such as malloc.
   *
   * Both helper macros append to 'cmdline' using the local 'g'.
   * They are not #undef'd within this function.
   */
#define ADD_CMDLINE(str) \
  guestfs___add_string (g, &cmdline, (str))
#define ADD_CMDLINE_PRINTF(fs,...) \
  guestfs___add_sprintf (g, &cmdline, (fs), ##__VA_ARGS__)

  /* argv[0]: the hypervisor binary itself. */
  ADD_CMDLINE (g->hv);

  /* Give this instance a unique random ID. */
  ADD_CMDLINE_PRINTF ("umid=%s", data->umid);

  /* Set memory size. */
  ADD_CMDLINE_PRINTF ("mem=%dM", g->memsize);

  /* vmlinux appears to ignore this, but let's add it anyway. */
  ADD_CMDLINE_PRINTF ("initrd=%s", initrd);

  /* Make sure our appliance init script runs first. */
  ADD_CMDLINE ("init=/init");

  /* This tells the /init script not to reboot at the end. */
  ADD_CMDLINE ("guestfs_noreboot=1");

  /* Root filesystem should be mounted read-write (default seems to
   * be "ro").
   */
  ADD_CMDLINE ("rw");

  /* See also guestfs___appliance_command_line. */
  if (g->verbose)
    ADD_CMDLINE ("guestfs_verbose=1");

  ADD_CMDLINE ("panic=1");

  /* Pass the caller's $TERM through, falling back to "linux" when
   * the environment variable is unset.
   */
  ADD_CMDLINE_PRINTF ("TERM=%s", term ? term : "linux");

  if (g->selinux)
    ADD_CMDLINE ("selinux=1 enforcing=0");
  else
    ADD_CMDLINE ("selinux=0");

  /* XXX This isn't quite right.  Multiple append args won't work. */
  if (g->append)
    ADD_CMDLINE (g->append);

  /* Add the drives.  Each drive becomes ubd<i>=<path>, using the
   * overlay path in preference to the source path when one is set.
   */
  ITER_DRIVES (g, i, drv) {
    if (!drv->overlay)
      ADD_CMDLINE_PRINTF ("ubd%zu=%s", i, drv->src.u.path);
    else
      ADD_CMDLINE_PRINTF ("ubd%zu=%s", i, drv->overlay);
  }

  /* Add the ext2 appliance drive (after all the drives). */
  if (has_appliance_drive) {
    /* "ubd" followed by the drive-name suffix written at offset 3. */
    char drv_name[64] = "ubd";

    guestfs___drive_name (g->nr_drives, &drv_name[3]);

    ADD_CMDLINE_PRINTF ("ubd%zu=%s", g->nr_drives, appliance_cow);
    ADD_CMDLINE_PRINTF ("root=/dev/%s", drv_name);
  }

  /* Create the daemon socket.  The child's end of the daemon
   * socketpair is passed by file descriptor number, which is why
   * FD_CLOEXEC is cleared on dsv[1] in the child below.
   */
  ADD_CMDLINE_PRINTF ("ssl3=fd:%d", dsv[1]);
  ADD_CMDLINE ("guestfs_channel=/dev/ttyS3");

#if 0 /* XXX This could be made to work. */
#ifdef VALGRIND_DAEMON
  /* Set up virtio-serial channel for valgrind messages. */
  ADD_CMDLINE ("-chardev");
  ADD_CMDLINE_PRINTF ("file,path=%s/valgrind.log.%d,id=valgrind",
                      VALGRIND_LOG_PATH, getpid ());
  ADD_CMDLINE ("-device");
  ADD_CMDLINE ("virtserialport,chardev=valgrind,name=org.libguestfs.valgrind");
#endif
#endif

  /* Add any vmlinux parameters.  Each parameter is appended as its
   * own argv element, followed by its value (if any) as a separate
   * element.
   */
  for (hp = g->hv_params; hp; hp = hp->next) {
    ADD_CMDLINE (hp->hv_param);
    if (hp->hv_value)
      ADD_CMDLINE (hp->hv_value);
  }

  /* Finish off the command line. */
  guestfs___end_stringsbuf (g, &cmdline);

  r = fork ();
  if (r == -1) {
    perrorf (g, "fork");
    /* csv[] only exists when the console socketpair was created. */
    if (!g->direct_mode) {
      close (csv[0]);
      close (csv[1]);
    }
    close (dsv[0]);
    close (dsv[1]);
    goto cleanup0;
  }

  if (r == 0) {                 /* Child (vmlinux). */
    /* Set up the daemon socket for the child. */
    close (dsv[0]);
    set_cloexec_flag (dsv[1], 0); /* so it doesn't close across exec */

    if (!g->direct_mode) {
      /* Set up stdin, stdout, stderr.
       *
       * The dup() calls below are not told which descriptor to
       * produce; they rely on dup() returning the lowest-numbered
       * free descriptor, so the order of close/dup here matters.
       */
      close (0);
      close (1);
      close (csv[0]);

      /* We set the FD_CLOEXEC flag on the socket above, but now (in
       * the child) it's safe to unset this flag so vmlinux can use the
       * socket.
       */
      set_cloexec_flag (csv[1], 0);

      /* Stdin. */
      if (dup (csv[1]) == -1) {
      dup_failed:
        /* Shared failure exit for all three dup() calls. */
        perror ("dup failed");
        _exit (EXIT_FAILURE);
      }
      /* Stdout. */
      if (dup (csv[1]) == -1)
        goto dup_failed;

      /* Send stderr to the pipe as well. */
      close (2);
      if (dup (csv[1]) == -1)
        goto dup_failed;

      close (csv[1]);
    }

    /* Dump the command line (after setting up stderr above). */
    if (g->verbose)
      print_vmlinux_command_line (g, cmdline.argv);

    /* Put vmlinux in a new process group. */
    if (g->pgroup)
      setpgid (0, 0);

    /* NOTE(review): setenv is called after fork even though the
     * comment above the command line construction says
     * non-signal-safe functions must be avoided here -- confirm
     * this is acceptable.
     */
    setenv ("LC_ALL", "C", 1);

    execv (g->hv, cmdline.argv); /* Run vmlinux. */
    /* Only reached if execv failed. */
    perror (g->hv);
    _exit (EXIT_FAILURE);
  }

  /* Parent (library). */
  data->pid = r;

  /* Fork the recovery process off which will kill vmlinux if the
   * parent process fails to do so (eg. if the parent segfaults).
   */
  data->recoverypid = -1;
  if (g->recovery_proc) {
    r = fork ();
    if (r == 0) {
      /* NOTE(review): this 'i' shadows the function-level
       * 'size_t i'; here it is only a signal number counter.
       */
      int i, fd, max_fd;
      struct sigaction sa;
      pid_t vmlinux_pid = data->pid;
      pid_t parent_pid = getppid ();

      /* Remove all signal handlers.  See the justification here:
       * https://www.redhat.com/archives/libvir-list/2008-August/msg00303.html
       * We don't mask signal handlers yet, so this isn't completely
       * race-free, but better than not doing it at all.
       */
      memset (&sa, 0, sizeof sa);
      sa.sa_handler = SIG_DFL;
      sa.sa_flags = 0;
      sigemptyset (&sa.sa_mask);
      for (i = 1; i < NSIG; ++i)
        sigaction (i, &sa, NULL);

      /* Close all other file descriptors.  This ensures that we don't
       * hold open (eg) pipes from the parent process.
       */
      max_fd = sysconf (_SC_OPEN_MAX);
      if (max_fd == -1)
        max_fd = 1024;
      if (max_fd > 65536)
        max_fd = 65536; /* bound the amount of work we do here */
      for (fd = 0; fd < max_fd; ++fd)
        close (fd);

      /* It would be nice to be able to put this in the same process
       * group as vmlinux (ie. setpgid (0, vmlinux_pid)).  However
       * this is not possible because we don't have any guarantee here
       * that the vmlinux process has started yet.
       */
      if (g->pgroup)
        setpgid (0, 0);

      /* Writing to argv is hideously complicated and error prone.  See:
       * http://git.postgresql.org/gitweb/?p=postgresql.git;a=blob;f=src/backend/utils/misc/ps_status.c;hb=HEAD
       */

      /* Loop around waiting for one or both of the other processes to
       * disappear.  It's fair to say this is very hairy.  The PIDs that
       * we are looking at might be reused by another process.  We are
       * effectively polling.  Is the cure worse than the disease?
       *
       * Any kill(pid, 0) failure is treated as "process gone"; errno
       * is not examined.
       */
      for (;;) {
        if (kill (vmlinux_pid, 0) == -1)
          /* vmlinux's gone away, we aren't needed */
          _exit (EXIT_SUCCESS);
        if (kill (parent_pid, 0) == -1) {
          /* Parent's gone away, vmlinux still around, so kill vmlinux.
           * (data->pid was copied into vmlinux_pid above.)
           */
          kill (data->pid, SIGKILL);
          _exit (EXIT_SUCCESS);
        }
        sleep (2);
      }
    }

    /* Don't worry, if the fork failed, this will be -1.  The recovery
     * process isn't essential.
     */
    data->recoverypid = r;
  }

  if (!g->direct_mode) {
    /* Close the other end of the console socketpair. */
    close (csv[1]);

    console_sock = csv[0];      /* stdin of child */
    csv[0] = -1;                /* ownership moved to console_sock */
  }

  daemon_sock = dsv[0];
  close (dsv[1]);
  dsv[0] = -1;                  /* ownership moved to daemon_sock */

  g->state = LAUNCHING;

  /* Wait for vmlinux to start and to connect back to us and send the
   * GUESTFS_LAUNCH_FLAG message.  In this function the channel is
   * the dsv socketpair whose other end was handed to vmlinux with
   * ssl3=fd:N above.
   */
  g->conn =
    guestfs___new_conn_socket_connected (g, daemon_sock, console_sock);
  if (!g->conn)
    goto cleanup1;

  /* g->conn now owns these sockets. */
  daemon_sock = console_sock = -1;

  /* We now have to wait for vmlinux to start up, the daemon to start
   * running, and for it to send the GUESTFS_LAUNCH_FLAG to us.
   */
  r = guestfs___recv_from_daemon (g, &size, &buf);

  if (r == -1) {
    guestfs___launch_failed_error (g);
    goto cleanup1;
  }

  /* Anything other than the launch flag is treated as a failed
   * launch.
   */
  if (size != GUESTFS_LAUNCH_FLAG) {
    guestfs___launch_failed_error (g);
    goto cleanup1;
  }

  if (g->verbose)
    guestfs___print_timestamped_message (g, "appliance is up");

  /* This is possible in some really strange situations, such as
   * guestfsd starts up OK but then vmlinux immediately exits.  Check
   * for it because the caller is probably expecting to be able to
   * send commands after this function returns.
   */
  if (g->state != READY) {
    error (g, _("vmlinux launched and contacted daemon, but state != READY"));
    goto cleanup1;
  }

  if (has_appliance_drive)
    guestfs___add_dummy_appliance_drive (g);

  return 0;

  /* cleanup1: failure after the child process(es) were started.
   * Kills and reaps them, then falls through to cleanup0.
   *
   * NOTE(review): every 'goto cleanup1' occurs after csv[0] and
   * dsv[0] were set to -1 above, so the first two checks look
   * purely defensive.
   */
 cleanup1:
  if (!g->direct_mode && csv[0] >= 0)
    close (csv[0]);
  if (dsv[0] >= 0)
    close (dsv[0]);
  if (data->pid > 0) kill (data->pid, SIGKILL);
  if (data->recoverypid > 0) kill (data->recoverypid, SIGKILL);
  if (data->pid > 0) waitpid (data->pid, NULL, 0);
  if (data->recoverypid > 0) waitpid (data->recoverypid, NULL, 0);
  data->pid = 0;
  data->recoverypid = 0;
  memset (&g->launch_t, 0, sizeof g->launch_t);

  /* cleanup0: failure before any child was started (or fall-through
   * from cleanup1).  Closes the parent-side sockets still owned
   * locally, frees the connection and returns to the CONFIG state.
   */
 cleanup0:
  if (daemon_sock >= 0)
    close (daemon_sock);
  if (console_sock >= 0)
    close (console_sock);
  if (g->conn) {
    g->conn->ops->free_connection (g, g->conn);
    g->conn = NULL;
  }
  g->state = CONFIG;
  return -1;
}
static int launch_direct (guestfs_h *g, void *datav, const char *arg) { struct backend_direct_data *data = datav; CLEANUP_FREE_STRINGSBUF DECLARE_STRINGSBUF (cmdline); int daemon_accept_sock = -1, console_sock = -1; int r; int flags; int sv[2]; char guestfsd_sock[256]; struct sockaddr_un addr; CLEANUP_FREE char *kernel = NULL, *dtb = NULL, *initrd = NULL, *appliance = NULL; int has_appliance_drive; CLEANUP_FREE char *appliance_dev = NULL; uint32_t size; CLEANUP_FREE void *buf = NULL; struct drive *drv; size_t i; int virtio_scsi; struct hv_param *hp; bool has_kvm; bool force_tcg; /* At present you must add drives before starting the appliance. In * future when we enable hotplugging you won't need to do this. */ if (!g->nr_drives) { error (g, _("you must call guestfs_add_drive before guestfs_launch")); return -1; } force_tcg = guestfs___get_backend_setting_bool (g, "force_tcg"); if (!force_tcg) debian_kvm_warning (g); guestfs___launch_send_progress (g, 0); TRACE0 (launch_build_appliance_start); /* Locate and/or build the appliance. */ if (guestfs___build_appliance (g, &kernel, &dtb, &initrd, &appliance) == -1) return -1; has_appliance_drive = appliance != NULL; TRACE0 (launch_build_appliance_end); guestfs___launch_send_progress (g, 3); if (g->verbose) guestfs___print_timestamped_message (g, "begin testing qemu features"); /* Get qemu help text and version. */ if (qemu_supports (g, data, NULL) == -1) goto cleanup0; /* Using virtio-serial, we need to create a local Unix domain socket * for qemu to connect to. 
*/ snprintf (guestfsd_sock, sizeof guestfsd_sock, "%s/guestfsd.sock", g->tmpdir); unlink (guestfsd_sock); daemon_accept_sock = socket (AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0); if (daemon_accept_sock == -1) { perrorf (g, "socket"); goto cleanup0; } addr.sun_family = AF_UNIX; strncpy (addr.sun_path, guestfsd_sock, UNIX_PATH_MAX); addr.sun_path[UNIX_PATH_MAX-1] = '\0'; if (bind (daemon_accept_sock, &addr, sizeof addr) == -1) { perrorf (g, "bind"); goto cleanup0; } if (listen (daemon_accept_sock, 1) == -1) { perrorf (g, "listen"); goto cleanup0; } if (!g->direct_mode) { if (socketpair (AF_LOCAL, SOCK_STREAM|SOCK_CLOEXEC, 0, sv) == -1) { perrorf (g, "socketpair"); goto cleanup0; } } if (g->verbose) guestfs___print_timestamped_message (g, "finished testing qemu features"); /* Construct the qemu command line. We have to do this before * forking, because after fork we are not allowed to use * non-signal-safe functions such as malloc. */ #define ADD_CMDLINE(str) \ guestfs___add_string (g, &cmdline, (str)) #define ADD_CMDLINE_STRING_NODUP(str) \ guestfs___add_string_nodup (g, &cmdline, (str)) #define ADD_CMDLINE_PRINTF(fs,...) \ guestfs___add_sprintf (g, &cmdline, (fs), ##__VA_ARGS__) ADD_CMDLINE (g->hv); /* CVE-2011-4127 mitigation: Disable SCSI ioctls on virtio-blk * devices. The -global option must exist, but you can pass any * strings to it so we don't need to check for the specific virtio * feature. */ if (qemu_supports (g, data, "-global")) { ADD_CMDLINE ("-global"); ADD_CMDLINE (VIRTIO_BLK ".scsi=off"); } if (qemu_supports (g, data, "-nodefconfig")) ADD_CMDLINE ("-nodefconfig"); /* This oddly named option doesn't actually enable FIPS. It just * causes qemu to do the right thing if FIPS is enabled in the * kernel. So like libvirt, we pass it unconditionally. 
*/ if (qemu_supports (g, data, "-enable-fips")) ADD_CMDLINE ("-enable-fips"); /* Newer versions of qemu (from around 2009/12) changed the * behaviour of monitors so that an implicit '-monitor stdio' is * assumed if we are in -nographic mode and there is no other * -monitor option. Only a single stdio device is allowed, so * this broke the '-serial stdio' option. There is a new flag * called -nodefaults which gets rid of all this default crud, so * let's use that to avoid this and any future surprises. */ if (qemu_supports (g, data, "-nodefaults")) ADD_CMDLINE ("-nodefaults"); ADD_CMDLINE ("-display"); ADD_CMDLINE ("none"); #ifdef MACHINE_TYPE ADD_CMDLINE ("-M"); ADD_CMDLINE (MACHINE_TYPE); #endif /* If this is uncommented, then qemu won't start running the * appliance immediately. It will wait for you to connect to it * using gdb: * * $ gdb * (gdb) symbol-file /path/to/vmlinux * (gdb) target remote tcp::1234 * (gdb) cont * * You can then debug the appliance kernel, which is useful to debug * boot failures (especially ones where there are no debug messages * printed - tip: look in the kernel log_buf). * * On Fedora, install kernel-debuginfo for the vmlinux file * (containing symbols). Make sure the symbols precisely match the * kernel being used. */ #if 0 ADD_CMDLINE ("-S"); ADD_CMDLINE ("-s"); warning (g, "qemu debugging is enabled, connect gdb to tcp::1234 to begin"); #endif /* Try to guess if KVM is available. We are just checking that * /dev/kvm is openable. That's not reliable, since /dev/kvm * might be openable by qemu but not by us (think: SELinux) in * which case the user would not get hardware virtualization, * although at least shouldn't fail. */ has_kvm = is_openable (g, "/dev/kvm", O_RDWR|O_CLOEXEC); /* The qemu -machine option (added 2010-12) is a bit more sane * since it falls back through various different acceleration * modes, so try that first (thanks Markus Armbruster). 
*/ if (qemu_supports (g, data, "-machine")) { ADD_CMDLINE ("-machine"); if (!force_tcg) ADD_CMDLINE ("accel=kvm:tcg"); else ADD_CMDLINE ("accel=tcg"); } else { /* qemu sometimes needs this option to enable hardware * virtualization, but some versions of 'qemu-kvm' will use KVM * regardless (even where this option appears in the help text). * It is rumoured that there are versions of qemu where * supplying this option when hardware virtualization is not * available will cause qemu to fail. A giant clusterfuck with * the qemu command line, again. */ if (has_kvm && !force_tcg && qemu_supports (g, data, "-enable-kvm")) ADD_CMDLINE ("-enable-kvm"); } if (g->smp > 1) { ADD_CMDLINE ("-smp"); ADD_CMDLINE_PRINTF ("%d", g->smp); } ADD_CMDLINE ("-m"); ADD_CMDLINE_PRINTF ("%d", g->memsize); /* Force exit instead of reboot on panic */ ADD_CMDLINE ("-no-reboot"); /* These are recommended settings, see RHBZ#1053847. */ ADD_CMDLINE ("-rtc"); ADD_CMDLINE ("driftfix=slew"); #ifndef __arm__ /* qemu-system-arm advertises the -no-hpet option but if you try * to use it, it usefully says: * "Option no-hpet not supported for this target". * Cheers qemu developers. How many years have we been asking for * capabilities? Could be 3 or 4 years, I forget. */ ADD_CMDLINE ("-no-hpet"); #endif ADD_CMDLINE ("-no-kvm-pit-reinjection"); ADD_CMDLINE ("-kernel"); ADD_CMDLINE (kernel); if (dtb) { ADD_CMDLINE ("-dtb"); ADD_CMDLINE (dtb); } ADD_CMDLINE ("-initrd"); ADD_CMDLINE (initrd); /* Add drives */ virtio_scsi = qemu_supports_virtio_scsi (g, data); if (virtio_scsi) { /* Create the virtio-scsi bus. */ ADD_CMDLINE ("-device"); ADD_CMDLINE (VIRTIO_SCSI ",id=scsi"); } ITER_DRIVES (g, i, drv) { CLEANUP_FREE char *file = NULL, *escaped_file = NULL, *param = NULL; if (!drv->overlay) { /* Make the file= parameter. */ file = guestfs___drive_source_qemu_param (g, &drv->src); escaped_file = qemu_escape_param (g, file); /* Make the first part of the -drive parameter, everything up to * the if=... at the end. 
*/ param = safe_asprintf (g, "file=%s%s,cache=%s%s%s%s%s,id=hd%zu", escaped_file, drv->readonly ? ",snapshot=on" : "", drv->cachemode ? drv->cachemode : "writeback", drv->src.format ? ",format=" : "", drv->src.format ? drv->src.format : "", drv->disk_label ? ",serial=" : "", drv->disk_label ? drv->disk_label : "", i); } else { /* Writable qcow2 overlay on top of read-only drive. */ escaped_file = qemu_escape_param (g, drv->overlay); param = safe_asprintf (g, "file=%s,cache=unsafe,format=qcow2%s%s,id=hd%zu", escaped_file, drv->disk_label ? ",serial=" : "", drv->disk_label ? drv->disk_label : "", i); } /* If there's an explicit 'iface', use it. Otherwise default to * virtio-scsi if available. Otherwise default to virtio-blk. */ if (drv->iface && STREQ (drv->iface, "virtio")) /* virtio-blk */ goto virtio_blk; #if defined(__arm__) || defined(__powerpc__) else if (drv->iface && STREQ (drv->iface, "ide")) { error (g, "'ide' interface does not work on ARM or PowerPC"); goto cleanup0; } #endif else if (drv->iface) { ADD_CMDLINE ("-drive"); ADD_CMDLINE_PRINTF ("%s,if=%s", param, drv->iface); } else if (virtio_scsi) { ADD_CMDLINE ("-drive"); ADD_CMDLINE_PRINTF ("%s,if=none" /* sic */, param); ADD_CMDLINE ("-device"); ADD_CMDLINE_PRINTF ("scsi-hd,drive=hd%zu", i); } else { virtio_blk: ADD_CMDLINE ("-drive"); ADD_CMDLINE_PRINTF ("%s,if=none" /* sic */, param); ADD_CMDLINE ("-device"); ADD_CMDLINE_PRINTF (VIRTIO_BLK ",drive=hd%zu", i); } }