/* Launch callback for the "unix:" backend: connect to a guestfsd that
 * is already listening on the Unix domain socket 'sockpath' and wait
 * for its initial GUESTFS_LAUNCH_FLAG message.
 *
 * g        - the handle; g->state and g->conn are updated here.
 * datav    - backend private data (not referenced by this backend).
 * sockpath - filesystem path of the socket.  It must be shorter than
 *            UNIX_PATH_MAX bytes so it fits in sun_path with its
 *            terminating NUL.
 *
 * Returns 0 on success, with g->conn owning the connected socket.
 * Returns -1 on failure after reporting an error on the handle.  Once
 * the state has been moved to LAUNCHING, every failure path closes the
 * socket / frees the connection and puts the state back to CONFIG.
 */
static int
launch_unix (guestfs_h *g, void *datav, const char *sockpath)
{
  int r, daemon_sock = -1;
  struct sockaddr_un addr;
  uint32_t size;
  void *buf = NULL;

  if (g->hv_params) {
    error (g, _("cannot set hv parameters with the 'unix:' backend"));
    return -1;
  }

  /* A path that does not fit in sun_path would be silently truncated
   * by the copy below, and we would then connect to some other socket.
   * Refuse it up front instead.
   */
  if (strlen (sockpath) >= (size_t) UNIX_PATH_MAX) {
    error (g, _("socket path is too long: %s"), sockpath);
    return -1;
  }

  if (g->verbose)
    guestfs_int_print_timestamped_message (g, "connecting to %s", sockpath);

  daemon_sock = socket (AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0);
  if (daemon_sock == -1) {
    perrorf (g, "socket");
    return -1;
  }

  /* Zero the whole address first: we pass sizeof addr to connect, so
   * no byte of the structure should be left uninitialized.
   */
  memset (&addr, 0, sizeof addr);
  addr.sun_family = AF_UNIX;
  strncpy (addr.sun_path, sockpath, UNIX_PATH_MAX);
  addr.sun_path[UNIX_PATH_MAX-1] = '\0';

  g->state = LAUNCHING;

  if (connect (daemon_sock, (struct sockaddr *) &addr, sizeof addr) == -1) {
    perrorf (g, "connect");
    goto cleanup;
  }

  g->conn = guestfs_int_new_conn_socket_connected (g, daemon_sock, -1);
  if (!g->conn)
    goto cleanup;

  /* g->conn now owns this socket. */
  daemon_sock = -1;

  /* Only the size word of the first message matters; the payload (if
   * any) is discarded straight away.
   */
  r = guestfs_int_recv_from_daemon (g, &size, &buf);
  free (buf);

  if (r == -1)
    goto cleanup;

  if (size != GUESTFS_LAUNCH_FLAG) {
    error (g, _("guestfs_launch failed, unexpected initial message from guestfsd"));
    goto cleanup;
  }

  if (g->verbose)
    guestfs_int_print_timestamped_message (g, "connected");

  if (g->state != READY) {
    error (g, _("contacted guestfsd, but state != READY"));
    goto cleanup;
  }

  return 0;

 cleanup:
  if (daemon_sock >= 0)
    close (daemon_sock);
  if (g->conn) {
    g->conn->ops->free_connection (g, g->conn);
    g->conn = NULL;
  }
  /* Do not leave the handle stuck in LAUNCHING after a failed launch. */
  g->state = CONFIG;
  return -1;
}
/* Build supermin appliance from supermin_path to $TMPDIR/.guestfs-$UID.
 *
 * g             - the handle.
 * supermin_path - directory holding the supermin appliance description;
 *                 passed through to run_supermin_build.
 * uid           - UID used to name the per-user cache directory and to
 *                 check its ownership.
 * kernel, dtb, initrd, appliance
 *               - out parameters.  On success each receives a buffer
 *                 allocated with safe_malloc holding the path of the
 *                 corresponding file under <cachedir>/appliance.d,
 *                 except that *dtb is set to NULL unless DTB_WILDCARD
 *                 is defined.  They are not written on failure.
 *
 * Returns:
 * 0 = built
 * -1 = error (aborts launch)
 */
static int
build_supermin_appliance (guestfs_h *g,
                          const char *supermin_path,
                          uid_t uid,
                          char **kernel, char **dtb,
                          char **initrd, char **appliance)
{
  CLEANUP_FREE char *tmpdir = guestfs_get_cachedir (g);
  struct stat statbuf;
  size_t len;

  /* Calling strlen on NULL is undefined behaviour.
   * NOTE(review): guestfs_get_cachedir is assumed to have reported the
   * error on the handle already if it returns NULL - confirm.
   */
  if (tmpdir == NULL)
    return -1;

  /* len must be longer than the length of any pathname we can
   * generate in this function.
   */
  len = strlen (tmpdir) + 128;
  char cachedir[len];
  snprintf (cachedir, len, "%s/.guestfs-%ju", tmpdir, (uintmax_t) uid);
  char lockfile[len];
  snprintf (lockfile, len, "%s/lock", cachedir);
  char appliancedir[len];
  snprintf (appliancedir, len, "%s/appliance.d", cachedir);

  /* Best effort: the directory normally exists already, and the checks
   * below catch the case where it could not be created.
   */
  ignore_value (mkdir (cachedir, 0755));
  ignore_value (chmod (cachedir, 0755)); /* RHBZ#921292 */

  /* See if the cache directory exists and passes some simple checks
   * to make sure it has not been tampered with.
   *
   * If we cannot even stat it then nothing can be built there.  This
   * must be reported as an error: returning 0 here would tell the
   * caller the appliance was built while leaving the output
   * parameters unset.
   */
  if (lstat (cachedir, &statbuf) == -1) {
    perrorf (g, "lstat: %s", cachedir);
    return -1;
  }
  if (statbuf.st_uid != uid) {
    error (g, _("security: cached appliance %s is not owned by UID %ju"),
           cachedir, (uintmax_t) uid);
    return -1;
  }
  if (!S_ISDIR (statbuf.st_mode)) {
    error (g, _("security: cached appliance %s is not a directory (mode %o)"),
           cachedir, statbuf.st_mode);
    return -1;
  }
  if ((statbuf.st_mode & 0022) != 0) {
    error (g, _("security: cached appliance %s is writable by group or other (mode %o)"),
           cachedir, statbuf.st_mode);
    return -1;
  }

  /* Touch the cache directory itself (timestamps set to now). */
  (void) utimes (cachedir, NULL);

  if (g->verbose)
    guestfs_int_print_timestamped_message (g, "begin building supermin appliance");

  /* Build the appliance if it needs to be built. */
  if (g->verbose)
    guestfs_int_print_timestamped_message (g, "run supermin");

  if (run_supermin_build (g, lockfile, appliancedir, supermin_path) == -1)
    return -1;

  if (g->verbose)
    guestfs_int_print_timestamped_message (g, "finished building supermin appliance");

  /* Return the appliance filenames. */
  *kernel = safe_malloc (g, len);
#ifdef DTB_WILDCARD
  *dtb = safe_malloc (g, len);
#else
  *dtb = NULL;
#endif
  *initrd = safe_malloc (g, len);
  *appliance = safe_malloc (g, len);
  snprintf (*kernel, len, "%s/kernel", appliancedir);
#ifdef DTB_WILDCARD
  snprintf (*dtb, len, "%s/dtb", appliancedir);
#endif
  snprintf (*initrd, len, "%s/initrd", appliancedir);
  snprintf (*appliance, len, "%s/root", appliancedir);

  /* Touch the files so they don't get deleted (as they are in /var/tmp). */
  (void) utimes (*kernel, NULL);
#ifdef DTB_WILDCARD
  (void) utimes (*dtb, NULL);
#endif
  (void) utimes (*initrd, NULL);

  /* Checking backend != "uml" is a big hack.  UML encodes the mtime
   * of the original backing file (in this case, the appliance) in the
   * COW file, and checks it when adding it to the VM.  If there are
   * multiple threads running and one touches the appliance here, it
   * will disturb the mtime and UML will give an error.
   *
   * We can get rid of this hack as soon as UML fixes the
   * ubdN=cow,original parsing bug, since we won't need to run
   * uml_mkcow separately, so there is no possible race.
   *
   * XXX
   */
  if (STRNEQ (g->backend, "uml"))
    (void) utimes (*appliance, NULL);

  return 0;
}
/* Launch callback for the UML backend: run the vmlinux binary named by
 * g->hv as a child process, optionally fork a recovery process that
 * watches over it, and wait for the daemon inside the appliance to
 * send GUESTFS_LAUNCH_FLAG.
 *
 * g     - the handle.  At least one drive must have been added
 *         (g->nr_drives != 0).
 * datav - backend private data, used as a struct backend_uml_data.
 *         This function fills in its umid, pid and recoverypid fields.
 * arg   - not referenced by this function.
 *
 * Returns 0 on success, with g->conn owning the daemon (and, unless
 * g->direct_mode is set, console) sockets.
 *
 * Returns -1 on failure.  The earliest checks return directly.  Later
 * failures go through cleanup0, which closes any sockets still owned
 * by the local variables, frees g->conn and puts g->state back to
 * CONFIG.  Failures after the child processes exist enter at cleanup1,
 * which first kills and reaps those processes and then falls through
 * to cleanup0.
 */
static int
launch_uml (guestfs_h *g, void *datav, const char *arg)
{
  struct backend_uml_data *data = datav;
  CLEANUP_FREE_STRINGSBUF DECLARE_STRINGSBUF (cmdline);
  /* Our ends of the two socketpairs, once the child ends are closed. */
  int console_sock = -1, daemon_sock = -1;
  int r;
  /* csv = console socketpair, dsv = daemon socketpair.  Index 0 is kept
   * by this process, index 1 is used by the vmlinux child.  csv is only
   * created (and so only valid) when g->direct_mode is not set.
   */
  int csv[2], dsv[2];
  /* kernel is filled in by guestfs_int_build_appliance but is not
   * otherwise used here: the binary that gets executed is g->hv.
   */
  CLEANUP_FREE char *kernel = NULL, *initrd = NULL, *appliance = NULL;
  int has_appliance_drive;
  CLEANUP_FREE char *appliance_cow = NULL;
  uint32_t size;
  CLEANUP_FREE void *buf = NULL;
  struct drive *drv;
  size_t i;
  struct hv_param *hp;
  char *term = getenv ("TERM");

  if (!uml_supported (g))
    return -1;

  if (!g->nr_drives) {
    error (g, _("you must call guestfs_add_drive before guestfs_launch"));
    return -1;
  }

  /* Assign a random unique ID to this run. */
  if (guestfs_int_random_string (data->umid, UML_UMID_LEN) == -1) {
    perrorf (g, "guestfs_int_random_string");
    return -1;
  }

  /* Locate and/or build the appliance. */
  if (guestfs_int_build_appliance (g, &kernel, &initrd, &appliance) == -1)
    return -1;
  /* A NULL appliance path means there is no separate appliance drive
   * to attach.
   */
  has_appliance_drive = appliance != NULL;

  /* Create COW overlays for the appliance.  Note that the documented
   * syntax ubd0=cow,orig does not work since kernel 3.3.  See:
   * http://thread.gmane.org/gmane.linux.uml.devel/13556
   */
  if (has_appliance_drive) {
    appliance_cow = make_cow_overlay (g, appliance);
    if (!appliance_cow)
      goto cleanup0;
  }

  /* The socket that the daemon will talk to us on. */
  if (socketpair (AF_LOCAL, SOCK_STREAM|SOCK_CLOEXEC, 0, dsv) == -1) {
    perrorf (g, "socketpair");
    goto cleanup0;
  }

  /* The console socket. */
  if (!g->direct_mode) {
    if (socketpair (AF_LOCAL, SOCK_STREAM|SOCK_CLOEXEC, 0, csv) == -1) {
      perrorf (g, "socketpair");
      /* cleanup0 knows nothing about dsv, so close it here. */
      close (dsv[0]);
      close (dsv[1]);
      goto cleanup0;
    }
  }

  /* Construct the vmlinux command line.  We have to do this before
   * forking, because after fork we are not allowed to use
   * non-signal-safe functions such as malloc.
   */
#define ADD_CMDLINE(str) \
  guestfs_int_add_string (g, &cmdline, (str))
#define ADD_CMDLINE_PRINTF(fs,...) \
  guestfs_int_add_sprintf (g, &cmdline, (fs), ##__VA_ARGS__)

  /* argv[0]: the vmlinux binary itself. */
  ADD_CMDLINE (g->hv);

  /* Give this instance a unique random ID. */
  ADD_CMDLINE_PRINTF ("umid=%s", data->umid);

  /* Set memory size. */
  ADD_CMDLINE_PRINTF ("mem=%dM", g->memsize);

  /* vmlinux appears to ignore this, but let's add it anyway. */
  ADD_CMDLINE_PRINTF ("initrd=%s", initrd);

  /* Make sure our appliance init script runs first. */
  ADD_CMDLINE ("init=/init");

  /* This tells the /init script not to reboot at the end. */
  ADD_CMDLINE ("guestfs_noreboot=1");

  /* Root filesystem should be mounted read-write (default seems to
   * be "ro").
   */
  ADD_CMDLINE ("rw");

  /* See also guestfs_int_appliance_command_line. */
  if (g->verbose)
    ADD_CMDLINE ("guestfs_verbose=1");

  ADD_CMDLINE ("panic=1");

  /* Pass our own TERM through, falling back to "linux" if it is unset. */
  ADD_CMDLINE_PRINTF ("TERM=%s", term ? term : "linux");

  if (g->selinux)
    ADD_CMDLINE ("selinux=1 enforcing=0");
  else
    ADD_CMDLINE ("selinux=0");

  /* XXX This isn't quite right.  Multiple append args won't work. */
  if (g->append)
    ADD_CMDLINE (g->append);

  /* Add the drives.  Each becomes ubd<index>, backed by the drive's
   * overlay if it has one, otherwise by its source path.
   */
  ITER_DRIVES (g, i, drv) {
    if (!drv->overlay)
      ADD_CMDLINE_PRINTF ("ubd%zu=%s", i, drv->src.u.path);
    else
      ADD_CMDLINE_PRINTF ("ubd%zu=%s", i, drv->overlay);
  }

  /* Add the ext2 appliance drive (after all the drives). */
  if (has_appliance_drive) {
    /* Device name for root=: "ubd" followed by the drive-name suffix
     * that guestfs_int_drive_name writes for index g->nr_drives.
     */
    char drv_name[64] = "ubd";

    guestfs_int_drive_name (g->nr_drives, &drv_name[3]);

    ADD_CMDLINE_PRINTF ("ubd%zu=%s", g->nr_drives, appliance_cow);
    ADD_CMDLINE_PRINTF ("root=/dev/%s", drv_name);
  }

  /* Create the daemon socket: the child's end of the daemon socketpair
   * is handed over by file descriptor number as ssl3, and the
   * appliance is told to use /dev/ttyS3 as the guestfs channel.
   */
  ADD_CMDLINE_PRINTF ("ssl3=fd:%d", dsv[1]);
  ADD_CMDLINE ("guestfs_channel=/dev/ttyS3");

  /* Add any vmlinux parameters. */
  for (hp = g->hv_params; hp; hp = hp->next) {
    ADD_CMDLINE (hp->hv_param);
    if (hp->hv_value)
      ADD_CMDLINE (hp->hv_value);
  }

  /* Finish off the command line. */
  guestfs_int_end_stringsbuf (g, &cmdline);

  r = fork ();
  if (r == -1) {
    perrorf (g, "fork");
    /* Both socketpairs are still fully open in this process and
     * cleanup0 does not know about them, so close them here.
     */
    if (!g->direct_mode) {
      close (csv[0]);
      close (csv[1]);
    }
    close (dsv[0]);
    close (dsv[1]);
    goto cleanup0;
  }

  if (r == 0) {                 /* Child (vmlinux). */
    /* Set up the daemon socket for the child. */
    close (dsv[0]);
    set_cloexec_flag (dsv[1], 0); /* so it doesn't close across exec */

    if (!g->direct_mode) {
      /* Set up stdin, stdout, stderr.  Each dup below returns the
       * lowest free descriptor, which is the one just closed.
       */
      close (0);
      close (1);
      close (csv[0]);

      /* We set the FD_CLOEXEC flag on the socket above, but now (in
       * the child) it's safe to unset this flag so vmlinux can use the
       * socket.
       */
      set_cloexec_flag (csv[1], 0);

      /* Stdin. */
      if (dup (csv[1]) == -1) {
      dup_failed:
        perror ("dup failed");
        _exit (EXIT_FAILURE);
      }
      /* Stdout. */
      if (dup (csv[1]) == -1)
        goto dup_failed;

      /* Send stderr to the pipe as well. */
      close (2);
      if (dup (csv[1]) == -1)
        goto dup_failed;

      close (csv[1]);

      /* RHBZ#1123007 */
      /* NOTE(review): close_file_descriptors is defined elsewhere;
       * presumably it closes every descriptor for which the condition
       * holds, i.e. everything except stdio and the daemon socket.
       */
      close_file_descriptors (fd > 2 && fd != dsv[1]);
    }

    /* Dump the command line (after setting up stderr above). */
    if (g->verbose)
      print_vmlinux_command_line (g, cmdline.argv);

    /* Put vmlinux in a new process group. */
    if (g->pgroup)
      setpgid (0, 0);

    setenv ("LC_ALL", "C", 1);

    execv (g->hv, cmdline.argv); /* Run vmlinux. */
    /* Only reached if execv failed. */
    perror (g->hv);
    _exit (EXIT_FAILURE);
  }

  /* Parent (library). */
  data->pid = r;

  /* Fork the recovery process off which will kill vmlinux if the
   * parent process fails to do so (eg. if the parent segfaults).
   */
  data->recoverypid = -1;
  if (g->recovery_proc) {
    r = fork ();
    if (r == 0) {
      /* Recovery process.  This branch never returns: every path out
       * of the loop below ends in _exit.
       */
      struct sigaction sa;
      pid_t vmlinux_pid = data->pid;
      pid_t parent_pid = getppid ();

      /* Remove all signal handlers.  See the justification here:
       * https://www.redhat.com/archives/libvir-list/2008-August/msg00303.html
       * We don't mask signal handlers yet, so this isn't completely
       * race-free, but better than not doing it at all.
       */
      memset (&sa, 0, sizeof sa);
      sa.sa_handler = SIG_DFL;
      sa.sa_flags = 0;
      sigemptyset (&sa.sa_mask);
      for (i = 1; i < NSIG; ++i)
        sigaction (i, &sa, NULL);

      /* Close all other file descriptors.  This ensures that we don't
       * hold open (eg) pipes from the parent process.
       */
      close_file_descriptors (1);

      /* It would be nice to be able to put this in the same process
       * group as vmlinux (ie. setpgid (0, vmlinux_pid)).  However
       * this is not possible because we don't have any guarantee here
       * that the vmlinux process has started yet.
       */
      if (g->pgroup)
        setpgid (0, 0);

      /* Writing to argv is hideously complicated and error prone.  See:
       * http://git.postgresql.org/gitweb/?p=postgresql.git;a=blob;f=src/backend/utils/misc/ps_status.c;hb=HEAD
       */

      /* Loop around waiting for one or both of the other processes to
       * disappear.  It's fair to say this is very hairy.  The PIDs that
       * we are looking at might be reused by another process.  We are
       * effectively polling.  Is the cure worse than the disease?
       */
      for (;;) {
        /* kill with signal 0 sends nothing; it only probes the PID. */
        if (kill (vmlinux_pid, 0) == -1)
          /* vmlinux's gone away, we aren't needed */
          _exit (EXIT_SUCCESS);
        if (kill (parent_pid, 0) == -1) {
          /* Parent's gone away, vmlinux still around, so kill vmlinux. */
          kill (data->pid, SIGKILL);
          _exit (EXIT_SUCCESS);
        }
        sleep (2);
      }
    }

    /* Don't worry, if the fork failed, this will be -1.  The recovery
     * process isn't essential.
     */
    data->recoverypid = r;
  }

  if (!g->direct_mode) {
    /* Close the other end of the console socketpair. */
    close (csv[1]);
    console_sock = csv[0];      /* stdin of child */
    /* Ownership has moved to console_sock; stop cleanup1 closing it
     * a second time.
     */
    csv[0] = -1;
  }

  /* Likewise keep only our end of the daemon socketpair, tracked from
   * here on by daemon_sock rather than dsv[0].
   */
  daemon_sock = dsv[0];
  close (dsv[1]);
  dsv[0] = -1;

  g->state = LAUNCHING;

  /* Wrap our ends of the daemon and console sockets in a connection
   * object.  vmlinux talks back to us over the daemon socketpair whose
   * other end was passed to it as ssl3=fd:N on the command line above.
   */
  g->conn =
    guestfs_int_new_conn_socket_connected (g, daemon_sock, console_sock);
  if (!g->conn)
    goto cleanup1;

  /* g->conn now owns these sockets. */
  daemon_sock = console_sock = -1;

  /* We now have to wait for vmlinux to start up, the daemon to start
   * running, and for it to send the GUESTFS_LAUNCH_FLAG to us.
   */
  r = guestfs_int_recv_from_daemon (g, &size, &buf);

  if (r == -1) {
    guestfs_int_launch_failed_error (g);
    goto cleanup1;
  }

  /* The first message must be exactly the launch flag. */
  if (size != GUESTFS_LAUNCH_FLAG) {
    guestfs_int_launch_failed_error (g);
    goto cleanup1;
  }

  if (g->verbose)
    guestfs_int_print_timestamped_message (g, "appliance is up");

  /* This is possible in some really strange situations, such as
   * guestfsd starts up OK but then vmlinux immediately exits.  Check
   * for it because the caller is probably expecting to be able to
   * send commands after this function returns.
   */
  if (g->state != READY) {
    error (g, _("vmlinux launched and contacted daemon, but state != READY"));
    goto cleanup1;
  }

  if (has_appliance_drive)
    guestfs_int_add_dummy_appliance_drive (g);

  return 0;

  /* Failure after the child process(es) were started: close whatever
   * is still tracked in csv/dsv, kill and reap the children, clear the
   * recorded PIDs and launch time, then fall through to cleanup0.
   */
 cleanup1:
  if (!g->direct_mode && csv[0] >= 0)
    close (csv[0]);
  if (dsv[0] >= 0)
    close (dsv[0]);
  if (data->pid > 0) kill (data->pid, SIGKILL);
  if (data->recoverypid > 0) kill (data->recoverypid, SIGKILL);
  if (data->pid > 0) waitpid (data->pid, NULL, 0);
  if (data->recoverypid > 0) waitpid (data->recoverypid, NULL, 0);
  data->pid = 0;
  data->recoverypid = 0;
  memset (&g->launch_t, 0, sizeof g->launch_t);

  /* Common failure exit: release sockets not yet handed to g->conn,
   * free the connection if one was created, and return the handle to
   * the CONFIG state.
   */
 cleanup0:
  if (daemon_sock >= 0)
    close (daemon_sock);
  if (console_sock >= 0)
    close (console_sock);
  if (g->conn) {
    g->conn->ops->free_connection (g, g->conn);
    g->conn = NULL;
  }
  g->state = CONFIG;
  return -1;
}