/*
 * Run a single unprivileged-remount scenario in a forked child.
 *
 * The child:
 *   1. enters a user namespace and a fresh mount namespace,
 *   2. mounts `fstype` on /tmp using `mount_flags` and `mount_options`,
 *   3. enters a second user namespace and mount namespace,
 *   4. checks that a bind-remount of /tmp with `remount_flags` succeeds,
 *   5. checks that a bind-remount of /tmp with `invalid_flags` is refused.
 *
 * Every failed step is reported through die().
 *
 * Returns true when the child exits with EXIT_SUCCESS, false for any other
 * exit code.
 */
static bool test_unpriv_remount(const char *fstype, const char *mount_options,
                                int mount_flags, int remount_flags,
                                int invalid_flags)
{
    pid_t worker = fork();

    if (worker == -1) {
        die("fork failed: %s\n", strerror(errno));
    }

    if (worker == 0) {
        /* child: performs the actual mount checks and never returns */
        create_and_enter_userns();
        if (unshare(CLONE_NEWNS) != 0) {
            die("unshare(CLONE_NEWNS) failed: %s\n", strerror(errno));
        }

        if (mount("testing", "/tmp", fstype, mount_flags, mount_options) != 0) {
            die("mount of %s with options '%s' on /tmp failed: %s\n",
                fstype,
                mount_options? mount_options : "",
                strerror(errno));
        }

        /* Second, nested user + mount namespace for the remount attempts. */
        create_and_enter_userns();
        if (unshare(CLONE_NEWNS) != 0) {
            die("unshare(CLONE_NEWNS) failed: %s\n", strerror(errno));
        }

        /* The permitted flag set must be accepted ... */
        if (mount("/tmp", "/tmp", "none",
                  MS_REMOUNT | MS_BIND | remount_flags, NULL) != 0) {
            die("remount of /tmp failed: %s\n", strerror(errno));
        }

        /* ... and the forbidden flag set must be rejected. */
        if (mount("/tmp", "/tmp", "none",
                  MS_REMOUNT | MS_BIND | invalid_flags, NULL) == 0) {
            die("remount of /tmp with invalid flags "
                "succeeded unexpectedly\n");
        }

        exit(EXIT_SUCCESS);
    }

    /* parent: collect the child's verdict */
    int wstatus;
    pid_t reaped = waitpid(worker, &wstatus, 0);

    if (reaped == -1) {
        die("waitpid failed: %s\n", strerror(errno));
    }
    if (reaped != worker) {
        die("waited for %d got %d\n", worker, reaped);
    }
    if (!WIFEXITED(wstatus)) {
        die("child did not terminate cleanly\n");
    }

    return WEXITSTATUS(wstatus) == EXIT_SUCCESS;
}
int sandbox(void* sandbox_arg) { // Get rid of unused parameter warning (void)sandbox_arg; pid_t child_pid = getpid(); if (arg_debug) printf("Initializing child process\n"); // close each end of the unused pipes close(parent_to_child_fds[1]); close(child_to_parent_fds[0]); // wait for parent to do base setup wait_for_other(parent_to_child_fds[0]); if (arg_debug && child_pid == 1) printf("PID namespace installed\n"); //**************************** // set hostname //**************************** if (cfg.hostname) { if (sethostname(cfg.hostname, strlen(cfg.hostname)) < 0) errExit("sethostname"); } //**************************** // mount namespace //**************************** // mount events are not forwarded between the host the sandbox if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) < 0) { chk_chroot(); } //**************************** // log sandbox data //**************************** if (cfg.name) fs_logger2("sandbox name:", cfg.name); fs_logger2int("sandbox pid:", (int) sandbox_pid); if (cfg.chrootdir) fs_logger("sandbox filesystem: chroot"); else if (arg_overlay) fs_logger("sandbox filesystem: overlay"); else fs_logger("sandbox filesystem: local"); fs_logger("install mount namespace"); //**************************** // netfilter etc. 
//**************************** if (arg_netfilter && any_bridge_configured()) { // assuming by default the client filter netfilter(arg_netfilter_file); } if (arg_netfilter6 && any_bridge_configured()) { // assuming by default the client filter netfilter6(arg_netfilter6_file); } // load IBUS env variables if (arg_nonetwork || any_bridge_configured() || any_interface_configured()) { // do nothing - there are problems with ibus version 1.5.11 } else env_ibus_load(); // grab a copy of cp command fs_build_cp_command(); // trace pre-install if (arg_trace || arg_tracelog) fs_trace_preload(); //**************************** // configure filesystem //**************************** #ifdef HAVE_CHROOT if (cfg.chrootdir) { fs_chroot(cfg.chrootdir); // redo cp command fs_build_cp_command(); // force caps and seccomp if not started as root if (getuid() != 0) { // force default seccomp inside the chroot, no keep or drop list // the list build on top of the default drop list is kept intact arg_seccomp = 1; if (cfg.seccomp_list_drop) { free(cfg.seccomp_list_drop); cfg.seccomp_list_drop = NULL; } if (cfg.seccomp_list_keep) { free(cfg.seccomp_list_keep); cfg.seccomp_list_keep = NULL; } // disable all capabilities if (arg_caps_default_filter || arg_caps_list) fprintf(stderr, "Warning: all capabilities disabled for a regular user during chroot\n"); arg_caps_drop_all = 1; // drop all supplementary groups; /etc/group file inside chroot // is controlled by a regular usr arg_nogroups = 1; if (!arg_quiet) printf("Dropping all Linux capabilities and enforcing default seccomp filter\n"); } else arg_seccomp = 1; //**************************** // trace pre-install, this time inside chroot //**************************** if (arg_trace || arg_tracelog) fs_trace_preload(); } else #endif if (arg_overlay) fs_overlayfs(); else fs_basic_fs(); //**************************** // set hostname in /etc/hostname //**************************** if (cfg.hostname) { fs_hostname(cfg.hostname); } 
//**************************** // private mode //**************************** if (arg_private) { if (cfg.home_private) // --private= fs_private_homedir(); else if (cfg.home_private_keep) // --private-home= fs_private_home_list(); else // --private fs_private(); } if (arg_private_dev) fs_private_dev(); if (arg_private_etc) { fs_private_etc_list(); // create /etc/ld.so.preload file again if (arg_trace || arg_tracelog) fs_trace_preload(); } if (arg_private_bin) fs_private_bin_list(); //**************************** // apply the profile file //**************************** if (cfg.profile) { // apply all whitelist commands ... fs_whitelist(); // ... followed by blacklist commands fs_blacklist(); } //**************************** // install trace //**************************** if (arg_trace || arg_tracelog) fs_trace(); //**************************** // update /proc, /dev, /boot directorymy //**************************** fs_proc_sys_dev_boot(); //**************************** // --nosound and fix for pulseaudio 7.0 //**************************** if (arg_nosound) pulseaudio_disable(); else pulseaudio_init(); //**************************** // networking //**************************** if (arg_nonetwork) { net_if_up("lo"); if (arg_debug) printf("Network namespace enabled, only loopback interface available\n"); } else if (any_bridge_configured() || any_interface_configured()) { // configure lo and eth0...eth3 net_if_up("lo"); if (mac_not_zero(cfg.bridge0.macsandbox)) net_config_mac(cfg.bridge0.devsandbox, cfg.bridge0.macsandbox); sandbox_if_up(&cfg.bridge0); if (mac_not_zero(cfg.bridge1.macsandbox)) net_config_mac(cfg.bridge1.devsandbox, cfg.bridge1.macsandbox); sandbox_if_up(&cfg.bridge1); if (mac_not_zero(cfg.bridge2.macsandbox)) net_config_mac(cfg.bridge2.devsandbox, cfg.bridge2.macsandbox); sandbox_if_up(&cfg.bridge2); if (mac_not_zero(cfg.bridge3.macsandbox)) net_config_mac(cfg.bridge3.devsandbox, cfg.bridge3.macsandbox); sandbox_if_up(&cfg.bridge3); // add a default route 
if (cfg.defaultgw) { // set the default route if (net_add_route(0, 0, cfg.defaultgw)) fprintf(stderr, "Warning: cannot configure default route\n"); } // enable interfaces if (cfg.interface0.configured && cfg.interface0.ip) { if (arg_debug) printf("Configuring %d.%d.%d.%d address on interface %s\n", PRINT_IP(cfg.interface0.ip), cfg.interface0.dev); net_if_ip(cfg.interface0.dev, cfg.interface0.ip, cfg.interface0.mask, cfg.interface0.mtu); net_if_up(cfg.interface0.dev); } if (cfg.interface1.configured && cfg.interface1.ip) { if (arg_debug) printf("Configuring %d.%d.%d.%d address on interface %s\n", PRINT_IP(cfg.interface1.ip), cfg.interface1.dev); net_if_ip(cfg.interface1.dev, cfg.interface1.ip, cfg.interface1.mask, cfg.interface1.mtu); net_if_up(cfg.interface1.dev); } if (cfg.interface2.configured && cfg.interface2.ip) { if (arg_debug) printf("Configuring %d.%d.%d.%d address on interface %s\n", PRINT_IP(cfg.interface2.ip), cfg.interface2.dev); net_if_ip(cfg.interface2.dev, cfg.interface2.ip, cfg.interface2.mask, cfg.interface2.mtu); net_if_up(cfg.interface2.dev); } if (cfg.interface3.configured && cfg.interface3.ip) { if (arg_debug) printf("Configuring %d.%d.%d.%d address on interface %s\n", PRINT_IP(cfg.interface3.ip), cfg.interface3.dev); net_if_ip(cfg.interface3.dev, cfg.interface3.ip, cfg.interface3.mask, cfg.interface3.mtu); net_if_up(cfg.interface3.dev); } if (arg_debug) printf("Network namespace enabled\n"); } // if any dns server is configured, it is time to set it now fs_resolvconf(); fs_logger_print(); fs_logger_change_owner(); // print network configuration if (!arg_quiet) { if (any_bridge_configured() || any_interface_configured() || cfg.defaultgw || cfg.dns1) { printf("\n"); if (any_bridge_configured() || any_interface_configured()) net_ifprint(); if (cfg.defaultgw != 0) printf("Default gateway %d.%d.%d.%d\n", PRINT_IP(cfg.defaultgw)); if (cfg.dns1 != 0) printf("DNS server %d.%d.%d.%d\n", PRINT_IP(cfg.dns1)); if (cfg.dns2 != 0) printf("DNS server 
%d.%d.%d.%d\n", PRINT_IP(cfg.dns2)); if (cfg.dns3 != 0) printf("DNS server %d.%d.%d.%d\n", PRINT_IP(cfg.dns3)); printf("\n"); } } fs_delete_cp_command(); //**************************** // set application environment //**************************** prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0); // kill the child in case the parent died int cwd = 0; if (cfg.cwd) { if (chdir(cfg.cwd) == 0) cwd = 1; } if (!cwd) { if (chdir("/") < 0) errExit("chdir"); if (cfg.homedir) { struct stat s; if (stat(cfg.homedir, &s) == 0) { /* coverity[toctou] */ if (chdir(cfg.homedir) < 0) errExit("chdir"); } } } // set environment env_defaults(); // set user-supplied environment variables env_apply(); //**************************** // set security filters //**************************** // set capabilities if (!arg_noroot) set_caps(); // set rlimits set_rlimits(); // set seccomp #ifdef HAVE_SECCOMP // install protocol filter if (cfg.protocol) { protocol_filter(); // install filter protocol_filter_save(); // save filter in PROTOCOL_CFG } // if a keep list is available, disregard the drop list if (arg_seccomp == 1) { if (cfg.seccomp_list_keep) seccomp_filter_keep(); else if (cfg.seccomp_list_errno) seccomp_filter_errno(); else seccomp_filter_drop(); } #endif // set cpu affinity if (cfg.cpus) { save_cpu(); // save cpu affinity mask to CPU_CFG file set_cpu_affinity(); } // save cgroup in CGROUP_CFG file if (cfg.cgroup) save_cgroup(); //**************************************** // drop privileges or create a new user namespace //**************************************** save_nogroups(); if (arg_noroot) { int rv = unshare(CLONE_NEWUSER); if (rv == -1) { fprintf(stderr, "Error: cannot mount a new user namespace\n"); perror("unshare"); drop_privs(arg_nogroups); } } else drop_privs(arg_nogroups); // notify parent that new user namespace has been created so a proper // UID/GID map can be setup notify_other(child_to_parent_fds[1]); close(child_to_parent_fds[1]); // wait for parent to finish setting up a 
proper UID/GID map wait_for_other(parent_to_child_fds[0]); close(parent_to_child_fds[0]); // somehow, the new user namespace resets capabilities; // we need to do them again if (arg_noroot) { set_caps(); if (arg_debug) printf("noroot user namespace installed\n"); } //**************************************** // fork the application and monitor it //**************************************** pid_t app_pid = fork(); if (app_pid == -1) errExit("fork"); if (app_pid == 0) { prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0); // kill the child in case the parent died start_application(); // start app } monitor_application(app_pid); // monitor application return 0; }
/*
 * Run the PPPoE client against a pppoe-server process over a veth pair.
 *
 * Steps:
 *   1. Unshare the network namespace (skip the test when that is refused
 *      with EPERM).
 *   2. Create a veth pair named "pppoe-server"/"pppoe-client" via rtnetlink
 *      and bring both ends up.
 *   3. Fork a child that sleeps two seconds and then executes pppoe-server
 *      on the server end.
 *   4. Run the client on the client end, then terminate and reap the server.
 *
 * Returns EXIT_TEST_SKIP when unshare() fails with EPERM, EXIT_SUCCESS
 * otherwise; every other step is checked with assert_se().
 */
static int test_pppoe_server(sd_event *e) {
        sd_netlink *nl;
        sd_netlink_message *req;
        pid_t server_pid;
        int rc, ifindex_client, ifindex_server;

        rc = unshare(CLONE_NEWNET);
        if (rc < 0 && errno == EPERM)
                return EXIT_TEST_SKIP;

        assert_se(rc >= 0);

        assert_se(sd_netlink_open(&nl) >= 0);
        assert_se(sd_netlink_attach_event(nl, e, 0) >= 0);

        /* RTM_NEWLINK: veth "pppoe-server" with peer "pppoe-client" */
        assert_se(sd_rtnl_message_new_link(nl, &req, RTM_NEWLINK, 0) >= 0);
        assert_se(sd_netlink_message_append_string(req, IFLA_IFNAME, "pppoe-server") >= 0);
        assert_se(sd_netlink_message_open_container(req, IFLA_LINKINFO) >= 0);
        assert_se(sd_netlink_message_open_container_union(req, IFLA_INFO_DATA, "veth") >= 0);
        assert_se(sd_netlink_message_open_container(req, VETH_INFO_PEER) >= 0);
        assert_se(sd_netlink_message_append_string(req, IFLA_IFNAME, "pppoe-client") >= 0);
        assert_se(sd_netlink_message_close_container(req) >= 0);
        assert_se(sd_netlink_message_close_container(req) >= 0);
        assert_se(sd_netlink_message_close_container(req) >= 0);
        assert_se(sd_netlink_call(nl, req, 0, NULL) >= 0);

        ifindex_client = (int) if_nametoindex("pppoe-client");
        assert_se(ifindex_client > 0);
        ifindex_server = (int) if_nametoindex("pppoe-server");
        assert_se(ifindex_server > 0);

        /* RTM_SETLINK: client end up */
        req = sd_netlink_message_unref(req);
        assert_se(sd_rtnl_message_new_link(nl, &req, RTM_SETLINK, ifindex_client) >= 0);
        assert_se(sd_rtnl_message_link_set_flags(req, IFF_UP, IFF_UP) >= 0);
        assert_se(sd_netlink_call(nl, req, 0, NULL) >= 0);

        /* RTM_SETLINK: server end up */
        req = sd_netlink_message_unref(req);
        assert_se(sd_rtnl_message_new_link(nl, &req, RTM_SETLINK, ifindex_server) >= 0);
        assert_se(sd_rtnl_message_link_set_flags(req, IFF_UP, IFF_UP) >= 0);
        assert_se(sd_netlink_call(nl, req, 0, NULL) >= 0);

        server_pid = fork();
        assert_se(server_pid >= 0);
        if (server_pid == 0) {
                /* delay the server so the client sends a few discover
                 * messages before anything answers */
                sleep(2);

                /* TODO: manage pppoe-server-options */
                execlp("pppoe-server", "pppoe-server", "-F",
                       "-I", "pppoe-server",
                       "-C", "Test-AC",
                       "-S", "Service-Default",
                       "-S", "Service-First-Auxiliary",
                       "-S", "Service-Second-Auxiliary",
                       NULL);
                assert_not_reached("failed to execute pppoe-server. not installed?");
        }

        client_run("pppoe-client", e);

        /* stop and reap the server, then release the netlink objects */
        assert_se(kill(server_pid, SIGTERM) >= 0);
        assert_se(wait_for_terminate(server_pid, NULL) >= 0);

        assert_se(!sd_netlink_message_unref(req));
        assert_se(!sd_netlink_unref(nl));

        return EXIT_SUCCESS;
}
int import_fork_tar_x(const char *path, pid_t *ret) { _cleanup_close_pair_ int pipefd[2] = { -1, -1 }; pid_t pid; int r; assert(path); assert(ret); if (pipe2(pipefd, O_CLOEXEC) < 0) return log_error_errno(errno, "Failed to create pipe for tar: %m"); pid = fork(); if (pid < 0) return log_error_errno(errno, "Failed to fork off tar: %m"); if (pid == 0) { int null_fd; uint64_t retain = (1ULL << CAP_CHOWN) | (1ULL << CAP_FOWNER) | (1ULL << CAP_FSETID) | (1ULL << CAP_MKNOD) | (1ULL << CAP_SETFCAP) | (1ULL << CAP_DAC_OVERRIDE); /* Child */ reset_all_signal_handlers(); reset_signal_mask(); assert_se(prctl(PR_SET_PDEATHSIG, SIGTERM) == 0); pipefd[1] = safe_close(pipefd[1]); if (dup2(pipefd[0], STDIN_FILENO) != STDIN_FILENO) { log_error_errno(errno, "Failed to dup2() fd: %m"); _exit(EXIT_FAILURE); } if (pipefd[0] != STDIN_FILENO) pipefd[0] = safe_close(pipefd[0]); null_fd = open("/dev/null", O_WRONLY|O_NOCTTY); if (null_fd < 0) { log_error_errno(errno, "Failed to open /dev/null: %m"); _exit(EXIT_FAILURE); } if (dup2(null_fd, STDOUT_FILENO) != STDOUT_FILENO) { log_error_errno(errno, "Failed to dup2() fd: %m"); _exit(EXIT_FAILURE); } if (null_fd != STDOUT_FILENO) null_fd = safe_close(null_fd); fd_cloexec(STDIN_FILENO, false); fd_cloexec(STDOUT_FILENO, false); fd_cloexec(STDERR_FILENO, false); if (unshare(CLONE_NEWNET) < 0) log_error_errno(errno, "Failed to lock tar into network namespace, ignoring: %m"); r = capability_bounding_set_drop(~retain, true); if (r < 0) log_error_errno(r, "Failed to drop capabilities, ignoring: %m"); execlp("tar", "tar", "--numeric-owner", "-C", path, "-px", NULL); log_error_errno(errno, "Failed to execute tar: %m"); _exit(EXIT_FAILURE); } pipefd[0] = safe_close(pipefd[0]); r = pipefd[1]; pipefd[1] = -1; *ret = pid; return r; }
/*
 * Run a command inside a private user + mount namespace.
 *
 * Usage: <self> command [arguments...]
 *
 * The parent creates a temporary directory, forks, waits for the child and
 * removes the directory again. It returns the child's exit status, or
 * re-raises the child's terminating signal on itself and returns 1.
 *
 * The child unshares its mount and user namespaces, maps its own uid/gid
 * one-to-one, calls bind() with the host root ("/host" when nested, "/"
 * otherwise) and the temporary directory, changes to "/" and executes the
 * requested command.
 */
int main(gint argc, gchar **argv) {
  const gchar *self = *argv++;

  if (argc < 2) {
    g_message("%s command [arguments...]", self);
    return 1;
  }

  g_autofree const gchar *prefix = create_tmpdir();

  pid_t cpid = fork();

  if (cpid < 0)
    fail("fork", errno);

  else if (cpid == 0) {
    /* uid/gid must be read before unshare(): the maps below refer to the
     * ids as seen from the parent namespace. */
    uid_t uid = getuid();
    gid_t gid = getgid();

    if (unshare(CLONE_NEWNS | CLONE_NEWUSER) < 0) {
      /* g_message()/g_file_test() may overwrite errno, so keep a copy. */
      int unshare_errno = errno;

      g_message("Requires Linux version >= 3.19 built with CONFIG_USER_NS");
      if (g_file_test("/proc/sys/kernel/unprivileged_userns_clone",
                      G_FILE_TEST_EXISTS))
        g_message("Run: sudo sysctl -w kernel.unprivileged_userns_clone=1");

      fail("unshare", unshare_errno);
    }

    spit("/proc/self/setgroups", "deny");
    spit("/proc/self/uid_map", "%d %d 1", uid, uid);
    spit("/proc/self/gid_map", "%d %d 1", gid, gid);

    // If there is a /host directory, assume this is nested chrootenv and use
    // it as host instead.
    //
    // g_file_test() returns TRUE when ANY of the OR-ed tests matches, so
    // combining G_FILE_TEST_EXISTS with G_FILE_TEST_IS_DIR would accept a
    // non-directory /host. G_FILE_TEST_IS_DIR alone is the intended check.
    gboolean nested_host = g_file_test("/host", G_FILE_TEST_IS_DIR);

    // Plain pointer on purpose: both alternatives are string literals and
    // must never be handed to g_free() by an auto-cleanup attribute.
    const gchar *host = nested_host ? "/host" : "/";

    bind(host, prefix);

    // Replace /host by an actual (inner) /host.
    if (nested_host) {
      fail_if(g_mkdir("/real-host", 0755));
      fail_if(mount("/host/host", "/real-host", NULL, MS_BIND | MS_REC, NULL));
      // For some reason umount("/host") returns EBUSY even immediately after
      // pivot_root. We detach it at least to keep `/proc/mounts` from blowing
      // up in nested cases.
      fail_if(umount2("/host", MNT_DETACH));
      fail_if(mount("/real-host", "/host", NULL, MS_MOVE, NULL));
      fail_if(rmdir("/real-host"));
    }

    fail_if(chdir("/"));
    fail_if(execvp(*argv, argv));
  }

  else {
    int status;

    fail_if(waitpid(cpid, &status, 0) != cpid);
    fail_if(rmdir(prefix));

    if (WIFEXITED(status))
      return WEXITSTATUS(status);

    else if (WIFSIGNALED(status))
      kill(getpid(), WTERMSIG(status));

    return 1;
  }
}
int main(int argc, char **argv) { int ret = -1, fd, status; char path[PATH_MAX]; pid_t pid; if (!getenv("ZDTM_NEWNS")) { if (mount_and_add(cgname, "test") < 0) return -1; if (unshare(CLONE_NEWCGROUP) < 0) { pr_perror("unshare"); goto out; } } test_init(argc, argv); test_daemon(); test_waitsig(); sprintf(path, "name=%s", cgname); /* first check that the task is in zdtmtst:/ */ if (!pid_in_cgroup(getpid(), path, "/")) { fail("pid not in cgroup /"); goto out; } /* now check that the task is in the right place in a ns by setnsing to * someone else's ns and looking there. */ pid = fork(); if (pid < 0) { pr_perror("fork"); goto out; } if (pid == 0) { sprintf(path, "/proc/%d/ns/cgroup", 1); fd = open(path, O_RDONLY); if (fd < 0) { pr_perror("open"); exit(1); } ret = setns(fd, CLONE_NEWCGROUP); close(fd); if (ret < 0) { pr_perror("setns"); exit(1); } sprintf(path, "name=%s", cgname); if (!pid_in_cgroup(getppid(), path, "/test")) { fail("pid not in cgroup %s", path); exit(1); } exit(0); } if (pid != waitpid(pid, &status, 0)) { pr_err("wrong pid"); goto out; } if (!WIFEXITED(status) || WEXITSTATUS(status)) { pr_err("got bad exit status %d\n", status); goto out; } ret = 0; pass(); out: sprintf(path, "%s/%s/test", dirname, cgname); rmdir(path); sprintf(path, "%s/%s", dirname, cgname); umount(path); rmdir(path); rmdir(dirname); return ret; }
int main(int argc, char *argv[]) { int c; int fd; char path[PATH_MAX]; int nsid; int pid; char *cwd = get_current_dir_name(); static struct sched_param sp; while ((c = getopt(argc, argv, "+cdnpa:g:r:vh")) != -1) switch(c) { case 'c': /* close file descriptors except stdin/out/error */ for (fd = getdtablesize(); fd > 2; fd--) close(fd); break; case 'd': /* detach from tty */ if (getpgrp() == getpid()) { switch(fork()) { case -1: perror("fork"); return 1; case 0: /* child */ break; default: /* parent */ return 0; } } setsid(); break; case 'n': /* run in network and mount namespaces */ if (unshare(CLONE_NEWNET|CLONE_NEWNS) == -1) { perror("unshare"); return 1; } /* mount sysfs to pick up the new network namespace */ if (mount("sysfs", "/sys", "sysfs", MS_MGC_VAL, NULL) == -1) { perror("mount"); return 1; } break; case 'p': /* print pid */ printf("\001%d\n", getpid()); fflush(stdout); break; case 'a': /* Attach to pid's network namespace and mount namespace */ pid = atoi(optarg); sprintf(path, "/proc/%d/ns/net", pid); nsid = open(path, O_RDONLY); if (nsid < 0) { perror(path); return 1; } if (setns(nsid, 0) != 0) { perror("setns"); return 1; } /* Plan A: call setns() to attach to mount namespace */ sprintf(path, "/proc/%d/ns/mnt", pid); nsid = open(path, O_RDONLY); if (nsid < 0 || setns(nsid, 0) != 0) { /* Plan B: chroot/chdir into pid's root file system */ sprintf(path, "/proc/%d/root", pid); if (chroot(path) < 0) { perror(path); return 1; } } /* chdir to correct working directory */ if (chdir(cwd) != 0) { perror(cwd); return 1; } break; case 'g': /* Attach to cgroup */ cgroup(optarg); break; case 'r': /* Set RT scheduling priority */ sp.sched_priority = atoi(optarg); if (sched_setscheduler(getpid(), SCHED_RR, &sp) < 0) { perror("sched_setscheduler"); return 1; } break; case 'v': printf("%s\n", VERSION); exit(0); case 'h': usage(argv[0]); exit(0); default: usage(argv[0]); exit(1); } if (optind < argc) { execvp(argv[optind], &argv[optind]); perror(argv[optind]); return 
1; } usage(argv[0]); return 0; }
int sc_create_or_join_ns_group(struct sc_ns_group *group, struct sc_apparmor *apparmor, const char *base_snap_name, const char *snap_name) { // Open the mount namespace file. char mnt_fname[PATH_MAX] = { 0 }; sc_must_snprintf(mnt_fname, sizeof mnt_fname, "%s%s", group->name, SC_NS_MNT_FILE); int mnt_fd SC_CLEANUP(sc_cleanup_close) = -1; // NOTE: There is no O_EXCL here because the file can be around but // doesn't have to be a mounted namespace. // // If the mounted namespace is discarded with // sc_discard_preserved_ns_group() it will revert to a regular file. If // snap-confine is killed for whatever reason after the file is created but // before the file is bind-mounted it will also be a regular file. mnt_fd = openat(group->dir_fd, mnt_fname, O_CREAT | O_RDONLY | O_CLOEXEC | O_NOFOLLOW, 0600); if (mnt_fd < 0) { die("cannot open mount namespace file for namespace group %s", group->name); } // Check if we got an nsfs-based or procfs file or a regular file. This can // be reliably tested because nsfs has an unique filesystem type // NSFS_MAGIC. On older kernels that don't support nsfs yet we can look // for PROC_SUPER_MAGIC instead. // We can just ensure that this is the case thanks to fstatfs. struct statfs ns_statfs_buf; if (fstatfs(mnt_fd, &ns_statfs_buf) < 0) { die("cannot perform fstatfs() on the mount namespace file descriptor"); } // Stat the mount namespace as well, this is later used to check if the // namespace is used by other processes if we are considering discarding a // stale namespace. struct stat ns_stat_buf; if (fstat(mnt_fd, &ns_stat_buf) < 0) { die("cannot perform fstat() on the mount namespace file descriptor"); } #ifndef NSFS_MAGIC // Account for kernel headers old enough to not know about NSFS_MAGIC. #define NSFS_MAGIC 0x6e736673 #endif if (ns_statfs_buf.f_type == NSFS_MAGIC || ns_statfs_buf.f_type == PROC_SUPER_MAGIC) { // Inspect and perhaps discard the preserved mount namespace. 
if (sc_inspect_and_maybe_discard_stale_ns (mnt_fd, snap_name, base_snap_name) == EAGAIN) { return EAGAIN; } // Remember the vanilla working directory so that we may attempt to restore it later. char *vanilla_cwd SC_CLEANUP(sc_cleanup_string) = NULL; vanilla_cwd = get_current_dir_name(); if (vanilla_cwd == NULL) { die("cannot get the current working directory"); } // Move to the mount namespace of the snap we're trying to start. debug ("attempting to re-associate the mount namespace with the namespace group %s", group->name); if (setns(mnt_fd, CLONE_NEWNS) < 0) { die("cannot re-associate the mount namespace with namespace group %s", group->name); } debug ("successfully re-associated the mount namespace with the namespace group %s", group->name); // Try to re-locate back to vanilla working directory. This can fail // because that directory is no longer present. if (chdir(vanilla_cwd) != 0) { debug ("cannot remain in %s, moving to the void directory", vanilla_cwd); if (chdir(SC_VOID_DIR) != 0) { die("cannot change directory to %s", SC_VOID_DIR); } debug("successfully moved to %s", SC_VOID_DIR); } return 0; } debug("initializing new namespace group %s", group->name); // Create a new namespace and ask the caller to populate it. // For rationale of forking see this: // https://lists.linuxfoundation.org/pipermail/containers/2013-August/033386.html // // The eventfd created here is used to synchronize the child and the parent // processes. It effectively tells the child to perform the capture // operation. group->event_fd = eventfd(0, EFD_CLOEXEC); if (group->event_fd < 0) { die("cannot create eventfd for mount namespace capture"); } debug("forking support process for mount namespace capture"); // Store the PID of the "parent" process. This done instead of calls to // getppid() because then we can reliably track the PID of the parent even // if the child process is re-parented. pid_t parent = getpid(); // Glibc defines pid as a signed 32bit integer. 
There's no standard way to // print pid's portably so this is the best we can do. pid_t pid = fork(); debug("forked support process has pid %d", (int)pid); if (pid < 0) { die("cannot fork support process for mount namespace capture"); } if (pid == 0) { // This is the child process which will capture the mount namespace. // // It will do so by bind-mounting the SC_NS_MNT_FILE after the parent // process calls unshare() and finishes setting up the namespace // completely. // Change the hat to a sub-profile that has limited permissions // necessary to accomplish the capture of the mount namespace. debug ("changing apparmor hat of the support process for mount namespace capture"); sc_maybe_aa_change_hat(apparmor, "mount-namespace-capture-helper", 0); // Configure the child to die as soon as the parent dies. In an odd // case where the parent is killed then we don't want to complete our // task or wait for anything. if (prctl(PR_SET_PDEATHSIG, SIGINT, 0, 0, 0) < 0) { die("cannot set parent process death notification signal to SIGINT"); } // Check that parent process is still alive. If this is the case then // we can *almost* reliably rely on the PR_SET_PDEATHSIG signal to wake // us up from eventfd_read() below. In the rare case that the PID numbers // overflow and the now-dead parent PID is recycled we will still hang // forever on the read from eventfd below. 
debug("ensuring that parent process is still alive"); if (kill(parent, 0) < 0) { switch (errno) { case ESRCH: debug("parent process has already terminated"); abort(); default: die("cannot ensure that parent process is still alive"); break; } } if (fchdir(group->dir_fd) < 0) { die("cannot move process for mount namespace capture to namespace group directory"); } debug ("waiting for a eventfd data from the parent process to continue"); eventfd_t value = 0; sc_enable_sanity_timeout(); if (eventfd_read(group->event_fd, &value) < 0) { die("cannot read expected data from eventfd"); } sc_disable_sanity_timeout(); debug ("capturing mount namespace of process %d in namespace group %s", (int)parent, group->name); char src[PATH_MAX] = { 0 }; char dst[PATH_MAX] = { 0 }; sc_must_snprintf(src, sizeof src, "/proc/%d/ns/mnt", (int)parent); sc_must_snprintf(dst, sizeof dst, "%s%s", group->name, SC_NS_MNT_FILE); if (mount(src, dst, NULL, MS_BIND, NULL) < 0) { die("cannot bind-mount the mount namespace file %s -> %s", src, dst); } debug ("successfully captured mount namespace in namespace group %s", group->name); exit(0); } else { group->child = pid; // Unshare the mount namespace and set a flag instructing the caller that // the namespace is pristine and needs to be populated now. debug("unsharing the mount namespace"); if (unshare(CLONE_NEWNS) < 0) { die("cannot unshare the mount namespace"); } group->should_populate = true; } return 0; }
static int test_pidfd_send_signal_recycled_pid_fail(void) { int i, ret; pid_t pid1; const char *test_name = "pidfd_send_signal signal recycled pid"; ret = unshare(CLONE_NEWPID); if (ret < 0) ksft_exit_fail_msg("%s test: Failed to unshare pid namespace\n", test_name); ret = unshare(CLONE_NEWNS); if (ret < 0) ksft_exit_fail_msg( "%s test: Failed to unshare mount namespace\n", test_name); ret = mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0); if (ret < 0) ksft_exit_fail_msg("%s test: Failed to remount / private\n", test_name); /* pid 1 in new pid namespace */ pid1 = fork(); if (pid1 < 0) ksft_exit_fail_msg("%s test: Failed to create new process\n", test_name); if (pid1 == 0) { char buf[256]; pid_t pid2; int pidfd = -1; (void)umount2("/proc", MNT_DETACH); ret = mount("proc", "/proc", "proc", 0, NULL); if (ret < 0) _exit(PIDFD_ERROR); /* grab pid PID_RECYCLE */ for (i = 0; i <= PIDFD_MAX_DEFAULT; i++) { pid2 = fork(); if (pid2 < 0) _exit(PIDFD_ERROR); if (pid2 == 0) _exit(PIDFD_PASS); if (pid2 == PID_RECYCLE) { snprintf(buf, sizeof(buf), "/proc/%d", pid2); ksft_print_msg("pid to recycle is %d\n", pid2); pidfd = open(buf, O_DIRECTORY | O_CLOEXEC); } if (wait_for_pid(pid2)) _exit(PIDFD_ERROR); if (pid2 >= PID_RECYCLE) break; } /* * We want to be as predictable as we can so if we haven't been * able to grab pid PID_RECYCLE skip the test. */ if (pid2 != PID_RECYCLE) { /* skip test */ close(pidfd); _exit(PIDFD_SKIP); } if (pidfd < 0) _exit(PIDFD_ERROR); for (i = 0; i <= PIDFD_MAX_DEFAULT; i++) { char c; int pipe_fds[2]; pid_t recycled_pid; int child_ret = PIDFD_PASS; ret = pipe2(pipe_fds, O_CLOEXEC); if (ret < 0) _exit(PIDFD_ERROR); recycled_pid = fork(); if (recycled_pid < 0) _exit(PIDFD_ERROR); if (recycled_pid == 0) { close(pipe_fds[1]); (void)read(pipe_fds[0], &c, 1); close(pipe_fds[0]); _exit(PIDFD_PASS); } /* * Stop the child so we can inspect whether we have * recycled pid PID_RECYCLE. 
*/ close(pipe_fds[0]); ret = kill(recycled_pid, SIGSTOP); close(pipe_fds[1]); if (ret) { (void)wait_for_pid(recycled_pid); _exit(PIDFD_ERROR); } /* * We have recycled the pid. Try to signal it. This * needs to fail since this is a different process than * the one the pidfd refers to. */ if (recycled_pid == PID_RECYCLE) { ret = sys_pidfd_send_signal(pidfd, SIGCONT, NULL, 0); if (ret && errno == ESRCH) child_ret = PIDFD_XFAIL; else child_ret = PIDFD_FAIL; } /* let the process move on */ ret = kill(recycled_pid, SIGCONT); if (ret) (void)kill(recycled_pid, SIGKILL); if (wait_for_pid(recycled_pid)) _exit(PIDFD_ERROR); switch (child_ret) { case PIDFD_FAIL: /* fallthrough */ case PIDFD_XFAIL: _exit(child_ret); case PIDFD_PASS: break; default: /* not reached */ _exit(PIDFD_ERROR); } /* * If the user set a custom pid_max limit we could be * in the millions. * Skip the test in this case. */ if (recycled_pid > PIDFD_MAX_DEFAULT) _exit(PIDFD_SKIP); } /* failed to recycle pid */ _exit(PIDFD_SKIP); } ret = wait_for_pid(pid1); switch (ret) { case PIDFD_FAIL: ksft_exit_fail_msg( "%s test: Managed to signal recycled pid %d\n", test_name, PID_RECYCLE); case PIDFD_PASS: ksft_exit_fail_msg("%s test: Failed to recycle pid %d\n", test_name, PID_RECYCLE); case PIDFD_SKIP: ksft_print_msg("%s test: Skipping test\n", test_name); ret = 0; break; case PIDFD_XFAIL: ksft_test_result_pass( "%s test: Failed to signal recycled pid as expected\n", test_name); ret = 0; break; default /* PIDFD_ERROR */: ksft_exit_fail_msg("%s test: Error while running tests\n", test_name); } return ret; }
int main(int argc, char ** argv) { FILE *containerimage_fp; FILE *loop_fp; FILE *config_fp; char *containerimage; char *containername; char *containerpath; char *username; char *command; char *tmpdir; char *loop_dev_lock; char *loop_dev_cache; char *loop_dev = 0; char *config_path; char *tmp_config_string; char cwd[PATH_MAX]; int cwd_fd; int tmpdirlock_fd; int containerimage_fd; int loop_dev_lock_fd; int gid_list_count; int retval = 0; uid_t uid; gid_t gid; gid_t *gid_list; pid_t namespace_fork_pid = 0; struct passwd *pw; //****************************************************************************// // Init //****************************************************************************// signal(SIGINT, sighandler); signal(SIGKILL, sighandler); signal(SIGQUIT, sighandler); openlog("Singularity", LOG_CONS | LOG_NDELAY, LOG_LOCAL0); // Get all user/group info uid = getuid(); gid = getgid(); gid_list_count = getgroups(0, NULL); gid_list = (gid_t *) malloc(sizeof(gid_t) * gid_list_count); if ( getgroups(gid_list_count, gid_list) < 0 ) { fprintf(stderr, "ABORT: Could not obtain current supplementary group list: %s\n", strerror(errno)); return(255); } pw = getpwuid(uid); // Check to make sure we are installed correctly if ( seteuid(0) < 0 ) { fprintf(stderr, "ABORT: Check installation, must be performed by root.\n"); return(255); } // Lets start off as the calling UID if ( seteuid(uid) < 0 ) { fprintf(stderr, "ABORT: Could not set effective uid to %d: %s\n", uid, strerror(errno)); return(255); } if ( setegid(gid) < 0 ) { fprintf(stderr, "ABORT: Could not set effective gid to %d: %s\n", gid, strerror(errno)); return(255); } username = pw->pw_name; containerimage = getenv("SINGULARITY_IMAGE"); command = getenv("SINGULARITY_COMMAND"); unsetenv("SINGULARITY_COMMAND"); unsetenv("SINGULARITY_EXEC"); config_path = (char *) malloc(strlen(SYSCONFDIR) + 30); snprintf(config_path, strlen(SYSCONFDIR) + 30, "%s/singularity/singularity.conf", SYSCONFDIR); // Figure out where we start 
if ( (cwd_fd = open(".", O_RDONLY)) < 0 ) { fprintf(stderr, "ABORT: Could not open cwd fd (%s)!\n", strerror(errno)); return(1); } if ( getcwd(cwd, PATH_MAX) == NULL ) { fprintf(stderr, "Could not obtain current directory path: %s\n", strerror(errno)); return(1); } if ( containerimage == NULL ) { fprintf(stderr, "ABORT: SINGULARITY_IMAGE undefined!\n"); return(1); } if ( is_file(containerimage) != 0 ) { fprintf(stderr, "ABORT: Container image path is invalid: %s\n", containerimage); return(1); } if ( is_file(config_path) != 0 ) { fprintf(stderr, "ABORT: Configuration file not found: %s\n", config_path); return(255); } if ( is_owner(config_path, 0) != 0 ) { fprintf(stderr, "ABORT: Configuration file is not owned by root: %s\n", config_path); return(255); } // TODO: Offer option to only run containers owned by root (so root can approve // containers) if ( uid == 0 && is_owner(containerimage, 0) < 0 ) { fprintf(stderr, "ABORT: Root should only run containers that root owns!\n"); return(1); } containername = basename(strdup(containerimage)); tmpdir = strjoin("/tmp/.singularity-", file_id(containerimage)); loop_dev_lock = joinpath(tmpdir, "loop_dev.lock"); loop_dev_cache = joinpath(tmpdir, "loop_dev"); containerpath = (char *) malloc(strlen(tmpdir) + 5); snprintf(containerpath, strlen(tmpdir) + 5, "%s/mnt", tmpdir); syslog(LOG_NOTICE, "User=%s[%d], Command=%s, Container=%s, CWD=%s, Arg1=%s", username, uid, command, containerimage, cwd, argv[1]); //****************************************************************************// // Setup //****************************************************************************// if ( ( config_fp = fopen(config_path, "r") ) == NULL ) { fprintf(stderr, "ERROR: Could not open config file %s: %s\n", config_path, strerror(errno)); return(255); } if ( getenv("SINGULARITY_WRITABLE") == NULL ) { if ( ( containerimage_fp = fopen(containerimage, "r") ) == NULL ) { fprintf(stderr, "ERROR: Could not open image read only %s: %s\n", containerimage, 
strerror(errno)); return(255); } containerimage_fd = fileno(containerimage_fp); if ( flock(containerimage_fd, LOCK_SH | LOCK_NB) < 0 ) { fprintf(stderr, "ABORT: Image is locked by another process\n"); return(5); } } else { if ( ( containerimage_fp = fopen(containerimage, "r+") ) == NULL ) { fprintf(stderr, "ERROR: Could not open image read/write %s: %s\n", containerimage, strerror(errno)); return(255); } containerimage_fd = fileno(containerimage_fp); if ( flock(containerimage_fd, LOCK_EX | LOCK_NB) < 0 ) { fprintf(stderr, "ABORT: Image is locked by another process\n"); return(5); } } //****************************************************************************// // We are now running with escalated privileges until we exec //****************************************************************************// if ( seteuid(0) < 0 ) { fprintf(stderr, "ABORT: Could not escalate effective user privileges %s\n", strerror(errno)); return(255); } if ( setegid(0) < 0 ) { fprintf(stderr, "ABORT: Could not escalate effective group privileges: %s\n", strerror(errno)); return(255); } if ( s_mkpath(tmpdir, 0755) < 0 ) { fprintf(stderr, "ABORT: Could not create temporary directory %s: %s\n", tmpdir, strerror(errno)); return(255); } if ( is_owner(tmpdir, 0) < 0 ) { fprintf(stderr, "ABORT: Container working directory has wrong ownership: %s\n", tmpdir); syslog(LOG_ERR, "Container working directory has wrong ownership: %s", tmpdir); return(255); } tmpdirlock_fd = open(tmpdir, O_RDONLY); if ( tmpdirlock_fd < 0 ) { fprintf(stderr, "ERROR: Could not obtain file descriptor on %s: %s\n", tmpdir, strerror(errno)); return(255); } if ( flock(tmpdirlock_fd, LOCK_SH | LOCK_NB) < 0 ) { fprintf(stderr, "ERROR: Could not obtain shared lock on %s: %s\n", tmpdir, strerror(errno)); return(255); } if ( ( loop_dev_lock_fd = open(loop_dev_lock, O_CREAT | O_RDWR, 0644) ) < 0 ) { fprintf(stderr, "ERROR: Could not open loop_dev_lock %s: %s\n", loop_dev_lock, strerror(errno)); return(255); } if ( 
s_mkpath(containerpath, 0755) < 0 ) { fprintf(stderr, "ABORT: Could not create directory %s: %s\n", containerpath, strerror(errno)); return(255); } if ( is_owner(containerpath, 0) < 0 ) { fprintf(stderr, "ABORT: Container directory is not root owned: %s\n", containerpath); syslog(LOG_ERR, "Container directory has wrong ownership: %s", tmpdir); return(255); } if ( flock(loop_dev_lock_fd, LOCK_EX | LOCK_NB) == 0 ) { loop_dev = obtain_loop_dev(); if ( ( loop_fp = fopen(loop_dev, "r+") ) < 0 ) { fprintf(stderr, "ERROR: Failed to open loop device %s: %s\n", loop_dev, strerror(errno)); syslog(LOG_ERR, "Failed to open loop device %s: %s", loop_dev, strerror(errno)); return(255); } if ( associate_loop(containerimage_fp, loop_fp, 1) < 0 ) { fprintf(stderr, "ERROR: Could not associate %s to loop device %s\n", containerimage, loop_dev); syslog(LOG_ERR, "Failed to associate %s to loop device %s", containerimage, loop_dev); return(255); } if ( fileput(loop_dev_cache, loop_dev) < 0 ) { fprintf(stderr, "ERROR: Could not write to loop_dev_cache %s: %s\n", loop_dev_cache, strerror(errno)); return(255); } flock(loop_dev_lock_fd, LOCK_SH | LOCK_NB); } else { flock(loop_dev_lock_fd, LOCK_SH); if ( ( loop_dev = filecat(loop_dev_cache) ) == NULL ) { fprintf(stderr, "ERROR: Could not retrieve loop_dev_cache from %s\n", loop_dev_cache); return(255); } if ( ( loop_fp = fopen(loop_dev, "r") ) < 0 ) { fprintf(stderr, "ERROR: Failed to open loop device %s: %s\n", loop_dev, strerror(errno)); return(255); } } //****************************************************************************// // Management fork //****************************************************************************// namespace_fork_pid = fork(); if ( namespace_fork_pid == 0 ) { //****************************************************************************// // Setup namespaces //****************************************************************************// if ( unshare(CLONE_NEWNS) < 0 ) { fprintf(stderr, "ABORT: Could not 
virtualize mount namespace: %s\n", strerror(errno)); return(255); } // Privatize the mount namespaces (thank you for the pointer Doug Jacobsen!) if ( mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL) < 0 ) { // I am not sure if this error needs to be caught, maybe it will fail // on older kernels? If so, we can fix then. fprintf(stderr, "ABORT: Could not make mountspaces private: %s\n", strerror(errno)); return(255); } #ifdef NS_CLONE_NEWPID if ( getenv("SINGULARITY_NO_NAMESPACE_PID") == NULL ) { unsetenv("SINGULARITY_NO_NAMESPACE_PID"); if ( unshare(CLONE_NEWPID) < 0 ) { fprintf(stderr, "ABORT: Could not virtualize PID namespace: %s\n", strerror(errno)); return(255); } } #else #ifdef NS_CLONE_PID // This is for older legacy CLONE_PID if ( getenv("SINGULARITY_NO_NAMESPACE_PID") == NULL ) { unsetenv("SINGULARITY_NO_NAMESPACE_PID"); if ( unshare(CLONE_PID) < 0 ) { fprintf(stderr, "ABORT: Could not virtualize PID namespace: %s\n", strerror(errno)); return(255); } } #endif #endif #ifdef NS_CLONE_FS if ( getenv("SINGULARITY_NO_NAMESPACE_FS") == NULL ) { unsetenv("SINGULARITY_NO_NAMESPACE_FS"); if ( unshare(CLONE_FS) < 0 ) { fprintf(stderr, "ABORT: Could not virtualize file system namespace: %s\n", strerror(errno)); return(255); } } #endif #ifdef NS_CLONE_FILES if ( getenv("SINGULARITY_NO_NAMESPACE_FILES") == NULL ) { unsetenv("SINGULARITY_NO_NAMESPACE_FILES"); if ( unshare(CLONE_FILES) < 0 ) { fprintf(stderr, "ABORT: Could not virtualize file descriptor namespace: %s\n", strerror(errno)); return(255); } } #endif //****************************************************************************// // Mount image //****************************************************************************// if ( getenv("SINGULARITY_WRITABLE") == NULL ) { unsetenv("SINGULARITY_WRITABLE"); if ( mount_image(loop_dev, containerpath, 0) < 0 ) { fprintf(stderr, "ABORT: exiting...\n"); return(255); } } else { if ( mount_image(loop_dev, containerpath, 1) < 0 ) { fprintf(stderr, "ABORT: exiting...\n"); 
return(255); } } //****************************************************************************// // Check image //****************************************************************************// if ( is_exec(joinpath(containerpath, "/bin/sh")) < 0 ) { fprintf(stderr, "ERROR: Container image does not have a valid /bin/sh\n"); return(1); } //****************************************************************************// // Bind mounts //****************************************************************************// if ( getenv("SINGULARITY_CONTAIN") == NULL ) { unsetenv("SINGULARITY_CONTAIN"); rewind(config_fp); while ( ( tmp_config_string = config_get_key_value(config_fp, "bind path") ) != NULL ) { if ( ( is_file(tmp_config_string) != 0 ) && ( is_dir(tmp_config_string) != 0 ) ) { fprintf(stderr, "ERROR: Non existant bind source path: '%s'\n", tmp_config_string); continue; } if ( ( is_file(joinpath(containerpath, tmp_config_string)) != 0 ) && ( is_dir(joinpath(containerpath, tmp_config_string)) != 0 ) ) { fprintf(stderr, "WARNING: Non existant bind container destination path: '%s'\n", tmp_config_string); continue; } if ( mount_bind(tmp_config_string, joinpath(containerpath, tmp_config_string), 0) < 0 ) { fprintf(stderr, "ABORTING!\n"); return(255); } } if (is_file(joinpath(containerpath, "/etc/nsswitch.conf")) == 0 ) { if ( is_file(joinpath(SYSCONFDIR, "/singularity/default-nsswitch.conf")) == 0 ) { if ( mount_bind(joinpath(SYSCONFDIR, "/singularity/default-nsswitch.conf"), joinpath(containerpath, "/etc/nsswitch.conf"), 0) != 0 ) { fprintf(stderr, "ABORT: Could not bind /etc/nsswitch.conf\n"); return(255); } } else { fprintf(stderr, "WARNING: Template /etc/nsswitch.conf does not exist: %s\n", joinpath(SYSCONFDIR, "/singularity/default-nsswitch.conf")); } } if ( uid != 0 ) { // If we are root, no need to mess with passwd or group if (is_file(joinpath(containerpath, "/etc/passwd")) == 0 ) { if ( is_file(joinpath(tmpdir, "/passwd")) < 0 ) { if ( 
build_passwd(joinpath(containerpath, "/etc/passwd"), joinpath(tmpdir, "/passwd")) < 0 ) { fprintf(stderr, "ABORT: Failed creating template password file\n"); return(255); } } if ( mount_bind(joinpath(tmpdir, "/passwd"), joinpath(containerpath, "/etc/passwd"), 0) < 0 ) { fprintf(stderr, "ABORT: Could not bind /etc/passwd\n"); return(255); } } if (is_file(joinpath(containerpath, "/etc/group")) == 0 ) { if ( is_file(joinpath(tmpdir, "/group")) < 0 ) { if ( build_group(joinpath(containerpath, "/etc/group"), joinpath(tmpdir, "/group")) < 0 ) { fprintf(stderr, "ABORT: Failed creating template group file\n"); return(255); } } if ( mount_bind(joinpath(tmpdir, "/group"), joinpath(containerpath, "/etc/group"), 0) < 0 ) { fprintf(stderr, "ABORT: Could not bind /etc/group\n"); return(255); } } } } //****************************************************************************// // Fork child in new namespaces //****************************************************************************// exec_fork_pid = fork(); if ( exec_fork_pid == 0 ) { //****************************************************************************// // Enter the file system //****************************************************************************// if ( chroot(containerpath) < 0 ) { fprintf(stderr, "ABORT: failed enter CONTAINERIMAGE: %s\n", containerpath); return(255); } if ( chdir("/") < 0 ) { fprintf(stderr, "ABORT: Could not chdir after chroot to /: %s\n", strerror(errno)); return(1); } //****************************************************************************// // Setup real mounts within the container //****************************************************************************// rewind(config_fp); if ( config_get_key_bool(config_fp, "mount proc", 1) > 0 ) { if ( is_dir("/proc") == 0 ) { if ( mount("proc", "/proc", "proc", 0, NULL) < 0 ) { fprintf(stderr, "ABORT: Could not mount /proc: %s\n", strerror(errno)); return(255); } } } rewind(config_fp); if ( config_get_key_bool(config_fp, "mount 
sys", 1) > 0 ) { if ( is_dir("/sys") == 0 ) { if ( mount("sysfs", "/sys", "sysfs", 0, NULL) < 0 ) { fprintf(stderr, "ABORT: Could not mount /sys: %s\n", strerror(errno)); return(255); } } } //****************************************************************************// // Drop all privileges for good //****************************************************************************// if ( setgroups(gid_list_count, gid_list) < 0 ) { fprintf(stderr, "ABOFT: Could not reset supplementary group list: %s\n", strerror(errno)); return(255); } if ( setregid(gid, gid) < 0 ) { fprintf(stderr, "ABORT: Could not dump real and effective group privileges: %s\n", strerror(errno)); return(255); } if ( setreuid(uid, uid) < 0 ) { fprintf(stderr, "ABORT: Could not dump real and effective user privileges: %s\n", strerror(errno)); return(255); } //****************************************************************************// // Setup final environment //****************************************************************************// // After this, we exist only within the container... Let's make it known! 
if ( setenv("SINGULARITY_CONTAINER", "true", 0) != 0 ) { fprintf(stderr, "ABORT: Could not set SINGULARITY_CONTAINER to 'true'\n"); return(1); } if ( is_dir(cwd) == 0 ) { if ( chdir(cwd) < 0 ) { fprintf(stderr, "ABORT: Could not chdir to: %s: %s\n", cwd, strerror(errno)); return(1); } } else { if ( fchdir(cwd_fd) < 0 ) { fprintf(stderr, "ABORT: Could not fchdir to cwd: %s\n", strerror(errno)); return(1); } } //****************************************************************************// // Execv to container process //****************************************************************************// if ( command == NULL ) { fprintf(stderr, "No command specified, launching 'shell'\n"); command = strdup("shell"); } if ( strcmp(command, "run") == 0 ) { if ( is_exec("/singularity") == 0 ) { argv[0] = strdup("/singularity"); if ( execv("/singularity", argv) != 0 ) { fprintf(stderr, "ABORT: exec of /bin/sh failed: %s\n", strerror(errno)); } } else { fprintf(stderr, "No Singularity runscript found, launching 'shell'\n"); command = strdup("shell"); } } if ( strcmp(command, "exec") == 0 ) { if ( argc <= 1 ) { fprintf(stderr, "ABORT: Exec requires a command to run\n"); return(1); } if ( execvp(argv[1], &argv[1]) != 0 ) { fprintf(stderr, "ABORT: execvp of '%s' failed: %s\n", argv[1], strerror(errno)); return(1); } } if ( strcmp(command, "shell") == 0 ) { char *prompt; prompt = (char *) malloc(strlen(containername) + 16); snprintf(prompt, strlen(containerimage) + 16, "Singularity/%s> ", containername); setenv("PS1", prompt, 1); if ( is_exec("/bin/bash") == 0 ) { char *args[argc+2]; int i; args[0] = strdup("/bin/bash"); args[1] = strdup("--norc"); args[2] = strdup("--noprofile"); for(i=1; i<=argc; i++) { args[i+2] = argv[i]; } if ( execv("/bin/bash", args) != 0 ) { fprintf(stderr, "ABORT: exec of /bin/bash failed: %s\n", strerror(errno)); } } else { argv[0] = strdup("/bin/sh"); if ( execv("/bin/sh", argv) != 0 ) { fprintf(stderr, "ABORT: exec of /bin/sh failed: %s\n", 
strerror(errno)); } } } // If we get here... we fail on bad command fprintf(stderr, "ABORT: Unrecognized Singularity command: %s\n", command); return(1); //****************************************************************************// // Outer child waits for inner child //****************************************************************************// } else if ( exec_fork_pid > 0 ) { int tmpstatus; strncpy(argv[0], "Singularity: exec", strlen(argv[0])); if ( seteuid(uid) < 0 ) { fprintf(stderr, "ABORT: Could not set effective user privileges to %d: %s\n", uid, strerror(errno)); return(255); } waitpid(exec_fork_pid, &tmpstatus, 0); retval = WEXITSTATUS(tmpstatus); } else { fprintf(stderr, "ABORT: Could not fork namespace process: %s\n", strerror(errno)); return(255); } return(retval); } else if ( namespace_fork_pid > 0 ) { int tmpstatus; strncpy(argv[0], "Singularity: namespace", strlen(argv[0])); if ( seteuid(uid) < 0 ) { fprintf(stderr, "ABORT: Could not set effective user privileges to %d: %s\n", uid, strerror(errno)); return(255); } waitpid(namespace_fork_pid, &tmpstatus, 0); retval = WEXITSTATUS(tmpstatus); } else { fprintf(stderr, "ABORT: Could not fork management process: %s\n", strerror(errno)); return(255); } //****************************************************************************// // Final wrap up before exiting //****************************************************************************// if ( close(cwd_fd) < 0 ) { fprintf(stderr, "ERROR: Could not close cwd_fd: %s\n", strerror(errno)); retval++; } if ( flock(tmpdirlock_fd, LOCK_EX | LOCK_NB) == 0 ) { close(tmpdirlock_fd); if ( seteuid(0) < 0 ) { fprintf(stderr, "ABORT: Could not re-escalate effective user privileges: %s\n", strerror(errno)); return(255); } if ( s_rmdir(tmpdir) < 0 ) { fprintf(stderr, "WARNING: Could not remove all files in %s: %s\n", tmpdir, strerror(errno)); } // Dissociate loops from here Just in case autoflush didn't work. 
(void)disassociate_loop(loop_fp); if ( seteuid(uid) < 0 ) { fprintf(stderr, "ABORT: Could not drop effective user privileges: %s\n", strerror(errno)); return(255); } } else { // printf("Not removing tmpdir, lock still\n"); } close(containerimage_fd); close(tmpdirlock_fd); free(loop_dev_lock); free(containerpath); free(tmpdir); closelog(); return(retval); }
int main(int argc, char ** argv) { FILE *loop_fp; FILE *containerimage_fp; char *containerimage; char *mountpoint; char *loop_dev; int retval = 0; uid_t uid = geteuid(); signal(SIGINT, sighandler); signal(SIGKILL, sighandler); signal(SIGQUIT, sighandler); if ( uid != 0 ) { message(ERROR, "Calling user must be root\n"); ABORT(1); } if ( argv[1] == NULL || argv[2] == NULL ) { fprintf(stderr, "USAGE: %s [singularity container image] [mount point] (shell container args)\n", argv[0]); return(1); } containerimage = strdup(argv[1]); mountpoint = strdup(argv[2]); if ( is_file(containerimage) < 0 ) { message(ERROR, "Container image not found: %s\n", containerimage); ABORT(1); } if ( is_dir(mountpoint) < 0 ) { message(ERROR, "Mount point must be a directory: %s\n", mountpoint); ABORT(1); } message(DEBUG, "Opening container image: %s\n", containerimage); if ( ( containerimage_fp = fopen(containerimage, "r+") ) < 0 ) { // Flawfinder: ignore message(ERROR, "Could not open image %s: %s\n", containerimage, strerror(errno)); ABORT(255); } message(DEBUG, "Binding container to loop interface\n"); if ( ( loop_fp = loop_bind(containerimage_fp, &loop_dev, 1)) == NULL ) { message(ERROR, "Could not bind image to loop!\n"); ABORT(255); } message(DEBUG, "Forking namespace child\n"); namespace_fork_pid = fork(); if ( namespace_fork_pid == 0 ) { if ( unshare(CLONE_NEWNS) < 0 ) { message(ERROR, "Could not virtualize mount namespace: %s\n", strerror(errno)); ABORT(255); } if ( mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL) < 0 ) { message(ERROR, "Could not make mountspaces private: %s\n", strerror(errno)); ABORT(255); } if ( mount_image(loop_dev, mountpoint, 1) < 0 ) { message(ERROR, "Failed mounting image...\n"); ABORT(255); } message(DEBUG, "Forking exec child\n"); exec_fork_pid = fork(); if ( exec_fork_pid == 0 ) { argv[2] = strdup("/bin/bash"); if ( execv("/bin/bash", &argv[2]) != 0 ) { // Flawfinder: ignore (exec* is necessary) message(ERROR, "Exec of /bin/bash failed: %s\n", 
strerror(errno)); } // We should never get here, so if we do, make it an error return(-1); } else if ( exec_fork_pid > 0 ) { int tmpstatus; strncpy(argv[0], "Singularity: exec", strlen(argv[0])); // Flawfinder: ignore message(DEBUG, "Waiting for exec child to return\n"); waitpid(exec_fork_pid, &tmpstatus, 0); retval = WEXITSTATUS(tmpstatus); message(DEBUG, "Exec child returned (RETVAL=%d)\n", retval); return(retval); } else { fprintf(stderr, "ABORT: Could not exec child process: %s\n", strerror(errno)); retval++; } } else if ( namespace_fork_pid > 0 ) { int tmpstatus; strncpy(argv[0], "Singularity: namespace", strlen(argv[0])); // Flawfinder: ignore message(DEBUG, "Waiting for namespace child to return\n"); waitpid(namespace_fork_pid, &tmpstatus, 0); retval = WEXITSTATUS(tmpstatus); message(DEBUG, "Namespace child returned (RETVAL=%d)\n", retval); } else { fprintf(stderr, "ABORT: Could not fork management process: %s\n", strerror(errno)); return(255); } return(retval); }
int main(int argc, char *argv[]) { int c; int fd; char path[PATH_MAX]; int pid; int detach = 0; int netns = 0; int mountns = 0; int mountnspid = 0; int pidns = 0; int printpid = 0; int mountprocfs = 0; static struct sched_param sp; while ((c = getopt(argc, argv, "+cdnmiufpa:b:k:j:g:r:vh")) != -1) switch (c) { case 'c': /* close file descriptors except stdin/out/error */ for (fd = getdtablesize(); fd > 2; fd--) close(fd); break; case 'd': /* detach from tty */ detach = 1; /* delay setsid() incase new PID namespace */ break; case 'n': /* run in network namespace */ if (unshare(CLONE_NEWNET) == -1) { perror("unshare"); return 1; } netns = NET_NS_CREATE; break; case 'm': /* run in mount namespace */ if (unshare(CLONE_NEWNS) == -1) { perror("unshare"); return 1; } /* mount sysfs to pick up the new network namespace */ mountns = MOUNT_NS_CREATE; /* delay mount of /sysfs */ break; case 'i': /* run in new PID namespace */ if (unshare(CLONE_NEWPID) == -1) { perror("unshare"); return 1; } pidns = PID_NS_CREATE; /* record creation of PID namespace */ break; case 'u': /* run in new UTS namespace */ if (unshare(CLONE_NEWUTS) == -1) { perror("unshare"); return 1; } break; case 'f': /* mount procfs (for new PID namespaces) */ mountprocfs = TRUE; /* delay mounting proc until new NS established */ break; case 'p': /* print pid */ printpid = TRUE; /* delay printing PID until after NS procesisng*/ break; case 'a': /* Attach to pid's network namespace */ pid = atoi(optarg); sprintf(path, "/proc/%d/ns/net", pid); if (attachToNS(path) != 0) { return 1; } netns = NET_NS_JOIN; break; case 'b': /* Attach to pid's mount namespace */ mountns = MOUNT_NS_JOIN; /* delay joining mount namespace */ mountnspid = atoi(optarg); /* record PID to join */ break; case 'k': /* Attach to pid's PID namespace */ pid = atoi(optarg); sprintf(path, "/proc/%d/ns/pid", pid); if (attachToNS(path) != 0) { return 1; } pidns = PID_NS_JOIN; /* record join of PID namespace */ break; case 'j': /* Attach to pid's UTS 
namespace */ pid = atoi(optarg); sprintf(path, "/proc/%d/ns/uts", pid); if (attachToNS(path) != 0) { return 1; } break; case 'g': /* Attach to cgroup */ cgroup(optarg); break; case 'r': /* Set RT scheduling priority */ sp.sched_priority = atoi(optarg); if (sched_setscheduler(getpid(), SCHED_RR, &sp) < 0) { perror("sched_setscheduler"); return 1; } break; case 'v': printf("%s\n", VERSION); exit(0); case 'h': usage(argv[0]); exit(0); default: usage(argv[0]); exit(1); } /* fork to create / join PID namespace */ if (pidns == PID_NS_CREATE || pidns == PID_NS_JOIN) { int status = 0; pid_t pid = fork(); switch (pid) { case -1: perror("fork"); return 1; case 0: /* child */ break; default: /* parent */ /* print global PID (not namespace PID)*/ if (printpid == 1) { printf("\001%d\n", pid); fflush(stdout); } /* wait on the PID to handle attachment for 'mx'*/ if (waitpid(pid, &status, 0) == -1) return 1; if (WIFEXITED(status)) /* caught child exit, forward return code*/ return WEXITSTATUS(status); else if (WIFSIGNALED(status)) kill(getpid(), WTERMSIG(status)); /* child exit failed, (although return won't distinguish) */ return 1; } } /* if requested, we are in the new/requested PID namespace */ /* completed performing other namespaces (PID/network) operations */ /* go ahead and join the mount namespace if requested */ if (mountns == MOUNT_NS_JOIN && mountnspid != FALSE) { sprintf(path, "/proc/%d/ns/mnt", pid); if (attachToNS(path) != 0) { return 1; } } /* if mount of procfs requested, check for pidns and mountns */ if (mountprocfs && (pidns != PID_NS_CREATE || mountns != MOUNT_NS_CREATE)) { /* requested procfs, but required PID and/or mount namespace missing */ return 1; } /* mount procfs to pick up the new PID namespace */ if (mountprocfs && (mount("none", "/proc", NULL, MS_PRIVATE | MS_REC, NULL) != 0 || mount("proc", "/proc", "proc", MS_NOSUID | MS_NOEXEC | MS_NODEV, NULL) != 0)) { perror("mount"); } /* mount sysfs to pick up the new PID namespace */ if (netns == 
NET_NS_CREATE && mountns == MOUNT_NS_CREATE) { if (mount("sysfs", "/sys", "sysfs", MS_MGC_VAL, NULL) == -1) { perror("mount"); return 1; } } /* setsid() if requested & required (not needed if using PID namespace) */ if (detach == 1 && pidns == FALSE) { if (getpgrp() == getpid()) { switch (fork()) { case -1: perror("fork"); return 1; case 0: /* child */ break; default: /* parent */ return 0; } } setsid(); } /* print pid if requested (if in new namespace, we don't print local PID) */ if (printpid == 1 && pidns == 0) { printf("\001%d\n", getpid()); fflush(stdout); } /* launch if requested */ if (optind < argc) { execvp(argv[optind], &argv[optind]); perror(argv[optind]); return 1; } usage(argv[0]); return 0; }
/*
 * Test driver: a child in a private mount namespace bind-mounts `dirname`
 * onto itself, creates and unlinks "test.ghost" inside it while keeping the
 * descriptor open, then waits for a signal. After the signal it closes the
 * descriptor and reports any entry still present in the directory.
 *
 * The parent waits until the child has set everything up, daemonizes the
 * test, waits for its own signal, terminates the child and checks its status.
 */
int main(int argc, char **argv)
{
	task_waiter_t lock;
	pid_t pid = -1;
	int status = 1;

	task_waiter_init(&lock);
	test_init(argc, argv);

	pid = fork();
	if (pid < 0) {
		pr_perror("fork");
		return 1;
	}

	if (pid == 0) {
		int ghost_fd;
		DIR *dir;
		struct dirent *entry;

		if (unshare(CLONE_NEWNS) != 0) {
			pr_perror("unshare");
			return 1;
		}

		if (mount(NULL, "/", NULL, MS_PRIVATE | MS_REC, NULL) != 0) {
			pr_perror("mount");
			return 1;
		}

		if (mkdir(dirname, 0600) < 0) {
			pr_perror("mkdir");
			return 1;
		}

		if (mount(dirname, dirname, NULL, MS_BIND, NULL) != 0) {
			pr_perror("mount");
			return 1;
		}

		if (chdir(dirname) != 0)
			return 1;

		/* Create the file and unlink it right away, keeping it open. */
		ghost_fd = open("test.ghost", O_CREAT | O_WRONLY, 0600);
		if (ghost_fd < 0) {
			pr_perror("open");
			return 1;
		}

		if (unlink("test.ghost") != 0) {
			pr_perror("unlink");
			return 1;
		}

		/* Tell the parent the setup is done, then wait to be signalled. */
		task_waiter_complete(&lock, 1);
		test_waitsig();

		if (close(ghost_fd) != 0) {
			pr_perror("close");
			return 1;
		}

		dir = opendir(".");
		if (dir == NULL) {
			pr_perror("opendir");
			return 1;
		}

		/* Report every entry other than "." and "..". */
		for (;;) {
			entry = readdir(dir);
			if (entry == NULL)
				break;
			if (!strcmp(entry->d_name, ".") ||
			    !strcmp(entry->d_name, ".."))
				continue;
			pr_err("%s\n", entry->d_name);
		}
		closedir(dir);

		return 0;
	}

	task_waiter_wait4(&lock, 1);

	test_daemon();
	test_waitsig();

	kill(pid, SIGTERM);
	wait(&status);
	if (status != 0) {
		fail("Test died");
		return 1;
	}

	pass();
	return 0;
}
/*
 * Bind-mount /dev onto /tmp inside a new user + mount namespace, remount it
 * with the mount flags read from /dev beforehand, and verify that the flags
 * read back from /tmp are unchanged.
 *
 * The work happens in a forked child; the parent returns true when the child
 * exits with EXIT_SUCCESS and false for any other exit code.
 */
static bool test_priv_mount_unpriv_remount(void)
{
	const char *orig_path = "/dev";
	const char *dest_path = "/tmp";
	int flags_before, flags_after;
	pid_t child;

	child = fork();
	if (child == -1) {
		die("fork failed: %s\n", strerror(errno));
	}

	if (child == 0) {
		/* Flags must be sampled before entering the new user namespace. */
		flags_before = read_mnt_flags(orig_path);

		create_and_enter_userns();

		if (unshare(CLONE_NEWNS) != 0) {
			die("unshare(CLONE_NEWNS) failed: %s\n",
			    strerror(errno));
		}

		if (mount(orig_path, dest_path, "bind", MS_BIND | MS_REC,
			  NULL) != 0) {
			die("recursive bind mount of %s onto %s failed: %s\n",
			    orig_path, dest_path, strerror(errno));
		}

		if (mount(dest_path, dest_path, "none",
			  MS_REMOUNT | MS_BIND | flags_before, NULL) != 0) {
			/* system("cat /proc/self/mounts"); */
			die("remount of /tmp failed: %s\n", strerror(errno));
		}

		flags_after = read_mnt_flags(dest_path);
		if (flags_before != flags_after) {
			die("Mount flags unexpectedly changed during remount of %s originally mounted on %s\n",
			    dest_path, orig_path);
		}

		exit(EXIT_SUCCESS);
	}

	/* parent */
	{
		int status;
		pid_t reaped = waitpid(child, &status, 0);

		if (reaped == -1) {
			die("waitpid failed: %s\n", strerror(errno));
		}
		if (reaped != child) {
			die("waited for %d got %d\n", child, reaped);
		}
		if (!WIFEXITED(status)) {
			die("child did not terminate cleanly\n");
		}
		return WEXITSTATUS(status) == EXIT_SUCCESS;
	}
}
int main(int argc, char *argv[]) { int c, pid, ret, status; char buf[1]; int pipe_fds1[2], /* child tells parent it has unshared */ pipe_fds2[2]; /* parent tells child it is mapped and may proceed */ unsigned long flags = CLONE_NEWUSER | CLONE_NEWNS; char ttyname0[256] = {0}, ttyname1[256] = {0}, ttyname2[256] = {0}; char *default_args[] = {"/bin/sh", NULL}; lxc_log_fd = STDERR_FILENO; if (isatty(STDIN_FILENO)) { ret = readlink("/proc/self/fd/0", ttyname0, sizeof(ttyname0)); if (ret < 0) { CMD_SYSERROR("Failed to open stdin"); _exit(EXIT_FAILURE); } ret = readlink("/proc/self/fd/1", ttyname1, sizeof(ttyname1)); if (ret < 0) { CMD_SYSINFO("Failed to open stdout. Continuing"); ttyname1[0] = '\0'; } ret = readlink("/proc/self/fd/2", ttyname2, sizeof(ttyname2)); if (ret < 0) { CMD_SYSINFO("Failed to open stderr. Continuing"); ttyname2[0] = '\0'; } } lxc_list_init(&active_map); while ((c = getopt(argc, argv, "m:h")) != EOF) { switch (c) { case 'm': ret = parse_map(optarg); if (ret < 0) { usage(argv[0]); _exit(EXIT_FAILURE); } break; case 'h': usage(argv[0]); _exit(EXIT_SUCCESS); default: usage(argv[0]); _exit(EXIT_FAILURE); } }; if (lxc_list_empty(&active_map)) { ret = find_default_map(); if (ret < 0) { fprintf(stderr, "Failed to find subuid or subgid allocation\n"); _exit(EXIT_FAILURE); } } argv = &argv[optind]; argc = argc - optind; if (argc < 1) argv = default_args; ret = pipe2(pipe_fds1, O_CLOEXEC); if (ret < 0) { CMD_SYSERROR("Failed to open new pipe"); _exit(EXIT_FAILURE); } ret = pipe2(pipe_fds2, O_CLOEXEC); if (ret < 0) { CMD_SYSERROR("Failed to open new pipe"); close(pipe_fds1[0]); close(pipe_fds1[1]); _exit(EXIT_FAILURE); } pid = fork(); if (pid < 0) { close(pipe_fds1[0]); close(pipe_fds1[1]); close(pipe_fds2[0]); close(pipe_fds2[1]); _exit(EXIT_FAILURE); } if (pid == 0) { close(pipe_fds1[0]); close(pipe_fds2[1]); opentty(ttyname0, STDIN_FILENO); opentty(ttyname1, STDOUT_FILENO); opentty(ttyname2, STDERR_FILENO); ret = unshare(flags); if (ret < 0) { 
CMD_SYSERROR("Failed to unshare mount and user namespace"); close(pipe_fds1[1]); close(pipe_fds2[0]); _exit(EXIT_FAILURE); } buf[0] = '1'; ret = lxc_write_nointr(pipe_fds1[1], buf, 1); if (ret != 1) { CMD_SYSERROR("Failed to write to pipe file descriptor %d", pipe_fds1[1]); close(pipe_fds1[1]); close(pipe_fds2[0]); _exit(EXIT_FAILURE); } ret = lxc_read_nointr(pipe_fds2[0], buf, 1); if (ret != 1) { CMD_SYSERROR("Failed to read from pipe file descriptor %d", pipe_fds2[0]); close(pipe_fds1[1]); close(pipe_fds2[0]); _exit(EXIT_FAILURE); } close(pipe_fds1[1]); close(pipe_fds2[0]); if (buf[0] != '1') { fprintf(stderr, "Received unexpected value from parent process\n"); _exit(EXIT_FAILURE); } ret = do_child((void *)argv); if (ret < 0) _exit(EXIT_FAILURE); _exit(EXIT_SUCCESS); } close(pipe_fds1[1]); close(pipe_fds2[0]); ret = lxc_read_nointr(pipe_fds1[0], buf, 1); if (ret <= 0) CMD_SYSERROR("Failed to read from pipe file descriptor %d", pipe_fds1[0]); buf[0] = '1'; ret = lxc_map_ids(&active_map, pid); if (ret < 0) fprintf(stderr, "Failed to write id mapping for child process\n"); ret = lxc_write_nointr(pipe_fds2[1], buf, 1); if (ret < 0) { CMD_SYSERROR("Failed to write to pipe file descriptor %d", pipe_fds2[1]); _exit(EXIT_FAILURE); } ret = waitpid(pid, &status, __WALL); if (ret < 0) { CMD_SYSERROR("Failed to wait on child process"); _exit(EXIT_FAILURE); } _exit(WEXITSTATUS(status)); }
/* * Given a lxc_storage (presumably blockdev-based), detect the fstype * by trying mounting (in a private mntns) it. * @lxc_storage: bdev to investigate * @type: preallocated char* in which to write the fstype * @len: length of passed in char* * Returns length of fstype, of -1 on error */ int detect_fs(struct lxc_storage *bdev, char *type, int len) { int ret; int p[2]; size_t linelen; pid_t pid; FILE *f; char *sp1, *sp2, *sp3; const char *l, *srcdev; char devpath[PATH_MAX]; char *line = NULL; if (!bdev || !bdev->src || !bdev->dest) return -1; srcdev = lxc_storage_get_path(bdev->src, bdev->type); ret = pipe(p); if (ret < 0) return -1; if ((pid = fork()) < 0) return -1; if (pid > 0) { int status; close(p[1]); memset(type, 0, len); ret = read(p[0], type, len - 1); close(p[0]); if (ret < 0) { SYSERROR("error reading from pipe"); wait(&status); return -1; } else if (ret == 0) { ERROR("child exited early - fstype not found"); wait(&status); return -1; } wait(&status); type[len - 1] = '\0'; INFO("detected fstype %s for %s", type, srcdev); return ret; } if (unshare(CLONE_NEWNS) < 0) exit(1); if (detect_shared_rootfs()) { if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL)) { SYSERROR("Failed to make / rslave"); ERROR("Continuing..."); } } ret = mount_unknown_fs(srcdev, bdev->dest, bdev->mntopts); if (ret < 0) { ERROR("failed mounting %s onto %s to detect fstype", srcdev, bdev->dest); exit(1); } l = linkderef(srcdev, devpath); if (!l) exit(1); f = fopen("/proc/self/mounts", "r"); if (!f) exit(1); while (getline(&line, &linelen, f) != -1) { sp1 = strchr(line, ' '); if (!sp1) exit(1); *sp1 = '\0'; if (strcmp(line, l)) continue; sp2 = strchr(sp1 + 1, ' '); if (!sp2) exit(1); *sp2 = '\0'; sp3 = strchr(sp2 + 1, ' '); if (!sp3) exit(1); *sp3 = '\0'; sp2++; if (write(p[1], sp2, strlen(sp2)) != strlen(sp2)) exit(1); exit(0); } exit(1); }
int main(int argc, char *argv[]) { int c; unsigned long flags = CLONE_NEWUSER | CLONE_NEWNS; char ttyname0[256], ttyname1[256], ttyname2[256]; int status; int ret; int pid; char *default_args[] = {"/bin/sh", NULL}; char buf[1]; int pipe1[2], // child tells parent it has unshared pipe2[2]; // parent tells child it is mapped and may proceed memset(ttyname0, '\0', sizeof(ttyname0)); memset(ttyname1, '\0', sizeof(ttyname1)); memset(ttyname2, '\0', sizeof(ttyname2)); if (isatty(0)) { ret = readlink("/proc/self/fd/0", ttyname0, sizeof(ttyname0)); if (ret < 0) { perror("unable to open stdin."); exit(1); } ret = readlink("/proc/self/fd/1", ttyname1, sizeof(ttyname1)); if (ret < 0) { printf("Warning: unable to open stdout, continuing."); memset(ttyname1, '\0', sizeof(ttyname1)); } ret = readlink("/proc/self/fd/2", ttyname2, sizeof(ttyname2)); if (ret < 0) { printf("Warning: unable to open stderr, continuing."); memset(ttyname2, '\0', sizeof(ttyname2)); } } lxc_list_init(&active_map); while ((c = getopt(argc, argv, "m:h")) != EOF) { switch (c) { case 'm': if (parse_map(optarg)) usage(argv[0]); break; case 'h': default: usage(argv[0]); } }; if (lxc_list_empty(&active_map)) { if (find_default_map()) { fprintf(stderr, "You have no allocated subuids or subgids\n"); exit(1); } } argv = &argv[optind]; argc = argc - optind; if (argc < 1) { argv = default_args; argc = 1; } if (pipe(pipe1) < 0 || pipe(pipe2) < 0) { perror("pipe"); exit(1); } if ((pid = fork()) == 0) { // Child. 
close(pipe1[0]); close(pipe2[1]); opentty(ttyname0, 0); opentty(ttyname1, 1); opentty(ttyname2, 2); ret = unshare(flags); if (ret < 0) { perror("unshare"); return 1; } buf[0] = '1'; if (write(pipe1[1], buf, 1) < 1) { perror("write pipe"); exit(1); } if (read(pipe2[0], buf, 1) < 1) { perror("read pipe"); exit(1); } if (buf[0] != '1') { fprintf(stderr, "parent had an error, child exiting\n"); exit(1); } close(pipe1[1]); close(pipe2[0]); return do_child((void*)argv); } close(pipe1[1]); close(pipe2[0]); if (read(pipe1[0], buf, 1) < 1) { perror("read pipe"); exit(1); } buf[0] = '1'; if (lxc_map_ids(&active_map, pid)) { fprintf(stderr, "error mapping child\n"); ret = 0; } if (write(pipe2[1], buf, 1) < 0) { perror("write to pipe"); exit(1); } if ((ret = waitpid(pid, &status, __WALL)) < 0) { printf("waitpid() returns %d, errno %d\n", ret, errno); exit(1); } exit(WEXITSTATUS(status)); }
int main(int argc, char ** argv) { char *containerimage; char *mountpoint; char *bootstrap_script; char *defintion_script; char *loop_dev; int retval = 0; int containerimage_fd; int loop_fd; uid_t uid = geteuid(); if ( uid != 0 ) { fprintf(stderr, "ABORT: Calling user must be root\n"); return(1); } if ( argv[1] == NULL || argv[2] == NULL ) { fprintf(stderr, "USAGE: %s [singularity container image] [bootstrap definition]\n", argv[0]); return(1); } containerimage = strdup(argv[1]); defintion_script = strdup(argv[2]); bootstrap_script = strjoin(LIBEXECDIR, "/singularity/bootstrap.sh"); mountpoint = getenv("SINGULARITY_BUILD_ROOT"); if ( is_file(containerimage) < 0 ) { fprintf(stderr, "ABORT: Container image not found: %s\n", containerimage); return(1); } if ( is_dir(mountpoint) < 0 ) { fprintf(stderr, "ABORT: Mount point must be a directory: %s\n", mountpoint); return(1); } if ( unshare(CLONE_NEWNS) < 0 ) { fprintf(stderr, "ABORT: Could not virtulize mount namespace\n"); return(255); } if ( mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL) < 0 ) { fprintf(stderr, "ABORT: Could not make mountspaces private: %s\n", strerror(errno)); return(255); } if ( ( containerimage_fd = open(containerimage, O_RDWR) ) < 0 ) { fprintf(stderr, "ERROR: Could not open image %s: %s\n", containerimage, strerror(errno)); return(255); } loop_dev = obtain_loop_dev(); if ( ( loop_fd = open(loop_dev, O_RDWR) ) < 0 ) { fprintf(stderr, "ERROR: Failed to open %s: %s\n", loop_dev, strerror(errno)); return(-1); } if ( associate_loop(containerimage_fd, loop_fd) < 0 ) { fprintf(stderr, "ERROR: Could not associate %s to loop device %s\n", containerimage, loop_dev); return(255); } if ( mount_image(loop_dev, mountpoint, 1) < 0 ) { fprintf(stderr, "ABORT: exiting...\n"); return(255); } child_pid = fork(); if ( child_pid == 0 ) { char *exec[4]; exec[0] = strdup("/bin/bash"); exec[1] = strdup(bootstrap_script); exec[2] = strdup(defintion_script); exec[3] = NULL; if ( execv("/bin/bash", exec) != 0 ) { 
fprintf(stderr, "ABORT: exec of bootstrap failed: %s\n", strerror(errno)); } } else if ( child_pid > 0 ) { int tmpstatus; signal(SIGINT, sighandler); signal(SIGKILL, sighandler); signal(SIGQUIT, sighandler); waitpid(child_pid, &tmpstatus, 0); retval = WEXITSTATUS(tmpstatus); } else { fprintf(stderr, "ABORT: Could not fork child process\n"); retval++; } return(retval); }
int main(int argc, char **argv) { struct bindmnt *bmnt; uid_t uid = getuid(); gid_t gid = getgid(); const char *chrootdir, *cmd, *argv0; char **cmdargs, buf[32]; int c, fd; const struct option longopts[] = { { NULL, 0, NULL, 0 } }; chrootdir = cmd = NULL; argv0 = argv[0]; while ((c = getopt_long(argc, argv, "b:V", longopts, NULL)) != -1) { switch (c) { case 'b': if (optarg == NULL || *optarg == '\0') break; add_bindmount(optarg); break; case 'V': printf("%s\n", XBPS_RELVER); exit(EXIT_SUCCESS); case '?': default: usage(argv0); } } argc -= optind; argv += optind; if (argc < 2) usage(argv0); chrootdir = argv[0]; cmd = argv[1]; cmdargs = argv + 1; /* Never allow chrootdir == / */ if (strcmp(chrootdir, "/") == 0) die("/ is not allowed to be used as chrootdir"); /* Make chrootdir absolute */ if (chrootdir[0] != '/') { char cwd[PATH_MAX-1]; if (getcwd(cwd, sizeof(cwd)) == NULL) die("getcwd"); chrootdir = xbps_xasprintf("%s/%s", cwd, chrootdir); } /* * Unshare from the current process namespaces and set ours. */ if (unshare(CLONE_NEWUSER|CLONE_NEWNS|CLONE_NEWIPC|CLONE_NEWUTS) == -1) { errval = 99; die("unshare"); } /* * Setup uid/gid user mappings and restrict setgroups(). 
*/ if ((fd = open("/proc/self/uid_map", O_RDWR)) == -1) die("failed to open /proc/self/uidmap rw"); if (write(fd, buf, snprintf(buf, sizeof buf, "%u %u 1\n", uid, uid)) == -1) die("failed to write to /proc/self/uid_map"); close(fd); if ((fd = open("/proc/self/setgroups", O_RDWR)) != -1) { if (write(fd, "deny", 4) == -1) die("failed to write to /proc/self/setgroups"); close(fd); } if ((fd = open("/proc/self/gid_map", O_RDWR)) == -1) die("failed to open /proc/self/gid_map rw"); if (write(fd, buf, snprintf(buf, sizeof buf, "%u %u 1\n", gid, gid)) == -1) die("failed to write to /proc/self/gid_map"); close(fd); /* bind mount /proc */ bindmount(chrootdir, "/proc", NULL); /* bind mount /sys */ bindmount(chrootdir, "/sys", NULL); /* bind mount /dev */ bindmount(chrootdir, "/dev", NULL); /* bind mount all user specified mnts */ SIMPLEQ_FOREACH(bmnt, &bindmnt_queue, entries) bindmount(chrootdir, bmnt->src, bmnt->dest); /* move chrootdir to / and chroot to it */ if (chdir(chrootdir) == -1) die("chdir to %s", chrootdir); if (mount(".", ".", NULL, MS_BIND|MS_PRIVATE, NULL) == -1) die("Failed to bind mount %s", chrootdir); if (mount(chrootdir, "/", NULL, MS_MOVE, NULL) == -1) die("Failed to move %s as rootfs", chrootdir); if (chroot(".") == -1) die("Failed to chroot to %s", chrootdir); if (execvp(cmd, cmdargs) == -1) die("Failed to execute command %s", cmd); /* NOTREACHED */ exit(EXIT_FAILURE); }
/*
 * Minimal unshare(1)-like launcher.
 *
 * Usage: <prog> [-options] command [args]
 *
 * The optional first argument is either a cluster of short flags
 * (e.g. "-mu") or exactly one long option (e.g. "--mount"). The selected
 * namespaces are unshared in this process, then the command is started
 * through clone() (with CLONE_NEWPID when IPC isolation was requested)
 * and its exit status is returned.
 */
int main(int argc, char *argv[])
{
	/* clone() needs the highest address of a suitably aligned stack */
	static char stack[10240] __attribute__((aligned(16)));

	char **cmdline = &argv[1];
	char *args = NULL;
	int want_help = 0, want_version = 0;
	int want_mount = 0, want_net = 0, want_uts = 0, want_ipc = 0;
	int bad_option = 0;

	if (argc > 1 && argv[1][0] == '-') {
		args = argv[1];
		cmdline = &argv[2];
		argc--;
	}

	if (args) {
		if (strncmp(args, "--", 2) == 0) {
			/*
			 * Long options are matched by name. Substring matching
			 * made e.g. "--mount" also enable uts and net.
			 */
			const char *name = args + 2;

			if (strcmp(name, "help") == 0)
				want_help = 1;
			else if (strcmp(name, "version") == 0)
				want_version = 1;
			else if (strcmp(name, "mount") == 0)
				want_mount = 1;
			else if (strcmp(name, "uts") == 0)
				want_uts = 1;
			else if (strcmp(name, "ipc") == 0)
				want_ipc = 1;
			else if (strcmp(name, "net") == 0)
				want_net = 1;
			else if (name[0] != '\0')
				bad_option = 1;
		} else {
			want_help = strchr(args, 'h') != NULL;
			want_version = strchr(args, 'V') != NULL;
			want_mount = strchr(args, 'm') != NULL;
			want_net = strchr(args, 'n') != NULL;
			want_uts = strchr(args, 'u') != NULL;
			want_ipc = strchr(args, 'i') != NULL;
		}
	}

	if (want_help) {
		printf("Usage: %s [-options] command [args]\n\n", argv[0]);
		printf("Options:\n");
		printf(" -m, --mount unshare mounts namespace\n");
		printf(" -u, --uts unshare UTS namespace (hostname etc)\n");
		printf(" -i, --ipc unshare System V IPC namespace\n");
		printf(" -n, --net unshare network namespace\n\n");
		printf(" -h, --help display this help and exit\n");
		printf(" -V, --version output version information and exit\n");
		printf(" note: when -i is used PID isolation is also performed\n");
		return 0;
	}

	/* checked before the usage test so that a lone "-V" works */
	if (want_version) {
		printf("unshare with PID \n");
		return 0;
	}

	if (bad_option || argc < 2 || strlen(cmdline[0]) == 0) {
		printf("Usage: %s [-options] command [args]\n", argv[0]);
		return 1;
	}

	/* fail closed: never run the command with less isolation than asked */
	int cloneflags = 0;
	if (want_mount && unshare(CLONE_NEWNS) == -1) {
		perror("unshare(CLONE_NEWNS)");
		return 1;
	}
	if (want_net && unshare(CLONE_NEWNET) == -1) {
		perror("unshare(CLONE_NEWNET)");
		return 1;
	}
	if (want_uts && unshare(CLONE_NEWUTS) == -1) {
		perror("unshare(CLONE_NEWUTS)");
		return 1;
	}
	if (want_ipc) {
		if (unshare(CLONE_NEWIPC) == -1) {
			perror("unshare(CLONE_NEWIPC)");
			return 1;
		}
		cloneflags |= CLONE_NEWPID;
	}

	/* the stack grows downwards, so hand clone() the end of the buffer */
	pid_t pid = clone(jail_process, stack + sizeof(stack), cloneflags, cmdline);
	if (pid == -1) {
		perror("Error launching jail");
		return 1;
	}

	/*
	 * The child was cloned without an exit signal, so a plain wait()
	 * does not see it; __WALL waits for such children as well.
	 */
	int ret = 0;
	if (waitpid(pid, &ret, __WALL) != pid) {
		perror("Error waiting for jail");
		return 1;
	}

	return WIFEXITED(ret) ? WEXITSTATUS(ret) : 1;
}
static int netns_add(int argc, char **argv) { /* This function creates a new network namespace and * a new mount namespace and bind them into a well known * location in the filesystem based on the name provided. * * The mount namespace is created so that any necessary * userspace tweaks like remounting /sys, or bind mounting * a new /etc/resolv.conf can be shared between uers. */ char netns_path[MAXPATHLEN]; const char *name; int fd; int made_netns_run_dir_mount = 0; if (argc < 1) { fprintf(stderr, "No netns name specified\n"); return -1; } name = argv[0]; snprintf(netns_path, sizeof(netns_path), "%s/%s", NETNS_RUN_DIR, name); if (create_netns_dir()) return -1; /* Make it possible for network namespace mounts to propagate between * mount namespaces. This makes it likely that a unmounting a network * namespace file in one namespace will unmount the network namespace * file in all namespaces allowing the network namespace to be freed * sooner. */ while (mount("", NETNS_RUN_DIR, "none", MS_SHARED | MS_REC, NULL)) { /* Fail unless we need to make the mount point */ if (errno != EINVAL || made_netns_run_dir_mount) { fprintf(stderr, "mount --make-shared %s failed: %s\n", NETNS_RUN_DIR, strerror(errno)); return -1; } /* Upgrade NETNS_RUN_DIR to a mount point */ if (mount(NETNS_RUN_DIR, NETNS_RUN_DIR, "none", MS_BIND, NULL)) { fprintf(stderr, "mount --bind %s %s failed: %s\n", NETNS_RUN_DIR, NETNS_RUN_DIR, strerror(errno)); return -1; } made_netns_run_dir_mount = 1; } /* Create the filesystem state */ fd = open(netns_path, O_RDONLY|O_CREAT|O_EXCL, 0); if (fd < 0) { fprintf(stderr, "Cannot create namespace file \"%s\": %s\n", netns_path, strerror(errno)); return -1; } close(fd); if (unshare(CLONE_NEWNET) < 0) { fprintf(stderr, "Failed to create a new network namespace \"%s\": %s\n", name, strerror(errno)); goto out_delete; } /* Bind the netns last so I can watch for it */ if (mount("/proc/self/ns/net", netns_path, "none", MS_BIND, NULL) < 0) { fprintf(stderr, "Bind 
/proc/self/ns/net -> %s failed: %s\n", netns_path, strerror(errno)); goto out_delete; } return 0; out_delete: netns_delete(argc, argv); return -1; }
/*
 * Create and enter a new set of namespaces (mount, network, UTS, user)
 * for the environment called @name, keeping its state below @statedir.
 *
 * The caller's uid/gid are mapped to 0 inside the new user namespace,
 * the hostname is set to @name, loopback is configured, /etc is
 * overlaid (or resolv.conf is bind mounted and watched as a fallback)
 * and a watcher process is started.
 *
 * Returns the caller's end of the socketpair shared with the watcher.
 */
static int create_ns(const char *statedir, const char *name)
{
	char str[64];
	uid_t uid = getuid();
	gid_t gid = getgid();

	if (unshare(CLONE_NEWNS | CLONE_NEWNET | CLONE_NEWUTS | CLONE_NEWUSER) < 0)
		pdie("can't unshare namespaces");

	/* existence test; access() takes F_OK/R_OK..., not open() flags */
	if (access("/proc/self/setgroups", F_OK) == 0)
		write_file("/proc/self/setgroups", "deny");

	/* uid_t/gid_t are unsigned, so print them as such */
	snprintf(str, sizeof(str), "0 %u 1", (unsigned int)uid);
	write_file("/proc/self/uid_map", str);
	snprintf(str, sizeof(str), "0 %u 1", (unsigned int)gid);
	write_file("/proc/self/gid_map", str);

	if (sethostname(name, strlen(name)) < 0)
		pdie("can't set hostname");
	setup_ipv4("lo", "127.0.0.1", "255.0.0.0", false, 0);

	/* best effort: the state directory may already exist */
	mkdir(statedir, 0755);
	char *local_etc = populate_statedir(statedir, "etc", true);
	char *workdir = populate_statedir(statedir, "workdir", true);
	char *resolv = populate_statedir(statedir, "etc/resolv.conf", false);

	char *mount_opts;
	if (asprintf(&mount_opts, "lowerdir=/etc,upperdir=%s,workdir=%s",
		     local_etc, workdir) < 0) {
		die("can't allocate memory\n");
	}

	/*
	 * overlayfs is only usable on patched kernels (e.g. Ubuntu) due to
	 * permission checks, but it is the cleanest solution because it
	 * overrides symlinks. If we have to use bind mounts instead,
	 * tell the watcher process to re-create the bind mount if
	 * resolv.conf gets deleted.
	 */
	int inotify_fd = -1, inotify_wd = -1;
	if (mount("overlay", "/etc", "overlay", 0, mount_opts) == 0) {
		/* pass through */
	} else {
		inotify_fd = inotify_init();
		if (inotify_fd < 0)
			pdie("can't create inotify socket");
		inotify_wd = watch_and_bind_mount(inotify_fd, resolv);
		if (inotify_wd < 0)
			die("can't watch resolv.conf\n");
	}

	/*
	 * Create the initial watcher connection here so that the parent
	 * process doesn't need to wait for the child process to start up.
	 */
	int initial_conn_fd[2];
	if (socketpair(AF_UNIX, SOCK_STREAM, 0, initial_conn_fd) < 0)
		pdie("socketpair failed");

	write_pid(statedir, create_watcher(statedir, initial_conn_fd[1],
					   resolv, inotify_fd, inotify_wd));
	close(initial_conn_fd[1]);
	/* only open when the bind-mount fallback was taken */
	if (inotify_fd >= 0)
		close(inotify_fd);

	free(mount_opts);
	free(resolv);
	free(workdir);
	free(local_etc);

	return initial_conn_fd[0];
}
// Builds a drawing context backed by a wxImage of the requested size.
//
// BufferWi / BufferHt: requested size; each is clamped to at least 1 when
//                      allocating the image.
// allowShared:         when false, the font/brush/pen/colours installed on
//                      the memory DC are first passed through unshare().
//                      NOTE(review): unshare() is defined elsewhere; it
//                      presumably detaches wx ref-counted data - confirm.
// alpha:               when true, the image gets an alpha channel and every
//                      pixel inside BufferWi x BufferHt is set transparent.
//
// On return `image` is allocated, `dc` is a memory DC with no bitmap
// selected, and `bitmap` and `gc` are null.
DrawingContext::DrawingContext(int BufferWi, int BufferHt, bool allowShared, bool alpha) : nullBitmap(wxNullBitmap)
{
    unshare(nullBitmap);
    image = new wxImage(BufferWi > 0 ? BufferWi : 1, BufferHt > 0 ? BufferHt : 1);
    if (alpha) {
        image->SetAlpha();
        // the loops use the raw sizes, so nothing is touched when either is <= 0
        for(wxCoord x=0; x<BufferWi; x++) {
            for(wxCoord y=0; y<BufferHt; y++) {
                image->SetAlpha(x, y, wxIMAGE_ALPHA_TRANSPARENT);
            }
        }
    }
    // temporary bitmap used only while the DC is being configured below
    bitmap = new wxBitmap(*image);
    dc = new wxMemoryDC(*bitmap);

    if (!allowShared) {
        // Make sure we unshare everything that is being held onto.
        // "Non-normal" defaults are used on purpose to avoid the "==" issue
        // that would keep the DC from using the non-shared versions.
        wxFont font(*wxITALIC_FONT);
        unshare(font);
        dc->SetFont(font);

        wxBrush brush(*wxYELLOW_BRUSH);
        unshare(brush);
        dc->SetBrush(brush);
        dc->SetBackground(brush);

        wxPen pen(*wxGREEN_PEN);
        unshare(pen);
        dc->SetPen(pen);

        // also unshare whatever the DC itself now reports
        unshare(dc->GetBrush());
        unshare(dc->GetBackground());
        unshare(dc->GetFont());
        unshare(dc->GetPen());
        unshare(dc->GetTextForeground());
        unshare(dc->GetTextBackground());
#ifndef LINUX
        // text colours are only replaced on non-LINUX builds
        wxColor c(12, 25, 3);
        unshare(c);
        dc->SetTextBackground(c);
        wxColor c2(0, 35, 5);
        unshare(c2);
        dc->SetTextForeground(c2);
#endif
    }

    // deselect the temporary bitmap before deleting it
    dc->SelectObject(nullBitmap);
    delete bitmap;
    bitmap = nullptr;
    gc = nullptr;
}
/* Exercises get_process_cmdline() against a fake /proc/self/cmdline.
 *
 * The test is skipped unless running as root (and, when valgrind support
 * is compiled in, when running under valgrind). It forks; the child
 * enters a private mount namespace, bind mounts a temporary file over
 * /proc/self/cmdline and then checks the results returned for various
 * file contents, length limits (second argument) and values of the
 * boolean third argument. The parent only checks that the child exited
 * with status 0.
 *
 * NOTE(review): when the bind mount fails the child returns to its
 * caller instead of exiting - confirm that this is intended. */
static void test_get_process_cmdline_harder(void) {
        char path[] = "/tmp/test-cmdlineXXXXXX";
        _cleanup_close_ int fd = -1;
        _cleanup_free_ char *line = NULL;
        pid_t pid;

        if (geteuid() != 0)
                return;

#if HAVE_VALGRIND_VALGRIND_H
        /* valgrind patches open(/proc//cmdline)
         * so, test_get_process_cmdline_harder fails always
         * See https://github.com/systemd/systemd/pull/3555#issuecomment-226564908 */
        if (RUNNING_ON_VALGRIND)
                return;
#endif

        pid = fork();
        if (pid > 0) {
                /* Parent: the child must have exited cleanly with status 0 */
                siginfo_t si;

                (void) wait_for_terminate(pid, &si);

                assert_se(si.si_code == CLD_EXITED);
                assert_se(si.si_status == 0);

                return;
        }

        /* Child from here on (this also asserts that fork() did not fail) */
        assert_se(pid == 0);
        assert_se(unshare(CLONE_NEWNS) >= 0);

        /* Turn off mount propagation so the bind mount below is not propagated elsewhere */
        assert_se(mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL) >= 0);

        fd = mkostemp(path, O_CLOEXEC);
        assert_se(fd >= 0);

        if (mount(path, "/proc/self/cmdline", "bind", MS_BIND, NULL) < 0) {
                /* This happens under selinux… Abort the test in this case. */
                log_warning_errno(errno, "mount(..., \"/proc/self/cmdline\", \"bind\", ...) failed: %m");
                assert(errno == EACCES);
                return;
        }

        /* The file stays reachable through fd and the bind mount */
        assert_se(unlink(path) >= 0);

        assert_se(prctl(PR_SET_NAME, "testa") >= 0);

        /* Empty file: -ENOENT when the third argument is false ... */
        assert_se(get_process_cmdline(getpid_cached(), 0, false, &line) == -ENOENT);

        /* ... and the name set via PR_SET_NAME in brackets when it is true */
        assert_se(get_process_cmdline(getpid_cached(), 0, true, &line) >= 0);
        assert_se(streq(line, "[testa]"));
        line = mfree(line);

        /* Expected results for the bracketed name at increasing length limits */
        assert_se(get_process_cmdline(getpid_cached(), 1, true, &line) >= 0);
        assert_se(streq(line, ""));
        line = mfree(line);

        assert_se(get_process_cmdline(getpid_cached(), 2, true, &line) >= 0);
        assert_se(streq(line, "["));
        line = mfree(line);

        assert_se(get_process_cmdline(getpid_cached(), 3, true, &line) >= 0);
        assert_se(streq(line, "[."));
        line = mfree(line);

        assert_se(get_process_cmdline(getpid_cached(), 4, true, &line) >= 0);
        assert_se(streq(line, "[.."));
        line = mfree(line);

        assert_se(get_process_cmdline(getpid_cached(), 5, true, &line) >= 0);
        assert_se(streq(line, "[..."));
        line = mfree(line);

        assert_se(get_process_cmdline(getpid_cached(), 6, true, &line) >= 0);
        assert_se(streq(line, "[...]"));
        line = mfree(line);

        assert_se(get_process_cmdline(getpid_cached(), 7, true, &line) >= 0);
        assert_se(streq(line, "[t...]"));
        line = mfree(line);

        assert_se(get_process_cmdline(getpid_cached(), 8, true, &line) >= 0);
        assert_se(streq(line, "[testa]"));
        line = mfree(line);

        /* A file consisting only of NUL bytes is expected to give the same results as an empty one */
        assert_se(write(fd, "\0\0\0\0\0\0\0\0\0", 10) == 10);

        assert_se(get_process_cmdline(getpid_cached(), 0, false, &line) == -ENOENT);

        assert_se(get_process_cmdline(getpid_cached(), 0, true, &line) >= 0);
        assert_se(streq(line, "[testa]"));
        line = mfree(line);

        /* Two NUL-separated arguments followed by NUL padding */
        assert_se(write(fd, "foo\0bar\0\0\0\0\0", 10) == 10);

        assert_se(get_process_cmdline(getpid_cached(), 0, false, &line) >= 0);
        assert_se(streq(line, "foo bar"));
        line = mfree(line);

        assert_se(get_process_cmdline(getpid_cached(), 0, true, &line) >= 0);
        assert_se(streq(line, "foo bar"));
        line = mfree(line);

        /* A third argument appended after the padding */
        assert_se(write(fd, "quux", 4) == 4);
        assert_se(get_process_cmdline(getpid_cached(), 0, false, &line) >= 0);
        assert_se(streq(line, "foo bar quux"));
        line = mfree(line);

        assert_se(get_process_cmdline(getpid_cached(), 0, true, &line) >= 0);
        assert_se(streq(line, "foo bar quux"));
        line = mfree(line);

        /* Expected results for "foo bar quux" at increasing length limits */
        assert_se(get_process_cmdline(getpid_cached(), 1, true, &line) >= 0);
        assert_se(streq(line, ""));
        line = mfree(line);

        assert_se(get_process_cmdline(getpid_cached(), 2, true, &line) >= 0);
        assert_se(streq(line, "."));
        line = mfree(line);

        assert_se(get_process_cmdline(getpid_cached(), 3, true, &line) >= 0);
        assert_se(streq(line, ".."));
        line = mfree(line);

        assert_se(get_process_cmdline(getpid_cached(), 4, true, &line) >= 0);
        assert_se(streq(line, "..."));
        line = mfree(line);

        assert_se(get_process_cmdline(getpid_cached(), 5, true, &line) >= 0);
        assert_se(streq(line, "f..."));
        line = mfree(line);

        assert_se(get_process_cmdline(getpid_cached(), 6, true, &line) >= 0);
        assert_se(streq(line, "fo..."));
        line = mfree(line);

        assert_se(get_process_cmdline(getpid_cached(), 7, true, &line) >= 0);
        assert_se(streq(line, "foo..."));
        line = mfree(line);

        assert_se(get_process_cmdline(getpid_cached(), 8, true, &line) >= 0);
        assert_se(streq(line, "foo..."));
        line = mfree(line);

        assert_se(get_process_cmdline(getpid_cached(), 9, true, &line) >= 0);
        assert_se(streq(line, "foo b..."));
        line = mfree(line);

        assert_se(get_process_cmdline(getpid_cached(), 10, true, &line) >= 0);
        assert_se(streq(line, "foo ba..."));
        line = mfree(line);

        assert_se(get_process_cmdline(getpid_cached(), 11, true, &line) >= 0);
        assert_se(streq(line, "foo bar..."));
        line = mfree(line);

        assert_se(get_process_cmdline(getpid_cached(), 12, true, &line) >= 0);
        assert_se(streq(line, "foo bar..."));
        line = mfree(line);

        assert_se(get_process_cmdline(getpid_cached(), 13, true, &line) >= 0);
        assert_se(streq(line, "foo bar quux"));
        line = mfree(line);

        assert_se(get_process_cmdline(getpid_cached(), 14, true, &line) >= 0);
        assert_se(streq(line, "foo bar quux"));
        line = mfree(line);

        assert_se(get_process_cmdline(getpid_cached(), 1000, true, &line) >= 0);
        assert_se(streq(line, "foo bar quux"));
        line = mfree(line);

        /* Back to an empty file, now with a process name that contains spaces */
        assert_se(ftruncate(fd, 0) >= 0);
        assert_se(prctl(PR_SET_NAME, "aaaa bbbb cccc") >= 0);

        assert_se(get_process_cmdline(getpid_cached(), 0, false, &line) == -ENOENT);

        assert_se(get_process_cmdline(getpid_cached(), 0, true, &line) >= 0);
        assert_se(streq(line, "[aaaa bbbb cccc]"));
        line = mfree(line);

        assert_se(get_process_cmdline(getpid_cached(), 10, true, &line) >= 0);
        assert_se(streq(line, "[aaaa...]"));
        line = mfree(line);

        assert_se(get_process_cmdline(getpid_cached(), 11, true, &line) >= 0);
        assert_se(streq(line, "[aaaa...]"));
        line = mfree(line);

        assert_se(get_process_cmdline(getpid_cached(), 12, true, &line) >= 0);
        assert_se(streq(line, "[aaaa b...]"));
        line = mfree(line);

        safe_close(fd);
        /* Exit the child directly instead of returning to the caller */
        _exit(EXIT_SUCCESS);
}
/* * Given a bdev (presumably blockdev-based), detect the fstype * by trying mounting (in a private mntns) it. * @bdev: bdev to investigate * @type: preallocated char* in which to write the fstype * @len: length of passed in char* * Returns length of fstype, of -1 on error */ static int detect_fs(struct bdev *bdev, char *type, int len) { int p[2], ret; size_t linelen; pid_t pid; FILE *f; char *sp1, *sp2, *sp3, *line = NULL; char *srcdev; if (!bdev || !bdev->src || !bdev->dest) return -1; srcdev = bdev->src; if (strcmp(bdev->type, "loop") == 0) srcdev = bdev->src + 5; process_lock(); ret = pipe(p); process_unlock(); if (ret < 0) return -1; if ((pid = fork()) < 0) return -1; if (pid > 0) { int status; process_lock(); close(p[1]); process_unlock(); memset(type, 0, len); ret = read(p[0], type, len-1); process_lock(); close(p[0]); process_unlock(); if (ret < 0) { SYSERROR("error reading from pipe"); wait(&status); return -1; } else if (ret == 0) { ERROR("child exited early - fstype not found"); wait(&status); return -1; } wait(&status); type[len-1] = '\0'; INFO("detected fstype %s for %s", type, srcdev); return ret; } process_unlock(); // we're no longer sharing if (unshare(CLONE_NEWNS) < 0) exit(1); ret = mount_unknow_fs(srcdev, bdev->dest, 0); if (ret < 0) { ERROR("failed mounting %s onto %s to detect fstype", srcdev, bdev->dest); exit(1); } // if symlink, get the real dev name char devpath[MAXPATHLEN]; char *l = linkderef(srcdev, devpath); if (!l) exit(1); f = fopen("/proc/self/mounts", "r"); if (!f) exit(1); while (getline(&line, &linelen, f) != -1) { sp1 = index(line, ' '); if (!sp1) exit(1); *sp1 = '\0'; if (strcmp(line, l)) continue; sp2 = index(sp1+1, ' '); if (!sp2) exit(1); *sp2 = '\0'; sp3 = index(sp2+1, ' '); if (!sp3) exit(1); *sp3 = '\0'; sp2++; if (write(p[1], sp2, strlen(sp2)) != strlen(sp2)) exit(1); exit(0); } exit(1); }
int main(int argc, char **argv) { char path[PATH_MAX], bpath[PATH_MAX], spath[PATH_MAX], bspath[PATH_MAX]; pid_t pid; int status; task_waiter_t t; test_init(argc, argv); task_waiter_init(&t); mount(NULL, "/", NULL, MS_SHARED, NULL); snprintf(path, sizeof(path), "%s/test", dirname); snprintf(bpath, sizeof(bpath), "%s/test.bind", dirname); snprintf(spath, sizeof(spath), "%s/test/sub", dirname); snprintf(bspath, sizeof(bspath), "%s/test.bind/sub", dirname); if (mkdir(dirname, 0700) || mkdir(path, 0700) || mkdir(spath, 0700) || mkdir(bpath, 0700)) { err("mkdir"); return 1; } pid = fork(); if (pid < 0) { err("fork"); return 1; } if (pid == 0) { unshare(CLONE_NEWNS); if (mount(path, bpath, NULL, MS_BIND, NULL)) { err("mount"); return 1; } task_waiter_complete(&t, 1); task_waiter_wait4(&t, 2); if (access(bspath, F_OK)) { fail("%s isn't accessiable", bspath); return 1; } if (umount2(bpath, MNT_DETACH)) { fail("umount"); return 1; } return 0; } task_waiter_wait4(&t, 1); if (mount("test", spath, "tmpfs", 0, NULL)) { err("mount"); return 1; } test_daemon(); test_waitsig(); task_waiter_complete(&t, 2); if (waitpid(pid, &status, 0) != pid) { err("waitpid %d", pid); return 1; } if (status) { err("%d/%d/%d/%d", WIFEXITED(status), WEXITSTATUS(status), WIFSIGNALED(status), WTERMSIG(status)); return 1; } pass(); return 0; }
/*
 * Run a command inside a chroot environment built in a fresh temporary
 * directory.
 *
 * The child enters new mount and user namespaces, maps its own uid/gid,
 * populates the temporary directory via bind("/", ...), chroots into it
 * and execs the command. The parent waits for the child, removes the
 * temporary directory and passes on the child's exit status, or
 * re-raises the signal that terminated it.
 */
int main(gint argc, gchar **argv) {
  const gchar *self = argv[0];
  argv++;

  if (argc < 2) {
    g_message("%s command [arguments...]", self);
    return 1;
  }

  /* the variable marks that a chrootenv is already active */
  if (!g_getenv("NIX_CHROOTENV"))
    g_setenv("NIX_CHROOTENV", "", TRUE);
  else
    g_warning("chrootenv doesn't stack!");

  g_autofree gchar *prefix =
      g_build_filename(g_get_tmp_dir(), "chrootenvXXXXXX", NULL);

  fail_if(!g_mkdtemp_full(prefix, 0755));

  pid_t cpid = fork();

  if (cpid > 0) {
    /* parent: wait for the child, then clean up the temporary tree */
    int wstatus;

    fail_if(waitpid(cpid, &wstatus, 0) != cpid);
    fail_if(nftw(prefix, nftw_remove, getdtablesize(),
                 FTW_DEPTH | FTW_MOUNT | FTW_PHYS));

    if (WIFEXITED(wstatus))
      return WEXITSTATUS(wstatus);

    if (WIFSIGNALED(wstatus))
      kill(getpid(), WTERMSIG(wstatus));

    return 1;
  } else if (cpid == 0) {
    /* child: ids must be read before entering the new user namespace */
    uid_t uid = getuid();
    gid_t gid = getgid();

    if (unshare(CLONE_NEWNS | CLONE_NEWUSER) < 0) {
      /* keep errno from unshare(); the calls below may overwrite it */
      int unshare_errno = errno;

      g_message("Requires Linux version >= 3.19 built with CONFIG_USER_NS");
      if (g_file_test("/proc/sys/kernel/unprivileged_userns_clone",
                      G_FILE_TEST_EXISTS))
        g_message("Run: sudo sysctl -w kernel.unprivileged_userns_clone=1");

      fail("unshare", unshare_errno);
    }

    spit("/proc/self/setgroups", "deny");
    spit("/proc/self/uid_map", "%d %d 1", uid, uid);
    spit("/proc/self/gid_map", "%d %d 1", gid, gid);

    bind("/", prefix);

    fail_if(chroot(prefix));
    fail_if(chdir("/"));
    fail_if(execvp(*argv, argv));
  } else {
    fail("fork", errno);
  }
}
/*
 * Thread entry point: gives the thread its own copy of the file
 * descriptor table and then closes stdout in that copy.
 * The argument is unused; always returns NULL.
 */
static void* start_thread(void* p) {
  (void)p;

  /* unshare the fd table first so the close() below is thread-local */
  test_assert(unshare(CLONE_FILES) == 0);
  test_assert(close(STDOUT_FILENO) == 0);

  return NULL;
}
// Entry point of the sandboxed child process.
//
// After the parent signals that base setup is done, this function
// configures the sandbox step by step (hostname, mount propagation,
// netfilter, filesystem, networking, environment, capabilities, rlimits,
// seccomp, cpu affinity, cgroup), drops privileges or enters a new user
// namespace, synchronizes once more with the parent for the UID/GID map,
// and finally execs the requested program, either directly or through
// a shell.
//
// sandbox_arg is unused. The function only returns (with 0) when
// execvp() fails.
// NOTE(review): returning 0 after a failed execvp() looks like it reports
// success to whoever collects this process' status - confirm.
int sandbox(void* sandbox_arg) {
	// Get rid of unused parameter warning
	(void)sandbox_arg;

	pid_t child_pid = getpid();
	if (arg_debug)
		printf("Initializing child process\n");

	// close each end of the unused pipes
	close(parent_to_child_fds[1]);
	close(child_to_parent_fds[0]);

	// wait for parent to do base setup
	wait_for_other(parent_to_child_fds[0]);

	// being PID 1 means we are running in our own PID namespace
	if (arg_debug && child_pid == 1)
		printf("PID namespace installed\n");

	//****************************
	// set hostname
	//****************************
	if (cfg.hostname) {
		if (sethostname(cfg.hostname, strlen(cfg.hostname)) < 0)
			errExit("sethostname");
	}

	//****************************
	// mount namespace
	//****************************
	// mount events are not forwarded between the host and the sandbox
	if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) < 0) {
		chk_chroot();
	}

	//****************************
	// netfilter
	//****************************
	if (arg_netfilter && any_bridge_configured()) { // assuming by default the client filter
		netfilter(arg_netfilter_file);
	}

	//****************************
	// trace pre-install
	//****************************
	if (arg_trace)
		fs_trace_preload();

	//****************************
	// configure filesystem
	//****************************
#ifdef HAVE_CHROOT
	if (cfg.chrootdir) {
		fs_chroot(cfg.chrootdir);

		// force caps and seccomp if not started as root
		if (getuid() != 0) {
			// force default seccomp inside the chroot, no keep or drop list
			// the list built on top of the default drop list is kept intact
			arg_seccomp = 1;
			if (arg_seccomp_list_drop) {
				free(arg_seccomp_list_drop);
				arg_seccomp_list_drop = NULL;
			}
			if (arg_seccomp_list_keep) {
				free(arg_seccomp_list_keep);
				arg_seccomp_list_keep = NULL;
			}

			// disable all capabilities
			if (arg_caps_default_filter || arg_caps_list)
				fprintf(stderr, "Warning: all capabilities disabled for a regular user during chroot\n");
			arg_caps_drop_all = 1;

			// drop all supplementary groups; the /etc/group file inside
			// the chroot is controlled by a regular user
			arg_nogroups = 1;

			printf("Dropping all Linux capabilities and enforcing default seccomp filter\n");
		}

		//****************************
		// trace pre-install, this time inside chroot
		//****************************
		if (arg_trace)
			fs_trace_preload();
	}
	else
#endif
	if (arg_overlay)
		fs_overlayfs();
	else
		fs_basic_fs();

	//****************************
	// set hostname in /etc/hostname
	//****************************
	if (cfg.hostname) {
		fs_hostname(cfg.hostname);
	}

	//****************************
	// apply the profile file
	//****************************
	if (cfg.profile)
		fs_blacklist(cfg.homedir);

	//****************************
	// private mode
	//****************************
	if (arg_private) {
		if (cfg.home_private) // --private=
			fs_private_homedir();
		else if (cfg.home_private_keep) // --private-home=
			fs_private_home_list();
		else // --private
			fs_private();
	}

	if (arg_private_dev)
		fs_private_dev();
	if (arg_private_etc)
		fs_private_etc_list();

	//****************************
	// install trace
	//****************************
	if (arg_trace)
		fs_trace();

	//****************************
	// update /proc, /dev, /boot directory
	//****************************
	fs_proc_sys_dev_boot();

	//****************************
	// networking
	//****************************
	if (arg_nonetwork) {
		net_if_up("lo");
		if (arg_debug)
			printf("Network namespace enabled, only loopback interface available\n");
	}
	else if (any_bridge_configured()) {
		// configure lo and eth0...eth3
		net_if_up("lo");

		// for each bridge: set the MAC address if one is configured,
		// then bring the interface up
		if (mac_not_zero(cfg.bridge0.macsandbox))
			net_config_mac(cfg.bridge0.devsandbox, cfg.bridge0.macsandbox);
		sandbox_if_up(&cfg.bridge0);

		if (mac_not_zero(cfg.bridge1.macsandbox))
			net_config_mac(cfg.bridge1.devsandbox, cfg.bridge1.macsandbox);
		sandbox_if_up(&cfg.bridge1);

		if (mac_not_zero(cfg.bridge2.macsandbox))
			net_config_mac(cfg.bridge2.devsandbox, cfg.bridge2.macsandbox);
		sandbox_if_up(&cfg.bridge2);

		if (mac_not_zero(cfg.bridge3.macsandbox))
			net_config_mac(cfg.bridge3.devsandbox, cfg.bridge3.macsandbox);
		sandbox_if_up(&cfg.bridge3);

		// add a default route
		if (cfg.defaultgw) {
			// set the default route
			if (net_add_route(0, 0, cfg.defaultgw))
				fprintf(stderr, "Warning: cannot configure default route\n");
		}

		if (arg_debug)
			printf("Network namespace enabled\n");
	}

	// if any dns server is configured, it is time to set it now
	fs_resolvconf();

	// print network configuration
	if (any_bridge_configured() || cfg.defaultgw || cfg.dns1) {
		printf("\n");
		if (any_bridge_configured())
			net_ifprint();
		if (cfg.defaultgw != 0)
			printf("Default gateway %d.%d.%d.%d\n", PRINT_IP(cfg.defaultgw));
		if (cfg.dns1 != 0)
			printf("DNS server %d.%d.%d.%d\n", PRINT_IP(cfg.dns1));
		if (cfg.dns2 != 0)
			printf("DNS server %d.%d.%d.%d\n", PRINT_IP(cfg.dns2));
		if (cfg.dns3 != 0)
			printf("DNS server %d.%d.%d.%d\n", PRINT_IP(cfg.dns3));
		printf("\n");
	}

	//****************************
	// start executable
	//****************************
	prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0); // kill the child in case the parent died

	// working directory: cfg.cwd if usable, otherwise "/" and then the
	// home directory when it exists
	int cwd = 0;
	if (cfg.cwd) {
		if (chdir(cfg.cwd) == 0)
			cwd = 1;
	}

	if (!cwd) {
		if (chdir("/") < 0)
			errExit("chdir");
		if (cfg.homedir) {
			struct stat s;
			if (stat(cfg.homedir, &s) == 0) {
				/* coverity[toctou] */
				if (chdir(cfg.homedir) < 0)
					errExit("chdir");
			}
		}
	}

	// set environment
	// fix qt 4.8
	if (setenv("QT_X11_NO_MITSHM", "1", 1) < 0)
		errExit("setenv");
	if (setenv("container", "firejail", 1) < 0) // LXC sets container=lxc,
		errExit("setenv");
	// cfg.shell is applied last and therefore wins over arg_zsh / arg_csh
	if (arg_zsh && setenv("SHELL", "/usr/bin/zsh", 1) < 0)
		errExit("setenv");
	if (arg_csh && setenv("SHELL", "/bin/csh", 1) < 0)
		errExit("setenv");
	if (cfg.shell && setenv("SHELL", cfg.shell, 1) < 0)
		errExit("setenv");

	// set prompt color to green
	//export PS1='\[\e[1;32m\][\u@\h \W]\$\[\e[0m\] '
	if (setenv("PROMPT_COMMAND", "export PS1=\"\\[\\e[1;32m\\][\\u@\\h \\W]\\$\\[\\e[0m\\] \"", 1) < 0)
		errExit("setenv");

	// set user-supplied environment variables
	env_apply();

	// set capabilities
	if (!arg_noroot)
		set_caps();

	// set rlimits
	set_rlimits();

	// set seccomp
#ifdef HAVE_SECCOMP
	// if a keep list is available, disregard the drop list
	if (arg_seccomp == 1) {
		if (arg_seccomp_list_keep)
			seccomp_filter_keep(); // this will also save the filter to MNT_DIR/seccomp file
		else
			seccomp_filter_drop(); // this will also save the filter to MNT_DIR/seccomp file
	}
#endif

	// set cpu affinity
	if (cfg.cpus) {
		save_cpu(); // save cpu affinity mask to MNT_DIR/cpu file
		set_cpu_affinity();
	}

	// save cgroup in MNT_DIR/cgroup file
	if (cfg.cgroup)
		save_cgroup();

	//****************************************
	// drop privileges or create a new user namespace
	//****************************************
	save_nogroups();
	if (arg_noroot) {
		int rv = unshare(CLONE_NEWUSER);
		if (rv == -1) {
			// fall back to dropping privileges when no user namespace
			// could be created
			fprintf(stderr, "Error: cannot mount a new user namespace\n");
			perror("unshare");
			drop_privs(arg_nogroups);
		}
	}
	else
		drop_privs(arg_nogroups);

	// notify parent that new user namespace has been created so a proper
	// UID/GID map can be setup
	notify_other(child_to_parent_fds[1]);
	close(child_to_parent_fds[1]);

	// wait for parent to finish setting up a proper UID/GID map
	wait_for_other(parent_to_child_fds[0]);
	close(parent_to_child_fds[0]);

	// somehow, the new user namespace resets capabilities;
	// we need to do them again
	if (arg_noroot) {
		set_caps();
		if (arg_debug)
			printf("User namespace (noroot) installed\n");
	}

	//****************************************
	// start the program without using a shell
	//****************************************
	if (arg_shell_none) {
		if (arg_debug) {
			int i;
			for (i = cfg.original_program_index; i < cfg.original_argc; i++) {
				if (cfg.original_argv[i] == NULL)
					break;
				printf("execvp argument %d: %s\n", i - cfg.original_program_index, cfg.original_argv[i]);
			}
		}

		if (!arg_command)
			printf("Child process initialized\n");
		execvp(cfg.original_argv[cfg.original_program_index], &cfg.original_argv[cfg.original_program_index]);
	}
	//****************************************
	// start the program using a shell
	//****************************************
	else {
		// choose the shell requested by the user, or use bash as default
		char *sh;
		if (cfg.shell)
			sh = cfg.shell;
		else if (arg_zsh)
			sh = "/usr/bin/zsh";
		else if (arg_csh)
			sh = "/bin/csh";
		else
			sh = "/bin/bash";

		// argv for the shell: sh -c [--] <command line>, NULL-terminated
		// (at most 4 entries plus the terminator)
		char *arg[5];
		int index = 0;
		arg[index++] = sh;
		arg[index++] = "-c";
		assert(cfg.command_line);
		if (arg_debug)
			printf("Starting %s\n", cfg.command_line);
		if (arg_doubledash)
			arg[index++] = "--";
		arg[index++] = cfg.command_line;
		arg[index] = NULL;
		assert(index < 5);

		if (arg_debug) {
			char *msg;
			if (asprintf(&msg, "sandbox %d, execvp into %s", sandbox_pid, cfg.command_line) == -1)
				errExit("asprintf");
			logmsg(msg);
			free(msg);
		}

		if (arg_debug) {
			int i;
			for (i = 0; i < 5; i++) {
				if (arg[i] == NULL)
					break;
				printf("execvp argument %d: %s\n", i, arg[i]);
			}
		}

		if (!arg_command)
			printf("Child process initialized\n");
		execvp(sh, arg);
	}

	// only reached when execvp() failed
	perror("execvp");
	return 0;
}
int main(int argc, char **argv) { char path[PATH_MAX], bpath[PATH_MAX], spath[PATH_MAX]; pid_t pid; int status; task_waiter_t t; test_init(argc, argv); task_waiter_init(&t); snprintf(path, sizeof(path), "%s/test", dirname); snprintf(bpath, sizeof(bpath), "%s/test.bind", dirname); snprintf(spath, sizeof(spath), "%s/test/sub", dirname); if (mkdir(dirname, 0700)) { pr_perror("mkdir"); return 1; } if (mount(NULL, "/", NULL, MS_SHARED, NULL)) { pr_perror("mount"); return 1; } #ifdef SHARED_BIND02 /* */ if (mount(dirname, dirname, "tmpfs", 0, NULL) || mount(NULL, dirname, NULL, MS_SHARED, NULL)) { pr_perror("mount"); return 1; } #endif if (mkdir(path, 0700) || mkdir(spath, 0700) || mkdir(bpath, 0700)) { pr_perror("mkdir"); return 1; } pid = fork(); if (pid < 0) { pr_perror("fork"); return 1; } if (pid == 0) { if (unshare(CLONE_NEWNS)) { pr_perror("unshare"); return 1; } if (mount(path, bpath, NULL, MS_BIND, NULL)) { pr_perror("mount"); return 1; } task_waiter_complete(&t, 1); task_waiter_wait4(&t, 2); if (umount(spath)) { task_waiter_complete(&t, 2); fail("umount"); return 1; } task_waiter_complete(&t, 3); task_waiter_wait4(&t, 4); return 0; } task_waiter_wait4(&t, 1); if (mount("test", spath, "tmpfs", 0, NULL)) { pr_perror("mount"); return 1; } test_daemon(); test_waitsig(); task_waiter_complete(&t, 2); task_waiter_wait4(&t, 3); if (umount(bpath)) { task_waiter_complete(&t, 2); fail("umount"); return 1; } task_waiter_complete(&t, 4); if (waitpid(pid, &status, 0) != pid) { pr_perror("waitpid %d", pid); return 1; } if (status) { pr_perror("%d/%d/%d/%d", WIFEXITED(status), WEXITSTATUS(status), WIFSIGNALED(status), WTERMSIG(status)); return 1; } pass(); return 0; }