static
bool test_unpriv_remount(const char *fstype, const char *mount_options,
			 int mount_flags, int remount_flags, int invalid_flags)
{
	pid_t child;

	child = fork();
	if (child == -1) {
		die("fork failed: %s\n",
			strerror(errno));
	}
	if (child != 0) { /* parent */
		pid_t pid;
		int status;
		pid = waitpid(child, &status, 0);
		if (pid == -1) {
			die("waitpid failed: %s\n",
				strerror(errno));
		}
		if (pid != child) {
			die("waited for %d got %d\n",
				child, pid);
		}
		if (!WIFEXITED(status)) {
			die("child did not terminate cleanly\n");
		}
		return WEXITSTATUS(status) == EXIT_SUCCESS ? true : false;
	}

	create_and_enter_userns();
	if (unshare(CLONE_NEWNS) != 0) {
		die("unshare(CLONE_NEWNS) failed: %s\n",
			strerror(errno));
	}

	if (mount("testing", "/tmp", fstype, mount_flags, mount_options) != 0) {
		die("mount of %s with options '%s' on /tmp failed: %s\n",
		    fstype,
		    mount_options? mount_options : "",
		    strerror(errno));
	}

	create_and_enter_userns();

	if (unshare(CLONE_NEWNS) != 0) {
		die("unshare(CLONE_NEWNS) failed: %s\n",
			strerror(errno));
	}

	if (mount("/tmp", "/tmp", "none",
		  MS_REMOUNT | MS_BIND | remount_flags, NULL) != 0) {
		/* system("cat /proc/self/mounts"); */
		die("remount of /tmp failed: %s\n",
		    strerror(errno));
	}

	if (mount("/tmp", "/tmp", "none",
		  MS_REMOUNT | MS_BIND | invalid_flags, NULL) == 0) {
		/* system("cat /proc/self/mounts"); */
		die("remount of /tmp with invalid flags "
		    "succeeded unexpectedly\n");
	}
	exit(EXIT_SUCCESS);
}
Exemple #2
0
int sandbox(void* sandbox_arg) {
	// Get rid of unused parameter warning
	(void)sandbox_arg;

	pid_t child_pid = getpid();
	if (arg_debug)
		printf("Initializing child process\n");	

 	// close each end of the unused pipes
 	close(parent_to_child_fds[1]);
 	close(child_to_parent_fds[0]);
 
 	// wait for parent to do base setup
 	wait_for_other(parent_to_child_fds[0]);

	if (arg_debug && child_pid == 1)
		printf("PID namespace installed\n");

	//****************************
	// set hostname
	//****************************
	if (cfg.hostname) {
		if (sethostname(cfg.hostname, strlen(cfg.hostname)) < 0)
			errExit("sethostname");
	}

	//****************************
	// mount namespace
	//****************************
	// mount events are not forwarded between the host the sandbox
	if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) < 0) {
		chk_chroot();
	}
	
	
	//****************************
	// log sandbox data
	//****************************
	if (cfg.name)
		fs_logger2("sandbox name:", cfg.name);
	fs_logger2int("sandbox pid:", (int) sandbox_pid);
	if (cfg.chrootdir)
		fs_logger("sandbox filesystem: chroot");
	else if (arg_overlay)	
		fs_logger("sandbox filesystem: overlay");
	else
		fs_logger("sandbox filesystem: local");
	fs_logger("install mount namespace");
	
	//****************************
	// netfilter etc.
	//****************************
	if (arg_netfilter && any_bridge_configured()) { // assuming by default the client filter
		netfilter(arg_netfilter_file);
	}
	if (arg_netfilter6 && any_bridge_configured()) { // assuming by default the client filter
		netfilter6(arg_netfilter6_file);
	}

	// load IBUS env variables
	if (arg_nonetwork || any_bridge_configured() || any_interface_configured()) {
		// do nothing - there are problems with ibus version 1.5.11
	}
	else
		env_ibus_load();
	
	// grab a copy of cp command
	fs_build_cp_command();
	
	// trace pre-install
	if (arg_trace || arg_tracelog)
		fs_trace_preload();

	//****************************
	// configure filesystem
	//****************************
#ifdef HAVE_CHROOT		
	if (cfg.chrootdir) {
		fs_chroot(cfg.chrootdir);
		// redo cp command
		fs_build_cp_command();
		
		// force caps and seccomp if not started as root
		if (getuid() != 0) {
			// force default seccomp inside the chroot, no keep or drop list
			// the list build on top of the default drop list is kept intact
			arg_seccomp = 1;
			if (cfg.seccomp_list_drop) {
				free(cfg.seccomp_list_drop);
				cfg.seccomp_list_drop = NULL;
			}
			if (cfg.seccomp_list_keep) {
				free(cfg.seccomp_list_keep);
				cfg.seccomp_list_keep = NULL;
			}
			
			// disable all capabilities
			if (arg_caps_default_filter || arg_caps_list)
				fprintf(stderr, "Warning: all capabilities disabled for a regular user during chroot\n");
			arg_caps_drop_all = 1;
			
			// drop all supplementary groups; /etc/group file inside chroot
			// is controlled by a regular usr
			arg_nogroups = 1;
			if (!arg_quiet)
				printf("Dropping all Linux capabilities and enforcing default seccomp filter\n");
		}
		else
			arg_seccomp = 1;
						
		//****************************
		// trace pre-install, this time inside chroot
		//****************************
		if (arg_trace || arg_tracelog)
			fs_trace_preload();
	}
	else 
#endif		
	if (arg_overlay)	
		fs_overlayfs();
	else
		fs_basic_fs();
	

	//****************************
	// set hostname in /etc/hostname
	//****************************
	if (cfg.hostname) {
		fs_hostname(cfg.hostname);
	}
	
	//****************************
	// private mode
	//****************************
	if (arg_private) {
		if (cfg.home_private)	// --private=
			fs_private_homedir();
		else if (cfg.home_private_keep) // --private-home=
			fs_private_home_list();
		else // --private
			fs_private();
	}
	
	if (arg_private_dev)
		fs_private_dev();
	if (arg_private_etc) {
		fs_private_etc_list();
		// create /etc/ld.so.preload file again
		if (arg_trace || arg_tracelog)
			fs_trace_preload();
	}
	if (arg_private_bin)
		fs_private_bin_list();
	
	//****************************
	// apply the profile file
	//****************************
	if (cfg.profile) {
		// apply all whitelist commands ... 
		fs_whitelist();
		
		// ... followed by blacklist commands
		fs_blacklist();
	}
	
	//****************************
	// install trace
	//****************************
	if (arg_trace || arg_tracelog)
		fs_trace();
		
	//****************************
	// update /proc, /dev, /boot directorymy
	//****************************
	fs_proc_sys_dev_boot();
	
	//****************************
	// --nosound and fix for pulseaudio 7.0
	//****************************
	if (arg_nosound)
		pulseaudio_disable();
	else
		pulseaudio_init();
	
	//****************************
	// networking
	//****************************
	if (arg_nonetwork) {
		net_if_up("lo");
		if (arg_debug)
			printf("Network namespace enabled, only loopback interface available\n");
	}
	else if (any_bridge_configured() || any_interface_configured()) {
		// configure lo and eth0...eth3
		net_if_up("lo");
		
		if (mac_not_zero(cfg.bridge0.macsandbox))
			net_config_mac(cfg.bridge0.devsandbox, cfg.bridge0.macsandbox);
		sandbox_if_up(&cfg.bridge0);
		
		if (mac_not_zero(cfg.bridge1.macsandbox))
			net_config_mac(cfg.bridge1.devsandbox, cfg.bridge1.macsandbox);
		sandbox_if_up(&cfg.bridge1);
		
		if (mac_not_zero(cfg.bridge2.macsandbox))
			net_config_mac(cfg.bridge2.devsandbox, cfg.bridge2.macsandbox);
		sandbox_if_up(&cfg.bridge2);
		
		if (mac_not_zero(cfg.bridge3.macsandbox))
			net_config_mac(cfg.bridge3.devsandbox, cfg.bridge3.macsandbox);
		sandbox_if_up(&cfg.bridge3);
		
		// add a default route
		if (cfg.defaultgw) {
			// set the default route
			if (net_add_route(0, 0, cfg.defaultgw))
				fprintf(stderr, "Warning: cannot configure default route\n");
		}
		
		// enable interfaces
		if (cfg.interface0.configured && cfg.interface0.ip) {
			if (arg_debug)
				printf("Configuring %d.%d.%d.%d address on interface %s\n", PRINT_IP(cfg.interface0.ip), cfg.interface0.dev);
			net_if_ip(cfg.interface0.dev, cfg.interface0.ip, cfg.interface0.mask, cfg.interface0.mtu);
			net_if_up(cfg.interface0.dev);
		}			
		if (cfg.interface1.configured && cfg.interface1.ip) {
			if (arg_debug)
				printf("Configuring %d.%d.%d.%d address on interface %s\n", PRINT_IP(cfg.interface1.ip), cfg.interface1.dev);
			net_if_ip(cfg.interface1.dev, cfg.interface1.ip, cfg.interface1.mask, cfg.interface1.mtu);
			net_if_up(cfg.interface1.dev);
		}			
		if (cfg.interface2.configured && cfg.interface2.ip) {
			if (arg_debug)
				printf("Configuring %d.%d.%d.%d address on interface %s\n", PRINT_IP(cfg.interface2.ip), cfg.interface2.dev);
			net_if_ip(cfg.interface2.dev, cfg.interface2.ip, cfg.interface2.mask, cfg.interface2.mtu);
			net_if_up(cfg.interface2.dev);
		}			
		if (cfg.interface3.configured && cfg.interface3.ip) {
			if (arg_debug)
				printf("Configuring %d.%d.%d.%d address on interface %s\n", PRINT_IP(cfg.interface3.ip), cfg.interface3.dev);
			net_if_ip(cfg.interface3.dev, cfg.interface3.ip, cfg.interface3.mask, cfg.interface3.mtu);
			net_if_up(cfg.interface3.dev);
		}			
			
		if (arg_debug)
			printf("Network namespace enabled\n");
	}
	
	// if any dns server is configured, it is time to set it now
	fs_resolvconf();
	fs_logger_print();
	fs_logger_change_owner();

	// print network configuration
	if (!arg_quiet) {
		if (any_bridge_configured() || any_interface_configured() || cfg.defaultgw || cfg.dns1) {
			printf("\n");
			if (any_bridge_configured() || any_interface_configured())
				net_ifprint();
			if (cfg.defaultgw != 0)
				printf("Default gateway %d.%d.%d.%d\n", PRINT_IP(cfg.defaultgw));
			if (cfg.dns1 != 0)
				printf("DNS server %d.%d.%d.%d\n", PRINT_IP(cfg.dns1));
			if (cfg.dns2 != 0)
				printf("DNS server %d.%d.%d.%d\n", PRINT_IP(cfg.dns2));
			if (cfg.dns3 != 0)
				printf("DNS server %d.%d.%d.%d\n", PRINT_IP(cfg.dns3));
			printf("\n");
		}
	}
	
	fs_delete_cp_command();

	//****************************
	// set application environment
	//****************************
	prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0); // kill the child in case the parent died
	int cwd = 0;
	if (cfg.cwd) {
		if (chdir(cfg.cwd) == 0)
			cwd = 1;
	}
	
	if (!cwd) {
		if (chdir("/") < 0)
			errExit("chdir");
		if (cfg.homedir) {
			struct stat s;
			if (stat(cfg.homedir, &s) == 0) {
				/* coverity[toctou] */
				if (chdir(cfg.homedir) < 0)
					errExit("chdir");
			}
		}
	}
	
	// set environment
	env_defaults();
	
	// set user-supplied environment variables
	env_apply();

	//****************************
	// set security filters
	//****************************
	// set capabilities
	if (!arg_noroot)
		set_caps();

	// set rlimits
	set_rlimits();

	// set seccomp
#ifdef HAVE_SECCOMP
	// install protocol filter
	if (cfg.protocol) {
		protocol_filter();	// install filter	
		protocol_filter_save();	// save filter in PROTOCOL_CFG
	}

	// if a keep list is available, disregard the drop list
	if (arg_seccomp == 1) {
		if (cfg.seccomp_list_keep)
			seccomp_filter_keep();
		else if (cfg.seccomp_list_errno)
			seccomp_filter_errno(); 
		else
			seccomp_filter_drop();
	}
#endif

	// set cpu affinity
	if (cfg.cpus) {
		save_cpu(); // save cpu affinity mask to CPU_CFG file
		set_cpu_affinity();
	}
	
	// save cgroup in CGROUP_CFG file
	if (cfg.cgroup)
		save_cgroup();

	//****************************************
	// drop privileges or create a new user namespace
	//****************************************
	save_nogroups();
	if (arg_noroot) {
		int rv = unshare(CLONE_NEWUSER);
		if (rv == -1) {
			fprintf(stderr, "Error: cannot mount a new user namespace\n");
			perror("unshare");
			drop_privs(arg_nogroups);
		}
	}
	else
		drop_privs(arg_nogroups);
 	
	// notify parent that new user namespace has been created so a proper
 	// UID/GID map can be setup
 	notify_other(child_to_parent_fds[1]);
 	close(child_to_parent_fds[1]);
 
 	// wait for parent to finish setting up a proper UID/GID map
 	wait_for_other(parent_to_child_fds[0]);
 	close(parent_to_child_fds[0]);

	// somehow, the new user namespace resets capabilities;
	// we need to do them again
	if (arg_noroot) {
		set_caps();
		if (arg_debug)
			printf("noroot user namespace installed\n");
	}


	//****************************************
	// fork the application and monitor it
	//****************************************
	pid_t app_pid = fork();
	if (app_pid == -1)
		errExit("fork");
		
	if (app_pid == 0) {
		prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0); // kill the child in case the parent died
		start_application();	// start app
	}

	monitor_application(app_pid);	// monitor application
	
	return 0;
}
Exemple #3
0
static int test_pppoe_server(sd_event *e) {
        sd_netlink *rtnl;
        sd_netlink_message *m;
        pid_t pid;
        int r, client_ifindex, server_ifindex;

        r = unshare(CLONE_NEWNET);
        if (r < 0 && errno == EPERM)
                return EXIT_TEST_SKIP;

        assert_se(r >= 0);

        assert_se(sd_netlink_open(&rtnl) >= 0);
        assert_se(sd_netlink_attach_event(rtnl, e, 0) >= 0);

        assert_se(sd_rtnl_message_new_link(rtnl, &m, RTM_NEWLINK, 0) >= 0);
        assert_se(sd_netlink_message_append_string(m, IFLA_IFNAME, "pppoe-server") >= 0);
        assert_se(sd_netlink_message_open_container(m, IFLA_LINKINFO) >= 0);
        assert_se(sd_netlink_message_open_container_union(m, IFLA_INFO_DATA, "veth") >= 0);
        assert_se(sd_netlink_message_open_container(m, VETH_INFO_PEER) >= 0);
        assert_se(sd_netlink_message_append_string(m, IFLA_IFNAME, "pppoe-client") >= 0);
        assert_se(sd_netlink_message_close_container(m) >= 0);
        assert_se(sd_netlink_message_close_container(m) >= 0);
        assert_se(sd_netlink_message_close_container(m) >= 0);
        assert_se(sd_netlink_call(rtnl, m, 0, NULL) >= 0);

        client_ifindex = (int) if_nametoindex("pppoe-client");
        assert_se(client_ifindex > 0);
        server_ifindex = (int) if_nametoindex("pppoe-server");
        assert_se(server_ifindex > 0);

        m = sd_netlink_message_unref(m);
        assert_se(sd_rtnl_message_new_link(rtnl, &m, RTM_SETLINK, client_ifindex) >= 0);
        assert_se(sd_rtnl_message_link_set_flags(m, IFF_UP, IFF_UP) >= 0);
        assert_se(sd_netlink_call(rtnl, m, 0, NULL) >= 0);

        m = sd_netlink_message_unref(m);
        assert_se(sd_rtnl_message_new_link(rtnl, &m, RTM_SETLINK, server_ifindex) >= 0);
        assert_se(sd_rtnl_message_link_set_flags(m, IFF_UP, IFF_UP) >= 0);
        assert_se(sd_netlink_call(rtnl, m, 0, NULL) >= 0);

        pid = fork();
        assert_se(pid >= 0);
        if (pid == 0) {
                /* let the client send some discover messages before the server is started */
                sleep(2);

                /* TODO: manage pppoe-server-options */
                execlp("pppoe-server", "pppoe-server", "-F",
                       "-I", "pppoe-server",
                       "-C", "Test-AC",
                       "-S", "Service-Default",
                       "-S", "Service-First-Auxiliary",
                       "-S", "Service-Second-Auxiliary",
                       NULL);
                assert_not_reached("failed to execute pppoe-server. not installed?");
        }

        client_run("pppoe-client", e);

        assert_se(kill(pid, SIGTERM) >= 0);
        assert_se(wait_for_terminate(pid, NULL) >= 0);

        assert_se(!sd_netlink_message_unref(m));
        assert_se(!sd_netlink_unref(rtnl));

        return EXIT_SUCCESS;
}
int import_fork_tar_x(const char *path, pid_t *ret) {
        _cleanup_close_pair_ int pipefd[2] = { -1, -1 };
        pid_t pid;
        int r;

        assert(path);
        assert(ret);

        if (pipe2(pipefd, O_CLOEXEC) < 0)
                return log_error_errno(errno, "Failed to create pipe for tar: %m");

        pid = fork();
        if (pid < 0)
                return log_error_errno(errno, "Failed to fork off tar: %m");

        if (pid == 0) {
                int null_fd;
                uint64_t retain =
                        (1ULL << CAP_CHOWN) |
                        (1ULL << CAP_FOWNER) |
                        (1ULL << CAP_FSETID) |
                        (1ULL << CAP_MKNOD) |
                        (1ULL << CAP_SETFCAP) |
                        (1ULL << CAP_DAC_OVERRIDE);

                /* Child */

                reset_all_signal_handlers();
                reset_signal_mask();
                assert_se(prctl(PR_SET_PDEATHSIG, SIGTERM) == 0);

                pipefd[1] = safe_close(pipefd[1]);

                if (dup2(pipefd[0], STDIN_FILENO) != STDIN_FILENO) {
                        log_error_errno(errno, "Failed to dup2() fd: %m");
                        _exit(EXIT_FAILURE);
                }

                if (pipefd[0] != STDIN_FILENO)
                        pipefd[0] = safe_close(pipefd[0]);

                null_fd = open("/dev/null", O_WRONLY|O_NOCTTY);
                if (null_fd < 0) {
                        log_error_errno(errno, "Failed to open /dev/null: %m");
                        _exit(EXIT_FAILURE);
                }

                if (dup2(null_fd, STDOUT_FILENO) != STDOUT_FILENO) {
                        log_error_errno(errno, "Failed to dup2() fd: %m");
                        _exit(EXIT_FAILURE);
                }

                if (null_fd != STDOUT_FILENO)
                        null_fd = safe_close(null_fd);

                fd_cloexec(STDIN_FILENO, false);
                fd_cloexec(STDOUT_FILENO, false);
                fd_cloexec(STDERR_FILENO, false);

                if (unshare(CLONE_NEWNET) < 0)
                        log_error_errno(errno, "Failed to lock tar into network namespace, ignoring: %m");

                r = capability_bounding_set_drop(~retain, true);
                if (r < 0)
                        log_error_errno(r, "Failed to drop capabilities, ignoring: %m");

                execlp("tar", "tar", "--numeric-owner", "-C", path, "-px", NULL);
                log_error_errno(errno, "Failed to execute tar: %m");
                _exit(EXIT_FAILURE);
        }

        pipefd[0] = safe_close(pipefd[0]);
        r = pipefd[1];
        pipefd[1] = -1;

        *ret = pid;

        return r;
}
Exemple #5
0
int main(gint argc, gchar **argv) {
  const gchar *self = *argv++;

  if (argc < 2) {
    g_message("%s command [arguments...]", self);
    return 1;
  }

  g_autofree const gchar *prefix = create_tmpdir();

  pid_t cpid = fork();

  if (cpid < 0)
    fail("fork", errno);

  else if (cpid == 0) {
    uid_t uid = getuid();
    gid_t gid = getgid();

    if (unshare(CLONE_NEWNS | CLONE_NEWUSER) < 0) {
      int unshare_errno = errno;

      g_message("Requires Linux version >= 3.19 built with CONFIG_USER_NS");
      if (g_file_test("/proc/sys/kernel/unprivileged_userns_clone",
                      G_FILE_TEST_EXISTS))
        g_message("Run: sudo sysctl -w kernel.unprivileged_userns_clone=1");

      fail("unshare", unshare_errno);
    }

    spit("/proc/self/setgroups", "deny");
    spit("/proc/self/uid_map", "%d %d 1", uid, uid);
    spit("/proc/self/gid_map", "%d %d 1", gid, gid);

    // If there is a /host directory, assume this is nested chrootenv and use it as host instead.
    gboolean nested_host = g_file_test("/host", G_FILE_TEST_EXISTS | G_FILE_TEST_IS_DIR);
    g_autofree const gchar *host = nested_host ? "/host" : "/";

    bind(host, prefix);

    // Replace /host by an actual (inner) /host.
    if (nested_host) {
      fail_if(g_mkdir("/real-host", 0755));
      fail_if(mount("/host/host", "/real-host", NULL, MS_BIND | MS_REC, NULL));
      // For some reason umount("/host") returns EBUSY even immediately after
      // pivot_root. We detach it at least to keep `/proc/mounts` from blowing
      // up in nested cases.
      fail_if(umount2("/host", MNT_DETACH));
      fail_if(mount("/real-host", "/host", NULL, MS_MOVE, NULL));
      fail_if(rmdir("/real-host"));
    }

    fail_if(chdir("/"));
    fail_if(execvp(*argv, argv));
  }

  else {
    int status;

    fail_if(waitpid(cpid, &status, 0) != cpid);
    fail_if(rmdir(prefix));

    if (WIFEXITED(status))
      return WEXITSTATUS(status);

    else if (WIFSIGNALED(status))
      kill(getpid(), WTERMSIG(status));

    return 1;
  }
}
Exemple #6
0
int main(int argc, char **argv)
{
	int ret = -1, fd, status;
	char path[PATH_MAX];
	pid_t pid;

	if (!getenv("ZDTM_NEWNS")) {
		if (mount_and_add(cgname, "test") < 0)
			return -1;

		if (unshare(CLONE_NEWCGROUP) < 0) {
			pr_perror("unshare");
			goto out;
		}
	}

	test_init(argc, argv);

	test_daemon();
	test_waitsig();

	sprintf(path, "name=%s", cgname);

	/* first check that the task is in zdtmtst:/ */
	if (!pid_in_cgroup(getpid(), path, "/")) {
		fail("pid not in cgroup /");
		goto out;
	}

	/* now check that the task is in the right place in a ns by setnsing to
	 * someone else's ns and looking there.
	 */
	pid = fork();
	if (pid < 0) {
		pr_perror("fork");
		goto out;
	}

	if (pid == 0) {
		sprintf(path, "/proc/%d/ns/cgroup", 1);
		fd = open(path, O_RDONLY);
		if (fd < 0) {
			pr_perror("open");
			exit(1);
		}

		ret = setns(fd, CLONE_NEWCGROUP);
		close(fd);
		if (ret < 0) {
			pr_perror("setns");
			exit(1);
		}

		sprintf(path, "name=%s", cgname);
		if (!pid_in_cgroup(getppid(), path, "/test")) {
			fail("pid not in cgroup %s", path);
			exit(1);
		}

		exit(0);
	}

	if (pid != waitpid(pid, &status, 0)) {
		pr_err("wrong pid");
		goto out;
	}

	if (!WIFEXITED(status) || WEXITSTATUS(status)) {
		pr_err("got bad exit status %d\n", status);
		goto out;
	}

	ret = 0;
	pass();

out:
	sprintf(path, "%s/%s/test", dirname, cgname);
	rmdir(path);
	sprintf(path, "%s/%s", dirname, cgname);
	umount(path);
	rmdir(path);
	rmdir(dirname);
	return ret;
}
Exemple #7
0
int main(int argc, char *argv[])
{
    int c;
    int fd;
    char path[PATH_MAX];
    int nsid;
    int pid;
    char *cwd = get_current_dir_name();

    static struct sched_param sp;
    while ((c = getopt(argc, argv, "+cdnpa:g:r:vh")) != -1)
        switch(c) {
        case 'c':
            /* close file descriptors except stdin/out/error */
            for (fd = getdtablesize(); fd > 2; fd--)
                close(fd);
            break;
        case 'd':
            /* detach from tty */
            if (getpgrp() == getpid()) {
                switch(fork()) {
                    case -1:
                        perror("fork");
                        return 1;
                    case 0:     /* child */
                        break;
                    default:    /* parent */
                        return 0;
                }
            }
            setsid();
            break;
        case 'n':
            /* run in network and mount namespaces */
            if (unshare(CLONE_NEWNET|CLONE_NEWNS) == -1) {
                perror("unshare");
                return 1;
            }
            /* mount sysfs to pick up the new network namespace */
            if (mount("sysfs", "/sys", "sysfs", MS_MGC_VAL, NULL) == -1) {
                perror("mount");
                return 1;
            }
            break;
        case 'p':
            /* print pid */
            printf("\001%d\n", getpid());
            fflush(stdout);
            break;
        case 'a':
            /* Attach to pid's network namespace and mount namespace */
            pid = atoi(optarg);
            sprintf(path, "/proc/%d/ns/net", pid);
            nsid = open(path, O_RDONLY);
            if (nsid < 0) {
                perror(path);
                return 1;
            }
            if (setns(nsid, 0) != 0) {
                perror("setns");
                return 1;
            }
            /* Plan A: call setns() to attach to mount namespace */
            sprintf(path, "/proc/%d/ns/mnt", pid);
            nsid = open(path, O_RDONLY);
            if (nsid < 0 || setns(nsid, 0) != 0) {
                /* Plan B: chroot/chdir into pid's root file system */
                sprintf(path, "/proc/%d/root", pid);
                if (chroot(path) < 0) {
                    perror(path);
                    return 1;
                }
            }
            /* chdir to correct working directory */
            if (chdir(cwd) != 0) {
                perror(cwd);
                return 1;
            }
            break;
        case 'g':
            /* Attach to cgroup */
            cgroup(optarg);
            break;
        case 'r':
            /* Set RT scheduling priority */
            sp.sched_priority = atoi(optarg);
            if (sched_setscheduler(getpid(), SCHED_RR, &sp) < 0) {
                perror("sched_setscheduler");
                return 1;
            }
            break;
        case 'v':
            printf("%s\n", VERSION);
            exit(0);
        case 'h':
            usage(argv[0]);
            exit(0);
        default:
            usage(argv[0]);
            exit(1);
        }

    if (optind < argc) {
        execvp(argv[optind], &argv[optind]);
        perror(argv[optind]);
        return 1;
    }

    usage(argv[0]);

    return 0;
}
int sc_create_or_join_ns_group(struct sc_ns_group *group,
			       struct sc_apparmor *apparmor,
			       const char *base_snap_name,
			       const char *snap_name)
{
	// Open the mount namespace file.
	char mnt_fname[PATH_MAX] = { 0 };
	sc_must_snprintf(mnt_fname, sizeof mnt_fname, "%s%s", group->name,
			 SC_NS_MNT_FILE);
	int mnt_fd SC_CLEANUP(sc_cleanup_close) = -1;
	// NOTE: There is no O_EXCL here because the file can be around but
	// doesn't have to be a mounted namespace.
	//
	// If the mounted namespace is discarded with
	// sc_discard_preserved_ns_group() it will revert to a regular file.  If
	// snap-confine is killed for whatever reason after the file is created but
	// before the file is bind-mounted it will also be a regular file.
	mnt_fd = openat(group->dir_fd, mnt_fname,
			O_CREAT | O_RDONLY | O_CLOEXEC | O_NOFOLLOW, 0600);
	if (mnt_fd < 0) {
		die("cannot open mount namespace file for namespace group %s",
		    group->name);
	}
	// Check if we got an nsfs-based or procfs file or a regular file. This can
	// be reliably tested because nsfs has an unique filesystem type
	// NSFS_MAGIC.  On older kernels that don't support nsfs yet we can look
	// for PROC_SUPER_MAGIC instead. 
	// We can just ensure that this is the case thanks to fstatfs.
	struct statfs ns_statfs_buf;
	if (fstatfs(mnt_fd, &ns_statfs_buf) < 0) {
		die("cannot perform fstatfs() on the mount namespace file descriptor");
	}
	// Stat the mount namespace as well, this is later used to check if the
	// namespace is used by other processes if we are considering discarding a
	// stale namespace.
	struct stat ns_stat_buf;
	if (fstat(mnt_fd, &ns_stat_buf) < 0) {
		die("cannot perform fstat() on the mount namespace file descriptor");
	}
#ifndef NSFS_MAGIC
// Account for kernel headers old enough to not know about NSFS_MAGIC.
#define NSFS_MAGIC 0x6e736673
#endif
	if (ns_statfs_buf.f_type == NSFS_MAGIC
	    || ns_statfs_buf.f_type == PROC_SUPER_MAGIC) {

		// Inspect and perhaps discard the preserved mount namespace.
		if (sc_inspect_and_maybe_discard_stale_ns
		    (mnt_fd, snap_name, base_snap_name) == EAGAIN) {
			return EAGAIN;
		}
		// Remember the vanilla working directory so that we may attempt to restore it later.
		char *vanilla_cwd SC_CLEANUP(sc_cleanup_string) = NULL;
		vanilla_cwd = get_current_dir_name();
		if (vanilla_cwd == NULL) {
			die("cannot get the current working directory");
		}
		// Move to the mount namespace of the snap we're trying to start.
		debug
		    ("attempting to re-associate the mount namespace with the namespace group %s",
		     group->name);
		if (setns(mnt_fd, CLONE_NEWNS) < 0) {
			die("cannot re-associate the mount namespace with namespace group %s", group->name);
		}
		debug
		    ("successfully re-associated the mount namespace with the namespace group %s",
		     group->name);

		// Try to re-locate back to vanilla working directory. This can fail
		// because that directory is no longer present.
		if (chdir(vanilla_cwd) != 0) {
			debug
			    ("cannot remain in %s, moving to the void directory",
			     vanilla_cwd);
			if (chdir(SC_VOID_DIR) != 0) {
				die("cannot change directory to %s",
				    SC_VOID_DIR);
			}
			debug("successfully moved to %s", SC_VOID_DIR);
		}
		return 0;
	}
	debug("initializing new namespace group %s", group->name);
	// Create a new namespace and ask the caller to populate it.
	// For rationale of forking see this:
	// https://lists.linuxfoundation.org/pipermail/containers/2013-August/033386.html
	//
	// The eventfd created here is used to synchronize the child and the parent
	// processes. It effectively tells the child to perform the capture
	// operation.
	group->event_fd = eventfd(0, EFD_CLOEXEC);
	if (group->event_fd < 0) {
		die("cannot create eventfd for mount namespace capture");
	}
	debug("forking support process for mount namespace capture");
	// Store the PID of the "parent" process. This done instead of calls to
	// getppid() because then we can reliably track the PID of the parent even
	// if the child process is re-parented.
	pid_t parent = getpid();
	// Glibc defines pid as a signed 32bit integer. There's no standard way to
	// print pid's portably so this is the best we can do.
	pid_t pid = fork();
	debug("forked support process has pid %d", (int)pid);
	if (pid < 0) {
		die("cannot fork support process for mount namespace capture");
	}
	if (pid == 0) {
		// This is the child process which will capture the mount namespace.
		//
		// It will do so by bind-mounting the SC_NS_MNT_FILE after the parent
		// process calls unshare() and finishes setting up the namespace
		// completely.
		// Change the hat to a sub-profile that has limited permissions
		// necessary to accomplish the capture of the mount namespace.
		debug
		    ("changing apparmor hat of the support process for mount namespace capture");
		sc_maybe_aa_change_hat(apparmor,
				       "mount-namespace-capture-helper", 0);
		// Configure the child to die as soon as the parent dies. In an odd
		// case where the parent is killed then we don't want to complete our
		// task or wait for anything.
		if (prctl(PR_SET_PDEATHSIG, SIGINT, 0, 0, 0) < 0) {
			die("cannot set parent process death notification signal to SIGINT");
		}
		// Check that parent process is still alive. If this is the case then
		// we can *almost* reliably rely on the PR_SET_PDEATHSIG signal to wake
		// us up from eventfd_read() below. In the rare case that the PID numbers
		// overflow and the now-dead parent PID is recycled we will still hang
		// forever on the read from eventfd below.
		debug("ensuring that parent process is still alive");
		if (kill(parent, 0) < 0) {
			switch (errno) {
			case ESRCH:
				debug("parent process has already terminated");
				abort();
			default:
				die("cannot ensure that parent process is still alive");
				break;
			}
		}
		if (fchdir(group->dir_fd) < 0) {
			die("cannot move process for mount namespace capture to namespace group directory");
		}
		debug
		    ("waiting for a eventfd data from the parent process to continue");
		eventfd_t value = 0;
		sc_enable_sanity_timeout();
		if (eventfd_read(group->event_fd, &value) < 0) {
			die("cannot read expected data from eventfd");
		}
		sc_disable_sanity_timeout();
		debug
		    ("capturing mount namespace of process %d in namespace group %s",
		     (int)parent, group->name);
		char src[PATH_MAX] = { 0 };
		char dst[PATH_MAX] = { 0 };
		sc_must_snprintf(src, sizeof src, "/proc/%d/ns/mnt",
				 (int)parent);
		sc_must_snprintf(dst, sizeof dst, "%s%s", group->name,
				 SC_NS_MNT_FILE);
		if (mount(src, dst, NULL, MS_BIND, NULL) < 0) {
			die("cannot bind-mount the mount namespace file %s -> %s", src, dst);
		}
		debug
		    ("successfully captured mount namespace in namespace group %s",
		     group->name);
		exit(0);
	} else {
		group->child = pid;
		// Unshare the mount namespace and set a flag instructing the caller that 
		// the namespace is pristine and needs to be populated now.
		debug("unsharing the mount namespace");
		if (unshare(CLONE_NEWNS) < 0) {
			die("cannot unshare the mount namespace");
		}
		group->should_populate = true;
	}
	return 0;
}
Exemple #9
0
static int test_pidfd_send_signal_recycled_pid_fail(void)
{
	int i, ret;
	pid_t pid1;
	const char *test_name = "pidfd_send_signal signal recycled pid";

	ret = unshare(CLONE_NEWPID);
	if (ret < 0)
		ksft_exit_fail_msg("%s test: Failed to unshare pid namespace\n",
				   test_name);

	ret = unshare(CLONE_NEWNS);
	if (ret < 0)
		ksft_exit_fail_msg(
			"%s test: Failed to unshare mount namespace\n",
			test_name);

	ret = mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0);
	if (ret < 0)
		ksft_exit_fail_msg("%s test: Failed to remount / private\n",
				   test_name);

	/* pid 1 in new pid namespace */
	pid1 = fork();
	if (pid1 < 0)
		ksft_exit_fail_msg("%s test: Failed to create new process\n",
				   test_name);

	if (pid1 == 0) {
		char buf[256];
		pid_t pid2;
		int pidfd = -1;

		(void)umount2("/proc", MNT_DETACH);
		ret = mount("proc", "/proc", "proc", 0, NULL);
		if (ret < 0)
			_exit(PIDFD_ERROR);

		/* grab pid PID_RECYCLE */
		for (i = 0; i <= PIDFD_MAX_DEFAULT; i++) {
			pid2 = fork();
			if (pid2 < 0)
				_exit(PIDFD_ERROR);

			if (pid2 == 0)
				_exit(PIDFD_PASS);

			if (pid2 == PID_RECYCLE) {
				snprintf(buf, sizeof(buf), "/proc/%d", pid2);
				ksft_print_msg("pid to recycle is %d\n", pid2);
				pidfd = open(buf, O_DIRECTORY | O_CLOEXEC);
			}

			if (wait_for_pid(pid2))
				_exit(PIDFD_ERROR);

			if (pid2 >= PID_RECYCLE)
				break;
		}

		/*
		 * We want to be as predictable as we can so if we haven't been
		 * able to grab pid PID_RECYCLE skip the test.
		 */
		if (pid2 != PID_RECYCLE) {
			/* skip test */
			close(pidfd);
			_exit(PIDFD_SKIP);
		}

		if (pidfd < 0)
			_exit(PIDFD_ERROR);

		for (i = 0; i <= PIDFD_MAX_DEFAULT; i++) {
			char c;
			int pipe_fds[2];
			pid_t recycled_pid;
			int child_ret = PIDFD_PASS;

			ret = pipe2(pipe_fds, O_CLOEXEC);
			if (ret < 0)
				_exit(PIDFD_ERROR);

			recycled_pid = fork();
			if (recycled_pid < 0)
				_exit(PIDFD_ERROR);

			if (recycled_pid == 0) {
				close(pipe_fds[1]);
				(void)read(pipe_fds[0], &c, 1);
				close(pipe_fds[0]);

				_exit(PIDFD_PASS);
			}

			/*
			 * Stop the child so we can inspect whether we have
			 * recycled pid PID_RECYCLE.
			 */
			close(pipe_fds[0]);
			ret = kill(recycled_pid, SIGSTOP);
			close(pipe_fds[1]);
			if (ret) {
				(void)wait_for_pid(recycled_pid);
				_exit(PIDFD_ERROR);
			}

			/*
			 * We have recycled the pid. Try to signal it. This
			 * needs to fail since this is a different process than
			 * the one the pidfd refers to.
			 */
			if (recycled_pid == PID_RECYCLE) {
				ret = sys_pidfd_send_signal(pidfd, SIGCONT,
							    NULL, 0);
				if (ret && errno == ESRCH)
					child_ret = PIDFD_XFAIL;
				else
					child_ret = PIDFD_FAIL;
			}

			/* let the process move on */
			ret = kill(recycled_pid, SIGCONT);
			if (ret)
				(void)kill(recycled_pid, SIGKILL);

			if (wait_for_pid(recycled_pid))
				_exit(PIDFD_ERROR);

			switch (child_ret) {
			case PIDFD_FAIL:
				/* fallthrough */
			case PIDFD_XFAIL:
				_exit(child_ret);
			case PIDFD_PASS:
				break;
			default:
				/* not reached */
				_exit(PIDFD_ERROR);
			}

			/*
			 * If the user set a custom pid_max limit we could be
			 * in the millions.
			 * Skip the test in this case.
			 */
			if (recycled_pid > PIDFD_MAX_DEFAULT)
				_exit(PIDFD_SKIP);
		}

		/* failed to recycle pid */
		_exit(PIDFD_SKIP);
	}

	ret = wait_for_pid(pid1);
	switch (ret) {
	case PIDFD_FAIL:
		ksft_exit_fail_msg(
			"%s test: Managed to signal recycled pid %d\n",
			test_name, PID_RECYCLE);
	case PIDFD_PASS:
		ksft_exit_fail_msg("%s test: Failed to recycle pid %d\n",
				   test_name, PID_RECYCLE);
	case PIDFD_SKIP:
		ksft_print_msg("%s test: Skipping test\n", test_name);
		ret = 0;
		break;
	case PIDFD_XFAIL:
		ksft_test_result_pass(
			"%s test: Failed to signal recycled pid as expected\n",
			test_name);
		ret = 0;
		break;
	default /* PIDFD_ERROR */:
		ksft_exit_fail_msg("%s test: Error while running tests\n",
				   test_name);
	}

	return ret;
}
Exemple #10
0
int main(int argc, char ** argv) {
    FILE *containerimage_fp;
    FILE *loop_fp;
    FILE *config_fp;
    char *containerimage;
    char *containername;
    char *containerpath;
    char *username;
    char *command;
    char *tmpdir;
    char *loop_dev_lock;
    char *loop_dev_cache;
    char *loop_dev = 0;
    char *config_path;
    char *tmp_config_string;
    char cwd[PATH_MAX];
    int cwd_fd;
    int tmpdirlock_fd;
    int containerimage_fd;
    int loop_dev_lock_fd;
    int gid_list_count;
    int retval = 0;
    uid_t uid;
    gid_t gid;
    gid_t *gid_list;
    pid_t namespace_fork_pid = 0;
    struct passwd *pw;


//****************************************************************************//
// Init
//****************************************************************************//

    signal(SIGINT, sighandler);
    signal(SIGKILL, sighandler);
    signal(SIGQUIT, sighandler);

    openlog("Singularity", LOG_CONS | LOG_NDELAY, LOG_LOCAL0);

    // Get all user/group info
    uid = getuid();
    gid = getgid();
    gid_list_count = getgroups(0, NULL);
    gid_list = (gid_t *) malloc(sizeof(gid_t) * gid_list_count);
    if ( getgroups(gid_list_count, gid_list) < 0 ) {
        fprintf(stderr, "ABORT: Could not obtain current supplementary group list: %s\n", strerror(errno));
        return(255);
    }
    pw = getpwuid(uid);

    // Check to make sure we are installed correctly
    if ( seteuid(0) < 0 ) {
        fprintf(stderr, "ABORT: Check installation, must be performed by root.\n");
        return(255);
    }

    // Lets start off as the calling UID
    if ( seteuid(uid) < 0 ) {
        fprintf(stderr, "ABORT: Could not set effective uid to %d: %s\n", uid, strerror(errno));
        return(255);
    }
    if ( setegid(gid) < 0 ) {
        fprintf(stderr, "ABORT: Could not set effective gid to %d: %s\n", gid, strerror(errno));
        return(255);
    }

    username = pw->pw_name;
    containerimage = getenv("SINGULARITY_IMAGE");
    command = getenv("SINGULARITY_COMMAND");

    unsetenv("SINGULARITY_COMMAND");
    unsetenv("SINGULARITY_EXEC");

    config_path = (char *) malloc(strlen(SYSCONFDIR) + 30);
    snprintf(config_path, strlen(SYSCONFDIR) + 30, "%s/singularity/singularity.conf", SYSCONFDIR);

    // Figure out where we start
    if ( (cwd_fd = open(".", O_RDONLY)) < 0 ) {
        fprintf(stderr, "ABORT: Could not open cwd fd (%s)!\n", strerror(errno));
        return(1);
    }
    if ( getcwd(cwd, PATH_MAX) == NULL ) {
        fprintf(stderr, "Could not obtain current directory path: %s\n", strerror(errno));
        return(1);
    }

    if ( containerimage == NULL ) {
        fprintf(stderr, "ABORT: SINGULARITY_IMAGE undefined!\n");
        return(1);
    }

    if ( is_file(containerimage) != 0 ) {
        fprintf(stderr, "ABORT: Container image path is invalid: %s\n", containerimage);
        return(1);
    }

    if ( is_file(config_path) != 0 ) {
        fprintf(stderr, "ABORT: Configuration file not found: %s\n", config_path);
        return(255);
    }

    if ( is_owner(config_path, 0) != 0 ) {
        fprintf(stderr, "ABORT: Configuration file is not owned by root: %s\n", config_path);
        return(255);
    }

    // TODO: Offer option to only run containers owned by root (so root can approve
    // containers)
    if ( uid == 0 && is_owner(containerimage, 0) < 0 ) {
        fprintf(stderr, "ABORT: Root should only run containers that root owns!\n");
        return(1);
    }

    containername = basename(strdup(containerimage));

    tmpdir = strjoin("/tmp/.singularity-", file_id(containerimage));
    loop_dev_lock = joinpath(tmpdir, "loop_dev.lock");
    loop_dev_cache = joinpath(tmpdir, "loop_dev");

    containerpath = (char *) malloc(strlen(tmpdir) + 5);
    snprintf(containerpath, strlen(tmpdir) + 5, "%s/mnt", tmpdir);

    syslog(LOG_NOTICE, "User=%s[%d], Command=%s, Container=%s, CWD=%s, Arg1=%s", username, uid, command, containerimage, cwd, argv[1]);


//****************************************************************************//
// Setup
//****************************************************************************//


    if ( ( config_fp = fopen(config_path, "r") ) == NULL ) {
        fprintf(stderr, "ERROR: Could not open config file %s: %s\n", config_path, strerror(errno));
        return(255);
    }

    if ( getenv("SINGULARITY_WRITABLE") == NULL ) {
        if ( ( containerimage_fp = fopen(containerimage, "r") ) == NULL ) {
            fprintf(stderr, "ERROR: Could not open image read only %s: %s\n", containerimage, strerror(errno));
            return(255);
        }
        containerimage_fd = fileno(containerimage_fp);
        if ( flock(containerimage_fd, LOCK_SH | LOCK_NB) < 0 ) {
            fprintf(stderr, "ABORT: Image is locked by another process\n");
            return(5);
        }
    } else {
        if ( ( containerimage_fp = fopen(containerimage, "r+") ) == NULL ) {
            fprintf(stderr, "ERROR: Could not open image read/write %s: %s\n", containerimage, strerror(errno));
            return(255);
        }
        containerimage_fd = fileno(containerimage_fp);
        if ( flock(containerimage_fd, LOCK_EX | LOCK_NB) < 0 ) {
            fprintf(stderr, "ABORT: Image is locked by another process\n");
            return(5);
        }
    }


//****************************************************************************//
// We are now running with escalated privileges until we exec
//****************************************************************************//

    if ( seteuid(0) < 0 ) {
        fprintf(stderr, "ABORT: Could not escalate effective user privileges %s\n", strerror(errno));
        return(255);
    }
    if ( setegid(0) < 0 ) {
        fprintf(stderr, "ABORT: Could not escalate effective group privileges: %s\n", strerror(errno));
        return(255);
    }

    if ( s_mkpath(tmpdir, 0755) < 0 ) {
        fprintf(stderr, "ABORT: Could not create temporary directory %s: %s\n", tmpdir, strerror(errno));
        return(255);
    }

    if ( is_owner(tmpdir, 0) < 0 ) {
        fprintf(stderr, "ABORT: Container working directory has wrong ownership: %s\n", tmpdir);
        syslog(LOG_ERR, "Container working directory has wrong ownership: %s", tmpdir);
        return(255);
    }

    tmpdirlock_fd = open(tmpdir, O_RDONLY);
    if ( tmpdirlock_fd < 0 ) {
        fprintf(stderr, "ERROR: Could not obtain file descriptor on %s: %s\n", tmpdir, strerror(errno));
        return(255);
    }

    if ( flock(tmpdirlock_fd, LOCK_SH | LOCK_NB) < 0 ) {
        fprintf(stderr, "ERROR: Could not obtain shared lock on %s: %s\n", tmpdir, strerror(errno));
        return(255);
    }

    if ( ( loop_dev_lock_fd = open(loop_dev_lock, O_CREAT | O_RDWR, 0644) ) < 0 ) {
        fprintf(stderr, "ERROR: Could not open loop_dev_lock %s: %s\n", loop_dev_lock, strerror(errno));
        return(255);
    }

    if ( s_mkpath(containerpath, 0755) < 0 ) {
        fprintf(stderr, "ABORT: Could not create directory %s: %s\n", containerpath, strerror(errno));
        return(255);
    }

    if ( is_owner(containerpath, 0) < 0 ) {
        fprintf(stderr, "ABORT: Container directory is not root owned: %s\n", containerpath);
        syslog(LOG_ERR, "Container directory has wrong ownership: %s", tmpdir);
        return(255);
    }

    if ( flock(loop_dev_lock_fd, LOCK_EX | LOCK_NB) == 0 ) {
        loop_dev = obtain_loop_dev();

        if ( ( loop_fp = fopen(loop_dev, "r+") ) < 0 ) {
            fprintf(stderr, "ERROR: Failed to open loop device %s: %s\n", loop_dev, strerror(errno));
            syslog(LOG_ERR, "Failed to open loop device %s: %s", loop_dev, strerror(errno));
            return(255);
        }

        if ( associate_loop(containerimage_fp, loop_fp, 1) < 0 ) {
            fprintf(stderr, "ERROR: Could not associate %s to loop device %s\n", containerimage, loop_dev);
            syslog(LOG_ERR, "Failed to associate %s to loop device %s", containerimage, loop_dev);
            return(255);
        }

        if ( fileput(loop_dev_cache, loop_dev) < 0 ) {
            fprintf(stderr, "ERROR: Could not write to loop_dev_cache %s: %s\n", loop_dev_cache, strerror(errno));
            return(255);
        }

        flock(loop_dev_lock_fd, LOCK_SH | LOCK_NB);

    } else {
        flock(loop_dev_lock_fd, LOCK_SH);
        if ( ( loop_dev = filecat(loop_dev_cache) ) == NULL ) {
            fprintf(stderr, "ERROR: Could not retrieve loop_dev_cache from %s\n", loop_dev_cache);
            return(255);
        }

        if ( ( loop_fp = fopen(loop_dev, "r") ) < 0 ) {
            fprintf(stderr, "ERROR: Failed to open loop device %s: %s\n", loop_dev, strerror(errno));
            return(255);
        }
    }



//****************************************************************************//
// Management fork
//****************************************************************************//

    namespace_fork_pid = fork();
    if ( namespace_fork_pid == 0 ) {

//****************************************************************************//
// Setup namespaces
//****************************************************************************//

        if ( unshare(CLONE_NEWNS) < 0 ) {
            fprintf(stderr, "ABORT: Could not virtualize mount namespace: %s\n", strerror(errno));
            return(255);
        }

        // Privatize the mount namespaces (thank you for the pointer Doug Jacobsen!)
        if ( mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL) < 0 ) {
            // I am not sure if this error needs to be caught, maybe it will fail
            // on older kernels? If so, we can fix then.
            fprintf(stderr, "ABORT: Could not make mountspaces private: %s\n", strerror(errno));
            return(255);
        }

#ifdef NS_CLONE_NEWPID
        if ( getenv("SINGULARITY_NO_NAMESPACE_PID") == NULL ) {
            unsetenv("SINGULARITY_NO_NAMESPACE_PID");
            if ( unshare(CLONE_NEWPID) < 0 ) {
                fprintf(stderr, "ABORT: Could not virtualize PID namespace: %s\n", strerror(errno));
                return(255);
            }
        }
#else
#ifdef NS_CLONE_PID
        // This is for older legacy CLONE_PID
        if ( getenv("SINGULARITY_NO_NAMESPACE_PID") == NULL ) {
            unsetenv("SINGULARITY_NO_NAMESPACE_PID");
            if ( unshare(CLONE_PID) < 0 ) {
                fprintf(stderr, "ABORT: Could not virtualize PID namespace: %s\n", strerror(errno));
                return(255);
            }
        }
#endif
#endif
#ifdef NS_CLONE_FS
        if ( getenv("SINGULARITY_NO_NAMESPACE_FS") == NULL ) {
            unsetenv("SINGULARITY_NO_NAMESPACE_FS");
            if ( unshare(CLONE_FS) < 0 ) {
                fprintf(stderr, "ABORT: Could not virtualize file system namespace: %s\n", strerror(errno));
                return(255);
            }
        }
#endif
#ifdef NS_CLONE_FILES
        if ( getenv("SINGULARITY_NO_NAMESPACE_FILES") == NULL ) {
            unsetenv("SINGULARITY_NO_NAMESPACE_FILES");
            if ( unshare(CLONE_FILES) < 0 ) {
                fprintf(stderr, "ABORT: Could not virtualize file descriptor namespace: %s\n", strerror(errno));
                return(255);
            }
        }
#endif


//****************************************************************************//
// Mount image
//****************************************************************************//

        if ( getenv("SINGULARITY_WRITABLE") == NULL ) {
            unsetenv("SINGULARITY_WRITABLE");
            if ( mount_image(loop_dev, containerpath, 0) < 0 ) {
                fprintf(stderr, "ABORT: exiting...\n");
                return(255);
            }
        } else {
            if ( mount_image(loop_dev, containerpath, 1) < 0 ) {
                fprintf(stderr, "ABORT: exiting...\n");
                return(255);
            }
        }


//****************************************************************************//
// Check image
//****************************************************************************//

        if ( is_exec(joinpath(containerpath, "/bin/sh")) < 0 ) {
            fprintf(stderr, "ERROR: Container image does not have a valid /bin/sh\n");
            return(1);
        }


//****************************************************************************//
// Bind mounts
//****************************************************************************//

        if ( getenv("SINGULARITY_CONTAIN") == NULL ) {
            unsetenv("SINGULARITY_CONTAIN");
    
            rewind(config_fp);
            while ( ( tmp_config_string = config_get_key_value(config_fp, "bind path") ) != NULL ) {
                if ( ( is_file(tmp_config_string) != 0 ) && ( is_dir(tmp_config_string) != 0 ) ) {
                    fprintf(stderr, "ERROR: Non existant bind source path: '%s'\n", tmp_config_string);
                    continue;
                }
                if ( ( is_file(joinpath(containerpath, tmp_config_string)) != 0 ) && ( is_dir(joinpath(containerpath, tmp_config_string)) != 0 ) ) {
                    fprintf(stderr, "WARNING: Non existant bind container destination path: '%s'\n", tmp_config_string);
                    continue;
                }
                if ( mount_bind(tmp_config_string, joinpath(containerpath, tmp_config_string), 0) < 0 ) {
                    fprintf(stderr, "ABORTING!\n");
                    return(255);
                }
            }


            if (is_file(joinpath(containerpath, "/etc/nsswitch.conf")) == 0 ) {
                if ( is_file(joinpath(SYSCONFDIR, "/singularity/default-nsswitch.conf")) == 0 ) {
                    if ( mount_bind(joinpath(SYSCONFDIR, "/singularity/default-nsswitch.conf"), joinpath(containerpath, "/etc/nsswitch.conf"), 0) != 0 ) {
                        fprintf(stderr, "ABORT: Could not bind /etc/nsswitch.conf\n");
                        return(255);
                    }
                } else {
                    fprintf(stderr, "WARNING: Template /etc/nsswitch.conf does not exist: %s\n", joinpath(SYSCONFDIR, "/singularity/default-nsswitch.conf"));
                }
            }

            if ( uid != 0 ) { // If we are root, no need to mess with passwd or group
                if (is_file(joinpath(containerpath, "/etc/passwd")) == 0 ) {
                    if ( is_file(joinpath(tmpdir, "/passwd")) < 0 ) {
                        if ( build_passwd(joinpath(containerpath, "/etc/passwd"), joinpath(tmpdir, "/passwd")) < 0 ) {
                            fprintf(stderr, "ABORT: Failed creating template password file\n");
                            return(255);
                        }
                    }
                    if ( mount_bind(joinpath(tmpdir, "/passwd"), joinpath(containerpath, "/etc/passwd"), 0) < 0 ) {
                        fprintf(stderr, "ABORT: Could not bind /etc/passwd\n");
                        return(255);
                    }
                }
    
                if (is_file(joinpath(containerpath, "/etc/group")) == 0 ) {
                    if ( is_file(joinpath(tmpdir, "/group")) < 0 ) {
                        if ( build_group(joinpath(containerpath, "/etc/group"), joinpath(tmpdir, "/group")) < 0 ) {
                            fprintf(stderr, "ABORT: Failed creating template group file\n");
                            return(255);
                        }
                    }
                    if ( mount_bind(joinpath(tmpdir, "/group"), joinpath(containerpath, "/etc/group"), 0) < 0 ) {
                        fprintf(stderr, "ABORT: Could not bind /etc/group\n");
                        return(255);
                    }
                }
            }
        }

//****************************************************************************//
// Fork child in new namespaces
//****************************************************************************//

        exec_fork_pid = fork();

        if ( exec_fork_pid == 0 ) {


//****************************************************************************//
// Enter the file system
//****************************************************************************//

            if ( chroot(containerpath) < 0 ) {
                fprintf(stderr, "ABORT: failed enter CONTAINERIMAGE: %s\n", containerpath);
                return(255);
            }
            if ( chdir("/") < 0 ) {
                fprintf(stderr, "ABORT: Could not chdir after chroot to /: %s\n", strerror(errno));
                return(1);
            }


//****************************************************************************//
// Setup real mounts within the container
//****************************************************************************//

            rewind(config_fp);
            if ( config_get_key_bool(config_fp, "mount proc", 1) > 0 ) {
                if ( is_dir("/proc") == 0 ) {
                    if ( mount("proc", "/proc", "proc", 0, NULL) < 0 ) {
                        fprintf(stderr, "ABORT: Could not mount /proc: %s\n", strerror(errno));
                        return(255);
                    }
                }
            }

            rewind(config_fp);
            if ( config_get_key_bool(config_fp, "mount sys", 1) > 0 ) {
                if ( is_dir("/sys") == 0 ) {
                    if ( mount("sysfs", "/sys", "sysfs", 0, NULL) < 0 ) {
                        fprintf(stderr, "ABORT: Could not mount /sys: %s\n", strerror(errno));
                        return(255);
                    }
                }
            }


//****************************************************************************//
// Drop all privileges for good
//****************************************************************************//

            if ( setgroups(gid_list_count, gid_list) < 0 ) {
                fprintf(stderr, "ABOFT: Could not reset supplementary group list: %s\n", strerror(errno));
                return(255);
            }
            if ( setregid(gid, gid) < 0 ) {
                fprintf(stderr, "ABORT: Could not dump real and effective group privileges: %s\n", strerror(errno));
                return(255);
            }
            if ( setreuid(uid, uid) < 0 ) {
                fprintf(stderr, "ABORT: Could not dump real and effective user privileges: %s\n", strerror(errno));
                return(255);
            }


//****************************************************************************//
// Setup final environment
//****************************************************************************//

            // After this, we exist only within the container... Let's make it known!
            if ( setenv("SINGULARITY_CONTAINER", "true", 0) != 0 ) {
                fprintf(stderr, "ABORT: Could not set SINGULARITY_CONTAINER to 'true'\n");
                return(1);
            }

            if ( is_dir(cwd) == 0 ) {
               if ( chdir(cwd) < 0 ) {
                    fprintf(stderr, "ABORT: Could not chdir to: %s: %s\n", cwd, strerror(errno));
                    return(1);
                }
            } else {
                if ( fchdir(cwd_fd) < 0 ) {
                    fprintf(stderr, "ABORT: Could not fchdir to cwd: %s\n", strerror(errno));
                    return(1);
                }
            }


//****************************************************************************//
// Execv to container process
//****************************************************************************//

            if ( command == NULL ) {
                fprintf(stderr, "No command specified, launching 'shell'\n");
                command = strdup("shell");
            }

            if ( strcmp(command, "run") == 0 ) {
                if ( is_exec("/singularity") == 0 ) {
                    argv[0] = strdup("/singularity");
                    if ( execv("/singularity", argv) != 0 ) {
                        fprintf(stderr, "ABORT: exec of /bin/sh failed: %s\n", strerror(errno));
                    }
                } else {
                    fprintf(stderr, "No Singularity runscript found, launching 'shell'\n");
                    command = strdup("shell");
                }
            }
            
            if ( strcmp(command, "exec") == 0 ) {
                if ( argc <= 1 ) {
                    fprintf(stderr, "ABORT: Exec requires a command to run\n");
                    return(1);
                }
                if ( execvp(argv[1], &argv[1]) != 0 ) {
                    fprintf(stderr, "ABORT: execvp of '%s' failed: %s\n", argv[1], strerror(errno));
                    return(1);
                }
            }
            
            if ( strcmp(command, "shell") == 0 ) {
                char *prompt;

                prompt = (char *) malloc(strlen(containername) + 16);
                snprintf(prompt, strlen(containerimage) + 16, "Singularity/%s> ", containername);
                setenv("PS1", prompt, 1);

                if ( is_exec("/bin/bash") == 0 ) {
                    char *args[argc+2];
                    int i;

                    args[0] = strdup("/bin/bash");
                    args[1] = strdup("--norc");
                    args[2] = strdup("--noprofile");
                    for(i=1; i<=argc; i++) {
                        args[i+2] = argv[i];
                    }

                    if ( execv("/bin/bash", args) != 0 ) {
                        fprintf(stderr, "ABORT: exec of /bin/bash failed: %s\n", strerror(errno));
                    }
                } else {
                    argv[0] = strdup("/bin/sh");
                    if ( execv("/bin/sh", argv) != 0 ) {
                        fprintf(stderr, "ABORT: exec of /bin/sh failed: %s\n", strerror(errno));
                    }
                }
            }

            // If we get here... we fail on bad command
            fprintf(stderr, "ABORT: Unrecognized Singularity command: %s\n", command);
            return(1);


//****************************************************************************//
// Outer child waits for inner child
//****************************************************************************//

        } else if ( exec_fork_pid > 0 ) {
            int tmpstatus;

            strncpy(argv[0], "Singularity: exec", strlen(argv[0]));

            if ( seteuid(uid) < 0 ) {
                fprintf(stderr, "ABORT: Could not set effective user privileges to %d: %s\n", uid, strerror(errno));
                return(255);
            }

            waitpid(exec_fork_pid, &tmpstatus, 0);
            retval = WEXITSTATUS(tmpstatus);
        } else {
            fprintf(stderr, "ABORT: Could not fork namespace process: %s\n", strerror(errno));
            return(255);
        }
        return(retval);

    } else if ( namespace_fork_pid > 0 ) {
        int tmpstatus;

        strncpy(argv[0], "Singularity: namespace", strlen(argv[0]));

        if ( seteuid(uid) < 0 ) {
            fprintf(stderr, "ABORT: Could not set effective user privileges to %d: %s\n", uid, strerror(errno));
            return(255);
        }

        waitpid(namespace_fork_pid, &tmpstatus, 0);
        retval = WEXITSTATUS(tmpstatus);
    } else {
        fprintf(stderr, "ABORT: Could not fork management process: %s\n", strerror(errno));
        return(255);
    }


//****************************************************************************//
// Final wrap up before exiting
//****************************************************************************//


    if ( close(cwd_fd) < 0 ) {
        fprintf(stderr, "ERROR: Could not close cwd_fd: %s\n", strerror(errno));
        retval++;
    }

    if ( flock(tmpdirlock_fd, LOCK_EX | LOCK_NB) == 0 ) {
        close(tmpdirlock_fd);
        if ( seteuid(0) < 0 ) {
            fprintf(stderr, "ABORT: Could not re-escalate effective user privileges: %s\n", strerror(errno));
            return(255);
        }

        if ( s_rmdir(tmpdir) < 0 ) {
            fprintf(stderr, "WARNING: Could not remove all files in %s: %s\n", tmpdir, strerror(errno));
        }
    
        // Dissociate loops from here Just in case autoflush didn't work.
        (void)disassociate_loop(loop_fp);

        if ( seteuid(uid) < 0 ) {
            fprintf(stderr, "ABORT: Could not drop effective user privileges: %s\n", strerror(errno));
            return(255);
        }

    } else {
//        printf("Not removing tmpdir, lock still\n");
    }

    close(containerimage_fd);
    close(tmpdirlock_fd);

    free(loop_dev_lock);
    free(containerpath);
    free(tmpdir);
    closelog();

    return(retval);
}
Exemple #11
0
int main(int argc, char ** argv) {
    FILE *loop_fp;
    FILE *containerimage_fp;
    char *containerimage;
    char *mountpoint;
    char *loop_dev;
    int retval = 0;
    uid_t uid = geteuid();

    signal(SIGINT, sighandler);
    signal(SIGKILL, sighandler);
    signal(SIGQUIT, sighandler);


    if ( uid != 0 ) {
        message(ERROR, "Calling user must be root\n");
        ABORT(1);
    }

    if ( argv[1] == NULL || argv[2] == NULL ) {
        fprintf(stderr, "USAGE: %s [singularity container image] [mount point] (shell container args)\n", argv[0]);
        return(1);
    }

    containerimage = strdup(argv[1]);
    mountpoint = strdup(argv[2]);

    if ( is_file(containerimage) < 0 ) {
        message(ERROR, "Container image not found: %s\n", containerimage);
        ABORT(1);
    }

    if ( is_dir(mountpoint) < 0 ) {
        message(ERROR, "Mount point must be a directory: %s\n", mountpoint);
        ABORT(1);
    }

    message(DEBUG, "Opening container image: %s\n", containerimage);
    if ( ( containerimage_fp = fopen(containerimage, "r+") ) < 0 ) { // Flawfinder: ignore
        message(ERROR, "Could not open image %s: %s\n", containerimage, strerror(errno));
        ABORT(255);
    }

    message(DEBUG, "Binding container to loop interface\n");
    if ( ( loop_fp = loop_bind(containerimage_fp, &loop_dev, 1)) == NULL ) {
        message(ERROR, "Could not bind image to loop!\n");
        ABORT(255);
    }

    message(DEBUG, "Forking namespace child\n");
    namespace_fork_pid = fork();
    if ( namespace_fork_pid == 0 ) {

        if ( unshare(CLONE_NEWNS) < 0 ) {
            message(ERROR, "Could not virtualize mount namespace: %s\n", strerror(errno));
            ABORT(255);
        }

        if ( mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL) < 0 ) {
            message(ERROR, "Could not make mountspaces private: %s\n", strerror(errno));
            ABORT(255);
        }


        if ( mount_image(loop_dev, mountpoint, 1) < 0 ) {
            message(ERROR, "Failed mounting image...\n");
            ABORT(255);
        }

        message(DEBUG, "Forking exec child\n");
        exec_fork_pid = fork();
        if ( exec_fork_pid == 0 ) {

            argv[2] = strdup("/bin/bash");

            if ( execv("/bin/bash", &argv[2]) != 0 ) { // Flawfinder: ignore (exec* is necessary)
                message(ERROR, "Exec of /bin/bash failed: %s\n", strerror(errno));
            }
            // We should never get here, so if we do, make it an error
            return(-1);

        } else if ( exec_fork_pid > 0 ) {
            int tmpstatus;

            strncpy(argv[0], "Singularity: exec", strlen(argv[0])); // Flawfinder: ignore

            message(DEBUG, "Waiting for exec child to return\n");
            waitpid(exec_fork_pid, &tmpstatus, 0);
            retval = WEXITSTATUS(tmpstatus);

            message(DEBUG, "Exec child returned (RETVAL=%d)\n", retval);

            return(retval);
        } else {
            fprintf(stderr, "ABORT: Could not exec child process: %s\n", strerror(errno));
            retval++;
        }

    } else if ( namespace_fork_pid > 0 ) {
        int tmpstatus;

        strncpy(argv[0], "Singularity: namespace", strlen(argv[0])); // Flawfinder: ignore

        message(DEBUG, "Waiting for namespace child to return\n");
        waitpid(namespace_fork_pid, &tmpstatus, 0);

        retval = WEXITSTATUS(tmpstatus);
        message(DEBUG, "Namespace child returned (RETVAL=%d)\n", retval);

    } else {
        fprintf(stderr, "ABORT: Could not fork management process: %s\n", strerror(errno));
        return(255);
    }

    return(retval);
}
Exemple #12
0
int main(int argc, char *argv[]) {
    int c;
    int fd;
    char path[PATH_MAX];
    int pid;
    int detach = 0;
    int netns = 0;
    int mountns = 0;
    int mountnspid = 0;
    int pidns = 0;
    int printpid = 0;
    int mountprocfs = 0;
    static struct sched_param sp;
    while ((c = getopt(argc, argv, "+cdnmiufpa:b:k:j:g:r:vh")) != -1)
        switch (c) {
        case 'c':
            /* close file descriptors except stdin/out/error */
            for (fd = getdtablesize(); fd > 2; fd--)
                close(fd);
            break;
        case 'd':
            /* detach from tty */
            detach = 1; /* delay setsid() incase new PID namespace */
            break;
        case 'n':
            /* run in network namespace */
            if (unshare(CLONE_NEWNET) == -1) {
                perror("unshare");
                return 1;
            }
            netns = NET_NS_CREATE;
            break;
        case 'm':
            /* run in mount namespace */
            if (unshare(CLONE_NEWNS) == -1) {
                perror("unshare");
                return 1;
            }
            /* mount sysfs to pick up the new network namespace */
            mountns = MOUNT_NS_CREATE; /* delay mount of /sysfs */
            break;
        case 'i':
            /* run in new PID namespace */
            if (unshare(CLONE_NEWPID) == -1) {
                perror("unshare");
                return 1;
            }
            pidns = PID_NS_CREATE; /* record creation of PID namespace */
            break;
        case 'u':
            /* run in new UTS namespace */
            if (unshare(CLONE_NEWUTS) == -1) {
                perror("unshare");
                return 1;
            }
            break;
        case 'f':
            /* mount procfs (for new PID namespaces) */
            mountprocfs = TRUE; /* delay mounting proc until new NS established */
            break;
        case 'p':
            /* print pid */
            printpid = TRUE; /* delay printing PID until after NS procesisng*/
            break;
        case 'a':
            /* Attach to pid's network namespace */
            pid = atoi(optarg);
            sprintf(path, "/proc/%d/ns/net", pid);
            if (attachToNS(path) != 0) {
                return 1;
            }
            netns = NET_NS_JOIN;
            break;
        case 'b':
            /* Attach to pid's mount namespace */
            mountns = MOUNT_NS_JOIN; /* delay joining mount namespace */
            mountnspid = atoi(optarg); /* record PID to join */
            break;
        case 'k':
            /* Attach to pid's PID namespace */
            pid = atoi(optarg);
            sprintf(path, "/proc/%d/ns/pid", pid);
            if (attachToNS(path) != 0) {
                return 1;
            }
            pidns = PID_NS_JOIN; /* record join of PID namespace */
            break;
        case 'j':
            /* Attach to pid's UTS namespace */
            pid = atoi(optarg);
            sprintf(path, "/proc/%d/ns/uts", pid);
            if (attachToNS(path) != 0) {
                return 1;
            }
            break;
        case 'g':
            /* Attach to cgroup */
            cgroup(optarg);
            break;
        case 'r':
            /* Set RT scheduling priority */
            sp.sched_priority = atoi(optarg);
            if (sched_setscheduler(getpid(), SCHED_RR, &sp) < 0) {
                perror("sched_setscheduler");
                return 1;
            }
            break;
        case 'v':
            printf("%s\n", VERSION);
            exit(0);
        case 'h':
            usage(argv[0]);
            exit(0);
        default:
            usage(argv[0]);
            exit(1);
        }

    /* fork to create / join PID namespace */
    if (pidns == PID_NS_CREATE || pidns == PID_NS_JOIN) {
        int status = 0;
        pid_t pid = fork();
        switch (pid) {
        case -1:
            perror("fork");
            return 1;
        case 0: /* child */
            break;
        default: /* parent */
            /* print global PID (not namespace PID)*/
            if (printpid == 1) {
                printf("\001%d\n", pid);
                fflush(stdout);
            }
            /* wait on the PID to handle attachment for 'mx'*/
            if (waitpid(pid, &status, 0) == -1)
                return 1;
            if (WIFEXITED(status))
                /* caught child exit, forward return code*/
                return WEXITSTATUS(status);
            else if (WIFSIGNALED(status))
                kill(getpid(), WTERMSIG(status));
            /* child exit failed, (although return won't distinguish) */
            return 1;
        }
    }

    /* if requested, we are in the new/requested PID namespace */
    /* completed performing other namespaces (PID/network) operations */

    /* go ahead and join the mount namespace if requested */
    if (mountns == MOUNT_NS_JOIN && mountnspid != FALSE) {
        sprintf(path, "/proc/%d/ns/mnt", pid);
        if (attachToNS(path) != 0) {
            return 1;
        }
    }

    /* if mount of procfs requested, check for pidns and mountns */
    if (mountprocfs && (pidns != PID_NS_CREATE || mountns != MOUNT_NS_CREATE)) {
        /* requested procfs, but required PID and/or mount namespace missing  */
        return 1;
    }

    /* mount procfs to pick up the new PID namespace */
    if (mountprocfs
            && (mount("none", "/proc", NULL, MS_PRIVATE | MS_REC, NULL) != 0
                    || mount("proc", "/proc", "proc",
                            MS_NOSUID | MS_NOEXEC | MS_NODEV, NULL) != 0)) {
        perror("mount");
    }

    /* mount sysfs to pick up the new PID namespace */
    if (netns == NET_NS_CREATE && mountns == MOUNT_NS_CREATE) {
        if (mount("sysfs", "/sys", "sysfs", MS_MGC_VAL, NULL) == -1) {
            perror("mount");
            return 1;
        }
    }

    /* setsid() if requested & required (not needed if using PID namespace) */
    if (detach == 1 && pidns == FALSE) {
        if (getpgrp() == getpid()) {
            switch (fork()) {
            case -1:
                perror("fork");
                return 1;
            case 0: /* child */
                break;
            default: /* parent */
                return 0;
            }
        }
        setsid();
    }

    /* print pid if requested (if in new namespace, we don't print local PID) */
    if (printpid == 1 && pidns == 0) {
        printf("\001%d\n", getpid());
        fflush(stdout);
    }

    /* launch if requested */
    if (optind < argc) {
        execvp(argv[optind], &argv[optind]);
        perror(argv[optind]);
        return 1;
    }

    usage(argv[0]);

    return 0;
}
Exemple #13
0
int main(int argc, char **argv)
{
	task_waiter_t lock;
	pid_t pid = -1;
	int status = 1;

	task_waiter_init(&lock);
	test_init(argc, argv);

	pid = fork();
	if (pid < 0) {
		pr_perror("fork");
		return 1;
	}

	if (pid == 0) {
		int fd;
		DIR *d;
		struct dirent *de;

		if (unshare(CLONE_NEWNS)) {
			pr_perror("unshare");
			return 1;
		}
		if (mount(NULL, "/", NULL, MS_PRIVATE | MS_REC, NULL)) {
			pr_perror("mount");
			return 1;
		}

		if (mkdir(dirname, 0600) < 0) {
			pr_perror("mkdir");
			return 1;
		}

		if (mount(dirname, dirname, NULL, MS_BIND, NULL)) {
			pr_perror("mount");
			return 1;
		}
		if (chdir(dirname))
			return 1;

		fd = open("test.ghost", O_CREAT | O_WRONLY, 0600);
		if (fd < 0) {
			pr_perror("open");
			return 1;
		}

		if (unlink("test.ghost")) {
			pr_perror("unlink");
			return 1;
		}

		task_waiter_complete(&lock, 1);
		test_waitsig();

		if (close(fd)) {
			pr_perror("close");
			return 1;
		}
		d = opendir(".");
		if (d == NULL) {
			pr_perror("opendir");
			return 1;
		}
		while ((de = readdir(d)) != NULL) {
			if (!strcmp(de->d_name, "."))
				continue;
			if (!strcmp(de->d_name, ".."))
				continue;
			pr_err("%s\n", de->d_name);
		}
		closedir(d);

		return 0;
	}

	task_waiter_wait4(&lock, 1);
	test_daemon();
	test_waitsig();


	kill(pid, SIGTERM);
	wait(&status);
	if (status) {
		fail("Test died");
		return 1;
	}
	pass();

	return 0;
}
static bool test_priv_mount_unpriv_remount(void)
{
	pid_t child;
	int ret;
	const char *orig_path = "/dev";
	const char *dest_path = "/tmp";
	int orig_mnt_flags, remount_mnt_flags;

	child = fork();
	if (child == -1) {
		die("fork failed: %s\n",
			strerror(errno));
	}
	if (child != 0) { /* parent */
		pid_t pid;
		int status;
		pid = waitpid(child, &status, 0);
		if (pid == -1) {
			die("waitpid failed: %s\n",
				strerror(errno));
		}
		if (pid != child) {
			die("waited for %d got %d\n",
				child, pid);
		}
		if (!WIFEXITED(status)) {
			die("child did not terminate cleanly\n");
		}
		return WEXITSTATUS(status) == EXIT_SUCCESS ? true : false;
	}

	orig_mnt_flags = read_mnt_flags(orig_path);

	create_and_enter_userns();
	ret = unshare(CLONE_NEWNS);
	if (ret != 0) {
		die("unshare(CLONE_NEWNS) failed: %s\n",
			strerror(errno));
	}

	ret = mount(orig_path, dest_path, "bind", MS_BIND | MS_REC, NULL);
	if (ret != 0) {
		die("recursive bind mount of %s onto %s failed: %s\n",
			orig_path, dest_path, strerror(errno));
	}

	ret = mount(dest_path, dest_path, "none",
		    MS_REMOUNT | MS_BIND | orig_mnt_flags , NULL);
	if (ret != 0) {
		/* system("cat /proc/self/mounts"); */
		die("remount of /tmp failed: %s\n",
		    strerror(errno));
	}

	remount_mnt_flags = read_mnt_flags(dest_path);
	if (orig_mnt_flags != remount_mnt_flags) {
		die("Mount flags unexpectedly changed during remount of %s originally mounted on %s\n",
			dest_path, orig_path);
	}
	exit(EXIT_SUCCESS);
}
Exemple #15
0
int main(int argc, char *argv[])
{
	int c, pid, ret, status;
	char buf[1];
	int pipe_fds1[2], /* child tells parent it has unshared */
	    pipe_fds2[2]; /* parent tells child it is mapped and may proceed */
	unsigned long flags = CLONE_NEWUSER | CLONE_NEWNS;
	char ttyname0[256] = {0}, ttyname1[256] = {0}, ttyname2[256] = {0};
	char *default_args[] = {"/bin/sh", NULL};

	lxc_log_fd = STDERR_FILENO;

	if (isatty(STDIN_FILENO)) {
		ret = readlink("/proc/self/fd/0", ttyname0, sizeof(ttyname0));
		if (ret < 0) {
			CMD_SYSERROR("Failed to open stdin");
			_exit(EXIT_FAILURE);
		}

		ret = readlink("/proc/self/fd/1", ttyname1, sizeof(ttyname1));
		if (ret < 0) {
			CMD_SYSINFO("Failed to open stdout. Continuing");
			ttyname1[0] = '\0';
		}

		ret = readlink("/proc/self/fd/2", ttyname2, sizeof(ttyname2));
		if (ret < 0) {
			CMD_SYSINFO("Failed to open stderr. Continuing");
			ttyname2[0] = '\0';
		}
	}

	lxc_list_init(&active_map);

	while ((c = getopt(argc, argv, "m:h")) != EOF) {
		switch (c) {
		case 'm':
			ret = parse_map(optarg);
			if (ret < 0) {
				usage(argv[0]);
				_exit(EXIT_FAILURE);
			}
			break;
		case 'h':
			usage(argv[0]);
			_exit(EXIT_SUCCESS);
		default:
			usage(argv[0]);
			_exit(EXIT_FAILURE);
		}
	};

	if (lxc_list_empty(&active_map)) {
		ret = find_default_map();
		if (ret < 0) {
			fprintf(stderr, "Failed to find subuid or subgid allocation\n");
			_exit(EXIT_FAILURE);
		}
	}

	argv = &argv[optind];
	argc = argc - optind;
	if (argc < 1)
		argv = default_args;

	ret = pipe2(pipe_fds1, O_CLOEXEC);
	if (ret < 0) {
		CMD_SYSERROR("Failed to open new pipe");
		_exit(EXIT_FAILURE);
	}

	ret = pipe2(pipe_fds2, O_CLOEXEC);
	if (ret < 0) {
		CMD_SYSERROR("Failed to open new pipe");
		close(pipe_fds1[0]);
		close(pipe_fds1[1]);
		_exit(EXIT_FAILURE);
	}

	pid = fork();
	if (pid < 0) {
		close(pipe_fds1[0]);
		close(pipe_fds1[1]);
		close(pipe_fds2[0]);
		close(pipe_fds2[1]);
		_exit(EXIT_FAILURE);
	}

	if (pid == 0) {
		close(pipe_fds1[0]);
		close(pipe_fds2[1]);

		opentty(ttyname0, STDIN_FILENO);
		opentty(ttyname1, STDOUT_FILENO);
		opentty(ttyname2, STDERR_FILENO);

		ret = unshare(flags);
		if (ret < 0) {
			CMD_SYSERROR("Failed to unshare mount and user namespace");
			close(pipe_fds1[1]);
			close(pipe_fds2[0]);
			_exit(EXIT_FAILURE);
		}

		buf[0] = '1';
		ret = lxc_write_nointr(pipe_fds1[1], buf, 1);
		if (ret != 1) {
			CMD_SYSERROR("Failed to write to pipe file descriptor %d",
				     pipe_fds1[1]);
			close(pipe_fds1[1]);
			close(pipe_fds2[0]);
			_exit(EXIT_FAILURE);
		}

		ret = lxc_read_nointr(pipe_fds2[0], buf, 1);
		if (ret != 1) {
			CMD_SYSERROR("Failed to read from pipe file descriptor %d",
				     pipe_fds2[0]);
			close(pipe_fds1[1]);
			close(pipe_fds2[0]);
			_exit(EXIT_FAILURE);
		}

		close(pipe_fds1[1]);
		close(pipe_fds2[0]);

		if (buf[0] != '1') {
			fprintf(stderr, "Received unexpected value from parent process\n");
			_exit(EXIT_FAILURE);
		}

		ret = do_child((void *)argv);
		if (ret < 0)
			_exit(EXIT_FAILURE);

		_exit(EXIT_SUCCESS);
	}

	close(pipe_fds1[1]);
	close(pipe_fds2[0]);

	ret = lxc_read_nointr(pipe_fds1[0], buf, 1);
	if (ret <= 0)
		CMD_SYSERROR("Failed to read from pipe file descriptor %d", pipe_fds1[0]);

	buf[0] = '1';

	ret = lxc_map_ids(&active_map, pid);
	if (ret < 0)
		fprintf(stderr, "Failed to write id mapping for child process\n");

	ret = lxc_write_nointr(pipe_fds2[1], buf, 1);
	if (ret < 0) {
		CMD_SYSERROR("Failed to write to pipe file descriptor %d", pipe_fds2[1]);
		_exit(EXIT_FAILURE);
	}

	ret = waitpid(pid, &status, __WALL);
	if (ret < 0) {
		CMD_SYSERROR("Failed to wait on child process");
		_exit(EXIT_FAILURE);
	}

	_exit(WEXITSTATUS(status));
}
Exemple #16
0
/*
 * Given a lxc_storage (presumably blockdev-based), detect the fstype
 * by trying mounting (in a private mntns) it.
 * @lxc_storage: bdev to investigate
 * @type: preallocated char* in which to write the fstype
 * @len: length of passed in char*
 * Returns length of fstype, of -1 on error
 */
int detect_fs(struct lxc_storage *bdev, char *type, int len)
{
	int ret;
	int p[2];
	size_t linelen;
	pid_t pid;
	FILE *f;
	char *sp1, *sp2, *sp3;
	const char *l, *srcdev;
	char devpath[PATH_MAX];
	char *line = NULL;

	if (!bdev || !bdev->src || !bdev->dest)
		return -1;

	srcdev = lxc_storage_get_path(bdev->src, bdev->type);

	ret = pipe(p);
	if (ret < 0)
		return -1;

	if ((pid = fork()) < 0)
		return -1;

	if (pid > 0) {
		int status;
		close(p[1]);
		memset(type, 0, len);
		ret = read(p[0], type, len - 1);
		close(p[0]);
		if (ret < 0) {
			SYSERROR("error reading from pipe");
			wait(&status);
			return -1;
		} else if (ret == 0) {
			ERROR("child exited early - fstype not found");
			wait(&status);
			return -1;
		}
		wait(&status);
		type[len - 1] = '\0';
		INFO("detected fstype %s for %s", type, srcdev);
		return ret;
	}

	if (unshare(CLONE_NEWNS) < 0)
		exit(1);

	if (detect_shared_rootfs()) {
		if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL)) {
			SYSERROR("Failed to make / rslave");
			ERROR("Continuing...");
		}
	}

	ret = mount_unknown_fs(srcdev, bdev->dest, bdev->mntopts);
	if (ret < 0) {
		ERROR("failed mounting %s onto %s to detect fstype", srcdev,
		      bdev->dest);
		exit(1);
	}

	l = linkderef(srcdev, devpath);
	if (!l)
		exit(1);
	f = fopen("/proc/self/mounts", "r");
	if (!f)
		exit(1);

	while (getline(&line, &linelen, f) != -1) {
		sp1 = strchr(line, ' ');
		if (!sp1)
			exit(1);
		*sp1 = '\0';
		if (strcmp(line, l))
			continue;
		sp2 = strchr(sp1 + 1, ' ');
		if (!sp2)
			exit(1);
		*sp2 = '\0';
		sp3 = strchr(sp2 + 1, ' ');
		if (!sp3)
			exit(1);
		*sp3 = '\0';
		sp2++;
		if (write(p[1], sp2, strlen(sp2)) != strlen(sp2))
			exit(1);

		exit(0);
	}

	exit(1);
}
Exemple #17
0
int main(int argc, char *argv[])
{
	int c;
	unsigned long flags = CLONE_NEWUSER | CLONE_NEWNS;
	char ttyname0[256], ttyname1[256], ttyname2[256];
	int status;
	int ret;
	int pid;
	char *default_args[] = {"/bin/sh", NULL};
	char buf[1];
	int pipe1[2],  // child tells parent it has unshared
	    pipe2[2];  // parent tells child it is mapped and may proceed

	memset(ttyname0, '\0', sizeof(ttyname0));
	memset(ttyname1, '\0', sizeof(ttyname1));
	memset(ttyname2, '\0', sizeof(ttyname2));
	if (isatty(0)) {
		ret = readlink("/proc/self/fd/0", ttyname0, sizeof(ttyname0));
		if (ret < 0) {
			perror("unable to open stdin.");
			exit(1);
		}
		ret = readlink("/proc/self/fd/1", ttyname1, sizeof(ttyname1));
		if (ret < 0) {
			printf("Warning: unable to open stdout, continuing.");
			memset(ttyname1, '\0', sizeof(ttyname1));
		}
		ret = readlink("/proc/self/fd/2", ttyname2, sizeof(ttyname2));
		if (ret < 0) {
			printf("Warning: unable to open stderr, continuing.");
			memset(ttyname2, '\0', sizeof(ttyname2));
		}
	}

	lxc_list_init(&active_map);

	while ((c = getopt(argc, argv, "m:h")) != EOF) {
		switch (c) {
			case 'm': if (parse_map(optarg)) usage(argv[0]); break;
			case 'h':
			default:
				  usage(argv[0]);
		}
	};

	if (lxc_list_empty(&active_map)) {
		if (find_default_map()) {
			fprintf(stderr, "You have no allocated subuids or subgids\n");
			exit(1);
		}
	}

	argv = &argv[optind];
	argc = argc - optind;
	if (argc < 1) {
		argv = default_args;
		argc = 1;
	}

	if (pipe(pipe1) < 0 || pipe(pipe2) < 0) {
		perror("pipe");
		exit(1);
	}
	if ((pid = fork()) == 0) {
		// Child.

		close(pipe1[0]);
		close(pipe2[1]);
		opentty(ttyname0, 0);
		opentty(ttyname1, 1);
		opentty(ttyname2, 2);

		ret = unshare(flags);
		if (ret < 0) {
			perror("unshare");
			return 1;
		}
		buf[0] = '1';
		if (write(pipe1[1], buf, 1) < 1) {
			perror("write pipe");
			exit(1);
		}
		if (read(pipe2[0], buf, 1) < 1) {
			perror("read pipe");
			exit(1);
		}
		if (buf[0] != '1') {
			fprintf(stderr, "parent had an error, child exiting\n");
			exit(1);
		}

		close(pipe1[1]);
		close(pipe2[0]);
		return do_child((void*)argv);
	}

	close(pipe1[1]);
	close(pipe2[0]);
	if (read(pipe1[0], buf, 1) < 1) {
		perror("read pipe");
		exit(1);
	}

	buf[0] = '1';

	if (lxc_map_ids(&active_map, pid)) {
		fprintf(stderr, "error mapping child\n");
		ret = 0;
	}
	if (write(pipe2[1], buf, 1) < 0) {
		perror("write to pipe");
		exit(1);
	}

	if ((ret = waitpid(pid, &status, __WALL)) < 0) {
		printf("waitpid() returns %d, errno %d\n", ret, errno);
		exit(1);
	}

	exit(WEXITSTATUS(status));
}
Exemple #18
0
int main(int argc, char ** argv) {
    char *containerimage;
    char *mountpoint;
    char *bootstrap_script;
    char *defintion_script;
    char *loop_dev;
    int retval = 0;
    int containerimage_fd;
    int loop_fd;
    uid_t uid = geteuid();

    if ( uid != 0 ) {
        fprintf(stderr, "ABORT: Calling user must be root\n");
        return(1);
    }

    if ( argv[1] == NULL || argv[2] == NULL ) {
        fprintf(stderr, "USAGE: %s [singularity container image] [bootstrap definition]\n", argv[0]);
        return(1);
    }

    containerimage = strdup(argv[1]);
    defintion_script = strdup(argv[2]);
    bootstrap_script = strjoin(LIBEXECDIR, "/singularity/bootstrap.sh");

    mountpoint = getenv("SINGULARITY_BUILD_ROOT");

    if ( is_file(containerimage) < 0 ) {
        fprintf(stderr, "ABORT: Container image not found: %s\n", containerimage);
        return(1);
    }

    if ( is_dir(mountpoint) < 0 ) {
        fprintf(stderr, "ABORT: Mount point must be a directory: %s\n", mountpoint);
        return(1);
    }

    if ( unshare(CLONE_NEWNS) < 0 ) {
        fprintf(stderr, "ABORT: Could not virtulize mount namespace\n");
        return(255);
    }

    if ( mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL) < 0 ) {
        fprintf(stderr, "ABORT: Could not make mountspaces private: %s\n", strerror(errno));
        return(255);
    }


    if ( ( containerimage_fd = open(containerimage, O_RDWR) ) < 0 ) {
        fprintf(stderr, "ERROR: Could not open image %s: %s\n", containerimage, strerror(errno));
        return(255);
    }

    loop_dev = obtain_loop_dev();

    if ( ( loop_fd = open(loop_dev, O_RDWR) ) < 0 ) {
        fprintf(stderr, "ERROR: Failed to open %s: %s\n", loop_dev, strerror(errno));
        return(-1);
    }

    if ( associate_loop(containerimage_fd, loop_fd) < 0 ) {
        fprintf(stderr, "ERROR: Could not associate %s to loop device %s\n", containerimage, loop_dev);
        return(255);
    }

    if ( mount_image(loop_dev, mountpoint, 1) < 0 ) {
        fprintf(stderr, "ABORT: exiting...\n");
        return(255);
    }

    child_pid = fork();

    if ( child_pid == 0 ) {
        char *exec[4];

        exec[0] = strdup("/bin/bash");
        exec[1] = strdup(bootstrap_script);
        exec[2] = strdup(defintion_script);
        exec[3] = NULL;

        if ( execv("/bin/bash", exec) != 0 ) {
            fprintf(stderr, "ABORT: exec of bootstrap failed: %s\n", strerror(errno));
        }

    } else if ( child_pid > 0 ) {
        int tmpstatus;
        signal(SIGINT, sighandler);
        signal(SIGKILL, sighandler);
        signal(SIGQUIT, sighandler);

        waitpid(child_pid, &tmpstatus, 0);
        retval = WEXITSTATUS(tmpstatus);

    } else {
        fprintf(stderr, "ABORT: Could not fork child process\n");
        retval++;
    }

    return(retval);
}
Exemple #19
0
int
main(int argc, char **argv)
{
	struct bindmnt *bmnt;
	uid_t uid = getuid();
	gid_t gid = getgid();
	const char *chrootdir, *cmd, *argv0;
	char **cmdargs, buf[32];
	int c, fd;
	const struct option longopts[] = {
		{ NULL, 0, NULL, 0 }
	};

	chrootdir = cmd = NULL;
	argv0 = argv[0];

	while ((c = getopt_long(argc, argv, "b:V", longopts, NULL)) != -1) {
		switch (c) {
		case 'b':
			if (optarg == NULL || *optarg == '\0')
				break;
			add_bindmount(optarg);
			break;
		case 'V':
			printf("%s\n", XBPS_RELVER);
			exit(EXIT_SUCCESS);
		case '?':
		default:
			usage(argv0);
		}
	}
	argc -= optind;
	argv += optind;

	if (argc < 2)
		usage(argv0);

	chrootdir = argv[0];
	cmd = argv[1];
	cmdargs = argv + 1;

	/* Never allow chrootdir == / */
	if (strcmp(chrootdir, "/") == 0)
		die("/ is not allowed to be used as chrootdir");

	/* Make chrootdir absolute */
	if (chrootdir[0] != '/') {
		char cwd[PATH_MAX-1];
		if (getcwd(cwd, sizeof(cwd)) == NULL)
			die("getcwd");
		chrootdir = xbps_xasprintf("%s/%s", cwd, chrootdir);
	}

	/*
	 * Unshare from the current process namespaces and set ours.
	 */
	if (unshare(CLONE_NEWUSER|CLONE_NEWNS|CLONE_NEWIPC|CLONE_NEWUTS) == -1) {
		errval = 99;
		die("unshare");
	}
	/*
	 * Setup uid/gid user mappings and restrict setgroups().
	 */
	if ((fd = open("/proc/self/uid_map", O_RDWR)) == -1)
		die("failed to open /proc/self/uidmap rw");
	if (write(fd, buf, snprintf(buf, sizeof buf, "%u %u 1\n", uid, uid)) == -1)
		die("failed to write to /proc/self/uid_map");

	close(fd);

	if ((fd = open("/proc/self/setgroups", O_RDWR)) != -1) {
		if (write(fd, "deny", 4) == -1)
			die("failed to write to /proc/self/setgroups");
		close(fd);
	}

	if ((fd = open("/proc/self/gid_map", O_RDWR)) == -1)
		die("failed to open /proc/self/gid_map rw");
	if (write(fd, buf, snprintf(buf, sizeof buf, "%u %u 1\n", gid, gid)) == -1)
		die("failed to write to /proc/self/gid_map");

	close(fd);

	/* bind mount /proc */
	bindmount(chrootdir, "/proc", NULL);

	/* bind mount /sys */
	bindmount(chrootdir, "/sys", NULL);

	/* bind mount /dev */
	bindmount(chrootdir, "/dev", NULL);

	/* bind mount all user specified mnts */
	SIMPLEQ_FOREACH(bmnt, &bindmnt_queue, entries)
		bindmount(chrootdir, bmnt->src, bmnt->dest);

	/* move chrootdir to / and chroot to it */
	if (chdir(chrootdir) == -1)
		die("chdir to %s", chrootdir);

	if (mount(".", ".", NULL, MS_BIND|MS_PRIVATE, NULL) == -1)
		die("Failed to bind mount %s", chrootdir);

	if (mount(chrootdir, "/", NULL, MS_MOVE, NULL) == -1)
		die("Failed to move %s as rootfs", chrootdir);

	if (chroot(".") == -1)
		die("Failed to chroot to %s", chrootdir);

	if (execvp(cmd, cmdargs) == -1)
		die("Failed to execute command %s", cmd);

	/* NOTREACHED */
	exit(EXIT_FAILURE);
}
Exemple #20
0
int main(int argc, char *argv[])
{
	char** cmdline = &argv[1];
	char* args = NULL;
	if (cmdline[0] && cmdline[0][0] == '-' && argc)
	{
		args = argv[1];
		cmdline = &argv[2];
		argc--;
	}

	if (args && strstr(args, "h"))
	{
		printf("Usage: %s [-options] command [args]\n\n", argv[0]);
		printf("Options:\n");
		printf(" -m, --mount       unshare mounts namespace\n");
		printf(" -u, --uts         unshare UTS namespace (hostname etc)\n");
		printf(" -i, --ipc         unshare System V IPC namespace\n");
		printf(" -n, --net         unshare network namespace\n\n");
		printf(" -h, --help     display this help and exit\n");
		printf(" -V, --version  output version information and exit\n");
		printf(" note: when -i is used PID isolation is also performed\n");
		return 0;
	}

	if (argc < 2 || strlen(cmdline[0]) == 0)
	{
		printf("Usage: %s [-options] command [args]\n", argv[0]);
		return 1;
	}

	if (args && strstr(args, "V"))
	{
		printf("unshare with PID \n", argv[0]);
		return 0;
	}
	int cloneflags=0;

	if (args && strstr(args, "m"))
	{
		unshare(CLONE_NEWNS);
	}
	if (args && strstr(args, "n"))
	{
		unshare(CLONE_NEWNET);
	}
	if (args && strstr(args, "u"))
	{
		unshare(CLONE_NEWUTS);
	}
	if (args && strstr(args, "i"))
	{
		unshare(CLONE_NEWIPC);
		cloneflags |= CLONE_NEWPID;
	}

	char stack[10240];
	pid_t pid = clone(jail_process, &stack, cloneflags, cmdline);
	if (pid == -1)
	{
		perror("Error launching jail");
		return 1;
	}
	int ret = 0;
	while(wait(&ret) != pid);
	return WEXITSTATUS(ret);
}
Exemple #21
0
static int netns_add(int argc, char **argv)
{
	/* This function creates a new network namespace and
	 * a new mount namespace and bind them into a well known
	 * location in the filesystem based on the name provided.
	 *
	 * The mount namespace is created so that any necessary
	 * userspace tweaks like remounting /sys, or bind mounting
	 * a new /etc/resolv.conf can be shared between uers.
	 */
	char netns_path[MAXPATHLEN];
	const char *name;
	int fd;
	int made_netns_run_dir_mount = 0;

	if (argc < 1) {
		fprintf(stderr, "No netns name specified\n");
		return -1;
	}
	name = argv[0];

	snprintf(netns_path, sizeof(netns_path), "%s/%s", NETNS_RUN_DIR, name);

	if (create_netns_dir())
		return -1;

	/* Make it possible for network namespace mounts to propagate between
	 * mount namespaces.  This makes it likely that a unmounting a network
	 * namespace file in one namespace will unmount the network namespace
	 * file in all namespaces allowing the network namespace to be freed
	 * sooner.
	 */
	while (mount("", NETNS_RUN_DIR, "none", MS_SHARED | MS_REC, NULL)) {
		/* Fail unless we need to make the mount point */
		if (errno != EINVAL || made_netns_run_dir_mount) {
			fprintf(stderr, "mount --make-shared %s failed: %s\n",
				NETNS_RUN_DIR, strerror(errno));
			return -1;
		}

		/* Upgrade NETNS_RUN_DIR to a mount point */
		if (mount(NETNS_RUN_DIR, NETNS_RUN_DIR, "none", MS_BIND, NULL)) {
			fprintf(stderr, "mount --bind %s %s failed: %s\n",
				NETNS_RUN_DIR, NETNS_RUN_DIR, strerror(errno));
			return -1;
		}
		made_netns_run_dir_mount = 1;
	}

	/* Create the filesystem state */
	fd = open(netns_path, O_RDONLY|O_CREAT|O_EXCL, 0);
	if (fd < 0) {
		fprintf(stderr, "Cannot create namespace file \"%s\": %s\n",
			netns_path, strerror(errno));
		return -1;
	}
	close(fd);
	if (unshare(CLONE_NEWNET) < 0) {
		fprintf(stderr, "Failed to create a new network namespace \"%s\": %s\n",
			name, strerror(errno));
		goto out_delete;
	}

	/* Bind the netns last so I can watch for it */
	if (mount("/proc/self/ns/net", netns_path, "none", MS_BIND, NULL) < 0) {
		fprintf(stderr, "Bind /proc/self/ns/net -> %s failed: %s\n",
			netns_path, strerror(errno));
		goto out_delete;
	}
	return 0;
out_delete:
	netns_delete(argc, argv);
	return -1;
}
Exemple #22
0
static int create_ns(const char *statedir, const char *name)
{
	char str[64];
	uid_t uid = getuid();
	gid_t gid = getgid();

	if (unshare(CLONE_NEWNS | CLONE_NEWNET |
		    CLONE_NEWUTS | CLONE_NEWUSER) < 0)
		pdie("can't unshare namespaces");

	if (access("/proc/self/setgroups", O_RDONLY) == 0)
		write_file("/proc/self/setgroups", "deny");
	snprintf(str, sizeof(str), "0 %d 1", uid);
	write_file("/proc/self/uid_map", str);
	snprintf(str, sizeof(str), "0 %d 1", gid);
	write_file("/proc/self/gid_map", str);

	if (sethostname(name, strlen(name)) < 0)
		pdie("can't set hostname");
	setup_ipv4("lo", "127.0.0.1", "255.0.0.0", false, 0);

	mkdir(statedir, 0755);
	char *local_etc = populate_statedir(statedir, "etc", true);
	char *workdir = populate_statedir(statedir, "workdir", true);
	char *resolv = populate_statedir(statedir, "etc/resolv.conf", false);

	char *mount_opts;
	if (asprintf(&mount_opts, "lowerdir=/etc,upperdir=%s,workdir=%s",
		     local_etc, workdir) < 0) {
		die("can't allocate memory\n");
	}

	/*
	 * overlayfs is only usable on patched kernels (e.g. Ubuntu) due to
	 * permission checks, but it is the cleanest solution because it
	 * overrides symlinks.  If we have to use bind mounts instead,
	 * tell the watcher process to re-create the bind mount if
	 * resolv.conf gets deleted.
	 */
	int inotify_fd = -1, inotify_wd = -1;
	if (mount("overlay", "/etc", "overlay", 0, mount_opts) == 0) {
		/* pass through */
	} else {
		inotify_fd = inotify_init();
		if (inotify_fd < 0)
			pdie("can't create inotify socket");

		inotify_wd = watch_and_bind_mount(inotify_fd, resolv);
		if (inotify_wd < 0)
			die("can't watch resolv.conf\n");
	}

	/*
	 * Create the initial watcher connection here so that the parent
	 * process doesn't need to wait for the child process to start up.
	 */
	int initial_conn_fd[2];
	if (socketpair(AF_UNIX, SOCK_STREAM, 0, initial_conn_fd) < 0)
		pdie("socketpair failed");
	write_pid(statedir, create_watcher(statedir, initial_conn_fd[1],
					   resolv, inotify_fd, inotify_wd));
	close(initial_conn_fd[1]);
	close(inotify_fd);

	free(mount_opts);
	free(resolv);
	free(workdir);
	free(local_etc);

	return initial_conn_fd[0];
}
Exemple #23
0
DrawingContext::DrawingContext(int BufferWi, int BufferHt, bool allowShared, bool alpha) : nullBitmap(wxNullBitmap)
{
    unshare(nullBitmap);
    image = new wxImage(BufferWi > 0 ? BufferWi : 1, BufferHt > 0 ? BufferHt : 1);
    if (alpha) {
        image->SetAlpha();
        for(wxCoord x=0; x<BufferWi; x++) {
            for(wxCoord y=0; y<BufferHt; y++) {
                image->SetAlpha(x, y, wxIMAGE_ALPHA_TRANSPARENT);
            }
        }
    }
    bitmap = new wxBitmap(*image);
    dc = new wxMemoryDC(*bitmap);

    if (!allowShared) {
        //make sure we UnShare everything that is being held onto
        //also use "non-normal" defaults to avoid "==" issue that
        //would keep it from using the non-shared versions
        wxFont font(*wxITALIC_FONT);
        unshare(font);
        dc->SetFont(font);

        wxBrush brush(*wxYELLOW_BRUSH);
        unshare(brush);
        dc->SetBrush(brush);
        dc->SetBackground(brush);

        wxPen pen(*wxGREEN_PEN);
        unshare(pen);
        dc->SetPen(pen);

        unshare(dc->GetBrush());
        unshare(dc->GetBackground());
        unshare(dc->GetFont());
        unshare(dc->GetPen());
        unshare(dc->GetTextForeground());
        unshare(dc->GetTextBackground());
    #ifndef LINUX
        wxColor c(12, 25, 3);
        unshare(c);
        dc->SetTextBackground(c);

        wxColor c2(0, 35, 5);
        unshare(c2);
        dc->SetTextForeground(c2);
    #endif
    }

    dc->SelectObject(nullBitmap);
    delete bitmap;
    bitmap = nullptr;
    gc = nullptr;
}
Exemple #24
0
static void test_get_process_cmdline_harder(void) {
        char path[] = "/tmp/test-cmdlineXXXXXX";
        _cleanup_close_ int fd = -1;
        _cleanup_free_ char *line = NULL;
        pid_t pid;

        if (geteuid() != 0)
                return;

#if HAVE_VALGRIND_VALGRIND_H
        /* valgrind patches open(/proc//cmdline)
         * so, test_get_process_cmdline_harder fails always
         * See https://github.com/systemd/systemd/pull/3555#issuecomment-226564908 */
        if (RUNNING_ON_VALGRIND)
                return;
#endif

        pid = fork();
        if (pid > 0) {
                siginfo_t si;

                (void) wait_for_terminate(pid, &si);

                assert_se(si.si_code == CLD_EXITED);
                assert_se(si.si_status == 0);

                return;
        }

        assert_se(pid == 0);
        assert_se(unshare(CLONE_NEWNS) >= 0);

        assert_se(mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL) >= 0);

        fd = mkostemp(path, O_CLOEXEC);
        assert_se(fd >= 0);

        if (mount(path, "/proc/self/cmdline", "bind", MS_BIND, NULL) < 0) {
                /* This happens under selinux… Abort the test in this case. */
                log_warning_errno(errno, "mount(..., \"/proc/self/cmdline\", \"bind\", ...) failed: %m");
                assert(errno == EACCES);
                return;
        }

        assert_se(unlink(path) >= 0);

        assert_se(prctl(PR_SET_NAME, "testa") >= 0);

        assert_se(get_process_cmdline(getpid_cached(), 0, false, &line) == -ENOENT);

        assert_se(get_process_cmdline(getpid_cached(), 0, true, &line) >= 0);
        assert_se(streq(line, "[testa]"));
        line = mfree(line);

        assert_se(get_process_cmdline(getpid_cached(), 1, true, &line) >= 0);
        assert_se(streq(line, ""));
        line = mfree(line);

        assert_se(get_process_cmdline(getpid_cached(), 2, true, &line) >= 0);
        assert_se(streq(line, "["));
        line = mfree(line);

        assert_se(get_process_cmdline(getpid_cached(), 3, true, &line) >= 0);
        assert_se(streq(line, "[."));
        line = mfree(line);

        assert_se(get_process_cmdline(getpid_cached(), 4, true, &line) >= 0);
        assert_se(streq(line, "[.."));
        line = mfree(line);

        assert_se(get_process_cmdline(getpid_cached(), 5, true, &line) >= 0);
        assert_se(streq(line, "[..."));
        line = mfree(line);

        assert_se(get_process_cmdline(getpid_cached(), 6, true, &line) >= 0);
        assert_se(streq(line, "[...]"));
        line = mfree(line);

        assert_se(get_process_cmdline(getpid_cached(), 7, true, &line) >= 0);
        assert_se(streq(line, "[t...]"));
        line = mfree(line);

        assert_se(get_process_cmdline(getpid_cached(), 8, true, &line) >= 0);
        assert_se(streq(line, "[testa]"));
        line = mfree(line);

        assert_se(write(fd, "\0\0\0\0\0\0\0\0\0", 10) == 10);

        assert_se(get_process_cmdline(getpid_cached(), 0, false, &line) == -ENOENT);

        assert_se(get_process_cmdline(getpid_cached(), 0, true, &line) >= 0);
        assert_se(streq(line, "[testa]"));
        line = mfree(line);

        assert_se(write(fd, "foo\0bar\0\0\0\0\0", 10) == 10);

        assert_se(get_process_cmdline(getpid_cached(), 0, false, &line) >= 0);
        assert_se(streq(line, "foo bar"));
        line = mfree(line);

        assert_se(get_process_cmdline(getpid_cached(), 0, true, &line) >= 0);
        assert_se(streq(line, "foo bar"));
        line = mfree(line);

        assert_se(write(fd, "quux", 4) == 4);
        assert_se(get_process_cmdline(getpid_cached(), 0, false, &line) >= 0);
        assert_se(streq(line, "foo bar quux"));
        line = mfree(line);

        assert_se(get_process_cmdline(getpid_cached(), 0, true, &line) >= 0);
        assert_se(streq(line, "foo bar quux"));
        line = mfree(line);

        assert_se(get_process_cmdline(getpid_cached(), 1, true, &line) >= 0);
        assert_se(streq(line, ""));
        line = mfree(line);

        assert_se(get_process_cmdline(getpid_cached(), 2, true, &line) >= 0);
        assert_se(streq(line, "."));
        line = mfree(line);

        assert_se(get_process_cmdline(getpid_cached(), 3, true, &line) >= 0);
        assert_se(streq(line, ".."));
        line = mfree(line);

        assert_se(get_process_cmdline(getpid_cached(), 4, true, &line) >= 0);
        assert_se(streq(line, "..."));
        line = mfree(line);

        assert_se(get_process_cmdline(getpid_cached(), 5, true, &line) >= 0);
        assert_se(streq(line, "f..."));
        line = mfree(line);

        assert_se(get_process_cmdline(getpid_cached(), 6, true, &line) >= 0);
        assert_se(streq(line, "fo..."));
        line = mfree(line);

        assert_se(get_process_cmdline(getpid_cached(), 7, true, &line) >= 0);
        assert_se(streq(line, "foo..."));
        line = mfree(line);

        assert_se(get_process_cmdline(getpid_cached(), 8, true, &line) >= 0);
        assert_se(streq(line, "foo..."));
        line = mfree(line);

        assert_se(get_process_cmdline(getpid_cached(), 9, true, &line) >= 0);
        assert_se(streq(line, "foo b..."));
        line = mfree(line);

        assert_se(get_process_cmdline(getpid_cached(), 10, true, &line) >= 0);
        assert_se(streq(line, "foo ba..."));
        line = mfree(line);

        assert_se(get_process_cmdline(getpid_cached(), 11, true, &line) >= 0);
        assert_se(streq(line, "foo bar..."));
        line = mfree(line);

        assert_se(get_process_cmdline(getpid_cached(), 12, true, &line) >= 0);
        assert_se(streq(line, "foo bar..."));
        line = mfree(line);

        assert_se(get_process_cmdline(getpid_cached(), 13, true, &line) >= 0);
        assert_se(streq(line, "foo bar quux"));
        line = mfree(line);

        assert_se(get_process_cmdline(getpid_cached(), 14, true, &line) >= 0);
        assert_se(streq(line, "foo bar quux"));
        line = mfree(line);

        assert_se(get_process_cmdline(getpid_cached(), 1000, true, &line) >= 0);
        assert_se(streq(line, "foo bar quux"));
        line = mfree(line);

        assert_se(ftruncate(fd, 0) >= 0);
        assert_se(prctl(PR_SET_NAME, "aaaa bbbb cccc") >= 0);

        assert_se(get_process_cmdline(getpid_cached(), 0, false, &line) == -ENOENT);

        assert_se(get_process_cmdline(getpid_cached(), 0, true, &line) >= 0);
        assert_se(streq(line, "[aaaa bbbb cccc]"));
        line = mfree(line);

        assert_se(get_process_cmdline(getpid_cached(), 10, true, &line) >= 0);
        assert_se(streq(line, "[aaaa...]"));
        line = mfree(line);

        assert_se(get_process_cmdline(getpid_cached(), 11, true, &line) >= 0);
        assert_se(streq(line, "[aaaa...]"));
        line = mfree(line);

        assert_se(get_process_cmdline(getpid_cached(), 12, true, &line) >= 0);
        assert_se(streq(line, "[aaaa b...]"));
        line = mfree(line);

        safe_close(fd);
        _exit(EXIT_SUCCESS);
}
Exemple #25
0
/*
 * Given a bdev (presumably blockdev-based), detect the fstype
 * by trying mounting (in a private mntns) it.
 * @bdev: bdev to investigate
 * @type: preallocated char* in which to write the fstype
 * @len: length of passed in char*
 * Returns length of fstype, of -1 on error
 */
static int detect_fs(struct bdev *bdev, char *type, int len)
{
	int  p[2], ret;
	size_t linelen;
	pid_t pid;
	FILE *f;
	char *sp1, *sp2, *sp3, *line = NULL;
	char *srcdev;

	if (!bdev || !bdev->src || !bdev->dest)
		return -1;

	srcdev = bdev->src;
	if (strcmp(bdev->type, "loop") == 0)
		srcdev = bdev->src + 5;

	process_lock();
	ret = pipe(p);
	process_unlock();
	if (ret < 0)
		return -1;
	if ((pid = fork()) < 0)
		return -1;
	if (pid > 0) {
		int status;
		process_lock();
		close(p[1]);
		process_unlock();
		memset(type, 0, len);
		ret = read(p[0], type, len-1);
		process_lock();
		close(p[0]);
		process_unlock();
		if (ret < 0) {
			SYSERROR("error reading from pipe");
			wait(&status);
			return -1;
		} else if (ret == 0) {
			ERROR("child exited early - fstype not found");
			wait(&status);
			return -1;
		}
		wait(&status);
		type[len-1] = '\0';
		INFO("detected fstype %s for %s", type, srcdev);
		return ret;
	}

	process_unlock(); // we're no longer sharing
	if (unshare(CLONE_NEWNS) < 0)
		exit(1);

	ret = mount_unknow_fs(srcdev, bdev->dest, 0);
	if (ret < 0) {
		ERROR("failed mounting %s onto %s to detect fstype", srcdev, bdev->dest);
		exit(1);
	}
	// if symlink, get the real dev name
	char devpath[MAXPATHLEN];
	char *l = linkderef(srcdev, devpath);
	if (!l)
		exit(1);
	f = fopen("/proc/self/mounts", "r");
	if (!f)
		exit(1);
	while (getline(&line, &linelen, f) != -1) {
		sp1 = index(line, ' ');
		if (!sp1)
			exit(1);
		*sp1 = '\0';
		if (strcmp(line, l))
			continue;
		sp2 = index(sp1+1, ' ');
		if (!sp2)
			exit(1);
		*sp2 = '\0';
		sp3 = index(sp2+1, ' ');
		if (!sp3)
			exit(1);
		*sp3 = '\0';
		sp2++;
		if (write(p[1], sp2, strlen(sp2)) != strlen(sp2))
			exit(1);
		exit(0);
	}
	exit(1);
}
Exemple #26
0
int main(int argc, char **argv)
{
	char path[PATH_MAX], bpath[PATH_MAX], spath[PATH_MAX], bspath[PATH_MAX];
	pid_t pid;
	int status;
	task_waiter_t t;

	test_init(argc, argv);

	task_waiter_init(&t);

	mount(NULL, "/", NULL, MS_SHARED, NULL);

	snprintf(path, sizeof(path), "%s/test", dirname);
	snprintf(bpath, sizeof(bpath), "%s/test.bind", dirname);
	snprintf(spath, sizeof(spath), "%s/test/sub", dirname);
	snprintf(bspath, sizeof(bspath), "%s/test.bind/sub", dirname);

	if (mkdir(dirname, 0700) ||
	    mkdir(path, 0700) ||
	    mkdir(spath, 0700) ||
	    mkdir(bpath, 0700)) {
		err("mkdir");
		return 1;
	}

	pid = fork();
	if (pid < 0) {
		err("fork");
		return 1;
	}
	if (pid == 0) {
		unshare(CLONE_NEWNS);
		if (mount(path, bpath, NULL, MS_BIND, NULL)) {
			err("mount");
			return 1;
		}

		task_waiter_complete(&t, 1);
		task_waiter_wait4(&t, 2);

		if (access(bspath, F_OK)) {
			fail("%s isn't accessiable", bspath);
			return 1;
		}


		if (umount2(bpath, MNT_DETACH)) {
			fail("umount");
			return 1;
		}

		return 0;
	}

	task_waiter_wait4(&t, 1);

	if (mount("test", spath, "tmpfs", 0, NULL)) {
		err("mount");
		return 1;
	}

	test_daemon();
	test_waitsig();

	task_waiter_complete(&t, 2);

	if (waitpid(pid, &status, 0) != pid) {
		err("waitpid %d", pid);
		return 1;
	}

	if (status) {
		err("%d/%d/%d/%d", WIFEXITED(status), WEXITSTATUS(status), WIFSIGNALED(status), WTERMSIG(status));
		return 1;
	}

	pass();

	return 0;
}
Exemple #27
0
int main(gint argc, gchar **argv) {
  const gchar *self = *argv++;

  if (argc < 2) {
    g_message("%s command [arguments...]", self);
    return 1;
  }

  if (g_getenv("NIX_CHROOTENV"))
    g_warning("chrootenv doesn't stack!");
  else
    g_setenv("NIX_CHROOTENV", "", TRUE);

  g_autofree gchar *prefix =
      g_build_filename(g_get_tmp_dir(), "chrootenvXXXXXX", NULL);

  fail_if(!g_mkdtemp_full(prefix, 0755));

  pid_t cpid = fork();

  if (cpid < 0)
    fail("fork", errno);

  else if (cpid == 0) {
    uid_t uid = getuid();
    gid_t gid = getgid();

    if (unshare(CLONE_NEWNS | CLONE_NEWUSER) < 0) {
      int unshare_errno = errno;

      g_message("Requires Linux version >= 3.19 built with CONFIG_USER_NS");
      if (g_file_test("/proc/sys/kernel/unprivileged_userns_clone",
                      G_FILE_TEST_EXISTS))
        g_message("Run: sudo sysctl -w kernel.unprivileged_userns_clone=1");

      fail("unshare", unshare_errno);
    }

    spit("/proc/self/setgroups", "deny");
    spit("/proc/self/uid_map", "%d %d 1", uid, uid);
    spit("/proc/self/gid_map", "%d %d 1", gid, gid);

    bind("/", prefix);

    fail_if(chroot(prefix));
    fail_if(chdir("/"));
    fail_if(execvp(*argv, argv));
  }

  else {
    int status;

    fail_if(waitpid(cpid, &status, 0) != cpid);
    fail_if(nftw(prefix, nftw_remove, getdtablesize(),
                 FTW_DEPTH | FTW_MOUNT | FTW_PHYS));

    if (WIFEXITED(status))
      return WEXITSTATUS(status);

    else if (WIFSIGNALED(status))
      kill(getpid(), WTERMSIG(status));

    return 1;
  }
}
Exemple #28
0
static void* start_thread(void* p) {
  test_assert(0 == unshare(CLONE_FILES));
  test_assert(0 == close(STDOUT_FILENO));
  return NULL;
}
Exemple #29
0
int sandbox(void* sandbox_arg) {
	// Get rid of unused parameter warning
	(void)sandbox_arg;

	pid_t child_pid = getpid();
	if (arg_debug)
		printf("Initializing child process\n");	

 	// close each end of the unused pipes
 	close(parent_to_child_fds[1]);
 	close(child_to_parent_fds[0]);
 
 	// wait for parent to do base setup
 	wait_for_other(parent_to_child_fds[0]);

	if (arg_debug && child_pid == 1)
		printf("PID namespace installed\n");

	//****************************
	// set hostname
	//****************************
	if (cfg.hostname) {
		if (sethostname(cfg.hostname, strlen(cfg.hostname)) < 0)
			errExit("sethostname");
	}

	//****************************
	// mount namespace
	//****************************
	// mount events are not forwarded between the host the sandbox
	if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) < 0) {
		chk_chroot();
	}
	
	//****************************
	// netfilter
	//****************************
	if (arg_netfilter && any_bridge_configured()) { // assuming by default the client filter
		netfilter(arg_netfilter_file);
	}

	//****************************
	// trace pre-install
	//****************************
	if (arg_trace)
		fs_trace_preload();

	//****************************
	// configure filesystem
	//****************************
	
#ifdef HAVE_CHROOT		
	if (cfg.chrootdir) {
		fs_chroot(cfg.chrootdir);
		// force caps and seccomp if not started as root
		if (getuid() != 0) {
			// force default seccomp inside the chroot, no keep or drop list
			// the list build on top of the default drop list is kept intact
			arg_seccomp = 1;
			if (arg_seccomp_list_drop) {
				free(arg_seccomp_list_drop);
				arg_seccomp_list_drop = NULL;
			}
			if (arg_seccomp_list_keep) {
				free(arg_seccomp_list_keep);
				arg_seccomp_list_keep = NULL;
			}
			
			// disable all capabilities
			if (arg_caps_default_filter || arg_caps_list)
				fprintf(stderr, "Warning: all capabilities disabled for a regular user during chroot\n");
			arg_caps_drop_all = 1;
			
			// drop all supplementary groups; /etc/group file inside chroot
			// is controlled by a regular usr
			arg_nogroups = 1;
			printf("Dropping all Linux capabilities and enforcing default seccomp filter\n");
		}
						
		//****************************
		// trace pre-install, this time inside chroot
		//****************************
		if (arg_trace)
			fs_trace_preload();
	}
	else 
#endif		
	if (arg_overlay)
		fs_overlayfs();
	else
		fs_basic_fs();
	

	//****************************
	// set hostname in /etc/hostname
	//****************************
	if (cfg.hostname) {
		fs_hostname(cfg.hostname);
	}
	
	//****************************
	// apply the profile file
	//****************************
	if (cfg.profile)
		fs_blacklist(cfg.homedir);
	
	//****************************
	// private mode
	//****************************
	if (arg_private) {
		if (cfg.home_private)	// --private=
			fs_private_homedir();
		else if (cfg.home_private_keep) // --private-home=
			fs_private_home_list();
		else // --private
			fs_private();
	}
	
	if (arg_private_dev)
		fs_private_dev();
	if (arg_private_etc)
		fs_private_etc_list();
	
	//****************************
	// install trace
	//****************************
	if (arg_trace)
		fs_trace();
		
	//****************************
	// update /proc, /dev, /boot directorymy
	//****************************
	fs_proc_sys_dev_boot();
	
	//****************************
	// networking
	//****************************
	if (arg_nonetwork) {
		net_if_up("lo");
		if (arg_debug)
			printf("Network namespace enabled, only loopback interface available\n");
	}
	else if (any_bridge_configured()) {
		// configure lo and eth0...eth3
		net_if_up("lo");
		
		if (mac_not_zero(cfg.bridge0.macsandbox))
			net_config_mac(cfg.bridge0.devsandbox, cfg.bridge0.macsandbox);
		sandbox_if_up(&cfg.bridge0);
		
		if (mac_not_zero(cfg.bridge1.macsandbox))
			net_config_mac(cfg.bridge1.devsandbox, cfg.bridge1.macsandbox);
		sandbox_if_up(&cfg.bridge1);
		
		if (mac_not_zero(cfg.bridge2.macsandbox))
			net_config_mac(cfg.bridge2.devsandbox, cfg.bridge2.macsandbox);
		sandbox_if_up(&cfg.bridge2);
		
		if (mac_not_zero(cfg.bridge3.macsandbox))
			net_config_mac(cfg.bridge3.devsandbox, cfg.bridge3.macsandbox);
		sandbox_if_up(&cfg.bridge3);
		
		// add a default route
		if (cfg.defaultgw) {
			// set the default route
			if (net_add_route(0, 0, cfg.defaultgw))
				fprintf(stderr, "Warning: cannot configure default route\n");
		}
			
		if (arg_debug)
			printf("Network namespace enabled\n");
	}
	
	// if any dns server is configured, it is time to set it now
	fs_resolvconf();

	// print network configuration
	if (any_bridge_configured() || cfg.defaultgw || cfg.dns1) {
		printf("\n");
		if (any_bridge_configured())
			net_ifprint();
		if (cfg.defaultgw != 0)
			printf("Default gateway %d.%d.%d.%d\n", PRINT_IP(cfg.defaultgw));
		if (cfg.dns1 != 0)
			printf("DNS server %d.%d.%d.%d\n", PRINT_IP(cfg.dns1));
		if (cfg.dns2 != 0)
			printf("DNS server %d.%d.%d.%d\n", PRINT_IP(cfg.dns2));
		if (cfg.dns3 != 0)
			printf("DNS server %d.%d.%d.%d\n", PRINT_IP(cfg.dns3));
		printf("\n");
	}
	
	

	//****************************
	// start executable
	//****************************
	prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0); // kill the child in case the parent died
	int cwd = 0;
	if (cfg.cwd) {
		if (chdir(cfg.cwd) == 0)
			cwd = 1;
	}
	
	if (!cwd) {
		if (chdir("/") < 0)
			errExit("chdir");
		if (cfg.homedir) {
			struct stat s;
			if (stat(cfg.homedir, &s) == 0) {
				/* coverity[toctou] */
				if (chdir(cfg.homedir) < 0)
					errExit("chdir");
			}
		}
	}
	
	// set environment
	// fix qt 4.8
	if (setenv("QT_X11_NO_MITSHM", "1", 1) < 0)
		errExit("setenv");
	if (setenv("container", "firejail", 1) < 0) // LXC sets container=lxc,
		errExit("setenv");
	if (arg_zsh && setenv("SHELL", "/usr/bin/zsh", 1) < 0)
		errExit("setenv");
	if (arg_csh && setenv("SHELL", "/bin/csh", 1) < 0)
		errExit("setenv");
	if (cfg.shell && setenv("SHELL", cfg.shell, 1) < 0)
		errExit("setenv");
	// set prompt color to green
	//export PS1='\[\e[1;32m\][\u@\h \W]\$\[\e[0m\] '
	if (setenv("PROMPT_COMMAND", "export PS1=\"\\[\\e[1;32m\\][\\u@\\h \\W]\\$\\[\\e[0m\\] \"", 1) < 0)
		errExit("setenv");
	// set user-supplied environment variables
	env_apply();

	// set capabilities
	if (!arg_noroot)
		set_caps();

	// set rlimits
	set_rlimits();

	// set seccomp
#ifdef HAVE_SECCOMP
	// if a keep list is available, disregard the drop list
	if (arg_seccomp == 1) {
		if (arg_seccomp_list_keep)
			seccomp_filter_keep(); // this will also save the fmyilter to MNT_DIR/seccomp file
		else
			seccomp_filter_drop(); // this will also save the filter to MNT_DIR/seccomp file
	}
#endif

	// set cpu affinity
	if (cfg.cpus) {
		save_cpu(); // save cpu affinity mask to MNT_DIR/cpu file
		set_cpu_affinity();
	}
	
	// save cgroup in MNT_DIR/cgroup file
	if (cfg.cgroup)
		save_cgroup();

	//****************************************
	// drop privileges or create a new user namespace
	//****************************************
	save_nogroups();
	if (arg_noroot) {
		int rv = unshare(CLONE_NEWUSER);
		if (rv == -1) {
			fprintf(stderr, "Error: cannot mount a new user namespace\n");
			perror("unshare");
			drop_privs(arg_nogroups);
		}
	}
	else
		drop_privs(arg_nogroups);
 	
	// notify parent that new user namespace has been created so a proper
 	// UID/GID map can be setup
 	notify_other(child_to_parent_fds[1]);
 	close(child_to_parent_fds[1]);
 
 	// wait for parent to finish setting up a proper UID/GID map
 	wait_for_other(parent_to_child_fds[0]);
 	close(parent_to_child_fds[0]);

	// somehow, the new user namespace resets capabilities;
	// we need to do them again
	if (arg_noroot) {
		set_caps();
		if (arg_debug)
			printf("User namespace (noroot) installed\n");
	}


	//****************************************
	// start the program without using a shell
	//****************************************
	if (arg_shell_none) {
		if (arg_debug) {
			int i;
			for (i = cfg.original_program_index; i < cfg.original_argc; i++) {
				if (cfg.original_argv[i] == NULL)
					break;
				printf("execvp argument %d: %s\n", i - cfg.original_program_index, cfg.original_argv[i]);
			}
		}

		if (!arg_command)
			printf("Child process initialized\n");
		execvp(cfg.original_argv[cfg.original_program_index], &cfg.original_argv[cfg.original_program_index]);
	}
	//****************************************
	// start the program using a shell
	//****************************************
	else {
		// choose the shell requested by the user, or use bash as default
		char *sh;
		if (cfg.shell)
	 		sh = cfg.shell;
		else if (arg_zsh)
			sh = "/usr/bin/zsh";
		else if (arg_csh)
			sh = "/bin/csh";
		else
			sh = "/bin/bash";
			
		char *arg[5];
		int index = 0;
		arg[index++] = sh;
		arg[index++] = "-c";
		assert(cfg.command_line);
		if (arg_debug)
			printf("Starting %s\n", cfg.command_line);
		if (arg_doubledash) 
			arg[index++] = "--";
		arg[index++] = cfg.command_line;
		arg[index] = NULL;
		assert(index < 5);
		
		if (arg_debug) {
			char *msg;
			if (asprintf(&msg, "sandbox %d, execvp into %s", sandbox_pid, cfg.command_line) == -1)
				errExit("asprintf");
			logmsg(msg);
			free(msg);
		}
		
		if (arg_debug) {
			int i;
			for (i = 0; i < 5; i++) {
				if (arg[i] == NULL)
					break;
				printf("execvp argument %d: %s\n", i, arg[i]);
			}
		}
		
		if (!arg_command)
			printf("Child process initialized\n");
		execvp(sh, arg);
	}
	

	perror("execvp");
	return 0;
}
Exemple #30
0
int main(int argc, char **argv)
{
	char path[PATH_MAX], bpath[PATH_MAX], spath[PATH_MAX];
	pid_t pid;
	int status;
	task_waiter_t t;

	test_init(argc, argv);

	task_waiter_init(&t);

	snprintf(path, sizeof(path), "%s/test", dirname);
	snprintf(bpath, sizeof(bpath), "%s/test.bind", dirname);
	snprintf(spath, sizeof(spath), "%s/test/sub", dirname);
	if (mkdir(dirname, 0700)) {
		pr_perror("mkdir");
		return 1;
	}

	if (mount(NULL, "/", NULL, MS_SHARED, NULL)) {
		pr_perror("mount");
		return 1;
	}

#ifdef SHARED_BIND02
	/* */
	if (mount(dirname, dirname, "tmpfs", 0, NULL) ||
	    mount(NULL, dirname, NULL, MS_SHARED, NULL)) {
		pr_perror("mount");
		return 1;
	}
#endif

	if (mkdir(path, 0700) ||
	    mkdir(spath, 0700) ||
	    mkdir(bpath, 0700)) {
		pr_perror("mkdir");
		return 1;
	}

	pid = fork();
	if (pid < 0) {
		pr_perror("fork");
		return 1;
	}
	if (pid == 0) {
		if (unshare(CLONE_NEWNS)) {
			pr_perror("unshare");
			return 1;
		}
		if (mount(path, bpath, NULL, MS_BIND, NULL)) {
			pr_perror("mount");
			return 1;
		}

		task_waiter_complete(&t, 1);
		task_waiter_wait4(&t, 2);
		if (umount(spath)) {
			task_waiter_complete(&t, 2);
			fail("umount");
			return 1;
		}
		task_waiter_complete(&t, 3);
		task_waiter_wait4(&t, 4);

		return 0;
	}

	task_waiter_wait4(&t, 1);

	if (mount("test", spath, "tmpfs", 0, NULL)) {
		pr_perror("mount");
		return 1;
	}


	test_daemon();
	test_waitsig();

	task_waiter_complete(&t, 2);
	task_waiter_wait4(&t, 3);

	if (umount(bpath)) {
		task_waiter_complete(&t, 2);
		fail("umount");
		return 1;
	}

	task_waiter_complete(&t, 4);

	if (waitpid(pid, &status, 0) != pid) {
		pr_perror("waitpid %d", pid);
		return 1;
	}

	if (status) {
		pr_perror("%d/%d/%d/%d", WIFEXITED(status), WEXITSTATUS(status), WIFSIGNALED(status), WTERMSIG(status));
		return 1;
	}

	pass();

	return 0;
}