static bool should_discard_current_ns(dev_t base_snap_dev)
{
	// Inspect the namespace and check if we should discard it.
	//
	// The namespace may become "stale" when the rootfs is not the same
	// device we found above. This will happen whenever the base snap is
	// refreshed since the namespace was first created.
	struct sc_mountinfo_entry *mie;
	struct sc_mountinfo *mi SC_CLEANUP(sc_cleanup_mountinfo) = NULL;

	mi = sc_parse_mountinfo(NULL);
	if (mi == NULL) {
		die("cannot parse mountinfo of the current process");
	}
	for (mie = sc_first_mountinfo_entry(mi); mie != NULL;
	     mie = sc_next_mountinfo_entry(mie)) {
		if (!sc_streq(mie->mount_dir, "/")) {
			continue;
		}
		// NOTE: we want the initial rootfs just in case overmount
		// was used to do something weird. The initial rootfs was
		// set up by snap-confine and that is the one we want to
		// measure.
		debug("found root filesystem inside the mount namespace %d:%d",
		      mie->dev_major, mie->dev_minor);
		return base_snap_dev != MKDEV(mie->dev_major, mie->dev_minor);
	}
	die("cannot find mount entry of the root filesystem inside snap namespace");
}
static dev_t find_base_snap_device(const char *base_snap_name,
				   const char *base_snap_rev)
{
	// Find the backing device of the base snap.
	// TODO: add support for "try mode" base snaps that also need
	// consideration of the mie->root component.
	dev_t base_snap_dev = 0;
	char base_squashfs_path[PATH_MAX];
	sc_must_snprintf(base_squashfs_path,
			 sizeof base_squashfs_path, "%s/%s/%s",
			 SNAP_MOUNT_DIR, base_snap_name, base_snap_rev);
	struct sc_mountinfo *mi SC_CLEANUP(sc_cleanup_mountinfo) = NULL;
	mi = sc_parse_mountinfo(NULL);
	if (mi == NULL) {
		die("cannot parse mountinfo of the current process");
	}
	bool found = false;
	for (struct sc_mountinfo_entry * mie =
	     sc_first_mountinfo_entry(mi); mie != NULL;
	     mie = sc_next_mountinfo_entry(mie)) {
		if (sc_streq(mie->mount_dir, base_squashfs_path)) {
			base_snap_dev = MKDEV(mie->dev_major, mie->dev_minor);
			debug("found base snap filesystem device %d:%d",
			      mie->dev_major, mie->dev_minor);
			// Don't break when found, we are interested in the last
			// entry as this is the "effective" one.
			found = true;
		}
	}
	if (!found) {
		die("cannot find device backing the base snap %s",
		    base_snap_name);
	}
	return base_snap_dev;
}
Beispiel #3
0
static void test_sc_streq()
{
	// Identical strings, including two empty ones, compare equal.
	g_assert_true(sc_streq("", ""));
	g_assert_true(sc_streq("text", "text"));

	// Different contents, of equal or different lengths, are unequal.
	g_assert_false(sc_streq("foo", "bar"));
	g_assert_false(sc_streq("foofoo", "bar"));
	g_assert_false(sc_streq("foo", "barbar"));

	// NULL never compares equal to anything, not even to another NULL.
	g_assert_false(sc_streq("text", NULL));
	g_assert_false(sc_streq(NULL, "text"));
	g_assert_false(sc_streq(NULL, NULL));
}
Beispiel #4
0
sc_distro sc_classify_distro(void)
{
	FILE *f SC_CLEANUP(sc_cleanup_file) = fopen(os_release, "r");
	if (f == NULL) {
		return SC_DISTRO_CLASSIC;
	}

	bool is_core = false;
	int core_version = 0;
	char buf[255] = { 0 };

	while (fgets(buf, sizeof buf, f) != NULL) {
		size_t len = strlen(buf);
		if (len > 0 && buf[len - 1] == '\n') {
			buf[len - 1] = '\0';
		}
		if (sc_streq(buf, "ID=\"ubuntu-core\"")
		    || sc_streq(buf, "ID=ubuntu-core")) {
			is_core = true;
		} else if (sc_streq(buf, "VERSION_ID=\"16\"")
			   || sc_streq(buf, "VERSION_ID=16")) {
			core_version = 16;
		} else if (sc_streq(buf, "VARIANT_ID=\"snappy\"")
			   || sc_streq(buf, "VARIANT_ID=snappy")) {
			is_core = true;
		}
	}

	if (!is_core) {
		/* Since classic systems don't have a /meta/snap.yaml file the simple
		   presence of that file qualifies as SC_DISTRO_CORE_OTHER. */
		if (access(meta_snap_yaml, F_OK) == 0) {
			is_core = true;
		}
	}

	if (is_core) {
		if (core_version == 16) {
			return SC_DISTRO_CORE16;
		}
		return SC_DISTRO_CORE_OTHER;
	} else {
		return SC_DISTRO_CLASSIC;
	}
}
// Load the compiled seccomp BPF profile for a security tag and install it as
// the seccomp filter of the calling process.
//
// filter_profile is the security tag; the profile is read from
// "<filter_profile_dir>/<filter_profile>.bin".
//
// The function waits up to 120 seconds (one access(2) probe per second) for
// the profile to appear. The SNAP_CONFINE_MAX_PROFILE_WAIT environment
// variable overrides that limit; it must be a positive decimal integer and
// is capped at 3600.
//
// A profile whose entire content is "@unrestricted\n" results in no filter
// being installed.
//
// Returns 0 on every path that returns. All failures are reported through
// die(), which the code below relies on not returning.
int sc_apply_seccomp_bpf(const char *filter_profile)
{
	debug("loading bpf program for security tag %s", filter_profile);

	char profile_path[PATH_MAX] = { 0 };
	sc_must_snprintf(profile_path, sizeof(profile_path), "%s/%s.bin",
			 filter_profile_dir, filter_profile);

	// Wait some time for the security profile to show up. When
	// the system boots snapd will create security profiles, but
	// a service snap (e.g. network-manager) starts in parallel with
	// snapd so for such snaps, the profiles may not be generated
	// yet
	long max_wait = 120;
	const char *env_value = getenv("SNAP_CONFINE_MAX_PROFILE_WAIT");
	if (env_value != NULL) {
		char *endptr = NULL;
		errno = 0;
		long env_max_wait = strtol(env_value, &endptr, 10);
		// Reject conversion errors, empty input, trailing garbage
		// and non-positive values.
		if (errno != 0 || env_value == endptr || *endptr != '\0'
		    || env_max_wait <= 0) {
			die("SNAP_CONFINE_MAX_PROFILE_WAIT invalid");
		}
		max_wait = env_max_wait;
	}
	if (max_wait > 3600) {
		max_wait = 3600;
	}
	for (long i = 0; i < max_wait; ++i) {
		if (access(profile_path, F_OK) == 0) {
			break;
		}
		sleep(1);
	}

	// validate '/' down to profile_path are root-owned and not
	// 'other' writable to avoid possibility of privilege
	// escalation via bpf program load when paths are incorrectly
	// set on the system.
	validate_bpfpath_is_safe(profile_path);

	// load bpf
	//
	// The buffer is one byte larger than the biggest accepted profile so
	// that reading a profile of exactly MAX_BPF_SIZE bytes still hits
	// EOF. Because any profile that fills the buffer is rejected below,
	// the zero-initialised last byte also keeps the content
	// NUL-terminated for the string comparison further down.
	char bpf[MAX_BPF_SIZE + 1] = { 0 };
	FILE *fp = fopen(profile_path, "rb");
	if (fp == NULL) {
		die("cannot read %s", profile_path);
	}
	// set 'size' to 1 to get bytes transferred
	size_t num_read = fread(bpf, 1, sizeof(bpf), fp);
	if (ferror(fp) != 0) {
		die("cannot read seccomp profile %s", profile_path);
	} else if (feof(fp) == 0) {
		// No EOF means the file holds at least sizeof(bpf) bytes,
		// i.e. it exceeds the real limit of sizeof(bpf) - 1 bytes.
		die("seccomp profile %s exceeds %zu bytes", profile_path,
		    sizeof(bpf) - 1);
	}
	fclose(fp);
	debug("read %zu bytes from %s", num_read, profile_path);

	if (sc_streq(bpf, "@unrestricted\n")) {
		return 0;
	}

	uid_t real_uid, effective_uid, saved_uid;
	if (getresuid(&real_uid, &effective_uid, &saved_uid) < 0) {
		die("cannot call getresuid");
	}
	// If we can, raise privileges so that we can load the BPF into the
	// kernel via 'prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, ...)'.
	debug("raising privileges to load seccomp profile");
	if (effective_uid != 0 && saved_uid == 0) {
		if (seteuid(0) != 0) {
			die("seteuid failed");
		}
		if (geteuid() != 0) {
			die("raising privs before seccomp_load did not work");
		}
	}
	// Load filter into the kernel. Importantly we are
	// intentionally *not* setting NO_NEW_PRIVS because it
	// interferes with exec transitions in AppArmor with certain
	// snappy interfaces. Not setting NO_NEW_PRIVS does mean that
	// applications can adjust their sandbox if they have
	// CAP_SYS_ADMIN or, if running on < 4.8 kernels, break out of
	// the seccomp via ptrace. Both CAP_SYS_ADMIN and 'ptrace
	// (trace)' are blocked by AppArmor with typical snappy
	// interfaces.
	struct sock_fprog prog = {
		// Number of whole filter instructions that were read.
		.len = num_read / sizeof(struct sock_filter),
		.filter = (struct sock_filter *)bpf,
	};
	if (seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG, &prog) !=
	    0) {
		if (errno == ENOSYS) {
			debug("kernel doesn't support the seccomp(2) syscall");
		} else if (errno == EINVAL) {
			debug
			    ("kernel may not support the SECCOMP_FILTER_FLAG_LOG flag");
		}

		debug
		    ("falling back to prctl(2) syscall to load seccomp filter");
		if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog) != 0) {
			die("cannot apply seccomp profile");
		}
	}
	// drop privileges again
	debug("dropping privileges after loading seccomp profile");
	if (geteuid() == 0) {
		// real_uid was obtained from getresuid() above; nothing in
		// between changes the real user id.
		if (seteuid(real_uid) != 0) {
			die("seteuid failed");
		}
		if (real_uid != 0 && geteuid() == 0) {
			die("dropping privs after seccomp_load did not work");
		}
	}

	return 0;
}
Beispiel #6
0
bool sc_should_use_normal_mode(sc_distro distro, const char *base_snap_name)
{
	// Normal mode is used in every case except one: the distribution is
	// SC_DISTRO_CORE16 and the base snap is named exactly "core".
	if (distro != SC_DISTRO_CORE16) {
		return true;
	}
	return !sc_streq(base_snap_name, "core");
}