struct sc_ns_group *sc_open_ns_group(const char *group_name, const unsigned flags) { struct sc_ns_group *group = sc_alloc_ns_group(); debug("opening namespace group directory %s", sc_ns_dir); group->dir_fd = open(sc_ns_dir, O_DIRECTORY | O_PATH | O_CLOEXEC | O_NOFOLLOW); if (group->dir_fd < 0) { if (flags & SC_NS_FAIL_GRACEFULLY && errno == ENOENT) { free(group); return NULL; } die("cannot open directory for namespace group %s", group_name); } char lock_fname[PATH_MAX]; must_snprintf(lock_fname, sizeof lock_fname, "%s%s", group_name, SC_NS_LOCK_FILE); debug("opening lock file for namespace group %s", group_name); group->lock_fd = openat(group->dir_fd, lock_fname, O_CREAT | O_RDWR | O_CLOEXEC | O_NOFOLLOW, 0600); if (group->lock_fd < 0) { die("cannot open lock file for namespace group %s", group_name); } group->name = strdup(group_name); if (group->name == NULL) { die("cannot duplicate namespace group name %s", group_name); } return group; }
/** * Create a writable mimic directory based on reference directory. * * The mimic directory is a tmpfs populated with bind mounts to the (possibly * read only) directories in the reference directory. While all the read-only * content stays read-only the actual mimic directory is writable so additional * content can be placed there. * * Flags are forwarded to sc_quirk_mkdir_bind() **/ static void sc_quirk_create_writable_mimic(const char *mimic_dir, const char *ref_dir, unsigned flags) { debug("creating writable mimic directory %s based on %s", mimic_dir, ref_dir); sc_quirk_setup_tmpfs(mimic_dir); debug("bind-mounting all the files from the reference directory"); DIR *dirp __attribute__ ((cleanup(sc_cleanup_closedir))) = NULL; dirp = opendir(ref_dir); if (dirp == NULL) { die("cannot open reference directory %s", ref_dir); } struct dirent *entryp = NULL; do { char src_name[PATH_MAX * 2]; char dest_name[PATH_MAX * 2]; // Set errno to zero, if readdir fails it will not only return null but // set errno to a non-zero value. This is how we can differentiate // end-of-directory from an actual error. errno = 0; entryp = readdir(dirp); if (entryp == NULL && errno != 0) { die("cannot read another directory entry"); } if (entryp == NULL) { break; } if (strcmp(entryp->d_name, ".") == 0 || strcmp(entryp->d_name, "..") == 0) { continue; } if (entryp->d_type != DT_DIR && entryp->d_type != DT_REG) { die("unsupported entry type of file %s (%d)", entryp->d_name, entryp->d_type); } must_snprintf(src_name, sizeof src_name, "%s/%s", ref_dir, entryp->d_name); must_snprintf(dest_name, sizeof dest_name, "%s/%s", mimic_dir, entryp->d_name); sc_quirk_mkdir_bind(src_name, dest_name, flags); } while (entryp != NULL); }
/*
 * Setup mount profiles as described by snapd.
 *
 * This function reads /var/lib/snapd/mount/$security_tag.fstab as a fstab(5)
 * file and executes the mount requests described there. A missing profile is
 * not an error, any other failure is fatal.
 *
 * Currently only bind mounts are allowed (filesystem type "none" together
 * with the "bind" option). All bind mounts are read only by default though
 * the `rw` flag can be used. All of them are nodev and nosuid.
 *
 * This function is called with the rootfs being "consistent" so that it is
 * either the core snap on an all-snap system or the core snap + punched holes
 * on a classic system.
 **/
static void sc_setup_mount_profiles(const char *security_tag)
{
	debug("%s: %s", __func__, security_tag);

	FILE *f __attribute__ ((cleanup(sc_cleanup_endmntent))) = NULL;
	const char *mount_profile_dir = "/var/lib/snapd/mount";

	char profile_path[PATH_MAX];
	must_snprintf(profile_path, sizeof(profile_path), "%s/%s.fstab",
		      mount_profile_dir, security_tag);

	debug("opening mount profile %s", profile_path);
	f = setmntent(profile_path, "r");
	// it is ok for the file to not exist
	if (f == NULL && errno == ENOENT) {
		debug("mount profile %s doesn't exist, ignoring", profile_path);
		return;
	}
	// however any other error is a real error
	if (f == NULL) {
		die("cannot open %s", profile_path);
	}

	struct mntent *m = NULL;
	while ((m = getmntent(f)) != NULL) {
		debug("read mount entry\n"
		      "\tmnt_fsname: %s\n"
		      "\tmnt_dir: %s\n"
		      "\tmnt_type: %s\n"
		      "\tmnt_opts: %s\n"
		      "\tmnt_freq: %d\n"
		      "\tmnt_passno: %d",
		      m->mnt_fsname, m->mnt_dir, m->mnt_type,
		      m->mnt_opts, m->mnt_freq, m->mnt_passno);

		int flags = MS_BIND | MS_RDONLY | MS_NODEV | MS_NOSUID;
		debug("initial flags are: bind,ro,nodev,nosuid");
		if (strcmp(m->mnt_type, "none") != 0) {
			die("cannot honor mount profile, only 'none' filesystem type is supported");
		}
		if (hasmntopt(m, "bind") == NULL) {
			die("cannot honor mount profile, the bind mount flag is mandatory");
		}
		if (hasmntopt(m, "rw") != NULL) {
			flags &= ~MS_RDONLY;
		}

		if (mount(m->mnt_fsname, m->mnt_dir, NULL, flags, NULL) != 0) {
			die("cannot mount %s at %s with options %s",
			    m->mnt_fsname, m->mnt_dir, m->mnt_opts);
		}
		// When a bind mount is created the kernel ignores every mount
		// flag apart from MS_REC, so the ro, nodev and nosuid flags
		// requested above have not taken effect yet. They are
		// per-mount-point flags that can only be applied by remounting
		// the freshly created bind mount.
		debug("performing operation: mount -o remount,bind %s",
		      m->mnt_dir);
		if (mount("none", m->mnt_dir, NULL, MS_REMOUNT | flags, NULL)
		    != 0) {
			die("cannot remount %s with options %s", m->mnt_dir,
			    m->mnt_opts);
		}
	}
}
void sc_setup_quirks() { // because /var/lib/snapd is essential let's move it to /tmp/snapd for a sec char snapd_tmp[] = "/tmp/snapd.quirks_XXXXXX"; if (mkdtemp(snapd_tmp) == 0) { die("cannot create temporary directory for /var/lib/snapd mount point"); } debug("performing operation: mount --move %s %s", "/var/lib/snapd", snapd_tmp); if (mount("/var/lib/snapd", snapd_tmp, NULL, MS_MOVE, NULL) != 0) { die("cannot perform operation: mount --move %s %s", "/var/lib/snapd", snapd_tmp); } // now let's make /var/lib the vanilla /var/lib from the core snap char buf[PATH_MAX]; must_snprintf(buf, sizeof buf, "%s/var/lib", sc_get_inner_core_mount_point()); sc_quirk_create_writable_mimic("/var/lib", buf, MS_RDONLY | MS_REC | MS_SLAVE | MS_NODEV | MS_NOSUID); // now let's move /var/lib/snapd (that was originally there) back debug("performing operation: umount %s", "/var/lib/snapd"); if (umount("/var/lib/snapd") != 0) { die("cannot perform operation: umount %s", "/var/lib/snapd"); } debug("performing operation: mount --move %s %s", snapd_tmp, "/var/lib/snapd"); if (mount(snapd_tmp, "/var/lib/snapd", NULL, MS_MOVE, NULL) != 0) { die("cannot perform operation: mount --move %s %s", snapd_tmp, "/var/lib/snapd"); } debug("performing operation: rmdir %s", snapd_tmp); if (rmdir(snapd_tmp) != 0) { die("cannot perform operation: rmdir %s", snapd_tmp); } // We are now ready to apply any quirks that relate to /var/lib sc_setup_lxd_quirk(); }
// TODO: simplify this, after all it is just a tmpfs // TODO: fold this into bootstrap static void setup_private_mount(const char *security_tag) { uid_t uid = getuid(); gid_t gid = getgid(); char tmpdir[MAX_BUF] = { 0 }; // Create a 0700 base directory, this is the base dir that is // protected from other users. // // Under that basedir, we put a 1777 /tmp dir that is then bind // mounted for the applications to use must_snprintf(tmpdir, sizeof(tmpdir), "/tmp/snap.%d_%s_XXXXXX", uid, security_tag); if (mkdtemp(tmpdir) == NULL) { die("cannot create temporary directory essential for private /tmp"); } // now we create a 1777 /tmp inside our private dir mode_t old_mask = umask(0); char *d = strdup(tmpdir); if (!d) { die("cannot allocate memory for string copy"); } must_snprintf(tmpdir, sizeof(tmpdir), "%s/tmp", d); free(d); if (mkdir(tmpdir, 01777) != 0) { die("cannot create temporary directory for private /tmp"); } umask(old_mask); // chdir to '/' since the mount won't apply to the current directory char *pwd = get_current_dir_name(); if (pwd == NULL) die("cannot get current working directory"); if (chdir("/") != 0) die("cannot change directory to '/'"); // MS_BIND is there from linux 2.4 if (mount(tmpdir, "/tmp", NULL, MS_BIND, NULL) != 0) { die("cannot bind mount private /tmp"); } // MS_PRIVATE needs linux > 2.6.11 if (mount("none", "/tmp", NULL, MS_PRIVATE, NULL) != 0) { die("cannot change sharing on /tmp to make it private"); } // do the chown after the bind mount to avoid potential shenanigans if (chown("/tmp/", uid, gid) < 0) { die("cannot change ownership of /tmp"); } // chdir to original directory if (chdir(pwd) != 0) die("cannot change current working directory to the original directory"); free(pwd); // ensure we set the various TMPDIRs to our newly created tmpdir const char *tmpd[] = { "TMPDIR", "TEMPDIR", NULL }; int i; for (i = 0; tmpd[i] != NULL; i++) { if (setenv(tmpd[i], "/tmp", 1) != 0) { die("cannot set environment variable '%s'", tmpd[i]); } } }
/** * Bootstrap mount namespace. * * This is a chunk of tricky code that lets us have full control over the * layout and direction of propagation of mount events. The documentation below * assumes knowledge of the 'sharedsubtree.txt' document from the kernel source * tree. * * As a reminder two definitions are quoted below: * * A 'propagation event' is defined as event generated on a vfsmount * that leads to mount or unmount actions in other vfsmounts. * * A 'peer group' is defined as a group of vfsmounts that propagate * events to each other. * * (end of quote). * * The main idea is to setup a mount namespace that has a root filesystem with * vfsmounts and peer groups that, depending on the location, either isolate * or share with the rest of the system. * * The vast majority of the filesystem is shared in one direction. Events from * the outside (from the main mount namespace) propagate inside (to namespaces * of particular snaps) so things like new snap revisions, mounted drives, etc, * just show up as expected but even if a snap is exploited or malicious in * nature it cannot affect anything in another namespace where it might cause * security or stability issues. * * Selected directories (today just /media) can be shared in both directions. * This allows snaps with sufficient privileges to either create, through the * mount system call, additional mount points that are visible by the rest of * the system (both the main mount namespace and namespaces of individual * snaps) or remove them, through the unmount system call. **/ static void sc_bootstrap_mount_namespace(const struct sc_mount_config *config) { char scratch_dir[] = "/tmp/snap.rootfs_XXXXXX"; char src[PATH_MAX]; char dst[PATH_MAX]; if (mkdtemp(scratch_dir) == NULL) { die("cannot create temporary directory for the root file system"); } // NOTE: at this stage we just called unshare(CLONE_NEWNS). We are in a new // mount namespace and have a private list of mounts. 
debug("scratch directory for constructing namespace: %s", scratch_dir); // Make the root filesystem recursively shared. This way propagation events // will be shared with main mount namespace. debug("performing operation: mount --make-rshared /"); if (mount("none", "/", NULL, MS_REC | MS_SHARED, NULL) < 0) { die("cannot perform operation: mount --make-rshared /"); } // Bind mount the temporary scratch directory for root filesystem over // itself so that it is a mount point. This is done so that it can become // unbindable as explained below. debug("performing operation: mount --bind %s %s", scratch_dir, scratch_dir); if (mount(scratch_dir, scratch_dir, NULL, MS_BIND, NULL) < 0) { die("cannot perform operation: mount --bind %s %s", scratch_dir, scratch_dir); } // Make the scratch directory unbindable. // // This is necessary as otherwise a mount loop can occur and the kernel // would crash. The term unbindable simply states that it cannot be bind // mounted anywhere. When we construct recursive bind mounts below this // guarantees that this directory will not be replicated anywhere. debug("performing operation: mount --make-unbindable %s", scratch_dir); if (mount("none", scratch_dir, NULL, MS_UNBINDABLE, NULL) < 0) { die("cannot perform operation: mount --make-unbindable %s", scratch_dir); } // Recursively bind mount desired root filesystem directory over the // scratch directory. This puts the initial content into the scratch space // and serves as a foundation for all subsequent operations below. // // The mount is recursive because it can either be applied to the root // filesystem of a core system (aka all-snap) or the core snap on a classic // system. In the former case we need recursive bind mounts to accurately // replicate the state of the root filesystem into the scratch directory. 
debug("performing operation: mount --rbind %s %s", config->rootfs_dir, scratch_dir); if (mount(config->rootfs_dir, scratch_dir, NULL, MS_REC | MS_BIND, NULL) < 0) { die("cannot perform operation: mount --rbind %s %s", config->rootfs_dir, scratch_dir); } // Make the scratch directory recursively private. Nothing done there will // be shared with any peer group, This effectively detaches us from the // original namespace and coupled with pivot_root below serves as the // foundation of the mount sandbox. debug("performing operation: mount --make-rslave %s", scratch_dir); if (mount("none", scratch_dir, NULL, MS_REC | MS_SLAVE, NULL) < 0) { die("cannot perform operation: mount --make-rslave %s", scratch_dir); } // Bind mount certain directories from the host filesystem to the scratch // directory. By default mount events will propagate in both into and out // of the peer group. This way the running application can alter any global // state visible on the host and in other snaps. This can be restricted by // disabling the "is_bidirectional" flag as can be seen below. for (const struct sc_mount * mnt = config->mounts; mnt->path != NULL; mnt++) { must_snprintf(dst, sizeof dst, "%s/%s", scratch_dir, mnt->path); debug("performing operation: mount --rbind %s %s", mnt->path, dst); if (mount(mnt->path, dst, NULL, MS_REC | MS_BIND, NULL) < 0) { die("cannot perform operation: mount --rbind %s %s", mnt->path, dst); } if (!mnt->is_bidirectional) { // Mount events will only propagate inwards to the namespace. This // way the running application cannot alter any global state apart // from that of its own snap. debug("performing operation: mount --make-rslave %s", dst); if (mount("none", dst, NULL, MS_REC | MS_SLAVE, NULL) != 0) { die("cannot perform operation: mount --make-rslave %s", dst); } } } // Since we mounted /etc from the host filesystem to the scratch directory, // we may need to put /etc/alternatives from the desired root filesystem // (e.g. the core snap) back. 
This way the behavior of running snaps is not // affected by the alternatives directory from the host, if one exists. // // https://bugs.launchpad.net/snap-confine/+bug/1580018 const char *etc_alternatives = "/etc/alternatives"; if (access(etc_alternatives, F_OK) == 0) { must_snprintf(src, sizeof src, "%s%s", config->rootfs_dir, etc_alternatives); must_snprintf(dst, sizeof dst, "%s%s", scratch_dir, etc_alternatives); debug("performing operation: mount --bind %s %s", src, dst); if (mount(src, dst, NULL, MS_BIND, NULL) != 0) { die("cannot perform operation: mount --bind %s %s", src, dst); } debug("performing operation: mount --make-slave %s", dst); if (mount("none", dst, NULL, MS_SLAVE, NULL) != 0) { die("cannot perform operation: mount --make-slave %s", dst); } } // Bind mount the directory where all snaps are mounted. The location of // the this directory on the host filesystem may not match the location in // the desired root filesystem. In the "core" and "ubuntu-core" snaps the // directory is always /snap. On the host it is a build-time configuration // option stored in SNAP_MOUNT_DIR. must_snprintf(dst, sizeof dst, "%s/snap", scratch_dir); debug("performing operation: mount --rbind %s %s", SNAP_MOUNT_DIR, dst); if (mount(SNAP_MOUNT_DIR, dst, NULL, MS_BIND | MS_REC | MS_SLAVE, NULL) < 0) { die("cannot perform operation: mount --rbind -o slave %s %s", SNAP_MOUNT_DIR, dst); } debug("performing operation: mount --make-rslave %s", dst); if (mount("none", dst, NULL, MS_REC | MS_SLAVE, NULL) < 0) { die("cannot perform operation: mount --make-rslave %s", dst); } // Create the hostfs directory if one is missing. This directory is a part // of packaging now so perhaps this code can be removed later. 
if (access(SC_HOSTFS_DIR, F_OK) != 0) { debug("creating missing hostfs directory"); if (mkdir(SC_HOSTFS_DIR, 0755) != 0) { die("cannot perform operation: mkdir %s", SC_HOSTFS_DIR); } } // Make the upcoming "put_old" directory for pivot_root private so that // mount events don't propagate to any peer group. In practice pivot root // has a number of undocumented requirements and one of them is that the // "put_old" directory (the second argument) cannot be shared in any way. must_snprintf(dst, sizeof dst, "%s/%s", scratch_dir, SC_HOSTFS_DIR); debug("performing operation: mount --bind %s %s", dst, dst); if (mount(dst, dst, NULL, MS_BIND, NULL) < 0) { die("cannot perform operation: mount --bind %s %s", dst, dst); } debug("performing operation: mount --make-private %s", dst); if (mount("none", dst, NULL, MS_PRIVATE, NULL) < 0) { die("cannot perform operation: mount --make-private %s", dst); } // On classic mount the nvidia driver. Ideally this would be done in an // uniform way after pivot_root but this is good enough and requires less // code changes the nvidia code assumes it has access to the existing // pre-pivot filesystem. if (config->on_classic) { sc_mount_nvidia_driver(scratch_dir); } // XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX // pivot_root // XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX // Use pivot_root to "chroot" into the scratch directory. // // Q: Why are we using something as esoteric as pivot_root(2)? // A: Because this makes apparmor handling easy. Using a normal chroot // makes all apparmor rules conditional. We are either running on an // all-snap system where this would-be chroot didn't happen and all the // rules see / as the root file system _OR_ we are running on top of a // classic distribution and this chroot has now moved all paths to // /tmp/snap.rootfs_*. 
// // Because we are using unshare(2) with CLONE_NEWNS we can essentially use // pivot_root just like chroot but this makes apparmor unaware of the old // root so everything works okay. // // HINT: If you are debugging this and are trying to see why pivot_root // happens to return EINVAL with any changes you may be making, please // consider applying // misc/0001-Add-printk-based-debugging-to-pivot_root.patch to your tree // kernel. debug("performing operation: pivot_root %s %s", scratch_dir, dst); if (syscall(SYS_pivot_root, scratch_dir, dst) < 0) { die("cannot perform operation: pivot_root %s %s", scratch_dir, dst); } // Unmount the self-bind mount over the scratch directory created earlier // in the original root filesystem (which is now mounted on SC_HOSTFS_DIR). // This way we can remove the temporary directory we created and "clean up" // after ourselves nicely. must_snprintf(dst, sizeof dst, "%s/%s", SC_HOSTFS_DIR, scratch_dir); debug("performing operation: umount %s", dst); if (umount2(dst, 0) < 0) { die("cannot perform operation: umount %s", dst); } // Remove the scratch directory. Note that we are using the path that is // based on the old root filesystem as after pivot_root we cannot guarantee // what is present at the same location normally. (It is probably an empty // /tmp directory that is populated in another place). debug("performing operation: rmdir %s", dst); if (rmdir(scratch_dir) < 0) { die("cannot perform operation: rmdir %s", dst); }; // Make the old root filesystem recursively slave. This way operations // performed in this mount namespace will not propagate to the peer group. // This is another essential part of the confinement system. 
debug("performing operation: mount --make-rslave %s", SC_HOSTFS_DIR); if (mount("none", SC_HOSTFS_DIR, NULL, MS_REC | MS_SLAVE, NULL) < 0) { die("cannot perform operation: mount --make-rslave %s", SC_HOSTFS_DIR); } // Detach the redundant hostfs version of sysfs since it shows up in the // mount table and software inspecting the mount table may become confused // (eg, docker and LP:# 162601). must_snprintf(src, sizeof src, "%s/sys", SC_HOSTFS_DIR); debug("performing operation: umount --lazy %s", src); if (umount2(src, UMOUNT_NOFOLLOW | MNT_DETACH) < 0) { die("cannot perform operation: umount --lazy %s", src); } // Detach the redundant hostfs version of /dev since it shows up in the // mount table and software inspecting the mount table may become confused. must_snprintf(src, sizeof src, "%s/dev", SC_HOSTFS_DIR); debug("performing operation: umount --lazy %s", src); if (umount2(src, UMOUNT_NOFOLLOW | MNT_DETACH) < 0) { die("cannot perform operation: umount --lazy %s", src); } // Detach the redundant hostfs version of /proc since it shows up in the // mount table and software inspecting the mount table may become confused. must_snprintf(src, sizeof src, "%s/proc", SC_HOSTFS_DIR); debug("performing operation: umount --lazy %s", src); if (umount2(src, UMOUNT_NOFOLLOW | MNT_DETACH) < 0) { die("cannot perform operation: umount --lazy %s", src); } }
void sc_create_or_join_ns_group(struct sc_ns_group *group, struct sc_apparmor *apparmor) { // Open the mount namespace file. char mnt_fname[PATH_MAX]; must_snprintf(mnt_fname, sizeof mnt_fname, "%s%s", group->name, SC_NS_MNT_FILE); int mnt_fd __attribute__ ((cleanup(sc_cleanup_close))) = -1; // NOTE: There is no O_EXCL here because the file can be around but // doesn't have to be a mounted namespace. // // If the mounted namespace is discarded with // sc_discard_preserved_ns_group() it will revert to a regular file. If // snap-confine is killed for whatever reason after the file is created but // before the file is bind-mounted it will also be a regular file. mnt_fd = openat(group->dir_fd, mnt_fname, O_CREAT | O_RDONLY | O_CLOEXEC | O_NOFOLLOW, 0600); if (mnt_fd < 0) { die("cannot open mount namespace file for namespace group %s", group->name); } // Check if we got an nsfs-based file or a regular file. This can be // reliably tested because nsfs has an unique filesystem type NSFS_MAGIC. // We can just ensure that this is the case thanks to fstatfs. struct statfs buf; if (fstatfs(mnt_fd, &buf) < 0) { die("cannot perform fstatfs() on an mount namespace file descriptor"); } #ifndef NSFS_MAGIC // Account for kernel headers old enough to not know about NSFS_MAGIC. #define NSFS_MAGIC 0x6e736673 #endif if (buf.f_type == NSFS_MAGIC) { char *vanilla_cwd __attribute__ ((cleanup(sc_cleanup_string))) = NULL; vanilla_cwd = get_current_dir_name(); if (vanilla_cwd == NULL) { die("cannot get the current working directory"); } debug ("attempting to re-associate the mount namespace with the namespace group %s", group->name); if (setns(mnt_fd, CLONE_NEWNS) < 0) { die("cannot re-associate the mount namespace with namespace group %s", group->name); } debug ("successfully re-associated the mount namespace with the namespace group %s", group->name); // Try to re-locate back to vanilla working directory. This can fail // because that directory is no longer present. 
if (chdir(vanilla_cwd) != 0) { debug ("cannot remain in %s, moving to the void directory", vanilla_cwd); if (chdir(SC_VOID_DIR) != 0) { die("cannot change directory to %s", SC_VOID_DIR); } debug("successfully moved to %s", SC_VOID_DIR); } return; }
scmp_filter_ctx sc_prepare_seccomp_context(const char *filter_profile) { int rc = 0; scmp_filter_ctx ctx = NULL; FILE *f = NULL; size_t lineno = 0; uid_t real_uid, effective_uid, saved_uid; struct preprocess pre; struct seccomp_args sargs; debug("preparing seccomp profile associated with security tag %s", filter_profile); // initialize hsearch map sc_map_init(); ctx = seccomp_init(SCMP_ACT_KILL); if (ctx == NULL) { errno = ENOMEM; die("seccomp_init() failed"); } // Setup native arch and any compatibility archs sc_add_seccomp_archs(ctx); // Disable NO_NEW_PRIVS because it interferes with exec transitions in // AppArmor. Unfortunately this means that security policies must be // very careful to not allow the following otherwise apps can escape // the sandbox: // - seccomp syscall // - prctl with PR_SET_SECCOMP // - ptrace (trace) in AppArmor // - capability sys_admin in AppArmor // Note that with NO_NEW_PRIVS disabled, CAP_SYS_ADMIN is required to // change the seccomp sandbox. if (getresuid(&real_uid, &effective_uid, &saved_uid) != 0) die("could not find user IDs"); // If running privileged or capable of raising, disable nnp if (real_uid == 0 || effective_uid == 0 || saved_uid == 0) if (seccomp_attr_set(ctx, SCMP_FLTATR_CTL_NNP, 0) != 0) die("Cannot disable nnp"); // Note that secure_gettenv will always return NULL when suid, so // SNAPPY_LAUNCHER_SECCOMP_PROFILE_DIR can't be (ab)used in that case. 
if (secure_getenv("SNAPPY_LAUNCHER_SECCOMP_PROFILE_DIR") != NULL) filter_profile_dir = secure_getenv("SNAPPY_LAUNCHER_SECCOMP_PROFILE_DIR"); char profile_path[512]; // arbitrary path name limit must_snprintf(profile_path, sizeof(profile_path), "%s/%s", filter_profile_dir, filter_profile); f = fopen(profile_path, "r"); if (f == NULL) { fprintf(stderr, "Can not open %s (%s)\n", profile_path, strerror(errno)); die("aborting"); } // Note, preprocess_filter() die()s on error preprocess_filter(f, &pre); if (pre.unrestricted) { seccomp_release(ctx); ctx = NULL; goto out; } // FIXME: right now complain mode is the equivalent to unrestricted. // We'll want to change this once we seccomp logging is in order. if (pre.complain) { seccomp_release(ctx); ctx = NULL; goto out; } char buf[SC_MAX_LINE_LENGTH]; while (fgets(buf, sizeof(buf), f) != NULL) { lineno++; // skip policy-irrelevant lines if (validate_and_trim_line(buf, sizeof(buf), lineno) == 0) continue; char *buf_copy = strdup(buf); if (buf_copy == NULL) die("Out of memory"); int pr_rc = parse_line(buf_copy, &sargs); free(buf_copy); if (pr_rc != PARSE_OK) { // as this is a syscall whitelist an invalid syscall // is ok and the error can be ignored if (pr_rc == PARSE_INVALID_SYSCALL) continue; die("could not parse line"); } rc = seccomp_rule_add_exact_array(ctx, SCMP_ACT_ALLOW, sargs.syscall_nr, sargs.length, sargs.arg_cmp); if (rc != 0) { rc = seccomp_rule_add_array(ctx, SCMP_ACT_ALLOW, sargs.syscall_nr, sargs.length, sargs.arg_cmp); if (rc != 0) { fprintf(stderr, "seccomp_rule_add_array failed with %i for '%s'\n", rc, buf); errno = 0; die("aborting"); } } } out: if (f != NULL) { if (fclose(f) != 0) die("could not close seccomp file"); } sc_map_destroy(); return ctx; }