int mount_cgroup_controllers(char ***join_controllers) { _cleanup_set_free_free_ Set *controllers = NULL; int r; if (!cg_is_legacy_wanted()) return 0; /* Mount all available cgroup controllers that are built into the kernel. */ controllers = set_new(&string_hash_ops); if (!controllers) return log_oom(); r = cg_kernel_controllers(controllers); if (r < 0) return log_error_errno(r, "Failed to enumerate cgroup controllers: %m"); for (;;) { _cleanup_free_ char *options = NULL, *controller = NULL, *where = NULL; MountPoint p = { .what = "cgroup", .type = "cgroup", .flags = MS_NOSUID|MS_NOEXEC|MS_NODEV, .mode = MNT_IN_CONTAINER, }; char ***k = NULL; controller = set_steal_first(controllers); if (!controller) break; if (join_controllers) for (k = join_controllers; *k; k++) if (strv_find(*k, controller)) break; if (k && *k) { char **i, **j; for (i = *k, j = *k; *i; i++) { if (!streq(*i, controller)) { _cleanup_free_ char *t; t = set_remove(controllers, *i); if (!t) { free(*i); continue; } } *(j++) = *i; } *j = NULL; options = strv_join(*k, ","); if (!options) return log_oom(); } else { options = controller; controller = NULL; } where = strappend("/sys/fs/cgroup/", options); if (!where) return log_oom(); p.where = where; p.options = options; r = mount_one(&p, true); if (r < 0) return r; if (r > 0 && k && *k) { char **i; for (i = *k; *i; i++) { _cleanup_free_ char *t = NULL; t = strappend("/sys/fs/cgroup/", *i); if (!t) return log_oom(); r = symlink(options, t); if (r < 0 && errno != EEXIST) return log_error_errno(errno, "Failed to create symlink %s: %m", t); #ifdef SMACK_RUN_LABEL r = mac_smack_copy(t, options); if (r < 0 && r != -EOPNOTSUPP) return log_error_errno(r, "Failed to copy smack label from %s to %s: %m", options, t); #endif } } } /* Now that we mounted everything, let's make the tmpfs the * cgroup file systems are mounted into read-only. */ (void) mount("tmpfs", "/sys/fs/cgroup", "tmpfs", MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME|MS_RDONLY, "mode=755"); return 0; } #if defined(HAVE_SELINUX) || defined(HAVE_SMACK) static int nftw_cb( const char *fpath, const struct stat *sb, int tflag, struct FTW *ftwbuf) { /* No need to label /dev twice in a row... */ if (_unlikely_(ftwbuf->level == 0)) return FTW_CONTINUE; label_fix(fpath, false, false); /* /run/initramfs is static data and big, no need to * dynamically relabel its contents at boot... */ if (_unlikely_(ftwbuf->level == 1 && tflag == FTW_D && streq(fpath, "/run/initramfs"))) return FTW_SKIP_SUBTREE; return FTW_CONTINUE; }; #endif int mount_setup(bool loaded_policy) { unsigned i; int r = 0; for (i = 0; i < ELEMENTSOF(mount_table); i ++) { int j; j = mount_one(mount_table + i, loaded_policy); if (j != 0 && r >= 0) r = j; } if (r < 0) return r; #if defined(HAVE_SELINUX) || defined(HAVE_SMACK) /* Nodes in devtmpfs and /run need to be manually updated for * the appropriate labels, after mounting. The other virtual * API file systems like /sys and /proc do not need that, they * use the same label for all their files. */ if (loaded_policy) { usec_t before_relabel, after_relabel; char timespan[FORMAT_TIMESPAN_MAX]; before_relabel = now(CLOCK_MONOTONIC); nftw("/dev", nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL); nftw("/run", nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL); after_relabel = now(CLOCK_MONOTONIC); log_info("Relabelled /dev and /run in %s.", format_timespan(timespan, sizeof(timespan), after_relabel - before_relabel, 0)); } #endif /* Create a few default symlinks, which are normally created * by udevd, but some scripts might need them before we start * udevd. */ dev_setup(NULL, UID_INVALID, GID_INVALID); /* Mark the root directory as shared in regards to mount * propagation. The kernel defaults to "private", but we think * it makes more sense to have a default of "shared" so that * nspawn and the container tools work out of the box. If * specific setups need other settings they can reset the * propagation mode to private if needed. */ if (detect_container() <= 0) if (mount(NULL, "/", NULL, MS_REC|MS_SHARED, NULL) < 0) log_warning_errno(errno, "Failed to set up the root directory for shared mount propagation: %m"); /* Create a few directories we always want around, Note that * sd_booted() checks for /run/systemd/system, so this mkdir * really needs to stay for good, otherwise software that * copied sd-daemon.c into their sources will misdetect * systemd. */ mkdir_label("/run/systemd", 0755); mkdir_label("/run/systemd/system", 0755); mkdir_label("/run/systemd/inaccessible", 0000); return 0; }
/* Mount legacy cgroup hierarchy when cgroup namespaces are unsupported. */ static int mount_legacy_cgns_unsupported( const char *dest, CGroupUnified unified_requested, bool userns, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context) { _cleanup_set_free_free_ Set *controllers = NULL; const char *cgroup_root; int r; cgroup_root = prefix_roota(dest, "/sys/fs/cgroup"); (void) mkdir_p(cgroup_root, 0755); /* Mount a tmpfs to /sys/fs/cgroup if it's not mounted there yet. */ r = path_is_mount_point(cgroup_root, dest, AT_SYMLINK_FOLLOW); if (r < 0) return log_error_errno(r, "Failed to determine if /sys/fs/cgroup is already mounted: %m"); if (r == 0) { _cleanup_free_ char *options = NULL; r = tmpfs_patch_options("mode=755", uid_shift == 0 ? UID_INVALID : uid_shift, selinux_apifs_context, &options); if (r < 0) return log_oom(); r = mount_verbose(LOG_ERR, "tmpfs", cgroup_root, "tmpfs", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME, options); if (r < 0) return r; } r = cg_all_unified(); if (r < 0) return r; if (r > 0) goto skip_controllers; r = cg_kernel_controllers(&controllers); if (r < 0) return log_error_errno(r, "Failed to determine cgroup controllers: %m"); for (;;) { _cleanup_free_ char *controller = NULL, *origin = NULL, *combined = NULL; controller = set_steal_first(controllers); if (!controller) break; origin = prefix_root("/sys/fs/cgroup/", controller); if (!origin) return log_oom(); r = readlink_malloc(origin, &combined); if (r == -EINVAL) { /* Not a symbolic link, but directly a single cgroup hierarchy */ r = mount_legacy_cgroup_hierarchy(dest, controller, controller, true); if (r < 0) return r; } else if (r < 0) return log_error_errno(r, "Failed to read link %s: %m", origin); else { _cleanup_free_ char *target = NULL; target = prefix_root(dest, origin); if (!target) return log_oom(); /* A symbolic link, a combination of controllers in one hierarchy */ if (!filename_is_valid(combined)) { log_warning("Ignoring invalid combined hierarchy %s.", combined); continue; } r = mount_legacy_cgroup_hierarchy(dest, combined, combined, true); if (r < 0) return r; r = symlink_idempotent(combined, target, false); if (r == -EINVAL) return log_error_errno(r, "Invalid existing symlink for combined hierarchy: %m"); if (r < 0) return log_error_errno(r, "Failed to create symlink for combined hierarchy: %m"); } } skip_controllers: if (unified_requested >= CGROUP_UNIFIED_SYSTEMD) { r = mount_legacy_cgroup_hierarchy(dest, SYSTEMD_CGROUP_CONTROLLER_HYBRID, "unified", false); if (r < 0) return r; } r = mount_legacy_cgroup_hierarchy(dest, SYSTEMD_CGROUP_CONTROLLER_LEGACY, "systemd", false); if (r < 0) return r; return mount_verbose(LOG_ERR, NULL, cgroup_root, NULL, MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME|MS_RDONLY, "mode=755"); }