int proc_setup_self(struct super_block *s) { struct inode *root_inode = s->s_root->d_inode; struct pid_namespace *ns = s->s_fs_info; struct dentry *self; mutex_lock(&root_inode->i_mutex); self = d_alloc_name(s->s_root, "self"); if (self) { struct inode *inode = new_inode_pseudo(s); if (inode) { inode->i_ino = self_inum; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; inode->i_mode = S_IFLNK | S_IRWXUGO; inode->i_uid = GLOBAL_ROOT_UID; inode->i_gid = GLOBAL_ROOT_GID; inode->i_op = &proc_self_inode_operations; d_add(self, inode); } else { dput(self); self = ERR_PTR(-ENOMEM); } } else { self = ERR_PTR(-ENOMEM); } mutex_unlock(&root_inode->i_mutex); if (IS_ERR(self)) { pr_err("proc_fill_super: can't allocate /proc/self\n"); return PTR_ERR(self); } ns->proc_self = self; return 0; }
/* * A single inode exists for all anon_inode files. Contrary to pipes, * anon_inode inodes have no associated per-instance data, so we need * only allocate one of them. */ static struct inode *anon_inode_mkinode(void) { struct inode *inode = new_inode_pseudo(anon_inode_mnt->mnt_sb); if (!inode) return ERR_PTR(-ENOMEM); inode->i_ino = get_next_ino(); inode->i_fop = &anon_inode_fops; inode->i_mapping->a_ops = &anon_aops; /* * Mark the inode dirty from the very beginning, * that way it will never be moved to the dirty * list because mark_inode_dirty() will think * that it already _is_ on the dirty list. */ inode->i_state = I_DIRTY; inode->i_mode = S_IRUSR | S_IWUSR; inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); inode->i_flags |= S_PRIVATE; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; return inode; }
static struct inode *anon_inode_mkinode(struct super_block *s) { struct inode *inode = new_inode_pseudo(s); if (!inode) return ERR_PTR(-ENOMEM); inode->i_ino = get_next_ino(); inode->i_fop = &anon_inode_fops; inode->i_mapping->a_ops = &anon_aops; inode->i_state = I_DIRTY; inode->i_mode = S_IRUSR | S_IWUSR; inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); inode->i_flags |= S_PRIVATE; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; return inode; }
static void *__ns_get_path(struct path *path, struct ns_common *ns) { struct vfsmount *mnt = nsfs_mnt; struct qstr qname = { .name = "", }; struct dentry *dentry; struct inode *inode; unsigned long d; rcu_read_lock(); d = atomic_long_read(&ns->stashed); if (!d) goto slow; dentry = (struct dentry *)d; if (!lockref_get_not_dead(&dentry->d_lockref)) goto slow; rcu_read_unlock(); ns->ops->put(ns); got_it: path->mnt = mntget(mnt); path->dentry = dentry; return NULL; slow: rcu_read_unlock(); inode = new_inode_pseudo(mnt->mnt_sb); if (!inode) { ns->ops->put(ns); return ERR_PTR(-ENOMEM); } inode->i_ino = ns->inum; inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); inode->i_flags |= S_IMMUTABLE; inode->i_mode = S_IFREG | S_IRUGO; inode->i_fop = &ns_file_operations; inode->i_private = ns; dentry = d_alloc_pseudo(mnt->mnt_sb, &qname); if (!dentry) { iput(inode); return ERR_PTR(-ENOMEM); } d_instantiate(dentry, inode); dentry->d_flags |= DCACHE_RCUACCESS; dentry->d_fsdata = (void *)ns->ops; d = atomic_long_cmpxchg(&ns->stashed, 0, (unsigned long)dentry); if (d) { d_delete(dentry); /* make sure ->d_prune() does nothing */ dput(dentry); cpu_relax(); return ERR_PTR(-EAGAIN); } goto got_it; } void *ns_get_path(struct path *path, struct task_struct *task, const struct proc_ns_operations *ns_ops) { struct ns_common *ns; void *ret; again: ns = ns_ops->get(task); if (!ns) return ERR_PTR(-ENOENT); ret = __ns_get_path(path, ns); if (IS_ERR(ret) && PTR_ERR(ret) == -EAGAIN) goto again; return ret; } int open_related_ns(struct ns_common *ns, struct ns_common *(*get_ns)(struct ns_common *ns)) { struct path path = {}; struct file *f; void *err; int fd; fd = get_unused_fd_flags(O_CLOEXEC); if (fd < 0) return fd; while (1) { struct ns_common *relative; relative = get_ns(ns); if (IS_ERR(relative)) { put_unused_fd(fd); return PTR_ERR(relative); } err = __ns_get_path(&path, relative); if (IS_ERR(err) && PTR_ERR(err) == -EAGAIN) continue; break; } if (IS_ERR(err)) { put_unused_fd(fd); return PTR_ERR(err); } f = dentry_open(&path, O_RDONLY, current_cred()); path_put(&path); if (IS_ERR(f)) { put_unused_fd(fd); fd = PTR_ERR(f); } else fd_install(fd, f); return fd; } static long ns_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { struct user_namespace *user_ns; struct ns_common *ns = get_proc_ns(file_inode(filp)); uid_t __user *argp; uid_t uid; switch (ioctl) { case NS_GET_USERNS: return open_related_ns(ns, ns_get_owner); case NS_GET_PARENT: if (!ns->ops->get_parent) return -EINVAL; return open_related_ns(ns, ns->ops->get_parent); case NS_GET_NSTYPE: return ns->ops->type; case NS_GET_OWNER_UID: if (ns->ops->type != CLONE_NEWUSER) return -EINVAL; user_ns = container_of(ns, struct user_namespace, ns); argp = (uid_t __user *) arg; uid = from_kuid_munged(current_user_ns(), user_ns->owner); return put_user(uid, argp); default: return -ENOTTY; } } int ns_get_name(char *buf, size_t size, struct task_struct *task, const struct proc_ns_operations *ns_ops) { struct ns_common *ns; int res = -ENOENT; ns = ns_ops->get(task); if (ns) { res = snprintf(buf, size, "%s:[%u]", ns_ops->name, ns->inum); ns_ops->put(ns); } return res; } struct file *proc_ns_fget(int fd) { struct file *file; file = fget(fd); if (!file) return ERR_PTR(-EBADF); if (file->f_op != &ns_file_operations) goto out_invalid; return file; out_invalid: fput(file); return ERR_PTR(-EINVAL); } static int nsfs_show_path(struct seq_file *seq, struct dentry *dentry) { struct inode *inode = d_inode(dentry); const struct proc_ns_operations *ns_ops = dentry->d_fsdata; seq_printf(seq, "%s:[%lu]", ns_ops->name, inode->i_ino); return 0; } static const struct super_operations nsfs_ops = { .statfs = simple_statfs, .evict_inode = nsfs_evict, .show_path = nsfs_show_path, }; static struct dentry *nsfs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { return mount_pseudo(fs_type, "nsfs:", &nsfs_ops, &ns_dentry_operations, NSFS_MAGIC); } static struct file_system_type nsfs = { .name = "nsfs", .mount = nsfs_mount, .kill_sb = kill_anon_super, }; void __init nsfs_init(void) { nsfs_mnt = kern_mount(&nsfs); if (IS_ERR(nsfs_mnt)) panic("can't set nsfs up\n"); nsfs_mnt->mnt_sb->s_flags &= ~MS_NOUSER; }
void *ns_get_path(struct path *path, struct task_struct *task, const struct proc_ns_operations *ns_ops) { struct vfsmount *mnt = mntget(nsfs_mnt); struct qstr qname = { .name = "", }; struct dentry *dentry; struct inode *inode; struct ns_common *ns; unsigned long d; again: ns = ns_ops->get(task); if (!ns) { mntput(mnt); return ERR_PTR(-ENOENT); } rcu_read_lock(); d = atomic_long_read(&ns->stashed); if (!d) goto slow; dentry = (struct dentry *)d; if (!lockref_get_not_dead(&dentry->d_lockref)) goto slow; rcu_read_unlock(); ns_ops->put(ns); got_it: path->mnt = mnt; path->dentry = dentry; return NULL; slow: rcu_read_unlock(); inode = new_inode_pseudo(mnt->mnt_sb); if (!inode) { ns_ops->put(ns); mntput(mnt); return ERR_PTR(-ENOMEM); } inode->i_ino = ns->inum; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; inode->i_flags |= S_IMMUTABLE; inode->i_mode = S_IFREG | S_IRUGO; inode->i_fop = &ns_file_operations; inode->i_private = ns; dentry = d_alloc_pseudo(mnt->mnt_sb, &qname); if (!dentry) { iput(inode); mntput(mnt); return ERR_PTR(-ENOMEM); } d_instantiate(dentry, inode); dentry->d_fsdata = (void *)ns_ops; d = atomic_long_cmpxchg(&ns->stashed, 0, (unsigned long)dentry); if (d) { d_delete(dentry); /* make sure ->d_prune() does nothing */ dput(dentry); cpu_relax(); goto again; } goto got_it; } int ns_get_name(char *buf, size_t size, struct task_struct *task, const struct proc_ns_operations *ns_ops) { struct ns_common *ns; int res = -ENOENT; ns = ns_ops->get(task); if (ns) { res = snprintf(buf, size, "%s:[%u]", ns_ops->name, ns->inum); ns_ops->put(ns); } return res; } struct file *proc_ns_fget(int fd) { struct file *file; file = fget(fd); if (!file) return ERR_PTR(-EBADF); if (file->f_op != &ns_file_operations) goto out_invalid; return file; out_invalid: fput(file); return ERR_PTR(-EINVAL); } static const struct super_operations nsfs_ops = { .statfs = simple_statfs, .evict_inode = nsfs_evict, }; static struct dentry *nsfs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { return mount_pseudo(fs_type, "nsfs:", &nsfs_ops, &ns_dentry_operations, NSFS_MAGIC); } static struct file_system_type nsfs = { .name = "nsfs", .mount = nsfs_mount, .kill_sb = kill_anon_super, }; void __init nsfs_init(void) { nsfs_mnt = kern_mount(&nsfs); if (IS_ERR(nsfs_mnt)) panic("can't set nsfs up\n"); nsfs_mnt->mnt_sb->s_flags &= ~MS_NOUSER; }