static ssize_t unionfs_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { int err = 0; struct file *lower_file; struct dentry *dentry = file->f_path.dentry; struct dentry *parent; unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT); parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT); unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); err = unionfs_file_revalidate(file, parent, true); if (unlikely(err)) goto out; lower_file = unionfs_lower_file(file); err = vfs_write(lower_file, buf, count, ppos); /* update our inode times+sizes upon a successful lower write */ if (err >= 0) { fsstack_copy_inode_size(dentry->d_inode, lower_file->f_path.dentry->d_inode); fsstack_copy_attr_times(dentry->d_inode, lower_file->f_path.dentry->d_inode); UNIONFS_F(file)->wrote_to_file = true; /* for delayed copyup */ unionfs_check_file(file); } out: unionfs_unlock_dentry(dentry); unionfs_unlock_parent(dentry, parent); unionfs_read_unlock(dentry->d_sb); return err; }
static int unionfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { int err = 0; struct file *file, *lower_file; const struct vm_operations_struct *lower_vm_ops; struct vm_area_struct lower_vma; BUG_ON(!vma); memcpy(&lower_vma, vma, sizeof(struct vm_area_struct)); file = lower_vma.vm_file; lower_vm_ops = UNIONFS_F(file)->lower_vm_ops; BUG_ON(!lower_vm_ops); if (!lower_vm_ops->page_mkwrite) goto out; lower_file = unionfs_lower_file(file); BUG_ON(!lower_file); /* * XXX: vm_ops->page_mkwrite may be called in parallel. * Because we have to resort to temporarily changing the * vma->vm_file to point to the lower file, a concurrent * invocation of unionfs_page_mkwrite could see a different * value. In this workaround, we keep a different copy of the * vma structure in our stack, so we never expose a different * value of the vma->vm_file called to us, even temporarily. * A better fix would be to change the calling semantics of * ->page_mkwrite to take an explicit file pointer. */ lower_vma.vm_file = lower_file; err = lower_vm_ops->page_mkwrite(&lower_vma, vmf); out: return err; }
static ssize_t unionfs_splice_write(struct pipe_inode_info *pipe, struct file *file, loff_t *ppos, size_t len, unsigned int flags) { ssize_t err = 0; struct file *lower_file; struct dentry *dentry = file->f_path.dentry; struct dentry *parent; unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT); parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT); unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); err = unionfs_file_revalidate(file, parent, true); if (unlikely(err)) goto out; lower_file = unionfs_lower_file(file); err = vfs_splice_from(pipe, lower_file, ppos, len, flags); /* update our inode times+sizes upon a successful lower write */ if (err >= 0) { fsstack_copy_inode_size(dentry->d_inode, lower_file->f_path.dentry->d_inode); fsstack_copy_attr_times(dentry->d_inode, lower_file->f_path.dentry->d_inode); unionfs_check_file(file); } out: unionfs_unlock_dentry(dentry); unionfs_unlock_parent(dentry, parent); unionfs_read_unlock(dentry->d_sb); return err; }
static ssize_t unionfs_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { int err; struct file *lower_file; struct dentry *dentry = file->f_path.dentry; struct dentry *parent; unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT); parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT); unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); err = unionfs_file_revalidate(file, parent, false); if (unlikely(err)) goto out; lower_file = unionfs_lower_file(file); err = vfs_read(lower_file, buf, count, ppos); /* update our inode atime upon a successful lower read */ if (err >= 0) { fsstack_copy_attr_atime(dentry->d_inode, lower_file->f_path.dentry->d_inode); unionfs_check_file(file); } out: unionfs_unlock_dentry(dentry); unionfs_unlock_parent(dentry, parent); unionfs_read_unlock(dentry->d_sb); return err; }
/* pass the ioctl to the lower fs */ static long do_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct file *lower_file; int err; lower_file = unionfs_lower_file(file); err = -ENOTTY; if (!lower_file || !lower_file->f_op) goto out; if (lower_file->f_op->unlocked_ioctl) { err = lower_file->f_op->unlocked_ioctl(lower_file, cmd, arg); } else if (lower_file->f_op->ioctl) { lock_kernel(); err = lower_file->f_op->ioctl( lower_file->f_path.dentry->d_inode, lower_file, cmd, arg); unlock_kernel(); } out: return err; }
static int unionfs_setattr(struct dentry *dentry, struct iattr *ia) { int err = 0; struct dentry *lower_dentry; struct dentry *parent; struct inode *inode; struct inode *lower_inode; int bstart, bend, bindex; loff_t size; struct iattr lower_ia; /* check if user has permission to change inode */ err = inode_change_ok(dentry->d_inode, ia); if (err) goto out_err; unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_CHILD); parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT); unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); if (unlikely(!__unionfs_d_revalidate(dentry, parent, false, 0))) { err = -ESTALE; goto out; } bstart = dbstart(dentry); bend = dbend(dentry); inode = dentry->d_inode; /* * mode change is for clearing setuid/setgid. Allow lower filesystem * to reinterpret it in its own way. */ if (ia->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) ia->ia_valid &= ~ATTR_MODE; lower_dentry = unionfs_lower_dentry(dentry); if (!lower_dentry) { /* should never happen after above revalidate */ err = -EINVAL; goto out; } /* * Get the lower inode directly from lower dentry, in case ibstart * is -1 (which happens when the file is open but unlinked. */ lower_inode = lower_dentry->d_inode; /* check if user has permission to change lower inode */ err = inode_change_ok(lower_inode, ia); if (err) goto out; /* copyup if the file is on a read only branch */ if (is_robranch_super(dentry->d_sb, bstart) || __is_rdonly(lower_inode)) { /* check if we have a branch to copy up to */ if (bstart <= 0) { err = -EACCES; goto out; } if (ia->ia_valid & ATTR_SIZE) size = ia->ia_size; else size = i_size_read(inode); /* copyup to next available branch */ for (bindex = bstart - 1; bindex >= 0; bindex--) { err = copyup_dentry(parent->d_inode, dentry, bstart, bindex, dentry->d_name.name, dentry->d_name.len, NULL, size); if (!err) break; } if (err) goto out; /* get updated lower_dentry/inode after copyup */ lower_dentry = unionfs_lower_dentry(dentry); lower_inode = unionfs_lower_inode(inode); /* * check for whiteouts in writeable branch, and remove them * if necessary. */ if (lower_dentry) { err = check_unlink_whiteout(dentry, lower_dentry, bindex); if (err > 0) /* ignore if whiteout found and removed */ err = 0; } } /* * If shrinking, first truncate upper level to cancel writing dirty * pages beyond the new eof; and also if its' maxbytes is more * limiting (fail with -EFBIG before making any change to the lower * level). There is no need to vmtruncate the upper level * afterwards in the other cases: we fsstack_copy_inode_size from * the lower level. */ if (ia->ia_valid & ATTR_SIZE) { err = inode_newsize_ok(inode, ia->ia_size); if (err) goto out; truncate_setsize(inode, ia->ia_size); } /* notify the (possibly copied-up) lower inode */ /* * Note: we use lower_dentry->d_inode, because lower_inode may be * unlinked (no inode->i_sb and i_ino==0. This happens if someone * tries to open(), unlink(), then ftruncate() a file. */ /* prepare our own lower struct iattr (with our own lower file) */ memcpy(&lower_ia, ia, sizeof(lower_ia)); if (ia->ia_valid & ATTR_FILE) { lower_ia.ia_file = unionfs_lower_file(ia->ia_file); BUG_ON(!lower_ia.ia_file); // XXX? } mutex_lock(&lower_dentry->d_inode->i_mutex); err = notify_change(lower_dentry, &lower_ia); mutex_unlock(&lower_dentry->d_inode->i_mutex); if (err) goto out; /* get attributes from the first lower inode */ if (ibstart(inode) >= 0) unionfs_copy_attr_all(inode, lower_inode); /* * unionfs_copy_attr_all will copy the lower times to our inode if * the lower ones are newer (useful for cache coherency). However, * ->setattr is the only place in which we may have to copy the * lower inode times absolutely, to support utimes(2). */ if (ia->ia_valid & ATTR_MTIME_SET) inode->i_mtime = lower_inode->i_mtime; if (ia->ia_valid & ATTR_CTIME) inode->i_ctime = lower_inode->i_ctime; if (ia->ia_valid & ATTR_ATIME_SET) inode->i_atime = lower_inode->i_atime; fsstack_copy_inode_size(inode, lower_inode); out: if (!err) unionfs_check_dentry(dentry); unionfs_unlock_dentry(dentry); unionfs_unlock_parent(dentry, parent); unionfs_read_unlock(dentry->d_sb); out_err: return err; }
/* * Helper function for unionfs_file_revalidate/locked. * Expects dentry/parent to be locked already, and revalidated. */ static int __unionfs_file_revalidate(struct file *file, struct dentry *dentry, struct dentry *parent, struct super_block *sb, int sbgen, int dgen, bool willwrite) { int fgen; int bstart, bend, orig_brid; int size; int err = 0; fgen = atomic_read(&UNIONFS_F(file)->generation); /* * There are two cases we are interested in. The first is if the * generation is lower than the super-block. The second is if * someone has copied up this file from underneath us, we also need * to refresh things. */ if (d_deleted(dentry) || (sbgen <= fgen && dbstart(dentry) == fbstart(file) && unionfs_lower_file(file))) goto out_may_copyup; /* save orig branch ID */ orig_brid = UNIONFS_F(file)->saved_branch_ids[fbstart(file)]; /* First we throw out the existing files. */ cleanup_file(file); /* Now we reopen the file(s) as in unionfs_open. */ bstart = fbstart(file) = dbstart(dentry); bend = fbend(file) = dbend(dentry); size = sizeof(struct file *) * sbmax(sb); UNIONFS_F(file)->lower_files = kzalloc(size, GFP_KERNEL); if (unlikely(!UNIONFS_F(file)->lower_files)) { err = -ENOMEM; goto out; } size = sizeof(int) * sbmax(sb); UNIONFS_F(file)->saved_branch_ids = kzalloc(size, GFP_KERNEL); if (unlikely(!UNIONFS_F(file)->saved_branch_ids)) { err = -ENOMEM; goto out; } if (S_ISDIR(dentry->d_inode->i_mode)) { /* We need to open all the files. */ err = open_all_files(file); if (err) goto out; } else { int new_brid; /* We only open the highest priority branch. */ err = open_highest_file(file, willwrite); if (err) goto out; new_brid = UNIONFS_F(file)->saved_branch_ids[fbstart(file)]; if (unlikely(new_brid != orig_brid && sbgen > fgen)) { /* * If we re-opened the file on a different branch * than the original one, and this was due to a new * branch inserted, then update the mnt counts of * the old and new branches accordingly. */ unionfs_mntget(dentry, bstart); unionfs_mntput(sb->s_root, branch_id_to_idx(sb, orig_brid)); } /* regular files have only one open lower file */ fbend(file) = fbstart(file); } atomic_set(&UNIONFS_F(file)->generation, atomic_read(&UNIONFS_I(dentry->d_inode)->generation)); out_may_copyup: /* Copyup on the first write to a file on a readonly branch. */ if (willwrite && IS_WRITE_FLAG(file->f_flags) && !IS_WRITE_FLAG(unionfs_lower_file(file)->f_flags) && is_robranch(dentry)) { pr_debug("unionfs: do delay copyup of \"%s\"\n", dentry->d_name.name); err = do_delayed_copyup(file, parent); /* regular files have only one open lower file */ if (!err && !S_ISDIR(dentry->d_inode->i_mode)) fbend(file) = fbstart(file); } out: if (err) { kfree(UNIONFS_F(file)->lower_files); kfree(UNIONFS_F(file)->saved_branch_ids); } return err; }
static int unionfs_mmap(struct file *file, struct vm_area_struct *vma) { int err = 0; bool willwrite; struct file *lower_file; struct dentry *dentry = file->f_path.dentry; struct dentry *parent; struct vm_operations_struct *saved_vm_ops = NULL; unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT); parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT); unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); /* This might be deferred to mmap's writepage */ willwrite = ((vma->vm_flags | VM_SHARED | VM_WRITE) == vma->vm_flags); err = unionfs_file_revalidate(file, parent, willwrite); if (unlikely(err)) goto out; unionfs_check_file(file); /* * File systems which do not implement ->writepage may use * generic_file_readonly_mmap as their ->mmap op. If you call * generic_file_readonly_mmap with VM_WRITE, you'd get an -EINVAL. * But we cannot call the lower ->mmap op, so we can't tell that * writeable mappings won't work. Therefore, our only choice is to * check if the lower file system supports the ->writepage, and if * not, return EINVAL (the same error that * generic_file_readonly_mmap returns in that case). */ lower_file = unionfs_lower_file(file); if (willwrite && !lower_file->f_mapping->a_ops->writepage) { err = -EINVAL; printk(KERN_ERR "unionfs: branch %d file system does not " "support writeable mmap\n", fbstart(file)); goto out; } /* * find and save lower vm_ops. * * XXX: the VFS should have a cleaner way of finding the lower vm_ops */ if (!UNIONFS_F(file)->lower_vm_ops) { err = lower_file->f_op->mmap(lower_file, vma); if (err) { printk(KERN_ERR "unionfs: lower mmap failed %d\n", err); goto out; } saved_vm_ops = vma->vm_ops; err = do_munmap(current->mm, vma->vm_start, vma->vm_end - vma->vm_start); if (err) { printk(KERN_ERR "unionfs: do_munmap failed %d\n", err); goto out; } } file->f_mapping->a_ops = &unionfs_dummy_aops; err = generic_file_mmap(file, vma); file->f_mapping->a_ops = &unionfs_aops; if (err) { printk(KERN_ERR "unionfs: generic_file_mmap failed %d\n", err); goto out; } vma->vm_ops = &unionfs_vm_ops; if (!UNIONFS_F(file)->lower_vm_ops) UNIONFS_F(file)->lower_vm_ops = saved_vm_ops; out: if (!err) { /* copyup could cause parent dir times to change */ unionfs_copy_attr_times(parent->d_inode); unionfs_check_file(file); } unionfs_unlock_dentry(dentry); unionfs_unlock_parent(dentry, parent); unionfs_read_unlock(dentry->d_sb); return err; }