static ssize_t unionfs_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { int err = 0; struct file *lower_file; struct dentry *dentry = file->f_path.dentry; struct dentry *parent; unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT); parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT); unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); err = unionfs_file_revalidate(file, parent, true); if (unlikely(err)) goto out; lower_file = unionfs_lower_file(file); err = vfs_write(lower_file, buf, count, ppos); /* update our inode times+sizes upon a successful lower write */ if (err >= 0) { fsstack_copy_inode_size(dentry->d_inode, lower_file->f_path.dentry->d_inode); fsstack_copy_attr_times(dentry->d_inode, lower_file->f_path.dentry->d_inode); UNIONFS_F(file)->wrote_to_file = true; /* for delayed copyup */ unionfs_check_file(file); } out: unionfs_unlock_dentry(dentry); unionfs_unlock_parent(dentry, parent); unionfs_read_unlock(dentry->d_sb); return err; }
static int unionfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { int err = 0; struct file *file, *lower_file; const struct vm_operations_struct *lower_vm_ops; struct vm_area_struct lower_vma; BUG_ON(!vma); memcpy(&lower_vma, vma, sizeof(struct vm_area_struct)); file = lower_vma.vm_file; lower_vm_ops = UNIONFS_F(file)->lower_vm_ops; BUG_ON(!lower_vm_ops); if (!lower_vm_ops->page_mkwrite) goto out; lower_file = unionfs_lower_file(file); BUG_ON(!lower_file); /* * XXX: vm_ops->page_mkwrite may be called in parallel. * Because we have to resort to temporarily changing the * vma->vm_file to point to the lower file, a concurrent * invocation of unionfs_page_mkwrite could see a different * value. In this workaround, we keep a different copy of the * vma structure in our stack, so we never expose a different * value of the vma->vm_file called to us, even temporarily. * A better fix would be to change the calling semantics of * ->page_mkwrite to take an explicit file pointer. */ lower_vma.vm_file = lower_file; err = lower_vm_ops->page_mkwrite(&lower_vma, vmf); out: return err; }
/* * put all references held by upper struct file and free lower file pointer * array */ static void cleanup_file(struct file *file) { int bindex, bstart, bend; struct file **lower_files; struct file *lower_file; struct super_block *sb = file->f_path.dentry->d_sb; lower_files = UNIONFS_F(file)->lower_files; bstart = fbstart(file); bend = fbend(file); for (bindex = bstart; bindex <= bend; bindex++) { int i; /* holds (possibly) updated branch index */ int old_bid; lower_file = unionfs_lower_file_idx(file, bindex); if (!lower_file) continue; /* * Find new index of matching branch with an open * file, since branches could have been added or * deleted causing the one with open files to shift. */ old_bid = UNIONFS_F(file)->saved_branch_ids[bindex]; i = branch_id_to_idx(sb, old_bid); if (unlikely(i < 0)) { printk(KERN_ERR "unionfs: no superblock for " "file %p\n", file); continue; } /* decrement count of open files */ branchput(sb, i); /* * fput will perform an mntput for us on the correct branch. * Although we're using the file's old branch configuration, * bindex, which is the old index, correctly points to the * right branch in the file's branch list. In other words, * we're going to mntput the correct branch even if branches * have been added/removed. */ fput(lower_file); UNIONFS_F(file)->lower_files[bindex] = NULL; UNIONFS_F(file)->saved_branch_ids[bindex] = -1; } UNIONFS_F(file)->lower_files = NULL; kfree(lower_files); kfree(UNIONFS_F(file)->saved_branch_ids); /* set to NULL because caller needs to know if to kfree on error */ UNIONFS_F(file)->saved_branch_ids = NULL; }
/* open the highest priority file for a given upper file */ static int open_highest_file(struct file *file, bool willwrite) { int bindex, bstart, bend, err = 0; struct file *lower_file; struct dentry *lower_dentry; struct dentry *dentry = file->f_path.dentry; struct dentry *parent = dget_parent(dentry); struct inode *parent_inode = parent->d_inode; struct super_block *sb = dentry->d_sb; bstart = dbstart(dentry); bend = dbend(dentry); lower_dentry = unionfs_lower_dentry(dentry); if (willwrite && IS_WRITE_FLAG(file->f_flags) && is_robranch(dentry)) { for (bindex = bstart - 1; bindex >= 0; bindex--) { err = copyup_file(parent_inode, file, bstart, bindex, i_size_read(dentry->d_inode)); if (!err) break; } atomic_set(&UNIONFS_F(file)->generation, atomic_read(&UNIONFS_I(dentry->d_inode)-> generation)); goto out; } dget(lower_dentry); unionfs_mntget(dentry, bstart); lower_file = dentry_open(lower_dentry, unionfs_lower_mnt_idx(dentry, bstart), file->f_flags, current_cred()); if (IS_ERR(lower_file)) { err = PTR_ERR(lower_file); goto out; } branchget(sb, bstart); unionfs_set_lower_file(file, lower_file); /* Fix up the position. */ lower_file->f_pos = file->f_pos; memcpy(&lower_file->f_ra, &file->f_ra, sizeof(struct file_ra_state)); out: dput(parent); return err; }
int unionfs_flush(struct file *file, fl_owner_t id) { int err = 0; struct file *lower_file = NULL; struct dentry *dentry = file->f_path.dentry; struct dentry *parent; int bindex, bstart, bend; unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT); parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT); unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); err = unionfs_file_revalidate(file, parent, UNIONFS_F(file)->wrote_to_file); if (unlikely(err)) goto out; unionfs_check_file(file); bstart = fbstart(file); bend = fbend(file); for (bindex = bstart; bindex <= bend; bindex++) { lower_file = unionfs_lower_file_idx(file, bindex); if (lower_file && lower_file->f_op && lower_file->f_op->flush) { err = lower_file->f_op->flush(lower_file, id); if (err) goto out; } } out: if (!err) unionfs_check_file(file); unionfs_unlock_dentry(dentry); unionfs_unlock_parent(dentry, parent); unionfs_read_unlock(dentry->d_sb); return err; }
static int unionfs_readdir(struct file *file, void *dirent, filldir_t filldir) { int err = 0; struct file *lower_file = NULL; struct dentry *dentry = file->f_path.dentry; struct dentry *parent; struct inode *inode = NULL; struct unionfs_getdents_callback buf; struct unionfs_dir_state *uds; int bend; loff_t offset; unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT); parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT); unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); err = unionfs_file_revalidate(file, parent, false); if (unlikely(err)) goto out; inode = dentry->d_inode; uds = UNIONFS_F(file)->rdstate; if (!uds) { if (file->f_pos == DIREOF) { goto out; } else if (file->f_pos > 0) { uds = find_rdstate(inode, file->f_pos); if (unlikely(!uds)) { err = -ESTALE; goto out; } UNIONFS_F(file)->rdstate = uds; } else { init_rdstate(file); uds = UNIONFS_F(file)->rdstate; } } bend = fbend(file); while (uds->bindex <= bend) { lower_file = unionfs_lower_file_idx(file, uds->bindex); if (!lower_file) { uds->bindex++; uds->dirpos = 0; continue; } /* prepare callback buffer */ buf.filldir_called = 0; buf.filldir_error = 0; buf.entries_written = 0; buf.dirent = dirent; buf.filldir = filldir; buf.rdstate = uds; buf.sb = inode->i_sb; /* Read starting from where we last left off. */ offset = vfs_llseek(lower_file, uds->dirpos, SEEK_SET); if (offset < 0) { err = offset; goto out; } err = vfs_readdir(lower_file, unionfs_filldir, &buf); /* Save the position for when we continue. */ offset = vfs_llseek(lower_file, 0, SEEK_CUR); if (offset < 0) { err = offset; goto out; } uds->dirpos = offset; /* Copy the atime. */ fsstack_copy_attr_atime(inode, lower_file->f_path.dentry->d_inode); if (err < 0) goto out; if (buf.filldir_error) break; if (!buf.entries_written) { uds->bindex++; uds->dirpos = 0; } } if (!buf.filldir_error && uds->bindex >= bend) { /* Save the number of hash entries for next time. */ UNIONFS_I(inode)->hashsize = uds->hashentries; free_rdstate(uds); UNIONFS_F(file)->rdstate = NULL; file->f_pos = DIREOF; } else { file->f_pos = rdstate2offset(uds); } out: if (!err) unionfs_check_file(file); unionfs_unlock_dentry(dentry); unionfs_unlock_parent(dentry, parent); unionfs_read_unlock(dentry->d_sb); return err; }
/* * This is not meant to be a generic repositioning function. If you do * things that aren't supported, then we return EINVAL. * * What is allowed: * (1) seeking to the same position that you are currently at * This really has no effect, but returns where you are. * (2) seeking to the beginning of the file * This throws out all state, and lets you begin again. */ static loff_t unionfs_dir_llseek(struct file *file, loff_t offset, int origin) { struct unionfs_dir_state *rdstate; struct dentry *dentry = file->f_path.dentry; struct dentry *parent; loff_t err; unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT); parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT); unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); err = unionfs_file_revalidate(file, parent, false); if (unlikely(err)) goto out; rdstate = UNIONFS_F(file)->rdstate; /* * we let users seek to their current position, but not anywhere * else. */ if (!offset) { switch (origin) { case SEEK_SET: if (rdstate) { free_rdstate(rdstate); UNIONFS_F(file)->rdstate = NULL; } init_rdstate(file); err = 0; break; case SEEK_CUR: err = file->f_pos; break; case SEEK_END: /* Unsupported, because we would break everything. */ err = -EINVAL; break; } } else { switch (origin) { case SEEK_SET: if (rdstate) { if (offset == rdstate2offset(rdstate)) err = offset; else if (file->f_pos == DIREOF) err = DIREOF; else err = -EINVAL; } else { struct inode *inode; inode = dentry->d_inode; rdstate = find_rdstate(inode, offset); if (rdstate) { UNIONFS_F(file)->rdstate = rdstate; err = rdstate->offset; } else { err = -EINVAL; } } break; case SEEK_CUR: case SEEK_END: /* Unsupported, because we would break everything. */ err = -EINVAL; break; } } out: if (!err) unionfs_check_file(file); unionfs_unlock_dentry(dentry); unionfs_unlock_parent(dentry, parent); unionfs_read_unlock(dentry->d_sb); return err; }
/* * release all lower object references & free the file info structure * * No need to grab sb info's rwsem. */ int unionfs_file_release(struct inode *inode, struct file *file) { struct file *lower_file = NULL; struct unionfs_file_info *fileinfo; struct unionfs_inode_info *inodeinfo; struct super_block *sb = inode->i_sb; struct dentry *dentry = file->f_path.dentry; struct dentry *parent; int bindex, bstart, bend; int fgen, err = 0; /* * Since mm/memory.c:might_fault() (under PROVE_LOCKING) was * modified in 2.6.29-rc1 to call might_lock_read on mmap_sem, this * has been causing false positives in file system stacking layers. * In particular, our ->mmap is called after sys_mmap2 already holds * mmap_sem, then we lock our own mutexes; but earlier, it's * possible for lockdep to have locked our mutexes first, and then * we call a lower ->readdir which could call might_fault. The * different ordering of the locks is what lockdep complains about * -- unnecessarily. Therefore, we have no choice but to tell * lockdep to temporarily turn off lockdep here. Note: the comments * inside might_sleep also suggest that it would have been * nicer to only annotate paths that needs that might_lock_read. */ lockdep_off(); unionfs_read_lock(sb, UNIONFS_SMUTEX_PARENT); parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT); unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); /* * We try to revalidate, but the VFS ignores return return values * from file->release, so we must always try to succeed here, * including to do the kfree and dput below. So if revalidation * failed, all we can do is print some message and keep going. */ err = unionfs_file_revalidate(file, parent, UNIONFS_F(file)->wrote_to_file); if (!err) unionfs_check_file(file); fileinfo = UNIONFS_F(file); BUG_ON(file->f_path.dentry->d_inode != inode); inodeinfo = UNIONFS_I(inode); /* fput all the lower files */ fgen = atomic_read(&fileinfo->generation); bstart = fbstart(file); bend = fbend(file); for (bindex = bstart; bindex <= bend; bindex++) { lower_file = unionfs_lower_file_idx(file, bindex); if (lower_file) { unionfs_set_lower_file_idx(file, bindex, NULL); fput(lower_file); branchput(sb, bindex); } /* if there are no more refs to the dentry, dput it */ if (d_deleted(dentry)) { dput(unionfs_lower_dentry_idx(dentry, bindex)); unionfs_set_lower_dentry_idx(dentry, bindex, NULL); } } kfree(fileinfo->lower_files); kfree(fileinfo->saved_branch_ids); if (fileinfo->rdstate) { fileinfo->rdstate->access = jiffies; spin_lock(&inodeinfo->rdlock); inodeinfo->rdcount++; list_add_tail(&fileinfo->rdstate->cache, &inodeinfo->readdircache); mark_inode_dirty(inode); spin_unlock(&inodeinfo->rdlock); fileinfo->rdstate = NULL; } kfree(fileinfo); unionfs_unlock_dentry(dentry); unionfs_unlock_parent(dentry, parent); unionfs_read_unlock(sb); lockdep_on(); return err; }
int unionfs_open(struct inode *inode, struct file *file) { int err = 0; struct file *lower_file = NULL; struct dentry *dentry = file->f_path.dentry; struct dentry *parent; int bindex = 0, bstart = 0, bend = 0; int size; int valid = 0; unionfs_read_lock(inode->i_sb, UNIONFS_SMUTEX_PARENT); parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT); unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); /* don't open unhashed/deleted files */ if (d_deleted(dentry)) { err = -ENOENT; goto out_nofree; } /* XXX: should I change 'false' below to the 'willwrite' flag? */ valid = __unionfs_d_revalidate(dentry, parent, false); if (unlikely(!valid)) { err = -ESTALE; goto out_nofree; } file->private_data = kzalloc(sizeof(struct unionfs_file_info), GFP_KERNEL); if (unlikely(!UNIONFS_F(file))) { err = -ENOMEM; goto out_nofree; } fbstart(file) = -1; fbend(file) = -1; atomic_set(&UNIONFS_F(file)->generation, atomic_read(&UNIONFS_I(inode)->generation)); size = sizeof(struct file *) * sbmax(inode->i_sb); UNIONFS_F(file)->lower_files = kzalloc(size, GFP_KERNEL); if (unlikely(!UNIONFS_F(file)->lower_files)) { err = -ENOMEM; goto out; } size = sizeof(int) * sbmax(inode->i_sb); UNIONFS_F(file)->saved_branch_ids = kzalloc(size, GFP_KERNEL); if (unlikely(!UNIONFS_F(file)->saved_branch_ids)) { err = -ENOMEM; goto out; } bstart = fbstart(file) = dbstart(dentry); bend = fbend(file) = dbend(dentry); /* * open all directories and make the unionfs file struct point to * these lower file structs */ if (S_ISDIR(inode->i_mode)) err = __open_dir(inode, file); /* open a dir */ else err = __open_file(inode, file, parent); /* open a file */ /* freeing the allocated resources, and fput the opened files */ if (err) { for (bindex = bstart; bindex <= bend; bindex++) { lower_file = unionfs_lower_file_idx(file, bindex); if (!lower_file) continue; branchput(dentry->d_sb, bindex); /* fput calls dput for lower_dentry */ fput(lower_file); } } out: if (err) { kfree(UNIONFS_F(file)->lower_files); kfree(UNIONFS_F(file)->saved_branch_ids); kfree(UNIONFS_F(file)); } out_nofree: if (!err) { unionfs_postcopyup_setmnt(dentry); unionfs_copy_attr_times(inode); unionfs_check_file(file); unionfs_check_inode(inode); } unionfs_unlock_dentry(dentry); unionfs_unlock_parent(dentry, parent); unionfs_read_unlock(inode->i_sb); return err; }
/* * Helper function for unionfs_file_revalidate/locked. * Expects dentry/parent to be locked already, and revalidated. */ static int __unionfs_file_revalidate(struct file *file, struct dentry *dentry, struct dentry *parent, struct super_block *sb, int sbgen, int dgen, bool willwrite) { int fgen; int bstart, bend, orig_brid; int size; int err = 0; fgen = atomic_read(&UNIONFS_F(file)->generation); /* * There are two cases we are interested in. The first is if the * generation is lower than the super-block. The second is if * someone has copied up this file from underneath us, we also need * to refresh things. */ if (d_deleted(dentry) || (sbgen <= fgen && dbstart(dentry) == fbstart(file) && unionfs_lower_file(file))) goto out_may_copyup; /* save orig branch ID */ orig_brid = UNIONFS_F(file)->saved_branch_ids[fbstart(file)]; /* First we throw out the existing files. */ cleanup_file(file); /* Now we reopen the file(s) as in unionfs_open. */ bstart = fbstart(file) = dbstart(dentry); bend = fbend(file) = dbend(dentry); size = sizeof(struct file *) * sbmax(sb); UNIONFS_F(file)->lower_files = kzalloc(size, GFP_KERNEL); if (unlikely(!UNIONFS_F(file)->lower_files)) { err = -ENOMEM; goto out; } size = sizeof(int) * sbmax(sb); UNIONFS_F(file)->saved_branch_ids = kzalloc(size, GFP_KERNEL); if (unlikely(!UNIONFS_F(file)->saved_branch_ids)) { err = -ENOMEM; goto out; } if (S_ISDIR(dentry->d_inode->i_mode)) { /* We need to open all the files. */ err = open_all_files(file); if (err) goto out; } else { int new_brid; /* We only open the highest priority branch. */ err = open_highest_file(file, willwrite); if (err) goto out; new_brid = UNIONFS_F(file)->saved_branch_ids[fbstart(file)]; if (unlikely(new_brid != orig_brid && sbgen > fgen)) { /* * If we re-opened the file on a different branch * than the original one, and this was due to a new * branch inserted, then update the mnt counts of * the old and new branches accordingly. */ unionfs_mntget(dentry, bstart); unionfs_mntput(sb->s_root, branch_id_to_idx(sb, orig_brid)); } /* regular files have only one open lower file */ fbend(file) = fbstart(file); } atomic_set(&UNIONFS_F(file)->generation, atomic_read(&UNIONFS_I(dentry->d_inode)->generation)); out_may_copyup: /* Copyup on the first write to a file on a readonly branch. */ if (willwrite && IS_WRITE_FLAG(file->f_flags) && !IS_WRITE_FLAG(unionfs_lower_file(file)->f_flags) && is_robranch(dentry)) { pr_debug("unionfs: do delay copyup of \"%s\"\n", dentry->d_name.name); err = do_delayed_copyup(file, parent); /* regular files have only one open lower file */ if (!err && !S_ISDIR(dentry->d_inode->i_mode)) fbend(file) = fbstart(file); } out: if (err) { kfree(UNIONFS_F(file)->lower_files); kfree(UNIONFS_F(file)->saved_branch_ids); } return err; }
static int unionfs_mmap(struct file *file, struct vm_area_struct *vma) { int err = 0; bool willwrite; struct file *lower_file; struct dentry *dentry = file->f_path.dentry; struct dentry *parent; struct vm_operations_struct *saved_vm_ops = NULL; unionfs_read_lock(dentry->d_sb, UNIONFS_SMUTEX_PARENT); parent = unionfs_lock_parent(dentry, UNIONFS_DMUTEX_PARENT); unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD); /* This might be deferred to mmap's writepage */ willwrite = ((vma->vm_flags | VM_SHARED | VM_WRITE) == vma->vm_flags); err = unionfs_file_revalidate(file, parent, willwrite); if (unlikely(err)) goto out; unionfs_check_file(file); /* * File systems which do not implement ->writepage may use * generic_file_readonly_mmap as their ->mmap op. If you call * generic_file_readonly_mmap with VM_WRITE, you'd get an -EINVAL. * But we cannot call the lower ->mmap op, so we can't tell that * writeable mappings won't work. Therefore, our only choice is to * check if the lower file system supports the ->writepage, and if * not, return EINVAL (the same error that * generic_file_readonly_mmap returns in that case). */ lower_file = unionfs_lower_file(file); if (willwrite && !lower_file->f_mapping->a_ops->writepage) { err = -EINVAL; printk(KERN_ERR "unionfs: branch %d file system does not " "support writeable mmap\n", fbstart(file)); goto out; } /* * find and save lower vm_ops. * * XXX: the VFS should have a cleaner way of finding the lower vm_ops */ if (!UNIONFS_F(file)->lower_vm_ops) { err = lower_file->f_op->mmap(lower_file, vma); if (err) { printk(KERN_ERR "unionfs: lower mmap failed %d\n", err); goto out; } saved_vm_ops = vma->vm_ops; err = do_munmap(current->mm, vma->vm_start, vma->vm_end - vma->vm_start); if (err) { printk(KERN_ERR "unionfs: do_munmap failed %d\n", err); goto out; } } file->f_mapping->a_ops = &unionfs_dummy_aops; err = generic_file_mmap(file, vma); file->f_mapping->a_ops = &unionfs_aops; if (err) { printk(KERN_ERR "unionfs: generic_file_mmap failed %d\n", err); goto out; } vma->vm_ops = &unionfs_vm_ops; if (!UNIONFS_F(file)->lower_vm_ops) UNIONFS_F(file)->lower_vm_ops = saved_vm_ops; out: if (!err) { /* copyup could cause parent dir times to change */ unionfs_copy_attr_times(parent->d_inode); unionfs_check_file(file); } unionfs_unlock_dentry(dentry); unionfs_unlock_parent(dentry, parent); unionfs_read_unlock(dentry->d_sb); return err; }