static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
                                  struct file *f,
                                  int (*open)(struct inode *, struct file *),
                                  const struct cred *cred)
{
        static const struct file_operations empty_fops = {};
        struct inode *inode;
        int error;

        f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK |
                                FMODE_PREAD | FMODE_PWRITE;

        if (unlikely(f->f_flags & O_PATH))
                f->f_mode = FMODE_PATH;

        inode = dentry->d_inode;
        if (f->f_mode & FMODE_WRITE) {
                error = __get_file_write_access(inode, mnt);
                if (error)
                        goto cleanup_file;
                if (!special_file(inode->i_mode))
                        file_take_write(f);
        }

        f->f_mapping = inode->i_mapping;
        f->f_path.dentry = dentry;
        f->f_path.mnt = mnt;
        f->f_pos = 0;
        file_sb_list_add(f, inode->i_sb);

        if (unlikely(f->f_mode & FMODE_PATH)) {
                f->f_op = &empty_fops;
                return f;
        }

        f->f_op = fops_get(inode->i_fop);

        error = security_dentry_open(f, cred);
        if (error)
                goto cleanup_all;

        error = break_lease(inode, f->f_flags);
        if (error)
                goto cleanup_all;

        if (!open && f->f_op)
                open = f->f_op->open;
        if (open) {
                error = open(inode, f);
                if (error)
                        goto cleanup_all;
        }
        if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
                i_readcount_inc(inode);

        f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);

        file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);

        /* NB: we're sure to have correct a_ops only after f_op->open */
        if (f->f_flags & O_DIRECT) {
                if (!f->f_mapping->a_ops ||
                    ((!f->f_mapping->a_ops->direct_IO) &&
                     (!f->f_mapping->a_ops->get_xip_mem))) {
                        fput(f);
                        f = ERR_PTR(-EINVAL);
                }
        }

        return f;

cleanup_all:
        fops_put(f->f_op);
        if (f->f_mode & FMODE_WRITE) {
                put_write_access(inode);
                if (!special_file(inode->i_mode)) {
                        /*
                         * We don't consider this a real
                         * mnt_want/drop_write() pair
                         * because it all happened right
                         * here, so just reset the state.
                         */
                        file_reset_write(f);
                        mnt_drop_write(mnt);
                }
        }
        file_sb_list_del(f);
        f->f_path.dentry = NULL;
        f->f_path.mnt = NULL;
cleanup_file:
        put_filp(f);
        dput(dentry);
        mntput(mnt);
        return ERR_PTR(error);
}
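/*
 * All of the open paths in this file funnel through the same i_writecount
 * protocol. A minimal sketch of that protocol (close in spirit to the
 * modern helpers in include/linux/fs.h, shown for orientation rather than
 * as the exact code of any one kernel version): i_writecount > 0 means the
 * inode has writers, < 0 means writes are denied (e.g. the file is being
 * executed), and the two states exclude each other.
 */
static inline int sketch_get_write_access(struct inode *inode)
{
        /* fails with -ETXTBSY while i_writecount is negative */
        return atomic_inc_unless_negative(&inode->i_writecount) ? 0 : -ETXTBSY;
}

static inline void sketch_put_write_access(struct inode *inode)
{
        atomic_dec(&inode->i_writecount);
}

static inline int sketch_deny_write_access(struct inode *inode)
{
        /* fails with -ETXTBSY while i_writecount is positive */
        return atomic_dec_unless_positive(&inode->i_writecount) ? 0 : -ETXTBSY;
}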
/*
 * Set various file attributes.
 * N.B. After this call fhp needs an fh_put
 */
__be32
nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
             int check_guard, time_t guardtime)
{
        struct dentry *dentry;
        struct inode *inode;
        int accmode = MAY_SATTR;
        int ftype = 0;
        int imode;
        __be32 err;
        int host_err;
        int size_change = 0;

        if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE))
                accmode |= MAY_WRITE|MAY_OWNER_OVERRIDE;
        if (iap->ia_valid & ATTR_SIZE)
                ftype = S_IFREG;

        /* Get inode */
        err = fh_verify(rqstp, fhp, ftype, accmode);
        if (err)
                goto out;

        dentry = fhp->fh_dentry;
        inode = dentry->d_inode;

        /* Ignore any mode updates on symlinks */
        if (S_ISLNK(inode->i_mode))
                iap->ia_valid &= ~ATTR_MODE;

        if (!iap->ia_valid)
                goto out;

        /* NFSv2 does not differentiate between "set-[ac]time-to-now"
         * which only requires access, and "set-[ac]time-to-X" which
         * requires ownership.
         * So if it looks like it might be "set both to the same time which
         * is close to now", and if inode_change_ok fails, then we
         * convert to "set to now" instead of "set to explicit time"
         *
         * We only call inode_change_ok as the last test as technically
         * it is not an interface that we should be using.  It is only
         * valid if the filesystem does not define its own i_op->setattr.
         */
#define BOTH_TIME_SET (ATTR_ATIME_SET | ATTR_MTIME_SET)
#define MAX_TOUCH_TIME_ERROR (30*60)
        if ((iap->ia_valid & BOTH_TIME_SET) == BOTH_TIME_SET &&
            iap->ia_mtime.tv_sec == iap->ia_atime.tv_sec) {
                /* Looks probable.  Now just make sure time is in the right
                 * ballpark.  Solaris, at least, doesn't seem to care what
                 * the time request is.  We require it be within 30 minutes
                 * of now.
                 */
                time_t delta = iap->ia_atime.tv_sec - get_seconds();
                if (delta < 0)
                        delta = -delta;
                if (delta < MAX_TOUCH_TIME_ERROR &&
                    inode_change_ok(inode, iap) != 0) {
                        /* turn off ATTR_[AM]TIME_SET but leave ATTR_[AM]TIME;
                         * this will cause notify_change to set these times
                         * to "now"
                         */
                        iap->ia_valid &= ~BOTH_TIME_SET;
                }
        }

        /* The size case is special.  It changes the file as well as the
         * attributes. */
        if (iap->ia_valid & ATTR_SIZE) {
                if (iap->ia_size < inode->i_size) {
                        err = nfsd_permission(fhp->fh_export, dentry,
                                              MAY_TRUNC|MAY_OWNER_OVERRIDE);
                        if (err)
                                goto out;
                }

                /*
                 * If we are changing the size of the file, then
                 * we need to break all leases.
                 */
                host_err = break_lease(inode, FMODE_WRITE | O_NONBLOCK);
                if (host_err == -EWOULDBLOCK)
                        host_err = -ETIMEDOUT;
                if (host_err) /* ENOMEM or EWOULDBLOCK */
                        goto out_nfserr;

                host_err = get_write_access(inode);
                if (host_err)
                        goto out_nfserr;

                size_change = 1;
                host_err = locks_verify_truncate(inode, NULL, iap->ia_size);
                if (host_err) {
                        put_write_access(inode);
                        goto out_nfserr;
                }
                DQUOT_INIT(inode);
        }

        imode = inode->i_mode;
        if (iap->ia_valid & ATTR_MODE) {
                iap->ia_mode &= S_IALLUGO;
                imode = iap->ia_mode |= (imode & ~S_IALLUGO);
        }

        /* Revoke setuid/setgid bit on chown/chgrp */
        if ((iap->ia_valid & ATTR_UID) && iap->ia_uid != inode->i_uid)
                iap->ia_valid |= ATTR_KILL_SUID;
        if ((iap->ia_valid & ATTR_GID) && iap->ia_gid != inode->i_gid)
                iap->ia_valid |= ATTR_KILL_SGID;

        /* Change the attributes. */
        iap->ia_valid |= ATTR_CTIME;

        err = nfserr_notsync;
        if (!check_guard || guardtime == inode->i_ctime.tv_sec) {
                fh_lock(fhp);
                host_err = notify_change(dentry, iap);
                err = nfserrno(host_err);
                fh_unlock(fhp);
        }
        if (size_change)
                put_write_access(inode);
        if (!err)
                if (EX_ISSYNC(fhp->fh_export))
                        write_inode_now(inode, 1);
out:
        return err;

out_nfserr:
        err = nfserrno(host_err);
        goto out;
}
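/*
 * A hedged restatement of the NFSv2 time heuristic above as a standalone
 * predicate (illustrative only, not part of nfsd; the sketch_ prefix marks
 * it as hypothetical): an explicit "set atime and mtime to the same value"
 * request is treated as a touch-to-now request when the value is within
 * 30 minutes of the current time, so a plain touch(1) from a non-owner
 * with write access still succeeds.
 */
static inline int sketch_is_touch_to_now(const struct iattr *iap, time_t now)
{
        time_t delta;

        if ((iap->ia_valid & BOTH_TIME_SET) != BOTH_TIME_SET)
                return 0;
        if (iap->ia_mtime.tv_sec != iap->ia_atime.tv_sec)
                return 0;
        delta = iap->ia_atime.tv_sec - now;
        if (delta < 0)
                delta = -delta;
        return delta < MAX_TOUCH_TIME_ERROR;
}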
static long do_sys_truncate(const char __user *pathname, loff_t length)
{
        struct path path;
        struct inode *inode;
        int error;

        error = -EINVAL;
        if (length < 0) /* sorry, but loff_t says... */
                goto out;

        error = user_path(pathname, &path);
        if (error)
                goto out;
        inode = path.dentry->d_inode;

        /* For directories it's -EISDIR, for other non-regulars - -EINVAL */
        error = -EISDIR;
        if (S_ISDIR(inode->i_mode))
                goto dput_and_out;

        error = -EINVAL;
        if (!S_ISREG(inode->i_mode))
                goto dput_and_out;

        error = mnt_want_write(path.mnt);
        if (error)
                goto dput_and_out;

        error = inode_permission(inode, MAY_WRITE);
        if (error)
                goto mnt_drop_write_and_out;

        error = -EPERM;
        if (IS_APPEND(inode))
                goto mnt_drop_write_and_out;

        error = get_write_access(inode);
        if (error)
                goto mnt_drop_write_and_out;

        /*
         * Make sure that there are no leases.  get_write_access() protects
         * against the truncate racing with a lease-granting setlease().
         */
        error = break_lease(inode, O_WRONLY);
        if (error)
                goto put_write_and_out;

        error = locks_verify_truncate(inode, NULL, length);
        if (!error)
                error = security_path_truncate(&path);
        if (!error)
                error = do_truncate(path.dentry, length, 0, NULL);

put_write_and_out:
        put_write_access(inode);
mnt_drop_write_and_out:
        mnt_drop_write(path.mnt);
dput_and_out:
        path_put(&path);
out:
        return error;
}
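/*
 * Userspace view of the path above (an illustrative sketch, not kernel
 * code; the file name is made up): truncate(2) enters do_sys_truncate(),
 * and the error cases map onto the checks shown there: EISDIR for
 * directories, EACCES from inode_permission(), EPERM for append-only
 * files, and ETXTBSY from get_write_access() on a file currently being
 * executed.
 */
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        if (truncate("/tmp/example", 4096) != 0)
                perror("truncate");     /* e.g. EISDIR, EACCES, ETXTBSY */
        return 0;
}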
static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
        struct page *page = vmf->page;
        struct inode *inode = file_inode(vma->vm_file);
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_sbd *sdp = GFS2_SB(inode);
        struct gfs2_alloc_parms ap = { .aflags = 0, };
        unsigned long last_index;
        u64 pos = page->index << PAGE_CACHE_SHIFT;
        unsigned int data_blocks, ind_blocks, rblocks;
        struct gfs2_holder gh;
        loff_t size;
        int ret;

        sb_start_pagefault(inode->i_sb);

        /* Update file times before taking page lock */
        file_update_time(vma->vm_file);

        ret = get_write_access(inode);
        if (ret)
                goto out;

        ret = gfs2_rs_alloc(ip);
        if (ret)
                goto out_write_access;

        gfs2_size_hint(vma->vm_file, pos, PAGE_CACHE_SIZE);

        gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
        ret = gfs2_glock_nq(&gh);
        if (ret)
                goto out_uninit;

        set_bit(GLF_DIRTY, &ip->i_gl->gl_flags);
        set_bit(GIF_SW_PAGED, &ip->i_flags);

        if (!gfs2_write_alloc_required(ip, pos, PAGE_CACHE_SIZE)) {
                lock_page(page);
                if (!PageUptodate(page) || page->mapping != inode->i_mapping) {
                        ret = -EAGAIN;
                        unlock_page(page);
                }
                goto out_unlock;
        }

        ret = gfs2_rindex_update(sdp);
        if (ret)
                goto out_unlock;

        gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks);
        ap.target = data_blocks + ind_blocks;
        ret = gfs2_quota_lock_check(ip, &ap);
        if (ret)
                goto out_unlock;
        ret = gfs2_inplace_reserve(ip, &ap);
        if (ret)
                goto out_quota_unlock;

        rblocks = RES_DINODE + ind_blocks;
        if (gfs2_is_jdata(ip))
                rblocks += data_blocks ? data_blocks : 1;
        if (ind_blocks || data_blocks) {
                rblocks += RES_STATFS + RES_QUOTA;
                rblocks += gfs2_rg_blocks(ip, data_blocks + ind_blocks);
        }
        ret = gfs2_trans_begin(sdp, rblocks, 0);
        if (ret)
                goto out_trans_fail;

        lock_page(page);
        ret = -EINVAL;
        size = i_size_read(inode);
        last_index = (size - 1) >> PAGE_CACHE_SHIFT;
        /* Check page index against inode size */
        if (size == 0 || (page->index > last_index))
                goto out_trans_end;

        ret = -EAGAIN;
        /* If truncated, we must retry the operation, we may have raced
         * with the glock demotion code.
         */
        if (!PageUptodate(page) || page->mapping != inode->i_mapping)
                goto out_trans_end;

        /* Unstuff, if required, and allocate backing blocks for page */
        ret = 0;
        if (gfs2_is_stuffed(ip))
                ret = gfs2_unstuff_dinode(ip, page);
        if (ret == 0)
                ret = gfs2_allocate_page_backing(page);

out_trans_end:
        if (ret)
                unlock_page(page);
        gfs2_trans_end(sdp);
out_trans_fail:
        gfs2_inplace_release(ip);
out_quota_unlock:
        gfs2_quota_unlock(ip);
out_unlock:
        gfs2_glock_dq(&gh);
out_uninit:
        gfs2_holder_uninit(&gh);
        if (ret == 0) {
                set_page_dirty(page);
                wait_for_stable_page(page);
        }
out_write_access:
        put_write_access(inode);
out:
        sb_end_pagefault(inode->i_sb);
        return block_page_mkwrite_return(ret);
}

static const struct vm_operations_struct gfs2_vm_ops = {
        .fault = filemap_fault,
        .map_pages = filemap_map_pages,
        .page_mkwrite = gfs2_page_mkwrite,
};

/**
 * gfs2_mmap -
 * @file: The file to map
 * @vma: The VMA which describes the mapping
 *
 * There is no need to get a lock here unless we should be updating
 * atime. We ignore any locking errors since the only consequence is
 * a missed atime update (which will just be deferred until later).
 *
 * Returns: 0
 */

static int gfs2_mmap(struct file *file, struct vm_area_struct *vma)
{
        struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);

        if (!(file->f_flags & O_NOATIME) &&
            !IS_NOATIME(&ip->i_inode)) {
                struct gfs2_holder i_gh;
                int error;

                error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY,
                                           &i_gh);
                if (error)
                        return error;
                /* grab lock to update inode */
                gfs2_glock_dq_uninit(&i_gh);
                file_accessed(file);
        }
        vma->vm_ops = &gfs2_vm_ops;

        return 0;
}

/**
 * gfs2_open_common - This is common to open and atomic_open
 * @inode: The inode being opened
 * @file: The file being opened
 *
 * This may be called under a glock or not, depending upon how it has
 * been called. We must always be called under a glock for regular
 * files, however. For other file types, it does not matter whether
 * we hold the glock or not.
 *
 * Returns: Error code or 0 for success
 */

int gfs2_open_common(struct inode *inode, struct file *file)
{
        struct gfs2_file *fp;
        int ret;

        if (S_ISREG(inode->i_mode)) {
                ret = generic_file_open(inode, file);
                if (ret)
                        return ret;
        }

        fp = kzalloc(sizeof(struct gfs2_file), GFP_NOFS);
        if (!fp)
                return -ENOMEM;

        mutex_init(&fp->f_fl_mutex);

        gfs2_assert_warn(GFS2_SB(inode), !file->private_data);
        file->private_data = fp;
        return 0;
}

/**
 * gfs2_open - open a file
 * @inode: the inode to open
 * @file: the struct file for this opening
 *
 * After atomic_open, this function is only used for opening files
 * which are already cached. We must still get the glock for regular
 * files to ensure that we have the file size up to date for the large
 * file check, which is in the common code. That is only an issue for
 * regular files though.
 *
 * Returns: errno
 */

static int gfs2_open(struct inode *inode, struct file *file)
{
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_holder i_gh;
        int error;
        bool need_unlock = false;

        if (S_ISREG(ip->i_inode.i_mode)) {
                error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY,
                                           &i_gh);
                if (error)
                        return error;
                need_unlock = true;
        }

        error = gfs2_open_common(inode, file);

        if (need_unlock)
                gfs2_glock_dq_uninit(&i_gh);

        return error;
}

/**
 * gfs2_release - called to close a struct file
 * @inode: the inode the struct file belongs to
 * @file: the struct file being closed
 *
 * Returns: errno
 */

static int gfs2_release(struct inode *inode, struct file *file)
{
        struct gfs2_inode *ip = GFS2_I(inode);

        kfree(file->private_data);
        file->private_data = NULL;

        if (!(file->f_mode & FMODE_WRITE))
                return 0;

        gfs2_rs_delete(ip, &inode->i_writecount);
        return 0;
}

/**
 * gfs2_fsync - sync the dirty data for a file (across the cluster)
 * @file: the file that points to the dentry
 * @start: the start position in the file to sync
 * @end: the end position in the file to sync
 * @datasync: set if we can ignore timestamp changes
 *
 * We split the data flushing here so that we don't wait for the data
 * until after we've also sent the metadata to disk. Note that for
 * data=ordered, we will write & wait for the data at the log flush
 * stage anyway, so this is unlikely to make much of a difference
 * except in the data=writeback case.
 *
 * If the fdatawrite fails due to any reason except -EIO, we will
 * continue the remainder of the fsync, although we'll still report
 * the error at the end. This is to match filemap_write_and_wait_range()
 * behaviour.
 *
 * Returns: errno
 */

static int gfs2_fsync(struct file *file, loff_t start, loff_t end,
                      int datasync)
{
        struct address_space *mapping = file->f_mapping;
        struct inode *inode = mapping->host;
        int sync_state = inode->i_state & I_DIRTY_ALL;
        struct gfs2_inode *ip = GFS2_I(inode);
        int ret = 0, ret1 = 0;

        if (mapping->nrpages) {
                ret1 = filemap_fdatawrite_range(mapping, start, end);
                if (ret1 == -EIO)
                        return ret1;
        }

        if (!gfs2_is_jdata(ip))
                sync_state &= ~I_DIRTY_PAGES;
        if (datasync)
                sync_state &= ~(I_DIRTY_SYNC | I_DIRTY_TIME);

        if (sync_state) {
                ret = sync_inode_metadata(inode, 1);
                if (ret)
                        return ret;
                if (gfs2_is_jdata(ip))
                        filemap_write_and_wait(mapping);
                gfs2_ail_flush(ip->i_gl, 1);
        }

        if (mapping->nrpages)
                ret = filemap_fdatawait_range(mapping, start, end);

        return ret ? ret : ret1;
}

/**
 * gfs2_file_write_iter - Perform a write to a file
 * @iocb: The io context
 * @from: The data to write
 *
 * We have to do a lock/unlock here to refresh the inode size for
 * O_APPEND writes, otherwise we can end up writing at the wrong
 * offset. There is still a race, but provided the app is using its
 * own file locking, this will make O_APPEND work as expected.
 *
 */

static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
        struct file *file = iocb->ki_filp;
        struct gfs2_inode *ip = GFS2_I(file_inode(file));
        int ret;

        ret = gfs2_rs_alloc(ip);
        if (ret)
                return ret;

        gfs2_size_hint(file, iocb->ki_pos, iov_iter_count(from));

        if (iocb->ki_flags & IOCB_APPEND) {
                struct gfs2_holder gh;

                ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
                if (ret)
                        return ret;
                gfs2_glock_dq_uninit(&gh);
        }

        return generic_file_write_iter(iocb, from);
}

static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
                           int mode)
{
        struct gfs2_inode *ip = GFS2_I(inode);
        struct buffer_head *dibh;
        int error;
        unsigned int nr_blks;
        sector_t lblock = offset >> inode->i_blkbits;

        error = gfs2_meta_inode_buffer(ip, &dibh);
        if (unlikely(error))
                return error;

        gfs2_trans_add_meta(ip->i_gl, dibh);

        if (gfs2_is_stuffed(ip)) {
                error = gfs2_unstuff_dinode(ip, NULL);
                if (unlikely(error))
                        goto out;
        }

        while (len) {
                struct buffer_head bh_map = { .b_state = 0, .b_blocknr = 0 };
                bh_map.b_size = len;
                set_buffer_zeronew(&bh_map);

                error = gfs2_block_map(inode, lblock, &bh_map, 1);
                if (unlikely(error))
                        goto out;
                len -= bh_map.b_size;
                nr_blks = bh_map.b_size >> inode->i_blkbits;
                lblock += nr_blks;
                if (!buffer_new(&bh_map))
                        continue;
                if (unlikely(!buffer_zeronew(&bh_map))) {
                        error = -EIO;
                        goto out;
                }
        }
out:
        brelse(dibh);
        return error;
}

/**
 * calc_max_reserv() - Reverse of write_calc_reserv. Given a number of
 *                     blocks, determine how many bytes can be written.
 * @ip:          The inode in question.
 * @len:         Max cap of bytes. What we return in *len must be <= this.
 * @data_blocks: Compute and return the number of data blocks needed
 * @ind_blocks:  Compute and return the number of indirect blocks needed
 * @max_blocks:  The total blocks available to work with.
 *
 * Returns: void, but @len, @data_blocks and @ind_blocks are filled in.
 */
static void calc_max_reserv(struct gfs2_inode *ip, loff_t *len,
                            unsigned int *data_blocks,
                            unsigned int *ind_blocks,
                            unsigned int max_blocks)
{
        loff_t max = *len;
        const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
        unsigned int tmp, max_data = max_blocks - 3 * (sdp->sd_max_height - 1);

        for (tmp = max_data; tmp > sdp->sd_diptrs;) {
                tmp = DIV_ROUND_UP(tmp, sdp->sd_inptrs);
                max_data -= tmp;
        }

        *data_blocks = max_data;
        *ind_blocks = max_blocks - max_data;
        *len = ((loff_t)max_data - 3) << sdp->sd_sb.sb_bsize_shift;
        if (*len > max) {
                *len = max;
                gfs2_write_calc_reserv(ip, max, data_blocks, ind_blocks);
        }
}

static long __gfs2_fallocate(struct file *file, int mode, loff_t offset,
                             loff_t len)
{
        struct inode *inode = file_inode(file);
        struct gfs2_sbd *sdp = GFS2_SB(inode);
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_alloc_parms ap = { .aflags = 0, };
        unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
        loff_t bytes, max_bytes, max_blks = UINT_MAX;
        int error;
        const loff_t pos = offset;
        const loff_t count = len;
        loff_t bsize_mask = ~((loff_t)sdp->sd_sb.sb_bsize - 1);
        loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift;
        loff_t max_chunk_size = UINT_MAX & bsize_mask;

        next = (next + 1) << sdp->sd_sb.sb_bsize_shift;

        offset &= bsize_mask;
        len = next - offset;
        bytes = sdp->sd_max_rg_data * sdp->sd_sb.sb_bsize / 2;
        if (!bytes)
                bytes = UINT_MAX;
        bytes &= bsize_mask;
        if (bytes == 0)
                bytes = sdp->sd_sb.sb_bsize;

        gfs2_size_hint(file, offset, len);

        gfs2_write_calc_reserv(ip, PAGE_SIZE, &data_blocks, &ind_blocks);
        ap.min_target = data_blocks + ind_blocks;

        while (len > 0) {
                if (len < bytes)
                        bytes = len;
                if (!gfs2_write_alloc_required(ip, offset, bytes)) {
                        len -= bytes;
                        offset += bytes;
                        continue;
                }

                /* We need to determine how many bytes we can actually
                 * fallocate without exceeding quota or going over the
                 * end of the fs. We start off optimistically by assuming
                 * we can write max_bytes */
                max_bytes = (len > max_chunk_size) ? max_chunk_size : len;

                /* Since max_bytes is most likely a theoretical max, we
                 * calculate a more realistic 'bytes' to serve as a good
                 * starting point for the number of bytes we may be able
                 * to write */
                gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks);
                ap.target = data_blocks + ind_blocks;

                error = gfs2_quota_lock_check(ip, &ap);
                if (error)
                        return error;
                /* ap.allowed tells us how many blocks quota will allow
                 * us to write. Check if this reduces max_blks */
                if (ap.allowed && ap.allowed < max_blks)
                        max_blks = ap.allowed;

                error = gfs2_inplace_reserve(ip, &ap);
                if (error)
                        goto out_qunlock;

                /* check if the selected rgrp limits our max_blks further */
                if (ap.allowed && ap.allowed < max_blks)
                        max_blks = ap.allowed;

                /* Almost done. Calculate bytes that can be written using
                 * max_blks. We also recompute max_bytes, data_blocks and
                 * ind_blocks */
                calc_max_reserv(ip, &max_bytes, &data_blocks,
                                &ind_blocks, max_blks);

                rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA +
                          RES_RG_HDR + gfs2_rg_blocks(ip, data_blocks + ind_blocks);
                if (gfs2_is_jdata(ip))
                        rblocks += data_blocks ?
                                   data_blocks : 1;

                error = gfs2_trans_begin(sdp, rblocks,
                                         PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize);
                if (error)
                        goto out_trans_fail;

                error = fallocate_chunk(inode, offset, max_bytes, mode);
                gfs2_trans_end(sdp);
                if (error)
                        goto out_trans_fail;

                len -= max_bytes;
                offset += max_bytes;
                gfs2_inplace_release(ip);
                gfs2_quota_unlock(ip);
        }

        if (!(mode & FALLOC_FL_KEEP_SIZE) && (pos + count) > inode->i_size) {
                i_size_write(inode, pos + count);
                file_update_time(file);
                mark_inode_dirty(inode);
        }

        return generic_write_sync(file, pos, count);

out_trans_fail:
        gfs2_inplace_release(ip);
out_qunlock:
        gfs2_quota_unlock(ip);
        return error;
}

static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
                           loff_t len)
{
        struct inode *inode = file_inode(file);
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_holder gh;
        int ret;

        if ((mode & ~FALLOC_FL_KEEP_SIZE) || gfs2_is_jdata(ip))
                return -EOPNOTSUPP;

        mutex_lock(&inode->i_mutex);

        gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
        ret = gfs2_glock_nq(&gh);
        if (ret)
                goto out_uninit;

        if (!(mode & FALLOC_FL_KEEP_SIZE) &&
            (offset + len) > inode->i_size) {
                ret = inode_newsize_ok(inode, offset + len);
                if (ret)
                        goto out_unlock;
        }

        ret = get_write_access(inode);
        if (ret)
                goto out_unlock;

        ret = gfs2_rs_alloc(ip);
        if (ret)
                goto out_putw;

        ret = __gfs2_fallocate(file, mode, offset, len);
        if (ret)
                gfs2_rs_deltree(ip->i_res);

out_putw:
        put_write_access(inode);
out_unlock:
        gfs2_glock_dq(&gh);
out_uninit:
        gfs2_holder_uninit(&gh);
        mutex_unlock(&inode->i_mutex);
        return ret;
}

static ssize_t gfs2_file_splice_write(struct pipe_inode_info *pipe,
                                      struct file *out, loff_t *ppos,
                                      size_t len, unsigned int flags)
{
        int error;
        struct gfs2_inode *ip = GFS2_I(out->f_mapping->host);

        error = gfs2_rs_alloc(ip);
        if (error)
                return (ssize_t)error;

        gfs2_size_hint(out, *ppos, len);

        return iter_file_splice_write(pipe, out, ppos, len, flags);
}

#ifdef CONFIG_GFS2_FS_LOCKING_DLM

/**
 * gfs2_lock - acquire/release a posix lock on a file
 * @file: the file pointer
 * @cmd: either modify or retrieve lock state, possibly wait
 * @fl: type and range of lock
 *
 * Returns: errno
 */

static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl)
{
        struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
        struct gfs2_sbd *sdp = GFS2_SB(file->f_mapping->host);
        struct lm_lockstruct *ls = &sdp->sd_lockstruct;

        if (!(fl->fl_flags & FL_POSIX))
                return -ENOLCK;
        if (__mandatory_lock(&ip->i_inode) && fl->fl_type != F_UNLCK)
                return -ENOLCK;

        if (cmd == F_CANCELLK) {
                /* Hack: */
                cmd = F_SETLK;
                fl->fl_type = F_UNLCK;
        }
        if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) {
                if (fl->fl_type == F_UNLCK)
                        locks_lock_file_wait(file, fl);
                return -EIO;
        }
        if (IS_GETLK(cmd))
                return dlm_posix_get(ls->ls_dlm, ip->i_no_addr, file, fl);
        else if (fl->fl_type == F_UNLCK)
                return dlm_posix_unlock(ls->ls_dlm, ip->i_no_addr, file, fl);
        else
                return dlm_posix_lock(ls->ls_dlm, ip->i_no_addr, file, cmd, fl);
}

static int do_flock(struct file *file, int cmd, struct file_lock *fl)
{
        struct gfs2_file *fp = file->private_data;
        struct gfs2_holder *fl_gh = &fp->f_fl_gh;
        struct gfs2_inode *ip = GFS2_I(file_inode(file));
        struct gfs2_glock *gl;
        unsigned int state;
        int flags;
        int error = 0;
        int sleeptime;

        state = (fl->fl_type == F_WRLCK) ? LM_ST_EXCLUSIVE : LM_ST_SHARED;
        flags = (IS_SETLKW(cmd) ?
                 0 : LM_FLAG_TRY_1CB) | GL_EXACT;

        mutex_lock(&fp->f_fl_mutex);

        gl = fl_gh->gh_gl;
        if (gl) {
                if (fl_gh->gh_state == state)
                        goto out;
                locks_lock_file_wait(file,
                                     &(struct file_lock){.fl_type = F_UNLCK});
                gfs2_glock_dq(fl_gh);
                gfs2_holder_reinit(state, flags, fl_gh);
        } else {
static int hpfs_unlink(struct inode *dir, struct dentry *dentry)
{
        const unsigned char *name = dentry->d_name.name;
        unsigned len = dentry->d_name.len;
        struct quad_buffer_head qbh;
        struct hpfs_dirent *de;
        struct inode *inode = dentry->d_inode;
        dnode_secno dno;
        fnode_secno fno;
        int r;
        int rep = 0;
        int err;

        lock_kernel();
        hpfs_adjust_length(name, &len);
again:
        mutex_lock(&hpfs_i(inode)->i_parent_mutex);
        mutex_lock(&hpfs_i(dir)->i_mutex);
        err = -ENOENT;
        de = map_dirent(dir, hpfs_i(dir)->i_dno, name, len, &dno, &qbh);
        if (!de)
                goto out;

        err = -EPERM;
        if (de->first)
                goto out1;

        err = -EISDIR;
        if (de->directory)
                goto out1;

        fno = de->fnode;
        r = hpfs_remove_dirent(dir, dno, de, &qbh, 1);
        switch (r) {
        case 1:
                hpfs_error(dir->i_sb, "there was error when removing dirent");
                err = -EFSERROR;
                break;
        case 2:         /* no space for deleting, try to truncate file */
                err = -ENOSPC;
                if (rep++)
                        break;

                mutex_unlock(&hpfs_i(dir)->i_mutex);
                mutex_unlock(&hpfs_i(inode)->i_parent_mutex);
                dentry_unhash(dentry);
                if (!d_unhashed(dentry)) {
                        dput(dentry);
                        unlock_kernel();
                        return -ENOSPC;
                }
                if (generic_permission(inode, MAY_WRITE, 0, NULL) ||
                    !S_ISREG(inode->i_mode) ||
                    get_write_access(inode)) {
                        d_rehash(dentry);
                        dput(dentry);
                } else {
                        struct iattr newattrs;
                        /*printk("HPFS: truncating file before delete.\n");*/
                        newattrs.ia_size = 0;
                        newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
                        err = notify_change(dentry, &newattrs);
                        put_write_access(inode);
                        dput(dentry);
                        if (!err)
                                goto again;
                }
                unlock_kernel();
                return -ENOSPC;
        default:
                drop_nlink(inode);
                err = 0;
        }
        goto out;

out1:
        hpfs_brelse4(&qbh);
out:
        mutex_unlock(&hpfs_i(dir)->i_mutex);
        mutex_unlock(&hpfs_i(inode)->i_parent_mutex);
        unlock_kernel();
        return err;
}
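/*
 * The ENOSPC recovery above is a truncate-then-retry pattern: if removing
 * the dirent needs space the filesystem does not have, truncate the victim
 * file to zero via notify_change() to free its blocks, then retry the
 * unlink exactly once (guarded by rep++). A hedged restatement as a
 * standalone helper; sketch_remove_dirent() is hypothetical, not hpfs code:
 */
static int sketch_remove_dirent(struct dentry *dentry);  /* hypothetical */

static int sketch_unlink_with_truncate_retry(struct inode *inode,
                                             struct dentry *dentry)
{
        int tries = 0;
        int err;

retry:
        err = sketch_remove_dirent(dentry);
        if (err == -ENOSPC && !tries++ &&
            S_ISREG(inode->i_mode) && !get_write_access(inode)) {
                struct iattr newattrs = {
                        .ia_size  = 0,
                        .ia_valid = ATTR_SIZE | ATTR_CTIME,
                };
                /* get_write_access() above means the truncate cannot race
                 * with an execve() of the same file */
                err = notify_change(dentry, &newattrs);
                put_write_access(inode);
                if (!err)
                        goto retry;     /* space freed; retry the unlink once */
        }
        return err;
}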
static int do_dentry_open(struct file *f,
                          struct inode *inode,
                          int (*open)(struct inode *, struct file *),
                          const struct cred *cred)
{
        static const struct file_operations empty_fops = {};
        int error;

        f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK |
                                FMODE_PREAD | FMODE_PWRITE;

        path_get(&f->f_path);
        f->f_inode = inode;
        f->f_mapping = inode->i_mapping;

        if (unlikely(f->f_flags & O_PATH)) {
                f->f_mode = FMODE_PATH;
                f->f_op = &empty_fops;
                return 0;
        }

        if (f->f_mode & FMODE_WRITE && !special_file(inode->i_mode)) {
                error = get_write_access(inode);
                if (unlikely(error))
                        goto cleanup_file;
                error = __mnt_want_write(f->f_path.mnt);
                if (unlikely(error)) {
                        put_write_access(inode);
                        goto cleanup_file;
                }
                f->f_mode |= FMODE_WRITER;
        }

        /* POSIX.1-2008/SUSv4 Section XSI 2.9.7 */
        if (S_ISREG(inode->i_mode))
                f->f_mode |= FMODE_ATOMIC_POS;

        f->f_op = fops_get(inode->i_fop);
        if (unlikely(WARN_ON(!f->f_op))) {
                error = -ENODEV;
                goto cleanup_all;
        }

        error = security_file_open(f, cred);
        if (error)
                goto cleanup_all;

        error = break_lease(inode, f->f_flags);
        if (error)
                goto cleanup_all;

        if (!open)
                open = f->f_op->open;
        if (open) {
                error = open(inode, f);
                if (error)
                        goto cleanup_all;
        }
        if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
                i_readcount_inc(inode);
        if ((f->f_mode & FMODE_READ) &&
             likely(f->f_op->read || f->f_op->read_iter))
                f->f_mode |= FMODE_CAN_READ;
        if ((f->f_mode & FMODE_WRITE) &&
             likely(f->f_op->write || f->f_op->write_iter))
                f->f_mode |= FMODE_CAN_WRITE;

        f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);

        file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);

        return 0;

cleanup_all:
        fops_put(f->f_op);
        if (f->f_mode & FMODE_WRITER) {
                put_write_access(inode);
                __mnt_drop_write(f->f_path.mnt);
        }
cleanup_file:
        path_put(&f->f_path);
        f->f_path.mnt = NULL;
        f->f_path.dentry = NULL;
        f->f_inode = NULL;
        return error;
}
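/*
 * The O_PATH branch above returns early with empty file operations: the
 * descriptor identifies a location but cannot do I/O, and no write access
 * or lease handling is needed. A userspace sketch of the resulting
 * semantics (illustrative only; the path is made up):
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/stat.h>
#include <unistd.h>

int main(void)
{
        struct stat st;
        char buf[16];
        int fd = open("/etc/hostname", O_PATH);

        if (fd < 0)
                return 1;
        if (fstat(fd, &st) == 0)                /* metadata access works */
                printf("size=%lld\n", (long long)st.st_size);
        if (read(fd, buf, sizeof(buf)) < 0)     /* but I/O fails: EBADF */
                perror("read");
        close(fd);
        return 0;
}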
static inline int do_aout_core_dump(long signr, struct pt_regs *regs)
{
        struct inode *inode = NULL;
        struct file file;
        unsigned short fs;
        int has_dumped = 0;
        char corefile[6+sizeof(current->comm)];
        unsigned long dump_start, dump_size;
        struct user dump;
#ifdef __alpha__
#       define START_DATA(u)    (u.start_data)
#elif defined(CONFIG_ARM)
#       define START_DATA(u)    ((u.u_tsize << PAGE_SHIFT) + u.start_code)
#else
#       define START_DATA(u)    (u.u_tsize << PAGE_SHIFT)
#endif

        if (!current->dumpable || current->mm->count != 1)
                return 0;
        current->dumpable = 0;

        /* See if we have enough room to write the upage. */
        if (current->rlim[RLIMIT_CORE].rlim_cur < PAGE_SIZE)
                return 0;

        fs = get_fs();
        set_fs(KERNEL_DS);
        memcpy(corefile, "core.", 5);
#if 0
        memcpy(corefile+5, current->comm, sizeof(current->comm));
#else
        corefile[4] = '\0';
#endif
        if (open_namei(corefile, O_CREAT | 2 | O_TRUNC, 0600, &inode, NULL)) {
                inode = NULL;
                goto end_coredump;
        }
        if (!S_ISREG(inode->i_mode))
                goto end_coredump;
        if (!inode->i_op || !inode->i_op->default_file_ops)
                goto end_coredump;
        if (get_write_access(inode))
                goto end_coredump;
        file.f_mode = 3;
        file.f_flags = 0;
        file.f_count = 1;
        file.f_inode = inode;
        file.f_pos = 0;
        file.f_reada = 0;
        file.f_op = inode->i_op->default_file_ops;
        if (file.f_op->open)
                if (file.f_op->open(inode, &file))
                        goto done_coredump;
        if (!file.f_op->write)
                goto close_coredump;
        has_dumped = 1;
        current->flags |= PF_DUMPCORE;
        strncpy(dump.u_comm, current->comm, sizeof(current->comm));
        dump.u_ar0 = (void *)(((unsigned long)(&dump.regs)) -
                              ((unsigned long)(&dump)));
        dump.signal = signr;
        dump_thread(regs, &dump);

        /* If the size of the dump file exceeds the rlimit, then see what
           would happen if we wrote the stack, but not the data area. */
        if ((dump.u_dsize + dump.u_ssize + 1) * PAGE_SIZE >
            current->rlim[RLIMIT_CORE].rlim_cur)
                dump.u_dsize = 0;

        /* Make sure we have enough room to write the stack and data areas. */
        if ((dump.u_ssize + 1) * PAGE_SIZE >
            current->rlim[RLIMIT_CORE].rlim_cur)
                dump.u_ssize = 0;

        /* make sure we actually have a data and stack area to dump */
        set_fs(USER_DS);
        if (verify_area(VERIFY_READ, (void *) START_DATA(dump),
                        dump.u_dsize << PAGE_SHIFT))
                dump.u_dsize = 0;
        if (verify_area(VERIFY_READ, (void *) dump.start_stack,
                        dump.u_ssize << PAGE_SHIFT))
                dump.u_ssize = 0;

        set_fs(KERNEL_DS);
        /* struct user */
        DUMP_WRITE(&dump, sizeof(dump));
        /* Now dump all of the user data.  Include malloced stuff as well */
        DUMP_SEEK(PAGE_SIZE);
        /* now we start writing out the user space info */
        set_fs(USER_DS);
        /* Dump the data area */
        if (dump.u_dsize != 0) {
                dump_start = START_DATA(dump);
                dump_size = dump.u_dsize << PAGE_SHIFT;
                DUMP_WRITE(dump_start, dump_size);
        }
        /* Now prepare to dump the stack area */
        if (dump.u_ssize != 0) {
                dump_start = dump.start_stack;
                dump_size = dump.u_ssize << PAGE_SHIFT;
                DUMP_WRITE(dump_start, dump_size);
        }
        /* Finally dump the task struct.  Not used by gdb, but could be
           useful. */
        set_fs(KERNEL_DS);
        DUMP_WRITE(current, sizeof(*current));
close_coredump:
        if (file.f_op->release)
                file.f_op->release(inode, &file);
done_coredump:
        put_write_access(inode);
end_coredump:
        set_fs(fs);
        iput(inode);
        return has_dumped;
}
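/*
 * DUMP_WRITE() and DUMP_SEEK() are defined elsewhere in the historical
 * a.out code. A hedged reconstruction of their shape (the exact macros
 * vary between early kernel versions) so the control flow above is
 * readable: both bail out to close_coredump on a short write or seek.
 */
#define DUMP_WRITE(addr, nr) \
        if (file.f_op->write(inode, &file, (char *)(addr), (nr)) != (nr)) \
                goto close_coredump

#define DUMP_SEEK(offset) \
        do { \
                if (file.f_op->lseek) { \
                        if (file.f_op->lseek(inode, &file, (offset), 0) != (offset)) \
                                goto close_coredump; \
                } else \
                        file.f_pos = (offset); \
        } while (0)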
static long do_sys_truncate(const char __user *path, loff_t length)
{
        struct nameidata nd;
        struct inode *inode;
        int error;

        error = -EINVAL;
        if (length < 0) /* sorry, but loff_t says... */
                goto out;

        error = user_path_walk(path, &nd);
        if (error)
                goto out;
        inode = nd.path.dentry->d_inode;

        /* For directories it's -EISDIR, for other non-regulars - -EINVAL */
        error = -EISDIR;
        if (S_ISDIR(inode->i_mode))
                goto dput_and_out;

        error = -EINVAL;
        if (!S_ISREG(inode->i_mode))
                goto dput_and_out;

        error = mnt_want_write(nd.path.mnt);
        if (error)
                goto dput_and_out;

        error = vfs_permission(&nd, MAY_WRITE);
        if (error)
                goto mnt_drop_write_and_out;

        error = -EPERM;
        if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
                goto mnt_drop_write_and_out;

        error = get_write_access(inode);
        if (error)
                goto mnt_drop_write_and_out;

        /*
         * Make sure that there are no leases.  get_write_access() protects
         * against the truncate racing with a lease-granting setlease().
         */
        error = break_lease(inode, FMODE_WRITE);
        if (error)
                goto put_write_and_out;

        error = locks_verify_truncate(inode, NULL, length);
        if (!error) {
                DQUOT_INIT(inode);
                error = do_truncate(nd.path.dentry, length, 0, NULL);
        }

put_write_and_out:
        put_write_access(inode);
mnt_drop_write_and_out:
        mnt_drop_write(nd.path.mnt);
dput_and_out:
        path_put(&nd.path);
out:
        return error;
}
static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
                                  int flags, struct file *f,
                                  int (*open)(struct inode *, struct file *))
{
        struct inode *inode;
        int error;

        f->f_flags = flags;
        f->f_mode = ((flags+1) & O_ACCMODE) | FMODE_LSEEK |
                                FMODE_PREAD | FMODE_PWRITE;
        inode = dentry->d_inode;
        if (f->f_mode & FMODE_WRITE) {
                error = __get_file_write_access(inode, mnt);
                if (error)
                        goto cleanup_file;
                if (!special_file(inode->i_mode))
                        file_take_write(f);
        }

        f->f_mapping = inode->i_mapping;
        f->f_path.dentry = dentry;
        f->f_path.mnt = mnt;
        f->f_pos = 0;
        f->f_op = fops_get(inode->i_fop);
        file_move(f, &inode->i_sb->s_files);

        error = security_dentry_open(f);
        if (error)
                goto cleanup_all;

        if (!open && f->f_op)
                open = f->f_op->open;
        if (open) {
                error = open(inode, f);
                if (error)
                        goto cleanup_all;
        }

        f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);

        file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);

        /* NB: we're sure to have correct a_ops only after f_op->open */
        if (f->f_flags & O_DIRECT) {
                if (!f->f_mapping->a_ops ||
                    ((!f->f_mapping->a_ops->direct_IO) &&
                     (!f->f_mapping->a_ops->get_xip_mem))) {
                        fput(f);
                        f = ERR_PTR(-EINVAL);
                }
        }

        return f;

cleanup_all:
        fops_put(f->f_op);
        if (f->f_mode & FMODE_WRITE) {
                put_write_access(inode);
                if (!special_file(inode->i_mode)) {
                        /*
                         * We don't consider this a real
                         * mnt_want/drop_write() pair
                         * because it all happened right
                         * here, so just reset the state.
                         */
                        file_reset_write(f);
                        mnt_drop_write(mnt);
                }
        }
        file_kill(f);
        f->f_path.dentry = NULL;
        f->f_path.mnt = NULL;
cleanup_file:
        put_filp(f);
        dput(dentry);
        mntput(mnt);
        return ERR_PTR(error);
}
int sys_open(char *filename, int flags, int mode)
{
        struct inode *inode;
        register struct file *f;
        int error, fd, flag;

        f = get_empty_filp();
        if (!f) {
                printk("\nNo filps\n");
                return -ENFILE;
        }
        f->f_flags = (unsigned short int) (flag = flags);
        f->f_mode = (mode_t) ((flag + 1) & O_ACCMODE);
        if (f->f_mode)
                flag++;
        if (flag & (O_TRUNC | O_CREAT))
                flag |= 2;
        error = open_namei(filename, flag, mode, &inode, NULL);
        if (!error) {
#ifdef BLOAT_FS
                if (f->f_mode & FMODE_WRITE) {
                        error = get_write_access(inode);
                        if (error)
                                goto cleanup_inode;
                }
#endif
                f->f_inode = inode;
                f->f_pos = 0;
#ifdef BLOAT_FS
                f->f_reada = 0;
#endif
                f->f_op = NULL;
                {
                        register struct inode_operations *iop = inode->i_op;
                        if (iop)
                                f->f_op = iop->default_file_ops;
                }
                {
                        register struct file_operations *fop = f->f_op;
                        if (fop && fop->open) {
                                error = fop->open(inode, f);
                                if (error)
                                        goto cleanup_all;
                        }
                        f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);

                        /*
                         * We have to do this last, because we mustn't export
                         * an incomplete fd to other processes which may share
                         * the same file table with us.
                         */
                        if ((fd = get_unused_fd()) > -1) {
                                current->files.fd[fd] = f;
                                return fd;
                        }
                        error = -EMFILE;
                        if (fop && fop->release)
                                fop->release(inode, f);
                }
cleanup_all:
#ifdef BLOAT_FS
                if (f->f_mode & FMODE_WRITE)
                        put_write_access(inode);
cleanup_inode:
#endif
                iput(inode);
        }
cleanup_file:
        f->f_count--;
        return error;
}
/*
 * Don't grab the superblock read-lock in unionfs_permission, which prevents
 * a deadlock with the branch-management "add branch" code (which grabbed
 * the write lock).  It is safe to not grab the read lock here, because even
 * with branch management taking place, there is no chance that
 * unionfs_permission, or anything it calls, will use stale branch
 * information.
 */
static int unionfs_permission(struct inode *inode, int mask)
{
        struct inode *lower_inode = NULL;
        int err = 0;
        int bindex, bstart, bend;
        const int is_file = !S_ISDIR(inode->i_mode);
        const int write_mask = (mask & MAY_WRITE) && !(mask & MAY_READ);
        struct inode *inode_grabbed = igrab(inode);
        struct dentry *dentry = d_find_alias(inode);

        if (dentry)
                unionfs_lock_dentry(dentry, UNIONFS_DMUTEX_CHILD);

        if (!UNIONFS_I(inode)->lower_inodes) {
                if (is_file)    /* dirs can be unlinked but chdir'ed to */
                        err = -ESTALE;  /* force revalidate */
                goto out;
        }
        bstart = ibstart(inode);
        bend = ibend(inode);
        if (unlikely(bstart < 0 || bend < 0)) {
                /*
                 * With branch-management, we can get a stale inode here.
                 * If so, we return ESTALE back to link_path_walk, which
                 * would discard the dcache entry and re-lookup the
                 * dentry+inode.  This should be equivalent to issuing
                 * __unionfs_d_revalidate_chain on nd.dentry here.
                 */
                if (is_file)    /* dirs can be unlinked but chdir'ed to */
                        err = -ESTALE;  /* force revalidate */
                goto out;
        }

        for (bindex = bstart; bindex <= bend; bindex++) {
                lower_inode = unionfs_lower_inode_idx(inode, bindex);
                if (!lower_inode)
                        continue;

                /*
                 * check the condition for D-F-D underlying files/directories,
                 * we don't have to check for files, if we are checking for
                 * directories.
                 */
                if (!is_file && !S_ISDIR(lower_inode->i_mode))
                        continue;

                /*
                 * We check basic permissions, but we ignore any conditions
                 * such as readonly file systems or branches marked as
                 * readonly, because those conditions should lead to a
                 * copyup taking place later on.  However, if the user never
                 * had access to the file, then no copyup could ever take
                 * place.
                 */
                err = __inode_permission(lower_inode, mask);
                if (err && err != -EACCES && err != -EPERM && bindex > 0) {
                        umode_t mode = lower_inode->i_mode;
                        if ((is_robranch_super(inode->i_sb, bindex) ||
                             __is_rdonly(lower_inode)) &&
                            (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
                                err = 0;
                        if (IS_COPYUP_ERR(err))
                                err = 0;
                }

                /*
                 * NFS HACK: NFSv2/3 return EACCES on readonly-exported,
                 * locally readonly-mounted file systems, instead of EROFS
                 * like other file systems do.  So we have no choice here
                 * but to intercept this and ignore it for NFS branches
                 * marked readonly.  Specifically, we avoid using NFS's own
                 * "broken" ->permission method, and rely on
                 * generic_permission() to do basic checking for us.
                 */
                if (err && err == -EACCES &&
                    is_robranch_super(inode->i_sb, bindex) &&
                    lower_inode->i_sb->s_magic == NFS_SUPER_MAGIC)
                        err = generic_permission(lower_inode, mask, NULL);

                /*
                 * The permissions are an intersection of the overall
                 * directory permissions, so we fail if one fails.
                 */
                if (err)
                        goto out;

                /* only the leftmost file matters. */
                if (is_file || write_mask) {
                        if (is_file && write_mask) {
                                err = get_write_access(lower_inode);
                                if (!err)
                                        put_write_access(lower_inode);
                        }
                        break;
                }
        }
        /* sync times which may have changed (asynchronously) below */
        unionfs_copy_attr_times(inode);

out:
        unionfs_check_inode(inode);
        if (dentry) {
                unionfs_unlock_dentry(dentry);
                dput(dentry);
        }
        iput(inode_grabbed);
        return err;
}
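/*
 * The leftmost-file check above uses get_write_access() purely as a probe:
 * take write access and drop it immediately, keeping only the error code.
 * A hedged restatement as a standalone helper (the sketch_ prefix marks it
 * as illustrative, not unionfs code):
 */
static int sketch_probe_write_access(struct inode *inode)
{
        int err = get_write_access(inode);      /* -ETXTBSY if being executed */

        if (!err)
                put_write_access(inode);        /* we only wanted the verdict */
        return err;
}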
static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
                                  int flags, struct file *f,
                                  int (*open)(struct inode *, struct file *))
{
        struct inode *inode;
        int error;

        f->f_flags = flags;
        f->f_mode = ((flags+1) & O_ACCMODE) | FMODE_LSEEK |
                                FMODE_PREAD | FMODE_PWRITE;
        inode = dentry->d_inode;
        if (f->f_mode & FMODE_WRITE) {
                error = get_write_access(inode);
                if (error)
                        goto cleanup_file;
        }

        f->f_mapping = inode->i_mapping;
        f->f_dentry = dentry;
        f->f_vfsmnt = mnt;
        f->f_pos = 0;
        f->f_op = fops_get(inode->i_fop);
        file_move(f, &inode->i_sb->s_files);

        if (!open && f->f_op)
                open = f->f_op->open;
        if (open) {
                error = open(inode, f);
                if (error)
                        goto cleanup_all;
        }

        f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);

        file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);

        /* NB: we're sure to have correct a_ops only after f_op->open */
        if (f->f_flags & O_DIRECT) {
                if (!f->f_mapping->a_ops ||
                    ((!f->f_mapping->a_ops->direct_IO) &&
                     (!f->f_mapping->a_ops->get_xip_page))) {
                        fput(f);
                        f = ERR_PTR(-EINVAL);
                }
        }

        return f;

cleanup_all:
        fops_put(f->f_op);
        if (f->f_mode & FMODE_WRITE)
                put_write_access(inode);
        file_kill(f);
        f->f_dentry = NULL;
        f->f_vfsmnt = NULL;
cleanup_file:
        put_filp(f);
        dput(dentry);
        mntput(mnt);
        return ERR_PTR(error);
}
static int do_dentry_open(struct file *f,
                          int (*open)(struct inode *, struct file *),
                          const struct cred *cred)
{
        static const struct file_operations empty_fops = {};
        struct inode *inode;
        int error;

        f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK |
                                FMODE_PREAD | FMODE_PWRITE;

        if (unlikely(f->f_flags & O_PATH))
                f->f_mode = FMODE_PATH;

        path_get(&f->f_path);
        inode = f->f_inode = f->f_path.dentry->d_inode;
        if (f->f_mode & FMODE_WRITE && !special_file(inode->i_mode)) {
                error = __get_file_write_access(inode, f->f_path.mnt);
                if (error)
                        goto cleanup_file;
                file_take_write(f);
        }

        f->f_mapping = inode->i_mapping;
        file_sb_list_add(f, inode->i_sb);

        if (unlikely(f->f_mode & FMODE_PATH)) {
                f->f_op = &empty_fops;
                return 0;
        }

        f->f_op = fops_get(inode->i_fop);

        error = security_file_open(f, cred);
        if (error)
                goto cleanup_all;

        error = break_lease(inode, f->f_flags);
        if (error)
                goto cleanup_all;

        if (!open && f->f_op)
                open = f->f_op->open;
        if (open) {
                error = open(inode, f);
                if (error)
                        goto cleanup_all;
        }
        if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
                i_readcount_inc(inode);

        f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);

        file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);

        return 0;

cleanup_all:
        fops_put(f->f_op);
        file_sb_list_del(f);
        if (f->f_mode & FMODE_WRITE) {
                if (!special_file(inode->i_mode)) {
                        /*
                         * We don't consider this a real
                         * mnt_want/drop_write() pair
                         * because it all happened right
                         * here, so just reset the state.
                         */
                        put_write_access(inode);
                        file_reset_write(f);
                        __mnt_drop_write(f->f_path.mnt);
                }
        }
cleanup_file:
        path_put(&f->f_path);
        f->f_path.mnt = NULL;
        f->f_path.dentry = NULL;
        f->f_inode = NULL;
        return error;
}
extern int vnode_shadow_fop_open(INODE_T *inode, FILE_T *file)
{
        int err = 0;
        INODE_T *real_inode;
        DENT_T *rdentry = NULL;
        DENT_T *oldent;
        struct file_operations *oldfops;
        struct vfsmount *oldmnt, *newmnt;
        VNODE_T *cvp;

        oldmnt = file->f_vfsmnt;
        oldent = file->f_dentry;
        ASSERT(D_COUNT(oldent));

        /* The Linux kernel has stopped ignoring the O_DIRECT flag.
         * The problem is that they wait until after they call the fop open
         * function to check the inode to see if it will support direct I/O.
         * But they get the inode pointer before they call us and check the
         * inode after we return so they never check the actual inode we open
         * but only the shadow one.  Their error handling never comes back to
         * us and they release their old pointers and not our new ones.  The
         * only choice we have is to not allow O_DIRECT on shadow files.
         */
        if (file->f_flags & O_DIRECT) {
                err = -EINVAL;
                goto out_nolock;
        }

        /* Get the real dentry */
        rdentry = REALDENTRY_LOCKED(oldent, &cvp);
        if (rdentry == NULL) {
                err = -ENOENT;
                goto out_nolock;
        }
        VNODE_DGET(rdentry);            /* protect rdentry->d_inode */
        if (rdentry->d_inode == NULL) {
                /* delete race */
                err = -ENOENT;
                goto out;
        }
        newmnt = MDKI_MNTGET(REALVFSMNT(oldent));
        if (newmnt == NULL) {
                err = -EOPNOTSUPP;      /* XXX */
                goto out;
        }

        /* Check that we can write to this file.  Clean up the count on the
         * shadow inode.
         */
        if (file->f_mode & FMODE_WRITE) {
                err = get_write_access(rdentry->d_inode);
                if (err) {
                        MDKI_MNTPUT(newmnt);
                        goto out;
                }
        }

        real_inode = rdentry->d_inode;

        /*
         * Swap the file structure contents to point at the underlying object.
         */
        /* In Linux 2.6 they added the mapping stuff to the file so we have
        ** to set that up here, too.
        */
        file->f_mapping = real_inode->i_mapping;
        VNLAYER_RA_STATE_INIT(&(file->f_ra), file->f_mapping);
        file->f_dentry = VNODE_DGET(rdentry);
        oldfops = file->f_op;
        file->f_vfsmnt = newmnt;
        file->f_op = fops_get(real_inode->i_fop);
        if (real_inode->i_fop && !file->f_op)
                /* If we failed to get the reference to a non-NULL op, bail out */
                err = -EIO;             /* XXX? better error code */
        if (!err) {
                /* Move the file to the file list for the real superblock
                 * and remove it from the shadow list */
                /* It would be better to use file_move() but it's not exported */
                file_list_lock();
                list_del(&file->f_list);
                list_add(&file->f_list, &real_inode->i_sb->s_files);
                file_list_unlock();
                if (file->f_op && file->f_op->open) {
                        err = (*file->f_op->open)(real_inode, file);
                        if (err) {
                                /* restore our file to the list on our
                                 * super block */
                                file_list_lock();
                                list_del(&file->f_list);
                                list_add(&file->f_list,
                                         &oldent->d_inode->i_sb->s_files);
                                file_list_unlock();
                        }
                }
        }
        if (err) {
                /* MUST put back old dentry/fops to get accounting right in
                 * upper layer.
                 */
                put_write_access(rdentry->d_inode);
                if (file->f_dentry)
                        VNODE_DPUT(file->f_dentry);
                if (file->f_op)
                        fops_put(file->f_op);
                MDKI_MNTPUT(file->f_vfsmnt);
                file->f_vfsmnt = oldmnt;
                file->f_dentry = oldent;
                file->f_op = oldfops;
        } else {
                put_write_access(oldent->d_inode);
                VNODE_DPUT(oldent);
                /* Drop reference now that we've dropped our use of the
                 * file ops */
                fops_put(oldfops);
                MDKI_MNTPUT(oldmnt);
        }
out:
        VNODE_DPUT(rdentry);
        REALDENTRY_UNLOCK(oldent, cvp);
out_nolock:
        MDKI_TRACE(TRACE_OPEN,
                   "%s: opened vp=%p fp=%p rdent=%p rdcnt=%d fcnt=%d, err %d\n",
                   __func__, inode, file, rdentry,
                   rdentry ? D_COUNT(rdentry) : 0, F_COUNT(file), -err);
        return(err);
}