static int ecryptfs_flush(struct file *file, fl_owner_t td) { struct file *lower_file = ecryptfs_file_to_lower(file); if (lower_file->f_op && lower_file->f_op->flush) { filemap_write_and_wait(file->f_mapping); return lower_file->f_op->flush(lower_file, td); } return 0; }
int v9fs_dir_release(struct inode *inode, struct file *filp) { struct p9_fid *fid; fid = filp->private_data; P9_DPRINTK(P9_DEBUG_VFS, "inode: %p filp: %p fid: %d\n", inode, filp, fid->fid); filemap_write_and_wait(inode->i_mapping); p9_client_clunk(fid); return 0; }
static int ecryptfs_fsync(struct file *file, loff_t start, loff_t end, int datasync) { int rc; rc = filemap_write_and_wait(file->f_mapping); if (rc) return rc; return vfs_fsync(ecryptfs_file_to_lower(file), datasync); }
int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root) { if (!trans || !trans->transaction) { struct inode *btree_inode; btree_inode = root->fs_info->btree_inode; return filemap_write_and_wait(btree_inode->i_mapping); } return btrfs_write_and_wait_marked_extents(root, &trans->transaction->dirty_pages, EXTENT_DIRTY); }
int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr) { int retval; struct v9fs_session_info *v9ses; struct p9_fid *fid; struct p9_iattr_dotl p9attr; P9_DPRINTK(P9_DEBUG_VFS, "\n"); retval = inode_change_ok(dentry->d_inode, iattr); if (retval) return retval; p9attr.valid = iattr->ia_valid; p9attr.mode = iattr->ia_mode; p9attr.uid = iattr->ia_uid; p9attr.gid = iattr->ia_gid; p9attr.size = iattr->ia_size; p9attr.atime_sec = iattr->ia_atime.tv_sec; p9attr.atime_nsec = iattr->ia_atime.tv_nsec; p9attr.mtime_sec = iattr->ia_mtime.tv_sec; p9attr.mtime_nsec = iattr->ia_mtime.tv_nsec; retval = -EPERM; v9ses = v9fs_dentry2v9ses(dentry); fid = v9fs_fid_lookup(dentry); if (IS_ERR(fid)) return PTR_ERR(fid); /* Write all dirty data */ if (S_ISREG(dentry->d_inode->i_mode)) filemap_write_and_wait(dentry->d_inode->i_mapping); retval = p9_client_setattr(fid, &p9attr); if (retval < 0) return retval; if ((iattr->ia_valid & ATTR_SIZE) && iattr->ia_size != i_size_read(dentry->d_inode)) truncate_setsize(dentry->d_inode, iattr->ia_size); v9fs_invalidate_inode_attr(dentry->d_inode); setattr_copy(dentry->d_inode, iattr); mark_inode_dirty(dentry->d_inode); if (iattr->ia_valid & ATTR_MODE) { /* We also want to update ACL when we update mode bits */ retval = v9fs_acl_chmod(dentry); if (retval < 0) return retval; } return 0; }
static int tierfs_fsync(struct file *file, loff_t start, loff_t end, int datasync) { int rc; TRACE_ENTRY(); rc = filemap_write_and_wait(file->f_mapping); if (rc) return rc; TRACE_EXIT(); return vfs_fsync(tierfs_file_to_lower(file), datasync); }
static int lofs_fsync(FSYNC_ARGS(struct file *file, struct dentry *dentry, loff_t start, loff_t end, int datasync)) { struct file *lower = lofs_file_to_lower(file); int result = 0; /* Make sure the LOFS pages are flushed out to the lower filesystem. * Different kernels have different utility functions to help with * this; in the worst case (2.6.9) we roll our own. */ #if defined(HAVE_OFFSET_IN_FSYNC) /* This is the 3.2.0+ version. */ result = filemap_write_and_wait_range(file->f_mapping, start, end); #elif defined(HAVE_FILEMAP_WRITE_AND_WAIT) /* This is the 2.6.18 - 3.2.0 version. */ result = filemap_write_and_wait(file->f_mapping); #else /* This is for versions prior to 2.6.18. This is basically a copy of * the implementation of filemap_write_and_wait, which unfortunately was * not added until 2.6.18 or so. */ if (file->f_mapping->nrpages) { result = filemap_fdatawrite(file->f_mapping); if (result != -EIO) { int result2 = filemap_fdatawait(file->f_mapping); if (result == 0) { result = result2; } } } #endif if (result != 0) { return result; } /* Then give the lower filesystem a chance to do its own sync. */ return FSYNC_HELPER(FSYNC_ARGS(lower, lofs_dentry_to_lower(dentry), start, end, datasync)); }
/* * set the attributes of an inode */ int afs_setattr(struct dentry *dentry, struct iattr *attr) { struct afs_fs_cursor fc; struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry)); struct key *key; int ret; _enter("{%x:%u},{n=%pd},%x", vnode->fid.vid, vnode->fid.vnode, dentry, attr->ia_valid); if (!(attr->ia_valid & (ATTR_SIZE | ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_MTIME))) { _leave(" = 0 [unsupported]"); return 0; } /* flush any dirty data outstanding on a regular file */ if (S_ISREG(vnode->vfs_inode.i_mode)) filemap_write_and_wait(vnode->vfs_inode.i_mapping); if (attr->ia_valid & ATTR_FILE) { key = afs_file_key(attr->ia_file); } else { key = afs_request_key(vnode->volume->cell); if (IS_ERR(key)) { ret = PTR_ERR(key); goto error; } } ret = -ERESTARTSYS; if (afs_begin_vnode_operation(&fc, vnode, key)) { while (afs_select_fileserver(&fc)) { fc.cb_break = vnode->cb_break + vnode->cb_s_break; afs_fs_setattr(&fc, attr); } afs_check_for_remote_deletion(&fc, fc.vnode); afs_vnode_commit_status(&fc, vnode, fc.cb_break); ret = afs_end_vnode_operation(&fc); } if (!(attr->ia_valid & ATTR_FILE)) key_put(key); error: _leave(" = %d", ret); return ret; }
int jfs_umount(struct super_block *sb) { struct jfs_sb_info *sbi = JFS_SBI(sb); struct inode *ipbmap = sbi->ipbmap; struct inode *ipimap = sbi->ipimap; struct inode *ipaimap = sbi->ipaimap; struct inode *ipaimap2 = sbi->ipaimap2; struct jfs_log *log; int rc = 0; jfs_info("UnMount JFS: sb:0x%p", sb); if ((log = sbi->log)) /* * Wait for outstanding transactions to be written to log: */ jfs_flush_journal(log, 2); diUnmount(ipimap, 0); diFreeSpecial(ipimap); sbi->ipimap = NULL; ipaimap2 = sbi->ipaimap2; if (ipaimap2) { diUnmount(ipaimap2, 0); diFreeSpecial(ipaimap2); sbi->ipaimap2 = NULL; } ipaimap = sbi->ipaimap; diUnmount(ipaimap, 0); diFreeSpecial(ipaimap); sbi->ipaimap = NULL; dbUnmount(ipbmap, 0); diFreeSpecial(ipbmap); sbi->ipimap = NULL; filemap_write_and_wait(sbi->direct_inode->i_mapping); if (log) { updateSuper(sb, FM_CLEAN); rc = lmLogClose(sb); } jfs_info("UnMount JFS Complete: rc = %d", rc); return rc; }
static int smb_file_release(struct inode *inode, struct file * file) { lock_kernel(); if (!--SMB_I(inode)->openers) { /* We must flush any dirty pages now as we won't be able to write anything after close. mmap can trigger this. "openers" should perhaps include mmap'ers ... */ filemap_write_and_wait(inode->i_mapping); smb_close(inode); } unlock_kernel(); return 0; }
/* * dbSync() */ int dbSync(struct inode *ipbmap) { struct dbmap_disk *dbmp_le; struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap; struct metapage *mp; int i; /* * write bmap global control page */ /* get the buffer for the on-disk bmap descriptor. */ mp = read_metapage(ipbmap, BMAPBLKNO << JFS_SBI(ipbmap->i_sb)->l2nbperpage, PSIZE, 0); if (mp == NULL) { jfs_err("dbSync: read_metapage failed!"); return -EIO; } /* copy the in-memory version of the bmap to the on-disk version */ dbmp_le = (struct dbmap_disk *) mp->data; dbmp_le->dn_mapsize = cpu_to_le64(bmp->db_mapsize); dbmp_le->dn_nfree = cpu_to_le64(bmp->db_nfree); dbmp_le->dn_l2nbperpage = cpu_to_le32(bmp->db_l2nbperpage); dbmp_le->dn_numag = cpu_to_le32(bmp->db_numag); dbmp_le->dn_maxlevel = cpu_to_le32(bmp->db_maxlevel); dbmp_le->dn_maxag = cpu_to_le32(bmp->db_maxag); dbmp_le->dn_agpref = cpu_to_le32(bmp->db_agpref); dbmp_le->dn_aglevel = cpu_to_le32(bmp->db_aglevel); dbmp_le->dn_agheight = cpu_to_le32(bmp->db_agheight); dbmp_le->dn_agwidth = cpu_to_le32(bmp->db_agwidth); dbmp_le->dn_agstart = cpu_to_le32(bmp->db_agstart); dbmp_le->dn_agl2size = cpu_to_le32(bmp->db_agl2size); for (i = 0; i < MAXAG; i++) dbmp_le->dn_agfree[i] = cpu_to_le64(bmp->db_agfree[i]); dbmp_le->dn_agsize = cpu_to_le64(bmp->db_agsize); dbmp_le->dn_maxfreebud = bmp->db_maxfreebud; /* write the buffer */ write_metapage(mp); /* * write out dirty pages of bmap */ filemap_write_and_wait(ipbmap->i_mapping); diWriteSpecial(ipbmap, 0); return (0); }
static int tierfs_flush(struct file *file, fl_owner_t td) { struct file *lower_file = tierfs_file_to_lower(file); TRACE_ENTRY(); if (lower_file->f_op) { if (lower_file->f_op->flush ) { filemap_write_and_wait(file->f_mapping); return lower_file->f_op->flush(lower_file, td); } } TRACE_EXIT(); return 0; }
/* * Pre-COW all shared blocks within a given byte range of a file and turn off * the reflink flag if we unshare all of the file's blocks. */ int xfs_reflink_unshare( struct xfs_inode *ip, xfs_off_t offset, xfs_off_t len) { struct xfs_mount *mp = ip->i_mount; xfs_fileoff_t fbno; xfs_filblks_t end; xfs_off_t isize; int error; if (!xfs_is_reflink_inode(ip)) return 0; trace_xfs_reflink_unshare(ip, offset, len); inode_dio_wait(VFS_I(ip)); /* Try to CoW the selected ranges */ xfs_ilock(ip, XFS_ILOCK_EXCL); fbno = XFS_B_TO_FSBT(mp, offset); isize = i_size_read(VFS_I(ip)); end = XFS_B_TO_FSB(mp, offset + len); error = xfs_reflink_dirty_extents(ip, fbno, end, isize); if (error) goto out_unlock; xfs_iunlock(ip, XFS_ILOCK_EXCL); /* Wait for the IO to finish */ error = filemap_write_and_wait(VFS_I(ip)->i_mapping); if (error) goto out; /* Turn off the reflink flag if possible. */ error = xfs_reflink_try_clear_inode_flag(ip); if (error) goto out; return 0; out_unlock: xfs_iunlock(ip, XFS_ILOCK_EXCL); out: trace_xfs_reflink_unshare_error(ip, error, _RET_IP_); return error; }
static int ioctl_fiemap(struct file *filp, unsigned long arg) { struct fiemap fiemap; struct fiemap_extent_info fieinfo = { 0, }; struct inode *inode = filp->f_dentry->d_inode; struct super_block *sb = inode->i_sb; u64 len; int error; if (!(inode->i_sb->s_type->fs_flags & FS_HAS_FIEMAP)) return -EOPNOTSUPP; if (!inode->i_op->fiemap) return -EOPNOTSUPP; if (copy_from_user(&fiemap, (struct fiemap __user *)arg, sizeof(struct fiemap))) return -EFAULT; if (fiemap.fm_extent_count > FIEMAP_MAX_EXTENTS) return -EINVAL; error = fiemap_check_ranges(sb, fiemap.fm_start, fiemap.fm_length, &len); if (error) return error; fieinfo.fi_flags = fiemap.fm_flags; fieinfo.fi_extents_max = fiemap.fm_extent_count; fieinfo.fi_extents_start = (struct fiemap_extent *)(arg + sizeof(fiemap)); if (fiemap.fm_extent_count != 0 && !access_ok(VERIFY_WRITE, fieinfo.fi_extents_start, fieinfo.fi_extents_max * sizeof(struct fiemap_extent))) return -EFAULT; if (fieinfo.fi_flags & FIEMAP_FLAG_SYNC) filemap_write_and_wait(inode->i_mapping); error = inode->i_op->fiemap(inode, &fieinfo, fiemap.fm_start, len); fiemap.fm_flags = fieinfo.fi_flags; fiemap.fm_mapped_extents = fieinfo.fi_extents_mapped; if (copy_to_user((char *)arg, &fiemap, sizeof(fiemap))) error = -EFAULT; return error; }
/* * If we are changing DAX flags, we have to ensure the file is clean and any * cached objects in the address space are invalidated and removed. This * requires us to lock out other IO and page faults similar to a truncate * operation. The locks need to be held until the transaction has been committed * so that the cache invalidation is atomic with respect to the DAX flag * manipulation. */ static int xfs_ioctl_setattr_dax_invalidate( struct xfs_inode *ip, struct fsxattr *fa, int *join_flags) { struct inode *inode = VFS_I(ip); int error; *join_flags = 0; /* * It is only valid to set the DAX flag on regular files and * directories on filesystems where the block size is equal to the page * size. On directories it serves as an inherit hint. */ if (fa->fsx_xflags & FS_XFLAG_DAX) { if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) return -EINVAL; if (ip->i_mount->m_sb.sb_blocksize != PAGE_SIZE) return -EINVAL; } /* If the DAX state is not changing, we have nothing to do here. */ if ((fa->fsx_xflags & FS_XFLAG_DAX) && IS_DAX(inode)) return 0; if (!(fa->fsx_xflags & FS_XFLAG_DAX) && !IS_DAX(inode)) return 0; /* lock, flush and invalidate mapping in preparation for flag change */ xfs_ilock(ip, XFS_MMAPLOCK_EXCL | XFS_IOLOCK_EXCL); error = filemap_write_and_wait(inode->i_mapping); if (error) goto out_unlock; error = invalidate_inode_pages2(inode->i_mapping); if (error) goto out_unlock; *join_flags = XFS_MMAPLOCK_EXCL | XFS_IOLOCK_EXCL; return 0; out_unlock: xfs_iunlock(ip, XFS_MMAPLOCK_EXCL | XFS_IOLOCK_EXCL); return error; }
static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl) { int res = 0; struct inode *inode = filp->f_path.dentry->d_inode; P9_DPRINTK(P9_DEBUG_VFS, "filp: %p lock: %p\n", filp, fl); /* No mandatory locks */ if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK) return -ENOLCK; if ((IS_SETLK(cmd) || IS_SETLKW(cmd)) && fl->fl_type != F_UNLCK) { filemap_write_and_wait(inode->i_mapping); invalidate_mapping_pages(&inode->i_data, 0, -1); } return res; }
int xfs_flushinval_pages( xfs_inode_t *ip, xfs_off_t first, xfs_off_t last, int fiopt) { struct address_space *mapping = ip->i_vnode->i_mapping; int ret = 0; if (mapping->nrpages) { xfs_iflags_clear(ip, XFS_ITRUNCATED); ret = filemap_write_and_wait(mapping); if (!ret) truncate_inode_pages(mapping, first); } return ret; }
int jfs_umount_rw(struct super_block *sb) { struct jfs_sb_info *sbi = JFS_SBI(sb); struct jfs_log *log = sbi->log; if (!log) return 0; jfs_flush_journal(log, 2); dbSync(sbi->ipbmap); diSync(sbi->ipimap); filemap_write_and_wait(sbi->direct_inode->i_mapping); updateSuper(sb, FM_CLEAN); return lmLogClose(sb); }
/* * set the attributes of an inode */ int afs_setattr(struct dentry *dentry, struct iattr *attr) { struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry)); struct key *key; int ret; _enter("{%x:%u},{n=%pd},%x", vnode->fid.vid, vnode->fid.vnode, dentry, attr->ia_valid); if (!(attr->ia_valid & (ATTR_SIZE | ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_MTIME))) { _leave(" = 0 [unsupported]"); return 0; } /* flush any dirty data outstanding on a regular file */ if (S_ISREG(vnode->vfs_inode.i_mode)) { filemap_write_and_wait(vnode->vfs_inode.i_mapping); afs_writeback_all(vnode); } if (attr->ia_valid & ATTR_FILE) { key = attr->ia_file->private_data; } else { key = afs_request_key(vnode->volume->cell); if (IS_ERR(key)) { ret = PTR_ERR(key); goto error; } } ret = afs_vnode_setattr(vnode, key, attr); if (!(attr->ia_valid & ATTR_FILE)) key_put(key); error: _leave(" = %d", ret); return ret; }
static int exofs_file_fsync(struct file *filp, int datasync) { int ret; struct address_space *mapping = filp->f_mapping; struct inode *inode = mapping->host; struct super_block *sb; ret = filemap_write_and_wait(mapping); if (ret) return ret; /* sync the inode attributes */ ret = write_inode_now(inode, 1); /* This is a good place to write the sb */ /* TODO: Sechedule an sb-sync on create */ sb = inode->i_sb; if (sb->s_dirt) exofs_sync_fs(sb, 1); return ret; }
static void xen_update_blkif_status(struct xen_blkif *blkif) { int err; char name[TASK_COMM_LEN]; /* Not ready to connect? */ if (!blkif->irq || !blkif->vbd.bdev) return; /* Already connected? */ if (blkif->be->dev->state == XenbusStateConnected) return; /* Attempt to connect: exit if we fail to. */ connect(blkif->be); if (blkif->be->dev->state != XenbusStateConnected) return; err = blkback_name(blkif, name); if (err) { xenbus_dev_error(blkif->be->dev, err, "get blkback dev name"); return; } err = filemap_write_and_wait(blkif->vbd.bdev->bd_inode->i_mapping); if (err) { xenbus_dev_error(blkif->be->dev, err, "block flush"); return; } invalidate_inode_pages2(blkif->vbd.bdev->bd_inode->i_mapping); blkif->xenblkd = kthread_run(xen_blkif_schedule, blkif, "%s", name); if (IS_ERR(blkif->xenblkd)) { err = PTR_ERR(blkif->xenblkd); blkif->xenblkd = NULL; xenbus_dev_error(blkif->be->dev, err, "start xenblkd"); } }
static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { struct page *page = vmf->page; struct inode *inode = vma->vm_file->f_path.dentry->d_inode; struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); struct gfs2_alloc_parms ap = { .aflags = 0, }; unsigned long last_index; u64 pos = page->index << PAGE_CACHE_SHIFT; unsigned int data_blocks, ind_blocks, rblocks; int alloc_required = 0; struct gfs2_holder gh; loff_t size; int ret; sb_start_pagefault(inode->i_sb); /* Update file times before taking page lock */ file_update_time(vma->vm_file); ret = get_write_access(inode); if (ret) goto out; ret = gfs2_rs_alloc(ip); if (ret) goto out_write_access; gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); ret = gfs2_glock_nq(&gh); if (ret) goto out_uninit; set_bit(GLF_DIRTY, &ip->i_gl->gl_flags); set_bit(GIF_SW_PAGED, &ip->i_flags); gfs2_size_hint(inode, pos, PAGE_CACHE_SIZE); ret = gfs2_write_alloc_required(ip, pos, PAGE_CACHE_SIZE, &alloc_required); if (ret) goto out_unlock; if (!alloc_required) { lock_page(page); if (!PageUptodate(page) || page->mapping != inode->i_mapping) { ret = -EAGAIN; unlock_page(page); } goto out_unlock; } ret = gfs2_rindex_update(sdp); if (ret) goto out_unlock; ret = gfs2_quota_lock_check(ip); if (ret) goto out_unlock; gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks); ap.target = data_blocks + ind_blocks; ret = gfs2_inplace_reserve(ip, &ap); if (ret) goto out_quota_unlock; rblocks = RES_DINODE + ind_blocks; if (gfs2_is_jdata(ip)) rblocks += data_blocks ? data_blocks : 1; if (ind_blocks || data_blocks) { rblocks += RES_STATFS + RES_QUOTA; rblocks += gfs2_rg_blocks(ip, data_blocks + ind_blocks); } ret = gfs2_trans_begin(sdp, rblocks, 0); if (ret) goto out_trans_fail; lock_page(page); ret = -EINVAL; size = i_size_read(inode); last_index = (size - 1) >> PAGE_CACHE_SHIFT; /* Check page index against inode size */ if (size == 0 || (page->index > last_index)) goto out_trans_end; ret = -EAGAIN; /* If truncated, we must retry the operation, we may have raced * with the glock demotion code. */ if (!PageUptodate(page) || page->mapping != inode->i_mapping) goto out_trans_end; /* Unstuff, if required, and allocate backing blocks for page */ ret = 0; if (gfs2_is_stuffed(ip)) ret = gfs2_unstuff_dinode(ip, page); if (ret == 0) ret = gfs2_allocate_page_backing(page); out_trans_end: if (ret) unlock_page(page); gfs2_trans_end(sdp); out_trans_fail: gfs2_inplace_release(ip); out_quota_unlock: gfs2_quota_unlock(ip); out_unlock: gfs2_glock_dq(&gh); out_uninit: gfs2_holder_uninit(&gh); if (ret == 0) { set_page_dirty(page); wait_for_stable_page(page); } out_write_access: put_write_access(inode); out: sb_end_pagefault(inode->i_sb); return block_page_mkwrite_return(ret); } static const struct vm_operations_struct gfs2_vm_ops = { .fault = filemap_fault, .page_mkwrite = gfs2_page_mkwrite, }; /** * gfs2_mmap - * @file: The file to map * @vma: The VMA which described the mapping * * There is no need to get a lock here unless we should be updating * atime. We ignore any locking errors since the only consequence is * a missed atime update (which will just be deferred until later). * * Returns: 0 */ static int gfs2_mmap(struct file *file, struct vm_area_struct *vma) { struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); if (!(file->f_flags & O_NOATIME) && !IS_NOATIME(&ip->i_inode)) { struct gfs2_holder i_gh; int error; error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); if (error) return error; /* grab lock to update inode */ gfs2_glock_dq_uninit(&i_gh); file_accessed(file); } vma->vm_ops = &gfs2_vm_ops; vma->vm_flags |= VM_CAN_NONLINEAR; return 0; } /** * gfs2_open - open a file * @inode: the inode to open * @file: the struct file for this opening * * Returns: errno */ static int gfs2_open(struct inode *inode, struct file *file) { struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_holder i_gh; struct gfs2_file *fp; int error; fp = kzalloc(sizeof(struct gfs2_file), GFP_KERNEL); if (!fp) return -ENOMEM; mutex_init(&fp->f_fl_mutex); gfs2_assert_warn(GFS2_SB(inode), !file->private_data); file->private_data = fp; if (S_ISREG(ip->i_inode.i_mode)) { error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); if (error) goto fail; if (!(file->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS) { error = -EOVERFLOW; goto fail_gunlock; } gfs2_glock_dq_uninit(&i_gh); } return 0; fail_gunlock: gfs2_glock_dq_uninit(&i_gh); fail: file->private_data = NULL; kfree(fp); return error; } /** * gfs2_release - called to close a struct file * @inode: the inode the struct file belongs to * @file: the struct file being closed * * Returns: errno */ static int gfs2_release(struct inode *inode, struct file *file) { struct gfs2_inode *ip = GFS2_I(inode); kfree(file->private_data); file->private_data = NULL; if (!(file->f_mode & FMODE_WRITE)) return 0; gfs2_rs_delete(ip); return 0; } /** * gfs2_fsync - sync the dirty data for a file (across the cluster) * @file: the file that points to the dentry * @start: the start position in the file to sync * @end: the end position in the file to sync * @datasync: set if we can ignore timestamp changes * * The VFS will flush data for us. We only need to worry * about metadata here. * * Returns: errno */ static int gfs2_fsync(struct file *file, struct dentry *dentry, int datasync) { struct inode *inode = dentry->d_inode; int sync_state = inode->i_state & I_DIRTY; struct gfs2_inode *ip = GFS2_I(inode); int ret; if (!gfs2_is_jdata(ip)) sync_state &= ~I_DIRTY_PAGES; if (datasync) sync_state &= ~I_DIRTY_SYNC; if (sync_state) { ret = sync_inode_metadata(inode, 1); if (ret) return ret; if (gfs2_is_jdata(ip)) filemap_write_and_wait(inode->i_mapping); gfs2_ail_flush(ip->i_gl, 1); } return 0; } /** * gfs2_file_aio_write - Perform a write to a file * @iocb: The io context * @iov: The data to write * @nr_segs: Number of @iov segments * @pos: The file position * * We have to do a lock/unlock here to refresh the inode size for * O_APPEND writes, otherwise we can land up writing at the wrong * offset. There is still a race, but provided the app is using its * own file locking, this will make O_APPEND work as expected. * */ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { struct file *file = iocb->ki_filp; size_t writesize = iov_length(iov, nr_segs); struct dentry *dentry = file->f_dentry; struct gfs2_inode *ip = GFS2_I(dentry->d_inode); struct gfs2_sbd *sdp; int ret; sdp = GFS2_SB(file->f_mapping->host); ret = gfs2_rs_alloc(ip); if (ret) return ret; gfs2_size_hint(file->f_dentry->d_inode, pos, writesize); if (file->f_flags & O_APPEND) { struct gfs2_holder gh; ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh); if (ret) return ret; gfs2_glock_dq_uninit(&gh); } return generic_file_aio_write(iocb, iov, nr_segs, pos); } static ssize_t gfs2_file_splice_write(struct pipe_inode_info *pipe, struct file *out, loff_t *ppos, size_t len, unsigned int flags) { int error; struct inode *inode = out->f_mapping->host; struct gfs2_inode *ip = GFS2_I(inode); error = gfs2_rs_alloc(ip); if (error) return (ssize_t)error; gfs2_size_hint(inode, *ppos, len); return generic_file_splice_write(pipe, out, ppos, len, flags); }
static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { struct page *page = vmf->page; struct inode *inode = file_inode(vma->vm_file); struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); struct gfs2_alloc_parms ap = { .aflags = 0, }; unsigned long last_index; u64 pos = page->index << PAGE_CACHE_SHIFT; unsigned int data_blocks, ind_blocks, rblocks; struct gfs2_holder gh; loff_t size; int ret; sb_start_pagefault(inode->i_sb); /* Update file times before taking page lock */ file_update_time(vma->vm_file); ret = get_write_access(inode); if (ret) goto out; ret = gfs2_rs_alloc(ip); if (ret) goto out_write_access; gfs2_size_hint(vma->vm_file, pos, PAGE_CACHE_SIZE); gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); ret = gfs2_glock_nq(&gh); if (ret) goto out_uninit; set_bit(GLF_DIRTY, &ip->i_gl->gl_flags); set_bit(GIF_SW_PAGED, &ip->i_flags); if (!gfs2_write_alloc_required(ip, pos, PAGE_CACHE_SIZE)) { lock_page(page); if (!PageUptodate(page) || page->mapping != inode->i_mapping) { ret = -EAGAIN; unlock_page(page); } goto out_unlock; } ret = gfs2_rindex_update(sdp); if (ret) goto out_unlock; ret = gfs2_quota_lock_check(ip); if (ret) goto out_unlock; gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks); ap.target = data_blocks + ind_blocks; ret = gfs2_inplace_reserve(ip, &ap); if (ret) goto out_quota_unlock; rblocks = RES_DINODE + ind_blocks; if (gfs2_is_jdata(ip)) rblocks += data_blocks ? data_blocks : 1; if (ind_blocks || data_blocks) { rblocks += RES_STATFS + RES_QUOTA; rblocks += gfs2_rg_blocks(ip, data_blocks + ind_blocks); } ret = gfs2_trans_begin(sdp, rblocks, 0); if (ret) goto out_trans_fail; lock_page(page); ret = -EINVAL; size = i_size_read(inode); last_index = (size - 1) >> PAGE_CACHE_SHIFT; /* Check page index against inode size */ if (size == 0 || (page->index > last_index)) goto out_trans_end; ret = -EAGAIN; /* If truncated, we must retry the operation, we may have raced * with the glock demotion code. */ if (!PageUptodate(page) || page->mapping != inode->i_mapping) goto out_trans_end; /* Unstuff, if required, and allocate backing blocks for page */ ret = 0; if (gfs2_is_stuffed(ip)) ret = gfs2_unstuff_dinode(ip, page); if (ret == 0) ret = gfs2_allocate_page_backing(page); out_trans_end: if (ret) unlock_page(page); gfs2_trans_end(sdp); out_trans_fail: gfs2_inplace_release(ip); out_quota_unlock: gfs2_quota_unlock(ip); out_unlock: gfs2_glock_dq(&gh); out_uninit: gfs2_holder_uninit(&gh); if (ret == 0) { set_page_dirty(page); wait_for_stable_page(page); } out_write_access: put_write_access(inode); out: sb_end_pagefault(inode->i_sb); return block_page_mkwrite_return(ret); } static const struct vm_operations_struct gfs2_vm_ops = { .fault = filemap_fault, .map_pages = filemap_map_pages, .page_mkwrite = gfs2_page_mkwrite, .remap_pages = generic_file_remap_pages, }; /** * gfs2_mmap - * @file: The file to map * @vma: The VMA which described the mapping * * There is no need to get a lock here unless we should be updating * atime. We ignore any locking errors since the only consequence is * a missed atime update (which will just be deferred until later). * * Returns: 0 */ static int gfs2_mmap(struct file *file, struct vm_area_struct *vma) { struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); if (!(file->f_flags & O_NOATIME) && !IS_NOATIME(&ip->i_inode)) { struct gfs2_holder i_gh; int error; error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); if (error) return error; /* grab lock to update inode */ gfs2_glock_dq_uninit(&i_gh); file_accessed(file); } vma->vm_ops = &gfs2_vm_ops; return 0; } /** * gfs2_open_common - This is common to open and atomic_open * @inode: The inode being opened * @file: The file being opened * * This maybe called under a glock or not depending upon how it has * been called. We must always be called under a glock for regular * files, however. For other file types, it does not matter whether * we hold the glock or not. * * Returns: Error code or 0 for success */ int gfs2_open_common(struct inode *inode, struct file *file) { struct gfs2_file *fp; int ret; if (S_ISREG(inode->i_mode)) { ret = generic_file_open(inode, file); if (ret) return ret; } fp = kzalloc(sizeof(struct gfs2_file), GFP_NOFS); if (!fp) return -ENOMEM; mutex_init(&fp->f_fl_mutex); gfs2_assert_warn(GFS2_SB(inode), !file->private_data); file->private_data = fp; return 0; } /** * gfs2_open - open a file * @inode: the inode to open * @file: the struct file for this opening * * After atomic_open, this function is only used for opening files * which are already cached. We must still get the glock for regular * files to ensure that we have the file size uptodate for the large * file check which is in the common code. That is only an issue for * regular files though. * * Returns: errno */ static int gfs2_open(struct inode *inode, struct file *file) { struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_holder i_gh; int error; bool need_unlock = false; if (S_ISREG(ip->i_inode.i_mode)) { error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); if (error) return error; need_unlock = true; } error = gfs2_open_common(inode, file); if (need_unlock) gfs2_glock_dq_uninit(&i_gh); return error; } /** * gfs2_release - called to close a struct file * @inode: the inode the struct file belongs to * @file: the struct file being closed * * Returns: errno */ static int gfs2_release(struct inode *inode, struct file *file) { struct gfs2_inode *ip = GFS2_I(inode); kfree(file->private_data); file->private_data = NULL; if (!(file->f_mode & FMODE_WRITE)) return 0; gfs2_rs_delete(ip, &inode->i_writecount); return 0; } /** * gfs2_fsync - sync the dirty data for a file (across the cluster) * @file: the file that points to the dentry * @start: the start position in the file to sync * @end: the end position in the file to sync * @datasync: set if we can ignore timestamp changes * * We split the data flushing here so that we don't wait for the data * until after we've also sent the metadata to disk. Note that for * data=ordered, we will write & wait for the data at the log flush * stage anyway, so this is unlikely to make much of a difference * except in the data=writeback case. * * If the fdatawrite fails due to any reason except -EIO, we will * continue the remainder of the fsync, although we'll still report * the error at the end. This is to match filemap_write_and_wait_range() * behaviour. * * Returns: errno */ static int gfs2_fsync(struct file *file, loff_t start, loff_t end, int datasync) { struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; int sync_state = inode->i_state & I_DIRTY; struct gfs2_inode *ip = GFS2_I(inode); int ret = 0, ret1 = 0; if (mapping->nrpages) { ret1 = filemap_fdatawrite_range(mapping, start, end); if (ret1 == -EIO) return ret1; } if (!gfs2_is_jdata(ip)) sync_state &= ~I_DIRTY_PAGES; if (datasync) sync_state &= ~I_DIRTY_SYNC; if (sync_state) { ret = sync_inode_metadata(inode, 1); if (ret) return ret; if (gfs2_is_jdata(ip)) filemap_write_and_wait(mapping); gfs2_ail_flush(ip->i_gl, 1); } if (mapping->nrpages) ret = filemap_fdatawait_range(mapping, start, end); return ret ? ret : ret1; } /** * gfs2_file_aio_write - Perform a write to a file * @iocb: The io context * @iov: The data to write * @nr_segs: Number of @iov segments * @pos: The file position * * We have to do a lock/unlock here to refresh the inode size for * O_APPEND writes, otherwise we can land up writing at the wrong * offset. There is still a race, but provided the app is using its * own file locking, this will make O_APPEND work as expected. * */ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { struct file *file = iocb->ki_filp; size_t writesize = iov_length(iov, nr_segs); struct gfs2_inode *ip = GFS2_I(file_inode(file)); int ret; ret = gfs2_rs_alloc(ip); if (ret) return ret; gfs2_size_hint(file, pos, writesize); if (file->f_flags & O_APPEND) { struct gfs2_holder gh; ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh); if (ret) return ret; gfs2_glock_dq_uninit(&gh); } return generic_file_aio_write(iocb, iov, nr_segs, pos); } static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len, int mode) { struct gfs2_inode *ip = GFS2_I(inode); struct buffer_head *dibh; int error; loff_t size = len; unsigned int nr_blks; sector_t lblock = offset >> inode->i_blkbits; error = gfs2_meta_inode_buffer(ip, &dibh); if (unlikely(error)) return error; gfs2_trans_add_meta(ip->i_gl, dibh); if (gfs2_is_stuffed(ip)) { error = gfs2_unstuff_dinode(ip, NULL); if (unlikely(error)) goto out; } while (len) { struct buffer_head bh_map = { .b_state = 0, .b_blocknr = 0 }; bh_map.b_size = len; set_buffer_zeronew(&bh_map); error = gfs2_block_map(inode, lblock, &bh_map, 1); if (unlikely(error)) goto out; len -= bh_map.b_size; nr_blks = bh_map.b_size >> inode->i_blkbits; lblock += nr_blks; if (!buffer_new(&bh_map)) continue; if (unlikely(!buffer_zeronew(&bh_map))) { error = -EIO; goto out; } } if (offset + size > inode->i_size && !(mode & FALLOC_FL_KEEP_SIZE)) i_size_write(inode, offset + size); mark_inode_dirty(inode); out: brelse(dibh); return error; } static void calc_max_reserv(struct gfs2_inode *ip, loff_t max, loff_t *len, unsigned int *data_blocks, unsigned int *ind_blocks) { const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); unsigned int max_blocks = ip->i_rgd->rd_free_clone; unsigned int tmp, max_data = max_blocks - 3 * (sdp->sd_max_height - 1); for (tmp = max_data; tmp > sdp->sd_diptrs;) { tmp = DIV_ROUND_UP(tmp, sdp->sd_inptrs); max_data -= tmp; } /* This calculation isn't the exact reverse of gfs2_write_calc_reserve, so it might end up with fewer data blocks */ if (max_data <= *data_blocks) return; *data_blocks = max_data; *ind_blocks = max_blocks - max_data; *len = ((loff_t)max_data - 3) << sdp->sd_sb.sb_bsize_shift; if (*len > max) { *len = max; gfs2_write_calc_reserv(ip, max, data_blocks, ind_blocks); } } static long gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t len) { struct inode *inode = file_inode(file); struct gfs2_sbd *sdp = GFS2_SB(inode); struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_alloc_parms ap = { .aflags = 0, }; unsigned int data_blocks = 0, ind_blocks = 0, rblocks; loff_t bytes, max_bytes; int error; const loff_t pos = offset; const loff_t count = len; loff_t bsize_mask = ~((loff_t)sdp->sd_sb.sb_bsize - 1); loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift; loff_t max_chunk_size = UINT_MAX & bsize_mask; struct gfs2_holder gh; next = (next + 1) << sdp->sd_sb.sb_bsize_shift; /* We only support the FALLOC_FL_KEEP_SIZE mode */ if (mode & ~FALLOC_FL_KEEP_SIZE) return -EOPNOTSUPP; offset &= bsize_mask; len = next - offset; bytes = sdp->sd_max_rg_data * sdp->sd_sb.sb_bsize / 2; if (!bytes) bytes = UINT_MAX; bytes &= bsize_mask; if (bytes == 0) bytes = sdp->sd_sb.sb_bsize; error = gfs2_rs_alloc(ip); if (error) return error; mutex_lock(&inode->i_mutex); gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); error = gfs2_glock_nq(&gh); if (unlikely(error)) goto out_uninit; gfs2_size_hint(file, offset, len); while (len > 0) { if (len < bytes) bytes = len; if (!gfs2_write_alloc_required(ip, offset, bytes)) { len -= bytes; offset += bytes; continue; } error = gfs2_quota_lock_check(ip); if (error) goto out_unlock; retry: gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks); ap.target = data_blocks + ind_blocks; error = gfs2_inplace_reserve(ip, &ap); if (error) { if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) { bytes >>= 1; bytes &= bsize_mask; if (bytes == 0) bytes = sdp->sd_sb.sb_bsize; goto retry; } goto out_qunlock; } max_bytes = bytes; calc_max_reserv(ip, (len > max_chunk_size)? max_chunk_size: len, &max_bytes, &data_blocks, &ind_blocks); rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA + RES_RG_HDR + gfs2_rg_blocks(ip, data_blocks + ind_blocks); if (gfs2_is_jdata(ip)) rblocks += data_blocks ? data_blocks : 1; error = gfs2_trans_begin(sdp, rblocks, PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize); if (error) goto out_trans_fail; error = fallocate_chunk(inode, offset, max_bytes, mode); gfs2_trans_end(sdp); if (error) goto out_trans_fail; len -= max_bytes; offset += max_bytes; gfs2_inplace_release(ip); gfs2_quota_unlock(ip); } if (error == 0) error = generic_write_sync(file, pos, count); goto out_unlock; out_trans_fail: gfs2_inplace_release(ip); out_qunlock: gfs2_quota_unlock(ip); out_unlock: gfs2_glock_dq(&gh); out_uninit: gfs2_holder_uninit(&gh); mutex_unlock(&inode->i_mutex); return error; } #ifdef CONFIG_GFS2_FS_LOCKING_DLM /** * gfs2_setlease - acquire/release a file lease * @file: the file pointer * @arg: lease type * @fl: file lock * * We don't currently have a way to enforce a lease across the whole * cluster; until we do, disable leases (by just returning -EINVAL), * unless the administrator has requested purely local locking. * * Locking: called under i_lock * * Returns: errno */ static int gfs2_setlease(struct file *file, long arg, struct file_lock **fl) { return -EINVAL; } /** * gfs2_lock - acquire/release a posix lock on a file * @file: the file pointer * @cmd: either modify or retrieve lock state, possibly wait * @fl: type and range of lock * * Returns: errno */ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl) { struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); struct gfs2_sbd *sdp = GFS2_SB(file->f_mapping->host); struct lm_lockstruct *ls = &sdp->sd_lockstruct; if (!(fl->fl_flags & FL_POSIX)) return -ENOLCK; if (__mandatory_lock(&ip->i_inode) && fl->fl_type != F_UNLCK) return -ENOLCK; if (cmd == F_CANCELLK) { /* Hack: */ cmd = F_SETLK; fl->fl_type = F_UNLCK; } if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) { if (fl->fl_type == F_UNLCK) posix_lock_file_wait(file, fl); return -EIO; } if (IS_GETLK(cmd)) return dlm_posix_get(ls->ls_dlm, ip->i_no_addr, file, fl); else if (fl->fl_type == F_UNLCK) return dlm_posix_unlock(ls->ls_dlm, ip->i_no_addr, file, fl); else return dlm_posix_lock(ls->ls_dlm, ip->i_no_addr, file, cmd, fl); } static int do_flock(struct file *file, int cmd, struct file_lock *fl) { struct gfs2_file *fp = file->private_data; struct gfs2_holder *fl_gh = &fp->f_fl_gh; struct gfs2_inode *ip = GFS2_I(file_inode(file)); struct gfs2_glock *gl; unsigned int state; int flags; int error = 0; state = (fl->fl_type == F_WRLCK) ? LM_ST_EXCLUSIVE : LM_ST_SHARED; flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY) | GL_EXACT | GL_NOCACHE; mutex_lock(&fp->f_fl_mutex); gl = fl_gh->gh_gl; if (gl) { if (fl_gh->gh_state == state) goto out; flock_lock_file_wait(file, &(struct file_lock){.fl_type = F_UNLCK}); gfs2_glock_dq_wait(fl_gh); gfs2_holder_reinit(state, flags, fl_gh); } else { error = gfs2_glock_get(GFS2_SB(&ip->i_inode), ip->i_no_addr, &gfs2_flock_glops, CREATE, &gl); if (error) goto out; gfs2_holder_init(gl, state, flags, fl_gh); gfs2_glock_put(gl); } error = gfs2_glock_nq(fl_gh); if (error) { gfs2_holder_uninit(fl_gh); if (error == GLR_TRYFAILED) error = -EAGAIN; } else { error = flock_lock_file_wait(file, fl); gfs2_assert_warn(GFS2_SB(&ip->i_inode), !error); } out: mutex_unlock(&fp->f_fl_mutex); return error; }
static void ecryptfs_vma_close(struct vm_area_struct *vma) { filemap_write_and_wait(vma->vm_file->f_mapping); }
/* * Initialize a new device for device replace target from a given source dev * and path. * * Return 0 and new device in @device_out, otherwise return < 0 */ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, const char *device_path, struct btrfs_device *srcdev, struct btrfs_device **device_out) { struct btrfs_device *device; struct block_device *bdev; struct list_head *devices; struct rcu_string *name; u64 devid = BTRFS_DEV_REPLACE_DEVID; int ret = 0; *device_out = NULL; if (fs_info->fs_devices->seeding) { btrfs_err(fs_info, "the filesystem is a seed filesystem!"); return -EINVAL; } bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL, fs_info->bdev_holder); if (IS_ERR(bdev)) { btrfs_err(fs_info, "target device %s is invalid!", device_path); return PTR_ERR(bdev); } filemap_write_and_wait(bdev->bd_inode->i_mapping); devices = &fs_info->fs_devices->devices; list_for_each_entry(device, devices, dev_list) { if (device->bdev == bdev) { btrfs_err(fs_info, "target device is in the filesystem!"); ret = -EEXIST; goto error; } } if (i_size_read(bdev->bd_inode) < btrfs_device_get_total_bytes(srcdev)) { btrfs_err(fs_info, "target device is smaller than source device!"); ret = -EINVAL; goto error; } device = btrfs_alloc_device(NULL, &devid, NULL); if (IS_ERR(device)) { ret = PTR_ERR(device); goto error; } name = rcu_string_strdup(device_path, GFP_KERNEL); if (!name) { btrfs_free_device(device); ret = -ENOMEM; goto error; } rcu_assign_pointer(device->name, name); mutex_lock(&fs_info->fs_devices->device_list_mutex); set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state); device->generation = 0; device->io_width = fs_info->sectorsize; device->io_align = fs_info->sectorsize; device->sector_size = fs_info->sectorsize; device->total_bytes = btrfs_device_get_total_bytes(srcdev); device->disk_total_bytes = btrfs_device_get_disk_total_bytes(srcdev); device->bytes_used = btrfs_device_get_bytes_used(srcdev); device->commit_total_bytes = srcdev->commit_total_bytes; device->commit_bytes_used = device->bytes_used; device->fs_info = fs_info; device->bdev = bdev; set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state); set_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state); device->mode = FMODE_EXCL; device->dev_stats_valid = 1; set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE); device->fs_devices = fs_info->fs_devices; list_add(&device->dev_list, &fs_info->fs_devices->devices); fs_info->fs_devices->num_devices++; fs_info->fs_devices->open_devices++; mutex_unlock(&fs_info->fs_devices->device_list_mutex); *device_out = device; return 0; error: blkdev_put(bdev, FMODE_EXCL); return ret; }
/* * Called at inode eviction from icache */ void ext3_evict_inode (struct inode *inode) { struct ext3_inode_info *ei = EXT3_I(inode); struct ext3_block_alloc_info *rsv; handle_t *handle; int want_delete = 0; trace_ext3_evict_inode(inode); if (!inode->i_nlink && !is_bad_inode(inode)) { dquot_initialize(inode); want_delete = 1; } /* * When journalling data dirty buffers are tracked only in the journal. * So although mm thinks everything is clean and ready for reaping the * inode might still have some pages to write in the running * transaction or waiting to be checkpointed. Thus calling * journal_invalidatepage() (via truncate_inode_pages()) to discard * these buffers can cause data loss. Also even if we did not discard * these buffers, we would have no way to find them after the inode * is reaped and thus user could see stale data if he tries to read * them before the transaction is checkpointed. So be careful and * force everything to disk here... We use ei->i_datasync_tid to * store the newest transaction containing inode's data. * * Note that directories do not have this problem because they don't * use page cache. * * The s_journal check handles the case when ext3_get_journal() fails * and puts the journal inode. */ if (inode->i_nlink && ext3_should_journal_data(inode) && EXT3_SB(inode->i_sb)->s_journal && (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode)) && inode->i_ino != EXT3_JOURNAL_INO) { tid_t commit_tid = atomic_read(&ei->i_datasync_tid); journal_t *journal = EXT3_SB(inode->i_sb)->s_journal; log_start_commit(journal, commit_tid); log_wait_commit(journal, commit_tid); filemap_write_and_wait(&inode->i_data); } truncate_inode_pages(&inode->i_data, 0); ext3_discard_reservation(inode); rsv = ei->i_block_alloc_info; ei->i_block_alloc_info = NULL; if (unlikely(rsv)) kfree(rsv); if (!want_delete) goto no_delete; handle = start_transaction(inode); if (IS_ERR(handle)) { /* * If we're going to skip the normal cleanup, we still need to * make sure that the in-core orphan linked list is properly * cleaned up. */ ext3_orphan_del(NULL, inode); goto no_delete; } if (IS_SYNC(inode)) handle->h_sync = 1; inode->i_size = 0; if (inode->i_blocks) ext3_truncate(inode); /* * Kill off the orphan record created when the inode lost the last * link. Note that ext3_orphan_del() has to be able to cope with the * deletion of a non-existent orphan - ext3_truncate() could * have removed the record. */ ext3_orphan_del(handle, inode); ei->i_dtime = get_seconds(); /* * One subtle ordering requirement: if anything has gone wrong * (transaction abort, IO errors, whatever), then we can still * do these next steps (the fs will already have been marked as * having errors), but we can't free the inode if the mark_dirty * fails. */ if (ext3_mark_inode_dirty(handle, inode)) { /* If that failed, just dquot_drop() and be done with that */ dquot_drop(inode); clear_inode(inode); } else { ext3_xattr_delete_inode(handle, inode); dquot_free_inode(inode); dquot_drop(inode); clear_inode(inode); ext3_free_inode(handle, inode); } ext3_journal_stop(handle); return; no_delete: clear_inode(inode); dquot_drop(inode); }
static int ecryptfs_flush(struct file *file, fl_owner_t td) { return file->f_mode & FMODE_WRITE ? filemap_write_and_wait(file->f_mapping) : 0; }
int xfs_swap_extents( xfs_inode_t *ip, /* target inode */ xfs_inode_t *tip, /* tmp inode */ xfs_swapext_t *sxp) { xfs_mount_t *mp = ip->i_mount; xfs_trans_t *tp; xfs_bstat_t *sbp = &sxp->sx_stat; xfs_ifork_t *tempifp, *ifp, *tifp; int src_log_flags, target_log_flags; int error = 0; int aforkblks = 0; int taforkblks = 0; __uint64_t tmp; tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL); if (!tempifp) { error = XFS_ERROR(ENOMEM); goto out; } /* * we have to do two separate lock calls here to keep lockdep * happy. If we try to get all the locks in one call, lock will * report false positives when we drop the ILOCK and regain them * below. */ xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL); xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL); /* Verify that both files have the same format */ if ((ip->i_d.di_mode & S_IFMT) != (tip->i_d.di_mode & S_IFMT)) { error = XFS_ERROR(EINVAL); goto out_unlock; } /* Verify both files are either real-time or non-realtime */ if (XFS_IS_REALTIME_INODE(ip) != XFS_IS_REALTIME_INODE(tip)) { error = XFS_ERROR(EINVAL); goto out_unlock; } error = -filemap_write_and_wait(VFS_I(tip)->i_mapping); if (error) goto out_unlock; truncate_pagecache_range(VFS_I(tip), 0, -1); /* Verify O_DIRECT for ftmp */ if (VN_CACHED(VFS_I(tip)) != 0) { error = XFS_ERROR(EINVAL); goto out_unlock; } /* Verify all data are being swapped */ if (sxp->sx_offset != 0 || sxp->sx_length != ip->i_d.di_size || sxp->sx_length != tip->i_d.di_size) { error = XFS_ERROR(EFAULT); goto out_unlock; } trace_xfs_swap_extent_before(ip, 0); trace_xfs_swap_extent_before(tip, 1); /* check inode formats now that data is flushed */ error = xfs_swap_extents_check_format(ip, tip); if (error) { xfs_notice(mp, "%s: inode 0x%llx format is incompatible for exchanging.", __func__, ip->i_ino); goto out_unlock; } /* * Compare the current change & modify times with that * passed in. If they differ, we abort this swap. * This is the mechanism used to ensure the calling * process that the file was not changed out from * under it. */ if ((sbp->bs_ctime.tv_sec != VFS_I(ip)->i_ctime.tv_sec) || (sbp->bs_ctime.tv_nsec != VFS_I(ip)->i_ctime.tv_nsec) || (sbp->bs_mtime.tv_sec != VFS_I(ip)->i_mtime.tv_sec) || (sbp->bs_mtime.tv_nsec != VFS_I(ip)->i_mtime.tv_nsec)) { error = XFS_ERROR(EBUSY); goto out_unlock; } /* We need to fail if the file is memory mapped. Once we have tossed * all existing pages, the page fault will have no option * but to go to the filesystem for pages. By making the page fault call * vop_read (or write in the case of autogrow) they block on the iolock * until we have switched the extents. */ if (VN_MAPPED(VFS_I(ip))) { error = XFS_ERROR(EBUSY); goto out_unlock; } xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_iunlock(tip, XFS_ILOCK_EXCL); /* * There is a race condition here since we gave up the * ilock. However, the data fork will not change since * we have the iolock (locked for truncation too) so we * are safe. We don't really care if non-io related * fields change. */ truncate_pagecache_range(VFS_I(ip), 0, -1); tp = xfs_trans_alloc(mp, XFS_TRANS_SWAPEXT); error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0); if (error) { xfs_iunlock(ip, XFS_IOLOCK_EXCL); xfs_iunlock(tip, XFS_IOLOCK_EXCL); xfs_trans_cancel(tp, 0); goto out; } xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL); /* * Count the number of extended attribute blocks */ if ( ((XFS_IFORK_Q(ip) != 0) && (ip->i_d.di_anextents > 0)) && (ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) { error = xfs_bmap_count_blocks(tp, ip, XFS_ATTR_FORK, &aforkblks); if (error) goto out_trans_cancel; } if ( ((XFS_IFORK_Q(tip) != 0) && (tip->i_d.di_anextents > 0)) && (tip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) { error = xfs_bmap_count_blocks(tp, tip, XFS_ATTR_FORK, &taforkblks); if (error) goto out_trans_cancel; } xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); xfs_trans_ijoin(tp, tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); /* * Before we've swapped the forks, lets set the owners of the forks * appropriately. We have to do this as we are demand paging the btree * buffers, and so the validation done on read will expect the owner * field to be correctly set. Once we change the owners, we can swap the * inode forks. * * Note the trickiness in setting the log flags - we set the owner log * flag on the opposite inode (i.e. the inode we are setting the new * owner to be) because once we swap the forks and log that, log * recovery is going to see the fork as owned by the swapped inode, * not the pre-swapped inodes. */ src_log_flags = XFS_ILOG_CORE; target_log_flags = XFS_ILOG_CORE; if (ip->i_d.di_version == 3 && ip->i_d.di_format == XFS_DINODE_FMT_BTREE) { target_log_flags |= XFS_ILOG_DOWNER; error = xfs_bmbt_change_owner(tp, ip, XFS_DATA_FORK, tip->i_ino, NULL); if (error) goto out_trans_cancel; } if (tip->i_d.di_version == 3 && tip->i_d.di_format == XFS_DINODE_FMT_BTREE) { src_log_flags |= XFS_ILOG_DOWNER; error = xfs_bmbt_change_owner(tp, tip, XFS_DATA_FORK, ip->i_ino, NULL); if (error) goto out_trans_cancel; } /* * Swap the data forks of the inodes */ ifp = &ip->i_df; tifp = &tip->i_df; *tempifp = *ifp; /* struct copy */ *ifp = *tifp; /* struct copy */ *tifp = *tempifp; /* struct copy */ /* * Fix the on-disk inode values */ tmp = (__uint64_t)ip->i_d.di_nblocks; ip->i_d.di_nblocks = tip->i_d.di_nblocks - taforkblks + aforkblks; tip->i_d.di_nblocks = tmp + taforkblks - aforkblks; tmp = (__uint64_t) ip->i_d.di_nextents; ip->i_d.di_nextents = tip->i_d.di_nextents; tip->i_d.di_nextents = tmp; tmp = (__uint64_t) ip->i_d.di_format; ip->i_d.di_format = tip->i_d.di_format; tip->i_d.di_format = tmp; /* * The extents in the source inode could still contain speculative * preallocation beyond EOF (e.g. the file is open but not modified * while defrag is in progress). In that case, we need to copy over the * number of delalloc blocks the data fork in the source inode is * tracking beyond EOF so that when the fork is truncated away when the * temporary inode is unlinked we don't underrun the i_delayed_blks * counter on that inode. */ ASSERT(tip->i_delayed_blks == 0); tip->i_delayed_blks = ip->i_delayed_blks; ip->i_delayed_blks = 0; switch (ip->i_d.di_format) { case XFS_DINODE_FMT_EXTENTS: /* If the extents fit in the inode, fix the * pointer. Otherwise it's already NULL or * pointing to the extent. */ if (ip->i_d.di_nextents <= XFS_INLINE_EXTS) { ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; } src_log_flags |= XFS_ILOG_DEXT; break; case XFS_DINODE_FMT_BTREE: ASSERT(ip->i_d.di_version < 3 || (src_log_flags & XFS_ILOG_DOWNER)); src_log_flags |= XFS_ILOG_DBROOT; break; } switch (tip->i_d.di_format) { case XFS_DINODE_FMT_EXTENTS: /* If the extents fit in the inode, fix the * pointer. Otherwise it's already NULL or * pointing to the extent. */ if (tip->i_d.di_nextents <= XFS_INLINE_EXTS) { tifp->if_u1.if_extents = tifp->if_u2.if_inline_ext; } target_log_flags |= XFS_ILOG_DEXT; break; case XFS_DINODE_FMT_BTREE: target_log_flags |= XFS_ILOG_DBROOT; ASSERT(tip->i_d.di_version < 3 || (target_log_flags & XFS_ILOG_DOWNER)); break; } xfs_trans_log_inode(tp, ip, src_log_flags); xfs_trans_log_inode(tp, tip, target_log_flags); /* * If this is a synchronous mount, make sure that the * transaction goes to disk before returning to the user. */ if (mp->m_flags & XFS_MOUNT_WSYNC) xfs_trans_set_sync(tp); error = xfs_trans_commit(tp, 0); trace_xfs_swap_extent_after(ip, 0); trace_xfs_swap_extent_after(tip, 1); out: kmem_free(tempifp); return error; out_unlock: xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); xfs_iunlock(tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); goto out; out_trans_cancel: xfs_trans_cancel(tp, 0); goto out_unlock; }
/* * Get inode's extents as described in bmv, and format for output. * Calls formatter to fill the user's buffer until all extents * are mapped, until the passed-in bmv->bmv_count slots have * been filled, or until the formatter short-circuits the loop, * if it is tracking filled-in extents on its own. */ int /* error code */ xfs_getbmap( xfs_inode_t *ip, struct getbmapx *bmv, /* user bmap structure */ xfs_bmap_format_t formatter, /* format to user */ void *arg) /* formatter arg */ { __int64_t bmvend; /* last block requested */ int error = 0; /* return value */ __int64_t fixlen; /* length for -1 case */ int i; /* extent number */ int lock; /* lock state */ xfs_bmbt_irec_t *map; /* buffer for user's data */ xfs_mount_t *mp; /* file system mount point */ int nex; /* # of user extents can do */ int nexleft; /* # of user extents left */ int subnex; /* # of bmapi's can do */ int nmap; /* number of map entries */ struct getbmapx *out; /* output structure */ int whichfork; /* data or attr fork */ int prealloced; /* this is a file with * preallocated data space */ int iflags; /* interface flags */ int bmapi_flags; /* flags for xfs_bmapi */ int cur_ext = 0; mp = ip->i_mount; iflags = bmv->bmv_iflags; whichfork = iflags & BMV_IF_ATTRFORK ? XFS_ATTR_FORK : XFS_DATA_FORK; if (whichfork == XFS_ATTR_FORK) { if (XFS_IFORK_Q(ip)) { if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS && ip->i_d.di_aformat != XFS_DINODE_FMT_BTREE && ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL) return XFS_ERROR(EINVAL); } else if (unlikely( ip->i_d.di_aformat != 0 && ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS)) { XFS_ERROR_REPORT("xfs_getbmap", XFS_ERRLEVEL_LOW, ip->i_mount); return XFS_ERROR(EFSCORRUPTED); } prealloced = 0; fixlen = 1LL << 32; } else { if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS && ip->i_d.di_format != XFS_DINODE_FMT_BTREE && ip->i_d.di_format != XFS_DINODE_FMT_LOCAL) return XFS_ERROR(EINVAL); if (xfs_get_extsz_hint(ip) || ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){ prealloced = 1; fixlen = mp->m_super->s_maxbytes; } else { prealloced = 0; fixlen = XFS_ISIZE(ip); } } if (bmv->bmv_length == -1) { fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, fixlen)); bmv->bmv_length = max_t(__int64_t, fixlen - bmv->bmv_offset, 0); } else if (bmv->bmv_length == 0) { bmv->bmv_entries = 0; return 0; } else if (bmv->bmv_length < 0) { return XFS_ERROR(EINVAL); } nex = bmv->bmv_count - 1; if (nex <= 0) return XFS_ERROR(EINVAL); bmvend = bmv->bmv_offset + bmv->bmv_length; if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx)) return XFS_ERROR(ENOMEM); out = kmem_zalloc_large(bmv->bmv_count * sizeof(struct getbmapx), 0); if (!out) return XFS_ERROR(ENOMEM); xfs_ilock(ip, XFS_IOLOCK_SHARED); if (whichfork == XFS_DATA_FORK) { if (!(iflags & BMV_IF_DELALLOC) && (ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_d.di_size)) { error = -filemap_write_and_wait(VFS_I(ip)->i_mapping); if (error) goto out_unlock_iolock; /* * Even after flushing the inode, there can still be * delalloc blocks on the inode beyond EOF due to * speculative preallocation. These are not removed * until the release function is called or the inode * is inactivated. Hence we cannot assert here that * ip->i_delayed_blks == 0. */ } lock = xfs_ilock_data_map_shared(ip); } else { lock = xfs_ilock_attr_map_shared(ip); } /* * Don't let nex be bigger than the number of extents * we can have assuming alternating holes and real extents. */ if (nex > XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1) nex = XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1; bmapi_flags = xfs_bmapi_aflag(whichfork); if (!(iflags & BMV_IF_PREALLOC)) bmapi_flags |= XFS_BMAPI_IGSTATE; /* * Allocate enough space to handle "subnex" maps at a time. */ error = ENOMEM; subnex = 16; map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL | KM_NOFS); if (!map) goto out_unlock_ilock; bmv->bmv_entries = 0; if (XFS_IFORK_NEXTENTS(ip, whichfork) == 0 && (whichfork == XFS_ATTR_FORK || !(iflags & BMV_IF_DELALLOC))) { error = 0; goto out_free_map; } nexleft = nex; do { nmap = (nexleft > subnex) ? subnex : nexleft; error = xfs_bmapi_read(ip, XFS_BB_TO_FSBT(mp, bmv->bmv_offset), XFS_BB_TO_FSB(mp, bmv->bmv_length), map, &nmap, bmapi_flags); if (error) goto out_free_map; ASSERT(nmap <= subnex); for (i = 0; i < nmap && nexleft && bmv->bmv_length; i++) { out[cur_ext].bmv_oflags = 0; if (map[i].br_state == XFS_EXT_UNWRITTEN) out[cur_ext].bmv_oflags |= BMV_OF_PREALLOC; else if (map[i].br_startblock == DELAYSTARTBLOCK) out[cur_ext].bmv_oflags |= BMV_OF_DELALLOC; out[cur_ext].bmv_offset = XFS_FSB_TO_BB(mp, map[i].br_startoff); out[cur_ext].bmv_length = XFS_FSB_TO_BB(mp, map[i].br_blockcount); out[cur_ext].bmv_unused1 = 0; out[cur_ext].bmv_unused2 = 0; /* * delayed allocation extents that start beyond EOF can * occur due to speculative EOF allocation when the * delalloc extent is larger than the largest freespace * extent at conversion time. These extents cannot be * converted by data writeback, so can exist here even * if we are not supposed to be finding delalloc * extents. */ if (map[i].br_startblock == DELAYSTARTBLOCK && map[i].br_startoff <= XFS_B_TO_FSB(mp, XFS_ISIZE(ip))) ASSERT((iflags & BMV_IF_DELALLOC) != 0); if (map[i].br_startblock == HOLESTARTBLOCK && whichfork == XFS_ATTR_FORK) { /* came to the end of attribute fork */ out[cur_ext].bmv_oflags |= BMV_OF_LAST; goto out_free_map; } if (!xfs_getbmapx_fix_eof_hole(ip, &out[cur_ext], prealloced, bmvend, map[i].br_startblock)) goto out_free_map; bmv->bmv_offset = out[cur_ext].bmv_offset + out[cur_ext].bmv_length; bmv->bmv_length = max_t(__int64_t, 0, bmvend - bmv->bmv_offset); /* * In case we don't want to return the hole, * don't increase cur_ext so that we can reuse * it in the next loop. */ if ((iflags & BMV_IF_NO_HOLES) && map[i].br_startblock == HOLESTARTBLOCK) { memset(&out[cur_ext], 0, sizeof(out[cur_ext])); continue; } nexleft--; bmv->bmv_entries++; cur_ext++; } } while (nmap && nexleft && bmv->bmv_length); out_free_map: kmem_free(map); out_unlock_ilock: xfs_iunlock(ip, lock); out_unlock_iolock: xfs_iunlock(ip, XFS_IOLOCK_SHARED); for (i = 0; i < cur_ext; i++) { int full = 0; /* user array is full */ /* format results & advance arg */ error = formatter(&arg, &out[i], &full); if (error || full) break; } kmem_free(out); return error; }
/* * NAME: jfs_umount(vfsp, flags, crp) * * FUNCTION: vfs_umount() * * PARAMETERS: vfsp - virtual file system pointer * flags - unmount for shutdown * crp - credential * * RETURN : EBUSY - device has open files */ int jfs_umount(struct super_block *sb) { struct jfs_sb_info *sbi = JFS_SBI(sb); struct inode *ipbmap = sbi->ipbmap; struct inode *ipimap = sbi->ipimap; struct inode *ipaimap = sbi->ipaimap; struct inode *ipaimap2 = sbi->ipaimap2; struct jfs_log *log; int rc = 0; jfs_info("UnMount JFS: sb:0x%p", sb); /* * update superblock and close log * * if mounted read-write and log based recovery was enabled */ if ((log = sbi->log)) /* * Wait for outstanding transactions to be written to log: */ jfs_flush_journal(log, 1); /* * close fileset inode allocation map (aka fileset inode) */ diUnmount(ipimap, 0); diFreeSpecial(ipimap); sbi->ipimap = NULL; /* * close secondary aggregate inode allocation map */ ipaimap2 = sbi->ipaimap2; if (ipaimap2) { diUnmount(ipaimap2, 0); diFreeSpecial(ipaimap2); sbi->ipaimap2 = NULL; } /* * close aggregate inode allocation map */ ipaimap = sbi->ipaimap; diUnmount(ipaimap, 0); diFreeSpecial(ipaimap); sbi->ipaimap = NULL; /* * close aggregate block allocation map */ dbUnmount(ipbmap, 0); diFreeSpecial(ipbmap); sbi->ipimap = NULL; /* * Make sure all metadata makes it to disk before we mark * the superblock as clean */ filemap_write_and_wait(sbi->direct_inode->i_mapping); /* * ensure all file system file pages are propagated to their * home blocks on disk (and their in-memory buffer pages are * invalidated) BEFORE updating file system superblock state * (to signify file system is unmounted cleanly, and thus in * consistent state) and log superblock active file system * list (to signify skip logredo()). */ if (log) { /* log = NULL if read-only mount */ updateSuper(sb, FM_CLEAN); /* * close log: * * remove file system from log active file system list. */ rc = lmLogClose(sb); } jfs_info("UnMount JFS Complete: rc = %d", rc); return rc; }