static void rgrp_go_inval(struct gfs2_glock *gl, int flags)
{
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	struct address_space *mapping = &sdp->sd_aspace;
	struct gfs2_rgrpd *rgd = gl->gl_object;

	if (rgd)
		gfs2_rgrp_brelse(rgd);

	WARN_ON_ONCE(!(flags & DIO_METADATA));
	gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count));
	truncate_inode_pages_range(mapping, gl->gl_vm.start, gl->gl_vm.end);

	if (rgd)
		rgd->rd_flags &= ~GFS2_RDF_UPTODATE;
}
int
xfs_flushinval_pages(
	xfs_inode_t	*ip,
	xfs_off_t	first,
	xfs_off_t	last,
	int		fiopt)
{
	struct address_space *mapping = VFS_I(ip)->i_mapping;
	int		ret = 0;

	trace_xfs_pagecache_inval(ip, first, last);

	xfs_iflags_clear(ip, XFS_ITRUNCATED);
	ret = filemap_write_and_wait_range(mapping, first,
				last == -1 ? LLONG_MAX : last);
	if (!ret)
		truncate_inode_pages_range(mapping, first, last);
	return -ret;
}
/**
 * truncate_pagecache_range - unmap and remove pagecache that is hole-punched
 * @inode: inode
 * @lstart: offset of beginning of hole
 * @lend: offset of last byte of hole
 *
 * This function should typically be called before the filesystem
 * releases resources associated with the freed range (eg. deallocates
 * blocks). This way, pagecache will always stay logically coherent
 * with on-disk format, and the filesystem would not have to deal with
 * situations such as writepage being called for a page that has already
 * had its underlying blocks deallocated.
 */
void truncate_pagecache_range(struct inode *inode, loff_t lstart, loff_t lend)
{
	struct address_space *mapping = inode->i_mapping;
	loff_t unmap_start = round_up(lstart, PAGE_SIZE);
	loff_t unmap_end = round_down(1 + lend, PAGE_SIZE) - 1;
	/*
	 * This rounding is currently just for example: unmap_mapping_range
	 * expands its hole outwards, whereas we want it to contract the hole
	 * inwards.  However, existing callers of truncate_pagecache_range are
	 * doing their own page rounding first.  Note that unmap_mapping_range
	 * allows holelen 0 for all, and we allow lend -1 for end of file.
	 */

	/*
	 * Unlike in truncate_pagecache, unmap_mapping_range is called only
	 * once (before truncating pagecache), and without "even_cows" flag:
	 * hole-punching should not remove private COWed pages from the hole.
	 */
	if ((u64)unmap_end > (u64)unmap_start)
		unmap_mapping_range(mapping, unmap_start,
				    1 + unmap_end - unmap_start, 0);
	truncate_inode_pages_range(mapping, lstart, lend);
}
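/*
 * Illustrative sketch (not from the original sources): how a filesystem's
 * fallocate(FALLOC_FL_PUNCH_HOLE) path would typically use
 * truncate_pagecache_range() before releasing blocks, per the kernel-doc
 * above. The names simplefs_punch_hole() and simplefs_free_blocks() are
 * hypothetical.
 */
static int simplefs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
{
	loff_t lstart = offset;
	loff_t lend = offset + len - 1;

	/* Unmap and drop pagecache over the hole first... */
	truncate_pagecache_range(inode, lstart, lend);

	/*
	 * ...and only then deallocate the underlying blocks, so writepage
	 * can never run against a page whose blocks are already gone.
	 */
	return simplefs_free_blocks(inode, lstart, lend);
}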
static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
			      const struct iovec *iov, loff_t offset,
			      unsigned long nr_segs)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;
	struct address_space *mapping = inode->i_mapping;
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_holder gh;
	int rv;

	/*
	 * Deferred lock, even if it's a write, since we do no allocation on
	 * this path. All we need to change is atime, and this lock mode
	 * ensures that other nodes have flushed their buffered read caches
	 * (i.e. their page cache entries for this inode). We do not,
	 * unfortunately, have the option of only flushing a range like the
	 * VFS does.
	 */
	gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, &gh);
	rv = gfs2_glock_nq(&gh);
	if (rv)
		return rv;
	rv = gfs2_ok_for_dio(ip, rw, offset);
	if (rv != 1)
		goto out; /* dio not valid, fall back to buffered i/o */

	/*
	 * Now since we are holding a deferred (CW) lock at this point, you
	 * might be wondering why this is ever needed. There is a case however
	 * where we've granted a deferred local lock against a cached exclusive
	 * glock. That is ok provided all granted local locks are deferred, but
	 * it also means that it is possible to encounter pages which are
	 * cached and possibly also mapped. So here we check for that and sort
	 * them out ahead of the dio. The glock state machine will take care of
	 * everything else.
	 *
	 * If in fact the cached glock state (gl->gl_state) is deferred (CW) in
	 * the first place, mapping->nrpages will always be zero.
	 */
	if (mapping->nrpages) {
		/* Round the start down and the end up to page boundaries */
		loff_t lstart = offset & ~(PAGE_CACHE_SIZE - 1);
		loff_t len = iov_length(iov, nr_segs);
		loff_t end = PAGE_ALIGN(offset + len) - 1;

		rv = 0;
		if (len == 0)
			goto out;
		if (test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags))
			unmap_shared_mapping_range(ip->i_inode.i_mapping,
						   offset, len);
		rv = filemap_write_and_wait_range(mapping, lstart, end);
		if (rv)
			goto out;
		if (rw == WRITE)
			truncate_inode_pages_range(mapping, lstart, end);
	}

	rv = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
				  offset, nr_segs, gfs2_get_block_direct,
				  NULL, NULL, 0);
out:
	gfs2_glock_dq(&gh);
	gfs2_holder_uninit(&gh);
	return rv;
}
int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
{
	struct list_head inode_list, tmp_inode_list;
	struct list_head dir_list;
	int err;
	int ret = 0;
	unsigned long s_flags = sbi->sb->s_flags;
	bool need_writecp = false;
#ifdef CONFIG_QUOTA
	int quota_enabled;
#endif

	if (s_flags & SB_RDONLY) {
		f2fs_msg(sbi->sb, KERN_INFO,
			 "recover fsync data on readonly fs");
		sbi->sb->s_flags &= ~SB_RDONLY;
	}

#ifdef CONFIG_QUOTA
	/* Needed for iput() to work correctly and not trash data */
	sbi->sb->s_flags |= SB_ACTIVE;

	/* Turn on quotas so that they are updated correctly */
	quota_enabled = f2fs_enable_quota_files(sbi, s_flags & SB_RDONLY);
#endif

	fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
			sizeof(struct fsync_inode_entry));
	if (!fsync_entry_slab) {
		err = -ENOMEM;
		goto out;
	}

	INIT_LIST_HEAD(&inode_list);
	INIT_LIST_HEAD(&tmp_inode_list);
	INIT_LIST_HEAD(&dir_list);

	/* prevent checkpoint */
	mutex_lock(&sbi->cp_mutex);

	/* step #1: find fsynced inode numbers */
	err = find_fsync_dnodes(sbi, &inode_list, check_only);
	if (err || list_empty(&inode_list))
		goto skip;

	if (check_only) {
		ret = 1;
		goto skip;
	}

	need_writecp = true;

	/* step #2: recover data */
	err = recover_data(sbi, &inode_list, &tmp_inode_list, &dir_list);
	if (!err)
		f2fs_bug_on(sbi, !list_empty(&inode_list));
	else {
		/* restore s_flags to let iput() trash data */
		sbi->sb->s_flags = s_flags;
	}
skip:
	destroy_fsync_dnodes(&inode_list, err);
	destroy_fsync_dnodes(&tmp_inode_list, err);

	/* truncate meta pages to be used by the recovery */
	truncate_inode_pages_range(META_MAPPING(sbi),
			(loff_t)MAIN_BLKADDR(sbi) << PAGE_SHIFT, -1);

	if (err) {
		truncate_inode_pages_final(NODE_MAPPING(sbi));
		truncate_inode_pages_final(META_MAPPING(sbi));
	} else {
		clear_sbi_flag(sbi, SBI_POR_DOING);
	}
	mutex_unlock(&sbi->cp_mutex);

	/* let's drop all the directory inodes for clean checkpoint */
	destroy_fsync_dnodes(&dir_list, err);

	if (need_writecp) {
		set_sbi_flag(sbi, SBI_IS_RECOVERED);

		if (!err) {
			struct cp_control cpc = {
				.reason = CP_RECOVERY,
			};
			err = f2fs_write_checkpoint(sbi, &cpc);
		}
	}

	kmem_cache_destroy(fsync_entry_slab);
out:
#ifdef CONFIG_QUOTA
	/* Turn quotas off */
	if (quota_enabled)
		f2fs_quota_off_umount(sbi->sb);
#endif
	sbi->sb->s_flags = s_flags; /* Restore SB_RDONLY status */

	return ret ? ret : err;
}
static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file,
			unsigned long srcfd, u64 off, u64 len, u64 destoff)
{
	int rc;
	struct cifsFileInfo *smb_file_target = dst_file->private_data;
	struct inode *target_inode = file_inode(dst_file);
	struct cifs_tcon *target_tcon;
	struct fd src_file;
	struct cifsFileInfo *smb_file_src;
	struct inode *src_inode;
	struct cifs_tcon *src_tcon;

	cifs_dbg(FYI, "ioctl clone range\n");

	/* the destination must be opened for writing */
	if (!(dst_file->f_mode & FMODE_WRITE)) {
		cifs_dbg(FYI, "file target not open for write\n");
		return -EINVAL;
	}

	/* check if target volume is readonly and take reference */
	rc = mnt_want_write_file(dst_file);
	if (rc) {
		cifs_dbg(FYI, "mnt_want_write failed with rc %d\n", rc);
		return rc;
	}

	src_file = fdget(srcfd);
	if (!src_file.file) {
		rc = -EBADF;
		goto out_drop_write;
	}

	if ((!src_file.file->private_data) || (!dst_file->private_data)) {
		rc = -EBADF;
		cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n");
		goto out_fput;
	}

	rc = -EXDEV;
	smb_file_target = dst_file->private_data;
	smb_file_src = src_file.file->private_data;
	src_tcon = tlink_tcon(smb_file_src->tlink);
	target_tcon = tlink_tcon(smb_file_target->tlink);

	/* check if source and target are on the same tree connection */
	if (src_tcon != target_tcon) {
		cifs_dbg(VFS, "file copy src and target on different volume\n");
		goto out_fput;
	}

	src_inode = src_file.file->f_dentry->d_inode;

	/*
	 * Note: the cifs case is easier than btrfs since the server is
	 * responsible for checking for proper open modes and file type,
	 * and if it wanted to, the server could even support copying a
	 * range where source == target.
	 */

	/* lock in a fixed order so we do not deadlock when two such ioctls
	   race on the same pair of files */
	if (target_inode < src_inode) {
		mutex_lock_nested(&target_inode->i_mutex, I_MUTEX_PARENT);
		mutex_lock_nested(&src_inode->i_mutex, I_MUTEX_CHILD);
	} else {
		mutex_lock_nested(&src_inode->i_mutex, I_MUTEX_PARENT);
		mutex_lock_nested(&target_inode->i_mutex, I_MUTEX_CHILD);
	}

	/* determine range to clone */
	rc = -EINVAL;
	if (off + len > src_inode->i_size || off + len < off)
		goto out_unlock;
	if (len == 0)
		len = src_inode->i_size - off;

	cifs_dbg(FYI, "about to flush pages\n");
	/* should we flush the first and last pages first? */
	truncate_inode_pages_range(&target_inode->i_data, destoff,
				   PAGE_CACHE_ALIGN(destoff + len) - 1);

	if (target_tcon->ses->server->ops->clone_range)
		rc = target_tcon->ses->server->ops->clone_range(xid,
			smb_file_src, smb_file_target, off, len, destoff);

	/* force revalidation of size and timestamps of the target file now
	   that the target is updated on the server */
	CIFS_I(target_inode)->time = 0;
out_unlock:
	/* although unlocking in the reverse order from locking is not
	   strictly necessary here, it is a little cleaner to be consistent */
	if (target_inode < src_inode) {
		mutex_unlock(&src_inode->i_mutex);
		mutex_unlock(&target_inode->i_mutex);
	} else {
		mutex_unlock(&target_inode->i_mutex);
		mutex_unlock(&src_inode->i_mutex);
	}
out_fput:
	fdput(src_file);
out_drop_write:
	mnt_drop_write_file(dst_file);
	return rc;
}
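/*
 * Illustrative sketch (not from the original sources): the address-ordered
 * double-lock pattern used in cifs_ioctl_clone() above and again in
 * nfs42_clone_file_range() below. Always locking the lower-addressed inode
 * first gives every racing caller the same global order, so two clone
 * ioctls with src and dst swapped cannot deadlock. lock_two_inodes() is a
 * hypothetical helper name.
 */
static void lock_two_inodes(struct inode *a, struct inode *b)
{
	if (a == b) {
		/* same inode: a single lock, never lock one mutex twice */
		mutex_lock(&a->i_mutex);
	} else if (a < b) {
		mutex_lock_nested(&a->i_mutex, I_MUTEX_PARENT);
		mutex_lock_nested(&b->i_mutex, I_MUTEX_CHILD);
	} else {
		mutex_lock_nested(&b->i_mutex, I_MUTEX_PARENT);
		mutex_lock_nested(&a->i_mutex, I_MUTEX_CHILD);
	}
}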
/*
 * Link a range of blocks from one file to another.
 */
int
xfs_reflink_remap_range(
	struct file		*file_in,
	loff_t			pos_in,
	struct file		*file_out,
	loff_t			pos_out,
	u64			len,
	bool			is_dedupe)
{
	struct inode		*inode_in = file_inode(file_in);
	struct xfs_inode	*src = XFS_I(inode_in);
	struct inode		*inode_out = file_inode(file_out);
	struct xfs_inode	*dest = XFS_I(inode_out);
	struct xfs_mount	*mp = src->i_mount;
	bool			same_inode = (inode_in == inode_out);
	xfs_fileoff_t		sfsbno, dfsbno;
	xfs_filblks_t		fsblen;
	xfs_extlen_t		cowextsize;
	ssize_t			ret;

	if (!xfs_sb_version_hasreflink(&mp->m_sb))
		return -EOPNOTSUPP;

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	/* Lock both files against IO */
	lock_two_nondirectories(inode_in, inode_out);
	if (same_inode)
		xfs_ilock(src, XFS_MMAPLOCK_EXCL);
	else
		xfs_lock_two_inodes(src, dest, XFS_MMAPLOCK_EXCL);

	/* Check file eligibility and prepare for block sharing. */
	ret = -EINVAL;
	/* Don't reflink realtime inodes */
	if (XFS_IS_REALTIME_INODE(src) || XFS_IS_REALTIME_INODE(dest))
		goto out_unlock;

	/* Don't share DAX file data for now. */
	if (IS_DAX(inode_in) || IS_DAX(inode_out))
		goto out_unlock;

	ret = vfs_clone_file_prep_inodes(inode_in, pos_in, inode_out, pos_out,
			&len, is_dedupe);
	if (ret <= 0)
		goto out_unlock;

	trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);

	/* Set flags and remap blocks. */
	ret = xfs_reflink_set_inode_flag(src, dest);
	if (ret)
		goto out_unlock;

	dfsbno = XFS_B_TO_FSBT(mp, pos_out);
	sfsbno = XFS_B_TO_FSBT(mp, pos_in);
	fsblen = XFS_B_TO_FSB(mp, len);
	ret = xfs_reflink_remap_blocks(src, sfsbno, dest, dfsbno, fsblen,
			pos_out + len);
	if (ret)
		goto out_unlock;

	/* Zap any page cache for the destination file's range. */
	truncate_inode_pages_range(&inode_out->i_data, pos_out,
				   PAGE_ALIGN(pos_out + len) - 1);

	/*
	 * Carry the cowextsize hint from src to dest if we're sharing the
	 * entire source file to the entire destination file, the source file
	 * has a cowextsize hint, and the destination file does not.
	 */
	cowextsize = 0;
	if (pos_in == 0 && len == i_size_read(inode_in) &&
	    (src->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE) &&
	    pos_out == 0 && len >= i_size_read(inode_out) &&
	    !(dest->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE))
		cowextsize = src->i_d.di_cowextsize;

	ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize,
			is_dedupe);

out_unlock:
	xfs_iunlock(src, XFS_MMAPLOCK_EXCL);
	if (!same_inode)
		xfs_iunlock(dest, XFS_MMAPLOCK_EXCL);
	unlock_two_nondirectories(inode_in, inode_out);
	if (ret)
		trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_);
	return ret;
}
/**
 * truncate_inode_pages - truncate *all* the pages from an offset
 * @mapping: mapping to truncate
 * @lstart: offset from which to truncate
 *
 * Called under (and serialised by) inode->i_mutex.
 *
 * Note: When this function returns, there can be a page in the process of
 * deletion (inside __delete_from_page_cache()) in the specified range.  Thus
 * mapping->nrpages can be non-zero when this function returns even after
 * truncation of the whole mapping.
 */
void truncate_inode_pages(struct address_space *mapping, loff_t lstart)
{
	truncate_inode_pages_range(mapping, lstart, (loff_t)-1);
}
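/*
 * Illustrative sketch (not from the original sources): the classic caller
 * of truncate_inode_pages() is a filesystem's ->evict_inode(), which must
 * empty the pagecache before the inode is freed. simplefs_evict_inode() is
 * a hypothetical name; modern kernels use the truncate_inode_pages_final()
 * variant here.
 */
static void simplefs_evict_inode(struct inode *inode)
{
	/* drop every cached page, from offset 0 to end of file */
	truncate_inode_pages(&inode->i_data, 0);
	clear_inode(inode);
}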
///////////////////////////////////////////////////////////
// ufsd_log
//
//
///////////////////////////////////////////////////////////
static void
ufsd_log(
    IN const char *fmt,
    IN int len
    )
{
  if ( len <= 0 || 0 == fmt[0] )
    return;

  if ( !log_file_opened && 0 != ufsd_trace_file[0] ) {
    log_file_opened = 1;
    log_file = filp_open( ufsd_trace_file, O_WRONLY | O_CREAT | O_TRUNC,
                          S_IRUGO | S_IWUGO );
    if ( IS_ERR( log_file ) ) {
      long error = PTR_ERR( log_file );
      log_file = NULL;
      printk( KERN_NOTICE QUOTED_UFSD_DEVICE": failed to start log to '%s' (errno=%ld), using system log\n",
              ufsd_trace_file, -error );
    } else {
      assert( NULL != log_file );
    }
  }

  if ( NULL != log_file && NULL != log_file->f_op
    && NULL != log_file->f_op->write && !log_file_error ) {
    mm_segment_t old_limit = get_fs();
    long error = 0;

    set_fs( KERNEL_DS );

    if ( 0 != UFSD_CycleMB ) {
      // Cyclic log: write up to the size limit, then wrap f_pos to zero
      size_t bytes = UFSD_CycleMB << 20;
      int to_write = log_file->f_pos + len > bytes
                   ? (bytes - log_file->f_pos) : len;

      assert( to_write >= 0 );
      if ( to_write <= 0 ) {
        to_write = 0;
      } else {
        error = log_file->f_op->write( log_file, fmt, to_write, &log_file->f_pos );
        if ( error < 0 )
          log_file_error = error;
        fmt += to_write;
        len -= to_write;
      }

      if ( 0 != len )
        log_file->f_pos = 0;
    }

    if ( 0 != len ) {
      error = log_file->f_op->write( log_file, fmt, len, &log_file->f_pos );
      if ( error < 0 )
        log_file_error = error;
    }

#if 0 // LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26)
    // Write back and drop already-logged pages so the log file does not
    // pin the page cache
    if ( 0 == log_file_error ) {
      struct address_space *m = log_file->f_dentry->d_inode->i_mapping;
      long hint = log_file->f_pos - 4*PAGE_SIZE;
      if ( m->nrpages > 32 && hint > 0 ) {
        unsigned end = hint & ~(PAGE_SIZE - 1);
        // unsigned long nrpages = m->nrpages;
        int err = filemap_fdatawrite_range( m, 0, end - 1 );
        if ( 0 == err )
          truncate_inode_pages_range( m, 0, end - 1 );
        // printk("truncate_inode_pages_range %x, %lu -> %lu, %d\n", end, nrpages, m->nrpages, err );
      }
    }
#endif

    set_fs( old_limit );

    if ( error < 0 )
      printk( "log write failed: %ld\n", -error );
  }
  // Comment out this 'else' to duplicate the output to klog.
  else {
    printk( KERN_NOTICE QUOTED_UFSD_DEVICE":%s", fmt );
  }
}
static int nfs42_clone_file_range(struct file *src_file, loff_t src_off,
		struct file *dst_file, loff_t dst_off, u64 count)
{
	struct inode *dst_inode = file_inode(dst_file);
	struct nfs_server *server = NFS_SERVER(dst_inode);
	struct inode *src_inode = file_inode(src_file);
	unsigned int bs = server->clone_blksize;
	bool same_inode = false;
	int ret;

	/* check alignment w.r.t. clone_blksize */
	ret = -EINVAL;
	if (bs) {
		if (!IS_ALIGNED(src_off, bs) || !IS_ALIGNED(dst_off, bs))
			goto out;
		if (!IS_ALIGNED(count, bs) &&
		    i_size_read(src_inode) != (src_off + count))
			goto out;
	}

	if (src_inode == dst_inode)
		same_inode = true;

	/* XXX: do we lock at all? what if server needs CB_RECALL_LAYOUT? */
	if (same_inode) {
		inode_lock(src_inode);
	} else if (dst_inode < src_inode) {
		inode_lock_nested(dst_inode, I_MUTEX_PARENT);
		inode_lock_nested(src_inode, I_MUTEX_CHILD);
	} else {
		inode_lock_nested(src_inode, I_MUTEX_PARENT);
		inode_lock_nested(dst_inode, I_MUTEX_CHILD);
	}

	/* flush all pending writes on both src and dst so that server
	 * has the latest data */
	ret = nfs_sync_inode(src_inode);
	if (ret)
		goto out_unlock;
	ret = nfs_sync_inode(dst_inode);
	if (ret)
		goto out_unlock;

	ret = nfs42_proc_clone(src_file, dst_file, src_off, dst_off, count);

	/* truncate inode page cache of the dst range so that future reads
	 * can fetch new data from server */
	if (!ret)
		truncate_inode_pages_range(&dst_inode->i_data, dst_off,
					   dst_off + count - 1);

out_unlock:
	if (same_inode) {
		inode_unlock(src_inode);
	} else if (dst_inode < src_inode) {
		inode_unlock(src_inode);
		inode_unlock(dst_inode);
	} else {
		inode_unlock(dst_inode);
		inode_unlock(src_inode);
	}
out:
	return ret;
}