STATIC int xfs_vn_getattr( struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { struct inode *inode = dentry->d_inode; struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; xfs_itrace_entry(ip); if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); stat->size = XFS_ISIZE(ip); stat->dev = inode->i_sb->s_dev; stat->mode = ip->i_d.di_mode; stat->nlink = ip->i_d.di_nlink; stat->uid = ip->i_d.di_uid; stat->gid = ip->i_d.di_gid; stat->ino = ip->i_ino; #if XFS_BIG_INUMS stat->ino += mp->m_inoadd; #endif stat->atime = inode->i_atime; stat->mtime.tv_sec = ip->i_d.di_mtime.t_sec; stat->mtime.tv_nsec = ip->i_d.di_mtime.t_nsec; stat->ctime.tv_sec = ip->i_d.di_ctime.t_sec; stat->ctime.tv_nsec = ip->i_d.di_ctime.t_nsec; stat->blocks = XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks); switch (inode->i_mode & S_IFMT) { case S_IFBLK: case S_IFCHR: stat->blksize = BLKDEV_IOSIZE; stat->rdev = MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff, sysv_minor(ip->i_df.if_u2.if_rdev)); break; default: if (XFS_IS_REALTIME_INODE(ip)) { /* * If the file blocks are being allocated from a * realtime volume, then return the inode's realtime * extent size or the realtime volume's extent size. */ stat->blksize = xfs_get_extsz_hint(ip) << mp->m_sb.sb_blocklog; } else stat->blksize = xfs_preferred_iosize(mp); stat->rdev = 0; break; } return 0; }
STATIC int xfs_inode_free_eofblocks( struct xfs_inode *ip, int flags, void *args) { int ret; struct xfs_eofblocks *eofb = args; bool need_iolock = true; int match; ASSERT(!eofb || (eofb && eofb->eof_scan_owner != 0)); if (!xfs_can_free_eofblocks(ip, false)) { /* inode could be preallocated or append-only */ trace_xfs_inode_free_eofblocks_invalid(ip); xfs_inode_clear_eofblocks_tag(ip); return 0; } /* * If the mapping is dirty the operation can block and wait for some * time. Unless we are waiting, skip it. */ if (!(flags & SYNC_WAIT) && mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY)) return 0; if (eofb) { if (eofb->eof_flags & XFS_EOF_FLAGS_UNION) match = xfs_inode_match_id_union(ip, eofb); else match = xfs_inode_match_id(ip, eofb); if (!match) return 0; /* skip the inode if the file size is too small */ if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE && XFS_ISIZE(ip) < eofb->eof_min_file_size) return 0; /* * A scan owner implies we already hold the iolock. Skip it in * xfs_free_eofblocks() to avoid deadlock. This also eliminates * the possibility of EAGAIN being returned. */ if (eofb->eof_scan_owner == ip->i_ino) need_iolock = false; } ret = xfs_free_eofblocks(ip->i_mount, ip, need_iolock); /* don't revisit the inode if we're not waiting */ if (ret == -EAGAIN && !(flags & SYNC_WAIT)) ret = 0; return ret; }
/* * Automatic CoW Reservation Freeing * * These functions automatically garbage collect leftover CoW reservations * that were made on behalf of a cowextsize hint when we start to run out * of quota or when the reservations sit around for too long. If the file * has dirty pages or is undergoing writeback, its CoW reservations will * be retained. * * The actual garbage collection piggybacks off the same code that runs * the speculative EOF preallocation garbage collector. */ STATIC int xfs_inode_free_cowblocks( struct xfs_inode *ip, int flags, void *args) { int ret; struct xfs_eofblocks *eofb = args; int match; struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); /* * Just clear the tag if we have an empty cow fork or none at all. It's * possible the inode was fully unshared since it was originally tagged. */ if (!xfs_is_reflink_inode(ip) || !ifp->if_bytes) { trace_xfs_inode_free_cowblocks_invalid(ip); xfs_inode_clear_cowblocks_tag(ip); return 0; } /* * If the mapping is dirty or under writeback we cannot touch the * CoW fork. Leave it alone if we're in the midst of a directio. */ if ((VFS_I(ip)->i_state & I_DIRTY_PAGES) || mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY) || mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_WRITEBACK) || atomic_read(&VFS_I(ip)->i_dio_count)) return 0; if (eofb) { if (eofb->eof_flags & XFS_EOF_FLAGS_UNION) match = xfs_inode_match_id_union(ip, eofb); else match = xfs_inode_match_id(ip, eofb); if (!match) return 0; /* skip the inode if the file size is too small */ if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE && XFS_ISIZE(ip) < eofb->eof_min_file_size) return 0; } /* Free the CoW blocks */ xfs_ilock(ip, XFS_IOLOCK_EXCL); xfs_ilock(ip, XFS_MMAPLOCK_EXCL); ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, false); xfs_iunlock(ip, XFS_MMAPLOCK_EXCL); xfs_iunlock(ip, XFS_IOLOCK_EXCL); return ret; }
/*
 * Bring the Linux inode's i_size in line with the size recorded in the
 * XFS inode.  Nothing is written when the two already agree.
 */
STATIC void
xfs_validate_fields(
	struct inode		*inode)
{
	/* we're under i_sem so i_size can't change under us */
	loff_t			xfs_size = XFS_ISIZE(XFS_I(inode));

	if (i_size_read(inode) == xfs_size)
		return;

	i_size_write(inode, xfs_size);
}
STATIC int xfs_inode_free_eofblocks( struct xfs_inode *ip, int flags, void *args) { int ret = 0; struct xfs_eofblocks *eofb = args; int match; if (!xfs_can_free_eofblocks(ip, false)) { /* inode could be preallocated or append-only */ trace_xfs_inode_free_eofblocks_invalid(ip); xfs_inode_clear_eofblocks_tag(ip); return 0; } /* * If the mapping is dirty the operation can block and wait for some * time. Unless we are waiting, skip it. */ if (!(flags & SYNC_WAIT) && mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY)) return 0; if (eofb) { if (eofb->eof_flags & XFS_EOF_FLAGS_UNION) match = xfs_inode_match_id_union(ip, eofb); else match = xfs_inode_match_id(ip, eofb); if (!match) return 0; /* skip the inode if the file size is too small */ if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE && XFS_ISIZE(ip) < eofb->eof_min_file_size) return 0; } /* * If the caller is waiting, return -EAGAIN to keep the background * scanner moving and revisit the inode in a subsequent pass. */ if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) { if (flags & SYNC_WAIT) ret = -EAGAIN; return ret; } ret = xfs_free_eofblocks(ip); xfs_iunlock(ip, XFS_IOLOCK_EXCL); return ret; }
/*
 * Wait for writeback on the byte range [@first, @last] of @ip's page cache
 * if the mapping has any page tagged for writeback.  A @last of -1 means
 * "up to the last byte of the file" (XFS_ISIZE(ip) - 1).
 *
 * Returns 0 when there was nothing to wait for, otherwise the negated
 * return value of filemap_fdatawait_range().
 */
int
xfs_wait_on_pages(
	xfs_inode_t	*ip,
	xfs_off_t	first,
	xfs_off_t	last)
{
	struct address_space	*mapping = VFS_I(ip)->i_mapping;
	xfs_off_t		end;

	if (!mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK))
		return 0;

	if (last == -1)
		end = XFS_ISIZE(ip) - 1;
	else
		end = last;

	return -filemap_fdatawait_range(mapping, first, end);
}
STATIC int xfs_vn_getattr( struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { struct inode *inode = dentry->d_inode; struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; trace_xfs_getattr(ip); if (XFS_FORCED_SHUTDOWN(mp)) return -XFS_ERROR(EIO); stat->size = XFS_ISIZE(ip); stat->dev = inode->i_sb->s_dev; stat->mode = ip->i_d.di_mode; stat->nlink = ip->i_d.di_nlink; stat->uid = ip->i_d.di_uid; stat->gid = ip->i_d.di_gid; stat->ino = ip->i_ino; stat->atime = inode->i_atime; stat->mtime = inode->i_mtime; stat->ctime = inode->i_ctime; stat->blocks = XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks); switch (inode->i_mode & S_IFMT) { case S_IFBLK: case S_IFCHR: stat->blksize = BLKDEV_IOSIZE; stat->rdev = MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff, sysv_minor(ip->i_df.if_u2.if_rdev)); break; default: if (XFS_IS_REALTIME_INODE(ip)) { stat->blksize = xfs_get_extsz_hint(ip) << mp->m_sb.sb_blocklog; } else stat->blksize = xfs_preferred_iosize(mp); stat->rdev = 0; break; } return 0; }
/* * Automatic CoW Reservation Freeing * * These functions automatically garbage collect leftover CoW reservations * that were made on behalf of a cowextsize hint when we start to run out * of quota or when the reservations sit around for too long. If the file * has dirty pages or is undergoing writeback, its CoW reservations will * be retained. * * The actual garbage collection piggybacks off the same code that runs * the speculative EOF preallocation garbage collector. */ STATIC int xfs_inode_free_cowblocks( struct xfs_inode *ip, int flags, void *args) { struct xfs_eofblocks *eofb = args; struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); int match; int ret = 0; if (!xfs_prep_free_cowblocks(ip, ifp)) return 0; if (eofb) { if (eofb->eof_flags & XFS_EOF_FLAGS_UNION) match = xfs_inode_match_id_union(ip, eofb); else match = xfs_inode_match_id(ip, eofb); if (!match) return 0; /* skip the inode if the file size is too small */ if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE && XFS_ISIZE(ip) < eofb->eof_min_file_size) return 0; } /* Free the CoW blocks */ xfs_ilock(ip, XFS_IOLOCK_EXCL); xfs_ilock(ip, XFS_MMAPLOCK_EXCL); /* * Check again, nobody else should be able to dirty blocks or change * the reflink iflag now that we have the first two locks held. */ if (xfs_prep_free_cowblocks(ip, ifp)) ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, false); xfs_iunlock(ip, XFS_MMAPLOCK_EXCL); xfs_iunlock(ip, XFS_IOLOCK_EXCL); return ret; }
STATIC int xfs_inode_free_eofblocks( struct xfs_inode *ip, struct xfs_perag *pag, int flags, void *args) { int ret; struct xfs_eofblocks *eofb = args; if (!xfs_can_free_eofblocks(ip, false)) { /* inode could be preallocated or append-only */ trace_xfs_inode_free_eofblocks_invalid(ip); xfs_inode_clear_eofblocks_tag(ip); return 0; } /* * If the mapping is dirty the operation can block and wait for some * time. Unless we are waiting, skip it. */ if (!(flags & SYNC_WAIT) && mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY)) return 0; if (eofb) { if (!xfs_inode_match_id(ip, eofb)) return 0; /* skip the inode if the file size is too small */ if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE && XFS_ISIZE(ip) < eofb->eof_min_file_size) return 0; } ret = xfs_free_eofblocks(ip->i_mount, ip, true); /* don't revisit the inode if we're not waiting */ if (ret == EAGAIN && !(flags & SYNC_WAIT)) ret = 0; return ret; }
/*
 * Finish one getbmapx output record for a mapping that starts at
 * @startblock.
 *
 * For a hole, bmv_block is set to -1 and, on a preallocated file whose
 * record ends exactly at @end, the record length is trimmed to what is left
 * below EOF.  For anything else, bmv_block is set to -2 (delayed
 * allocation) or to the disk address, and BMV_OF_LAST is set when the
 * record maps to the last extent of the data fork.
 *
 * Returns 1 for success, 0 if we failed to map the extent (the trimmed
 * hole would be empty).
 */
STATIC int
xfs_getbmapx_fix_eof_hole(
	xfs_inode_t		*ip,		/* xfs incore inode pointer */
	struct getbmapx		*out,		/* output structure */
	int			prealloced,	/* this is a file with
						 * preallocated data space */
	__int64_t		end,		/* last block requested */
	xfs_fsblock_t		startblock)
{
	xfs_mount_t		*mp = ip->i_mount;
	xfs_ifork_t		*ifp;
	xfs_extnum_t		lastx;
	xfs_fileoff_t		fileblock;
	__int64_t		fixlen;

	if (startblock != HOLESTARTBLOCK) {
		if (startblock == DELAYSTARTBLOCK)
			out->bmv_block = -2;
		else
			out->bmv_block = xfs_fsb_to_db(ip, startblock);

		/* Flag the record if it falls in the fork's final extent. */
		fileblock = XFS_BB_TO_FSB(mp, out->bmv_offset);
		ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
		if (xfs_iext_bno_to_ext(ifp, fileblock, &lastx) &&
		    (lastx == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))-1))
			out->bmv_oflags |= BMV_OF_LAST;

		return 1;
	}

	out->bmv_block = -1;

	/* Basic blocks between the record's start and the rounded-up EOF. */
	fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, XFS_ISIZE(ip)));
	fixlen -= out->bmv_offset;

	if (!prealloced || out->bmv_offset + out->bmv_length != end)
		return 1;

	/* Came to hole at EOF. Trim it. */
	if (fixlen <= 0)
		return 0;
	out->bmv_length = fixlen;

	return 1;
}
/*
 * Handle the XFS space-management ioctls on a regular file: zero range,
 * reserve/unreserve space, and alloc/free space (truncate to an offset).
 *
 * @ip/@inode: XFS and Linux views of the target inode.
 * @filp:      open file; must be writable, supplies f_pos and O_DSYNC.
 * @ioflags:   XFS_IO_INVIS requests an "invisible" flag update.
 * @cmd:       one of the XFS_IOC_* commands handled in the switches below.
 * @bf:        caller's range description; l_start is rewritten in place to
 *             an absolute offset and l_len may be reset to 0.
 *
 * The iolock and then the mmaplock are taken exclusively for the duration
 * of the operation.  Returns 0 on success; the checks made directly in this
 * function fail with negative errno values, and errors from the helpers
 * are passed through unchanged.
 */
int
xfs_ioc_space(
	struct xfs_inode	*ip,
	struct inode		*inode,
	struct file		*filp,
	int			ioflags,
	unsigned int		cmd,
	xfs_flock64_t		*bf)
{
	struct iattr		iattr;
	enum xfs_prealloc_flags	flags = 0;
	uint			iolock = XFS_IOLOCK_EXCL;
	int			error;

	/*
	 * Only allow the sys admin to reserve space unless
	 * unwritten extents are enabled.
	 */
	if (!xfs_sb_version_hasextflgbit(&ip->i_mount->m_sb) &&
	    !capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (inode->i_flags & (S_IMMUTABLE|S_APPEND))
		return -EPERM;

	if (!(filp->f_mode & FMODE_WRITE))
		return -EBADF;

	if (!S_ISREG(inode->i_mode))
		return -EINVAL;

	/* Collect the flags for the final xfs_update_prealloc_flags() call. */
	if (filp->f_flags & O_DSYNC)
		flags |= XFS_PREALLOC_SYNC;
	if (ioflags & XFS_IO_INVIS)
		flags |= XFS_PREALLOC_INVISIBLE;

	error = mnt_want_write_file(filp);
	if (error)
		return error;

	/*
	 * xfs_break_layouts() is handed &iolock, so it may change which lock
	 * mode we hold; out_unlock always releases whatever iolock says.
	 */
	xfs_ilock(ip, iolock);
	error = xfs_break_layouts(inode, &iolock, false);
	if (error)
		goto out_unlock;

	xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
	iolock |= XFS_MMAPLOCK_EXCL;

	/* Turn l_start into an absolute file offset. */
	switch (bf->l_whence) {
	case 0: /*SEEK_SET*/
		break;
	case 1: /*SEEK_CUR*/
		bf->l_start += filp->f_pos;
		break;
	case 2: /*SEEK_END*/
		bf->l_start += XFS_ISIZE(ip);
		break;
	default:
		error = -EINVAL;
		goto out_unlock;
	}

	/*
	 * length of <= 0 for resv/unresv/zero is invalid.  length for
	 * alloc/free is ignored completely and we have no idea what userspace
	 * might have set it to, so set it to zero to allow range
	 * checks to pass.
	 */
	switch (cmd) {
	case XFS_IOC_ZERO_RANGE:
	case XFS_IOC_RESVSP:
	case XFS_IOC_RESVSP64:
	case XFS_IOC_UNRESVSP:
	case XFS_IOC_UNRESVSP64:
		if (bf->l_len <= 0) {
			error = -EINVAL;
			goto out_unlock;
		}
		break;
	default:
		bf->l_len = 0;
		break;
	}

	/* Reject ranges that are negative or reach the maximum file size. */
	if (bf->l_start < 0 ||
	    bf->l_start > inode->i_sb->s_maxbytes ||
	    bf->l_start + bf->l_len < 0 ||
	    bf->l_start + bf->l_len >= inode->i_sb->s_maxbytes) {
		error = -EINVAL;
		goto out_unlock;
	}

	switch (cmd) {
	case XFS_IOC_ZERO_RANGE:
		flags |= XFS_PREALLOC_SET;
		error = xfs_zero_file_space(ip, bf->l_start, bf->l_len);
		break;
	case XFS_IOC_RESVSP:
	case XFS_IOC_RESVSP64:
		flags |= XFS_PREALLOC_SET;
		error = xfs_alloc_file_space(ip, bf->l_start, bf->l_len,
						XFS_BMAPI_PREALLOC);
		break;
	case XFS_IOC_UNRESVSP:
	case XFS_IOC_UNRESVSP64:
		error = xfs_free_file_space(ip, bf->l_start, bf->l_len);
		break;
	case XFS_IOC_ALLOCSP:
	case XFS_IOC_ALLOCSP64:
	case XFS_IOC_FREESP:
	case XFS_IOC_FREESP64:
		flags |= XFS_PREALLOC_CLEAR;
		/*
		 * When growing the file, allocate the space between the
		 * current EOF and the new offset before changing the size.
		 */
		if (bf->l_start > XFS_ISIZE(ip)) {
			error = xfs_alloc_file_space(ip, XFS_ISIZE(ip),
					bf->l_start - XFS_ISIZE(ip), 0);
			if (error)
				goto out_unlock;
		}

		/* Set the file size to l_start; only ia_valid/ia_size are filled in. */
		iattr.ia_valid = ATTR_SIZE;
		iattr.ia_size = bf->l_start;

		error = xfs_setattr_size(ip, &iattr);
		break;
	default:
		ASSERT(0);
		error = -EINVAL;
	}

	if (error)
		goto out_unlock;

	error = xfs_update_prealloc_flags(ip, flags);

out_unlock:
	xfs_iunlock(ip, iolock);
	mnt_drop_write_file(filp);
	return error;
}
/*
 * This is called by xfs_inactive to free any blocks beyond eof
 * when the link count isn't zero and by xfs_dm_punch_hole() when
 * punching a hole to EOF.
 *
 * @need_iolock: when true, the iolock is taken here with a trylock and
 *               dropped again before returning; when false, no iolock
 *               calls are made in this function.
 *
 * Returns 0 if there was nothing to free or the truncate committed,
 * EAGAIN (positive) if @need_iolock is set and the iolock could not be
 * taken without blocking, otherwise the error from the failing step.
 * The eofblocks tag is cleared only after a successful commit.
 */
int
xfs_free_eofblocks(
	xfs_mount_t	*mp,
	xfs_inode_t	*ip,
	bool		need_iolock)
{
	xfs_trans_t	*tp;
	int		error;
	xfs_fileoff_t	end_fsb;
	xfs_fileoff_t	last_fsb;
	xfs_filblks_t	map_len;
	int		nimaps;
	xfs_bmbt_irec_t	imap;

	/*
	 * Figure out if there are any blocks beyond the end
	 * of the file.  If not, then there is nothing to do.
	 */
	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_ISIZE(ip));
	last_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
	if (last_fsb <= end_fsb)
		return 0;
	map_len = last_fsb - end_fsb;

	/* Look up the first mapping past EOF under the shared ilock. */
	nimaps = 1;
	xfs_ilock(ip, XFS_ILOCK_SHARED);
	error = xfs_bmapi_read(ip, end_fsb, map_len, &imap, &nimaps, 0);
	xfs_iunlock(ip, XFS_ILOCK_SHARED);

	/*
	 * Only go on if the lookup found something other than a hole, or
	 * the inode still has delayed-allocation blocks.
	 */
	if (!error && (nimaps != 0) &&
	    (imap.br_startblock != HOLESTARTBLOCK ||
	     ip->i_delayed_blks)) {
		/*
		 * Attach the dquots to the inode up front.
		 */
		error = xfs_qm_dqattach(ip, 0);
		if (error)
			return error;

		/*
		 * There are blocks after the end of file.
		 * Free them up now by truncating the file to
		 * its current size.
		 */
		tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);

		/* Trylock only: give up with EAGAIN rather than sleep. */
		if (need_iolock) {
			if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
				xfs_trans_cancel(tp, 0);
				return EAGAIN;
			}
		}

		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
		if (error) {
			ASSERT(XFS_FORCED_SHUTDOWN(mp));
			xfs_trans_cancel(tp, 0);
			if (need_iolock)
				xfs_iunlock(ip, XFS_IOLOCK_EXCL);
			return error;
		}

		xfs_ilock(ip, XFS_ILOCK_EXCL);
		xfs_trans_ijoin(tp, ip, 0);

		/*
		 * Do not update the on-disk file size.  If we update the
		 * on-disk file size and then the system crashes before the
		 * contents of the file are flushed to disk then the files
		 * may be full of holes (ie NULL files bug).
		 */
		error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK,
					      XFS_ISIZE(ip));
		if (error) {
			/*
			 * If we get an error at this point we simply don't
			 * bother truncating the file.
			 */
			xfs_trans_cancel(tp,
					 (XFS_TRANS_RELEASE_LOG_RES |
					  XFS_TRANS_ABORT));
		} else {
			error = xfs_trans_commit(tp,
						XFS_TRANS_RELEASE_LOG_RES);
			if (!error)
				xfs_inode_clear_eofblocks_tag(ip);
		}

		/* Drop the locks in the reverse order they were taken. */
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
		if (need_iolock)
			xfs_iunlock(ip, XFS_IOLOCK_EXCL);
	}
	return error;
}
/*
 * Get inode's extents as described in bmv, and format for output.
 * Calls formatter to fill the user's buffer until all extents
 * are mapped, until the passed-in bmv->bmv_count slots have
 * been filled, or until the formatter short-circuits the loop,
 * if it is tracking filled-in extents on its own.
 *
 * The records are first collected into a kernel array (out[]) with the
 * inode locked; the formatter is only called after all locks have been
 * dropped.  bmv->bmv_offset, bmv_length and bmv_entries are updated as
 * records are produced.
 *
 * Returns 0 on success, otherwise a positive error value (XFS_ERROR(),
 * ENOMEM, a negated filemap_write_and_wait() result, or whatever
 * xfs_bmapi_read() or the formatter returns).
 */
int						/* error code */
xfs_getbmap(
	xfs_inode_t		*ip,
	struct getbmapx		*bmv,		/* user bmap structure */
	xfs_bmap_format_t	formatter,	/* format to user */
	void			*arg)		/* formatter arg */
{
	__int64_t		bmvend;		/* last block requested */
	int			error = 0;	/* return value */
	__int64_t		fixlen;		/* length for -1 case */
	int			i;		/* extent number */
	int			lock;		/* lock state */
	xfs_bmbt_irec_t		*map;		/* buffer for user's data */
	xfs_mount_t		*mp;		/* file system mount point */
	int			nex;		/* # of user extents can do */
	int			nexleft;	/* # of user extents left */
	int			subnex;		/* # of bmapi's can do */
	int			nmap;		/* number of map entries */
	struct getbmapx		*out;		/* output structure */
	int			whichfork;	/* data or attr fork */
	int			prealloced;	/* this is a file with
						 * preallocated data space */
	int			iflags;		/* interface flags */
	int			bmapi_flags;	/* flags for xfs_bmapi */
	int			cur_ext = 0;	/* next free slot in out[] */

	mp = ip->i_mount;
	iflags = bmv->bmv_iflags;
	whichfork = iflags & BMV_IF_ATTRFORK ? XFS_ATTR_FORK : XFS_DATA_FORK;

	/*
	 * Validate the fork format and work out the length to use when the
	 * caller passed bmv_length == -1.
	 */
	if (whichfork == XFS_ATTR_FORK) {
		if (XFS_IFORK_Q(ip)) {
			if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS &&
			    ip->i_d.di_aformat != XFS_DINODE_FMT_BTREE &&
			    ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)
				return XFS_ERROR(EINVAL);
		} else if (unlikely(
			   ip->i_d.di_aformat != 0 &&
			   ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS)) {
			XFS_ERROR_REPORT("xfs_getbmap", XFS_ERRLEVEL_LOW,
					 ip->i_mount);
			return XFS_ERROR(EFSCORRUPTED);
		}

		prealloced = 0;
		fixlen = 1LL << 32;
	} else {
		if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS &&
		    ip->i_d.di_format != XFS_DINODE_FMT_BTREE &&
		    ip->i_d.di_format != XFS_DINODE_FMT_LOCAL)
			return XFS_ERROR(EINVAL);

		/*
		 * Files with an extent size hint or the PREALLOC/APPEND
		 * flags are treated as preallocated and mapped up to the
		 * maximum file size rather than up to EOF.
		 */
		if (xfs_get_extsz_hint(ip) ||
		    ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){
			prealloced = 1;
			fixlen = mp->m_super->s_maxbytes;
		} else {
			prealloced = 0;
			fixlen = XFS_ISIZE(ip);
		}
	}

	/* -1 means "to the end"; 0 means nothing to do; other negatives are invalid. */
	if (bmv->bmv_length == -1) {
		fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, fixlen));
		bmv->bmv_length =
			max_t(__int64_t, fixlen - bmv->bmv_offset, 0);
	} else if (bmv->bmv_length == 0) {
		bmv->bmv_entries = 0;
		return 0;
	} else if (bmv->bmv_length < 0) {
		return XFS_ERROR(EINVAL);
	}

	/* One of the bmv_count slots is not available for extent records. */
	nex = bmv->bmv_count - 1;
	if (nex <= 0)
		return XFS_ERROR(EINVAL);
	bmvend = bmv->bmv_offset + bmv->bmv_length;

	/* Guard the size multiplication below against overflow. */
	if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx))
		return XFS_ERROR(ENOMEM);
	out = kmem_zalloc_large(bmv->bmv_count * sizeof(struct getbmapx), 0);
	if (!out)
		return XFS_ERROR(ENOMEM);

	xfs_ilock(ip, XFS_IOLOCK_SHARED);
	if (whichfork == XFS_DATA_FORK) {
		/*
		 * Unless the caller asked to see delalloc extents, write
		 * back dirty data first so they get converted.
		 */
		if (!(iflags & BMV_IF_DELALLOC) &&
		    (ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_d.di_size)) {
			error = -filemap_write_and_wait(VFS_I(ip)->i_mapping);
			if (error)
				goto out_unlock_iolock;

			/*
			 * Even after flushing the inode, there can still be
			 * delalloc blocks on the inode beyond EOF due to
			 * speculative preallocation.  These are not removed
			 * until the release function is called or the inode
			 * is inactivated.  Hence we cannot assert here that
			 * ip->i_delayed_blks == 0.
			 */
		}

		lock = xfs_ilock_data_map_shared(ip);
	} else {
		lock = xfs_ilock_attr_map_shared(ip);
	}

	/*
	 * Don't let nex be bigger than the number of extents
	 * we can have assuming alternating holes and real extents.
	 */
	if (nex > XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1)
		nex = XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1;

	bmapi_flags = xfs_bmapi_aflag(whichfork);
	if (!(iflags & BMV_IF_PREALLOC))
		bmapi_flags |= XFS_BMAPI_IGSTATE;

	/*
	 * Allocate enough space to handle "subnex" maps at a time.
	 */
	error = ENOMEM;
	subnex = 16;
	map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL | KM_NOFS);
	if (!map)
		goto out_unlock_ilock;

	bmv->bmv_entries = 0;

	/*
	 * A fork without extents has nothing to report, unless this is the
	 * data fork and the caller wants delalloc extents.
	 */
	if (XFS_IFORK_NEXTENTS(ip, whichfork) == 0 &&
	    (whichfork == XFS_ATTR_FORK || !(iflags & BMV_IF_DELALLOC))) {
		error = 0;
		goto out_free_map;
	}

	nexleft = nex;

	/* Map the requested range in batches of at most subnex extents. */
	do {
		nmap = (nexleft > subnex) ? subnex : nexleft;
		error = xfs_bmapi_read(ip, XFS_BB_TO_FSBT(mp, bmv->bmv_offset),
				       XFS_BB_TO_FSB(mp, bmv->bmv_length),
				       map, &nmap, bmapi_flags);
		if (error)
			goto out_free_map;
		ASSERT(nmap <= subnex);

		for (i = 0; i < nmap && nexleft && bmv->bmv_length; i++) {
			out[cur_ext].bmv_oflags = 0;
			if (map[i].br_state == XFS_EXT_UNWRITTEN)
				out[cur_ext].bmv_oflags |= BMV_OF_PREALLOC;
			else if (map[i].br_startblock == DELAYSTARTBLOCK)
				out[cur_ext].bmv_oflags |= BMV_OF_DELALLOC;
			out[cur_ext].bmv_offset =
				XFS_FSB_TO_BB(mp, map[i].br_startoff);
			out[cur_ext].bmv_length =
				XFS_FSB_TO_BB(mp, map[i].br_blockcount);
			out[cur_ext].bmv_unused1 = 0;
			out[cur_ext].bmv_unused2 = 0;

			/*
			 * delayed allocation extents that start beyond EOF can
			 * occur due to speculative EOF allocation when the
			 * delalloc extent is larger than the largest freespace
			 * extent at conversion time.  These extents cannot be
			 * converted by data writeback, so can exist here even
			 * if we are not supposed to be finding delalloc
			 * extents.
			 */
			if (map[i].br_startblock == DELAYSTARTBLOCK &&
			    map[i].br_startoff <= XFS_B_TO_FSB(mp, XFS_ISIZE(ip)))
				ASSERT((iflags & BMV_IF_DELALLOC) != 0);

			if (map[i].br_startblock == HOLESTARTBLOCK &&
			    whichfork == XFS_ATTR_FORK) {
				/* came to the end of attribute fork */
				out[cur_ext].bmv_oflags |= BMV_OF_LAST;
				goto out_free_map;
			}

			if (!xfs_getbmapx_fix_eof_hole(ip, &out[cur_ext],
					prealloced, bmvend,
					map[i].br_startblock))
				goto out_free_map;

			/* Advance the request past the record just produced. */
			bmv->bmv_offset =
				out[cur_ext].bmv_offset +
				out[cur_ext].bmv_length;
			bmv->bmv_length =
				max_t(__int64_t, 0, bmvend - bmv->bmv_offset);

			/*
			 * In case we don't want to return the hole,
			 * don't increase cur_ext so that we can reuse
			 * it in the next loop.
			 */
			if ((iflags & BMV_IF_NO_HOLES) &&
			    map[i].br_startblock == HOLESTARTBLOCK) {
				memset(&out[cur_ext], 0, sizeof(out[cur_ext]));
				continue;
			}

			nexleft--;
			bmv->bmv_entries++;
			cur_ext++;
		}
	} while (nmap && nexleft && bmv->bmv_length);

 out_free_map:
	kmem_free(map);
 out_unlock_ilock:
	xfs_iunlock(ip, lock);
 out_unlock_iolock:
	xfs_iunlock(ip, XFS_IOLOCK_SHARED);

	/*
	 * Hand the collected records to the formatter now that the locks
	 * are dropped.
	 * NOTE(review): when cur_ext > 0 this loop overwrites any error left
	 * by xfs_bmapi_read() above -- confirm that is intended.
	 */
	for (i = 0; i < cur_ext; i++) {
		int full = 0;	/* user array is full */

		/* format results & advance arg */
		error = formatter(&arg, &out[i], &full);
		if (error || full)
			break;
	}

	kmem_free(out);
	return error;
}
/*
 * Zero file bytes between startoff and endoff inclusive.
 * The iolock is held exclusive and no blocks are buffered.
 *
 * This function is used by xfs_free_file_space() to zero
 * partial blocks when the range to free is not block aligned.
 * When unreserving space with boundaries that are not block
 * aligned we round up the start and round down the end
 * boundaries and then use this function to zero the parts of
 * the blocks that got dropped during the rounding.
 *
 * The work is done one filesystem block at a time through a single
 * uncached buffer: read the block, clear the affected bytes, write it
 * back.  Holes and unwritten extents are skipped.
 *
 * Returns 0 on success or the first error hit (ENOMEM, EIO on shutdown,
 * or an error from mapping or buffer I/O).
 */
STATIC int
xfs_zero_remaining_bytes(
	xfs_inode_t		*ip,
	xfs_off_t		startoff,
	xfs_off_t		endoff)
{
	xfs_bmbt_irec_t		imap;
	xfs_fileoff_t		offset_fsb;
	xfs_off_t		lastoffset;
	xfs_off_t		offset;
	xfs_buf_t		*bp;
	xfs_mount_t		*mp = ip->i_mount;
	int			nimap;
	int			error = 0;

	/*
	 * Avoid doing I/O beyond eof - it's not necessary
	 * since nothing can read beyond eof.  The space will
	 * be zeroed when the file is extended anyway.
	 */
	if (startoff >= XFS_ISIZE(ip))
		return 0;

	if (endoff > XFS_ISIZE(ip))
		endoff = XFS_ISIZE(ip);

	/* One block-sized buffer on the device that backs this file's data. */
	bp = xfs_buf_get_uncached(XFS_IS_REALTIME_INODE(ip) ?
					mp->m_rtdev_targp : mp->m_ddev_targp,
				  BTOBB(mp->m_sb.sb_blocksize), 0);
	if (!bp)
		return XFS_ERROR(ENOMEM);

	xfs_buf_unlock(bp);

	/* Each iteration handles the part of the range inside one block. */
	for (offset = startoff; offset <= endoff; offset = lastoffset + 1) {
		uint lock_mode;

		offset_fsb = XFS_B_TO_FSBT(mp, offset);
		nimap = 1;

		lock_mode = xfs_ilock_data_map_shared(ip);
		error = xfs_bmapi_read(ip, offset_fsb, 1, &imap, &nimap, 0);
		xfs_iunlock(ip, lock_mode);

		if (error || nimap < 1)
			break;
		ASSERT(imap.br_blockcount >= 1);
		ASSERT(imap.br_startoff == offset_fsb);

		/* Last byte to zero in this block, clamped to endoff. */
		lastoffset = XFS_FSB_TO_B(mp, imap.br_startoff + 1) - 1;
		if (lastoffset > endoff)
			lastoffset = endoff;

		/* Nothing is stored for a hole, so there is nothing to zero. */
		if (imap.br_startblock == HOLESTARTBLOCK)
			continue;
		ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
		if (imap.br_state == XFS_EXT_UNWRITTEN)
			continue;

		/* Set the buffer up as a read of the mapped disk block. */
		XFS_BUF_UNDONE(bp);
		XFS_BUF_UNWRITE(bp);
		XFS_BUF_READ(bp);
		XFS_BUF_SET_ADDR(bp, xfs_fsb_to_db(ip, imap.br_startblock));

		if (XFS_FORCED_SHUTDOWN(mp)) {
			error = XFS_ERROR(EIO);
			break;
		}
		xfs_buf_iorequest(bp);
		error = xfs_buf_iowait(bp);
		if (error) {
			xfs_buf_ioerror_alert(bp,
					"xfs_zero_remaining_bytes(read)");
			break;
		}

		/* Clear bytes [offset, lastoffset] within the block image. */
		memset(bp->b_addr +
			(offset - XFS_FSB_TO_B(mp, imap.br_startoff)),
		      0, lastoffset - offset + 1);

		/* Flip the buffer to a write and push the block back out. */
		XFS_BUF_UNDONE(bp);
		XFS_BUF_UNREAD(bp);
		XFS_BUF_WRITE(bp);

		if (XFS_FORCED_SHUTDOWN(mp)) {
			error = XFS_ERROR(EIO);
			break;
		}
		xfs_buf_iorequest(bp);
		error = xfs_buf_iowait(bp);
		if (error) {
			xfs_buf_ioerror_alert(bp,
					"xfs_zero_remaining_bytes(write)");
			break;
		}
	}
	xfs_buf_free(bp);
	return error;
}
/*
 * Prepare two files for range cloning.  On a successful return both inodes
 * have their iolock and mmaplock held, the page cache of the out file has
 * been truncated, and any leases on the out file have been broken.  This
 * function borrows heavily from xfs_file_aio_write_checks.
 *
 * The VFS lets a partial EOF block "match" for dedupe even though it has not
 * checked that the bytes beyond EOF physically match.  The EOF block of the
 * source dedupe range is therefore not a complete block match, and using it
 * could corrupt the file that has its block replaced.
 *
 * Similarly, VFS file cloning treats a partial EOF block as "block aligned"
 * so that entire files can be cloned.  If the source range includes the EOF
 * block and that block lands inside the existing EOF of the destination
 * file, stale data from beyond the source EOF could be exposed in the
 * destination file.
 *
 * XFS doesn't support partial block sharing, so in both cases we have to
 * check for this ourselves.  For dedupe, the length is simply rounded down
 * to the previous whole block and the partial EOF block is ignored.  This
 * means the last block of a file cannot be deduped, which is an acceptable
 * tradeoff for simplicity of implementation.
 *
 * For cloning, the partial EOF block is shared if it also becomes the new
 * EOF block of the destination file.  If it would lie inside the existing
 * destination EOF, the clone is rejected with -EINVAL to avoid exposing
 * stale data in the destination file.
 *
 * Returns 1 when the files are ready for remapping (locks held), 0 when
 * there is nothing to remap, or a negative errno; in the latter two cases
 * the locks taken here have been dropped again.
 */
int
xfs_reflink_remap_prep(
	struct file		*file_in,
	loff_t			pos_in,
	struct file		*file_out,
	loff_t			pos_out,
	loff_t			*len,
	unsigned int		remap_flags)
{
	struct inode		*inode_in = file_inode(file_in);
	struct inode		*inode_out = file_inode(file_out);
	struct xfs_inode	*src = XFS_I(inode_in);
	struct xfs_inode	*dest = XFS_I(inode_out);
	loff_t			flush_pos;
	loff_t			flush_len;
	ssize_t			ret;

	/* Lock both files against IO */
	ret = xfs_iolock_two_inodes_and_break_layout(inode_in, inode_out);
	if (ret)
		return ret;

	if (inode_in != inode_out)
		xfs_lock_two_inodes(src, XFS_MMAPLOCK_SHARED, dest,
				XFS_MMAPLOCK_EXCL);
	else
		xfs_ilock(src, XFS_MMAPLOCK_EXCL);

	/*
	 * Check file eligibility: realtime inodes are never reflinked and
	 * DAX file data is not shared for now.
	 */
	if (XFS_IS_REALTIME_INODE(src) || XFS_IS_REALTIME_INODE(dest) ||
	    IS_DAX(inode_in) || IS_DAX(inode_out)) {
		ret = -EINVAL;
		goto out_unlock;
	}

	ret = generic_remap_file_range_prep(file_in, pos_in, file_out, pos_out,
			len, remap_flags);
	if (ret < 0 || *len == 0)
		goto out_unlock;

	/* Attach dquots to dest inode before changing block map */
	ret = xfs_qm_dqattach(dest);
	if (ret)
		goto out_unlock;

	/*
	 * Zero existing post-eof speculative preallocations in the
	 * destination file.
	 */
	ret = xfs_reflink_zero_posteof(dest, pos_out);
	if (ret)
		goto out_unlock;

	/* Set flags and remap blocks. */
	ret = xfs_reflink_set_inode_flag(src, dest);
	if (ret)
		goto out_unlock;

	/*
	 * By default flush and unmap exactly the target range.  If pos_out
	 * is beyond EOF we may have dirtied blocks between EOF and pos_out,
	 * so start at EOF instead and extend the length to still reach the
	 * end of the copy.
	 */
	flush_pos = pos_out;
	flush_len = *len;
	if (pos_out > XFS_ISIZE(dest)) {
		flush_pos = XFS_ISIZE(dest);
		flush_len += pos_out - flush_pos;
	}

	ret = xfs_flush_unmap_range(dest, flush_pos, flush_len);
	if (ret)
		goto out_unlock;

	return 1;

out_unlock:
	xfs_reflink_remap_unlock(file_in, file_out);
	return ret;
}
/*
 * Handle the XFS space-management ioctls on a regular file: zero range,
 * reserve/unreserve space, and alloc/free space (truncate to an offset).
 *
 * @ip/@inode: XFS and Linux views of the target inode.
 * @filp:      open file; must be writable, supplies f_pos and O_DSYNC.
 * @ioflags:   IO_INVIS suppresses the setuid/setgid and timestamp updates.
 * @cmd:       one of the XFS_IOC_* commands handled in the switches below.
 * @bf:        caller's range description; l_start is rewritten in place to
 *             an absolute offset and l_len may be reset to 0.
 *
 * After the space operation a separate transaction logs the inode core
 * with the updated mode bits, timestamps and XFS_DIFLAG_PREALLOC state.
 *
 * Errors are tracked as positive values once the iolock is held and are
 * negated on return; the early checks return negated XFS_ERROR() values
 * and the mnt_want_write_file() result is returned as-is.
 */
int
xfs_ioc_space(
	struct xfs_inode	*ip,
	struct inode		*inode,
	struct file		*filp,
	int			ioflags,
	unsigned int		cmd,
	xfs_flock64_t		*bf)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_trans	*tp;
	struct iattr		iattr;
	bool			setprealloc = false;
	bool			clrprealloc = false;
	int			error;

	/*
	 * Only allow the sys admin to reserve space unless
	 * unwritten extents are enabled.
	 */
	if (!xfs_sb_version_hasextflgbit(&ip->i_mount->m_sb) &&
	    !capable(CAP_SYS_ADMIN))
		return -XFS_ERROR(EPERM);

	if (inode->i_flags & (S_IMMUTABLE|S_APPEND))
		return -XFS_ERROR(EPERM);

	if (!(filp->f_mode & FMODE_WRITE))
		return -XFS_ERROR(EBADF);

	if (!S_ISREG(inode->i_mode))
		return -XFS_ERROR(EINVAL);

	error = mnt_want_write_file(filp);
	if (error)
		return error;

	xfs_ilock(ip, XFS_IOLOCK_EXCL);

	/* Turn l_start into an absolute file offset. */
	switch (bf->l_whence) {
	case 0: /*SEEK_SET*/
		break;
	case 1: /*SEEK_CUR*/
		bf->l_start += filp->f_pos;
		break;
	case 2: /*SEEK_END*/
		bf->l_start += XFS_ISIZE(ip);
		break;
	default:
		error = XFS_ERROR(EINVAL);
		goto out_unlock;
	}

	/*
	 * length of <= 0 for resv/unresv/zero is invalid.  length for
	 * alloc/free is ignored completely and we have no idea what userspace
	 * might have set it to, so set it to zero to allow range
	 * checks to pass.
	 */
	switch (cmd) {
	case XFS_IOC_ZERO_RANGE:
	case XFS_IOC_RESVSP:
	case XFS_IOC_RESVSP64:
	case XFS_IOC_UNRESVSP:
	case XFS_IOC_UNRESVSP64:
		if (bf->l_len <= 0) {
			error = XFS_ERROR(EINVAL);
			goto out_unlock;
		}
		break;
	default:
		bf->l_len = 0;
		break;
	}

	/* Reject ranges that are negative or reach the maximum file size. */
	if (bf->l_start < 0 ||
	    bf->l_start > mp->m_super->s_maxbytes ||
	    bf->l_start + bf->l_len < 0 ||
	    bf->l_start + bf->l_len >= mp->m_super->s_maxbytes) {
		error = XFS_ERROR(EINVAL);
		goto out_unlock;
	}

	switch (cmd) {
	case XFS_IOC_ZERO_RANGE:
		error = xfs_zero_file_space(ip, bf->l_start, bf->l_len);
		if (!error)
			setprealloc = true;
		break;
	case XFS_IOC_RESVSP:
	case XFS_IOC_RESVSP64:
		error = xfs_alloc_file_space(ip, bf->l_start, bf->l_len,
						XFS_BMAPI_PREALLOC);
		if (!error)
			setprealloc = true;
		break;
	case XFS_IOC_UNRESVSP:
	case XFS_IOC_UNRESVSP64:
		error = xfs_free_file_space(ip, bf->l_start, bf->l_len);
		break;
	case XFS_IOC_ALLOCSP:
	case XFS_IOC_ALLOCSP64:
	case XFS_IOC_FREESP:
	case XFS_IOC_FREESP64:
		/*
		 * When growing the file, allocate the space between the
		 * current EOF and the new offset before changing the size.
		 */
		if (bf->l_start > XFS_ISIZE(ip)) {
			error = xfs_alloc_file_space(ip, XFS_ISIZE(ip),
					bf->l_start - XFS_ISIZE(ip), 0);
			if (error)
				goto out_unlock;
		}

		/* Set the file size to l_start; only ia_valid/ia_size are filled in. */
		iattr.ia_valid = ATTR_SIZE;
		iattr.ia_size = bf->l_start;

		error = xfs_setattr_size(ip, &iattr);
		if (!error)
			clrprealloc = true;
		break;
	default:
		ASSERT(0);
		error = XFS_ERROR(EINVAL);
	}

	if (error)
		goto out_unlock;

	/* Log the resulting inode core changes in their own transaction. */
	tp = xfs_trans_alloc(mp, XFS_TRANS_WRITEID);
	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_writeid, 0, 0);
	if (error) {
		xfs_trans_cancel(tp, 0);
		goto out_unlock;
	}

	/*
	 * The ilock is passed to xfs_trans_ijoin() and is not released
	 * explicitly anywhere in this function.
	 */
	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);

	/*
	 * For a visible operation, clear setuid (and setgid when group
	 * execute is set) and bump the modification/change times.
	 */
	if (!(ioflags & IO_INVIS)) {
		ip->i_d.di_mode &= ~S_ISUID;
		if (ip->i_d.di_mode & S_IXGRP)
			ip->i_d.di_mode &= ~S_ISGID;
		xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
	}

	if (setprealloc)
		ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC;
	else if (clrprealloc)
		ip->i_d.di_flags &= ~XFS_DIFLAG_PREALLOC;

	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

	/* O_DSYNC callers get a synchronous transaction commit. */
	if (filp->f_flags & O_DSYNC)
		xfs_trans_set_sync(tp);
	error = xfs_trans_commit(tp, 0);

out_unlock:
	xfs_iunlock(ip, XFS_IOLOCK_EXCL);
	mnt_drop_write_file(filp);
	/* Errors on this path are positive; hand back a negative errno. */
	return -error;
}
/*
 * Report the current size of @ip as given by XFS_ISIZE().
 */
STATIC xfs_fsize_t
xfs_size_fn(
	xfs_inode_t		*ip)
{
	xfs_fsize_t		isize = XFS_ISIZE(ip);

	return isize;
}
/* * Automatic CoW Reservation Freeing * * These functions automatically garbage collect leftover CoW reservations * that were made on behalf of a cowextsize hint when we start to run out * of quota or when the reservations sit around for too long. If the file * has dirty pages or is undergoing writeback, its CoW reservations will * be retained. * * The actual garbage collection piggybacks off the same code that runs * the speculative EOF preallocation garbage collector. */ STATIC int xfs_inode_free_cowblocks( struct xfs_inode *ip, int flags, void *args) { int ret; struct xfs_eofblocks *eofb = args; bool need_iolock = true; int match; ASSERT(!eofb || (eofb && eofb->eof_scan_owner != 0)); if (!xfs_reflink_has_real_cow_blocks(ip)) { trace_xfs_inode_free_cowblocks_invalid(ip); xfs_inode_clear_cowblocks_tag(ip); return 0; } /* * If the mapping is dirty or under writeback we cannot touch the * CoW fork. Leave it alone if we're in the midst of a directio. */ if (mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY) || mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_WRITEBACK) || atomic_read(&VFS_I(ip)->i_dio_count)) return 0; if (eofb) { if (eofb->eof_flags & XFS_EOF_FLAGS_UNION) match = xfs_inode_match_id_union(ip, eofb); else match = xfs_inode_match_id(ip, eofb); if (!match) return 0; /* skip the inode if the file size is too small */ if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE && XFS_ISIZE(ip) < eofb->eof_min_file_size) return 0; /* * A scan owner implies we already hold the iolock. Skip it in * xfs_free_eofblocks() to avoid deadlock. This also eliminates * the possibility of EAGAIN being returned. */ if (eofb->eof_scan_owner == ip->i_ino) need_iolock = false; } /* Free the CoW blocks */ if (need_iolock) { xfs_ilock(ip, XFS_IOLOCK_EXCL); xfs_ilock(ip, XFS_MMAPLOCK_EXCL); } ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF); if (need_iolock) { xfs_iunlock(ip, XFS_MMAPLOCK_EXCL); xfs_iunlock(ip, XFS_IOLOCK_EXCL); } return ret; }