int copy_extent_to_buffer(xfs_mount_t *mp, xfs_bmbt_irec_t rec, void *buffer, off_t offset, size_t len) { xfs_buf_t *block_buffer; int64_t copylen, copy_start; xfs_daddr_t block, start, end; char *src; off_t cofs = offset; xfs_off_t block_start; xfs_daddr_t block_size = XFS_FSB_TO_B(mp, 1); //xfs_daddr_t extent_size = XFS_FSB_TO_B(mp, rec.br_blockcount); xfs_daddr_t extent_start = XFS_FSB_TO_B(mp, rec.br_startoff); /* compute a block to start reading from */ if (offset >= extent_start) { start = XFS_B_TO_FSBT(mp, offset - extent_start); } else { buffer = buffer + extent_start - offset; cofs += extent_start - offset; start = 0; } end = min(rec.br_blockcount, XFS_B_TO_FSBT(mp, offset + len - extent_start - 1) + 1); for (block=start; block<end; block++) { block_start = XFS_FSB_TO_B(mp, (rec.br_startoff + block)); block_buffer = libxfs_readbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, (rec.br_startblock + block)), XFS_FSB_TO_BB(mp, 1), 0); if (block_buffer == NULL) { printf("Buffer error\n"); return XFS_ERROR(EIO); } src = block_buffer->b_addr; copy_start = block_start; copylen = block_size; if (block_start < offset) { copylen = block_size + block_start - offset; copy_start = (block_size - copylen) + block_start; src = block_buffer->b_addr + (block_size - copylen); } if ((block_start + block_size) > (offset + len)) { copylen = offset + len - copy_start; } if (copylen > 0) { memcpy(buffer, src, copylen); buffer += copylen; cofs += copylen; } libxfs_putbuf(block_buffer); } return 0; }
int xfs_ioc_trim( struct xfs_mount *mp, struct fstrim_range __user *urange) { struct request_queue *q = mp->m_ddev_targp->bt_bdev->bd_disk->queue; unsigned int granularity = q->limits.discard_granularity; struct fstrim_range range; xfs_fsblock_t start, len, minlen; xfs_agnumber_t start_agno, end_agno, agno; __uint64_t blocks_trimmed = 0; int error, last_error = 0; if (!capable(CAP_SYS_ADMIN)) return -XFS_ERROR(EPERM); if (!blk_queue_discard(q)) return -XFS_ERROR(EOPNOTSUPP); if (copy_from_user(&range, urange, sizeof(range))) return -XFS_ERROR(EFAULT); /* * Truncating down the len isn't actually quite correct, but using * XFS_B_TO_FSB would mean we trivially get overflows for values * of ULLONG_MAX or slightly lower. And ULLONG_MAX is the default * used by the fstrim application. In the end it really doesn't * matter as trimming blocks is an advisory interface. */ start = XFS_B_TO_FSBT(mp, range.start); len = XFS_B_TO_FSBT(mp, range.len); minlen = XFS_B_TO_FSB(mp, max_t(u64, granularity, range.minlen)); start_agno = XFS_FSB_TO_AGNO(mp, start); if (start_agno >= mp->m_sb.sb_agcount) return -XFS_ERROR(EINVAL); end_agno = XFS_FSB_TO_AGNO(mp, start + len); if (end_agno >= mp->m_sb.sb_agcount) end_agno = mp->m_sb.sb_agcount - 1; for (agno = start_agno; agno <= end_agno; agno++) { error = -xfs_trim_extents(mp, agno, start, len, minlen, &blocks_trimmed); if (error) last_error = error; } if (last_error) return last_error; range.len = XFS_FSB_TO_B(mp, blocks_trimmed); if (copy_to_user(urange, &range, sizeof(range))) return -XFS_ERROR(EFAULT); return 0; }
/* Convert all of the unwritten CoW extents in a file's range to real ones. */ int xfs_reflink_convert_cow( struct xfs_inode *ip, xfs_off_t offset, xfs_off_t count) { struct xfs_mount *mp = ip->i_mount; xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset + count); xfs_filblks_t count_fsb = end_fsb - offset_fsb; struct xfs_bmbt_irec imap; struct xfs_defer_ops dfops; xfs_fsblock_t first_block = NULLFSBLOCK; int nimaps = 1, error = 0; ASSERT(count != 0); xfs_ilock(ip, XFS_ILOCK_EXCL); error = xfs_bmapi_write(NULL, ip, offset_fsb, count_fsb, XFS_BMAPI_COWFORK | XFS_BMAPI_CONVERT | XFS_BMAPI_CONVERT_ONLY, &first_block, 0, &imap, &nimaps, &dfops); xfs_iunlock(ip, XFS_ILOCK_EXCL); return error; }
/* * This routine is called to handle zeroing any space in the last block of the * file that is beyond the EOF. We do this since the size is being increased * without writing anything to that block and we don't want to read the * garbage on the disk. */ STATIC int /* error (positive) */ xfs_zero_last_block( struct xfs_inode *ip, xfs_fsize_t offset, xfs_fsize_t isize) { struct xfs_mount *mp = ip->i_mount; xfs_fileoff_t last_fsb = XFS_B_TO_FSBT(mp, isize); int zero_offset = XFS_B_FSB_OFFSET(mp, isize); int zero_len; int nimaps = 1; int error = 0; struct xfs_bmbt_irec imap; xfs_ilock(ip, XFS_ILOCK_EXCL); error = xfs_bmapi_read(ip, last_fsb, 1, &imap, &nimaps, 0); xfs_iunlock(ip, XFS_ILOCK_EXCL); if (error) return error; ASSERT(nimaps > 0); /* * If the block underlying isize is just a hole, then there * is nothing to zero. */ if (imap.br_startblock == HOLESTARTBLOCK) return 0; zero_len = mp->m_sb.sb_blocksize - zero_offset; if (isize + zero_len > offset) zero_len = offset - isize; return xfs_iozero(ip, isize, zero_len); }
/* Convert all of the unwritten CoW extents in a file's range to real ones. */ int xfs_reflink_convert_cow( struct xfs_inode *ip, xfs_off_t offset, xfs_off_t count) { struct xfs_bmbt_irec got; struct xfs_defer_ops dfops; struct xfs_mount *mp = ip->i_mount; struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset + count); xfs_extnum_t idx; bool found; int error = 0; xfs_ilock(ip, XFS_ILOCK_EXCL); /* Convert all the extents to real from unwritten. */ for (found = xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got); found && got.br_startoff < end_fsb; found = xfs_iext_get_extent(ifp, ++idx, &got)) { error = xfs_reflink_convert_cow_extent(ip, &got, offset_fsb, end_fsb - offset_fsb, &dfops); if (error) break; } /* Finish up. */ xfs_iunlock(ip, XFS_ILOCK_EXCL); return error; }
/* * Find the CoW reservation for a given byte offset of a file. */ bool xfs_reflink_find_cow_mapping( struct xfs_inode *ip, xfs_off_t offset, struct xfs_bmbt_irec *imap) { struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); xfs_fileoff_t offset_fsb; struct xfs_bmbt_irec got; xfs_extnum_t idx; ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED)); ASSERT(xfs_is_reflink_inode(ip)); offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset); if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got)) return false; if (got.br_startoff > offset_fsb) return false; trace_xfs_reflink_find_cow_mapping(ip, offset, 1, XFS_IO_OVERWRITE, &got); *imap = got; return true; }
/* * This routine is called to handle zeroing any space in the last * block of the file that is beyond the EOF. We do this since the * size is being increased without writing anything to that block * and we don't want anyone to read the garbage on the disk. */ STATIC int /* error (positive) */ xfs_zero_last_block( xfs_vnode_t *vp, xfs_iocore_t *io, xfs_fsize_t isize, xfs_fsize_t end_size) { xfs_fileoff_t last_fsb; xfs_mount_t *mp; int nimaps; int zero_offset; int zero_len; int error = 0; xfs_bmbt_irec_t imap; xfs_off_t loff; ASSERT(ismrlocked(io->io_lock, MR_UPDATE) != 0); mp = io->io_mount; zero_offset = XFS_B_FSB_OFFSET(mp, isize); if (zero_offset == 0) { /* * There are no extra bytes in the last block on disk to * zero, so return. */ return 0; } last_fsb = XFS_B_TO_FSBT(mp, isize); nimaps = 1; error = XFS_BMAPI(mp, NULL, io, last_fsb, 1, 0, NULL, 0, &imap, &nimaps, NULL, NULL); if (error) { return error; } ASSERT(nimaps > 0); /* * If the block underlying isize is just a hole, then there * is nothing to zero. */ if (imap.br_startblock == HOLESTARTBLOCK) { return 0; } /* * Zero the part of the last block beyond the EOF, and write it * out sync. We need to drop the ilock while we do this so we * don't deadlock when the buffer cache calls back to us. */ XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL| XFS_EXTSIZE_RD); loff = XFS_FSB_TO_B(mp, last_fsb); zero_len = mp->m_sb.sb_blocksize - zero_offset; error = xfs_iozero(vp, loff + zero_offset, zero_len, end_size); XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); ASSERT(error >= 0); return error; }
STATIC int xfs_map_blocks( struct inode *inode, loff_t offset, struct xfs_bmbt_irec *imap, int type) { struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; ssize_t count = 1 << inode->i_blkbits; xfs_fileoff_t offset_fsb, end_fsb; int error = 0; int bmapi_flags = XFS_BMAPI_ENTIRE; int nimaps = 1; if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; if (type == XFS_IO_UNWRITTEN) bmapi_flags |= XFS_BMAPI_IGSTATE; xfs_ilock(ip, XFS_ILOCK_SHARED); ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || (ip->i_df.if_flags & XFS_IFEXTENTS)); ASSERT(offset <= mp->m_super->s_maxbytes); if (offset + count > mp->m_super->s_maxbytes) count = mp->m_super->s_maxbytes - offset; end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count); offset_fsb = XFS_B_TO_FSBT(mp, offset); error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, imap, &nimaps, bmapi_flags); xfs_iunlock(ip, XFS_ILOCK_SHARED); if (error) return error; if (type == XFS_IO_DELALLOC && (!nimaps || isnullstartblock(imap->br_startblock))) { error = xfs_iomap_write_allocate(ip, offset, imap); if (!error) trace_xfs_map_blocks_alloc(ip, offset, count, type, imap); return error; } #ifdef DEBUG if (type == XFS_IO_UNWRITTEN) { ASSERT(nimaps); ASSERT(imap->br_startblock != HOLESTARTBLOCK); ASSERT(imap->br_startblock != DELAYSTARTBLOCK); } #endif if (nimaps) trace_xfs_map_blocks_found(ip, offset, count, type, imap); return 0; }
/* * This routine is called to handle zeroing any space in the last * block of the file that is beyond the EOF. We do this since the * size is being increased without writing anything to that block * and we don't want anyone to read the garbage on the disk. */ STATIC int /* error (positive) */ xfs_zero_last_block( xfs_inode_t *ip, xfs_fsize_t offset, xfs_fsize_t isize) { xfs_fileoff_t last_fsb; xfs_mount_t *mp = ip->i_mount; int nimaps; int zero_offset; int zero_len; int error = 0; xfs_bmbt_irec_t imap; ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); zero_offset = XFS_B_FSB_OFFSET(mp, isize); if (zero_offset == 0) { /* * There are no extra bytes in the last block on disk to * zero, so return. */ return 0; } last_fsb = XFS_B_TO_FSBT(mp, isize); nimaps = 1; error = xfs_bmapi(NULL, ip, last_fsb, 1, 0, NULL, 0, &imap, &nimaps, NULL, NULL); if (error) { return error; } ASSERT(nimaps > 0); /* * If the block underlying isize is just a hole, then there * is nothing to zero. */ if (imap.br_startblock == HOLESTARTBLOCK) { return 0; } /* * Zero the part of the last block beyond the EOF, and write it * out sync. We need to drop the ilock while we do this so we * don't deadlock when the buffer cache calls back to us. */ xfs_iunlock(ip, XFS_ILOCK_EXCL); zero_len = mp->m_sb.sb_blocksize - zero_offset; if (isize + zero_len > offset) zero_len = offset - isize; error = xfs_iozero(ip, isize, zero_len); xfs_ilock(ip, XFS_ILOCK_EXCL); ASSERT(error >= 0); return error; }
/* * Allocation group level functions. */ static inline int xfs_ialloc_cluster_alignment( xfs_alloc_arg_t *args) { if (xfs_sb_version_hasalign(&args->mp->m_sb) && args->mp->m_sb.sb_inoalignmt >= XFS_B_TO_FSBT(args->mp, XFS_INODE_CLUSTER_SIZE(args->mp))) return args->mp->m_sb.sb_inoalignmt; return 1; }
/* * Remap parts of a file's data fork after a successful CoW. */ int xfs_reflink_end_cow( struct xfs_inode *ip, xfs_off_t offset, xfs_off_t count) { xfs_fileoff_t offset_fsb; xfs_fileoff_t end_fsb; int error = 0; trace_xfs_reflink_end_cow(ip, offset, count); offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset); end_fsb = XFS_B_TO_FSB(ip->i_mount, offset + count); /* * Walk backwards until we're out of the I/O range. The loop function * repeatedly cycles the ILOCK to allocate one transaction per remapped * extent. * * If we're being called by writeback then the the pages will still * have PageWriteback set, which prevents races with reflink remapping * and truncate. Reflink remapping prevents races with writeback by * taking the iolock and mmaplock before flushing the pages and * remapping, which means there won't be any further writeback or page * cache dirtying until the reflink completes. * * We should never have two threads issuing writeback for the same file * region. There are also have post-eof checks in the writeback * preparation code so that we don't bother writing out pages that are * about to be truncated. * * If we're being called as part of directio write completion, the dio * count is still elevated, which reflink and truncate will wait for. * Reflink remapping takes the iolock and mmaplock and waits for * pending dio to finish, which should prevent any directio until the * remap completes. Multiple concurrent directio writes to the same * region are handled by end_cow processing only occurring for the * threads which succeed; the outcome of multiple overlapping direct * writes is not well defined anyway. * * It's possible that a buffered write and a direct write could collide * here (the buffered write stumbles in after the dio flushes and * invalidates the page cache and immediately queues writeback), but we * have never supported this 100%. If either disk write succeeds the * blocks will be remapped. */ while (end_fsb > offset_fsb && !error) error = xfs_reflink_end_cow_extent(ip, offset_fsb, &end_fsb); if (error) trace_xfs_reflink_end_cow_error(ip, error, _RET_IP_); return error; }
STATIC loff_t xfs_seek_hole( struct file *file, loff_t start, u32 type) { struct inode *inode = file->f_mapping->host; struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; loff_t uninitialized_var(offset); loff_t holeoff; xfs_fsize_t isize; xfs_fileoff_t fsbno; uint lock; int error; if (XFS_FORCED_SHUTDOWN(mp)) return -XFS_ERROR(EIO); lock = xfs_ilock_map_shared(ip); isize = i_size_read(inode); if (start >= isize) { error = ENXIO; goto out_unlock; } fsbno = XFS_B_TO_FSBT(mp, start); error = xfs_bmap_first_unused(NULL, ip, 1, &fsbno, XFS_DATA_FORK); if (error) goto out_unlock; holeoff = XFS_FSB_TO_B(mp, fsbno); if (holeoff <= start) offset = start; else { /* * xfs_bmap_first_unused() could return a value bigger than * isize if there are no more holes past the supplied offset. */ offset = min_t(loff_t, holeoff, isize); } if (offset != file->f_pos) file->f_pos = offset; out_unlock: xfs_iunlock_map_shared(ip, lock); if (error) return -error; return offset; }
/* * Cancel CoW reservations for some byte range of an inode. * * If cancel_real is true this function cancels all COW fork extents for the * inode; if cancel_real is false, real extents are not cleared. */ int xfs_reflink_cancel_cow_range( struct xfs_inode *ip, xfs_off_t offset, xfs_off_t count, bool cancel_real) { struct xfs_trans *tp; xfs_fileoff_t offset_fsb; xfs_fileoff_t end_fsb; int error; trace_xfs_reflink_cancel_cow_range(ip, offset, count); ASSERT(xfs_is_reflink_inode(ip)); offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset); if (count == NULLFILEOFF) end_fsb = NULLFILEOFF; else end_fsb = XFS_B_TO_FSB(ip->i_mount, offset + count); /* Start a rolling transaction to remove the mappings */ error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_write, 0, 0, 0, &tp); if (error) goto out; xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, 0); /* Scrape out the old CoW reservations */ error = xfs_reflink_cancel_cow_blocks(ip, &tp, offset_fsb, end_fsb, cancel_real); if (error) goto out_cancel; error = xfs_trans_commit(tp); xfs_iunlock(ip, XFS_ILOCK_EXCL); return error; out_cancel: xfs_trans_cancel(tp); xfs_iunlock(ip, XFS_ILOCK_EXCL); out: trace_xfs_reflink_cancel_cow_range_error(ip, error, _RET_IP_); return error; }
/* * Pre-COW all shared blocks within a given byte range of a file and turn off * the reflink flag if we unshare all of the file's blocks. */ int xfs_reflink_unshare( struct xfs_inode *ip, xfs_off_t offset, xfs_off_t len) { struct xfs_mount *mp = ip->i_mount; xfs_fileoff_t fbno; xfs_filblks_t end; xfs_off_t isize; int error; if (!xfs_is_reflink_inode(ip)) return 0; trace_xfs_reflink_unshare(ip, offset, len); inode_dio_wait(VFS_I(ip)); /* Try to CoW the selected ranges */ xfs_ilock(ip, XFS_ILOCK_EXCL); fbno = XFS_B_TO_FSBT(mp, offset); isize = i_size_read(VFS_I(ip)); end = XFS_B_TO_FSB(mp, offset + len); error = xfs_reflink_dirty_extents(ip, fbno, end, isize); if (error) goto out_unlock; xfs_iunlock(ip, XFS_ILOCK_EXCL); /* Wait for the IO to finish */ error = filemap_write_and_wait(VFS_I(ip)->i_mapping); if (error) goto out; /* Turn off the reflink flag if possible. */ error = xfs_reflink_try_clear_inode_flag(ip); if (error) goto out; return 0; out_unlock: xfs_iunlock(ip, XFS_ILOCK_EXCL); out: trace_xfs_reflink_unshare_error(ip, error, _RET_IP_); return error; }
/* * If the caller is doing a write at the end of the file, * then extend the allocation out to the file system's write * iosize. We clean up any extra space left over when the * file is closed in xfs_inactive(). * * For sync writes, we are flushing delayed allocate space to * try to make additional space available for allocation near * the filesystem full boundary - preallocation hurts in that * situation, of course. */ STATIC int xfs_iomap_eof_want_preallocate( xfs_mount_t *mp, xfs_iocore_t *io, xfs_fsize_t isize, xfs_off_t offset, size_t count, int ioflag, xfs_bmbt_irec_t *imap, int nimaps, int *prealloc) { xfs_fileoff_t start_fsb; xfs_filblks_t count_fsb; xfs_fsblock_t firstblock; int n, error, imaps; *prealloc = 0; if ((ioflag & BMAPI_SYNC) || (offset + count) <= isize) return 0; /* * If there are any real blocks past eof, then don't * do any speculative allocation. */ start_fsb = XFS_B_TO_FSBT(mp, ((xfs_ufsize_t)(offset + count - 1))); count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp)); while (count_fsb > 0) { imaps = nimaps; firstblock = NULLFSBLOCK; error = XFS_BMAPI(mp, NULL, io, start_fsb, count_fsb, 0, &firstblock, 0, imap, &imaps, NULL, NULL); if (error) return error; for (n = 0; n < imaps; n++) { if ((imap[n].br_startblock != HOLESTARTBLOCK) && (imap[n].br_startblock != DELAYSTARTBLOCK)) return 0; start_fsb += imap[n].br_blockcount; count_fsb -= imap[n].br_blockcount; } } *prealloc = 1; return 0; }
/* Convert all of the unwritten CoW extents in a file's range to real ones. */ int xfs_reflink_convert_cow( struct xfs_inode *ip, xfs_off_t offset, xfs_off_t count) { struct xfs_mount *mp = ip->i_mount; xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset + count); xfs_filblks_t count_fsb = end_fsb - offset_fsb; int error; ASSERT(count != 0); xfs_ilock(ip, XFS_ILOCK_EXCL); error = xfs_reflink_convert_cow_locked(ip, offset_fsb, count_fsb); xfs_iunlock(ip, XFS_ILOCK_EXCL); return error; }
STATIC int xfs_zero_last_block( xfs_inode_t *ip, xfs_fsize_t offset, xfs_fsize_t isize) { xfs_fileoff_t last_fsb; xfs_mount_t *mp = ip->i_mount; int nimaps; int zero_offset; int zero_len; int error = 0; xfs_bmbt_irec_t imap; ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); zero_offset = XFS_B_FSB_OFFSET(mp, isize); if (zero_offset == 0) { return 0; } last_fsb = XFS_B_TO_FSBT(mp, isize); nimaps = 1; error = xfs_bmapi_read(ip, last_fsb, 1, &imap, &nimaps, 0); if (error) return error; ASSERT(nimaps > 0); if (imap.br_startblock == HOLESTARTBLOCK) { return 0; } xfs_iunlock(ip, XFS_ILOCK_EXCL); zero_len = mp->m_sb.sb_blocksize - zero_offset; if (isize + zero_len > offset) zero_len = offset - isize; error = xfs_iozero(ip, isize, zero_len); xfs_ilock(ip, XFS_ILOCK_EXCL); ASSERT(error >= 0); return error; }
/* * Add a block to the directory. * * This routine is for data and free blocks, not leaf/node blocks which are * handled by xfs_da_grow_inode. */ int xfs_dir2_grow_inode( struct xfs_da_args *args, int space, /* v2 dir's space XFS_DIR2_xxx_SPACE */ xfs_dir2_db_t *dbp) /* out: block number added */ { struct xfs_inode *dp = args->dp; struct xfs_mount *mp = dp->i_mount; xfs_fileoff_t bno; /* directory offset of new block */ int count; /* count of filesystem blocks */ int error; trace_xfs_dir2_grow_inode(args, space); /* * Set lowest possible block in the space requested. */ bno = XFS_B_TO_FSBT(mp, space * XFS_DIR2_SPACE_SIZE); count = args->geo->fsbcount; error = xfs_da_grow_inode_int(args, &bno, count); if (error) return error; *dbp = xfs_dir2_da_to_db(args->geo, (xfs_dablk_t)bno); /* * Update file's size if this is the data space and it grew. */ if (space == XFS_DIR2_DATA_SPACE) { xfs_fsize_t size; /* directory file (data) size */ size = XFS_FSB_TO_B(mp, bno + count); if (size > dp->i_d.di_size) { dp->i_d.di_size = size; xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE); } } return 0; }
/* * Ensure the size update falls into a valid allocated block. */ static int xfs_pnfs_validate_isize( struct xfs_inode *ip, xfs_off_t isize) { struct xfs_bmbt_irec imap; int nimaps = 1; int error = 0; xfs_ilock(ip, XFS_ILOCK_SHARED); error = xfs_bmapi_read(ip, XFS_B_TO_FSBT(ip->i_mount, isize - 1), 1, &imap, &nimaps, 0); xfs_iunlock(ip, XFS_ILOCK_SHARED); if (error) return error; if (imap.br_startblock == HOLESTARTBLOCK || imap.br_startblock == DELAYSTARTBLOCK || imap.br_state == XFS_EXT_UNWRITTEN) return -EIO; return 0; }
int xfs_free_file_space( struct xfs_inode *ip, xfs_off_t offset, xfs_off_t len) { int committed; int done; xfs_fileoff_t endoffset_fsb; int error; xfs_fsblock_t firstfsb; xfs_bmap_free_t free_list; xfs_bmbt_irec_t imap; xfs_off_t ioffset; xfs_extlen_t mod=0; xfs_mount_t *mp; int nimap; uint resblks; xfs_off_t rounding; int rt; xfs_fileoff_t startoffset_fsb; xfs_trans_t *tp; mp = ip->i_mount; trace_xfs_free_file_space(ip); error = xfs_qm_dqattach(ip, 0); if (error) return error; error = 0; if (len <= 0) /* if nothing being freed */ return error; rt = XFS_IS_REALTIME_INODE(ip); startoffset_fsb = XFS_B_TO_FSB(mp, offset); endoffset_fsb = XFS_B_TO_FSBT(mp, offset + len); /* wait for the completion of any pending DIOs */ inode_dio_wait(VFS_I(ip)); rounding = max_t(xfs_off_t, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE); ioffset = offset & ~(rounding - 1); error = -filemap_write_and_wait_range(VFS_I(ip)->i_mapping, ioffset, -1); if (error) goto out; truncate_pagecache_range(VFS_I(ip), ioffset, -1); /* * Need to zero the stuff we're not freeing, on disk. * If it's a realtime file & can't use unwritten extents then we * actually need to zero the extent edges. Otherwise xfs_bunmapi * will take care of it for us. */ if (rt && !xfs_sb_version_hasextflgbit(&mp->m_sb)) { nimap = 1; error = xfs_bmapi_read(ip, startoffset_fsb, 1, &imap, &nimap, 0); if (error) goto out; ASSERT(nimap == 0 || nimap == 1); if (nimap && imap.br_startblock != HOLESTARTBLOCK) { xfs_daddr_t block; ASSERT(imap.br_startblock != DELAYSTARTBLOCK); block = imap.br_startblock; mod = do_div(block, mp->m_sb.sb_rextsize); if (mod) startoffset_fsb += mp->m_sb.sb_rextsize - mod; } nimap = 1; error = xfs_bmapi_read(ip, endoffset_fsb - 1, 1, &imap, &nimap, 0); if (error) goto out; ASSERT(nimap == 0 || nimap == 1); if (nimap && imap.br_startblock != HOLESTARTBLOCK) { ASSERT(imap.br_startblock != DELAYSTARTBLOCK); mod++; if (mod && (mod != mp->m_sb.sb_rextsize)) endoffset_fsb -= mod; } } if ((done = (endoffset_fsb <= startoffset_fsb))) /* * One contiguous piece to clear */ error = xfs_zero_remaining_bytes(ip, offset, offset + len - 1); else { /* * Some full blocks, possibly two pieces to clear */ if (offset < XFS_FSB_TO_B(mp, startoffset_fsb)) error = xfs_zero_remaining_bytes(ip, offset, XFS_FSB_TO_B(mp, startoffset_fsb) - 1); if (!error && XFS_FSB_TO_B(mp, endoffset_fsb) < offset + len) error = xfs_zero_remaining_bytes(ip, XFS_FSB_TO_B(mp, endoffset_fsb), offset + len - 1); } /* * free file space until done or until there is an error */ resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); while (!error && !done) { /* * allocate and setup the transaction. Allow this * transaction to dip into the reserve blocks to ensure * the freeing of the space succeeds at ENOSPC. */ tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); tp->t_flags |= XFS_TRANS_RESERVE; error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, resblks, 0); /* * check for running out of space */ if (error) { /* * Free the transaction structure. */ ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp)); xfs_trans_cancel(tp, 0); break; } xfs_ilock(ip, XFS_ILOCK_EXCL); error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot, ip->i_gdquot, ip->i_pdquot, resblks, 0, XFS_QMOPT_RES_REGBLKS); if (error) goto error1; xfs_trans_ijoin(tp, ip, 0); /* * issue the bunmapi() call to free the blocks */ xfs_bmap_init(&free_list, &firstfsb); error = xfs_bunmapi(tp, ip, startoffset_fsb, endoffset_fsb - startoffset_fsb, 0, 2, &firstfsb, &free_list, &done); if (error) { goto error0; } /* * complete the transaction */ error = xfs_bmap_finish(&tp, &free_list, &committed); if (error) { goto error0; } error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); xfs_iunlock(ip, XFS_ILOCK_EXCL); } out: return error; error0: xfs_bmap_cancel(&free_list); error1: xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); xfs_iunlock(ip, XFS_ILOCK_EXCL); goto out; }
int xfs_alloc_file_space( struct xfs_inode *ip, xfs_off_t offset, xfs_off_t len, int alloc_type) { xfs_mount_t *mp = ip->i_mount; xfs_off_t count; xfs_filblks_t allocated_fsb; xfs_filblks_t allocatesize_fsb; xfs_extlen_t extsz, temp; xfs_fileoff_t startoffset_fsb; xfs_fsblock_t firstfsb; int nimaps; int quota_flag; int rt; xfs_trans_t *tp; xfs_bmbt_irec_t imaps[1], *imapp; xfs_bmap_free_t free_list; uint qblocks, resblks, resrtextents; int committed; int error; trace_xfs_alloc_file_space(ip); if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); error = xfs_qm_dqattach(ip, 0); if (error) return error; if (len <= 0) return XFS_ERROR(EINVAL); rt = XFS_IS_REALTIME_INODE(ip); extsz = xfs_get_extsz_hint(ip); count = len; imapp = &imaps[0]; nimaps = 1; startoffset_fsb = XFS_B_TO_FSBT(mp, offset); allocatesize_fsb = XFS_B_TO_FSB(mp, count); /* * Allocate file space until done or until there is an error */ while (allocatesize_fsb && !error) { xfs_fileoff_t s, e; /* * Determine space reservations for data/realtime. */ if (unlikely(extsz)) { s = startoffset_fsb; do_div(s, extsz); s *= extsz; e = startoffset_fsb + allocatesize_fsb; if ((temp = do_mod(startoffset_fsb, extsz))) e += temp; if ((temp = do_mod(e, extsz))) e += extsz - temp; } else { s = 0; e = allocatesize_fsb; } /* * The transaction reservation is limited to a 32-bit block * count, hence we need to limit the number of blocks we are * trying to reserve to avoid an overflow. We can't allocate * more than @nimaps extents, and an extent is limited on disk * to MAXEXTLEN (21 bits), so use that to enforce the limit. */ resblks = min_t(xfs_fileoff_t, (e - s), (MAXEXTLEN * nimaps)); if (unlikely(rt)) { resrtextents = qblocks = resblks; resrtextents /= mp->m_sb.sb_rextsize; resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); quota_flag = XFS_QMOPT_RES_RTBLKS; } else { resrtextents = 0; resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resblks); quota_flag = XFS_QMOPT_RES_REGBLKS; } /* * Allocate and setup the transaction. */ tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, resblks, resrtextents); /* * Check for running out of space */ if (error) { /* * Free the transaction structure. */ ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp)); xfs_trans_cancel(tp, 0); break; } xfs_ilock(ip, XFS_ILOCK_EXCL); error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks, 0, quota_flag); if (error) goto error1; xfs_trans_ijoin(tp, ip, 0); xfs_bmap_init(&free_list, &firstfsb); error = xfs_bmapi_write(tp, ip, startoffset_fsb, allocatesize_fsb, alloc_type, &firstfsb, 0, imapp, &nimaps, &free_list); if (error) { goto error0; } /* * Complete the transaction */ error = xfs_bmap_finish(&tp, &free_list, &committed); if (error) { goto error0; } error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); xfs_iunlock(ip, XFS_ILOCK_EXCL); if (error) { break; } allocated_fsb = imapp->br_blockcount; if (nimaps == 0) { error = XFS_ERROR(ENOSPC); break; } startoffset_fsb += allocated_fsb; allocatesize_fsb -= allocated_fsb; } return error; error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */ xfs_bmap_cancel(&free_list); xfs_trans_unreserve_quota_nblks(tp, ip, (long)qblocks, 0, quota_flag); error1: /* Just cancel transaction */ xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); xfs_iunlock(ip, XFS_ILOCK_EXCL); return error; }
int /* error (positive) */ xfs_zero_eof( vnode_t *vp, xfs_iocore_t *io, xfs_off_t offset, /* starting I/O offset */ xfs_fsize_t isize, /* current inode size */ xfs_fsize_t end_size) /* terminal inode size */ { struct inode *ip = LINVFS_GET_IP(vp); xfs_fileoff_t start_zero_fsb; xfs_fileoff_t end_zero_fsb; xfs_fileoff_t prev_zero_fsb; xfs_fileoff_t zero_count_fsb; xfs_fileoff_t last_fsb; xfs_extlen_t buf_len_fsb; xfs_extlen_t prev_zero_count; xfs_mount_t *mp; int nimaps; int error = 0; xfs_bmbt_irec_t imap; loff_t loff; size_t lsize; ASSERT(ismrlocked(io->io_lock, MR_UPDATE)); ASSERT(ismrlocked(io->io_iolock, MR_UPDATE)); mp = io->io_mount; /* * First handle zeroing the block on which isize resides. * We only zero a part of that block so it is handled specially. */ error = xfs_zero_last_block(ip, io, offset, isize, end_size); if (error) { ASSERT(ismrlocked(io->io_lock, MR_UPDATE)); ASSERT(ismrlocked(io->io_iolock, MR_UPDATE)); return error; } /* * Calculate the range between the new size and the old * where blocks needing to be zeroed may exist. To get the * block where the last byte in the file currently resides, * we need to subtract one from the size and truncate back * to a block boundary. We subtract 1 in case the size is * exactly on a block boundary. */ last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1; start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize); end_zero_fsb = XFS_B_TO_FSBT(mp, offset - 1); ASSERT((xfs_sfiloff_t)last_fsb < (xfs_sfiloff_t)start_zero_fsb); if (last_fsb == end_zero_fsb) { /* * The size was only incremented on its last block. * We took care of that above, so just return. */ return 0; } ASSERT(start_zero_fsb <= end_zero_fsb); prev_zero_fsb = NULLFILEOFF; prev_zero_count = 0; while (start_zero_fsb <= end_zero_fsb) { nimaps = 1; zero_count_fsb = end_zero_fsb - start_zero_fsb + 1; error = XFS_BMAPI(mp, NULL, io, start_zero_fsb, zero_count_fsb, 0, NULL, 0, &imap, &nimaps, NULL); if (error) { ASSERT(ismrlocked(io->io_lock, MR_UPDATE)); ASSERT(ismrlocked(io->io_iolock, MR_UPDATE)); return error; } ASSERT(nimaps > 0); if (imap.br_state == XFS_EXT_UNWRITTEN || imap.br_startblock == HOLESTARTBLOCK) { /* * This loop handles initializing pages that were * partially initialized by the code below this * loop. It basically zeroes the part of the page * that sits on a hole and sets the page as P_HOLE * and calls remapf if it is a mapped file. */ prev_zero_fsb = NULLFILEOFF; prev_zero_count = 0; start_zero_fsb = imap.br_startoff + imap.br_blockcount; ASSERT(start_zero_fsb <= (end_zero_fsb + 1)); continue; } /* * There are blocks in the range requested. * Zero them a single write at a time. We actually * don't zero the entire range returned if it is * too big and simply loop around to get the rest. * That is not the most efficient thing to do, but it * is simple and this path should not be exercised often. */ buf_len_fsb = XFS_FILBLKS_MIN(imap.br_blockcount, mp->m_writeio_blocks << 8); /* * Drop the inode lock while we're doing the I/O. * We'll still have the iolock to protect us. */ XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); loff = XFS_FSB_TO_B(mp, start_zero_fsb); lsize = XFS_FSB_TO_B(mp, buf_len_fsb); error = xfs_iozero(ip, loff, lsize, end_size); if (error) { goto out_lock; } prev_zero_fsb = start_zero_fsb; prev_zero_count = buf_len_fsb; start_zero_fsb = imap.br_startoff + buf_len_fsb; ASSERT(start_zero_fsb <= (end_zero_fsb + 1)); XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); } return 0; out_lock: XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD); ASSERT(error >= 0); return error; }
/* * Zero file bytes between startoff and endoff inclusive. * The iolock is held exclusive and no blocks are buffered. * * This function is used by xfs_free_file_space() to zero * partial blocks when the range to free is not block aligned. * When unreserving space with boundaries that are not block * aligned we round up the start and round down the end * boundaries and then use this function to zero the parts of * the blocks that got dropped during the rounding. */ STATIC int xfs_zero_remaining_bytes( xfs_inode_t *ip, xfs_off_t startoff, xfs_off_t endoff) { xfs_bmbt_irec_t imap; xfs_fileoff_t offset_fsb; xfs_off_t lastoffset; xfs_off_t offset; xfs_buf_t *bp; xfs_mount_t *mp = ip->i_mount; int nimap; int error = 0; /* * Avoid doing I/O beyond eof - it's not necessary * since nothing can read beyond eof. The space will * be zeroed when the file is extended anyway. */ if (startoff >= XFS_ISIZE(ip)) return 0; if (endoff > XFS_ISIZE(ip)) endoff = XFS_ISIZE(ip); bp = xfs_buf_get_uncached(XFS_IS_REALTIME_INODE(ip) ? mp->m_rtdev_targp : mp->m_ddev_targp, BTOBB(mp->m_sb.sb_blocksize), 0); if (!bp) return XFS_ERROR(ENOMEM); xfs_buf_unlock(bp); for (offset = startoff; offset <= endoff; offset = lastoffset + 1) { uint lock_mode; offset_fsb = XFS_B_TO_FSBT(mp, offset); nimap = 1; lock_mode = xfs_ilock_data_map_shared(ip); error = xfs_bmapi_read(ip, offset_fsb, 1, &imap, &nimap, 0); xfs_iunlock(ip, lock_mode); if (error || nimap < 1) break; ASSERT(imap.br_blockcount >= 1); ASSERT(imap.br_startoff == offset_fsb); lastoffset = XFS_FSB_TO_B(mp, imap.br_startoff + 1) - 1; if (lastoffset > endoff) lastoffset = endoff; if (imap.br_startblock == HOLESTARTBLOCK) continue; ASSERT(imap.br_startblock != DELAYSTARTBLOCK); if (imap.br_state == XFS_EXT_UNWRITTEN) continue; XFS_BUF_UNDONE(bp); XFS_BUF_UNWRITE(bp); XFS_BUF_READ(bp); XFS_BUF_SET_ADDR(bp, xfs_fsb_to_db(ip, imap.br_startblock)); if (XFS_FORCED_SHUTDOWN(mp)) { error = XFS_ERROR(EIO); break; } xfs_buf_iorequest(bp); error = xfs_buf_iowait(bp); if (error) { xfs_buf_ioerror_alert(bp, "xfs_zero_remaining_bytes(read)"); break; } memset(bp->b_addr + (offset - XFS_FSB_TO_B(mp, imap.br_startoff)), 0, lastoffset - offset + 1); XFS_BUF_UNDONE(bp); XFS_BUF_UNREAD(bp); XFS_BUF_WRITE(bp); if (XFS_FORCED_SHUTDOWN(mp)) { error = XFS_ERROR(EIO); break; } xfs_buf_iorequest(bp); error = xfs_buf_iowait(bp); if (error) { xfs_buf_ioerror_alert(bp, "xfs_zero_remaining_bytes(write)"); break; } } xfs_buf_free(bp); return error; }
int xfs_ioctl( bhv_desc_t *bdp, struct inode *inode, struct file *filp, int ioflags, unsigned int cmd, unsigned long arg) { int error; vnode_t *vp; xfs_inode_t *ip; xfs_mount_t *mp; vp = LINVFS_GET_VP(inode); vn_trace_entry(vp, "xfs_ioctl", (inst_t *)__return_address); ip = XFS_BHVTOI(bdp); mp = ip->i_mount; switch (cmd) { case XFS_IOC_ALLOCSP: case XFS_IOC_FREESP: case XFS_IOC_RESVSP: case XFS_IOC_UNRESVSP: case XFS_IOC_ALLOCSP64: case XFS_IOC_FREESP64: case XFS_IOC_RESVSP64: case XFS_IOC_UNRESVSP64: /* * Only allow the sys admin to reserve space unless * unwritten extents are enabled. */ if (!XFS_SB_VERSION_HASEXTFLGBIT(&mp->m_sb) && !capable(CAP_SYS_ADMIN)) return -EPERM; return xfs_ioc_space(bdp, vp, filp, ioflags, cmd, arg); case XFS_IOC_DIOINFO: { struct dioattr da; da.d_miniosz = mp->m_sb.sb_blocksize; da.d_mem = mp->m_sb.sb_blocksize; /* * this only really needs to be BBSIZE. * it is set to the file system block size to * avoid having to do block zeroing on short writes. */ da.d_maxiosz = XFS_FSB_TO_B(mp, XFS_B_TO_FSBT(mp, KIO_MAX_ATOMIC_IO << 10)); if (copy_to_user((struct dioattr *)arg, &da, sizeof(da))) return -XFS_ERROR(EFAULT); return 0; } case XFS_IOC_FSBULKSTAT_SINGLE: case XFS_IOC_FSBULKSTAT: case XFS_IOC_FSINUMBERS: return xfs_ioc_bulkstat(mp, cmd, arg); case XFS_IOC_FSGEOMETRY_V1: return xfs_ioc_fsgeometry_v1(mp, arg); case XFS_IOC_FSGEOMETRY: return xfs_ioc_fsgeometry(mp, arg); case XFS_IOC_GETVERSION: case XFS_IOC_GETXFLAGS: case XFS_IOC_SETXFLAGS: case XFS_IOC_FSGETXATTR: case XFS_IOC_FSSETXATTR: case XFS_IOC_FSGETXATTRA: return xfs_ioc_xattr(vp, ip, filp, cmd, arg); case XFS_IOC_FSSETDM: { struct fsdmidata dmi; if (copy_from_user(&dmi, (struct fsdmidata *)arg, sizeof(dmi))) return -XFS_ERROR(EFAULT); error = xfs_set_dmattrs(bdp, dmi.fsd_dmevmask, dmi.fsd_dmstate, NULL); return -error; } case XFS_IOC_GETBMAP: case XFS_IOC_GETBMAPA: return xfs_ioc_getbmap(bdp, filp, ioflags, cmd, arg); case XFS_IOC_GETBMAPX: return xfs_ioc_getbmapx(bdp, arg); case XFS_IOC_FD_TO_HANDLE: case XFS_IOC_PATH_TO_HANDLE: case XFS_IOC_PATH_TO_FSHANDLE: return xfs_find_handle(cmd, arg); case XFS_IOC_OPEN_BY_HANDLE: return xfs_open_by_handle(mp, arg, filp, inode); case XFS_IOC_FSSETDM_BY_HANDLE: return xfs_fssetdm_by_handle(mp, arg, filp, inode); case XFS_IOC_READLINK_BY_HANDLE: return xfs_readlink_by_handle(mp, arg, filp, inode); case XFS_IOC_ATTRLIST_BY_HANDLE: return xfs_attrlist_by_handle(mp, arg, filp, inode); case XFS_IOC_ATTRMULTI_BY_HANDLE: return xfs_attrmulti_by_handle(mp, arg, filp, inode); case XFS_IOC_SWAPEXT: { error = xfs_swapext((struct xfs_swapext *)arg); return -error; } case XFS_IOC_FSCOUNTS: { xfs_fsop_counts_t out; error = xfs_fs_counts(mp, &out); if (error) return -error; if (copy_to_user((char *)arg, &out, sizeof(out))) return -XFS_ERROR(EFAULT); return 0; } case XFS_IOC_SET_RESBLKS: { xfs_fsop_resblks_t inout; __uint64_t in; if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (copy_from_user(&inout, (char *)arg, sizeof(inout))) return -XFS_ERROR(EFAULT); /* input parameter is passed in resblks field of structure */ in = inout.resblks; error = xfs_reserve_blocks(mp, &in, &inout); if (error) return -error; if (copy_to_user((char *)arg, &inout, sizeof(inout))) return -XFS_ERROR(EFAULT); return 0; } case XFS_IOC_GET_RESBLKS: { xfs_fsop_resblks_t out; if (!capable(CAP_SYS_ADMIN)) return -EPERM; error = xfs_reserve_blocks(mp, NULL, &out); if (error) return -error; if (copy_to_user((char *)arg, &out, sizeof(out))) return -XFS_ERROR(EFAULT); return 0; } case XFS_IOC_FSGROWFSDATA: { xfs_growfs_data_t in; if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (copy_from_user(&in, (char *)arg, sizeof(in))) return -XFS_ERROR(EFAULT); error = xfs_growfs_data(mp, &in); return -error; } case XFS_IOC_FSGROWFSLOG: { xfs_growfs_log_t in; if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (copy_from_user(&in, (char *)arg, sizeof(in))) return -XFS_ERROR(EFAULT); error = xfs_growfs_log(mp, &in); return -error; } case XFS_IOC_FSGROWFSRT: { xfs_growfs_rt_t in; if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (copy_from_user(&in, (char *)arg, sizeof(in))) return -XFS_ERROR(EFAULT); error = xfs_growfs_rt(mp, &in); return -error; } case XFS_IOC_FREEZE: if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (vp->v_vfsp->vfs_frozen == SB_UNFROZEN) { freeze_bdev(mp->m_ddev_targp->pbr_bdev); if (mp->m_rtdev_targp) freeze_bdev(mp->m_rtdev_targp->pbr_bdev); } return 0; case XFS_IOC_THAW: if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (vp->v_vfsp->vfs_frozen != SB_UNFROZEN) { thaw_bdev(mp->m_ddev_targp->pbr_bdev, inode->i_sb); if (mp->m_rtdev_targp) thaw_bdev(mp->m_ddev_targp->pbr_bdev, NULL); } return 0; case XFS_IOC_GOINGDOWN: { __uint32_t in; if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (get_user(in, (__uint32_t *)arg)) return -XFS_ERROR(EFAULT); error = xfs_fs_goingdown(mp, in); return -error; } case XFS_IOC_ERROR_INJECTION: { xfs_error_injection_t in; if (!capable(CAP_SYS_ADMIN)) return EPERM; if (copy_from_user(&in, (char *)arg, sizeof(in))) return -XFS_ERROR(EFAULT); error = xfs_errortag_add(in.errtag, mp); return -error; } case XFS_IOC_ERROR_CLEARALL: if (!capable(CAP_SYS_ADMIN)) return -EPERM; error = xfs_errortag_clearall(mp); return -error; default: return -ENOTTY; } }
/* * Add a block to the directory. * This routine is for data and free blocks, not leaf/node blocks * which are handled by xfs_da_grow_inode. */ int xfs_dir2_grow_inode( xfs_da_args_t *args, int space, /* v2 dir's space XFS_DIR2_xxx_SPACE */ xfs_dir2_db_t *dbp) /* out: block number added */ { xfs_fileoff_t bno; /* directory offset of new block */ int count; /* count of filesystem blocks */ xfs_inode_t *dp; /* incore directory inode */ int error; int got; /* blocks actually mapped */ int i; xfs_bmbt_irec_t map; /* single structure for bmap */ int mapi; /* mapping index */ xfs_bmbt_irec_t *mapp; /* bmap mapping structure(s) */ xfs_mount_t *mp; int nmap; /* number of bmap entries */ xfs_trans_t *tp; xfs_drfsbno_t nblks; trace_xfs_dir2_grow_inode(args, space); dp = args->dp; tp = args->trans; mp = dp->i_mount; nblks = dp->i_d.di_nblocks; /* * Set lowest possible block in the space requested. */ bno = XFS_B_TO_FSBT(mp, space * XFS_DIR2_SPACE_SIZE); count = mp->m_dirblkfsbs; /* * Find the first hole for our block. */ if ((error = xfs_bmap_first_unused(tp, dp, count, &bno, XFS_DATA_FORK))) return error; nmap = 1; ASSERT(args->firstblock != NULL); /* * Try mapping the new block contiguously (one extent). */ if ((error = xfs_bmapi(tp, dp, bno, count, XFS_BMAPI_WRITE|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG, args->firstblock, args->total, &map, &nmap, args->flist))) return error; ASSERT(nmap <= 1); if (nmap == 1) { mapp = ↦ mapi = 1; } /* * Didn't work and this is a multiple-fsb directory block. * Try again with contiguous flag turned on. */ else if (nmap == 0 && count > 1) { xfs_fileoff_t b; /* current file offset */ /* * Space for maximum number of mappings. */ mapp = kmem_alloc(sizeof(*mapp) * count, KM_SLEEP); /* * Iterate until we get to the end of our block. */ for (b = bno, mapi = 0; b < bno + count; ) { int c; /* current fsb count */ /* * Can't map more than MAX_NMAP at once. */ nmap = MIN(XFS_BMAP_MAX_NMAP, count); c = (int)(bno + count - b); if ((error = xfs_bmapi(tp, dp, b, c, XFS_BMAPI_WRITE|XFS_BMAPI_METADATA, args->firstblock, args->total, &mapp[mapi], &nmap, args->flist))) { kmem_free(mapp); return error; } if (nmap < 1) break; /* * Add this bunch into our table, go to the next offset. */ mapi += nmap; b = mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount; } } /* * Didn't work. */ else { mapi = 0; mapp = NULL; } /* * See how many fsb's we got. */ for (i = 0, got = 0; i < mapi; i++) got += mapp[i].br_blockcount; /* * Didn't get enough fsb's, or the first/last block's are wrong. */ if (got != count || mapp[0].br_startoff != bno || mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount != bno + count) { if (mapp != &map) kmem_free(mapp); return XFS_ERROR(ENOSPC); } /* * Done with the temporary mapping table. */ if (mapp != &map) kmem_free(mapp); /* account for newly allocated blocks in reserved blocks total */ args->total -= dp->i_d.di_nblocks - nblks; *dbp = xfs_dir2_da_to_db(mp, (xfs_dablk_t)bno); /* * Update file's size if this is the data space and it grew. */ if (space == XFS_DIR2_DATA_SPACE) { xfs_fsize_t size; /* directory file (data) size */ size = XFS_FSB_TO_B(mp, bno + count); if (size > dp->i_d.di_size) { dp->i_d.di_size = size; xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); } } return 0; }
STATIC loff_t xfs_seek_data( struct file *file, loff_t start, u32 type) { struct inode *inode = file->f_mapping->host; struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; struct xfs_bmbt_irec map[2]; int nmap = 2; loff_t uninitialized_var(offset); xfs_fsize_t isize; xfs_fileoff_t fsbno; xfs_filblks_t end; uint lock; int error; lock = xfs_ilock_map_shared(ip); isize = i_size_read(inode); if (start >= isize) { error = ENXIO; goto out_unlock; } fsbno = XFS_B_TO_FSBT(mp, start); /* * Try to read extents from the first block indicated * by fsbno to the end block of the file. */ end = XFS_B_TO_FSB(mp, isize); error = xfs_bmapi_read(ip, fsbno, end - fsbno, map, &nmap, XFS_BMAPI_ENTIRE); if (error) goto out_unlock; /* * Treat unwritten extent as data extent since it might * contains dirty data in page cache. */ if (map[0].br_startblock != HOLESTARTBLOCK) { offset = max_t(loff_t, start, XFS_FSB_TO_B(mp, map[0].br_startoff)); } else { if (nmap == 1) { error = ENXIO; goto out_unlock; } offset = max_t(loff_t, start, XFS_FSB_TO_B(mp, map[1].br_startoff)); } if (offset != file->f_pos) file->f_pos = offset; out_unlock: xfs_iunlock_map_shared(ip, lock); if (error) return -error; return offset; }
/* * Zero any on disk space between the current EOF and the new, larger EOF. * * This handles the normal case of zeroing the remainder of the last block in * the file and the unusual case of zeroing blocks out beyond the size of the * file. This second case only happens with fixed size extents and when the * system crashes before the inode size was updated but after blocks were * allocated. * * Expects the iolock to be held exclusive, and will take the ilock internally. */ int /* error (positive) */ xfs_zero_eof( struct xfs_inode *ip, xfs_off_t offset, /* starting I/O offset */ xfs_fsize_t isize) /* current inode size */ { struct xfs_mount *mp = ip->i_mount; xfs_fileoff_t start_zero_fsb; xfs_fileoff_t end_zero_fsb; xfs_fileoff_t zero_count_fsb; xfs_fileoff_t last_fsb; xfs_fileoff_t zero_off; xfs_fsize_t zero_len; int nimaps; int error = 0; struct xfs_bmbt_irec imap; ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); ASSERT(offset > isize); /* * First handle zeroing the block on which isize resides. * * We only zero a part of that block so it is handled specially. */ if (XFS_B_FSB_OFFSET(mp, isize) != 0) { error = xfs_zero_last_block(ip, offset, isize); if (error) return error; } /* * Calculate the range between the new size and the old where blocks * needing to be zeroed may exist. * * To get the block where the last byte in the file currently resides, * we need to subtract one from the size and truncate back to a block * boundary. We subtract 1 in case the size is exactly on a block * boundary. */ last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1; start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize); end_zero_fsb = XFS_B_TO_FSBT(mp, offset - 1); ASSERT((xfs_sfiloff_t)last_fsb < (xfs_sfiloff_t)start_zero_fsb); if (last_fsb == end_zero_fsb) { /* * The size was only incremented on its last block. * We took care of that above, so just return. */ return 0; } ASSERT(start_zero_fsb <= end_zero_fsb); while (start_zero_fsb <= end_zero_fsb) { nimaps = 1; zero_count_fsb = end_zero_fsb - start_zero_fsb + 1; xfs_ilock(ip, XFS_ILOCK_EXCL); error = xfs_bmapi_read(ip, start_zero_fsb, zero_count_fsb, &imap, &nimaps, 0); xfs_iunlock(ip, XFS_ILOCK_EXCL); if (error) return error; ASSERT(nimaps > 0); if (imap.br_state == XFS_EXT_UNWRITTEN || imap.br_startblock == HOLESTARTBLOCK) { start_zero_fsb = imap.br_startoff + imap.br_blockcount; ASSERT(start_zero_fsb <= (end_zero_fsb + 1)); continue; } /* * There are blocks we need to zero. */ zero_off = XFS_FSB_TO_B(mp, start_zero_fsb); zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount); if ((zero_off + zero_len) > offset) zero_len = offset - zero_off; error = xfs_iozero(ip, zero_off, zero_len); if (error) return error; start_zero_fsb = imap.br_startoff + imap.br_blockcount; ASSERT(start_zero_fsb <= (end_zero_fsb + 1)); } return 0; }
/* * Link a range of blocks from one file to another. */ int xfs_reflink_remap_range( struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, u64 len, bool is_dedupe) { struct inode *inode_in = file_inode(file_in); struct xfs_inode *src = XFS_I(inode_in); struct inode *inode_out = file_inode(file_out); struct xfs_inode *dest = XFS_I(inode_out); struct xfs_mount *mp = src->i_mount; bool same_inode = (inode_in == inode_out); xfs_fileoff_t sfsbno, dfsbno; xfs_filblks_t fsblen; xfs_extlen_t cowextsize; ssize_t ret; if (!xfs_sb_version_hasreflink(&mp->m_sb)) return -EOPNOTSUPP; if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; /* Lock both files against IO */ lock_two_nondirectories(inode_in, inode_out); if (same_inode) xfs_ilock(src, XFS_MMAPLOCK_EXCL); else xfs_lock_two_inodes(src, dest, XFS_MMAPLOCK_EXCL); /* Check file eligibility and prepare for block sharing. */ ret = -EINVAL; /* Don't reflink realtime inodes */ if (XFS_IS_REALTIME_INODE(src) || XFS_IS_REALTIME_INODE(dest)) goto out_unlock; /* Don't share DAX file data for now. */ if (IS_DAX(inode_in) || IS_DAX(inode_out)) goto out_unlock; ret = vfs_clone_file_prep_inodes(inode_in, pos_in, inode_out, pos_out, &len, is_dedupe); if (ret <= 0) goto out_unlock; trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out); /* Set flags and remap blocks. */ ret = xfs_reflink_set_inode_flag(src, dest); if (ret) goto out_unlock; dfsbno = XFS_B_TO_FSBT(mp, pos_out); sfsbno = XFS_B_TO_FSBT(mp, pos_in); fsblen = XFS_B_TO_FSB(mp, len); ret = xfs_reflink_remap_blocks(src, sfsbno, dest, dfsbno, fsblen, pos_out + len); if (ret) goto out_unlock; /* Zap any page cache for the destination file's range. */ truncate_inode_pages_range(&inode_out->i_data, pos_out, PAGE_ALIGN(pos_out + len) - 1); /* * Carry the cowextsize hint from src to dest if we're sharing the * entire source file to the entire destination file, the source file * has a cowextsize hint, and the destination file does not. */ cowextsize = 0; if (pos_in == 0 && len == i_size_read(inode_in) && (src->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE) && pos_out == 0 && len >= i_size_read(inode_out) && !(dest->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE)) cowextsize = src->i_d.di_cowextsize; ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize, is_dedupe); out_unlock: xfs_iunlock(src, XFS_MMAPLOCK_EXCL); if (!same_inode) xfs_iunlock(dest, XFS_MMAPLOCK_EXCL); unlock_two_nondirectories(inode_in, inode_out); if (ret) trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_); return ret; }
/* * Remap parts of a file's data fork after a successful CoW. */ int xfs_reflink_end_cow( struct xfs_inode *ip, xfs_off_t offset, xfs_off_t count) { struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); struct xfs_bmbt_irec got, del; struct xfs_trans *tp; xfs_fileoff_t offset_fsb; xfs_fileoff_t end_fsb; xfs_fsblock_t firstfsb; struct xfs_defer_ops dfops; int error; unsigned int resblks; xfs_filblks_t rlen; xfs_extnum_t idx; trace_xfs_reflink_end_cow(ip, offset, count); /* No COW extents? That's easy! */ if (ifp->if_bytes == 0) return 0; offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset); end_fsb = XFS_B_TO_FSB(ip->i_mount, offset + count); /* * Start a rolling transaction to switch the mappings. We're * unlikely ever to have to remap 16T worth of single-block * extents, so just cap the worst case extent count to 2^32-1. * Stick a warning in just in case, and avoid 64-bit division. */ BUILD_BUG_ON(MAX_RW_COUNT > UINT_MAX); if (end_fsb - offset_fsb > UINT_MAX) { error = -EFSCORRUPTED; xfs_force_shutdown(ip->i_mount, SHUTDOWN_CORRUPT_INCORE); ASSERT(0); goto out; } resblks = XFS_NEXTENTADD_SPACE_RES(ip->i_mount, (unsigned int)(end_fsb - offset_fsb), XFS_DATA_FORK); error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_write, resblks, 0, 0, &tp); if (error) goto out; xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, 0); /* If there is a hole at end_fsb - 1 go to the previous extent */ if (!xfs_iext_lookup_extent(ip, ifp, end_fsb - 1, &idx, &got) || got.br_startoff > end_fsb) { /* * In case of racing, overlapping AIO writes no COW extents * might be left by the time I/O completes for the loser of * the race. In that case we are done. */ if (idx <= 0) goto out_cancel; xfs_iext_get_extent(ifp, --idx, &got); } /* Walk backwards until we're out of the I/O range... */ while (got.br_startoff + got.br_blockcount > offset_fsb) { del = got; xfs_trim_extent(&del, offset_fsb, end_fsb - offset_fsb); /* Extent delete may have bumped idx forward */ if (!del.br_blockcount) { idx--; goto next_extent; } ASSERT(!isnullstartblock(got.br_startblock)); /* * Don't remap unwritten extents; these are * speculatively preallocated CoW extents that have been * allocated but have not yet been involved in a write. */ if (got.br_state == XFS_EXT_UNWRITTEN) { idx--; goto next_extent; } /* Unmap the old blocks in the data fork. */ xfs_defer_init(&dfops, &firstfsb); rlen = del.br_blockcount; error = __xfs_bunmapi(tp, ip, del.br_startoff, &rlen, 0, 1, &firstfsb, &dfops); if (error) goto out_defer; /* Trim the extent to whatever got unmapped. */ if (rlen) { xfs_trim_extent(&del, del.br_startoff + rlen, del.br_blockcount - rlen); } trace_xfs_reflink_cow_remap(ip, &del); /* Free the CoW orphan record. */ error = xfs_refcount_free_cow_extent(tp->t_mountp, &dfops, del.br_startblock, del.br_blockcount); if (error) goto out_defer; /* Map the new blocks into the data fork. */ error = xfs_bmap_map_extent(tp->t_mountp, &dfops, ip, &del); if (error) goto out_defer; /* Remove the mapping from the CoW fork. */ xfs_bmap_del_extent_cow(ip, &idx, &got, &del); xfs_defer_ijoin(&dfops, ip); error = xfs_defer_finish(&tp, &dfops); if (error) goto out_defer; next_extent: if (!xfs_iext_get_extent(ifp, idx, &got)) break; } error = xfs_trans_commit(tp); xfs_iunlock(ip, XFS_ILOCK_EXCL); if (error) goto out; return 0; out_defer: xfs_defer_cancel(&dfops); out_cancel: xfs_trans_cancel(tp); xfs_iunlock(ip, XFS_ILOCK_EXCL); out: trace_xfs_reflink_end_cow_error(ip, error, _RET_IP_); return error; }
int /* error (positive) */ xfs_zero_eof( xfs_inode_t *ip, xfs_off_t offset, /* starting I/O offset */ xfs_fsize_t isize) /* current inode size */ { xfs_mount_t *mp = ip->i_mount; xfs_fileoff_t start_zero_fsb; xfs_fileoff_t end_zero_fsb; xfs_fileoff_t zero_count_fsb; xfs_fileoff_t last_fsb; xfs_fileoff_t zero_off; xfs_fsize_t zero_len; int nimaps; int error = 0; xfs_bmbt_irec_t imap; ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); ASSERT(offset > isize); /* * First handle zeroing the block on which isize resides. * We only zero a part of that block so it is handled specially. */ error = xfs_zero_last_block(ip, offset, isize); if (error) { ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); return error; } /* * Calculate the range between the new size and the old * where blocks needing to be zeroed may exist. To get the * block where the last byte in the file currently resides, * we need to subtract one from the size and truncate back * to a block boundary. We subtract 1 in case the size is * exactly on a block boundary. */ last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1; start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize); end_zero_fsb = XFS_B_TO_FSBT(mp, offset - 1); ASSERT((xfs_sfiloff_t)last_fsb < (xfs_sfiloff_t)start_zero_fsb); if (last_fsb == end_zero_fsb) { /* * The size was only incremented on its last block. * We took care of that above, so just return. */ return 0; } ASSERT(start_zero_fsb <= end_zero_fsb); while (start_zero_fsb <= end_zero_fsb) { nimaps = 1; zero_count_fsb = end_zero_fsb - start_zero_fsb + 1; error = xfs_bmapi(NULL, ip, start_zero_fsb, zero_count_fsb, 0, NULL, 0, &imap, &nimaps, NULL, NULL); if (error) { ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); return error; } ASSERT(nimaps > 0); if (imap.br_state == XFS_EXT_UNWRITTEN || imap.br_startblock == HOLESTARTBLOCK) { /* * This loop handles initializing pages that were * partially initialized by the code below this * loop. It basically zeroes the part of the page * that sits on a hole and sets the page as P_HOLE * and calls remapf if it is a mapped file. */ start_zero_fsb = imap.br_startoff + imap.br_blockcount; ASSERT(start_zero_fsb <= (end_zero_fsb + 1)); continue; } /* * There are blocks we need to zero. * Drop the inode lock while we're doing the I/O. * We'll still have the iolock to protect us. */ xfs_iunlock(ip, XFS_ILOCK_EXCL); zero_off = XFS_FSB_TO_B(mp, start_zero_fsb); zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount); if ((zero_off + zero_len) > offset) zero_len = offset - zero_off; error = xfs_iozero(ip, zero_off, zero_len); if (error) { goto out_lock; } start_zero_fsb = imap.br_startoff + imap.br_blockcount; ASSERT(start_zero_fsb <= (end_zero_fsb + 1)); xfs_ilock(ip, XFS_ILOCK_EXCL); } return 0; out_lock: xfs_ilock(ip, XFS_ILOCK_EXCL); ASSERT(error >= 0); return error; }