/*
 * EOPNOTSUPP is no longer legal.  For local media VFS's that do not
 * implement their own VOP_PUTPAGES, their VOP_PUTPAGES should call to
 * vnode_pager_generic_putpages() to implement the previous behaviour.
 *
 * Caller has already cleared the pmap modified bits, if any.
 *
 * All other FS's should use the bypass to get to the local media
 * backing vp's VOP_PUTPAGES.
 */
static void
vnode_pager_putpages(vm_object_t object, vm_page_t *m, int count,
    int sync, int *rtvals)
{
    int rtval;
    struct vnode *vp;
    int bytes = count * PAGE_SIZE;

    /*
     * Force synchronous operation if we are extremely low on memory
     * to prevent a low-memory deadlock.  VOP operations often need to
     * allocate more memory to initiate the I/O ( i.e. do a BMAP
     * operation ).  The swapper handles the case by limiting the amount
     * of asynchronous I/O, but that sort of solution doesn't scale well
     * for the vnode pager without a lot of work.
     *
     * Also, the backing vnode's iodone routine may not wake the pageout
     * daemon up.  This should probably be addressed XXX.
     */
    if ((vmstats.v_free_count + vmstats.v_cache_count) <
        vmstats.v_pageout_free_min) {
        sync |= OBJPC_SYNC;
    }

    /*
     * Call device-specific putpages function
     */
    vp = object->handle;
    rtval = VOP_PUTPAGES(vp, m, bytes, sync, rtvals, 0);
    if (rtval == EOPNOTSUPP) {
        kprintf("vnode_pager: *** WARNING *** stale FS putpages\n");
        rtval = vnode_pager_generic_putpages(vp, m, bytes, sync, rtvals);
    }
}
/*
 * EOPNOTSUPP is no longer legal.  For local media VFS's that do not
 * implement their own VOP_PUTPAGES, their VOP_PUTPAGES should call to
 * vnode_pager_generic_putpages() to implement the previous behaviour.
 *
 * All other FS's should use the bypass to get to the local media
 * backing vp's VOP_PUTPAGES.
 */
static void
vnode_pager_putpages(vm_object_t object, vm_page_t *m, int count,
    int flags, int *rtvals)
{
    int rtval;
    struct vnode *vp;
    int bytes = count * PAGE_SIZE;

    /*
     * Force synchronous operation if we are extremely low on memory
     * to prevent a low-memory deadlock.  VOP operations often need to
     * allocate more memory to initiate the I/O ( i.e. do a BMAP
     * operation ).  The swapper handles the case by limiting the amount
     * of asynchronous I/O, but that sort of solution doesn't scale well
     * for the vnode pager without a lot of work.
     *
     * Also, the backing vnode's iodone routine may not wake the pageout
     * daemon up.  This should probably be addressed XXX.
     */
    if (vm_cnt.v_free_count < vm_cnt.v_pageout_free_min)
        flags |= VM_PAGER_PUT_SYNC;

    /*
     * Call device-specific putpages function
     */
    vp = object->handle;
    VM_OBJECT_WUNLOCK(object);
    rtval = VOP_PUTPAGES(vp, m, bytes, flags, rtvals);
    KASSERT(rtval != EOPNOTSUPP,
        ("vnode_pager: stale FS putpages\n"));
    VM_OBJECT_WLOCK(object);
}
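Both versions of the comment above prescribe the same escape hatch: a local-media filesystem with no paging logic of its own should hand the request straight to vnode_pager_generic_putpages().  A minimal sketch of such a VOP, in the style of the old FreeBSD ffs_putpages(); the "myfs" name is hypothetical, not from the source:

/*
 * Hypothetical local-media putpages (sketch): forward the paging
 * write to the generic vnode pager, as the comments above require.
 */
static int
myfs_putpages(struct vop_putpages_args *ap)
{
    return (vnode_pager_generic_putpages(ap->a_vp, ap->a_m,
        ap->a_count, ap->a_sync, ap->a_rtvals));
}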
static int
vn_put(struct uvm_object *uobj, voff_t offlo, voff_t offhi, int flags)
{
    struct vnode *vp = (struct vnode *)uobj;

    return VOP_PUTPAGES(vp, offlo, offhi, flags);
}
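vn_put() is the pager-side entry point and is reached with the vnode's interlock already held.  Direct callers of VOP_PUTPAGES take the interlock themselves, and the call consumes it (it is released before return), as the later snippets show.  A hedged sketch of a whole-file flush; the helper name is an assumption:

/*
 * Sketch only: synchronously write back every page of vp.  An
 * offlo/offhi of 0/0 plus PGO_ALLPAGES selects the whole file; the
 * interlock taken here is released inside VOP_PUTPAGES.
 */
static int
flush_all_pages(struct vnode *vp)
{
    mutex_enter(vp->v_interlock);
    return VOP_PUTPAGES(vp, 0, 0,
        PGO_ALLPAGES | PGO_CLEANIT | PGO_SYNCIO);
}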
int
RUMP_VOP_PUTPAGES(struct vnode *vp, off_t offlo, off_t offhi, int flags)
{
    int error;

    rump_schedule();
    error = VOP_PUTPAGES(vp, offlo, offhi, flags);
    rump_unschedule();

    return error;
}
/*
 * Handles the read/write request given in 'bp' using the vnode's
 * VOP_READ and VOP_WRITE operations.
 *
 * 'obp' is a pointer to the original request fed to the vnd device.
 */
static void
handle_with_rdwr(struct vnd_softc *vnd, const struct buf *obp, struct buf *bp)
{
    bool doread;
    off_t offset;
    size_t len, resid;
    struct vnode *vp;

    doread = bp->b_flags & B_READ;
    offset = obp->b_rawblkno * vnd->sc_dkdev.dk_label->d_secsize;
    len = bp->b_bcount;
    vp = vnd->sc_vp;

#if defined(DEBUG)
    if (vnddebug & VDB_IO)
        printf("vnd (rdwr): vp %p, %s, rawblkno 0x%" PRIx64
            ", secsize %d, offset %" PRIu64
            ", bcount %d\n",
            vp, doread ? "read" : "write", obp->b_rawblkno,
            vnd->sc_dkdev.dk_label->d_secsize, offset,
            bp->b_bcount);
#endif

    /* Issue the read or write operation. */
    bp->b_error =
        vn_rdwr(doread ? UIO_READ : UIO_WRITE,
        vp, bp->b_data, len, offset, UIO_SYSSPACE,
        IO_ADV_ENCODE(POSIX_FADV_NOREUSE), vnd->sc_cred, &resid, NULL);
    bp->b_resid = resid;

    /*
     * Flush the page cache for this vnode: the vnd user is usually
     * a file system on top of the vnd device, so the data would
     * otherwise be cached twice.
     */
    mutex_enter(vp->v_interlock);
    (void) VOP_PUTPAGES(vp, 0, 0,
        PGO_ALLPAGES | PGO_CLEANIT | PGO_FREE | PGO_SYNCIO);

    /* We need to increase the number of outputs on the vnode if
     * there was any write to it. */
    if (!doread) {
        mutex_enter(vp->v_interlock);
        vp->v_numoutput++;
        mutex_exit(vp->v_interlock);
    }

    biodone(bp);
}
static int
unionfs_putpages(void *v)
{
    struct vop_putpages_args /* {
        struct vnode *a_vp;
        voff_t a_offlo;
        voff_t a_offhi;
        int a_flags;
    } */ *ap = v;
    struct vnode *vp = ap->a_vp, *tvp;
    struct unionfs_node *unp;

    KASSERT(mutex_owned(vp->v_interlock));

    unp = VTOUNIONFS(vp);
    tvp = (unp->un_uppervp != NULLVP ?
        unp->un_uppervp : unp->un_lowervp);
    KASSERT(tvp->v_interlock == vp->v_interlock);
    if (ap->a_flags & PGO_RECLAIM) {
        mutex_exit(vp->v_interlock);
        return 0;
    }

    return VOP_PUTPAGES(tvp, ap->a_offlo, ap->a_offhi, ap->a_flags);
}
/*
 * Read/write clusters from/to backing store.
 * For persistent snapshots must be called with cl == 0. off is the
 * offset into the snapshot.
 */
static int
fss_bs_io(struct fss_softc *sc, fss_io_type rw,
    u_int32_t cl, off_t off, int len, void *data)
{
    int error;

    off += FSS_CLTOB(sc, cl);

    vn_lock(sc->sc_bs_vp, LK_EXCLUSIVE|LK_RETRY);

    error = vn_rdwr((rw == FSS_READ ? UIO_READ : UIO_WRITE), sc->sc_bs_vp,
        data, len, off, UIO_SYSSPACE,
        IO_ADV_ENCODE(POSIX_FADV_NOREUSE) | IO_NODELOCKED,
        sc->sc_bs_lwp->l_cred, NULL, NULL);
    if (error == 0) {
        mutex_enter(sc->sc_bs_vp->v_interlock);
        error = VOP_PUTPAGES(sc->sc_bs_vp, trunc_page(off),
            round_page(off+len), PGO_CLEANIT | PGO_FREE | PGO_SYNCIO);
    }

    VOP_UNLOCK(sc->sc_bs_vp);

    return error;
}
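The trunc_page()/round_page() pair above widens a byte range outward to the page boundaries VOP_PUTPAGES works on, so partially covered pages are written back as well.  The same pattern isolated as a hedged helper; flush_byte_range is a hypothetical name, not from the source:

/*
 * Sketch: synchronously clean and free the pages backing
 * [off, off + len), growing the range outward to page boundaries.
 */
static int
flush_byte_range(struct vnode *vp, off_t off, size_t len)
{
    mutex_enter(vp->v_interlock);
    return VOP_PUTPAGES(vp, trunc_page(off), round_page(off + len),
        PGO_CLEANIT | PGO_FREE | PGO_SYNCIO);
}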
/*
 * Truncate the inode oip to at most length size, freeing the
 * disk blocks.
 */
int
ffs_truncate(struct vnode *ovp, off_t length, int ioflag, kauth_cred_t cred)
{
    daddr_t lastblock;
    struct inode *oip = VTOI(ovp);
    daddr_t bn, lastiblock[UFS_NIADDR], indir_lbn[UFS_NIADDR];
    daddr_t blks[UFS_NDADDR + UFS_NIADDR];
    struct fs *fs;
    int offset, pgoffset, level;
    int64_t count, blocksreleased = 0;
    int i, aflag, nblocks;
    int error, allerror = 0;
    off_t osize;
    int sync;
    struct ufsmount *ump = oip->i_ump;

    if (ovp->v_type == VCHR || ovp->v_type == VBLK ||
        ovp->v_type == VFIFO || ovp->v_type == VSOCK) {
        KASSERT(oip->i_size == 0);
        return 0;
    }

    if (length < 0)
        return (EINVAL);

    if (ovp->v_type == VLNK &&
        (oip->i_size < ump->um_maxsymlinklen ||
         (ump->um_maxsymlinklen == 0 && DIP(oip, blocks) == 0))) {
        KDASSERT(length == 0);
        memset(SHORTLINK(oip), 0, (size_t)oip->i_size);
        oip->i_size = 0;
        DIP_ASSIGN(oip, size, 0);
        oip->i_flag |= IN_CHANGE | IN_UPDATE;
        return (ffs_update(ovp, NULL, NULL, 0));
    }
    if (oip->i_size == length) {
        /* still do a uvm_vnp_setsize() as writesize may be larger */
        uvm_vnp_setsize(ovp, length);
        oip->i_flag |= IN_CHANGE | IN_UPDATE;
        return (ffs_update(ovp, NULL, NULL, 0));
    }
    fs = oip->i_fs;
    if (length > ump->um_maxfilesize)
        return (EFBIG);

    if ((oip->i_flags & SF_SNAPSHOT) != 0)
        ffs_snapremove(ovp);

    osize = oip->i_size;
    aflag = ioflag & IO_SYNC ? B_SYNC : 0;

    /*
     * Lengthen the size of the file. We must ensure that the
     * last byte of the file is allocated. Since the smallest
     * value of osize is 0, length will be at least 1.
     */
    if (osize < length) {
        if (ffs_lblkno(fs, osize) < UFS_NDADDR &&
            ffs_lblkno(fs, osize) != ffs_lblkno(fs, length) &&
            ffs_blkroundup(fs, osize) != osize) {
            off_t eob;

            eob = ffs_blkroundup(fs, osize);
            uvm_vnp_setwritesize(ovp, eob);
            error = ufs_balloc_range(ovp, osize, eob - osize,
                cred, aflag);
            if (error) {
                (void) ffs_truncate(ovp, osize,
                    ioflag & IO_SYNC, cred);
                return error;
            }
            if (ioflag & IO_SYNC) {
                mutex_enter(ovp->v_interlock);
                VOP_PUTPAGES(ovp,
                    trunc_page(osize & fs->fs_bmask),
                    round_page(eob), PGO_CLEANIT |
                    PGO_SYNCIO | PGO_JOURNALLOCKED);
            }
        }
        uvm_vnp_setwritesize(ovp, length);
        error = ufs_balloc_range(ovp, length - 1, 1, cred, aflag);
        if (error) {
            (void) ffs_truncate(ovp, osize, ioflag & IO_SYNC,
                cred);
            return (error);
        }
        uvm_vnp_setsize(ovp, length);
        oip->i_flag |= IN_CHANGE | IN_UPDATE;
        KASSERT(ovp->v_size == oip->i_size);
        return (ffs_update(ovp, NULL, NULL, 0));
    }

    /*
     * When truncating a regular file down to a non-block-aligned size,
     * we must zero the part of last block which is past the new EOF.
     * We must synchronously flush the zeroed pages to disk
     * since the new pages will be invalidated as soon as we
     * inform the VM system of the new, smaller size.
     * We must do this before acquiring the GLOCK, since fetching
     * the pages will acquire the GLOCK internally.
     * So there is a window where another thread could see a whole
     * zeroed page past EOF, but that's life.
     */

    offset = ffs_blkoff(fs, length);
    pgoffset = length & PAGE_MASK;
    if (ovp->v_type == VREG && (pgoffset != 0 || offset != 0) &&
        osize > length) {
        daddr_t lbn;
        voff_t eoz;
        int size;

        if (offset != 0) {
            error = ufs_balloc_range(ovp, length - 1, 1, cred,
                aflag);
            if (error)
                return error;
        }
        lbn = ffs_lblkno(fs, length);
        size = ffs_blksize(fs, oip, lbn);
        eoz = MIN(MAX(ffs_lblktosize(fs, lbn) + size,
            round_page(pgoffset)), osize);
        ubc_zerorange(&ovp->v_uobj, length, eoz - length,
            UBC_UNMAP_FLAG(ovp));
        if (round_page(eoz) > round_page(length)) {
            mutex_enter(ovp->v_interlock);
            error = VOP_PUTPAGES(ovp, round_page(length),
                round_page(eoz),
                PGO_CLEANIT | PGO_DEACTIVATE |
                PGO_JOURNALLOCKED |
                ((ioflag & IO_SYNC) ? PGO_SYNCIO : 0));
            if (error)
                return error;
        }
    }

    genfs_node_wrlock(ovp);
    oip->i_size = length;
    DIP_ASSIGN(oip, size, length);
    uvm_vnp_setsize(ovp, length);
    /*
     * Calculate index into inode's block list of
     * last direct and indirect blocks (if any)
     * which we want to keep.  Lastblock is -1 when
     * the file is truncated to 0.
     */
    lastblock = ffs_lblkno(fs, length + fs->fs_bsize - 1) - 1;
    lastiblock[SINGLE] = lastblock - UFS_NDADDR;
    lastiblock[DOUBLE] = lastiblock[SINGLE] - FFS_NINDIR(fs);
    lastiblock[TRIPLE] = lastiblock[DOUBLE] - FFS_NINDIR(fs) * FFS_NINDIR(fs);
    nblocks = btodb(fs->fs_bsize);
    /*
     * Update file and block pointers on disk before we start freeing
     * blocks.  If we crash before free'ing blocks below, the blocks
     * will be returned to the free list.  lastiblock values are also
     * normalized to -1 for calls to ffs_indirtrunc below.
     */
    sync = 0;
    for (level = TRIPLE; level >= SINGLE; level--) {
        blks[UFS_NDADDR + level] = DIP(oip, ib[level]);
        if (lastiblock[level] < 0 && blks[UFS_NDADDR + level] != 0) {
            sync = 1;
            DIP_ASSIGN(oip, ib[level], 0);
            lastiblock[level] = -1;
        }
    }
    for (i = 0; i < UFS_NDADDR; i++) {
        blks[i] = DIP(oip, db[i]);
        if (i > lastblock && blks[i] != 0) {
            sync = 1;
            DIP_ASSIGN(oip, db[i], 0);
        }
    }
    oip->i_flag |= IN_CHANGE | IN_UPDATE;
    if (sync) {
        error = ffs_update(ovp, NULL, NULL, UPDATE_WAIT);
        if (error && !allerror)
            allerror = error;
    }

    /*
     * Having written the new inode to disk, save its new configuration
     * and put back the old block pointers long enough to process them.
     * Note that we save the new block configuration so we can check it
     * when we are done.
     */
    for (i = 0; i < UFS_NDADDR; i++) {
        bn = DIP(oip, db[i]);
        DIP_ASSIGN(oip, db[i], blks[i]);
        blks[i] = bn;
    }
    for (i = 0; i < UFS_NIADDR; i++) {
        bn = DIP(oip, ib[i]);
        DIP_ASSIGN(oip, ib[i], blks[UFS_NDADDR + i]);
        blks[UFS_NDADDR + i] = bn;
    }
    oip->i_size = osize;
    DIP_ASSIGN(oip, size, osize);
    error = vtruncbuf(ovp, lastblock + 1, 0, 0);
    if (error && !allerror)
        allerror = error;

    /*
     * Indirect blocks first.
     */
    indir_lbn[SINGLE] = -UFS_NDADDR;
    indir_lbn[DOUBLE] = indir_lbn[SINGLE] - FFS_NINDIR(fs) - 1;
    indir_lbn[TRIPLE] = indir_lbn[DOUBLE] - FFS_NINDIR(fs) * FFS_NINDIR(fs) - 1;
    for (level = TRIPLE; level >= SINGLE; level--) {
        if (oip->i_ump->um_fstype == UFS1)
            bn = ufs_rw32(oip->i_ffs1_ib[level], UFS_FSNEEDSWAP(fs));
        else
            bn = ufs_rw64(oip->i_ffs2_ib[level], UFS_FSNEEDSWAP(fs));
        if (bn != 0) {
            error = ffs_indirtrunc(oip, indir_lbn[level],
                FFS_FSBTODB(fs, bn), lastiblock[level], level, &count);
            if (error)
                allerror = error;
            blocksreleased += count;
            if (lastiblock[level] < 0) {
                DIP_ASSIGN(oip, ib[level], 0);
                if (oip->i_ump->um_mountp->mnt_wapbl) {
                    UFS_WAPBL_REGISTER_DEALLOCATION(
                        oip->i_ump->um_mountp,
                        FFS_FSBTODB(fs, bn), fs->fs_bsize);
                } else
                    ffs_blkfree(fs, oip->i_devvp, bn,
                        fs->fs_bsize, oip->i_number);
                blocksreleased += nblocks;
            }
        }
        if (lastiblock[level] >= 0)
            goto done;
    }

    /*
     * All whole direct blocks or frags.
     */
    for (i = UFS_NDADDR - 1; i > lastblock; i--) {
        long bsize;

        if (oip->i_ump->um_fstype == UFS1)
            bn = ufs_rw32(oip->i_ffs1_db[i], UFS_FSNEEDSWAP(fs));
        else
            bn = ufs_rw64(oip->i_ffs2_db[i], UFS_FSNEEDSWAP(fs));
        if (bn == 0)
            continue;
        DIP_ASSIGN(oip, db[i], 0);
        bsize = ffs_blksize(fs, oip, i);
        if ((oip->i_ump->um_mountp->mnt_wapbl) &&
            (ovp->v_type != VREG)) {
            UFS_WAPBL_REGISTER_DEALLOCATION(oip->i_ump->um_mountp,
                FFS_FSBTODB(fs, bn), bsize);
        } else
            ffs_blkfree(fs, oip->i_devvp, bn, bsize, oip->i_number);
        blocksreleased += btodb(bsize);
    }
    if (lastblock < 0)
        goto done;

    /*
     * Finally, look for a change in size of the
     * last direct block; release any frags.
     */
    if (oip->i_ump->um_fstype == UFS1)
        bn = ufs_rw32(oip->i_ffs1_db[lastblock], UFS_FSNEEDSWAP(fs));
    else
        bn = ufs_rw64(oip->i_ffs2_db[lastblock], UFS_FSNEEDSWAP(fs));
    if (bn != 0) {
        long oldspace, newspace;

        /*
         * Calculate amount of space we're giving
         * back as old block size minus new block size.
         */
        oldspace = ffs_blksize(fs, oip, lastblock);
        oip->i_size = length;
        DIP_ASSIGN(oip, size, length);
        newspace = ffs_blksize(fs, oip, lastblock);
        if (newspace == 0)
            panic("itrunc: newspace");
        if (oldspace - newspace > 0) {
            /*
             * Block number of space to be free'd is
             * the old block # plus the number of frags
             * required for the storage we're keeping.
             */
            bn += ffs_numfrags(fs, newspace);
            if ((oip->i_ump->um_mountp->mnt_wapbl) &&
                (ovp->v_type != VREG)) {
                UFS_WAPBL_REGISTER_DEALLOCATION(
                    oip->i_ump->um_mountp, FFS_FSBTODB(fs, bn),
                    oldspace - newspace);
            } else
                ffs_blkfree(fs, oip->i_devvp, bn,
                    oldspace - newspace, oip->i_number);
            blocksreleased += btodb(oldspace - newspace);
        }
    }

done:
#ifdef DIAGNOSTIC
    for (level = SINGLE; level <= TRIPLE; level++)
        if (blks[UFS_NDADDR + level] != DIP(oip, ib[level]))
            panic("itrunc1");
    for (i = 0; i < UFS_NDADDR; i++)
        if (blks[i] != DIP(oip, db[i]))
            panic("itrunc2");
    if (length == 0 &&
        (!LIST_EMPTY(&ovp->v_cleanblkhd) ||
         !LIST_EMPTY(&ovp->v_dirtyblkhd)))
        panic("itrunc3");
#endif /* DIAGNOSTIC */

    /*
     * Put back the real size.
     */
    oip->i_size = length;
    DIP_ASSIGN(oip, size, length);
    DIP_ADD(oip, blocks, -blocksreleased);
    genfs_node_unlock(ovp);
    oip->i_flag |= IN_CHANGE;
    UFS_WAPBL_UPDATE(ovp, NULL, NULL, 0);
#if defined(QUOTA) || defined(QUOTA2)
    (void) chkdq(oip, -blocksreleased, NOCRED, 0);
#endif
    KASSERT(ovp->v_type != VREG || ovp->v_size == oip->i_size);
    return (allerror);
}
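A worked example of the zero-and-flush step in ffs_truncate(), under assumed parameters (fs_bsize == 16384 and PAGE_SIZE == 4096; the numbers are illustrative only): truncating a 20000-byte file to length 5000 gives offset = 5000 and pgoffset = 904, so lbn = 0 and size = 16384, and eoz = MIN(MAX(0 + 16384, round_page(904)), 20000) = 16384.  ubc_zerorange() then zeroes bytes [5000, 16384), and since round_page(16384) > round_page(5000), VOP_PUTPAGES pushes pages [8192, 16384) to disk before uvm_vnp_setsize() exposes the smaller size to the VM system.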
int
do_posix_fadvise(int fd, off_t offset, off_t len, int advice)
{
    file_t *fp;
    vnode_t *vp;
    off_t endoffset;
    int error;

    CTASSERT(POSIX_FADV_NORMAL == UVM_ADV_NORMAL);
    CTASSERT(POSIX_FADV_RANDOM == UVM_ADV_RANDOM);
    CTASSERT(POSIX_FADV_SEQUENTIAL == UVM_ADV_SEQUENTIAL);

    if (len == 0) {
        endoffset = INT64_MAX;
    } else if (len > 0 && (INT64_MAX - offset) >= len) {
        endoffset = offset + len;
    } else {
        return EINVAL;
    }
    if ((fp = fd_getfile(fd)) == NULL) {
        return EBADF;
    }
    if (fp->f_type != DTYPE_VNODE) {
        if (fp->f_type == DTYPE_PIPE || fp->f_type == DTYPE_SOCKET) {
            error = ESPIPE;
        } else {
            error = EOPNOTSUPP;
        }
        fd_putfile(fd);
        return error;
    }

    switch (advice) {
    case POSIX_FADV_WILLNEED:
    case POSIX_FADV_DONTNEED:
        vp = fp->f_vnode;
        if (vp->v_type != VREG && vp->v_type != VBLK) {
            fd_putfile(fd);
            return 0;
        }
        break;
    }

    switch (advice) {
    case POSIX_FADV_NORMAL:
    case POSIX_FADV_RANDOM:
    case POSIX_FADV_SEQUENTIAL:
        /*
         * We ignore offset and size.  Must lock the file to
         * do this, as f_advice is sub-word sized.
         */
        mutex_enter(&fp->f_lock);
        fp->f_advice = (u_char)advice;
        mutex_exit(&fp->f_lock);
        error = 0;
        break;

    case POSIX_FADV_WILLNEED:
        vp = fp->f_vnode;
        error = uvm_readahead(&vp->v_uobj, offset, endoffset - offset);
        break;

    case POSIX_FADV_DONTNEED:
        vp = fp->f_vnode;
        /*
         * Align the region to page boundaries as VOP_PUTPAGES expects
         * by shrinking it.  We shrink instead of expand because we
         * do not want to deactivate cache outside of the requested
         * region.  It means that if the specified region is smaller
         * than PAGE_SIZE, we do nothing.
         */
        if (round_page(offset) < trunc_page(endoffset) &&
            offset <= round_page(offset)) {
            mutex_enter(vp->v_interlock);
            error = VOP_PUTPAGES(vp,
                round_page(offset), trunc_page(endoffset),
                PGO_DEACTIVATE | PGO_CLEANIT);
        } else {
            error = 0;
        }
        break;

    case POSIX_FADV_NOREUSE:
        /* Not implemented yet. */
        error = 0;
        break;

    default:
        error = EINVAL;
        break;
    }

    fd_putfile(fd);
    return error;
}
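A worked example of the shrinking alignment for POSIX_FADV_DONTNEED, assuming PAGE_SIZE == 4096 (illustrative numbers only):

/*
 * DONTNEED on bytes [100, 10000):
 *     round_page(100)   == 4096
 *     trunc_page(10000) == 8192
 * so only the fully covered pages [4096, 8192) are passed to
 * VOP_PUTPAGES.  A region smaller than one page, e.g. [100, 4000),
 * shrinks to an empty range and the hint becomes a no-op.
 */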
/* ARGSUSED */
int
ffs_full_fsync(struct vnode *vp, int flags)
{
    int error, i, uflags;
    struct mount *mp;

    KASSERT(vp->v_tag == VT_UFS);
    KASSERT(VTOI(vp) != NULL);
    KASSERT(vp->v_type != VCHR && vp->v_type != VBLK);

    error = 0;
    uflags = UPDATE_CLOSE | ((flags & FSYNC_WAIT) ? UPDATE_WAIT : 0);

    mp = vp->v_mount;

    /*
     * Flush all dirty data associated with the vnode.
     */
    if (vp->v_type == VREG) {
        int pflags = PGO_ALLPAGES | PGO_CLEANIT;

        if ((flags & FSYNC_WAIT))
            pflags |= PGO_SYNCIO;
        if (fstrans_getstate(mp) == FSTRANS_SUSPENDING)
            pflags |= PGO_FREE;
        mutex_enter(vp->v_interlock);
        error = VOP_PUTPAGES(vp, 0, 0, pflags);
        if (error)
            return error;
    }

#ifdef WAPBL
    if (mp && mp->mnt_wapbl) {
        /*
         * Don't bother writing out metadata if the syncer is
         * making the request.  We will let the sync vnode
         * write it out in a single burst through a call to
         * VFS_SYNC().
         */
        if ((flags & (FSYNC_DATAONLY | FSYNC_LAZY)) != 0)
            return 0;

        if ((VTOI(vp)->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE |
            IN_MODIFY | IN_MODIFIED | IN_ACCESSED)) != 0) {
            error = UFS_WAPBL_BEGIN(mp);
            if (error)
                return error;
            error = ffs_update(vp, NULL, NULL, uflags);
            UFS_WAPBL_END(mp);
        }
        if (error || (flags & FSYNC_NOLOG) != 0)
            return error;

        /*
         * Don't flush the log if the vnode being flushed
         * contains no dirty buffers that could be in the log.
         */
        if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
            error = wapbl_flush(mp->mnt_wapbl, 0);
            if (error)
                return error;
        }

        if ((flags & FSYNC_WAIT) != 0) {
            mutex_enter(vp->v_interlock);
            while (vp->v_numoutput != 0)
                cv_wait(&vp->v_cv, vp->v_interlock);
            mutex_exit(vp->v_interlock);
        }

        return error;
    }
#endif /* WAPBL */

    error = vflushbuf(vp, (flags & FSYNC_WAIT) != 0);
    if (error == 0)
        error = ffs_update(vp, NULL, NULL, uflags);
    if (error == 0 && (flags & FSYNC_CACHE) != 0) {
        i = 1;
        (void)VOP_IOCTL(VTOI(vp)->i_devvp, DIOCCACHESYNC, &i, FWRITE,
            kauth_cred_get());
    }

    return error;
}
int
ffs_fsync(void *v)
{
    struct vop_fsync_args /* {
        struct vnode *a_vp;
        kauth_cred_t a_cred;
        int a_flags;
        off_t a_offlo;
        off_t a_offhi;
        struct lwp *a_l;
    } */ *ap = v;
    struct buf *bp;
    int num, error, i;
    struct indir ia[NIADDR + 1];
    int bsize;
    daddr_t blk_high;
    struct vnode *vp;
    struct mount *mp;

    vp = ap->a_vp;
    mp = vp->v_mount;

    fstrans_start(mp, FSTRANS_LAZY);
    if ((ap->a_offlo == 0 && ap->a_offhi == 0) || (vp->v_type != VREG)) {
        error = ffs_full_fsync(vp, ap->a_flags);
        goto out;
    }

    bsize = mp->mnt_stat.f_iosize;
    blk_high = ap->a_offhi / bsize;
    if (ap->a_offhi % bsize != 0)
        blk_high++;

    /*
     * First, flush all pages in range.
     */
    mutex_enter(vp->v_interlock);
    error = VOP_PUTPAGES(vp, trunc_page(ap->a_offlo),
        round_page(ap->a_offhi), PGO_CLEANIT |
        ((ap->a_flags & FSYNC_WAIT) ? PGO_SYNCIO : 0));
    if (error) {
        goto out;
    }

#ifdef WAPBL
    KASSERT(vp->v_type == VREG);
    if (mp->mnt_wapbl) {
        /*
         * Don't bother writing out metadata if the syncer is
         * making the request.  We will let the sync vnode
         * write it out in a single burst through a call to
         * VFS_SYNC().
         */
        if ((ap->a_flags & (FSYNC_DATAONLY | FSYNC_LAZY)) != 0) {
            fstrans_done(mp);
            return 0;
        }
        error = 0;
        if (vp->v_tag == VT_UFS && VTOI(vp)->i_flag &
            (IN_ACCESS | IN_CHANGE | IN_UPDATE | IN_MODIFY |
             IN_MODIFIED | IN_ACCESSED)) {
            error = UFS_WAPBL_BEGIN(mp);
            if (error) {
                fstrans_done(mp);
                return error;
            }
            error = ffs_update(vp, NULL, NULL, UPDATE_CLOSE |
                ((ap->a_flags & FSYNC_WAIT) ? UPDATE_WAIT : 0));
            UFS_WAPBL_END(mp);
        }
        if (error || (ap->a_flags & FSYNC_NOLOG) != 0) {
            fstrans_done(mp);
            return error;
        }
        error = wapbl_flush(mp->mnt_wapbl, 0);
        fstrans_done(mp);
        return error;
    }
#endif /* WAPBL */

    /*
     * Then, flush indirect blocks.
     */
    if (blk_high >= NDADDR) {
        error = ufs_getlbns(vp, blk_high, ia, &num);
        if (error)
            goto out;

        mutex_enter(&bufcache_lock);
        for (i = 0; i < num; i++) {
            if ((bp = incore(vp, ia[i].in_lbn)) == NULL)
                continue;
            if ((bp->b_cflags & BC_BUSY) != 0 ||
                (bp->b_oflags & BO_DELWRI) == 0)
                continue;
            bp->b_cflags |= BC_BUSY | BC_VFLUSH;
            mutex_exit(&bufcache_lock);
            bawrite(bp);
            mutex_enter(&bufcache_lock);
        }
        mutex_exit(&bufcache_lock);
    }

    if (ap->a_flags & FSYNC_WAIT) {
        mutex_enter(vp->v_interlock);
        while (vp->v_numoutput > 0)
            cv_wait(&vp->v_cv, vp->v_interlock);
        mutex_exit(vp->v_interlock);
    }

    error = ffs_update(vp, NULL, NULL, UPDATE_CLOSE |
        (((ap->a_flags & (FSYNC_WAIT | FSYNC_DATAONLY)) == FSYNC_WAIT)
        ? UPDATE_WAIT : 0));

    if (error == 0 && ap->a_flags & FSYNC_CACHE) {
        int l = 0;
        VOP_IOCTL(VTOI(vp)->i_devvp, DIOCCACHESYNC, &l, FWRITE,
            curlwp->l_cred);
    }

out:
    fstrans_done(mp);
    return error;
}
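For comparison, a hedged sketch of a caller that requests the ranged path above (helper name and locking discipline are assumptions; passing 0/0 as the range would route through ffs_full_fsync() instead):

/*
 * Sketch: sync only bytes [lo, hi] of vp and wait for completion.
 */
static int
fsync_range(struct vnode *vp, kauth_cred_t cred, off_t lo, off_t hi)
{
    int error;

    vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
    error = VOP_FSYNC(vp, cred, FSYNC_WAIT, lo, hi);
    VOP_UNLOCK(vp);
    return error;
}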
int
lfs_truncate(struct vnode *ovp, off_t length, int ioflag, kauth_cred_t cred)
{
    daddr_t lastblock;
    struct inode *oip = VTOI(ovp);
    daddr_t bn, lbn, lastiblock[ULFS_NIADDR], indir_lbn[ULFS_NIADDR];
    /* XXX ondisk32 */
    int32_t newblks[ULFS_NDADDR + ULFS_NIADDR];
    struct lfs *fs;
    struct buf *bp;
    int offset, size, level;
    daddr_t count, rcount;
    daddr_t blocksreleased = 0, real_released = 0;
    int i, nblocks;
    int aflags, error, allerror = 0;
    off_t osize;
    long lastseg;
    size_t bc;
    int obufsize, odb;
    int usepc;

    if (ovp->v_type == VCHR || ovp->v_type == VBLK ||
        ovp->v_type == VFIFO || ovp->v_type == VSOCK) {
        KASSERT(oip->i_size == 0);
        return 0;
    }

    if (length < 0)
        return (EINVAL);

    /*
     * Just return and not update modification times.
     */
    if (oip->i_size == length) {
        /* still do a uvm_vnp_setsize() as writesize may be larger */
        uvm_vnp_setsize(ovp, length);
        return (0);
    }

    fs = oip->i_lfs;

    if (ovp->v_type == VLNK &&
        (oip->i_size < fs->um_maxsymlinklen ||
         (fs->um_maxsymlinklen == 0 && oip->i_ffs1_blocks == 0))) {
#ifdef DIAGNOSTIC
        if (length != 0)
            panic("lfs_truncate: partial truncate of symlink");
#endif
        memset((char *)SHORTLINK(oip), 0, (u_int)oip->i_size);
        oip->i_size = oip->i_ffs1_size = 0;
        oip->i_flag |= IN_CHANGE | IN_UPDATE;
        return (lfs_update(ovp, NULL, NULL, 0));
    }
    if (oip->i_size == length) {
        oip->i_flag |= IN_CHANGE | IN_UPDATE;
        return (lfs_update(ovp, NULL, NULL, 0));
    }
    lfs_imtime(fs);
    osize = oip->i_size;
    usepc = (ovp->v_type == VREG && ovp != fs->lfs_ivnode);

    ASSERT_NO_SEGLOCK(fs);
    /*
     * Lengthen the size of the file. We must ensure that the
     * last byte of the file is allocated. Since the smallest
     * value of osize is 0, length will be at least 1.
     */
    if (osize < length) {
        if (length > fs->um_maxfilesize)
            return (EFBIG);
        aflags = B_CLRBUF;
        if (ioflag & IO_SYNC)
            aflags |= B_SYNC;
        if (usepc) {
            if (lfs_lblkno(fs, osize) < ULFS_NDADDR &&
                lfs_lblkno(fs, osize) != lfs_lblkno(fs, length) &&
                lfs_blkroundup(fs, osize) != osize) {
                off_t eob;

                eob = lfs_blkroundup(fs, osize);
                uvm_vnp_setwritesize(ovp, eob);
                error = ulfs_balloc_range(ovp, osize,
                    eob - osize, cred, aflags);
                if (error) {
                    (void) lfs_truncate(ovp, osize,
                        ioflag & IO_SYNC, cred);
                    return error;
                }
                if (ioflag & IO_SYNC) {
                    mutex_enter(ovp->v_interlock);
                    VOP_PUTPAGES(ovp,
                        trunc_page(osize & lfs_sb_getbmask(fs)),
                        round_page(eob),
                        PGO_CLEANIT | PGO_SYNCIO);
                }
            }
            uvm_vnp_setwritesize(ovp, length);
            error = ulfs_balloc_range(ovp, length - 1, 1, cred,
                aflags);
            if (error) {
                (void) lfs_truncate(ovp, osize,
                    ioflag & IO_SYNC, cred);
                return error;
            }
            uvm_vnp_setsize(ovp, length);
            oip->i_flag |= IN_CHANGE | IN_UPDATE;
            KASSERT(ovp->v_size == oip->i_size);
            oip->i_lfs_hiblk = lfs_lblkno(fs, oip->i_size +
                lfs_sb_getbsize(fs) - 1) - 1;
            return (lfs_update(ovp, NULL, NULL, 0));
        } else {
            error = lfs_reserve(fs, ovp, NULL,
                lfs_btofsb(fs, (ULFS_NIADDR + 2) <<
                    lfs_sb_getbshift(fs)));
            if (error)
                return (error);
            error = lfs_balloc(ovp, length - 1, 1, cred,
                aflags, &bp);
            lfs_reserve(fs, ovp, NULL,
                -lfs_btofsb(fs, (ULFS_NIADDR + 2) <<
                    lfs_sb_getbshift(fs)));
            if (error)
                return (error);
            oip->i_ffs1_size = oip->i_size = length;
            uvm_vnp_setsize(ovp, length);
            (void) VOP_BWRITE(bp->b_vp, bp);
            oip->i_flag |= IN_CHANGE | IN_UPDATE;
            oip->i_lfs_hiblk = lfs_lblkno(fs, oip->i_size +
                lfs_sb_getbsize(fs) - 1) - 1;
            return (lfs_update(ovp, NULL, NULL, 0));
        }
    }

    if ((error = lfs_reserve(fs, ovp, NULL,
        lfs_btofsb(fs, (2 * ULFS_NIADDR + 3) <<
            lfs_sb_getbshift(fs)))) != 0)
        return (error);

    /*
     * Shorten the size of the file.
     * If the file is not being
     * truncated to a block boundary, the contents of the
     * partial block following the end of the file must be
     * zero'ed in case it ever becomes accessible again because
     * of subsequent file growth.  Directories however are not
     * zero'ed as they should grow back initialized to empty.
     */
    offset = lfs_blkoff(fs, length);
    lastseg = -1;
    bc = 0;

    if (ovp != fs->lfs_ivnode)
        lfs_seglock(fs, SEGM_PROT);
    if (offset == 0) {
        oip->i_size = oip->i_ffs1_size = length;
    } else if (!usepc) {
        lbn = lfs_lblkno(fs, length);
        aflags = B_CLRBUF;
        if (ioflag & IO_SYNC)
            aflags |= B_SYNC;
        error = lfs_balloc(ovp, length - 1, 1, cred, aflags, &bp);
        if (error) {
            lfs_reserve(fs, ovp, NULL,
                -lfs_btofsb(fs, (2 * ULFS_NIADDR + 3) <<
                    lfs_sb_getbshift(fs)));
            goto errout;
        }
        obufsize = bp->b_bufsize;
        odb = lfs_btofsb(fs, bp->b_bcount);
        oip->i_size = oip->i_ffs1_size = length;
        size = lfs_blksize(fs, oip, lbn);
        if (ovp->v_type != VDIR)
            memset((char *)bp->b_data + offset, 0,
                (u_int)(size - offset));
        allocbuf(bp, size, 1);
        if ((bp->b_flags & B_LOCKED) != 0 && bp->b_iodone == NULL) {
            mutex_enter(&lfs_lock);
            locked_queue_bytes -= obufsize - bp->b_bufsize;
            mutex_exit(&lfs_lock);
        }
        if (bp->b_oflags & BO_DELWRI) {
            lfs_sb_addavail(fs, odb - lfs_btofsb(fs, size));
            /* XXX shouldn't this wake up on lfs_availsleep? */
        }
        (void) VOP_BWRITE(bp->b_vp, bp);
    } else { /* vp->v_type == VREG && length < osize && offset != 0 */
        /*
         * When truncating a regular file down to a non-block-aligned
         * size, we must zero the part of last block which is past
         * the new EOF.  We must synchronously flush the zeroed pages
         * to disk since the new pages will be invalidated as soon
         * as we inform the VM system of the new, smaller size.
         * We must do this before acquiring the GLOCK, since fetching
         * the pages will acquire the GLOCK internally.
         * So there is a window where another thread could see a whole
         * zeroed page past EOF, but that's life.
         */
        daddr_t xlbn;
        voff_t eoz;

        aflags = ioflag & IO_SYNC ? B_SYNC : 0;
        error = ulfs_balloc_range(ovp, length - 1, 1, cred, aflags);
        if (error) {
            lfs_reserve(fs, ovp, NULL,
                -lfs_btofsb(fs, (2 * ULFS_NIADDR + 3) <<
                    lfs_sb_getbshift(fs)));
            goto errout;
        }
        xlbn = lfs_lblkno(fs, length);
        size = lfs_blksize(fs, oip, xlbn);
        eoz = MIN(lfs_lblktosize(fs, xlbn) + size, osize);
        ubc_zerorange(&ovp->v_uobj, length, eoz - length,
            UBC_UNMAP_FLAG(ovp));
        if (round_page(eoz) > round_page(length)) {
            mutex_enter(ovp->v_interlock);
            error = VOP_PUTPAGES(ovp, round_page(length),
                round_page(eoz),
                PGO_CLEANIT | PGO_DEACTIVATE |
                ((ioflag & IO_SYNC) ? PGO_SYNCIO : 0));
            if (error) {
                lfs_reserve(fs, ovp, NULL,
                    -lfs_btofsb(fs, (2 * ULFS_NIADDR + 3) <<
                        lfs_sb_getbshift(fs)));
                goto errout;
            }
        }
    }

    genfs_node_wrlock(ovp);

    oip->i_size = oip->i_ffs1_size = length;
    uvm_vnp_setsize(ovp, length);

    /*
     * Calculate index into inode's block list of
     * last direct and indirect blocks (if any)
     * which we want to keep.  Lastblock is -1 when
     * the file is truncated to 0.
     */
    /* Avoid sign overflow - XXX assumes that off_t is a quad_t. */
    if (length > QUAD_MAX - lfs_sb_getbsize(fs))
        lastblock = lfs_lblkno(fs,
            QUAD_MAX - lfs_sb_getbsize(fs));
    else
        lastblock = lfs_lblkno(fs, length + lfs_sb_getbsize(fs) - 1) - 1;
    lastiblock[SINGLE] = lastblock - ULFS_NDADDR;
    lastiblock[DOUBLE] = lastiblock[SINGLE] - LFS_NINDIR(fs);
    lastiblock[TRIPLE] = lastiblock[DOUBLE] - LFS_NINDIR(fs) * LFS_NINDIR(fs);
    nblocks = lfs_btofsb(fs, lfs_sb_getbsize(fs));
    /*
     * Record changed file and block pointers before we start
     * freeing blocks.
     * lastiblock values are also normalized to -1
     * for calls to lfs_indirtrunc below.
     */
    memcpy((void *)newblks, (void *)&oip->i_ffs1_db[0], sizeof newblks);
    for (level = TRIPLE; level >= SINGLE; level--)
        if (lastiblock[level] < 0) {
            newblks[ULFS_NDADDR+level] = 0;
            lastiblock[level] = -1;
        }
    for (i = ULFS_NDADDR - 1; i > lastblock; i--)
        newblks[i] = 0;

    oip->i_size = oip->i_ffs1_size = osize;
    error = lfs_vtruncbuf(ovp, lastblock + 1, false, 0);
    if (error && !allerror)
        allerror = error;

    /*
     * Indirect blocks first.
     */
    indir_lbn[SINGLE] = -ULFS_NDADDR;
    indir_lbn[DOUBLE] = indir_lbn[SINGLE] - LFS_NINDIR(fs) - 1;
    indir_lbn[TRIPLE] = indir_lbn[DOUBLE] - LFS_NINDIR(fs) * LFS_NINDIR(fs) - 1;
    for (level = TRIPLE; level >= SINGLE; level--) {
        bn = oip->i_ffs1_ib[level];
        if (bn != 0) {
            error = lfs_indirtrunc(oip, indir_lbn[level],
                bn, lastiblock[level],
                level, &count, &rcount,
                &lastseg, &bc);
            if (error)
                allerror = error;
            real_released += rcount;
            blocksreleased += count;
            if (lastiblock[level] < 0) {
                if (oip->i_ffs1_ib[level] > 0)
                    real_released += nblocks;
                blocksreleased += nblocks;
                oip->i_ffs1_ib[level] = 0;
                lfs_blkfree(fs, oip, bn,
                    lfs_sb_getbsize(fs),
                    &lastseg, &bc);
                lfs_deregister_block(ovp, bn);
            }
        }
        if (lastiblock[level] >= 0)
            goto done;
    }

    /*
     * All whole direct blocks or frags.
     */
    for (i = ULFS_NDADDR - 1; i > lastblock; i--) {
        long bsize, obsize;

        bn = oip->i_ffs1_db[i];
        if (bn == 0)
            continue;
        bsize = lfs_blksize(fs, oip, i);
        if (oip->i_ffs1_db[i] > 0) {
            /* Check for fragment size changes */
            obsize = oip->i_lfs_fragsize[i];
            real_released += lfs_btofsb(fs, obsize);
            oip->i_lfs_fragsize[i] = 0;
        } else
            obsize = 0;
        blocksreleased += lfs_btofsb(fs, bsize);
        oip->i_ffs1_db[i] = 0;
        lfs_blkfree(fs, oip, bn, obsize, &lastseg, &bc);
        lfs_deregister_block(ovp, bn);
    }
    if (lastblock < 0)
        goto done;

    /*
     * Finally, look for a change in size of the
     * last direct block; release any frags.
     */
    bn = oip->i_ffs1_db[lastblock];
    if (bn != 0) {
        long oldspace, newspace;
#if 0
        long olddspace;
#endif

        /*
         * Calculate amount of space we're giving
         * back as old block size minus new block size.
         */
        oldspace = lfs_blksize(fs, oip, lastblock);
#if 0
        olddspace = oip->i_lfs_fragsize[lastblock];
#endif

        oip->i_size = oip->i_ffs1_size = length;
        newspace = lfs_blksize(fs, oip, lastblock);
        if (newspace == 0)
            panic("itrunc: newspace");
        if (oldspace - newspace > 0) {
            blocksreleased += lfs_btofsb(fs, oldspace - newspace);
        }
#if 0
        if (bn > 0 && olddspace - newspace > 0) {
            /* No segment accounting here, just vnode */
            real_released += lfs_btofsb(fs, olddspace - newspace);
        }
#endif
    }

done:
    /* Finish segment accounting corrections */
    lfs_update_seguse(fs, oip, lastseg, bc);
#ifdef DIAGNOSTIC
    for (level = SINGLE; level <= TRIPLE; level++)
        if ((newblks[ULFS_NDADDR + level] == 0) !=
            ((oip->i_ffs1_ib[level]) == 0)) {
            panic("lfs itrunc1");
        }
    for (i = 0; i < ULFS_NDADDR; i++)
        if ((newblks[i] == 0) != (oip->i_ffs1_db[i] == 0)) {
            panic("lfs itrunc2");
        }
    if (length == 0 &&
        (!LIST_EMPTY(&ovp->v_cleanblkhd) ||
         !LIST_EMPTY(&ovp->v_dirtyblkhd)))
        panic("lfs itrunc3");
#endif /* DIAGNOSTIC */

    /*
     * Put back the real size.
     */
    oip->i_size = oip->i_ffs1_size = length;
    oip->i_lfs_effnblks -= blocksreleased;
    oip->i_ffs1_blocks -= real_released;
    mutex_enter(&lfs_lock);
    lfs_sb_addbfree(fs, blocksreleased);
    mutex_exit(&lfs_lock);
#ifdef DIAGNOSTIC
    if (oip->i_size == 0 &&
        (oip->i_ffs1_blocks != 0 || oip->i_lfs_effnblks != 0)) {
        printf("lfs_truncate: truncate to 0 but %d blks/%jd effblks\n",
            oip->i_ffs1_blocks, (intmax_t)oip->i_lfs_effnblks);
        panic("lfs_truncate: persistent blocks");
    }
#endif

    /*
     * If we truncated to zero, take us off the paging queue.
     */
    mutex_enter(&lfs_lock);
    if (oip->i_size == 0 && oip->i_flags & IN_PAGING) {
        oip->i_flags &= ~IN_PAGING;
        TAILQ_REMOVE(&fs->lfs_pchainhd, oip, i_lfs_pchain);
    }
    mutex_exit(&lfs_lock);

    oip->i_flag |= IN_CHANGE;
#if defined(LFS_QUOTA) || defined(LFS_QUOTA2)
    (void) lfs_chkdq(oip, -blocksreleased, NOCRED, 0);
#endif

    lfs_reserve(fs, ovp, NULL,
        -lfs_btofsb(fs, (2 * ULFS_NIADDR + 3) << lfs_sb_getbshift(fs)));
    genfs_node_unlock(ovp);
errout:
    oip->i_lfs_hiblk = lfs_lblkno(fs, oip->i_size +
        lfs_sb_getbsize(fs) - 1) - 1;
    if (ovp != fs->lfs_ivnode)
        lfs_segunlock(fs);
    return (allerror ? allerror : error);
}
/*
 * Destroy any in core blocks past the truncation length.
 * Inlined from vtruncbuf, so that lfs_avail could be updated.
 * We take the seglock to prevent cleaning from occurring while we are
 * invalidating blocks.
 */
static int
lfs_vtruncbuf(struct vnode *vp, daddr_t lbn, bool catch, int slptimeo)
{
    struct buf *bp, *nbp;
    int error;
    struct lfs *fs;
    voff_t off;

    off = round_page((voff_t)lbn << vp->v_mount->mnt_fs_bshift);
    mutex_enter(vp->v_interlock);
    error = VOP_PUTPAGES(vp, off, 0, PGO_FREE | PGO_SYNCIO);
    if (error)
        return error;

    fs = VTOI(vp)->i_lfs;

    ASSERT_SEGLOCK(fs);

    mutex_enter(&bufcache_lock);
restart:
    for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
        nbp = LIST_NEXT(bp, b_vnbufs);
        if (bp->b_lblkno < lbn)
            continue;
        error = bbusy(bp, catch, slptimeo, NULL);
        if (error == EPASSTHROUGH