/*
 * EOPNOTSUPP is no longer legal.  For local media VFS's that do not
 * implement their own VOP_PUTPAGES, their VOP_PUTPAGES should call
 * vnode_pager_generic_putpages() to implement the previous behaviour.
 *
 * Caller has already cleared the pmap modified bits, if any.
 *
 * All other FS's should use the bypass to get to the local media
 * backing vp's VOP_PUTPAGES.
 */
static void
vnode_pager_putpages(vm_object_t object, vm_page_t *m, int count,
		     int sync, int *rtvals)
{
	int rtval;
	struct vnode *vp;
	int bytes = count * PAGE_SIZE;

	/*
	 * Force synchronous operation if we are extremely low on memory
	 * to prevent a low-memory deadlock.  VOP operations often need to
	 * allocate more memory to initiate the I/O (i.e. do a BMAP
	 * operation).  The swapper handles the case by limiting the amount
	 * of asynchronous I/O, but that sort of solution doesn't scale well
	 * for the vnode pager without a lot of work.
	 *
	 * Also, the backing vnode's iodone routine may not wake the pageout
	 * daemon up.  This should probably be addressed XXX.
	 */

	if ((vmstats.v_free_count + vmstats.v_cache_count) <
	    vmstats.v_pageout_free_min) {
		sync |= OBJPC_SYNC;
	}

	/*
	 * Call device-specific putpages function
	 */
	vp = object->handle;
	rtval = VOP_PUTPAGES(vp, m, bytes, sync, rtvals, 0);
	if (rtval == EOPNOTSUPP) {
	    kprintf("vnode_pager: *** WARNING *** stale FS putpages\n");
	    rtval = vnode_pager_generic_putpages( vp, m, bytes, sync, rtvals);
	}
}
Example #2
/*
 * EOPNOTSUPP is no longer legal.  For local media VFS's that do not
 * implement their own VOP_PUTPAGES, their VOP_PUTPAGES should call
 * vnode_pager_generic_putpages() to implement the previous behaviour.
 *
 * All other FS's should use the bypass to get to the local media
 * backing vp's VOP_PUTPAGES.
 */
static void
vnode_pager_putpages(vm_object_t object, vm_page_t *m, int count,
    int flags, int *rtvals)
{
	int rtval;
	struct vnode *vp;
	int bytes = count * PAGE_SIZE;

	/*
	 * Force synchronous operation if we are extremely low on memory
	 * to prevent a low-memory deadlock.  VOP operations often need to
	 * allocate more memory to initiate the I/O (i.e. do a BMAP
	 * operation).  The swapper handles the case by limiting the amount
	 * of asynchronous I/O, but that sort of solution doesn't scale well
	 * for the vnode pager without a lot of work.
	 *
	 * Also, the backing vnode's iodone routine may not wake the pageout
	 * daemon up.  This should probably be addressed XXX.
	 */

	if (vm_cnt.v_free_count < vm_cnt.v_pageout_free_min)
		flags |= VM_PAGER_PUT_SYNC;

	/*
	 * Call device-specific putpages function
	 */
	vp = object->handle;
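	/*
	 * The VOP may sleep doing I/O or acquire vnode locks, so the
	 * object lock cannot be held across the call; it is retaken below.
	 */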
	VM_OBJECT_WUNLOCK(object);
	rtval = VOP_PUTPAGES(vp, m, bytes, flags, rtvals);
	KASSERT(rtval != EOPNOTSUPP, 
	    ("vnode_pager: stale FS putpages\n"));
	VM_OBJECT_WLOCK(object);
}
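As both headers note, a local-media file system's own VOP_PUTPAGES is expected to call vnode_pager_generic_putpages() itself. A minimal sketch of such a VOP, assuming a FreeBSD/DragonFly-like vop_putpages_args with a_vp, a_m, a_count (a byte count, as at the call sites above), a_sync and a_rtvals fields; foofs_putpages is a hypothetical name, not code from either tree:

static int
foofs_putpages(struct vop_putpages_args *ap)
{
	/* Let the generic vnode pager do the clustering and the actual I/O. */
	return (vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count,
	    ap->a_sync, ap->a_rtvals));
}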
Example #3
static int
vn_put(struct uvm_object *uobj, voff_t offlo, voff_t offhi, int flags)
{
	struct vnode *vp = (struct vnode *)uobj;

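	/*
	 * Thin pager glue: the uvm_object of a vnode is embedded in the
	 * vnode itself, so the cast above is safe and the request can be
	 * handed straight to the vnode's own VOP_PUTPAGES.
	 */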
	return VOP_PUTPAGES(vp, offlo, offhi, flags);
}
Example #4
int
RUMP_VOP_PUTPAGES(struct vnode *vp,
    off_t offlo,
    off_t offhi,
    int flags)
{
	int error;

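	/*
	 * Bind the calling host thread to a rump kernel virtual CPU for
	 * the duration of the VOP; rump vnode operations may only run in
	 * rump kernel context.
	 */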
	rump_schedule();
	error = VOP_PUTPAGES(vp, offlo, offhi, flags);
	rump_unschedule();

	return error;
}
Example #5
/*
 * Handles the read/write request given in 'bp' using the vnode's VOP_READ
 * and VOP_WRITE operations.
 *
 * 'obp' is a pointer to the original request fed to the vnd device.
 */
static void
handle_with_rdwr(struct vnd_softc *vnd, const struct buf *obp, struct buf *bp)
{
	bool doread;
	off_t offset;
	size_t len, resid;
	struct vnode *vp;

	doread = bp->b_flags & B_READ;
	offset = obp->b_rawblkno * vnd->sc_dkdev.dk_label->d_secsize;
	len = bp->b_bcount;
	vp = vnd->sc_vp;

#if defined(DEBUG)
	if (vnddebug & VDB_IO)
		printf("vnd (rdwr): vp %p, %s, rawblkno 0x%" PRIx64
		    ", secsize %d, offset %" PRIu64
		    ", bcount %d\n",
		    vp, doread ? "read" : "write", obp->b_rawblkno,
		    vnd->sc_dkdev.dk_label->d_secsize, offset,
		    bp->b_bcount);
#endif

	/* Issue the read or write operation. */
	bp->b_error =
	    vn_rdwr(doread ? UIO_READ : UIO_WRITE,
	    vp, bp->b_data, len, offset, UIO_SYSSPACE,
	    IO_ADV_ENCODE(POSIX_FADV_NOREUSE), vnd->sc_cred, &resid, NULL);
	bp->b_resid = resid;

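	/*
	 * Clean and free every page of the backing vnode, synchronously,
	 * now that the transfer is done.  VOP_PUTPAGES consumes the
	 * v_interlock taken here, so there is no matching mutex_exit().
	 */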
	mutex_enter(vp->v_interlock);
	(void) VOP_PUTPAGES(vp, 0, 0,
	    PGO_ALLPAGES | PGO_CLEANIT | PGO_FREE | PGO_SYNCIO);

	/*
	 * We need to increase the number of outputs on the vnode if
	 * there was any write to it.
	 */
	if (!doread) {
		mutex_enter(vp->v_interlock);
		vp->v_numoutput++;
		mutex_exit(vp->v_interlock);
	}

	biodone(bp);
}
Example #6
static int
unionfs_putpages(void *v)
{
	struct vop_putpages_args /* {
		struct vnode *a_vp;
		voff_t a_offlo;
		voff_t a_offhi;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp, *tvp;
	struct unionfs_node *unp;

	KASSERT(mutex_owned(vp->v_interlock));

	unp = VTOUNIONFS(vp);
	tvp = (unp->un_uppervp != NULLVP ? unp->un_uppervp : unp->un_lowervp);
	KASSERT(tvp->v_interlock == vp->v_interlock);

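	/*
	 * VOP_PUTPAGES must release the interlock on every return path.
	 * When the union vnode itself is being reclaimed, leave the
	 * underlying vnode's pages alone: drop the interlock and return.
	 * Otherwise the upper/lower vnode's putpages (which shares the
	 * same interlock, as asserted above) releases it for us.
	 */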
	if (ap->a_flags & PGO_RECLAIM) {
		mutex_exit(vp->v_interlock);
		return 0;
	}
	return VOP_PUTPAGES(tvp, ap->a_offlo, ap->a_offhi, ap->a_flags);
}
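unionfs forwards the request to its upper or lower vnode, but most NetBSD file systems that keep their data in the page cache need no putpages logic of their own: the vnodeop entry simply delegates to genfs_putpages(). A minimal sketch of such an entry, assuming the standard vop_putpages_args layout shown above (foofs_putpages is a hypothetical name):

static int
foofs_putpages(void *v)
{
	/*
	 * v points to a struct vop_putpages_args (a_vp, a_offlo, a_offhi,
	 * a_flags).  genfs does the real work and, like every putpages
	 * implementation, inherits and releases the caller's v_interlock.
	 */
	return genfs_putpages(v);
}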
Example #7
/*
 * Read/write clusters from/to backing store.
 * For persistent snapshots must be called with cl == 0. off is the
 * offset into the snapshot.
 */
static int
fss_bs_io(struct fss_softc *sc, fss_io_type rw,
    u_int32_t cl, off_t off, int len, void *data)
{
	int error;

	off += FSS_CLTOB(sc, cl);

	vn_lock(sc->sc_bs_vp, LK_EXCLUSIVE|LK_RETRY);

	error = vn_rdwr((rw == FSS_READ ? UIO_READ : UIO_WRITE), sc->sc_bs_vp,
	    data, len, off, UIO_SYSSPACE,
	    IO_ADV_ENCODE(POSIX_FADV_NOREUSE) | IO_NODELOCKED,
	    sc->sc_bs_lwp->l_cred, NULL, NULL);
	if (error == 0) {
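		/*
		 * The transfer succeeded; synchronously push the affected
		 * page range to the backing store and free the pages.
		 * VOP_PUTPAGES releases the interlock taken here.
		 */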
		mutex_enter(sc->sc_bs_vp->v_interlock);
		error = VOP_PUTPAGES(sc->sc_bs_vp, trunc_page(off),
		    round_page(off+len), PGO_CLEANIT | PGO_FREE | PGO_SYNCIO);
	}

	VOP_UNLOCK(sc->sc_bs_vp);

	return error;
}
Example #8
/*
 * Truncate the inode oip to at most length size, freeing the
 * disk blocks.
 */
int
ffs_truncate(struct vnode *ovp, off_t length, int ioflag, kauth_cred_t cred)
{
    daddr_t lastblock;
    struct inode *oip = VTOI(ovp);
    daddr_t bn, lastiblock[UFS_NIADDR], indir_lbn[UFS_NIADDR];
    daddr_t blks[UFS_NDADDR + UFS_NIADDR];
    struct fs *fs;
    int offset, pgoffset, level;
    int64_t count, blocksreleased = 0;
    int i, aflag, nblocks;
    int error, allerror = 0;
    off_t osize;
    int sync;
    struct ufsmount *ump = oip->i_ump;

    if (ovp->v_type == VCHR || ovp->v_type == VBLK ||
            ovp->v_type == VFIFO || ovp->v_type == VSOCK) {
        KASSERT(oip->i_size == 0);
        return 0;
    }

    if (length < 0)
        return (EINVAL);

    if (ovp->v_type == VLNK &&
            (oip->i_size < ump->um_maxsymlinklen ||
             (ump->um_maxsymlinklen == 0 && DIP(oip, blocks) == 0))) {
        KDASSERT(length == 0);
        memset(SHORTLINK(oip), 0, (size_t)oip->i_size);
        oip->i_size = 0;
        DIP_ASSIGN(oip, size, 0);
        oip->i_flag |= IN_CHANGE | IN_UPDATE;
        return (ffs_update(ovp, NULL, NULL, 0));
    }
    if (oip->i_size == length) {
        /* still do a uvm_vnp_setsize() as writesize may be larger */
        uvm_vnp_setsize(ovp, length);
        oip->i_flag |= IN_CHANGE | IN_UPDATE;
        return (ffs_update(ovp, NULL, NULL, 0));
    }
    fs = oip->i_fs;
    if (length > ump->um_maxfilesize)
        return (EFBIG);

    if ((oip->i_flags & SF_SNAPSHOT) != 0)
        ffs_snapremove(ovp);

    osize = oip->i_size;
    aflag = ioflag & IO_SYNC ? B_SYNC : 0;

    /*
     * Lengthen the size of the file. We must ensure that the
     * last byte of the file is allocated. Since the smallest
     * value of osize is 0, length will be at least 1.
     */

    if (osize < length) {
        if (ffs_lblkno(fs, osize) < UFS_NDADDR &&
                ffs_lblkno(fs, osize) != ffs_lblkno(fs, length) &&
                ffs_blkroundup(fs, osize) != osize) {
            off_t eob;

            eob = ffs_blkroundup(fs, osize);
            uvm_vnp_setwritesize(ovp, eob);
            error = ufs_balloc_range(ovp, osize, eob - osize,
                                     cred, aflag);
            if (error) {
                (void) ffs_truncate(ovp, osize,
                                    ioflag & IO_SYNC, cred);
                return error;
            }
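            /*
             * For IO_SYNC requests, flush the freshly allocated tail of
             * the old last block to disk before growing the file further.
             */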
            if (ioflag & IO_SYNC) {
                mutex_enter(ovp->v_interlock);
                VOP_PUTPAGES(ovp,
                             trunc_page(osize & fs->fs_bmask),
                             round_page(eob), PGO_CLEANIT | PGO_SYNCIO |
                             PGO_JOURNALLOCKED);
            }
        }
        uvm_vnp_setwritesize(ovp, length);
        error = ufs_balloc_range(ovp, length - 1, 1, cred, aflag);
        if (error) {
            (void) ffs_truncate(ovp, osize, ioflag & IO_SYNC, cred);
            return (error);
        }
        uvm_vnp_setsize(ovp, length);
        oip->i_flag |= IN_CHANGE | IN_UPDATE;
        KASSERT(ovp->v_size == oip->i_size);
        return (ffs_update(ovp, NULL, NULL, 0));
    }

    /*
     * When truncating a regular file down to a non-block-aligned size,
     * we must zero the part of last block which is past the new EOF.
     * We must synchronously flush the zeroed pages to disk
     * since the new pages will be invalidated as soon as we
     * inform the VM system of the new, smaller size.
     * We must do this before acquiring the GLOCK, since fetching
     * the pages will acquire the GLOCK internally.
     * So there is a window where another thread could see a whole
     * zeroed page past EOF, but that's life.
     */

    offset = ffs_blkoff(fs, length);
    pgoffset = length & PAGE_MASK;
    if (ovp->v_type == VREG && (pgoffset != 0 || offset != 0) &&
            osize > length) {
        daddr_t lbn;
        voff_t eoz;
        int size;

        if (offset != 0) {
            error = ufs_balloc_range(ovp, length - 1, 1, cred,
                                     aflag);
            if (error)
                return error;
        }
        lbn = ffs_lblkno(fs, length);
        size = ffs_blksize(fs, oip, lbn);
        eoz = MIN(MAX(ffs_lblktosize(fs, lbn) + size, round_page(pgoffset)),
                  osize);
        ubc_zerorange(&ovp->v_uobj, length, eoz - length,
                      UBC_UNMAP_FLAG(ovp));
        if (round_page(eoz) > round_page(length)) {
            mutex_enter(ovp->v_interlock);
            error = VOP_PUTPAGES(ovp, round_page(length),
                                 round_page(eoz),
                                 PGO_CLEANIT | PGO_DEACTIVATE | PGO_JOURNALLOCKED |
                                 ((ioflag & IO_SYNC) ? PGO_SYNCIO : 0));
            if (error)
                return error;
        }
    }

    genfs_node_wrlock(ovp);
    oip->i_size = length;
    DIP_ASSIGN(oip, size, length);
    uvm_vnp_setsize(ovp, length);
    /*
     * Calculate index into inode's block list of
     * last direct and indirect blocks (if any)
     * which we want to keep.  Lastblock is -1 when
     * the file is truncated to 0.
     */
    lastblock = ffs_lblkno(fs, length + fs->fs_bsize - 1) - 1;
    lastiblock[SINGLE] = lastblock - UFS_NDADDR;
    lastiblock[DOUBLE] = lastiblock[SINGLE] - FFS_NINDIR(fs);
    lastiblock[TRIPLE] = lastiblock[DOUBLE] - FFS_NINDIR(fs) * FFS_NINDIR(fs);
    nblocks = btodb(fs->fs_bsize);
    /*
     * Update file and block pointers on disk before we start freeing
     * blocks.  If we crash before free'ing blocks below, the blocks
     * will be returned to the free list.  lastiblock values are also
     * normalized to -1 for calls to ffs_indirtrunc below.
     */
    sync = 0;
    for (level = TRIPLE; level >= SINGLE; level--) {
        blks[UFS_NDADDR + level] = DIP(oip, ib[level]);
        if (lastiblock[level] < 0 && blks[UFS_NDADDR + level] != 0) {
            sync = 1;
            DIP_ASSIGN(oip, ib[level], 0);
            lastiblock[level] = -1;
        }
    }
    for (i = 0; i < UFS_NDADDR; i++) {
        blks[i] = DIP(oip, db[i]);
        if (i > lastblock && blks[i] != 0) {
            sync = 1;
            DIP_ASSIGN(oip, db[i], 0);
        }
    }
    oip->i_flag |= IN_CHANGE | IN_UPDATE;
    if (sync) {
        error = ffs_update(ovp, NULL, NULL, UPDATE_WAIT);
        if (error && !allerror)
            allerror = error;
    }

    /*
     * Having written the new inode to disk, save its new configuration
     * and put back the old block pointers long enough to process them.
     * Note that we save the new block configuration so we can check it
     * when we are done.
     */
    for (i = 0; i < UFS_NDADDR; i++) {
        bn = DIP(oip, db[i]);
        DIP_ASSIGN(oip, db[i], blks[i]);
        blks[i] = bn;
    }
    for (i = 0; i < UFS_NIADDR; i++) {
        bn = DIP(oip, ib[i]);
        DIP_ASSIGN(oip, ib[i], blks[UFS_NDADDR + i]);
        blks[UFS_NDADDR + i] = bn;
    }

    oip->i_size = osize;
    DIP_ASSIGN(oip, size, osize);
    error = vtruncbuf(ovp, lastblock + 1, 0, 0);
    if (error && !allerror)
        allerror = error;

    /*
     * Indirect blocks first.
     */
    indir_lbn[SINGLE] = -UFS_NDADDR;
    indir_lbn[DOUBLE] = indir_lbn[SINGLE] - FFS_NINDIR(fs) - 1;
    indir_lbn[TRIPLE] = indir_lbn[DOUBLE] - FFS_NINDIR(fs) * FFS_NINDIR(fs) - 1;
    for (level = TRIPLE; level >= SINGLE; level--) {
        if (oip->i_ump->um_fstype == UFS1)
            bn = ufs_rw32(oip->i_ffs1_ib[level],UFS_FSNEEDSWAP(fs));
        else
            bn = ufs_rw64(oip->i_ffs2_ib[level],UFS_FSNEEDSWAP(fs));
        if (bn != 0) {
            error = ffs_indirtrunc(oip, indir_lbn[level],
                                   FFS_FSBTODB(fs, bn), lastiblock[level], level, &count);
            if (error)
                allerror = error;
            blocksreleased += count;
            if (lastiblock[level] < 0) {
                DIP_ASSIGN(oip, ib[level], 0);
                if (oip->i_ump->um_mountp->mnt_wapbl) {
                    UFS_WAPBL_REGISTER_DEALLOCATION(
                        oip->i_ump->um_mountp,
                        FFS_FSBTODB(fs, bn), fs->fs_bsize);
                } else
                    ffs_blkfree(fs, oip->i_devvp, bn,
                                fs->fs_bsize, oip->i_number);
                blocksreleased += nblocks;
            }
        }
        if (lastiblock[level] >= 0)
            goto done;
    }

    /*
     * All whole direct blocks or frags.
     */
    for (i = UFS_NDADDR - 1; i > lastblock; i--) {
        long bsize;

        if (oip->i_ump->um_fstype == UFS1)
            bn = ufs_rw32(oip->i_ffs1_db[i], UFS_FSNEEDSWAP(fs));
        else
            bn = ufs_rw64(oip->i_ffs2_db[i], UFS_FSNEEDSWAP(fs));
        if (bn == 0)
            continue;
        DIP_ASSIGN(oip, db[i], 0);
        bsize = ffs_blksize(fs, oip, i);
        if ((oip->i_ump->um_mountp->mnt_wapbl) &&
                (ovp->v_type != VREG)) {
            UFS_WAPBL_REGISTER_DEALLOCATION(oip->i_ump->um_mountp,
                                            FFS_FSBTODB(fs, bn), bsize);
        } else
            ffs_blkfree(fs, oip->i_devvp, bn, bsize, oip->i_number);
        blocksreleased += btodb(bsize);
    }
    if (lastblock < 0)
        goto done;

    /*
     * Finally, look for a change in size of the
     * last direct block; release any frags.
     */
    if (oip->i_ump->um_fstype == UFS1)
        bn = ufs_rw32(oip->i_ffs1_db[lastblock], UFS_FSNEEDSWAP(fs));
    else
        bn = ufs_rw64(oip->i_ffs2_db[lastblock], UFS_FSNEEDSWAP(fs));
    if (bn != 0) {
        long oldspace, newspace;

        /*
         * Calculate amount of space we're giving
         * back as old block size minus new block size.
         */
        oldspace = ffs_blksize(fs, oip, lastblock);
        oip->i_size = length;
        DIP_ASSIGN(oip, size, length);
        newspace = ffs_blksize(fs, oip, lastblock);
        if (newspace == 0)
            panic("itrunc: newspace");
        if (oldspace - newspace > 0) {
            /*
             * Block number of space to be free'd is
             * the old block # plus the number of frags
             * required for the storage we're keeping.
             */
            bn += ffs_numfrags(fs, newspace);
            if ((oip->i_ump->um_mountp->mnt_wapbl) &&
                    (ovp->v_type != VREG)) {
                UFS_WAPBL_REGISTER_DEALLOCATION(
                    oip->i_ump->um_mountp, FFS_FSBTODB(fs, bn),
                    oldspace - newspace);
            } else
                ffs_blkfree(fs, oip->i_devvp, bn,
                            oldspace - newspace, oip->i_number);
            blocksreleased += btodb(oldspace - newspace);
        }
    }

done:
#ifdef DIAGNOSTIC
    for (level = SINGLE; level <= TRIPLE; level++)
        if (blks[UFS_NDADDR + level] != DIP(oip, ib[level]))
            panic("itrunc1");
    for (i = 0; i < UFS_NDADDR; i++)
        if (blks[i] != DIP(oip, db[i]))
            panic("itrunc2");
    if (length == 0 &&
            (!LIST_EMPTY(&ovp->v_cleanblkhd) || !LIST_EMPTY(&ovp->v_dirtyblkhd)))
        panic("itrunc3");
#endif /* DIAGNOSTIC */
    /*
     * Put back the real size.
     */
    oip->i_size = length;
    DIP_ASSIGN(oip, size, length);
    DIP_ADD(oip, blocks, -blocksreleased);
    genfs_node_unlock(ovp);
    oip->i_flag |= IN_CHANGE;
    UFS_WAPBL_UPDATE(ovp, NULL, NULL, 0);
#if defined(QUOTA) || defined(QUOTA2)
    (void) chkdq(oip, -blocksreleased, NOCRED, 0);
#endif
    KASSERT(ovp->v_type != VREG || ovp->v_size == oip->i_size);
    return (allerror);
}
Example #9
int
do_posix_fadvise(int fd, off_t offset, off_t len, int advice)
{
	file_t *fp;
	vnode_t *vp;
	off_t endoffset;
	int error;

	CTASSERT(POSIX_FADV_NORMAL == UVM_ADV_NORMAL);
	CTASSERT(POSIX_FADV_RANDOM == UVM_ADV_RANDOM);
	CTASSERT(POSIX_FADV_SEQUENTIAL == UVM_ADV_SEQUENTIAL);

	if (len == 0) {
		endoffset = INT64_MAX;
	} else if (len > 0 && (INT64_MAX - offset) >= len) {
		endoffset = offset + len;
	} else {
		return EINVAL;
	}
	if ((fp = fd_getfile(fd)) == NULL) {
		return EBADF;
	}
	if (fp->f_type != DTYPE_VNODE) {
		if (fp->f_type == DTYPE_PIPE || fp->f_type == DTYPE_SOCKET) {
			error = ESPIPE;
		} else {
			error = EOPNOTSUPP;
		}
		fd_putfile(fd);
		return error;
	}

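	/*
	 * WILLNEED and DONTNEED operate on the page cache, so they are a
	 * silent no-op for anything other than regular files and block
	 * devices.
	 */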
	switch (advice) {
	case POSIX_FADV_WILLNEED:
	case POSIX_FADV_DONTNEED:
		vp = fp->f_vnode;
		if (vp->v_type != VREG && vp->v_type != VBLK) {
			fd_putfile(fd);
			return 0;
		}
		break;
	}

	switch (advice) {
	case POSIX_FADV_NORMAL:
	case POSIX_FADV_RANDOM:
	case POSIX_FADV_SEQUENTIAL:
		/*
		 * We ignore offset and size.  Must lock the file to
		 * do this, as f_advice is sub-word sized.
		 */
		mutex_enter(&fp->f_lock);
		fp->f_advice = (u_char)advice;
		mutex_exit(&fp->f_lock);
		error = 0;
		break;

	case POSIX_FADV_WILLNEED:
		vp = fp->f_vnode;
		error = uvm_readahead(&vp->v_uobj, offset, endoffset - offset);
		break;

	case POSIX_FADV_DONTNEED:
		vp = fp->f_vnode;
		/*
		 * Align the region to page boundaries as VOP_PUTPAGES expects
		 * by shrinking it.  We shrink instead of expand because we
		 * do not want to deactivate cache outside of the requested
		 * region.  It means that if the specified region is smaller
		 * than PAGE_SIZE, we do nothing.
		 */
		if (round_page(offset) < trunc_page(endoffset) &&
		    offset <= round_page(offset)) {
			mutex_enter(vp->v_interlock);
			error = VOP_PUTPAGES(vp,
			    round_page(offset), trunc_page(endoffset),
			    PGO_DEACTIVATE | PGO_CLEANIT);
		} else {
			error = 0;
		}
		break;

	case POSIX_FADV_NOREUSE:
		/* Not implemented yet. */
		error = 0;
		break;
	default:
		error = EINVAL;
		break;
	}

	fd_putfile(fd);
	return error;
}
Example #10
/* ARGSUSED */
int
ffs_full_fsync(struct vnode *vp, int flags)
{
	int error, i, uflags;
	struct mount *mp;

	KASSERT(vp->v_tag == VT_UFS);
	KASSERT(VTOI(vp) != NULL);
	KASSERT(vp->v_type != VCHR && vp->v_type != VBLK);

	error = 0;
	uflags = UPDATE_CLOSE | ((flags & FSYNC_WAIT) ? UPDATE_WAIT : 0);

	mp = vp->v_mount;

	/*
	 * Flush all dirty data associated with the vnode.
	 */
	if (vp->v_type == VREG) {
		int pflags = PGO_ALLPAGES | PGO_CLEANIT;

		if ((flags & FSYNC_WAIT))
			pflags |= PGO_SYNCIO;
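		/* Free the pages as well if the file system is suspending. */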
		if (fstrans_getstate(mp) == FSTRANS_SUSPENDING)
			pflags |= PGO_FREE;
		mutex_enter(vp->v_interlock);
		error = VOP_PUTPAGES(vp, 0, 0, pflags);
		if (error)
			return error;
	}

#ifdef WAPBL
	if (mp && mp->mnt_wapbl) {
		/*
		 * Don't bother writing out metadata if the syncer is
		 * making the request.  We will let the sync vnode
		 * write it out in a single burst through a call to
		 * VFS_SYNC().
		 */
		if ((flags & (FSYNC_DATAONLY | FSYNC_LAZY)) != 0)
			return 0;

		if ((VTOI(vp)->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE
		    | IN_MODIFY | IN_MODIFIED | IN_ACCESSED)) != 0) {
			error = UFS_WAPBL_BEGIN(mp);
			if (error)
				return error;
			error = ffs_update(vp, NULL, NULL, uflags);
			UFS_WAPBL_END(mp);
		}
		if (error || (flags & FSYNC_NOLOG) != 0)
			return error;

		/*
		 * Don't flush the log if the vnode being flushed
		 * contains no dirty buffers that could be in the log.
		 */
		if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
			error = wapbl_flush(mp->mnt_wapbl, 0);
			if (error)
				return error;
		}

		if ((flags & FSYNC_WAIT) != 0) {
			mutex_enter(vp->v_interlock);
			while (vp->v_numoutput != 0)
				cv_wait(&vp->v_cv, vp->v_interlock);
			mutex_exit(vp->v_interlock);
		}

		return error;
	}
#endif /* WAPBL */

	error = vflushbuf(vp, (flags & FSYNC_WAIT) != 0);
	if (error == 0)
		error = ffs_update(vp, NULL, NULL, uflags);
	if (error == 0 && (flags & FSYNC_CACHE) != 0) {
		i = 1;
		(void)VOP_IOCTL(VTOI(vp)->i_devvp, DIOCCACHESYNC, &i, FWRITE,
		    kauth_cred_get());
	}

	return error;
}
Example #11
int
ffs_fsync(void *v)
{
	struct vop_fsync_args /* {
		struct vnode *a_vp;
		kauth_cred_t a_cred;
		int a_flags;
		off_t a_offlo;
		off_t a_offhi;
		struct lwp *a_l;
	} */ *ap = v;
	struct buf *bp;
	int num, error, i;
	struct indir ia[NIADDR + 1];
	int bsize;
	daddr_t blk_high;
	struct vnode *vp;
	struct mount *mp;

	vp = ap->a_vp;
	mp = vp->v_mount;

	fstrans_start(mp, FSTRANS_LAZY);
	if ((ap->a_offlo == 0 && ap->a_offhi == 0) || (vp->v_type != VREG)) {
		error = ffs_full_fsync(vp, ap->a_flags);
		goto out;
	}

	bsize = mp->mnt_stat.f_iosize;
	blk_high = ap->a_offhi / bsize;
	if (ap->a_offhi % bsize != 0)
		blk_high++;

	/*
	 * First, flush all pages in range.
	 */

	mutex_enter(vp->v_interlock);
	error = VOP_PUTPAGES(vp, trunc_page(ap->a_offlo),
	    round_page(ap->a_offhi), PGO_CLEANIT |
	    ((ap->a_flags & FSYNC_WAIT) ? PGO_SYNCIO : 0));
	if (error) {
		goto out;
	}

#ifdef WAPBL
	KASSERT(vp->v_type == VREG);
	if (mp->mnt_wapbl) {
		/*
		 * Don't bother writing out metadata if the syncer is
		 * making the request.  We will let the sync vnode
		 * write it out in a single burst through a call to
		 * VFS_SYNC().
		 */
		if ((ap->a_flags & (FSYNC_DATAONLY | FSYNC_LAZY)) != 0) {
			fstrans_done(mp);
			return 0;
		}
		error = 0;
		if (vp->v_tag == VT_UFS && VTOI(vp)->i_flag &
		    (IN_ACCESS | IN_CHANGE | IN_UPDATE | IN_MODIFY |
				 IN_MODIFIED | IN_ACCESSED)) {
			error = UFS_WAPBL_BEGIN(mp);
			if (error) {
				fstrans_done(mp);
				return error;
			}
			error = ffs_update(vp, NULL, NULL, UPDATE_CLOSE |
			    ((ap->a_flags & FSYNC_WAIT) ? UPDATE_WAIT : 0));
			UFS_WAPBL_END(mp);
		}
		if (error || (ap->a_flags & FSYNC_NOLOG) != 0) {
			fstrans_done(mp);
			return error;
		}
		error = wapbl_flush(mp->mnt_wapbl, 0);
		fstrans_done(mp);
		return error;
	}
#endif /* WAPBL */

	/*
	 * Then, flush indirect blocks.
	 */

	if (blk_high >= NDADDR) {
		error = ufs_getlbns(vp, blk_high, ia, &num);
		if (error)
			goto out;

		mutex_enter(&bufcache_lock);
		for (i = 0; i < num; i++) {
			if ((bp = incore(vp, ia[i].in_lbn)) == NULL)
				continue;
			if ((bp->b_cflags & BC_BUSY) != 0 ||
			    (bp->b_oflags & BO_DELWRI) == 0)
				continue;
			bp->b_cflags |= BC_BUSY | BC_VFLUSH;
			mutex_exit(&bufcache_lock);
			bawrite(bp);
			mutex_enter(&bufcache_lock);
		}
		mutex_exit(&bufcache_lock);
	}

	if (ap->a_flags & FSYNC_WAIT) {
		mutex_enter(vp->v_interlock);
		while (vp->v_numoutput > 0)
			cv_wait(&vp->v_cv, vp->v_interlock);
		mutex_exit(vp->v_interlock);
	}

	error = ffs_update(vp, NULL, NULL, UPDATE_CLOSE |
	    (((ap->a_flags & (FSYNC_WAIT | FSYNC_DATAONLY)) == FSYNC_WAIT)
	    ? UPDATE_WAIT : 0));

	if (error == 0 && ap->a_flags & FSYNC_CACHE) {
		int l = 0;
		VOP_IOCTL(VTOI(vp)->i_devvp, DIOCCACHESYNC, &l, FWRITE,
			curlwp->l_cred);
	}

out:
	fstrans_done(mp);
	return error;
}
Example #12
int
lfs_truncate(struct vnode *ovp, off_t length, int ioflag, kauth_cred_t cred)
{
	daddr_t lastblock;
	struct inode *oip = VTOI(ovp);
	daddr_t bn, lbn, lastiblock[ULFS_NIADDR], indir_lbn[ULFS_NIADDR];
	/* XXX ondisk32 */
	int32_t newblks[ULFS_NDADDR + ULFS_NIADDR];
	struct lfs *fs;
	struct buf *bp;
	int offset, size, level;
	daddr_t count, rcount;
	daddr_t blocksreleased = 0, real_released = 0;
	int i, nblocks;
	int aflags, error, allerror = 0;
	off_t osize;
	long lastseg;
	size_t bc;
	int obufsize, odb;
	int usepc;

	if (ovp->v_type == VCHR || ovp->v_type == VBLK ||
	    ovp->v_type == VFIFO || ovp->v_type == VSOCK) {
		KASSERT(oip->i_size == 0);
		return 0;
	}

	if (length < 0)
		return (EINVAL);

	/*
	 * Just return without updating modification times.
	 */
	if (oip->i_size == length) {
		/* still do a uvm_vnp_setsize() as writesize may be larger */
		uvm_vnp_setsize(ovp, length);
		return (0);
	}

	fs = oip->i_lfs;

	if (ovp->v_type == VLNK &&
	    (oip->i_size < fs->um_maxsymlinklen ||
	     (fs->um_maxsymlinklen == 0 &&
	      oip->i_ffs1_blocks == 0))) {
#ifdef DIAGNOSTIC
		if (length != 0)
			panic("lfs_truncate: partial truncate of symlink");
#endif
		memset((char *)SHORTLINK(oip), 0, (u_int)oip->i_size);
		oip->i_size = oip->i_ffs1_size = 0;
		oip->i_flag |= IN_CHANGE | IN_UPDATE;
		return (lfs_update(ovp, NULL, NULL, 0));
	}
	if (oip->i_size == length) {
		oip->i_flag |= IN_CHANGE | IN_UPDATE;
		return (lfs_update(ovp, NULL, NULL, 0));
	}
	lfs_imtime(fs);
	osize = oip->i_size;
	usepc = (ovp->v_type == VREG && ovp != fs->lfs_ivnode);

	ASSERT_NO_SEGLOCK(fs);
	/*
	 * Lengthen the size of the file. We must ensure that the
	 * last byte of the file is allocated. Since the smallest
	 * value of osize is 0, length will be at least 1.
	 */
	if (osize < length) {
		if (length > fs->um_maxfilesize)
			return (EFBIG);
		aflags = B_CLRBUF;
		if (ioflag & IO_SYNC)
			aflags |= B_SYNC;
		if (usepc) {
			if (lfs_lblkno(fs, osize) < ULFS_NDADDR &&
			    lfs_lblkno(fs, osize) != lfs_lblkno(fs, length) &&
			    lfs_blkroundup(fs, osize) != osize) {
				off_t eob;

				eob = lfs_blkroundup(fs, osize);
				uvm_vnp_setwritesize(ovp, eob);
				error = ulfs_balloc_range(ovp, osize,
				    eob - osize, cred, aflags);
				if (error) {
					(void) lfs_truncate(ovp, osize,
						    ioflag & IO_SYNC, cred);
					return error;
				}
				if (ioflag & IO_SYNC) {
					mutex_enter(ovp->v_interlock);
					VOP_PUTPAGES(ovp,
					    trunc_page(osize & lfs_sb_getbmask(fs)),
					    round_page(eob),
					    PGO_CLEANIT | PGO_SYNCIO);
				}
			}
			uvm_vnp_setwritesize(ovp, length);
			error = ulfs_balloc_range(ovp, length - 1, 1, cred,
						 aflags);
			if (error) {
				(void) lfs_truncate(ovp, osize,
						    ioflag & IO_SYNC, cred);
				return error;
			}
			uvm_vnp_setsize(ovp, length);
			oip->i_flag |= IN_CHANGE | IN_UPDATE;
			KASSERT(ovp->v_size == oip->i_size);
			oip->i_lfs_hiblk = lfs_lblkno(fs, oip->i_size + lfs_sb_getbsize(fs) - 1) - 1;
			return (lfs_update(ovp, NULL, NULL, 0));
		} else {
			error = lfs_reserve(fs, ovp, NULL,
			    lfs_btofsb(fs, (ULFS_NIADDR + 2) << lfs_sb_getbshift(fs)));
			if (error)
				return (error);
			error = lfs_balloc(ovp, length - 1, 1, cred,
					   aflags, &bp);
			lfs_reserve(fs, ovp, NULL,
			    -lfs_btofsb(fs, (ULFS_NIADDR + 2) << lfs_sb_getbshift(fs)));
			if (error)
				return (error);
			oip->i_ffs1_size = oip->i_size = length;
			uvm_vnp_setsize(ovp, length);
			(void) VOP_BWRITE(bp->b_vp, bp);
			oip->i_flag |= IN_CHANGE | IN_UPDATE;
			oip->i_lfs_hiblk = lfs_lblkno(fs, oip->i_size + lfs_sb_getbsize(fs) - 1) - 1;
			return (lfs_update(ovp, NULL, NULL, 0));
		}
	}

	if ((error = lfs_reserve(fs, ovp, NULL,
	    lfs_btofsb(fs, (2 * ULFS_NIADDR + 3) << lfs_sb_getbshift(fs)))) != 0)
		return (error);

	/*
	 * Shorten the size of the file. If the file is not being
	 * truncated to a block boundary, the contents of the
	 * partial block following the end of the file must be
	 * zero'ed in case it ever becomes accessible again because
	 * of subsequent file growth. Directories however are not
	 * zero'ed as they should grow back initialized to empty.
	 */
	offset = lfs_blkoff(fs, length);
	lastseg = -1;
	bc = 0;

	if (ovp != fs->lfs_ivnode)
		lfs_seglock(fs, SEGM_PROT);
	if (offset == 0) {
		oip->i_size = oip->i_ffs1_size = length;
	} else if (!usepc) {
		lbn = lfs_lblkno(fs, length);
		aflags = B_CLRBUF;
		if (ioflag & IO_SYNC)
			aflags |= B_SYNC;
		error = lfs_balloc(ovp, length - 1, 1, cred, aflags, &bp);
		if (error) {
			lfs_reserve(fs, ovp, NULL,
			    -lfs_btofsb(fs, (2 * ULFS_NIADDR + 3) << lfs_sb_getbshift(fs)));
			goto errout;
		}
		obufsize = bp->b_bufsize;
		odb = lfs_btofsb(fs, bp->b_bcount);
		oip->i_size = oip->i_ffs1_size = length;
		size = lfs_blksize(fs, oip, lbn);
		if (ovp->v_type != VDIR)
			memset((char *)bp->b_data + offset, 0,
			       (u_int)(size - offset));
		allocbuf(bp, size, 1);
		if ((bp->b_flags & B_LOCKED) != 0 && bp->b_iodone == NULL) {
			mutex_enter(&lfs_lock);
			locked_queue_bytes -= obufsize - bp->b_bufsize;
			mutex_exit(&lfs_lock);
		}
		if (bp->b_oflags & BO_DELWRI) {
			lfs_sb_addavail(fs, odb - lfs_btofsb(fs, size));
			/* XXX shouldn't this wake up on lfs_availsleep? */
		}
		(void) VOP_BWRITE(bp->b_vp, bp);
	} else { /* vp->v_type == VREG && length < osize && offset != 0 */
		/*
		 * When truncating a regular file down to a non-block-aligned
		 * size, we must zero the part of last block which is past
		 * the new EOF.  We must synchronously flush the zeroed pages
		 * to disk since the new pages will be invalidated as soon
		 * as we inform the VM system of the new, smaller size.
		 * We must do this before acquiring the GLOCK, since fetching
		 * the pages will acquire the GLOCK internally.
		 * So there is a window where another thread could see a whole
		 * zeroed page past EOF, but that's life.
		 */
		daddr_t xlbn;
		voff_t eoz;

		aflags = ioflag & IO_SYNC ? B_SYNC : 0;
		error = ulfs_balloc_range(ovp, length - 1, 1, cred, aflags);
		if (error) {
			lfs_reserve(fs, ovp, NULL,
				    -lfs_btofsb(fs, (2 * ULFS_NIADDR + 3) << lfs_sb_getbshift(fs)));
			goto errout;
		}
		xlbn = lfs_lblkno(fs, length);
		size = lfs_blksize(fs, oip, xlbn);
		eoz = MIN(lfs_lblktosize(fs, xlbn) + size, osize);
		ubc_zerorange(&ovp->v_uobj, length, eoz - length,
		    UBC_UNMAP_FLAG(ovp));
		if (round_page(eoz) > round_page(length)) {
			mutex_enter(ovp->v_interlock);
			error = VOP_PUTPAGES(ovp, round_page(length),
			    round_page(eoz),
			    PGO_CLEANIT | PGO_DEACTIVATE |
			    ((ioflag & IO_SYNC) ? PGO_SYNCIO : 0));
			if (error) {
				lfs_reserve(fs, ovp, NULL,
					    -lfs_btofsb(fs, (2 * ULFS_NIADDR + 3) << lfs_sb_getbshift(fs)));
				goto errout;
			}
		}
	}

	genfs_node_wrlock(ovp);

	oip->i_size = oip->i_ffs1_size = length;
	uvm_vnp_setsize(ovp, length);

	/*
	 * Calculate index into inode's block list of
	 * last direct and indirect blocks (if any)
	 * which we want to keep.  Lastblock is -1 when
	 * the file is truncated to 0.
	 */
	/* Avoid sign overflow - XXX assumes that off_t is a quad_t. */
	if (length > QUAD_MAX - lfs_sb_getbsize(fs))
		lastblock = lfs_lblkno(fs, QUAD_MAX - lfs_sb_getbsize(fs));
	else
		lastblock = lfs_lblkno(fs, length + lfs_sb_getbsize(fs) - 1) - 1;
	lastiblock[SINGLE] = lastblock - ULFS_NDADDR;
	lastiblock[DOUBLE] = lastiblock[SINGLE] - LFS_NINDIR(fs);
	lastiblock[TRIPLE] = lastiblock[DOUBLE] - LFS_NINDIR(fs) * LFS_NINDIR(fs);
	nblocks = lfs_btofsb(fs, lfs_sb_getbsize(fs));
	/*
	 * Record changed file and block pointers before we start
	 * freeing blocks.  lastiblock values are also normalized to -1
	 * for calls to lfs_indirtrunc below.
	 */
	memcpy((void *)newblks, (void *)&oip->i_ffs1_db[0], sizeof newblks);
	for (level = TRIPLE; level >= SINGLE; level--)
		if (lastiblock[level] < 0) {
			newblks[ULFS_NDADDR+level] = 0;
			lastiblock[level] = -1;
		}
	for (i = ULFS_NDADDR - 1; i > lastblock; i--)
		newblks[i] = 0;

	oip->i_size = oip->i_ffs1_size = osize;
	error = lfs_vtruncbuf(ovp, lastblock + 1, false, 0);
	if (error && !allerror)
		allerror = error;

	/*
	 * Indirect blocks first.
	 */
	indir_lbn[SINGLE] = -ULFS_NDADDR;
	indir_lbn[DOUBLE] = indir_lbn[SINGLE] - LFS_NINDIR(fs) - 1;
	indir_lbn[TRIPLE] = indir_lbn[DOUBLE] - LFS_NINDIR(fs) * LFS_NINDIR(fs) - 1;
	for (level = TRIPLE; level >= SINGLE; level--) {
		bn = oip->i_ffs1_ib[level];
		if (bn != 0) {
			error = lfs_indirtrunc(oip, indir_lbn[level],
					       bn, lastiblock[level],
					       level, &count, &rcount,
					       &lastseg, &bc);
			if (error)
				allerror = error;
			real_released += rcount;
			blocksreleased += count;
			if (lastiblock[level] < 0) {
				if (oip->i_ffs1_ib[level] > 0)
					real_released += nblocks;
				blocksreleased += nblocks;
				oip->i_ffs1_ib[level] = 0;
				lfs_blkfree(fs, oip, bn, lfs_sb_getbsize(fs),
					    &lastseg, &bc);
				lfs_deregister_block(ovp, bn);
			}
		}
		if (lastiblock[level] >= 0)
			goto done;
	}

	/*
	 * All whole direct blocks or frags.
	 */
	for (i = ULFS_NDADDR - 1; i > lastblock; i--) {
		long bsize, obsize;

		bn = oip->i_ffs1_db[i];
		if (bn == 0)
			continue;
		bsize = lfs_blksize(fs, oip, i);
		if (oip->i_ffs1_db[i] > 0) {
			/* Check for fragment size changes */
			obsize = oip->i_lfs_fragsize[i];
			real_released += lfs_btofsb(fs, obsize);
			oip->i_lfs_fragsize[i] = 0;
		} else
			obsize = 0;
		blocksreleased += lfs_btofsb(fs, bsize);
		oip->i_ffs1_db[i] = 0;
		lfs_blkfree(fs, oip, bn, obsize, &lastseg, &bc);
		lfs_deregister_block(ovp, bn);
	}
	if (lastblock < 0)
		goto done;

	/*
	 * Finally, look for a change in size of the
	 * last direct block; release any frags.
	 */
	bn = oip->i_ffs1_db[lastblock];
	if (bn != 0) {
		long oldspace, newspace;
#if 0
		long olddspace;
#endif

		/*
		 * Calculate amount of space we're giving
		 * back as old block size minus new block size.
		 */
		oldspace = lfs_blksize(fs, oip, lastblock);
#if 0
		olddspace = oip->i_lfs_fragsize[lastblock];
#endif

		oip->i_size = oip->i_ffs1_size = length;
		newspace = lfs_blksize(fs, oip, lastblock);
		if (newspace == 0)
			panic("itrunc: newspace");
		if (oldspace - newspace > 0) {
			blocksreleased += lfs_btofsb(fs, oldspace - newspace);
		}
#if 0
		if (bn > 0 && olddspace - newspace > 0) {
			/* No segment accounting here, just vnode */
			real_released += lfs_btofsb(fs, olddspace - newspace);
		}
#endif
	}

done:
	/* Finish segment accounting corrections */
	lfs_update_seguse(fs, oip, lastseg, bc);
#ifdef DIAGNOSTIC
	for (level = SINGLE; level <= TRIPLE; level++)
		if ((newblks[ULFS_NDADDR + level] == 0) !=
		    ((oip->i_ffs1_ib[level]) == 0)) {
			panic("lfs itrunc1");
		}
	for (i = 0; i < ULFS_NDADDR; i++)
		if ((newblks[i] == 0) != (oip->i_ffs1_db[i] == 0)) {
			panic("lfs itrunc2");
		}
	if (length == 0 &&
	    (!LIST_EMPTY(&ovp->v_cleanblkhd) || !LIST_EMPTY(&ovp->v_dirtyblkhd)))
		panic("lfs itrunc3");
#endif /* DIAGNOSTIC */
	/*
	 * Put back the real size.
	 */
	oip->i_size = oip->i_ffs1_size = length;
	oip->i_lfs_effnblks -= blocksreleased;
	oip->i_ffs1_blocks -= real_released;
	mutex_enter(&lfs_lock);
	lfs_sb_addbfree(fs, blocksreleased);
	mutex_exit(&lfs_lock);
#ifdef DIAGNOSTIC
	if (oip->i_size == 0 &&
	    (oip->i_ffs1_blocks != 0 || oip->i_lfs_effnblks != 0)) {
		printf("lfs_truncate: truncate to 0 but %d blks/%jd effblks\n",
		       oip->i_ffs1_blocks, (intmax_t)oip->i_lfs_effnblks);
		panic("lfs_truncate: persistent blocks");
	}
#endif

	/*
	 * If we truncated to zero, take us off the paging queue.
	 */
	mutex_enter(&lfs_lock);
	if (oip->i_size == 0 && oip->i_flags & IN_PAGING) {
		oip->i_flags &= ~IN_PAGING;
		TAILQ_REMOVE(&fs->lfs_pchainhd, oip, i_lfs_pchain);
	}
	mutex_exit(&lfs_lock);

	oip->i_flag |= IN_CHANGE;
#if defined(LFS_QUOTA) || defined(LFS_QUOTA2)
	(void) lfs_chkdq(oip, -blocksreleased, NOCRED, 0);
#endif
	lfs_reserve(fs, ovp, NULL,
	    -lfs_btofsb(fs, (2 * ULFS_NIADDR + 3) << lfs_sb_getbshift(fs)));
	genfs_node_unlock(ovp);
  errout:
	oip->i_lfs_hiblk = lfs_lblkno(fs, oip->i_size + lfs_sb_getbsize(fs) - 1) - 1;
	if (ovp != fs->lfs_ivnode)
		lfs_segunlock(fs);
	return (allerror ? allerror : error);
}
Example #13
/*
 * Destroy any in core blocks past the truncation length.
 * Inlined from vtruncbuf, so that lfs_avail could be updated.
 * We take the seglock to prevent cleaning from occurring while we are
 * invalidating blocks.
 */
static int
lfs_vtruncbuf(struct vnode *vp, daddr_t lbn, bool catch, int slptimeo)
{
	struct buf *bp, *nbp;
	int error;
	struct lfs *fs;
	voff_t off;

	off = round_page((voff_t)lbn << vp->v_mount->mnt_fs_bshift);
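	/*
	 * Free every page from the truncation point onward (an upper
	 * bound of 0 means to the end of the object), waiting for the
	 * I/O; VOP_PUTPAGES releases the interlock taken below.
	 */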
	mutex_enter(vp->v_interlock);
	error = VOP_PUTPAGES(vp, off, 0, PGO_FREE | PGO_SYNCIO);
	if (error)
		return error;

	fs = VTOI(vp)->i_lfs;

	ASSERT_SEGLOCK(fs);

	mutex_enter(&bufcache_lock);
restart:	
	for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_lblkno < lbn)
			continue;
		error = bbusy(bp, catch, slptimeo, NULL);
		if (error == EPASSTHROUGH)