Example #1
/*
 * Read with single-block read-ahead.  Defined in Bach (p.55), but
 * implemented as a call to breadn().
 * XXX for compatibility with old file systems.
 */
int
breada(struct vnode *vp, daddr_t blkno, int size, daddr_t rablkno, int rabsize,
    struct ucred *cred, struct buf **bpp)
{

	return (breadn(vp, blkno, size, &rablkno, &rabsize, 1, cred, bpp));
}
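
Example #1 is a thin compatibility wrapper: single-block read-ahead expressed as one call to the n-block primitive breadn(). As a hedged illustration only, here is a standalone C sketch of that delegation pattern; the names demo_readn and demo_read_ahead are invented for the sketch and are not kernel interfaces.

#include <stdio.h>

/* n-block primitive: read 'blkno' and queue 'cnt' read-ahead blocks. */
static int
demo_readn(long blkno, long *rablks, int *rasizes, int cnt)
{
	printf("read %ld", blkno);
	for (int i = 0; i < cnt; i++)
		printf(", read-ahead %ld (%d bytes)", rablks[i], rasizes[i]);
	printf("\n");
	return (0);
}

/* single-block wrapper, mirroring breada() -> breadn() */
static int
demo_read_ahead(long blkno, long rablkno, int rabsize)
{
	return (demo_readn(blkno, &rablkno, &rabsize, 1));
}

int
main(void)
{
	return (demo_read_ahead(10, 11, 8192));
}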
Example #2
/*
 * Vnode op for reading.
 */
static int
ext2_read(struct vop_read_args *ap)
{
	struct vnode *vp;
	struct inode *ip;
	struct uio *uio;
	struct m_ext2fs *fs;
	struct buf *bp;
	daddr_t lbn, nextlbn;
	off_t bytesinfile;
	long size, xfersize, blkoffset;
	int error, orig_resid, seqcount;
	int ioflag;

	vp = ap->a_vp;
	uio = ap->a_uio;
	ioflag = ap->a_ioflag;

	seqcount = ap->a_ioflag >> IO_SEQSHIFT;
	ip = VTOI(vp);

#ifdef INVARIANTS
	if (uio->uio_rw != UIO_READ)
		panic("%s: mode", "ext2_read");

	if (vp->v_type == VLNK) {
		if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen)
			panic("%s: short symlink", "ext2_read");
	} else if (vp->v_type != VREG && vp->v_type != VDIR)
		panic("%s: type %d", "ext2_read", vp->v_type);
#endif
	orig_resid = uio->uio_resid;
	KASSERT(orig_resid >= 0, ("ext2_read: uio->uio_resid < 0"));
	if (orig_resid == 0)
		return (0);
	KASSERT(uio->uio_offset >= 0, ("ext2_read: uio->uio_offset < 0"));
	fs = ip->i_e2fs;
	if (uio->uio_offset < ip->i_size &&
	    uio->uio_offset >= fs->e2fs_maxfilesize)
		return (EOVERFLOW);

	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
		if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0)
			break;
		lbn = lblkno(fs, uio->uio_offset);
		nextlbn = lbn + 1;
		size = blksize(fs, ip, lbn);
		blkoffset = blkoff(fs, uio->uio_offset);

		xfersize = fs->e2fs_fsize - blkoffset;
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
		if (bytesinfile < xfersize)
			xfersize = bytesinfile;

		if (lblktosize(fs, nextlbn) >= ip->i_size)
			error = bread(vp, lbn, size, NOCRED, &bp);
		else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
			error = cluster_read(vp, ip->i_size, lbn, size,
			    NOCRED, blkoffset + uio->uio_resid, seqcount,
			    0, &bp);
		} else if (seqcount > 1) {
			int nextsize = blksize(fs, ip, nextlbn);
			error = breadn(vp, lbn,
			    size, &nextlbn, &nextsize, 1, NOCRED, &bp);
		} else
			error = bread(vp, lbn, size, NOCRED, &bp);
		if (error) {
			brelse(bp);
			bp = NULL;
			break;
		}

		/*
		 * If IO_DIRECT then set B_DIRECT for the buffer.  This
		 * will cause us to attempt to release the buffer later on
		 * and will cause the buffer cache to attempt to free the
		 * underlying pages.
		 */
		if (ioflag & IO_DIRECT)
			bp->b_flags |= B_DIRECT;

		/*
		 * We should only get non-zero b_resid when an I/O error
		 * has occurred, which should cause us to break above.
		 * However, if the short read did not cause an error,
		 * then we want to ensure that we do not uiomove bad
		 * or uninitialized data.
		 */
		size -= bp->b_resid;
		if (size < xfersize) {
			if (size == 0)
				break;
			xfersize = size;
		}
		error = uiomove((char *)bp->b_data + blkoffset,
		    (int)xfersize, uio);
		if (error)
			break;

		if (ioflag & (IO_VMIO|IO_DIRECT)) {
			/*
			 * If it's VMIO or direct I/O, then we don't
			 * need the buf, mark it available for
			 * freeing. If it's non-direct VMIO, the VM has
			 * the data.
			 */
			bp->b_flags |= B_RELBUF;
			brelse(bp);
		} else {
			/*
			 * Otherwise let whoever
			 * made the request take care of
			 * freeing it. We just queue
			 * it onto another list.
			 */
			bqrelse(bp);
		}
	}

	/*
	 * This can only happen in the case of an error, because the
	 * loop above resets bp to NULL on each iteration and never
	 * sets a new value into it on normal completion; the buffer
	 * must therefore have come from a 'break' statement.
	 */
	if (bp != NULL) {
		if (ioflag & (IO_VMIO|IO_DIRECT)) {
			bp->b_flags |= B_RELBUF;
			brelse(bp);
		} else {
			bqrelse(bp);
		}
	}

	if ((error == 0 || uio->uio_resid != orig_resid) &&
	    (vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
		ip->i_flag |= IN_ACCESS;
	return (error);
}
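
The heart of ext2_read() above is the per-iteration transfer-size clamp: copy at most the rest of the current block, never more than the caller asked for, and never past EOF. A minimal standalone sketch of that arithmetic, assuming a fixed block size ('bsize' and the variable names are assumptions, not ext2 interfaces):

#include <stdio.h>

static long
clamp_xfersize(long bsize, long offset, long resid, long filesize)
{
	long blkoffset = offset % bsize;	/* cf. blkoff() */
	long xfersize = bsize - blkoffset;	/* rest of this block */
	long bytesinfile = filesize - offset;	/* rest of the file */

	if (resid < xfersize)
		xfersize = resid;
	if (bytesinfile < xfersize)
		xfersize = bytesinfile;
	return (xfersize);
}

int
main(void)
{
	/* 4 KB blocks, 10000 bytes wanted at offset 3000 of a 5000-byte file */
	printf("%ld\n", clamp_xfersize(4096, 3000, 10000, 5000));
	/* prints 1096: the rest of the current block is the binding limit */
	return (0);
}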
Example #3
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
		if (bytesinfile < xfersize)
			xfersize = bytesinfile;

		if (nextloffset >= ip->i_size) {
			error = bread(vp, lblktodoff(fs, lbn), size, &bp);
		} else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
			error = cluster_read(vp, (off_t)ip->i_size,
					     lblktodoff(fs, lbn), size,
					     uio->uio_resid,
					     (ap->a_ioflag >> 16) * BKVASIZE,
					     &bp);
		} else if (seqcount > 1) {
			int nextsize = BLKSIZE(fs, ip, nextlbn);
			error = breadn(vp, lblktodoff(fs, lbn),
					size, &nextloffset, &nextsize, 1, &bp);
		} else {
			error = bread(vp, lblktodoff(fs, lbn), size, &bp);
		}
		if (error) {
			brelse(bp);
			bp = NULL;
			break;
		}

		/*
		 * We should only get non-zero b_resid when an I/O error
		 * has occurred, which should cause us to break above.
		 * However, if the short read did not cause an error,
		 * then we want to ensure that we do not uiomove bad
		 * or uninitialized data.
		 */
Example #4
/*
 * Vnode op for read
 */
int
spec_read(void *v)
{
	struct vop_read_args *ap = v;
	struct vnode *vp = ap->a_vp;
	struct uio *uio = ap->a_uio;
	struct proc *p = uio->uio_procp;
	struct buf *bp;
	daddr_t bn, nextbn, bscale;
	int bsize;
	struct partinfo dpart;
	size_t n;
	int on, majordev;
	int (*ioctl)(dev_t, u_long, caddr_t, int, struct proc *);
	int error = 0;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("spec_read mode");
	if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
		panic("spec_read proc");
#endif
	if (uio->uio_resid == 0)
		return (0);

	switch (vp->v_type) {

	case VCHR:
		VOP_UNLOCK(vp, 0, p);
		error = (*cdevsw[major(vp->v_rdev)].d_read)
			(vp->v_rdev, uio, ap->a_ioflag);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
		return (error);

	case VBLK:
		if (uio->uio_offset < 0)
			return (EINVAL);
		bsize = BLKDEV_IOSIZE;
		if ((majordev = major(vp->v_rdev)) < nblkdev &&
		    (ioctl = bdevsw[majordev].d_ioctl) != NULL &&
		    (*ioctl)(vp->v_rdev, DIOCGPART, (caddr_t)&dpart, FREAD, p) == 0) {
			u_int32_t frag =
			    DISKLABELV1_FFS_FRAG(dpart.part->p_fragblock);
			u_int32_t fsize =
			    DISKLABELV1_FFS_FSIZE(dpart.part->p_fragblock);
			if (dpart.part->p_fstype == FS_BSDFFS && frag != 0 &&
			    fsize != 0)
				bsize = frag * fsize;
		}
		bscale = btodb(bsize);
		do {
			bn = btodb(uio->uio_offset) & ~(bscale - 1);
			on = uio->uio_offset % bsize;
			n = ulmin((bsize - on), uio->uio_resid);
			if (vp->v_lastr + bscale == bn) {
				nextbn = bn + bscale;
				error = breadn(vp, bn, bsize, &nextbn, &bsize,
				    1, &bp);
			} else
				error = bread(vp, bn, bsize, &bp);
			vp->v_lastr = bn;
			n = ulmin(n, bsize - bp->b_resid);
			if (error) {
				brelse(bp);
				return (error);
			}
			error = uiomove((char *)bp->b_data + on, n, uio);
			brelse(bp);
		} while (error == 0 && uio->uio_resid > 0 && n != 0);
		return (error);

	default:
		panic("spec_read type");
	}
	/* NOTREACHED */
}
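
The VBLK path of spec_read() rounds the device block number down to an I/O-sized boundary before reading. A hedged standalone sketch of that alignment math; DEV_BSHIFT = 9 (512-byte sectors) matches the traditional btodb() definition, everything else is illustrative:

#include <stdio.h>

#define DEV_BSHIFT	9			/* 512-byte device blocks */
#define btodb(x)	((x) >> DEV_BSHIFT)

int
main(void)
{
	long bsize = 8192;			/* cf. BLKDEV_IOSIZE */
	long bscale = btodb(bsize);		/* device blocks per I/O unit */
	long offset = 20000;

	long bn = btodb(offset) & ~(bscale - 1);	/* aligned block */
	long on = offset % bsize;			/* offset within it */

	printf("bn=%ld on=%ld\n", bn, on);	/* bn=32 on=3616 */
	return (0);
}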
Example #5
/*
 * Vnode op for reading.
 */
int
cd9660_read(void *v)
{
	struct vop_read_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct uio *uio = ap->a_uio;
	struct iso_node *ip = VTOI(vp);
	struct iso_mnt *imp;
	struct buf *bp;
	daddr_t lbn, rablock;
	off_t diff;
	int rasize, error = 0;
	long size, n, on;

	if (uio->uio_resid == 0)
		return (0);
	if (uio->uio_offset < 0)
		return (EINVAL);
	if (uio->uio_offset >= ip->i_size)
		return (0);
	ip->i_flag |= IN_ACCESS;
	imp = ip->i_mnt;

	if (vp->v_type == VREG) {
		const int advice = IO_ADV_DECODE(ap->a_ioflag);
		error = 0;

		while (uio->uio_resid > 0) {
			vsize_t bytelen = MIN(ip->i_size - uio->uio_offset,
					      uio->uio_resid);

			if (bytelen == 0)
				break;
			error = ubc_uiomove(&vp->v_uobj, uio, bytelen, advice,
			    UBC_READ | UBC_PARTIALOK | UBC_UNMAP_FLAG(vp));
			if (error)
				break;
		}
		goto out;
	}

	do {
		lbn = cd9660_lblkno(imp, uio->uio_offset);
		on = cd9660_blkoff(imp, uio->uio_offset);
		n = MIN(imp->logical_block_size - on, uio->uio_resid);
		diff = (off_t)ip->i_size - uio->uio_offset;
		if (diff <= 0)
			return (0);
		if (diff < n)
			n = diff;
		size = cd9660_blksize(imp, ip, lbn);
		rablock = lbn + 1;
		if (cd9660_lblktosize(imp, rablock) < ip->i_size) {
			rasize = cd9660_blksize(imp, ip, rablock);
			error = breadn(vp, lbn, size, &rablock,
				       &rasize, 1, NOCRED, 0, &bp);
		} else {
			error = bread(vp, lbn, size, NOCRED, 0, &bp);
		}
		if (error) {
			return (error);
		}
		n = MIN(n, size - bp->b_resid);

		error = uiomove((char *)bp->b_data + on, (int)n, uio);
		brelse(bp, 0);
	} while (error == 0 && uio->uio_resid > 0 && n != 0);

out:
	return (error);
}
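
The do/while loop above only schedules read-ahead when the next logical block still starts before EOF. A small sketch of that gate, with an assumed block size and invented names:

#include <stdbool.h>
#include <stdio.h>

static bool
want_readahead(long bsize, long lbn, long filesize)
{
	long rablock = lbn + 1;
	return (rablock * bsize < filesize);	/* cf. cd9660_lblktosize() */
}

int
main(void)
{
	printf("%d\n", want_readahead(2048, 3, 10000));	/* 1: 8192 < 10000 */
	printf("%d\n", want_readahead(2048, 4, 10000));	/* 0: 10240 >= 10000 */
	return (0);
}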
Example #6
/* ARGSUSED */
int
ext2fs_read(void *v)
{
	struct vop_read_args *ap = v;
	struct vnode *vp;
	struct inode *ip;
	struct uio *uio;
	struct m_ext2fs *fs;
	struct buf *bp;
	daddr_t lbn, nextlbn;
	off_t bytesinfile;
	long size, xfersize, blkoffset;
	int error;

	vp = ap->a_vp;
	ip = VTOI(vp);
	uio = ap->a_uio;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("%s: mode", "ext2fs_read");

	if (vp->v_type == VLNK) {
		if ((int)ext2fs_size(ip) < vp->v_mount->mnt_maxsymlinklen ||
			(vp->v_mount->mnt_maxsymlinklen == 0 &&
			 ip->i_e2fs_nblock == 0))
			panic("%s: short symlink", "ext2fs_read");
	} else if (vp->v_type != VREG && vp->v_type != VDIR)
		panic("%s: type %d", "ext2fs_read", vp->v_type);
#endif
	fs = ip->i_e2fs;
	if ((u_int64_t)uio->uio_offset >
		((u_int64_t)0x80000000 * fs->e2fs_bsize - 1))
		return (EFBIG);
	if (uio->uio_resid == 0)
		return (0);

	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
		if ((bytesinfile = ext2fs_size(ip) - uio->uio_offset) <= 0)
			break;
		lbn = lblkno(fs, uio->uio_offset);
		nextlbn = lbn + 1;
		size = fs->e2fs_bsize;
		blkoffset = blkoff(fs, uio->uio_offset);
		xfersize = fs->e2fs_bsize - blkoffset;
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
		if (bytesinfile < xfersize)
			xfersize = bytesinfile;

		if (lblktosize(fs, nextlbn) >= ext2fs_size(ip))
			error = bread(vp, lbn, size, &bp);
		else if (lbn - 1 == ip->i_ci.ci_lastr) {
			int nextsize = fs->e2fs_bsize;
			error = breadn(vp, lbn, size, &nextlbn, &nextsize,
			    1, &bp);
		} else
			error = bread(vp, lbn, size, &bp);
		if (error)
			break;
		ip->i_ci.ci_lastr = lbn;

		/*
		 * We should only get non-zero b_resid when an I/O error
		 * has occurred, which should cause us to break above.
		 * However, if the short read did not cause an error,
		 * then we want to ensure that we do not uiomove bad
		 * or uninitialized data.
		 */
		size -= bp->b_resid;
		if (size < xfersize) {
			if (size == 0)
				break;
			xfersize = size;
		}
		error = uiomove((char *)bp->b_data + blkoffset, xfersize, uio);
		if (error)
			break;
		brelse(bp);
	}
	if (bp != NULL)
		brelse(bp);

	if (!(vp->v_mount->mnt_flag & MNT_NOATIME)) {
		ip->i_flag |= IN_ACCESS;
	}
	return (error);
}
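
Example #6 decides whether to read ahead by remembering the last logical block read (ip->i_ci.ci_lastr) and checking whether the current block immediately follows it. A hedged userland model of the heuristic, not the NetBSD/OpenBSD cluster code:

#include <stdio.h>

struct demo_state {
	long lastr;		/* last logical block read */
};

static void
demo_read_block(struct demo_state *st, long lbn)
{
	if (lbn - 1 == st->lastr)
		printf("lbn %ld: sequential, read + read-ahead %ld\n",
		    lbn, lbn + 1);
	else
		printf("lbn %ld: random, plain read\n", lbn);
	st->lastr = lbn;
}

int
main(void)
{
	struct demo_state st = { .lastr = -1 };

	demo_read_block(&st, 0);	/* sequential (follows lastr == -1) */
	demo_read_block(&st, 1);	/* sequential */
	demo_read_block(&st, 7);	/* random */
	return (0);
}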
Example #7
int
xfs_read_file(xfs_mount_t *mp, xfs_inode_t *ip, struct uio *uio, int ioflag)
{
	xfs_fileoff_t lbn, nextlbn;
	xfs_fsize_t bytesinfile;
	long size, xfersize, blkoffset;
	struct buf *bp;
	struct vnode *vp;
	int error, orig_resid;
	int seqcount;

	seqcount = ioflag >> IO_SEQSHIFT;

	orig_resid = uio->uio_resid;
	if (orig_resid <= 0)
		return (0);

	vp = XFS_ITOV(ip)->v_vnode;

	/*
	 * OK, so we could not do it all in one VM trick, so cycle
	 * around trying smaller bites.
	 */
	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
		if ((bytesinfile = ip->i_d.di_size - uio->uio_offset) <= 0)
			break;

		lbn = XFS_B_TO_FSBT(mp, uio->uio_offset);
		nextlbn = lbn + 1;

		/*
		 * Size of the buffer.  The buffer representing the
		 * end of the file is rounded up to the size of the
		 * block type (fragment or full block, as
		 * appropriate).
		 */
		size = mp->m_sb.sb_blocksize;
		blkoffset = XFS_B_FSB_OFFSET(mp, uio->uio_offset);

		/*
		 * The amount we want to transfer in this iteration is
		 * one FS block, less the amount of data that precedes
		 * our start point within the block.
		 */
		xfersize = mp->m_sb.sb_blocksize - blkoffset;

		/*
		 * But if we actually want less than the block,
		 * or the file doesn't have a whole block more of data,
		 * then use the lesser number.
		 */
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
		if (bytesinfile < xfersize)
			xfersize = bytesinfile;

		if (XFS_FSB_TO_B(mp, nextlbn) >= ip->i_d.di_size) {
			/*
			 * Don't do readahead if this is the end of the file.
			 */
			error = bread(vp, lbn, size, NOCRED, &bp);
		} else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
			/*
			 * Otherwise if we are allowed to cluster,
			 * grab as much as we can.
			 *
			 * XXX  This may not be a win if we are not
			 * doing sequential access.
			 */
			error = cluster_read(vp, ip->i_d.di_size, lbn,
				size, NOCRED, uio->uio_resid, seqcount, &bp);
		} else if (seqcount > 1) {
			/*
			 * If we are NOT allowed to cluster, then
			 * if we appear to be acting sequentially,
			 * fire off a request for a readahead
			 * as well as a read. Note that the 4th and 5th
			 * arguments point to arrays of the size specified in
			 * the 6th argument.
			 */
			int nextsize = mp->m_sb.sb_blocksize;
			error = breadn(vp, lbn,
			    size, &nextlbn, &nextsize, 1, NOCRED, &bp);
		} else {
			/*
			 * Failing all of the above, just read what the
			 * user asked for. Interestingly, the same as
			 * the first option above.
			 */
			error = bread(vp, lbn, size, NOCRED, &bp);
		}
		if (error) {
			brelse(bp);
			bp = NULL;
			break;
		}

		/*
		 * If IO_DIRECT then set B_DIRECT for the buffer.  This
		 * will cause us to attempt to release the buffer later on
		 * and will cause the buffer cache to attempt to free the
		 * underlying pages.
		 */
		if (ioflag & IO_DIRECT)
			bp->b_flags |= B_DIRECT;

		/*
		 * We should only get non-zero b_resid when an I/O error
		 * has occurred, which should cause us to break above.
		 * However, if the short read did not cause an error,
		 * then we want to ensure that we do not uiomove bad
		 * or uninitialized data.
		 */
		size -= bp->b_resid;
		if (size < xfersize) {
			if (size == 0)
				break;
			xfersize = size;
		}

		/*
		 * otherwise use the general form
		 */
		error = uiomove((char *)bp->b_data + blkoffset,
			    (int)xfersize, uio);

		if (error)
			break;

		if (ioflag & (IO_VMIO|IO_DIRECT)) {
			/*
			 * If there are no dependencies, and it's VMIO,
			 * then we don't need the buf, mark it available
			 * for freeing. The VM has the data.
			 */
			bp->b_flags |= B_RELBUF;
			brelse(bp);
		} else {
			/*
			 * Otherwise let whoever
			 * made the request take care of
			 * freeing it. We just queue
			 * it onto another list.
			 */
			bqrelse(bp);
		}
	}

	/*
	 * This can only happen in the case of an error, because the
	 * loop above resets bp to NULL on each iteration and never
	 * sets a new value into it on normal completion; the buffer
	 * must therefore have come from a 'break' statement.
	 */
	if (bp != NULL) {
		if (ioflag & (IO_VMIO|IO_DIRECT)) {
			bp->b_flags |= B_RELBUF;
			brelse(bp);
		} else
			bqrelse(bp);
	}

	return (error);
}
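
Several of these loops share the same short-read guard: shrink the transfer by the residual the driver could not fill (bp->b_resid) so that uiomove() never copies uninitialized buffer bytes. A hedged standalone sketch of that guard, with invented names:

#include <stdio.h>

static long
clamp_short_read(long size, long resid_unfilled, long xfersize)
{
	size -= resid_unfilled;		/* bytes actually valid in the buf */
	if (size < xfersize) {
		if (size == 0)
			return (0);	/* nothing usable: stop the loop */
		xfersize = size;
	}
	return (xfersize);
}

int
main(void)
{
	printf("%ld\n", clamp_short_read(8192, 0, 4096));	/* 4096 */
	printf("%ld\n", clamp_short_read(8192, 6144, 4096));	/* 2048 */
	printf("%ld\n", clamp_short_read(8192, 8192, 4096));	/* 0 */
	return (0);
}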
Example #8
/*
 * Return buffer with the contents of block "offset" from the beginning of
 * vnode "vp".  If "res" is non-zero, fill it in with a pointer to the
 * remaining space in the vnode.
 *
 * This version includes a read-ahead optimization.
 */
int
ffs_blkatoff_ra(struct vnode *vp, off_t uoffset, char **res, struct buf **bpp,
		int seqcount)
{
	struct inode *ip;
	struct fs *fs;
	struct buf *bp;
	ufs_daddr_t lbn;
	ufs_daddr_t nextlbn;
	off_t base_loffset;
	off_t next_loffset;
	int bsize, error;
	int nextbsize;

	ip = VTOI(vp);
	fs = ip->i_fs;
	lbn = lblkno(fs, uoffset);
	base_loffset = lblktodoff(fs, lbn);
	bsize = blksize(fs, ip, lbn);

	nextlbn = lbn + 1;
	next_loffset = lblktodoff(fs, nextlbn);

	*bpp = NULL;

	if (next_loffset >= ip->i_size) {
		/*
		 * Do not do readahead if this is the last block,
		 * bsize might represent a fragment.
		 */
		error = bread(vp, base_loffset, bsize, &bp);
	} else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
		/*
		 * Try to cluster if we are allowed to.
		 */
		error = cluster_read(vp, (off_t)ip->i_size,
				     base_loffset, bsize,
				     bsize, seqcount * BKVASIZE, &bp);
	} else if (seqcount > 1) {
		/*
		 * Faked read ahead
		 */
		nextbsize = blksize(fs, ip, nextlbn);
		error = breadn(vp, base_loffset, bsize,
			       &next_loffset, &nextbsize, 1, &bp);
	} else {
		/*
		 * Failing all of the above, just read what the
		 * user asked for. Interestingly, the same as
		 * the first option above.
		 */
		error = bread(vp, base_loffset, bsize, &bp);
	}
	if (error) {
		brelse(bp);
		return (error);
	}
	if (res)
		*res = (char *)bp->b_data + (int)(uoffset - base_loffset);
	*bpp = bp;
	return (0);
}
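
The final *res computation in ffs_blkatoff_ra() turns a file offset into a pointer inside the cached block that contains it. A minimal standalone sketch of that pointer math; the buffer and sizes are illustrative:

#include <stdio.h>

int
main(void)
{
	char block[17] = "ABCDEFGHIJKLMNOP";	/* one cached block */
	long bsize = 16;
	long uoffset = 21;			/* file byte wanted */
	long base_off = (uoffset / bsize) * bsize;	/* cf. lblktodoff() */

	/* block[] stands in for bp->b_data of block uoffset / bsize */
	char *res = block + (uoffset - base_off);

	printf("base=%ld byte='%c'\n", base_off, *res);	/* base=16 byte='F' */
	return (0);
}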
Example #9
/*
 * Extended attribute area reading.
 */
int
ffs_ea_read(struct vnode *vp, struct uio *uio, int ioflag)
{
	struct inode *ip;
	struct ufs2_dinode *dp;
	struct fs *fs;
	struct buf *bp;
	daddr64_t lbn, nextlbn;
	off_t ealeft;
	int error, size, xfersize;
	size_t oresid;

	ip = VTOI(vp);
	fs = ip->i_fs;
	dp = ip->i_din2;
	error = 0;
	oresid = uio->uio_resid;

	/*
	 * Loop over the amount of data requested by the caller, stopping only
	 * if an error occurs. By default, we always try to copy a file system
	 * block worth of bytes per iteration ('xfersize'). Check this value
	 * against what is left to be copied ('uio->uio_resid'), and the amount
	 * of bytes past our current position in the extended attribute area
	 * ('ealeft').
	 */

	while (uio->uio_resid > 0) {

		ealeft = dp->di_extsize - uio->uio_offset;
		if (ealeft <= 0)
			break;

		xfersize = fs->fs_bsize;
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
		if (ealeft < xfersize)
			xfersize = ealeft;

		/*
		 * Get the corresponding logical block number. Read it in,
		 * doing read-ahead if possible.
		 */

		lbn = lblkno(fs, uio->uio_offset);
		size = sblksize(fs, dp->di_extsize, lbn);
		nextlbn = lbn + 1;

		if (lblktosize(fs, nextlbn) >= dp->di_extsize)
			error = bread(vp, -1 - lbn, size, NOCRED, &bp);
		else {
			int nextsize = sblksize(fs, dp->di_extsize, nextlbn);
			nextlbn = -1 - nextlbn;
			error = breadn(vp, -1 - lbn,
			    size, &nextlbn, &nextsize, 1, NOCRED, &bp);
		}

		if (error) {
			brelse(bp);
			break;
		}

		/* Check for short-reads. */
		if (bp->b_resid) {
			brelse(bp);
			error = EIO;
			break;
		}

		/* Finally, copy out the data, and release the buffer. */
		error = uiomove(bp->b_data, xfersize, uio);
		brelse(bp);
		if (error)
			break;
	}

	if ((error == 0 || uio->uio_resid != oresid) &&
	    (vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
		ip->i_flag |= IN_ACCESS;

	return (error);
}
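
Example #9 addresses the extended-attribute area with negative logical block numbers, the "-1 - lbn" trick, so those blocks can never collide with regular file blocks. The mapping is its own inverse, as this small sketch shows:

#include <stdio.h>

static long
ea_encode(long lbn)
{
	return (-1 - lbn);	/* 0 -> -1, 1 -> -2, ... */
}

int
main(void)
{
	for (long lbn = 0; lbn < 3; lbn++) {
		long enc = ea_encode(lbn);
		printf("lbn %ld -> %ld -> %ld\n", lbn, enc, ea_encode(enc));
	}
	return (0);
}

Applying the mapping twice returns the original number, which is why the same expression serves for both the primary block and the read-ahead block in the breadn() call above.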
Example #10
/*
 * Vnode op for reading.
 */
int
cd9660_read(void *v)
{
	struct vop_read_args *ap = v;
	struct vnode *vp = ap->a_vp;
	register struct uio *uio = ap->a_uio;
	register struct iso_node *ip = VTOI(vp);
	register struct iso_mnt *imp;
	struct buf *bp;
	daddr_t lbn, rablock;
	off_t diff;
	int error = 0;
	long size, n, on;

	if (uio->uio_resid == 0)
		return (0);
	if (uio->uio_offset < 0)
		return (EINVAL);
	ip->i_flag |= IN_ACCESS;
	imp = ip->i_mnt;
	do {
		struct cluster_info *ci = &ip->i_ci;

		lbn = lblkno(imp, uio->uio_offset);
		on = blkoff(imp, uio->uio_offset);
		n = min((u_int)(imp->logical_block_size - on),
			uio->uio_resid);
		diff = (off_t)ip->i_size - uio->uio_offset;
		if (diff <= 0)
			return (0);
		if (diff < n)
			n = diff;
		size = blksize(imp, ip, lbn);
		rablock = lbn + 1;
#define MAX_RA 32
		if (ci->ci_lastr + 1 == lbn) {
			struct ra {
				daddr_t blks[MAX_RA];
				int sizes[MAX_RA];
			} *ra;
			int i;

			ra = malloc(sizeof *ra, M_TEMP, M_WAITOK);
			for (i = 0; i < MAX_RA &&
			    lblktosize(imp, (rablock + i)) < ip->i_size;
			    i++) {
				ra->blks[i] = rablock + i;
				ra->sizes[i] = blksize(imp, ip, rablock + i);
			}
			error = breadn(vp, lbn, size, ra->blks,
			    ra->sizes, i, &bp);
			free(ra, M_TEMP, 0);
		} else
			error = bread(vp, lbn, size, &bp);
		ci->ci_lastr = lbn;
		n = min(n, size - bp->b_resid);
		if (error) {
			brelse(bp);
			return (error);
		}

		error = uiomovei(bp->b_data + on, (int)n, uio);

		brelse(bp);
	} while (error == 0 && uio->uio_resid > 0 && n != 0);
	return (error);
}
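
Example #10 builds a whole list of up to MAX_RA read-ahead blocks, stopping at EOF. A hedged standalone sketch of that list construction; the fixed block size is an assumption, where the real code computes a per-block size with blksize():

#include <stdio.h>

#define MAX_RA	32

int
main(void)
{
	long bsize = 2048, filesize = 9000, lbn = 1;
	long blks[MAX_RA];
	int sizes[MAX_RA], i;

	for (i = 0; i < MAX_RA &&
	    (lbn + 1 + i) * bsize < filesize; i++) {
		blks[i] = lbn + 1 + i;
		sizes[i] = (int)bsize;	/* real code: per-block blksize() */
		printf("ra blk %ld (%d bytes)\n", blks[i], sizes[i]);
	}
	printf("%d read-ahead blocks after lbn %ld\n", i, lbn);	/* 3 */
	return (0);
}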
Example #11
/*
 * this function handles traditional block mapping
 */
static int
ext2_ind_read(struct vop_read_args *ap)
{
	struct vnode *vp;
	struct inode *ip;
	struct uio *uio;
	FS *fs;
	struct buf *bp;
	daddr_t lbn, nextlbn;
	off_t bytesinfile;
	long size, xfersize, blkoffset;
	int error, orig_resid, seqcount;
	u_short mode;

	vp = ap->a_vp;
	ip = VTOI(vp);
	mode = ip->i_mode;
	uio = ap->a_uio;
	seqcount = ap->a_ioflag >> IO_SEQSHIFT;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("%s: mode", READ_S);

	if (vp->v_type == VLNK) {
		if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen)
			panic("%s: short symlink", READ_S);
	} else if (vp->v_type != VREG && vp->v_type != VDIR)
		panic("%s: type %d", READ_S, vp->v_type);
#endif
	orig_resid = uio->uio_resid;
	KASSERT(orig_resid >= 0, ("ext2_read: uio->uio_resid < 0"));
	if (orig_resid == 0)
		return (0);
	KASSERT(uio->uio_offset >= 0, ("ext2_read: uio->uio_offset < 0"));
	fs = ip->I_FS;
	if (uio->uio_offset < ip->i_size && uio->uio_offset >= fs->e2fs_maxfilesize)
		return (EOVERFLOW);
	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
		if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0)
			break;
		lbn = lblkno(fs, uio->uio_offset);
		nextlbn = lbn + 1;
		size = BLKSIZE(fs, ip, lbn);
		blkoffset = blkoff(fs, uio->uio_offset);

		xfersize = fs->e2fs_fsize - blkoffset;
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
		if (bytesinfile < xfersize)
			xfersize = bytesinfile;

		if (lblktosize(fs, nextlbn) >= ip->i_size)
			error = bread(vp, lbn, size, NOCRED, &bp);
		else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0)
			error = cluster_read(vp, ip->i_size, lbn, size,
			    NOCRED, blkoffset + uio->uio_resid, seqcount, &bp);
		else if (seqcount > 1) {
			int nextsize = BLKSIZE(fs, ip, nextlbn);
			error = breadn(vp, lbn,
			    size, &nextlbn, &nextsize, 1, NOCRED, &bp);
		} else
			error = bread(vp, lbn, size, NOCRED, &bp);
		if (error) {
			brelse(bp);
			bp = NULL;
			break;
		}

		/*
		 * We should only get non-zero b_resid when an I/O error
		 * has occurred, which should cause us to break above.
		 * However, if the short read did not cause an error,
		 * then we want to ensure that we do not uiomove bad
		 * or uninitialized data.
		 */
		size -= bp->b_resid;
		if (size < xfersize) {
			if (size == 0)
				break;
			xfersize = size;
		}
		error = uiomove((char *)bp->b_data + blkoffset,
		    (int)xfersize, uio);
		if (error)
			break;

		bqrelse(bp);
	}
	if (bp != NULL)
		bqrelse(bp);
	if ((error == 0 || uio->uio_resid != orig_resid) &&
	    (vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
		ip->i_flag |= IN_ACCESS;
	return (error);
}
Example #12
int
msdosfs_read(void *v)
{
	struct vop_read_args *ap = v;
	int error = 0;
	uint32_t diff;
	int blsize;
	int isadir;
	uint32_t n;
	long on;
	daddr64_t lbn, rablock, rablkno;
	struct buf *bp;
	struct vnode *vp = ap->a_vp;
	struct denode *dep = VTODE(vp);
	struct msdosfsmount *pmp = dep->de_pmp;
	struct uio *uio = ap->a_uio;

	/*
	 * If they didn't ask for any data, then we are done.
	 */
	if (uio->uio_resid == 0)
		return (0);
	if (uio->uio_offset < 0)
		return (EINVAL);

	isadir = dep->de_Attributes & ATTR_DIRECTORY;
	do {
		if (uio->uio_offset >= dep->de_FileSize)
			return (0);

		lbn = de_cluster(pmp, uio->uio_offset);
		on = uio->uio_offset & pmp->pm_crbomask;
		n = min((uint32_t) (pmp->pm_bpcluster - on), uio->uio_resid);

		/*
		 * de_FileSize is uint32_t, and we know that uio_offset <
		 * de_FileSize, so uio->uio_offset < 2^32.  Therefore
		 * the cast to uint32_t on the next line is safe.
		 */
		diff = dep->de_FileSize - (uint32_t)uio->uio_offset;
		if (diff < n)
			n = diff;

		/* convert cluster # to block # if a directory */
		if (isadir) {
			error = pcbmap(dep, lbn, &lbn, 0, &blsize);
			if (error)
				return (error);
		}
		/*
		 * If we are operating on a directory file then be sure to
		 * do i/o with the vnode for the filesystem instead of the
		 * vnode for the directory.
		 */
		if (isadir) {
			error = bread(pmp->pm_devvp, lbn, blsize, NOCRED, &bp);
		} else {
			rablock = lbn + 1;
			rablkno = de_cn2bn(pmp, rablock);
			if (dep->de_lastr + 1 == lbn &&
			    de_cn2off(pmp, rablock) < dep->de_FileSize)
				error = breadn(vp, de_cn2bn(pmp, lbn),
				    pmp->pm_bpcluster, &rablkno,
				    &pmp->pm_bpcluster, 1, NOCRED, &bp);
			else
				error = bread(vp, de_cn2bn(pmp, lbn),
				    pmp->pm_bpcluster, NOCRED, &bp);
			dep->de_lastr = lbn;
		}
		n = min(n, pmp->pm_bpcluster - bp->b_resid);
		if (error) {
			brelse(bp);
			return (error);
		}
		error = uiomove(bp->b_data + on, (int) n, uio);
		brelse(bp);
	} while (error == 0 && uio->uio_resid > 0 && n != 0);
	if (!isadir && !(vp->v_mount->mnt_flag & MNT_NOATIME))
		dep->de_flag |= DE_ACCESS;
	return (error);
}
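
msdosfs_read() works in clusters rather than blocks: with a power-of-two cluster size, a mask (cf. pm_crbomask) yields the offset within the cluster. A hedged sketch of that math with illustrative values:

#include <stdio.h>

int
main(void)
{
	unsigned long bpcluster = 4096;		/* bytes per cluster */
	unsigned long crbomask = bpcluster - 1;	/* offset-in-cluster mask */
	unsigned long offset = 13000, resid = 10000;

	unsigned long cn = offset / bpcluster;	/* cf. de_cluster() */
	unsigned long on = offset & crbomask;	/* offset in cluster */
	unsigned long n = bpcluster - on;	/* room left in cluster */

	if (resid < n)
		n = resid;
	printf("cn=%lu on=%lu n=%lu\n", cn, on, n);	/* cn=3 on=712 n=3384 */
	return (0);
}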
Example #13
/*
 * Write a directory entry after a call to namei, using the parameters
 * that ulfs_lookup left in nameidata and in the ulfs_lookup_results.
 *
 * DVP is the directory to be updated. It must be locked.
 * ULR is the ulfs_lookup_results structure from the final lookup step.
 * TVP is not used. (XXX: why is it here? remove it)
 * DIRP is the new directory entry contents.
 * CNP is the componentname from the final lookup step.
 * NEWDIRBP is not used and (XXX) should be removed. The previous
 * comment here said it was used by the now-removed softupdates code.
 *
 * The link count of the target inode is *not* incremented; the
 * caller does that.
 *
 * If ulr->ulr_count is 0, ulfs_lookup did not find space to insert the
 * directory entry. ulr_offset, which is the place to put the entry,
 * should be on a block boundary (and should be at the end of the
 * directory AFAIK) and a fresh block is allocated to put the new
 * directory entry in.
 *
 * If ulr->ulr_count is not zero, ulfs_lookup found a slot to insert
 * the entry into. This slot ranges from ulr_offset to ulr_offset +
 * ulr_count. However, this slot may already be partially populated
 * requiring compaction. See notes below.
 *
 * Furthermore, if ulr_count is not zero and ulr_endoff is not the
 * same as i_size, the directory is truncated to size ulr_endoff.
 */
int
ulfs_direnter(struct vnode *dvp, const struct ulfs_lookup_results *ulr,
    struct vnode *tvp, struct lfs_direct *dirp,
    struct componentname *cnp, struct buf *newdirbp)
{
	kauth_cred_t cr;
	int newentrysize;
	struct inode *dp;
	struct buf *bp;
	u_int dsize;
	struct lfs_direct *ep, *nep;
	int error, ret, lfs_blkoff, loc, spacefree;
	char *dirbuf;
	struct timespec ts;
	struct ulfsmount *ump = VFSTOULFS(dvp->v_mount);
	struct lfs *fs = ump->um_lfs;
	const int needswap = ULFS_MPNEEDSWAP(fs);
	int dirblksiz = fs->um_dirblksiz;

	error = 0;
	cr = cnp->cn_cred;

	dp = VTOI(dvp);
	newentrysize = LFS_DIRSIZ(0, dirp, 0);

	if (ulr->ulr_count == 0) {
		/*
		 * If ulr_count is 0, then namei could find no
		 * space in the directory. Here, ulr_offset will
		 * be on a directory block boundary and we will write the
		 * new entry into a fresh block.
		 */
		if (ulr->ulr_offset & (dirblksiz - 1))
			panic("ulfs_direnter: newblk");
		if ((error = lfs_balloc(dvp, (off_t)ulr->ulr_offset, dirblksiz,
		    cr, B_CLRBUF | B_SYNC, &bp)) != 0) {
			return (error);
		}
		dp->i_size = ulr->ulr_offset + dirblksiz;
		DIP_ASSIGN(dp, size, dp->i_size);
		dp->i_flag |= IN_CHANGE | IN_UPDATE;
		uvm_vnp_setsize(dvp, dp->i_size);
		dirp->d_reclen = ulfs_rw16(dirblksiz, needswap);
		dirp->d_ino = ulfs_rw32(dirp->d_ino, needswap);
		if (FSFMT(dvp)) {
#if (BYTE_ORDER == LITTLE_ENDIAN)
			if (needswap == 0) {
#else
			if (needswap != 0) {
#endif
				u_char tmp = dirp->d_namlen;
				dirp->d_namlen = dirp->d_type;
				dirp->d_type = tmp;
			}
		}
		lfs_blkoff = ulr->ulr_offset & (ump->um_mountp->mnt_stat.f_iosize - 1);
		memcpy((char *)bp->b_data + lfs_blkoff, dirp, newentrysize);
#ifdef LFS_DIRHASH
		if (dp->i_dirhash != NULL) {
			ulfsdirhash_newblk(dp, ulr->ulr_offset);
			ulfsdirhash_add(dp, dirp, ulr->ulr_offset);
			ulfsdirhash_checkblock(dp, (char *)bp->b_data + lfs_blkoff,
			    ulr->ulr_offset);
		}
#endif
		error = VOP_BWRITE(bp->b_vp, bp);
		vfs_timestamp(&ts);
		ret = lfs_update(dvp, &ts, &ts, UPDATE_DIROP);
		if (error == 0)
			return (ret);
		return (error);
	}

	/*
	 * If ulr_count is non-zero, then namei found space for the new
	 * entry in the range ulr_offset to ulr_offset + ulr_count
	 * in the directory. To use this space, we may have to compact
	 * the entries located there, by copying them together towards the
	 * beginning of the block, leaving the free space in one usable
	 * chunk at the end.
	 */

	/*
	 * Increase size of directory if entry eats into new space.
	 * This should never push the size past a new multiple of
	 * DIRBLKSIZ.
	 *
	 * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN.
	 */
	if (ulr->ulr_offset + ulr->ulr_count > dp->i_size) {
#ifdef DIAGNOSTIC
		printf("ulfs_direnter: reached 4.2-only block, "
		       "not supposed to happen\n");
#endif
		dp->i_size = ulr->ulr_offset + ulr->ulr_count;
		DIP_ASSIGN(dp, size, dp->i_size);
		dp->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/*
	 * Get the block containing the space for the new directory entry.
	 */
	error = ulfs_blkatoff(dvp, (off_t)ulr->ulr_offset, &dirbuf, &bp, true);
	if (error) {
		return (error);
	}
	/*
	 * Find space for the new entry. In the simple case, the entry at
	 * offset base will have the space. If it does not, then namei
	 * arranged that compacting the region ulr_offset to
	 * ulr_offset + ulr_count would yield the space.
	 */
	ep = (struct lfs_direct *)dirbuf;
	dsize = (ep->d_ino != 0) ? LFS_DIRSIZ(FSFMT(dvp), ep, needswap) : 0;
	spacefree = ulfs_rw16(ep->d_reclen, needswap) - dsize;
	for (loc = ulfs_rw16(ep->d_reclen, needswap); loc < ulr->ulr_count; ) {
		uint16_t reclen;

		nep = (struct lfs_direct *)(dirbuf + loc);

		/* Trim the existing slot (NB: dsize may be zero). */
		ep->d_reclen = ulfs_rw16(dsize, needswap);
		ep = (struct lfs_direct *)((char *)ep + dsize);

		reclen = ulfs_rw16(nep->d_reclen, needswap);
		loc += reclen;
		if (nep->d_ino == 0) {
			/*
			 * A mid-block unused entry. Such entries are
			 * never created by the kernel, but fsck_ffs
			 * can create them (and it doesn't fix them).
			 *
			 * Add up the free space, and initialise the
			 * relocated entry since we don't memcpy it.
			 */
			spacefree += reclen;
			ep->d_ino = 0;
			dsize = 0;
			continue;
		}
		dsize = LFS_DIRSIZ(FSFMT(dvp), nep, needswap);
		spacefree += reclen - dsize;
#ifdef LFS_DIRHASH
		if (dp->i_dirhash != NULL)
			ulfsdirhash_move(dp, nep,
			    ulr->ulr_offset + ((char *)nep - dirbuf),
			    ulr->ulr_offset + ((char *)ep - dirbuf));
#endif
		memcpy((void *)ep, (void *)nep, dsize);
	}
	/*
	 * Here, `ep' points to a directory entry containing `dsize' in-use
	 * bytes followed by `spacefree' unused bytes. If ep->d_ino == 0,
	 * then the entry is completely unused (dsize == 0). The value
	 * of ep->d_reclen is always indeterminate.
	 *
	 * Update the pointer fields in the previous entry (if any),
	 * copy in the new entry, and write out the block.
	 */
	if (ep->d_ino == 0 ||
	    (ulfs_rw32(ep->d_ino, needswap) == ULFS_WINO &&
	     memcmp(ep->d_name, dirp->d_name, dirp->d_namlen) == 0)) {
		if (spacefree + dsize < newentrysize)
			panic("ulfs_direnter: compact1");
		dirp->d_reclen = spacefree + dsize;
	} else {
		if (spacefree < newentrysize)
			panic("ulfs_direnter: compact2");
		dirp->d_reclen = spacefree;
		ep->d_reclen = ulfs_rw16(dsize, needswap);
		ep = (struct lfs_direct *)((char *)ep + dsize);
	}
	dirp->d_reclen = ulfs_rw16(dirp->d_reclen, needswap);
	dirp->d_ino = ulfs_rw32(dirp->d_ino, needswap);
	if (FSFMT(dvp)) {
#if (BYTE_ORDER == LITTLE_ENDIAN)
		if (needswap == 0) {
#else
		if (needswap != 0) {
#endif
			u_char tmp = dirp->d_namlen;
			dirp->d_namlen = dirp->d_type;
			dirp->d_type = tmp;
		}
	}
#ifdef LFS_DIRHASH
	if (dp->i_dirhash != NULL && (ep->d_ino == 0 ||
	    dirp->d_reclen == spacefree))
		ulfsdirhash_add(dp, dirp, ulr->ulr_offset + ((char *)ep - dirbuf));
#endif
	memcpy((void *)ep, (void *)dirp, (u_int)newentrysize);
#ifdef LFS_DIRHASH
	if (dp->i_dirhash != NULL)
		ulfsdirhash_checkblock(dp, dirbuf -
		    (ulr->ulr_offset & (dirblksiz - 1)),
		    ulr->ulr_offset & ~(dirblksiz - 1));
#endif
	error = VOP_BWRITE(bp->b_vp, bp);
	dp->i_flag |= IN_CHANGE | IN_UPDATE;
	/*
	 * If all went well, and the directory can be shortened, proceed
	 * with the truncation. Note that we have to unlock the inode for
	 * the entry that we just entered, as the truncation may need to
	 * lock other inodes which can lead to deadlock if we also hold a
	 * lock on the newly entered node.
	 */
	if (error == 0 && ulr->ulr_endoff && ulr->ulr_endoff < dp->i_size) {
#ifdef LFS_DIRHASH
		if (dp->i_dirhash != NULL)
			ulfsdirhash_dirtrunc(dp, ulr->ulr_endoff);
#endif
		(void) lfs_truncate(dvp, (off_t)ulr->ulr_endoff, IO_SYNC, cr);
	}
	return (error);
}
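
The compaction loop in ulfs_direnter() trims each live entry in the slot to its real size and accumulates the slack into 'spacefree'. A hedged standalone model of that bookkeeping; plain structs stand in for lfs_direct and there is no byte swapping:

#include <stdio.h>

struct demo_dirent {
	int reclen;	/* allocated record length */
	int dsize;	/* bytes actually in use (0 if free) */
};

int
main(void)
{
	struct demo_dirent slot[] = {
		{ 24, 16 }, { 40, 12 }, { 32, 0 },	/* last one unused */
	};
	int spacefree = 0;

	for (unsigned i = 0; i < sizeof(slot) / sizeof(slot[0]); i++)
		spacefree += slot[i].reclen - slot[i].dsize;

	printf("spacefree=%d\n", spacefree);	/* 8 + 28 + 32 = 68 */
	return (0);
}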

/*
 * Remove a directory entry after a call to namei, using the
 * parameters that ulfs_lookup left in nameidata and in the
 * ulfs_lookup_results.
 *
 * DVP is the directory to be updated. It must be locked.
 * ULR is the ulfs_lookup_results structure from the final lookup step.
 * IP, if not null, is the inode being unlinked.
 * FLAGS may contain DOWHITEOUT.
 * ISRMDIR is not used and (XXX) should be removed.
 *
 * If FLAGS contains DOWHITEOUT the entry is replaced with a whiteout
 * instead of being cleared.
 *
 * ulr->ulr_offset contains the position of the directory entry
 * to be removed.
 *
 * ulr->ulr_reclen contains the size of the directory entry to be
 * removed.
 *
 * ulr->ulr_count contains the size of the *previous* directory
 * entry. This allows finding it, for free space management. If
 * ulr_count is 0, the target entry is at the beginning of the
 * directory. (Does this ever happen? The first entry should be ".",
 * which should only be removed at rmdir time. Does rmdir come here
 * to clear out the "." and ".." entries? Perhaps, but I doubt it.)
 *
 * The space is marked free by adding it to the record length (not
 * name length) of the preceding entry. If the first entry becomes
 * free, it is marked free by setting the inode number to 0.
 *
 * The link count of IP is decremented. Note that this is not the
 * inverse behavior of ulfs_direnter, which does not adjust link
 * counts. Sigh.
 */
int
ulfs_dirremove(struct vnode *dvp, const struct ulfs_lookup_results *ulr,
	      struct inode *ip, int flags, int isrmdir)
{
	struct inode *dp = VTOI(dvp);
	struct lfs_direct *ep;
	struct buf *bp;
	int error;
	const int needswap = ULFS_MPNEEDSWAP(dp->i_lfs);

	if (flags & DOWHITEOUT) {
		/*
		 * Whiteout entry: set d_ino to ULFS_WINO.
		 */
		error = ulfs_blkatoff(dvp, (off_t)ulr->ulr_offset, (void *)&ep,
				     &bp, true);
		if (error)
			return (error);
		ep->d_ino = ulfs_rw32(ULFS_WINO, needswap);
		ep->d_type = LFS_DT_WHT;
		goto out;
	}

	if ((error = ulfs_blkatoff(dvp,
	    (off_t)(ulr->ulr_offset - ulr->ulr_count), (void *)&ep, &bp, true)) != 0)
		return (error);

#ifdef LFS_DIRHASH
	/*
	 * Remove the dirhash entry. This is complicated by the fact
	 * that `ep' is the previous entry when ulr_count != 0.
	 */
	if (dp->i_dirhash != NULL)
		ulfsdirhash_remove(dp, (ulr->ulr_count == 0) ? ep :
		   (struct lfs_direct *)((char *)ep +
		   ulfs_rw16(ep->d_reclen, needswap)), ulr->ulr_offset);
#endif

	if (ulr->ulr_count == 0) {
		/*
		 * First entry in block: set d_ino to zero.
		 */
		ep->d_ino = 0;
	} else {
		/*
		 * Collapse new free space into previous entry.
		 */
		ep->d_reclen =
		    ulfs_rw16(ulfs_rw16(ep->d_reclen, needswap) + ulr->ulr_reclen,
			needswap);
	}

#ifdef LFS_DIRHASH
	if (dp->i_dirhash != NULL) {
		int dirblksiz = ip->i_lfs->um_dirblksiz;
		ulfsdirhash_checkblock(dp, (char *)ep -
		    ((ulr->ulr_offset - ulr->ulr_count) & (dirblksiz - 1)),
		    ulr->ulr_offset & ~(dirblksiz - 1));
	}
#endif

out:
	if (ip) {
		ip->i_nlink--;
		DIP_ASSIGN(ip, nlink, ip->i_nlink);
		ip->i_flag |= IN_CHANGE;
	}
	/*
	 * XXX did it ever occur to anyone that it might be a good
	 * idea to restore ip->i_nlink if this fails? Or something?
	 * Currently on error return from this function the state of
	 * ip->i_nlink depends on what happened, and callers
	 * definitely do not take this into account.
	 */
	error = VOP_BWRITE(bp->b_vp, bp);
	dp->i_flag |= IN_CHANGE | IN_UPDATE;
	/*
	 * If the last named reference to a snapshot goes away,
	 * drop its snapshot reference so that it will be reclaimed
	 * when last open reference goes away.
	 */
	if (ip != 0 && (ip->i_flags & SF_SNAPSHOT) != 0 &&
	    ip->i_nlink == 0)
		ulfs_snapgone(ip);
	return (error);
}
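
ulfs_dirremove() frees space in one of two ways: the first entry in a block is freed by zeroing its inode number, while any later entry is absorbed by growing the previous entry's record length over it. A hedged sketch with illustrative structs:

#include <stdio.h>

struct demo_dirent {
	unsigned ino;
	unsigned short reclen;
};

int
main(void)
{
	struct demo_dirent prev = { 7, 24 }, victim = { 9, 40 };

	/* case 1: victim is first in its block */
	struct demo_dirent first = victim;
	first.ino = 0;				/* marks the slot free */

	/* case 2: victim follows 'prev': absorb its record length */
	prev.reclen += victim.reclen;

	printf("first.ino=%u prev.reclen=%u\n", first.ino, prev.reclen);
	return (0);
}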

/*
 * Rewrite an existing directory entry to point at the inode supplied.
 *
 * DP is the directory to update.
 * OFFSET is the position of the entry in question. It may come
 * from ulr_offset of a ulfs_lookup_results.
 * OIP is the old inode the directory previously pointed to.
 * NEWINUM is the number of the new inode.
 * NEWTYPE is the new value for the type field of the directory entry.
 * (This is ignored if the fs doesn't support that.)
 * ISRMDIR is not used and (XXX) should be removed.
 * IFLAGS are added to DP's inode flags.
 *
 * The link count of OIP is decremented. Note that the link count of
 * the new inode is *not* incremented. Yay for symmetry.
 */
int
ulfs_dirrewrite(struct inode *dp, off_t offset,
    struct inode *oip, ino_t newinum, int newtype,
    int isrmdir, int iflags)
{
	struct buf *bp;
	struct lfs_direct *ep;
	struct vnode *vdp = ITOV(dp);
	int error;

	error = ulfs_blkatoff(vdp, offset, (void *)&ep, &bp, true);
	if (error)
		return (error);
	ep->d_ino = ulfs_rw32(newinum, ULFS_IPNEEDSWAP(dp));
	if (!FSFMT(vdp))
		ep->d_type = newtype;
	oip->i_nlink--;
	DIP_ASSIGN(oip, nlink, oip->i_nlink);
	oip->i_flag |= IN_CHANGE;
	error = VOP_BWRITE(bp->b_vp, bp);
	dp->i_flag |= iflags;
	/*
	 * If the last named reference to a snapshot goes away,
	 * drop its snapshot reference so that it will be reclaimed
	 * when last open reference goes away.
	 */
	if ((oip->i_flags & SF_SNAPSHOT) != 0 && oip->i_nlink == 0)
		ulfs_snapgone(oip);
	return (error);
}

/*
 * Check if a directory is empty or not.
 * Inode supplied must be locked.
 *
 * Using a struct lfs_dirtemplate here is not precisely
 * what we want, but better than using a struct lfs_direct.
 *
 * NB: does not handle corrupted directories.
 */
int
ulfs_dirempty(struct inode *ip, ino_t parentino, kauth_cred_t cred)
{
	doff_t off;
	struct lfs_dirtemplate dbuf;
	struct lfs_direct *dp = (struct lfs_direct *)&dbuf;
	int error, namlen;
	size_t count;
	const int needswap = ULFS_IPNEEDSWAP(ip);
#define	MINDIRSIZ (sizeof (struct lfs_dirtemplate) / 2)

	for (off = 0; off < ip->i_size;
	    off += ulfs_rw16(dp->d_reclen, needswap)) {
		error = vn_rdwr(UIO_READ, ITOV(ip), (void *)dp, MINDIRSIZ, off,
		   UIO_SYSSPACE, IO_NODELOCKED, cred, &count, NULL);
		/*
		 * Since we read MINDIRSIZ, residual must
		 * be 0 unless we're at end of file.
		 */
		if (error || count != 0)
			return (0);
		/* avoid infinite loops */
		if (dp->d_reclen == 0)
			return (0);
		/* skip empty entries */
		if (dp->d_ino == 0 || ulfs_rw32(dp->d_ino, needswap) == ULFS_WINO)
			continue;
		/* accept only "." and ".." */
#if (BYTE_ORDER == LITTLE_ENDIAN)
		if (FSFMT(ITOV(ip)) && needswap == 0)
			namlen = dp->d_type;
		else
			namlen = dp->d_namlen;
#else
		if (FSFMT(ITOV(ip)) && needswap != 0)
			namlen = dp->d_type;
		else
			namlen = dp->d_namlen;
#endif
		if (namlen > 2)
			return (0);
		if (dp->d_name[0] != '.')
			return (0);
		/*
		 * At this point namlen must be 1 or 2.
		 * 1 implies ".", 2 implies ".." if second
		 * char is also "."
		 */
		if (namlen == 1 &&
		    ulfs_rw32(dp->d_ino, needswap) == ip->i_number)
			continue;
		if (dp->d_name[1] == '.' &&
		    ulfs_rw32(dp->d_ino, needswap) == parentino)
			continue;
		return (0);
	}
	return (1);
}

#define	ULFS_DIRRABLKS 0
int ulfs_dirrablks = ULFS_DIRRABLKS;

/*
 * ulfs_blkatoff: Return buffer with the contents of block "offset" from
 * the beginning of directory "vp".  If "res" is non-NULL, fill it in with
 * a pointer to the remaining space in the directory.  If the caller intends
 * to modify the buffer returned, "modify" must be true.
 */

int
ulfs_blkatoff(struct vnode *vp, off_t offset, char **res, struct buf **bpp,
    bool modify)
{
	struct inode *ip __diagused;
	struct buf *bp;
	daddr_t lbn;
	const int dirrablks = ulfs_dirrablks;
	daddr_t *blks;
	int *blksizes;
	int run, error;
	struct mount *mp = vp->v_mount;
	const int bshift = mp->mnt_fs_bshift;
	const int bsize = 1 << bshift;
	off_t eof;

	blks = kmem_alloc((1 + dirrablks) * sizeof(daddr_t), KM_SLEEP);
	blksizes = kmem_alloc((1 + dirrablks) * sizeof(int), KM_SLEEP);
	ip = VTOI(vp);
	KASSERT(vp->v_size == ip->i_size);
	GOP_SIZE(vp, vp->v_size, &eof, 0);
	lbn = offset >> bshift;

	for (run = 0; run <= dirrablks;) {
		const off_t curoff = lbn << bshift;
		const int size = MIN(eof - curoff, bsize);

		if (size == 0) {
			break;
		}
		KASSERT(curoff < eof);
		blks[run] = lbn;
		blksizes[run] = size;
		lbn++;
		run++;
		if (size != bsize) {
			break;
		}
	}
	KASSERT(run >= 1);
	error = breadn(vp, blks[0], blksizes[0], &blks[1], &blksizes[1],
	    run - 1, NOCRED, (modify ? B_MODIFY : 0), &bp);
	if (error != 0) {
		*bpp = NULL;
		goto out;
	}
	if (res) {
		*res = (char *)bp->b_data + (offset & (bsize - 1));
	}
	*bpp = bp;

 out:
	kmem_free(blks, (1 + dirrablks) * sizeof(daddr_t));
	kmem_free(blksizes, (1 + dirrablks) * sizeof(int));
	return error;
}
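
ulfs_blkatoff() builds its breadn() run by collecting full blocks (plus up to 'dirrablks' read-ahead blocks) starting at the block containing the offset, and stopping at EOF or at the first partial block. A hedged standalone model of that run construction with assumed sizes:

#include <stdio.h>

#define MIN(a, b)	((a) < (b) ? (a) : (b))

int
main(void)
{
	const int bshift = 12;			/* 4 KB blocks */
	const long bsize = 1L << bshift;
	const int dirrablks = 2;
	long eof = 10000, offset = 100;
	long lbn = offset >> bshift;
	int run;

	for (run = 0; run <= dirrablks; run++, lbn++) {
		long curoff = lbn << bshift;
		long size = MIN(eof - curoff, bsize);

		if (size <= 0)
			break;
		printf("blk %ld size %ld\n", lbn, size);
		if (size != bsize)
			break;		/* partial block ends the run */
	}
	return (0);
}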