/*
 * breada() -- read a block and schedule read-ahead of one more block.
 *
 * Described in Bach (p.55), but implemented as a thin forwarding
 * wrapper around breadn().  Kept for compatibility with old file
 * systems (XXX).  The by-value rablkno/rabsize parameters double as
 * the single-entry read-ahead arrays that breadn() expects.
 */
int
breada(struct vnode *vp, daddr_t blkno, int size, daddr_t rablkno,
    int rabsize, struct ucred *cred, struct buf **bpp)
{
	int error;

	error = breadn(vp, blkno, size, &rablkno, &rabsize, 1, cred, bpp);
	return (error);
}
/*
 * Vnode op for reading.
 *
 * Copies data from the buffer cache into the caller's uio one file-system
 * block at a time, using clustering or single-block read-ahead when the
 * mount and access pattern allow it.  Marks the inode IN_ACCESS when any
 * data was transferred (unless MNT_NOATIME).
 */
static int
ext2_read(struct vop_read_args *ap)
{
	struct vnode *vp;
	struct inode *ip;
	struct uio *uio;
	struct m_ext2fs *fs;
	struct buf *bp;
	daddr_t lbn, nextlbn;
	off_t bytesinfile;
	long size, xfersize, blkoffset;
	int error, orig_resid, seqcount;
	int ioflag;

	vp = ap->a_vp;
	uio = ap->a_uio;
	ioflag = ap->a_ioflag;
	/* Sequential-access hint encoded in the upper ioflag bits. */
	seqcount = ap->a_ioflag >> IO_SEQSHIFT;
	ip = VTOI(vp);

#ifdef INVARIANTS
	if (uio->uio_rw != UIO_READ)
		panic("%s: mode", "ext2_read");
	if (vp->v_type == VLNK) {
		if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen)
			panic("%s: short symlink", "ext2_read");
	} else if (vp->v_type != VREG && vp->v_type != VDIR)
		panic("%s: type %d", "ext2_read", vp->v_type);
#endif
	orig_resid = uio->uio_resid;
	KASSERT(orig_resid >= 0, ("ext2_read: uio->uio_resid < 0"));
	if (orig_resid == 0)
		return (0);
	KASSERT(uio->uio_offset >= 0, ("ext2_read: uio->uio_offset < 0"));
	fs = ip->i_e2fs;
	if (uio->uio_offset < ip->i_size &&
	    uio->uio_offset >= fs->e2fs_maxfilesize)
		return (EOVERFLOW);

	/* bp is reset to NULL each pass; non-NULL after the loop => 'break'. */
	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
		if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0)
			break;
		lbn = lblkno(fs, uio->uio_offset);
		nextlbn = lbn + 1;
		size = blksize(fs, ip, lbn);
		blkoffset = blkoff(fs, uio->uio_offset);

		/*
		 * NOTE(review): the transfer bound uses e2fs_fsize while
		 * blkoffset is an offset within a full block; this is only
		 * correct when fragment size == block size (true for ext2,
		 * which does not implement fragments) -- confirm.
		 */
		xfersize = fs->e2fs_fsize - blkoffset;
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
		if (bytesinfile < xfersize)
			xfersize = bytesinfile;

		/* Choose a read strategy: plain, clustered, or read-ahead. */
		if (lblktosize(fs, nextlbn) >= ip->i_size)
			error = bread(vp, lbn, size, NOCRED, &bp);
		else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
			error = cluster_read(vp, ip->i_size, lbn, size,
			    NOCRED, blkoffset + uio->uio_resid, seqcount,
			    0, &bp);
		} else if (seqcount > 1) {
			int nextsize = blksize(fs, ip, nextlbn);
			error = breadn(vp, lbn, size, &nextlbn, &nextsize,
			    1, NOCRED, &bp);
		} else
			error = bread(vp, lbn, size, NOCRED, &bp);
		if (error) {
			brelse(bp);
			bp = NULL;
			break;
		}

		/*
		 * If IO_DIRECT then set B_DIRECT for the buffer.  This
		 * will cause us to attempt to release the buffer later on
		 * and will cause the buffer cache to attempt to free the
		 * underlying pages.
		 */
		if (ioflag & IO_DIRECT)
			bp->b_flags |= B_DIRECT;

		/*
		 * We should only get non-zero b_resid when an I/O error
		 * has occurred, which should cause us to break above.
		 * However, if the short read did not cause an error,
		 * then we want to ensure that we do not uiomove bad
		 * or uninitialized data.
		 */
		size -= bp->b_resid;
		if (size < xfersize) {
			if (size == 0)
				break;
			xfersize = size;
		}
		error = uiomove((char *)bp->b_data + blkoffset,
		    (int)xfersize, uio);
		if (error)
			break;

		if (ioflag & (IO_VMIO|IO_DIRECT)) {
			/*
			 * If it's VMIO or direct I/O, then we don't
			 * need the buf, mark it available for
			 * freeing.  If it's non-direct VMIO, the VM has
			 * the data.
			 */
			bp->b_flags |= B_RELBUF;
			brelse(bp);
		} else {
			/*
			 * Otherwise let whoever
			 * made the request take care of
			 * freeing it.  We just queue
			 * it onto another list.
			 */
			bqrelse(bp);
		}
	}

	/*
	 * This can only happen in the case of an error
	 * because the loop above resets bp to NULL on each iteration
	 * and on normal completion has not set a new value into it.
	 * so it must have come from a 'break' statement
	 */
	if (bp != NULL) {
		if (ioflag & (IO_VMIO|IO_DIRECT)) {
			bp->b_flags |= B_RELBUF;
			brelse(bp);
		} else {
			bqrelse(bp);
		}
	}

	/* Record access time unless nothing was read or atime is disabled. */
	if ((error == 0 || uio->uio_resid != orig_resid) &&
	    (vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
		ip->i_flag |= IN_ACCESS;
	return (error);
}
if (uio->uio_resid < xfersize) xfersize = uio->uio_resid; if (bytesinfile < xfersize) xfersize = bytesinfile; if (nextloffset >= ip->i_size) { error = bread(vp, lblktodoff(fs, lbn), size, &bp); } else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) { error = cluster_read(vp, (off_t)ip->i_size, lblktodoff(fs, lbn), size, uio->uio_resid, (ap->a_ioflag >> 16) * BKVASIZE, &bp); } else if (seqcount > 1) { int nextsize = BLKSIZE(fs, ip, nextlbn); error = breadn(vp, lblktodoff(fs, lbn), size, &nextloffset, &nextsize, 1, &bp); } else { error = bread(vp, lblktodoff(fs, lbn), size, &bp); } if (error) { brelse(bp); bp = NULL; break; } /* * We should only get non-zero b_resid when an I/O error * has occurred, which should cause us to break above. * However, if the short read did not cause an error, * then we want to ensure that we do not uiomove bad * or uninitialized data.
/* * Vnode op for read */ int spec_read(void *v) { struct vop_read_args *ap = v; struct vnode *vp = ap->a_vp; struct uio *uio = ap->a_uio; struct proc *p = uio->uio_procp; struct buf *bp; daddr_t bn, nextbn, bscale; int bsize; struct partinfo dpart; size_t n; int on, majordev; int (*ioctl)(dev_t, u_long, caddr_t, int, struct proc *); int error = 0; #ifdef DIAGNOSTIC if (uio->uio_rw != UIO_READ) panic("spec_read mode"); if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc) panic("spec_read proc"); #endif if (uio->uio_resid == 0) return (0); switch (vp->v_type) { case VCHR: VOP_UNLOCK(vp, 0, p); error = (*cdevsw[major(vp->v_rdev)].d_read) (vp->v_rdev, uio, ap->a_ioflag); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); return (error); case VBLK: if (uio->uio_offset < 0) return (EINVAL); bsize = BLKDEV_IOSIZE; if ((majordev = major(vp->v_rdev)) < nblkdev && (ioctl = bdevsw[majordev].d_ioctl) != NULL && (*ioctl)(vp->v_rdev, DIOCGPART, (caddr_t)&dpart, FREAD, p) == 0) { u_int32_t frag = DISKLABELV1_FFS_FRAG(dpart.part->p_fragblock); u_int32_t fsize = DISKLABELV1_FFS_FSIZE(dpart.part->p_fragblock); if (dpart.part->p_fstype == FS_BSDFFS && frag != 0 && fsize != 0) bsize = frag * fsize; } bscale = btodb(bsize); do { bn = btodb(uio->uio_offset) & ~(bscale - 1); on = uio->uio_offset % bsize; n = ulmin((bsize - on), uio->uio_resid); if (vp->v_lastr + bscale == bn) { nextbn = bn + bscale; error = breadn(vp, bn, bsize, &nextbn, &bsize, 1, &bp); } else error = bread(vp, bn, bsize, &bp); vp->v_lastr = bn; n = ulmin(n, bsize - bp->b_resid); if (error) { brelse(bp); return (error); } error = uiomove((char *)bp->b_data + on, n, uio); brelse(bp); } while (error == 0 && uio->uio_resid > 0 && n != 0); return (error); default: panic("spec_read type"); } /* NOTREACHED */ }
/*
 * Vnode op for reading.
 *
 * Regular files are read through the UBC (page cache) via ubc_uiomove();
 * everything else (directories, etc.) goes through the buffer cache with
 * one block of read-ahead when more file data follows.
 */
int
cd9660_read(void *v)
{
	struct vop_read_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct uio *uio = ap->a_uio;
	struct iso_node *ip = VTOI(vp);
	struct iso_mnt *imp;
	struct buf *bp;
	daddr_t lbn, rablock;
	off_t diff;
	int rasize, error = 0;
	long size, n, on;

	if (uio->uio_resid == 0)
		return (0);
	if (uio->uio_offset < 0)
		return (EINVAL);
	/* Read at/past EOF transfers nothing. */
	if (uio->uio_offset >= ip->i_size)
		return 0;
	ip->i_flag |= IN_ACCESS;
	imp = ip->i_mnt;

	if (vp->v_type == VREG) {
		/* Regular file: copy straight from the page cache. */
		const int advice = IO_ADV_DECODE(ap->a_ioflag);
		error = 0;

		while (uio->uio_resid > 0) {
			vsize_t bytelen = MIN(ip->i_size - uio->uio_offset,
			    uio->uio_resid);

			if (bytelen == 0)
				break;
			error = ubc_uiomove(&vp->v_uobj, uio, bytelen, advice,
			    UBC_READ | UBC_PARTIALOK | UBC_UNMAP_FLAG(vp));
			if (error)
				break;
		}
		goto out;
	}

	/* Non-regular vnode: block-at-a-time through the buffer cache. */
	do {
		lbn = cd9660_lblkno(imp, uio->uio_offset);
		on = cd9660_blkoff(imp, uio->uio_offset);
		n = MIN(imp->logical_block_size - on, uio->uio_resid);
		diff = (off_t)ip->i_size - uio->uio_offset;
		if (diff <= 0)
			return (0);
		if (diff < n)
			n = diff;
		size = cd9660_blksize(imp, ip, lbn);
		rablock = lbn + 1;
		if (cd9660_lblktosize(imp, rablock) < ip->i_size) {
			/* More data follows: ask for one read-ahead block. */
			rasize = cd9660_blksize(imp, ip, rablock);
			error = breadn(vp, lbn, size, &rablock,
			    &rasize, 1, NOCRED, 0, &bp);
		} else {
			error = bread(vp, lbn, size, NOCRED, 0, &bp);
		}
		if (error) {
			/*
			 * NOTE(review): no brelse() here -- assumes NetBSD
			 * bread()/breadn() release the buffer on error;
			 * verify against the buffer-cache implementation.
			 */
			return (error);
		}
		/* Never copy bytes a short read did not fill in. */
		n = MIN(n, size - bp->b_resid);

		error = uiomove((char *)bp->b_data + on, (int)n, uio);
		brelse(bp, 0);
	} while (error == 0 && uio->uio_resid > 0 && n != 0);

out:
	return (error);
}
/*
 * Vnode op for reading (ext2fs).
 *
 * Transfers data block by block through the buffer cache, issuing a
 * one-block read-ahead when the previous read was for the immediately
 * preceding logical block (tracked in i_ci.ci_lastr).
 */
/* ARGSUSED */
int
ext2fs_read(void *v)
{
	struct vop_read_args *ap = v;
	struct vnode *vp;
	struct inode *ip;
	struct uio *uio;
	struct m_ext2fs *fs;
	struct buf *bp;
	daddr_t lbn, nextlbn;
	off_t bytesinfile;
	long size, xfersize, blkoffset;
	int error;

	vp = ap->a_vp;
	ip = VTOI(vp);
	uio = ap->a_uio;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("%s: mode", "ext2fs_read");
	if (vp->v_type == VLNK) {
		if ((int)ext2fs_size(ip) < vp->v_mount->mnt_maxsymlinklen ||
		    (vp->v_mount->mnt_maxsymlinklen == 0 &&
		     ip->i_e2fs_nblock == 0))
			panic("%s: short symlink", "ext2fs_read");
	} else if (vp->v_type != VREG && vp->v_type != VDIR)
		panic("%s: type %d", "ext2fs_read", vp->v_type);
#endif
	fs = ip->i_e2fs;
	/* Cap offsets at 2^31 blocks to avoid 64-bit block-number overflow. */
	if ((u_int64_t)uio->uio_offset >
	    ((u_int64_t)0x80000000 * fs->e2fs_bsize - 1))
		return (EFBIG);
	if (uio->uio_resid == 0)
		return (0);

	/* bp is reset each pass; non-NULL after the loop means we broke out. */
	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
		if ((bytesinfile = ext2fs_size(ip) - uio->uio_offset) <= 0)
			break;
		lbn = lblkno(fs, uio->uio_offset);
		nextlbn = lbn + 1;
		size = fs->e2fs_bsize;
		blkoffset = blkoff(fs, uio->uio_offset);
		xfersize = fs->e2fs_bsize - blkoffset;
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
		if (bytesinfile < xfersize)
			xfersize = bytesinfile;

		if (lblktosize(fs, nextlbn) >= ext2fs_size(ip))
			/* Last block of the file: no read-ahead. */
			error = bread(vp, lbn, size, &bp);
		else if (lbn - 1 == ip->i_ci.ci_lastr) {
			/* Sequential access detected: read ahead one block. */
			int nextsize = fs->e2fs_bsize;
			error = breadn(vp, lbn, size, &nextlbn,
			    &nextsize, 1, &bp);
		} else
			error = bread(vp, lbn, size, &bp);
		if (error)
			break;
		ip->i_ci.ci_lastr = lbn;

		/*
		 * We should only get non-zero b_resid when an I/O error
		 * has occurred, which should cause us to break above.
		 * However, if the short read did not cause an error,
		 * then we want to ensure that we do not uiomove bad
		 * or uninitialized data.
		 */
		size -= bp->b_resid;
		if (size < xfersize) {
			if (size == 0)
				break;
			xfersize = size;
		}
		error = uiomove((char *)bp->b_data + blkoffset, xfersize, uio);
		if (error)
			break;
		brelse(bp);
	}
	/* Release the buffer still held after an early 'break'. */
	if (bp != NULL)
		brelse(bp);

	if (!(vp->v_mount->mnt_flag & MNT_NOATIME)) {
		ip->i_flag |= IN_ACCESS;
	}
	return (error);
}
/*
 * Read file data for an XFS inode through the buffer cache.
 *
 * Works one file-system block per iteration, choosing between a plain
 * read, a clustered read, or a one-block read-ahead depending on mount
 * flags and the sequential-access hint encoded in ioflag.
 */
int
xfs_read_file(xfs_mount_t *mp, xfs_inode_t *ip, struct uio *uio, int ioflag)
{
	xfs_fileoff_t lbn, nextlbn;
	xfs_fsize_t bytesinfile;
	long size, xfersize, blkoffset;
	struct buf *bp;
	struct vnode *vp;
	int error, orig_resid;
	int seqcount;

	seqcount = ioflag >> IO_SEQSHIFT;
	orig_resid = uio->uio_resid;
	if (orig_resid <= 0)
		return (0);

	vp = XFS_ITOV(ip)->v_vnode;

	/*
	 * Ok so we couldn't do it all in one vm trick...
	 * so cycle around trying smaller bites..
	 */
	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
		if ((bytesinfile = ip->i_d.di_size - uio->uio_offset) <= 0)
			break;

		lbn = XFS_B_TO_FSBT(mp, uio->uio_offset);
		nextlbn = lbn + 1;

		/*
		 * size of buffer.  The buffer representing the
		 * end of the file is rounded up to the size of
		 * the block type ( fragment or full block,
		 * depending ).
		 */
		size = mp->m_sb.sb_blocksize;
		blkoffset = XFS_B_FSB_OFFSET(mp, uio->uio_offset);

		/*
		 * The amount we want to transfer in this iteration is
		 * one FS block less the amount of the data before
		 * our startpoint (duh!)
		 */
		xfersize = mp->m_sb.sb_blocksize - blkoffset;

		/*
		 * But if we actually want less than the block,
		 * or the file doesn't have a whole block more of data,
		 * then use the lesser number.
		 */
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
		if (bytesinfile < xfersize)
			xfersize = bytesinfile;

		if (XFS_FSB_TO_B(mp, nextlbn) >= ip->i_d.di_size ) {
			/*
			 * Don't do readahead if this is the end of the file.
			 */
			error = bread(vp, lbn, size, NOCRED, &bp);
		} else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
			/*
			 * Otherwise if we are allowed to cluster,
			 * grab as much as we can.
			 *
			 * XXX  This may not be a win if we are not
			 * doing sequential access.
			 */
			error = cluster_read(vp, ip->i_d.di_size, lbn,
			    size, NOCRED, uio->uio_resid, seqcount, &bp);
		} else if (seqcount > 1) {
			/*
			 * If we are NOT allowed to cluster, then
			 * if we appear to be acting sequentially,
			 * fire off a request for a readahead
			 * as well as a read.  Note that the 4th and 5th
			 * arguments point to arrays of the size specified in
			 * the 6th argument.
			 */
			int nextsize = mp->m_sb.sb_blocksize;
			error = breadn(vp, lbn, size, &nextlbn, &nextsize,
			    1, NOCRED, &bp);
		} else {
			/*
			 * Failing all of the above, just read what the
			 * user asked for.  Interestingly, the same as
			 * the first option above.
			 */
			error = bread(vp, lbn, size, NOCRED, &bp);
		}
		if (error) {
			brelse(bp);
			bp = NULL;
			break;
		}

		/*
		 * If IO_DIRECT then set B_DIRECT for the buffer.  This
		 * will cause us to attempt to release the buffer later on
		 * and will cause the buffer cache to attempt to free the
		 * underlying pages.
		 */
		if (ioflag & IO_DIRECT)
			bp->b_flags |= B_DIRECT;

		/*
		 * We should only get non-zero b_resid when an I/O error
		 * has occurred, which should cause us to break above.
		 * However, if the short read did not cause an error,
		 * then we want to ensure that we do not uiomove bad
		 * or uninitialized data.
		 */
		size -= bp->b_resid;
		if (size < xfersize) {
			if (size == 0)
				break;
			xfersize = size;
		}

		/*
		 * otherwise use the general form
		 */
		error = uiomove((char *)bp->b_data + blkoffset,
		    (int)xfersize, uio);
		if (error)
			break;

		if (ioflag & (IO_VMIO|IO_DIRECT) ) {
			/*
			 * If there are no dependencies, and it's VMIO,
			 * then we don't need the buf, mark it available
			 * for freeing.  The VM has the data.
			 */
			bp->b_flags |= B_RELBUF;
			brelse(bp);
		} else {
			/*
			 * Otherwise let whoever
			 * made the request take care of
			 * freeing it.  We just queue
			 * it onto another list.
			 */
			bqrelse(bp);
		}
	}

	/*
	 * This can only happen in the case of an error
	 * because the loop above resets bp to NULL on each iteration
	 * and on normal completion has not set a new value into it.
	 * so it must have come from a 'break' statement
	 */
	if (bp != NULL) {
		if (ioflag & (IO_VMIO|IO_DIRECT)) {
			bp->b_flags |= B_RELBUF;
			brelse(bp);
		} else
			bqrelse(bp);
	}

	return (error);
}
/*
 * Return a buffer holding the block that contains byte "uoffset" of
 * vnode "vp".  When "res" is non-NULL it is pointed at the requested
 * byte inside the buffer's data.  On success the buffer is returned
 * through *bpp; on failure *bpp is NULL and an errno is returned.
 *
 * Includes a read-ahead optimization: clustering when the mount allows
 * it, otherwise a one-block breadn() when access looks sequential.
 */
int
ffs_blkatoff_ra(struct vnode *vp, off_t uoffset, char **res, struct buf **bpp,
		int seqcount)
{
	struct inode *ip = VTOI(vp);
	struct fs *fs = ip->i_fs;
	struct buf *bp = NULL;
	ufs_daddr_t lbn = lblkno(fs, uoffset);
	ufs_daddr_t nextlbn = lbn + 1;
	off_t base_loffset = lblktodoff(fs, lbn);
	off_t next_loffset = lblktodoff(fs, nextlbn);
	int bsize = blksize(fs, ip, lbn);
	int ra_bsize;
	int error;

	*bpp = NULL;

	if (next_loffset >= ip->i_size) {
		/*
		 * Last block of the file: never read ahead, since bsize
		 * may describe a fragment rather than a full block.
		 */
		error = bread(vp, base_loffset, bsize, &bp);
	} else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
		/* Clustering permitted: let cluster_read() do the work. */
		error = cluster_read(vp, (off_t)ip->i_size,
				     base_loffset, bsize,
				     bsize, seqcount * BKVASIZE, &bp);
	} else if (seqcount > 1) {
		/* No clustering, but sequential: fake one read-ahead. */
		ra_bsize = blksize(fs, ip, nextlbn);
		error = breadn(vp, base_loffset, bsize,
			       &next_loffset, &ra_bsize, 1, &bp);
	} else {
		/* Plain single-block read, same as the first case. */
		error = bread(vp, base_loffset, bsize, &bp);
	}

	if (error) {
		brelse(bp);
		return (error);
	}
	if (res)
		*res = (char *)bp->b_data + (int)(uoffset - base_loffset);
	*bpp = bp;
	return (0);
}
/* * Extended attribute area reading. */ int ffs_ea_read(struct vnode *vp, struct uio *uio, int ioflag) { struct inode *ip; struct ufs2_dinode *dp; struct fs *fs; struct buf *bp; daddr64_t lbn, nextlbn; off_t ealeft; int error, size, xfersize; size_t oresid; ip = VTOI(vp); fs = ip->i_fs; dp = ip->i_din2; error = 0; oresid = uio->uio_resid; ealeft = dp->di_extsize; /* * Loop over the amount of data requested by the caller, stopping only * if an error occurs. By default, we always try to copy a file system * block worth of bytes per iteration ('xfersize'). Check this value * against what is left to be copied ('uio->uio_resid'), and the amount * of bytes past our current position in the extended attribute area * ('ealeft'). */ while (uio->uio_resid > 0) { ealeft -= uio->uio_offset; if (ealeft <= 0) break; xfersize = fs->fs_bsize; if (uio->uio_resid < xfersize) xfersize = uio->uio_resid; if (ealeft < xfersize) xfersize = ealeft; /* * Get the corresponding logical block number. Read it in, * doing read-ahead if possible. */ lbn = lblkno(fs, uio->uio_offset); size = sblksize(fs, dp->di_extsize, lbn); nextlbn = lbn + 1; if (lblktosize(fs, nextlbn) >= dp->di_extsize) error = bread(vp, -1 - lbn, size, NOCRED, &bp); else { int nextsize = sblksize(fs, dp->di_extsize, nextlbn); nextlbn = -1 - nextlbn; error = breadn(vp, -1 - lbn, size, &nextlbn, &nextsize, 1, NOCRED, &bp); } if (error) { brelse(bp); break; } /* Check for short-reads. */ if (bp->b_resid) { brelse(bp); error = EIO; break; } /* Finally, copy out the data, and release the buffer. */ error = uiomove(bp->b_data, xfersize, uio); brelse(bp); if (error) break; } if ((error == 0 || uio->uio_resid != oresid) && (vp->v_mount->mnt_flag & MNT_NOATIME) == 0) ip->i_flag |= IN_ACCESS; return (error); }
/* * Vnode op for reading. */ int cd9660_read(void *v) { struct vop_read_args *ap = v; struct vnode *vp = ap->a_vp; register struct uio *uio = ap->a_uio; register struct iso_node *ip = VTOI(vp); register struct iso_mnt *imp; struct buf *bp; daddr_t lbn, rablock; off_t diff; int error = 0; long size, n, on; if (uio->uio_resid == 0) return (0); if (uio->uio_offset < 0) return (EINVAL); ip->i_flag |= IN_ACCESS; imp = ip->i_mnt; do { struct cluster_info *ci = &ip->i_ci; lbn = lblkno(imp, uio->uio_offset); on = blkoff(imp, uio->uio_offset); n = min((u_int)(imp->logical_block_size - on), uio->uio_resid); diff = (off_t)ip->i_size - uio->uio_offset; if (diff <= 0) return (0); if (diff < n) n = diff; size = blksize(imp, ip, lbn); rablock = lbn + 1; #define MAX_RA 32 if (ci->ci_lastr + 1 == lbn) { struct ra { daddr_t blks[MAX_RA]; int sizes[MAX_RA]; } *ra; int i; ra = malloc(sizeof *ra, M_TEMP, M_WAITOK); for (i = 0; i < MAX_RA && lblktosize(imp, (rablock + i)) < ip->i_size; i++) { ra->blks[i] = rablock + i; ra->sizes[i] = blksize(imp, ip, rablock + i); } error = breadn(vp, lbn, size, ra->blks, ra->sizes, i, &bp); free(ra, M_TEMP, 0); } else error = bread(vp, lbn, size, &bp); ci->ci_lastr = lbn; n = min(n, size - bp->b_resid); if (error) { brelse(bp); return (error); } error = uiomovei(bp->b_data + on, (int)n, uio); brelse(bp); } while (error == 0 && uio->uio_resid > 0 && n != 0); return (error); }
/*
 * this function handles traditional block mapping
 *
 * Reads file data through the buffer cache one block per iteration,
 * choosing between plain read, clustered read, and one-block read-ahead
 * based on mount flags and the sequential-access hint.
 */
static int
ext2_ind_read(struct vop_read_args *ap)
{
	struct vnode *vp;
	struct inode *ip;
	struct uio *uio;
	FS *fs;
	struct buf *bp;
	daddr_t lbn, nextlbn;
	off_t bytesinfile;
	long size, xfersize, blkoffset;
	int error, orig_resid, seqcount;
	seqcount = ap->a_ioflag >> IO_SEQSHIFT;
	u_short mode;

	vp = ap->a_vp;
	ip = VTOI(vp);
	mode = ip->i_mode;
	uio = ap->a_uio;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("%s: mode", READ_S);

	if (vp->v_type == VLNK) {
		if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen)
			panic("%s: short symlink", READ_S);
	} else if (vp->v_type != VREG && vp->v_type != VDIR)
		panic("%s: type %d", READ_S, vp->v_type);
#endif
	orig_resid = uio->uio_resid;
	KASSERT(orig_resid >= 0, ("ext2_read: uio->uio_resid < 0"));
	if (orig_resid == 0)
		return (0);
	KASSERT(uio->uio_offset >= 0, ("ext2_read: uio->uio_offset < 0"));
	fs = ip->I_FS;
	if (uio->uio_offset < ip->i_size &&
	    uio->uio_offset >= fs->e2fs_maxfilesize)
		return (EOVERFLOW);

	/* bp resets to NULL each pass; non-NULL afterwards => early break. */
	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
		if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0)
			break;
		lbn = lblkno(fs, uio->uio_offset);
		nextlbn = lbn + 1;
		size = BLKSIZE(fs, ip, lbn);
		blkoffset = blkoff(fs, uio->uio_offset);

		/*
		 * NOTE(review): bound uses e2fs_fsize while blkoffset is an
		 * offset within a full block; only correct when fragment
		 * size == block size (ext2 has no fragments) -- confirm.
		 */
		xfersize = fs->e2fs_fsize - blkoffset;
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
		if (bytesinfile < xfersize)
			xfersize = bytesinfile;

		if (lblktosize(fs, nextlbn) >= ip->i_size)
			error = bread(vp, lbn, size, NOCRED, &bp);
		else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0)
			error = cluster_read(vp, ip->i_size, lbn, size,
			    NOCRED, blkoffset + uio->uio_resid, seqcount, &bp);
		else if (seqcount > 1) {
			int nextsize = BLKSIZE(fs, ip, nextlbn);
			error = breadn(vp, lbn, size, &nextlbn, &nextsize,
			    1, NOCRED, &bp);
		} else
			error = bread(vp, lbn, size, NOCRED, &bp);
		if (error) {
			brelse(bp);
			bp = NULL;
			break;
		}

		/*
		 * We should only get non-zero b_resid when an I/O error
		 * has occurred, which should cause us to break above.
		 * However, if the short read did not cause an error,
		 * then we want to ensure that we do not uiomove bad
		 * or uninitialized data.
		 */
		size -= bp->b_resid;
		if (size < xfersize) {
			if (size == 0)
				break;
			xfersize = size;
		}
		error = uiomove((char *)bp->b_data + blkoffset,
		    (int)xfersize, uio);
		if (error)
			break;
		bqrelse(bp);
	}
	if (bp != NULL)
		bqrelse(bp);
	if ((error == 0 || uio->uio_resid != orig_resid) &&
	    (vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
		ip->i_flag |= IN_ACCESS;
	return (error);
}
int msdosfs_read(void *v) { struct vop_read_args *ap = v; int error = 0; uint32_t diff; int blsize; int isadir; uint32_t n; long on; daddr64_t lbn, rablock, rablkno; struct buf *bp; struct vnode *vp = ap->a_vp; struct denode *dep = VTODE(vp); struct msdosfsmount *pmp = dep->de_pmp; struct uio *uio = ap->a_uio; /* * If they didn't ask for any data, then we are done. */ if (uio->uio_resid == 0) return (0); if (uio->uio_offset < 0) return (EINVAL); isadir = dep->de_Attributes & ATTR_DIRECTORY; do { if (uio->uio_offset >= dep->de_FileSize) return (0); lbn = de_cluster(pmp, uio->uio_offset); on = uio->uio_offset & pmp->pm_crbomask; n = min((uint32_t) (pmp->pm_bpcluster - on), uio->uio_resid); /* * de_FileSize is uint32_t, and we know that uio_offset < * de_FileSize, so uio->uio_offset < 2^32. Therefore * the cast to uint32_t on the next line is safe. */ diff = dep->de_FileSize - (uint32_t)uio->uio_offset; if (diff < n) n = diff; /* convert cluster # to block # if a directory */ if (isadir) { error = pcbmap(dep, lbn, &lbn, 0, &blsize); if (error) return (error); } /* * If we are operating on a directory file then be sure to * do i/o with the vnode for the filesystem instead of the * vnode for the directory. */ if (isadir) { error = bread(pmp->pm_devvp, lbn, blsize, NOCRED, &bp); } else { rablock = lbn + 1; rablkno = de_cn2bn(pmp, rablock); if (dep->de_lastr + 1 == lbn && de_cn2off(pmp, rablock) < dep->de_FileSize) error = breadn(vp, de_cn2bn(pmp, lbn), pmp->pm_bpcluster, &rablkno, &pmp->pm_bpcluster, 1, NOCRED, &bp); else error = bread(vp, de_cn2bn(pmp, lbn), pmp->pm_bpcluster, NOCRED, &bp); dep->de_lastr = lbn; } n = min(n, pmp->pm_bpcluster - bp->b_resid); if (error) { brelse(bp); return (error); } error = uiomove(bp->b_data + on, (int) n, uio); brelse(bp); } while (error == 0 && uio->uio_resid > 0 && n != 0); if (!isadir && !(vp->v_mount->mnt_flag & MNT_NOATIME)) dep->de_flag |= DE_ACCESS; return (error); }
/*
 * Write a directory entry after a call to namei, using the parameters
 * that ulfs_lookup left in nameidata and in the ulfs_lookup_results.
 *
 * DVP is the directory to be updated. It must be locked.
 * ULR is the ulfs_lookup_results structure from the final lookup step.
 * TVP is not used. (XXX: why is it here? remove it)
 * DIRP is the new directory entry contents.
 * CNP is the componentname from the final lookup step.
 * NEWDIRBP is not used and (XXX) should be removed. The previous
 * comment here said it was used by the now-removed softupdates code.
 *
 * The link count of the target inode is *not* incremented; the
 * caller does that.
 *
 * If ulr->ulr_count is 0, ulfs_lookup did not find space to insert the
 * directory entry. ulr_offset, which is the place to put the entry,
 * should be on a block boundary (and should be at the end of the
 * directory AFAIK) and a fresh block is allocated to put the new
 * directory entry in.
 *
 * If ulr->ulr_count is not zero, ulfs_lookup found a slot to insert
 * the entry into. This slot ranges from ulr_offset to ulr_offset +
 * ulr_count. However, this slot may already be partially populated
 * requiring compaction. See notes below.
 *
 * Furthermore, if ulr_count is not zero and ulr_endoff is not the
 * same as i_size, the directory is truncated to size ulr_endoff.
 */
int
ulfs_direnter(struct vnode *dvp, const struct ulfs_lookup_results *ulr,
    struct vnode *tvp, struct lfs_direct *dirp, struct componentname *cnp,
    struct buf *newdirbp)
{
	kauth_cred_t cr;
	int newentrysize;
	struct inode *dp;
	struct buf *bp;
	u_int dsize;
	struct lfs_direct *ep, *nep;
	int error, ret, lfs_blkoff, loc, spacefree;
	char *dirbuf;
	struct timespec ts;
	struct ulfsmount *ump = VFSTOULFS(dvp->v_mount);
	struct lfs *fs = ump->um_lfs;
	const int needswap = ULFS_MPNEEDSWAP(fs);
	int dirblksiz = fs->um_dirblksiz;

	error = 0;
	cr = cnp->cn_cred;

	dp = VTOI(dvp);
	newentrysize = LFS_DIRSIZ(0, dirp, 0);

	if (ulr->ulr_count == 0) {
		/*
		 * If ulr_count is 0, then namei could find no
		 * space in the directory. Here, ulr_offset will
		 * be on a directory block boundary and we will write the
		 * new entry into a fresh block.
		 */
		if (ulr->ulr_offset & (dirblksiz - 1))
			panic("ulfs_direnter: newblk");
		if ((error = lfs_balloc(dvp, (off_t)ulr->ulr_offset,
		    dirblksiz, cr, B_CLRBUF | B_SYNC, &bp)) != 0) {
			return (error);
		}
		dp->i_size = ulr->ulr_offset + dirblksiz;
		DIP_ASSIGN(dp, size, dp->i_size);
		dp->i_flag |= IN_CHANGE | IN_UPDATE;
		uvm_vnp_setsize(dvp, dp->i_size);
		/* New entry owns the whole fresh directory block. */
		dirp->d_reclen = ulfs_rw16(dirblksiz, needswap);
		dirp->d_ino = ulfs_rw32(dirp->d_ino, needswap);
		if (FSFMT(dvp)) {
			/*
			 * Old (4.2-format) directories keep namlen/type in
			 * swapped byte positions on opposite-endian disks.
			 */
#if (BYTE_ORDER == LITTLE_ENDIAN)
			if (needswap == 0) {
#else
			if (needswap != 0) {
#endif
				u_char tmp = dirp->d_namlen;

				dirp->d_namlen = dirp->d_type;
				dirp->d_type = tmp;
			}
		}
		lfs_blkoff = ulr->ulr_offset &
		    (ump->um_mountp->mnt_stat.f_iosize - 1);
		memcpy((char *)bp->b_data + lfs_blkoff, dirp, newentrysize);
#ifdef LFS_DIRHASH
		if (dp->i_dirhash != NULL) {
			ulfsdirhash_newblk(dp, ulr->ulr_offset);
			ulfsdirhash_add(dp, dirp, ulr->ulr_offset);
			ulfsdirhash_checkblock(dp,
			    (char *)bp->b_data + lfs_blkoff,
			    ulr->ulr_offset);
		}
#endif
		error = VOP_BWRITE(bp->b_vp, bp);
		vfs_timestamp(&ts);
		ret = lfs_update(dvp, &ts, &ts, UPDATE_DIROP);
		if (error == 0)
			return (ret);
		return (error);
	}

	/*
	 * If ulr_count is non-zero, then namei found space for the new
	 * entry in the range ulr_offset to ulr_offset + ulr_count
	 * in the directory. To use this space, we may have to compact
	 * the entries located there, by copying them together towards the
	 * beginning of the block, leaving the free space in one usable
	 * chunk at the end.
	 */

	/*
	 * Increase size of directory if entry eats into new space.
	 * This should never push the size past a new multiple of
	 * DIRBLKSIZ.
	 *
	 * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN.
	 */
	if (ulr->ulr_offset + ulr->ulr_count > dp->i_size) {
#ifdef DIAGNOSTIC
		printf("ulfs_direnter: reached 4.2-only block, "
		    "not supposed to happen\n");
#endif
		dp->i_size = ulr->ulr_offset + ulr->ulr_count;
		DIP_ASSIGN(dp, size, dp->i_size);
		dp->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/*
	 * Get the block containing the space for the new directory entry.
	 */
	error = ulfs_blkatoff(dvp, (off_t)ulr->ulr_offset, &dirbuf, &bp, true);
	if (error) {
		return (error);
	}
	/*
	 * Find space for the new entry. In the simple case, the entry at
	 * offset base will have the space. If it does not, then namei
	 * arranged that compacting the region ulr_offset to
	 * ulr_offset + ulr_count would yield the space.
	 */
	ep = (struct lfs_direct *)dirbuf;
	dsize = (ep->d_ino != 0) ? LFS_DIRSIZ(FSFMT(dvp), ep, needswap) : 0;
	spacefree = ulfs_rw16(ep->d_reclen, needswap) - dsize;
	for (loc = ulfs_rw16(ep->d_reclen, needswap);
	    loc < ulr->ulr_count; ) {
		uint16_t reclen;

		nep = (struct lfs_direct *)(dirbuf + loc);

		/* Trim the existing slot (NB: dsize may be zero). */
		ep->d_reclen = ulfs_rw16(dsize, needswap);
		ep = (struct lfs_direct *)((char *)ep + dsize);

		reclen = ulfs_rw16(nep->d_reclen, needswap);
		loc += reclen;
		if (nep->d_ino == 0) {
			/*
			 * A mid-block unused entry. Such entries are
			 * never created by the kernel, but fsck_ffs
			 * can create them (and it doesn't fix them).
			 *
			 * Add up the free space, and initialise the
			 * relocated entry since we don't memcpy it.
			 */
			spacefree += reclen;
			ep->d_ino = 0;
			dsize = 0;
			continue;
		}
		dsize = LFS_DIRSIZ(FSFMT(dvp), nep, needswap);
		spacefree += reclen - dsize;
#ifdef LFS_DIRHASH
		if (dp->i_dirhash != NULL)
			ulfsdirhash_move(dp, nep,
			    ulr->ulr_offset + ((char *)nep - dirbuf),
			    ulr->ulr_offset + ((char *)ep - dirbuf));
#endif
		memcpy((void *)ep, (void *)nep, dsize);
	}
	/*
	 * Here, `ep' points to a directory entry containing `dsize' in-use
	 * bytes followed by `spacefree' unused bytes. If ep->d_ino == 0,
	 * then the entry is completely unused (dsize == 0). The value
	 * of ep->d_reclen is always indeterminate.
	 *
	 * Update the pointer fields in the previous entry (if any),
	 * copy in the new entry, and write out the block.
	 */
	if (ep->d_ino == 0 ||
	    (ulfs_rw32(ep->d_ino, needswap) == ULFS_WINO &&
	     memcmp(ep->d_name, dirp->d_name, dirp->d_namlen) == 0)) {
		/* Overwrite an unused or matching-whiteout slot in place. */
		if (spacefree + dsize < newentrysize)
			panic("ulfs_direnter: compact1");
		dirp->d_reclen = spacefree + dsize;
	} else {
		/* Append after the live entry, shrinking its reclen. */
		if (spacefree < newentrysize)
			panic("ulfs_direnter: compact2");
		dirp->d_reclen = spacefree;
		ep->d_reclen = ulfs_rw16(dsize, needswap);
		ep = (struct lfs_direct *)((char *)ep + dsize);
	}
	dirp->d_reclen = ulfs_rw16(dirp->d_reclen, needswap);
	dirp->d_ino = ulfs_rw32(dirp->d_ino, needswap);
	if (FSFMT(dvp)) {
		/* Same old-format namlen/type byte swap as above. */
#if (BYTE_ORDER == LITTLE_ENDIAN)
		if (needswap == 0) {
#else
		if (needswap != 0) {
#endif
			u_char tmp = dirp->d_namlen;

			dirp->d_namlen = dirp->d_type;
			dirp->d_type = tmp;
		}
	}
#ifdef LFS_DIRHASH
	if (dp->i_dirhash != NULL && (ep->d_ino == 0 ||
	    dirp->d_reclen == spacefree))
		ulfsdirhash_add(dp, dirp,
		    ulr->ulr_offset + ((char *)ep - dirbuf));
#endif
	memcpy((void *)ep, (void *)dirp, (u_int)newentrysize);
#ifdef LFS_DIRHASH
	if (dp->i_dirhash != NULL)
		ulfsdirhash_checkblock(dp, dirbuf -
		    (ulr->ulr_offset & (dirblksiz - 1)),
		    ulr->ulr_offset & ~(dirblksiz - 1));
#endif
	error = VOP_BWRITE(bp->b_vp, bp);
	dp->i_flag |= IN_CHANGE | IN_UPDATE;
	/*
	 * If all went well, and the directory can be shortened, proceed
	 * with the truncation. Note that we have to unlock the inode for
	 * the entry that we just entered, as the truncation may need to
	 * lock other inodes which can lead to deadlock if we also hold a
	 * lock on the newly entered node.
	 */
	if (error == 0 && ulr->ulr_endoff && ulr->ulr_endoff < dp->i_size) {
#ifdef LFS_DIRHASH
		if (dp->i_dirhash != NULL)
			ulfsdirhash_dirtrunc(dp, ulr->ulr_endoff);
#endif
		(void) lfs_truncate(dvp, (off_t)ulr->ulr_endoff, IO_SYNC, cr);
	}
	return (error);
}

/*
 * Remove a directory entry after a call to namei, using the
 * parameters that ulfs_lookup left in nameidata and in the
 * ulfs_lookup_results.
 *
 * DVP is the directory to be updated. It must be locked.
 * ULR is the ulfs_lookup_results structure from the final lookup step.
 * IP, if not null, is the inode being unlinked.
 * FLAGS may contain DOWHITEOUT.
 * ISRMDIR is not used and (XXX) should be removed.
 *
 * If FLAGS contains DOWHITEOUT the entry is replaced with a whiteout
 * instead of being cleared.
 *
 * ulr->ulr_offset contains the position of the directory entry
 * to be removed.
 *
 * ulr->ulr_reclen contains the size of the directory entry to be
 * removed.
 *
 * ulr->ulr_count contains the size of the *previous* directory
 * entry. This allows finding it, for free space management. If
 * ulr_count is 0, the target entry is at the beginning of the
 * directory. (Does this ever happen? The first entry should be ".",
 * which should only be removed at rmdir time. Does rmdir come here
 * to clear out the "." and ".." entries? Perhaps, but I doubt it.)
 *
 * The space is marked free by adding it to the record length (not
 * name length) of the preceding entry. If the first entry becomes
 * free, it is marked free by setting the inode number to 0.
 *
 * The link count of IP is decremented. Note that this is not the
 * inverse behavior of ulfs_direnter, which does not adjust link
 * counts. Sigh.
 */
int
ulfs_dirremove(struct vnode *dvp, const struct ulfs_lookup_results *ulr,
    struct inode *ip, int flags, int isrmdir)
{
	struct inode *dp = VTOI(dvp);
	struct lfs_direct *ep;
	struct buf *bp;
	int error;
	const int needswap = ULFS_MPNEEDSWAP(dp->i_lfs);

	if (flags & DOWHITEOUT) {
		/*
		 * Whiteout entry: set d_ino to ULFS_WINO.
		 */
		error = ulfs_blkatoff(dvp, (off_t)ulr->ulr_offset,
		    (void *)&ep, &bp, true);
		if (error)
			return (error);
		ep->d_ino = ulfs_rw32(ULFS_WINO, needswap);
		ep->d_type = LFS_DT_WHT;
		goto out;
	}

	/* Fetch the block at the *previous* entry (offset - count). */
	if ((error = ulfs_blkatoff(dvp,
	    (off_t)(ulr->ulr_offset - ulr->ulr_count),
	    (void *)&ep, &bp, true)) != 0)
		return (error);
#ifdef LFS_DIRHASH
	/*
	 * Remove the dirhash entry. This is complicated by the fact
	 * that `ep' is the previous entry when ulr_count != 0.
	 */
	if (dp->i_dirhash != NULL)
		ulfsdirhash_remove(dp, (ulr->ulr_count == 0) ? ep :
		    (struct lfs_direct *)((char *)ep +
		    ulfs_rw16(ep->d_reclen, needswap)), ulr->ulr_offset);
#endif
	if (ulr->ulr_count == 0) {
		/*
		 * First entry in block: set d_ino to zero.
		 */
		ep->d_ino = 0;
	} else {
		/*
		 * Collapse new free space into previous entry.
		 */
		ep->d_reclen =
		    ulfs_rw16(ulfs_rw16(ep->d_reclen, needswap) +
			ulr->ulr_reclen, needswap);
	}
#ifdef LFS_DIRHASH
	if (dp->i_dirhash != NULL) {
		int dirblksiz = ip->i_lfs->um_dirblksiz;
		ulfsdirhash_checkblock(dp, (char *)ep -
		    ((ulr->ulr_offset - ulr->ulr_count) & (dirblksiz - 1)),
		    ulr->ulr_offset & ~(dirblksiz - 1));
	}
#endif
out:
	if (ip) {
		ip->i_nlink--;
		DIP_ASSIGN(ip, nlink, ip->i_nlink);
		ip->i_flag |= IN_CHANGE;
	}
	/*
	 * XXX did it ever occur to anyone that it might be a good
	 * idea to restore ip->i_nlink if this fails? Or something?
	 * Currently on error return from this function the state of
	 * ip->i_nlink depends on what happened, and callers
	 * definitely do not take this into account.
	 */
	error = VOP_BWRITE(bp->b_vp, bp);
	dp->i_flag |= IN_CHANGE | IN_UPDATE;
	/*
	 * If the last named reference to a snapshot goes away,
	 * drop its snapshot reference so that it will be reclaimed
	 * when last open reference goes away.
	 */
	if (ip != 0 && (ip->i_flags & SF_SNAPSHOT) != 0 && ip->i_nlink == 0)
		ulfs_snapgone(ip);
	return (error);
}

/*
 * Rewrite an existing directory entry to point at the inode supplied.
 *
 * DP is the directory to update.
 * OFFSET is the position of the entry in question. It may come
 * from ulr_offset of a ulfs_lookup_results.
 * OIP is the old inode the directory previously pointed to.
 * NEWINUM is the number of the new inode.
 * NEWTYPE is the new value for the type field of the directory entry.
 * (This is ignored if the fs doesn't support that.)
 * ISRMDIR is not used and (XXX) should be removed.
 * IFLAGS are added to DP's inode flags.
 *
 * The link count of OIP is decremented. Note that the link count of
 * the new inode is *not* incremented. Yay for symmetry.
 */
int
ulfs_dirrewrite(struct inode *dp, off_t offset,
    struct inode *oip, ino_t newinum, int newtype,
    int isrmdir, int iflags)
{
	struct buf *bp;
	struct lfs_direct *ep;
	struct vnode *vdp = ITOV(dp);
	int error;

	error = ulfs_blkatoff(vdp, offset, (void *)&ep, &bp, true);
	if (error)
		return (error);
	ep->d_ino = ulfs_rw32(newinum, ULFS_IPNEEDSWAP(dp));
	if (!FSFMT(vdp))
		ep->d_type = newtype;
	oip->i_nlink--;
	DIP_ASSIGN(oip, nlink, oip->i_nlink);
	oip->i_flag |= IN_CHANGE;
	error = VOP_BWRITE(bp->b_vp, bp);
	dp->i_flag |= iflags;
	/*
	 * If the last named reference to a snapshot goes away,
	 * drop its snapshot reference so that it will be reclaimed
	 * when last open reference goes away.
	 */
	if ((oip->i_flags & SF_SNAPSHOT) != 0 && oip->i_nlink == 0)
		ulfs_snapgone(oip);
	return (error);
}

/*
 * Check if a directory is empty or not.
 * Inode supplied must be locked.
 *
 * Using a struct lfs_dirtemplate here is not precisely
 * what we want, but better than using a struct lfs_direct.
 *
 * NB: does not handle corrupted directories.
 *
 * Returns 1 if the directory holds nothing but ".", "..", unused
 * entries (d_ino == 0), and whiteouts; returns 0 otherwise, and also
 * on any read error.
 */
int
ulfs_dirempty(struct inode *ip, ino_t parentino, kauth_cred_t cred)
{
	doff_t off;
	struct lfs_dirtemplate dbuf;
	struct lfs_direct *dp = (struct lfs_direct *)&dbuf;
	int error, namlen;
	size_t count;
	const int needswap = ULFS_IPNEEDSWAP(ip);
	/*
	 * Half a dirtemplate: enough to read one entry's fixed header
	 * plus a short ("." / "..") name.
	 */
#define MINDIRSIZ (sizeof (struct lfs_dirtemplate) / 2)

	/*
	 * Walk every entry in the directory; d_reclen chains entries
	 * (byte-swapped on opposite-endian filesystems).
	 */
	for (off = 0; off < ip->i_size;
	    off += ulfs_rw16(dp->d_reclen, needswap)) {
		error = vn_rdwr(UIO_READ, ITOV(ip), (void *)dp, MINDIRSIZ, off,
		    UIO_SYSSPACE, IO_NODELOCKED, cred, &count, NULL);
		/*
		 * Since we read MINDIRSIZ, residual must
		 * be 0 unless we're at end of file.
		 */
		if (error || count != 0)
			return (0);
		/* avoid infinite loops */
		if (dp->d_reclen == 0)
			return (0);
		/* skip empty entries (and whiteouts) */
		if (dp->d_ino == 0 || ulfs_rw32(dp->d_ino, needswap) ==
		    ULFS_WINO)
			continue;
		/*
		 * Accept only "." and "..". On old-format filesystems
		 * the name length lives where d_type is when the on-disk
		 * byte order differs from the host's (cf. the namlen/type
		 * swap in ulfs_direnter above).
		 */
#if (BYTE_ORDER == LITTLE_ENDIAN)
		if (FSFMT(ITOV(ip)) && needswap == 0)
			namlen = dp->d_type;
		else
			namlen = dp->d_namlen;
#else
		if (FSFMT(ITOV(ip)) && needswap != 0)
			namlen = dp->d_type;
		else
			namlen = dp->d_namlen;
#endif
		if (namlen > 2)
			return (0);
		if (dp->d_name[0] != '.')
			return (0);
		/*
		 * At this point namlen must be 1 or 2.
		 * 1 implies ".", 2 implies ".." if second
		 * char is also "."
		 */
		if (namlen == 1 &&
		    ulfs_rw32(dp->d_ino, needswap) == ip->i_number)
			continue;
		if (dp->d_name[1] == '.' &&
		    ulfs_rw32(dp->d_ino, needswap) == parentino)
			continue;
		/* Any other entry means the directory is not empty. */
		return (0);
	}
	return (1);
}

#define ULFS_DIRRABLKS 0
/* Read-ahead block count used by ulfs_blkatoff(); global so it is tunable. */
int ulfs_dirrablks = ULFS_DIRRABLKS;

/*
 * ulfs_blkatoff: Return buffer with the contents of block "offset" from
 * the beginning of directory "vp". If "res" is non-NULL, fill it in with
 * a pointer to the remaining space in the directory. If the caller intends
 * to modify the buffer returned, "modify" must be true.
*/ int ulfs_blkatoff(struct vnode *vp, off_t offset, char **res, struct buf **bpp, bool modify) { struct inode *ip __diagused; struct buf *bp; daddr_t lbn; const int dirrablks = ulfs_dirrablks; daddr_t *blks; int *blksizes; int run, error; struct mount *mp = vp->v_mount; const int bshift = mp->mnt_fs_bshift; const int bsize = 1 << bshift; off_t eof; blks = kmem_alloc((1 + dirrablks) * sizeof(daddr_t), KM_SLEEP); blksizes = kmem_alloc((1 + dirrablks) * sizeof(int), KM_SLEEP); ip = VTOI(vp); KASSERT(vp->v_size == ip->i_size); GOP_SIZE(vp, vp->v_size, &eof, 0); lbn = offset >> bshift; for (run = 0; run <= dirrablks;) { const off_t curoff = lbn << bshift; const int size = MIN(eof - curoff, bsize); if (size == 0) { break; } KASSERT(curoff < eof); blks[run] = lbn; blksizes[run] = size; lbn++; run++; if (size != bsize) { break; } } KASSERT(run >= 1); error = breadn(vp, blks[0], blksizes[0], &blks[1], &blksizes[1], run - 1, NOCRED, (modify ? B_MODIFY : 0), &bp); if (error != 0) { *bpp = NULL; goto out; } if (res) { *res = (char *)bp->b_data + (offset & (bsize - 1)); } *bpp = bp; out: kmem_free(blks, (1 + dirrablks) * sizeof(daddr_t)); kmem_free(blksizes, (1 + dirrablks) * sizeof(int)); return error; }