static int lfs_fastvget(struct mount *mp, ino_t ino, BLOCK_INFO *blkp, int lk_flags, struct vnode **vpp) { struct ulfsmount *ump; struct lfs *fs; int error; ump = VFSTOULFS(mp); fs = ump->um_lfs; fs->lfs_cleaner_hint = blkp; error = vcache_get(mp, &ino, sizeof(ino), vpp); fs->lfs_cleaner_hint = NULL; if (error) return error; error = vn_lock(*vpp, lk_flags); if (error) { if (error == EBUSY) error = EAGAIN; vrele(*vpp); *vpp = NULL; return error; } return 0; }
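/*
 * Illustrative only: a minimal sketch of how cleaner-side code calls the
 * vcache-based lfs_fastvget() above (lfs_bmapv below uses exactly this
 * pattern).  The helper name is hypothetical and the fragment is
 * deliberately not compiled.
 */
#if 0
static int
cleaner_examine_ino(struct mount *mntp, BLOCK_INFO *blkp)
{
	struct vnode *vp;
	int error;

	error = lfs_fastvget(mntp, blkp->bi_inode, NULL, LK_SHARED, &vp);
	if (error == EAGAIN)
		return error;	/* vnode was locked (EBUSY remapped above);
				   retry this block on a later pass */
	if (error)
		return error;
	KASSERT(VOP_ISLOCKED(vp));
	/* ... examine the inode ... */
	vput(vp);
	return 0;
}
#endif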
/* * whiteout vnode call */ int ulfs_whiteout(void *v) { struct vop_whiteout_args /* { struct vnode *a_dvp; struct componentname *a_cnp; int a_flags; } */ *ap = v; struct vnode *dvp = ap->a_dvp; struct componentname *cnp = ap->a_cnp; int error; struct ulfsmount *ump = VFSTOULFS(dvp->v_mount); struct lfs *fs = ump->um_lfs; struct ulfs_lookup_results *ulr; /* XXX should handle this material another way */ ulr = &VTOI(dvp)->i_crap; ULFS_CHECK_CRAPCOUNTER(VTOI(dvp)); error = 0; switch (ap->a_flags) { case LOOKUP: /* 4.4 format directories support whiteout operations */ if (fs->um_maxsymlinklen > 0) return (0); return (EOPNOTSUPP); case CREATE: /* create a new directory whiteout */ fstrans_start(dvp->v_mount, FSTRANS_SHARED); #ifdef DIAGNOSTIC if (fs->um_maxsymlinklen <= 0) panic("ulfs_whiteout: old format filesystem"); #endif error = ulfs_direnter(dvp, ulr, NULL, cnp, ULFS_WINO, LFS_DT_WHT, NULL); break; case DELETE: /* remove an existing directory whiteout */ fstrans_start(dvp->v_mount, FSTRANS_SHARED); #ifdef DIAGNOSTIC if (fs->um_maxsymlinklen <= 0) panic("ulfs_whiteout: old format filesystem"); #endif cnp->cn_flags &= ~DOWHITEOUT; error = ulfs_dirremove(dvp, ulr, NULL, cnp->cn_flags, 0); break; default: panic("ulfs_whiteout: unknown op"); /* NOTREACHED */ } fstrans_done(dvp->v_mount); return (error); }
/* * lfs_gro_rename: Actually perform the rename operation. Do a little * LFS bookkeeping and then defer to ulfs_gro_rename. */ static int lfs_gro_rename(struct mount *mp, kauth_cred_t cred, struct vnode *fdvp, struct componentname *fcnp, void *fde, struct vnode *fvp, struct vnode *tdvp, struct componentname *tcnp, void *tde, struct vnode *tvp) { int error; KASSERT(mp != NULL); KASSERT(fdvp != NULL); KASSERT(fcnp != NULL); KASSERT(fde != NULL); KASSERT(fvp != NULL); KASSERT(tdvp != NULL); KASSERT(tcnp != NULL); KASSERT(tde != NULL); KASSERT(fdvp != fvp); KASSERT(fdvp != tvp); KASSERT(tdvp != fvp); KASSERT(tdvp != tvp); KASSERT(fvp != tvp); KASSERT(fdvp->v_mount == mp); KASSERT(fvp->v_mount == mp); KASSERT(tdvp->v_mount == mp); KASSERT((tvp == NULL) || (tvp->v_mount == mp)); KASSERT(VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE); KASSERT(VOP_ISLOCKED(fvp) == LK_EXCLUSIVE); KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE); KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE)); error = lfs_set_dirop(tdvp, tvp); if (error != 0) return error; MARK_VNODE(fdvp); MARK_VNODE(fvp); error = ulfs_gro_rename(mp, cred, fdvp, fcnp, fde, fvp, tdvp, tcnp, tde, tvp); UNMARK_VNODE(fdvp); UNMARK_VNODE(fvp); UNMARK_VNODE(tdvp); if (tvp) { UNMARK_VNODE(tvp); } lfs_unset_dirop(VFSTOULFS(mp)->um_lfs, tdvp, "rename"); vrele(tdvp); if (tvp) { vrele(tvp); } return error; }
/* * Do consistency checking on a directory entry: * record length must be multiple of 4 * entry must fit in rest of its DIRBLKSIZ block * record must be large enough to contain entry * name is not longer than LFS_MAXNAMLEN * name must be as long as advertised, and null terminated */ int ulfs_dirbadentry(struct vnode *dp, struct lfs_direct *ep, int entryoffsetinblock) { int i; int namlen; struct ulfsmount *ump = VFSTOULFS(dp->v_mount); struct lfs *fs = ump->um_lfs; const int needswap = ULFS_MPNEEDSWAP(fs); int dirblksiz = fs->um_dirblksiz; #if (BYTE_ORDER == LITTLE_ENDIAN) if (FSFMT(dp) && needswap == 0) namlen = ep->d_type; else namlen = ep->d_namlen; #else if (FSFMT(dp) && needswap != 0) namlen = ep->d_type; else namlen = ep->d_namlen; #endif if ((ulfs_rw16(ep->d_reclen, needswap) & 0x3) != 0 || ulfs_rw16(ep->d_reclen, needswap) > dirblksiz - (entryoffsetinblock & (dirblksiz - 1)) || ulfs_rw16(ep->d_reclen, needswap) < LFS_DIRSIZ(FSFMT(dp), ep, needswap) || namlen > LFS_MAXNAMLEN) { /*return (1); */ printf("First bad, reclen=%#x, DIRSIZ=%lu, namlen=%d, " "flags=%#x, entryoffsetinblock=%d, dirblksiz = %d\n", ulfs_rw16(ep->d_reclen, needswap), (u_long)LFS_DIRSIZ(FSFMT(dp), ep, needswap), namlen, dp->v_mount->mnt_flag, entryoffsetinblock, dirblksiz); goto bad; } if (ep->d_ino == 0) return (0); for (i = 0; i < namlen; i++) if (ep->d_name[i] == '\0') { /*return (1); */ printf("Second bad\n"); goto bad; } if (ep->d_name[i]) goto bad; return (0); bad: return (1); }
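/*
 * Historical note on the namlen/d_type dance above: old-format (4.2BSD)
 * directories had no d_type byte; d_namlen was a 16-bit field covering
 * what is now d_type plus d_namlen.  Since name lengths fit in one byte,
 * the value lives in the low-order byte, which on a little-endian disk
 * is the byte the new format calls d_type.  Hence namlen is read from
 * d_type exactly when the entry is old-format and the on-disk byte order
 * is little-endian (native LE, or byte-swapped on a BE host).
 */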
/*
 * Q_SYNC - sync quota files to disk.
 */
int
lfs_q1sync(struct mount *mp)
{
	struct ulfsmount *ump = VFSTOULFS(mp);
	struct vnode *vp;
	struct vnode_iterator *marker;
	struct dquot *dq;
	int i, error;

	/*
	 * Check if the mount point has any quotas.
	 * If not, simply return.
	 */
	for (i = 0; i < ULFS_MAXQUOTAS; i++)
		if (ump->um_quotas[i] != NULLVP)
			break;
	if (i == ULFS_MAXQUOTAS)
		return (0);

	/*
	 * Search vnodes associated with this mount point,
	 * synchronizing any modified dquot structures.
	 */
	vfs_vnode_iterator_init(mp, &marker);
	while ((vp = vfs_vnode_iterator_next(marker, NULL, NULL))) {
		error = vn_lock(vp, LK_EXCLUSIVE);
		if (error) {
			vrele(vp);
			continue;
		}
		if (VTOI(vp) == NULL || vp->v_type == VNON) {
			vput(vp);
			continue;
		}
		for (i = 0; i < ULFS_MAXQUOTAS; i++) {
			dq = VTOI(vp)->i_dquot[i];
			if (dq == NODQUOT)
				continue;
			mutex_enter(&dq->dq_interlock);
			if (dq->dq_flags & DQ_MOD)
				lfs_dq1sync(vp, dq);
			mutex_exit(&dq->dq_interlock);
		}
		vput(vp);
	}
	vfs_vnode_iterator_destroy(marker);
	return (0);
}
/*
 * Q_SETQUOTA - assign an entire dqblk structure.
 */
int
setquota1(struct mount *mp, u_long id, int type, struct dqblk *dqb)
{
	struct dquot *dq;
	struct dquot *ndq;
	struct ulfsmount *ump = VFSTOULFS(mp);
	int error;

	if ((error = lfs_dqget(NULLVP, id, ump, type, &ndq)) != 0)
		return (error);
	dq = ndq;
	mutex_enter(&dq->dq_interlock);
	/*
	 * Copy all but the current values.
	 * Reset time limit if previously had no soft limit or were
	 * under it, but now have a soft limit and are over it.
	 */
	dqb->dqb_curblocks = dq->dq_curblocks;
	dqb->dqb_curinodes = dq->dq_curinodes;
	if (dq->dq_id != 0) {
		dqb->dqb_btime = dq->dq_btime;
		dqb->dqb_itime = dq->dq_itime;
	}
	if (dqb->dqb_bsoftlimit &&
	    dq->dq_curblocks >= dqb->dqb_bsoftlimit &&
	    (dq->dq_bsoftlimit == 0 || dq->dq_curblocks < dq->dq_bsoftlimit))
		dqb->dqb_btime = time_second + ump->umq1_btime[type];
	if (dqb->dqb_isoftlimit &&
	    dq->dq_curinodes >= dqb->dqb_isoftlimit &&
	    (dq->dq_isoftlimit == 0 || dq->dq_curinodes < dq->dq_isoftlimit))
		dqb->dqb_itime = time_second + ump->umq1_itime[type];
	dq->dq_un.dq1_dqb = *dqb;
	if (dq->dq_curblocks < dq->dq_bsoftlimit)
		dq->dq_flags &= ~DQ_WARN(QL_BLOCK);
	if (dq->dq_curinodes < dq->dq_isoftlimit)
		dq->dq_flags &= ~DQ_WARN(QL_FILE);
	if (dq->dq_isoftlimit == 0 && dq->dq_bsoftlimit == 0 &&
	    dq->dq_ihardlimit == 0 && dq->dq_bhardlimit == 0)
		dq->dq_flags |= DQ_FAKE;
	else
		dq->dq_flags &= ~DQ_FAKE;
	dq->dq_flags |= DQ_MOD;
	mutex_exit(&dq->dq_interlock);
	lfs_dqrele(NULLVP, dq);
	return (0);
}
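/*
 * sys_lfs_markv:
 *
 *	System call entry point for lfs_markv.  Copies the cleaner's
 *	BLOCK_INFO array in from userspace, marks the named blocks and
 *	inodes dirty so they are rewritten into a new segment, and copies
 *	the (possibly updated) array back out on success.
 */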
int
sys_lfs_markv(struct lwp *l, const struct sys_lfs_markv_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(fsid_t *) fsidp;
		syscallarg(struct block_info *) blkiov;
		syscallarg(int) blkcnt;
	} */
	BLOCK_INFO *blkiov;
	int blkcnt, error;
	fsid_t fsid;
	struct lfs *fs;
	struct mount *mntp;

	error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_LFS,
	    KAUTH_REQ_SYSTEM_LFS_MARKV, NULL, NULL, NULL);
	if (error)
		return (error);

	if ((error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t))) != 0)
		return (error);

	if ((mntp = vfs_getvfs(&fsid)) == NULL)
		return (ENOENT);
	fs = VFSTOULFS(mntp)->um_lfs;

	blkcnt = SCARG(uap, blkcnt);
	if ((u_int) blkcnt > LFS_MARKV_MAXBLKCNT)
		return (EINVAL);

	KERNEL_LOCK(1, NULL);
	blkiov = lfs_malloc(fs, blkcnt * sizeof(BLOCK_INFO), LFS_NB_BLKIOV);
	if ((error = copyin(SCARG(uap, blkiov), blkiov,
	     blkcnt * sizeof(BLOCK_INFO))) != 0)
		goto out;

	if ((error = lfs_markv(l->l_proc, &fsid, blkiov, blkcnt)) == 0)
		copyout(blkiov, SCARG(uap, blkiov),
		    blkcnt * sizeof(BLOCK_INFO));
    out:
	lfs_free(fs, blkiov, LFS_NB_BLKIOV);
	KERNEL_UNLOCK_ONE(NULL);
	return error;
}
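/*
 * Illustrative only: a userland sketch of driving the markv interface,
 * assuming the lfs_markv(2) syscall stub that lfs_cleanerd uses is
 * available; the rewrite_blocks() name and the elided BLOCK_INFO setup
 * are hypothetical.
 */
#if 0
#include <sys/types.h>
#include <sys/statvfs.h>

int
rewrite_blocks(const char *fspath, struct block_info *blkiov, int blkcnt)
{
	struct statvfs sv;

	if (statvfs(fspath, &sv) == -1)
		return -1;
	/* f_fsidx identifies the file system, as sys_lfs_markv expects. */
	return lfs_markv(&sv.f_fsidx, blkiov, blkcnt);
}
#endif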
int
lfsquota1_umount(struct mount *mp, int flags)
{
	int i, error;
	struct ulfsmount *ump = VFSTOULFS(mp);
	struct lfs *fs = ump->um_lfs;
	struct lwp *l = curlwp;

	if ((fs->um_flags & ULFS_QUOTA) == 0)
		return 0;

	if ((error = vflush(mp, NULLVP, SKIPSYSTEM | flags)) != 0)
		return (error);

	for (i = 0; i < ULFS_MAXQUOTAS; i++) {
		if (ump->um_quotas[i] != NULLVP) {
			lfsquota1_handle_cmd_quotaoff(l, ump, i);
		}
	}
	return 0;
}
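/*
 * sys_lfs_bmapv:
 *
 *	System call entry point for lfs_bmapv.  Copies the cleaner's
 *	BLOCK_INFO array in, has lfs_bmapv() fill in the current disk
 *	address of each block (so the cleaner can tell live blocks from
 *	stale ones), and copies the array back out on success.  The
 *	privilege check is done in lfs_bmapv() itself.
 */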
int sys_lfs_bmapv(struct lwp *l, const struct sys_lfs_bmapv_args *uap, register_t *retval) { /* { syscallarg(fsid_t *) fsidp; syscallarg(struct block_info *) blkiov; syscallarg(int) blkcnt; } */ BLOCK_INFO *blkiov; int blkcnt, error; fsid_t fsid; struct lfs *fs; struct mount *mntp; if ((error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t))) != 0) return (error); if ((mntp = vfs_getvfs(&fsid)) == NULL) return (ENOENT); fs = VFSTOULFS(mntp)->um_lfs; blkcnt = SCARG(uap, blkcnt); #if SIZE_T_MAX <= UINT_MAX if ((u_int) blkcnt > SIZE_T_MAX / sizeof(BLOCK_INFO)) return (EINVAL); #endif KERNEL_LOCK(1, NULL); blkiov = lfs_malloc(fs, blkcnt * sizeof(BLOCK_INFO), LFS_NB_BLKIOV); if ((error = copyin(SCARG(uap, blkiov), blkiov, blkcnt * sizeof(BLOCK_INFO))) != 0) goto out; if ((error = lfs_bmapv(l, &fsid, blkiov, blkcnt)) == 0) copyout(blkiov, SCARG(uap, blkiov), blkcnt * sizeof(BLOCK_INFO)); out: lfs_free(fs, blkiov, LFS_NB_BLKIOV); KERNEL_UNLOCK_ONE(NULL); return error; }
/* * This will block until a segment in file system fsid is written. A timeout * in milliseconds may be specified which will awake the cleaner automatically. * An fsid of -1 means any file system, and a timeout of 0 means forever. */ int lfs_segwait(fsid_t *fsidp, struct timeval *tv) { struct mount *mntp; void *addr; u_long timeout; int error; KERNEL_LOCK(1, NULL); if (fsidp == NULL || (mntp = vfs_getvfs(fsidp)) == NULL) addr = &lfs_allclean_wakeup; else addr = &VFSTOULFS(mntp)->um_lfs->lfs_nextseg; /* * XXX THIS COULD SLEEP FOREVER IF TIMEOUT IS {0,0}! * XXX IS THAT WHAT IS INTENDED? */ timeout = tvtohz(tv); error = tsleep(addr, PCATCH | PVFS, "segment", timeout); KERNEL_UNLOCK_ONE(NULL); return (error == ERESTART ? EINTR : 0); }
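/*
 * Illustrative only: how a cleaner might block until the next segment
 * write, with a ten-second safety timeout.  As the comment above says,
 * an fsid of -1 means any file system and a zero timeout means forever;
 * the lfs_segwait(2) stub and the example function name are assumptions.
 */
#if 0
static void
cleaner_wait_example(void)
{
	struct timeval tv = { .tv_sec = 10, .tv_usec = 0 };
	fsid_t any = {{ -1, -1 }};

	(void)lfs_segwait(&any, &tv);
}
#endif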
/* * Return target name of a symbolic link */ int ulfs_readlink(void *v) { struct vop_readlink_args /* { struct vnode *a_vp; struct uio *a_uio; kauth_cred_t a_cred; } */ *ap = v; struct vnode *vp = ap->a_vp; struct inode *ip = VTOI(vp); struct ulfsmount *ump = VFSTOULFS(vp->v_mount); struct lfs *fs = ump->um_lfs; int isize; isize = ip->i_size; if (isize < fs->um_maxsymlinklen || (fs->um_maxsymlinklen == 0 && DIP(ip, blocks) == 0)) { uiomove((char *)SHORTLINK(ip), isize, ap->a_uio); return (0); } return (VOP_READ(vp, ap->a_uio, 0, ap->a_cred)); }
/*
 * sys_lfs_segclean:
 *
 *	Mark the segment clean.
 *
 *	 0 on success
 *	-1/errno is returned on error.
 */
int
sys_lfs_segclean(struct lwp *l, const struct sys_lfs_segclean_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(fsid_t *) fsidp;
		syscallarg(u_long) segment;
	} */
	struct lfs *fs;
	struct mount *mntp;
	fsid_t fsid;
	int error;
	unsigned long segnum;

	error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_LFS,
	    KAUTH_REQ_SYSTEM_LFS_SEGCLEAN, NULL, NULL, NULL);
	if (error)
		return (error);

	if ((error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t))) != 0)
		return (error);
	if ((mntp = vfs_getvfs(&fsid)) == NULL)
		return (ENOENT);

	fs = VFSTOULFS(mntp)->um_lfs;
	segnum = SCARG(uap, segment);

	if ((error = vfs_busy(mntp, NULL)) != 0)
		return (error);

	KERNEL_LOCK(1, NULL);
	lfs_seglock(fs, SEGM_PROT);
	error = lfs_do_segclean(fs, segnum);
	lfs_segunlock(fs);
	KERNEL_UNLOCK_ONE(NULL);
	vfs_unbusy(mntp, false, NULL);
	return error;
}
/* * Q_SETUSE - set current inode and block usage. */ int setuse(struct mount *mp, u_long id, int type, void *addr) { struct dquot *dq; struct ulfsmount *ump = VFSTOULFS(mp); struct dquot *ndq; struct dqblk usage; int error; error = copyin(addr, (void *)&usage, sizeof (struct dqblk)); if (error) return (error); if ((error = lfs_dqget(NULLVP, id, ump, type, &ndq)) != 0) return (error); dq = ndq; mutex_enter(&dq->dq_interlock); /* * Reset time limit if have a soft limit and were * previously under it, but are now over it. */ if (dq->dq_bsoftlimit && dq->dq_curblocks < dq->dq_bsoftlimit && usage.dqb_curblocks >= dq->dq_bsoftlimit) dq->dq_btime = time_second + ump->umq1_btime[type]; if (dq->dq_isoftlimit && dq->dq_curinodes < dq->dq_isoftlimit && usage.dqb_curinodes >= dq->dq_isoftlimit) dq->dq_itime = time_second + ump->umq1_itime[type]; dq->dq_curblocks = usage.dqb_curblocks; dq->dq_curinodes = usage.dqb_curinodes; if (dq->dq_curblocks < dq->dq_bsoftlimit) dq->dq_flags &= ~DQ_WARN(QL_BLOCK); if (dq->dq_curinodes < dq->dq_isoftlimit) dq->dq_flags &= ~DQ_WARN(QL_FILE); dq->dq_flags |= DQ_MOD; mutex_exit(&dq->dq_interlock); lfs_dqrele(NULLVP, dq); return (0); }
int lfs_bmapv(struct proc *p, fsid_t *fsidp, BLOCK_INFO *blkiov, int blkcnt) { BLOCK_INFO *blkp; IFILE *ifp; struct buf *bp; struct inode *ip = NULL; struct lfs *fs; struct mount *mntp; struct ulfsmount *ump; struct vnode *vp; ino_t lastino; daddr_t v_daddr; int cnt, error; int numrefed = 0; lfs_cleaner_pid = p->p_pid; if ((mntp = vfs_getvfs(fsidp)) == NULL) return (ENOENT); ump = VFSTOULFS(mntp); if ((error = vfs_busy(mntp, NULL)) != 0) return (error); cnt = blkcnt; fs = VFSTOULFS(mntp)->um_lfs; error = 0; /* these were inside the initialization for the for loop */ v_daddr = LFS_UNUSED_DADDR; lastino = LFS_UNUSED_INUM; for (blkp = blkiov; cnt--; ++blkp) { /* * Get the IFILE entry (only once) and see if the file still * exists. */ if (lastino != blkp->bi_inode) { /* * Finish the old file, if there was one. The presence * of a usable vnode in vp is signaled by a valid * v_daddr. */ if (v_daddr != LFS_UNUSED_DADDR) { lfs_vunref(vp); if (VTOI(vp)->i_lfs_iflags & LFSI_BMAP) { mutex_enter(vp->v_interlock); if (vget(vp, LK_NOWAIT) == 0) { if (! vrecycle(vp)) vrele(vp); } } numrefed--; } /* * Start a new file */ lastino = blkp->bi_inode; if (blkp->bi_inode == LFS_IFILE_INUM) v_daddr = fs->lfs_idaddr; else { LFS_IENTRY(ifp, fs, blkp->bi_inode, bp); v_daddr = ifp->if_daddr; brelse(bp, 0); } if (v_daddr == LFS_UNUSED_DADDR) { blkp->bi_daddr = LFS_UNUSED_DADDR; continue; } /* * A regular call to VFS_VGET could deadlock * here. Instead, we try an unlocked access. */ mutex_enter(&ulfs_ihash_lock); vp = ulfs_ihashlookup(ump->um_dev, blkp->bi_inode); if (vp != NULL && !(vp->v_iflag & VI_XLOCK)) { ip = VTOI(vp); mutex_enter(vp->v_interlock); mutex_exit(&ulfs_ihash_lock); if (lfs_vref(vp)) { v_daddr = LFS_UNUSED_DADDR; continue; } numrefed++; } else { mutex_exit(&ulfs_ihash_lock); /* * Don't VFS_VGET if we're being unmounted, * since we hold vfs_busy(). */ if (mntp->mnt_iflag & IMNT_UNMOUNT) { v_daddr = LFS_UNUSED_DADDR; continue; } error = VFS_VGET(mntp, blkp->bi_inode, &vp); if (error) { DLOG((DLOG_CLEAN, "lfs_bmapv: vget ino" "%d failed with %d", blkp->bi_inode,error)); v_daddr = LFS_UNUSED_DADDR; continue; } else { KASSERT(VOP_ISLOCKED(vp)); VTOI(vp)->i_lfs_iflags |= LFSI_BMAP; VOP_UNLOCK(vp); numrefed++; } } ip = VTOI(vp); } else if (v_daddr == LFS_UNUSED_DADDR) { /* * This can only happen if the vnode is dead. * Keep going. Note that we DO NOT set the * bi_addr to anything -- if we failed to get * the vnode, for example, we want to assume * conservatively that all of its blocks *are* * located in the segment in question. * lfs_markv will throw them out if we are * wrong. */ /* blkp->bi_daddr = LFS_UNUSED_DADDR; */ continue; } /* Past this point we are guaranteed that vp, ip are valid. */ if (blkp->bi_lbn == LFS_UNUSED_LBN) { /* * We just want the inode address, which is * conveniently in v_daddr. */ blkp->bi_daddr = v_daddr; } else { daddr_t bi_daddr; /* XXX ondisk32 */ error = VOP_BMAP(vp, blkp->bi_lbn, NULL, &bi_daddr, NULL); if (error) { blkp->bi_daddr = LFS_UNUSED_DADDR; continue; } blkp->bi_daddr = LFS_DBTOFSB(fs, bi_daddr); /* Fill in the block size, too */ if (blkp->bi_lbn >= 0) blkp->bi_size = lfs_blksize(fs, ip, blkp->bi_lbn); else blkp->bi_size = fs->lfs_bsize; } } /* * Finish the old file, if there was one. The presence * of a usable vnode in vp is signaled by a valid v_daddr. */ if (v_daddr != LFS_UNUSED_DADDR) { lfs_vunref(vp); /* Recycle as above. */ if (ip->i_lfs_iflags & LFSI_BMAP) { mutex_enter(vp->v_interlock); if (vget(vp, LK_NOWAIT) == 0) { if (! 
vrecycle(vp)) vrele(vp); } } numrefed--; } #ifdef DIAGNOSTIC if (numrefed != 0) panic("lfs_bmapv: numrefed=%d", numrefed); #endif vfs_unbusy(mntp, false, NULL); return 0; }
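/*
 * Compatibility flavor of the lfs_bmapv entry point: the caller passes
 * an array of old-style BLOCK_INFO_15 records, which are converted
 * field by field to and from the kernel's current BLOCK_INFO layout
 * around the call to lfs_bmapv().
 */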
int sys_lfs_bmapv(struct lwp *l, const struct sys_lfs_bmapv_args *uap, register_t *retval) { /* { syscallarg(fsid_t *) fsidp; syscallarg(struct block_info *) blkiov; syscallarg(int) blkcnt; } */ BLOCK_INFO *blkiov; BLOCK_INFO_15 *blkiov15; int i, blkcnt, error; fsid_t fsid; struct lfs *fs; struct mount *mntp; error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_LFS, KAUTH_REQ_SYSTEM_LFS_BMAPV, NULL, NULL, NULL); if (error) return (error); if ((error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t))) != 0) return (error); if ((mntp = vfs_getvfs(&fsid)) == NULL) return (ENOENT); fs = VFSTOULFS(mntp)->um_lfs; blkcnt = SCARG(uap, blkcnt); if ((size_t) blkcnt > SIZE_T_MAX / sizeof(BLOCK_INFO)) return (EINVAL); KERNEL_LOCK(1, NULL); blkiov = lfs_malloc(fs, blkcnt * sizeof(BLOCK_INFO), LFS_NB_BLKIOV); blkiov15 = lfs_malloc(fs, blkcnt * sizeof(BLOCK_INFO_15), LFS_NB_BLKIOV); if ((error = copyin(SCARG(uap, blkiov), blkiov15, blkcnt * sizeof(BLOCK_INFO_15))) != 0) goto out; for (i = 0; i < blkcnt; i++) { blkiov[i].bi_inode = blkiov15[i].bi_inode; blkiov[i].bi_lbn = blkiov15[i].bi_lbn; blkiov[i].bi_daddr = blkiov15[i].bi_daddr; blkiov[i].bi_segcreate = blkiov15[i].bi_segcreate; blkiov[i].bi_version = blkiov15[i].bi_version; blkiov[i].bi_bp = blkiov15[i].bi_bp; blkiov[i].bi_size = blkiov15[i].bi_size; } if ((error = lfs_bmapv(l->l_proc, &fsid, blkiov, blkcnt)) == 0) { for (i = 0; i < blkcnt; i++) { blkiov15[i].bi_inode = blkiov[i].bi_inode; blkiov15[i].bi_lbn = blkiov[i].bi_lbn; blkiov15[i].bi_daddr = blkiov[i].bi_daddr; blkiov15[i].bi_segcreate = blkiov[i].bi_segcreate; blkiov15[i].bi_version = blkiov[i].bi_version; blkiov15[i].bi_bp = blkiov[i].bi_bp; blkiov15[i].bi_size = blkiov[i].bi_size; } copyout(blkiov15, SCARG(uap, blkiov), blkcnt * sizeof(BLOCK_INFO_15)); } out: lfs_free(fs, blkiov, LFS_NB_BLKIOV); lfs_free(fs, blkiov15, LFS_NB_BLKIOV); KERNEL_UNLOCK_ONE(NULL); return error; }
/*
 * Vnode op for reading directories.
 *
 * This routine handles converting from the on-disk directory format
 * "struct lfs_direct" to the in-memory format "struct dirent" as well as
 * byte swapping the entries if necessary.
 */
int
ulfs_readdir(void *v)
{
	struct vop_readdir_args /* {
		struct vnode	*a_vp;
		struct uio	*a_uio;
		kauth_cred_t	a_cred;
		int		*a_eofflag;
		off_t		**a_cookies;
		int		*a_ncookies;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct lfs_direct *cdp, *ecdp;
	struct dirent *ndp;
	char *cdbuf, *ndbuf, *endp;
	struct uio auio, *uio;
	struct iovec aiov;
	int error;
	size_t count, ccount, rcount, cdbufsz, ndbufsz;
	off_t off, *ccp;
	off_t startoff;
	size_t skipbytes;
	struct ulfsmount *ump = VFSTOULFS(vp->v_mount);
	struct lfs *fs = ump->um_lfs;
	int nswap = ULFS_MPNEEDSWAP(fs);
#if BYTE_ORDER == LITTLE_ENDIAN
	int needswap = fs->um_maxsymlinklen <= 0 && nswap == 0;
#else
	int needswap = fs->um_maxsymlinklen <= 0 && nswap != 0;
#endif
	uio = ap->a_uio;
	count = uio->uio_resid;
	rcount = count - ((uio->uio_offset + count) & (fs->um_dirblksiz - 1));

	if (rcount < _DIRENT_MINSIZE(cdp) || count < _DIRENT_MINSIZE(ndp))
		return EINVAL;

	startoff = uio->uio_offset & ~(fs->um_dirblksiz - 1);
	skipbytes = uio->uio_offset - startoff;
	rcount += skipbytes;

	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = startoff;
	auio.uio_resid = rcount;
	UIO_SETUP_SYSSPACE(&auio);
	auio.uio_rw = UIO_READ;
	cdbufsz = rcount;
	cdbuf = kmem_alloc(cdbufsz, KM_SLEEP);
	aiov.iov_base = cdbuf;
	aiov.iov_len = rcount;
	error = VOP_READ(vp, &auio, 0, ap->a_cred);
	if (error != 0) {
		kmem_free(cdbuf, cdbufsz);
		return error;
	}
	rcount -= auio.uio_resid;

	cdp = (struct lfs_direct *)(void *)cdbuf;
	ecdp = (struct lfs_direct *)(void *)&cdbuf[rcount];

	ndbufsz = count;
	ndbuf = kmem_alloc(ndbufsz, KM_SLEEP);
	ndp = (struct dirent *)(void *)ndbuf;
	endp = &ndbuf[count];

	off = uio->uio_offset;
	if (ap->a_cookies) {
		ccount = rcount / _DIRENT_RECLEN(cdp, 1);
		ccp = *(ap->a_cookies) = malloc(ccount * sizeof(*ccp),
		    M_TEMP, M_WAITOK);
	} else {
		/* XXX: GCC */
		ccount = 0;
		ccp = NULL;
	}

	while (cdp < ecdp) {
		cdp->d_reclen = ulfs_rw16(cdp->d_reclen, nswap);
		if (skipbytes > 0) {
			if (cdp->d_reclen <= skipbytes) {
				skipbytes -= cdp->d_reclen;
				cdp = _DIRENT_NEXT(cdp);
				continue;
			}
			/*
			 * invalid cookie.
			 */
			error = EINVAL;
			goto out;
		}
		if (cdp->d_reclen == 0) {
			struct dirent *ondp = ndp;
			ndp->d_reclen = _DIRENT_MINSIZE(ndp);
			ndp = _DIRENT_NEXT(ndp);
			ondp->d_reclen = 0;
			cdp = ecdp;
			break;
		}
		if (needswap) {
			ndp->d_type = cdp->d_namlen;
			ndp->d_namlen = cdp->d_type;
		} else {
			ndp->d_type = cdp->d_type;
			ndp->d_namlen = cdp->d_namlen;
		}
		ndp->d_reclen = _DIRENT_RECLEN(ndp, ndp->d_namlen);
		if ((char *)(void *)ndp + ndp->d_reclen +
		    _DIRENT_MINSIZE(ndp) > endp)
			break;
		ndp->d_fileno = ulfs_rw32(cdp->d_ino, nswap);
		(void)memcpy(ndp->d_name, cdp->d_name, ndp->d_namlen);
		memset(&ndp->d_name[ndp->d_namlen], 0,
		    ndp->d_reclen - _DIRENT_NAMEOFF(ndp) - ndp->d_namlen);
		off += cdp->d_reclen;
		if (ap->a_cookies) {
			KASSERT(ccp - *(ap->a_cookies) < ccount);
			*(ccp++) = off;
		}
		ndp = _DIRENT_NEXT(ndp);
		cdp = _DIRENT_NEXT(cdp);
	}

	count = ((char *)(void *)ndp - ndbuf);
	error = uiomove(ndbuf, count, uio);
out:
	if (ap->a_cookies) {
		if (error) {
			free(*(ap->a_cookies), M_TEMP);
			*(ap->a_cookies) = NULL;
			*(ap->a_ncookies) = 0;
		} else {
			*ap->a_ncookies = ccp - *(ap->a_cookies);
		}
	}
	uio->uio_offset = off;
	kmem_free(ndbuf, ndbufsz);
	kmem_free(cdbuf, cdbufsz);
	*ap->a_eofflag = VTOI(vp)->i_size <= uio->uio_offset;
	return error;
}
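/*
 * lfs_fastvget:
 *
 *	Get a vnode/inode on behalf of the cleaner without going through
 *	VFS_VGET.  If dinp is non-NULL the inode contents are copied in
 *	from the cleaner's buffer; otherwise they are read from disk at
 *	daddr.  On success the vnode is returned unlocked.
 */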
int lfs_fastvget(struct mount *mp, ino_t ino, daddr_t daddr, struct vnode **vpp, struct ulfs1_dinode *dinp) { struct inode *ip; struct ulfs1_dinode *dip; struct vnode *vp; struct ulfsmount *ump; dev_t dev; int error, retries; struct buf *bp; struct lfs *fs; ump = VFSTOULFS(mp); dev = ump->um_dev; fs = ump->um_lfs; /* * Wait until the filesystem is fully mounted before allowing vget * to complete. This prevents possible problems with roll-forward. */ mutex_enter(&lfs_lock); while (fs->lfs_flags & LFS_NOTYET) { mtsleep(&fs->lfs_flags, PRIBIO+1, "lfs_fnotyet", 0, &lfs_lock); } mutex_exit(&lfs_lock); /* * This is playing fast and loose. Someone may have the inode * locked, in which case they are going to be distinctly unhappy * if we trash something. */ error = lfs_fasthashget(dev, ino, vpp); if (error != 0 || *vpp != NULL) return (error); /* * getnewvnode(9) will call vfs_busy, which will block if the * filesystem is being unmounted; but umount(9) is waiting for * us because we're already holding the fs busy. * XXXMP */ if (mp->mnt_iflag & IMNT_UNMOUNT) { *vpp = NULL; return EDEADLK; } error = getnewvnode(VT_LFS, mp, lfs_vnodeop_p, NULL, &vp); if (error) { *vpp = NULL; return (error); } mutex_enter(&ulfs_hashlock); error = lfs_fasthashget(dev, ino, vpp); if (error != 0 || *vpp != NULL) { mutex_exit(&ulfs_hashlock); ungetnewvnode(vp); return (error); } /* Allocate new vnode/inode. */ lfs_vcreate(mp, ino, vp); /* * Put it onto its hash chain and lock it so that other requests for * this inode will block if they arrive while we are sleeping waiting * for old data structures to be purged or for the contents of the * disk portion of this inode to be read. */ ip = VTOI(vp); ulfs_ihashins(ip); mutex_exit(&ulfs_hashlock); #ifdef notyet /* Not found in the cache => this vnode was loaded only for cleaning. */ ip->i_lfs_iflags |= LFSI_BMAP; #endif /* * XXX * This may not need to be here, logically it should go down with * the i_devvp initialization. * Ask Kirk. */ ip->i_lfs = fs; /* Read in the disk contents for the inode, copy into the inode. */ if (dinp) { error = copyin(dinp, ip->i_din.ffs1_din, sizeof (struct ulfs1_dinode)); if (error) { DLOG((DLOG_CLEAN, "lfs_fastvget: dinode copyin failed" " for ino %d\n", ino)); ulfs_ihashrem(ip); /* Unlock and discard unneeded inode. */ VOP_UNLOCK(vp); lfs_vunref(vp); *vpp = NULL; return (error); } if (ip->i_number != ino) panic("lfs_fastvget: I was fed the wrong inode!"); } else { retries = 0; again: error = bread(ump->um_devvp, LFS_FSBTODB(fs, daddr), fs->lfs_ibsize, NOCRED, 0, &bp); if (error) { DLOG((DLOG_CLEAN, "lfs_fastvget: bread failed (%d)\n", error)); /* * The inode does not contain anything useful, so it * would be misleading to leave it on its hash chain. * Iput() will return it to the free list. */ ulfs_ihashrem(ip); /* Unlock and discard unneeded inode. */ VOP_UNLOCK(vp); lfs_vunref(vp); *vpp = NULL; return (error); } dip = lfs_ifind(ump->um_lfs, ino, bp); if (dip == NULL) { /* Assume write has not completed yet; try again */ brelse(bp, BC_INVAL); ++retries; if (retries > LFS_IFIND_RETRIES) panic("lfs_fastvget: dinode not found"); DLOG((DLOG_CLEAN, "lfs_fastvget: dinode not found," " retrying...\n")); goto again; } *ip->i_din.ffs1_din = *dip; brelse(bp, 0); } lfs_vinit(mp, &vp); *vpp = vp; KASSERT(VOP_ISLOCKED(vp)); VOP_UNLOCK(vp); return (0); }
/* * Write a directory entry after a call to namei, using the parameters * that ulfs_lookup left in nameidata and in the ulfs_lookup_results. * * DVP is the directory to be updated. It must be locked. * ULR is the ulfs_lookup_results structure from the final lookup step. * TVP is not used. (XXX: why is it here? remove it) * DIRP is the new directory entry contents. * CNP is the componentname from the final lookup step. * NEWDIRBP is not used and (XXX) should be removed. The previous * comment here said it was used by the now-removed softupdates code. * * The link count of the target inode is *not* incremented; the * caller does that. * * If ulr->ulr_count is 0, ulfs_lookup did not find space to insert the * directory entry. ulr_offset, which is the place to put the entry, * should be on a block boundary (and should be at the end of the * directory AFAIK) and a fresh block is allocated to put the new * directory entry in. * * If ulr->ulr_count is not zero, ulfs_lookup found a slot to insert * the entry into. This slot ranges from ulr_offset to ulr_offset + * ulr_count. However, this slot may already be partially populated * requiring compaction. See notes below. * * Furthermore, if ulr_count is not zero and ulr_endoff is not the * same as i_size, the directory is truncated to size ulr_endoff. */ int ulfs_direnter(struct vnode *dvp, const struct ulfs_lookup_results *ulr, struct vnode *tvp, struct lfs_direct *dirp, struct componentname *cnp, struct buf *newdirbp) { kauth_cred_t cr; int newentrysize; struct inode *dp; struct buf *bp; u_int dsize; struct lfs_direct *ep, *nep; int error, ret, lfs_blkoff, loc, spacefree; char *dirbuf; struct timespec ts; struct ulfsmount *ump = VFSTOULFS(dvp->v_mount); struct lfs *fs = ump->um_lfs; const int needswap = ULFS_MPNEEDSWAP(fs); int dirblksiz = fs->um_dirblksiz; error = 0; cr = cnp->cn_cred; dp = VTOI(dvp); newentrysize = LFS_DIRSIZ(0, dirp, 0); if (ulr->ulr_count == 0) { /* * If ulr_count is 0, then namei could find no * space in the directory. Here, ulr_offset will * be on a directory block boundary and we will write the * new entry into a fresh block. */ if (ulr->ulr_offset & (dirblksiz - 1)) panic("ulfs_direnter: newblk"); if ((error = lfs_balloc(dvp, (off_t)ulr->ulr_offset, dirblksiz, cr, B_CLRBUF | B_SYNC, &bp)) != 0) { return (error); } dp->i_size = ulr->ulr_offset + dirblksiz; DIP_ASSIGN(dp, size, dp->i_size); dp->i_flag |= IN_CHANGE | IN_UPDATE; uvm_vnp_setsize(dvp, dp->i_size); dirp->d_reclen = ulfs_rw16(dirblksiz, needswap); dirp->d_ino = ulfs_rw32(dirp->d_ino, needswap); if (FSFMT(dvp)) { #if (BYTE_ORDER == LITTLE_ENDIAN) if (needswap == 0) { #else if (needswap != 0) { #endif u_char tmp = dirp->d_namlen; dirp->d_namlen = dirp->d_type; dirp->d_type = tmp; } } lfs_blkoff = ulr->ulr_offset & (ump->um_mountp->mnt_stat.f_iosize - 1); memcpy((char *)bp->b_data + lfs_blkoff, dirp, newentrysize); #ifdef LFS_DIRHASH if (dp->i_dirhash != NULL) { ulfsdirhash_newblk(dp, ulr->ulr_offset); ulfsdirhash_add(dp, dirp, ulr->ulr_offset); ulfsdirhash_checkblock(dp, (char *)bp->b_data + lfs_blkoff, ulr->ulr_offset); } #endif error = VOP_BWRITE(bp->b_vp, bp); vfs_timestamp(&ts); ret = lfs_update(dvp, &ts, &ts, UPDATE_DIROP); if (error == 0) return (ret); return (error); } /* * If ulr_count is non-zero, then namei found space for the new * entry in the range ulr_offset to ulr_offset + ulr_count * in the directory. 
To use this space, we may have to compact * the entries located there, by copying them together towards the * beginning of the block, leaving the free space in one usable * chunk at the end. */ /* * Increase size of directory if entry eats into new space. * This should never push the size past a new multiple of * DIRBLKSIZ. * * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN. */ if (ulr->ulr_offset + ulr->ulr_count > dp->i_size) { #ifdef DIAGNOSTIC printf("ulfs_direnter: reached 4.2-only block, " "not supposed to happen\n"); #endif dp->i_size = ulr->ulr_offset + ulr->ulr_count; DIP_ASSIGN(dp, size, dp->i_size); dp->i_flag |= IN_CHANGE | IN_UPDATE; } /* * Get the block containing the space for the new directory entry. */ error = ulfs_blkatoff(dvp, (off_t)ulr->ulr_offset, &dirbuf, &bp, true); if (error) { return (error); } /* * Find space for the new entry. In the simple case, the entry at * offset base will have the space. If it does not, then namei * arranged that compacting the region ulr_offset to * ulr_offset + ulr_count would yield the space. */ ep = (struct lfs_direct *)dirbuf; dsize = (ep->d_ino != 0) ? LFS_DIRSIZ(FSFMT(dvp), ep, needswap) : 0; spacefree = ulfs_rw16(ep->d_reclen, needswap) - dsize; for (loc = ulfs_rw16(ep->d_reclen, needswap); loc < ulr->ulr_count; ) { uint16_t reclen; nep = (struct lfs_direct *)(dirbuf + loc); /* Trim the existing slot (NB: dsize may be zero). */ ep->d_reclen = ulfs_rw16(dsize, needswap); ep = (struct lfs_direct *)((char *)ep + dsize); reclen = ulfs_rw16(nep->d_reclen, needswap); loc += reclen; if (nep->d_ino == 0) { /* * A mid-block unused entry. Such entries are * never created by the kernel, but fsck_ffs * can create them (and it doesn't fix them). * * Add up the free space, and initialise the * relocated entry since we don't memcpy it. */ spacefree += reclen; ep->d_ino = 0; dsize = 0; continue; } dsize = LFS_DIRSIZ(FSFMT(dvp), nep, needswap); spacefree += reclen - dsize; #ifdef LFS_DIRHASH if (dp->i_dirhash != NULL) ulfsdirhash_move(dp, nep, ulr->ulr_offset + ((char *)nep - dirbuf), ulr->ulr_offset + ((char *)ep - dirbuf)); #endif memcpy((void *)ep, (void *)nep, dsize); } /* * Here, `ep' points to a directory entry containing `dsize' in-use * bytes followed by `spacefree' unused bytes. If ep->d_ino == 0, * then the entry is completely unused (dsize == 0). The value * of ep->d_reclen is always indeterminate. * * Update the pointer fields in the previous entry (if any), * copy in the new entry, and write out the block. 
*/ if (ep->d_ino == 0 || (ulfs_rw32(ep->d_ino, needswap) == ULFS_WINO && memcmp(ep->d_name, dirp->d_name, dirp->d_namlen) == 0)) { if (spacefree + dsize < newentrysize) panic("ulfs_direnter: compact1"); dirp->d_reclen = spacefree + dsize; } else { if (spacefree < newentrysize) panic("ulfs_direnter: compact2"); dirp->d_reclen = spacefree; ep->d_reclen = ulfs_rw16(dsize, needswap); ep = (struct lfs_direct *)((char *)ep + dsize); } dirp->d_reclen = ulfs_rw16(dirp->d_reclen, needswap); dirp->d_ino = ulfs_rw32(dirp->d_ino, needswap); if (FSFMT(dvp)) { #if (BYTE_ORDER == LITTLE_ENDIAN) if (needswap == 0) { #else if (needswap != 0) { #endif u_char tmp = dirp->d_namlen; dirp->d_namlen = dirp->d_type; dirp->d_type = tmp; } } #ifdef LFS_DIRHASH if (dp->i_dirhash != NULL && (ep->d_ino == 0 || dirp->d_reclen == spacefree)) ulfsdirhash_add(dp, dirp, ulr->ulr_offset + ((char *)ep - dirbuf)); #endif memcpy((void *)ep, (void *)dirp, (u_int)newentrysize); #ifdef LFS_DIRHASH if (dp->i_dirhash != NULL) ulfsdirhash_checkblock(dp, dirbuf - (ulr->ulr_offset & (dirblksiz - 1)), ulr->ulr_offset & ~(dirblksiz - 1)); #endif error = VOP_BWRITE(bp->b_vp, bp); dp->i_flag |= IN_CHANGE | IN_UPDATE; /* * If all went well, and the directory can be shortened, proceed * with the truncation. Note that we have to unlock the inode for * the entry that we just entered, as the truncation may need to * lock other inodes which can lead to deadlock if we also hold a * lock on the newly entered node. */ if (error == 0 && ulr->ulr_endoff && ulr->ulr_endoff < dp->i_size) { #ifdef LFS_DIRHASH if (dp->i_dirhash != NULL) ulfsdirhash_dirtrunc(dp, ulr->ulr_endoff); #endif (void) lfs_truncate(dvp, (off_t)ulr->ulr_endoff, IO_SYNC, cr); } return (error); } /* * Remove a directory entry after a call to namei, using the * parameters that ulfs_lookup left in nameidata and in the * ulfs_lookup_results. * * DVP is the directory to be updated. It must be locked. * ULR is the ulfs_lookup_results structure from the final lookup step. * IP, if not null, is the inode being unlinked. * FLAGS may contain DOWHITEOUT. * ISRMDIR is not used and (XXX) should be removed. * * If FLAGS contains DOWHITEOUT the entry is replaced with a whiteout * instead of being cleared. * * ulr->ulr_offset contains the position of the directory entry * to be removed. * * ulr->ulr_reclen contains the size of the directory entry to be * removed. * * ulr->ulr_count contains the size of the *previous* directory * entry. This allows finding it, for free space management. If * ulr_count is 0, the target entry is at the beginning of the * directory. (Does this ever happen? The first entry should be ".", * which should only be removed at rmdir time. Does rmdir come here * to clear out the "." and ".." entries? Perhaps, but I doubt it.) * * The space is marked free by adding it to the record length (not * name length) of the preceding entry. If the first entry becomes * free, it is marked free by setting the inode number to 0. * * The link count of IP is decremented. Note that this is not the * inverse behavior of ulfs_direnter, which does not adjust link * counts. Sigh. */ int ulfs_dirremove(struct vnode *dvp, const struct ulfs_lookup_results *ulr, struct inode *ip, int flags, int isrmdir) { struct inode *dp = VTOI(dvp); struct lfs_direct *ep; struct buf *bp; int error; const int needswap = ULFS_MPNEEDSWAP(dp->i_lfs); if (flags & DOWHITEOUT) { /* * Whiteout entry: set d_ino to ULFS_WINO. 
*/ error = ulfs_blkatoff(dvp, (off_t)ulr->ulr_offset, (void *)&ep, &bp, true); if (error) return (error); ep->d_ino = ulfs_rw32(ULFS_WINO, needswap); ep->d_type = LFS_DT_WHT; goto out; } if ((error = ulfs_blkatoff(dvp, (off_t)(ulr->ulr_offset - ulr->ulr_count), (void *)&ep, &bp, true)) != 0) return (error); #ifdef LFS_DIRHASH /* * Remove the dirhash entry. This is complicated by the fact * that `ep' is the previous entry when ulr_count != 0. */ if (dp->i_dirhash != NULL) ulfsdirhash_remove(dp, (ulr->ulr_count == 0) ? ep : (struct lfs_direct *)((char *)ep + ulfs_rw16(ep->d_reclen, needswap)), ulr->ulr_offset); #endif if (ulr->ulr_count == 0) { /* * First entry in block: set d_ino to zero. */ ep->d_ino = 0; } else { /* * Collapse new free space into previous entry. */ ep->d_reclen = ulfs_rw16(ulfs_rw16(ep->d_reclen, needswap) + ulr->ulr_reclen, needswap); } #ifdef LFS_DIRHASH if (dp->i_dirhash != NULL) { int dirblksiz = ip->i_lfs->um_dirblksiz; ulfsdirhash_checkblock(dp, (char *)ep - ((ulr->ulr_offset - ulr->ulr_count) & (dirblksiz - 1)), ulr->ulr_offset & ~(dirblksiz - 1)); } #endif out: if (ip) { ip->i_nlink--; DIP_ASSIGN(ip, nlink, ip->i_nlink); ip->i_flag |= IN_CHANGE; } /* * XXX did it ever occur to anyone that it might be a good * idea to restore ip->i_nlink if this fails? Or something? * Currently on error return from this function the state of * ip->i_nlink depends on what happened, and callers * definitely do not take this into account. */ error = VOP_BWRITE(bp->b_vp, bp); dp->i_flag |= IN_CHANGE | IN_UPDATE; /* * If the last named reference to a snapshot goes away, * drop its snapshot reference so that it will be reclaimed * when last open reference goes away. */ if (ip != 0 && (ip->i_flags & SF_SNAPSHOT) != 0 && ip->i_nlink == 0) ulfs_snapgone(ip); return (error); } /* * Rewrite an existing directory entry to point at the inode supplied. * * DP is the directory to update. * OFFSET is the position of the entry in question. It may come * from ulr_offset of a ulfs_lookup_results. * OIP is the old inode the directory previously pointed to. * NEWINUM is the number of the new inode. * NEWTYPE is the new value for the type field of the directory entry. * (This is ignored if the fs doesn't support that.) * ISRMDIR is not used and (XXX) should be removed. * IFLAGS are added to DP's inode flags. * * The link count of OIP is decremented. Note that the link count of * the new inode is *not* incremented. Yay for symmetry. */ int ulfs_dirrewrite(struct inode *dp, off_t offset, struct inode *oip, ino_t newinum, int newtype, int isrmdir, int iflags) { struct buf *bp; struct lfs_direct *ep; struct vnode *vdp = ITOV(dp); int error; error = ulfs_blkatoff(vdp, offset, (void *)&ep, &bp, true); if (error) return (error); ep->d_ino = ulfs_rw32(newinum, ULFS_IPNEEDSWAP(dp)); if (!FSFMT(vdp)) ep->d_type = newtype; oip->i_nlink--; DIP_ASSIGN(oip, nlink, oip->i_nlink); oip->i_flag |= IN_CHANGE; error = VOP_BWRITE(bp->b_vp, bp); dp->i_flag |= iflags; /* * If the last named reference to a snapshot goes away, * drop its snapshot reference so that it will be reclaimed * when last open reference goes away. */ if ((oip->i_flags & SF_SNAPSHOT) != 0 && oip->i_nlink == 0) ulfs_snapgone(oip); return (error); } /* * Check if a directory is empty or not. * Inode supplied must be locked. * * Using a struct lfs_dirtemplate here is not precisely * what we want, but better than using a struct lfs_direct. * * NB: does not handle corrupted directories. 
*/ int ulfs_dirempty(struct inode *ip, ino_t parentino, kauth_cred_t cred) { doff_t off; struct lfs_dirtemplate dbuf; struct lfs_direct *dp = (struct lfs_direct *)&dbuf; int error, namlen; size_t count; const int needswap = ULFS_IPNEEDSWAP(ip); #define MINDIRSIZ (sizeof (struct lfs_dirtemplate) / 2) for (off = 0; off < ip->i_size; off += ulfs_rw16(dp->d_reclen, needswap)) { error = vn_rdwr(UIO_READ, ITOV(ip), (void *)dp, MINDIRSIZ, off, UIO_SYSSPACE, IO_NODELOCKED, cred, &count, NULL); /* * Since we read MINDIRSIZ, residual must * be 0 unless we're at end of file. */ if (error || count != 0) return (0); /* avoid infinite loops */ if (dp->d_reclen == 0) return (0); /* skip empty entries */ if (dp->d_ino == 0 || ulfs_rw32(dp->d_ino, needswap) == ULFS_WINO) continue; /* accept only "." and ".." */ #if (BYTE_ORDER == LITTLE_ENDIAN) if (FSFMT(ITOV(ip)) && needswap == 0) namlen = dp->d_type; else namlen = dp->d_namlen; #else if (FSFMT(ITOV(ip)) && needswap != 0) namlen = dp->d_type; else namlen = dp->d_namlen; #endif if (namlen > 2) return (0); if (dp->d_name[0] != '.') return (0); /* * At this point namlen must be 1 or 2. * 1 implies ".", 2 implies ".." if second * char is also "." */ if (namlen == 1 && ulfs_rw32(dp->d_ino, needswap) == ip->i_number) continue; if (dp->d_name[1] == '.' && ulfs_rw32(dp->d_ino, needswap) == parentino) continue; return (0); } return (1); } #define ULFS_DIRRABLKS 0 int ulfs_dirrablks = ULFS_DIRRABLKS; /* * ulfs_blkatoff: Return buffer with the contents of block "offset" from * the beginning of directory "vp". If "res" is non-NULL, fill it in with * a pointer to the remaining space in the directory. If the caller intends * to modify the buffer returned, "modify" must be true. */ int ulfs_blkatoff(struct vnode *vp, off_t offset, char **res, struct buf **bpp, bool modify) { struct inode *ip __diagused; struct buf *bp; daddr_t lbn; const int dirrablks = ulfs_dirrablks; daddr_t *blks; int *blksizes; int run, error; struct mount *mp = vp->v_mount; const int bshift = mp->mnt_fs_bshift; const int bsize = 1 << bshift; off_t eof; blks = kmem_alloc((1 + dirrablks) * sizeof(daddr_t), KM_SLEEP); blksizes = kmem_alloc((1 + dirrablks) * sizeof(int), KM_SLEEP); ip = VTOI(vp); KASSERT(vp->v_size == ip->i_size); GOP_SIZE(vp, vp->v_size, &eof, 0); lbn = offset >> bshift; for (run = 0; run <= dirrablks;) { const off_t curoff = lbn << bshift; const int size = MIN(eof - curoff, bsize); if (size == 0) { break; } KASSERT(curoff < eof); blks[run] = lbn; blksizes[run] = size; lbn++; run++; if (size != bsize) { break; } } KASSERT(run >= 1); error = breadn(vp, blks[0], blksizes[0], &blks[1], &blksizes[1], run - 1, NOCRED, (modify ? B_MODIFY : 0), &bp); if (error != 0) { *bpp = NULL; goto out; } if (res) { *res = (char *)bp->b_data + (offset & (bsize - 1)); } *bpp = bp; out: kmem_free(blks, (1 + dirrablks) * sizeof(daddr_t)); kmem_free(blksizes, (1 + dirrablks) * sizeof(int)); return error; }
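/*
 * Illustrative only: the typical ulfs_blkatoff() calling pattern used by
 * the directory routines above -- fetch the buffer holding a directory
 * offset, edit the entry in place, then write the buffer back.  The
 * example function name is hypothetical; the caller holds dvp locked.
 */
#if 0
static int
edit_dirent_example(struct vnode *dvp, const struct ulfs_lookup_results *ulr)
{
	struct lfs_direct *ep;
	struct buf *bp;
	int error;

	error = ulfs_blkatoff(dvp, (off_t)ulr->ulr_offset, (void *)&ep,
	    &bp, true);
	if (error)
		return (error);
	/* ... modify *ep in place ... */
	return VOP_BWRITE(bp->b_vp, bp);
}
#endif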
/* * ulfs_rename_recalculate_fulr: If we have just entered a directory into * dvp at tulr, and we were about to remove one at fulr for an entry * named fcnp, fulr may be invalid. So, if necessary, recalculate it. */ static int ulfs_rename_recalculate_fulr(struct vnode *dvp, struct ulfs_lookup_results *fulr, const struct ulfs_lookup_results *tulr, const struct componentname *fcnp) { struct mount *mp; struct lfs *fs; struct ulfsmount *ump; int needswap; /* XXX int is a silly type for this; blame ulfsmount::um_dirblksiz. */ int dirblksiz; doff_t search_start, search_end; doff_t offset; /* Offset of entry we're examining. */ struct buf *bp; /* I/O block we're examining. */ char *dirbuf; /* Pointer into directory at search_start. */ struct lfs_direct *ep; /* Pointer to the entry we're examining. */ /* XXX direct::d_reclen is 16-bit; * ulfs_lookup_results::ulr_reclen is 32-bit. Blah. */ uint32_t reclen; /* Length of the entry we're examining. */ uint32_t prev_reclen; /* Length of the preceding entry. */ int error; KASSERT(dvp != NULL); KASSERT(dvp->v_mount != NULL); KASSERT(VTOI(dvp) != NULL); KASSERT(fulr != NULL); KASSERT(tulr != NULL); KASSERT(fulr != tulr); KASSERT(ulfs_rename_ulr_overlap_p(fulr, tulr)); mp = dvp->v_mount; ump = VFSTOULFS(mp); fs = ump->um_lfs; KASSERT(ump != NULL); KASSERT(ump == VTOI(dvp)->i_ump); KASSERT(fs == VTOI(dvp)->i_lfs); needswap = ULFS_MPNEEDSWAP(fs); dirblksiz = fs->um_dirblksiz; KASSERT(0 < dirblksiz); KASSERT((dirblksiz & (dirblksiz - 1)) == 0); /* A directory block may not span across multiple I/O blocks. */ KASSERT(dirblksiz <= mp->mnt_stat.f_iosize); /* Find the bounds of the search. */ search_start = tulr->ulr_offset; KASSERT(fulr->ulr_reclen < (LFS_MAXDIRSIZE - fulr->ulr_offset)); search_end = (fulr->ulr_offset + fulr->ulr_reclen); /* Compaction must happen only within a directory block. (*) */ KASSERT(search_start <= search_end); KASSERT((search_end - (search_start &~ (dirblksiz - 1))) <= dirblksiz); dirbuf = NULL; bp = NULL; error = ulfs_blkatoff(dvp, (off_t)search_start, &dirbuf, &bp, false); if (error) return error; KASSERT(dirbuf != NULL); KASSERT(bp != NULL); /* * Guarantee we sha'n't go past the end of the buffer we got. * dirbuf is bp->b_data + (search_start & (iosize - 1)), and * the valid range is [bp->b_data, bp->b_data + bp->b_bcount). */ KASSERT((search_end - search_start) <= (bp->b_bcount - (search_start & (mp->mnt_stat.f_iosize - 1)))); prev_reclen = fulr->ulr_count; offset = search_start; /* * Search from search_start to search_end for the entry matching * fcnp, which must be there because we found it before and it * should only at most have moved earlier. */ for (;;) { KASSERT(search_start <= offset); KASSERT(offset < search_end); /* * Examine the directory entry at offset. */ ep = (struct lfs_direct *)(dirbuf + (offset - search_start)); reclen = ulfs_rw16(ep->d_reclen, needswap); if (ep->d_ino == 0) goto next; /* Entry is unused. */ if (ulfs_rw32(ep->d_ino, needswap) == ULFS_WINO) goto next; /* Entry is whiteout. */ if (fcnp->cn_namelen != ulfs_direct_namlen(ep, dvp)) goto next; /* Wrong name length. */ if (memcmp(ep->d_name, fcnp->cn_nameptr, fcnp->cn_namelen)) goto next; /* Wrong name. */ /* Got it! */ break; next: if (! ((reclen < search_end) && (offset < (search_end - reclen)))) { brelse(bp, 0); return EIO; /* XXX Panic? What? */ } /* We may not move past the search end. */ KASSERT(reclen < search_end); KASSERT(offset < (search_end - reclen)); /* * We may not move across a directory block boundary; * see (*) above. 
*/ KASSERT((offset &~ (dirblksiz - 1)) == ((offset + reclen) &~ (dirblksiz - 1))); prev_reclen = reclen; offset += reclen; } /* * Found the entry. Record where. */ fulr->ulr_offset = offset; fulr->ulr_reclen = reclen; /* * Record the preceding record length, but not if we're at the * start of a directory block. */ fulr->ulr_count = ((offset & (dirblksiz - 1))? prev_reclen : 0); brelse(bp, 0); return 0; }
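/*
 * lfs_bmapv:
 *
 *	Fill in the current disk address (and size) of each block named
 *	in blkiov, so the cleaner can determine which blocks in a segment
 *	are still live.  A bi_lbn of LFS_UNUSED_LBN requests the address
 *	of the inode itself.
 */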
int lfs_bmapv(struct lwp *l, fsid_t *fsidp, BLOCK_INFO *blkiov, int blkcnt) { BLOCK_INFO *blkp; IFILE *ifp; struct buf *bp; struct inode *ip = NULL; struct lfs *fs; struct mount *mntp; struct ulfsmount *ump; struct vnode *vp; ino_t lastino; daddr_t v_daddr; int cnt, error; int numrefed = 0; error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_LFS, KAUTH_REQ_SYSTEM_LFS_BMAPV, NULL, NULL, NULL); if (error) return (error); if ((mntp = vfs_getvfs(fsidp)) == NULL) return (ENOENT); if ((error = vfs_busy(mntp, NULL)) != 0) return (error); ump = VFSTOULFS(mntp); fs = ump->um_lfs; if (fs->lfs_cleaner_thread == NULL) fs->lfs_cleaner_thread = curlwp; KASSERT(fs->lfs_cleaner_thread == curlwp); cnt = blkcnt; error = 0; /* these were inside the initialization for the for loop */ vp = NULL; v_daddr = LFS_UNUSED_DADDR; lastino = LFS_UNUSED_INUM; for (blkp = blkiov; cnt--; ++blkp) { /* * Get the IFILE entry (only once) and see if the file still * exists. */ if (lastino != blkp->bi_inode) { /* * Finish the old file, if there was one. */ if (vp != NULL) { vput(vp); vp = NULL; numrefed--; } /* * Start a new file */ lastino = blkp->bi_inode; if (blkp->bi_inode == LFS_IFILE_INUM) v_daddr = lfs_sb_getidaddr(fs); else { LFS_IENTRY(ifp, fs, blkp->bi_inode, bp); v_daddr = lfs_if_getdaddr(fs, ifp); brelse(bp, 0); } if (v_daddr == LFS_UNUSED_DADDR) { blkp->bi_daddr = LFS_UNUSED_DADDR; continue; } error = lfs_fastvget(mntp, blkp->bi_inode, NULL, LK_SHARED, &vp); if (error) { DLOG((DLOG_CLEAN, "lfs_bmapv: lfs_fastvget ino" "%d failed with %d", blkp->bi_inode,error)); KASSERT(vp == NULL); continue; } else { KASSERT(VOP_ISLOCKED(vp)); numrefed++; } ip = VTOI(vp); } else if (vp == NULL) { /* * This can only happen if the vnode is dead. * Keep going. Note that we DO NOT set the * bi_addr to anything -- if we failed to get * the vnode, for example, we want to assume * conservatively that all of its blocks *are* * located in the segment in question. * lfs_markv will throw them out if we are * wrong. */ continue; } /* Past this point we are guaranteed that vp, ip are valid. */ if (blkp->bi_lbn == LFS_UNUSED_LBN) { /* * We just want the inode address, which is * conveniently in v_daddr. */ blkp->bi_daddr = v_daddr; } else { daddr_t bi_daddr; error = VOP_BMAP(vp, blkp->bi_lbn, NULL, &bi_daddr, NULL); if (error) { blkp->bi_daddr = LFS_UNUSED_DADDR; continue; } blkp->bi_daddr = LFS_DBTOFSB(fs, bi_daddr); /* Fill in the block size, too */ if (blkp->bi_lbn >= 0) blkp->bi_size = lfs_blksize(fs, ip, blkp->bi_lbn); else blkp->bi_size = lfs_sb_getbsize(fs); } } /* * Finish the old file, if there was one. */ if (vp != NULL) { vput(vp); vp = NULL; numrefed--; } #ifdef DIAGNOSTIC if (numrefed != 0) panic("lfs_bmapv: numrefed=%d", numrefed); #endif vfs_unbusy(mntp, false, NULL); return 0; }
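/*
 * lfs_markv:
 *
 *	Mark the blocks and inodes named in blkiov dirty and rewrite them
 *	into a new segment, holding the segment lock so that the blocks
 *	cannot become invalid while we sleep.  Returns EAGAIN if some
 *	blocks could not be cleaned and the cleaner should try again.
 */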
int lfs_markv(struct proc *p, fsid_t *fsidp, BLOCK_INFO *blkiov, int blkcnt) { BLOCK_INFO *blkp; IFILE *ifp; struct buf *bp; struct inode *ip = NULL; struct lfs *fs; struct mount *mntp; struct vnode *vp = NULL; ino_t lastino; daddr_t b_daddr, v_daddr; int cnt, error; int do_again = 0; int numrefed = 0; ino_t maxino; size_t obsize; /* number of blocks/inodes that we have already bwrite'ed */ int nblkwritten, ninowritten; if ((mntp = vfs_getvfs(fsidp)) == NULL) return (ENOENT); fs = VFSTOULFS(mntp)->um_lfs; if (fs->lfs_ronly) return EROFS; maxino = (lfs_fragstoblks(fs, VTOI(fs->lfs_ivnode)->i_ffs1_blocks) - fs->lfs_cleansz - fs->lfs_segtabsz) * fs->lfs_ifpb; cnt = blkcnt; if ((error = vfs_busy(mntp, NULL)) != 0) return (error); /* * This seglock is just to prevent the fact that we might have to sleep * from allowing the possibility that our blocks might become * invalid. * * It is also important to note here that unless we specify SEGM_CKP, * any Ifile blocks that we might be asked to clean will never get * to the disk. */ lfs_seglock(fs, SEGM_CLEAN | SEGM_CKP | SEGM_SYNC); /* Mark blocks/inodes dirty. */ error = 0; /* these were inside the initialization for the for loop */ v_daddr = LFS_UNUSED_DADDR; lastino = LFS_UNUSED_INUM; nblkwritten = ninowritten = 0; for (blkp = blkiov; cnt--; ++blkp) { /* Bounds-check incoming data, avoid panic for failed VGET */ if (blkp->bi_inode <= 0 || blkp->bi_inode >= maxino) { error = EINVAL; goto err3; } /* * Get the IFILE entry (only once) and see if the file still * exists. */ if (lastino != blkp->bi_inode) { /* * Finish the old file, if there was one. The presence * of a usable vnode in vp is signaled by a valid v_daddr. */ if (v_daddr != LFS_UNUSED_DADDR) { lfs_vunref(vp); numrefed--; } /* * Start a new file */ lastino = blkp->bi_inode; if (blkp->bi_inode == LFS_IFILE_INUM) v_daddr = fs->lfs_idaddr; else { LFS_IENTRY(ifp, fs, blkp->bi_inode, bp); /* XXX fix for force write */ v_daddr = ifp->if_daddr; brelse(bp, 0); } if (v_daddr == LFS_UNUSED_DADDR) continue; /* Get the vnode/inode. */ error = lfs_fastvget(mntp, blkp->bi_inode, v_daddr, &vp, (blkp->bi_lbn == LFS_UNUSED_LBN ? blkp->bi_bp : NULL)); if (!error) { numrefed++; } if (error) { DLOG((DLOG_CLEAN, "lfs_markv: lfs_fastvget" " failed with %d (ino %d, segment %d)\n", error, blkp->bi_inode, lfs_dtosn(fs, blkp->bi_daddr))); /* * If we got EAGAIN, that means that the * Inode was locked. This is * recoverable: just clean the rest of * this segment, and let the cleaner try * again with another. (When the * cleaner runs again, this segment will * sort high on the list, since it is * now almost entirely empty.) But, we * still set v_daddr = LFS_UNUSED_ADDR * so as not to test this over and over * again. */ if (error == EAGAIN) { error = 0; do_again++; } #ifdef DIAGNOSTIC else if (error != ENOENT) panic("lfs_markv VFS_VGET FAILED"); #endif /* lastino = LFS_UNUSED_INUM; */ v_daddr = LFS_UNUSED_DADDR; vp = NULL; ip = NULL; continue; } ip = VTOI(vp); ninowritten++; } else if (v_daddr == LFS_UNUSED_DADDR) { /* * This can only happen if the vnode is dead (or * in any case we can't get it...e.g., it is * inlocked). Keep going. */ continue; } /* Past this point we are guaranteed that vp, ip are valid. */ /* Can't clean VU_DIROP directories in case of truncation */ /* XXX - maybe we should mark removed dirs specially? */ if (vp->v_type == VDIR && (vp->v_uflag & VU_DIROP)) { do_again++; continue; } /* If this BLOCK_INFO didn't contain a block, keep going. 
*/ if (blkp->bi_lbn == LFS_UNUSED_LBN) { /* XXX need to make sure that the inode gets written in this case */ /* XXX but only write the inode if it's the right one */ if (blkp->bi_inode != LFS_IFILE_INUM) { LFS_IENTRY(ifp, fs, blkp->bi_inode, bp); if (ifp->if_daddr == blkp->bi_daddr) { mutex_enter(&lfs_lock); LFS_SET_UINO(ip, IN_CLEANING); mutex_exit(&lfs_lock); } brelse(bp, 0); } continue; } b_daddr = 0; if (VOP_BMAP(vp, blkp->bi_lbn, NULL, &b_daddr, NULL) || LFS_DBTOFSB(fs, b_daddr) != blkp->bi_daddr) { if (lfs_dtosn(fs, LFS_DBTOFSB(fs, b_daddr)) == lfs_dtosn(fs, blkp->bi_daddr)) { DLOG((DLOG_CLEAN, "lfs_markv: wrong da same seg: %llx vs %llx\n", (long long)blkp->bi_daddr, (long long)LFS_DBTOFSB(fs, b_daddr))); } do_again++; continue; } /* * Check block sizes. The blocks being cleaned come from * disk, so they should have the same size as their on-disk * counterparts. */ if (blkp->bi_lbn >= 0) obsize = lfs_blksize(fs, ip, blkp->bi_lbn); else obsize = fs->lfs_bsize; /* Check for fragment size change */ if (blkp->bi_lbn >= 0 && blkp->bi_lbn < ULFS_NDADDR) { obsize = ip->i_lfs_fragsize[blkp->bi_lbn]; } if (obsize != blkp->bi_size) { DLOG((DLOG_CLEAN, "lfs_markv: ino %d lbn %lld wrong" " size (%ld != %d), try again\n", blkp->bi_inode, (long long)blkp->bi_lbn, (long) obsize, blkp->bi_size)); do_again++; continue; } /* * If we get to here, then we are keeping the block. If * it is an indirect block, we want to actually put it * in the buffer cache so that it can be updated in the * finish_meta section. If it's not, we need to * allocate a fake buffer so that writeseg can perform * the copyin and write the buffer. */ if (ip->i_number != LFS_IFILE_INUM && blkp->bi_lbn >= 0) { /* Data Block */ bp = lfs_fakebuf(fs, vp, blkp->bi_lbn, blkp->bi_size, blkp->bi_bp); /* Pretend we used bread() to get it */ bp->b_blkno = LFS_FSBTODB(fs, blkp->bi_daddr); } else { /* Indirect block or ifile */ if (blkp->bi_size != fs->lfs_bsize && ip->i_number != LFS_IFILE_INUM) panic("lfs_markv: partial indirect block?" " size=%d\n", blkp->bi_size); bp = getblk(vp, blkp->bi_lbn, blkp->bi_size, 0, 0); if (!(bp->b_oflags & (BO_DONE|BO_DELWRI))) { /* * The block in question was not found * in the cache; i.e., the block that * getblk() returned is empty. So, we * can (and should) copy in the * contents, because we've already * determined that this was the right * version of this block on disk. * * And, it can't have changed underneath * us, because we have the segment lock. */ error = copyin(blkp->bi_bp, bp->b_data, blkp->bi_size); if (error) goto err2; } } if ((error = lfs_bwrite_ext(bp, BW_CLEAN)) != 0) goto err2; nblkwritten++; /* * XXX should account indirect blocks and ifile pages as well */ if (nblkwritten + lfs_lblkno(fs, ninowritten * sizeof (struct ulfs1_dinode)) > LFS_MARKV_MAX_BLOCKS) { DLOG((DLOG_CLEAN, "lfs_markv: writing %d blks %d inos\n", nblkwritten, ninowritten)); lfs_segwrite(mntp, SEGM_CLEAN); nblkwritten = ninowritten = 0; } } /* * Finish the old file, if there was one */ if (v_daddr != LFS_UNUSED_DADDR) { lfs_vunref(vp); numrefed--; } #ifdef DIAGNOSTIC if (numrefed != 0) panic("lfs_markv: numrefed=%d", numrefed); #endif DLOG((DLOG_CLEAN, "lfs_markv: writing %d blks %d inos (check point)\n", nblkwritten, ninowritten)); /* * The last write has to be SEGM_SYNC, because of calling semantics. * It also has to be SEGM_CKP, because otherwise we could write * over the newly cleaned data contained in a checkpoint, and then * we'd be unhappy at recovery time. 
*/ lfs_segwrite(mntp, SEGM_CLEAN | SEGM_CKP | SEGM_SYNC); lfs_segunlock(fs); vfs_unbusy(mntp, false, NULL); if (error) return (error); else if (do_again) return EAGAIN; return 0; err2: DLOG((DLOG_CLEAN, "lfs_markv err2\n")); /* * XXX we're here because copyin() failed. * XXX it means that we can't trust the cleanerd. too bad. * XXX how can we recover from this? */ err3: /* * XXX should do segwrite here anyway? */ if (v_daddr != LFS_UNUSED_DADDR) { lfs_vunref(vp); --numrefed; } lfs_segunlock(fs); vfs_unbusy(mntp, false, NULL); #ifdef DIAGNOSTIC if (numrefed != 0) panic("lfs_markv: numrefed=%d", numrefed); #endif return (error); }
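/*
 * lfs_update:
 *
 *	Update inode times, and if UPDATE_WAIT is requested push the
 *	inode and its dirty blocks to disk with lfs_vflush().  Since a
 *	vnode involved in an active dirop must not be flushed early, a
 *	synchronous update first waits for the dirop to complete.
 */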
int lfs_update(struct vnode *vp, const struct timespec *acc, const struct timespec *mod, int updflags) { struct inode *ip; struct lfs *fs = VFSTOULFS(vp->v_mount)->um_lfs; int flags; ASSERT_NO_SEGLOCK(fs); if (vp->v_mount->mnt_flag & MNT_RDONLY) return (0); ip = VTOI(vp); /* * If we are called from vinvalbuf, and the file's blocks have * already been scheduled for writing, but the writes have not * yet completed, lfs_vflush will not be called, and vinvalbuf * will cause a panic. So, we must wait until any pending write * for our inode completes, if we are called with UPDATE_WAIT set. */ mutex_enter(vp->v_interlock); while ((updflags & (UPDATE_WAIT|UPDATE_DIROP)) == UPDATE_WAIT && WRITEINPROG(vp)) { DLOG((DLOG_SEG, "lfs_update: sleeping on ino %d" " (in progress)\n", ip->i_number)); cv_wait(&vp->v_cv, vp->v_interlock); } mutex_exit(vp->v_interlock); LFS_ITIMES(ip, acc, mod, NULL); if (updflags & UPDATE_CLOSE) flags = ip->i_flag & (IN_MODIFIED | IN_ACCESSED | IN_CLEANING); else flags = ip->i_flag & (IN_MODIFIED | IN_CLEANING); if (flags == 0) return (0); /* If sync, push back the vnode and any dirty blocks it may have. */ if ((updflags & (UPDATE_WAIT|UPDATE_DIROP)) == UPDATE_WAIT) { /* Avoid flushing VU_DIROP. */ mutex_enter(&lfs_lock); ++fs->lfs_diropwait; while (vp->v_uflag & VU_DIROP) { DLOG((DLOG_DIROP, "lfs_update: sleeping on inode %d" " (dirops)\n", ip->i_number)); DLOG((DLOG_DIROP, "lfs_update: vflags 0x%x, iflags" " 0x%x\n", vp->v_iflag | vp->v_vflag | vp->v_uflag, ip->i_flag)); if (fs->lfs_dirops == 0) lfs_flush_fs(fs, SEGM_SYNC); else mtsleep(&fs->lfs_writer, PRIBIO+1, "lfs_fsync", 0, &lfs_lock); /* XXX KS - by falling out here, are we writing the vn twice? */ } --fs->lfs_diropwait; mutex_exit(&lfs_lock); return lfs_vflush(vp); } return 0; }