/* * Write a directory entry after a call to namei, using the parameters * that it left in nameidata. The argument dirp is the new directory * entry contents. Dvp is a pointer to the directory to be written, * which was left locked by namei. Remaining parameters (dp->i_offset, * dp->i_count) indicate how the space for the new entry is to be obtained. * Non-null bp indicates that a directory is being created (for the * soft dependency code). */ int ufs_direnter(struct vnode *dvp, struct vnode *tvp, struct direct *dirp, struct componentname *cnp, struct buf *newdirbp) { struct ucred *cr; struct proc *p; int newentrysize; struct inode *dp; struct buf *bp; u_int dsize; struct direct *ep, *nep; int error, ret, blkoff, loc, spacefree, flags; char *dirbuf; error = 0; cr = cnp->cn_cred; p = cnp->cn_proc; dp = VTOI(dvp); newentrysize = DIRSIZ(FSFMT(dvp), dirp); if (dp->i_count == 0) { /* * If dp->i_count is 0, then namei could find no * space in the directory. Here, dp->i_offset will * be on a directory block boundary and we will write the * new entry into a fresh block. */ if (dp->i_offset & (DIRBLKSIZ - 1)) panic("ufs_direnter: newblk"); flags = B_CLRBUF; if (!DOINGSOFTDEP(dvp)) flags |= B_SYNC; if ((error = UFS_BUF_ALLOC(dp, (off_t)dp->i_offset, DIRBLKSIZ, cr, flags, &bp)) != 0) { if (DOINGSOFTDEP(dvp) && newdirbp != NULL) bdwrite(newdirbp); return (error); } DIP_ASSIGN(dp, size, dp->i_offset + DIRBLKSIZ); dp->i_flag |= IN_CHANGE | IN_UPDATE; uvm_vnp_setsize(dvp, DIP(dp, size)); dirp->d_reclen = DIRBLKSIZ; blkoff = dp->i_offset & (VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_iosize - 1); bcopy((caddr_t)dirp, (caddr_t)bp->b_data + blkoff,newentrysize); #ifdef UFS_DIRHASH if (dp->i_dirhash != NULL) { ufsdirhash_newblk(dp, dp->i_offset); ufsdirhash_add(dp, dirp, dp->i_offset); ufsdirhash_checkblock(dp, (char *)bp->b_data + blkoff, dp->i_offset); } #endif if (DOINGSOFTDEP(dvp)) { /* * Ensure that the entire newly allocated block is a * valid directory so that future growth within the * block does not have to ensure that the block is * written before the inode. */ blkoff += DIRBLKSIZ; while (blkoff < bp->b_bcount) { ((struct direct *) (bp->b_data + blkoff))->d_reclen = DIRBLKSIZ; blkoff += DIRBLKSIZ; } if (softdep_setup_directory_add(bp, dp, dp->i_offset, dirp->d_ino, newdirbp, 1) == 0) { bdwrite(bp); return (UFS_UPDATE(dp, 0)); } /* We have just allocated a directory block in an * indirect block. Rather than tracking when it gets * claimed by the inode, we simply do a VOP_FSYNC * now to ensure that it is there (in case the user * does a future fsync). Note that we have to unlock * the inode for the entry that we just entered, as * the VOP_FSYNC may need to lock other inodes which * can lead to deadlock if we also hold a lock on * the newly entered node. */ if ((error = VOP_BWRITE(bp))) return (error); if (tvp != NULL) VOP_UNLOCK(tvp, 0, p); error = VOP_FSYNC(dvp, p->p_ucred, MNT_WAIT, p); if (tvp != NULL) vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, p); return (error); } error = VOP_BWRITE(bp); ret = UFS_UPDATE(dp, !DOINGSOFTDEP(dvp)); if (error == 0) return (ret); return (error); } /* * If dp->i_count is non-zero, then namei found space for the new * entry in the range dp->i_offset to dp->i_offset + dp->i_count * in the directory. To use this space, we may have to compact * the entries located there, by copying them together towards the * beginning of the block, leaving the free space in one usable * chunk at the end. */ /* * Increase size of directory if entry eats into new space. * This should never push the size past a new multiple of * DIRBLKSIZE. * * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN. */ if (dp->i_offset + dp->i_count > DIP(dp, size)) DIP_ASSIGN(dp, size, dp->i_offset + dp->i_count); /* * Get the block containing the space for the new directory entry. */ if ((error = UFS_BUFATOFF(dp, (off_t)dp->i_offset, &dirbuf, &bp)) != 0) { if (DOINGSOFTDEP(dvp) && newdirbp != NULL) bdwrite(newdirbp); return (error); } /* * Find space for the new entry. In the simple case, the entry at * offset base will have the space. If it does not, then namei * arranged that compacting the region dp->i_offset to * dp->i_offset + dp->i_count would yield the space. */ ep = (struct direct *)dirbuf; dsize = ep->d_ino ? DIRSIZ(FSFMT(dvp), ep) : 0; spacefree = ep->d_reclen - dsize; for (loc = ep->d_reclen; loc < dp->i_count; ) { nep = (struct direct *)(dirbuf + loc); /* Trim the existing slot (NB: dsize may be zero). */ ep->d_reclen = dsize; ep = (struct direct *)((char *)ep + dsize); /* Read nep->d_reclen now as the bcopy() may clobber it. */ loc += nep->d_reclen; if (nep->d_ino == 0) { /* * A mid-block unused entry. Such entries are * never created by the kernel, but fsck_ffs * can create them (and it doesn't fix them). * * Add up the free space, and initialise the * relocated entry since we don't bcopy it. */ spacefree += nep->d_reclen; ep->d_ino = 0; dsize = 0; continue; } dsize = DIRSIZ(FSFMT(dvp), nep); spacefree += nep->d_reclen - dsize; #ifdef UFS_DIRHASH if (dp->i_dirhash != NULL) ufsdirhash_move(dp, nep, dp->i_offset + ((char *)nep - dirbuf), dp->i_offset + ((char *)ep - dirbuf)); #endif if (DOINGSOFTDEP(dvp)) softdep_change_directoryentry_offset(dp, dirbuf, (caddr_t)nep, (caddr_t)ep, dsize); else bcopy((caddr_t)nep, (caddr_t)ep, dsize); } /* * Here, `ep' points to a directory entry containing `dsize' in-use * bytes followed by `spacefree' unused bytes. If ep->d_ino == 0, * then the entry is completely unused (dsize == 0). The value * of ep->d_reclen is always indeterminate. * * Update the pointer fields in the previous entry (if any), * copy in the new entry, and write out the block. */ if (ep->d_ino == 0) { if (spacefree + dsize < newentrysize) panic("ufs_direnter: compact1"); dirp->d_reclen = spacefree + dsize; } else { if (spacefree < newentrysize) panic("ufs_direnter: compact2"); dirp->d_reclen = spacefree; ep->d_reclen = dsize; ep = (struct direct *)((char *)ep + dsize); } #ifdef UFS_DIRHASH if (dp->i_dirhash != NULL && (ep->d_ino == 0 || dirp->d_reclen == spacefree)) ufsdirhash_add(dp, dirp, dp->i_offset + ((char *)ep - dirbuf)); #endif bcopy((caddr_t)dirp, (caddr_t)ep, (u_int)newentrysize); #ifdef UFS_DIRHASH if (dp->i_dirhash != NULL) ufsdirhash_checkblock(dp, dirbuf - (dp->i_offset & (DIRBLKSIZ - 1)), dp->i_offset & ~(DIRBLKSIZ - 1)); #endif if (DOINGSOFTDEP(dvp)) { (void)softdep_setup_directory_add(bp, dp, dp->i_offset + (caddr_t)ep - dirbuf, dirp->d_ino, newdirbp, 0); bdwrite(bp); } else { error = VOP_BWRITE(bp); } dp->i_flag |= IN_CHANGE | IN_UPDATE; /* * If all went well, and the directory can be shortened, proceed * with the truncation. Note that we have to unlock the inode for * the entry that we just entered, as the truncation may need to * lock other inodes which can lead to deadlock if we also hold a * lock on the newly entered node. */ if (error == 0 && dp->i_endoff && dp->i_endoff < DIP(dp, size)) { if (tvp != NULL) VOP_UNLOCK(tvp, 0, p); #ifdef UFS_DIRHASH if (dp->i_dirhash != NULL) ufsdirhash_dirtrunc(dp, dp->i_endoff); #endif error = UFS_TRUNCATE(dp, (off_t)dp->i_endoff, IO_NORMAL | IO_SYNC, cr); if (tvp != NULL) vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, p); } return (error); }
/* * Main code to turn off disk quotas for a filesystem. Does not change * flags. */ static int quotaoff1(struct thread *td, struct mount *mp, int type) { struct vnode *vp; struct vnode *qvp, *mvp; struct ufsmount *ump; struct dquot *dq; struct inode *ip; struct ucred *cr; int error; ump = VFSTOUFS(mp); UFS_LOCK(ump); KASSERT((ump->um_qflags[type] & QTF_CLOSING) != 0, ("quotaoff1: flags are invalid")); if ((qvp = ump->um_quotas[type]) == NULLVP) { UFS_UNLOCK(ump); return (0); } cr = ump->um_cred[type]; UFS_UNLOCK(ump); /* * Search vnodes associated with this mount point, * deleting any references to quota file being closed. */ again: MNT_VNODE_FOREACH_ALL(vp, mp, mvp) { if (vp->v_type == VNON) { VI_UNLOCK(vp); continue; } if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) { MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); goto again; } ip = VTOI(vp); dq = ip->i_dquot[type]; ip->i_dquot[type] = NODQUOT; dqrele(vp, dq); VOP_UNLOCK(vp, 0); vrele(vp); } error = dqflush(qvp); if (error != 0) return (error); /* * Clear um_quotas before closing the quota vnode to prevent * access to the closed vnode from dqget/dqsync */ UFS_LOCK(ump); ump->um_quotas[type] = NULLVP; ump->um_cred[type] = NOCRED; UFS_UNLOCK(ump); vn_lock(qvp, LK_EXCLUSIVE | LK_RETRY); qvp->v_vflag &= ~VV_SYSTEM; VOP_UNLOCK(qvp, 0); error = vn_close(qvp, FREAD|FWRITE, td->td_ucred, td); crfree(cr); return (error); }
/* * Go through the disk queues to initiate sandbagged IO; * go through the inodes to write those that have been modified; * initiate the writing of the super block if it has been modified. * * Note: we are always called with the filesystem marked `MPBUSY'. */ static int ext2_sync(struct mount *mp, int waitfor) { struct vnode *mvp, *vp; struct thread *td; struct inode *ip; struct ext2mount *ump = VFSTOEXT2(mp); struct m_ext2fs *fs; int error, allerror = 0; td = curthread; fs = ump->um_e2fs; if (fs->e2fs_fmod != 0 && fs->e2fs_ronly != 0) { /* XXX */ printf("fs = %s\n", fs->e2fs_fsmnt); panic("ext2_sync: rofs mod"); } /* * Write back each (modified) inode. */ loop: MNT_VNODE_FOREACH_ALL(vp, mp, mvp) { if (vp->v_type == VNON) { VI_UNLOCK(vp); continue; } ip = VTOI(vp); if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 && (vp->v_bufobj.bo_dirty.bv_cnt == 0 || waitfor == MNT_LAZY)) { VI_UNLOCK(vp); continue; } error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, td); if (error) { if (error == ENOENT) { MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); goto loop; } continue; } if ((error = VOP_FSYNC(vp, waitfor, td)) != 0) allerror = error; VOP_UNLOCK(vp, 0); vrele(vp); } /* * Force stale file system control information to be flushed. */ if (waitfor != MNT_LAZY) { vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY); if ((error = VOP_FSYNC(ump->um_devvp, waitfor, td)) != 0) allerror = error; VOP_UNLOCK(ump->um_devvp, 0); } /* * Write back modified superblock. */ if (fs->e2fs_fmod != 0) { fs->e2fs_fmod = 0; fs->e2fs->e2fs_wtime = time_second; if ((error = ext2_cgupdate(ump, waitfor)) != 0) allerror = error; } return (allerror); }
/* * this function handles traditional block mapping */ static int ext2_ind_read(struct vop_read_args *ap) { struct vnode *vp; struct inode *ip; struct uio *uio; FS *fs; struct buf *bp; daddr_t lbn, nextlbn; off_t bytesinfile; long size, xfersize, blkoffset; int error, orig_resid, seqcount; seqcount = ap->a_ioflag >> IO_SEQSHIFT; u_short mode; vp = ap->a_vp; ip = VTOI(vp); mode = ip->i_mode; uio = ap->a_uio; #ifdef DIAGNOSTIC if (uio->uio_rw != UIO_READ) panic("%s: mode", READ_S); if (vp->v_type == VLNK) { if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen) panic("%s: short symlink", READ_S); } else if (vp->v_type != VREG && vp->v_type != VDIR) panic("%s: type %d", READ_S, vp->v_type); #endif orig_resid = uio->uio_resid; KASSERT(orig_resid >= 0, ("ext2_read: uio->uio_resid < 0")); if (orig_resid == 0) return (0); KASSERT(uio->uio_offset >= 0, ("ext2_read: uio->uio_offset < 0")); fs = ip->I_FS; if (uio->uio_offset < ip->i_size && uio->uio_offset >= fs->e2fs_maxfilesize) return (EOVERFLOW); for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) { if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0) break; lbn = lblkno(fs, uio->uio_offset); nextlbn = lbn + 1; size = BLKSIZE(fs, ip, lbn); blkoffset = blkoff(fs, uio->uio_offset); xfersize = fs->e2fs_fsize - blkoffset; if (uio->uio_resid < xfersize) xfersize = uio->uio_resid; if (bytesinfile < xfersize) xfersize = bytesinfile; if (lblktosize(fs, nextlbn) >= ip->i_size) error = bread(vp, lbn, size, NOCRED, &bp); else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) error = cluster_read(vp, ip->i_size, lbn, size, NOCRED, blkoffset + uio->uio_resid, seqcount, &bp); else if (seqcount > 1) { int nextsize = BLKSIZE(fs, ip, nextlbn); error = breadn(vp, lbn, size, &nextlbn, &nextsize, 1, NOCRED, &bp); } else error = bread(vp, lbn, size, NOCRED, &bp); if (error) { brelse(bp); bp = NULL; break; } /* * We should only get non-zero b_resid when an I/O error * has occurred, which should cause us to break above. * However, if the short read did not cause an error, * then we want to ensure that we do not uiomove bad * or uninitialized data. */ size -= bp->b_resid; if (size < xfersize) { if (size == 0) break; xfersize = size; } error = uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio); if (error) break; bqrelse(bp); } if (bp != NULL) bqrelse(bp); if ((error == 0 || uio->uio_resid != orig_resid) && (vp->v_mount->mnt_flag & MNT_NOATIME) == 0) ip->i_flag |= IN_ACCESS; return (error); }
/* * Last reference to an inode. If necessary, write or delete it. */ int ufs_inactive(void *v) { struct vop_inactive_args *ap = v; struct vnode *vp = ap->a_vp; struct inode *ip = VTOI(vp); struct fs *fs = ip->i_fs; struct proc *p = curproc; mode_t mode; int error = 0, logged = 0, truncate_error = 0; #ifdef DIAGNOSTIC extern int prtactive; if (prtactive && vp->v_usecount != 0) vprint("ufs_inactive: pushing active", vp); #endif UFS_WAPBL_JUNLOCK_ASSERT(vp->v_mount); /* * Ignore inodes related to stale file handles. */ if (ip->i_din1 == NULL || DIP(ip, mode) == 0) goto out; if (DIP(ip, nlink) <= 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { error = UFS_WAPBL_BEGIN(vp->v_mount); if (error) goto out; logged = 1; if (getinoquota(ip) == 0) (void)ufs_quota_free_inode(ip, NOCRED); if (DIP(ip, size) != 0 && vp->v_mount->mnt_wapbl) { /* * When journaling, only truncate one indirect block at * a time. */ uint64_t incr = MNINDIR(ip->i_ump) << fs->fs_bshift; uint64_t base = NDADDR << fs->fs_bshift; while (!error && DIP(ip, size) > base + incr) { /* * round down to next full indirect block * boundary. */ uint64_t nsize = base + ((DIP(ip, size) - base - 1) & ~(incr - 1)); error = UFS_TRUNCATE(ip, nsize, 0, NOCRED); if (error) break; UFS_WAPBL_END(vp->v_mount); error = UFS_WAPBL_BEGIN(vp->v_mount); if (error) goto out; } } if (error == 0) { truncate_error = UFS_TRUNCATE(ip, (off_t)0, 0, NOCRED); /* XXX pedro: remove me */ if (truncate_error) printf("UFS_TRUNCATE()=%d\n", truncate_error); } DIP_ASSIGN(ip, rdev, 0); mode = DIP(ip, mode); DIP_ASSIGN(ip, mode, 0); ip->i_flag |= IN_CHANGE | IN_UPDATE; /* * Setting the mode to zero needs to wait for the inode to be * written just as does a change to the link count. So, rather * than creating a new entry point to do the same thing, we * just use softdep_change_linkcnt(). Also, we can't let * softdep co-opt us to help on its worklist, as we may end up * trying to recycle vnodes and getting to this same point a * couple of times, blowing the kernel stack. However, this * could be optimized by checking if we are coming from * vrele(), vput() or vclean() (by checking for VXLOCK) and * just avoiding the co-opt to happen in the last case. */ if (DOINGSOFTDEP(vp)) softdep_change_linkcnt(ip, 1); UFS_INODE_FREE(ip, ip->i_number, mode); } if (ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) { if (!logged++) { int err; err = UFS_WAPBL_BEGIN(vp->v_mount); if (err) { error = err; goto out; } } UFS_UPDATE(ip, 0); } if (logged) UFS_WAPBL_END(vp->v_mount); out: VOP_UNLOCK(vp, 0); /* * If we are done with the inode, reclaim it * so that it can be reused immediately. */ if (error == 0 && truncate_error == 0 && (ip->i_din1 == NULL || DIP(ip, mode) == 0)) vrecycle(vp, p); return (truncate_error ? truncate_error : error); }
/* * Convert a component of a pathname into a pointer to a locked inode. * This is a very central and rather complicated routine. * If the file system is not maintained in a strict tree hierarchy, * this can result in a deadlock situation (see comments in code below). * * The cnp->cn_nameiop argument is LOOKUP, CREATE, RENAME, or DELETE depending * on whether the name is to be looked up, created, renamed, or deleted. * When CREATE, RENAME, or DELETE is specified, information usable in * creating, renaming, or deleting a directory entry may be calculated. * If flag has LOCKPARENT or'ed into it and the target of the pathname * exists, lookup returns both the target and its parent directory locked. * When creating or renaming and LOCKPARENT is specified, the target may * not be ".". When deleting and LOCKPARENT is specified, the target may * be "."., but the caller must check to ensure it does an vrele and vput * instead of two vputs. * * Overall outline of ufs_lookup: * * search for name in directory, to found or notfound * notfound: * if creating, return locked directory, leaving info on available slots * else return error * found: * if at end of path and deleting, return information to allow delete * if at end of path and rewriting (RENAME and LOCKPARENT), lock target * inode and return info to allow rewrite * if not at end, add name to cache; if at end and neither creating * nor deleting, add name to cache * * ext2_lookup(struct vnode *a_dvp, struct vnode **a_vpp, * struct componentname *a_cnp) */ int ext2_lookup(struct vop_old_lookup_args *ap) { struct vnode *vdp; /* vnode for directory being searched */ struct inode *dp; /* inode for directory being searched */ struct buf *bp; /* a buffer of directory entries */ struct ext2_dir_entry_2 *ep; /* the current directory entry */ int entryoffsetinblock; /* offset of ep in bp's buffer */ enum {NONE, COMPACT, FOUND} slotstatus; doff_t slotoffset; /* offset of area with free space */ int slotsize; /* size of area at slotoffset */ int slotfreespace; /* amount of space free in slot */ int slotneeded; /* size of the entry we're seeking */ int numdirpasses; /* strategy for directory search */ doff_t endsearch; /* offset to end directory search */ doff_t prevoff; /* prev entry dp->i_offset */ struct vnode *pdp; /* saved dp during symlink work */ struct vnode *tdp; /* returned by VFS_VGET */ doff_t enduseful; /* pointer past last used dir slot */ u_long bmask; /* block offset mask */ int lockparent; /* 1 => lockparent flag is set */ int wantparent; /* 1 => wantparent or lockparent flag */ int namlen, error; struct vnode **vpp = ap->a_vpp; struct componentname *cnp = ap->a_cnp; struct ucred *cred = cnp->cn_cred; int flags = cnp->cn_flags; int nameiop = cnp->cn_nameiop; int DIRBLKSIZ = VTOI(ap->a_dvp)->i_e2fs->s_blocksize; bp = NULL; slotoffset = -1; *vpp = NULL; vdp = ap->a_dvp; dp = VTOI(vdp); lockparent = flags & CNP_LOCKPARENT; wantparent = flags & (CNP_LOCKPARENT|CNP_WANTPARENT); /* * We now have a segment name to search for, and a directory to search. */ /* * Suppress search for slots unless creating * file and at end of pathname, in which case * we watch for a place to put the new file in * case it doesn't already exist. */ slotstatus = FOUND; slotfreespace = slotsize = slotneeded = 0; if (nameiop == NAMEI_CREATE || nameiop == NAMEI_RENAME) { slotstatus = NONE; slotneeded = EXT2_DIR_REC_LEN(cnp->cn_namelen); /* was slotneeded = (sizeof(struct direct) - MAXNAMLEN + cnp->cn_namelen + 3) &~ 3; */ } /* * If there is cached information on a previous search of * this directory, pick up where we last left off. * We cache only lookups as these are the most common * and have the greatest payoff. Caching CREATE has little * benefit as it usually must search the entire directory * to determine that the entry does not exist. Caching the * location of the last DELETE or RENAME has not reduced * profiling time and hence has been removed in the interest * of simplicity. */ bmask = VFSTOEXT2(vdp->v_mount)->um_mountp->mnt_stat.f_iosize - 1; if (nameiop != NAMEI_LOOKUP || dp->i_diroff == 0 || dp->i_diroff > dp->i_size) { entryoffsetinblock = 0; dp->i_offset = 0; numdirpasses = 1; } else { dp->i_offset = dp->i_diroff; if ((entryoffsetinblock = dp->i_offset & bmask) && (error = EXT2_BLKATOFF(vdp, (off_t)dp->i_offset, NULL, &bp))) return (error); numdirpasses = 2; } prevoff = dp->i_offset; endsearch = roundup(dp->i_size, DIRBLKSIZ); enduseful = 0; searchloop: while (dp->i_offset < endsearch) { /* * If necessary, get the next directory block. */ if ((dp->i_offset & bmask) == 0) { if (bp != NULL) brelse(bp); if ((error = EXT2_BLKATOFF(vdp, (off_t)dp->i_offset, NULL, &bp)) != 0) return (error); entryoffsetinblock = 0; } /* * If still looking for a slot, and at a DIRBLKSIZE * boundary, have to start looking for free space again. */ if (slotstatus == NONE && (entryoffsetinblock & (DIRBLKSIZ - 1)) == 0) { slotoffset = -1; slotfreespace = 0; } /* * Get pointer to next entry. * Full validation checks are slow, so we only check * enough to insure forward progress through the * directory. Complete checks can be run by patching * "dirchk" to be true. */ ep = (struct ext2_dir_entry_2 *) ((char *)bp->b_data + entryoffsetinblock); if (ep->rec_len == 0 || (dirchk && ext2_dirbadentry(vdp, ep, entryoffsetinblock))) { int i; ext2_dirbad(dp, dp->i_offset, "mangled entry"); i = DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)); dp->i_offset += i; entryoffsetinblock += i; continue; } /* * If an appropriate sized slot has not yet been found, * check to see if one is available. Also accumulate space * in the current block so that we can determine if * compaction is viable. */ if (slotstatus != FOUND) { int size = ep->rec_len; if (ep->inode != 0) size -= EXT2_DIR_REC_LEN(ep->name_len); if (size > 0) { if (size >= slotneeded) { slotstatus = FOUND; slotoffset = dp->i_offset; slotsize = ep->rec_len; } else if (slotstatus == NONE) { slotfreespace += size; if (slotoffset == -1) slotoffset = dp->i_offset; if (slotfreespace >= slotneeded) { slotstatus = COMPACT; slotsize = dp->i_offset + ep->rec_len - slotoffset; } } } } /* * Check for a name match. */ if (ep->inode) { namlen = ep->name_len; if (namlen == cnp->cn_namelen && !bcmp(cnp->cn_nameptr, ep->name, (unsigned)namlen)) { /* * Save directory entry's inode number and * reclen in ndp->ni_ufs area, and release * directory buffer. */ dp->i_ino = ep->inode; dp->i_reclen = ep->rec_len; goto found; } } prevoff = dp->i_offset; dp->i_offset += ep->rec_len; entryoffsetinblock += ep->rec_len; if (ep->inode) enduseful = dp->i_offset; } /* notfound: */ /* * If we started in the middle of the directory and failed * to find our target, we must check the beginning as well. */ if (numdirpasses == 2) { numdirpasses--; dp->i_offset = 0; endsearch = dp->i_diroff; goto searchloop; } if (bp != NULL) brelse(bp); /* * If creating, and at end of pathname and current * directory has not been removed, then can consider * allowing file to be created. */ if ((nameiop == NAMEI_CREATE || nameiop == NAMEI_RENAME) && dp->i_nlink != 0) { /* * Access for write is interpreted as allowing * creation of files in the directory. */ if ((error = VOP_EACCESS(vdp, VWRITE, cred)) != 0) return (error); /* * Return an indication of where the new directory * entry should be put. If we didn't find a slot, * then set dp->i_count to 0 indicating * that the new slot belongs at the end of the * directory. If we found a slot, then the new entry * can be put in the range from dp->i_offset to * dp->i_offset + dp->i_count. */ if (slotstatus == NONE) { dp->i_offset = roundup(dp->i_size, DIRBLKSIZ); dp->i_count = 0; enduseful = dp->i_offset; } else { dp->i_offset = slotoffset; dp->i_count = slotsize; if (enduseful < slotoffset + slotsize) enduseful = slotoffset + slotsize; } dp->i_endoff = roundup(enduseful, DIRBLKSIZ); dp->i_flag |= IN_CHANGE | IN_UPDATE; /* * We return with the directory locked, so that * the parameters we set up above will still be * valid if we actually decide to do a direnter(). * We return ni_vp == NULL to indicate that the entry * does not currently exist; we leave a pointer to * the (locked) directory inode in ndp->ni_dvp. * The pathname buffer is saved so that the name * can be obtained later. * * NB - if the directory is unlocked, then this * information cannot be used. */ if (!lockparent) vn_unlock(vdp); return (EJUSTRETURN); } return (ENOENT); found: /* * Check that directory length properly reflects presence * of this entry. */ if (entryoffsetinblock + EXT2_DIR_REC_LEN(ep->name_len) > dp->i_size) { ext2_dirbad(dp, dp->i_offset, "i_size too small"); dp->i_size = entryoffsetinblock+EXT2_DIR_REC_LEN(ep->name_len); dp->i_flag |= IN_CHANGE | IN_UPDATE; } brelse(bp); /* * Found component in pathname. * If the final component of path name, save information * in the cache as to where the entry was found. */ if (nameiop == NAMEI_LOOKUP) dp->i_diroff = dp->i_offset &~ (DIRBLKSIZ - 1); /* * If deleting, and at end of pathname, return * parameters which can be used to remove file. * If the wantparent flag isn't set, we return only * the directory (in ndp->ni_dvp), otherwise we go * on and lock the inode, being careful with ".". */ if (nameiop == NAMEI_DELETE) { /* * Write access to directory required to delete files. */ if ((error = VOP_EACCESS(vdp, VWRITE, cred)) != 0) return (error); /* * Return pointer to current entry in dp->i_offset, * and distance past previous entry (if there * is a previous entry in this block) in dp->i_count. * Save directory inode pointer in ndp->ni_dvp for dirremove(). */ if ((dp->i_offset & (DIRBLKSIZ - 1)) == 0) dp->i_count = 0; else dp->i_count = dp->i_offset - prevoff; if (dp->i_number == dp->i_ino) { vref(vdp); *vpp = vdp; return (0); } if ((error = VFS_VGET(vdp->v_mount, NULL, dp->i_ino, &tdp)) != 0) return (error); /* * If directory is "sticky", then user must own * the directory, or the file in it, else she * may not delete it (unless she's root). This * implements append-only directories. */ if ((dp->i_mode & ISVTX) && cred->cr_uid != 0 && cred->cr_uid != dp->i_uid && VTOI(tdp)->i_uid != cred->cr_uid) { vput(tdp); return (EPERM); } *vpp = tdp; if (!lockparent) vn_unlock(vdp); return (0); } /* * If rewriting (RENAME), return the inode and the * information required to rewrite the present directory * Must get inode of directory entry to verify it's a * regular file, or empty directory. */ if (nameiop == NAMEI_RENAME && wantparent) { if ((error = VOP_EACCESS(vdp, VWRITE, cred)) != 0) return (error); /* * Careful about locking second inode. * This can only occur if the target is ".". */ if (dp->i_number == dp->i_ino) return (EISDIR); if ((error = VFS_VGET(vdp->v_mount, NULL, dp->i_ino, &tdp)) != 0) return (error); *vpp = tdp; if (!lockparent) vn_unlock(vdp); return (0); } /* * Step through the translation in the name. We do not `vput' the * directory because we may need it again if a symbolic link * is relative to the current directory. Instead we save it * unlocked as "pdp". We must get the target inode before unlocking * the directory to insure that the inode will not be removed * before we get it. We prevent deadlock by always fetching * inodes from the root, moving down the directory tree. Thus * when following backward pointers ".." we must unlock the * parent directory before getting the requested directory. * There is a potential race condition here if both the current * and parent directories are removed before the VFS_VGET for the * inode associated with ".." returns. We hope that this occurs * infrequently since we cannot avoid this race condition without * implementing a sophisticated deadlock detection algorithm. * Note also that this simple deadlock detection scheme will not * work if the file system has any hard links other than ".." * that point backwards in the directory structure. */ pdp = vdp; if (flags & CNP_ISDOTDOT) { vn_unlock(pdp); /* race to get the inode */ if ((error = VFS_VGET(vdp->v_mount, NULL, dp->i_ino, &tdp)) != 0) { vn_lock(pdp, LK_EXCLUSIVE | LK_RETRY); return (error); } if (lockparent && (error = vn_lock(pdp, LK_EXCLUSIVE))) { vput(tdp); return (error); } *vpp = tdp; } else if (dp->i_number == dp->i_ino) { vref(vdp); /* we want ourself, ie "." */ *vpp = vdp; } else { if ((error = VFS_VGET(vdp->v_mount, NULL, dp->i_ino, &tdp)) != 0) return (error); if (!lockparent) vn_unlock(pdp); *vpp = tdp; } return (0); }
int ffs_fsync(void *v) { struct vop_fsync_args /* { struct vnode *a_vp; kauth_cred_t a_cred; int a_flags; off_t a_offlo; off_t a_offhi; struct lwp *a_l; } */ *ap = v; struct buf *bp; int num, error, i; struct indir ia[NIADDR + 1]; int bsize; daddr_t blk_high; struct vnode *vp; struct mount *mp; vp = ap->a_vp; mp = vp->v_mount; fstrans_start(mp, FSTRANS_LAZY); if ((ap->a_offlo == 0 && ap->a_offhi == 0) || (vp->v_type != VREG)) { error = ffs_full_fsync(vp, ap->a_flags); goto out; } bsize = mp->mnt_stat.f_iosize; blk_high = ap->a_offhi / bsize; if (ap->a_offhi % bsize != 0) blk_high++; /* * First, flush all pages in range. */ mutex_enter(vp->v_interlock); error = VOP_PUTPAGES(vp, trunc_page(ap->a_offlo), round_page(ap->a_offhi), PGO_CLEANIT | ((ap->a_flags & FSYNC_WAIT) ? PGO_SYNCIO : 0)); if (error) { goto out; } #ifdef WAPBL KASSERT(vp->v_type == VREG); if (mp->mnt_wapbl) { /* * Don't bother writing out metadata if the syncer is * making the request. We will let the sync vnode * write it out in a single burst through a call to * VFS_SYNC(). */ if ((ap->a_flags & (FSYNC_DATAONLY | FSYNC_LAZY)) != 0) { fstrans_done(mp); return 0; } error = 0; if (vp->v_tag == VT_UFS && VTOI(vp)->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE | IN_MODIFY | IN_MODIFIED | IN_ACCESSED)) { error = UFS_WAPBL_BEGIN(mp); if (error) { fstrans_done(mp); return error; } error = ffs_update(vp, NULL, NULL, UPDATE_CLOSE | ((ap->a_flags & FSYNC_WAIT) ? UPDATE_WAIT : 0)); UFS_WAPBL_END(mp); } if (error || (ap->a_flags & FSYNC_NOLOG) != 0) { fstrans_done(mp); return error; } error = wapbl_flush(mp->mnt_wapbl, 0); fstrans_done(mp); return error; } #endif /* WAPBL */ /* * Then, flush indirect blocks. */ if (blk_high >= NDADDR) { error = ufs_getlbns(vp, blk_high, ia, &num); if (error) goto out; mutex_enter(&bufcache_lock); for (i = 0; i < num; i++) { if ((bp = incore(vp, ia[i].in_lbn)) == NULL) continue; if ((bp->b_cflags & BC_BUSY) != 0 || (bp->b_oflags & BO_DELWRI) == 0) continue; bp->b_cflags |= BC_BUSY | BC_VFLUSH; mutex_exit(&bufcache_lock); bawrite(bp); mutex_enter(&bufcache_lock); } mutex_exit(&bufcache_lock); } if (ap->a_flags & FSYNC_WAIT) { mutex_enter(vp->v_interlock); while (vp->v_numoutput > 0) cv_wait(&vp->v_cv, vp->v_interlock); mutex_exit(vp->v_interlock); } error = ffs_update(vp, NULL, NULL, UPDATE_CLOSE | (((ap->a_flags & (FSYNC_WAIT | FSYNC_DATAONLY)) == FSYNC_WAIT) ? UPDATE_WAIT : 0)); if (error == 0 && ap->a_flags & FSYNC_CACHE) { int l = 0; VOP_IOCTL(VTOI(vp)->i_devvp, DIOCCACHESYNC, &l, FWRITE, curlwp->l_cred); } out: fstrans_done(mp); return error; }
/* * Disable logging */ int lqfs_disable(vnode_t *vp, struct fiolog *flp) { int error = 0; inode_t *ip = VTOI(vp); qfsvfs_t *qfsvfsp = ip->i_qfsvfs; fs_lqfs_common_t *fs = VFS_FS_PTR(qfsvfsp); #ifdef LUFS struct lockfs lf; struct ulockfs *ulp; #else /* QFS doesn't really support LOCKFS. */ #endif /* LUFS */ flp->error = FIOLOG_ENONE; /* * Logging is already disabled; done */ if (LQFS_GET_LOGBNO(fs) == 0 || LQFS_GET_LOGP(qfsvfsp) == NULL || !LQFS_CAPABLE(qfsvfsp)) { vfs_setmntopt(qfsvfsp->vfs_vfs, MNTOPT_NOLOGGING, NULL, 0); error = 0; goto out; } #ifdef LUFS /* * File system must be write locked to disable logging */ error = qfs_fiolfss(vp, &lf); if (error) { goto out; } if (!LOCKFS_IS_ULOCK(&lf)) { flp->error = FIOLOG_EULOCK; error = 0; goto out; } lf.lf_lock = LOCKFS_WLOCK; lf.lf_flags = 0; lf.lf_comment = NULL; error = qfs_fiolfs(vp, &lf, 1); if (error) { flp->error = FIOLOG_EWLOCK; error = 0; goto out; } #else /* QFS doesn't really support LOCKFS. */ #endif /* LUFS */ if (LQFS_GET_LOGP(qfsvfsp) == NULL || LQFS_GET_LOGBNO(fs) == 0) { goto errout; } /* * WE ARE COMMITTED TO DISABLING LOGGING PAST THIS POINT */ /* * Disable logging: * Suspend the reclaim thread and force the delete thread to exit. * When a nologging mount has completed there may still be * work for reclaim to do so just suspend this thread until * it's [deadlock-] safe for it to continue. The delete * thread won't be needed as qfs_iinactive() calls * qfs_delete() when logging is disabled. * Freeze and drain reader ops. * Commit any outstanding reader transactions (lqfs_flush). * Set the ``unmounted'' bit in the qfstrans struct. * If debug, remove metadata from matamap. * Disable matamap processing. * NULL the trans ops table. * Free all of the incore structs related to logging. * Allow reader ops. */ #ifdef LUFS qfs_thread_suspend(&qfsvfsp->vfs_reclaim); qfs_thread_exit(&qfsvfsp->vfs_delete); #else /* QFS doesn't have file reclaim nor i-node delete threads. */ #endif /* LUFS */ vfs_lock_wait(qfsvfsp->vfs_vfs); #ifdef LQFS_TODO_LOCKFS ulp = &qfsvfsp->vfs_ulockfs; mutex_enter(&ulp->ul_lock); (void) qfs_quiesce(ulp); #else /* QFS doesn't really support LOCKFS. */ #endif /* LQFS_TODO_LOCKFS */ #ifdef LQFS_TODO (void) qfs_flush(qfsvfsp->vfs_vfs); #else (void) lqfs_flush(qfsvfsp); if (LQFS_GET_LOGP(qfsvfsp)) { logmap_start_roll(LQFS_GET_LOGP(qfsvfsp)); } #endif /* LQFS_TODO */ TRANS_MATA_UMOUNT(qfsvfsp); LQFS_SET_DOMATAMAP(qfsvfsp, 0); /* * Free all of the incore structs * Aquire the ufs_scan_lock before de-linking the mtm data * structure so that we keep ufs_sync() and ufs_update() away * when they execute the ufs_scan_inodes() run while we're in * progress of enabling/disabling logging. */ mutex_enter(&qfs_scan_lock); (void) lqfs_unsnarf(qfsvfsp); mutex_exit(&qfs_scan_lock); #ifdef LQFS_TODO_LOCKFS atomic_add_long(&ufs_quiesce_pend, -1); mutex_exit(&ulp->ul_lock); #else /* QFS doesn't do this yet. */ #endif /* LQFS_TODO_LOCKFS */ vfs_setmntopt(qfsvfsp->vfs_vfs, MNTOPT_NOLOGGING, NULL, 0); vfs_unlock(qfsvfsp->vfs_vfs); LQFS_SET_FS_ROLLED(fs, FS_ALL_ROLLED); LQFS_SET_NOLOG_SI(qfsvfsp, 0); /* * Free the log space and mark the superblock as FSACTIVE */ (void) lqfs_free(qfsvfsp); #ifdef LUFS /* * Allow the reclaim thread to continue. */ qfs_thread_continue(&qfsvfsp->vfs_reclaim); #else /* QFS doesn't have a file reclaim thread. */ #endif /* LUFS */ #ifdef LQFS_TODO_LOCKFS /* * Unlock the file system */ lf.lf_lock = LOCKFS_ULOCK; lf.lf_flags = 0; error = qfs_fiolfs(vp, &lf, 1); if (error) { flp->error = FIOLOG_ENOULOCK; } #else /* QFS doesn't really support LOCKFS. */ #endif /* LQFS_LOCKFS */ error = 0; goto out; errout: #ifdef LQFS_LOCKFS lf.lf_lock = LOCKFS_ULOCK; lf.lf_flags = 0; (void) qfs_fiolfs(vp, &lf, 1); #else /* QFS doesn't really support LOCKFS. */ #endif /* LQFS_LOCKFS */ out: mutex_enter(&ip->mp->ms.m_waitwr_mutex); ip->mp->mt.fi_status |= FS_LOGSTATE_KNOWN; mutex_exit(&ip->mp->ms.m_waitwr_mutex); return (error); }
/* * Enable logging */ int lqfs_enable(struct vnode *vp, struct fiolog *flp, cred_t *cr) { int error; inode_t *ip = VTOI(vp); qfsvfs_t *qfsvfsp = ip->i_qfsvfs; fs_lqfs_common_t *fs = VFS_FS_PTR(qfsvfsp); ml_unit_t *ul; #ifdef LQFS_TODO_LOCKFS int reclaim = 0; struct lockfs lf; struct ulockfs *ulp; #else /* QFS doesn't really support LOCKFS. */ #endif /* LQFS_TODO_LOCKFS */ vfs_t *vfsp = qfsvfsp->vfs_vfs; uint64_t tmp_nbytes_actual; char fsclean; sam_sblk_t *sblk = qfsvfsp->mi.m_sbp; /* * File system is not capable of logging. */ if (!LQFS_CAPABLE(qfsvfsp)) { flp->error = FIOLOG_ENOTSUP; error = 0; goto out; } if (!SAM_MAGIC_V2A_OR_HIGHER(&sblk->info.sb)) { cmn_err(CE_WARN, "SAM-QFS: %s: Not enabling logging, " " file system is not version 2A.", qfsvfsp->mt.fi_name); cmn_err(CE_WARN, "\tUpgrade file system with samfsck -u " "first."); flp->error = FIOLOG_ENOTSUP; error = 0; goto out; } if (LQFS_GET_LOGBNO(fs)) { error = lqfs_log_validate(qfsvfsp, flp, cr); } /* * Check if logging is already enabled */ if (LQFS_GET_LOGP(qfsvfsp)) { flp->error = FIOLOG_ETRANS; /* for root ensure logging option is set */ vfs_setmntopt(vfsp, MNTOPT_LOGGING, NULL, 0); error = 0; goto out; } /* * Come back here to recheck if we had to disable the log. */ recheck: error = 0; flp->error = FIOLOG_ENONE; /* * Adjust requested log size */ flp->nbytes_actual = flp->nbytes_requested; if (flp->nbytes_actual == 0) { tmp_nbytes_actual = (((uint64_t)FS_SIZE(fs)) / ldl_divisor) << FS_FSHIFT(fs); flp->nbytes_actual = (uint_t)MIN(tmp_nbytes_actual, INT_MAX); } flp->nbytes_actual = MAX(flp->nbytes_actual, ldl_minlogsize); flp->nbytes_actual = MIN(flp->nbytes_actual, ldl_maxlogsize); flp->nbytes_actual = blkroundup(fs, flp->nbytes_actual); /* * logging is enabled and the log is the right size; done */ ul = LQFS_GET_LOGP(qfsvfsp); if (ul && LQFS_GET_LOGBNO(fs) && (flp->nbytes_actual == ul->un_requestsize)) { vfs_setmntopt(vfsp, MNTOPT_LOGGING, NULL, 0); error = 0; goto out; } /* * Readonly file system */ if (FS_RDONLY(fs)) { flp->error = FIOLOG_EROFS; error = 0; goto out; } #ifdef LQFS_TODO_LOCKFS /* * File system must be write locked to enable logging */ error = qfs_fiolfss(vp, &lf); if (error) { goto out; } if (!LOCKFS_IS_ULOCK(&lf)) { flp->error = FIOLOG_EULOCK; error = 0; goto out; } lf.lf_lock = LOCKFS_WLOCK; lf.lf_flags = 0; lf.lf_comment = NULL; error = qfs_fiolfs(vp, &lf, 1); if (error) { flp->error = FIOLOG_EWLOCK; error = 0; goto out; } #else /* QFS doesn't really support lockfs. */ #endif /* LQFS_TODO_LOCKFS */ /* * Grab appropriate locks to synchronize with the rest * of the system */ vfs_lock_wait(vfsp); #ifdef LQFS_TODO_LOCKFS ulp = &ufsvfsp->vfs_ulockfs; mutex_enter(&ulp->ul_lock); #else /* QFS doesn't really support lockfs. */ #endif /* LQFS_TODO_LOCKFS */ /* * File system must be fairly consistent to enable logging */ fsclean = LQFS_GET_FS_CLEAN(fs); if (fsclean != FSLOG && fsclean != FSACTIVE && fsclean != FSSTABLE && fsclean != FSCLEAN) { flp->error = FIOLOG_ECLEAN; goto unlockout; } #ifdef LUFS /* * A write-locked file system is only active if there are * open deleted files; so remember to set FS_RECLAIM later. */ if (LQFS_GET_FS_CLEAN(fs) == FSACTIVE) { reclaim = FS_RECLAIM; } #else /* QFS doesn't have a reclaim file thread. */ #endif /* LUFS */ /* * Logging is already enabled; must be changing the log's size */ if (LQFS_GET_LOGBNO(fs) && LQFS_GET_LOGP(qfsvfsp)) { #ifdef LQFS_TODO_LOCKFS /* * Before we can disable logging, we must give up our * lock. As a consequence of unlocking and disabling the * log, the fs structure may change. Because of this, when * disabling is complete, we will go back to recheck to * repeat all of the checks that we performed to get to * this point. Disabling sets fs->fs_logbno to 0, so this * will not put us into an infinite loop. */ mutex_exit(&ulp->ul_lock); #else /* QFS doesn't really support lockfs. */ #endif /* LQFS_TODO_LOCKFS */ vfs_unlock(vfsp); #ifdef LQFS_TODO_LOCKFS lf.lf_lock = LOCKFS_ULOCK; lf.lf_flags = 0; error = qfs_fiolfs(vp, &lf, 1); if (error) { flp->error = FIOLOG_ENOULOCK; error = 0; goto out; } #else /* QFS doesn't really support lockfs. */ #endif /* LQFS_TODO_LOCKFS */ error = lqfs_disable(vp, flp); if (error || (flp->error != FIOLOG_ENONE)) { error = 0; goto out; } goto recheck; } error = lqfs_alloc(qfsvfsp, flp, cr); if (error) { goto errout; } #ifdef LUFS #else if ((error = lqfs_log_validate(qfsvfsp, flp, cr)) != 0) { goto errout; } #endif /* LUFS */ /* * Create all of the incore structs */ error = lqfs_snarf(qfsvfsp, fs, 0); if (error) { goto errout; } /* * DON'T ``GOTO ERROUT'' PAST THIS POINT */ /* * Pretend we were just mounted with logging enabled * freeze and drain the file system of readers * Get the ops vector * If debug, record metadata locations with log subsystem * Start the delete thread * Start the reclaim thread, if necessary * Thaw readers */ vfs_setmntopt(vfsp, MNTOPT_LOGGING, NULL, 0); TRANS_DOMATAMAP(qfsvfsp); TRANS_MATA_MOUNT(qfsvfsp); TRANS_MATA_SI(qfsvfsp, fs); #ifdef LUFS qfs_thread_start(&qfsvfsp->vfs_delete, qfs_thread_delete, vfsp); if (fs->fs_reclaim & (FS_RECLAIM|FS_RECLAIMING)) { fs->fs_reclaim &= ~FS_RECLAIM; fs->fs_reclaim |= FS_RECLAIMING; qfs_thread_start(&qfsvfsp->vfs_reclaim, qfs_thread_reclaim, vfsp); } else { fs->fs_reclaim |= reclaim; } #else /* QFS doesn't have file reclaim nor i-node delete threads. */ #endif /* LUFS */ #ifdef LUFS mutex_exit(&ulp->ul_lock); #else /* QFS doesn't really support LOCKFS. */ #endif /* LUFS */ vfs_unlock(vfsp); #ifdef LQFS_TODO_LOCKFS /* * Unlock the file system */ lf.lf_lock = LOCKFS_ULOCK; lf.lf_flags = 0; error = qfs_fiolfs(vp, &lf, 1); if (error) { flp->error = FIOLOG_ENOULOCK; error = 0; goto out; } #else /* QFS doesn't really support LOCKFS. */ #endif /* LQFS_TODO_LOCKFS */ /* * There's nothing in the log yet (we've just allocated it) * so directly write out the super block. * Note, we have to force this sb out to disk * (not just to the log) so that if we crash we know we are logging */ VFS_LOCK_MUTEX_ENTER(qfsvfsp); LQFS_SET_FS_CLEAN(fs, FSLOG); LQFS_SET_FS_ROLLED(fs, FS_NEED_ROLL); /* Mark the fs as unrolled */ #ifdef LUFS QFS_BWRITE2(NULL, qfsvfsp->vfs_bufp); #else sam_update_sblk(qfsvfsp, 0, 0, TRUE); #endif /* LUFS */ VFS_LOCK_MUTEX_EXIT(qfsvfsp); error = 0; goto out; errout: /* * Aquire the qfs_scan_lock before de-linking the mtm data * structure so that we keep qfs_sync() and qfs_update() away * when they execute the ufs_scan_inodes() run while we're in * progress of enabling/disabling logging. */ mutex_enter(&qfs_scan_lock); (void) lqfs_unsnarf(qfsvfsp); mutex_exit(&qfs_scan_lock); (void) lqfs_free(qfsvfsp); unlockout: #ifdef LQFS_TODO_LOCKFS mutex_exit(&ulp->ul_lock); #else /* QFS doesn't really support LOCKFS. */ #endif /* LQFS_TODO_LOCKFS */ vfs_unlock(vfsp); #ifdef LQFS_TODO_LOCKFS lf.lf_lock = LOCKFS_ULOCK; lf.lf_flags = 0; (void) qfs_fiolfs(vp, &lf, 1); #else /* QFS doesn't really support LOCKFS. */ #endif /* LQFS_TODO_LOCKFS */ out: mutex_enter(&ip->mp->ms.m_waitwr_mutex); ip->mp->mt.fi_status |= FS_LOGSTATE_KNOWN; mutex_exit(&ip->mp->ms.m_waitwr_mutex); return (error); }
/* * Real work associated with removing an extended attribute from a vnode. * Assumes the attribute lock has already been grabbed. */ static int ufs_extattr_rm(struct vnode *vp, int attrnamespace, const char *name, struct ucred *cred, struct proc *p) { struct ufs_extattr_list_entry *attribute; struct ufs_extattr_header ueh; struct iovec local_aiov; struct uio local_aio; struct mount *mp = vp->v_mount; struct ufsmount *ump = VFSTOUFS(mp); struct inode *ip = VTOI(vp); off_t base_offset; int error = 0, ioflag; if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); if (!(ump->um_extattr.uepm_flags & UFS_EXTATTR_UEPM_STARTED)) return (EOPNOTSUPP); if (!ufs_extattr_valid_attrname(attrnamespace, name)) return (EINVAL); attribute = ufs_extattr_find_attr(ump, attrnamespace, name); if (!attribute) /* XXX: ENOENT here will eventually be ENOATTR. */ return (ENOENT); if ((error = ufs_extattr_credcheck(vp, attribute, cred, p, IWRITE))) return (error); /* * Find base offset of header in file based on file header size, and * data header size + maximum data size, indexed by inode number. */ base_offset = sizeof(struct ufs_extattr_fileheader) + ip->i_number * (sizeof(struct ufs_extattr_header) + attribute->uele_fileheader.uef_size); /* * Check to see if currently defined. */ bzero(&ueh, sizeof(struct ufs_extattr_header)); local_aiov.iov_base = (caddr_t) &ueh; local_aiov.iov_len = sizeof(struct ufs_extattr_header); local_aio.uio_iov = &local_aiov; local_aio.uio_iovcnt = 1; local_aio.uio_rw = UIO_READ; local_aio.uio_segflg = UIO_SYSSPACE; local_aio.uio_procp = p; local_aio.uio_offset = base_offset; local_aio.uio_resid = sizeof(struct ufs_extattr_header); VOP_LEASE(attribute->uele_backing_vnode, p, cred, LEASE_WRITE); /* * Don't need to get the lock on the backing vnode if the vnode we're * modifying is it, as we already hold the lock. */ if (attribute->uele_backing_vnode != vp) vn_lock(attribute->uele_backing_vnode, LK_EXCLUSIVE | LK_NOPAUSE | LK_RETRY, p); error = VOP_READ(attribute->uele_backing_vnode, &local_aio, IO_NODELOCKED, ump->um_extattr.uepm_ucred); if (error) goto vopunlock_exit; /* Defined? */ if ((ueh.ueh_flags & UFS_EXTATTR_ATTR_FLAG_INUSE) == 0) { /* XXX: ENOENT here will eventually be ENOATTR. */ error = ENOENT; goto vopunlock_exit; } /* Valid for the current inode generation? */ if (ueh.ueh_i_gen != ip->i_ffs_gen) { /* * The inode itself has a different generation number than * the attribute data. For now, the best solution is to * coerce this to undefined, and let it get cleaned up by * the next write or extattrctl clean. */ printf("ufs_extattr_rm: inode number inconsistency (%d, %d)\n", ueh.ueh_i_gen, ip->i_ffs_gen); /* XXX: ENOENT here will eventually be ENOATTR. */ error = ENOENT; goto vopunlock_exit; } /* Flag it as not in use. */ ueh.ueh_flags = 0; ueh.ueh_len = 0; local_aiov.iov_base = (caddr_t) &ueh; local_aiov.iov_len = sizeof(struct ufs_extattr_header); local_aio.uio_iov = &local_aiov; local_aio.uio_iovcnt = 1; local_aio.uio_rw = UIO_WRITE; local_aio.uio_segflg = UIO_SYSSPACE; local_aio.uio_procp = p; local_aio.uio_offset = base_offset; local_aio.uio_resid = sizeof(struct ufs_extattr_header); ioflag = IO_NODELOCKED; if (ufs_extattr_sync) ioflag |= IO_SYNC; error = VOP_WRITE(attribute->uele_backing_vnode, &local_aio, ioflag, ump->um_extattr.uepm_ucred); if (error) goto vopunlock_exit; if (local_aio.uio_resid != 0) error = ENXIO; vopunlock_exit: VOP_UNLOCK(attribute->uele_backing_vnode, 0, p); return (error); }
/* * Real work associated with retrieving a named attribute--assumes that * the attribute lock has already been grabbed. */ static int ufs_extattr_get(struct vnode *vp, int attrnamespace, const char *name, struct uio *uio, size_t *size, struct ucred *cred, struct proc *p) { struct ufs_extattr_list_entry *attribute; struct ufs_extattr_header ueh; struct iovec local_aiov; struct uio local_aio; struct mount *mp = vp->v_mount; struct ufsmount *ump = VFSTOUFS(mp); struct inode *ip = VTOI(vp); off_t base_offset; size_t len, old_len; int error = 0; if (!(ump->um_extattr.uepm_flags & UFS_EXTATTR_UEPM_STARTED)) return (EOPNOTSUPP); if (strlen(name) == 0) { /* XXX retrieve attribute lists. */ /* XXX should probably be checking for name == NULL? */ return (EINVAL); } attribute = ufs_extattr_find_attr(ump, attrnamespace, name); if (!attribute) /* XXX: ENOENT here will eventually be ENOATTR. */ return (ENOENT); if ((error = ufs_extattr_credcheck(vp, attribute, cred, p, IREAD))) return (error); /* * Allow only offsets of zero to encourage the read/replace * extended attribute semantic. Otherwise we can't guarantee * atomicity, as we don't provide locks for extended attributes. */ if (uio != NULL && uio->uio_offset != 0) return (ENXIO); /* * Find base offset of header in file based on file header size, and * data header size + maximum data size, indexed by inode number. */ base_offset = sizeof(struct ufs_extattr_fileheader) + ip->i_number * (sizeof(struct ufs_extattr_header) + attribute->uele_fileheader.uef_size); /* * Read in the data header to see if the data is defined, and if so * how much. */ bzero(&ueh, sizeof(struct ufs_extattr_header)); local_aiov.iov_base = (caddr_t) &ueh; local_aiov.iov_len = sizeof(struct ufs_extattr_header); local_aio.uio_iov = &local_aiov; local_aio.uio_iovcnt = 1; local_aio.uio_rw = UIO_READ; local_aio.uio_segflg = UIO_SYSSPACE; local_aio.uio_procp = p; local_aio.uio_offset = base_offset; local_aio.uio_resid = sizeof(struct ufs_extattr_header); /* * Acquire locks. */ VOP_LEASE(attribute->uele_backing_vnode, p, cred, LEASE_READ); /* * Don't need to get a lock on the backing file if the getattr is * being applied to the backing file, as the lock is already held. */ if (attribute->uele_backing_vnode != vp) vn_lock(attribute->uele_backing_vnode, LK_SHARED | LK_NOPAUSE | LK_RETRY, p); error = VOP_READ(attribute->uele_backing_vnode, &local_aio, IO_NODELOCKED, ump->um_extattr.uepm_ucred); if (error) goto vopunlock_exit; /* Defined? */ if ((ueh.ueh_flags & UFS_EXTATTR_ATTR_FLAG_INUSE) == 0) { /* XXX: ENOENT here will eventually be ENOATTR. */ error = ENOENT; goto vopunlock_exit; } /* Valid for the current inode generation? */ if (ueh.ueh_i_gen != ip->i_ffs_gen) { /* * The inode itself has a different generation number * than the attribute data. For now, the best solution * is to coerce this to undefined, and let it get cleaned * up by the next write or extattrctl clean. */ printf("ufs_extattr_get: inode number inconsistency (%d, %d)\n", ueh.ueh_i_gen, ip->i_ffs_gen); /* XXX: ENOENT here will eventually be ENOATTR. */ error = ENOENT; goto vopunlock_exit; } /* Local size consistency check. */ if (ueh.ueh_len > attribute->uele_fileheader.uef_size) { error = ENXIO; goto vopunlock_exit; } /* Return full data size if caller requested it. */ if (size != NULL) *size = ueh.ueh_len; /* Return data if the caller requested it. */ if (uio != NULL) { /* Allow for offset into the attribute data. */ uio->uio_offset = base_offset + sizeof(struct ufs_extattr_header); /* * Figure out maximum to transfer -- use buffer size and * local data limit. */ len = MIN(uio->uio_resid, ueh.ueh_len); old_len = uio->uio_resid; uio->uio_resid = len; error = VOP_READ(attribute->uele_backing_vnode, uio, IO_NODELOCKED, ump->um_extattr.uepm_ucred); if (error) goto vopunlock_exit; uio->uio_resid = old_len - (len - uio->uio_resid); } vopunlock_exit: if (uio != NULL) uio->uio_offset = 0; if (attribute->uele_backing_vnode != vp) VOP_UNLOCK(attribute->uele_backing_vnode, 0, p); return (error); }
/* * Real work associated with setting a vnode's extended attributes; * assumes that the attribute lock has already been grabbed. */ static int ufs_extattr_set(struct vnode *vp, int attrnamespace, const char *name, struct uio *uio, struct ucred *cred, struct proc *p) { struct ufs_extattr_list_entry *attribute; struct ufs_extattr_header ueh; struct iovec local_aiov; struct uio local_aio; struct mount *mp = vp->v_mount; struct ufsmount *ump = VFSTOUFS(mp); struct inode *ip = VTOI(vp); off_t base_offset; int error = 0, ioflag; if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); if (!(ump->um_extattr.uepm_flags & UFS_EXTATTR_UEPM_STARTED)) return (EOPNOTSUPP); if (!ufs_extattr_valid_attrname(attrnamespace, name)) return (EINVAL); attribute = ufs_extattr_find_attr(ump, attrnamespace, name); if (!attribute) /* XXX: ENOENT here will eventually be ENOATTR. */ return (ENOENT); if ((error = ufs_extattr_credcheck(vp, attribute, cred, p, IWRITE))) return (error); /* * Early rejection of invalid offsets/length. * Reject: any offset but 0 (replace) * Any size greater than attribute size limit */ if (uio->uio_offset != 0 || uio->uio_resid > attribute->uele_fileheader.uef_size) return (ENXIO); /* * Find base offset of header in file based on file header size, and * data header size + maximum data size, indexed by inode number. */ base_offset = sizeof(struct ufs_extattr_fileheader) + ip->i_number * (sizeof(struct ufs_extattr_header) + attribute->uele_fileheader.uef_size); /* * Write out a data header for the data. */ ueh.ueh_len = uio->uio_resid; ueh.ueh_flags = UFS_EXTATTR_ATTR_FLAG_INUSE; ueh.ueh_i_gen = ip->i_ffs_gen; local_aiov.iov_base = (caddr_t) &ueh; local_aiov.iov_len = sizeof(struct ufs_extattr_header); local_aio.uio_iov = &local_aiov; local_aio.uio_iovcnt = 1; local_aio.uio_rw = UIO_WRITE; local_aio.uio_segflg = UIO_SYSSPACE; local_aio.uio_procp = p; local_aio.uio_offset = base_offset; local_aio.uio_resid = sizeof(struct ufs_extattr_header); /* * Acquire locks. */ VOP_LEASE(attribute->uele_backing_vnode, p, cred, LEASE_WRITE); /* * Don't need to get a lock on the backing file if the setattr is * being applied to the backing file, as the lock is already held. */ if (attribute->uele_backing_vnode != vp) vn_lock(attribute->uele_backing_vnode, LK_EXCLUSIVE | LK_NOPAUSE | LK_RETRY, p); ioflag = IO_NODELOCKED; if (ufs_extattr_sync) ioflag |= IO_SYNC; error = VOP_WRITE(attribute->uele_backing_vnode, &local_aio, ioflag, ump->um_extattr.uepm_ucred); if (error) goto vopunlock_exit; if (local_aio.uio_resid != 0) { error = ENXIO; goto vopunlock_exit; } /* * Write out user data. */ uio->uio_offset = base_offset + sizeof(struct ufs_extattr_header); ioflag = IO_NODELOCKED; if (ufs_extattr_sync) ioflag |= IO_SYNC; error = VOP_WRITE(attribute->uele_backing_vnode, uio, ioflag, ump->um_extattr.uepm_ucred); vopunlock_exit: uio->uio_offset = 0; if (attribute->uele_backing_vnode != vp) VOP_UNLOCK(attribute->uele_backing_vnode, 0, p); return (error); }
/* * Vnode op for reading directories. * * This function has to convert directory entries from the on-disk * format to the format defined by <sys/dirent.h>. Unfortunately, the * conversion will blow up some entries by four bytes, so it can't be * done in place. Instead, the conversion is done entry by entry and * the converted entry is sent via uiomove. * * XXX allocate a buffer, convert as many entries as possible, then send * the whole buffer to uiomove */ int ext2_readdir(struct vop_readdir_args *ap) { struct uio *uio = ap->a_uio; int count, error; struct ext2fs_direct_2 *edp, *dp; int ncookies; struct dirent dstdp; struct uio auio; struct iovec aiov; caddr_t dirbuf; int DIRBLKSIZ = VTOI(ap->a_vp)->i_e2fs->e2fs_bsize; int readcnt; off_t startoffset = uio->uio_offset; count = uio->uio_resid; /* * Avoid complications for partial directory entries by adjusting * the i/o to end at a block boundary. Don't give up (like ufs * does) if the initial adjustment gives a negative count, since * many callers don't supply a large enough buffer. The correct * size is a little larger than DIRBLKSIZ to allow for expansion * of directory entries, but some callers just use 512. */ count -= (uio->uio_offset + count) & (DIRBLKSIZ -1); if (count <= 0) count += DIRBLKSIZ; auio = *uio; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_resid = count; auio.uio_segflg = UIO_SYSSPACE; aiov.iov_len = count; dirbuf = malloc(count, M_TEMP, M_WAITOK); aiov.iov_base = dirbuf; error = VOP_READ(ap->a_vp, &auio, 0, ap->a_cred); if (error == 0) { readcnt = count - auio.uio_resid; edp = (struct ext2fs_direct_2 *)&dirbuf[readcnt]; ncookies = 0; bzero(&dstdp, offsetof(struct dirent, d_name)); for (dp = (struct ext2fs_direct_2 *)dirbuf; !error && uio->uio_resid > 0 && dp < edp; ) { /*- * "New" ext2fs directory entries differ in 3 ways * from ufs on-disk ones: * - the name is not necessarily NUL-terminated. * - the file type field always exists and always * follows the name length field. * - the file type is encoded in a different way. * * "Old" ext2fs directory entries need no special * conversions, since they are binary compatible * with "new" entries having a file type of 0 (i.e., * EXT2_FT_UNKNOWN). Splitting the old name length * field didn't make a mess like it did in ufs, * because ext2fs uses a machine-independent disk * layout. */ dstdp.d_fileno = dp->e2d_ino; dstdp.d_type = FTTODT(dp->e2d_type); dstdp.d_namlen = dp->e2d_namlen; dstdp.d_reclen = GENERIC_DIRSIZ(&dstdp); bcopy(dp->e2d_name, dstdp.d_name, dstdp.d_namlen); bzero(dstdp.d_name + dstdp.d_namlen, dstdp.d_reclen - offsetof(struct dirent, d_name) - dstdp.d_namlen); if (dp->e2d_reclen > 0) { if(dstdp.d_reclen <= uio->uio_resid) { /* advance dp */ dp = (struct ext2fs_direct_2 *) ((char *)dp + dp->e2d_reclen); error = uiomove(&dstdp, dstdp.d_reclen, uio); if (!error) ncookies++; } else break; } else { error = EIO; break; } } /* we need to correct uio_offset */ uio->uio_offset = startoffset + (caddr_t)dp - dirbuf; if (!error && ap->a_ncookies != NULL) { u_long *cookiep, *cookies, *ecookies; off_t off; if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) panic("ext2_readdir: unexpected uio from NFS server"); cookies = malloc(ncookies * sizeof(u_long), M_TEMP, M_WAITOK); off = startoffset; for (dp = (struct ext2fs_direct_2 *)dirbuf, cookiep = cookies, ecookies = cookies + ncookies; cookiep < ecookies; dp = (struct ext2fs_direct_2 *)((caddr_t) dp + dp->e2d_reclen)) { off += dp->e2d_reclen; *cookiep++ = (u_long) off; } *ap->a_ncookies = ncookies; *ap->a_cookies = cookies; } }
/* * Remove a directory entry after a call to namei, using * the parameters which it left in nameidata. The entry * dp->i_offset contains the offset into the directory of the * entry to be eliminated. The dp->i_count field contains the * size of the previous record in the directory. If this * is 0, the first entry is being deleted, so we need only * zero the inode number to mark the entry as free. If the * entry is not the first in the directory, we must reclaim * the space of the now empty record by adding the record size * to the size of the previous entry. */ int ufs_dirremove(struct vnode *dvp, struct inode *ip, int flags, int isrmdir) { struct inode *dp; struct direct *ep; struct buf *bp; int error; dp = VTOI(dvp); if ((error = UFS_BUFATOFF(dp, (off_t)(dp->i_offset - dp->i_count), (char **)&ep, &bp)) != 0) return (error); #ifdef UFS_DIRHASH /* * Remove the dirhash entry. This is complicated by the fact * that `ep' is the previous entry when dp->i_count != 0. */ if (dp->i_dirhash != NULL) ufsdirhash_remove(dp, (dp->i_count == 0) ? ep : (struct direct *)((char *)ep + ep->d_reclen), dp->i_offset); #endif if (dp->i_count == 0) { /* * First entry in block: set d_ino to zero. */ ep->d_ino = 0; } else { /* * Collapse new free space into previous entry. */ ep->d_reclen += dp->i_reclen; } #ifdef UFS_DIRHASH if (dp->i_dirhash != NULL) ufsdirhash_checkblock(dp, (char *)ep - ((dp->i_offset - dp->i_count) & (DIRBLKSIZ - 1)), dp->i_offset & ~(DIRBLKSIZ - 1)); #endif if (DOINGSOFTDEP(dvp)) { if (ip) { ip->i_effnlink--; softdep_change_linkcnt(ip, 0); softdep_setup_remove(bp, dp, ip, isrmdir); } if (softdep_slowdown(dvp)) { error = bwrite(bp); } else { bdwrite(bp); error = 0; } } else { if (ip) { ip->i_effnlink--; DIP_ADD(ip, nlink, -1); ip->i_flag |= IN_CHANGE; } if (DOINGASYNC(dvp) && dp->i_count != 0) { bdwrite(bp); error = 0; } else error = bwrite(bp); } dp->i_flag |= IN_CHANGE | IN_UPDATE; return (error); }
/* * Balloc defines the structure of filesystem storage * by allocating the physical blocks on a device given * the inode and the logical block number in a file. * This is the allocation strategy for UFS1. Below is * the allocation strategy for UFS2. */ int ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size, struct ucred *cred, int flags, struct buf **bpp) { struct inode *ip; struct ufs1_dinode *dp; ufs_lbn_t lbn, lastlbn; struct fs *fs; ufs1_daddr_t nb; struct buf *bp, *nbp; struct ufsmount *ump; struct indir indirs[NIADDR + 2]; int deallocated, osize, nsize, num, i, error; ufs2_daddr_t newb; ufs1_daddr_t *bap, pref; ufs1_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1]; ufs2_daddr_t *lbns_remfree, lbns[NIADDR + 1]; int unwindidx = -1; int saved_inbdflush; static struct timeval lastfail; static int curfail; int gbflags, reclaimed; ip = VTOI(vp); dp = ip->i_din1; fs = ip->i_fs; ump = ip->i_ump; lbn = lblkno(fs, startoffset); size = blkoff(fs, startoffset) + size; reclaimed = 0; if (size > fs->fs_bsize) panic("ffs_balloc_ufs1: blk too big"); *bpp = NULL; if (flags & IO_EXT) return (EOPNOTSUPP); if (lbn < 0) return (EFBIG); gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0; if (DOINGSOFTDEP(vp)) softdep_prealloc(vp, MNT_WAIT); /* * If the next write will extend the file into a new block, * and the file is currently composed of a fragment * this fragment has to be extended to be a full block. */ lastlbn = lblkno(fs, ip->i_size); if (lastlbn < NDADDR && lastlbn < lbn) { nb = lastlbn; osize = blksize(fs, ip, nb); if (osize < fs->fs_bsize && osize > 0) { UFS_LOCK(ump); error = ffs_realloccg(ip, nb, dp->di_db[nb], ffs_blkpref_ufs1(ip, lastlbn, (int)nb, &dp->di_db[0]), osize, (int)fs->fs_bsize, flags, cred, &bp); if (error) return (error); if (DOINGSOFTDEP(vp)) softdep_setup_allocdirect(ip, nb, dbtofsb(fs, bp->b_blkno), dp->di_db[nb], fs->fs_bsize, osize, bp); ip->i_size = smalllblktosize(fs, nb + 1); dp->di_size = ip->i_size; dp->di_db[nb] = dbtofsb(fs, bp->b_blkno); ip->i_flag |= IN_CHANGE | IN_UPDATE; if (flags & IO_SYNC) bwrite(bp); else bawrite(bp); } } /* * The first NDADDR blocks are direct blocks */ if (lbn < NDADDR) { if (flags & BA_METAONLY) panic("ffs_balloc_ufs1: BA_METAONLY for direct block"); nb = dp->di_db[lbn]; if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) { error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp); if (error) { brelse(bp); return (error); } bp->b_blkno = fsbtodb(fs, nb); *bpp = bp; return (0); } if (nb != 0) { /* * Consider need to reallocate a fragment. */ osize = fragroundup(fs, blkoff(fs, ip->i_size)); nsize = fragroundup(fs, size); if (nsize <= osize) { error = bread(vp, lbn, osize, NOCRED, &bp); if (error) { brelse(bp); return (error); } bp->b_blkno = fsbtodb(fs, nb); } else { UFS_LOCK(ump); error = ffs_realloccg(ip, lbn, dp->di_db[lbn], ffs_blkpref_ufs1(ip, lbn, (int)lbn, &dp->di_db[0]), osize, nsize, flags, cred, &bp); if (error) return (error); if (DOINGSOFTDEP(vp)) softdep_setup_allocdirect(ip, lbn, dbtofsb(fs, bp->b_blkno), nb, nsize, osize, bp); } } else { if (ip->i_size < smalllblktosize(fs, lbn + 1)) nsize = fragroundup(fs, size); else nsize = fs->fs_bsize; UFS_LOCK(ump); error = ffs_alloc(ip, lbn, ffs_blkpref_ufs1(ip, lbn, (int)lbn, &dp->di_db[0]), nsize, flags, cred, &newb); if (error) return (error); bp = getblk(vp, lbn, nsize, 0, 0, gbflags); bp->b_blkno = fsbtodb(fs, newb); if (flags & BA_CLRBUF) vfs_bio_clrbuf(bp); if (DOINGSOFTDEP(vp)) softdep_setup_allocdirect(ip, lbn, newb, 0, nsize, 0, bp); } dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno); ip->i_flag |= IN_CHANGE | IN_UPDATE; *bpp = bp; return (0); } /* * Determine the number of levels of indirection. */ pref = 0; if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0) return(error); #ifdef INVARIANTS if (num < 1) panic ("ffs_balloc_ufs1: ufs_getlbns returned indirect block"); #endif saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH); /* * Fetch the first indirect block allocating if necessary. */ --num; nb = dp->di_ib[indirs[0].in_off]; allocib = NULL; allocblk = allociblk; lbns_remfree = lbns; if (nb == 0) { UFS_LOCK(ump); pref = ffs_blkpref_ufs1(ip, lbn, -indirs[0].in_off - 1, (ufs1_daddr_t *)0); if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags, cred, &newb)) != 0) { curthread_pflags_restore(saved_inbdflush); return (error); } pref = newb + fs->fs_frag; nb = newb; *allocblk++ = nb; *lbns_remfree++ = indirs[1].in_lbn; bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, gbflags); bp->b_blkno = fsbtodb(fs, nb); vfs_bio_clrbuf(bp); if (DOINGSOFTDEP(vp)) { softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off, newb, 0, fs->fs_bsize, 0, bp); bdwrite(bp); } else { /* * Write synchronously so that indirect blocks * never point at garbage. */ if (DOINGASYNC(vp)) bdwrite(bp); else if ((error = bwrite(bp)) != 0) goto fail; } allocib = &dp->di_ib[indirs[0].in_off]; *allocib = nb; ip->i_flag |= IN_CHANGE | IN_UPDATE; } /* * Fetch through the indirect blocks, allocating as necessary. */ retry: for (i = 1;;) { error = bread(vp, indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp); if (error) { brelse(bp); goto fail; } bap = (ufs1_daddr_t *)bp->b_data; nb = bap[indirs[i].in_off]; if (i == num) break; i += 1; if (nb != 0) { bqrelse(bp); continue; } UFS_LOCK(ump); /* * If parent indirect has just been allocated, try to cluster * immediately following it. */ if (pref == 0) pref = ffs_blkpref_ufs1(ip, lbn, i - num - 1, (ufs1_daddr_t *)0); if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags | IO_BUFLOCKED, cred, &newb)) != 0) { brelse(bp); if (++reclaimed == 1) { UFS_LOCK(ump); softdep_request_cleanup(fs, vp, cred, FLUSH_BLOCKS_WAIT); UFS_UNLOCK(ump); goto retry; } if (ppsratecheck(&lastfail, &curfail, 1)) { ffs_fserr(fs, ip->i_number, "filesystem full"); uprintf("\n%s: write failed, filesystem " "is full\n", fs->fs_fsmnt); } goto fail; } pref = newb + fs->fs_frag; nb = newb; *allocblk++ = nb; *lbns_remfree++ = indirs[i].in_lbn; nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0); nbp->b_blkno = fsbtodb(fs, nb); vfs_bio_clrbuf(nbp); if (DOINGSOFTDEP(vp)) { softdep_setup_allocindir_meta(nbp, ip, bp, indirs[i - 1].in_off, nb); bdwrite(nbp); } else { /* * Write synchronously so that indirect blocks * never point at garbage. */ if ((error = bwrite(nbp)) != 0) { brelse(bp); goto fail; } } bap[indirs[i - 1].in_off] = nb; if (allocib == NULL && unwindidx < 0) unwindidx = i - 1; /* * If required, write synchronously, otherwise use * delayed write. */ if (flags & IO_SYNC) { bwrite(bp); } else { if (bp->b_bufsize == fs->fs_bsize) bp->b_flags |= B_CLUSTEROK; bdwrite(bp); } } /* * If asked only for the indirect block, then return it. */ if (flags & BA_METAONLY) { curthread_pflags_restore(saved_inbdflush); *bpp = bp; return (0); } /* * Get the data block, allocating if necessary. */ if (nb == 0) { UFS_LOCK(ump); /* * If allocating metadata at the front of the cylinder * group and parent indirect block has just been allocated, * then cluster next to it if it is the first indirect in * the file. Otherwise it has been allocated in the metadata * area, so we want to find our own place out in the data area. */ if (pref == 0 || (lbn > NDADDR && fs->fs_metaspace != 0)) pref = ffs_blkpref_ufs1(ip, lbn, indirs[i].in_off, &bap[0]); error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags | IO_BUFLOCKED, cred, &newb); if (error) { brelse(bp); if (++reclaimed == 1) { UFS_LOCK(ump); softdep_request_cleanup(fs, vp, cred, FLUSH_BLOCKS_WAIT); UFS_UNLOCK(ump); goto retry; } if (ppsratecheck(&lastfail, &curfail, 1)) { ffs_fserr(fs, ip->i_number, "filesystem full"); uprintf("\n%s: write failed, filesystem " "is full\n", fs->fs_fsmnt); } goto fail; } nb = newb; *allocblk++ = nb; *lbns_remfree++ = lbn; nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags); nbp->b_blkno = fsbtodb(fs, nb); if (flags & BA_CLRBUF) vfs_bio_clrbuf(nbp); if (DOINGSOFTDEP(vp)) softdep_setup_allocindir_page(ip, lbn, bp, indirs[i].in_off, nb, 0, nbp); bap[indirs[i].in_off] = nb; /* * If required, write synchronously, otherwise use * delayed write. */ if (flags & IO_SYNC) { bwrite(bp); } else { if (bp->b_bufsize == fs->fs_bsize) bp->b_flags |= B_CLUSTEROK; bdwrite(bp); } curthread_pflags_restore(saved_inbdflush); *bpp = nbp; return (0); } brelse(bp); if (flags & BA_CLRBUF) { int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT; if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) { error = cluster_read(vp, ip->i_size, lbn, (int)fs->fs_bsize, NOCRED, MAXBSIZE, seqcount, gbflags, &nbp); } else { error = bread_gb(vp, lbn, (int)fs->fs_bsize, NOCRED, gbflags, &nbp); } if (error) { brelse(nbp); goto fail; } } else {
/* * Reload all incore data for a filesystem (used after running fsck on * the root filesystem and finding things to fix). The filesystem must * be mounted read-only. * * Things to do to update the mount: * 1) invalidate all cached meta-data. * 2) re-read superblock from disk. * 3) re-read summary information from disk. * 4) invalidate all inactive vnodes. * 5) invalidate all cached file data. * 6) re-read inode data for all active vnodes. */ int ext2fs_reload(struct mount *mp, kauth_cred_t cred, struct lwp *l) { struct vnode *vp, *mvp, *devvp; struct inode *ip; struct buf *bp; struct m_ext2fs *fs; struct ext2fs *newfs; int i, error; void *cp; struct ufsmount *ump; if ((mp->mnt_flag & MNT_RDONLY) == 0) return (EINVAL); ump = VFSTOUFS(mp); /* * Step 1: invalidate all cached meta-data. */ devvp = ump->um_devvp; vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); error = vinvalbuf(devvp, 0, cred, l, 0, 0); VOP_UNLOCK(devvp); if (error) panic("ext2fs_reload: dirty1"); /* * Step 2: re-read superblock from disk. */ error = bread(devvp, SBLOCK, SBSIZE, NOCRED, 0, &bp); if (error) { return (error); } newfs = (struct ext2fs *)bp->b_data; error = ext2fs_checksb(newfs, (mp->mnt_flag & MNT_RDONLY) != 0); if (error) { brelse(bp, 0); return (error); } fs = ump->um_e2fs; /* * copy in new superblock, and compute in-memory values */ e2fs_sbload(newfs, &fs->e2fs); fs->e2fs_ncg = howmany(fs->e2fs.e2fs_bcount - fs->e2fs.e2fs_first_dblock, fs->e2fs.e2fs_bpg); fs->e2fs_fsbtodb = fs->e2fs.e2fs_log_bsize + LOG_MINBSIZE - DEV_BSHIFT; fs->e2fs_bsize = MINBSIZE << fs->e2fs.e2fs_log_bsize; fs->e2fs_bshift = LOG_MINBSIZE + fs->e2fs.e2fs_log_bsize; fs->e2fs_qbmask = fs->e2fs_bsize - 1; fs->e2fs_bmask = ~fs->e2fs_qbmask; fs->e2fs_ngdb = howmany(fs->e2fs_ncg, fs->e2fs_bsize / sizeof(struct ext2_gd)); fs->e2fs_ipb = fs->e2fs_bsize / EXT2_DINODE_SIZE(fs); fs->e2fs_itpg = fs->e2fs.e2fs_ipg / fs->e2fs_ipb; brelse(bp, 0); /* * Step 3: re-read summary information from disk. */ for (i = 0; i < fs->e2fs_ngdb; i++) { error = bread(devvp , EXT2_FSBTODB(fs, fs->e2fs.e2fs_first_dblock + 1 /* superblock */ + i), fs->e2fs_bsize, NOCRED, 0, &bp); if (error) { return (error); } e2fs_cgload((struct ext2_gd *)bp->b_data, &fs->e2fs_gd[i * fs->e2fs_bsize / sizeof(struct ext2_gd)], fs->e2fs_bsize); brelse(bp, 0); } /* Allocate a marker vnode. */ mvp = vnalloc(mp); /* * NOTE: not using the TAILQ_FOREACH here since in this loop vgone() * and vclean() can be called indirectly */ mutex_enter(&mntvnode_lock); loop: for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = vunmark(mvp)) { vmark(mvp, vp); if (vp->v_mount != mp || vismarker(vp)) continue; /* * Step 4: invalidate all inactive vnodes. */ if (vrecycle(vp, &mntvnode_lock)) { mutex_enter(&mntvnode_lock); (void)vunmark(mvp); goto loop; } /* * Step 5: invalidate all cached file data. */ mutex_enter(vp->v_interlock); mutex_exit(&mntvnode_lock); if (vget(vp, LK_EXCLUSIVE)) { mutex_enter(&mntvnode_lock); (void)vunmark(mvp); goto loop; } if (vinvalbuf(vp, 0, cred, l, 0, 0)) panic("ext2fs_reload: dirty2"); /* * Step 6: re-read inode data for all active vnodes. */ ip = VTOI(vp); error = bread(devvp, EXT2_FSBTODB(fs, ino_to_fsba(fs, ip->i_number)), (int)fs->e2fs_bsize, NOCRED, 0, &bp); if (error) { vput(vp); mutex_enter(&mntvnode_lock); (void)vunmark(mvp); break; } cp = (char *)bp->b_data + (ino_to_fsbo(fs, ip->i_number) * EXT2_DINODE_SIZE(fs)); e2fs_iload((struct ext2fs_dinode *)cp, ip->i_din.e2fs_din); ext2fs_set_inode_guid(ip); brelse(bp, 0); vput(vp); mutex_enter(&mntvnode_lock); } mutex_exit(&mntvnode_lock); vnfree(mvp); return (error); }
/* * this is exactly what we do here - the problem is that the conversion * will blow up some entries by four bytes, so it can't be done in place. * This is too bad. Right now the conversion is done entry by entry, the * converted entry is sent via uiomove. * * XXX allocate a buffer, convert as many entries as possible, then send * the whole buffer to uiomove * * ext2_readdir(struct vnode *a_vp, struct uio *a_uio, struct ucred *a_cred) */ int ext2_readdir(struct vop_readdir_args *ap) { struct uio *uio = ap->a_uio; int count, error; struct ext2_dir_entry_2 *edp, *dp; int ncookies; struct uio auio; struct iovec aiov; caddr_t dirbuf; int DIRBLKSIZ = VTOI(ap->a_vp)->i_e2fs->s_blocksize; int readcnt, retval; off_t startoffset = uio->uio_offset; if ((error = vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY)) != 0) return(error); count = uio->uio_resid; /* * Avoid complications for partial directory entries by adjusting * the i/o to end at a block boundary. Don't give up (like ufs * does) if the initial adjustment gives a negative count, since * many callers don't supply a large enough buffer. The correct * size is a little larger than DIRBLKSIZ to allow for expansion * of directory entries, but some callers just use 512. */ count -= (uio->uio_offset + count) & (DIRBLKSIZ -1); if (count <= 0) count += DIRBLKSIZ; if (count > MAXBSIZE) /* limit to a reasonable size */ count = MAXBSIZE; #ifdef EXT2FS_DEBUG kprintf("ext2_readdir: uio_offset = %lld, uio_resid = %d, count = %d\n", uio->uio_offset, uio->uio_resid, count); #endif auio = *uio; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_resid = count; auio.uio_segflg = UIO_SYSSPACE; aiov.iov_len = count; dirbuf = kmalloc(count, M_TEMP, M_WAITOK); aiov.iov_base = dirbuf; error = VOP_READ(ap->a_vp, &auio, 0, ap->a_cred); if (error == 0) { readcnt = count - auio.uio_resid; edp = (struct ext2_dir_entry_2 *)&dirbuf[readcnt]; ncookies = 0; for (dp = (struct ext2_dir_entry_2 *)dirbuf; !error && uio->uio_resid > 0 && dp < edp; ) { /*- * "New" ext2fs directory entries differ in 3 ways * from ufs on-disk ones: * - the name is not necessarily NUL-terminated. * - the file type field always exists and always * follows the name length field. * - the file type is encoded in a different way. * * "Old" ext2fs directory entries need no special * conversions, since they binary compatible with * "new" entries having a file type of 0 (i.e., * EXT2_FT_UNKNOWN). Splitting the old name length * field didn't make a mess like it did in ufs, * because ext2fs uses a machine-dependent disk * layout. */ if (dp->rec_len <= 0) { error = EIO; break; } retval = vop_write_dirent(&error, uio, dp->inode, FTTODT(dp->file_type), dp->name_len, dp->name); if (retval) break; /* advance dp */ dp = (struct ext2_dir_entry_2 *)((char *)dp + dp->rec_len); if (!error) ncookies++; } /* we need to correct uio_offset */ uio->uio_offset = startoffset + (caddr_t)dp - dirbuf; if (!error && ap->a_ncookies != NULL) { off_t *cookiep, *cookies, *ecookies; off_t off; if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) panic("ext2fs_readdir: unexpected uio from NFS server"); if (ncookies) { cookies = kmalloc(ncookies * sizeof(off_t), M_TEMP, M_WAITOK); } else { cookies = kmalloc(sizeof(off_t), M_TEMP, M_WAITOK); } off = startoffset; for (dp = (struct ext2_dir_entry_2 *)dirbuf, cookiep = cookies, ecookies = cookies + ncookies; cookiep < ecookies; dp = (struct ext2_dir_entry_2 *)((caddr_t) dp + dp->rec_len)) { off += dp->rec_len; *cookiep++ = off; } *ap->a_ncookies = ncookies; *ap->a_cookies = cookies; } } kfree(dirbuf, M_TEMP); if (ap->a_eofflag) *ap->a_eofflag = VTOI(ap->a_vp)->i_size <= uio->uio_offset; vn_unlock(ap->a_vp); return (error); }
/* * Go through the disk queues to initiate sandbagged IO; * go through the inodes to write those that have been modified; * initiate the writing of the super block if it has been modified. * * Note: we are always called with the filesystem marked `MPBUSY'. */ int ext2fs_sync(struct mount *mp, int waitfor, kauth_cred_t cred) { struct vnode *vp, *mvp; struct inode *ip; struct ufsmount *ump = VFSTOUFS(mp); struct m_ext2fs *fs; int error, allerror = 0; fs = ump->um_e2fs; if (fs->e2fs_fmod != 0 && fs->e2fs_ronly != 0) { /* XXX */ printf("fs = %s\n", fs->e2fs_fsmnt); panic("update: rofs mod"); } /* Allocate a marker vnode. */ mvp = vnalloc(mp); /* * Write back each (modified) inode. */ mutex_enter(&mntvnode_lock); loop: /* * NOTE: not using the TAILQ_FOREACH here since in this loop vgone() * and vclean() can be called indirectly */ for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = vunmark(mvp)) { vmark(mvp, vp); if (vp->v_mount != mp || vismarker(vp)) continue; mutex_enter(vp->v_interlock); ip = VTOI(vp); if (ip == NULL || (vp->v_iflag & (VI_XLOCK|VI_CLEAN)) != 0 || vp->v_type == VNON || ((ip->i_flag & (IN_CHANGE | IN_UPDATE | IN_MODIFIED)) == 0 && LIST_EMPTY(&vp->v_dirtyblkhd) && UVM_OBJ_IS_CLEAN(&vp->v_uobj))) { mutex_exit(vp->v_interlock); continue; } mutex_exit(&mntvnode_lock); error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT); if (error) { mutex_enter(&mntvnode_lock); if (error == ENOENT) { mutex_enter(&mntvnode_lock); (void)vunmark(mvp); goto loop; } continue; } if (vp->v_type == VREG && waitfor == MNT_LAZY) error = ext2fs_update(vp, NULL, NULL, 0); else error = VOP_FSYNC(vp, cred, waitfor == MNT_WAIT ? FSYNC_WAIT : 0, 0, 0); if (error) allerror = error; vput(vp); mutex_enter(&mntvnode_lock); } mutex_exit(&mntvnode_lock); vnfree(mvp); /* * Force stale file system control information to be flushed. */ if (waitfor != MNT_LAZY) { vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY); if ((error = VOP_FSYNC(ump->um_devvp, cred, waitfor == MNT_WAIT ? FSYNC_WAIT : 0, 0, 0)) != 0) allerror = error; VOP_UNLOCK(ump->um_devvp); } /* * Write back modified superblock. */ if (fs->e2fs_fmod != 0) { fs->e2fs_fmod = 0; fs->e2fs.e2fs_wtime = time_second; if ((error = ext2fs_cgupdate(ump, waitfor))) allerror = error; } return (allerror); }
/* * Write a directory entry after a call to namei, using the parameters * that it left in the directory inode. The argument ip is the inode which * the new directory entry will refer to. Dvp is a pointer to the directory * to be written, which was left locked by namei. Remaining parameters * (dp->i_offset, dp->i_count) indicate how the space for the new * entry is to be obtained. */ int ext2_direnter(struct inode *ip, struct vnode *dvp, struct componentname *cnp) { struct ext2_dir_entry_2 *ep, *nep; struct inode *dp; struct buf *bp; struct ext2_dir_entry_2 newdir; struct iovec aiov; struct uio auio; u_int dsize; int error, loc, newentrysize, spacefree; char *dirbuf; int DIRBLKSIZ = ip->i_e2fs->s_blocksize; dp = VTOI(dvp); newdir.inode = ip->i_number; newdir.name_len = cnp->cn_namelen; if (EXT2_HAS_INCOMPAT_FEATURE(ip->i_e2fs->s_es, EXT2_FEATURE_INCOMPAT_FILETYPE)) newdir.file_type = DTTOFT(IFTODT(ip->i_mode)); else newdir.file_type = EXT2_FT_UNKNOWN; bcopy(cnp->cn_nameptr, newdir.name, (unsigned)cnp->cn_namelen + 1); newentrysize = EXT2_DIR_REC_LEN(newdir.name_len); if (dp->i_count == 0) { /* * If dp->i_count is 0, then namei could find no * space in the directory. Here, dp->i_offset will * be on a directory block boundary and we will write the * new entry into a fresh block. */ if (dp->i_offset & (DIRBLKSIZ - 1)) panic("ext2_direnter: newblk"); auio.uio_offset = dp->i_offset; newdir.rec_len = DIRBLKSIZ; auio.uio_resid = newentrysize; aiov.iov_len = newentrysize; aiov.iov_base = (caddr_t)&newdir; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_rw = UIO_WRITE; auio.uio_segflg = UIO_SYSSPACE; auio.uio_td = NULL; error = VOP_WRITE(dvp, &auio, IO_SYNC, cnp->cn_cred); if (DIRBLKSIZ > VFSTOEXT2(dvp->v_mount)->um_mountp->mnt_stat.f_bsize) /* XXX should grow with balloc() */ panic("ext2_direnter: frag size"); else if (!error) { dp->i_size = roundup(dp->i_size, DIRBLKSIZ); dp->i_flag |= IN_CHANGE; } return (error); } /* * If dp->i_count is non-zero, then namei found space * for the new entry in the range dp->i_offset to * dp->i_offset + dp->i_count in the directory. * To use this space, we may have to compact the entries located * there, by copying them together towards the beginning of the * block, leaving the free space in one usable chunk at the end. */ /* * Increase size of directory if entry eats into new space. * This should never push the size past a new multiple of * DIRBLKSIZE. * * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN. */ if (dp->i_offset + dp->i_count > dp->i_size) dp->i_size = dp->i_offset + dp->i_count; /* * Get the block containing the space for the new directory entry. */ if ((error = EXT2_BLKATOFF(dvp, (off_t)dp->i_offset, &dirbuf, &bp)) != 0) return (error); /* * Find space for the new entry. In the simple case, the entry at * offset base will have the space. If it does not, then namei * arranged that compacting the region dp->i_offset to * dp->i_offset + dp->i_count would yield the * space. */ ep = (struct ext2_dir_entry_2 *)dirbuf; dsize = EXT2_DIR_REC_LEN(ep->name_len); spacefree = ep->rec_len - dsize; for (loc = ep->rec_len; loc < dp->i_count; ) { nep = (struct ext2_dir_entry_2 *)(dirbuf + loc); if (ep->inode) { /* trim the existing slot */ ep->rec_len = dsize; ep = (struct ext2_dir_entry_2 *)((char *)ep + dsize); } else { /* overwrite; nothing there; header is ours */ spacefree += dsize; } dsize = EXT2_DIR_REC_LEN(nep->name_len); spacefree += nep->rec_len - dsize; loc += nep->rec_len; bcopy((caddr_t)nep, (caddr_t)ep, dsize); } /* * Update the pointer fields in the previous entry (if any), * copy in the new entry, and write out the block. */ if (ep->inode == 0) { if (spacefree + dsize < newentrysize) panic("ext2_direnter: compact1"); newdir.rec_len = spacefree + dsize; } else { if (spacefree < newentrysize) panic("ext2_direnter: compact2"); newdir.rec_len = spacefree; ep->rec_len = dsize; ep = (struct ext2_dir_entry_2 *)((char *)ep + dsize); } bcopy((caddr_t)&newdir, (caddr_t)ep, (u_int)newentrysize); error = bwrite(bp); dp->i_flag |= IN_CHANGE | IN_UPDATE; if (!error && dp->i_endoff && dp->i_endoff < dp->i_size) error = EXT2_TRUNCATE(dvp, (off_t)dp->i_endoff, IO_SYNC, cnp->cn_cred); return (error); }
/* * Set attribute vnode op. called from several syscalls */ int ext2fs_setattr(void *v) { struct vop_setattr_args /* { struct vnode *a_vp; struct vattr *a_vap; kauth_cred_t a_cred; } */ *ap = v; struct vattr *vap = ap->a_vap; struct vnode *vp = ap->a_vp; struct inode *ip = VTOI(vp); kauth_cred_t cred = ap->a_cred; struct lwp *l = curlwp; int error; kauth_action_t action = KAUTH_VNODE_WRITE_FLAGS; bool changing_sysflags = false; /* * Check for unsettable attributes. */ if ((vap->va_type != VNON) || (vap->va_nlink != (nlink_t)VNOVAL) || (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) || (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) || ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) { return (EINVAL); } if (vap->va_flags != VNOVAL) { if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); /* * Check if we're allowed to change the flags. * If EXT2FS_SYSTEM_FLAGS is set, then the flags are treated * as system flags, otherwise they're considered to be user * flags. */ #ifdef EXT2FS_SYSTEM_FLAGS /* Indicate we're changing system flags if we are. */ if ((vap->va_flags & SF_APPEND) || (vap->va_flags & SF_IMMUTABLE)) { action |= KAUTH_VNODE_WRITE_SYSFLAGS; changing_sysflags = true; } /* Indicate the node has system flags if it does. */ if (ip->i_e2fs_flags & (EXT2_APPEND | EXT2_IMMUTABLE)) { action |= KAUTH_VNODE_HAS_SYSFLAGS; } #endif /* EXT2FS_SYSTEM_FLAGS */ error = kauth_authorize_vnode(cred, action, vp, NULL, genfs_can_chflags(cred, vp->v_type, ip->i_uid, changing_sysflags)); if (error) return (error); #ifdef EXT2FS_SYSTEM_FLAGS ip->i_e2fs_flags &= ~(EXT2_APPEND | EXT2_IMMUTABLE); ip->i_e2fs_flags |= (vap->va_flags & SF_APPEND) ? EXT2_APPEND : 0 | (vap->va_flags & SF_IMMUTABLE) ? EXT2_IMMUTABLE : 0; #else ip->i_e2fs_flags &= ~(EXT2_APPEND | EXT2_IMMUTABLE); ip->i_e2fs_flags |= (vap->va_flags & UF_APPEND) ? EXT2_APPEND : 0 | (vap->va_flags & UF_IMMUTABLE) ? EXT2_IMMUTABLE : 0; #endif ip->i_flag |= IN_CHANGE; if (vap->va_flags & (IMMUTABLE | APPEND)) return (0); } if (ip->i_e2fs_flags & (EXT2_APPEND | EXT2_IMMUTABLE)) return (EPERM); /* * Go through the fields and update iff not VNOVAL. */ if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) { if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); error = ext2fs_chown(vp, vap->va_uid, vap->va_gid, cred, l); if (error) return (error); } if (vap->va_size != VNOVAL) { /* * Disallow write attempts on read-only file systems; * unless the file is a socket, fifo, or a block or * character device resident on the file system. */ switch (vp->v_type) { case VDIR: return (EISDIR); case VLNK: case VREG: if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); default: break; } error = ext2fs_truncate(vp, vap->va_size, 0, cred); if (error) return (error); } ip = VTOI(vp); if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) { if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); error = kauth_authorize_vnode(cred, KAUTH_VNODE_WRITE_TIMES, vp, NULL, genfs_can_chtimes(vp, vap->va_vaflags, ip->i_uid, cred)); if (error) return (error); if (vap->va_atime.tv_sec != VNOVAL) if (!(vp->v_mount->mnt_flag & MNT_NOATIME)) ip->i_flag |= IN_ACCESS; if (vap->va_mtime.tv_sec != VNOVAL) { ip->i_flag |= IN_CHANGE | IN_UPDATE; if (vp->v_mount->mnt_flag & MNT_RELATIME) ip->i_flag |= IN_ACCESS; } error = ext2fs_update(vp, &vap->va_atime, &vap->va_mtime, UPDATE_WAIT); if (error) return (error); } error = 0; if (vap->va_mode != (mode_t)VNOVAL) { if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); error = ext2fs_chmod(vp, (int)vap->va_mode, cred, l); } VN_KNOTE(vp, NOTE_ATTRIB); return (error); }
/* ARGSUSED */ int ffs_full_fsync(struct vnode *vp, int flags) { int error, i, uflags; struct mount *mp; KASSERT(vp->v_tag == VT_UFS); KASSERT(VTOI(vp) != NULL); KASSERT(vp->v_type != VCHR && vp->v_type != VBLK); error = 0; uflags = UPDATE_CLOSE | ((flags & FSYNC_WAIT) ? UPDATE_WAIT : 0); mp = vp->v_mount; /* * Flush all dirty data associated with the vnode. */ if (vp->v_type == VREG) { int pflags = PGO_ALLPAGES | PGO_CLEANIT; if ((flags & FSYNC_WAIT)) pflags |= PGO_SYNCIO; if (fstrans_getstate(mp) == FSTRANS_SUSPENDING) pflags |= PGO_FREE; mutex_enter(vp->v_interlock); error = VOP_PUTPAGES(vp, 0, 0, pflags); if (error) return error; } #ifdef WAPBL if (mp && mp->mnt_wapbl) { /* * Don't bother writing out metadata if the syncer is * making the request. We will let the sync vnode * write it out in a single burst through a call to * VFS_SYNC(). */ if ((flags & (FSYNC_DATAONLY | FSYNC_LAZY)) != 0) return 0; if ((VTOI(vp)->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE | IN_MODIFY | IN_MODIFIED | IN_ACCESSED)) != 0) { error = UFS_WAPBL_BEGIN(mp); if (error) return error; error = ffs_update(vp, NULL, NULL, uflags); UFS_WAPBL_END(mp); } if (error || (flags & FSYNC_NOLOG) != 0) return error; /* * Don't flush the log if the vnode being flushed * contains no dirty buffers that could be in the log. */ if (!LIST_EMPTY(&vp->v_dirtyblkhd)) { error = wapbl_flush(mp->mnt_wapbl, 0); if (error) return error; } if ((flags & FSYNC_WAIT) != 0) { mutex_enter(vp->v_interlock); while (vp->v_numoutput != 0) cv_wait(&vp->v_cv, vp->v_interlock); mutex_exit(vp->v_interlock); } return error; } #endif /* WAPBL */ error = vflushbuf(vp, (flags & FSYNC_WAIT) != 0); if (error == 0) error = ffs_update(vp, NULL, NULL, uflags); if (error == 0 && (flags & FSYNC_CACHE) != 0) { i = 1; (void)VOP_IOCTL(VTOI(vp)->i_devvp, DIOCCACHESYNC, &i, FWRITE, kauth_cred_get()); } return error; }
static int fuse_vnode_cmp(struct vnode *vp, void *nidp) { return (VTOI(vp) != *((uint64_t *)nidp)); }
/* * this function handles ext4 extents block mapping */ static int ext4_ext_read(struct vop_read_args *ap) { struct vnode *vp; struct inode *ip; struct uio *uio; struct m_ext2fs *fs; struct buf *bp; struct ext4_extent nex, *ep; struct ext4_extent_header *ehp; struct ext4_extent_path path; daddr_t lbn, nextlbn, newblk = 0; off_t bytesinfile; u_short mode; int cache_type; int orig_resid; int error = 0; int depth = 0; long size, xfersize, blkoffset; vp = ap->a_vp; ip = VTOI(vp); mode = ip->i_mode; uio = ap->a_uio; memset(&path, 0, sizeof(path)); orig_resid = uio->uio_resid; KASSERT(orig_resid >= 0, ("ext2_read: uio->uio_resid < 0")); if (orig_resid == 0) return (0); KASSERT(uio->uio_offset >= 0, ("ext2_read: uio->uio_offset < 0")); fs = ip->I_FS; if (uio->uio_offset < ip->i_size && uio->uio_offset >= fs->e2fs_maxfilesize) return (EOVERFLOW); for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) { if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0) break; lbn = lblkno(fs, uio->uio_offset); nextlbn = lbn + 1; size = BLKSIZE(fs, ip, lbn); blkoffset = blkoff(fs, uio->uio_offset); xfersize = fs->e2fs_fsize - blkoffset; if (uio->uio_resid < xfersize) xfersize = uio->uio_resid; if (bytesinfile < xfersize) xfersize = bytesinfile; /* get block from ext4 extent cache */ cache_type = ext4_ext_in_cache(ip, lbn, &nex); if (cache_type != 0) { /* block does not be allocated yet */ if (cache_type == EXT4_EXT_CACHE_GAP) return (error); else if (cache_type == EXT4_EXT_CACHE_IN) newblk = lbn - nex.e_blk + (nex.e_start_lo | ((daddr_t)(nex.e_start_hi) << 31) << 1); } else { ext4_ext_find_extent(fs, ip, lbn, &path); depth = ((struct ext4_extent_header *)(ip->i_db))->eh_depth; if (path.ep_ext == NULL && depth != 0) return (EIO); ehp = path.ep_header; ep = path.ep_ext; if (ep == NULL) return (EIO); ext4_ext_put_cache(ip, ep, EXT4_EXT_CACHE_IN); newblk = lbn - ep->e_blk + (ep->e_start_lo | ((daddr_t)(ep->e_start_hi) << 31) << 1); if (path.ep_bp != NULL) { brelse(path.ep_bp); path.ep_bp = NULL; } } error = bread(ip->i_devvp, fsbtodb(fs, newblk), size, NOCRED, &bp); if (error) { brelse(bp); bp = NULL; break; } size -= bp->b_resid; if (size < xfersize) { if (size == 0) break; xfersize = size; } error = uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio); if (error) break; bqrelse(bp); } if (bp != NULL) bqrelse(bp); return (error); }
int linkup(ino_t orphan, ino_t parentdir) { union lfs_dinode *dp; int lostdir; ino_t oldlfdir; struct inodesc idesc; char tempname[BUFSIZ]; struct uvnode *vp; memset(&idesc, 0, sizeof(struct inodesc)); vp = vget(fs, orphan); dp = VTOD(vp); lostdir = (lfs_dino_getmode(fs, dp) & LFS_IFMT) == LFS_IFDIR; pwarn("UNREF %s ", lostdir ? "DIR" : "FILE"); pinode(orphan); if (preen && lfs_dino_getsize(fs, dp) == 0) return (0); if (preen) printf(" (RECONNECTED)\n"); else if (reply("RECONNECT") == 0) return (0); if (lfdir == 0) { dp = ginode(ULFS_ROOTINO); idesc.id_name = lfname; idesc.id_type = DATA; idesc.id_func = findino; idesc.id_number = ULFS_ROOTINO; if ((ckinode(dp, &idesc) & FOUND) != 0) { lfdir = idesc.id_parent; } else { pwarn("NO lost+found DIRECTORY"); if (preen || reply("CREATE")) { lfdir = allocdir(ULFS_ROOTINO, (ino_t) 0, lfmode); if (lfdir != 0) { if (makeentry(ULFS_ROOTINO, lfdir, lfname) != 0) { if (preen) printf(" (CREATED)\n"); } else { freedir(lfdir, ULFS_ROOTINO); lfdir = 0; if (preen) printf("\n"); } } } } if (lfdir == 0) { pfatal("SORRY. CANNOT CREATE lost+found DIRECTORY"); printf("\n\n"); return (0); } } vp = vget(fs, lfdir); dp = VTOD(vp); if ((lfs_dino_getmode(fs, dp) & LFS_IFMT) != LFS_IFDIR) { pfatal("lost+found IS NOT A DIRECTORY"); if (reply("REALLOCATE") == 0) return (0); oldlfdir = lfdir; if ((lfdir = allocdir(ULFS_ROOTINO, (ino_t) 0, lfmode)) == 0) { pfatal("SORRY. CANNOT CREATE lost+found DIRECTORY\n\n"); return (0); } if ((changeino(ULFS_ROOTINO, lfname, lfdir) & ALTERED) == 0) { pfatal("SORRY. CANNOT CREATE lost+found DIRECTORY\n\n"); return (0); } inodirty(VTOI(vp)); idesc.id_type = ADDR; idesc.id_func = pass4check; idesc.id_number = oldlfdir; adjust(&idesc, lncntp[oldlfdir] + 1); lncntp[oldlfdir] = 0; vp = vget(fs, lfdir); dp = VTOD(vp); } if (statemap[lfdir] != DFOUND) { pfatal("SORRY. NO lost+found DIRECTORY\n\n"); return (0); } (void) lftempname(tempname, orphan); if (makeentry(lfdir, orphan, tempname) == 0) { pfatal("SORRY. NO SPACE IN lost+found DIRECTORY"); printf("\n\n"); return (0); } lncntp[orphan]--; if (lostdir) { if ((changeino(orphan, "..", lfdir) & ALTERED) == 0 && parentdir != (ino_t) - 1) (void) makeentry(orphan, lfdir, ".."); vp = vget(fs, lfdir); lfs_dino_setnlink(fs, VTOI(vp)->i_din, lfs_dino_getnlink(fs, VTOI(vp)->i_din) + 1); inodirty(VTOI(vp)); lncntp[lfdir]++; pwarn("DIR I=%llu CONNECTED. ", (unsigned long long)orphan); if (parentdir != (ino_t) - 1) printf("PARENT WAS I=%llu\n", (unsigned long long)parentdir); if (preen == 0) printf("\n"); } return (1); }
/* * Q_QUOTAON - set up a quota file for a particular filesystem. */ int quotaon(struct thread *td, struct mount *mp, int type, void *fname) { struct ufsmount *ump; struct vnode *vp, **vpp; struct vnode *mvp; struct dquot *dq; int error, flags; struct nameidata nd; error = priv_check(td, PRIV_UFS_QUOTAON); if (error != 0) { vfs_unbusy(mp); return (error); } if ((mp->mnt_flag & MNT_RDONLY) != 0) { vfs_unbusy(mp); return (EROFS); } ump = VFSTOUFS(mp); dq = NODQUOT; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, fname, td); flags = FREAD | FWRITE; vfs_ref(mp); vfs_unbusy(mp); error = vn_open(&nd, &flags, 0, NULL); if (error != 0) { vfs_rel(mp); return (error); } NDFREE(&nd, NDF_ONLY_PNBUF); vp = nd.ni_vp; error = vfs_busy(mp, MBF_NOWAIT); vfs_rel(mp); if (error == 0) { if (vp->v_type != VREG) { error = EACCES; vfs_unbusy(mp); } } if (error != 0) { VOP_UNLOCK(vp, 0); (void) vn_close(vp, FREAD|FWRITE, td->td_ucred, td); return (error); } UFS_LOCK(ump); if ((ump->um_qflags[type] & (QTF_OPENING|QTF_CLOSING)) != 0) { UFS_UNLOCK(ump); VOP_UNLOCK(vp, 0); (void) vn_close(vp, FREAD|FWRITE, td->td_ucred, td); vfs_unbusy(mp); return (EALREADY); } ump->um_qflags[type] |= QTF_OPENING|QTF_CLOSING; UFS_UNLOCK(ump); if ((error = dqopen(vp, ump, type)) != 0) { VOP_UNLOCK(vp, 0); UFS_LOCK(ump); ump->um_qflags[type] &= ~(QTF_OPENING|QTF_CLOSING); UFS_UNLOCK(ump); (void) vn_close(vp, FREAD|FWRITE, td->td_ucred, td); vfs_unbusy(mp); return (error); } VOP_UNLOCK(vp, 0); MNT_ILOCK(mp); mp->mnt_flag |= MNT_QUOTA; MNT_IUNLOCK(mp); vpp = &ump->um_quotas[type]; if (*vpp != vp) quotaoff1(td, mp, type); /* * When the directory vnode containing the quota file is * inactivated, due to the shared lookup of the quota file * vput()ing the dvp, the qsyncvp() call for the containing * directory would try to acquire the quota lock exclusive. * At the same time, lookup already locked the quota vnode * shared. Mark the quota vnode lock as allowing recursion * and automatically converting shared locks to exclusive. * * Also mark quota vnode as system. */ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); vp->v_vflag |= VV_SYSTEM; VN_LOCK_AREC(vp); VN_LOCK_DSHARE(vp); VOP_UNLOCK(vp, 0); *vpp = vp; /* * Save the credential of the process that turned on quotas. * Set up the time limits for this quota. */ ump->um_cred[type] = crhold(td->td_ucred); ump->um_btime[type] = MAX_DQ_TIME; ump->um_itime[type] = MAX_IQ_TIME; if (dqget(NULLVP, 0, ump, type, &dq) == 0) { if (dq->dq_btime > 0) ump->um_btime[type] = dq->dq_btime; if (dq->dq_itime > 0) ump->um_itime[type] = dq->dq_itime; dqrele(NULLVP, dq); } /* * Allow the getdq from getinoquota below to read the quota * from file. */ UFS_LOCK(ump); ump->um_qflags[type] &= ~QTF_CLOSING; UFS_UNLOCK(ump); /* * Search vnodes associated with this mount point, * adding references to quota file being opened. * NB: only need to add dquot's for inodes being modified. */ again: MNT_VNODE_FOREACH_ALL(vp, mp, mvp) { if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) { MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); goto again; } if (vp->v_type == VNON || vp->v_writecount == 0) { VOP_UNLOCK(vp, 0); vrele(vp); continue; } error = getinoquota(VTOI(vp)); VOP_UNLOCK(vp, 0); vrele(vp); if (error) { MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); break; } } if (error) quotaoff_inchange(td, mp, type); UFS_LOCK(ump); ump->um_qflags[type] &= ~QTF_OPENING; KASSERT((ump->um_qflags[type] & QTF_CLOSING) == 0, ("quotaon: leaking flags")); UFS_UNLOCK(ump); vfs_unbusy(mp); return (error); }
/* * allocate a new directory */ int allocdir(ino_t parent, ino_t request, int mode) { ino_t ino; char *cp; union lfs_dinode *dp; struct ubuf *bp; LFS_DIRHEADER *dirp; struct uvnode *vp; ino = allocino(request, LFS_IFDIR | mode); vp = vget(fs, ino); dp = VTOD(vp); bread(vp, lfs_dino_getdb(fs, dp, 0), lfs_sb_getfsize(fs), 0, &bp); if (bp->b_flags & B_ERROR) { brelse(bp, 0); freeino(ino); return (0); } dirp = (LFS_DIRHEADER *)bp->b_data; /* . */ lfs_dir_setino(fs, dirp, ino); lfs_dir_setreclen(fs, dirp, LFS_DIRECTSIZ(fs, 1)); lfs_dir_settype(fs, dirp, LFS_DT_DIR); lfs_dir_setnamlen(fs, dirp, 1); lfs_copydirname(fs, lfs_dir_nameptr(fs, dirp), ".", 1, LFS_DIRECTSIZ(fs, 1)); /* .. */ dirp = LFS_NEXTDIR(fs, dirp); lfs_dir_setino(fs, dirp, parent); lfs_dir_setreclen(fs, dirp, LFS_DIRBLKSIZ - LFS_DIRECTSIZ(fs, 1)); lfs_dir_settype(fs, dirp, LFS_DT_DIR); lfs_dir_setnamlen(fs, dirp, 2); lfs_copydirname(fs, lfs_dir_nameptr(fs, dirp), "..", 2, LFS_DIRBLKSIZ - LFS_DIRECTSIZ(fs, 1)); for (cp = &bp->b_data[LFS_DIRBLKSIZ]; cp < &bp->b_data[lfs_sb_getfsize(fs)]; cp += LFS_DIRBLKSIZ) { zerodirblk(cp); } VOP_BWRITE(bp); lfs_dino_setnlink(fs, dp, 2); inodirty(VTOI(vp)); if (ino == ULFS_ROOTINO) { lncntp[ino] = lfs_dino_getnlink(fs, dp); cacheino(dp, ino); return (ino); } if (statemap[parent] != DSTATE && statemap[parent] != DFOUND) { freeino(ino); return (0); } cacheino(dp, ino); statemap[ino] = statemap[parent]; if (statemap[ino] == DSTATE) { lncntp[ino] = lfs_dino_getnlink(fs, dp); lncntp[parent]++; } vp = vget(fs, parent); dp = VTOD(vp); lfs_dino_setnlink(fs, dp, lfs_dino_getnlink(fs, dp) + 1); inodirty(VTOI(vp)); return (ino); }
/* * Reload all incore data for a filesystem (used after running fsck on * the root filesystem and finding things to fix). The filesystem must * be mounted read-only. * * Things to do to update the mount: * 1) invalidate all cached meta-data. * 2) re-read superblock from disk. * 3) invalidate all cluster summary information. * 4) invalidate all inactive vnodes. * 5) invalidate all cached file data. * 6) re-read inode data for all active vnodes. * XXX we are missing some steps, in particular # 3, this has to be reviewed. */ static int ext2_reload(struct mount *mp, struct thread *td) { struct vnode *vp, *mvp, *devvp; struct inode *ip; struct buf *bp; struct ext2fs *es; struct m_ext2fs *fs; struct csum *sump; int error, i; int32_t *lp; if ((mp->mnt_flag & MNT_RDONLY) == 0) return (EINVAL); /* * Step 1: invalidate all cached meta-data. */ devvp = VFSTOEXT2(mp)->um_devvp; vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); if (vinvalbuf(devvp, 0, 0, 0) != 0) panic("ext2_reload: dirty1"); VOP_UNLOCK(devvp, 0); /* * Step 2: re-read superblock from disk. * constants have been adjusted for ext2 */ if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0) return (error); es = (struct ext2fs *)bp->b_data; if (ext2_check_sb_compat(es, devvp->v_rdev, 0) != 0) { brelse(bp); return (EIO); /* XXX needs translation */ } fs = VFSTOEXT2(mp)->um_e2fs; bcopy(bp->b_data, fs->e2fs, sizeof(struct ext2fs)); if((error = compute_sb_data(devvp, es, fs)) != 0) { brelse(bp); return (error); } #ifdef UNKLAR if (fs->fs_sbsize < SBSIZE) bp->b_flags |= B_INVAL; #endif brelse(bp); /* * Step 3: invalidate all cluster summary information. */ if (fs->e2fs_contigsumsize > 0) { lp = fs->e2fs_maxcluster; sump = fs->e2fs_clustersum; for (i = 0; i < fs->e2fs_gcount; i++, sump++) { *lp++ = fs->e2fs_contigsumsize; sump->cs_init = 0; bzero(sump->cs_sum, fs->e2fs_contigsumsize + 1); } } loop: MNT_VNODE_FOREACH_ALL(vp, mp, mvp) { /* * Step 4: invalidate all cached file data. */ if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) { MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); goto loop; } if (vinvalbuf(vp, 0, 0, 0)) panic("ext2_reload: dirty2"); /* * Step 5: re-read inode data for all active vnodes. */ ip = VTOI(vp); error = bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), (int)fs->e2fs_bsize, NOCRED, &bp); if (error) { VOP_UNLOCK(vp, 0); vrele(vp); MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); return (error); } ext2_ei2i((struct ext2fs_dinode *) ((char *)bp->b_data + EXT2_INODE_SIZE(fs) * ino_to_fsbo(fs, ip->i_number)), ip); brelse(bp); VOP_UNLOCK(vp, 0); vrele(vp); } return (0); }
void osi_DisableAtimes(struct vnode *avp) { struct inode *ip = VTOI(avp); ip->i_flag &= ~IN_ACCESS; }
/* * Retrieve the ACL on a file. * * As part of the ACL is stored in the inode, and the rest in an EA, * assemble both into a final ACL product. Right now this is not done * very efficiently. */ static int ufs_getacl_posix1e(struct vop_getacl_args *ap) { struct inode *ip = VTOI(ap->a_vp); int error; struct oldacl *old; /* * XXX: If ufs_getacl() should work on file systems not supporting * ACLs, remove this check. */ if ((ap->a_vp->v_mount->mnt_flag & MNT_ACLS) == 0) return (EINVAL); old = malloc(sizeof(*old), M_ACL, M_WAITOK | M_ZERO); /* * Attempt to retrieve the ACL from the extended attributes. */ error = ufs_get_oldacl(ap->a_type, old, ap->a_vp, ap->a_td); switch (error) { /* * XXX: If ufs_getacl() should work on filesystems * without the EA configured, add case EOPNOTSUPP here. */ case ENOATTR: switch (ap->a_type) { case ACL_TYPE_ACCESS: /* * Legitimately no ACL set on object, purely * emulate it through the inode. These fields will * be updated when the ACL is synchronized with * the inode later. */ old->acl_cnt = 3; old->acl_entry[0].ae_tag = ACL_USER_OBJ; old->acl_entry[0].ae_id = ACL_UNDEFINED_ID; old->acl_entry[0].ae_perm = ACL_PERM_NONE; old->acl_entry[1].ae_tag = ACL_GROUP_OBJ; old->acl_entry[1].ae_id = ACL_UNDEFINED_ID; old->acl_entry[1].ae_perm = ACL_PERM_NONE; old->acl_entry[2].ae_tag = ACL_OTHER; old->acl_entry[2].ae_id = ACL_UNDEFINED_ID; old->acl_entry[2].ae_perm = ACL_PERM_NONE; break; case ACL_TYPE_DEFAULT: /* * Unlike ACL_TYPE_ACCESS, there is no relationship * between the inode contents and the ACL, and it is * therefore possible for the request for the ACL * to fail since the ACL is undefined. In this * situation, return success and an empty ACL, * as required by POSIX.1e. */ old->acl_cnt = 0; break; } /* FALLTHROUGH */ case 0: error = acl_copy_oldacl_into_acl(old, ap->a_aclp); if (error != 0) break; if (ap->a_type == ACL_TYPE_ACCESS) ufs_sync_acl_from_inode(ip, ap->a_aclp); default: break; } free(old, M_ACL); return (error); }
/* * Convert a component of a pathname into a pointer to a locked inode. * This is a very central and rather complicated routine. * If the file system is not maintained in a strict tree hierarchy, * this can result in a deadlock situation (see comments in code below). * * The cnp->cn_nameiop argument is LOOKUP, CREATE, RENAME, or DELETE depending * on whether the name is to be looked up, created, renamed, or deleted. * When CREATE, RENAME, or DELETE is specified, information usable in * creating, renaming, or deleting a directory entry may be calculated. * If flag has LOCKPARENT or'ed into it and the target of the pathname * exists, lookup returns both the target and its parent directory locked. * When creating or renaming and LOCKPARENT is specified, the target may * not be ".". When deleting and LOCKPARENT is specified, the target may * be "."., but the caller must check to ensure it does an vrele and vput * instead of two vputs. * * Overall outline of ufs_lookup: * * check accessibility of directory * look for name in cache, if found, then if at end of path * and deleting or creating, drop it, else return name * search for name in directory, to found or notfound * notfound: * if creating, return locked directory, leaving info on available slots * else return error * found: * if at end of path and deleting, return information to allow delete * if at end of path and rewriting (RENAME and LOCKPARENT), lock target * inode and return info to allow rewrite * if not at end, add name to cache; if at end and neither creating * nor deleting, add name to cache */ int ufs_lookup(void *v) { struct vop_lookup_args *ap = v; struct vnode *vdp; /* vnode for directory being searched */ struct inode *dp; /* inode for directory being searched */ struct buf *bp; /* a buffer of directory entries */ struct direct *ep; /* the current directory entry */ int entryoffsetinblock; /* offset of ep in bp's buffer */ enum {NONE, COMPACT, FOUND} slotstatus; doff_t slotoffset; /* offset of area with free space */ int slotsize; /* size of area at slotoffset */ int slotfreespace; /* amount of space free in slot */ int slotneeded; /* size of the entry we're seeking */ int numdirpasses; /* strategy for directory search */ doff_t endsearch; /* offset to end directory search */ doff_t prevoff; /* prev entry dp->i_offset */ struct vnode *pdp; /* saved dp during symlink work */ struct vnode *tdp; /* returned by VFS_VGET */ doff_t enduseful; /* pointer past last used dir slot */ u_long bmask; /* block offset mask */ int lockparent; /* 1 => lockparent flag is set */ int wantparent; /* 1 => wantparent or lockparent flag */ int namlen, error; struct vnode **vpp = ap->a_vpp; struct componentname *cnp = ap->a_cnp; struct ucred *cred = cnp->cn_cred; int flags; int nameiop = cnp->cn_nameiop; struct proc *p = cnp->cn_proc; cnp->cn_flags &= ~PDIRUNLOCK; flags = cnp->cn_flags; bp = NULL; slotoffset = -1; *vpp = NULL; vdp = ap->a_dvp; dp = VTOI(vdp); lockparent = flags & LOCKPARENT; wantparent = flags & (LOCKPARENT|WANTPARENT); /* * Check accessiblity of directory. */ if ((DIP(dp, mode) & IFMT) != IFDIR) return (ENOTDIR); if ((error = VOP_ACCESS(vdp, VEXEC, cred, cnp->cn_proc)) != 0) return (error); if ((flags & ISLASTCN) && (vdp->v_mount->mnt_flag & MNT_RDONLY) && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) return (EROFS); /* * We now have a segment name to search for, and a directory to search. * * Before tediously performing a linear scan of the directory, * check the name cache to see if the directory/name pair * we are looking for is known already. */ if ((error = cache_lookup(vdp, vpp, cnp)) >= 0) return (error); /* * Suppress search for slots unless creating * file and at end of pathname, in which case * we watch for a place to put the new file in * case it doesn't already exist. */ slotstatus = FOUND; slotfreespace = slotsize = slotneeded = 0; if ((nameiop == CREATE || nameiop == RENAME) && (flags & ISLASTCN)) { slotstatus = NONE; slotneeded = (sizeof(struct direct) - MAXNAMLEN + cnp->cn_namelen + 3) &~ 3; } /* * If there is cached information on a previous search of * this directory, pick up where we last left off. * We cache only lookups as these are the most common * and have the greatest payoff. Caching CREATE has little * benefit as it usually must search the entire directory * to determine that the entry does not exist. Caching the * location of the last DELETE or RENAME has not reduced * profiling time and hence has been removed in the interest * of simplicity. */ bmask = VFSTOUFS(vdp->v_mount)->um_mountp->mnt_stat.f_iosize - 1; #ifdef UFS_DIRHASH /* * Use dirhash for fast operations on large directories. The logic * to determine whether to hash the directory is contained within * ufsdirhash_build(); a zero return means that it decided to hash * this directory and it successfully built up the hash table. */ if (ufsdirhash_build(dp) == 0) { /* Look for a free slot if needed. */ enduseful = DIP(dp, size); if (slotstatus != FOUND) { slotoffset = ufsdirhash_findfree(dp, slotneeded, &slotsize); if (slotoffset >= 0) { slotstatus = COMPACT; enduseful = ufsdirhash_enduseful(dp); if (enduseful < 0) enduseful = DIP(dp, size); } } /* Look up the component. */ numdirpasses = 1; entryoffsetinblock = 0; /* silence compiler warning */ switch (ufsdirhash_lookup(dp, cnp->cn_nameptr, cnp->cn_namelen, &dp->i_offset, &bp, nameiop == DELETE ? &prevoff : NULL)) { case 0: ep = (struct direct *)((char *)bp->b_data + (dp->i_offset & bmask)); goto foundentry; case ENOENT: #define roundup2(x, y) (((x)+((y)-1))&(~((y)-1))) /* if y is powers of two */ dp->i_offset = roundup2(DIP(dp, size), DIRBLKSIZ); goto notfound; default: /* Something failed; just do a linear search. */ break; } } #endif /* UFS_DIRHASH */ if (nameiop != LOOKUP || dp->i_diroff == 0 || dp->i_diroff >= DIP(dp, size)) { entryoffsetinblock = 0; dp->i_offset = 0; numdirpasses = 1; } else { dp->i_offset = dp->i_diroff; if ((entryoffsetinblock = dp->i_offset & bmask) && (error = UFS_BUFATOFF(dp, (off_t)dp->i_offset, NULL, &bp))) return (error); numdirpasses = 2; nchstats.ncs_2passes++; } prevoff = dp->i_offset; endsearch = roundup(DIP(dp, size), DIRBLKSIZ); enduseful = 0; searchloop: while (dp->i_offset < endsearch) { /* * If necessary, get the next directory block. */ if ((dp->i_offset & bmask) == 0) { if (bp != NULL) brelse(bp); error = UFS_BUFATOFF(dp, (off_t)dp->i_offset, NULL, &bp); if (error) return (error); entryoffsetinblock = 0; } /* * If still looking for a slot, and at a DIRBLKSIZE * boundary, have to start looking for free space again. */ if (slotstatus == NONE && (entryoffsetinblock & (DIRBLKSIZ - 1)) == 0) { slotoffset = -1; slotfreespace = 0; } /* * Get pointer to next entry. * Full validation checks are slow, so we only check * enough to insure forward progress through the * directory. Complete checks can be run by patching * "dirchk" to be true. */ ep = (struct direct *)((char *)bp->b_data + entryoffsetinblock); if (ep->d_reclen == 0 || (dirchk && ufs_dirbadentry(vdp, ep, entryoffsetinblock))) { int i; ufs_dirbad(dp, dp->i_offset, "mangled entry"); i = DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)); dp->i_offset += i; entryoffsetinblock += i; continue; } /* * If an appropriate sized slot has not yet been found, * check to see if one is available. Also accumulate space * in the current block so that we can determine if * compaction is viable. */ if (slotstatus != FOUND) { int size = ep->d_reclen; if (ep->d_ino != 0) size -= DIRSIZ(FSFMT(vdp), ep); if (size > 0) { if (size >= slotneeded) { slotstatus = FOUND; slotoffset = dp->i_offset; slotsize = ep->d_reclen; } else if (slotstatus == NONE) { slotfreespace += size; if (slotoffset == -1) slotoffset = dp->i_offset; if (slotfreespace >= slotneeded) { slotstatus = COMPACT; slotsize = dp->i_offset + ep->d_reclen - slotoffset; } } } } /* * Check for a name match. */ if (ep->d_ino) { # if (BYTE_ORDER == LITTLE_ENDIAN) if (vdp->v_mount->mnt_maxsymlinklen > 0) namlen = ep->d_namlen; else namlen = ep->d_type; # else namlen = ep->d_namlen; # endif if (namlen == cnp->cn_namelen && !bcmp(cnp->cn_nameptr, ep->d_name, (unsigned)namlen)) { #ifdef UFS_DIRHASH foundentry: #endif /* * Save directory entry's inode number and * reclen in ndp->ni_ufs area, and release * directory buffer. */ dp->i_ino = ep->d_ino; dp->i_reclen = ep->d_reclen; goto found; } } prevoff = dp->i_offset; dp->i_offset += ep->d_reclen; entryoffsetinblock += ep->d_reclen; if (ep->d_ino) enduseful = dp->i_offset; } #ifdef UFS_DIRHASH notfound: #endif /* * If we started in the middle of the directory and failed * to find our target, we must check the beginning as well. */ if (numdirpasses == 2) { numdirpasses--; dp->i_offset = 0; endsearch = dp->i_diroff; goto searchloop; } if (bp != NULL) brelse(bp); /* * If creating, and at end of pathname and current * directory has not been removed, then can consider * allowing file to be created. */ if ((nameiop == CREATE || nameiop == RENAME) && (flags & ISLASTCN) && dp->i_effnlink != 0) { /* * Access for write is interpreted as allowing * creation of files in the directory. */ error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_proc); if (error) return (error); /* * Return an indication of where the new directory * entry should be put. If we didn't find a slot, * then set dp->i_count to 0 indicating * that the new slot belongs at the end of the * directory. If we found a slot, then the new entry * can be put in the range from dp->i_offset to * dp->i_offset + dp->i_count. */ if (slotstatus == NONE) { dp->i_offset = roundup(DIP(dp, size), DIRBLKSIZ); dp->i_count = 0; enduseful = dp->i_offset; } else if (nameiop == DELETE) { dp->i_offset = slotoffset; if ((dp->i_offset & (DIRBLKSIZ - 1)) == 0) dp->i_count = 0; else dp->i_count = dp->i_offset - prevoff; } else { dp->i_offset = slotoffset; dp->i_count = slotsize; if (enduseful < slotoffset + slotsize) enduseful = slotoffset + slotsize; } dp->i_endoff = roundup(enduseful, DIRBLKSIZ); /* * We return with the directory locked, so that * the parameters we set up above will still be * valid if we actually decide to do a direnter(). * We return ni_vp == NULL to indicate that the entry * does not currently exist; we leave a pointer to * the (locked) directory inode in ndp->ni_dvp. * The pathname buffer is saved so that the name * can be obtained later. * * NB - if the directory is unlocked, then this * information cannot be used. */ cnp->cn_flags |= SAVENAME; if (!lockparent) { VOP_UNLOCK(vdp, 0, p); cnp->cn_flags |= PDIRUNLOCK; } return (EJUSTRETURN); } /* * Insert name into cache (as non-existent) if appropriate. */ if ((cnp->cn_flags & MAKEENTRY) && nameiop != CREATE) cache_enter(vdp, *vpp, cnp); return (ENOENT); found: if (numdirpasses == 2) nchstats.ncs_pass2++; /* * Check that directory length properly reflects presence * of this entry. */ if (dp->i_offset + DIRSIZ(FSFMT(vdp), ep) > DIP(dp, size)) { ufs_dirbad(dp, dp->i_offset, "i_ffs_size too small"); DIP_ASSIGN(dp, size, dp->i_offset + DIRSIZ(FSFMT(vdp), ep)); dp->i_flag |= IN_CHANGE | IN_UPDATE; } brelse(bp); /* * Found component in pathname. * If the final component of path name, save information * in the cache as to where the entry was found. */ if ((flags & ISLASTCN) && nameiop == LOOKUP) dp->i_diroff = dp->i_offset &~ (DIRBLKSIZ - 1); /* * If deleting, and at end of pathname, return * parameters which can be used to remove file. * If the wantparent flag isn't set, we return only * the directory (in ndp->ni_dvp), otherwise we go * on and lock the inode, being careful with ".". */ if (nameiop == DELETE && (flags & ISLASTCN)) { /* * Write access to directory required to delete files. */ error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_proc); if (error) return (error); /* * Return pointer to current entry in dp->i_offset, * and distance past previous entry (if there * is a previous entry in this block) in dp->i_count. * Save directory inode pointer in ndp->ni_dvp for dirremove(). */ if ((dp->i_offset & (DIRBLKSIZ - 1)) == 0) dp->i_count = 0; else dp->i_count = dp->i_offset - prevoff; if (dp->i_number == dp->i_ino) { VREF(vdp); *vpp = vdp; return (0); } error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp); if (error) return (error); /* * If directory is "sticky", then user must own * the directory, or the file in it, else she * may not delete it (unless she's root). This * implements append-only directories. */ if ((DIP(dp, mode) & ISVTX) && VOP_ACCESS(vdp, VADMIN, cred, cnp->cn_proc) && VOP_ACCESS(tdp, VADMIN, cred, cnp->cn_proc)) { vput(tdp); return (EPERM); } *vpp = tdp; if (!lockparent) { VOP_UNLOCK(vdp, 0, p); cnp->cn_flags |= PDIRUNLOCK; } return (0); } /* * If rewriting (RENAME), return the inode and the * information required to rewrite the present directory * Must get inode of directory entry to verify it's a * regular file, or empty directory. */ if (nameiop == RENAME && wantparent && (flags & ISLASTCN)) { error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_proc); if (error) return (error); /* * Careful about locking second inode. * This can only occur if the target is ".". */ if (dp->i_number == dp->i_ino) return (EISDIR); error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp); if (error) return (error); *vpp = tdp; cnp->cn_flags |= SAVENAME; if (!lockparent) { VOP_UNLOCK(vdp, 0, p); cnp->cn_flags |= PDIRUNLOCK; } return (0); } /* * Step through the translation in the name. We do not `vput' the * directory because we may need it again if a symbolic link * is relative to the current directory. Instead we save it * unlocked as "pdp". We must get the target inode before unlocking * the directory to insure that the inode will not be removed * before we get it. We prevent deadlock by always fetching * inodes from the root, moving down the directory tree. Thus * when following backward pointers ".." we must unlock the * parent directory before getting the requested directory. * There is a potential race condition here if both the current * and parent directories are removed before the VFS_VGET for the * inode associated with ".." returns. We hope that this occurs * infrequently since we cannot avoid this race condition without * implementing a sophisticated deadlock detection algorithm. * Note also that this simple deadlock detection scheme will not * work if the file system has any hard links other than ".." * that point backwards in the directory structure. */ pdp = vdp; if (flags & ISDOTDOT) { VOP_UNLOCK(pdp, 0, p); /* race to get the inode */ cnp->cn_flags |= PDIRUNLOCK; error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp); if (error) { if (vn_lock(pdp, LK_EXCLUSIVE | LK_RETRY, p) == 0) cnp->cn_flags &= ~PDIRUNLOCK; return (error); } if (lockparent && (flags & ISLASTCN)) { if ((error = vn_lock(pdp, LK_EXCLUSIVE, p))) { vput(tdp); return (error); } cnp->cn_flags &= ~PDIRUNLOCK; } *vpp = tdp; } else if (dp->i_number == dp->i_ino) { VREF(vdp); /* we want ourself, ie "." */ *vpp = vdp; } else { error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp); if (error) return (error); if (!lockparent || !(flags & ISLASTCN)) { VOP_UNLOCK(pdp, 0, p); cnp->cn_flags |= PDIRUNLOCK; } *vpp = tdp; } /* * Insert name into cache if appropriate. */ if (cnp->cn_flags & MAKEENTRY) cache_enter(vdp, *vpp, cnp); return (0); }