/* * Change access and modification times of the given vnode. * Caller should execute tmpfs_update on vp after a successful execution. * The vnode must be locked on entry and remain locked on exit. */ int tmpfs_chtimes(struct vnode *vp, struct timespec *atime, struct timespec *mtime, int vaflags, struct ucred *cred) { struct tmpfs_node *node; KKASSERT(vn_islocked(vp)); node = VP_TO_TMPFS_NODE(vp); /* Disallow this operation if the file system is mounted read-only. */ if (vp->v_mount->mnt_flag & MNT_RDONLY) return EROFS; /* Immutable or append-only files cannot be modified, either. */ if (node->tn_flags & (IMMUTABLE | APPEND)) return EPERM; TMPFS_NODE_LOCK(node); if (atime->tv_sec != VNOVAL && atime->tv_nsec != VNOVAL) node->tn_status |= TMPFS_NODE_ACCESSED; if (mtime->tv_sec != VNOVAL && mtime->tv_nsec != VNOVAL) { node->tn_status |= TMPFS_NODE_MODIFIED; vclrflags(vp, VLASTWRITETS); } TMPFS_NODE_UNLOCK(node); tmpfs_itimes(vp, atime, mtime); KKASSERT(vn_islocked(vp)); return 0; }
/* * Change flags of the given vnode. * Caller should execute tmpfs_update on vp after a successful execution. * The vnode must be locked on entry and remain locked on exit. */ int tmpfs_chflags(struct vnode *vp, int vaflags, struct ucred *cred) { int error; struct tmpfs_node *node; int flags; KKASSERT(vn_islocked(vp)); node = VP_TO_TMPFS_NODE(vp); flags = node->tn_flags; /* Disallow this operation if the file system is mounted read-only. */ if (vp->v_mount->mnt_flag & MNT_RDONLY) return EROFS; error = vop_helper_setattr_flags(&flags, vaflags, node->tn_uid, cred); /* Actually change the flags on the node itself */ if (error == 0) { TMPFS_NODE_LOCK(node); node->tn_flags = flags; node->tn_status |= TMPFS_NODE_CHANGED; TMPFS_NODE_UNLOCK(node); } KKASSERT(vn_islocked(vp)); return error; }
/*
 * Change size of the given vnode.
 * Caller should execute tmpfs_update on vp after a successful execution.
 * The vnode must be locked on entry and remain locked on exit.
 */
int
tmpfs_chsize(struct vnode *vp, u_quad_t size, struct ucred *cred)
{
	int error;
	struct tmpfs_node *node;

	KKASSERT(vn_islocked(vp));

	node = VP_TO_TMPFS_NODE(vp);

	/* Decide whether this is a valid operation based on the file type. */
	error = 0;
	switch (vp->v_type) {
	case VDIR:
		return EISDIR;

	case VREG:
		if (vp->v_mount->mnt_flag & MNT_RDONLY)
			return EROFS;
		break;

	case VBLK:
		/* FALLTHROUGH */
	case VCHR:
		/* FALLTHROUGH */
	case VFIFO:
		/*
		 * Allow modifications of special files even if the file
		 * system is mounted read-only (we are not modifying the
		 * files themselves, but the objects they represent).
		 */
		return 0;

	default:
		/* Anything else is unsupported. */
		return EOPNOTSUPP;
	}

	/* Immutable or append-only files cannot be modified, either. */
	if (node->tn_flags & (IMMUTABLE | APPEND))
		return EPERM;

	error = tmpfs_truncate(vp, size);

	/*
	 * tmpfs_truncate will raise the NOTE_EXTEND and NOTE_ATTRIB kevents
	 * for us, as well as update tn_status; no need to do that here.
	 */

	KKASSERT(vn_islocked(vp));

	return error;
}
/* ARGSUSED */ static int nwfs_sync(struct mount *mp, int waitfor) { struct vnode *vp; int error, allerror = 0; /* * Force stale buffer cache information to be flushed. */ loop: for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = TAILQ_NEXT(vp, v_nmntvnodes)) { /* * If the vnode that we are about to sync is no longer * associated with this mount point, start over. */ if (vp->v_mount != mp) goto loop; if (vn_islocked(vp) || RB_EMPTY(&vp->v_rbdirty_tree) || (waitfor & MNT_LAZY)) continue; if (vget(vp, LK_EXCLUSIVE)) goto loop; /* XXX vp may not be retained */ error = VOP_FSYNC(vp, waitfor, 0); if (error) allerror = error; vput(vp); } return (allerror); }
/* * Change access mode on the given vnode. * Caller should execute tmpfs_update on vp after a successful execution. * The vnode must be locked on entry and remain locked on exit. */ int tmpfs_chmod(struct vnode *vp, mode_t vamode, struct ucred *cred) { struct tmpfs_node *node; mode_t cur_mode; int error; KKASSERT(vn_islocked(vp)); node = VP_TO_TMPFS_NODE(vp); /* Disallow this operation if the file system is mounted read-only. */ if (vp->v_mount->mnt_flag & MNT_RDONLY) return EROFS; /* Immutable or append-only files cannot be modified, either. */ if (node->tn_flags & (IMMUTABLE | APPEND)) return EPERM; cur_mode = node->tn_mode; error = vop_helper_chmod(vp, vamode, cred, node->tn_uid, node->tn_gid, &cur_mode); if (error == 0 && (node->tn_mode & ALLPERMS) != (cur_mode & ALLPERMS)) { TMPFS_NODE_LOCK(node); node->tn_mode &= ~ALLPERMS; node->tn_mode |= cur_mode & ALLPERMS; node->tn_status |= TMPFS_NODE_CHANGED; TMPFS_NODE_UNLOCK(node); } KKASSERT(vn_islocked(vp)); return 0; }
/* * Change ownership of the given vnode. At least one of uid or gid must * be different than VNOVAL. If one is set to that value, the attribute * is unchanged. * Caller should execute tmpfs_update on vp after a successful execution. * The vnode must be locked on entry and remain locked on exit. */ int tmpfs_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred) { mode_t cur_mode; uid_t cur_uid; gid_t cur_gid; struct tmpfs_node *node; int error; KKASSERT(vn_islocked(vp)); node = VP_TO_TMPFS_NODE(vp); /* Disallow this operation if the file system is mounted read-only. */ if (vp->v_mount->mnt_flag & MNT_RDONLY) return EROFS; /* Immutable or append-only files cannot be modified, either. */ if (node->tn_flags & (IMMUTABLE | APPEND)) return EPERM; cur_uid = node->tn_uid; cur_gid = node->tn_gid; cur_mode = node->tn_mode; error = vop_helper_chown(vp, uid, gid, cred, &cur_uid, &cur_gid, &cur_mode); if (error == 0) { TMPFS_NODE_LOCK(node); if (cur_uid != node->tn_uid || cur_gid != node->tn_gid || cur_mode != node->tn_mode) { node->tn_uid = cur_uid; node->tn_gid = cur_gid; node->tn_mode = cur_mode; node->tn_status |= TMPFS_NODE_CHANGED; } TMPFS_NODE_UNLOCK(node); } return error; }
int
puffs_biowrite(struct vnode *vp, struct uio *uio, int ioflag,
	       struct ucred *cred)
{
	int biosize = vp->v_mount->mnt_stat.f_iosize;
	struct buf *bp;
	struct vattr vattr;
	off_t loffset, fsize;
	int boff, bytes;
	int error = 0;
	int bcount;
	int trivial;

	KKASSERT(uio->uio_rw == UIO_WRITE);
	KKASSERT(vp->v_type == VREG);

	if (uio->uio_offset < 0)
		return EINVAL;
	if (uio->uio_resid == 0)
		return 0;

	/*
	 * If IO_APPEND then load uio_offset.  We restart here if we cannot
	 * get the append lock.
	 *
	 * We need to obtain an exclusive lock if we intend to modify the
	 * file size in order to guarantee the append point with multiple
	 * contending writers.
	 */
	if (ioflag & IO_APPEND) {
		/* XXXDF relock if necessary */
		KKASSERT(vn_islocked(vp) == LK_EXCLUSIVE);
		error = VOP_GETATTR(vp, &vattr);
		if (error)
			return error;
		uio->uio_offset = puffs_meta_getsize(vp);
	}

	do {
		boff = uio->uio_offset & (biosize-1);
		loffset = uio->uio_offset - boff;
		bytes = (int)szmin((unsigned)(biosize - boff), uio->uio_resid);
again:
		/*
		 * Handle direct append and file extension cases, calculate
		 * unaligned buffer size.  When extending B_CACHE will be
		 * set if possible.  See UIO_NOCOPY note below.
		 */
		fsize = puffs_meta_getsize(vp);
		if (uio->uio_offset + bytes > fsize) {
			trivial = (uio->uio_segflg != UIO_NOCOPY &&
				   uio->uio_offset <= fsize);
			puffs_meta_setsize(vp, uio->uio_offset + bytes,
					   trivial);
		}
		bp = getblk(vp, loffset, biosize, 0, 0);
		if (bp == NULL) {
			error = EINTR;
			break;
		}

		/*
		 * Actual bytes in buffer which we care about
		 */
		if (loffset + biosize < fsize)
			bcount = biosize;
		else
			bcount = (int)(fsize - loffset);

		/*
		 * Avoid a read by setting B_CACHE where the data we
		 * intend to write covers the entire buffer.  Note
		 * that the buffer may have been set to B_CACHE by
		 * puffs_meta_setsize() above or otherwise inherited the
		 * flag, but if B_CACHE isn't set the buffer may be
		 * uninitialized and must be zero'd to accommodate
		 * future seek+write's.
		 *
		 * See the comments in kern/vfs_bio.c's getblk() for
		 * more information.
		 *
		 * When doing a UIO_NOCOPY write the buffer is not
		 * overwritten and we cannot just set B_CACHE unconditionally
		 * for full-block writes.
		 */
		if (boff == 0 && bytes == biosize &&
		    uio->uio_segflg != UIO_NOCOPY) {
			bp->b_flags |= B_CACHE;
			bp->b_flags &= ~(B_ERROR | B_INVAL);
		}

		/*
		 * b_resid may be set due to file EOF if we extended out.
		 * The NFS bio code will zero the difference anyway so
		 * just acknowledge the fact and set b_resid to 0.
		 */
		if ((bp->b_flags & B_CACHE) == 0) {
			bp->b_cmd = BUF_CMD_READ;
			bp->b_bio2.bio_done = puffs_iodone;
			bp->b_bio2.bio_flags |= BIO_SYNC;
			vfs_busy_pages(vp, bp);
			error = puffs_doio(vp, &bp->b_bio2, uio->uio_td);
			if (error) {
				brelse(bp);
				break;
			}
			bp->b_resid = 0;
		}

		/*
		 * If dirtyend exceeds file size, chop it down.  This should
		 * not normally occur but there is an append race where it
		 * might occur XXX, so we log it.
		 *
		 * If the chopping creates a reverse-indexed or degenerate
		 * situation with dirtyoff/end, we 0 both of them.
		 */
		if (bp->b_dirtyend > bcount) {
			kprintf("PUFFS append race @%08llx:%d\n",
			    (long long)bp->b_bio2.bio_offset,
			    bp->b_dirtyend - bcount);
			bp->b_dirtyend = bcount;
		}

		if (bp->b_dirtyoff >= bp->b_dirtyend)
			bp->b_dirtyoff = bp->b_dirtyend = 0;

		/*
		 * If the new write will leave a contiguous dirty
		 * area, just update the b_dirtyoff and b_dirtyend,
		 * otherwise force a write rpc of the old dirty area.
		 *
		 * While it is possible to merge discontiguous writes due to
		 * our having a B_CACHE buffer (and thus valid read data
		 * for the hole), we don't because it could lead to
		 * significant cache coherency problems with multiple clients,
		 * especially if locking is implemented later on.
		 *
		 * As an optimization we could theoretically maintain
		 * a linked list of discontinuous areas, but we would still
		 * have to commit them separately so there isn't much
		 * advantage to it except perhaps a bit of asynchronization.
		 */
		if (bp->b_dirtyend > 0 &&
		    (boff > bp->b_dirtyend ||
		     (boff + bytes) < bp->b_dirtyoff)
		) {
			if (bwrite(bp) == EINTR) {
				error = EINTR;
				break;
			}
			goto again;
		}

		error = uiomove(bp->b_data + boff, bytes, uio);

		/*
		 * Since this block is being modified, it must be written
		 * again and not just committed.  Since write clustering does
		 * not work for the stage 1 data write, only the stage 2
		 * commit rpc, we have to clear B_CLUSTEROK as well.
		 */
		bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);

		if (error) {
			brelse(bp);
			break;
		}

		/*
		 * Only update dirtyoff/dirtyend if not a degenerate
		 * condition.
		 *
		 * The underlying VM pages have been marked valid by
		 * virtue of acquiring the bp.  Because the entire buffer
		 * is marked dirty we do not have to worry about cleaning
		 * out the related dirty bits (and wouldn't really know
		 * how to deal with byte ranges anyway).
		 */
		if (bytes) {
			if (bp->b_dirtyend > 0) {
				bp->b_dirtyoff = imin(boff, bp->b_dirtyoff);
				bp->b_dirtyend = imax(boff + bytes,
						      bp->b_dirtyend);
			} else {
				bp->b_dirtyoff = boff;
				bp->b_dirtyend = boff + bytes;
			}
		}

		if (ioflag & IO_SYNC) {
			if (ioflag & IO_INVAL)
				bp->b_flags |= B_NOCACHE;
			error = bwrite(bp);
			if (error)
				break;
		} else {
			bdwrite(bp);
		}
	} while (uio->uio_resid > 0 && bytes > 0);

	return error;
}
/*
 * ffs_balloc(struct vnode *a_vp, ufs_daddr_t a_lbn, int a_size,
 *	      struct ucred *a_cred, int a_flags, struct buf *a_bpp)
 *
 * Balloc defines the structure of filesystem storage by allocating
 * the physical blocks on a device given the inode and the logical
 * block number in a file.
 *
 * NOTE: B_CLRBUF - this flag tells balloc to clear invalid portions
 *	 of the buffer.  However, any dirty bits will override missing
 *	 valid bits.  This case occurs when writable mmaps are truncated
 *	 and then extended.
 */
int
ffs_balloc(struct vop_balloc_args *ap)
{
	struct inode *ip;
	ufs_daddr_t lbn;
	int size;
	struct ucred *cred;
	int flags;
	struct fs *fs;
	ufs_daddr_t nb;
	struct buf *bp, *nbp, *dbp;
	struct vnode *vp;
	struct indir indirs[NIADDR + 2];
	ufs_daddr_t newb, *bap, pref;
	int deallocated, osize, nsize, num, i, error;
	ufs_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
	ufs_daddr_t *lbns_remfree, lbns[NIADDR + 1];
	int unwindidx;
	int seqcount;

	vp = ap->a_vp;
	ip = VTOI(vp);
	fs = ip->i_fs;
	lbn = lblkno(fs, ap->a_startoffset);
	size = blkoff(fs, ap->a_startoffset) + ap->a_size;
	if (size > fs->fs_bsize)
		panic("ffs_balloc: blk too big");
	*ap->a_bpp = NULL;
	if (lbn < 0)
		return (EFBIG);
	cred = ap->a_cred;
	flags = ap->a_flags;

	/*
	 * The vnode must be locked for us to be able to safely mess
	 * around with the inode.
	 */
	if (vn_islocked(vp) != LK_EXCLUSIVE) {
		panic("ffs_balloc: vnode %p not exclusively locked!", vp);
	}

	/*
	 * If the next write will extend the file into a new block,
	 * and the file is currently composed of a fragment, this
	 * fragment has to be extended to be a full block.
	 */
	nb = lblkno(fs, ip->i_size);
	if (nb < NDADDR && nb < lbn) {
		/*
		 * The file size prior to this write fits in the direct
		 * blocks (i.e. the last block may be a fragment) and we
		 * are now extending the file beyond the block which
		 * contained the end of the file prior to this write.
		 */
		osize = blksize(fs, ip, nb);

		/*
		 * osize gives the disk-allocated size of the last block,
		 * either a number of fragments or a full file system
		 * block.
		 */
		if (osize < fs->fs_bsize && osize > 0) {
			/*
			 * Some fragments are already allocated.  Since this
			 * write extends beyond the old last block, allocate
			 * the complete block (the last block currently
			 * contains only fragments).
			 */
			error = ffs_realloccg(ip, nb,
				ffs_blkpref(ip, nb, (int)nb, &ip->i_db[0]),
				osize, (int)fs->fs_bsize, cred, &bp);
			if (error)
				return (error);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, nb,
				    dofftofsb(fs, bp->b_bio2.bio_offset),
				    ip->i_db[nb], fs->fs_bsize, osize, bp);
			/* adjust the inode size, we just grew */
			ip->i_size = smalllblktosize(fs, nb + 1);
			ip->i_db[nb] = dofftofsb(fs, bp->b_bio2.bio_offset);
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (flags & B_SYNC)
				bwrite(bp);
			else
				bawrite(bp);
			/* bp is already released here */
		}
	}

	/*
	 * The first NDADDR blocks are direct blocks
	 */
	if (lbn < NDADDR) {
		nb = ip->i_db[lbn];
		if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
			error = bread(vp, lblktodoff(fs, lbn), fs->fs_bsize,
				      &bp);
			if (error) {
				brelse(bp);
				return (error);
			}
			bp->b_bio2.bio_offset = fsbtodoff(fs, nb);
			*ap->a_bpp = bp;
			return (0);
		}
		if (nb != 0) {
			/*
			 * Consider need to reallocate a fragment.
			 */
			osize = fragroundup(fs, blkoff(fs, ip->i_size));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {
				error = bread(vp, lblktodoff(fs, lbn),
					      osize, &bp);
				if (error) {
					brelse(bp);
					return (error);
				}
				bp->b_bio2.bio_offset = fsbtodoff(fs, nb);
			} else {
				/*
				 * NOTE: ffs_realloccg() issues a bread().
*/ error = ffs_realloccg(ip, lbn, ffs_blkpref(ip, lbn, (int)lbn, &ip->i_db[0]), osize, nsize, cred, &bp); if (error) return (error); if (DOINGSOFTDEP(vp)) softdep_setup_allocdirect(ip, lbn, dofftofsb(fs, bp->b_bio2.bio_offset), nb, nsize, osize, bp); } } else { if (ip->i_size < smalllblktosize(fs, lbn + 1)) nsize = fragroundup(fs, size); else nsize = fs->fs_bsize; error = ffs_alloc(ip, lbn, ffs_blkpref(ip, lbn, (int)lbn, &ip->i_db[0]), nsize, cred, &newb); if (error) return (error); bp = getblk(vp, lblktodoff(fs, lbn), nsize, 0, 0); bp->b_bio2.bio_offset = fsbtodoff(fs, newb); if (flags & B_CLRBUF) vfs_bio_clrbuf(bp); if (DOINGSOFTDEP(vp)) softdep_setup_allocdirect(ip, lbn, newb, 0, nsize, 0, bp); } ip->i_db[lbn] = dofftofsb(fs, bp->b_bio2.bio_offset); ip->i_flag |= IN_CHANGE | IN_UPDATE; *ap->a_bpp = bp; return (0); } /* * Determine the number of levels of indirection. */ pref = 0; if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0) return(error); #ifdef DIAGNOSTIC if (num < 1) panic ("ffs_balloc: ufs_bmaparray returned indirect block"); #endif /* * Get a handle on the data block buffer before working through * indirect blocks to avoid a deadlock between the VM system holding * a locked VM page and issuing a BMAP (which tries to lock the * indirect blocks), and the filesystem holding a locked indirect * block and then trying to read a data block (which tries to lock * the underlying VM pages). */ dbp = getblk(vp, lblktodoff(fs, lbn), fs->fs_bsize, 0, 0); /* * Setup undo history */ allocib = NULL; allocblk = allociblk; lbns_remfree = lbns; unwindidx = -1; /* * Fetch the first indirect block directly from the inode, allocating * one if necessary. */ --num; nb = ip->i_ib[indirs[0].in_off]; if (nb == 0) { pref = ffs_blkpref(ip, lbn, 0, NULL); /* * If the filesystem has run out of space we can skip the * full fsync/undo of the main [fail] case since no undo * history has been built yet. Hence the goto fail2. */ if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) != 0) goto fail2; nb = newb; *allocblk++ = nb; *lbns_remfree++ = indirs[1].in_lbn; bp = getblk(vp, lblktodoff(fs, indirs[1].in_lbn), fs->fs_bsize, 0, 0); bp->b_bio2.bio_offset = fsbtodoff(fs, nb); vfs_bio_clrbuf(bp); if (DOINGSOFTDEP(vp)) { softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off, newb, 0, fs->fs_bsize, 0, bp); bdwrite(bp); } else { /* * Write synchronously so that indirect blocks * never point at garbage. */ if (DOINGASYNC(vp)) bdwrite(bp); else if ((error = bwrite(bp)) != 0) goto fail; } allocib = &ip->i_ib[indirs[0].in_off]; *allocib = nb; ip->i_flag |= IN_CHANGE | IN_UPDATE; } /* * Fetch through the indirect blocks, allocating as necessary. */ for (i = 1;;) { error = bread(vp, lblktodoff(fs, indirs[i].in_lbn), (int)fs->fs_bsize, &bp); if (error) { brelse(bp); goto fail; } bap = (ufs_daddr_t *)bp->b_data; nb = bap[indirs[i].in_off]; if (i == num) break; i += 1; if (nb != 0) { bqrelse(bp); continue; } if (pref == 0) pref = ffs_blkpref(ip, lbn, 0, NULL); if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) != 0) { brelse(bp); goto fail; } nb = newb; *allocblk++ = nb; *lbns_remfree++ = indirs[i].in_lbn; nbp = getblk(vp, lblktodoff(fs, indirs[i].in_lbn), fs->fs_bsize, 0, 0); nbp->b_bio2.bio_offset = fsbtodoff(fs, nb); vfs_bio_clrbuf(nbp); if (DOINGSOFTDEP(vp)) { softdep_setup_allocindir_meta(nbp, ip, bp, indirs[i - 1].in_off, nb); bdwrite(nbp); } else { /* * Write synchronously so that indirect blocks * never point at garbage. 
*/ if ((error = bwrite(nbp)) != 0) { brelse(bp); goto fail; } } bap[indirs[i - 1].in_off] = nb; if (allocib == NULL && unwindidx < 0) unwindidx = i - 1; /* * If required, write synchronously, otherwise use * delayed write. */ if (flags & B_SYNC) { bwrite(bp); } else { if (bp->b_bufsize == fs->fs_bsize) bp->b_flags |= B_CLUSTEROK; bdwrite(bp); } } /* * Get the data block, allocating if necessary. We have already * called getblk() on the data block buffer, dbp. If we have to * allocate it and B_CLRBUF has been set the inference is an intention * to zero out the related disk blocks, so we do not have to issue * a read. Instead we simply call vfs_bio_clrbuf(). If B_CLRBUF is * not set the caller intends to overwrite the entire contents of the * buffer and we don't waste time trying to clean up the contents. * * bp references the current indirect block. When allocating, * the block must be updated. */ if (nb == 0) { pref = ffs_blkpref(ip, lbn, indirs[i].in_off, &bap[0]); error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb); if (error) { brelse(bp); goto fail; } nb = newb; *allocblk++ = nb; *lbns_remfree++ = lbn; dbp->b_bio2.bio_offset = fsbtodoff(fs, nb); if (flags & B_CLRBUF) vfs_bio_clrbuf(dbp); if (DOINGSOFTDEP(vp)) softdep_setup_allocindir_page(ip, lbn, bp, indirs[i].in_off, nb, 0, dbp); bap[indirs[i].in_off] = nb; /* * If required, write synchronously, otherwise use * delayed write. */ if (flags & B_SYNC) { bwrite(bp); } else { if (bp->b_bufsize == fs->fs_bsize) bp->b_flags |= B_CLUSTEROK; bdwrite(bp); } *ap->a_bpp = dbp; return (0); } brelse(bp); /* * At this point all related indirect blocks have been allocated * if necessary and released. bp is no longer valid. dbp holds * our getblk()'d data block. * * XXX we previously performed a cluster_read operation here. */ if (flags & B_CLRBUF) { /* * If B_CLRBUF is set we must validate the invalid portions * of the buffer. This typically requires a read-before- * write. The strategy call will fill in bio_offset in that * case. * * If we hit this case we do a cluster read if possible * since nearby data blocks are likely to be accessed soon * too. */ if ((dbp->b_flags & B_CACHE) == 0) { bqrelse(dbp); seqcount = (flags & B_SEQMASK) >> B_SEQSHIFT; if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) { error = cluster_read(vp, (off_t)ip->i_size, lblktodoff(fs, lbn), (int)fs->fs_bsize, fs->fs_bsize, seqcount * BKVASIZE, &dbp); } else { error = bread(vp, lblktodoff(fs, lbn), (int)fs->fs_bsize, &dbp); } if (error) goto fail; } else {
/*
 * union_lookup(struct vnode *a_dvp, struct vnode **a_vpp,
 *		struct componentname *a_cnp)
 */
static int
union_lookup(struct vop_old_lookup_args *ap)
{
	int error;
	int uerror, lerror;
	struct vnode *uppervp, *lowervp;
	struct vnode *upperdvp, *lowerdvp;
	struct vnode *dvp = ap->a_dvp;		/* starting dir */
	struct union_node *dun = VTOUNION(dvp);	/* associated union node */
	struct componentname *cnp = ap->a_cnp;
	struct thread *td = cnp->cn_td;
	int lockparent = cnp->cn_flags & CNP_LOCKPARENT;
	struct union_mount *um = MOUNTTOUNIONMOUNT(dvp->v_mount);
	struct ucred *saved_cred = NULL;
	int iswhiteout;
	struct vattr va;

	*ap->a_vpp = NULLVP;

	/*
	 * Disallow write attempts to the filesystem mounted read-only.
	 */
	if ((dvp->v_mount->mnt_flag & MNT_RDONLY) &&
	    (cnp->cn_nameiop == NAMEI_DELETE ||
	     cnp->cn_nameiop == NAMEI_RENAME)) {
		return (EROFS);
	}

	/*
	 * For any lookups we do, always return with the parent locked.
	 */
	cnp->cn_flags |= CNP_LOCKPARENT;

	lowerdvp = dun->un_lowervp;
	uppervp = NULLVP;
	lowervp = NULLVP;
	iswhiteout = 0;

	uerror = ENOENT;
	lerror = ENOENT;

	/*
	 * Get a private lock on uppervp and a reference, effectively
	 * taking it out of the union_node's control.
	 *
	 * We must lock upperdvp while holding our lock on dvp
	 * to avoid a deadlock.
	 */
	upperdvp = union_lock_upper(dun, td);

	/*
	 * Do the lookup in the upper level.  If that level consumes
	 * additional pathnames, then assume that something special is
	 * going on and just return that vnode.
	 */
	if (upperdvp != NULLVP) {
		/*
		 * We do not have to worry about the DOTDOT case, we've
		 * already unlocked dvp.
		 */
		UDEBUG(("A %p\n", upperdvp));

		/*
		 * Do the lookup.  We must supply a locked and referenced
		 * upperdvp to the function and will get a new locked and
		 * referenced upperdvp back with the old having been
		 * dereferenced.
		 *
		 * If an error is returned, uppervp will be NULLVP.  If no
		 * error occurs, uppervp will be the locked and referenced
		 * return vnode or possibly NULL, depending on what is being
		 * requested.  It is possible that the returned uppervp
		 * will be the same as upperdvp.
		 */
		uerror = union_lookup1(um->um_uppervp, &upperdvp, &uppervp, cnp);
		UDEBUG((
		    "uerror %d upperdvp %p %d/%d, uppervp %p ref=%d/lck=%d\n",
		    uerror,
		    upperdvp,
		    upperdvp->v_sysref.refcnt,
		    vn_islocked(upperdvp),
		    uppervp,
		    (uppervp ? uppervp->v_sysref.refcnt : -99),
		    (uppervp ? vn_islocked(uppervp) : -99)
		));

		/*
		 * Disallow write attempts to the filesystem mounted read-only.
		 */
		if (uerror == EJUSTRETURN &&
		    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
		    (cnp->cn_nameiop == NAMEI_CREATE ||
		     cnp->cn_nameiop == NAMEI_RENAME)) {
			error = EROFS;
			goto out;
		}

		/*
		 * Special case.  If cn_consume != 0 skip out.  The result
		 * of the lookup is transferred to our return variable.  If
		 * an error occurred we have to throw away the results.
		 */
		if (cnp->cn_consume != 0) {
			if ((error = uerror) == 0) {
				*ap->a_vpp = uppervp;
				uppervp = NULL;
			}
			goto out;
		}

		/*
		 * Calculate whiteout, fall through.
		 */
		if (uerror == ENOENT || uerror == EJUSTRETURN) {
			if (cnp->cn_flags & CNP_ISWHITEOUT) {
				iswhiteout = 1;
			} else if (lowerdvp != NULLVP) {
				int terror;

				terror = VOP_GETATTR(upperdvp, &va);
				if (terror == 0 && (va.va_flags & OPAQUE))
					iswhiteout = 1;
			}
		}
	}

	/*
	 * In a similar way to the upper layer, do the lookup in the
	 * lower layer.  This time, if there is some component magic
	 * going on, then vput whatever we got back from the upper layer
	 * and return the lower vnode instead.
	 */
	if (lowerdvp != NULLVP && !iswhiteout) {
		int nameiop;

		UDEBUG(("B %p\n", lowerdvp));

		/*
		 * Force only LOOKUPs on the lower node, since
		 * we won't be making changes to it anyway.
		 */
		nameiop = cnp->cn_nameiop;
		cnp->cn_nameiop = NAMEI_LOOKUP;
		if (um->um_op == UNMNT_BELOW) {
			saved_cred = cnp->cn_cred;
			cnp->cn_cred = um->um_cred;
		}

		/*
		 * We shouldn't have to worry about locking interactions
		 * between the lower layer and our union layer (w.r.t.
		 * `..' processing) because we don't futz with lowervp
		 * locks in the union-node instantiation code path.
		 *
		 * union_lookup1() requires lowervp to be locked on entry,
		 * and it will be unlocked on return.  The ref count will
		 * not change.  On return lowervp doesn't represent anything
		 * to us so we NULL it out.
		 */
		vref(lowerdvp);
		vn_lock(lowerdvp, LK_EXCLUSIVE | LK_RETRY);
		lerror = union_lookup1(um->um_lowervp, &lowerdvp, &lowervp,
				       cnp);
		if (lowerdvp == lowervp)
			vrele(lowerdvp);
		else
			vput(lowerdvp);
		lowerdvp = NULL;	/* lowerdvp invalid after vput */

		if (um->um_op == UNMNT_BELOW)
			cnp->cn_cred = saved_cred;
		cnp->cn_nameiop = nameiop;

		if (cnp->cn_consume != 0 || lerror == EACCES) {
			if ((error = lerror) == 0) {
				*ap->a_vpp = lowervp;
				lowervp = NULL;
			}
			goto out;
		}
	} else {
		UDEBUG(("C %p\n", lowerdvp));
		if ((cnp->cn_flags & CNP_ISDOTDOT) && dun->un_pvp != NULLVP) {
			if ((lowervp = LOWERVP(dun->un_pvp)) != NULL) {
				vref(lowervp);
				vn_lock(lowervp, LK_EXCLUSIVE | LK_RETRY);
				lerror = 0;
			}
		}
	}

	/*
	 * Ok.  Now we have uerror, uppervp, upperdvp, lerror, and lowervp.
	 *
	 *	1. If both layers returned an error, select the upper layer.
	 *
	 *	2. If the upper layer failed and the bottom layer succeeded,
	 *	   two subcases occur:
	 *
	 *	   a.	The bottom vnode is not a directory, in which case
	 *		just return a new union vnode referencing an
	 *		empty top layer and the existing bottom layer.
	 *
	 *	   b.	The bottom vnode is a directory, in which case
	 *		create a new directory in the top layer and
	 *		fall through to case 3.
	 *
	 *	3. If the top layer succeeded then return a new union
	 *	   vnode referencing whatever the new top layer and
	 *	   whatever the bottom layer returned.
	 */

	/* case 1. */
	if ((uerror != 0) && (lerror != 0)) {
		error = uerror;
		goto out;
	}

	/* case 2. */
	if (uerror != 0 /* && (lerror == 0) */ ) {
		if (lowervp->v_type == VDIR) { /* case 2b. */
			KASSERT(uppervp == NULL,
				("uppervp unexpectedly non-NULL"));
			/*
			 * Oops, uppervp has a problem, we may have to shadow.
			 */
			uerror = union_mkshadow(um, upperdvp, cnp, &uppervp);
			if (uerror) {
				error = uerror;
				goto out;
			}
		}
	}

	/*
	 * Must call union_allocvp with both the upper and lower vnodes
	 * referenced and the upper vnode locked.  ap->a_vpp is returned
	 * referenced and locked.  lowervp, uppervp, and upperdvp are
	 * absorbed by union_allocvp() whether it succeeds or fails.
	 *
	 * upperdvp is the parent directory of uppervp which may be
	 * different, depending on the path, from dun->un_uppervp.  That's
	 * why it is a separate argument.  Note that it must be unlocked.
	 *
	 * dvp must be locked on entry to the call and will be locked on
	 * return.
	 */
	if (uppervp && uppervp != upperdvp)
		vn_unlock(uppervp);
	if (lowervp)
		vn_unlock(lowervp);
	if (upperdvp)
		vn_unlock(upperdvp);

	error = union_allocvp(ap->a_vpp, dvp->v_mount, dvp, upperdvp, cnp,
			      uppervp, lowervp, 1);

	UDEBUG(("Create %p = %p %p refs=%d\n", *ap->a_vpp, uppervp, lowervp,
		(*ap->a_vpp) ? ((*ap->a_vpp)->v_sysref.refcnt) : -99));

	uppervp = NULL;
	upperdvp = NULL;
	lowervp = NULL;

	/*
	 * Termination Code
	 *
	 * - put away any extra junk lying around.  Note that lowervp
	 *   (if not NULL) will never be the same as *ap->a_vp and
	 *   neither will uppervp, because when we set that state we
	 *   NULL-out lowervp or uppervp.  On the other hand, upperdvp
	 *   may match uppervp or *ap->a_vpp.
	 *
	 * - relock/unlock dvp if appropriate.
	 */

out:
	if (upperdvp) {
		if (upperdvp == uppervp || upperdvp == *ap->a_vpp)
			vrele(upperdvp);
		else
			vput(upperdvp);
	}

	if (uppervp)
		vput(uppervp);

	if (lowervp)
		vput(lowervp);

	/*
	 * Restore LOCKPARENT state
	 */
	if (!lockparent)
		cnp->cn_flags &= ~CNP_LOCKPARENT;

	UDEBUG(("Out %d vpp %p/%d lower %p upper %p\n", error, *ap->a_vpp,
		((*ap->a_vpp) ? (*ap->a_vpp)->v_sysref.refcnt : -99),
		lowervp, uppervp));

	/*
	 * dvp lock state, determine whether to relock dvp.  dvp is expected
	 * to be locked on return if:
	 *
	 *	- there was an error (other than EJUSTRETURN), or
	 *	- we hit the last component and lockparent is true
	 *
	 * dvp_is_locked is the current state of the dvp lock, not counting
	 * the possibility that *ap->a_vpp == dvp (in which case it is locked
	 * anyway).  Note that *ap->a_vpp == dvp only if no error occurred.
	 */
	if (*ap->a_vpp != dvp) {
		if ((error == 0 || error == EJUSTRETURN) && !lockparent) {
			vn_unlock(dvp);
		}
	}

	/*
	 * Diagnostics
	 */
#ifdef DIAGNOSTIC
	if (cnp->cn_namelen == 1 &&
	    cnp->cn_nameptr[0] == '.' &&
	    *ap->a_vpp != dvp) {
		panic("union_lookup returning . (%p) not same as startdir (%p)",
		      ap->a_vpp, dvp);
	}
#endif

	return (error);
}
static int
devfs_spec_close(struct vop_close_args *ap)
{
	struct devfs_node *node;
	struct proc *p = curproc;
	struct vnode *vp = ap->a_vp;
	cdev_t dev = vp->v_rdev;
	int error = 0;
	int needrelock;

	/*
	 * We do special tests on the opencount so unfortunately we need
	 * an exclusive lock.
	 */
	vn_lock(vp, LK_UPGRADE | LK_RETRY);

	if (dev)
		devfs_debug(DEVFS_DEBUG_DEBUG,
			    "devfs_spec_close() called on %s! \n",
			    dev->si_name);
	else
		devfs_debug(DEVFS_DEBUG_DEBUG,
			    "devfs_spec_close() called, null vnode!\n");

	/*
	 * A couple of hacks for devices and tty devices.  The
	 * vnode ref count cannot be used to figure out the
	 * last close, but we can use v_opencount now that
	 * revoke works properly.
	 *
	 * Detect the last close on a controlling terminal and clear
	 * the session (half-close).
	 */
	if (dev)
		reference_dev(dev);

	if (p && vp->v_opencount <= 1 && vp == p->p_session->s_ttyvp) {
		p->p_session->s_ttyvp = NULL;
		vrele(vp);
	}

	/*
	 * Vnodes can be opened and closed multiple times.  Do not really
	 * close the device unless (1) it is being closed forcibly,
	 * (2) the device wants to track closes, or (3) this is the last
	 * vnode doing its last close on the device.
	 *
	 * XXX the VXLOCK (force close) case can leave vnodes referencing
	 * a closed device.  This might not occur now that our revoke is
	 * fixed.
	 */
	devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_spec_close() -1- \n");
	if (dev && ((vp->v_flag & VRECLAIMED) ||
	    (dev_dflags(dev) & D_TRACKCLOSE) ||
	    (vp->v_opencount == 1))) {
		/*
		 * Ugly pty magic, to make pty devices disappear again once
		 * they are closed.
		 */
		node = DEVFS_NODE(ap->a_vp);
		if (node && (node->flags & DEVFS_PTY))
			node->flags |= DEVFS_INVISIBLE;

		/*
		 * Unlock around dev_dclose(), unless the vnode is
		 * undergoing a vgone/reclaim (during umount).
		 */
		needrelock = 0;
		if ((vp->v_flag & VRECLAIMED) == 0 && vn_islocked(vp)) {
			needrelock = 1;
			vn_unlock(vp);
		}

		/*
		 * WARNING!  If the device destroys itself the devfs node
		 *	     can disappear here.
		 *
		 * WARNING!  vn_lock() will fail if the vp is in a VRECLAIM,
		 *	     which can occur during umount.
		 */
		error = dev_dclose(dev, ap->a_fflag, S_IFCHR, ap->a_fp);
		/* node is now stale */

		if (needrelock) {
			if (vn_lock(vp, LK_EXCLUSIVE |
					LK_RETRY |
					LK_FAILRECLAIM) != 0) {
				panic("devfs_spec_close: vnode %p "
				      "unexpectedly could not be relocked",
				      vp);
			}
		}
	} else {
		error = 0;
	}
	devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_spec_close() -2- \n");

	/*
	 * Track the actual opens and closes on the vnode.  The last close
	 * disassociates the rdev.  If the rdev is already disassociated or
	 * the opencount is already 0, the vnode might have been revoked
	 * and no further opencount tracking occurs.
	 */
	if (dev)
		release_dev(dev);
	if (vp->v_opencount > 0)
		vop_stdclose(ap);
	return(error);
}
/*
 * Allocates a new vnode for the node node or returns a new reference to
 * an existing one if the node already had a vnode referencing it.  The
 * resulting locked vnode is returned in *vpp.
 *
 * Returns zero on success or an appropriate error code on failure.
 *
 * The caller must ensure that node cannot go away (usually by holding
 * the related directory entry).
 *
 * If dnode is non-NULL this routine avoids deadlocking against it but
 * can return EAGAIN.  Caller must try again.  The dnode lock will cycle
 * in this case; it remains locked on return in all cases.  dnode must
 * be shared-locked.
 */
int
tmpfs_alloc_vp(struct mount *mp,
	       struct tmpfs_node *dnode, struct tmpfs_node *node, int lkflag,
	       struct vnode **vpp)
{
	int error = 0;
	struct vnode *vp;

loop:
	/*
	 * Interlocked extraction from node.  This can race many things.
	 * We have to get a soft reference on the vnode while we hold
	 * the node locked, then acquire it properly and check for races.
	 */
	TMPFS_NODE_LOCK(node);
	if ((vp = node->tn_vnode) != NULL) {
		KKASSERT((node->tn_vpstate & TMPFS_VNODE_DOOMED) == 0);
		vhold(vp);
		TMPFS_NODE_UNLOCK(node);

		if (dnode) {
			/*
			 * Special-case handling to avoid deadlocking against
			 * dnode.  This case has been validated and occurs
			 * every so often during synth builds.
			 */
			if (vget(vp, (lkflag & ~LK_RETRY) |
				     LK_NOWAIT |
				     LK_EXCLUSIVE) != 0) {
				TMPFS_NODE_UNLOCK(dnode);
				if (vget(vp, (lkflag & ~LK_RETRY) |
					     LK_SLEEPFAIL |
					     LK_EXCLUSIVE) == 0) {
					vn_unlock(vp);
				}
				vdrop(vp);
				TMPFS_NODE_LOCK_SH(dnode);

				return EAGAIN;
			}
		} else {
			/*
			 * Normal path
			 */
			if (vget(vp, lkflag | LK_EXCLUSIVE) != 0) {
				vdrop(vp);
				goto loop;
			}
		}
		if (node->tn_vnode != vp) {
			vput(vp);
			vdrop(vp);
			goto loop;
		}
		vdrop(vp);
		goto out;
	}
	/* vp is NULL */

	/*
	 * This should never happen.
	 */
	if (node->tn_vpstate & TMPFS_VNODE_DOOMED) {
		TMPFS_NODE_UNLOCK(node);
		error = ENOENT;
		goto out;
	}

	/*
	 * Interlock against other calls to tmpfs_alloc_vp() trying to
	 * allocate and assign a vp to node.
	 */
	if (node->tn_vpstate & TMPFS_VNODE_ALLOCATING) {
		node->tn_vpstate |= TMPFS_VNODE_WANT;
		error = tsleep(&node->tn_vpstate, PINTERLOCKED | PCATCH,
			       "tmpfs_alloc_vp", 0);
		TMPFS_NODE_UNLOCK(node);
		if (error)
			return error;
		goto loop;
	}
	node->tn_vpstate |= TMPFS_VNODE_ALLOCATING;
	TMPFS_NODE_UNLOCK(node);

	/*
	 * Allocate a new vnode (may block).  The ALLOCATING flag should
	 * prevent a race against someone else assigning node->tn_vnode.
	 */
	error = getnewvnode(VT_TMPFS, mp, &vp, VLKTIMEOUT, LK_CANRECURSE);
	if (error != 0)
		goto unlock;

	KKASSERT(node->tn_vnode == NULL);
	KKASSERT(vp != NULL);
	vp->v_data = node;
	vp->v_type = node->tn_type;

	/* Type-specific initialization. */
	switch (node->tn_type) {
	case VBLK:
		/* FALLTHROUGH */
	case VCHR:
		/* FALLTHROUGH */
	case VSOCK:
		break;
	case VREG:
		/*
		 * VMIO is mandatory.  Tmpfs also supports KVABIO
		 * for its tmpfs_strategy().
		 */
		vsetflags(vp, VKVABIO);
		vinitvmio(vp, node->tn_size, TMPFS_BLKSIZE, -1);
		break;
	case VLNK:
		break;
	case VFIFO:
		vp->v_ops = &mp->mnt_vn_fifo_ops;
		break;
	case VDIR:
		break;

	default:
		panic("tmpfs_alloc_vp: type %p %d", node, (int)node->tn_type);
	}

unlock:
	TMPFS_NODE_LOCK(node);

	KKASSERT(node->tn_vpstate & TMPFS_VNODE_ALLOCATING);
	node->tn_vpstate &= ~TMPFS_VNODE_ALLOCATING;
	node->tn_vnode = vp;

	if (node->tn_vpstate & TMPFS_VNODE_WANT) {
		node->tn_vpstate &= ~TMPFS_VNODE_WANT;
		TMPFS_NODE_UNLOCK(node);
		wakeup(&node->tn_vpstate);
	} else {
		TMPFS_NODE_UNLOCK(node);
	}

out:
	*vpp = vp;
	KKASSERT(IFF(error == 0, *vpp != NULL && vn_islocked(*vpp)));

	return error;
}