Example 1
/*
 * Change access and modification times of the given vnode.
 * Caller should execute tmpfs_update on vp after a successful execution.
 * The vnode must be locked on entry and remain locked on exit.
 */
int
tmpfs_chtimes(struct vnode *vp, struct timespec *atime, struct timespec *mtime,
	      int vaflags, struct ucred *cred)
{
	struct tmpfs_node *node;

	KKASSERT(vn_islocked(vp));

	node = VP_TO_TMPFS_NODE(vp);

	/* Disallow this operation if the file system is mounted read-only. */
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return EROFS;

	/* Immutable or append-only files cannot be modified, either. */
	if (node->tn_flags & (IMMUTABLE | APPEND))
		return EPERM;

	TMPFS_NODE_LOCK(node);
	if (atime->tv_sec != VNOVAL && atime->tv_nsec != VNOVAL)
		node->tn_status |= TMPFS_NODE_ACCESSED;

	if (mtime->tv_sec != VNOVAL && mtime->tv_nsec != VNOVAL) {
		node->tn_status |= TMPFS_NODE_MODIFIED;
		vclrflags(vp, VLASTWRITETS);
	}

	TMPFS_NODE_UNLOCK(node);

	tmpfs_itimes(vp, atime, mtime);

	KKASSERT(vn_islocked(vp));

	return 0;
}
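
A short caller sketch may help make the contract in the header comment concrete: the vnode stays locked across the call and tmpfs_update() runs afterwards on success. This is an illustration only, not code from the source tree; the helper name below is hypothetical, and the tmpfs_update(vp) signature is assumed from the header comment.

/*
 * Hypothetical caller (not from the source): honors the contract that
 * vp is locked on entry and exit and that tmpfs_update() follows a
 * successful tmpfs_chtimes().
 */
static int
example_set_times(struct vnode *vp, struct timespec *atime,
		  struct timespec *mtime, int vaflags, struct ucred *cred)
{
	int error;

	KKASSERT(vn_islocked(vp));		/* caller holds the lock */
	error = tmpfs_chtimes(vp, atime, mtime, vaflags, cred);
	if (error == 0)
		tmpfs_update(vp);		/* per the header comment */
	return error;				/* vp remains locked */
}
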
Example 2
/*
 * Change flags of the given vnode.
 * Caller should execute tmpfs_update on vp after a successful execution.
 * The vnode must be locked on entry and remain locked on exit.
 */
int
tmpfs_chflags(struct vnode *vp, int vaflags, struct ucred *cred)
{
	int error;
	struct tmpfs_node *node;
	int flags;

	KKASSERT(vn_islocked(vp));

	node = VP_TO_TMPFS_NODE(vp);
	flags = node->tn_flags;

	/* Disallow this operation if the file system is mounted read-only. */
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return EROFS;
	error = vop_helper_setattr_flags(&flags, vaflags, node->tn_uid, cred);

	/* Actually change the flags on the node itself */
	if (error == 0) {
		TMPFS_NODE_LOCK(node);
		node->tn_flags = flags;
		node->tn_status |= TMPFS_NODE_CHANGED;
		TMPFS_NODE_UNLOCK(node);
	}

	KKASSERT(vn_islocked(vp));

	return error;
}
Example 3
/*
 * Change size of the given vnode.
 * Caller should execute tmpfs_update on vp after a successful execution.
 * The vnode must be locked on entry and remain locked on exit.
 */
int
tmpfs_chsize(struct vnode *vp, u_quad_t size, struct ucred *cred)
{
	int error;
	struct tmpfs_node *node;

	KKASSERT(vn_islocked(vp));

	node = VP_TO_TMPFS_NODE(vp);

	/* Decide whether this is a valid operation based on the file type. */
	error = 0;
	switch (vp->v_type) {
	case VDIR:
		return EISDIR;

	case VREG:
		if (vp->v_mount->mnt_flag & MNT_RDONLY)
			return EROFS;
		break;

	case VBLK:
		/* FALLTHROUGH */
	case VCHR:
		/* FALLTHROUGH */
	case VFIFO:
		/* Allow modifications of special files even if the file
		 * system is mounted read-only (we are not modifying the
		 * files themselves, but the objects they represent). */
		return 0;

	default:
		/* Anything else is unsupported. */
		return EOPNOTSUPP;
	}

	/* Immutable or append-only files cannot be modified, either. */
	if (node->tn_flags & (IMMUTABLE | APPEND))
		return EPERM;

	error = tmpfs_truncate(vp, size);
	/* tmpfs_truncate will raise the NOTE_EXTEND and NOTE_ATTRIB kevents
	 * for us, as well as update tn_status; no need to do that here. */

	KKASSERT(vn_islocked(vp));

	return error;
}
Example 4
/* ARGSUSED */
static int
nwfs_sync(struct mount *mp, int waitfor)
{
	struct vnode *vp;
	int error, allerror = 0;
	/*
	 * Force stale buffer cache information to be flushed.
	 */
loop:
	for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist);
	     vp != NULL;
	     vp = TAILQ_NEXT(vp, v_nmntvnodes)) {
		/*
		 * If the vnode that we are about to sync is no longer
		 * associated with this mount point, start over.
		 */
		if (vp->v_mount != mp)
			goto loop;
		if (vn_islocked(vp) || RB_EMPTY(&vp->v_rbdirty_tree) ||
		    (waitfor & MNT_LAZY))
			continue;
		if (vget(vp, LK_EXCLUSIVE))
			goto loop;
		/* XXX vp may not be retained */
		error = VOP_FSYNC(vp, waitfor, 0);
		if (error)
			allerror = error;
		vput(vp);
	}
	return (allerror);
}
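
A sync handler like this is normally reached through the mount's vfsops table rather than called directly. The initializer below is only a sketch of that wiring: the field names follow the DragonFly struct vfsops convention, and the nwfs_mount/nwfs_unmount/nwfs_root entry points are assumed to exist elsewhere in the module rather than shown in this excerpt.

/*
 * Illustrative only: how a handler such as nwfs_sync() is typically
 * registered with the VFS.  Not the actual initializer from the source.
 */
static struct vfsops nwfs_vfsops_sketch = {
	.vfs_mount	= nwfs_mount,	/* assumed to exist elsewhere */
	.vfs_unmount	= nwfs_unmount,	/* assumed to exist elsewhere */
	.vfs_root	= nwfs_root,	/* assumed to exist elsewhere */
	.vfs_sync	= nwfs_sync,	/* the function shown above */
};
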
Example 5
/*
 * Change access mode on the given vnode.
 * Caller should execute tmpfs_update on vp after a successful execution.
 * The vnode must be locked on entry and remain locked on exit.
 */
int
tmpfs_chmod(struct vnode *vp, mode_t vamode, struct ucred *cred)
{
	struct tmpfs_node *node;
	mode_t cur_mode;
	int error;

	KKASSERT(vn_islocked(vp));

	node = VP_TO_TMPFS_NODE(vp);

	/* Disallow this operation if the file system is mounted read-only. */
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return EROFS;

	/* Immutable or append-only files cannot be modified, either. */
	if (node->tn_flags & (IMMUTABLE | APPEND))
		return EPERM;

	cur_mode = node->tn_mode;
	error = vop_helper_chmod(vp, vamode, cred, node->tn_uid, node->tn_gid,
				 &cur_mode);

	if (error == 0 &&
	    (node->tn_mode & ALLPERMS) != (cur_mode & ALLPERMS)) {
		TMPFS_NODE_LOCK(node);
		node->tn_mode &= ~ALLPERMS;
		node->tn_mode |= cur_mode & ALLPERMS;

		node->tn_status |= TMPFS_NODE_CHANGED;
		TMPFS_NODE_UNLOCK(node);
	}

	KKASSERT(vn_islocked(vp));

	return 0;
}
Example 6
/*
 * Change ownership of the given vnode.  At least one of uid or gid must
 * be different than VNOVAL.  If one is set to that value, the attribute
 * is unchanged.
 * Caller should execute tmpfs_update on vp after a successful execution.
 * The vnode must be locked on entry and remain locked on exit.
 */
int
tmpfs_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred)
{
	mode_t cur_mode;
	uid_t cur_uid;
	gid_t cur_gid;
	struct tmpfs_node *node;
	int error;

	KKASSERT(vn_islocked(vp));
	node = VP_TO_TMPFS_NODE(vp);

	/* Disallow this operation if the file system is mounted read-only. */
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return EROFS;

	/* Immutable or append-only files cannot be modified, either. */
	if (node->tn_flags & (IMMUTABLE | APPEND))
		return EPERM;

	cur_uid = node->tn_uid;
	cur_gid = node->tn_gid;
	cur_mode = node->tn_mode;
	error = vop_helper_chown(vp, uid, gid, cred,
				 &cur_uid, &cur_gid, &cur_mode);

	if (error == 0) {
		TMPFS_NODE_LOCK(node);
		if (cur_uid != node->tn_uid ||
		    cur_gid != node->tn_gid ||
		    cur_mode != node->tn_mode) {
			node->tn_uid = cur_uid;
			node->tn_gid = cur_gid;
			node->tn_mode = cur_mode;
			node->tn_status |= TMPFS_NODE_CHANGED;
		}
		TMPFS_NODE_UNLOCK(node);
	}

	return error;
}
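
All of the tmpfs_ch*() helpers above share the same shape: check MNT_RDONLY and the IMMUTABLE/APPEND flags, update the node under TMPFS_NODE_LOCK, and rely on the caller for tmpfs_update(). A setattr-style dispatcher would string them together roughly as sketched below. This is a simplified illustration under those assumptions, with a hypothetical function name; the actual tmpfs_setattr() in the source may differ.

/*
 * Hypothetical dispatcher sketch: apply whichever attributes the caller
 * filled in (VNOVAL means "unchanged"), then run tmpfs_update() once at
 * the end, mirroring the "caller should execute tmpfs_update" contract
 * of the helpers above.  vp is locked throughout.
 */
static int
example_setattr(struct vnode *vp, struct vattr *vap, struct ucred *cred)
{
	int error = 0;

	if (error == 0 && vap->va_flags != VNOVAL)
		error = tmpfs_chflags(vp, vap->va_flags, cred);
	if (error == 0 && vap->va_size != VNOVAL)
		error = tmpfs_chsize(vp, vap->va_size, cred);
	if (error == 0 && (vap->va_uid != (uid_t)VNOVAL ||
			   vap->va_gid != (gid_t)VNOVAL))
		error = tmpfs_chown(vp, vap->va_uid, vap->va_gid, cred);
	if (error == 0 && vap->va_mode != (mode_t)VNOVAL)
		error = tmpfs_chmod(vp, vap->va_mode, cred);
	if (error == 0 && (vap->va_atime.tv_sec != VNOVAL ||
			   vap->va_mtime.tv_sec != VNOVAL))
		error = tmpfs_chtimes(vp, &vap->va_atime, &vap->va_mtime,
				      vap->va_vaflags, cred);
	if (error == 0)
		tmpfs_update(vp);
	return error;
}
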
Example 7
int
puffs_biowrite(struct vnode *vp, struct uio *uio, int ioflag,
    struct ucred *cred)
{
	int biosize = vp->v_mount->mnt_stat.f_iosize;
	struct buf *bp;
	struct vattr vattr;
	off_t loffset, fsize;
	int boff, bytes;
	int error = 0;
	int bcount;
	int trivial;

	KKASSERT(uio->uio_rw == UIO_WRITE);
	KKASSERT(vp->v_type == VREG);

	if (uio->uio_offset < 0)
		return EINVAL;
	if (uio->uio_resid == 0)
		return 0;

	/*
	 * If IO_APPEND then load uio_offset.  We restart here if we cannot
	 * get the append lock.
	 *
	 * We need to obtain an exclusive lock if we intend to modify the
	 * file size in order to guarantee the append point with multiple
	 * contending writers.
	 */
	if (ioflag & IO_APPEND) {
		/* XXXDF relock if necessary */
		KKASSERT(vn_islocked(vp) == LK_EXCLUSIVE);
		error = VOP_GETATTR(vp, &vattr);
		if (error)
			return error;
		uio->uio_offset = puffs_meta_getsize(vp);
	}

	do {
		boff = uio->uio_offset & (biosize-1);
		loffset = uio->uio_offset - boff;
		bytes = (int)szmin((unsigned)(biosize - boff), uio->uio_resid);
again:
		/*
		 * Handle direct append and file extension cases, calculate
		 * unaligned buffer size.  When extending B_CACHE will be
		 * set if possible.  See UIO_NOCOPY note below.
		 */
		fsize = puffs_meta_getsize(vp);
		if (uio->uio_offset + bytes > fsize) {
			trivial = (uio->uio_segflg != UIO_NOCOPY &&
			    uio->uio_offset <= fsize);
			puffs_meta_setsize(vp, uio->uio_offset + bytes,
			    trivial);
		}
		bp = getblk(vp, loffset, biosize, 0, 0);
		if (bp == NULL) {
			error = EINTR;
			break;
		}

		/*
		 * Actual bytes in buffer which we care about
		 */
		if (loffset + biosize < fsize)
			bcount = biosize;
		else
			bcount = (int)(fsize - loffset);

		/*
		 * Avoid a read by setting B_CACHE where the data we
		 * intend to write covers the entire buffer.  Note
		 * that the buffer may have been set to B_CACHE by
		 * puffs_meta_setsize() above or otherwise inherited the
		 * flag, but if B_CACHE isn't set the buffer may be
		 * uninitialized and must be zero'd to accommodate
		 * future seek+write's.
		 *
		 * See the comments in kern/vfs_bio.c's getblk() for
		 * more information.
		 *
		 * When doing a UIO_NOCOPY write the buffer is not
		 * overwritten and we cannot just set B_CACHE unconditionally
		 * for full-block writes.
		 */
		if (boff == 0 && bytes == biosize &&
		    uio->uio_segflg != UIO_NOCOPY) {
			bp->b_flags |= B_CACHE;
			bp->b_flags &= ~(B_ERROR | B_INVAL);
		}

		/*
		 * b_resid may be set due to file EOF if we extended out.
		 * The NFS bio code will zero the difference anyway so
		 * just acknowledge the fact and set b_resid to 0.
		 */
		if ((bp->b_flags & B_CACHE) == 0) {
			bp->b_cmd = BUF_CMD_READ;
			bp->b_bio2.bio_done = puffs_iodone;
			bp->b_bio2.bio_flags |= BIO_SYNC;
			vfs_busy_pages(vp, bp);
			error = puffs_doio(vp, &bp->b_bio2, uio->uio_td);
			if (error) {
				brelse(bp);
				break;
			}
			bp->b_resid = 0;
		}

		/*
		 * If dirtyend exceeds file size, chop it down.  This should
		 * not normally occur but there is an append race where it
		 * might occur XXX, so we log it.
		 *
		 * If the chopping creates a reverse-indexed or degenerate
		 * situation with dirtyoff/end, we 0 both of them.
		 */
		if (bp->b_dirtyend > bcount) {
			kprintf("PUFFS append race @%08llx:%d\n",
			    (long long)bp->b_bio2.bio_offset,
			    bp->b_dirtyend - bcount);
			bp->b_dirtyend = bcount;
		}

		if (bp->b_dirtyoff >= bp->b_dirtyend)
			bp->b_dirtyoff = bp->b_dirtyend = 0;

		/*
		 * If the new write will leave a contiguous dirty
		 * area, just update the b_dirtyoff and b_dirtyend,
		 * otherwise force a write rpc of the old dirty area.
		 *
		 * While it is possible to merge discontiguous writes due to
		 * our having a B_CACHE buffer (and thus valid read data
		 * for the hole), we don't because it could lead to
		 * significant cache coherency problems with multiple clients,
		 * especially if locking is implemented later on.
		 *
		 * as an optimization we could theoretically maintain
		 * a linked list of discontinuous areas, but we would still
		 * have to commit them separately so there isn't much
		 * advantage to it except perhaps a bit of asynchronization.
		 */
		if (bp->b_dirtyend > 0 &&
		    (boff > bp->b_dirtyend ||
		    (boff + bytes) < bp->b_dirtyoff)
		   ) {
			if (bwrite(bp) == EINTR) {
				error = EINTR;
				break;
			}
			goto again;
		}

		error = uiomove(bp->b_data + boff, bytes, uio);

		/*
		 * Since this block is being modified, it must be written
		 * again and not just committed.  Since write clustering does
		 * not work for the stage 1 data write, only the stage 2
		 * commit rpc, we have to clear B_CLUSTEROK as well.
		 */
		bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);

		if (error) {
			brelse(bp);
			break;
		}

		/*
		 * Only update dirtyoff/dirtyend if not a degenerate
		 * condition.
		 *
		 * The underlying VM pages have been marked valid by
		 * virtue of acquiring the bp.  Because the entire buffer
		 * is marked dirty we do not have to worry about cleaning
		 * out the related dirty bits (and wouldn't really know
		 * how to deal with byte ranges anyway)
		 */
		if (bytes) {
			if (bp->b_dirtyend > 0) {
				bp->b_dirtyoff = imin(boff, bp->b_dirtyoff);
				bp->b_dirtyend = imax(boff + bytes,
				    bp->b_dirtyend);
			} else {
				bp->b_dirtyoff = boff;
				bp->b_dirtyend = boff + bytes;
			}
		}

		if (ioflag & IO_SYNC) {
			if (ioflag & IO_INVAL)
				bp->b_flags |= B_NOCACHE;
			error = bwrite(bp);
			if (error)
				break;
		} else {
			bdwrite(bp);
		}
	} while (uio->uio_resid > 0 && bytes > 0);

	return error;
}
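
The dirty-range bookkeeping above (flush if the new write would be discontiguous with b_dirtyoff/b_dirtyend, otherwise merge the new range in) can be read in isolation as a small helper. The sketch below restates that logic outside the buffer-cache context; the function name is made up for illustration and does not appear in the source.

/*
 * Illustration of the dirty-range rule used above: returns non-zero if
 * the existing dirty range must be written out first (the new write is
 * discontiguous with it), otherwise merges [off, off + len) into the
 * range in place.  imin()/imax() are the libkern helpers used above.
 */
static int
example_merge_dirty(int *dirtyoff, int *dirtyend, int off, int len)
{
	if (*dirtyend > 0 &&
	    (off > *dirtyend || off + len < *dirtyoff))
		return 1;			/* caller must flush first */
	if (*dirtyend > 0) {
		*dirtyoff = imin(off, *dirtyoff);
		*dirtyend = imax(off + len, *dirtyend);
	} else {
		*dirtyoff = off;
		*dirtyend = off + len;
	}
	return 0;
}
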
Example 8
/*
 * ffs_balloc(struct vnode *a_vp, ufs_daddr_t a_lbn, int a_size,
 *	      struct ucred *a_cred, int a_flags, struct buf *a_bpp)
 *
 * Balloc defines the structure of filesystem storage by allocating
 * the physical blocks on a device given the inode and the logical
 * block number in a file.
 *
 * NOTE: B_CLRBUF - this flag tells balloc to clear invalid portions
 *	 of the buffer.  However, any dirty bits will override missing
 *	 valid bits.  This case occurs when writable mmaps are truncated
 *	 and then extended.
 */
int
ffs_balloc(struct vop_balloc_args *ap)
{
	struct inode *ip;
	ufs_daddr_t lbn;
	int size;
	struct ucred *cred;
	int flags;
	struct fs *fs;
	ufs_daddr_t nb;
	struct buf *bp, *nbp, *dbp;
	struct vnode *vp;
	struct indir indirs[NIADDR + 2];
	ufs_daddr_t newb, *bap, pref;
	int deallocated, osize, nsize, num, i, error;
	ufs_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
	ufs_daddr_t *lbns_remfree, lbns[NIADDR + 1];
	int unwindidx;
	int seqcount;

	vp = ap->a_vp;
	ip = VTOI(vp);
	fs = ip->i_fs;
	lbn = lblkno(fs, ap->a_startoffset);
	size = blkoff(fs, ap->a_startoffset) + ap->a_size;
	if (size > fs->fs_bsize)
		panic("ffs_balloc: blk too big");
	*ap->a_bpp = NULL;
	if (lbn < 0)
		return (EFBIG);
	cred = ap->a_cred;
	flags = ap->a_flags;

	/*
	 * The vnode must be locked for us to be able to safely mess
	 * around with the inode.
	 */
	if (vn_islocked(vp) != LK_EXCLUSIVE) {
		panic("ffs_balloc: vnode %p not exclusively locked!", vp);
	}

	/*
	 * If the next write will extend the file into a new block,
	 * and the file is currently composed of a fragment
	 * this fragment has to be extended to be a full block.
	 */
	nb = lblkno(fs, ip->i_size);
	if (nb < NDADDR && nb < lbn) {
		/*
		 * The file size prior to this write fits in the direct
		 * blocks (i.e. the last block may be a fragment), and we
		 * are now extending the file beyond the block that held
		 * the end of the file before this write.
		 */
		osize = blksize(fs, ip, nb);
		/*
		 * osize gives the on-disk allocated size of the last block;
		 * it is either some number of fragments or a full
		 * file system block.
		 */
		if (osize < fs->fs_bsize && osize > 0) {
			/* A few fragments are already allocated; since the
			 * current write extends beyond this block, allocate
			 * the complete block (the fragments live in the
			 * last block of the file).
			 */
			error = ffs_realloccg(ip, nb,
				ffs_blkpref(ip, nb, (int)nb, &ip->i_db[0]),
				osize, (int)fs->fs_bsize, cred, &bp);
			if (error)
				return (error);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, nb,
				    dofftofsb(fs, bp->b_bio2.bio_offset), 
				    ip->i_db[nb], fs->fs_bsize, osize, bp);
			/* adjust the inode size, we just grew */
			ip->i_size = smalllblktosize(fs, nb + 1);
			ip->i_db[nb] = dofftofsb(fs, bp->b_bio2.bio_offset);
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (flags & B_SYNC)
				bwrite(bp);
			else
				bawrite(bp);
			/* bp is already released here */
		}
	}
	/*
	 * The first NDADDR blocks are direct blocks
	 */
	if (lbn < NDADDR) {
		nb = ip->i_db[lbn];
		if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
			error = bread(vp, lblktodoff(fs, lbn), fs->fs_bsize, &bp);
			if (error) {
				brelse(bp);
				return (error);
			}
			bp->b_bio2.bio_offset = fsbtodoff(fs, nb);
			*ap->a_bpp = bp;
			return (0);
		}
		if (nb != 0) {
			/*
			 * Consider need to reallocate a fragment.
			 */
			osize = fragroundup(fs, blkoff(fs, ip->i_size));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {
				error = bread(vp, lblktodoff(fs, lbn), 
					      osize, &bp);
				if (error) {
					brelse(bp);
					return (error);
				}
				bp->b_bio2.bio_offset = fsbtodoff(fs, nb);
			} else {
				/*
				 * NOTE: ffs_realloccg() issues a bread().
				 */
				error = ffs_realloccg(ip, lbn,
				    ffs_blkpref(ip, lbn, (int)lbn,
					&ip->i_db[0]), osize, nsize, cred, &bp);
				if (error)
					return (error);
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocdirect(ip, lbn,
					    dofftofsb(fs, bp->b_bio2.bio_offset),
					    nb, nsize, osize, bp);
			}
		} else {
			if (ip->i_size < smalllblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			error = ffs_alloc(ip, lbn,
			    ffs_blkpref(ip, lbn, (int)lbn, &ip->i_db[0]),
			    nsize, cred, &newb);
			if (error)
				return (error);
			bp = getblk(vp, lblktodoff(fs, lbn), nsize, 0, 0);
			bp->b_bio2.bio_offset = fsbtodoff(fs, newb);
			if (flags & B_CLRBUF)
				vfs_bio_clrbuf(bp);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, lbn, newb, 0,
				    nsize, 0, bp);
		}
		ip->i_db[lbn] = dofftofsb(fs, bp->b_bio2.bio_offset);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		*ap->a_bpp = bp;
		return (0);
	}
	/*
	 * Determine the number of levels of indirection.
	 */
	pref = 0;
	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
		return(error);
#ifdef DIAGNOSTIC
	if (num < 1)
		panic ("ffs_balloc: ufs_bmaparray returned indirect block");
#endif
	/*
	 * Get a handle on the data block buffer before working through 
	 * indirect blocks to avoid a deadlock between the VM system holding
	 * a locked VM page and issuing a BMAP (which tries to lock the
	 * indirect blocks), and the filesystem holding a locked indirect
	 * block and then trying to read a data block (which tries to lock
	 * the underlying VM pages).
	 */
	dbp = getblk(vp, lblktodoff(fs, lbn), fs->fs_bsize, 0, 0);

	/*
	 * Setup undo history
	 */
	allocib = NULL;
	allocblk = allociblk;
	lbns_remfree = lbns;

	unwindidx = -1;

	/*
	 * Fetch the first indirect block directly from the inode, allocating
	 * one if necessary. 
	 */
	--num;
	nb = ip->i_ib[indirs[0].in_off];
	if (nb == 0) {
		pref = ffs_blkpref(ip, lbn, 0, NULL);
		/*
		 * If the filesystem has run out of space we can skip the
		 * full fsync/undo of the main [fail] case since no undo
		 * history has been built yet.  Hence the goto fail2.
		 */
	        if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    cred, &newb)) != 0)
			goto fail2;
		nb = newb;
		*allocblk++ = nb;
		*lbns_remfree++ = indirs[1].in_lbn;
		bp = getblk(vp, lblktodoff(fs, indirs[1].in_lbn),
			    fs->fs_bsize, 0, 0);
		bp->b_bio2.bio_offset = fsbtodoff(fs, nb);
		vfs_bio_clrbuf(bp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
			    newb, 0, fs->fs_bsize, 0, bp);
			bdwrite(bp);
		} else {
			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */
			if (DOINGASYNC(vp))
				bdwrite(bp);
			else if ((error = bwrite(bp)) != 0)
				goto fail;
		}
		allocib = &ip->i_ib[indirs[0].in_off];
		*allocib = nb;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}

	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 */
	for (i = 1;;) {
		error = bread(vp, lblktodoff(fs, indirs[i].in_lbn), (int)fs->fs_bsize, &bp);
		if (error) {
			brelse(bp);
			goto fail;
		}
		bap = (ufs_daddr_t *)bp->b_data;
		nb = bap[indirs[i].in_off];
		if (i == num)
			break;
		i += 1;
		if (nb != 0) {
			bqrelse(bp);
			continue;
		}
		if (pref == 0)
			pref = ffs_blkpref(ip, lbn, 0, NULL);
		if ((error =
		    ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) != 0) {
			brelse(bp);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		*lbns_remfree++ = indirs[i].in_lbn;
		nbp = getblk(vp, lblktodoff(fs, indirs[i].in_lbn),
			     fs->fs_bsize, 0, 0);
		nbp->b_bio2.bio_offset = fsbtodoff(fs, nb);
		vfs_bio_clrbuf(nbp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocindir_meta(nbp, ip, bp,
			    indirs[i - 1].in_off, nb);
			bdwrite(nbp);
		} else {
			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */
			if ((error = bwrite(nbp)) != 0) {
				brelse(bp);
				goto fail;
			}
		}
		bap[indirs[i - 1].in_off] = nb;
		if (allocib == NULL && unwindidx < 0)
			unwindidx = i - 1;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			if (bp->b_bufsize == fs->fs_bsize)
				bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
	}

	/*
	 * Get the data block, allocating if necessary.  We have already
	 * called getblk() on the data block buffer, dbp.  If we have to
	 * allocate it and B_CLRBUF has been set the inference is an intention
	 * to zero out the related disk blocks, so we do not have to issue
	 * a read.  Instead we simply call vfs_bio_clrbuf().  If B_CLRBUF is
	 * not set the caller intends to overwrite the entire contents of the
	 * buffer and we don't waste time trying to clean up the contents.
	 *
	 * bp references the current indirect block.  When allocating, 
	 * the block must be updated.
	 */
	if (nb == 0) {
		pref = ffs_blkpref(ip, lbn, indirs[i].in_off, &bap[0]);
		error = ffs_alloc(ip,
		    lbn, pref, (int)fs->fs_bsize, cred, &newb);
		if (error) {
			brelse(bp);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		*lbns_remfree++ = lbn;
		dbp->b_bio2.bio_offset = fsbtodoff(fs, nb);
		if (flags & B_CLRBUF)
			vfs_bio_clrbuf(dbp);
		if (DOINGSOFTDEP(vp))
			softdep_setup_allocindir_page(ip, lbn, bp,
			    indirs[i].in_off, nb, 0, dbp);
		bap[indirs[i].in_off] = nb;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			if (bp->b_bufsize == fs->fs_bsize)
				bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
		*ap->a_bpp = dbp;
		return (0);
	}
	brelse(bp);

	/*
	 * At this point all related indirect blocks have been allocated
	 * if necessary and released.  bp is no longer valid.  dbp holds
	 * our getblk()'d data block.
	 *
	 * XXX we previously performed a cluster_read operation here.
	 */
	if (flags & B_CLRBUF) {
		/*
		 * If B_CLRBUF is set we must validate the invalid portions
		 * of the buffer.  This typically requires a read-before-
		 * write.  The strategy call will fill in bio_offset in that
		 * case.
		 *
		 * If we hit this case we do a cluster read if possible
		 * since nearby data blocks are likely to be accessed soon
		 * too.
		 */
		if ((dbp->b_flags & B_CACHE) == 0) {
			bqrelse(dbp);
			seqcount = (flags & B_SEQMASK) >> B_SEQSHIFT;
			if (seqcount &&
			    (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
				error = cluster_read(vp, (off_t)ip->i_size,
					    lblktodoff(fs, lbn),
					    (int)fs->fs_bsize, 
					    fs->fs_bsize,
					    seqcount * BKVASIZE,
					    &dbp);
			} else {
				error = bread(vp, lblktodoff(fs, lbn),
					      (int)fs->fs_bsize, &dbp);
			}
			if (error)
				goto fail;
		} else {
Example 9
/*
 * union_lookup(struct vnode *a_dvp, struct vnode **a_vpp,
 *		struct componentname *a_cnp)
 */
static int
union_lookup(struct vop_old_lookup_args *ap)
{
	int error;
	int uerror, lerror;
	struct vnode *uppervp, *lowervp;
	struct vnode *upperdvp, *lowerdvp;
	struct vnode *dvp = ap->a_dvp;		/* starting dir */
	struct union_node *dun = VTOUNION(dvp);	/* associated union node */
	struct componentname *cnp = ap->a_cnp;
	struct thread *td = cnp->cn_td;
	int lockparent = cnp->cn_flags & CNP_LOCKPARENT;
	struct union_mount *um = MOUNTTOUNIONMOUNT(dvp->v_mount);
	struct ucred *saved_cred = NULL;
	int iswhiteout;
	struct vattr va;

	*ap->a_vpp = NULLVP;

	/*
	 * Disallow write attempts to a filesystem mounted read-only.
	 */
	if ((dvp->v_mount->mnt_flag & MNT_RDONLY) &&
	    (cnp->cn_nameiop == NAMEI_DELETE || cnp->cn_nameiop == NAMEI_RENAME)) {
		return (EROFS);
	}

	/*
	 * For any lookups we do, always return with the parent locked
	 */
	cnp->cn_flags |= CNP_LOCKPARENT;

	lowerdvp = dun->un_lowervp;
	uppervp = NULLVP;
	lowervp = NULLVP;
	iswhiteout = 0;

	uerror = ENOENT;
	lerror = ENOENT;

	/*
	 * Get a private lock on uppervp and a reference, effectively 
	 * taking it out of the union_node's control.
	 *
	 * We must lock upperdvp while holding our lock on dvp
	 * to avoid a deadlock.
	 */
	upperdvp = union_lock_upper(dun, td);

	/*
	 * do the lookup in the upper level.
	 * if that level consumes additional pathnames,
	 * then assume that something special is going
	 * on and just return that vnode.
	 */
	if (upperdvp != NULLVP) {
		/*
		 * We do not have to worry about the DOTDOT case, we've
		 * already unlocked dvp.
		 */
		UDEBUG(("A %p\n", upperdvp));

		/*
		 * Do the lookup.   We must supply a locked and referenced
		 * upperdvp to the function and will get a new locked and
		 * referenced upperdvp back with the old having been 
		 * dereferenced.
		 *
		 * If an error is returned, uppervp will be NULLVP.  If no
		 * error occurs, uppervp will be the locked and referenced
		 * return vnode or possibly NULL, depending on what is being
		 * requested.  It is possible that the returned uppervp
		 * will be the same as upperdvp.
		 */
		uerror = union_lookup1(um->um_uppervp, &upperdvp, &uppervp, cnp);
		UDEBUG((
		    "uerror %d upperdvp %p %d/%d, uppervp %p ref=%d/lck=%d\n",
		    uerror,
		    upperdvp,
		    upperdvp->v_sysref.refcnt,
		    vn_islocked(upperdvp),
		    uppervp,
		    (uppervp ? uppervp->v_sysref.refcnt : -99),
		    (uppervp ? vn_islocked(uppervp) : -99)
		));

		/*
		 * Disallow write attempts to a filesystem mounted read-only.
		 */
		if (uerror == EJUSTRETURN && 
		    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
		    (cnp->cn_nameiop == NAMEI_CREATE || cnp->cn_nameiop == NAMEI_RENAME)) {
			error = EROFS;
			goto out;
		}

		/*
		 * Special case.  If cn_consume != 0 skip out.  The result
		 * of the lookup is transferred to our return variable.  If
		 * an error occurred we have to throw away the results.
		 */

		if (cnp->cn_consume != 0) {
			if ((error = uerror) == 0) {
				*ap->a_vpp = uppervp;
				uppervp = NULL;
			}
			goto out;
		}

		/*
		 * Calculate whiteout, fall through
		 */

		if (uerror == ENOENT || uerror == EJUSTRETURN) {
			if (cnp->cn_flags & CNP_ISWHITEOUT) {
				iswhiteout = 1;
			} else if (lowerdvp != NULLVP) {
				int terror;

				terror = VOP_GETATTR(upperdvp, &va);
				if (terror == 0 && (va.va_flags & OPAQUE))
					iswhiteout = 1;
			}
		}
	}

	/*
	 * in a similar way to the upper layer, do the lookup
	 * in the lower layer.   this time, if there is some
	 * component magic going on, then vput whatever we got
	 * back from the upper layer and return the lower vnode
	 * instead.
	 */

	if (lowerdvp != NULLVP && !iswhiteout) {
		int nameiop;

		UDEBUG(("B %p\n", lowerdvp));

		/*
		 * Force only LOOKUPs on the lower node, since
		 * we won't be making changes to it anyway.
		 */
		nameiop = cnp->cn_nameiop;
		cnp->cn_nameiop = NAMEI_LOOKUP;
		if (um->um_op == UNMNT_BELOW) {
			saved_cred = cnp->cn_cred;
			cnp->cn_cred = um->um_cred;
		}

		/*
		 * We shouldn't have to worry about locking interactions
		 * between the lower layer and our union layer (w.r.t.
		 * `..' processing) because we don't futz with lowervp
		 * locks in the union-node instantiation code path.
		 *
		 * union_lookup1() requires lowervp to be locked on entry,
		 * and it will be unlocked on return.  The ref count will
		 * not change.  On return lowervp doesn't represent anything
		 * to us so we NULL it out.
		 */
		vref(lowerdvp);
		vn_lock(lowerdvp, LK_EXCLUSIVE | LK_RETRY);
		lerror = union_lookup1(um->um_lowervp, &lowerdvp, &lowervp, cnp);
		if (lowerdvp == lowervp)
			vrele(lowerdvp);
		else
			vput(lowerdvp);
		lowerdvp = NULL;	/* lowerdvp invalid after vput */

		if (um->um_op == UNMNT_BELOW)
			cnp->cn_cred = saved_cred;
		cnp->cn_nameiop = nameiop;

		if (cnp->cn_consume != 0 || lerror == EACCES) {
			if ((error = lerror) == 0) {
				*ap->a_vpp = lowervp;
				lowervp = NULL;
			}
			goto out;
		}
	} else {
		UDEBUG(("C %p\n", lowerdvp));
		if ((cnp->cn_flags & CNP_ISDOTDOT) && dun->un_pvp != NULLVP) {
			if ((lowervp = LOWERVP(dun->un_pvp)) != NULL) {
				vref(lowervp);
				vn_lock(lowervp, LK_EXCLUSIVE | LK_RETRY);
				lerror = 0;
			}
		}
	}

	/*
	 * Ok.  Now we have uerror, uppervp, upperdvp, lerror, and lowervp.
	 *
	 * 1. If both layers returned an error, select the upper layer.
	 *
	 * 2. If the upper layer failed and the bottom layer succeeded,
	 *    two subcases occur:
	 *
	 *	a.  The bottom vnode is not a directory, in which case
	 *	    just return a new union vnode referencing an
	 *	    empty top layer and the existing bottom layer.
	 *
	 *	b.  The bottom vnode is a directory, in which case
	 *	    create a new directory in the top layer and
	 *	    fall through to case 3.
	 *
	 * 3. If the top layer succeeded then return a new union
	 *    vnode referencing whatever the new top layer and
	 *    whatever the bottom layer returned.
	 */

	/* case 1. */
	if ((uerror != 0) && (lerror != 0)) {
		error = uerror;
		goto out;
	}

	/* case 2. */
	if (uerror != 0 /* && (lerror == 0) */ ) {
		if (lowervp->v_type == VDIR) { /* case 2b. */
			KASSERT(uppervp == NULL, ("uppervp unexpectedly non-NULL"));
			/*
			 * oops, uppervp has a problem, we may have to shadow.
			 */
			uerror = union_mkshadow(um, upperdvp, cnp, &uppervp);
			if (uerror) {
				error = uerror;
				goto out;
			}
		}
	}

	/*
	 * Must call union_allocvp with both the upper and lower vnodes
	 * referenced and the upper vnode locked.   ap->a_vpp is returned 
	 * referenced and locked.  lowervp, uppervp, and upperdvp are 
	 * absorbed by union_allocvp() whether it succeeds or fails.
	 *
	 * upperdvp is the parent directory of uppervp which may be
	 * different, depending on the path, from dvp->un_uppervp.  That's
	 * why it is a separate argument.  Note that it must be unlocked.
	 *
	 * dvp must be locked on entry to the call and will be locked on
	 * return.
	 */

	if (uppervp && uppervp != upperdvp)
		vn_unlock(uppervp);
	if (lowervp)
		vn_unlock(lowervp);
	if (upperdvp)
		vn_unlock(upperdvp);

	error = union_allocvp(ap->a_vpp, dvp->v_mount, dvp, upperdvp, cnp,
			      uppervp, lowervp, 1);

	UDEBUG(("Create %p = %p %p refs=%d\n", *ap->a_vpp, uppervp, lowervp, (*ap->a_vpp) ? ((*ap->a_vpp)->v_sysref.refcnt) : -99));

	uppervp = NULL;
	upperdvp = NULL;
	lowervp = NULL;

	/* 
	 *	Termination Code
	 *
	 *	- put away any extra junk laying around.  Note that lowervp
	 *	  (if not NULL) will never be the same as *ap->a_vp and 
	 *	  neither will uppervp, because when we set that state we 
	 *	  NULL-out lowervp or uppervp.  On the other hand, upperdvp
	 *	  may match uppervp or *ap->a_vpp.
	 *
	 *	- relock/unlock dvp if appropriate.
	 */

out:
	if (upperdvp) {
		if (upperdvp == uppervp || upperdvp == *ap->a_vpp)
			vrele(upperdvp);
		else
			vput(upperdvp);
	}

	if (uppervp)
		vput(uppervp);

	if (lowervp)
		vput(lowervp);

	/*
	 * Restore LOCKPARENT state
	 */

	if (!lockparent)
		cnp->cn_flags &= ~CNP_LOCKPARENT;

	UDEBUG(("Out %d vpp %p/%d lower %p upper %p\n", error, *ap->a_vpp,
		((*ap->a_vpp) ? (*ap->a_vpp)->v_sysref.refcnt : -99),
		lowervp, uppervp));

	/*
	 * dvp lock state, determine whether to relock dvp.  dvp is expected
	 * to be locked on return if:
	 *
	 *	- there was an error (other than EJUSTRETURN), or
	 *	- we hit the last component and lockparent is true
	 *
	 * dvp_is_locked is the current state of the dvp lock, not counting
	 * the possibility that *ap->a_vpp == dvp (in which case it is locked
	 * anyway).  Note that *ap->a_vpp == dvp only if no error occurred.
	 */

	if (*ap->a_vpp != dvp) {
		if ((error == 0 || error == EJUSTRETURN) && !lockparent) {
			vn_unlock(dvp);
		}
	}

	/*
	 * Diagnostics
	 */

#ifdef DIAGNOSTIC
	if (cnp->cn_namelen == 1 &&
	    cnp->cn_nameptr[0] == '.' &&
	    *ap->a_vpp != dvp) {
		panic("union_lookup returning . (%p) not same as startdir (%p)", ap->a_vpp, dvp);
	}
#endif

	return (error);
}
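
The "private lock on uppervp" step near the top of union_lookup() depends on union_lock_upper(), which is defined elsewhere (union_subr.c) and not shown here. Conceptually it takes its own reference on the union node's upper vnode and locks it, so the lookup owns the vnode independently of the union_node. The sketch below illustrates that idea only; it is not the real helper, and the function name is hypothetical.

/*
 * Conceptual sketch of what a helper like union_lock_upper() provides:
 * a referenced, locked upper vnode (or NULLVP if there is none).  The
 * real implementation lives in union_subr.c.
 */
static struct vnode *
example_lock_upper(struct union_node *un)
{
	struct vnode *uppervp = un->un_uppervp;

	if (uppervp != NULLVP) {
		vref(uppervp);
		vn_lock(uppervp, LK_EXCLUSIVE | LK_RETRY);
	}
	return uppervp;
}
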
Example 10
static int
devfs_spec_close(struct vop_close_args *ap)
{
	struct devfs_node *node;
	struct proc *p = curproc;
	struct vnode *vp = ap->a_vp;
	cdev_t dev = vp->v_rdev;
	int error = 0;
	int needrelock;

	/*
	 * We do special tests on the opencount so unfortunately we need
	 * an exclusive lock.
	 */
	vn_lock(vp, LK_UPGRADE | LK_RETRY);

	if (dev)
		devfs_debug(DEVFS_DEBUG_DEBUG,
			    "devfs_spec_close() called on %s! \n",
			    dev->si_name);
	else
		devfs_debug(DEVFS_DEBUG_DEBUG,
			    "devfs_spec_close() called, null vode!\n");

	/*
	 * A couple of hacks for devices and tty devices.  The
	 * vnode ref count cannot be used to figure out the
	 * last close, but we can use v_opencount now that
	 * revoke works properly.
	 *
	 * Detect the last close on a controlling terminal and clear
	 * the session (half-close).
	 */
	if (dev)
		reference_dev(dev);

	if (p && vp->v_opencount <= 1 && vp == p->p_session->s_ttyvp) {
		p->p_session->s_ttyvp = NULL;
		vrele(vp);
	}

	/*
	 * Vnodes can be opened and closed multiple times.  Do not really
	 * close the device unless (1) it is being closed forcibly,
	 * (2) the device wants to track closes, or (3) this is the last
	 * vnode doing its last close on the device.
	 *
	 * XXX the VXLOCK (force close) case can leave vnodes referencing
	 * a closed device.  This might not occur now that our revoke is
	 * fixed.
	 */
	devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_spec_close() -1- \n");
	if (dev && ((vp->v_flag & VRECLAIMED) ||
	    (dev_dflags(dev) & D_TRACKCLOSE) ||
	    (vp->v_opencount == 1))) {
		/*
		 * Ugly pty magic, to make pty devices disappear again once
		 * they are closed.
		 */
		node = DEVFS_NODE(ap->a_vp);
		if (node && (node->flags & DEVFS_PTY))
			node->flags |= DEVFS_INVISIBLE;

		/*
		 * Unlock around dev_dclose(), unless the vnode is
		 * undergoing a vgone/reclaim (during umount).
		 */
		needrelock = 0;
		if ((vp->v_flag & VRECLAIMED) == 0 && vn_islocked(vp)) {
			needrelock = 1;
			vn_unlock(vp);
		}

		/*
		 * WARNING!  If the device destroys itself the devfs node
		 *	     can disappear here.
		 *
		 * WARNING!  vn_lock() will fail if the vp is in a VRECLAIM,
		 *	     which can occur during umount.
		 */
		error = dev_dclose(dev, ap->a_fflag, S_IFCHR, ap->a_fp);
		/* node is now stale */

		if (needrelock) {
			if (vn_lock(vp, LK_EXCLUSIVE |
					LK_RETRY |
					LK_FAILRECLAIM) != 0) {
				panic("devfs_spec_close: vnode %p "
				      "unexpectedly could not be relocked",
				      vp);
			}
		}
	} else {
		error = 0;
	}
	devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_spec_close() -2- \n");

	/*
	 * Track the actual opens and closes on the vnode.  The last close
	 * disassociates the rdev.  If the rdev is already disassociated or
	 * the opencount is already 0, the vnode might have been revoked
	 * and no further opencount tracking occurs.
	 */
	if (dev)
		release_dev(dev);
	if (vp->v_opencount > 0)
		vop_stdclose(ap);
	return(error);
}
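
The unlock/relock dance around dev_dclose() is a general pattern: drop the vnode lock across a call that may block or re-enter the VFS, then reacquire it while tolerating the VRECLAIM case. The helper below is a generic restatement of that pattern for illustration only (the name and the *relockedp out-parameter are made up); the failure policy (the driver above panics) is left to the caller.

/*
 * Illustrative helper: run fn(arg) with vp temporarily unlocked,
 * mirroring the pattern in devfs_spec_close().  Returns fn's result;
 * *relockedp reports whether vp is locked again on return.
 */
static int
example_call_unlocked(struct vnode *vp, int (*fn)(void *), void *arg,
		      int *relockedp)
{
	int needrelock = 0;
	int error;

	if ((vp->v_flag & VRECLAIMED) == 0 && vn_islocked(vp)) {
		needrelock = 1;
		vn_unlock(vp);
	}
	error = fn(arg);
	*relockedp = 1;
	if (needrelock &&
	    vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_FAILRECLAIM) != 0)
		*relockedp = 0;		/* vp was reclaimed meanwhile */
	return error;
}
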
Example 11
/*
 * Allocates a new vnode for the given node, or returns a new reference to
 * an existing one if the node already had a vnode referencing it.  The
 * resulting locked vnode is returned in *vpp.
 *
 * Returns zero on success or an appropriate error code on failure.
 *
 * The caller must ensure that node cannot go away (usually by holding
 * the related directory entry).
 *
 * If dnode is non-NULL this routine avoids deadlocking against it but
 * can return EAGAIN.  Caller must try again.  The dnode lock will cycle
 * in this case; it remains locked on return in all cases.  dnode must
 * be shared-locked.
 */
int
tmpfs_alloc_vp(struct mount *mp,
	       struct tmpfs_node *dnode, struct tmpfs_node *node, int lkflag,
	       struct vnode **vpp)
{
	int error = 0;
	struct vnode *vp;

loop:
	/*
	 * Interlocked extraction from node.  This can race many things.
	 * We have to get a soft reference on the vnode while we hold
	 * the node locked, then acquire it properly and check for races.
	 */
	TMPFS_NODE_LOCK(node);
	if ((vp = node->tn_vnode) != NULL) {
		KKASSERT((node->tn_vpstate & TMPFS_VNODE_DOOMED) == 0);
		vhold(vp);
		TMPFS_NODE_UNLOCK(node);

		if (dnode) {
			/*
			 * Special-case handling to avoid deadlocking against
			 * dnode.  This case has been validated and occurs
			 * every so often during synth builds.
			 */
			if (vget(vp, (lkflag & ~LK_RETRY) |
				     LK_NOWAIT |
				     LK_EXCLUSIVE) != 0) {
				TMPFS_NODE_UNLOCK(dnode);
				if (vget(vp, (lkflag & ~LK_RETRY) |
					     LK_SLEEPFAIL |
					     LK_EXCLUSIVE) == 0) {
					vn_unlock(vp);
				}
				vdrop(vp);
				TMPFS_NODE_LOCK_SH(dnode);

				return EAGAIN;
			}
		} else {
			/*
			 * Normal path
			 */
			if (vget(vp, lkflag | LK_EXCLUSIVE) != 0) {
				vdrop(vp);
				goto loop;
			}
		}
		if (node->tn_vnode != vp) {
			vput(vp);
			vdrop(vp);
			goto loop;
		}
		vdrop(vp);
		goto out;
	}
	/* vp is NULL */

	/*
	 * This should never happen.
	 */
	if (node->tn_vpstate & TMPFS_VNODE_DOOMED) {
		TMPFS_NODE_UNLOCK(node);
		error = ENOENT;
		goto out;
	}

	/*
	 * Interlock against other calls to tmpfs_alloc_vp() trying to
	 * allocate and assign a vp to node.
	 */
	if (node->tn_vpstate & TMPFS_VNODE_ALLOCATING) {
		node->tn_vpstate |= TMPFS_VNODE_WANT;
		error = tsleep(&node->tn_vpstate, PINTERLOCKED | PCATCH,
			       "tmpfs_alloc_vp", 0);
		TMPFS_NODE_UNLOCK(node);
		if (error)
			return error;
		goto loop;
	}
	node->tn_vpstate |= TMPFS_VNODE_ALLOCATING;
	TMPFS_NODE_UNLOCK(node);

	/*
	 * Allocate a new vnode (may block).  The ALLOCATING flag should
	 * prevent a race against someone else assigning node->tn_vnode.
	 */
	error = getnewvnode(VT_TMPFS, mp, &vp, VLKTIMEOUT, LK_CANRECURSE);
	if (error != 0)
		goto unlock;

	KKASSERT(node->tn_vnode == NULL);
	KKASSERT(vp != NULL);
	vp->v_data = node;
	vp->v_type = node->tn_type;

	/* Type-specific initialization. */
	switch (node->tn_type) {
	case VBLK:
		/* FALLTHROUGH */
	case VCHR:
		/* FALLTHROUGH */
	case VSOCK:
		break;
	case VREG:
		/*
		 * VMIO is mandatory.  Tmpfs also supports KVABIO
		 * for its tmpfs_strategy().
		 */
		vsetflags(vp, VKVABIO);
		vinitvmio(vp, node->tn_size, TMPFS_BLKSIZE, -1);
		break;
	case VLNK:
		break;
	case VFIFO:
		vp->v_ops = &mp->mnt_vn_fifo_ops;
		break;
	case VDIR:
		break;

	default:
		panic("tmpfs_alloc_vp: type %p %d", node, (int)node->tn_type);
	}

unlock:
	TMPFS_NODE_LOCK(node);

	KKASSERT(node->tn_vpstate & TMPFS_VNODE_ALLOCATING);
	node->tn_vpstate &= ~TMPFS_VNODE_ALLOCATING;
	node->tn_vnode = vp;

	if (node->tn_vpstate & TMPFS_VNODE_WANT) {
		node->tn_vpstate &= ~TMPFS_VNODE_WANT;
		TMPFS_NODE_UNLOCK(node);
		wakeup(&node->tn_vpstate);
	} else {
		TMPFS_NODE_UNLOCK(node);
	}

out:
	*vpp = vp;
	KKASSERT(IFF(error == 0, *vpp != NULL && vn_islocked(*vpp)));

	return error;
}
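
Per the header comment, a caller that passes a shared-locked dnode must be prepared for EAGAIN and simply retry, since tmpfs_alloc_vp() cycles the dnode lock in that case. The minimal caller sketch below illustrates that contract; the function name and surrounding lookup context are assumptions, not code from the source.

/*
 * Hypothetical caller sketch for the dnode-interlocked case described
 * in the header comment.  dnode is held shared-locked by the caller.
 */
static int
example_get_vnode(struct mount *mp, struct tmpfs_node *dnode,
		  struct tmpfs_node *node, struct vnode **vpp)
{
	int error;

	do {
		error = tmpfs_alloc_vp(mp, dnode, node, LK_EXCLUSIVE, vpp);
	} while (error == EAGAIN);	/* dnode lock cycled; just retry */
	return error;			/* *vpp is locked on success */
}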