Example #1
/*
 * Write a directory entry after a call to namei, using the parameters
 * that it left in nameidata. The argument dirp is the new directory
 * entry contents. Dvp is a pointer to the directory to be written,
 * which was left locked by namei. Remaining parameters (dp->i_offset,
 * dp->i_count) indicate how the space for the new entry is to be obtained.
 * Non-null newdirbp indicates that a directory is being created (for the
 * soft dependency code).
 */
int
ufs_direnter(struct vnode *dvp, struct vnode *tvp, struct direct *dirp,
    struct componentname *cnp, struct buf *newdirbp)
{
  	struct ucred *cr;
  	struct proc *p;
  	int newentrysize;
  	struct inode *dp;
  	struct buf *bp;
  	u_int dsize;
  	struct direct *ep, *nep;
	int error, ret, blkoff, loc, spacefree, flags;
  	char *dirbuf;

 	error = 0;
 	cr = cnp->cn_cred;
 	p = cnp->cn_proc;
  	dp = VTOI(dvp);
  	newentrysize = DIRSIZ(FSFMT(dvp), dirp);

	if (dp->i_count == 0) {
		/*
		 * If dp->i_count is 0, then namei could find no
		 * space in the directory. Here, dp->i_offset will
		 * be on a directory block boundary and we will write the
  		 * new entry into a fresh block.
  		 */
  		if (dp->i_offset & (DIRBLKSIZ - 1))
			panic("ufs_direnter: newblk");
		flags = B_CLRBUF;
		if (!DOINGSOFTDEP(dvp))
			flags |= B_SYNC;
		if ((error = UFS_BUF_ALLOC(dp, (off_t)dp->i_offset, DIRBLKSIZ,
		    cr, flags, &bp)) != 0) {
			if (DOINGSOFTDEP(dvp) && newdirbp != NULL)
				bdwrite(newdirbp);
			return (error);
		}
		DIP_ASSIGN(dp, size, dp->i_offset + DIRBLKSIZ);
		dp->i_flag |= IN_CHANGE | IN_UPDATE;
		uvm_vnp_setsize(dvp, DIP(dp, size));
  		dirp->d_reclen = DIRBLKSIZ;
		blkoff = dp->i_offset &
		    (VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_iosize - 1);
		bcopy((caddr_t)dirp, (caddr_t)bp->b_data + blkoff,
		    newentrysize);

#ifdef UFS_DIRHASH
		if (dp->i_dirhash != NULL) {
			ufsdirhash_newblk(dp, dp->i_offset);
			ufsdirhash_add(dp, dirp, dp->i_offset);
			ufsdirhash_checkblock(dp, (char *)bp->b_data + blkoff,
			    dp->i_offset);
		}
#endif

		if (DOINGSOFTDEP(dvp)) {
			/*
			 * Ensure that the entire newly allocated block is a
			 * valid directory so that future growth within the
			 * block does not have to ensure that the block is
			 * written before the inode.
			 */
			blkoff += DIRBLKSIZ;
			while (blkoff < bp->b_bcount) {
				((struct direct *)
				   (bp->b_data + blkoff))->d_reclen = DIRBLKSIZ;
				blkoff += DIRBLKSIZ;
			}
			if (softdep_setup_directory_add(bp, dp, dp->i_offset,
			    dirp->d_ino, newdirbp, 1) == 0) {
				bdwrite(bp);
				return (UFS_UPDATE(dp, 0));
			}
			/*
			 * We have just allocated a directory block in an
			 * indirect block. Rather than tracking when it gets
			 * claimed by the inode, we simply do a VOP_FSYNC
			 * now to ensure that it is there (in case the user
			 * does a future fsync). Note that we have to unlock
			 * the inode for the entry that we just entered, as
			 * the VOP_FSYNC may need to lock other inodes which
			 * can lead to deadlock if we also hold a lock on
			 * the newly entered node.
			 */
			if ((error = VOP_BWRITE(bp)))
				return (error);
			if (tvp != NULL)
				VOP_UNLOCK(tvp, 0, p);
			error = VOP_FSYNC(dvp, p->p_ucred, MNT_WAIT, p);
			if (tvp != NULL)
				vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, p);
			return (error);
		}
		error = VOP_BWRITE(bp);
 		ret = UFS_UPDATE(dp, !DOINGSOFTDEP(dvp));
 		if (error == 0)
 			return (ret);
  		return (error);
  	}
  
  	/*
	 * If dp->i_count is non-zero, then namei found space for the new
	 * entry in the range dp->i_offset to dp->i_offset + dp->i_count
	 * in the directory. To use this space, we may have to compact
	 * the entries located there, by copying them together towards the
	 * beginning of the block, leaving the free space in one usable
	 * chunk at the end.
  	 */
  
  	/*
	 * Increase size of directory if entry eats into new space.
	 * This should never push the size past a new multiple of
	 * DIRBLKSIZE.
	 *
	 * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN.
	 */
	if (dp->i_offset + dp->i_count > DIP(dp, size))
		DIP_ASSIGN(dp, size, dp->i_offset + dp->i_count);
	/*
	 * Get the block containing the space for the new directory entry.
	 */
	if ((error = UFS_BUFATOFF(dp, (off_t)dp->i_offset, &dirbuf,
	    &bp)) != 0) {
 		if (DOINGSOFTDEP(dvp) && newdirbp != NULL)
 			bdwrite(newdirbp);
  		return (error);
 	}
	/*
	 * Find space for the new entry. In the simple case, the entry at
	 * offset base will have the space. If it does not, then namei
	 * arranged that compacting the region dp->i_offset to
	 * dp->i_offset + dp->i_count would yield the space.
	 */
	ep = (struct direct *)dirbuf;
	dsize = ep->d_ino ? DIRSIZ(FSFMT(dvp), ep) : 0;
	spacefree = ep->d_reclen - dsize;
	for (loc = ep->d_reclen; loc < dp->i_count; ) {
		nep = (struct direct *)(dirbuf + loc);

		/* Trim the existing slot (NB: dsize may be zero). */
		ep->d_reclen = dsize;
		ep = (struct direct *)((char *)ep + dsize);

		/* Read nep->d_reclen now as the bcopy() may clobber it. */
		loc += nep->d_reclen;
		if (nep->d_ino == 0) {
			/*
			 * A mid-block unused entry. Such entries are
			 * never created by the kernel, but fsck_ffs
			 * can create them (and it doesn't fix them).
			 *
			 * Add up the free space, and initialise the
			 * relocated entry since we don't bcopy it.
			 */
			spacefree += nep->d_reclen;
			ep->d_ino = 0;
			dsize = 0;
			continue;
		}
		dsize = DIRSIZ(FSFMT(dvp), nep);
		spacefree += nep->d_reclen - dsize;
#ifdef UFS_DIRHASH
		if (dp->i_dirhash != NULL)
			ufsdirhash_move(dp, nep,
			    dp->i_offset + ((char *)nep - dirbuf),
			    dp->i_offset + ((char *)ep - dirbuf));
#endif
 		if (DOINGSOFTDEP(dvp))
 			softdep_change_directoryentry_offset(dp, dirbuf,
 			    (caddr_t)nep, (caddr_t)ep, dsize); 
 		else
 			bcopy((caddr_t)nep, (caddr_t)ep, dsize);
	}
	/*
	 * Here, `ep' points to a directory entry containing `dsize' in-use
	 * bytes followed by `spacefree' unused bytes. If ep->d_ino == 0,
	 * then the entry is completely unused (dsize == 0). The value
	 * of ep->d_reclen is always indeterminate.
	 *
	 * Update the pointer fields in the previous entry (if any),
	 * copy in the new entry, and write out the block.
	 */
	if (ep->d_ino == 0) {
		if (spacefree + dsize < newentrysize)
			panic("ufs_direnter: compact1");
		dirp->d_reclen = spacefree + dsize;
	} else {
		if (spacefree < newentrysize)
			panic("ufs_direnter: compact2");
		dirp->d_reclen = spacefree;
		ep->d_reclen = dsize;
		ep = (struct direct *)((char *)ep + dsize);
	}

#ifdef UFS_DIRHASH
	if (dp->i_dirhash != NULL && (ep->d_ino == 0 ||
	    dirp->d_reclen == spacefree))
		ufsdirhash_add(dp, dirp, dp->i_offset + ((char *)ep - dirbuf));
#endif
	bcopy((caddr_t)dirp, (caddr_t)ep, (u_int)newentrysize);
#ifdef UFS_DIRHASH
	if (dp->i_dirhash != NULL)
		ufsdirhash_checkblock(dp, dirbuf -
		    (dp->i_offset & (DIRBLKSIZ - 1)),
		    dp->i_offset & ~(DIRBLKSIZ - 1));
#endif

  	if (DOINGSOFTDEP(dvp)) {
  		(void)softdep_setup_directory_add(bp, dp,
  		    dp->i_offset + (caddr_t)ep - dirbuf,
		    dirp->d_ino, newdirbp, 0);
  		bdwrite(bp);
  	} else {
  		error = VOP_BWRITE(bp);
  	}
	dp->i_flag |= IN_CHANGE | IN_UPDATE;

 	/*
 	 * If all went well, and the directory can be shortened, proceed
 	 * with the truncation. Note that we have to unlock the inode for
 	 * the entry that we just entered, as the truncation may need to
 	 * lock other inodes which can lead to deadlock if we also hold a
 	 * lock on the newly entered node.
 	 */

	if (error == 0 && dp->i_endoff && dp->i_endoff < DIP(dp, size)) {
		if (tvp != NULL)
			VOP_UNLOCK(tvp, 0, p);
#ifdef UFS_DIRHASH
		if (dp->i_dirhash != NULL)
			ufsdirhash_dirtrunc(dp, dp->i_endoff);
#endif

		error = UFS_TRUNCATE(dp, (off_t)dp->i_endoff,
		    IO_NORMAL | IO_SYNC, cr);

		if (tvp != NULL)
			vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, p);
	}
	return (error);
}
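A minimal userland sketch of the free-space bookkeeping behind the compaction loop above; struct rec and used() below are hypothetical, simplified stand-ins for the on-disk struct direct and DIRSIZ(), not the real layout:

#include <stdio.h>
#include <string.h>

/* hypothetical simplified entry: 8-byte header, name padded to 4 bytes */
struct rec { unsigned ino; unsigned short reclen; char name[24]; };

static unsigned short
used(const struct rec *r)		/* rough analogue of DIRSIZ() */
{
	return (unsigned short)((8 + strlen(r->name) + 1 + 3) & ~3u);
}

int
main(void)
{
	struct rec blk[4] = {
		{ 1, 20, "a" }, { 0, 16, "" }, { 2, 28, "bb" }, { 3, 64, "ccc" },
	};
	unsigned spacefree = 0;
	int i;

	/* accumulate slack the same way the loop above grows spacefree */
	for (i = 0; i < 4; i++)
		spacefree += blk[i].reclen - (blk[i].ino ? used(&blk[i]) : 0);
	printf("compaction leaves one free chunk of %u bytes\n", spacefree);
	return 0;
}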
Example #2
/*
 * Main code to turn off disk quotas for a filesystem. Does not change
 * flags.
 */
static int
quotaoff1(struct thread *td, struct mount *mp, int type)
{
	struct vnode *vp;
	struct vnode *qvp, *mvp;
	struct ufsmount *ump;
	struct dquot *dq;
	struct inode *ip;
	struct ucred *cr;
	int error;

	ump = VFSTOUFS(mp);

	UFS_LOCK(ump);
	KASSERT((ump->um_qflags[type] & QTF_CLOSING) != 0,
		("quotaoff1: flags are invalid"));
	if ((qvp = ump->um_quotas[type]) == NULLVP) {
		UFS_UNLOCK(ump);
		return (0);
	}
	cr = ump->um_cred[type];
	UFS_UNLOCK(ump);

	/*
	 * Search vnodes associated with this mount point,
	 * deleting any references to quota file being closed.
	 */
again:
	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
		if (vp->v_type == VNON) {
			VI_UNLOCK(vp);
			continue;
		}
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
			goto again;
		}
		ip = VTOI(vp);
		dq = ip->i_dquot[type];
		ip->i_dquot[type] = NODQUOT;
		dqrele(vp, dq);
		VOP_UNLOCK(vp, 0);
		vrele(vp);
	}

	error = dqflush(qvp);
	if (error != 0)
		return (error);

	/*
	 * Clear um_quotas before closing the quota vnode to prevent
	 * access to the closed vnode from dqget/dqsync
	 */
	UFS_LOCK(ump);
	ump->um_quotas[type] = NULLVP;
	ump->um_cred[type] = NOCRED;
	UFS_UNLOCK(ump);

	vn_lock(qvp, LK_EXCLUSIVE | LK_RETRY);
	qvp->v_vflag &= ~VV_SYSTEM;
	VOP_UNLOCK(qvp, 0);
	error = vn_close(qvp, FREAD|FWRITE, td->td_ucred, td);
	crfree(cr);

	return (error);
}
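The traversal above must restart from scratch whenever vget() fails, because the iterator's position is no longer trustworthy once the vnode list may have changed. A tiny sketch of that retry shape, with a hypothetical try_lock() standing in for vget():

#include <stdio.h>

/* hypothetical: reports contention exactly once, for item 2 */
static int try_lock(int i) { static int once = 1; return i == 2 && once-- > 0; }

int
main(void)
{
	int i;
again:
	for (i = 0; i < 5; i++) {
		if (try_lock(i)) {
			printf("contention at %d, restarting scan\n", i);
			goto again;	/* cf. MNT_VNODE_FOREACH_ALL_ABORT */
		}
		printf("processed %d\n", i);	/* drop dquot ref, unlock */
	}
	return 0;
}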
Example #3
/*
 * Go through the disk queues to initiate sandbagged IO;
 * go through the inodes to write those that have been modified;
 * initiate the writing of the super block if it has been modified.
 *
 * Note: we are always called with the filesystem marked `MPBUSY'.
 */
static int
ext2_sync(struct mount *mp, int waitfor)
{
	struct vnode *mvp, *vp;
	struct thread *td;
	struct inode *ip;
	struct ext2mount *ump = VFSTOEXT2(mp);
	struct m_ext2fs *fs;
	int error, allerror = 0;

	td = curthread;
	fs = ump->um_e2fs;
	if (fs->e2fs_fmod != 0 && fs->e2fs_ronly != 0) {		/* XXX */
		printf("fs = %s\n", fs->e2fs_fsmnt);
		panic("ext2_sync: rofs mod");
	}

	/*
	 * Write back each (modified) inode.
	 */
loop:
	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
		if (vp->v_type == VNON) {
			VI_UNLOCK(vp);
			continue;
		}
		ip = VTOI(vp);
		if ((ip->i_flag &
		    (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
		    (vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
		    waitfor == MNT_LAZY)) {
			VI_UNLOCK(vp);
			continue;
		}
		error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, td);
		if (error) {
			if (error == ENOENT) {
				MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
				goto loop;
			}
			continue;
		}
		if ((error = VOP_FSYNC(vp, waitfor, td)) != 0)
			allerror = error;
		VOP_UNLOCK(vp, 0);
		vrele(vp);
	}

	/*
	 * Force stale file system control information to be flushed.
	 */
	if (waitfor != MNT_LAZY) {
		vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
		if ((error = VOP_FSYNC(ump->um_devvp, waitfor, td)) != 0)
			allerror = error;
		VOP_UNLOCK(ump->um_devvp, 0);
	}

	/*
	 * Write back modified superblock.
	 */
	if (fs->e2fs_fmod != 0) {
		fs->e2fs_fmod = 0;
		fs->e2fs->e2fs_wtime = time_second;
		if ((error = ext2_cgupdate(ump, waitfor)) != 0)
			allerror = error;
	}
	return (allerror);
}
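ext2_sync() deliberately keeps flushing after a failed VOP_FSYNC() and reports the most recent failure to the caller; a tiny sketch of that aggregate-and-continue convention (sync_one() is a hypothetical stand-in that fails once):

#include <stdio.h>

static int sync_one(int i) { return i == 1 ? 5 : 0; }	/* hypothetical; 5 = EIO */

int
main(void)
{
	int i, error, allerror = 0;

	for (i = 0; i < 3; i++)
		if ((error = sync_one(i)) != 0)
			allerror = error;	/* remember, but keep going */
	printf("allerror = %d\n", allerror);
	return 0;
}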
Example #4
/*
 * this function handles traditional block mapping
 */
static int
ext2_ind_read(struct vop_read_args *ap)
{
	struct vnode *vp;
	struct inode *ip;
	struct uio *uio;
	FS *fs;
	struct buf *bp;
	daddr_t lbn, nextlbn;
	off_t bytesinfile;
	long size, xfersize, blkoffset;
	int error, orig_resid, seqcount;
	u_short mode;

	seqcount = ap->a_ioflag >> IO_SEQSHIFT;

	vp = ap->a_vp;
	ip = VTOI(vp);
	mode = ip->i_mode;
	uio = ap->a_uio;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("%s: mode", READ_S);

	if (vp->v_type == VLNK) {
		if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen)
			panic("%s: short symlink", READ_S);
	} else if (vp->v_type != VREG && vp->v_type != VDIR)
		panic("%s: type %d", READ_S, vp->v_type);
#endif
	orig_resid = uio->uio_resid;
	KASSERT(orig_resid >= 0, ("ext2_read: uio->uio_resid < 0"));
	if (orig_resid == 0)
		return (0);
	KASSERT(uio->uio_offset >= 0, ("ext2_read: uio->uio_offset < 0"));
	fs = ip->I_FS;
	if (uio->uio_offset < ip->i_size &&
	    uio->uio_offset >= fs->e2fs_maxfilesize)
		return (EOVERFLOW);
	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
		if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0)
			break;
		lbn = lblkno(fs, uio->uio_offset);
		nextlbn = lbn + 1;
		size = BLKSIZE(fs, ip, lbn);
		blkoffset = blkoff(fs, uio->uio_offset);

		xfersize = fs->e2fs_fsize - blkoffset;
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
		if (bytesinfile < xfersize)
			xfersize = bytesinfile;

		if (lblktosize(fs, nextlbn) >= ip->i_size)
			error = bread(vp, lbn, size, NOCRED, &bp);
		else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0)
			error = cluster_read(vp, ip->i_size, lbn, size,
			    NOCRED, blkoffset + uio->uio_resid, seqcount, &bp);
		else if (seqcount > 1) {
			int nextsize = BLKSIZE(fs, ip, nextlbn);
			error = breadn(vp, lbn,
			    size, &nextlbn, &nextsize, 1, NOCRED, &bp);
		} else
			error = bread(vp, lbn, size, NOCRED, &bp);
		if (error) {
			brelse(bp);
			bp = NULL;
			break;
		}

		/*
		 * We should only get non-zero b_resid when an I/O error
		 * has occurred, which should cause us to break above.
		 * However, if the short read did not cause an error,
		 * then we want to ensure that we do not uiomove bad
		 * or uninitialized data.
		 */
		size -= bp->b_resid;
		if (size < xfersize) {
			if (size == 0)
				break;
			xfersize = size;
		}
		error = uiomove((char *)bp->b_data + blkoffset,
		    (int)xfersize, uio);
		if (error)
			break;

		bqrelse(bp);
	}
	if (bp != NULL)
		bqrelse(bp);
	if ((error == 0 || uio->uio_resid != orig_resid) &&
	    (vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
		ip->i_flag |= IN_ACCESS;
	return (error);
}
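The per-iteration arithmetic above splits the file offset into a logical block number plus an intra-block offset, then clamps the transfer to the block end, the caller's residual, and end of file. A standalone sketch, assuming a 4096-byte block (the real code takes the size from the superblock):

#include <stdio.h>

#define BSIZE	4096ULL		/* assumed block size */

int
main(void)
{
	unsigned long long off = 10000, resid = 9000, filesize = 12000;
	unsigned long long lbn = off / BSIZE;		/* lblkno() */
	unsigned long long blkoffset = off % BSIZE;	/* blkoff() */
	unsigned long long xfersize = BSIZE - blkoffset;

	if (resid < xfersize)
		xfersize = resid;
	if (filesize - off < xfersize)			/* bytesinfile clamp */
		xfersize = filesize - off;
	printf("lbn=%llu blkoffset=%llu xfersize=%llu\n",
	    lbn, blkoffset, xfersize);
	return 0;
}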
Example #5
/*
 * Last reference to an inode.  If necessary, write or delete it.
 */
int
ufs_inactive(void *v)
{
	struct vop_inactive_args *ap = v;
	struct vnode *vp = ap->a_vp;
	struct inode *ip = VTOI(vp);
	struct fs *fs = ip->i_fs;
	struct proc *p = curproc;
	mode_t mode;
	int error = 0, logged = 0, truncate_error = 0;
#ifdef DIAGNOSTIC
	extern int prtactive;

	if (prtactive && vp->v_usecount != 0)
		vprint("ufs_inactive: pushing active", vp);
#endif

	UFS_WAPBL_JUNLOCK_ASSERT(vp->v_mount);

	/*
	 * Ignore inodes related to stale file handles.
	 */
	if (ip->i_din1 == NULL || DIP(ip, mode) == 0)
		goto out;

	if (DIP(ip, nlink) <= 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
		error = UFS_WAPBL_BEGIN(vp->v_mount);
		if (error)
			goto out;
		logged = 1;
		if (getinoquota(ip) == 0)
			(void)ufs_quota_free_inode(ip, NOCRED);
		if (DIP(ip, size) != 0 && vp->v_mount->mnt_wapbl) {
			/*
			 * When journaling, only truncate one indirect block at
			 * a time.
			 */
			uint64_t incr = MNINDIR(ip->i_ump) << fs->fs_bshift;
			uint64_t base = NDADDR << fs->fs_bshift;
			while (!error && DIP(ip, size) > base + incr) {
				/*
				 * round down to next full indirect block
				 * boundary.
				 */
				uint64_t nsize = base +
				    ((DIP(ip, size) - base - 1) &
				    ~(incr - 1));
				error = UFS_TRUNCATE(ip, nsize, 0, NOCRED);
				if (error)
					break;
				UFS_WAPBL_END(vp->v_mount);
				error = UFS_WAPBL_BEGIN(vp->v_mount);
				if (error)
					goto out;
			}
		}

		if (error == 0) {
			truncate_error = UFS_TRUNCATE(ip, (off_t)0, 0, NOCRED);
			/* XXX pedro: remove me */
			if (truncate_error)
				printf("UFS_TRUNCATE()=%d\n", truncate_error);
		}

		DIP_ASSIGN(ip, rdev, 0);
		mode = DIP(ip, mode);
		DIP_ASSIGN(ip, mode, 0);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;

		/*
		 * Setting the mode to zero needs to wait for the inode to be
		 * written just as does a change to the link count. So, rather
		 * than creating a new entry point to do the same thing, we
		 * just use softdep_change_linkcnt(). Also, we can't let
		 * softdep co-opt us to help on its worklist, as we may end up
		 * trying to recycle vnodes and getting to this same point a
		 * couple of times, blowing the kernel stack. However, this
		 * could be optimized by checking if we are coming from
		 * vrele(), vput() or vclean() (by checking for VXLOCK) and
		 * just avoiding the co-opt to happen in the last case.
		 */
		if (DOINGSOFTDEP(vp))
			softdep_change_linkcnt(ip, 1);

		UFS_INODE_FREE(ip, ip->i_number, mode);
	}

	if (ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) {
		if (!logged++) {
			int err;
			err = UFS_WAPBL_BEGIN(vp->v_mount);
			if (err) {
				error = err;
				goto out;
			}
		}
		UFS_UPDATE(ip, 0);
	}
	if (logged)
		UFS_WAPBL_END(vp->v_mount);
out:
	VOP_UNLOCK(vp, 0);

	/*
	 * If we are done with the inode, reclaim it
	 * so that it can be reused immediately.
	 */
	if (error == 0 && truncate_error == 0 &&
	    (ip->i_din1 == NULL || DIP(ip, mode) == 0))
		vrecycle(vp, p);

	return (truncate_error ? truncate_error : error);
}
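The journaled path above shrinks the file one indirect-block run per transaction, each step rounding the size down to the next boundary with base + ((size - base - 1) & ~(incr - 1)). A sketch of the resulting sequence, with made-up values standing in for MNINDIR << bshift and NDADDR << bshift:

#include <stdio.h>

int
main(void)
{
	unsigned long long incr = 1ULL << 22;	/* assumed MNINDIR << bshift */
	unsigned long long base = 12ULL << 15;	/* assumed NDADDR << bshift */
	unsigned long long size = 3 * incr + base + 12345;

	while (size > base + incr) {
		/* round down to the previous full indirect boundary */
		size = base + ((size - base - 1) & ~(incr - 1));
		printf("truncate to %llu\n", size);	/* one txn each */
	}
	return 0;
}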
Example #6
/*
 * Convert a component of a pathname into a pointer to a locked inode.
 * This is a very central and rather complicated routine.
 * If the file system is not maintained in a strict tree hierarchy,
 * this can result in a deadlock situation (see comments in code below).
 *
 * The cnp->cn_nameiop argument is LOOKUP, CREATE, RENAME, or DELETE depending
 * on whether the name is to be looked up, created, renamed, or deleted.
 * When CREATE, RENAME, or DELETE is specified, information usable in
 * creating, renaming, or deleting a directory entry may be calculated.
 * If flag has LOCKPARENT or'ed into it and the target of the pathname
 * exists, lookup returns both the target and its parent directory locked.
 * When creating or renaming and LOCKPARENT is specified, the target may
 * not be ".".  When deleting and LOCKPARENT is specified, the target may
 * be "."., but the caller must check to ensure it does an vrele and vput
 * instead of two vputs.
 *
 * Overall outline of ufs_lookup:
 *
 *	search for name in directory, to found or notfound
 * notfound:
 *	if creating, return locked directory, leaving info on available slots
 *	else return error
 * found:
 *	if at end of path and deleting, return information to allow delete
 *	if at end of path and rewriting (RENAME and LOCKPARENT), lock target
 *	  inode and return info to allow rewrite
 *	if not at end, add name to cache; if at end and neither creating
 *	  nor deleting, add name to cache
 *
 * ext2_lookup(struct vnode *a_dvp, struct vnode **a_vpp,
 *	       struct componentname *a_cnp)
 */
int
ext2_lookup(struct vop_old_lookup_args *ap)
{
	struct vnode *vdp;	/* vnode for directory being searched */
	struct inode *dp;	/* inode for directory being searched */
	struct buf *bp;			/* a buffer of directory entries */
	struct ext2_dir_entry_2 *ep;	/* the current directory entry */
	int entryoffsetinblock;		/* offset of ep in bp's buffer */
	enum {NONE, COMPACT, FOUND} slotstatus;
	doff_t slotoffset;		/* offset of area with free space */
	int slotsize;			/* size of area at slotoffset */
	int slotfreespace;		/* amount of space free in slot */
	int slotneeded;			/* size of the entry we're seeking */
	int numdirpasses;		/* strategy for directory search */
	doff_t endsearch;		/* offset to end directory search */
	doff_t prevoff;			/* prev entry dp->i_offset */
	struct vnode *pdp;		/* saved dp during symlink work */
	struct vnode *tdp;		/* returned by VFS_VGET */
	doff_t enduseful;		/* pointer past last used dir slot */
	u_long bmask;			/* block offset mask */
	int lockparent;			/* 1 => lockparent flag is set */
	int wantparent;			/* 1 => wantparent or lockparent flag */
	int namlen, error;
	struct vnode **vpp = ap->a_vpp;
	struct componentname *cnp = ap->a_cnp;
	struct ucred *cred = cnp->cn_cred;
	int flags = cnp->cn_flags;
	int nameiop = cnp->cn_nameiop;

	int	DIRBLKSIZ = VTOI(ap->a_dvp)->i_e2fs->s_blocksize;

	bp = NULL;
	slotoffset = -1;
	*vpp = NULL;
	vdp = ap->a_dvp;
	dp = VTOI(vdp);
	lockparent = flags & CNP_LOCKPARENT;
	wantparent = flags & (CNP_LOCKPARENT|CNP_WANTPARENT);

	/*
	 * We now have a segment name to search for, and a directory to search.
	 */

	/*
	 * Suppress search for slots unless creating
	 * file and at end of pathname, in which case
	 * we watch for a place to put the new file in
	 * case it doesn't already exist.
	 */
	slotstatus = FOUND;
	slotfreespace = slotsize = slotneeded = 0;
	if (nameiop == NAMEI_CREATE || nameiop == NAMEI_RENAME) {
		slotstatus = NONE;
		slotneeded = EXT2_DIR_REC_LEN(cnp->cn_namelen);
		/* was
		slotneeded = (sizeof(struct direct) - MAXNAMLEN +
			cnp->cn_namelen + 3) &~ 3; */
	}

	/*
	 * If there is cached information on a previous search of
	 * this directory, pick up where we last left off.
	 * We cache only lookups as these are the most common
	 * and have the greatest payoff. Caching CREATE has little
	 * benefit as it usually must search the entire directory
	 * to determine that the entry does not exist. Caching the
	 * location of the last DELETE or RENAME has not reduced
	 * profiling time and hence has been removed in the interest
	 * of simplicity.
	 */
	bmask = VFSTOEXT2(vdp->v_mount)->um_mountp->mnt_stat.f_iosize - 1;
	if (nameiop != NAMEI_LOOKUP || dp->i_diroff == 0 ||
	    dp->i_diroff > dp->i_size) {
		entryoffsetinblock = 0;
		dp->i_offset = 0;
		numdirpasses = 1;
	} else {
		dp->i_offset = dp->i_diroff;
		if ((entryoffsetinblock = dp->i_offset & bmask) &&
		    (error = EXT2_BLKATOFF(vdp, (off_t)dp->i_offset, NULL, &bp)))
			return (error);
		numdirpasses = 2;
	}
	prevoff = dp->i_offset;
	endsearch = roundup(dp->i_size, DIRBLKSIZ);
	enduseful = 0;

searchloop:
	while (dp->i_offset < endsearch) {
		/*
		 * If necessary, get the next directory block.
		 */
		if ((dp->i_offset & bmask) == 0) {
			if (bp != NULL)
				brelse(bp);
			if ((error =
			    EXT2_BLKATOFF(vdp, (off_t)dp->i_offset, NULL, &bp)) != 0)
				return (error);
			entryoffsetinblock = 0;
		}
		/*
		 * If still looking for a slot, and at a DIRBLKSIZE
		 * boundary, have to start looking for free space again.
		 */
		if (slotstatus == NONE &&
		    (entryoffsetinblock & (DIRBLKSIZ - 1)) == 0) {
			slotoffset = -1;
			slotfreespace = 0;
		}
		/*
		 * Get pointer to next entry.
		 * Full validation checks are slow, so we only check
		 * enough to insure forward progress through the
		 * directory. Complete checks can be run by patching
		 * "dirchk" to be true.
		 */
		ep = (struct ext2_dir_entry_2 *)
			((char *)bp->b_data + entryoffsetinblock);
		if (ep->rec_len == 0 ||
		    (dirchk && ext2_dirbadentry(vdp, ep, entryoffsetinblock))) {
			int i;
			ext2_dirbad(dp, dp->i_offset, "mangled entry");
			i = DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1));
			dp->i_offset += i;
			entryoffsetinblock += i;
			continue;
		}

		/*
		 * If an appropriate sized slot has not yet been found,
		 * check to see if one is available. Also accumulate space
		 * in the current block so that we can determine if
		 * compaction is viable.
		 */
		if (slotstatus != FOUND) {
			int size = ep->rec_len;

			if (ep->inode != 0)
				size -= EXT2_DIR_REC_LEN(ep->name_len);
			if (size > 0) {
				if (size >= slotneeded) {
					slotstatus = FOUND;
					slotoffset = dp->i_offset;
					slotsize = ep->rec_len;
				} else if (slotstatus == NONE) {
					slotfreespace += size;
					if (slotoffset == -1)
						slotoffset = dp->i_offset;
					if (slotfreespace >= slotneeded) {
						slotstatus = COMPACT;
						slotsize = dp->i_offset +
						      ep->rec_len - slotoffset;
					}
				}
			}
		}

		/*
		 * Check for a name match.
		 */
		if (ep->inode) {
			namlen = ep->name_len;
			if (namlen == cnp->cn_namelen &&
			    !bcmp(cnp->cn_nameptr, ep->name,
				(unsigned)namlen)) {
				/*
				 * Save directory entry's inode number and
				 * reclen in ndp->ni_ufs area, and release
				 * directory buffer.
				 */
				dp->i_ino = ep->inode;
				dp->i_reclen = ep->rec_len;
				goto found;
			}
		}
		prevoff = dp->i_offset;
		dp->i_offset += ep->rec_len;
		entryoffsetinblock += ep->rec_len;
		if (ep->inode)
			enduseful = dp->i_offset;
	}
/* notfound: */
	/*
	 * If we started in the middle of the directory and failed
	 * to find our target, we must check the beginning as well.
	 */
	if (numdirpasses == 2) {
		numdirpasses--;
		dp->i_offset = 0;
		endsearch = dp->i_diroff;
		goto searchloop;
	}
	if (bp != NULL)
		brelse(bp);
	/*
	 * If creating, and at end of pathname and current
	 * directory has not been removed, then can consider
	 * allowing file to be created.
	 */
	if ((nameiop == NAMEI_CREATE || nameiop == NAMEI_RENAME) &&
	    dp->i_nlink != 0) {
		/*
		 * Access for write is interpreted as allowing
		 * creation of files in the directory.
		 */
		if ((error = VOP_EACCESS(vdp, VWRITE, cred)) != 0)
			return (error);
		/*
		 * Return an indication of where the new directory
		 * entry should be put.  If we didn't find a slot,
		 * then set dp->i_count to 0 indicating
		 * that the new slot belongs at the end of the
		 * directory. If we found a slot, then the new entry
		 * can be put in the range from dp->i_offset to
		 * dp->i_offset + dp->i_count.
		 */
		if (slotstatus == NONE) {
			dp->i_offset = roundup(dp->i_size, DIRBLKSIZ);
			dp->i_count = 0;
			enduseful = dp->i_offset;
		} else {
			dp->i_offset = slotoffset;
			dp->i_count = slotsize;
			if (enduseful < slotoffset + slotsize)
				enduseful = slotoffset + slotsize;
		}
		dp->i_endoff = roundup(enduseful, DIRBLKSIZ);
		dp->i_flag |= IN_CHANGE | IN_UPDATE;
		/*
		 * We return with the directory locked, so that
		 * the parameters we set up above will still be
		 * valid if we actually decide to do a direnter().
		 * We return ni_vp == NULL to indicate that the entry
		 * does not currently exist; we leave a pointer to
		 * the (locked) directory inode in ndp->ni_dvp.
		 * The pathname buffer is saved so that the name
		 * can be obtained later.
		 *
		 * NB - if the directory is unlocked, then this
		 * information cannot be used.
		 */
		if (!lockparent)
			vn_unlock(vdp);
		return (EJUSTRETURN);
	}
	return (ENOENT);

found:
	/*
	 * Check that directory length properly reflects presence
	 * of this entry.
	 */
	if (entryoffsetinblock + EXT2_DIR_REC_LEN(ep->name_len) >
	    dp->i_size) {
		ext2_dirbad(dp, dp->i_offset, "i_size too small");
		dp->i_size = entryoffsetinblock +
		    EXT2_DIR_REC_LEN(ep->name_len);
		dp->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	brelse(bp);

	/*
	 * Found component in pathname.
	 * If the final component of path name, save information
	 * in the cache as to where the entry was found.
	 */
	if (nameiop == NAMEI_LOOKUP)
		dp->i_diroff = dp->i_offset &~ (DIRBLKSIZ - 1);

	/*
	 * If deleting, and at end of pathname, return
	 * parameters which can be used to remove file.
	 * If the wantparent flag isn't set, we return only
	 * the directory (in ndp->ni_dvp), otherwise we go
	 * on and lock the inode, being careful with ".".
	 */
	if (nameiop == NAMEI_DELETE) {
		/*
		 * Write access to directory required to delete files.
		 */
		if ((error = VOP_EACCESS(vdp, VWRITE, cred)) != 0)
			return (error);
		/*
		 * Return pointer to current entry in dp->i_offset,
		 * and distance past previous entry (if there
		 * is a previous entry in this block) in dp->i_count.
		 * Save directory inode pointer in ndp->ni_dvp for dirremove().
		 */
		if ((dp->i_offset & (DIRBLKSIZ - 1)) == 0)
			dp->i_count = 0;
		else
			dp->i_count = dp->i_offset - prevoff;
		if (dp->i_number == dp->i_ino) {
			vref(vdp);
			*vpp = vdp;
			return (0);
		}
		if ((error = VFS_VGET(vdp->v_mount, NULL, dp->i_ino, &tdp)) != 0)
			return (error);
		/*
		 * If directory is "sticky", then user must own
		 * the directory, or the file in it, else she
		 * may not delete it (unless she's root). This
		 * implements append-only directories.
		 */
		if ((dp->i_mode & ISVTX) &&
		    cred->cr_uid != 0 &&
		    cred->cr_uid != dp->i_uid &&
		    VTOI(tdp)->i_uid != cred->cr_uid) {
			vput(tdp);
			return (EPERM);
		}
		*vpp = tdp;
		if (!lockparent)
			vn_unlock(vdp);
		return (0);
	}

	/*
	 * If rewriting (RENAME), return the inode and the
	 * information required to rewrite the present directory
	 * Must get inode of directory entry to verify it's a
	 * regular file, or empty directory.
	 */
	if (nameiop == NAMEI_RENAME && wantparent) {
		if ((error = VOP_EACCESS(vdp, VWRITE, cred)) != 0)
			return (error);
		/*
		 * Careful about locking second inode.
		 * This can only occur if the target is ".".
		 */
		if (dp->i_number == dp->i_ino)
			return (EISDIR);
		if ((error = VFS_VGET(vdp->v_mount, NULL, dp->i_ino, &tdp)) != 0)
			return (error);
		*vpp = tdp;
		if (!lockparent)
			vn_unlock(vdp);
		return (0);
	}

	/*
	 * Step through the translation in the name.  We do not `vput' the
	 * directory because we may need it again if a symbolic link
	 * is relative to the current directory.  Instead we save it
	 * unlocked as "pdp".  We must get the target inode before unlocking
	 * the directory to insure that the inode will not be removed
	 * before we get it.  We prevent deadlock by always fetching
	 * inodes from the root, moving down the directory tree. Thus
	 * when following backward pointers ".." we must unlock the
	 * parent directory before getting the requested directory.
	 * There is a potential race condition here if both the current
	 * and parent directories are removed before the VFS_VGET for the
	 * inode associated with ".." returns.  We hope that this occurs
	 * infrequently since we cannot avoid this race condition without
	 * implementing a sophisticated deadlock detection algorithm.
	 * Note also that this simple deadlock detection scheme will not
	 * work if the file system has any hard links other than ".."
	 * that point backwards in the directory structure.
	 */
	pdp = vdp;
	if (flags & CNP_ISDOTDOT) {
		vn_unlock(pdp);	/* race to get the inode */
		if ((error = VFS_VGET(vdp->v_mount, NULL, dp->i_ino, &tdp)) != 0) {
			vn_lock(pdp, LK_EXCLUSIVE | LK_RETRY);
			return (error);
		}
		if (lockparent && (error = vn_lock(pdp, LK_EXCLUSIVE))) {
			vput(tdp);
			return (error);
		}
		*vpp = tdp;
	} else if (dp->i_number == dp->i_ino) {
		vref(vdp);	/* we want ourself, ie "." */
		*vpp = vdp;
	} else {
		if ((error = VFS_VGET(vdp->v_mount, NULL, dp->i_ino, &tdp)) != 0)
			return (error);
		if (!lockparent)
			vn_unlock(pdp);
		*vpp = tdp;
	}
	return (0);
}
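A compact simulation of the NONE/COMPACT/FOUND slot search above: a record whose slack alone fits the new entry yields FOUND, while a growing run of smaller slacks flips the state to COMPACT. The record sizes below are invented for illustration:

#include <stdio.h>

enum slotstatus { NONE, COMPACT, FOUND };

int
main(void)
{
	/* {rec_len, bytes in use}; used == 0 would mark a deleted entry */
	struct { int rec_len, used; } ep[] =
	    { { 16, 12 }, { 24, 16 }, { 32, 12 }, { 40, 16 } };
	int slotneeded = 36, slotfreespace = 0, slotoffset = -1, off = 0, i;
	enum slotstatus st = NONE;

	for (i = 0; i < 4 && st != FOUND; off += ep[i++].rec_len) {
		int size = ep[i].rec_len - ep[i].used;

		if (size >= slotneeded) {
			st = FOUND;		/* one record is roomy enough */
			slotoffset = off;
		} else if (st == NONE && size > 0) {
			if (slotoffset == -1)
				slotoffset = off;	/* run starts here */
			slotfreespace += size;
			if (slotfreespace >= slotneeded)
				st = COMPACT;	/* compaction would fit it */
		}
	}
	printf("status=%d slotoffset=%d freespace=%d\n",
	    st, slotoffset, slotfreespace);
	return 0;
}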
Example #7
int
ffs_fsync(void *v)
{
	struct vop_fsync_args /* {
		struct vnode *a_vp;
		kauth_cred_t a_cred;
		int a_flags;
		off_t a_offlo;
		off_t a_offhi;
		struct lwp *a_l;
	} */ *ap = v;
	struct buf *bp;
	int num, error, i;
	struct indir ia[NIADDR + 1];
	int bsize;
	daddr_t blk_high;
	struct vnode *vp;
	struct mount *mp;

	vp = ap->a_vp;
	mp = vp->v_mount;

	fstrans_start(mp, FSTRANS_LAZY);
	if ((ap->a_offlo == 0 && ap->a_offhi == 0) || (vp->v_type != VREG)) {
		error = ffs_full_fsync(vp, ap->a_flags);
		goto out;
	}

	bsize = mp->mnt_stat.f_iosize;
	blk_high = ap->a_offhi / bsize;
	if (ap->a_offhi % bsize != 0)
		blk_high++;

	/*
	 * First, flush all pages in range.
	 */

	mutex_enter(vp->v_interlock);
	error = VOP_PUTPAGES(vp, trunc_page(ap->a_offlo),
	    round_page(ap->a_offhi), PGO_CLEANIT |
	    ((ap->a_flags & FSYNC_WAIT) ? PGO_SYNCIO : 0));
	if (error) {
		goto out;
	}

#ifdef WAPBL
	KASSERT(vp->v_type == VREG);
	if (mp->mnt_wapbl) {
		/*
		 * Don't bother writing out metadata if the syncer is
		 * making the request.  We will let the sync vnode
		 * write it out in a single burst through a call to
		 * VFS_SYNC().
		 */
		if ((ap->a_flags & (FSYNC_DATAONLY | FSYNC_LAZY)) != 0) {
			fstrans_done(mp);
			return 0;
		}
		error = 0;
		if (vp->v_tag == VT_UFS && VTOI(vp)->i_flag &
		    (IN_ACCESS | IN_CHANGE | IN_UPDATE | IN_MODIFY |
		    IN_MODIFIED | IN_ACCESSED)) {
			error = UFS_WAPBL_BEGIN(mp);
			if (error) {
				fstrans_done(mp);
				return error;
			}
			error = ffs_update(vp, NULL, NULL, UPDATE_CLOSE |
			    ((ap->a_flags & FSYNC_WAIT) ? UPDATE_WAIT : 0));
			UFS_WAPBL_END(mp);
		}
		if (error || (ap->a_flags & FSYNC_NOLOG) != 0) {
			fstrans_done(mp);
			return error;
		}
		error = wapbl_flush(mp->mnt_wapbl, 0);
		fstrans_done(mp);
		return error;
	}
#endif /* WAPBL */

	/*
	 * Then, flush indirect blocks.
	 */

	if (blk_high >= NDADDR) {
		error = ufs_getlbns(vp, blk_high, ia, &num);
		if (error)
			goto out;

		mutex_enter(&bufcache_lock);
		for (i = 0; i < num; i++) {
			if ((bp = incore(vp, ia[i].in_lbn)) == NULL)
				continue;
			if ((bp->b_cflags & BC_BUSY) != 0 ||
			    (bp->b_oflags & BO_DELWRI) == 0)
				continue;
			bp->b_cflags |= BC_BUSY | BC_VFLUSH;
			mutex_exit(&bufcache_lock);
			bawrite(bp);
			mutex_enter(&bufcache_lock);
		}
		mutex_exit(&bufcache_lock);
	}

	if (ap->a_flags & FSYNC_WAIT) {
		mutex_enter(vp->v_interlock);
		while (vp->v_numoutput > 0)
			cv_wait(&vp->v_cv, vp->v_interlock);
		mutex_exit(vp->v_interlock);
	}

	error = ffs_update(vp, NULL, NULL, UPDATE_CLOSE |
	    (((ap->a_flags & (FSYNC_WAIT | FSYNC_DATAONLY)) == FSYNC_WAIT)
	    ? UPDATE_WAIT : 0));

	if (error == 0 && ap->a_flags & FSYNC_CACHE) {
		int l = 0;
		VOP_IOCTL(VTOI(vp)->i_devvp, DIOCCACHESYNC, &l, FWRITE,
			curlwp->l_cred);
	}

out:
	fstrans_done(mp);
	return error;
}
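The blk_high computation above is a round-up division of the range's end offset to whole blocks; a one-screen equivalent with an assumed 8192-byte f_iosize:

#include <stdio.h>

int
main(void)
{
	long long bsize = 8192, offhi = 20000;	/* assumed values */
	long long blk_high = offhi / bsize;

	if (offhi % bsize != 0)
		blk_high++;
	/* same as howmany(offhi, bsize) == (offhi + bsize - 1) / bsize */
	printf("blk_high = %lld\n", blk_high);	/* 3 */
	return 0;
}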
Example #8
/*
 * Disable logging
 */
int
lqfs_disable(vnode_t *vp, struct fiolog *flp)
{
	int		error = 0;
	inode_t		*ip = VTOI(vp);
	qfsvfs_t	*qfsvfsp = ip->i_qfsvfs;
	fs_lqfs_common_t	*fs = VFS_FS_PTR(qfsvfsp);
#ifdef LUFS
	struct lockfs	lf;
	struct ulockfs	*ulp;
#else
	/* QFS doesn't really support LOCKFS. */
#endif /* LUFS */

	flp->error = FIOLOG_ENONE;

	/*
	 * Logging is already disabled; done
	 */
	if (LQFS_GET_LOGBNO(fs) == 0 || LQFS_GET_LOGP(qfsvfsp) == NULL ||
	    !LQFS_CAPABLE(qfsvfsp)) {
		vfs_setmntopt(qfsvfsp->vfs_vfs, MNTOPT_NOLOGGING, NULL, 0);
		error = 0;
		goto out;
	}

#ifdef LUFS
	/*
	 * File system must be write locked to disable logging
	 */
	error = qfs_fiolfss(vp, &lf);
	if (error) {
		goto out;
	}
	if (!LOCKFS_IS_ULOCK(&lf)) {
		flp->error = FIOLOG_EULOCK;
		error = 0;
		goto out;
	}
	lf.lf_lock = LOCKFS_WLOCK;
	lf.lf_flags = 0;
	lf.lf_comment = NULL;
	error = qfs_fiolfs(vp, &lf, 1);
	if (error) {
		flp->error = FIOLOG_EWLOCK;
		error = 0;
		goto out;
	}
#else
	/* QFS doesn't really support LOCKFS. */
#endif /* LUFS */

	if (LQFS_GET_LOGP(qfsvfsp) == NULL || LQFS_GET_LOGBNO(fs) == 0) {
		goto errout;
	}

	/*
	 * WE ARE COMMITTED TO DISABLING LOGGING PAST THIS POINT
	 */

	/*
	 * Disable logging:
	 * Suspend the reclaim thread and force the delete thread to exit.
	 *	When a nologging mount has completed there may still be
	 *	work for reclaim to do so just suspend this thread until
	 *	it's [deadlock-] safe for it to continue.  The delete
	 *	thread won't be needed as qfs_iinactive() calls
	 *	qfs_delete() when logging is disabled.
	 * Freeze and drain reader ops.
	 *	Commit any outstanding reader transactions (lqfs_flush).
	 *	Set the ``unmounted'' bit in the qfstrans struct.
	 *	If debug, remove metadata from matamap.
	 *	Disable matamap processing.
	 *	NULL the trans ops table.
	 *	Free all of the incore structs related to logging.
	 * Allow reader ops.
	 */
#ifdef LUFS
	qfs_thread_suspend(&qfsvfsp->vfs_reclaim);
	qfs_thread_exit(&qfsvfsp->vfs_delete);
#else
	/* QFS doesn't have file reclaim nor i-node delete threads. */
#endif /* LUFS */

	vfs_lock_wait(qfsvfsp->vfs_vfs);
#ifdef LQFS_TODO_LOCKFS
	ulp = &qfsvfsp->vfs_ulockfs;
	mutex_enter(&ulp->ul_lock);
	(void) qfs_quiesce(ulp);
#else
	/* QFS doesn't really support LOCKFS. */
#endif /* LQFS_TODO_LOCKFS */

#ifdef LQFS_TODO
	(void) qfs_flush(qfsvfsp->vfs_vfs);
#else
	(void) lqfs_flush(qfsvfsp);
	if (LQFS_GET_LOGP(qfsvfsp)) {
		logmap_start_roll(LQFS_GET_LOGP(qfsvfsp));
	}
#endif /* LQFS_TODO */

	TRANS_MATA_UMOUNT(qfsvfsp);
	LQFS_SET_DOMATAMAP(qfsvfsp, 0);

	/*
	 * Free all of the incore structs
	 * Acquire the qfs_scan_lock before de-linking the mtm data
	 * structure so that we keep qfs_sync() and qfs_update() away
	 * when they execute the ufs_scan_inodes() run while we're in
	 * progress of enabling/disabling logging.
	 */
	mutex_enter(&qfs_scan_lock);
	(void) lqfs_unsnarf(qfsvfsp);
	mutex_exit(&qfs_scan_lock);

#ifdef LQFS_TODO_LOCKFS
	atomic_add_long(&ufs_quiesce_pend, -1);
	mutex_exit(&ulp->ul_lock);
#else
	/* QFS doesn't do this yet. */
#endif /* LQFS_TODO_LOCKFS */
	vfs_setmntopt(qfsvfsp->vfs_vfs, MNTOPT_NOLOGGING, NULL, 0);
	vfs_unlock(qfsvfsp->vfs_vfs);

	LQFS_SET_FS_ROLLED(fs, FS_ALL_ROLLED);
	LQFS_SET_NOLOG_SI(qfsvfsp, 0);

	/*
	 * Free the log space and mark the superblock as FSACTIVE
	 */
	(void) lqfs_free(qfsvfsp);

#ifdef LUFS
	/*
	 * Allow the reclaim thread to continue.
	 */
	qfs_thread_continue(&qfsvfsp->vfs_reclaim);
#else
	/* QFS doesn't have a file reclaim thread. */
#endif /* LUFS */

#ifdef LQFS_TODO_LOCKFS
	/*
	 * Unlock the file system
	 */
	lf.lf_lock = LOCKFS_ULOCK;
	lf.lf_flags = 0;
	error = qfs_fiolfs(vp, &lf, 1);
	if (error) {
		flp->error = FIOLOG_ENOULOCK;
	}
#else
	/* QFS doesn't really support LOCKFS. */
#endif /* LQFS_TODO_LOCKFS */

	error = 0;
	goto out;

errout:
#ifdef LQFS_LOCKFS
	lf.lf_lock = LOCKFS_ULOCK;
	lf.lf_flags = 0;
	(void) qfs_fiolfs(vp, &lf, 1);
#else
	/* QFS doesn't really support LOCKFS. */
#endif /* LQFS_LOCKFS */

out:
	mutex_enter(&ip->mp->ms.m_waitwr_mutex);
	ip->mp->mt.fi_status |= FS_LOGSTATE_KNOWN;
	mutex_exit(&ip->mp->ms.m_waitwr_mutex);
	return (error);
}
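lqfs_disable() funnels every exit through its errout/out labels so that the unlock work and the FS_LOGSTATE_KNOWN bookkeeping run exactly once; a skeletal sketch of that ladder with placeholder work:

#include <stdio.h>

static int step(int n) { return n == 2 ? -1 : 0; }	/* placeholder */

int
main(void)
{
	int error = 0, n;

	for (n = 0; n < 4; n++)
		if ((error = step(n)) != 0)
			goto errout;
	goto out;

errout:
	printf("undo partial state\n");		/* cf. the unlock block */
out:
	printf("publish final status, return %d\n", error);
	return 0;
}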
Example #9
/*
 * Enable logging
 */
int
lqfs_enable(struct vnode *vp, struct fiolog *flp, cred_t *cr)
{
	int		error;
	inode_t		*ip = VTOI(vp);
	qfsvfs_t	*qfsvfsp = ip->i_qfsvfs;
	fs_lqfs_common_t	*fs = VFS_FS_PTR(qfsvfsp);
	ml_unit_t	*ul;
#ifdef LQFS_TODO_LOCKFS
	int		reclaim = 0;
	struct lockfs	lf;
	struct ulockfs	*ulp;
#else
	/* QFS doesn't really support LOCKFS. */
#endif /* LQFS_TODO_LOCKFS */
	vfs_t		*vfsp = qfsvfsp->vfs_vfs;
	uint64_t	tmp_nbytes_actual;
	char fsclean;
	sam_sblk_t	*sblk = qfsvfsp->mi.m_sbp;

	/*
	 * File system is not capable of logging.
	 */
	if (!LQFS_CAPABLE(qfsvfsp)) {
		flp->error = FIOLOG_ENOTSUP;
		error = 0;
		goto out;
	}
	if (!SAM_MAGIC_V2A_OR_HIGHER(&sblk->info.sb)) {
		cmn_err(CE_WARN, "SAM-QFS: %s: Not enabling logging, "
		    " file system is not version 2A.", qfsvfsp->mt.fi_name);
		cmn_err(CE_WARN, "\tUpgrade file system with samfsck -u "
		    "first.");
		flp->error = FIOLOG_ENOTSUP;
		error = 0;
		goto out;
	}

	if (LQFS_GET_LOGBNO(fs)) {
		error = lqfs_log_validate(qfsvfsp, flp, cr);
	}

	/*
	 * Check if logging is already enabled
	 */
	if (LQFS_GET_LOGP(qfsvfsp)) {
		flp->error = FIOLOG_ETRANS;
		/* for root ensure logging option is set */
		vfs_setmntopt(vfsp, MNTOPT_LOGGING, NULL, 0);
		error = 0;
		goto out;
	}

	/*
	 * Come back here to recheck if we had to disable the log.
	 */
recheck:
	error = 0;
	flp->error = FIOLOG_ENONE;

	/*
	 * Adjust requested log size
	 */
	flp->nbytes_actual = flp->nbytes_requested;
	if (flp->nbytes_actual == 0) {
		tmp_nbytes_actual =
		    (((uint64_t)FS_SIZE(fs)) / ldl_divisor) << FS_FSHIFT(fs);
		flp->nbytes_actual = (uint_t)MIN(tmp_nbytes_actual, INT_MAX);
	}
	flp->nbytes_actual = MAX(flp->nbytes_actual, ldl_minlogsize);
	flp->nbytes_actual = MIN(flp->nbytes_actual, ldl_maxlogsize);
	flp->nbytes_actual = blkroundup(fs, flp->nbytes_actual);

	/*
	 * logging is enabled and the log is the right size; done
	 */
	ul = LQFS_GET_LOGP(qfsvfsp);

	if (ul && LQFS_GET_LOGBNO(fs) &&
	    (flp->nbytes_actual == ul->un_requestsize)) {
		vfs_setmntopt(vfsp, MNTOPT_LOGGING, NULL, 0);
		error = 0;
		goto out;
	}

	/*
	 * Readonly file system
	 */
	if (FS_RDONLY(fs)) {
		flp->error = FIOLOG_EROFS;
		error = 0;
		goto out;
	}

#ifdef LQFS_TODO_LOCKFS
	/*
	 * File system must be write locked to enable logging
	 */
	error = qfs_fiolfss(vp, &lf);
	if (error) {
		goto out;
	}
	if (!LOCKFS_IS_ULOCK(&lf)) {
		flp->error = FIOLOG_EULOCK;
		error = 0;
		goto out;
	}
	lf.lf_lock = LOCKFS_WLOCK;
	lf.lf_flags = 0;
	lf.lf_comment = NULL;
	error = qfs_fiolfs(vp, &lf, 1);
	if (error) {
		flp->error = FIOLOG_EWLOCK;
		error = 0;
		goto out;
	}
#else
	/* QFS doesn't really support lockfs. */
#endif /* LQFS_TODO_LOCKFS */

	/*
	 * Grab appropriate locks to synchronize with the rest
	 * of the system
	 */
	vfs_lock_wait(vfsp);
#ifdef LQFS_TODO_LOCKFS
	ulp = &qfsvfsp->vfs_ulockfs;
	mutex_enter(&ulp->ul_lock);
#else
	/* QFS doesn't really support lockfs. */
#endif /* LQFS_TODO_LOCKFS */

	/*
	 * File system must be fairly consistent to enable logging
	 */
	fsclean = LQFS_GET_FS_CLEAN(fs);
	if (fsclean != FSLOG &&
	    fsclean != FSACTIVE &&
	    fsclean != FSSTABLE &&
	    fsclean != FSCLEAN) {
		flp->error = FIOLOG_ECLEAN;
		goto unlockout;
	}

#ifdef LUFS
	/*
	 * A write-locked file system is only active if there are
	 * open deleted files; so remember to set FS_RECLAIM later.
	 */
	if (LQFS_GET_FS_CLEAN(fs) == FSACTIVE) {
		reclaim = FS_RECLAIM;
	}
#else
	/* QFS doesn't have a reclaim file thread. */
#endif /* LUFS */

	/*
	 * Logging is already enabled; must be changing the log's size
	 */
	if (LQFS_GET_LOGBNO(fs) && LQFS_GET_LOGP(qfsvfsp)) {
#ifdef LQFS_TODO_LOCKFS
		/*
		 * Before we can disable logging, we must give up our
		 * lock.  As a consequence of unlocking and disabling the
		 * log, the fs structure may change.  Because of this, when
		 * disabling is complete, we will go back to recheck to
		 * repeat all of the checks that we performed to get to
		 * this point.  Disabling sets fs->fs_logbno to 0, so this
		 * will not put us into an infinite loop.
		 */
		mutex_exit(&ulp->ul_lock);
#else
		/* QFS doesn't really support lockfs. */
#endif /* LQFS_TODO_LOCKFS */
		vfs_unlock(vfsp);

#ifdef LQFS_TODO_LOCKFS
		lf.lf_lock = LOCKFS_ULOCK;
		lf.lf_flags = 0;
		error = qfs_fiolfs(vp, &lf, 1);
		if (error) {
			flp->error = FIOLOG_ENOULOCK;
			error = 0;
			goto out;
		}
#else
		/* QFS doesn't really support lockfs. */
#endif /* LQFS_TODO_LOCKFS */
		error = lqfs_disable(vp, flp);
		if (error || (flp->error != FIOLOG_ENONE)) {
			error = 0;
			goto out;
		}
		goto recheck;
	}

	error = lqfs_alloc(qfsvfsp, flp, cr);
	if (error) {
		goto errout;
	}
#ifdef LUFS
#else
	if ((error = lqfs_log_validate(qfsvfsp, flp, cr)) != 0) {
		goto errout;
	}
#endif /* LUFS */

	/*
	 * Create all of the incore structs
	 */
	error = lqfs_snarf(qfsvfsp, fs, 0);
	if (error) {
		goto errout;
	}

	/*
	 * DON'T ``GOTO ERROUT'' PAST THIS POINT
	 */

	/*
	 * Pretend we were just mounted with logging enabled
	 *	freeze and drain the file system of readers
	 *		Get the ops vector
	 *		If debug, record metadata locations with log subsystem
	 *		Start the delete thread
	 *		Start the reclaim thread, if necessary
	 *	Thaw readers
	 */
	vfs_setmntopt(vfsp, MNTOPT_LOGGING, NULL, 0);

	TRANS_DOMATAMAP(qfsvfsp);
	TRANS_MATA_MOUNT(qfsvfsp);
	TRANS_MATA_SI(qfsvfsp, fs);

#ifdef LUFS
	qfs_thread_start(&qfsvfsp->vfs_delete, qfs_thread_delete, vfsp);
	if (fs->fs_reclaim & (FS_RECLAIM|FS_RECLAIMING)) {
		fs->fs_reclaim &= ~FS_RECLAIM;
		fs->fs_reclaim |=  FS_RECLAIMING;
		qfs_thread_start(&qfsvfsp->vfs_reclaim,
		    qfs_thread_reclaim, vfsp);
	} else {
		fs->fs_reclaim |= reclaim;
	}
#else
	/* QFS doesn't have file reclaim nor i-node delete threads. */
#endif /* LUFS */

#ifdef LUFS
	mutex_exit(&ulp->ul_lock);
#else
	/* QFS doesn't really support LOCKFS. */
#endif /* LUFS */

	vfs_unlock(vfsp);

#ifdef LQFS_TODO_LOCKFS
	/*
	 * Unlock the file system
	 */
	lf.lf_lock = LOCKFS_ULOCK;
	lf.lf_flags = 0;
	error = qfs_fiolfs(vp, &lf, 1);
	if (error) {
		flp->error = FIOLOG_ENOULOCK;
		error = 0;
		goto out;
	}
#else
	/* QFS doesn't really support LOCKFS. */
#endif /* LQFS_TODO_LOCKFS */

	/*
	 * There's nothing in the log yet (we've just allocated it)
	 * so directly write out the super block.
	 * Note, we have to force this sb out to disk
	 * (not just to the log) so that if we crash we know we are logging
	 */
	VFS_LOCK_MUTEX_ENTER(qfsvfsp);
	LQFS_SET_FS_CLEAN(fs, FSLOG);
	LQFS_SET_FS_ROLLED(fs, FS_NEED_ROLL);	/* Mark the fs as unrolled */
#ifdef LUFS
	QFS_BWRITE2(NULL, qfsvfsp->vfs_bufp);
#else
	sam_update_sblk(qfsvfsp, 0, 0, TRUE);
#endif /* LUFS */
	VFS_LOCK_MUTEX_EXIT(qfsvfsp);

	error = 0;
	goto out;

errout:
	/*
	 * Acquire the qfs_scan_lock before de-linking the mtm data
	 * structure so that we keep qfs_sync() and qfs_update() away
	 * when they execute the ufs_scan_inodes() run while we're in
	 * progress of enabling/disabling logging.
	 */
	mutex_enter(&qfs_scan_lock);
	(void) lqfs_unsnarf(qfsvfsp);
	mutex_exit(&qfs_scan_lock);

	(void) lqfs_free(qfsvfsp);
unlockout:
#ifdef LQFS_TODO_LOCKFS
	mutex_exit(&ulp->ul_lock);
#else
	/* QFS doesn't really support LOCKFS. */
#endif /* LQFS_TODO_LOCKFS */

	vfs_unlock(vfsp);

#ifdef LQFS_TODO_LOCKFS
	lf.lf_lock = LOCKFS_ULOCK;
	lf.lf_flags = 0;
	(void) qfs_fiolfs(vp, &lf, 1);
#else
	/* QFS doesn't really support LOCKFS. */
#endif /* LQFS_TODO_LOCKFS */

out:
	mutex_enter(&ip->mp->ms.m_waitwr_mutex);
	ip->mp->mt.fi_status |= FS_LOGSTATE_KNOWN;
	mutex_exit(&ip->mp->ms.m_waitwr_mutex);
	return (error);
}
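The size negotiation above defaults a zero request to a fraction of the file system, clamps it between the ldl limits, and rounds it up to a block multiple. A standalone sketch with assumed constants standing in for ldl_minlogsize, ldl_maxlogsize and blkroundup():

#include <stdio.h>

#define MIN(a, b)	((a) < (b) ? (a) : (b))
#define MAX(a, b)	((a) > (b) ? (a) : (b))

int
main(void)
{
	unsigned long long minlog = 1ULL << 20;		/* assumed 1 MB */
	unsigned long long maxlog = 512ULL << 20;	/* assumed 512 MB */
	unsigned long long bsize = 8192, divisor = 1024;
	unsigned long long req = 0, fssize = 10ULL << 30;
	unsigned long long n = req ? req : fssize / divisor;

	n = MAX(n, minlog);
	n = MIN(n, maxlog);
	n = (n + bsize - 1) & ~(bsize - 1);	/* blkroundup() analogue */
	printf("log size = %llu bytes\n", n);
	return 0;
}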
Example #10
/*
 * Real work associated with removing an extended attribute from a vnode.
 * Assumes the attribute lock has already been grabbed.
 */
static int
ufs_extattr_rm(struct vnode *vp, int attrnamespace, const char *name,
    struct ucred *cred, struct proc *p)
{
	struct ufs_extattr_list_entry	*attribute;
	struct ufs_extattr_header	ueh;
	struct iovec	local_aiov;
	struct uio	local_aio;
	struct mount	*mp = vp->v_mount;
	struct ufsmount	*ump = VFSTOUFS(mp);
	struct inode	*ip = VTOI(vp);
	off_t	base_offset;
	int	error = 0, ioflag;

	if (vp->v_mount->mnt_flag & MNT_RDONLY)  
		return (EROFS);
	if (!(ump->um_extattr.uepm_flags & UFS_EXTATTR_UEPM_STARTED))
		return (EOPNOTSUPP);
	if (!ufs_extattr_valid_attrname(attrnamespace, name))
		return (EINVAL);

	attribute = ufs_extattr_find_attr(ump, attrnamespace, name);
	if (!attribute)
		/* XXX: ENOENT here will eventually be ENOATTR. */
		return (ENOENT);

	if ((error = ufs_extattr_credcheck(vp, attribute, cred, p, IWRITE)))
		return (error);

	/*
	 * Find base offset of header in file based on file header size, and
	 * data header size + maximum data size, indexed by inode number.
	 */
	base_offset = sizeof(struct ufs_extattr_fileheader) +
	    ip->i_number * (sizeof(struct ufs_extattr_header) +
	    attribute->uele_fileheader.uef_size);

	/*
	 * Check to see if currently defined.
	 */
	bzero(&ueh, sizeof(struct ufs_extattr_header));

	local_aiov.iov_base = (caddr_t) &ueh;
	local_aiov.iov_len = sizeof(struct ufs_extattr_header);
	local_aio.uio_iov = &local_aiov;
	local_aio.uio_iovcnt = 1;
	local_aio.uio_rw = UIO_READ;
	local_aio.uio_segflg = UIO_SYSSPACE;
	local_aio.uio_procp = p;
	local_aio.uio_offset = base_offset;
	local_aio.uio_resid = sizeof(struct ufs_extattr_header);

	VOP_LEASE(attribute->uele_backing_vnode, p, cred, LEASE_WRITE);

	/*
	 * Don't need to get the lock on the backing vnode if the vnode we're
	 * modifying is it, as we already hold the lock.
	 */
	if (attribute->uele_backing_vnode != vp)
		vn_lock(attribute->uele_backing_vnode,
		    LK_EXCLUSIVE | LK_NOPAUSE | LK_RETRY, p);

	error = VOP_READ(attribute->uele_backing_vnode, &local_aio,
	    IO_NODELOCKED, ump->um_extattr.uepm_ucred);
	if (error)
		goto vopunlock_exit;

	/* Defined? */
	if ((ueh.ueh_flags & UFS_EXTATTR_ATTR_FLAG_INUSE) == 0) {
		/* XXX: ENOENT here will eventually be ENOATTR. */
		error = ENOENT;
		goto vopunlock_exit;
	}

	/* Valid for the current inode generation? */
	if (ueh.ueh_i_gen != ip->i_ffs_gen) {
		/*
		 * The inode itself has a different generation number than
		 * the attribute data.  For now, the best solution is to
		 * coerce this to undefined, and let it get cleaned up by
		 * the next write or extattrctl clean.
		 */
		printf("ufs_extattr_rm: inode number inconsistency (%d, %d)\n",
		    ueh.ueh_i_gen, ip->i_ffs_gen);
		/* XXX: ENOENT here will eventually be ENOATTR. */
		error = ENOENT;
		goto vopunlock_exit;
	}

	/* Flag it as not in use. */
	ueh.ueh_flags = 0;
	ueh.ueh_len = 0;

	local_aiov.iov_base = (caddr_t) &ueh;
	local_aiov.iov_len = sizeof(struct ufs_extattr_header);
	local_aio.uio_iov = &local_aiov;
	local_aio.uio_iovcnt = 1;
	local_aio.uio_rw = UIO_WRITE;
	local_aio.uio_segflg = UIO_SYSSPACE;
	local_aio.uio_procp = p;
	local_aio.uio_offset = base_offset;
	local_aio.uio_resid = sizeof(struct ufs_extattr_header);

	ioflag = IO_NODELOCKED;
	if (ufs_extattr_sync)
		ioflag |= IO_SYNC;
	error = VOP_WRITE(attribute->uele_backing_vnode, &local_aio, ioflag,
	    ump->um_extattr.uepm_ucred);
	if (error)
		goto vopunlock_exit;

	if (local_aio.uio_resid != 0)
		error = ENXIO;

vopunlock_exit:
	VOP_UNLOCK(attribute->uele_backing_vnode, 0, p);

	return (error);
}
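The backing file is laid out as one file header followed by a fixed-size (header + data) slot per inode, so the slot offset is pure arithmetic on the inode number. A sketch with assumed header and data sizes:

#include <stdio.h>

int
main(void)
{
	/* assumed stand-ins for the two header sizes and uef_size */
	unsigned long long fileheader = 16, attrheader = 16, uef_size = 64;
	unsigned long long ino = 1234;
	unsigned long long base = fileheader + ino * (attrheader + uef_size);

	printf("slot for inode %llu starts at byte %llu\n", ino, base);
	printf("its data begins at byte %llu\n", base + attrheader);
	return 0;
}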
Example #11
/*
 * Real work associated with retrieving a named attribute--assumes that
 * the attribute lock has already been grabbed.
 */
static int
ufs_extattr_get(struct vnode *vp, int attrnamespace, const char *name,
    struct uio *uio, size_t *size, struct ucred *cred, struct proc *p)
{
	struct ufs_extattr_list_entry	*attribute;
	struct ufs_extattr_header	ueh;
	struct iovec	local_aiov;
	struct uio	local_aio;
	struct mount	*mp = vp->v_mount;
	struct ufsmount	*ump = VFSTOUFS(mp);
	struct inode	*ip = VTOI(vp);
	off_t	base_offset;
	size_t	len, old_len;
	int	error = 0;

	if (!(ump->um_extattr.uepm_flags & UFS_EXTATTR_UEPM_STARTED))
		return (EOPNOTSUPP);

	if (strlen(name) == 0) {
		/* XXX retrieve attribute lists. */
		/* XXX should probably be checking for name == NULL? */
		return (EINVAL);
	}

	attribute = ufs_extattr_find_attr(ump, attrnamespace, name);
	if (!attribute)
		/* XXX: ENOENT here will eventually be ENOATTR. */
		return (ENOENT);

	if ((error = ufs_extattr_credcheck(vp, attribute, cred, p, IREAD)))
		return (error);

	/*
	 * Allow only offsets of zero to encourage the read/replace
	 * extended attribute semantic.  Otherwise we can't guarantee
	 * atomicity, as we don't provide locks for extended attributes.
	 */
	if (uio != NULL && uio->uio_offset != 0)
		return (ENXIO);

	/*
	 * Find base offset of header in file based on file header size, and
	 * data header size + maximum data size, indexed by inode number.
	 */
	base_offset = sizeof(struct ufs_extattr_fileheader) +
	    ip->i_number * (sizeof(struct ufs_extattr_header) +
	    attribute->uele_fileheader.uef_size);

	/*
	 * Read in the data header to see if the data is defined, and if so
	 * how much.
	 */
	bzero(&ueh, sizeof(struct ufs_extattr_header));
	local_aiov.iov_base = (caddr_t) &ueh;
	local_aiov.iov_len = sizeof(struct ufs_extattr_header);
	local_aio.uio_iov = &local_aiov;
	local_aio.uio_iovcnt = 1;
	local_aio.uio_rw = UIO_READ;
	local_aio.uio_segflg = UIO_SYSSPACE;
	local_aio.uio_procp = p;
	local_aio.uio_offset = base_offset;
	local_aio.uio_resid = sizeof(struct ufs_extattr_header);
	
	/*
	 * Acquire locks.
	 */
	VOP_LEASE(attribute->uele_backing_vnode, p, cred, LEASE_READ);
	/*
	 * Don't need to get a lock on the backing file if the getattr is
	 * being applied to the backing file, as the lock is already held.
	 */
	if (attribute->uele_backing_vnode != vp)
		vn_lock(attribute->uele_backing_vnode, LK_SHARED |
		    LK_NOPAUSE | LK_RETRY, p);

	error = VOP_READ(attribute->uele_backing_vnode, &local_aio,
	    IO_NODELOCKED, ump->um_extattr.uepm_ucred);
	if (error)
		goto vopunlock_exit;

	/* Defined? */
	if ((ueh.ueh_flags & UFS_EXTATTR_ATTR_FLAG_INUSE) == 0) {
		/* XXX: ENOENT here will eventually be ENOATTR. */
		error = ENOENT;
		goto vopunlock_exit;
	}

	/* Valid for the current inode generation? */
	if (ueh.ueh_i_gen != ip->i_ffs_gen) {
		/*
		 * The inode itself has a different generation number
		 * than the attribute data.  For now, the best solution
		 * is to coerce this to undefined, and let it get cleaned
		 * up by the next write or extattrctl clean.
		 */
		printf("ufs_extattr_get: inode number inconsistency (%d, %d)\n",
		    ueh.ueh_i_gen, ip->i_ffs_gen);
		/* XXX: ENOENT here will eventually be ENOATTR. */
		error = ENOENT;
		goto vopunlock_exit;
	}

	/* Local size consistency check. */
	if (ueh.ueh_len > attribute->uele_fileheader.uef_size) {
		error = ENXIO;
		goto vopunlock_exit;
	}

	/* Return full data size if caller requested it. */
	if (size != NULL)
		*size = ueh.ueh_len;

	/* Return data if the caller requested it. */
	if (uio != NULL) {
		/* Allow for offset into the attribute data. */
		uio->uio_offset = base_offset + sizeof(struct
		    ufs_extattr_header);

		/*
		 * Figure out maximum to transfer -- use buffer size and
		 * local data limit.
		 */
		len = MIN(uio->uio_resid, ueh.ueh_len);
		old_len = uio->uio_resid;
		uio->uio_resid = len;
 
		error = VOP_READ(attribute->uele_backing_vnode, uio,
		    IO_NODELOCKED, ump->um_extattr.uepm_ucred);
		if (error)
			goto vopunlock_exit;

		uio->uio_resid = old_len - (len - uio->uio_resid);
	}

vopunlock_exit:

	if (uio != NULL)
		uio->uio_offset = 0;

	if (attribute->uele_backing_vnode != vp)
		VOP_UNLOCK(attribute->uele_backing_vnode, 0, p);

	return (error);
}
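The read path narrows uio_resid to the attribute's length before the transfer, then re-widens it so the caller sees only the attribute bytes actually consumed. A sketch of that bookkeeping with made-up numbers:

#include <stdio.h>

int
main(void)
{
	unsigned long long ueh_len = 40, uio_resid = 100;
	unsigned long long len = uio_resid < ueh_len ? uio_resid : ueh_len;
	unsigned long long old_len = uio_resid;

	uio_resid = len;
	uio_resid -= 30;	/* pretend VOP_READ() moved 30 bytes */
	uio_resid = old_len - (len - uio_resid);
	printf("caller sees uio_resid = %llu\n", uio_resid);	/* 70 */
	return 0;
}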
Example #12
/*
 * Real work associated with setting a vnode's extended attributes;
 * assumes that the attribute lock has already been grabbed.
 */
static int
ufs_extattr_set(struct vnode *vp, int attrnamespace, const char *name,
    struct uio *uio, struct ucred *cred, struct proc *p)
{
	struct ufs_extattr_list_entry	*attribute;
	struct ufs_extattr_header	ueh;
	struct iovec	local_aiov;
	struct uio	local_aio;
	struct mount	*mp = vp->v_mount;
	struct ufsmount	*ump = VFSTOUFS(mp);
	struct inode	*ip = VTOI(vp);
	off_t	base_offset;
	int	error = 0, ioflag;

	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return (EROFS);
	if (!(ump->um_extattr.uepm_flags & UFS_EXTATTR_UEPM_STARTED))
		return (EOPNOTSUPP);
	if (!ufs_extattr_valid_attrname(attrnamespace, name))
		return (EINVAL);

	attribute = ufs_extattr_find_attr(ump, attrnamespace, name);
	if (!attribute)
		/* XXX: ENOENT here will eventually be ENOATTR. */
		return (ENOENT);

	if ((error = ufs_extattr_credcheck(vp, attribute, cred, p, IWRITE)))
		return (error);

	/*
	 * Early rejection of invalid offsets/length.
	 * Reject: any offset but 0 (replace)
	 *	 Any size greater than attribute size limit
 	 */
	if (uio->uio_offset != 0 ||
	    uio->uio_resid > attribute->uele_fileheader.uef_size)
		return (ENXIO);

	/*
	 * Find base offset of header in file based on file header size, and
	 * data header size + maximum data size, indexed by inode number.
	 */
	base_offset = sizeof(struct ufs_extattr_fileheader) +
	    ip->i_number * (sizeof(struct ufs_extattr_header) +
	    attribute->uele_fileheader.uef_size);

	/*
	 * Write out a data header for the data.
	 */
	ueh.ueh_len = uio->uio_resid;
	ueh.ueh_flags = UFS_EXTATTR_ATTR_FLAG_INUSE;
	ueh.ueh_i_gen = ip->i_ffs_gen;
	local_aiov.iov_base = (caddr_t) &ueh;
	local_aiov.iov_len = sizeof(struct ufs_extattr_header);
	local_aio.uio_iov = &local_aiov;
	local_aio.uio_iovcnt = 1;
	local_aio.uio_rw = UIO_WRITE;
	local_aio.uio_segflg = UIO_SYSSPACE;
	local_aio.uio_procp = p;
	local_aio.uio_offset = base_offset;
	local_aio.uio_resid = sizeof(struct ufs_extattr_header);

	/*
	 * Acquire locks.
	 */
	VOP_LEASE(attribute->uele_backing_vnode, p, cred, LEASE_WRITE);

	/*
	 * Don't need to get a lock on the backing file if the setattr is
	 * being applied to the backing file, as the lock is already held.
	 */
	if (attribute->uele_backing_vnode != vp)
		vn_lock(attribute->uele_backing_vnode, 
		    LK_EXCLUSIVE | LK_NOPAUSE | LK_RETRY, p);

	ioflag = IO_NODELOCKED;
	if (ufs_extattr_sync)
		ioflag |= IO_SYNC;
	error = VOP_WRITE(attribute->uele_backing_vnode, &local_aio, ioflag,
	    ump->um_extattr.uepm_ucred);
	if (error)
		goto vopunlock_exit;

	if (local_aio.uio_resid != 0) {
		error = ENXIO;
		goto vopunlock_exit;
	}

	/*
	 * Write out user data.
	 */
	uio->uio_offset = base_offset + sizeof(struct ufs_extattr_header);

	ioflag = IO_NODELOCKED;
	if (ufs_extattr_sync)
		ioflag |= IO_SYNC;
	error = VOP_WRITE(attribute->uele_backing_vnode, uio, ioflag,
	    ump->um_extattr.uepm_ucred);

vopunlock_exit:
	uio->uio_offset = 0;

	if (attribute->uele_backing_vnode != vp)
		VOP_UNLOCK(attribute->uele_backing_vnode, 0, p);

	return (error);
}
Example #13
/*
 * Vnode op for reading directories.
 *
 * This function has to convert directory entries from the on-disk
 * format to the format defined by <sys/dirent.h>.  Unfortunately, the
 * conversion will blow up some entries by four bytes, so it can't be
 * done in place.  Instead, the conversion is done entry by entry and
 * the converted entry is sent via uiomove.
 *
 * XXX allocate a buffer, convert as many entries as possible, then send
 * the whole buffer to uiomove
 */
int
ext2_readdir(struct vop_readdir_args *ap)
{
	struct uio *uio = ap->a_uio;
	int count, error;

	struct ext2fs_direct_2 *edp, *dp;
	int ncookies;
	struct dirent dstdp;
	struct uio auio;
	struct iovec aiov;
	caddr_t dirbuf;
	int DIRBLKSIZ = VTOI(ap->a_vp)->i_e2fs->e2fs_bsize;
	int readcnt;
	off_t startoffset = uio->uio_offset;

	count = uio->uio_resid;
	/*
	 * Avoid complications for partial directory entries by adjusting
	 * the i/o to end at a block boundary.  Don't give up (like ufs
	 * does) if the initial adjustment gives a negative count, since
	 * many callers don't supply a large enough buffer.  The correct
	 * size is a little larger than DIRBLKSIZ to allow for expansion
	 * of directory entries, but some callers just use 512.
	 */
	count -= (uio->uio_offset + count) & (DIRBLKSIZ - 1);
	if (count <= 0)
		count += DIRBLKSIZ;
	auio = *uio;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = count;
	auio.uio_segflg = UIO_SYSSPACE;
	aiov.iov_len = count;
	dirbuf = malloc(count, M_TEMP, M_WAITOK);
	aiov.iov_base = dirbuf;
	error = VOP_READ(ap->a_vp, &auio, 0, ap->a_cred);
	if (error == 0) {
		readcnt = count - auio.uio_resid;
		edp = (struct ext2fs_direct_2 *)&dirbuf[readcnt];
		ncookies = 0;
		bzero(&dstdp, offsetof(struct dirent, d_name));
		for (dp = (struct ext2fs_direct_2 *)dirbuf;
		    !error && uio->uio_resid > 0 && dp < edp; ) {
			/*-
			 * "New" ext2fs directory entries differ in 3 ways
			 * from ufs on-disk ones:
			 * - the name is not necessarily NUL-terminated.
			 * - the file type field always exists and always
			 *   follows the name length field.
			 * - the file type is encoded in a different way.
			 *
			 * "Old" ext2fs directory entries need no special
			 * conversions, since they are binary compatible
			 * with "new" entries having a file type of 0 (i.e.,
			 * EXT2_FT_UNKNOWN).  Splitting the old name length
			 * field didn't make a mess like it did in ufs,
			 * because ext2fs uses a machine-independent disk
			 * layout.
			 */
			dstdp.d_fileno = dp->e2d_ino;
			dstdp.d_type = FTTODT(dp->e2d_type);
			dstdp.d_namlen = dp->e2d_namlen;
			dstdp.d_reclen = GENERIC_DIRSIZ(&dstdp);
			bcopy(dp->e2d_name, dstdp.d_name, dstdp.d_namlen);
			bzero(dstdp.d_name + dstdp.d_namlen,
			    dstdp.d_reclen - offsetof(struct dirent, d_name) -
			    dstdp.d_namlen);

			if (dp->e2d_reclen > 0) {
				if (dstdp.d_reclen <= uio->uio_resid) {
					/* advance dp */
					dp = (struct ext2fs_direct_2 *)
					    ((char *)dp + dp->e2d_reclen);
					error =
					  uiomove(&dstdp, dstdp.d_reclen, uio);
					if (!error)
						ncookies++;
				} else
					break;
			} else {
				error = EIO;
				break;
			}
		}
		/* we need to correct uio_offset */
		uio->uio_offset = startoffset + (caddr_t)dp - dirbuf;

		if (!error && ap->a_ncookies != NULL) {
			u_long *cookiep, *cookies, *ecookies;
			off_t off;

			if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1)
				panic("ext2_readdir: unexpected uio from NFS server");
			cookies = malloc(ncookies * sizeof(u_long), M_TEMP,
			       M_WAITOK);
			off = startoffset;
			for (dp = (struct ext2fs_direct_2 *)dirbuf,
			     cookiep = cookies, ecookies = cookies + ncookies;
			     cookiep < ecookies;
			     dp = (struct ext2fs_direct_2 *)((caddr_t) dp + dp->e2d_reclen)) {
				off += dp->e2d_reclen;
				*cookiep++ = (u_long) off;
			}
			*ap->a_ncookies = ncookies;
			*ap->a_cookies = cookies;
		}
	}
	free(dirbuf, M_TEMP);
	if (ap->a_eofflag)
		*ap->a_eofflag = VTOI(ap->a_vp)->i_size <= uio->uio_offset;
	return (error);
}
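The four-byte blow-up mentioned in the header comment falls straight out of the record-size formulas: the on-disk ext2 entry stores the bare name, while the converted dirent must also fit a NUL terminator before rounding to 4-byte alignment. A hedged sketch of that arithmetic (the 8-byte header sizes are assumptions modeled on the classic layouts, not the kernel's macros):

#include <stdio.h>

/* On-disk ext2 record: assumed 8-byte header + name, 4-byte aligned. */
static int
ext2_reclen(int namlen)
{
	return (8 + namlen + 3) & ~3;
}

/* Converted dirent: assumed 8-byte header + name + NUL, 4-byte aligned,
 * in the spirit of GENERIC_DIRSIZ. */
static int
dirent_reclen(int namlen)
{
	return (8 + namlen + 1 + 3) & ~3;
}

int
main(void)
{
	int n;

	/* Names whose length is a multiple of 4 grow by four bytes. */
	for (n = 1; n <= 8; n++)
		printf("namlen %d: on-disk %d -> dirent %d\n",
		    n, ext2_reclen(n), dirent_reclen(n));
	return 0;
}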
Beispiel #14
0
/*
 * Remove a directory entry after a call to namei, using
 * the parameters which it left in nameidata. The entry
 * dp->i_offset contains the offset into the directory of the
 * entry to be eliminated.  The dp->i_count field contains the
 * size of the previous record in the directory.  If this
 * is 0, the first entry is being deleted, so we need only
 * zero the inode number to mark the entry as free.  If the
 * entry is not the first in the directory, we must reclaim
 * the space of the now empty record by adding the record size
 * to the size of the previous entry.
 */
int
ufs_dirremove(struct vnode *dvp, struct inode *ip, int flags, int isrmdir)
{
	struct inode *dp;
	struct direct *ep;
	struct buf *bp;
	int error;

	dp = VTOI(dvp);

	if ((error = UFS_BUFATOFF(dp,
	    (off_t)(dp->i_offset - dp->i_count), (char **)&ep, &bp)) != 0)
		return (error);
#ifdef UFS_DIRHASH
	/*
	 * Remove the dirhash entry. This is complicated by the fact
	 * that `ep' is the previous entry when dp->i_count != 0.
	 */
	if (dp->i_dirhash != NULL)
		ufsdirhash_remove(dp, (dp->i_count == 0) ? ep :
		(struct direct *)((char *)ep + ep->d_reclen), dp->i_offset);
#endif

	if (dp->i_count == 0) {
		/*
		 * First entry in block: set d_ino to zero.
		 */
		ep->d_ino = 0;
	} else {
		/*
		 * Collapse new free space into previous entry.
		 */
		ep->d_reclen += dp->i_reclen;
	}
#ifdef UFS_DIRHASH
	if (dp->i_dirhash != NULL)
		ufsdirhash_checkblock(dp, (char *)ep -
		    ((dp->i_offset - dp->i_count) & (DIRBLKSIZ - 1)),
		    dp->i_offset & ~(DIRBLKSIZ - 1));
#endif
	if (DOINGSOFTDEP(dvp)) {
		if (ip) {
			ip->i_effnlink--;
			softdep_change_linkcnt(ip, 0);
			softdep_setup_remove(bp, dp, ip, isrmdir);
		}
		if (softdep_slowdown(dvp)) {
			error = bwrite(bp);
		} else {
			bdwrite(bp);
			error = 0;
		}
	} else {
		if (ip) {
			ip->i_effnlink--;
			DIP_ADD(ip, nlink, -1);
			ip->i_flag |= IN_CHANGE;
		}
		if (DOINGASYNC(dvp) && dp->i_count != 0) {
			bdwrite(bp);
			error = 0;
		} else
			error = bwrite(bp);
	}
	dp->i_flag |= IN_CHANGE | IN_UPDATE;
	return (error);
}
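The two deletion cases above are worth seeing in isolation: a block-leading entry is freed by zeroing its inode number, while any other entry is absorbed by letting its predecessor's d_reclen grow over it. A toy rendering with simplified records (real entries also carry a name and padding):

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

/* Simplified directory record: real UFS entries also hold a name. */
struct toy_direct {
	uint32_t d_ino;
	uint16_t d_reclen;
};

/*
 * Same idea as ufs_dirremove(): if the victim leads its block,
 * free it in place; otherwise let the previous record absorb it.
 */
static void
toy_dirremove(struct toy_direct *prev, struct toy_direct *victim)
{
	if (prev == NULL)
		victim->d_ino = 0;		/* first entry in block */
	else
		prev->d_reclen += victim->d_reclen; /* coalesce space */
}

int
main(void)
{
	struct toy_direct a = { 2, 12 }, b = { 5, 20 };

	toy_dirremove(&a, &b);
	printf("prev now spans %u bytes\n", a.d_reclen);	/* 32 */
	return 0;
}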
Beispiel #15
0
/*
 * Balloc defines the structure of filesystem storage
 * by allocating the physical blocks on a device given
 * the inode and the logical block number in a file.
 * This is the allocation strategy for UFS1. Below is
 * the allocation strategy for UFS2.
 */
int
ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
    struct ucred *cred, int flags, struct buf **bpp)
{
	struct inode *ip;
	struct ufs1_dinode *dp;
	ufs_lbn_t lbn, lastlbn;
	struct fs *fs;
	ufs1_daddr_t nb;
	struct buf *bp, *nbp;
	struct ufsmount *ump;
	struct indir indirs[NIADDR + 2];
	int deallocated, osize, nsize, num, i, error;
	ufs2_daddr_t newb;
	ufs1_daddr_t *bap, pref;
	ufs1_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
	ufs2_daddr_t *lbns_remfree, lbns[NIADDR + 1];
	int unwindidx = -1;
	int saved_inbdflush;
	static struct timeval lastfail;
	static int curfail;
	int gbflags, reclaimed;

	ip = VTOI(vp);
	dp = ip->i_din1;
	fs = ip->i_fs;
	ump = ip->i_ump;
	lbn = lblkno(fs, startoffset);
	size = blkoff(fs, startoffset) + size;
	reclaimed = 0;
	if (size > fs->fs_bsize)
		panic("ffs_balloc_ufs1: blk too big");
	*bpp = NULL;
	if (flags & IO_EXT)
		return (EOPNOTSUPP);
	if (lbn < 0)
		return (EFBIG);
	gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0;

	if (DOINGSOFTDEP(vp))
		softdep_prealloc(vp, MNT_WAIT);
	/*
	 * If the next write will extend the file into a new block,
	 * and the file is currently composed of a fragment,
	 * this fragment has to be extended to be a full block.
	 */
	lastlbn = lblkno(fs, ip->i_size);
	if (lastlbn < NDADDR && lastlbn < lbn) {
		nb = lastlbn;
		osize = blksize(fs, ip, nb);
		if (osize < fs->fs_bsize && osize > 0) {
			UFS_LOCK(ump);
			error = ffs_realloccg(ip, nb, dp->di_db[nb],
			   ffs_blkpref_ufs1(ip, lastlbn, (int)nb,
			   &dp->di_db[0]), osize, (int)fs->fs_bsize, flags,
			   cred, &bp);
			if (error)
				return (error);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, nb,
				    dbtofsb(fs, bp->b_blkno), dp->di_db[nb],
				    fs->fs_bsize, osize, bp);
			ip->i_size = smalllblktosize(fs, nb + 1);
			dp->di_size = ip->i_size;
			dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (flags & IO_SYNC)
				bwrite(bp);
			else
				bawrite(bp);
		}
	}
	/*
	 * The first NDADDR blocks are direct blocks
	 */
	if (lbn < NDADDR) {
		if (flags & BA_METAONLY)
			panic("ffs_balloc_ufs1: BA_METAONLY for direct block");
		nb = dp->di_db[lbn];
		if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
			error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp);
			if (error) {
				brelse(bp);
				return (error);
			}
			bp->b_blkno = fsbtodb(fs, nb);
			*bpp = bp;
			return (0);
		}
		if (nb != 0) {
			/*
			 * Consider need to reallocate a fragment.
			 */
			osize = fragroundup(fs, blkoff(fs, ip->i_size));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {
				error = bread(vp, lbn, osize, NOCRED, &bp);
				if (error) {
					brelse(bp);
					return (error);
				}
				bp->b_blkno = fsbtodb(fs, nb);
			} else {
				UFS_LOCK(ump);
				error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
				    ffs_blkpref_ufs1(ip, lbn, (int)lbn,
				    &dp->di_db[0]), osize, nsize, flags,
				    cred, &bp);
				if (error)
					return (error);
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocdirect(ip, lbn,
					    dbtofsb(fs, bp->b_blkno), nb,
					    nsize, osize, bp);
			}
		} else {
			if (ip->i_size < smalllblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			UFS_LOCK(ump);
			error = ffs_alloc(ip, lbn,
			    ffs_blkpref_ufs1(ip, lbn, (int)lbn, &dp->di_db[0]),
			    nsize, flags, cred, &newb);
			if (error)
				return (error);
			bp = getblk(vp, lbn, nsize, 0, 0, gbflags);
			bp->b_blkno = fsbtodb(fs, newb);
			if (flags & BA_CLRBUF)
				vfs_bio_clrbuf(bp);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, lbn, newb, 0,
				    nsize, 0, bp);
		}
		dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		*bpp = bp;
		return (0);
	}
	/*
	 * Determine the number of levels of indirection.
	 */
	pref = 0;
	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
		return(error);
#ifdef INVARIANTS
	if (num < 1)
		panic("ffs_balloc_ufs1: ufs_getlbns returned indirect block");
#endif
	saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH);
	/*
	 * Fetch the first indirect block, allocating it if necessary.
	 */
	--num;
	nb = dp->di_ib[indirs[0].in_off];
	allocib = NULL;
	allocblk = allociblk;
	lbns_remfree = lbns;
	if (nb == 0) {
		UFS_LOCK(ump);
		pref = ffs_blkpref_ufs1(ip, lbn, -indirs[0].in_off - 1,
		    (ufs1_daddr_t *)0);
		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags, cred, &newb)) != 0) {
			curthread_pflags_restore(saved_inbdflush);
			return (error);
		}
		pref = newb + fs->fs_frag;
		nb = newb;
		*allocblk++ = nb;
		*lbns_remfree++ = indirs[1].in_lbn;
		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, gbflags);
		bp->b_blkno = fsbtodb(fs, nb);
		vfs_bio_clrbuf(bp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
			    newb, 0, fs->fs_bsize, 0, bp);
			bdwrite(bp);
		} else {
			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */
			if (DOINGASYNC(vp))
				bdwrite(bp);
			else if ((error = bwrite(bp)) != 0)
				goto fail;
		}
		allocib = &dp->di_ib[indirs[0].in_off];
		*allocib = nb;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 */
retry:
	for (i = 1;;) {
		error = bread(vp,
		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
		if (error) {
			brelse(bp);
			goto fail;
		}
		bap = (ufs1_daddr_t *)bp->b_data;
		nb = bap[indirs[i].in_off];
		if (i == num)
			break;
		i += 1;
		if (nb != 0) {
			bqrelse(bp);
			continue;
		}
		UFS_LOCK(ump);
		/*
		 * If parent indirect has just been allocated, try to cluster
		 * immediately following it.
		 */
		if (pref == 0)
			pref = ffs_blkpref_ufs1(ip, lbn, i - num - 1,
			    (ufs1_daddr_t *)0);
		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags | IO_BUFLOCKED, cred, &newb)) != 0) {
			brelse(bp);
			if (++reclaimed == 1) {
				UFS_LOCK(ump);
				softdep_request_cleanup(fs, vp, cred,
				    FLUSH_BLOCKS_WAIT);
				UFS_UNLOCK(ump);
				goto retry;
			}
			if (ppsratecheck(&lastfail, &curfail, 1)) {
				ffs_fserr(fs, ip->i_number, "filesystem full");
				uprintf("\n%s: write failed, filesystem "
				    "is full\n", fs->fs_fsmnt);
			}
			goto fail;
		}
		pref = newb + fs->fs_frag;
		nb = newb;
		*allocblk++ = nb;
		*lbns_remfree++ = indirs[i].in_lbn;
		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
		vfs_bio_clrbuf(nbp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocindir_meta(nbp, ip, bp,
			    indirs[i - 1].in_off, nb);
			bdwrite(nbp);
		} else {
			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */
			if ((error = bwrite(nbp)) != 0) {
				brelse(bp);
				goto fail;
			}
		}
		bap[indirs[i - 1].in_off] = nb;
		if (allocib == NULL && unwindidx < 0)
			unwindidx = i - 1;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & IO_SYNC) {
			bwrite(bp);
		} else {
			if (bp->b_bufsize == fs->fs_bsize)
				bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
	}
	/*
	 * If asked only for the indirect block, then return it.
	 */
	if (flags & BA_METAONLY) {
		curthread_pflags_restore(saved_inbdflush);
		*bpp = bp;
		return (0);
	}
	/*
	 * Get the data block, allocating if necessary.
	 */
	if (nb == 0) {
		UFS_LOCK(ump);
		/*
		 * If allocating metadata at the front of the cylinder
		 * group and parent indirect block has just been allocated,
		 * then cluster next to it if it is the first indirect in
		 * the file. Otherwise it has been allocated in the metadata
		 * area, so we want to find our own place out in the data area.
		 */
		if (pref == 0 || (lbn > NDADDR && fs->fs_metaspace != 0))
			pref = ffs_blkpref_ufs1(ip, lbn, indirs[i].in_off,
			    &bap[0]);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags | IO_BUFLOCKED, cred, &newb);
		if (error) {
			brelse(bp);
			if (++reclaimed == 1) {
				UFS_LOCK(ump);
				softdep_request_cleanup(fs, vp, cred,
				    FLUSH_BLOCKS_WAIT);
				UFS_UNLOCK(ump);
				goto retry;
			}
			if (ppsratecheck(&lastfail, &curfail, 1)) {
				ffs_fserr(fs, ip->i_number, "filesystem full");
				uprintf("\n%s: write failed, filesystem "
				    "is full\n", fs->fs_fsmnt);
			}
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		*lbns_remfree++ = lbn;
		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
		nbp->b_blkno = fsbtodb(fs, nb);
		if (flags & BA_CLRBUF)
			vfs_bio_clrbuf(nbp);
		if (DOINGSOFTDEP(vp))
			softdep_setup_allocindir_page(ip, lbn, bp,
			    indirs[i].in_off, nb, 0, nbp);
		bap[indirs[i].in_off] = nb;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & IO_SYNC) {
			bwrite(bp);
		} else {
			if (bp->b_bufsize == fs->fs_bsize)
				bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
		curthread_pflags_restore(saved_inbdflush);
		*bpp = nbp;
		return (0);
	}
	brelse(bp);
	if (flags & BA_CLRBUF) {
		int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
		if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
			error = cluster_read(vp, ip->i_size, lbn,
			    (int)fs->fs_bsize, NOCRED,
			    MAXBSIZE, seqcount, gbflags, &nbp);
		} else {
			error = bread_gb(vp, lbn, (int)fs->fs_bsize, NOCRED,
			    gbflags, &nbp);
		}
		if (error) {
			brelse(nbp);
			goto fail;
		}
	} else {
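Everything in this routine hinges on ufs_getlbns() translating a logical block number into a path of indirect-block offsets. A hedged sketch of just the level computation, assuming NDADDR direct pointers per inode and NINDIR pointers per indirect block (both stand-ins for the real filesystem-dependent values):

#include <stdio.h>

#define NDADDR	12	/* assumed direct pointers per inode */
#define NINDIR	2048	/* assumed pointers per indirect block */

/*
 * How many levels of indirection cover logical block lbn:
 * 0 = direct, 1 = single, 2 = double, 3 = triple, -1 = out of range.
 */
static int
indir_levels(long long lbn)
{
	long long span = NINDIR;
	int level;

	if (lbn < NDADDR)
		return 0;
	lbn -= NDADDR;
	for (level = 1; level <= 3; level++) {
		if (lbn < span)
			return level;
		lbn -= span;
		span *= NINDIR;
	}
	return -1;
}

int
main(void)
{
	printf("%d %d %d\n", indir_levels(5), indir_levels(100),
	    indir_levels(NDADDR + NINDIR));	/* 0 1 2 */
	return 0;
}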
Beispiel #16
0
/*
 * Reload all incore data for a filesystem (used after running fsck on
 * the root filesystem and finding things to fix). The filesystem must
 * be mounted read-only.
 *
 * Things to do to update the mount:
 *	1) invalidate all cached meta-data.
 *	2) re-read superblock from disk.
 *	3) re-read summary information from disk.
 *	4) invalidate all inactive vnodes.
 *	5) invalidate all cached file data.
 *	6) re-read inode data for all active vnodes.
 */
int
ext2fs_reload(struct mount *mp, kauth_cred_t cred, struct lwp *l)
{
	struct vnode *vp, *mvp, *devvp;
	struct inode *ip;
	struct buf *bp;
	struct m_ext2fs *fs;
	struct ext2fs *newfs;
	int i, error;
	void *cp;
	struct ufsmount *ump;

	if ((mp->mnt_flag & MNT_RDONLY) == 0)
		return (EINVAL);

	ump = VFSTOUFS(mp);
	/*
	 * Step 1: invalidate all cached meta-data.
	 */
	devvp = ump->um_devvp;
	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
	error = vinvalbuf(devvp, 0, cred, l, 0, 0);
	VOP_UNLOCK(devvp);
	if (error)
		panic("ext2fs_reload: dirty1");
	/*
	 * Step 2: re-read superblock from disk.
	 */
	error = bread(devvp, SBLOCK, SBSIZE, NOCRED, 0, &bp);
	if (error) {
		return (error);
	}
	newfs = (struct ext2fs *)bp->b_data;
	error = ext2fs_checksb(newfs, (mp->mnt_flag & MNT_RDONLY) != 0);
	if (error) {
		brelse(bp, 0);
		return (error);
	}

	fs = ump->um_e2fs;
	/*
	 * copy in new superblock, and compute in-memory values
	 */
	e2fs_sbload(newfs, &fs->e2fs);
	fs->e2fs_ncg =
	    howmany(fs->e2fs.e2fs_bcount - fs->e2fs.e2fs_first_dblock,
	    fs->e2fs.e2fs_bpg);
	fs->e2fs_fsbtodb = fs->e2fs.e2fs_log_bsize + LOG_MINBSIZE - DEV_BSHIFT;
	fs->e2fs_bsize = MINBSIZE << fs->e2fs.e2fs_log_bsize;
	fs->e2fs_bshift = LOG_MINBSIZE + fs->e2fs.e2fs_log_bsize;
	fs->e2fs_qbmask = fs->e2fs_bsize - 1;
	fs->e2fs_bmask = ~fs->e2fs_qbmask;
	fs->e2fs_ngdb =
	    howmany(fs->e2fs_ncg, fs->e2fs_bsize / sizeof(struct ext2_gd));
	fs->e2fs_ipb = fs->e2fs_bsize / EXT2_DINODE_SIZE(fs);
	fs->e2fs_itpg = fs->e2fs.e2fs_ipg / fs->e2fs_ipb;
	brelse(bp, 0);

	/*
	 * Step 3: re-read summary information from disk.
	 */

	for (i = 0; i < fs->e2fs_ngdb; i++) {
		error = bread(devvp,
		    EXT2_FSBTODB(fs, fs->e2fs.e2fs_first_dblock +
		    1 /* superblock */ + i),
		    fs->e2fs_bsize, NOCRED, 0, &bp);
		if (error) {
			return (error);
		}
		e2fs_cgload((struct ext2_gd *)bp->b_data,
		    &fs->e2fs_gd[i * fs->e2fs_bsize / sizeof(struct ext2_gd)],
		    fs->e2fs_bsize);
		brelse(bp, 0);
	}

	/* Allocate a marker vnode. */
	mvp = vnalloc(mp);
	/*
	 * NOTE: not using the TAILQ_FOREACH here since in this loop vgone()
	 * and vclean() can be called indirectly
	 */
	mutex_enter(&mntvnode_lock);
loop:
	for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = vunmark(mvp)) {
		vmark(mvp, vp);
		if (vp->v_mount != mp || vismarker(vp))
			continue;
		/*
		 * Step 4: invalidate all inactive vnodes.
		 */
		if (vrecycle(vp, &mntvnode_lock)) {
			mutex_enter(&mntvnode_lock);
			(void)vunmark(mvp);
			goto loop;
		}
		/*
		 * Step 5: invalidate all cached file data.
		 */
		mutex_enter(vp->v_interlock);
		mutex_exit(&mntvnode_lock);
		if (vget(vp, LK_EXCLUSIVE)) {
			mutex_enter(&mntvnode_lock);
			(void)vunmark(mvp);
			goto loop;
		}
		if (vinvalbuf(vp, 0, cred, l, 0, 0))
			panic("ext2fs_reload: dirty2");
		/*
		 * Step 6: re-read inode data for all active vnodes.
		 */
		ip = VTOI(vp);
		error = bread(devvp, EXT2_FSBTODB(fs, ino_to_fsba(fs, ip->i_number)),
		    (int)fs->e2fs_bsize, NOCRED, 0, &bp);
		if (error) {
			vput(vp);
			mutex_enter(&mntvnode_lock);
			(void)vunmark(mvp);
			break;
		}
		cp = (char *)bp->b_data +
		    (ino_to_fsbo(fs, ip->i_number) * EXT2_DINODE_SIZE(fs));
		e2fs_iload((struct ext2fs_dinode *)cp, ip->i_din.e2fs_din);
		ext2fs_set_inode_guid(ip);
		brelse(bp, 0);
		vput(vp);
		mutex_enter(&mntvnode_lock);
	}
	mutex_exit(&mntvnode_lock);
	vnfree(mvp);
	return (error);
}
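Every derived field recomputed in step 2 above is a pure function of the on-disk log2 block size, so the reload only has to copy one value and re-run the shifts. A standalone sketch of the same derivations, assuming the classic MINBSIZE of 1024 (LOG_MINBSIZE 10) and 512-byte device blocks:

#include <stdint.h>
#include <stdio.h>

#define MINBSIZE	1024	/* assumed smallest ext2 block size */
#define LOG_MINBSIZE	10
#define DEV_BSHIFT	9	/* 512-byte device blocks */

int
main(void)
{
	uint32_t log_bsize = 2;	/* example on-disk e2fs_log_bsize: 4 KB */

	/* Same derivations as the reload code above. */
	uint32_t bsize = MINBSIZE << log_bsize;
	uint32_t bshift = LOG_MINBSIZE + log_bsize;
	uint32_t qbmask = bsize - 1;
	uint32_t bmask = ~qbmask;
	uint32_t fsbtodb = log_bsize + LOG_MINBSIZE - DEV_BSHIFT;

	printf("bsize=%u bshift=%u fsbtodb=%u qbmask=%#x bmask=%#x\n",
	    bsize, bshift, fsbtodb, qbmask, bmask);
	return 0;
}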
Beispiel #17
0
/*
 * this is exactly what we do here - the problem is that the conversion
 * will blow up some entries by four bytes, so it can't be done in place.
 * This is too bad. Right now the conversion is done entry by entry, the
 * converted entry is sent via uiomove.
 *
 * XXX allocate a buffer, convert as many entries as possible, then send
 * the whole buffer to uiomove
 *
 * ext2_readdir(struct vnode *a_vp, struct uio *a_uio, struct ucred *a_cred)
 */
int
ext2_readdir(struct vop_readdir_args *ap)
{
	struct uio *uio = ap->a_uio;
	int count, error;
	struct ext2_dir_entry_2 *edp, *dp;
	int ncookies;
	struct uio auio;
	struct iovec aiov;
	caddr_t dirbuf;
	int DIRBLKSIZ = VTOI(ap->a_vp)->i_e2fs->s_blocksize;
	int readcnt, retval;
	off_t startoffset = uio->uio_offset;

	if ((error = vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY)) != 0)
		return(error);

	count = uio->uio_resid;
	/*
	 * Avoid complications for partial directory entries by adjusting
	 * the i/o to end at a block boundary.  Don't give up (like ufs
	 * does) if the initial adjustment gives a negative count, since
	 * many callers don't supply a large enough buffer.  The correct
	 * size is a little larger than DIRBLKSIZ to allow for expansion
	 * of directory entries, but some callers just use 512.
	 */
	count -= (uio->uio_offset + count) & (DIRBLKSIZ - 1);
	if (count <= 0)
		count += DIRBLKSIZ;
	if (count > MAXBSIZE)		/* limit to a reasonable size */
		count = MAXBSIZE;

#ifdef EXT2FS_DEBUG
	kprintf("ext2_readdir: uio_offset = %lld, uio_resid = %d, count = %d\n",
	    uio->uio_offset, uio->uio_resid, count);
#endif

	auio = *uio;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = count;
	auio.uio_segflg = UIO_SYSSPACE;
	aiov.iov_len = count;
	dirbuf = kmalloc(count, M_TEMP, M_WAITOK);
	aiov.iov_base = dirbuf;
	error = VOP_READ(ap->a_vp, &auio, 0, ap->a_cred);
	if (error == 0) {
		readcnt = count - auio.uio_resid;
		edp = (struct ext2_dir_entry_2 *)&dirbuf[readcnt];
		ncookies = 0;
		for (dp = (struct ext2_dir_entry_2 *)dirbuf;
		    !error && uio->uio_resid > 0 && dp < edp; ) {
			/*-
			 * "New" ext2fs directory entries differ in 3 ways
			 * from ufs on-disk ones:
			 * - the name is not necessarily NUL-terminated.
			 * - the file type field always exists and always
			 * follows the name length field.
			 * - the file type is encoded in a different way.
			 *
			 * "Old" ext2fs directory entries need no special
			 * conversions, since they binary compatible with
			 * "new" entries having a file type of 0 (i.e.,
			 * EXT2_FT_UNKNOWN).  Splitting the old name length
			 * field didn't make a mess like it did in ufs,
			 * because ext2fs uses a machine-dependent disk
			 * layout.
			 */
			if (dp->rec_len <= 0) {
				error = EIO;
				break;
			}
			retval = vop_write_dirent(&error, uio, dp->inode,
			    FTTODT(dp->file_type), dp->name_len, dp->name);

			if (retval)
				break;
			/* advance dp */
			dp = (struct ext2_dir_entry_2 *)((char *)dp + dp->rec_len);
			if (!error)
				ncookies++;
		}
		/* we need to correct uio_offset */
		uio->uio_offset = startoffset + (caddr_t)dp - dirbuf;

		if (!error && ap->a_ncookies != NULL) {
			off_t *cookiep, *cookies, *ecookies;
			off_t off;

			if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1)
				panic("ext2fs_readdir: unexpected uio from NFS server");
			if (ncookies) {
				cookies = kmalloc(ncookies * sizeof(off_t),
				    M_TEMP, M_WAITOK);
			} else {
				cookies = kmalloc(sizeof(off_t), M_TEMP,
				    M_WAITOK);
			}
			off = startoffset;
			for (dp = (struct ext2_dir_entry_2 *)dirbuf,
			     cookiep = cookies, ecookies = cookies + ncookies;
			     cookiep < ecookies;
			     dp = (struct ext2_dir_entry_2 *)((caddr_t) dp + dp->rec_len)) {
				off += dp->rec_len;
				*cookiep++ = off;
			}
			*ap->a_ncookies = ncookies;
			*ap->a_cookies = cookies;
		}
	}
	kfree(dirbuf, M_TEMP);
	if (ap->a_eofflag)
		*ap->a_eofflag = VTOI(ap->a_vp)->i_size <= uio->uio_offset;
	vn_unlock(ap->a_vp);
	return (error);
}
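The NFS cookie loop at the end of this variant (and of the one in Beispiel #13) simply replays the on-disk record lengths: each cookie is the byte offset just past the corresponding entry, so the server can restart a readdir there. A hedged userland rendering of that accumulation:

#include <stdio.h>

int
main(void)
{
	/* Assumed record lengths of four converted entries. */
	int rec_len[] = { 12, 16, 24, 460 };
	long long off = 0;	/* plays the role of startoffset */
	long long cookies[4];
	int i;

	/* Same pattern as the cookie loop: offset just past each entry. */
	for (i = 0; i < 4; i++) {
		off += rec_len[i];
		cookies[i] = off;
	}
	for (i = 0; i < 4; i++)
		printf("cookie[%d] = %lld\n", i, cookies[i]);
	return 0;
}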
Beispiel #18
0
/*
 * Go through the disk queues to initiate sandbagged IO;
 * go through the inodes to write those that have been modified;
 * initiate the writing of the super block if it has been modified.
 *
 * Note: we are always called with the filesystem marked `MPBUSY'.
 */
int
ext2fs_sync(struct mount *mp, int waitfor, kauth_cred_t cred)
{
	struct vnode *vp, *mvp;
	struct inode *ip;
	struct ufsmount *ump = VFSTOUFS(mp);
	struct m_ext2fs *fs;
	int error, allerror = 0;

	fs = ump->um_e2fs;
	if (fs->e2fs_fmod != 0 && fs->e2fs_ronly != 0) {	/* XXX */
		printf("fs = %s\n", fs->e2fs_fsmnt);
		panic("update: rofs mod");
	}

	/* Allocate a marker vnode. */
	mvp = vnalloc(mp);

	/*
	 * Write back each (modified) inode.
	 */
	mutex_enter(&mntvnode_lock);
loop:
	/*
	 * NOTE: not using the TAILQ_FOREACH here since in this loop vgone()
	 * and vclean() can be called indirectly
	 */
	for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = vunmark(mvp)) {
		vmark(mvp, vp);
		if (vp->v_mount != mp || vismarker(vp))
			continue;
		mutex_enter(vp->v_interlock);
		ip = VTOI(vp);
		if (ip == NULL || (vp->v_iflag & (VI_XLOCK|VI_CLEAN)) != 0 ||
		    vp->v_type == VNON ||
		    ((ip->i_flag &
		      (IN_CHANGE | IN_UPDATE | IN_MODIFIED)) == 0 &&
		     LIST_EMPTY(&vp->v_dirtyblkhd) &&
		     UVM_OBJ_IS_CLEAN(&vp->v_uobj)))
		{
			mutex_exit(vp->v_interlock);
			continue;
		}
		mutex_exit(&mntvnode_lock);
		error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT);
		if (error) {
			mutex_enter(&mntvnode_lock);
			if (error == ENOENT) {
				(void)vunmark(mvp);
				goto loop;
			}
			continue;
		}
		if (vp->v_type == VREG && waitfor == MNT_LAZY)
			error = ext2fs_update(vp, NULL, NULL, 0);
		else
			error = VOP_FSYNC(vp, cred,
			    waitfor == MNT_WAIT ? FSYNC_WAIT : 0, 0, 0);
		if (error)
			allerror = error;
		vput(vp);
		mutex_enter(&mntvnode_lock);
	}
	mutex_exit(&mntvnode_lock);
	vnfree(mvp);
	/*
	 * Force stale file system control information to be flushed.
	 */
	if (waitfor != MNT_LAZY) {
		vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
		if ((error = VOP_FSYNC(ump->um_devvp, cred,
		    waitfor == MNT_WAIT ? FSYNC_WAIT : 0, 0, 0)) != 0)
			allerror = error;
		VOP_UNLOCK(ump->um_devvp);
	}
	/*
	 * Write back modified superblock.
	 */
	if (fs->e2fs_fmod != 0) {
		fs->e2fs_fmod = 0;
		fs->e2fs.e2fs_wtime = time_second;
		if ((error = ext2fs_cgupdate(ump, waitfor)))
			allerror = error;
	}
	return (allerror);
}
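The vmark()/vunmark() pairing in both loops above exists because the mount's vnode list can be edited whenever the loop body sleeps: the marker is an inert list node that keeps the iteration position alive even if its neighbors are reclaimed. A toy sketch of the idea on a singly linked list (all names here are invented for illustration):

#include <stddef.h>
#include <stdio.h>

struct node {
	struct node *next;
	int is_marker;
	int value;
};

/*
 * Resume iteration from a parked marker, skipping other markers
 * the way the vismarker() checks do in the listings above.
 */
static struct node *
resume_after(struct node *marker)
{
	struct node *n = marker->next;

	while (n != NULL && n->is_marker)
		n = n->next;
	return n;
}

int
main(void)
{
	struct node c = { NULL, 0, 3 };
	struct node m = { &c, 1, 0 };	/* marker parked before c */
	struct node b = { &m, 0, 2 };
	struct node a = { &b, 0, 1 };

	/* Even if b were unlinked while we slept, m still points onward. */
	printf("resume at %d\n", resume_after(&m)->value);	/* 3 */
	(void)a;
	return 0;
}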
Beispiel #19
0
/*
 * Write a directory entry after a call to namei, using the parameters
 * that it left in the directory inode.  The argument ip is the inode which
 * the new directory entry will refer to.  Dvp is a pointer to the directory
 * to be written, which was left locked by namei. Remaining parameters
 * (dp->i_offset, dp->i_count) indicate how the space for the new
 * entry is to be obtained.
 */
int
ext2_direnter(struct inode *ip, struct vnode *dvp, struct componentname *cnp)
{
	struct ext2_dir_entry_2 *ep, *nep;
	struct inode *dp;
	struct buf *bp;
	struct ext2_dir_entry_2 newdir;
	struct iovec aiov;
	struct uio auio;
	u_int dsize;
	int error, loc, newentrysize, spacefree;
	char *dirbuf;
	int DIRBLKSIZ = ip->i_e2fs->s_blocksize;

	dp = VTOI(dvp);
	newdir.inode = ip->i_number;
	newdir.name_len = cnp->cn_namelen;
	if (EXT2_HAS_INCOMPAT_FEATURE(ip->i_e2fs->s_es,
	    EXT2_FEATURE_INCOMPAT_FILETYPE))
		newdir.file_type = DTTOFT(IFTODT(ip->i_mode));
	else
		newdir.file_type = EXT2_FT_UNKNOWN;
	bcopy(cnp->cn_nameptr, newdir.name, (unsigned)cnp->cn_namelen + 1);
	newentrysize = EXT2_DIR_REC_LEN(newdir.name_len);
	if (dp->i_count == 0) {
		/*
		 * If dp->i_count is 0, then namei could find no
		 * space in the directory. Here, dp->i_offset will
		 * be on a directory block boundary and we will write the
		 * new entry into a fresh block.
		 */
		if (dp->i_offset & (DIRBLKSIZ - 1))
			panic("ext2_direnter: newblk");
		auio.uio_offset = dp->i_offset;
		newdir.rec_len = DIRBLKSIZ;
		auio.uio_resid = newentrysize;
		aiov.iov_len = newentrysize;
		aiov.iov_base = (caddr_t)&newdir;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_rw = UIO_WRITE;
		auio.uio_segflg = UIO_SYSSPACE;
		auio.uio_td = NULL;
		error = VOP_WRITE(dvp, &auio, IO_SYNC, cnp->cn_cred);
		if (DIRBLKSIZ >
		    VFSTOEXT2(dvp->v_mount)->um_mountp->mnt_stat.f_bsize)
			/* XXX should grow with balloc() */
			panic("ext2_direnter: frag size");
		else if (!error) {
			dp->i_size = roundup(dp->i_size, DIRBLKSIZ);
			dp->i_flag |= IN_CHANGE;
		}
		return (error);
	}

	/*
	 * If dp->i_count is non-zero, then namei found space
	 * for the new entry in the range dp->i_offset to
	 * dp->i_offset + dp->i_count in the directory.
	 * To use this space, we may have to compact the entries located
	 * there, by copying them together towards the beginning of the
	 * block, leaving the free space in one usable chunk at the end.
	 */

	/*
	 * Increase size of directory if entry eats into new space.
	 * This should never push the size past a new multiple of
	 * DIRBLKSIZE.
	 *
	 * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN.
	 */
	if (dp->i_offset + dp->i_count > dp->i_size)
		dp->i_size = dp->i_offset + dp->i_count;
	/*
	 * Get the block containing the space for the new directory entry.
	 */
	if ((error = EXT2_BLKATOFF(dvp, (off_t)dp->i_offset, &dirbuf, &bp)) != 0)
		return (error);
	/*
	 * Find space for the new entry. In the simple case, the entry at
	 * offset base will have the space. If it does not, then namei
	 * arranged that compacting the region dp->i_offset to
	 * dp->i_offset + dp->i_count would yield the
	 * space.
	 */
	ep = (struct ext2_dir_entry_2 *)dirbuf;
	dsize = EXT2_DIR_REC_LEN(ep->name_len);
	spacefree = ep->rec_len - dsize;
	for (loc = ep->rec_len; loc < dp->i_count; ) {
		nep = (struct ext2_dir_entry_2 *)(dirbuf + loc);
		if (ep->inode) {
			/* trim the existing slot */
			ep->rec_len = dsize;
			ep = (struct ext2_dir_entry_2 *)((char *)ep + dsize);
		} else {
			/* overwrite; nothing there; header is ours */
			spacefree += dsize;
		}
		dsize = EXT2_DIR_REC_LEN(nep->name_len);
		spacefree += nep->rec_len - dsize;
		loc += nep->rec_len;
		bcopy((caddr_t)nep, (caddr_t)ep, dsize);
	}
	/*
	 * Update the pointer fields in the previous entry (if any),
	 * copy in the new entry, and write out the block.
	 */
	if (ep->inode == 0) {
		if (spacefree + dsize < newentrysize)
			panic("ext2_direnter: compact1");
		newdir.rec_len = spacefree + dsize;
	} else {
		if (spacefree < newentrysize)
			panic("ext2_direnter: compact2");
		newdir.rec_len = spacefree;
		ep->rec_len = dsize;
		ep = (struct ext2_dir_entry_2 *)((char *)ep + dsize);
	}
	bcopy((caddr_t)&newdir, (caddr_t)ep, (u_int)newentrysize);
	error = bwrite(bp);
	dp->i_flag |= IN_CHANGE | IN_UPDATE;
	if (!error && dp->i_endoff && dp->i_endoff < dp->i_size)
		error = EXT2_TRUNCATE(dvp, (off_t)dp->i_endoff, IO_SYNC,
				      cnp->cn_cred);
	return (error);
}
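The compaction loop is the densest part of this routine: live records are trimmed to the space they actually need and slide left, while every byte they shed, plus every wholly free record, accumulates in spacefree. A toy version of just the bookkeeping, with assumed record sizes:

#include <stdio.h>

/* Minimal record view: actual need vs. on-disk rec_len claim. */
struct rec {
	int inode;	/* 0 = free slot */
	int rec_len;	/* space the record currently claims */
	int dsize;	/* space it actually needs (cf. EXT2_DIR_REC_LEN) */
};

int
main(void)
{
	/* Assumed contents of the range namei found for us. */
	struct rec r[] = {
		{ 11, 16, 12 },	/* live, 4 bytes slack */
		{ 0, 24, 12 },	/* deleted: whole record is reusable */
		{ 17, 32, 16 },	/* live, 16 bytes slack */
	};
	int i, spacefree = 0;

	/* Same bookkeeping as the loop: live entries keep dsize bytes,
	 * everything else drains into spacefree at the end. */
	for (i = 0; i < 3; i++)
		spacefree += r[i].rec_len - (r[i].inode ? r[i].dsize : 0);
	printf("usable chunk after compaction: %d bytes\n", spacefree);
	return 0;
}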
Beispiel #20
0
/*
 * Set attribute vnode op. called from several syscalls
 */
int
ext2fs_setattr(void *v)
{
	struct vop_setattr_args /* {
		struct vnode *a_vp;
		struct vattr *a_vap;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vattr *vap = ap->a_vap;
	struct vnode *vp = ap->a_vp;
	struct inode *ip = VTOI(vp);
	kauth_cred_t cred = ap->a_cred;
	struct lwp *l = curlwp;
	int error;
	kauth_action_t action = KAUTH_VNODE_WRITE_FLAGS;
	bool changing_sysflags = false;

	/*
	 * Check for unsettable attributes.
	 */
	if ((vap->va_type != VNON) || (vap->va_nlink != (nlink_t)VNOVAL) ||
	    (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) ||
	    (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) ||
	    ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) {
		return (EINVAL);
	}
	if (vap->va_flags != VNOVAL) {
		if (vp->v_mount->mnt_flag & MNT_RDONLY)
			return (EROFS);

		/*
		 * Check if we're allowed to change the flags.
		 * If EXT2FS_SYSTEM_FLAGS is set, then the flags are treated
		 * as system flags, otherwise they're considered to be user
		 * flags.
		 */
#ifdef EXT2FS_SYSTEM_FLAGS
		/* Indicate we're changing system flags if we are. */
		if ((vap->va_flags & SF_APPEND) ||
		     (vap->va_flags & SF_IMMUTABLE)) {
			action |= KAUTH_VNODE_WRITE_SYSFLAGS;
			changing_sysflags = true;
		}

		/* Indicate the node has system flags if it does. */
		if (ip->i_e2fs_flags & (EXT2_APPEND | EXT2_IMMUTABLE)) {
			action |= KAUTH_VNODE_HAS_SYSFLAGS;
		}
#endif /* EXT2FS_SYSTEM_FLAGS */

		error = kauth_authorize_vnode(cred, action, vp, NULL,
		    genfs_can_chflags(cred, vp->v_type, ip->i_uid,
		    changing_sysflags));
		if (error)
			return (error);

#ifdef EXT2FS_SYSTEM_FLAGS
		ip->i_e2fs_flags &= ~(EXT2_APPEND | EXT2_IMMUTABLE);
		ip->i_e2fs_flags |=
		    ((vap->va_flags & SF_APPEND) ? EXT2_APPEND : 0) |
		    ((vap->va_flags & SF_IMMUTABLE) ? EXT2_IMMUTABLE : 0);
#else
		ip->i_e2fs_flags &= ~(EXT2_APPEND | EXT2_IMMUTABLE);
		ip->i_e2fs_flags |=
		    ((vap->va_flags & UF_APPEND) ? EXT2_APPEND : 0) |
		    ((vap->va_flags & UF_IMMUTABLE) ? EXT2_IMMUTABLE : 0);
#endif
		ip->i_flag |= IN_CHANGE;
		if (vap->va_flags & (IMMUTABLE | APPEND))
			return (0);
	}
	if (ip->i_e2fs_flags & (EXT2_APPEND | EXT2_IMMUTABLE))
		return (EPERM);
	/*
	 * Go through the fields and update iff not VNOVAL.
	 */
	if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
		if (vp->v_mount->mnt_flag & MNT_RDONLY)
			return (EROFS);
		error = ext2fs_chown(vp, vap->va_uid, vap->va_gid, cred, l);
		if (error)
			return (error);
	}
	if (vap->va_size != VNOVAL) {
		/*
		 * Disallow write attempts on read-only file systems;
		 * unless the file is a socket, fifo, or a block or
		 * character device resident on the file system.
		 */
		switch (vp->v_type) {
		case VDIR:
			return (EISDIR);
		case VLNK:
		case VREG:
			if (vp->v_mount->mnt_flag & MNT_RDONLY)
				return (EROFS);
		default:
			break;
		}
		error = ext2fs_truncate(vp, vap->va_size, 0, cred);
		if (error)
			return (error);
	}
	ip = VTOI(vp);
	if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) {
		if (vp->v_mount->mnt_flag & MNT_RDONLY)
			return (EROFS);
		error = kauth_authorize_vnode(cred, KAUTH_VNODE_WRITE_TIMES, vp,
		    NULL, genfs_can_chtimes(vp, vap->va_vaflags, ip->i_uid,
		    cred));
		if (error)
			return (error);
		if (vap->va_atime.tv_sec != VNOVAL)
			if (!(vp->v_mount->mnt_flag & MNT_NOATIME))
				ip->i_flag |= IN_ACCESS;
		if (vap->va_mtime.tv_sec != VNOVAL) {
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (vp->v_mount->mnt_flag & MNT_RELATIME)
				ip->i_flag |= IN_ACCESS;
		}
		error = ext2fs_update(vp, &vap->va_atime, &vap->va_mtime,
			UPDATE_WAIT);
		if (error)
			return (error);
	}
	error = 0;
	if (vap->va_mode != (mode_t)VNOVAL) {
		if (vp->v_mount->mnt_flag & MNT_RDONLY)
			return (EROFS);
		error = ext2fs_chmod(vp, (int)vap->va_mode, cred, l);
	}
	VN_KNOTE(vp, NOTE_ATTRIB);
	return (error);
}
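One detail worth noting in the flag mapping above: C's conditional operator binds more loosely than |, so the parentheses around each (flag ? bit : 0) term are load-bearing. Unparenthesized, a ? B : 0 | c ? D : 0 parses as a ? B : ((0 | c) ? D : 0) and silently drops D whenever a is set. A two-line demonstration:

#include <stdio.h>

#define FLAG_A	0x1
#define FLAG_B	0x2

int
main(void)
{
	int a = 1, b = 1;

	/* buggy: parses as a ? FLAG_A : ((0 | b) ? FLAG_B : 0) */
	int buggy = a ? FLAG_A : 0 | b ? FLAG_B : 0;
	/* intended mapping, fully parenthesized */
	int fixed = (a ? FLAG_A : 0) | (b ? FLAG_B : 0);

	printf("buggy=%#x fixed=%#x\n", buggy, fixed);	/* 0x1 vs 0x3 */
	return 0;
}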
Beispiel #21
0
/* ARGSUSED */
int
ffs_full_fsync(struct vnode *vp, int flags)
{
	int error, i, uflags;
	struct mount *mp;

	KASSERT(vp->v_tag == VT_UFS);
	KASSERT(VTOI(vp) != NULL);
	KASSERT(vp->v_type != VCHR && vp->v_type != VBLK);

	error = 0;
	uflags = UPDATE_CLOSE | ((flags & FSYNC_WAIT) ? UPDATE_WAIT : 0);

	mp = vp->v_mount;

	/*
	 * Flush all dirty data associated with the vnode.
	 */
	if (vp->v_type == VREG) {
		int pflags = PGO_ALLPAGES | PGO_CLEANIT;

		if ((flags & FSYNC_WAIT))
			pflags |= PGO_SYNCIO;
		if (fstrans_getstate(mp) == FSTRANS_SUSPENDING)
			pflags |= PGO_FREE;
		mutex_enter(vp->v_interlock);
		error = VOP_PUTPAGES(vp, 0, 0, pflags);
		if (error)
			return error;
	}

#ifdef WAPBL
	if (mp && mp->mnt_wapbl) {
		/*
		 * Don't bother writing out metadata if the syncer is
		 * making the request.  We will let the sync vnode
		 * write it out in a single burst through a call to
		 * VFS_SYNC().
		 */
		if ((flags & (FSYNC_DATAONLY | FSYNC_LAZY)) != 0)
			return 0;

		if ((VTOI(vp)->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE
		    | IN_MODIFY | IN_MODIFIED | IN_ACCESSED)) != 0) {
			error = UFS_WAPBL_BEGIN(mp);
			if (error)
				return error;
			error = ffs_update(vp, NULL, NULL, uflags);
			UFS_WAPBL_END(mp);
		}
		if (error || (flags & FSYNC_NOLOG) != 0)
			return error;

		/*
		 * Don't flush the log if the vnode being flushed
		 * contains no dirty buffers that could be in the log.
		 */
		if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
			error = wapbl_flush(mp->mnt_wapbl, 0);
			if (error)
				return error;
		}

		if ((flags & FSYNC_WAIT) != 0) {
			mutex_enter(vp->v_interlock);
			while (vp->v_numoutput != 0)
				cv_wait(&vp->v_cv, vp->v_interlock);
			mutex_exit(vp->v_interlock);
		}

		return error;
	}
#endif /* WAPBL */

	error = vflushbuf(vp, (flags & FSYNC_WAIT) != 0);
	if (error == 0)
		error = ffs_update(vp, NULL, NULL, uflags);
	if (error == 0 && (flags & FSYNC_CACHE) != 0) {
		i = 1;
		(void)VOP_IOCTL(VTOI(vp)->i_devvp, DIOCCACHESYNC, &i, FWRITE,
		    kauth_cred_get());
	}

	return error;
}
Beispiel #22
0
static int
fuse_vnode_cmp(struct vnode *vp, void *nidp)
{
    return (VTOI(vp) != *((uint64_t *)nidp));
}
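A comparison callback like this is only half of a lookup: on FreeBSD it is typically handed to vfs_hash_get(), which runs it against each vnode in the hash bucket until it returns 0. A userland sketch of that contract (the hash walk below is a stand-in for illustration, not the kernel implementation):

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

/* Userland stand-in for a vnode carrying a filesystem node id. */
struct tnode {
	uint64_t nid;
	struct tnode *hnext;	/* hash chain */
};

/* Same contract as fuse_vnode_cmp(): 0 means "matches". */
static int
tnode_cmp(struct tnode *tn, void *nidp)
{
	return (tn->nid != *(uint64_t *)nidp);
}

/* Walk one hash bucket, returning the first node the callback accepts. */
static struct tnode *
hash_get(struct tnode *bucket, int (*cmp)(struct tnode *, void *), void *arg)
{
	struct tnode *tn;

	for (tn = bucket; tn != NULL; tn = tn->hnext)
		if (cmp(tn, arg) == 0)
			return tn;
	return NULL;
}

int
main(void)
{
	struct tnode b = { 7, NULL }, a = { 3, &b };
	uint64_t want = 7;
	struct tnode *hit = hash_get(&a, tnode_cmp, &want);

	printf("found nid %llu\n", (unsigned long long)hit->nid);
	return 0;
}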
Beispiel #23
0
/*
 * this function handles ext4 extents block mapping
 */
static int
ext4_ext_read(struct vop_read_args *ap)
{
	struct vnode *vp;
	struct inode *ip;
	struct uio *uio;
	struct m_ext2fs *fs;
	struct buf *bp;
	struct ext4_extent nex, *ep;
	struct ext4_extent_header *ehp;
	struct ext4_extent_path path;
	daddr_t lbn, nextlbn, newblk = 0;
	off_t bytesinfile;
	u_short mode;
	int cache_type;
	int orig_resid;
	int error = 0;
	int depth = 0;
	long size, xfersize, blkoffset;

	vp = ap->a_vp;
	ip = VTOI(vp);
	mode = ip->i_mode;
	uio = ap->a_uio;
	memset(&path, 0, sizeof(path));

	orig_resid = uio->uio_resid;
	KASSERT(orig_resid >= 0, ("ext2_read: uio->uio_resid < 0"));
	if (orig_resid == 0)
		return (0);
	KASSERT(uio->uio_offset >= 0, ("ext2_read: uio->uio_offset < 0"));
	fs = ip->I_FS;
	if (uio->uio_offset < ip->i_size && uio->uio_offset >= fs->e2fs_maxfilesize)
		return (EOVERFLOW);

	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
		if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0)
			break;
		lbn = lblkno(fs, uio->uio_offset);
		nextlbn = lbn + 1;
		size = BLKSIZE(fs, ip, lbn);
		blkoffset = blkoff(fs, uio->uio_offset);

		xfersize = fs->e2fs_fsize - blkoffset;
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
		if (bytesinfile < xfersize)
			xfersize = bytesinfile;

		/* get block from ext4 extent cache */
		cache_type = ext4_ext_in_cache(ip, lbn, &nex);
		if (cache_type != 0) {
			/* the block has not been allocated yet */
			if (cache_type == EXT4_EXT_CACHE_GAP)
				return (error);
			else if (cache_type == EXT4_EXT_CACHE_IN)
				newblk = lbn - nex.e_blk +
				    (nex.e_start_lo |
				    ((daddr_t)(nex.e_start_hi) << 31) << 1);
		} else {
			ext4_ext_find_extent(fs, ip, lbn, &path);
			depth = ((struct ext4_extent_header *)
			    (ip->i_db))->eh_depth;
			if (path.ep_ext == NULL && depth != 0)
				return (EIO);

			ehp = path.ep_header;
			ep = path.ep_ext;
			if (ep == NULL)
				return (EIO);

			ext4_ext_put_cache(ip, ep, EXT4_EXT_CACHE_IN);

			newblk = lbn - ep->e_blk +
			    (ep->e_start_lo |
			    ((daddr_t)(ep->e_start_hi) << 31) << 1);

			if (path.ep_bp != NULL) {
				brelse(path.ep_bp);
				path.ep_bp = NULL;
			}
		}

		error = bread(ip->i_devvp, fsbtodb(fs, newblk), size,
		    NOCRED, &bp);
		if (error) {
			brelse(bp);
			bp = NULL;
			break;
		}

		size -= bp->b_resid;
		if (size < xfersize) {
			if (size == 0)
				break;
			xfersize = size;
		}
		error = uiomove((char *)bp->b_data + blkoffset,
		    (int)xfersize, uio);
		if (error)
			break;

		bqrelse(bp);
	}

	if (bp != NULL)
		bqrelse(bp);

	return (error);
}
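The repeated (e_start_lo | ((daddr_t)e_start_hi << 31) << 1) expression is how the two halves of an ext4 physical block number are glued back together: extents store the low 32 bits and the high 16 bits separately, and the odd << 31 << 1 is a way of shifting by 32 without a single out-of-range shift on a 32-bit type. A standalone check of the arithmetic, with the field names assumed from the listing:

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint32_t e_start_lo = 0x89abcdefu;	/* low 32 bits */
	uint16_t e_start_hi = 0x0123;		/* high 16 bits */

	/* Same shape as the listing's expression, with a 64-bit type. */
	int64_t pblk = e_start_lo | ((int64_t)e_start_hi << 31) << 1;

	printf("physical block = %#llx\n", (long long)pblk);
	/* expected 0x12389abcdef */
	return 0;
}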
Beispiel #24
0
int
linkup(ino_t orphan, ino_t parentdir)
{
	union lfs_dinode *dp;
	int lostdir;
	ino_t oldlfdir;
	struct inodesc idesc;
	char tempname[BUFSIZ];
	struct uvnode *vp;

	memset(&idesc, 0, sizeof(struct inodesc));
	vp = vget(fs, orphan);
	dp = VTOD(vp);
	lostdir = (lfs_dino_getmode(fs, dp) & LFS_IFMT) == LFS_IFDIR;
	pwarn("UNREF %s ", lostdir ? "DIR" : "FILE");
	pinode(orphan);
	if (preen && lfs_dino_getsize(fs, dp) == 0)
		return (0);
	if (preen)
		printf(" (RECONNECTED)\n");
	else if (reply("RECONNECT") == 0)
		return (0);
	if (lfdir == 0) {
		dp = ginode(ULFS_ROOTINO);
		idesc.id_name = lfname;
		idesc.id_type = DATA;
		idesc.id_func = findino;
		idesc.id_number = ULFS_ROOTINO;
		if ((ckinode(dp, &idesc) & FOUND) != 0) {
			lfdir = idesc.id_parent;
		} else {
			pwarn("NO lost+found DIRECTORY");
			if (preen || reply("CREATE")) {
				lfdir = allocdir(ULFS_ROOTINO, (ino_t) 0, lfmode);
				if (lfdir != 0) {
					if (makeentry(ULFS_ROOTINO, lfdir, lfname) != 0) {
						if (preen)
							printf(" (CREATED)\n");
					} else {
						freedir(lfdir, ULFS_ROOTINO);
						lfdir = 0;
						if (preen)
							printf("\n");
					}
				}
			}
		}
		if (lfdir == 0) {
			pfatal("SORRY. CANNOT CREATE lost+found DIRECTORY");
			printf("\n\n");
			return (0);
		}
	}
	vp = vget(fs, lfdir);
	dp = VTOD(vp);
	if ((lfs_dino_getmode(fs, dp) & LFS_IFMT) != LFS_IFDIR) {
		pfatal("lost+found IS NOT A DIRECTORY");
		if (reply("REALLOCATE") == 0)
			return (0);
		oldlfdir = lfdir;
		if ((lfdir = allocdir(ULFS_ROOTINO, (ino_t) 0, lfmode)) == 0) {
			pfatal("SORRY. CANNOT CREATE lost+found DIRECTORY\n\n");
			return (0);
		}
		if ((changeino(ULFS_ROOTINO, lfname, lfdir) & ALTERED) == 0) {
			pfatal("SORRY. CANNOT CREATE lost+found DIRECTORY\n\n");
			return (0);
		}
		inodirty(VTOI(vp));
		idesc.id_type = ADDR;
		idesc.id_func = pass4check;
		idesc.id_number = oldlfdir;
		adjust(&idesc, lncntp[oldlfdir] + 1);
		lncntp[oldlfdir] = 0;
		vp = vget(fs, lfdir);
		dp = VTOD(vp);
	}
	if (statemap[lfdir] != DFOUND) {
		pfatal("SORRY. NO lost+found DIRECTORY\n\n");
		return (0);
	}
	(void) lftempname(tempname, orphan);
	if (makeentry(lfdir, orphan, tempname) == 0) {
		pfatal("SORRY. NO SPACE IN lost+found DIRECTORY");
		printf("\n\n");
		return (0);
	}
	lncntp[orphan]--;
	if (lostdir) {
		if ((changeino(orphan, "..", lfdir) & ALTERED) == 0 &&
		    parentdir != (ino_t) - 1)
			(void) makeentry(orphan, lfdir, "..");
		vp = vget(fs, lfdir);
		lfs_dino_setnlink(fs, VTOI(vp)->i_din,
		    lfs_dino_getnlink(fs, VTOI(vp)->i_din) + 1);
		inodirty(VTOI(vp));
		lncntp[lfdir]++;
		pwarn("DIR I=%llu CONNECTED. ", (unsigned long long)orphan);
		if (parentdir != (ino_t) - 1)
			printf("PARENT WAS I=%llu\n",
			    (unsigned long long)parentdir);
		if (preen == 0)
			printf("\n");
	}
	return (1);
}
Beispiel #25
0
/*
 * Q_QUOTAON - set up a quota file for a particular filesystem.
 */
int
quotaon(struct thread *td, struct mount *mp, int type, void *fname)
{
	struct ufsmount *ump;
	struct vnode *vp, **vpp;
	struct vnode *mvp;
	struct dquot *dq;
	int error, flags;
	struct nameidata nd;

	error = priv_check(td, PRIV_UFS_QUOTAON);
	if (error != 0) {
		vfs_unbusy(mp);
		return (error);
	}

	if ((mp->mnt_flag & MNT_RDONLY) != 0) {
		vfs_unbusy(mp);
		return (EROFS);
	}

	ump = VFSTOUFS(mp);
	dq = NODQUOT;

	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, fname, td);
	flags = FREAD | FWRITE;
	vfs_ref(mp);
	vfs_unbusy(mp);
	error = vn_open(&nd, &flags, 0, NULL);
	if (error != 0) {
		vfs_rel(mp);
		return (error);
	}
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vp = nd.ni_vp;
	error = vfs_busy(mp, MBF_NOWAIT);
	vfs_rel(mp);
	if (error == 0) {
		if (vp->v_type != VREG) {
			error = EACCES;
			vfs_unbusy(mp);
		}
	}
	if (error != 0) {
		VOP_UNLOCK(vp, 0);
		(void) vn_close(vp, FREAD|FWRITE, td->td_ucred, td);
		return (error);
	}

	UFS_LOCK(ump);
	if ((ump->um_qflags[type] & (QTF_OPENING|QTF_CLOSING)) != 0) {
		UFS_UNLOCK(ump);
		VOP_UNLOCK(vp, 0);
		(void) vn_close(vp, FREAD|FWRITE, td->td_ucred, td);
		vfs_unbusy(mp);
		return (EALREADY);
	}
	ump->um_qflags[type] |= QTF_OPENING|QTF_CLOSING;
	UFS_UNLOCK(ump);
	if ((error = dqopen(vp, ump, type)) != 0) {
		VOP_UNLOCK(vp, 0);
		UFS_LOCK(ump);
		ump->um_qflags[type] &= ~(QTF_OPENING|QTF_CLOSING);
		UFS_UNLOCK(ump);
		(void) vn_close(vp, FREAD|FWRITE, td->td_ucred, td);
		vfs_unbusy(mp);
		return (error);
	}
	VOP_UNLOCK(vp, 0);
	MNT_ILOCK(mp);
	mp->mnt_flag |= MNT_QUOTA;
	MNT_IUNLOCK(mp);

	vpp = &ump->um_quotas[type];
	if (*vpp != vp)
		quotaoff1(td, mp, type);

	/*
	 * When the directory vnode containing the quota file is
	 * inactivated, due to the shared lookup of the quota file
	 * vput()ing the dvp, the qsyncvp() call for the containing
	 * directory would try to acquire the quota lock exclusive.
	 * At the same time, lookup already locked the quota vnode
	 * shared.  Mark the quota vnode lock as allowing recursion
	 * and automatically converting shared locks to exclusive.
	 *
	 * Also mark quota vnode as system.
	 */
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	vp->v_vflag |= VV_SYSTEM;
	VN_LOCK_AREC(vp);
	VN_LOCK_DSHARE(vp);
	VOP_UNLOCK(vp, 0);
	*vpp = vp;
	/*
	 * Save the credential of the process that turned on quotas.
	 * Set up the time limits for this quota.
	 */
	ump->um_cred[type] = crhold(td->td_ucred);
	ump->um_btime[type] = MAX_DQ_TIME;
	ump->um_itime[type] = MAX_IQ_TIME;
	if (dqget(NULLVP, 0, ump, type, &dq) == 0) {
		if (dq->dq_btime > 0)
			ump->um_btime[type] = dq->dq_btime;
		if (dq->dq_itime > 0)
			ump->um_itime[type] = dq->dq_itime;
		dqrele(NULLVP, dq);
	}
	/*
	 * Allow the getdq from getinoquota below to read the quota
	 * from file.
	 */
	UFS_LOCK(ump);
	ump->um_qflags[type] &= ~QTF_CLOSING;
	UFS_UNLOCK(ump);
	/*
	 * Search vnodes associated with this mount point,
	 * adding references to quota file being opened.
	 * NB: only need to add dquot's for inodes being modified.
	 */
again:
	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
			goto again;
		}
		if (vp->v_type == VNON || vp->v_writecount == 0) {
			VOP_UNLOCK(vp, 0);
			vrele(vp);
			continue;
		}
		error = getinoquota(VTOI(vp));
		VOP_UNLOCK(vp, 0);
		vrele(vp);
		if (error) {
			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
			break;
		}
	}

	if (error)
		quotaoff_inchange(td, mp, type);
	UFS_LOCK(ump);
	ump->um_qflags[type] &= ~QTF_OPENING;
	KASSERT((ump->um_qflags[type] & QTF_CLOSING) == 0,
		("quotaon: leaking flags"));
	UFS_UNLOCK(ump);

	vfs_unbusy(mp);
	return (error);
}
Beispiel #26
0
/*
 * allocate a new directory
 */
int
allocdir(ino_t parent, ino_t request, int mode)
{
	ino_t ino;
	char *cp;
	union lfs_dinode *dp;
	struct ubuf *bp;
	LFS_DIRHEADER *dirp;
	struct uvnode *vp;

	ino = allocino(request, LFS_IFDIR | mode);
	vp = vget(fs, ino);
	dp = VTOD(vp);
	bread(vp, lfs_dino_getdb(fs, dp, 0), lfs_sb_getfsize(fs), 0, &bp);
	if (bp->b_flags & B_ERROR) {
		brelse(bp, 0);
		freeino(ino);
		return (0);
	}
	dirp = (LFS_DIRHEADER *)bp->b_data;
	/* . */
	lfs_dir_setino(fs, dirp, ino);
	lfs_dir_setreclen(fs, dirp, LFS_DIRECTSIZ(fs, 1));
	lfs_dir_settype(fs, dirp, LFS_DT_DIR);
	lfs_dir_setnamlen(fs, dirp, 1);
	lfs_copydirname(fs, lfs_dir_nameptr(fs, dirp), ".", 1,
			LFS_DIRECTSIZ(fs, 1));
	/* .. */
	dirp = LFS_NEXTDIR(fs, dirp);
	lfs_dir_setino(fs, dirp, parent);
	lfs_dir_setreclen(fs, dirp, LFS_DIRBLKSIZ - LFS_DIRECTSIZ(fs, 1));
	lfs_dir_settype(fs, dirp, LFS_DT_DIR);
	lfs_dir_setnamlen(fs, dirp, 2);
	lfs_copydirname(fs, lfs_dir_nameptr(fs, dirp), "..", 2,
			LFS_DIRBLKSIZ - LFS_DIRECTSIZ(fs, 1));
	for (cp = &bp->b_data[LFS_DIRBLKSIZ];
	    cp < &bp->b_data[lfs_sb_getfsize(fs)];
	    cp += LFS_DIRBLKSIZ) {
		zerodirblk(cp);
	}
	VOP_BWRITE(bp);
	lfs_dino_setnlink(fs, dp, 2);
	inodirty(VTOI(vp));
	if (ino == ULFS_ROOTINO) {
		lncntp[ino] = lfs_dino_getnlink(fs, dp);
		cacheino(dp, ino);
		return (ino);
	}
	if (statemap[parent] != DSTATE && statemap[parent] != DFOUND) {
		freeino(ino);
		return (0);
	}
	cacheino(dp, ino);
	statemap[ino] = statemap[parent];
	if (statemap[ino] == DSTATE) {
		lncntp[ino] = lfs_dino_getnlink(fs, dp);
		lncntp[parent]++;
	}
	vp = vget(fs, parent);
	dp = VTOD(vp);
	lfs_dino_setnlink(fs, dp, lfs_dino_getnlink(fs, dp) + 1);
	inodirty(VTOI(vp));
	return (ino);
}
Beispiel #27
0
/*
 * Reload all incore data for a filesystem (used after running fsck on
 * the root filesystem and finding things to fix). The filesystem must
 * be mounted read-only.
 *
 * Things to do to update the mount:
 *	1) invalidate all cached meta-data.
 *	2) re-read superblock from disk.
 *	3) invalidate all cluster summary information.
 *	4) invalidate all inactive vnodes.
 *	5) invalidate all cached file data.
 *	6) re-read inode data for all active vnodes.
 * XXX we are missing some steps, in particular # 3, this has to be reviewed.
 */
static int
ext2_reload(struct mount *mp, struct thread *td)
{
	struct vnode *vp, *mvp, *devvp;
	struct inode *ip;
	struct buf *bp;
	struct ext2fs *es;
	struct m_ext2fs *fs;
	struct csum *sump;
	int error, i;
	int32_t *lp;

	if ((mp->mnt_flag & MNT_RDONLY) == 0)
		return (EINVAL);
	/*
	 * Step 1: invalidate all cached meta-data.
	 */
	devvp = VFSTOEXT2(mp)->um_devvp;
	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
	if (vinvalbuf(devvp, 0, 0, 0) != 0)
		panic("ext2_reload: dirty1");
	VOP_UNLOCK(devvp, 0);

	/*
	 * Step 2: re-read superblock from disk.
	 * constants have been adjusted for ext2
	 */
	if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0)
		return (error);
	es = (struct ext2fs *)bp->b_data;
	if (ext2_check_sb_compat(es, devvp->v_rdev, 0) != 0) {
		brelse(bp);
		return (EIO);		/* XXX needs translation */
	}
	fs = VFSTOEXT2(mp)->um_e2fs;
	bcopy(bp->b_data, fs->e2fs, sizeof(struct ext2fs));

	if ((error = compute_sb_data(devvp, es, fs)) != 0) {
		brelse(bp);
		return (error);
	}
#ifdef UNKLAR
	if (fs->fs_sbsize < SBSIZE)
		bp->b_flags |= B_INVAL;
#endif
	brelse(bp);

	/*
	 * Step 3: invalidate all cluster summary information.
	 */
	if (fs->e2fs_contigsumsize > 0) {
		lp = fs->e2fs_maxcluster;
		sump = fs->e2fs_clustersum;
		for (i = 0; i < fs->e2fs_gcount; i++, sump++) {
			*lp++ = fs->e2fs_contigsumsize;
			sump->cs_init = 0;
			bzero(sump->cs_sum, fs->e2fs_contigsumsize + 1);
		}
	}

loop:
	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
		/*
		 * Step 4: invalidate all cached file data.
		 */
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
			goto loop;
		}
		if (vinvalbuf(vp, 0, 0, 0))
			panic("ext2_reload: dirty2");

		/*
		 * Step 5: re-read inode data for all active vnodes.
		 */
		ip = VTOI(vp);
		error = bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
		    (int)fs->e2fs_bsize, NOCRED, &bp);
		if (error) {
			VOP_UNLOCK(vp, 0);
			vrele(vp);
			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
			return (error);
		}
		ext2_ei2i((struct ext2fs_dinode *) ((char *)bp->b_data +
		    EXT2_INODE_SIZE(fs) * ino_to_fsbo(fs, ip->i_number)), ip);
		brelse(bp);
		VOP_UNLOCK(vp, 0);
		vrele(vp);
	}
	return (0);
}
Beispiel #28
0
void
osi_DisableAtimes(struct vnode *avp)
{
    struct inode *ip = VTOI(avp);
    ip->i_flag &= ~IN_ACCESS;
}
Beispiel #29
0
/*
 * Retrieve the ACL on a file.
 *
 * As part of the ACL is stored in the inode, and the rest in an EA,
 * assemble both into a final ACL product.  Right now this is not done
 * very efficiently.
 */
static int
ufs_getacl_posix1e(struct vop_getacl_args *ap)
{
	struct inode *ip = VTOI(ap->a_vp);
	int error;
	struct oldacl *old;

	/*
	 * XXX: If ufs_getacl() should work on file systems not supporting
	 * ACLs, remove this check.
	 */
	if ((ap->a_vp->v_mount->mnt_flag & MNT_ACLS) == 0)
		return (EINVAL);

	old = malloc(sizeof(*old), M_ACL, M_WAITOK | M_ZERO);

	/*
	 * Attempt to retrieve the ACL from the extended attributes.
	 */
	error = ufs_get_oldacl(ap->a_type, old, ap->a_vp, ap->a_td);
	switch (error) {
	/*
	 * XXX: If ufs_getacl() should work on filesystems
	 * without the EA configured, add case EOPNOTSUPP here.
	 */
	case ENOATTR:
		switch (ap->a_type) {
		case ACL_TYPE_ACCESS:
			/*
			 * Legitimately no ACL set on object, purely
			 * emulate it through the inode.  These fields will
			 * be updated when the ACL is synchronized with
			 * the inode later.
			 */
			old->acl_cnt = 3;
			old->acl_entry[0].ae_tag = ACL_USER_OBJ;
			old->acl_entry[0].ae_id = ACL_UNDEFINED_ID;
			old->acl_entry[0].ae_perm = ACL_PERM_NONE;
			old->acl_entry[1].ae_tag = ACL_GROUP_OBJ;
			old->acl_entry[1].ae_id = ACL_UNDEFINED_ID;
			old->acl_entry[1].ae_perm = ACL_PERM_NONE;
			old->acl_entry[2].ae_tag = ACL_OTHER;
			old->acl_entry[2].ae_id = ACL_UNDEFINED_ID;
			old->acl_entry[2].ae_perm = ACL_PERM_NONE;
			break;

		case ACL_TYPE_DEFAULT:
			/*
			 * Unlike ACL_TYPE_ACCESS, there is no relationship
			 * between the inode contents and the ACL, and it is
			 * therefore possible for the request for the ACL
			 * to fail since the ACL is undefined.  In this
			 * situation, return success and an empty ACL,
			 * as required by POSIX.1e.
			 */
			old->acl_cnt = 0;
			break;
		}
		/* FALLTHROUGH */
	case 0:
		error = acl_copy_oldacl_into_acl(old, ap->a_aclp);
		if (error != 0)
			break;

		if (ap->a_type == ACL_TYPE_ACCESS)
			ufs_sync_acl_from_inode(ip, ap->a_aclp);
	default:
		break;
	}

	free(old, M_ACL);
	return (error);
}
Beispiel #30
0
/*
 * Convert a component of a pathname into a pointer to a locked inode.
 * This is a very central and rather complicated routine.
 * If the file system is not maintained in a strict tree hierarchy,
 * this can result in a deadlock situation (see comments in code below).
 *
 * The cnp->cn_nameiop argument is LOOKUP, CREATE, RENAME, or DELETE depending
 * on whether the name is to be looked up, created, renamed, or deleted.
 * When CREATE, RENAME, or DELETE is specified, information usable in
 * creating, renaming, or deleting a directory entry may be calculated.
 * If flag has LOCKPARENT or'ed into it and the target of the pathname
 * exists, lookup returns both the target and its parent directory locked.
 * When creating or renaming and LOCKPARENT is specified, the target may
 * not be ".".  When deleting and LOCKPARENT is specified, the target may
 * be "."., but the caller must check to ensure it does an vrele and vput
 * instead of two vputs.
 *
 * Overall outline of ufs_lookup:
 *
 *	check accessibility of directory
 *	look for name in cache, if found, then if at end of path
 *	  and deleting or creating, drop it, else return name
 *	search for name in directory, to found or notfound
 * notfound:
 *	if creating, return locked directory, leaving info on available slots
 *	else return error
 * found:
 *	if at end of path and deleting, return information to allow delete
 *	if at end of path and rewriting (RENAME and LOCKPARENT), lock target
 *	  inode and return info to allow rewrite
 *	if not at end, add name to cache; if at end and neither creating
 *	  nor deleting, add name to cache
 */
int
ufs_lookup(void *v)
{
	struct vop_lookup_args *ap = v;
	struct vnode *vdp;		/* vnode for directory being searched */
	struct inode *dp;		/* inode for directory being searched */
	struct buf *bp;			/* a buffer of directory entries */
	struct direct *ep;		/* the current directory entry */
	int entryoffsetinblock;		/* offset of ep in bp's buffer */
	enum {NONE, COMPACT, FOUND} slotstatus;
	doff_t slotoffset;		/* offset of area with free space */
	int slotsize;			/* size of area at slotoffset */
	int slotfreespace;		/* amount of space free in slot */
	int slotneeded;			/* size of the entry we're seeking */
	int numdirpasses;		/* strategy for directory search */
	doff_t endsearch;		/* offset to end directory search */
	doff_t prevoff;			/* prev entry dp->i_offset */
	struct vnode *pdp;		/* saved dp during symlink work */
	struct vnode *tdp;		/* returned by VFS_VGET */
	doff_t enduseful;		/* pointer past last used dir slot */
	u_long bmask;			/* block offset mask */
	int lockparent;			/* 1 => lockparent flag is set */
	int wantparent;			/* 1 => wantparent or lockparent flag */
	int namlen, error;
	struct vnode **vpp = ap->a_vpp;
	struct componentname *cnp = ap->a_cnp;
	struct ucred *cred = cnp->cn_cred;
	int flags;
	int nameiop = cnp->cn_nameiop;
	struct proc *p = cnp->cn_proc;

	cnp->cn_flags &= ~PDIRUNLOCK;
	flags = cnp->cn_flags;

	bp = NULL;
	slotoffset = -1;
	*vpp = NULL;
	vdp = ap->a_dvp;
	dp = VTOI(vdp);
	lockparent = flags & LOCKPARENT;
	wantparent = flags & (LOCKPARENT|WANTPARENT);

	/*
	 * Check accessibility of directory.
	 */
	if ((DIP(dp, mode) & IFMT) != IFDIR)
		return (ENOTDIR);
	if ((error = VOP_ACCESS(vdp, VEXEC, cred, cnp->cn_proc)) != 0)
		return (error);

	if ((flags & ISLASTCN) && (vdp->v_mount->mnt_flag & MNT_RDONLY) &&
	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
		return (EROFS);

	/*
	 * We now have a segment name to search for, and a directory to search.
	 *
	 * Before tediously performing a linear scan of the directory,
	 * check the name cache to see if the directory/name pair
	 * we are looking for is known already.
	 */
	if ((error = cache_lookup(vdp, vpp, cnp)) >= 0)
		return (error);

	/*
	 * Suppress search for slots unless creating
	 * file and at end of pathname, in which case
	 * we watch for a place to put the new file in
	 * case it doesn't already exist.
	 */
	slotstatus = FOUND;
	slotfreespace = slotsize = slotneeded = 0;
	if ((nameiop == CREATE || nameiop == RENAME) &&
	    (flags & ISLASTCN)) {
		slotstatus = NONE;
		slotneeded = (sizeof(struct direct) - MAXNAMLEN +
			cnp->cn_namelen + 3) &~ 3;
	}
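	/*
	 * The slotneeded value just computed is DIRSIZ() for the new
	 * name: the 8-byte struct direct header plus the name (and its
	 * terminating NUL) rounded up to a 4-byte boundary; e.g. a
	 * 5-character name needs a 16-byte slot.
	 */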

	/*
	 * If there is cached information on a previous search of
	 * this directory, pick up where we last left off.
	 * We cache only lookups as these are the most common
	 * and have the greatest payoff. Caching CREATE has little
	 * benefit as it usually must search the entire directory
	 * to determine that the entry does not exist. Caching the
	 * location of the last DELETE or RENAME has not reduced
	 * profiling time and hence has been removed in the interest
	 * of simplicity.
	 */
	bmask = VFSTOUFS(vdp->v_mount)->um_mountp->mnt_stat.f_iosize - 1;

#ifdef UFS_DIRHASH
	/*
	 * Use dirhash for fast operations on large directories. The logic
	 * to determine whether to hash the directory is contained within
	 * ufsdirhash_build(); a zero return means that it decided to hash
	 * this directory and it successfully built up the hash table.
	 */
	if (ufsdirhash_build(dp) == 0) {
		/* Look for a free slot if needed. */
		enduseful = DIP(dp, size);
		if (slotstatus != FOUND) {
			slotoffset = ufsdirhash_findfree(dp, slotneeded,
			    &slotsize);
			if (slotoffset >= 0) {
				slotstatus = COMPACT;
				enduseful = ufsdirhash_enduseful(dp);
				if (enduseful < 0)
					enduseful = DIP(dp, size);
			}
		}
		/* Look up the component. */
		numdirpasses = 1;
		entryoffsetinblock = 0; /* silence compiler warning */
		switch (ufsdirhash_lookup(dp, cnp->cn_nameptr, cnp->cn_namelen,
		    &dp->i_offset, &bp, nameiop == DELETE ? &prevoff : NULL)) {
		case 0:
			ep = (struct direct *)((char *)bp->b_data +
			    (dp->i_offset & bmask));
			goto foundentry;
		case ENOENT:
#define roundup2(x, y)	(((x)+((y)-1))&(~((y)-1))) /* if y is a power of two */
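			/* E.g. roundup2(600, 512) == 1024: the new entry
			 * will go at the end of the directory, rounded up
			 * to a DIRBLKSIZ boundary. */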
			dp->i_offset = roundup2(DIP(dp, size), DIRBLKSIZ);
			goto notfound;
		default:
			/* Something failed; just do a linear search. */
			break;
		}
	}
#endif /* UFS_DIRHASH */

	if (nameiop != LOOKUP || dp->i_diroff == 0 ||
	    dp->i_diroff >= DIP(dp, size)) {
		entryoffsetinblock = 0;
		dp->i_offset = 0;
		numdirpasses = 1;
	} else {
		dp->i_offset = dp->i_diroff;
		if ((entryoffsetinblock = dp->i_offset & bmask) &&
		    (error = UFS_BUFATOFF(dp, (off_t)dp->i_offset, NULL, &bp)))
			return (error);
		numdirpasses = 2;
		nchstats.ncs_2passes++;
	}
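	/*
	 * numdirpasses == 2 records that the scan starts at the cached
	 * i_diroff rather than at offset 0; if this first pass comes up
	 * empty, the code following the search loop resets the scan to
	 * offset 0 with endsearch = i_diroff for a second pass.
	 */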
	prevoff = dp->i_offset;
	endsearch = roundup(DIP(dp, size), DIRBLKSIZ);
	enduseful = 0;

searchloop:
	while (dp->i_offset < endsearch) {
		/*
		 * If necessary, get the next directory block.
		 */
		if ((dp->i_offset & bmask) == 0) {
			if (bp != NULL)
				brelse(bp);
			error = UFS_BUFATOFF(dp, (off_t)dp->i_offset, NULL,
					     &bp);
			if (error)
				return (error);
			entryoffsetinblock = 0;
		}
		/*
		 * If still looking for a slot, and at a DIRBLKSIZE
		 * boundary, have to start looking for free space again.
		 */
		if (slotstatus == NONE &&
		    (entryoffsetinblock & (DIRBLKSIZ - 1)) == 0) {
			slotoffset = -1;
			slotfreespace = 0;
		}
		/*
		 * Get pointer to next entry.
		 * Full validation checks are slow, so we only check
		 * enough to ensure forward progress through the
		 * directory. Complete checks can be run by patching
		 * "dirchk" to be true.
		 */
		ep = (struct direct *)((char *)bp->b_data + entryoffsetinblock);
		if (ep->d_reclen == 0 ||
		    (dirchk && ufs_dirbadentry(vdp, ep, entryoffsetinblock))) {
			int i;

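			/*
			 * Directory entries never span a DIRBLKSIZ
			 * boundary, so skipping ahead to the next boundary
			 * is always a safe way to resume the scan.
			 */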
			ufs_dirbad(dp, dp->i_offset, "mangled entry");
			i = DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1));
			dp->i_offset += i;
			entryoffsetinblock += i;
			continue;
		}

		/*
		 * If an appropriate sized slot has not yet been found,
		 * check to see if one is available. Also accumulate space
		 * in the current block so that we can determine if
		 * compaction is viable.
		 */
		if (slotstatus != FOUND) {
			int size = ep->d_reclen;

			if (ep->d_ino != 0)
				size -= DIRSIZ(FSFMT(vdp), ep);
			if (size > 0) {
				if (size >= slotneeded) {
					slotstatus = FOUND;
					slotoffset = dp->i_offset;
					slotsize = ep->d_reclen;
				} else if (slotstatus == NONE) {
					slotfreespace += size;
					if (slotoffset == -1)
						slotoffset = dp->i_offset;
					if (slotfreespace >= slotneeded) {
						slotstatus = COMPACT;
						slotsize = dp->i_offset +
						      ep->d_reclen - slotoffset;
					}
				}
			}
		}

		/*
		 * Check for a name match.
		 */
		if (ep->d_ino) {
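			/*
			 * Old-format file systems (mnt_maxsymlinklen == 0)
			 * stored a 16-bit name length in the bytes that now
			 * hold d_type and d_namlen, so on little-endian
			 * machines an old-format entry keeps its length in
			 * the byte now called d_type.
			 */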
#			if (BYTE_ORDER == LITTLE_ENDIAN)
				if (vdp->v_mount->mnt_maxsymlinklen > 0)
					namlen = ep->d_namlen;
				else
					namlen = ep->d_type;
#			else
				namlen = ep->d_namlen;
#			endif
			if (namlen == cnp->cn_namelen &&
			    !bcmp(cnp->cn_nameptr, ep->d_name,
				(unsigned)namlen)) {
#ifdef UFS_DIRHASH
foundentry:
#endif
				/*
				 * Save directory entry's inode number and
				 * reclen in ndp->ni_ufs area, and release
				 * directory buffer.
				 */
				dp->i_ino = ep->d_ino;
				dp->i_reclen = ep->d_reclen;
				goto found;
			}
		}
		prevoff = dp->i_offset;
		dp->i_offset += ep->d_reclen;
		entryoffsetinblock += ep->d_reclen;
		if (ep->d_ino)
			enduseful = dp->i_offset;
	}
#ifdef UFS_DIRHASH
notfound:
#endif
	/*
	 * If we started in the middle of the directory and failed
	 * to find our target, we must check the beginning as well.
	 */
	if (numdirpasses == 2) {
		numdirpasses--;
		dp->i_offset = 0;
		endsearch = dp->i_diroff;
		goto searchloop;
	}
	if (bp != NULL)
		brelse(bp);
	/*
	 * If creating, and at end of pathname and current
	 * directory has not been removed, then can consider
	 * allowing file to be created.
	 */
	if ((nameiop == CREATE || nameiop == RENAME) &&
	    (flags & ISLASTCN) && dp->i_effnlink != 0) {
		/*
		 * Access for write is interpreted as allowing
		 * creation of files in the directory.
		 */
		error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_proc);
		if (error)
			return (error);
		/*
		 * Return an indication of where the new directory
		 * entry should be put.  If we didn't find a slot,
		 * then set dp->i_count to 0 indicating
		 * that the new slot belongs at the end of the
		 * directory. If we found a slot, then the new entry
		 * can be put in the range from dp->i_offset to
		 * dp->i_offset + dp->i_count.
		 */
		if (slotstatus == NONE) {
			dp->i_offset = roundup(DIP(dp, size), DIRBLKSIZ);
			dp->i_count = 0;
			enduseful = dp->i_offset;
		} else if (nameiop == DELETE) {
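			/*
			 * Note: nameiop cannot be DELETE here because the
			 * enclosing test admits only CREATE and RENAME, so
			 * this branch is never taken.
			 */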
			dp->i_offset = slotoffset;
			if ((dp->i_offset & (DIRBLKSIZ - 1)) == 0)
				dp->i_count = 0;
			else
				dp->i_count = dp->i_offset - prevoff;
		} else {
			dp->i_offset = slotoffset;
			dp->i_count = slotsize;
			if (enduseful < slotoffset + slotsize)
				enduseful = slotoffset + slotsize;
		}
		dp->i_endoff = roundup(enduseful, DIRBLKSIZ);
		/*
		 * We return with the directory locked, so that
		 * the parameters we set up above will still be
		 * valid if we actually decide to do a direnter().
		 * We return ni_vp == NULL to indicate that the entry
		 * does not currently exist; we leave a pointer to
		 * the (locked) directory inode in ndp->ni_dvp.
		 * The pathname buffer is saved so that the name
		 * can be obtained later.
		 *
		 * NB - if the directory is unlocked, then this
		 * information cannot be used.
		 */
		cnp->cn_flags |= SAVENAME;
		if (!lockparent) {
			VOP_UNLOCK(vdp, 0, p);
			cnp->cn_flags |= PDIRUNLOCK;
		}
		return (EJUSTRETURN);
	}
	/*
	 * Insert name into cache (as non-existent) if appropriate.
	 */
	if ((cnp->cn_flags & MAKEENTRY) && nameiop != CREATE)
		cache_enter(vdp, *vpp, cnp);
	return (ENOENT);

found:
	if (numdirpasses == 2)
		nchstats.ncs_pass2++;
	/*
	 * Check that directory length properly reflects presence
	 * of this entry.
	 */
	if (dp->i_offset + DIRSIZ(FSFMT(vdp), ep) > DIP(dp, size)) {
		ufs_dirbad(dp, dp->i_offset, "i_ffs_size too small");
		DIP_ASSIGN(dp, size, dp->i_offset + DIRSIZ(FSFMT(vdp), ep));
		dp->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	brelse(bp);

	/*
	 * Found component in pathname.
	 * If the final component of path name, save information
	 * in the cache as to where the entry was found.
	 */
	if ((flags & ISLASTCN) && nameiop == LOOKUP)
		dp->i_diroff = dp->i_offset &~ (DIRBLKSIZ - 1);

	/*
	 * If deleting, and at end of pathname, return
	 * parameters which can be used to remove file.
	 * If the wantparent flag isn't set, we return only
	 * the directory (in ndp->ni_dvp), otherwise we go
	 * on and lock the inode, being careful with ".".
	 */
	if (nameiop == DELETE && (flags & ISLASTCN)) {
		/*
		 * Write access to directory required to delete files.
		 */
		error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_proc);
		if (error)
			return (error);
		/*
		 * Return pointer to current entry in dp->i_offset,
		 * and distance past previous entry (if there
		 * is a previous entry in this block) in dp->i_count.
		 * Save directory inode pointer in ndp->ni_dvp for dirremove().
		 */
		if ((dp->i_offset & (DIRBLKSIZ - 1)) == 0)
			dp->i_count = 0;
		else
			dp->i_count = dp->i_offset - prevoff;
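		/*
		 * Deleting ".": return the directory itself with an extra
		 * reference.  As noted in the comment at the top, the
		 * caller must then do a vrele and a vput rather than two
		 * vputs.
		 */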
		if (dp->i_number == dp->i_ino) {
			VREF(vdp);
			*vpp = vdp;
			return (0);
		}
		error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp);
		if (error)
			return (error);
		/*
		 * If directory is "sticky", then user must own
		 * the directory, or the file in it, else she
		 * may not delete it (unless she's root). This
		 * implements append-only directories.
		 */
		if ((DIP(dp, mode) & ISVTX) &&
		    VOP_ACCESS(vdp, VADMIN, cred, cnp->cn_proc) &&
		    VOP_ACCESS(tdp, VADMIN, cred, cnp->cn_proc)) {
			vput(tdp);
			return (EPERM);
		}
		*vpp = tdp;
		if (!lockparent) {
			VOP_UNLOCK(vdp, 0, p);
			cnp->cn_flags |= PDIRUNLOCK;
		}
		return (0);
	}

	/*
	 * If rewriting (RENAME), return the inode and the
	 * information required to rewrite the present directory entry.
	 * Must get inode of directory entry to verify it's a
	 * regular file, or empty directory.
	 */
	if (nameiop == RENAME && wantparent &&
	    (flags & ISLASTCN)) {
		error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_proc);
		if (error)
			return (error);
		/*
		 * Careful about locking second inode.
		 * This can only occur if the target is ".".
		 */
		if (dp->i_number == dp->i_ino)
			return (EISDIR);
		error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp);
		if (error)
			return (error);
		*vpp = tdp;
		cnp->cn_flags |= SAVENAME;
		if (!lockparent) {
			VOP_UNLOCK(vdp, 0, p);
			cnp->cn_flags |= PDIRUNLOCK;
		}
		return (0);
	}

	/*
	 * Step through the translation in the name.  We do not `vput' the
	 * directory because we may need it again if a symbolic link
	 * is relative to the current directory.  Instead we save it
	 * unlocked as "pdp".  We must get the target inode before unlocking
	 * the directory to ensure that the inode will not be removed
	 * before we get it.  We prevent deadlock by always fetching
	 * inodes from the root, moving down the directory tree. Thus
	 * when following backward pointers ".." we must unlock the
	 * parent directory before getting the requested directory.
	 * There is a potential race condition here if both the current
	 * and parent directories are removed before the VFS_VGET for the
	 * inode associated with ".." returns.  We hope that this occurs
	 * infrequently since we cannot avoid this race condition without
	 * implementing a sophisticated deadlock detection algorithm.
	 * Note also that this simple deadlock detection scheme will not
	 * work if the file system has any hard links other than ".."
	 * that point backwards in the directory structure.
	 */
	pdp = vdp;
	if (flags & ISDOTDOT) {
		VOP_UNLOCK(pdp, 0, p);	/* race to get the inode */
		cnp->cn_flags |= PDIRUNLOCK;
		error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp);
		if (error) {
			if (vn_lock(pdp, LK_EXCLUSIVE | LK_RETRY, p) == 0)
				cnp->cn_flags &= ~PDIRUNLOCK;
			return (error);
		}
		if (lockparent && (flags & ISLASTCN)) {
			if ((error = vn_lock(pdp, LK_EXCLUSIVE, p))) {
				vput(tdp);
				return (error);
			}
			cnp->cn_flags &= ~PDIRUNLOCK;
		}
		*vpp = tdp;
	} else if (dp->i_number == dp->i_ino) {
		VREF(vdp);	/* we want ourself, i.e. "." */
		*vpp = vdp;
	} else {
		error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp);
		if (error)
			return (error);
		if (!lockparent || !(flags & ISLASTCN)) {
			VOP_UNLOCK(pdp, 0, p);
			cnp->cn_flags |= PDIRUNLOCK;
		}
		*vpp = tdp;
	}

	/*
	 * Insert name into cache if appropriate.
	 */
	if (cnp->cn_flags & MAKEENTRY)
		cache_enter(vdp, *vpp, cnp);
	return (0);
}
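
/*
 * Illustrative sketch only (not part of the original source): one way a
 * create-path caller could consume the EJUSTRETURN protocol documented
 * above. The helper name example_create() and its hand-built
 * vop_lookup_args are hypothetical; ufs_lookup() is normally reached
 * through namei()/VOP_LOOKUP(). The ufs_direnter() call matches the
 * signature shown earlier in this file.
 */
static int
example_create(struct vnode *dvp, struct componentname *cnp,
    struct direct *newdir, struct vnode **vpp)
{
	struct vop_lookup_args ap;
	int error;

	cnp->cn_nameiop = CREATE;	/* we intend to add an entry */
	cnp->cn_flags |= ISLASTCN;	/* final component of the path */

	ap.a_dvp = dvp;
	ap.a_vpp = vpp;
	ap.a_cnp = cnp;

	error = ufs_lookup(&ap);
	if (error == 0) {
		/* The name already exists; release the vnode we got back. */
		vput(*vpp);
		return (EEXIST);
	}
	if (error != EJUSTRETURN)
		return (error);		/* a real lookup failure */

	/*
	 * EJUSTRETURN: the name is absent, dvp is still locked, and
	 * VTOI(dvp)->i_offset and i_count describe where the new entry
	 * fits -- exactly the state ufs_direnter() expects.
	 */
	return (ufs_direnter(dvp, NULL, newdir, cnp, NULL));
}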