Exemple #1
0
/*
 * Traverse backward across mountpoint from the
 * root vnode of a filesystem to its mounted-on
 * vnode.
 */
vnode_t *
untraverse(vnode_t *vp)
{
	vnode_t *tvp, *nextvp;

	tvp = vp;
	for (;;) {
		if (! (tvp->v_flag & VROOT))
			break;

		/* lock vfs to prevent unmount of this vfs */
		vfs_lock_wait(tvp->v_vfsp);

		if ((nextvp = tvp->v_vfsp->vfs_vnodecovered) == NULL) {
			vfs_unlock(tvp->v_vfsp);
			break;
		}

		/*
		 * Hold nextvp to prevent unmount.  After unlock vfs and
		 * rele tvp, any number of overlays could be unmounted.
		 * Putting a hold on vfs_vnodecovered will only allow
		 * tvp's vfs to be unmounted. Of course if caller placed
		 * extra hold on vp before calling untraverse, the following
		 * hold would not be needed.  Since prev actions of caller
		 * are unknown, we need to hold here just to be safe.
		 */
		VN_HOLD(nextvp);
		vfs_unlock(tvp->v_vfsp);
		VN_RELE(tvp);
		tvp = nextvp;
	}

	return (tvp);
}
Exemple #2
0
static int
ext2_mountroot()
{
#if !defined(__FreeBSD__)
    extern struct vnode *rootvp;
#endif
    register struct ext2_sb_info *fs;
    register struct mount *mp;
#if defined(__FreeBSD__)
    struct proc *p = curproc;
#else
    struct proc *p = get_proc();	/* XXX */
#endif
    struct ufsmount *ump;
    u_int size;
    int error;

    /*
     * Get vnodes for swapdev and rootdev.
     */
    if (bdevvp(swapdev, &swapdev_vp) || bdevvp(rootdev, &rootvp))
        panic("ext2_mountroot: can't setup bdevvp's");

    mp = bsd_malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
    bzero((char *)mp, (u_long)sizeof(struct mount));
    mp->mnt_op = &ext2fs_vfsops;
    mp->mnt_flag = MNT_RDONLY;
    if (error = ext2_mountfs(rootvp, mp, p)) {
        bsd_free(mp, M_MOUNT);
        return (error);
    }
    if (error = vfs_lock(mp)) {
        (void)ext2_unmount(mp, 0, p);
        bsd_free(mp, M_MOUNT);
        return (error);
    }
#if defined(__FreeBSD__)
    CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
#else
    TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
#endif
    mp->mnt_flag |= MNT_ROOTFS;
    mp->mnt_vnodecovered = NULLVP;
    ump = VFSTOUFS(mp);
    fs = ump->um_e2fs;
    bzero(fs->fs_fsmnt, sizeof(fs->fs_fsmnt));
    fs->fs_fsmnt[0] = '/';
    bcopy((caddr_t)fs->fs_fsmnt, (caddr_t)mp->mnt_stat.f_mntonname,
          MNAMELEN);
    (void) copystr(ROOTNAME, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
                   &size);
    bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
    (void)ext2_statfs(mp, &mp->mnt_stat, p);
    vfs_unlock(mp);
    inittodr(fs->s_es->s_wtime);		/* this helps to set the time */
    return (0);
}
static void
zfsctl_rename_snap(zfsctl_snapdir_t *sdp, zfs_snapentry_t *sep, const char *nm)
{
	avl_index_t where;
	vfs_t *vfsp;
	refstr_t *pathref;
	char newpath[MAXNAMELEN];
	char *tail;

	ASSERT(MUTEX_HELD(&sdp->sd_lock));
	ASSERT(sep != NULL);

	vfsp = vn_mountedvfs(sep->se_root);
	ASSERT(vfsp != NULL);

	vfs_lock_wait(vfsp);

	/*
	 * Change the name in the AVL tree.
	 */
	avl_remove(&sdp->sd_snaps, sep);
	kmem_free(sep->se_name, strlen(sep->se_name) + 1);
	sep->se_name = kmem_alloc(strlen(nm) + 1, KM_SLEEP);
	(void) strcpy(sep->se_name, nm);
	VERIFY(avl_find(&sdp->sd_snaps, sep, &where) == NULL);
	avl_insert(&sdp->sd_snaps, sep, where);

	/*
	 * Change the current mountpoint info:
	 * 	- update the tail of the mntpoint path
	 *	- update the tail of the resource path
	 */
	pathref = vfs_getmntpoint(vfsp);
	(void) strncpy(newpath, refstr_value(pathref), sizeof (newpath));
	VERIFY((tail = strrchr(newpath, '/')) != NULL);
	*(tail+1) = '\0';
	ASSERT3U(strlen(newpath) + strlen(nm), <, sizeof (newpath));
	(void) strcat(newpath, nm);
	refstr_rele(pathref);
	vfs_setmntpoint(vfsp, newpath);

	pathref = vfs_getresource(vfsp);
	(void) strncpy(newpath, refstr_value(pathref), sizeof (newpath));
	VERIFY((tail = strrchr(newpath, '@')) != NULL);
	*(tail+1) = '\0';
	ASSERT3U(strlen(newpath) + strlen(nm), <, sizeof (newpath));
	(void) strcat(newpath, nm);
	refstr_rele(pathref);
	vfs_setresource(vfsp, newpath);

	vfs_unlock(vfsp);
}
Exemple #4
0
/*
 * Return the lowermost vnode if this is a mountpoint.
 */
static vnode_t *
vn_under(vnode_t *vp)
{
	vnode_t *uvp;
	vfs_t *vfsp;

	while (vp->v_flag & VROOT) {

		vfsp = vp->v_vfsp;
		vfs_rlock_wait(vfsp);
		if ((uvp = vfsp->vfs_vnodecovered) == NULL ||
		    (vfsp->vfs_flag & VFS_UNMOUNTED)) {
			vfs_unlock(vfsp);
			break;
		}
		VN_HOLD(uvp);
		vfs_unlock(vfsp);
		VN_RELE(vp);
		vp = uvp;
	}

	return (vp);
}
Exemple #5
0
static int
ext2_mountroot()
{
	register struct ext2_sb_info *fs;
	register struct mount *mp;
	struct proc *p = curproc;
	struct ufsmount *ump;
	u_int size;
	int error;
	
	if ((error = bdevvp(rootdev, &rootvp))) {
		printf("ext2_mountroot: can't find rootvp\n");
		return (error);
	}
	mp = bsd_malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
	bzero((char *)mp, (u_long)sizeof(struct mount));
	mp->mnt_op = &ext2fs_vfsops;
	mp->mnt_flag = MNT_RDONLY;
	if (error = ext2_mountfs(rootvp, mp, p)) {
		bsd_free(mp, M_MOUNT);
		return (error);
	}
	if (error = vfs_lock(mp)) {
		(void)ext2_unmount(mp, 0, p);
		bsd_free(mp, M_MOUNT);
		return (error);
	}
	TAILQ_INSERT_HEAD(&mountlist, mp, mnt_list);
	mp->mnt_flag |= MNT_ROOTFS;
	mp->mnt_vnodecovered = NULLVP;
	ump = VFSTOUFS(mp);
	fs = ump->um_e2fs;
	bzero(fs->fs_fsmnt, sizeof(fs->fs_fsmnt));
	fs->fs_fsmnt[0] = '/';
	bcopy((caddr_t)fs->fs_fsmnt, (caddr_t)mp->mnt_stat.f_mntonname,
	    MNAMELEN);
	(void) copystr(ROOTNAME, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
	    &size);
	bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
	(void)ext2_statfs(mp, &mp->mnt_stat, p);
	vfs_unlock(mp);
	inittodr(fs->s_es->s_wtime);		/* this helps to set the time */
	return (0);
}
Exemple #6
0
static int
zfs_mountroot(vfs_t *vfsp, enum whymountroot why)
{
	int error = 0;
	int ret = 0;
	static int zfsrootdone = 0;
	zfsvfs_t *zfsvfs = NULL;
	znode_t *zp = NULL;
	vnode_t *vp = NULL;

	ASSERT(vfsp);

	/*
	 * The filesystem that we mount as root is defined in
	 * /etc/system using the zfsroot variable.  The value defined
	 * there is copied early in startup code to zfs_bootpath
	 * (defined in modsysfile.c).
	 */
	if (why == ROOT_INIT) {
		if (zfsrootdone++)
			return (EBUSY);

		/*
		 * This needs to be done here, so that when we return from
		 * mountroot, the vfs resource name will be set correctly.
		 */
		if (snprintf(rootfs.bo_name, BO_MAXOBJNAME, "%s", zfs_bootpath)
		    >= BO_MAXOBJNAME)
			return (ENAMETOOLONG);

		if (error = vfs_lock(vfsp))
			return (error);

		if (error = zfs_domount(vfsp, zfs_bootpath, CRED()))
			goto out;

		zfsvfs = (zfsvfs_t *)vfsp->vfs_data;
		ASSERT(zfsvfs);
		if (error = zfs_zget(zfsvfs, zfsvfs->z_root, &zp))
			goto out;

		vp = ZTOV(zp);
		mutex_enter(&vp->v_lock);
		vp->v_flag |= VROOT;
		mutex_exit(&vp->v_lock);
		rootvp = vp;

		/*
		 * The zfs_zget call above returns with a hold on vp, we release
		 * it here.
		 */
		VN_RELE(vp);

		/*
		 * Mount root as readonly initially, it will be remouted
		 * read/write by /lib/svc/method/fs-usr.
		 */
		readonly_changed_cb(vfsp->vfs_data, B_TRUE);
		vfs_add((struct vnode *)0, vfsp,
		    (vfsp->vfs_flag & VFS_RDONLY) ? MS_RDONLY : 0);
out:
		vfs_unlock(vfsp);
		ret = (error) ? error : 0;
		return (ret);

	} else if (why == ROOT_REMOUNT) {

		readonly_changed_cb(vfsp->vfs_data, B_FALSE);
		vfsp->vfs_flag |= VFS_REMOUNT;
		return (zfs_refresh_properties(vfsp));

	} else if (why == ROOT_UNMOUNT) {
		zfs_unregister_callbacks((zfsvfs_t *)vfsp->vfs_data);
		(void) zfs_sync(vfsp, 0, 0);
		return (0);
	}

	/*
	 * if "why" is equal to anything else other than ROOT_INIT,
	 * ROOT_REMOUNT, or ROOT_UNMOUNT, we do not support it.
	 */
	return (ENOTSUP);
}
Exemple #7
0
static int
auto_lookup(
	vnode_t *dvp,
	char *nm,
	vnode_t **vpp,
	pathname_t *pnp,
	int flags,
	vnode_t *rdir,
	cred_t *cred,
	caller_context_t *ct,
	int *direntflags,
	pathname_t *realpnp)
{
	int error = 0;
	vnode_t *newvp = NULL;
	vfs_t *vfsp;
	fninfo_t *dfnip;
	fnnode_t *dfnp = NULL;
	fnnode_t *fnp = NULL;
	char *searchnm;
	int operation;		/* either AUTOFS_LOOKUP or AUTOFS_MOUNT */

	dfnip = vfstofni(dvp->v_vfsp);
	AUTOFS_DPRINT((3, "auto_lookup: dvp=%p (%s) name=%s\n",
	    (void *)dvp, dfnip->fi_map, nm));

	if (nm[0] == 0) {
		VN_HOLD(dvp);
		*vpp = dvp;
		return (0);
	}

	if (error = VOP_ACCESS(dvp, VEXEC, 0, cred, ct))
		return (error);

	if (nm[0] == '.' && nm[1] == 0) {
		VN_HOLD(dvp);
		*vpp = dvp;
		return (0);
	}

	if (nm[0] == '.' && nm[1] == '.' && nm[2] == 0) {
		fnnode_t *pdfnp;

		pdfnp = (vntofn(dvp))->fn_parent;
		ASSERT(pdfnp != NULL);

		/*
		 * Since it is legitimate to have the VROOT flag set for the
		 * subdirectories of the indirect map in autofs filesystem,
		 * rootfnnodep is checked against fnnode of dvp instead of
		 * just checking whether VROOT flag is set in dvp
		 */

		if (pdfnp == pdfnp->fn_globals->fng_rootfnnodep) {
			vnode_t *vp;

			vfs_rlock_wait(dvp->v_vfsp);
			if (dvp->v_vfsp->vfs_flag & VFS_UNMOUNTED) {
				vfs_unlock(dvp->v_vfsp);
				return (EIO);
			}
			vp = dvp->v_vfsp->vfs_vnodecovered;
			VN_HOLD(vp);
			vfs_unlock(dvp->v_vfsp);
			error = VOP_LOOKUP(vp, nm, vpp, pnp, flags, rdir, cred,
			    ct, direntflags, realpnp);
			VN_RELE(vp);
			return (error);
		} else {
			*vpp = fntovn(pdfnp);
			VN_HOLD(*vpp);
			return (0);
		}
	}

top:
	dfnp = vntofn(dvp);
	searchnm = nm;
	operation = 0;

	ASSERT(vn_matchops(dvp, auto_vnodeops));

	AUTOFS_DPRINT((3, "auto_lookup: dvp=%p dfnp=%p\n", (void *)dvp,
	    (void *)dfnp));

	/*
	 * If a lookup or mount of this node is in progress, wait for it
	 * to finish, and return whatever result it got.
	 */
	mutex_enter(&dfnp->fn_lock);
	if (dfnp->fn_flags & (MF_LOOKUP | MF_INPROG)) {
		mutex_exit(&dfnp->fn_lock);
		error = auto_wait4mount(dfnp);
		if (error == AUTOFS_SHUTDOWN)
			error = ENOENT;
		if (error == EAGAIN)
			goto top;
		if (error)
			return (error);
	} else
		mutex_exit(&dfnp->fn_lock);


	error = vn_vfsrlock_wait(dvp);
	if (error)
		return (error);
	vfsp = vn_mountedvfs(dvp);
	if (vfsp != NULL) {
		error = VFS_ROOT(vfsp, &newvp);
		vn_vfsunlock(dvp);
		if (!error) {
			error = VOP_LOOKUP(newvp, nm, vpp, pnp,
			    flags, rdir, cred, ct, direntflags, realpnp);
			VN_RELE(newvp);
		}
		return (error);
	}
	vn_vfsunlock(dvp);

	rw_enter(&dfnp->fn_rwlock, RW_READER);
	error = auto_search(dfnp, nm, &fnp, cred);
	if (error) {
		if (dfnip->fi_flags & MF_DIRECT) {
			/*
			 * direct map.
			 */
			if (dfnp->fn_dirents) {
				/*
				 * Mount previously triggered.
				 * 'nm' not found
				 */
				error = ENOENT;
			} else {
				/*
				 * I need to contact the daemon to trigger
				 * the mount. 'dfnp' will be the mountpoint.
				 */
				operation = AUTOFS_MOUNT;
				VN_HOLD(fntovn(dfnp));
				fnp = dfnp;
				error = 0;
			}
		} else if (dvp == dfnip->fi_rootvp) {
			/*
			 * 'dfnp' is the root of the indirect AUTOFS.
			 */
			if (rw_tryupgrade(&dfnp->fn_rwlock) == 0) {
				/*
				 * Could not acquire writer lock, release
				 * reader, and wait until available. We
				 * need to search for 'nm' again, since we
				 * had to release the lock before reacquiring
				 * it.
				 */
				rw_exit(&dfnp->fn_rwlock);
				rw_enter(&dfnp->fn_rwlock, RW_WRITER);
				error = auto_search(dfnp, nm, &fnp, cred);
			}

			ASSERT(RW_WRITE_HELD(&dfnp->fn_rwlock));
			if (error) {
				/*
				 * create node being looked-up and request
				 * mount on it.
				 */
				error = auto_enter(dfnp, nm, &fnp, kcred);
				if (!error)
					operation = AUTOFS_LOOKUP;
			}
		} else if ((dfnp->fn_dirents == NULL) &&
		    ((dvp->v_flag & VROOT) == 0) &&
		    ((fntovn(dfnp->fn_parent))->v_flag & VROOT)) {
			/*
			 * dfnp is the actual 'mountpoint' of indirect map,
			 * it is the equivalent of a direct mount,
			 * ie, /home/'user1'
			 */
			operation = AUTOFS_MOUNT;
			VN_HOLD(fntovn(dfnp));
			fnp = dfnp;
			error = 0;
			searchnm = dfnp->fn_name;
		}
	}

	if (error == EAGAIN) {
		rw_exit(&dfnp->fn_rwlock);
		goto top;
	}
	if (error) {
		rw_exit(&dfnp->fn_rwlock);
		return (error);
	}

	/*
	 * We now have the actual fnnode we're interested in.
	 * The 'MF_LOOKUP' indicates another thread is currently
	 * performing a daemon lookup of this node, therefore we
	 * wait for its completion.
	 * The 'MF_INPROG' indicates another thread is currently
	 * performing a daemon mount of this node, we wait for it
	 * to be done if we are performing a MOUNT. We don't
	 * wait for it if we are performing a LOOKUP.
	 * We can release the reader/writer lock as soon as we acquire
	 * the mutex, since the state of the lock can only change by
	 * first acquiring the mutex.
	 */
	mutex_enter(&fnp->fn_lock);
	rw_exit(&dfnp->fn_rwlock);
	if ((fnp->fn_flags & MF_LOOKUP) ||
	    ((operation == AUTOFS_MOUNT) && (fnp->fn_flags & MF_INPROG))) {
		mutex_exit(&fnp->fn_lock);
		error = auto_wait4mount(fnp);
		VN_RELE(fntovn(fnp));
		if (error == AUTOFS_SHUTDOWN)
			error = ENOENT;
		if (error && error != EAGAIN)
			return (error);
		goto top;
	}

	if (operation == 0) {
		/*
		 * got the fnnode, check for any errors
		 * on the previous operation on that node.
		 */
		error = fnp->fn_error;
		if ((error == EINTR) || (error == EAGAIN)) {
			/*
			 * previous operation on this node was
			 * not completed, do a lookup now.
			 */
			operation = AUTOFS_LOOKUP;
		} else {
			/*
			 * previous operation completed. Return
			 * a pointer to the node only if there was
			 * no error.
			 */
			mutex_exit(&fnp->fn_lock);
			if (!error)
				*vpp = fntovn(fnp);
			else
				VN_RELE(fntovn(fnp));
			return (error);
		}
	}

	/*
	 * Since I got to this point, it means I'm the one
	 * responsible for triggering the mount/look-up of this node.
	 */
	switch (operation) {
	case AUTOFS_LOOKUP:
		AUTOFS_BLOCK_OTHERS(fnp, MF_LOOKUP);
		fnp->fn_error = 0;
		mutex_exit(&fnp->fn_lock);
		error = auto_lookup_aux(fnp, searchnm, cred);
		if (!error) {
			/*
			 * Return this vnode
			 */
			*vpp = fntovn(fnp);
		} else {
			/*
			 * release our reference to this vnode
			 * and return error
			 */
			VN_RELE(fntovn(fnp));
		}
		break;
	case AUTOFS_MOUNT:
		AUTOFS_BLOCK_OTHERS(fnp, MF_INPROG);
		fnp->fn_error = 0;
		mutex_exit(&fnp->fn_lock);
		/*
		 * auto_new_mount_thread fires up a new thread which
		 * calls automountd finishing up the work
		 */
		auto_new_mount_thread(fnp, searchnm, cred);

		/*
		 * At this point, we are simply another thread
		 * waiting for the mount to complete
		 */
		error = auto_wait4mount(fnp);
		if (error == AUTOFS_SHUTDOWN)
			error = ENOENT;

		/*
		 * now release our reference to this vnode
		 */
		VN_RELE(fntovn(fnp));
		if (!error)
			goto top;
		break;
	default:
		auto_log(dfnp->fn_globals->fng_verbose,
		    dfnp->fn_globals->fng_zoneid, CE_WARN,
		    "auto_lookup: unknown operation %d",
		    operation);
	}

	AUTOFS_DPRINT((5, "auto_lookup: name=%s *vpp=%p return=%d\n",
	    nm, (void *)*vpp, error));

	return (error);
}
Exemple #8
0
/* ARGSUSED */
int
ufs_fioffs(
	struct vnode	*vp,
	char 		*vap,		/* must be NULL - reserved */
	struct cred	*cr)		/* credentials from ufs_ioctl */
{
	int error;
	struct ufsvfs	*ufsvfsp;
	struct ulockfs	*ulp;

	/* file system has been forcibly unmounted */
	ufsvfsp = VTOI(vp)->i_ufsvfs;
	if (ufsvfsp == NULL)
		return (EIO);

	ulp = &ufsvfsp->vfs_ulockfs;

	/*
	 * suspend the delete thread
	 *	this must be done outside the lockfs locking protocol
	 */
	vfs_lock_wait(vp->v_vfsp);
	ufs_thread_suspend(&ufsvfsp->vfs_delete);

	/* hold the mutex to prevent race with a lockfs request */
	mutex_enter(&ulp->ul_lock);
	atomic_inc_ulong(&ufs_quiesce_pend);

	if (ULOCKFS_IS_HLOCK(ulp)) {
		error = EIO;
		goto out;
	}
	if (ULOCKFS_IS_ELOCK(ulp)) {
		error = EBUSY;
		goto out;
	}
	/* wait for outstanding accesses to finish */
	if (error = ufs_quiesce(ulp))
		goto out;

	/*
	 * If logging, and the logmap was marked as not rollable,
	 * make it rollable now, and start the trans_roll thread and
	 * the reclaim thread.  The log at this point is safe to write to.
	 */
	if (ufsvfsp->vfs_log) {
		ml_unit_t	*ul = ufsvfsp->vfs_log;
		struct fs	*fsp = ufsvfsp->vfs_fs;
		int		err;

		if (ul->un_flags & LDL_NOROLL) {
			ul->un_flags &= ~LDL_NOROLL;
			logmap_start_roll(ul);
			if (!fsp->fs_ronly && (fsp->fs_reclaim &
			    (FS_RECLAIM|FS_RECLAIMING))) {
				fsp->fs_reclaim &= ~FS_RECLAIM;
				fsp->fs_reclaim |= FS_RECLAIMING;
				ufs_thread_start(&ufsvfsp->vfs_reclaim,
				    ufs_thread_reclaim, vp->v_vfsp);
				if (!fsp->fs_ronly) {
					TRANS_SBWRITE(ufsvfsp,
					    TOP_SBUPDATE_UPDATE);
					if (err =
					    geterror(ufsvfsp->vfs_bufp)) {
						refstr_t	*mntpt;
						mntpt = vfs_getmntpoint(
						    vp->v_vfsp);
						cmn_err(CE_NOTE,
						    "Filesystem Flush "
						    "Failed to update "
						    "Reclaim Status for "
						    " %s, Write failed to "
						    "update superblock, "
						    "error %d",
						    refstr_value(mntpt),
						    err);
						refstr_rele(mntpt);
					}
				}
			}
		}
	}

	/* synchronously flush dirty data and metadata */
	error = ufs_flush(vp->v_vfsp);

out:
	atomic_dec_ulong(&ufs_quiesce_pend);
	cv_broadcast(&ulp->ul_cv);
	mutex_exit(&ulp->ul_lock);
	vfs_unlock(vp->v_vfsp);

	/*
	 * allow the delete thread to continue
	 */
	ufs_thread_continue(&ufsvfsp->vfs_delete);
	return (error);
}
Exemple #9
0
/*
 * ufs_fiosdio
 *	Set delayed-io state.  This ioctl is tailored
 *	to metamucil's needs and may change at any time.
 */
int
ufs_fiosdio(
	struct vnode	*vp,		/* file's vnode */
	uint_t		*diop,		/* dio flag */
	int		flag,		/* flag from ufs_ioctl */
	struct cred	*cr)		/* credentials from ufs_ioctl */
{
	uint_t		dio;		/* copy of user's dio */
	struct inode	*ip;		/* inode for vp */
	struct ufsvfs	*ufsvfsp;
	struct fs	*fs;
	struct ulockfs	*ulp;
	int		error = 0;

#ifdef lint
	flag = flag;
#endif

	/* check input conditions */
	if (secpolicy_fs_config(cr, vp->v_vfsp) != 0)
		return (EPERM);

	if (copyin(diop, &dio, sizeof (dio)))
		return (EFAULT);

	if (dio > 1)
		return (EINVAL);

	/* file system has been forcibly unmounted */
	if (VTOI(vp)->i_ufsvfs == NULL)
		return (EIO);

	ip = VTOI(vp);
	ufsvfsp = ip->i_ufsvfs;
	ulp = &ufsvfsp->vfs_ulockfs;

	/* logging file system; dio ignored */
	if (TRANS_ISTRANS(ufsvfsp))
		return (error);

	/* hold the mutex to prevent race with a lockfs request */
	vfs_lock_wait(vp->v_vfsp);
	mutex_enter(&ulp->ul_lock);
	atomic_inc_ulong(&ufs_quiesce_pend);

	if (ULOCKFS_IS_HLOCK(ulp)) {
		error = EIO;
		goto out;
	}

	if (ULOCKFS_IS_ELOCK(ulp)) {
		error = EBUSY;
		goto out;
	}
	/* wait for outstanding accesses to finish */
	if (error = ufs_quiesce(ulp))
		goto out;

	/* flush w/invalidate */
	if (error = ufs_flush(vp->v_vfsp))
		goto out;

	/*
	 * update dio
	 */
	mutex_enter(&ufsvfsp->vfs_lock);
	ufsvfsp->vfs_dio = dio;

	/*
	 * enable/disable clean flag processing
	 */
	fs = ip->i_fs;
	if (fs->fs_ronly == 0 &&
	    fs->fs_clean != FSBAD &&
	    fs->fs_clean != FSLOG) {
		if (dio)
			fs->fs_clean = FSSUSPEND;
		else
			fs->fs_clean = FSACTIVE;
		ufs_sbwrite(ufsvfsp);
		mutex_exit(&ufsvfsp->vfs_lock);
	} else
		mutex_exit(&ufsvfsp->vfs_lock);
out:
	/*
	 * we need this broadcast because of the ufs_quiesce call above
	 */
	atomic_dec_ulong(&ufs_quiesce_pend);
	cv_broadcast(&ulp->ul_cv);
	mutex_exit(&ulp->ul_lock);
	vfs_unlock(vp->v_vfsp);
	return (error);
}
Exemple #10
0
/*
 * Enable logging
 */
int
lqfs_enable(struct vnode *vp, struct fiolog *flp, cred_t *cr)
{
	int		error;
	inode_t		*ip = VTOI(vp);
	qfsvfs_t	*qfsvfsp = ip->i_qfsvfs;
	fs_lqfs_common_t	*fs = VFS_FS_PTR(qfsvfsp);
	ml_unit_t	*ul;
#ifdef LQFS_TODO_LOCKFS
	int		reclaim = 0;
	struct lockfs	lf;
	struct ulockfs	*ulp;
#else
	/* QFS doesn't really support LOCKFS. */
#endif /* LQFS_TODO_LOCKFS */
	vfs_t		*vfsp = qfsvfsp->vfs_vfs;
	uint64_t	tmp_nbytes_actual;
	char fsclean;
	sam_sblk_t	*sblk = qfsvfsp->mi.m_sbp;

	/*
	 * File system is not capable of logging.
	 */
	if (!LQFS_CAPABLE(qfsvfsp)) {
		flp->error = FIOLOG_ENOTSUP;
		error = 0;
		goto out;
	}
	if (!SAM_MAGIC_V2A_OR_HIGHER(&sblk->info.sb)) {
		cmn_err(CE_WARN, "SAM-QFS: %s: Not enabling logging, "
		    " file system is not version 2A.", qfsvfsp->mt.fi_name);
		cmn_err(CE_WARN, "\tUpgrade file system with samfsck -u "
		    "first.");
		flp->error = FIOLOG_ENOTSUP;
		error = 0;
		goto out;
	}

	if (LQFS_GET_LOGBNO(fs)) {
		error = lqfs_log_validate(qfsvfsp, flp, cr);
	}

	/*
	 * Check if logging is already enabled
	 */
	if (LQFS_GET_LOGP(qfsvfsp)) {
		flp->error = FIOLOG_ETRANS;
		/* for root ensure logging option is set */
		vfs_setmntopt(vfsp, MNTOPT_LOGGING, NULL, 0);
		error = 0;
		goto out;
	}

	/*
	 * Come back here to recheck if we had to disable the log.
	 */
recheck:
	error = 0;
	flp->error = FIOLOG_ENONE;

	/*
	 * Adjust requested log size
	 */
	flp->nbytes_actual = flp->nbytes_requested;
	if (flp->nbytes_actual == 0) {
		tmp_nbytes_actual =
		    (((uint64_t)FS_SIZE(fs)) / ldl_divisor) << FS_FSHIFT(fs);
		flp->nbytes_actual = (uint_t)MIN(tmp_nbytes_actual, INT_MAX);
	}
	flp->nbytes_actual = MAX(flp->nbytes_actual, ldl_minlogsize);
	flp->nbytes_actual = MIN(flp->nbytes_actual, ldl_maxlogsize);
	flp->nbytes_actual = blkroundup(fs, flp->nbytes_actual);

	/*
	 * logging is enabled and the log is the right size; done
	 */
	ul = LQFS_GET_LOGP(qfsvfsp);

	if (ul && LQFS_GET_LOGBNO(fs) &&
	    (flp->nbytes_actual == ul->un_requestsize)) {
		vfs_setmntopt(vfsp, MNTOPT_LOGGING, NULL, 0);
		error = 0;
		goto out;
	}

	/*
	 * Readonly file system
	 */
	if (FS_RDONLY(fs)) {
		flp->error = FIOLOG_EROFS;
		error = 0;
		goto out;
	}

#ifdef LQFS_TODO_LOCKFS
	/*
	 * File system must be write locked to enable logging
	 */
	error = qfs_fiolfss(vp, &lf);
	if (error) {
		goto out;
	}
	if (!LOCKFS_IS_ULOCK(&lf)) {
		flp->error = FIOLOG_EULOCK;
		error = 0;
		goto out;
	}
	lf.lf_lock = LOCKFS_WLOCK;
	lf.lf_flags = 0;
	lf.lf_comment = NULL;
	error = qfs_fiolfs(vp, &lf, 1);
	if (error) {
		flp->error = FIOLOG_EWLOCK;
		error = 0;
		goto out;
	}
#else
	/* QFS doesn't really support lockfs. */
#endif /* LQFS_TODO_LOCKFS */

	/*
	 * Grab appropriate locks to synchronize with the rest
	 * of the system
	 */
	vfs_lock_wait(vfsp);
#ifdef LQFS_TODO_LOCKFS
	ulp = &ufsvfsp->vfs_ulockfs;
	mutex_enter(&ulp->ul_lock);
#else
	/* QFS doesn't really support lockfs. */
#endif /* LQFS_TODO_LOCKFS */

	/*
	 * File system must be fairly consistent to enable logging
	 */
	fsclean = LQFS_GET_FS_CLEAN(fs);
	if (fsclean != FSLOG &&
	    fsclean != FSACTIVE &&
	    fsclean != FSSTABLE &&
	    fsclean != FSCLEAN) {
		flp->error = FIOLOG_ECLEAN;
		goto unlockout;
	}

#ifdef LUFS
	/*
	 * A write-locked file system is only active if there are
	 * open deleted files; so remember to set FS_RECLAIM later.
	 */
	if (LQFS_GET_FS_CLEAN(fs) == FSACTIVE) {
		reclaim = FS_RECLAIM;
	}
#else
	/* QFS doesn't have a reclaim file thread. */
#endif /* LUFS */

	/*
	 * Logging is already enabled; must be changing the log's size
	 */
	if (LQFS_GET_LOGBNO(fs) && LQFS_GET_LOGP(qfsvfsp)) {
#ifdef LQFS_TODO_LOCKFS
		/*
		 * Before we can disable logging, we must give up our
		 * lock.  As a consequence of unlocking and disabling the
		 * log, the fs structure may change.  Because of this, when
		 * disabling is complete, we will go back to recheck to
		 * repeat all of the checks that we performed to get to
		 * this point.  Disabling sets fs->fs_logbno to 0, so this
		 * will not put us into an infinite loop.
		 */
		mutex_exit(&ulp->ul_lock);
#else
		/* QFS doesn't really support lockfs. */
#endif /* LQFS_TODO_LOCKFS */
		vfs_unlock(vfsp);

#ifdef LQFS_TODO_LOCKFS
		lf.lf_lock = LOCKFS_ULOCK;
		lf.lf_flags = 0;
		error = qfs_fiolfs(vp, &lf, 1);
		if (error) {
			flp->error = FIOLOG_ENOULOCK;
			error = 0;
			goto out;
		}
#else
		/* QFS doesn't really support lockfs. */
#endif /* LQFS_TODO_LOCKFS */
		error = lqfs_disable(vp, flp);
		if (error || (flp->error != FIOLOG_ENONE)) {
			error = 0;
			goto out;
		}
		goto recheck;
	}

	error = lqfs_alloc(qfsvfsp, flp, cr);
	if (error) {
		goto errout;
	}
#ifdef LUFS
#else
	if ((error = lqfs_log_validate(qfsvfsp, flp, cr)) != 0) {
		goto errout;
	}
#endif /* LUFS */

	/*
	 * Create all of the incore structs
	 */
	error = lqfs_snarf(qfsvfsp, fs, 0);
	if (error) {
		goto errout;
	}

	/*
	 * DON'T ``GOTO ERROUT'' PAST THIS POINT
	 */

	/*
	 * Pretend we were just mounted with logging enabled
	 *	freeze and drain the file system of readers
	 *		Get the ops vector
	 *		If debug, record metadata locations with log subsystem
	 *		Start the delete thread
	 *		Start the reclaim thread, if necessary
	 *	Thaw readers
	 */
	vfs_setmntopt(vfsp, MNTOPT_LOGGING, NULL, 0);

	TRANS_DOMATAMAP(qfsvfsp);
	TRANS_MATA_MOUNT(qfsvfsp);
	TRANS_MATA_SI(qfsvfsp, fs);

#ifdef LUFS
	qfs_thread_start(&qfsvfsp->vfs_delete, qfs_thread_delete, vfsp);
	if (fs->fs_reclaim & (FS_RECLAIM|FS_RECLAIMING)) {
		fs->fs_reclaim &= ~FS_RECLAIM;
		fs->fs_reclaim |=  FS_RECLAIMING;
		qfs_thread_start(&qfsvfsp->vfs_reclaim,
		    qfs_thread_reclaim, vfsp);
	} else {
		fs->fs_reclaim |= reclaim;
	}
#else
	/* QFS doesn't have file reclaim nor i-node delete threads. */
#endif /* LUFS */

#ifdef LUFS
	mutex_exit(&ulp->ul_lock);
#else
	/* QFS doesn't really support LOCKFS. */
#endif /* LUFS */

	vfs_unlock(vfsp);

#ifdef LQFS_TODO_LOCKFS
	/*
	 * Unlock the file system
	 */
	lf.lf_lock = LOCKFS_ULOCK;
	lf.lf_flags = 0;
	error = qfs_fiolfs(vp, &lf, 1);
	if (error) {
		flp->error = FIOLOG_ENOULOCK;
		error = 0;
		goto out;
	}
#else
	/* QFS doesn't really support LOCKFS. */
#endif /* LQFS_TODO_LOCKFS */

	/*
	 * There's nothing in the log yet (we've just allocated it)
	 * so directly write out the super block.
	 * Note, we have to force this sb out to disk
	 * (not just to the log) so that if we crash we know we are logging
	 */
	VFS_LOCK_MUTEX_ENTER(qfsvfsp);
	LQFS_SET_FS_CLEAN(fs, FSLOG);
	LQFS_SET_FS_ROLLED(fs, FS_NEED_ROLL);	/* Mark the fs as unrolled */
#ifdef LUFS
	QFS_BWRITE2(NULL, qfsvfsp->vfs_bufp);
#else
	sam_update_sblk(qfsvfsp, 0, 0, TRUE);
#endif /* LUFS */
	VFS_LOCK_MUTEX_EXIT(qfsvfsp);

	error = 0;
	goto out;

errout:
	/*
	 * Aquire the qfs_scan_lock before de-linking the mtm data
	 * structure so that we keep qfs_sync() and qfs_update() away
	 * when they execute the ufs_scan_inodes() run while we're in
	 * progress of enabling/disabling logging.
	 */
	mutex_enter(&qfs_scan_lock);
	(void) lqfs_unsnarf(qfsvfsp);
	mutex_exit(&qfs_scan_lock);

	(void) lqfs_free(qfsvfsp);
unlockout:
#ifdef LQFS_TODO_LOCKFS
	mutex_exit(&ulp->ul_lock);
#else
	/* QFS doesn't really support LOCKFS. */
#endif /* LQFS_TODO_LOCKFS */

	vfs_unlock(vfsp);

#ifdef LQFS_TODO_LOCKFS
	lf.lf_lock = LOCKFS_ULOCK;
	lf.lf_flags = 0;
	(void) qfs_fiolfs(vp, &lf, 1);
#else
	/* QFS doesn't really support LOCKFS. */
#endif /* LQFS_TODO_LOCKFS */

out:
	mutex_enter(&ip->mp->ms.m_waitwr_mutex);
	ip->mp->mt.fi_status |= FS_LOGSTATE_KNOWN;
	mutex_exit(&ip->mp->ms.m_waitwr_mutex);
	return (error);
}
Exemple #11
0
/*
 * Disable logging
 */
int
lqfs_disable(vnode_t *vp, struct fiolog *flp)
{
	int		error = 0;
	inode_t		*ip = VTOI(vp);
	qfsvfs_t	*qfsvfsp = ip->i_qfsvfs;
	fs_lqfs_common_t	*fs = VFS_FS_PTR(qfsvfsp);
#ifdef LUFS
	struct lockfs	lf;
	struct ulockfs	*ulp;
#else
	/* QFS doesn't really support LOCKFS. */
#endif /* LUFS */

	flp->error = FIOLOG_ENONE;

	/*
	 * Logging is already disabled; done
	 */
	if (LQFS_GET_LOGBNO(fs) == 0 || LQFS_GET_LOGP(qfsvfsp) == NULL ||
	    !LQFS_CAPABLE(qfsvfsp)) {
		vfs_setmntopt(qfsvfsp->vfs_vfs, MNTOPT_NOLOGGING, NULL, 0);
		error = 0;
		goto out;
	}

#ifdef LUFS
	/*
	 * File system must be write locked to disable logging
	 */
	error = qfs_fiolfss(vp, &lf);
	if (error) {
		goto out;
	}
	if (!LOCKFS_IS_ULOCK(&lf)) {
		flp->error = FIOLOG_EULOCK;
		error = 0;
		goto out;
	}
	lf.lf_lock = LOCKFS_WLOCK;
	lf.lf_flags = 0;
	lf.lf_comment = NULL;
	error = qfs_fiolfs(vp, &lf, 1);
	if (error) {
		flp->error = FIOLOG_EWLOCK;
		error = 0;
		goto out;
	}
#else
	/* QFS doesn't really support LOCKFS. */
#endif /* LUFS */

	if (LQFS_GET_LOGP(qfsvfsp) == NULL || LQFS_GET_LOGBNO(fs) == 0) {
		goto errout;
	}

	/*
	 * WE ARE COMMITTED TO DISABLING LOGGING PAST THIS POINT
	 */

	/*
	 * Disable logging:
	 * Suspend the reclaim thread and force the delete thread to exit.
	 *	When a nologging mount has completed there may still be
	 *	work for reclaim to do so just suspend this thread until
	 *	it's [deadlock-] safe for it to continue.  The delete
	 *	thread won't be needed as qfs_iinactive() calls
	 *	qfs_delete() when logging is disabled.
	 * Freeze and drain reader ops.
	 *	Commit any outstanding reader transactions (lqfs_flush).
	 *	Set the ``unmounted'' bit in the qfstrans struct.
	 *	If debug, remove metadata from matamap.
	 *	Disable matamap processing.
	 *	NULL the trans ops table.
	 *	Free all of the incore structs related to logging.
	 * Allow reader ops.
	 */
#ifdef LUFS
	qfs_thread_suspend(&qfsvfsp->vfs_reclaim);
	qfs_thread_exit(&qfsvfsp->vfs_delete);
#else
	/* QFS doesn't have file reclaim nor i-node delete threads. */
#endif /* LUFS */

	vfs_lock_wait(qfsvfsp->vfs_vfs);
#ifdef LQFS_TODO_LOCKFS
	ulp = &qfsvfsp->vfs_ulockfs;
	mutex_enter(&ulp->ul_lock);
	(void) qfs_quiesce(ulp);
#else
	/* QFS doesn't really support LOCKFS. */
#endif /* LQFS_TODO_LOCKFS */

#ifdef LQFS_TODO
	(void) qfs_flush(qfsvfsp->vfs_vfs);
#else
	(void) lqfs_flush(qfsvfsp);
	if (LQFS_GET_LOGP(qfsvfsp)) {
		logmap_start_roll(LQFS_GET_LOGP(qfsvfsp));
	}
#endif /* LQFS_TODO */

	TRANS_MATA_UMOUNT(qfsvfsp);
	LQFS_SET_DOMATAMAP(qfsvfsp, 0);

	/*
	 * Free all of the incore structs
	 * Aquire the ufs_scan_lock before de-linking the mtm data
	 * structure so that we keep ufs_sync() and ufs_update() away
	 * when they execute the ufs_scan_inodes() run while we're in
	 * progress of enabling/disabling logging.
	 */
	mutex_enter(&qfs_scan_lock);
	(void) lqfs_unsnarf(qfsvfsp);
	mutex_exit(&qfs_scan_lock);

#ifdef LQFS_TODO_LOCKFS
	atomic_add_long(&ufs_quiesce_pend, -1);
	mutex_exit(&ulp->ul_lock);
#else
	/* QFS doesn't do this yet. */
#endif /* LQFS_TODO_LOCKFS */
	vfs_setmntopt(qfsvfsp->vfs_vfs, MNTOPT_NOLOGGING, NULL, 0);
	vfs_unlock(qfsvfsp->vfs_vfs);

	LQFS_SET_FS_ROLLED(fs, FS_ALL_ROLLED);
	LQFS_SET_NOLOG_SI(qfsvfsp, 0);

	/*
	 * Free the log space and mark the superblock as FSACTIVE
	 */
	(void) lqfs_free(qfsvfsp);

#ifdef LUFS
	/*
	 * Allow the reclaim thread to continue.
	 */
	qfs_thread_continue(&qfsvfsp->vfs_reclaim);
#else
	/* QFS doesn't have a file reclaim thread. */
#endif /* LUFS */

#ifdef LQFS_TODO_LOCKFS
	/*
	 * Unlock the file system
	 */
	lf.lf_lock = LOCKFS_ULOCK;
	lf.lf_flags = 0;
	error = qfs_fiolfs(vp, &lf, 1);
	if (error) {
		flp->error = FIOLOG_ENOULOCK;
	}
#else
	/* QFS doesn't really support LOCKFS. */
#endif /* LQFS_LOCKFS */

	error = 0;
	goto out;

errout:
#ifdef LQFS_LOCKFS
	lf.lf_lock = LOCKFS_ULOCK;
	lf.lf_flags = 0;
	(void) qfs_fiolfs(vp, &lf, 1);
#else
	/* QFS doesn't really support LOCKFS. */
#endif /* LQFS_LOCKFS */

out:
	mutex_enter(&ip->mp->ms.m_waitwr_mutex);
	ip->mp->mt.fi_status |= FS_LOGSTATE_KNOWN;
	mutex_exit(&ip->mp->ms.m_waitwr_mutex);
	return (error);
}
Exemple #12
0
/*
 * Starting at current directory, translate pathname pnp to end.
 * Leave pathname of final component in pnp, return the vnode
 * for the final component in *compvpp, and return the vnode
 * for the parent of the final component in dirvpp.
 *
 * This is the central routine in pathname translation and handles
 * multiple components in pathnames, separating them at /'s.  It also
 * implements mounted file systems and processes symbolic links.
 *
 * vp is the vnode where the directory search should start.
 *
 * Reference counts: vp must be held prior to calling this function.  rootvp
 * should only be held if rootvp != rootdir.
 */
int
lookuppnvp(
	struct pathname *pnp,		/* pathname to lookup */
	struct pathname *rpnp,		/* if non-NULL, return resolved path */
	int flags,			/* follow symlinks */
	vnode_t **dirvpp,		/* ptr for parent vnode */
	vnode_t **compvpp,		/* ptr for entry vnode */
	vnode_t *rootvp,		/* rootvp */
	vnode_t *vp,			/* directory to start search at */
	cred_t *cr)			/* user's credential */
{
	vnode_t *cvp;	/* current component vp */
	char component[MAXNAMELEN];	/* buffer for component (incl null) */
	int error;
	int nlink;
	int lookup_flags;
	struct pathname presrvd; /* case preserved name */
	struct pathname *pp = NULL;
	vnode_t *startvp;
	vnode_t *zonevp = curproc->p_zone->zone_rootvp;		/* zone root */
	int must_be_directory = 0;
	boolean_t retry_with_kcred;
	uint32_t auditing = AU_AUDITING();

	CPU_STATS_ADDQ(CPU, sys, namei, 1);
	nlink = 0;
	cvp = NULL;
	if (rpnp)
		rpnp->pn_pathlen = 0;

	lookup_flags = dirvpp ? LOOKUP_DIR : 0;
	if (flags & FIGNORECASE) {
		lookup_flags |= FIGNORECASE;
		pn_alloc(&presrvd);
		pp = &presrvd;
	}

	if (auditing)
		audit_anchorpath(pnp, vp == rootvp);

	/*
	 * Eliminate any trailing slashes in the pathname.
	 * If there are any, we must follow all symlinks.
	 * Also, we must guarantee that the last component is a directory.
	 */
	if (pn_fixslash(pnp)) {
		flags |= FOLLOW;
		must_be_directory = 1;
	}

	startvp = vp;
next:
	retry_with_kcred = B_FALSE;

	/*
	 * Make sure we have a directory.
	 */
	if (vp->v_type != VDIR) {
		error = ENOTDIR;
		goto bad;
	}

	if (rpnp && VN_CMP(vp, rootvp))
		(void) pn_set(rpnp, "/");

	/*
	 * Process the next component of the pathname.
	 */
	if (error = pn_getcomponent(pnp, component)) {
		goto bad;
	}

	/*
	 * Handle "..": two special cases.
	 * 1. If we're at the root directory (e.g. after chroot or
	 *    zone_enter) then change ".." to "." so we can't get
	 *    out of this subtree.
	 * 2. If this vnode is the root of a mounted file system,
	 *    then replace it with the vnode that was mounted on
	 *    so that we take the ".." in the other file system.
	 */
	if (component[0] == '.' && component[1] == '.' && component[2] == 0) {
checkforroot:
		if (VN_CMP(vp, rootvp) || VN_CMP(vp, zonevp)) {
			component[1] = '\0';
		} else if (vp->v_flag & VROOT) {
			vfs_t *vfsp;
			cvp = vp;

			/*
			 * While we deal with the vfs pointer from the vnode
			 * the filesystem could have been forcefully unmounted
			 * and the vnode's v_vfsp could have been invalidated
			 * by VFS_UNMOUNT. Hence, we cache v_vfsp and use it
			 * with vfs_rlock_wait/vfs_unlock.
			 * It is safe to use the v_vfsp even it is freed by
			 * VFS_UNMOUNT because vfs_rlock_wait/vfs_unlock
			 * do not dereference v_vfsp. It is just used as a
			 * magic cookie.
			 * One more corner case here is the memory getting
			 * reused for another vfs structure. In this case
			 * lookuppnvp's vfs_rlock_wait will succeed, domount's
			 * vfs_lock will fail and domount will bail out with an
			 * error (EBUSY).
			 */
			vfsp = cvp->v_vfsp;

			/*
			 * This lock is used to synchronize
			 * mounts/unmounts and lookups.
			 * Threads doing mounts/unmounts hold the
			 * writers version vfs_lock_wait().
			 */

			vfs_rlock_wait(vfsp);

			/*
			 * If this vnode is on a file system that
			 * has been forcibly unmounted,
			 * we can't proceed. Cancel this operation
			 * and return EIO.
			 *
			 * vfs_vnodecovered is NULL if unmounted.
			 * Currently, nfs uses VFS_UNMOUNTED to
			 * check if it's a forced-umount. Keep the
			 * same checking here as well even though it
			 * may not be needed.
			 */
			if (((vp = cvp->v_vfsp->vfs_vnodecovered) == NULL) ||
			    (cvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)) {
				vfs_unlock(vfsp);
				VN_RELE(cvp);
				if (pp)
					pn_free(pp);
				return (EIO);
			}
			VN_HOLD(vp);
			vfs_unlock(vfsp);
			VN_RELE(cvp);
			cvp = NULL;
			/*
			 * Crossing mount points. For eg: We are doing
			 * a lookup of ".." for file systems root vnode
			 * mounted here, and VOP_LOOKUP() (with covered vnode)
			 * will be on underlying file systems mount point
			 * vnode. Set retry_with_kcred flag as we might end
			 * up doing VOP_LOOKUP() with kcred if required.
			 */
			retry_with_kcred = B_TRUE;
			goto checkforroot;
		}
	}

	/*
	 * LOOKUP_CHECKREAD is a private flag used by vnodetopath() to indicate
	 * that we need to have read permission on every directory in the entire
	 * path.  This is used to ensure that a forward-lookup of a cached value
	 * has the same effect as a reverse-lookup when the cached value cannot
	 * be found.
	 */
	if ((flags & LOOKUP_CHECKREAD) &&
	    (error = VOP_ACCESS(vp, VREAD, 0, cr, NULL)) != 0)
		goto bad;

	/*
	 * Perform a lookup in the current directory.
	 */
	error = VOP_LOOKUP(vp, component, &cvp, pnp, lookup_flags,
	    rootvp, cr, NULL, NULL, pp);

	/*
	 * Retry with kcred - If crossing mount points & error is EACCES.
	 *
	 * If we are crossing mount points here and doing ".." lookup,
	 * VOP_LOOKUP() might fail if the underlying file systems
	 * mount point has no execute permission. In cases like these,
	 * we retry VOP_LOOKUP() by giving as much privilage as possible
	 * by passing kcred credentials.
	 *
	 * In case of hierarchical file systems, passing kcred still may
	 * or may not work.
	 * For eg: UFS FS --> Mount NFS FS --> Again mount UFS on some
	 *			directory inside NFS FS.
	 */
	if ((error == EACCES) && retry_with_kcred)
		error = VOP_LOOKUP(vp, component, &cvp, pnp, lookup_flags,
		    rootvp, zone_kcred(), NULL, NULL, pp);

	if (error) {
		cvp = NULL;
		/*
		 * On error, return hard error if
		 * (a) we're not at the end of the pathname yet, or
		 * (b) the caller didn't want the parent directory, or
		 * (c) we failed for some reason other than a missing entry.
		 */
		if (pn_pathleft(pnp) || dirvpp == NULL || error != ENOENT)
			goto bad;
		if (auditing) {	/* directory access */
			if (error = audit_savepath(pnp, vp, vp, error, cr))
				goto bad_noaudit;
		}

		pn_setlast(pnp);
		/*
		 * We inform the caller that the desired entry must be
		 * a directory by adding a '/' to the component name.
		 */
		if (must_be_directory && (error = pn_addslash(pnp)) != 0)
			goto bad;
		*dirvpp = vp;
		if (compvpp != NULL)
			*compvpp = NULL;
		if (rootvp != rootdir)
			VN_RELE(rootvp);
		if (pp)
			pn_free(pp);
		return (0);
	}

	/*
	 * Traverse mount points.
	 * XXX why don't we need to hold a read lock here (call vn_vfsrlock)?
	 * What prevents a concurrent update to v_vfsmountedhere?
	 * 	Possible answer: if mounting, we might not see the mount
	 *	if it is concurrently coming into existence, but that's
	 *	really not much different from the thread running a bit slower.
	 *	If unmounting, we may get into traverse() when we shouldn't,
	 *	but traverse() will catch this case for us.
	 *	(For this to work, fetching v_vfsmountedhere had better
	 *	be atomic!)
	 */
	if (vn_mountedvfs(cvp) != NULL) {
		if ((error = traverse(&cvp)) != 0)
			goto bad;
	}

	/*
	 * If we hit a symbolic link and there is more path to be
	 * translated or this operation does not wish to apply
	 * to a link, then place the contents of the link at the
	 * front of the remaining pathname.
	 */
	if (cvp->v_type == VLNK && ((flags & FOLLOW) || pn_pathleft(pnp))) {
		struct pathname linkpath;

		if (++nlink > MAXSYMLINKS) {
			error = ELOOP;
			goto bad;
		}
		pn_alloc(&linkpath);
		if (error = pn_getsymlink(cvp, &linkpath, cr)) {
			pn_free(&linkpath);
			goto bad;
		}

		if (auditing)
			audit_symlink(pnp, &linkpath);

		if (pn_pathleft(&linkpath) == 0)
			(void) pn_set(&linkpath, ".");
		error = pn_insert(pnp, &linkpath, strlen(component));
		pn_free(&linkpath);
		if (error)
			goto bad;
		VN_RELE(cvp);
		cvp = NULL;
		if (pnp->pn_pathlen == 0) {
			error = ENOENT;
			goto bad;
		}
		if (pnp->pn_path[0] == '/') {
			do {
				pnp->pn_path++;
				pnp->pn_pathlen--;
			} while (pnp->pn_path[0] == '/');
			VN_RELE(vp);
			vp = rootvp;
			VN_HOLD(vp);
		}
		if (auditing)
			audit_anchorpath(pnp, vp == rootvp);
		if (pn_fixslash(pnp)) {
			flags |= FOLLOW;
			must_be_directory = 1;
		}
		goto next;
	}

	/*
	 * If rpnp is non-NULL, remember the resolved path name therein.
	 * Do not include "." components.  Collapse occurrences of
	 * "previous/..", so long as "previous" is not itself "..".
	 * Exhausting rpnp results in error ENAMETOOLONG.
	 */
	if (rpnp && strcmp(component, ".") != 0) {
		size_t len;

		if (strcmp(component, "..") == 0 &&
		    rpnp->pn_pathlen != 0 &&
		    !((rpnp->pn_pathlen > 2 &&
		    strncmp(rpnp->pn_path+rpnp->pn_pathlen-3, "/..", 3) == 0) ||
		    (rpnp->pn_pathlen == 2 &&
		    strncmp(rpnp->pn_path, "..", 2) == 0))) {
			while (rpnp->pn_pathlen &&
			    rpnp->pn_path[rpnp->pn_pathlen-1] != '/')
				rpnp->pn_pathlen--;
			if (rpnp->pn_pathlen > 1)
				rpnp->pn_pathlen--;
			rpnp->pn_path[rpnp->pn_pathlen] = '\0';
		} else {
			if (rpnp->pn_pathlen != 0 &&
			    rpnp->pn_path[rpnp->pn_pathlen-1] != '/')
				rpnp->pn_path[rpnp->pn_pathlen++] = '/';
			if (flags & FIGNORECASE) {
				/*
				 * Return the case-preserved name
				 * within the resolved path.
				 */
				error = copystr(pp->pn_buf,
				    rpnp->pn_path + rpnp->pn_pathlen,
				    rpnp->pn_bufsize - rpnp->pn_pathlen, &len);
			} else {
				error = copystr(component,
				    rpnp->pn_path + rpnp->pn_pathlen,
				    rpnp->pn_bufsize - rpnp->pn_pathlen, &len);
			}
			if (error)	/* copystr() returns ENAMETOOLONG */
				goto bad;
			rpnp->pn_pathlen += (len - 1);
			ASSERT(rpnp->pn_bufsize > rpnp->pn_pathlen);
		}
	}

	/*
	 * If no more components, return last directory (if wanted) and
	 * last component (if wanted).
	 */
	if (pn_pathleft(pnp) == 0) {
		/*
		 * If there was a trailing slash in the pathname,
		 * make sure the last component is a directory.
		 */
		if (must_be_directory && cvp->v_type != VDIR) {
			error = ENOTDIR;
			goto bad;
		}
		if (dirvpp != NULL) {
			/*
			 * Check that we have the real parent and not
			 * an alias of the last component.
			 */
			if (vn_compare(vp, cvp)) {
				if (auditing)
					(void) audit_savepath(pnp, cvp, vp,
					    EINVAL, cr);
				pn_setlast(pnp);
				VN_RELE(vp);
				VN_RELE(cvp);
				if (rootvp != rootdir)
					VN_RELE(rootvp);
				if (pp)
					pn_free(pp);
				return (EINVAL);
			}
			*dirvpp = vp;
		} else
			VN_RELE(vp);
		if (auditing)
			(void) audit_savepath(pnp, cvp, vp, 0, cr);
		if (pnp->pn_path == pnp->pn_buf)
			(void) pn_set(pnp, ".");
		else
			pn_setlast(pnp);
		if (rpnp) {
			if (VN_CMP(cvp, rootvp))
				(void) pn_set(rpnp, "/");
			else if (rpnp->pn_pathlen == 0)
				(void) pn_set(rpnp, ".");
		}

		if (compvpp != NULL)
			*compvpp = cvp;
		else
			VN_RELE(cvp);
		if (rootvp != rootdir)
			VN_RELE(rootvp);
		if (pp)
			pn_free(pp);
		return (0);
	}

	/*
	 * Skip over slashes from end of last component.
	 */
	while (pnp->pn_path[0] == '/') {
		pnp->pn_path++;
		pnp->pn_pathlen--;
	}

	/*
	 * Searched through another level of directory:
	 * release previous directory handle and save new (result
	 * of lookup) as current directory.
	 */
	VN_RELE(vp);
	vp = cvp;
	cvp = NULL;
	goto next;

bad:
	if (auditing)	/* reached end of path */
		(void) audit_savepath(pnp, cvp, vp, error, cr);
bad_noaudit:
	/*
	 * Error.  Release vnodes and return.
	 */
	if (cvp)
		VN_RELE(cvp);
	/*
	 * If the error was ESTALE and the current directory to look in
	 * was the root for this lookup, the root for a mounted file
	 * system, or the starting directory for lookups, then
	 * return ENOENT instead of ESTALE.  In this case, no recovery
	 * is possible by the higher level.  If ESTALE was returned for
	 * some intermediate directory along the path, then recovery
	 * is potentially possible and retrying from the higher level
	 * will either correct the situation by purging stale cache
	 * entries or eventually get back to the point where no recovery
	 * is possible.
	 */
	if (error == ESTALE &&
	    (VN_CMP(vp, rootvp) || (vp->v_flag & VROOT) || vp == startvp))
		error = ENOENT;
	VN_RELE(vp);
	if (rootvp != rootdir)
		VN_RELE(rootvp);
	if (pp)
		pn_free(pp);
	return (error);
}
/*
 * smb_vop_lookup
 *
 * dvp:		directory vnode (in)
 * name:	name of file to be looked up (in)
 * vpp:		looked-up vnode (out)
 * od_name:	on-disk name of file (out).
 *		This parameter is optional.  If a pointer is passed in, it
 * 		must be allocated with MAXNAMELEN bytes
 * rootvp:	vnode of the tree root (in)
 *		This parameter is always passed in non-NULL except at the time
 *		of share set up.
 * direntflags:	dirent flags returned from VOP_LOOKUP
 */
int
smb_vop_lookup(
    vnode_t		*dvp,
    char		*name,
    vnode_t		**vpp,
    char		*od_name,
    int			flags,
    int			*direntflags,
    vnode_t		*rootvp,
    cred_t		*cr)
{
	int error = 0;
	int option_flags = 0;
	pathname_t rpn;
	char *np = name;
	char namebuf[MAXNAMELEN];

	if (*name == '\0')
		return (EINVAL);

	ASSERT(vpp);
	*vpp = NULL;
	*direntflags = 0;

	if ((name[0] == '.') && (name[1] == '.') && (name[2] == 0)) {
		if (rootvp && (dvp == rootvp)) {
			VN_HOLD(dvp);
			*vpp = dvp;
			return (0);
		}

		if (dvp->v_flag & VROOT) {
			vfs_t *vfsp;
			vnode_t *cvp = dvp;

			/*
			 * Set dvp and check for races with forced unmount
			 * (see lookuppnvp())
			 */

			vfsp = cvp->v_vfsp;
			vfs_rlock_wait(vfsp);
			if (((dvp = cvp->v_vfsp->vfs_vnodecovered) == NULL) ||
			    (cvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)) {
				vfs_unlock(vfsp);
				return (EIO);
			}
			vfs_unlock(vfsp);
		}
	}

	if (flags & SMB_IGNORE_CASE)
		option_flags = FIGNORECASE;

	if (flags & SMB_CATIA)
		np = smb_vop_catia_v5tov4(name, namebuf, sizeof (namebuf));

	pn_alloc(&rpn);

	error = VOP_LOOKUP(dvp, np, vpp, NULL, option_flags, NULL, cr,
	    &smb_ct, direntflags, &rpn);

	if ((error == 0) && od_name) {
		bzero(od_name, MAXNAMELEN);
		np = (option_flags == FIGNORECASE) ? rpn.pn_buf : name;

		if (flags & SMB_CATIA)
			smb_vop_catia_v4tov5(np, od_name, MAXNAMELEN);
		else
			(void) strlcpy(od_name, np, MAXNAMELEN);
	}

	pn_free(&rpn);
	return (error);
}