Exemplo n.º 1
0
int
sys_shm_unlink(struct thread *td, struct shm_unlink_args *uap)
{
	char *path;
	Fnv32_t fnv;
	int error;

	path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
	error = copyinstr(uap->path, path, MAXPATHLEN, NULL);
	if (error) {
		free(path, M_TEMP);
		return (error);
	}
#ifdef KTRACE
	if (KTRPOINT(curthread, KTR_NAMEI))
		ktrnamei(path);
#endif
	fnv = fnv_32_str(path, FNV1_32_INIT);
	sx_xlock(&shm_dict_lock);
	error = shm_remove(path, fnv, td->td_ucred);
	sx_xunlock(&shm_dict_lock);
	free(path, M_TEMP);

	return (error);
}
Exemplo n.º 2
0
int
main(int argc, char **argv)
{
	int ch, col, ktrlen, size;
	pid_t do_pid = -1;
	void *m;
	int trpoints = ALL_POINTS;
	char *cp;

	(void) setlocale(LC_CTYPE, "");

	while ((ch = getopt(argc,argv,"f:djlm:np:RTt:")) != -1)
		switch((char)ch) {
		case 'f':
			tracefile = optarg;
			break;
		case 'j':
			fixedformat = 1;
			break;
		case 'd':
			decimal = 1;
			break;
		case 'l':
			tail = 1;
			break;
		case 'm':
			maxdata = atoi(optarg);
			break;
		case 'n':
			fancy = 0;
			break;
		case 'p':
			do_pid = strtoul(optarg, &cp, 0);
			if (*cp != 0)
				errx(1,"invalid number %s", optarg);
			break;
		case 'R':
			timestamp = 2;	/* relative timestamp */
			break;
		case 'T':
			timestamp = 1;
			break;
		case 't':
			trpoints = getpoints(optarg);
			if (trpoints < 0)
				errx(1, "unknown trace point in %s", optarg);
			break;
		default:
			usage();
		}

	if (argc > optind)
		usage();

	m = (void *)malloc(size = 1025);
	if (m == NULL)
		errx(1, "%s", strerror(ENOMEM));
	if (!freopen(tracefile, "r", stdin))
		err(1, "%s", tracefile);
	while (fread_tail(&ktr_header, sizeof(struct ktr_header), 1)) {
		if (trpoints & (1 << ktr_header.ktr_type) &&
		    (do_pid == -1 || ktr_header.ktr_pid == do_pid))
			col = dumpheader(&ktr_header);
		else
			col = -1;
		if ((ktrlen = ktr_header.ktr_len) < 0)
			errx(1, "bogus length 0x%x", ktrlen);
		if (ktrlen > size) {
			m = (void *)realloc(m, ktrlen+1);
			if (m == NULL)
				errx(1, "%s", strerror(ENOMEM));
			size = ktrlen;
		}
		if (ktrlen && fread_tail(m, ktrlen, 1) == 0)
			errx(1, "data too short");
		if ((trpoints & (1<<ktr_header.ktr_type)) == 0)
			continue;
		if (col == -1)
			continue;
		switch (ktr_header.ktr_type) {
		case KTR_SYSCALL:
			ktrsyscall((struct ktr_syscall *)m);
			break;
		case KTR_SYSRET:
			ktrsysret((struct ktr_sysret *)m);
			break;
		case KTR_NAMEI:
			ktrnamei(m, ktrlen);
			break;
		case KTR_GENIO:
			ktrgenio((struct ktr_genio *)m, ktrlen);
			break;
		case KTR_PSIG:
			ktrpsig((struct ktr_psig *)m);
			break;
		case KTR_CSW:
			ktrcsw((struct ktr_csw *)m);
			break;
		case KTR_USER:
			ktruser(ktrlen, m);
			break;
		}
		if (tail)
			(void)fflush(stdout);
	}
	exit(0);
}
Exemplo n.º 3
0
/*
 * Convert a pathname into a pointer to a vnode.
 *
 * The FOLLOW flag is set when symbolic links are to be followed
 * when they occur at the end of the name translation process.
 * Symbolic links are always followed for all other pathname
 * components other than the last.
 *
 * If the LOCKLEAF flag is set, a locked vnode is returned.
 *
 * The segflg defines whether the name is to be copied from user
 * space or kernel space.
 *
 * Overall outline of namei:
 *
 *	copy in name
 *	get starting directory
 *	while (!done && !error) {
 *		call lookup to search path.
 *		if symbolic link, massage name in buffer and continue
 *	}
 */
int
namei(struct nameidata *ndp)
{
	struct filedesc *fdp;		/* pointer to file descriptor state */
	char *cp;			/* pointer into pathname argument */
	struct vnode *dp;		/* the directory we are searching */
	struct iovec aiov;		/* uio for reading symbolic links */
	struct uio auio;
	int error, linklen;
	struct componentname *cnp = &ndp->ni_cnd;
	struct proc *p = cnp->cn_proc;

	ndp->ni_cnd.cn_cred = ndp->ni_cnd.cn_proc->p_ucred;
#ifdef DIAGNOSTIC
	if (!cnp->cn_cred || !cnp->cn_proc)
		panic ("namei: bad cred/proc");
	if (cnp->cn_nameiop & (~OPMASK))
		panic ("namei: nameiop contaminated with flags");
	if (cnp->cn_flags & OPMASK)
		panic ("namei: flags contaminated with nameiops");
#endif
	fdp = cnp->cn_proc->p_fd;

	/*
	 * Get a buffer for the name to be translated, and copy the
	 * name into the buffer.
	 */
	if ((cnp->cn_flags & HASBUF) == 0)
		cnp->cn_pnbuf = pool_get(&namei_pool, PR_WAITOK);
	if (ndp->ni_segflg == UIO_SYSSPACE)
		error = copystr(ndp->ni_dirp, cnp->cn_pnbuf,
			    MAXPATHLEN, &ndp->ni_pathlen);
	else
		error = copyinstr(ndp->ni_dirp, cnp->cn_pnbuf,
			    MAXPATHLEN, &ndp->ni_pathlen);

	/*
	 * Fail on null pathnames
	 */
	if (error == 0 && ndp->ni_pathlen == 1)
		error = ENOENT;

	if (error) {
		pool_put(&namei_pool, cnp->cn_pnbuf);
		ndp->ni_vp = NULL;
		return (error);
	}

#ifdef KTRACE
	if (KTRPOINT(cnp->cn_proc, KTR_NAMEI))
		ktrnamei(cnp->cn_proc, cnp->cn_pnbuf);
#endif
#if NSYSTRACE > 0
	if (ISSET(cnp->cn_proc->p_flag, P_SYSTRACE))
		systrace_namei(ndp);
#endif

	/*
	 *  Strip trailing slashes, as requested
	 */
	if (cnp->cn_flags & STRIPSLASHES) {
		char *end = cnp->cn_pnbuf + ndp->ni_pathlen - 2;

		cp = end;
		while (cp >= cnp->cn_pnbuf && (*cp == '/'))
			cp--;

		/* Still some remaining characters in the buffer */
		if (cp >= cnp->cn_pnbuf) {
			ndp->ni_pathlen -= (end - cp);
			*(cp + 1) = '\0';
		}
	}

	ndp->ni_loopcnt = 0;

	/*
	 * Get starting point for the translation.
	 */
	if ((ndp->ni_rootdir = fdp->fd_rdir) == NULL)
		ndp->ni_rootdir = rootvnode;
	/*
	 * Check if starting from root directory or current directory.
	 */
	if (cnp->cn_pnbuf[0] == '/') {
		dp = ndp->ni_rootdir;
		vref(dp);
	} else {
		dp = fdp->fd_cdir;
		vref(dp);
	}
	for (;;) {
		if (!dp->v_mount) {
			/* Give up if the directory is no longer mounted */
			pool_put(&namei_pool, cnp->cn_pnbuf);
			return (ENOENT);
		}
		cnp->cn_nameptr = cnp->cn_pnbuf;
		ndp->ni_startdir = dp;
		if ((error = lookup(ndp)) != 0) {
			pool_put(&namei_pool, cnp->cn_pnbuf);
			return (error);
		}
		/*
		 * If not a symbolic link, return search result.
		 */
		if ((cnp->cn_flags & ISSYMLINK) == 0) {
			if ((cnp->cn_flags & (SAVENAME | SAVESTART)) == 0)
				pool_put(&namei_pool, cnp->cn_pnbuf);
			else
				cnp->cn_flags |= HASBUF;
			return (0);
		}
		if ((cnp->cn_flags & LOCKPARENT) && (cnp->cn_flags & ISLASTCN))
			VOP_UNLOCK(ndp->ni_dvp, 0, p);
		if (ndp->ni_loopcnt++ >= MAXSYMLINKS) {
			error = ELOOP;
			break;
		}
		if (ndp->ni_pathlen > 1)
			cp = pool_get(&namei_pool, PR_WAITOK);
		else
			cp = cnp->cn_pnbuf;
		aiov.iov_base = cp;
		aiov.iov_len = MAXPATHLEN;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_offset = 0;
		auio.uio_rw = UIO_READ;
		auio.uio_segflg = UIO_SYSSPACE;
		auio.uio_procp = cnp->cn_proc;
		auio.uio_resid = MAXPATHLEN;
		error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred);
		if (error) {
badlink:
			if (ndp->ni_pathlen > 1)
				pool_put(&namei_pool, cp);
			break;
		}
		linklen = MAXPATHLEN - auio.uio_resid;
		if (linklen == 0) {
			error = ENOENT;
			goto badlink;
		}
		if (linklen + ndp->ni_pathlen >= MAXPATHLEN) {
			error = ENAMETOOLONG;
			goto badlink;
		}
		if (ndp->ni_pathlen > 1) {
			bcopy(ndp->ni_next, cp + linklen, ndp->ni_pathlen);
			pool_put(&namei_pool, cnp->cn_pnbuf);
			cnp->cn_pnbuf = cp;
		} else
			cnp->cn_pnbuf[linklen] = '\0';
		ndp->ni_pathlen += linklen;
		vput(ndp->ni_vp);
		dp = ndp->ni_dvp;
		/*
		 * Check if root directory should replace current directory.
		 */
		if (cnp->cn_pnbuf[0] == '/') {
			vrele(dp);
			dp = ndp->ni_rootdir;
			vref(dp);
		}
	}
	pool_put(&namei_pool, cnp->cn_pnbuf);
	vrele(ndp->ni_dvp);
	vput(ndp->ni_vp);
	ndp->ni_vp = NULL;
	return (error);
}
Exemplo n.º 4
0
/* System calls. */
int
sys_shm_open(struct thread *td, struct shm_open_args *uap)
{
	struct filedesc *fdp;
	struct shmfd *shmfd;
	struct file *fp;
	char *path;
	Fnv32_t fnv;
	mode_t cmode;
	int fd, error;

#ifdef CAPABILITY_MODE
	/*
	 * shm_open(2) is only allowed for anonymous objects.
	 */
	if (IN_CAPABILITY_MODE(td) && (uap->path != SHM_ANON))
		return (ECAPMODE);
#endif

	if ((uap->flags & O_ACCMODE) != O_RDONLY &&
	    (uap->flags & O_ACCMODE) != O_RDWR)
		return (EINVAL);

	if ((uap->flags & ~(O_ACCMODE | O_CREAT | O_EXCL | O_TRUNC | O_CLOEXEC)) != 0)
		return (EINVAL);

	fdp = td->td_proc->p_fd;
	cmode = (uap->mode & ~fdp->fd_cmask) & ACCESSPERMS;

	error = falloc(td, &fp, &fd, O_CLOEXEC);
	if (error)
		return (error);

	/* A SHM_ANON path pointer creates an anonymous object. */
	if (uap->path == SHM_ANON) {
		/* A read-only anonymous object is pointless. */
		if ((uap->flags & O_ACCMODE) == O_RDONLY) {
			fdclose(fdp, fp, fd, td);
			fdrop(fp, td);
			return (EINVAL);
		}
		shmfd = shm_alloc(td->td_ucred, cmode);
	} else {
		path = malloc(MAXPATHLEN, M_SHMFD, M_WAITOK);
		error = copyinstr(uap->path, path, MAXPATHLEN, NULL);
#ifdef KTRACE
		if (error == 0 && KTRPOINT(curthread, KTR_NAMEI))
			ktrnamei(path);
#endif
		/* Require paths to start with a '/' character. */
		if (error == 0 && path[0] != '/')
			error = EINVAL;
		if (error) {
			fdclose(fdp, fp, fd, td);
			fdrop(fp, td);
			free(path, M_SHMFD);
			return (error);
		}

		fnv = fnv_32_str(path, FNV1_32_INIT);
		sx_xlock(&shm_dict_lock);
		shmfd = shm_lookup(path, fnv);
		if (shmfd == NULL) {
			/* Object does not yet exist, create it if requested. */
			if (uap->flags & O_CREAT) {
#ifdef MAC
				error = mac_posixshm_check_create(td->td_ucred,
				    path);
				if (error == 0) {
#endif
					shmfd = shm_alloc(td->td_ucred, cmode);
					shm_insert(path, fnv, shmfd);
#ifdef MAC
				}
#endif
			} else {
				free(path, M_SHMFD);
				error = ENOENT;
			}
		} else {
			/*
			 * Object already exists, obtain a new
			 * reference if requested and permitted.
			 */
			free(path, M_SHMFD);
			if ((uap->flags & (O_CREAT | O_EXCL)) ==
			    (O_CREAT | O_EXCL))
				error = EEXIST;
			else {
#ifdef MAC
				error = mac_posixshm_check_open(td->td_ucred,
				    shmfd, FFLAGS(uap->flags & O_ACCMODE));
				if (error == 0)
#endif
				error = shm_access(shmfd, td->td_ucred,
				    FFLAGS(uap->flags & O_ACCMODE));
			}

			/*
			 * Truncate the file back to zero length if
			 * O_TRUNC was specified and the object was
			 * opened with read/write.
			 */
			if (error == 0 &&
			    (uap->flags & (O_ACCMODE | O_TRUNC)) ==
			    (O_RDWR | O_TRUNC)) {
#ifdef MAC
				error = mac_posixshm_check_truncate(
					td->td_ucred, fp->f_cred, shmfd);
				if (error == 0)
#endif
					shm_dotruncate(shmfd, 0);
			}
			if (error == 0)
				shm_hold(shmfd);
		}
		sx_xunlock(&shm_dict_lock);

		if (error) {
			fdclose(fdp, fp, fd, td);
			fdrop(fp, td);
			return (error);
		}
	}

	finit(fp, FFLAGS(uap->flags & O_ACCMODE), DTYPE_SHM, shmfd, &shm_ops);

	td->td_retval[0] = fd;
	fdrop(fp, td);

	return (0);
}
Exemplo n.º 5
0
/*
 * Do a generic nlookup.  Note that the passed nd is not nlookup_done()'d
 * on return, even if an error occurs.  If no error occurs or NLC_CREATE
 * is flagged and ENOENT is returned, then the returned nl_nch is always
 * referenced and locked exclusively.
 *
 * WARNING: For any general error other than ENOENT w/NLC_CREATE, the
 *	    the resulting nl_nch may or may not be locked and if locked
 *	    might be locked either shared or exclusive.
 *
 * Intermediate directory elements, including the current directory, require
 * execute (search) permission.  nlookup does not examine the access 
 * permissions on the returned element.
 *
 * If NLC_CREATE is set the last directory must allow node creation,
 * and an error code of 0 will be returned for a non-existant
 * target (not ENOENT).
 *
 * If NLC_RENAME_DST is set the last directory mut allow node deletion,
 * plus the sticky check is made, and an error code of 0 will be returned
 * for a non-existant target (not ENOENT).
 *
 * If NLC_DELETE is set the last directory mut allow node deletion,
 * plus the sticky check is made.
 *
 * If NLC_REFDVP is set nd->nl_dvp will be set to the directory vnode
 * of the returned entry.  The vnode will be referenced, but not locked,
 * and will be released by nlookup_done() along with everything else.
 *
 * NOTE: As an optimization we attempt to obtain a shared namecache lock
 *	 on any intermediate elements.  On success, the returned element
 *	 is ALWAYS locked exclusively.
 */
int
nlookup(struct nlookupdata *nd)
{
    globaldata_t gd = mycpu;
    struct nlcomponent nlc;
    struct nchandle nch;
    struct nchandle par;
    struct nchandle nctmp;
    struct mount *mp;
    struct vnode *hvp;		/* hold to prevent recyclement */
    int wasdotordotdot;
    char *ptr;
    char *nptr;
    int error;
    int len;
    int dflags;
    int hit = 1;
    int saveflag = nd->nl_flags & ~NLC_NCDIR;
    boolean_t doretry = FALSE;
    boolean_t inretry = FALSE;

nlookup_start:
#ifdef KTRACE
    if (KTRPOINT(nd->nl_td, KTR_NAMEI))
	ktrnamei(nd->nl_td->td_lwp, nd->nl_path);
#endif
    bzero(&nlc, sizeof(nlc));

    /*
     * Setup for the loop.  The current working namecache element is
     * always at least referenced.  We lock it as required, but always
     * return a locked, resolved namecache entry.
     */
    nd->nl_loopcnt = 0;
    if (nd->nl_dvp) {
	vrele(nd->nl_dvp);
	nd->nl_dvp = NULL;
    }
    ptr = nd->nl_path;

    /*
     * Loop on the path components.  At the top of the loop nd->nl_nch
     * is ref'd and unlocked and represents our current position.
     */
    for (;;) {
	/*
	 * Make sure nl_nch is locked so we can access the vnode, resolution
	 * state, etc.
	 */
	if ((nd->nl_flags & NLC_NCPISLOCKED) == 0) {
		nd->nl_flags |= NLC_NCPISLOCKED;
		cache_lock_maybe_shared(&nd->nl_nch, wantsexcllock(nd, ptr));
	}

	/*
	 * Check if the root directory should replace the current
	 * directory.  This is done at the start of a translation
	 * or after a symbolic link has been found.  In other cases
	 * ptr will never be pointing at a '/'.
	 */
	if (*ptr == '/') {
	    do {
		++ptr;
	    } while (*ptr == '/');
	    cache_unlock(&nd->nl_nch);
	    cache_get_maybe_shared(&nd->nl_rootnch, &nch,
				   wantsexcllock(nd, ptr));
	    if (nd->nl_flags & NLC_NCDIR) {
		    cache_drop_ncdir(&nd->nl_nch);
		    nd->nl_flags &= ~NLC_NCDIR;
	    } else {
		    cache_drop(&nd->nl_nch);
	    }
	    nd->nl_nch = nch;		/* remains locked */

	    /*
	     * Fast-track termination.  There is no parent directory of
	     * the root in the same mount from the point of view of
	     * the caller so return EACCES if NLC_REFDVP is specified,
	     * and EEXIST if NLC_CREATE is also specified.
	     * e.g. 'rmdir /' or 'mkdir /' are not allowed.
	     */
	    if (*ptr == 0) {
		if (nd->nl_flags & NLC_REFDVP)
			error = (nd->nl_flags & NLC_CREATE) ? EEXIST : EACCES;
		else
			error = 0;
		break;
	    }
	    continue;
	}

	/*
	 * Pre-calculate next path component so we can check whether the
	 * current component directory is the last directory in the path
	 * or not.
	 */
	for (nptr = ptr; *nptr && *nptr != '/'; ++nptr)
		;

	/*
	 * Check directory search permissions (nd->nl_nch is locked & refd).
	 * This will load dflags to obtain directory-special permissions to
	 * be checked along with the last component.
	 *
	 * We only need to pass-in &dflags for the second-to-last component.
	 * Optimize by passing-in NULL for any prior components, which may
	 * allow the code to bypass the naccess() call.
	 */
	dflags = 0;
	if (*nptr == '/')
	    error = naccess(&nd->nl_nch, NLC_EXEC, nd->nl_cred, NULL);
	else
	    error = naccess(&nd->nl_nch, NLC_EXEC, nd->nl_cred, &dflags);
	if (error) {
	    if (keeperror(nd, error))
		    break;
	    error = 0;
	}

	/*
	 * Extract the next (or last) path component.  Path components are
	 * limited to 255 characters.
	 */
	nlc.nlc_nameptr = ptr;
	nlc.nlc_namelen = nptr - ptr;
	ptr = nptr;
	if (nlc.nlc_namelen >= 256) {
	    error = ENAMETOOLONG;
	    break;
	}

	/*
	 * Lookup the path component in the cache, creating an unresolved
	 * entry if necessary.  We have to handle "." and ".." as special
	 * cases.
	 *
	 * When handling ".." we have to detect a traversal back through a
	 * mount point.   If we are at the root, ".." just returns the root.
	 *
	 * When handling "." or ".." we also have to recalculate dflags
	 * since our dflags will be for some sub-directory instead of the
	 * parent dir.
	 *
	 * This subsection returns a locked, refd 'nch' unless it errors out,
	 * and an unlocked but still ref'd nd->nl_nch.
	 *
	 * The namecache topology is not allowed to be disconnected, so 
	 * encountering a NULL parent will generate EINVAL.  This typically
	 * occurs when a directory is removed out from under a process.
	 *
	 * WARNING! The unlocking of nd->nl_nch is sensitive code.
	 */
	KKASSERT(nd->nl_flags & NLC_NCPISLOCKED);

	if (nlc.nlc_namelen == 1 && nlc.nlc_nameptr[0] == '.') {
	    cache_unlock(&nd->nl_nch);
	    nd->nl_flags &= ~NLC_NCPISLOCKED;
	    cache_get_maybe_shared(&nd->nl_nch, &nch, wantsexcllock(nd, ptr));
	    wasdotordotdot = 1;
	} else if (nlc.nlc_namelen == 2 && 
		   nlc.nlc_nameptr[0] == '.' && nlc.nlc_nameptr[1] == '.') {
	    if (nd->nl_nch.mount == nd->nl_rootnch.mount &&
		nd->nl_nch.ncp == nd->nl_rootnch.ncp
	    ) {
		/*
		 * ".." at the root returns the root
		 */
		cache_unlock(&nd->nl_nch);
		nd->nl_flags &= ~NLC_NCPISLOCKED;
		cache_get_maybe_shared(&nd->nl_nch, &nch,
				       wantsexcllock(nd, ptr));
	    } else {
		/*
		 * Locate the parent ncp.  If we are at the root of a
		 * filesystem mount we have to skip to the mounted-on
		 * point in the underlying filesystem.
		 *
		 * Expect the parent to always be good since the
		 * mountpoint doesn't go away.  XXX hack.  cache_get()
		 * requires the ncp to already have a ref as a safety.
		 *
		 * However, a process which has been broken out of a chroot
		 * will wind up with a NULL parent if it tries to '..' above
		 * the real root, deal with the case.  Note that this does
		 * not protect us from a jail breakout, it just stops a panic
		 * if the jail-broken process tries to '..' past the real
		 * root.
		 */
		nctmp = nd->nl_nch;
		while (nctmp.ncp == nctmp.mount->mnt_ncmountpt.ncp) {
			nctmp = nctmp.mount->mnt_ncmounton;
			if (nctmp.ncp == NULL)
				break;
		}
		if (nctmp.ncp == NULL) {
			if (curthread->td_proc) {
				kprintf("vfs_nlookup: '..' traverse broke "
					"jail: pid %d (%s)\n",
					curthread->td_proc->p_pid,
					curthread->td_comm);
			}
			nctmp = nd->nl_rootnch;
		} else {
			nctmp.ncp = nctmp.ncp->nc_parent;
		}
		cache_hold(&nctmp);
		cache_unlock(&nd->nl_nch);
		nd->nl_flags &= ~NLC_NCPISLOCKED;
		cache_get_maybe_shared(&nctmp, &nch, wantsexcllock(nd, ptr));
		cache_drop(&nctmp);		/* NOTE: zero's nctmp */
	    }
	    wasdotordotdot = 2;
	} else {
	    /*
	     * Must unlock nl_nch when traversing down the path.  However,
	     * the child ncp has not yet been found/created and the parent's
	     * child list might be empty.  Thus releasing the lock can
	     * allow a race whereby the parent ncp's vnode is recycled.
	     * This case can occur especially when maxvnodes is set very low.
	     *
	     * We need the parent's ncp to remain resolved for all normal
	     * filesystem activities, so we vhold() the vp during the lookup
	     * to prevent recyclement due to vnlru / maxvnodes.
	     *
	     * If we race an unlink or rename the ncp might be marked
	     * DESTROYED after resolution, requiring a retry.
	     */
	    if ((hvp = nd->nl_nch.ncp->nc_vp) != NULL)
		vhold(hvp);
	    cache_unlock(&nd->nl_nch);
	    nd->nl_flags &= ~NLC_NCPISLOCKED;
	    error = cache_nlookup_maybe_shared(&nd->nl_nch, &nlc,
					       wantsexcllock(nd, ptr), &nch);
	    if (error == EWOULDBLOCK) {
		    nch = cache_nlookup(&nd->nl_nch, &nlc);
		    if (nch.ncp->nc_flag & NCF_UNRESOLVED)
			hit = 0;
		    for (;;) {
			error = cache_resolve(&nch, nd->nl_cred);
			if (error != EAGAIN &&
			    (nch.ncp->nc_flag & NCF_DESTROYED) == 0) {
				if (error == ESTALE) {
				    if (!inretry)
					error = ENOENT;
				    doretry = TRUE;
				}
				break;
			}
			kprintf("[diagnostic] nlookup: relookup %*.*s\n",
				nch.ncp->nc_nlen, nch.ncp->nc_nlen,
				nch.ncp->nc_name);
			cache_put(&nch);
			nch = cache_nlookup(&nd->nl_nch, &nlc);
		    }
	    }
	    if (hvp)
		vdrop(hvp);
	    wasdotordotdot = 0;
	}

	/*
	 * If the last component was "." or ".." our dflags no longer
	 * represents the parent directory and we have to explicitly
	 * look it up.
	 *
	 * Expect the parent to be good since nch is locked.
	 */
	if (wasdotordotdot && error == 0) {
	    dflags = 0;
	    if ((par.ncp = nch.ncp->nc_parent) != NULL) {
		par.mount = nch.mount;
		cache_hold(&par);
		cache_lock_maybe_shared(&par, wantsexcllock(nd, ptr));
		error = naccess(&par, 0, nd->nl_cred, &dflags);
		cache_put(&par);
		if (error) {
		    if (!keeperror(nd, error))
			    error = 0;
		}
	    }
	}

	/*
	 * [end of subsection]
	 *
	 * nch is locked and referenced.
	 * nd->nl_nch is unlocked and referenced.
	 *
	 * nl_nch must be unlocked or we could chain lock to the root
	 * if a resolve gets stuck (e.g. in NFS).
	 */
	KKASSERT((nd->nl_flags & NLC_NCPISLOCKED) == 0);

	/*
	 * Resolve the namespace if necessary.  The ncp returned by
	 * cache_nlookup() is referenced and locked.
	 *
	 * XXX neither '.' nor '..' should return EAGAIN since they were
	 * previously resolved and thus cannot be newly created ncp's.
	 */
	if (nch.ncp->nc_flag & NCF_UNRESOLVED) {
	    hit = 0;
	    error = cache_resolve(&nch, nd->nl_cred);
	    if (error == ESTALE) {
		if (!inretry)
		    error = ENOENT;
		doretry = TRUE;
	    }
	    KKASSERT(error != EAGAIN);
	} else {
	    error = nch.ncp->nc_error;
	}

	/*
	 * Early completion.  ENOENT is not an error if this is the last
	 * component and NLC_CREATE or NLC_RENAME (rename target) was
	 * requested.  Note that ncp->nc_error is left as ENOENT in that
	 * case, which we check later on.
	 *
	 * Also handle invalid '.' or '..' components terminating a path
	 * for a create/rename/delete.  The standard requires this and pax
	 * pretty stupidly depends on it.
	 */
	if (islastelement(ptr)) {
	    if (error == ENOENT &&
		(nd->nl_flags & (NLC_CREATE | NLC_RENAME_DST))
	    ) {
		if (nd->nl_flags & NLC_NFS_RDONLY) {
			error = EROFS;
		} else {
			error = naccess(&nch, nd->nl_flags | dflags,
					nd->nl_cred, NULL);
		}
	    }
	    if (error == 0 && wasdotordotdot &&
		(nd->nl_flags & (NLC_CREATE | NLC_DELETE |
				 NLC_RENAME_SRC | NLC_RENAME_DST))) {
		/*
		 * POSIX junk
		 */
		if (nd->nl_flags & NLC_CREATE)
			error = EEXIST;
		else if (nd->nl_flags & NLC_DELETE)
			error = (wasdotordotdot == 1) ? EINVAL : ENOTEMPTY;
		else
			error = EINVAL;
	    }
	}

	/*
	 * Early completion on error.
	 */
	if (error) {
	    cache_put(&nch);
	    break;
	}

	/*
	 * If the element is a symlink and it is either not the last
	 * element or it is the last element and we are allowed to
	 * follow symlinks, resolve the symlink.
	 */
	if ((nch.ncp->nc_flag & NCF_ISSYMLINK) &&
	    (*ptr || (nd->nl_flags & NLC_FOLLOW))
	) {
	    if (nd->nl_loopcnt++ >= MAXSYMLINKS) {
		error = ELOOP;
		cache_put(&nch);
		break;
	    }
	    error = nreadsymlink(nd, &nch, &nlc);
	    cache_put(&nch);
	    if (error)
		break;

	    /*
	     * Concatenate trailing path elements onto the returned symlink.
	     * Note that if the path component (ptr) is not exhausted, it
	     * will being with a '/', so we do not have to add another one.
	     *
	     * The symlink may not be empty.
	     */
	    len = strlen(ptr);
	    if (nlc.nlc_namelen == 0 || nlc.nlc_namelen + len >= MAXPATHLEN) {
		error = nlc.nlc_namelen ? ENAMETOOLONG : ENOENT;
		objcache_put(namei_oc, nlc.nlc_nameptr);
		break;
	    }
	    bcopy(ptr, nlc.nlc_nameptr + nlc.nlc_namelen, len + 1);
	    if (nd->nl_flags & NLC_HASBUF)
		objcache_put(namei_oc, nd->nl_path);
	    nd->nl_path = nlc.nlc_nameptr;
	    nd->nl_flags |= NLC_HASBUF;
	    ptr = nd->nl_path;

	    /*
	     * Go back up to the top to resolve any initial '/'s in the
	     * symlink.
	     */
	    continue;
	}
	
	/*
	 * If the element is a directory and we are crossing a mount point,
	 * Locate the mount.
	 */
	while ((nch.ncp->nc_flag & NCF_ISMOUNTPT) && 
	    (nd->nl_flags & NLC_NOCROSSMOUNT) == 0 &&
	    (mp = cache_findmount(&nch)) != NULL
	) {
	    struct vnode *tdp;
	    int vfs_do_busy = 0;

	    /*
	     * VFS must be busied before the namecache entry is locked,
	     * but we don't want to waste time calling vfs_busy() if the
	     * mount point is already resolved.
	     */
again:
	    cache_put(&nch);
	    if (vfs_do_busy) {
		while (vfs_busy(mp, 0)) {
		    if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
			kprintf("nlookup: warning umount race avoided\n");
			cache_dropmount(mp);
			error = EBUSY;
			vfs_do_busy = 0;
			goto double_break;
		    }
		}
	    }
	    cache_get_maybe_shared(&mp->mnt_ncmountpt, &nch,
				   wantsexcllock(nd, ptr));

	    if (nch.ncp->nc_flag & NCF_UNRESOLVED) {
		if (vfs_do_busy == 0) {
		    vfs_do_busy = 1;
		    goto again;
		}
		error = VFS_ROOT(mp, &tdp);
		vfs_unbusy(mp);
		vfs_do_busy = 0;
		if (keeperror(nd, error)) {
		    cache_dropmount(mp);
		    break;
		}
		if (error == 0) {
		    cache_setvp(&nch, tdp);
		    vput(tdp);
		}
	    }
	    if (vfs_do_busy)
		vfs_unbusy(mp);
	    cache_dropmount(mp);
	}

	if (keeperror(nd, error)) {
	    cache_put(&nch);
double_break:
	    break;
	}
	    
	/*
	 * Skip any slashes to get to the next element.  If there 
	 * are any slashes at all the current element must be a
	 * directory or, in the create case, intended to become a directory.
	 * If it isn't we break without incrementing ptr and fall through
	 * to the failure case below.
	 */
	while (*ptr == '/') {
	    if ((nch.ncp->nc_flag & NCF_ISDIR) == 0 && 
		!(nd->nl_flags & NLC_WILLBEDIR)
	    ) {
		break;
	    }
	    ++ptr;
	}

	/*
	 * Continuation case: additional elements and the current
	 * element is a directory.
	 */
	if (*ptr && (nch.ncp->nc_flag & NCF_ISDIR)) {
	    if (nd->nl_flags & NLC_NCDIR) {
		    cache_drop_ncdir(&nd->nl_nch);
		    nd->nl_flags &= ~NLC_NCDIR;
	    } else {
		    cache_drop(&nd->nl_nch);
	    }
	    cache_unlock(&nch);
	    KKASSERT((nd->nl_flags & NLC_NCPISLOCKED) == 0);
	    nd->nl_nch = nch;
	    continue;
	}

	/*
	 * Failure case: additional elements and the current element
	 * is not a directory
	 */
	if (*ptr) {
	    cache_put(&nch);
	    error = ENOTDIR;
	    break;
	}

	/*
	 * Successful lookup of last element.
	 *
	 * Check permissions if the target exists.  If the target does not
	 * exist directory permissions were already tested in the early
	 * completion code above.
	 *
	 * nd->nl_flags will be adjusted on return with NLC_APPENDONLY
	 * if the file is marked append-only, and NLC_STICKY if the directory
	 * containing the file is sticky.
	 */
	if (nch.ncp->nc_vp && (nd->nl_flags & NLC_ALLCHKS)) {
	    error = naccess(&nch, nd->nl_flags | dflags,
			    nd->nl_cred, NULL);
	    if (keeperror(nd, error)) {
		cache_put(&nch);
		break;
	    }
	}

	/*
	 * Termination: no more elements.
	 *
	 * If NLC_REFDVP is set acquire a referenced parent dvp.
	 */
	if (nd->nl_flags & NLC_REFDVP) {
		cache_lock(&nd->nl_nch);
		error = cache_vref(&nd->nl_nch, nd->nl_cred, &nd->nl_dvp);
		cache_unlock(&nd->nl_nch);
		if (keeperror(nd, error)) {
			kprintf("NLC_REFDVP: Cannot ref dvp of %p\n", nch.ncp);
			cache_put(&nch);
			break;
		}
	}
	if (nd->nl_flags & NLC_NCDIR) {
		cache_drop_ncdir(&nd->nl_nch);
		nd->nl_flags &= ~NLC_NCDIR;
	} else {
		cache_drop(&nd->nl_nch);
	}
	nd->nl_nch = nch;
	nd->nl_flags |= NLC_NCPISLOCKED;
	error = 0;
	break;
    }

    if (hit)
	++gd->gd_nchstats->ncs_longhits;
    else
	++gd->gd_nchstats->ncs_longmiss;

    if (nd->nl_flags & NLC_NCPISLOCKED)
	KKASSERT(cache_lockstatus(&nd->nl_nch) > 0);

    /*
     * Retry the whole thing if doretry flag is set, but only once.
     * autofs(5) may mount another filesystem under its root directory
     * while resolving a path.
     */
    if (doretry && !inretry) {
	inretry = TRUE;
	nd->nl_flags &= NLC_NCDIR;
	nd->nl_flags |= saveflag;
	goto nlookup_start;
    }

    /*
     * NOTE: If NLC_CREATE was set the ncp may represent a negative hit
     * (ncp->nc_error will be ENOENT), but we will still return an error
     * code of 0.
     */
    return(error);
}