static int
nullfs_readdir(struct vnop_readdir_args * ap)
{
    struct vnode *vp, *lvp;
    int error;
    struct null_mount * null_mp = MOUNTTONULLMOUNT(vnode_mount(ap->a_vp));

    NULLFSDEBUG("%s %p\n", __FUNCTION__, ap->a_vp);

    /* assumption is that any vp that comes through here had to go through lookup */

    lck_mtx_lock(&null_mp->nullm_lock);
    if (nullfs_isspecialvp(ap->a_vp)) {
        error = nullfs_special_readdir(ap);
        lck_mtx_unlock(&null_mp->nullm_lock);
        return error;
    }
    lck_mtx_unlock(&null_mp->nullm_lock);

    vp = ap->a_vp;
    lvp = NULLVPTOLOWERVP(vp);
    error = vnode_getwithref(lvp);
    if (error == 0) {
        error = VNOP_READDIR(lvp, ap->a_uio, ap->a_flags, ap->a_eofflag,
            ap->a_numdirent, ap->a_context);
        vnode_put(lvp);
    }

    return error;
}
static int
nullfs_readlink(struct vnop_readlink_args * ap)
{
    NULLFSDEBUG("%s %p\n", __FUNCTION__, ap->a_vp);
    int error;
    struct vnode *vp, *lvp;

    if (nullfs_checkspecialvp(ap->a_vp)) {
        return ENOTSUP; /* the special vnodes aren't links */
    }

    vp = ap->a_vp;
    lvp = NULLVPTOLOWERVP(vp);
    error = vnode_getwithref(lvp);
    if (error == 0) {
        error = VNOP_READLINK(lvp, ap->a_uio, ap->a_context);
        vnode_put(lvp);

        if (error) {
            NULLFSDEBUG("readlink failed: %d\n", error);
        }
    }

    return error;
}
static int
nullfs_getattr(struct vnop_getattr_args * args)
{
    int error;
    struct null_mount * null_mp = MOUNTTONULLMOUNT(vnode_mount(args->a_vp));

    NULLFSDEBUG("%s %p\n", __FUNCTION__, args->a_vp);

    lck_mtx_lock(&null_mp->nullm_lock);
    if (nullfs_isspecialvp(args->a_vp)) {
        error = nullfs_special_getattr(args);
        lck_mtx_unlock(&null_mp->nullm_lock);
        return error;
    }
    lck_mtx_unlock(&null_mp->nullm_lock);

    /* this will return a different inode for third than read dir will */
    struct vnode * lowervp = NULLVPTOLOWERVP(args->a_vp);

    error = vnode_getwithref(lowervp);
    if (error == 0) {
        error = VNOP_GETATTR(lowervp, args->a_vap, args->a_context);
        vnode_put(lowervp);

        if (error == 0) {
            /* fix up fsid so it doesn't say the underlying fs */
            VATTR_RETURN(args->a_vap, va_fsid,
                vfs_statfs(vnode_mount(args->a_vp))->f_fsid.val[0]);
        }
    }

    return error;
}
static int
null_reclaim(struct vnop_reclaim_args * ap)
{
    struct vnode * vp;
    struct null_node * xp;
    struct vnode * lowervp;
    struct null_mount * null_mp = MOUNTTONULLMOUNT(vnode_mount(ap->a_vp));

    NULLFSDEBUG("%s %p\n", __FUNCTION__, ap->a_vp);

    vp = ap->a_vp;

    xp = VTONULL(vp);
    lowervp = xp->null_lowervp;

    lck_mtx_lock(&null_mp->nullm_lock);

    vnode_removefsref(vp);

    if (lowervp != NULL) {
        /* root and second don't have a lowervp, so nothing to release and nothing
         * got hashed */
        if (xp->null_flags & NULL_FLAG_HASHED) {
            /* only call this if we actually made it into the hash list. reclaim gets
             * called also to clean up a vnode that got created when it didn't need to
             * under race conditions */
            null_hashrem(xp);
        }
        vnode_getwithref(lowervp);
        vnode_rele(lowervp);
        vnode_put(lowervp);
    }

    if (vp == null_mp->nullm_rootvp) {
        null_mp->nullm_rootvp = NULL;
    } else if (vp == null_mp->nullm_secondvp) {
        null_mp->nullm_secondvp = NULL;
    } else if (vp == null_mp->nullm_thirdcovervp) {
        null_mp->nullm_thirdcovervp = NULL;
    }

    lck_mtx_unlock(&null_mp->nullm_lock);

    cache_purge(vp);
    vnode_clearfsnode(vp);

    FREE(xp, M_TEMP);

    return 0;
}
void
vm_swapfile_close(uint64_t path_addr, vnode_t vp)
{
    struct nameidata nd;
    vfs_context_t context = vfs_context_current();
    int error = 0;

    vnode_getwithref(vp);
    vnode_close(vp, 0, context);

    NDINIT(&nd, DELETE, OP_UNLINK, AUDITVNPATH1, UIO_SYSSPACE, path_addr, context);

    error = unlink1(context, &nd, 0);
}
void
vm_swapfile_close(uint64_t path_addr, vnode_t vp)
{
    vfs_context_t context = vfs_context_current();
    int error;

    vnode_getwithref(vp);
    vnode_close(vp, 0, context);

    error = unlink1(context, NULLVP, CAST_USER_ADDR_T(path_addr),
        UIO_SYSSPACE, 0);

#if DEVELOPMENT || DEBUG
    if (error)
        printf("%s : unlink of %s failed with error %d", __FUNCTION__,
            (char *)path_addr, error);
#endif
}
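Both vm_swapfile_close() variants above rely on the same invariant even though the unlink1() signature changed between them: vnode_close() consumes one iocount, so the caller must supply one via vnode_getwithref() first. A minimal sketch of that invariant, assuming only the calls shown above (the source does not check the vnode_getwithref() return; this sketch does):

static void
swapfile_release_sketch(vnode_t vp, vfs_context_t context)
{
    if (vnode_getwithref(vp) == 0) { /* take an iocount on vp */
        vnode_close(vp, 0, context); /* vnode_close() consumes it */
    }
}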
static int
nullfs_listxattr(struct vnop_listxattr_args * args)
{
    int error;
    struct vnode *vp, *lvp;

    NULLFSDEBUG("%s %p\n", __FUNCTION__, args->a_vp);

    if (nullfs_checkspecialvp(args->a_vp)) {
        return 0; /* nothing extra needed */
    }

    vp = args->a_vp;
    lvp = NULLVPTOLOWERVP(vp);
    error = vnode_getwithref(lvp);
    if (error == 0) {
        error = VNOP_LISTXATTR(lvp, args->a_uio, args->a_size,
            args->a_options, args->a_context);
        vnode_put(lvp);
    }

    return error;
}
static int
nullfs_mnomap(struct vnop_mnomap_args * args)
{
    int error;
    struct vnode *vp, *lvp;

    NULLFSDEBUG("%s %p\n", __FUNCTION__, args->a_vp);

    if (nullfs_checkspecialvp(args->a_vp)) {
        return 0; /* nothing extra needed */
    }

    vp = args->a_vp;
    lvp = NULLVPTOLOWERVP(vp);
    error = vnode_getwithref(lvp);
    if (error == 0) {
        error = VNOP_MNOMAP(lvp, args->a_context);
        vnode_put(lvp);
    }

    return error;
}
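The nullfs handlers above (readdir, readlink, getattr, listxattr, mnomap) all reduce to the same borrow-and-forward pattern against the lower vnode. A sketch of that pattern as a hypothetical helper (nullfs_forward and its op callback are illustrative names, not part of the source; NULLVPTOLOWERVP, vnode_getwithref and vnode_put are):

static int
nullfs_forward(struct vnode *vp, int (*op)(struct vnode *lvp, void *cookie), void *cookie)
{
    struct vnode *lvp = NULLVPTOLOWERVP(vp); /* lower vnode backing the shadow */
    int error = vnode_getwithref(lvp);       /* iocount pins lvp across the call */

    if (error == 0) {
        error = op(lvp, cookie);             /* forward the VNOP to the lower FS */
        vnode_put(lvp);                      /* drop the iocount */
    }
    return error;
}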
/*
 * forkproc
 *
 * Description: Create a new process structure, given a parent process
 *              structure.
 *
 * Parameters:  parent_proc     The parent process
 *
 * Returns:     !NULL           The new process structure
 *              NULL            Error (insufficient free memory)
 *
 * Note:        When successful, the newly created process structure is
 *              partially initialized; if a caller needs to deconstruct the
 *              returned structure, they must call forkproc_free() to do so.
 */
proc_t
forkproc(proc_t parent_proc)
{
    proc_t child_proc;  /* Our new process */
    static int nextpid = 0, pidwrap = 0, nextpidversion = 0;
    int error = 0;
    struct session *sessp;
    uthread_t parent_uthread = (uthread_t)get_bsdthread_info(current_thread());

    MALLOC_ZONE(child_proc, proc_t, sizeof *child_proc, M_PROC, M_WAITOK);
    if (child_proc == NULL) {
        printf("forkproc: M_PROC zone exhausted\n");
        goto bad;
    }
    /* zero it out as we need to insert in hash */
    bzero(child_proc, sizeof *child_proc);

    MALLOC_ZONE(child_proc->p_stats, struct pstats *,
        sizeof *child_proc->p_stats, M_PSTATS, M_WAITOK);
    if (child_proc->p_stats == NULL) {
        printf("forkproc: M_SUBPROC zone exhausted (p_stats)\n");
        FREE_ZONE(child_proc, sizeof *child_proc, M_PROC);
        child_proc = NULL;
        goto bad;
    }
    MALLOC_ZONE(child_proc->p_sigacts, struct sigacts *,
        sizeof *child_proc->p_sigacts, M_SIGACTS, M_WAITOK);
    if (child_proc->p_sigacts == NULL) {
        printf("forkproc: M_SUBPROC zone exhausted (p_sigacts)\n");
        FREE_ZONE(child_proc->p_stats, sizeof *child_proc->p_stats, M_PSTATS);
        FREE_ZONE(child_proc, sizeof *child_proc, M_PROC);
        child_proc = NULL;
        goto bad;
    }

    /* allocate a callout for use by interval timers */
    child_proc->p_rcall = thread_call_allocate((thread_call_func_t)realitexpire, child_proc);
    if (child_proc->p_rcall == NULL) {
        FREE_ZONE(child_proc->p_sigacts, sizeof *child_proc->p_sigacts, M_SIGACTS);
        FREE_ZONE(child_proc->p_stats, sizeof *child_proc->p_stats, M_PSTATS);
        FREE_ZONE(child_proc, sizeof *child_proc, M_PROC);
        child_proc = NULL;
        goto bad;
    }

    /*
     * Find an unused PID.
     */
    proc_list_lock();

    nextpid++;
retry:
    /*
     * If the process ID prototype has wrapped around,
     * restart somewhat above 0, as the low-numbered procs
     * tend to include daemons that don't exit.
     */
    if (nextpid >= PID_MAX) {
        nextpid = 100;
        pidwrap = 1;
    }
    if (pidwrap != 0) {
        /* if the pid stays in hash both for zombie and running state */
        if (pfind_locked(nextpid) != PROC_NULL) {
            nextpid++;
            goto retry;
        }
        if (pgfind_internal(nextpid) != PGRP_NULL) {
            nextpid++;
            goto retry;
        }
        if (session_find_internal(nextpid) != SESSION_NULL) {
            nextpid++;
            goto retry;
        }
    }
    nprocs++;
    child_proc->p_pid = nextpid;
    child_proc->p_idversion = nextpidversion++;
#if 1
    if (child_proc->p_pid != 0) {
        if (pfind_locked(child_proc->p_pid) != PROC_NULL)
            panic("proc in the list already\n");
    }
#endif
    /* Insert in the hash */
    child_proc->p_listflag |= (P_LIST_INHASH | P_LIST_INCREATE);
    LIST_INSERT_HEAD(PIDHASH(child_proc->p_pid), child_proc, p_hash);
    proc_list_unlock();

    /*
     * We've identified the PID we are going to use; initialize the new
     * process structure.
     */
    child_proc->p_stat = SIDL;
    child_proc->p_pgrpid = PGRPID_DEAD;

    /*
     * The zero'ing of the proc was at the allocation time due to need
     * for insertion to hash. Copy the section that is to be copied
     * directly from the parent.
     */
    bcopy(&parent_proc->p_startcopy, &child_proc->p_startcopy,
        (unsigned)((caddr_t)&child_proc->p_endcopy - (caddr_t)&child_proc->p_startcopy));

    /*
     * Some flags are inherited from the parent.
     * Duplicate sub-structures as needed.
     * Increase reference counts on shared objects.
     * The p_stats and p_sigacts substructs are set in vm_fork.
     */
    child_proc->p_flag = (parent_proc->p_flag & (P_LP64 | P_TRANSLATED | P_AFFINITY));
    if (parent_proc->p_flag & P_PROFIL)
        startprofclock(child_proc);
    /*
     * Note that if the current thread has an assumed identity, this
     * credential will be granted to the new process.
     */
    child_proc->p_ucred = kauth_cred_get_with_ref();

#ifdef CONFIG_EMBEDDED
    lck_mtx_init(&child_proc->p_mlock, proc_lck_grp, proc_lck_attr);
    lck_mtx_init(&child_proc->p_fdmlock, proc_lck_grp, proc_lck_attr);
#if CONFIG_DTRACE
    lck_mtx_init(&child_proc->p_dtrace_sprlock, proc_lck_grp, proc_lck_attr);
#endif
    lck_spin_init(&child_proc->p_slock, proc_lck_grp, proc_lck_attr);
#else /* !CONFIG_EMBEDDED */
    lck_mtx_init(&child_proc->p_mlock, proc_mlock_grp, proc_lck_attr);
    lck_mtx_init(&child_proc->p_fdmlock, proc_fdmlock_grp, proc_lck_attr);
#if CONFIG_DTRACE
    lck_mtx_init(&child_proc->p_dtrace_sprlock, proc_lck_grp, proc_lck_attr);
#endif
    lck_spin_init(&child_proc->p_slock, proc_slock_grp, proc_lck_attr);
#endif /* !CONFIG_EMBEDDED */
    klist_init(&child_proc->p_klist);

    if (child_proc->p_textvp != NULLVP) {
        /* bump references to the text vnode */
        /* Need to hold iocount across the ref call */
        if (vnode_getwithref(child_proc->p_textvp) == 0) {
            error = vnode_ref(child_proc->p_textvp);
            vnode_put(child_proc->p_textvp);
            if (error != 0)
                child_proc->p_textvp = NULLVP;
        }
    }

    /*
     * Copy the parent's per-process open file table to the child; if
     * there is a per-thread current working directory, set the child's
     * per-process current working directory to that instead of the
     * parent's.
     *
     * XXX may fail to copy descriptors to child
     */
    child_proc->p_fd = fdcopy(parent_proc, parent_uthread->uu_cdir);

#if SYSV_SHM
    if (parent_proc->vm_shm) {
        /* XXX may fail to attach shm to child */
        (void)shmfork(parent_proc, child_proc);
    }
#endif
    /*
     * inherit the limit structure to child
     */
    proc_limitfork(parent_proc, child_proc);

    if (child_proc->p_limit->pl_rlimit[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) {
        uint64_t rlim_cur = child_proc->p_limit->pl_rlimit[RLIMIT_CPU].rlim_cur;
        child_proc->p_rlim_cpu.tv_sec = (rlim_cur > __INT_MAX__) ? __INT_MAX__ : rlim_cur;
    }

    /* Initialize new process stats, including start time */
    /* <rdar://6640543> non-zeroed portion contains garbage AFAICT */
    bzero(&child_proc->p_stats->pstat_startzero,
        (unsigned)((caddr_t)&child_proc->p_stats->pstat_endzero -
                   (caddr_t)&child_proc->p_stats->pstat_startzero));
    bzero(&child_proc->p_stats->user_p_prof, sizeof(struct user_uprof));
    microtime(&child_proc->p_start);
    child_proc->p_stats->p_start = child_proc->p_start; /* for compat */

    if (parent_proc->p_sigacts != NULL)
        (void)memcpy(child_proc->p_sigacts, parent_proc->p_sigacts,
            sizeof *child_proc->p_sigacts);
    else
        (void)memset(child_proc->p_sigacts, 0, sizeof *child_proc->p_sigacts);

    sessp = proc_session(parent_proc);
    if (sessp->s_ttyvp != NULL && parent_proc->p_flag & P_CONTROLT)
        OSBitOrAtomic(P_CONTROLT, &child_proc->p_flag);
    session_rele(sessp);

    /*
     * block all signals to reach the process.
     * no transition race should be occurring with the child yet,
     * but indicate that the process is in (the creation) transition.
     */
    proc_signalstart(child_proc, 0);
    proc_transstart(child_proc, 0);

    child_proc->p_pcaction = (parent_proc->p_pcaction) & P_PCMAX;
    TAILQ_INIT(&child_proc->p_uthlist);
    TAILQ_INIT(&child_proc->p_aio_activeq);
    TAILQ_INIT(&child_proc->p_aio_doneq);

    /* Inherit the parent flags for code sign */
    child_proc->p_csflags = parent_proc->p_csflags;

    /*
     * All processes have work queue locks; cleaned up by
     * reap_child_locked()
     */
    workqueue_init_lock(child_proc);

    /*
     * Copy work queue information
     *
     * Note: This should probably only happen in the case where we are
     *       creating a child that is a copy of the parent; since this
     *       routine is called in the non-duplication case of vfork()
     *       or posix_spawn(), then this information should likely not
     *       be duplicated.
     *
     * <rdar://6640553> Work queue pointers that no longer point to code
     */
    child_proc->p_wqthread = parent_proc->p_wqthread;
    child_proc->p_threadstart = parent_proc->p_threadstart;
    child_proc->p_pthsize = parent_proc->p_pthsize;
    child_proc->p_targconc = parent_proc->p_targconc;
    if ((parent_proc->p_lflag & P_LREGISTER) != 0) {
        child_proc->p_lflag |= P_LREGISTER;
    }
    child_proc->p_dispatchqueue_offset = parent_proc->p_dispatchqueue_offset;
#if PSYNCH
    pth_proc_hashinit(child_proc);
#endif /* PSYNCH */

#if CONFIG_LCTX
    child_proc->p_lctx = NULL;
    /* Add new process to login context (if any). */
    if (parent_proc->p_lctx != NULL) {
        /*
         * <rdar://6640564> This should probably be delayed in the
         * vfork() or posix_spawn() cases.
         */
        LCTX_LOCK(parent_proc->p_lctx);
        enterlctx(child_proc, parent_proc->p_lctx, 0);
    }
#endif

bad:
    return (child_proc);
}
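The PID-allocation loop in forkproc() reads well in isolation. Below is a stand-alone sketch of it, with the three hash probes (pfind_locked, pgfind_internal, session_find_internal) collapsed into one hypothetical in_use() predicate and the kernel's PID_MAX replaced by an assumed stand-in value:

enum { PID_MAX_SKETCH = 99999 }; /* stand-in; the kernel's PID_MAX may differ */

static int
alloc_pid_sketch(int *nextpid, int *pidwrap, int (*in_use)(int pid))
{
    (*nextpid)++;
    for (;;) {
        if (*nextpid >= PID_MAX_SKETCH) {
            /* wrapped: restart above the long-lived low-numbered daemons */
            *nextpid = 100;
            *pidwrap = 1;
        }
        /* before the first wrap every pid is fresh, so no probe is needed */
        if (*pidwrap && in_use(*nextpid)) {
            (*nextpid)++;
            continue;
        }
        return *nextpid;
    }
}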
/*
 * Search a pathname.
 * This is a very central and rather complicated routine.
 *
 * The pathname is pointed to by ni_ptr and is of length ni_pathlen.
 * The starting directory is taken from ni_startdir. The pathname is
 * descended until done, or a symbolic link is encountered. The variable
 * ni_more is clear if the path is completed; it is set to one if a
 * symbolic link needing interpretation is encountered.
 *
 * The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on
 * whether the name is to be looked up, created, renamed, or deleted.
 * When CREATE, RENAME, or DELETE is specified, information usable in
 * creating, renaming, or deleting a directory entry may be calculated.
 * If flag has LOCKPARENT or'ed into it, the parent directory is returned
 * locked. If flag has WANTPARENT or'ed into it, the parent directory is
 * returned unlocked. Otherwise the parent directory is not returned. If
 * the target of the pathname exists and LOCKLEAF is or'ed into the flag
 * the target is returned locked, otherwise it is returned unlocked.
 * When creating or renaming and LOCKPARENT is specified, the target may not
 * be ".".  When deleting and LOCKPARENT is specified, the target may be ".".
 *
 * Overall outline of lookup:
 *
 * dirloop:
 *	identify next component of name at ndp->ni_ptr
 *	handle degenerate case where name is null string
 *	if .. and crossing mount points and on mounted filesys, find parent
 *	call VNOP_LOOKUP routine for next component name
 *	    directory vnode returned in ni_dvp, unlocked unless LOCKPARENT set
 *	    component vnode returned in ni_vp (if it exists), locked.
 *	if result vnode is mounted on and crossing mount points,
 *	    find mounted on vnode
 *	if more components of name, do next level at dirloop
 *	return the answer in ni_vp, locked if LOCKLEAF set
 *	    if LOCKPARENT set, return locked parent in ni_dvp
 *	    if WANTPARENT set, return unlocked parent in ni_dvp
 *
 * Returns:	0			Success
 *		ENOENT			No such file or directory
 *		EBADF			Bad file descriptor
 *		ENOTDIR			Not a directory
 *		EROFS			Read-only file system [CREATE]
 *		EISDIR			Is a directory [CREATE]
 *	cache_lookup_path:ERECYCLE	(vnode was recycled from underneath us, redrive lookup again)
 *	vnode_authorize:EROFS
 *	vnode_authorize:EACCES
 *	vnode_authorize:EPERM
 *	vnode_authorize:???
 *	VNOP_LOOKUP:ENOENT		No such file or directory
 *	VNOP_LOOKUP:EJUSTRETURN		Restart system call (INTERNAL)
 *	VNOP_LOOKUP:???
 *	VFS_ROOT:ENOTSUP
 *	VFS_ROOT:ENOENT
 *	VFS_ROOT:???
 */
int
lookup(struct nameidata *ndp)
{
    char *cp;           /* pointer into pathname argument */
    vnode_t tdp;        /* saved dp */
    vnode_t dp;         /* the directory we are searching */
    int docache = 1;    /* == 0 do not cache last component */
    int wantparent;     /* 1 => wantparent or lockparent flag */
    int rdonly;         /* lookup read-only flag bit */
    int dp_authorized = 0;
    int error = 0;
    struct componentname *cnp = &ndp->ni_cnd;
    vfs_context_t ctx = cnp->cn_context;
    int vbusyflags = 0;
    int nc_generation = 0;
    vnode_t last_dp = NULLVP;
    int keep_going;
    int atroot;

    /*
     * Setup: break out flag bits into variables.
     */
    if (cnp->cn_flags & (NOCACHE | DOWHITEOUT)) {
        if ((cnp->cn_flags & NOCACHE) || (cnp->cn_nameiop == DELETE))
            docache = 0;
    }
    wantparent = cnp->cn_flags & (LOCKPARENT | WANTPARENT);
    rdonly = cnp->cn_flags & RDONLY;
    cnp->cn_flags &= ~ISSYMLINK;
    cnp->cn_consume = 0;

    dp = ndp->ni_startdir;
    ndp->ni_startdir = NULLVP;

    if ((cnp->cn_flags & CN_NBMOUNTLOOK) != 0)
        vbusyflags = LK_NOWAIT;
    cp = cnp->cn_nameptr;

    if (*cp == '\0') {
        if ( (vnode_getwithref(dp)) ) {
            dp = NULLVP;
            error = ENOENT;
            goto bad;
        }
        ndp->ni_vp = dp;
        error = lookup_handle_emptyname(ndp, cnp, wantparent);
        if (error) {
            goto bad;
        }
        return 0;
    }
dirloop:
    atroot = 0;
    ndp->ni_vp = NULLVP;

    if ( (error = cache_lookup_path(ndp, cnp, dp, ctx, &dp_authorized, last_dp)) ) {
        dp = NULLVP;
        goto bad;
    }
    if ((cnp->cn_flags & ISLASTCN)) {
        if (docache)
            cnp->cn_flags |= MAKEENTRY;
    } else
        cnp->cn_flags |= MAKEENTRY;

    dp = ndp->ni_dvp;

    if (ndp->ni_vp != NULLVP) {
        /*
         * cache_lookup_path returned a non-NULL ni_vp then,
         * we're guaranteed that the dp is a VDIR, it's
         * been authorized, and vp is not ".."
         *
         * make sure we don't try to enter the name back into
         * the cache if this vp is purged before we get to that
         * check since we won't have serialized behind whatever
         * activity is occurring in the FS that caused the purge
         */
        if (dp != NULLVP)
            nc_generation = dp->v_nc_generation - 1;

        goto returned_from_lookup_path;
    }

    /*
     * Handle "..": two special cases.
     * 1. If at root directory (e.g. after chroot)
     *    or at absolute root directory
     *    then ignore it so can't get out.
     * 2. If this vnode is the root of a mounted
     *    filesystem, then replace it with the
     *    vnode which was mounted on so we take the
     *    .. in the other file system.
     */
    if ( (cnp->cn_flags & ISDOTDOT) ) {
        for (;;) {
            if (dp == ndp->ni_rootdir || dp == rootvnode) {
                ndp->ni_dvp = dp;
                ndp->ni_vp = dp;
                /*
                 * we're pinned at the root
                 * we've already got one reference on 'dp'
                 * courtesy of cache_lookup_path... take
                 * another one for the ".."
                 * if we fail to get the new reference, we'll
                 * drop our original down in 'bad'
                 */
                if ( (vnode_get(dp)) ) {
                    error = ENOENT;
                    goto bad;
                }
                atroot = 1;
                goto returned_from_lookup_path;
            }
            if ((dp->v_flag & VROOT) == 0 ||
                (cnp->cn_flags & NOCROSSMOUNT))
                break;
            if (dp->v_mount == NULL) {  /* forced umount */
                error = EBADF;
                goto bad;
            }
            tdp = dp;
            dp = tdp->v_mount->mnt_vnodecovered;

            vnode_put(tdp);

            if ( (vnode_getwithref(dp)) ) {
                dp = NULLVP;
                error = ENOENT;
                goto bad;
            }
            ndp->ni_dvp = dp;
            dp_authorized = 0;
        }
    }

    /*
     * We now have a segment name to search for, and a directory to search.
     */
unionlookup:
    ndp->ni_vp = NULLVP;

    if (dp->v_type != VDIR) {
        error = ENOTDIR;
        goto lookup_error;
    }
    if ( (cnp->cn_flags & DONOTAUTH) != DONOTAUTH ) {
        error = lookup_authorize_search(dp, cnp, dp_authorized, ctx);
        if (error) {
            goto lookup_error;
        }
    }

    /*
     * Now that we've authorized a lookup, can bail out if the filesystem
     * will be doing a batched operation.  Return an iocount on dvp.
     */
#if NAMEDRSRCFORK
    if ((cnp->cn_flags & ISLASTCN) && namei_compound_available(dp, ndp) &&
        !(cnp->cn_flags & CN_WANTSRSRCFORK)) {
#else
    if ((cnp->cn_flags & ISLASTCN) && namei_compound_available(dp, ndp)) {
#endif /* NAMEDRSRCFORK */
        ndp->ni_flag |= NAMEI_UNFINISHED;
        ndp->ni_ncgeneration = dp->v_nc_generation;
        return 0;
    }

    nc_generation = dp->v_nc_generation;

    error = VNOP_LOOKUP(dp, &ndp->ni_vp, cnp, ctx);

    if ( error ) {
lookup_error:
        if ((error == ENOENT) &&
            (dp->v_flag & VROOT) && (dp->v_mount != NULL) &&
            (dp->v_mount->mnt_flag & MNT_UNION)) {
#if CONFIG_VFS_FUNNEL
            if ((cnp->cn_flags & FSNODELOCKHELD)) {
                cnp->cn_flags &= ~FSNODELOCKHELD;
                unlock_fsnode(dp, NULL);
            }
#endif /* CONFIG_VFS_FUNNEL */
            tdp = dp;
            dp = tdp->v_mount->mnt_vnodecovered;

            vnode_put(tdp);

            if ( (vnode_getwithref(dp)) ) {
                dp = NULLVP;
                error = ENOENT;
                goto bad;
            }
            ndp->ni_dvp = dp;
            dp_authorized = 0;
            goto unionlookup;
        }
        if (error != EJUSTRETURN)
            goto bad;

        if (ndp->ni_vp != NULLVP)
            panic("leaf should be empty");

        error = lookup_validate_creation_path(ndp);
        if (error)
            goto bad;
        /*
         * We return with ni_vp NULL to indicate that the entry
         * doesn't currently exist, leaving a pointer to the
         * referenced directory vnode in ndp->ni_dvp.
         */
        if (cnp->cn_flags & SAVESTART) {
            if ( (vnode_get(ndp->ni_dvp)) ) {
                error = ENOENT;
                goto bad;
            }
            ndp->ni_startdir = ndp->ni_dvp;
        }
        if (!wantparent)
            vnode_put(ndp->ni_dvp);

        if (kdebug_enable)
            kdebug_lookup(ndp->ni_dvp, cnp);
        return (0);
    }
returned_from_lookup_path:
    /* We'll always have an iocount on ni_vp when this finishes. */
    error = lookup_handle_found_vnode(ndp, cnp, rdonly, vbusyflags,
        &keep_going, nc_generation, wantparent, atroot, ctx);
    if (error != 0) {
        goto bad2;
    }

    if (keep_going) {
        dp = ndp->ni_vp;

        /* namei() will handle symlinks */
        if ((dp->v_type == VLNK) &&
            ((cnp->cn_flags & FOLLOW) || (ndp->ni_flag & NAMEI_TRAILINGSLASH) ||
             *ndp->ni_next == '/')) {
            return 0;
        }

        /*
         * Otherwise, there's more path to process.
         * cache_lookup_path is now responsible for dropping io ref on dp
         * when it is called again in the dirloop.  This ensures we hold
         * a ref on dp until we complete the next round of lookup.
         */
        last_dp = dp;

        goto dirloop;
    }

    return (0);
bad2:
#if CONFIG_VFS_FUNNEL
    if ((cnp->cn_flags & FSNODELOCKHELD)) {
        cnp->cn_flags &= ~FSNODELOCKHELD;
        unlock_fsnode(ndp->ni_dvp, NULL);
    }
#endif /* CONFIG_VFS_FUNNEL */
    if (ndp->ni_dvp)
        vnode_put(ndp->ni_dvp);

    vnode_put(ndp->ni_vp);
    ndp->ni_vp = NULLVP;

    if (kdebug_enable)
        kdebug_lookup(dp, cnp);
    return (error);
bad:
#if CONFIG_VFS_FUNNEL
    if ((cnp->cn_flags & FSNODELOCKHELD)) {
        cnp->cn_flags &= ~FSNODELOCKHELD;
        unlock_fsnode(ndp->ni_dvp, NULL);
    }
#endif /* CONFIG_VFS_FUNNEL */
    if (dp)
        vnode_put(dp);
    ndp->ni_vp = NULLVP;

    if (kdebug_enable)
        kdebug_lookup(dp, cnp);
    return (error);
}

int
lookup_validate_creation_path(struct nameidata *ndp)
{
    struct componentname *cnp = &ndp->ni_cnd;

    /*
     * If creating and at end of pathname, then can consider
     * allowing file to be created.
     */
    if (cnp->cn_flags & RDONLY) {
        return EROFS;
    }
    if ((cnp->cn_flags & ISLASTCN) && (ndp->ni_flag & NAMEI_TRAILINGSLASH) &&
        !(cnp->cn_flags & WILLBEDIR)) {
        return ENOENT;
    }

    return 0;
}

/*
 * Modifies only ni_vp.  Always returns with ni_vp still valid (iocount held).
 */
int
lookup_traverse_mountpoints(struct nameidata *ndp, struct componentname *cnp,
    vnode_t dp, int vbusyflags, vfs_context_t ctx)
{
    mount_t mp;
    vnode_t tdp;
    int error = 0;
    uthread_t uth;
    uint32_t depth = 0;
    int dont_cache_mp = 0;
    vnode_t mounted_on_dp;
    int current_mount_generation = 0;

    mounted_on_dp = dp;
    current_mount_generation = mount_generation;

    while ((dp->v_type == VDIR) && dp->v_mountedhere &&
           ((cnp->cn_flags & NOCROSSMOUNT) == 0)) {
#if CONFIG_TRIGGERS
        /*
         * For a trigger vnode, call its resolver when crossing its mount (if requested)
         */
        if (dp->v_resolve) {
            (void) vnode_trigger_resolve(dp, ndp, ctx);
        }
#endif
        vnode_lock(dp);

        if ((dp->v_type == VDIR) && (mp = dp->v_mountedhere)) {
            mp->mnt_crossref++;
            vnode_unlock(dp);

            if (vfs_busy(mp, vbusyflags)) {
                mount_dropcrossref(mp, dp, 0);
                if (vbusyflags == LK_NOWAIT) {
                    error = ENOENT;
                    goto out;
                }
                continue;
            }

            /*
             * XXX - if this is the last component of the
             * pathname, and it's either not a lookup operation
             * or the NOTRIGGER flag is set for the operation,
             * set a uthread flag to let VFS_ROOT() for autofs
             * know it shouldn't trigger a mount.
             */
            uth = (struct uthread *)get_bsdthread_info(current_thread());
            if ((cnp->cn_flags & ISLASTCN) &&
                (cnp->cn_nameiop != LOOKUP || (cnp->cn_flags & NOTRIGGER))) {
                uth->uu_notrigger = 1;
                dont_cache_mp = 1;
            }

            error = VFS_ROOT(mp, &tdp, ctx);
            /* XXX - clear the uthread flag */
            uth->uu_notrigger = 0;

            mount_dropcrossref(mp, dp, 0);
            vfs_unbusy(mp);

            if (error) {
                goto out;
            }

            vnode_put(dp);
            ndp->ni_vp = dp = tdp;
            depth++;

#if CONFIG_TRIGGERS
            /*
             * Check if root dir is a trigger vnode
             */
            if (dp->v_resolve) {
                error = vnode_trigger_resolve(dp, ndp, ctx);
                if (error) {
                    goto out;
                }
            }
#endif
        } else {
            vnode_unlock(dp);
            break;
        }
    }

    if (depth && !dont_cache_mp) {
        mp = mounted_on_dp->v_mountedhere;
        if (mp) {
            mount_lock_spin(mp);
            mp->mnt_realrootvp_vid = dp->v_id;
            mp->mnt_realrootvp = dp;
            mp->mnt_generation = current_mount_generation;
            mount_unlock(mp);
        }
    }

    return 0;

out:
    return error;
}
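lookup() is not called directly; callers build a nameidata and go through namei(), as vm_swapfile_close() above does for its DELETE. A sketch of a plain LOOKUP, assuming the NDINIT argument shape shown in that function (error handling trimmed):

static int
lookup_path_sketch(user_addr_t path, vfs_context_t ctx, vnode_t *vpp)
{
    struct nameidata nd;
    int error;

    NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | AUDITVNPATH1,
        UIO_USERSPACE, path, ctx);
    error = namei(&nd);      /* drives lookup() over each path component */
    if (error == 0) {
        *vpp = nd.ni_vp;     /* returned with an iocount held */
        nameidone(&nd);      /* release nameidata resources */
    }
    return error;
}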
/*
 * XXX Internally, we use VM_PROT_* somewhat interchangeably, but the correct
 * XXX usage is PROT_* from an interface perspective.  Thus the values of
 * XXX VM_PROT_* and PROT_* need to correspond.
 */
int
mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval)
{
    /*
     * Map in special device (must be SHARED) or file
     */
    struct fileproc *fp;
    register struct vnode *vp;
    int flags;
    int prot, file_prot;
    int err = 0;
    vm_map_t user_map;
    kern_return_t result;
    mach_vm_offset_t user_addr;
    mach_vm_size_t user_size;
    vm_object_offset_t pageoff;
    vm_object_offset_t file_pos;
    int alloc_flags = 0;
    boolean_t docow;
    vm_prot_t maxprot;
    void *handle;
    vm_pager_t pager;
    int mapanon = 0;
    int fpref = 0;
    int error = 0;
    int fd = uap->fd;

    user_addr = (mach_vm_offset_t)uap->addr;
    user_size = (mach_vm_size_t) uap->len;

    AUDIT_ARG(addr, user_addr);
    AUDIT_ARG(len, user_size);
    AUDIT_ARG(fd, uap->fd);

    prot = (uap->prot & VM_PROT_ALL);
#if 3777787
    /*
     * Since the hardware currently does not support writing without
     * read-before-write, or execution-without-read, if the request is
     * for write or execute access, we must imply read access as well;
     * otherwise programs expecting this to work will fail to operate.
     */
    if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
        prot |= VM_PROT_READ;
#endif /* radar 3777787 */

    flags = uap->flags;
    vp = NULLVP;

    /*
     * The vm code does not have prototypes & compiler doesn't do the
     * right thing when you cast 64bit value and pass it in function
     * call.  So here it is.
     */
    file_pos = (vm_object_offset_t)uap->pos;

    /* make sure mapping fits into numeric range etc */
    if (file_pos + user_size > (vm_object_offset_t)-PAGE_SIZE_64)
        return (EINVAL);

    /*
     * Align the file position to a page boundary,
     * and save its page offset component.
     */
    pageoff = (file_pos & PAGE_MASK);
    file_pos -= (vm_object_offset_t)pageoff;

    /* Adjust size for rounding (on both ends). */
    user_size += pageoff;                       /* low end... */
    user_size = mach_vm_round_page(user_size);  /* hi end */

    /*
     * Check for illegal addresses.  Watch out for address wrap...  Note
     * that VM_*_ADDRESS are not constants due to casts (argh).
     */
    if (flags & MAP_FIXED) {
        /*
         * The specified address must have the same remainder
         * as the file offset taken modulo PAGE_SIZE, so it
         * should be aligned after adjustment by pageoff.
         */
        user_addr -= pageoff;
        if (user_addr & PAGE_MASK)
            return (EINVAL);
    }
#ifdef notyet
    /* Do not have APIs to get this info, need to wait till then */
    /*
     * XXX for non-fixed mappings where no hint is provided or
     * the hint would fall in the potential heap space,
     * place it after the end of the largest possible heap.
     *
     * There should really be a pmap call to determine a reasonable
     * location.
     */
    else if (addr < mach_vm_round_page(p->p_vmspace->vm_daddr + MAXDSIZ))
        addr = mach_vm_round_page(p->p_vmspace->vm_daddr + MAXDSIZ);
#endif

    alloc_flags = 0;

    if (flags & MAP_ANON) {
        /*
         * Mapping blank space is trivial.  Use positive fds as the alias
         * value for memory tracking.
         */
        if (fd != -1) {
            /*
             * Use "fd" to pass (some) Mach VM allocation flags,
             * (see the VM_FLAGS_* definitions).
             */
            alloc_flags = fd & (VM_FLAGS_ALIAS_MASK | VM_FLAGS_PURGABLE);
            if (alloc_flags != fd) {
                /* reject if there are any extra flags */
                return EINVAL;
            }
        }
        handle = NULL;
        maxprot = VM_PROT_ALL;
        file_pos = 0;
        mapanon = 1;
    } else {
        struct vnode_attr va;
        vfs_context_t ctx = vfs_context_current();

        /*
         * Mapping file, get fp for validation. Obtain vnode and make
         * sure it is of appropriate type.
         */
        err = fp_lookup(p, fd, &fp, 0);
        if (err)
            return(err);
        fpref = 1;
        if (fp->f_fglob->fg_type == DTYPE_PSXSHM) {
            uap->addr = (user_addr_t)user_addr;
            uap->len = (user_size_t)user_size;
            uap->prot = prot;
            uap->flags = flags;
            uap->pos = file_pos;
            error = pshm_mmap(p, uap, retval, fp, (off_t)pageoff);
            goto bad;
        }

        if (fp->f_fglob->fg_type != DTYPE_VNODE) {
            error = EINVAL;
            goto bad;
        }
        vp = (struct vnode *)fp->f_fglob->fg_data;
        error = vnode_getwithref(vp);
        if (error != 0)
            goto bad;

        if (vp->v_type != VREG && vp->v_type != VCHR) {
            (void)vnode_put(vp);
            error = EINVAL;
            goto bad;
        }

        AUDIT_ARG(vnpath, vp, ARG_VNODE1);

        /*
         * POSIX: mmap needs to update access time for mapped files
         */
        if ((vnode_vfsvisflags(vp) & MNT_NOATIME) == 0) {
            VATTR_INIT(&va);
            nanotime(&va.va_access_time);
            VATTR_SET_ACTIVE(&va, va_access_time);
            vnode_setattr(vp, &va, ctx);
        }

        /*
         * XXX hack to handle use of /dev/zero to map anon memory (ala
         * SunOS).
         */
        if (vp->v_type == VCHR || vp->v_type == VSTR) {
            (void)vnode_put(vp);
            error = ENODEV;
            goto bad;
        } else {
            /*
             * Ensure that file and memory protections are
             * compatible.  Note that we only worry about
             * writability if mapping is shared; in this case,
             * current and max prot are dictated by the open file.
             * XXX use the vnode instead?  Problem is: what
             * credentials do we use for determination? What if
             * proc does a setuid?
             */
            maxprot = VM_PROT_EXECUTE;  /* ??? */
            if (fp->f_fglob->fg_flag & FREAD)
                maxprot |= VM_PROT_READ;
            else if (prot & PROT_READ) {
                (void)vnode_put(vp);
                error = EACCES;
                goto bad;
            }
            /*
             * If we are sharing potential changes (either via
             * MAP_SHARED or via the implicit sharing of character
             * device mappings), and we are trying to get write
             * permission although we opened it without asking
             * for it, bail out.
             */
            if ((flags & MAP_SHARED) != 0) {
                if ((fp->f_fglob->fg_flag & FWRITE) != 0) {
                    /*
                     * check for write access
                     *
                     * Note that we already made this check when granting FWRITE
                     * against the file, so it seems redundant here.
                     */
                    error = vnode_authorize(vp, NULL, KAUTH_VNODE_CHECKIMMUTABLE, ctx);

                    /* if not granted for any reason, but we wanted it, bad */
                    if ((prot & PROT_WRITE) && (error != 0)) {
                        vnode_put(vp);
                        goto bad;
                    }

                    /* if writable, remember */
                    if (error == 0)
                        maxprot |= VM_PROT_WRITE;

                } else if ((prot & PROT_WRITE) != 0) {
                    (void)vnode_put(vp);
                    error = EACCES;
                    goto bad;
                }
            } else
                maxprot |= VM_PROT_WRITE;

            handle = (void *)vp;
#if CONFIG_MACF
            error = mac_file_check_mmap(vfs_context_ucred(ctx),
                fp->f_fglob, prot, flags, &maxprot);
            if (error) {
                (void)vnode_put(vp);
                goto bad;
            }
#endif /* MAC */
        }
    }

    if (user_size == 0) {
        if (!mapanon)
            (void)vnode_put(vp);
        error = 0;
        goto bad;
    }

    /*
     * We bend a little - round the start and end addresses
     * to the nearest page boundary.
     */
    user_size = mach_vm_round_page(user_size);

    if (file_pos & PAGE_MASK_64) {
        if (!mapanon)
            (void)vnode_put(vp);
        error = EINVAL;
        goto bad;
    }

    user_map = current_map();

    if ((flags & MAP_FIXED) == 0) {
        alloc_flags |= VM_FLAGS_ANYWHERE;
        user_addr = mach_vm_round_page(user_addr);
    } else {
        if (user_addr != mach_vm_trunc_page(user_addr)) {
            if (!mapanon)
                (void)vnode_put(vp);
            error = EINVAL;
            goto bad;
        }
        /*
         * mmap(MAP_FIXED) will replace any existing mappings in the
         * specified range, if the new mapping is successful.
         * If we just deallocate the specified address range here,
         * another thread might jump in and allocate memory in that
         * range before we get a chance to establish the new mapping,
         * and we won't have a chance to restore the old mappings.
         * So we use VM_FLAGS_OVERWRITE to let Mach VM know that it
         * has to deallocate the existing mappings and establish the
         * new ones atomically.
         */
        alloc_flags |= VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
    }

    if (flags & MAP_NOCACHE)
        alloc_flags |= VM_FLAGS_NO_CACHE;

    /*
     * Lookup/allocate object.
     */
    if (handle == NULL) {
        pager = NULL;
#ifdef notyet
/* Hmm .. */
#if defined(VM_PROT_READ_IS_EXEC)
        if (prot & VM_PROT_READ)
            prot |= VM_PROT_EXECUTE;
        if (maxprot & VM_PROT_READ)
            maxprot |= VM_PROT_EXECUTE;
#endif
#endif

#if 3777787
        if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
            prot |= VM_PROT_READ;
        if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
            maxprot |= VM_PROT_READ;
#endif /* radar 3777787 */

        result = vm_map_enter_mem_object(user_map,
            &user_addr, user_size,
            0, alloc_flags,
            IPC_PORT_NULL, 0, FALSE,
            prot, maxprot,
            (flags & MAP_SHARED) ?
            VM_INHERIT_SHARE :
            VM_INHERIT_DEFAULT);

        if (result != KERN_SUCCESS)
            goto out;
    } else {
        pager = (vm_pager_t)ubc_getpager(vp);

        if (pager == NULL) {
            (void)vnode_put(vp);
            error = ENOMEM;
            goto bad;
        }

        /*
         * Set credentials:
         * FIXME: if we're writing the file we need a way to
         * ensure that someone doesn't replace our R/W creds
         * with ones that only work for read.
         */
        ubc_setthreadcred(vp, p, current_thread());
        docow = FALSE;
        if ((flags & (MAP_ANON|MAP_SHARED)) == 0) {
            docow = TRUE;
        }

#ifdef notyet
/* Hmm .. */
#if defined(VM_PROT_READ_IS_EXEC)
        if (prot & VM_PROT_READ)
            prot |= VM_PROT_EXECUTE;
        if (maxprot & VM_PROT_READ)
            maxprot |= VM_PROT_EXECUTE;
#endif
#endif /* notyet */

#if 3777787
        if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
            prot |= VM_PROT_READ;
        if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
            maxprot |= VM_PROT_READ;
#endif /* radar 3777787 */

        result = vm_map_enter_mem_object(user_map,
            &user_addr, user_size,
            0, alloc_flags,
            (ipc_port_t)pager, file_pos,
            docow, prot, maxprot,
            (flags & MAP_SHARED) ?
            VM_INHERIT_SHARE :
            VM_INHERIT_DEFAULT);

        if (result != KERN_SUCCESS) {
            (void)vnode_put(vp);
            goto out;
        }

        file_prot = prot & (PROT_READ | PROT_WRITE | PROT_EXEC);
        if (docow) {
            /* private mapping: won't write to the file */
            file_prot &= ~PROT_WRITE;
        }
        (void) ubc_map(vp, file_prot);
    }

    if (!mapanon)
        (void)vnode_put(vp);

out:
    switch (result) {
    case KERN_SUCCESS:
        *retval = user_addr + pageoff;
        error = 0;
        break;
    case KERN_INVALID_ADDRESS:
    case KERN_NO_SPACE:
        error = ENOMEM;
        break;
    case KERN_PROTECTION_FAILURE:
        error = EACCES;
        break;
    default:
        error = EINVAL;
        break;
    }
bad:
    if (fpref)
        fp_drop(p, fd, fp, 0);

    KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_mmap) | DBG_FUNC_NONE),
        fd, (uint32_t)(*retval), (uint32_t)user_size, error, 0);
    KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO2, SYS_mmap) | DBG_FUNC_NONE),
        (uint32_t)(*retval >> 32), (uint32_t)(user_size >> 32),
        (uint32_t)(file_pos >> 32), (uint32_t)file_pos, 0);

    return(error);
}
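The radar-3777787 blocks in mmap() above are visible from user space: asking for PROT_WRITE alone still yields readable pages, because the kernel adds VM_PROT_READ whenever write or execute is requested. A user-space sketch of that behavior (illustrative function and error handling, not from the source):

#include <sys/mman.h>
#include <fcntl.h>
#include <unistd.h>
#include <stddef.h>

int
map_writable_sketch(const char *path, size_t len, void **out)
{
    int fd = open(path, O_RDWR);    /* MAP_SHARED + PROT_WRITE needs O_RDWR */
    if (fd < 0)
        return -1;
    void *p = mmap(NULL, len, PROT_WRITE, MAP_SHARED, fd, 0);
    close(fd);                      /* the mapping survives the close */
    if (p == MAP_FAILED)
        return -1;
    *out = p;                       /* also readable, per the kernel's prot fixup */
    return 0;
}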
kern_return_t
map_fd_funneled(
    int                 fd,
    vm_object_offset_t  offset,
    vm_offset_t         *va,
    boolean_t           findspace,
    vm_size_t           size)
{
    kern_return_t   result;
    struct fileproc *fp;
    struct vnode    *vp;
    void *          pager;
    vm_offset_t     map_addr = 0;
    vm_size_t       map_size;
    int             err = 0;
    vm_map_t        my_map;
    proc_t          p = current_proc();
    struct vnode_attr vattr;

    /*
     * Find the inode; verify that it's a regular file.
     */
    err = fp_lookup(p, fd, &fp, 0);
    if (err)
        return(err);

    if (fp->f_fglob->fg_type != DTYPE_VNODE) {
        err = KERN_INVALID_ARGUMENT;
        goto bad;
    }

    if (!(fp->f_fglob->fg_flag & FREAD)) {
        err = KERN_PROTECTION_FAILURE;
        goto bad;
    }

    vp = (struct vnode *)fp->f_fglob->fg_data;
    err = vnode_getwithref(vp);
    if (err != 0)
        goto bad;

    if (vp->v_type != VREG) {
        (void)vnode_put(vp);
        err = KERN_INVALID_ARGUMENT;
        goto bad;
    }

    AUDIT_ARG(vnpath, vp, ARG_VNODE1);

    /*
     * POSIX: mmap needs to update access time for mapped files
     */
    if ((vnode_vfsvisflags(vp) & MNT_NOATIME) == 0) {
        VATTR_INIT(&vattr);
        nanotime(&vattr.va_access_time);
        VATTR_SET_ACTIVE(&vattr, va_access_time);
        vnode_setattr(vp, &vattr, vfs_context_current());
    }

    if (offset & PAGE_MASK_64) {
        printf("map_fd: file offset not page aligned(%d : %s)\n", p->p_pid, p->p_comm);
        (void)vnode_put(vp);
        err = KERN_INVALID_ARGUMENT;
        goto bad;
    }
    map_size = round_page(size);

    /*
     * Allow user to map in a zero length file.
     */
    if (size == 0) {
        (void)vnode_put(vp);
        err = KERN_SUCCESS;
        goto bad;
    }
    /*
     * Map in the file.
     */
    pager = (void *)ubc_getpager(vp);
    if (pager == NULL) {
        (void)vnode_put(vp);
        err = KERN_FAILURE;
        goto bad;
    }

    my_map = current_map();

    result = vm_map_64(
            my_map,
            &map_addr, map_size, (vm_offset_t)0,
            VM_FLAGS_ANYWHERE, pager, offset, TRUE,
            VM_PROT_DEFAULT, VM_PROT_ALL,
            VM_INHERIT_DEFAULT);
    if (result != KERN_SUCCESS) {
        (void)vnode_put(vp);
        err = result;
        goto bad;
    }

    if (!findspace) {
        vm_offset_t dst_addr;
        vm_map_copy_t tmp;

        if (copyin(CAST_USER_ADDR_T(va), &dst_addr, sizeof (dst_addr)) ||
            trunc_page_32(dst_addr) != dst_addr) {
            (void) vm_map_remove(
                    my_map,
                    map_addr, map_addr + map_size,
                    VM_MAP_NO_FLAGS);
            (void)vnode_put(vp);
            err = KERN_INVALID_ADDRESS;
            goto bad;
        }

        result = vm_map_copyin(my_map, (vm_map_address_t)map_addr,
            (vm_map_size_t)map_size, TRUE, &tmp);
        if (result != KERN_SUCCESS) {
            (void) vm_map_remove(my_map, vm_map_trunc_page(map_addr),
                vm_map_round_page(map_addr + map_size),
                VM_MAP_NO_FLAGS);
            (void)vnode_put(vp);
            err = result;
            goto bad;
        }

        result = vm_map_copy_overwrite(my_map,
            (vm_map_address_t)dst_addr, tmp, FALSE);
        if (result != KERN_SUCCESS) {
            vm_map_copy_discard(tmp);
            (void)vnode_put(vp);
            err = result;
            goto bad;
        }
    } else {
        if (copyout(&map_addr, CAST_USER_ADDR_T(va), sizeof (map_addr))) {
            (void) vm_map_remove(my_map, vm_map_trunc_page(map_addr),
                vm_map_round_page(map_addr + map_size),
                VM_MAP_NO_FLAGS);
            (void)vnode_put(vp);
            err = KERN_INVALID_ADDRESS;
            goto bad;
        }
    }

    ubc_setthreadcred(vp, current_proc(), current_thread());
    (void)ubc_map(vp, (PROT_READ | PROT_EXEC));
    (void)vnode_put(vp);
    err = 0;
bad:
    fp_drop(p, fd, fp, 0);
    return (err);
}
/*
 * shared_region_map_np()
 *
 * This system call is intended for dyld.
 *
 * dyld uses this to map a shared cache file into a shared region.
 * This is usually done only the first time a shared cache is needed.
 * Subsequent processes will just use the populated shared region without
 * requiring any further setup.
 */
int
shared_region_map_np(
    struct proc                      *p,
    struct shared_region_map_np_args *uap,
    __unused int                     *retvalp)
{
    int                     error;
    kern_return_t           kr;
    int                     fd;
    struct fileproc         *fp;
    struct vnode            *vp, *root_vp;
    struct vnode_attr       va;
    off_t                   fs;
    memory_object_size_t    file_size;
    user_addr_t             user_mappings;
    struct shared_file_mapping_np *mappings;
#define SFM_MAX_STACK   8
    struct shared_file_mapping_np stack_mappings[SFM_MAX_STACK];
    unsigned int            mappings_count;
    vm_size_t               mappings_size;
    memory_object_control_t file_control;
    struct vm_shared_region *shared_region;

    SHARED_REGION_TRACE_DEBUG(
        ("shared_region: %p [%d(%s)] -> map\n",
         current_thread(), p->p_pid, p->p_comm));

    shared_region = NULL;
    mappings_count = 0;
    mappings_size = 0;
    mappings = NULL;
    fp = NULL;
    vp = NULL;

    /* get file descriptor for shared region cache file */
    fd = uap->fd;

    /* get file structure from file descriptor */
    error = fp_lookup(p, fd, &fp, 0);
    if (error) {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map: "
             "fd=%d lookup failed (error=%d)\n",
             current_thread(), p->p_pid, p->p_comm, fd, error));
        goto done;
    }

    /* make sure we're attempting to map a vnode */
    if (fp->f_fglob->fg_type != DTYPE_VNODE) {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map: "
             "fd=%d not a vnode (type=%d)\n",
             current_thread(), p->p_pid, p->p_comm,
             fd, fp->f_fglob->fg_type));
        error = EINVAL;
        goto done;
    }

    /* we need at least read permission on the file */
    if (! (fp->f_fglob->fg_flag & FREAD)) {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map: "
             "fd=%d not readable\n",
             current_thread(), p->p_pid, p->p_comm, fd));
        error = EPERM;
        goto done;
    }

    /* get vnode from file structure */
    error = vnode_getwithref((vnode_t) fp->f_fglob->fg_data);
    if (error) {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map: "
             "fd=%d getwithref failed (error=%d)\n",
             current_thread(), p->p_pid, p->p_comm, fd, error));
        goto done;
    }
    vp = (struct vnode *) fp->f_fglob->fg_data;

    /* make sure the vnode is a regular file */
    if (vp->v_type != VREG) {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map(%p:'%s'): "
             "not a file (type=%d)\n",
             current_thread(), p->p_pid, p->p_comm,
             vp, vp->v_name, vp->v_type));
        error = EINVAL;
        goto done;
    }

    /* make sure vnode is on the process's root volume */
    root_vp = p->p_fd->fd_rdir;
    if (root_vp == NULL) {
        root_vp = rootvnode;
    }
    if (vp->v_mount != root_vp->v_mount) {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map(%p:'%s'): "
             "not on process's root volume\n",
             current_thread(), p->p_pid, p->p_comm,
             vp, vp->v_name));
        error = EPERM;
        goto done;
    }

    /* make sure vnode is owned by "root" */
    VATTR_INIT(&va);
    VATTR_WANTED(&va, va_uid);
    error = vnode_getattr(vp, &va, vfs_context_current());
    if (error) {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map(%p:'%s'): "
             "vnode_getattr(%p) failed (error=%d)\n",
             current_thread(), p->p_pid, p->p_comm,
             vp, vp->v_name, vp, error));
        goto done;
    }
    if (va.va_uid != 0) {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map(%p:'%s'): "
             "owned by uid=%d instead of 0\n",
             current_thread(), p->p_pid, p->p_comm,
             vp, vp->v_name, va.va_uid));
        error = EPERM;
        goto done;
    }

    /* get vnode size */
    error = vnode_size(vp, &fs, vfs_context_current());
    if (error) {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map(%p:'%s'): "
             "vnode_size(%p) failed (error=%d)\n",
             current_thread(), p->p_pid, p->p_comm,
             vp, vp->v_name, vp, error));
        goto done;
    }
    file_size = fs;

    /* get the file's memory object handle */
    file_control = ubc_getobject(vp, UBC_HOLDOBJECT);
    if (file_control == MEMORY_OBJECT_CONTROL_NULL) {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map(%p:'%s'): "
             "no memory object\n",
             current_thread(), p->p_pid, p->p_comm,
             vp, vp->v_name));
        error = EINVAL;
        goto done;
    }

    /* get the list of mappings the caller wants us to establish */
    mappings_count = uap->count;    /* number of mappings */
    mappings_size = (vm_size_t) (mappings_count * sizeof (mappings[0]));
    if (mappings_count == 0) {
        SHARED_REGION_TRACE_INFO(
            ("shared_region: %p [%d(%s)] map(%p:'%s'): "
             "no mappings\n",
             current_thread(), p->p_pid, p->p_comm,
             vp, vp->v_name));
        error = 0;      /* no mappings: we're done ! */
        goto done;
    } else if (mappings_count <= SFM_MAX_STACK) {
        mappings = &stack_mappings[0];
    } else {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map(%p:'%s'): "
             "too many mappings (%d)\n",
             current_thread(), p->p_pid, p->p_comm,
             vp, vp->v_name, mappings_count));
        error = EINVAL;
        goto done;
    }

    user_mappings = uap->mappings;  /* the mappings, in user space */
    error = copyin(user_mappings, mappings, mappings_size);
    if (error) {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map(%p:'%s'): "
             "copyin(0x%llx, %d) failed (error=%d)\n",
             current_thread(), p->p_pid, p->p_comm,
             vp, vp->v_name, (uint64_t)user_mappings, mappings_count, error));
        goto done;
    }

    /* get the process's shared region (setup in vm_map_exec()) */
    shared_region = vm_shared_region_get(current_task());
    if (shared_region == NULL) {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map(%p:'%s'): "
             "no shared region\n",
             current_thread(), p->p_pid, p->p_comm,
             vp, vp->v_name));
        goto done;
    }

    /* map the file into that shared region's submap */
    kr = vm_shared_region_map_file(shared_region,
        mappings_count,
        mappings,
        file_control,
        file_size,
        (void *) p->p_fd->fd_rdir);
    if (kr != KERN_SUCCESS) {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map(%p:'%s'): "
             "vm_shared_region_map_file() failed kr=0x%x\n",
             current_thread(), p->p_pid, p->p_comm,
             vp, vp->v_name, kr));
        switch (kr) {
        case KERN_INVALID_ADDRESS:
            error = EFAULT;
            break;
        case KERN_PROTECTION_FAILURE:
            error = EPERM;
            break;
        case KERN_NO_SPACE:
            error = ENOMEM;
            break;
        case KERN_FAILURE:
        case KERN_INVALID_ARGUMENT:
        default:
            error = EINVAL;
            break;
        }
        goto done;
    }

    /*
     * The mapping was successful.  Let the buffer cache know
     * that we've mapped that file with these protections.  This
     * prevents the vnode from getting recycled while it's mapped.
     */
    (void) ubc_map(vp, VM_PROT_READ);
    error = 0;

    /* update the vnode's access time */
    if (! (vnode_vfsvisflags(vp) & MNT_NOATIME)) {
        VATTR_INIT(&va);
        nanotime(&va.va_access_time);
        VATTR_SET_ACTIVE(&va, va_access_time);
        vnode_setattr(vp, &va, vfs_context_current());
    }

    if (p->p_flag & P_NOSHLIB) {
        /* signal that this process is now using split libraries */
        OSBitAndAtomic(~((uint32_t)P_NOSHLIB), (UInt32 *)&p->p_flag);
    }

done:
    if (vp != NULL) {
        /*
         * release the vnode...
         * ubc_map() still holds it for us in the non-error case
         */
        (void) vnode_put(vp);
        vp = NULL;
    }
    if (fp != NULL) {
        /* release the file descriptor */
        fp_drop(p, fd, fp, 0);
        fp = NULL;
    }
    if (shared_region != NULL) {
        vm_shared_region_deallocate(shared_region);
    }

    SHARED_REGION_TRACE_DEBUG(
        ("shared_region: %p [%d(%s)] <- map\n",
         current_thread(), p->p_pid, p->p_comm));

    return error;
}
/*
 * Write out process accounting information, on process exit.
 * Data to be written out is specified in Leffler, et al.
 * and are enumerated below.  (They're also noted in the system
 * "acct.h" header file.)
 */
int
acct_process(proc_t p)
{
    struct acct an_acct;
    struct rusage rup, *r;
    struct timeval ut, st, tmp;
    int t;
    int error;
    struct vnode *vp;
    kauth_cred_t safecred;
    struct session * sessp;
    boolean_t fstate;

    /* If accounting isn't enabled, don't bother */
    vp = acctp;
    if (vp == NULLVP)
        return (0);

    /*
     * Get process accounting information.
     */

    /* (1) The name of the command that ran */
    bcopy(p->p_comm, an_acct.ac_comm, sizeof an_acct.ac_comm);

    /* (2) The amount of user and system time that was used */
    calcru(p, &ut, &st, NULL);
    an_acct.ac_utime = encode_comp_t(ut.tv_sec, ut.tv_usec);
    an_acct.ac_stime = encode_comp_t(st.tv_sec, st.tv_usec);

    /* (3) The elapsed time the command ran (and its starting time) */
    an_acct.ac_btime = p->p_start.tv_sec;
    microtime(&tmp);
    timevalsub(&tmp, &p->p_start);
    an_acct.ac_etime = encode_comp_t(tmp.tv_sec, tmp.tv_usec);

    /* (4) The average amount of memory used */
    proc_lock(p);
    rup = p->p_stats->p_ru;
    proc_unlock(p);
    r = &rup;
    tmp = ut;
    timevaladd(&tmp, &st);
    t = tmp.tv_sec * hz + tmp.tv_usec / tick;
    if (t)
        an_acct.ac_mem = (r->ru_ixrss + r->ru_idrss + r->ru_isrss) / t;
    else
        an_acct.ac_mem = 0;

    /* (5) The number of disk I/O operations done */
    an_acct.ac_io = encode_comp_t(r->ru_inblock + r->ru_oublock, 0);

    /* (6) The UID and GID of the process */
    safecred = kauth_cred_proc_ref(p);
    an_acct.ac_uid = safecred->cr_ruid;
    an_acct.ac_gid = safecred->cr_rgid;

    /* (7) The terminal from which the process was started */
    sessp = proc_session(p);
    if ((p->p_flag & P_CONTROLT) && (sessp != SESSION_NULL) && (sessp->s_ttyp != TTY_NULL)) {
        fstate = thread_funnel_set(kernel_flock, TRUE);
        an_acct.ac_tty = sessp->s_ttyp->t_dev;
        (void) thread_funnel_set(kernel_flock, fstate);
    } else
        an_acct.ac_tty = NODEV;

    if (sessp != SESSION_NULL)
        session_rele(sessp);

    /* (8) The boolean flags that tell how the process terminated, etc. */
    an_acct.ac_flag = p->p_acflag;

    /*
     * Now, just write the accounting information to the file.
     */
    if ((error = vnode_getwithref(vp)) == 0) {
        error = vn_rdwr(UIO_WRITE, vp, (caddr_t)&an_acct, sizeof (an_acct),
            (off_t)0, UIO_SYSSPACE32, IO_APPEND|IO_UNIT, safecred,
            (int *)0, p);
        vnode_put(vp);
    }
    kauth_cred_unref(&safecred);

    return (error);
}
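The times written above are stored as comp_t: a 13-bit fraction with a 3-bit base-8 exponent, in units of 1/AHZ seconds. A sketch of the classic 4.4BSD encoding, reconstructed from the traditional algorithm rather than copied from the kernel's encode_comp_t (constants and names are assumptions):

#include <stdint.h>

#define SK_AHZ      64                          /* units: 1/64 second */
#define SK_MANTSIZE 13                          /* fraction bits */
#define SK_EXPSIZE  3                           /* base-8 exponent bits */
#define SK_MAXFRACT ((1 << SK_MANTSIZE) - 1)    /* largest fraction */

static uint16_t                                 /* the kernel's comp_t */
encode_comp_t_sketch(uint32_t s, uint32_t us)
{
    int exp = 0, rnd = 0;

    s *= SK_AHZ;
    s += us / (1000000 / SK_AHZ);           /* fold in the microseconds */

    while (s > SK_MAXFRACT) {
        rnd = s & (1 << (SK_EXPSIZE - 1));  /* remember the round bit */
        s >>= SK_EXPSIZE;                   /* base-8 exponent == 3-bit shift */
        exp++;                              /* exp is assumed to fit in 3 bits */
    }
    if (rnd && (++s > SK_MAXFRACT)) {       /* round up, handling overflow */
        s >>= SK_EXPSIZE;
        exp++;
    }
    return (uint16_t)((exp << SK_MANTSIZE) | s);
}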
/*
 * We have to carry on the locking protocol on the null layer vnodes
 * as we progress through the tree. We also have to enforce read-only
 * if this layer is mounted read-only.
 */
static int
null_lookup(struct vnop_lookup_args * ap)
{
    struct componentname * cnp = ap->a_cnp;
    struct vnode * dvp = ap->a_dvp;
    struct vnode *vp, *ldvp, *lvp;
    struct mount * mp;
    struct null_mount * null_mp;
    int error;

    NULLFSDEBUG("%s parent: %p component: %.*s\n", __FUNCTION__, ap->a_dvp,
        cnp->cn_namelen, cnp->cn_nameptr);

    mp = vnode_mount(dvp);
    /* rename and delete are not allowed. this is a read only file system */
    if (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME ||
        cnp->cn_nameiop == CREATE) {
        return (EROFS);
    }
    null_mp = MOUNTTONULLMOUNT(mp);

    lck_mtx_lock(&null_mp->nullm_lock);
    if (nullfs_isspecialvp(dvp)) {
        error = null_special_lookup(ap);
        lck_mtx_unlock(&null_mp->nullm_lock);
        return error;
    }
    lck_mtx_unlock(&null_mp->nullm_lock);

    // . and .. handling
    if (cnp->cn_nameptr[0] == '.') {
        if (cnp->cn_namelen == 1) {
            vp = dvp;
        } else if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
            /* mount point crossing is handled in null_special_lookup */
            vp = vnode_parent(dvp);
        } else {
            goto notdot;
        }

        error = vp ? vnode_get(vp) : ENOENT;
        if (error == 0) {
            *ap->a_vpp = vp;
        }
        return error;
    }

notdot:
    ldvp = NULLVPTOLOWERVP(dvp);
    vp = lvp = NULL;

    /*
     * Hold ldvp.  The reference on it, owned by dvp, is lost in
     * case of dvp reclamation.
     */
    error = vnode_getwithref(ldvp);
    if (error) {
        return error;
    }

    error = VNOP_LOOKUP(ldvp, &lvp, cnp, ap->a_context);

    vnode_put(ldvp);

    if ((error == 0 || error == EJUSTRETURN) && lvp != NULL) {
        if (ldvp == lvp) {
            vp = dvp;
            error = vnode_get(vp);
        } else {
            error = null_nodeget(mp, lvp, dvp, &vp, cnp, 0);
        }
        if (error == 0) {
            *ap->a_vpp = vp;
        }
    }

    /* if we got lvp, drop the iocount from VNOP_LOOKUP */
    if (lvp != NULL) {
        vnode_put(lvp);
    }

    return (error);
}
/*
 * Mount null layer
 */
static int
nullfs_mount(struct mount * mp, __unused vnode_t devvp, user_addr_t user_data, vfs_context_t ctx)
{
    int error = 0;
    struct vnode *lowerrootvp = NULL, *vp = NULL;
    struct vfsstatfs * sp = NULL;
    struct null_mount * xmp = NULL;
    char data[MAXPATHLEN];
    size_t count;
    struct vfs_attr vfa;
    /* set defaults (arbitrary since this file system is readonly) */
    uint32_t bsize = BLKDEV_IOSIZE;
    size_t iosize = BLKDEV_IOSIZE;
    uint64_t blocks = 4711 * 4711;
    uint64_t bfree = 0;
    uint64_t bavail = 0;
    uint64_t bused = 4711;
    uint64_t files = 4711;
    uint64_t ffree = 0;
    kauth_cred_t cred = vfs_context_ucred(ctx);

    NULLFSDEBUG("nullfs_mount(mp = %p) %llx\n", (void *)mp, vfs_flags(mp));

    if (vfs_flags(mp) & MNT_ROOTFS)
        return (EOPNOTSUPP);

    /*
     * Update is a no-op
     */
    if (vfs_isupdate(mp)) {
        return ENOTSUP;
    }

    /* check entitlement */
    if (!IOTaskHasEntitlement(current_task(), NULLFS_ENTITLEMENT)) {
        return EPERM;
    }

    /*
     * Get argument
     */
    error = copyinstr(user_data, data, MAXPATHLEN - 1, &count);
    if (error) {
        NULLFSDEBUG("nullfs: error copying data from user %d\n", error);
        goto error;
    }

    /* This could happen if the system is configured for 32 bit inodes instead of
     * 64 bit */
    if (count > MAX_MNT_FROM_LENGTH) {
        error = EINVAL;
        NULLFSDEBUG("nullfs: path to translocate too large for this system %d vs %d\n",
            count, MAX_MNT_FROM_LENGTH);
        goto error;
    }

    error = vnode_lookup(data, 0, &lowerrootvp, ctx);
    if (error) {
        NULLFSDEBUG("lookup %s -> %d\n", data, error);
        goto error;
    }

    /* lowerrootvp has an iocount after vnode_lookup, drop that for a usecount.
     * Keep this to signal what we want to keep around the thing we are mirroring.
     * Drop it in unmount. */
    error = vnode_ref(lowerrootvp);
    vnode_put(lowerrootvp);
    if (error) {
        // If vnode_ref failed, then null it out so it can't be used anymore in cleanup.
        lowerrootvp = NULL;
        goto error;
    }

    NULLFSDEBUG("mount %s\n", data);

    MALLOC(xmp, struct null_mount *, sizeof(*xmp), M_TEMP, M_WAITOK | M_ZERO);
    if (xmp == NULL) {
        error = ENOMEM;
        goto error;
    }

    /*
     * Save reference to underlying FS
     */
    xmp->nullm_lowerrootvp = lowerrootvp;
    xmp->nullm_lowerrootvid = vnode_vid(lowerrootvp);

    error = null_getnewvnode(mp, NULL, NULL, &vp, NULL, 1);
    if (error) {
        goto error;
    }

    /* vp has an iocount on it from vnode_create. drop that for a usecount. This
     * is our root vnode so we drop the ref in unmount
     *
     * Assuming for now that because we created this vnode and we aren't finished
     * mounting we can get a ref */
    vnode_ref(vp);
    vnode_put(vp);

    error = nullfs_init_lck(&xmp->nullm_lock);
    if (error) {
        goto error;
    }

    xmp->nullm_rootvp = vp;

    /* read the flags the user set, but then ignore some of them, we will only
     * allow them if they are set on the lower file system */
    uint64_t flags = vfs_flags(mp) & (~(MNT_IGNORE_OWNERSHIP | MNT_LOCAL));
    uint64_t lowerflags = vfs_flags(vnode_mount(lowerrootvp)) &
        (MNT_LOCAL | MNT_QUARANTINE | MNT_IGNORE_OWNERSHIP | MNT_NOEXEC);

    if (lowerflags) {
        flags |= lowerflags;
    }

    /* force these flags */
    flags |= (MNT_DONTBROWSE | MNT_MULTILABEL | MNT_NOSUID | MNT_RDONLY);
    vfs_setflags(mp, flags);

    vfs_setfsprivate(mp, xmp);
    vfs_getnewfsid(mp);
    vfs_setlocklocal(mp);

    /* fill in the stat block */
    sp = vfs_statfs(mp);
    strlcpy(sp->f_mntfromname, data, MAX_MNT_FROM_LENGTH);

    sp->f_flags = flags;

    xmp->nullm_flags = NULLM_CASEINSENSITIVE; /* default to case insensitive */

    error = nullfs_vfs_getlowerattr(vnode_mount(lowerrootvp), &vfa, ctx);
    if (error == 0) {
        if (VFSATTR_IS_SUPPORTED(&vfa, f_bsize)) {
            bsize = vfa.f_bsize;
        }
        if (VFSATTR_IS_SUPPORTED(&vfa, f_iosize)) {
            iosize = vfa.f_iosize;
        }
        if (VFSATTR_IS_SUPPORTED(&vfa, f_blocks)) {
            blocks = vfa.f_blocks;
        }
        if (VFSATTR_IS_SUPPORTED(&vfa, f_bfree)) {
            bfree = vfa.f_bfree;
        }
        if (VFSATTR_IS_SUPPORTED(&vfa, f_bavail)) {
            bavail = vfa.f_bavail;
        }
        if (VFSATTR_IS_SUPPORTED(&vfa, f_bused)) {
            bused = vfa.f_bused;
        }
        if (VFSATTR_IS_SUPPORTED(&vfa, f_files)) {
            files = vfa.f_files;
        }
        if (VFSATTR_IS_SUPPORTED(&vfa, f_ffree)) {
            ffree = vfa.f_ffree;
        }
        if (VFSATTR_IS_SUPPORTED(&vfa, f_capabilities)) {
            if ((vfa.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] &
                 (VOL_CAP_FMT_CASE_SENSITIVE)) &&
                (vfa.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] &
                 (VOL_CAP_FMT_CASE_SENSITIVE))) {
                xmp->nullm_flags &= ~NULLM_CASEINSENSITIVE;
            }
        }
    } else {
        goto error;
    }

    sp->f_bsize = bsize;
    sp->f_iosize = iosize;
    sp->f_blocks = blocks;
    sp->f_bfree = bfree;
    sp->f_bavail = bavail;
    sp->f_bused = bused;
    sp->f_files = files;
    sp->f_ffree = ffree;

    /* Associate the mac label information from the mirrored filesystem with the
     * mirror */
    MAC_PERFORM(mount_label_associate, cred, vnode_mount(lowerrootvp), vfs_mntlabel(mp));

    NULLFSDEBUG("nullfs_mount: lower %s, alias at %s\n", sp->f_mntfromname, sp->f_mntonname);
    return (0);

error:
    if (xmp) {
        FREE(xmp, M_TEMP);
    }
    if (lowerrootvp) {
        vnode_getwithref(lowerrootvp);
        vnode_rele(lowerrootvp);
        vnode_put(lowerrootvp);
    }
    if (vp) {
        /* we made the root vnode but the mount failed, so clean it up */
        vnode_getwithref(vp);
        vnode_rele(vp);
        /* give vp back */
        vnode_recycle(vp);
        vnode_put(vp);
    }
    return error;
}
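The iocount-for-usecount swap appears twice in nullfs_mount() (on lowerrootvp after vnode_lookup and on the new root vp after vnode_create), and the error path undoes it with the reverse sequence. A sketch of the swap as a hypothetical helper (the name is illustrative; the calls are from the source):

static int
swap_iocount_for_usecount_sketch(vnode_t vp)
{
    /* vp arrives with an iocount (from vnode_lookup or vnode_create) */
    int error = vnode_ref(vp);  /* take the long-lived usecount; needs the iocount held */
    vnode_put(vp);              /* drop the short-lived iocount */
    return error;               /* on failure the caller must not later vnode_rele */
}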
off_t DldIOShadowFile::expandFile( __in off_t lowTargetSize )
{
    assert( preemption_enabled() );

    off_t target;
    off_t currentLastExpansion;

#if defined( DBG )
    currentLastExpansion = DLD_IGNR_FSIZE;
#endif//DBG

    //
    // Make sure that we are root.  Growing a file
    // without zero filling the data is a security hole
    // root would have access anyway so we'll allow it
    //
    //
    // TO DO - is_suser is not supported for 64 bit kext
    //
    /*
    #if defined(__i386__)
    assert( is_suser() );
    if( !is_suser() )
        return this->lastExpansion;// an unsafe unprotected access to 64b value
    #elif defined(__x86_64__)
    #endif//defined(__x86_64__)
    */

    //
    // check that the limit has not been reached
    //
    if( DLD_IGNR_FSIZE != this->maxSize && this->lastExpansion == this->maxSize )
        return this->maxSize;// a safe access to 64b value as the value can't change

    this->LockExclusive();
    {// start of the lock

        off_t valueToAdd;

        currentLastExpansion = this->lastExpansion;

        //
        // try to extend the file by 128 MB
        // ( the same value is used as a maximum value for a mapping
        // range in DldIOShadow::processFileWriteWQ )
        //
        valueToAdd = 0x8*0x1000*0x1000;

        if( this->offset > this->lastExpansion )
            target = this->offset + valueToAdd;
        else
            target = this->lastExpansion + valueToAdd;

        if( target < lowTargetSize )
            target = lowTargetSize;

        if( DLD_IGNR_FSIZE != this->maxSize && target > this->maxSize )
            target = this->maxSize;

    }// end of the lock
    this->UnLockExclusive();

    if( target < lowTargetSize ){
        //
        // there is no point in extending as the goal won't be reached,
        // fail the request
        //
        return currentLastExpansion;
    }

    assert( DLD_IGNR_FSIZE != currentLastExpansion && currentLastExpansion <= target );

    //
    // extend the file
    //
    vfs_context_t vfs_context;
    int error;

    error = vnode_getwithref( this->vnode );
    assert( !error );
    if( !error ){

        struct vnode_attr va;

        VATTR_INIT(&va);
        VATTR_SET(&va, va_data_size, target);
        va.va_vaflags = (IO_NOZEROFILL) & 0xffff;

        vfs_context = vfs_context_create( NULL );
        assert( vfs_context );
        if( vfs_context ){

            this->LockExclusive();
            {// start of the lock

                assert( currentLastExpansion <= this->lastExpansion );

                //
                // do not truncate the file incidentally!
                //
                if( target > this->offset ){

                    error = vnode_setattr( this->vnode, &va, vfs_context );
                    if( !error ){
                        this->lastExpansion = target;
                        currentLastExpansion = target;
                    }

                } else {

                    //
                    // the file has been extended by the write operation
                    //
                    this->lastExpansion = this->offset;
                    currentLastExpansion = this->offset;

                }// end if( target < this->offset )

            }// end of the lock
            this->UnLockExclusive();

            vfs_context_rele( vfs_context );
            DLD_DBG_MAKE_POINTER_INVALID( vfs_context );

        }// end if( vfs_context )

        vnode_put( this->vnode );

    }// end if( !error )

    assert( DLD_IGNR_FSIZE != currentLastExpansion );
    return currentLastExpansion;
}
/* * Search a pathname. * This is a very central and rather complicated routine. * * The pathname is pointed to by ni_ptr and is of length ni_pathlen. * The starting directory is taken from ni_startdir. The pathname is * descended until done, or a symbolic link is encountered. The variable * ni_more is clear if the path is completed; it is set to one if a * symbolic link needing interpretation is encountered. * * The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on * whether the name is to be looked up, created, renamed, or deleted. * When CREATE, RENAME, or DELETE is specified, information usable in * creating, renaming, or deleting a directory entry may be calculated. * If flag has LOCKPARENT or'ed into it, the parent directory is returned * locked. If flag has WANTPARENT or'ed into it, the parent directory is * returned unlocked. Otherwise the parent directory is not returned. If * the target of the pathname exists and LOCKLEAF is or'ed into the flag * the target is returned locked, otherwise it is returned unlocked. * When creating or renaming and LOCKPARENT is specified, the target may not * be ".". When deleting and LOCKPARENT is specified, the target may be ".". * * Overall outline of lookup: * * dirloop: * identify next component of name at ndp->ni_ptr * handle degenerate case where name is null string * if .. and crossing mount points and on mounted filesys, find parent * call VNOP_LOOKUP routine for next component name * directory vnode returned in ni_dvp, unlocked unless LOCKPARENT set * component vnode returned in ni_vp (if it exists), locked. * if result vnode is mounted on and crossing mount points, * find mounted on vnode * if more components of name, do next level at dirloop * return the answer in ni_vp, locked if LOCKLEAF set * if LOCKPARENT set, return locked parent in ni_dvp * if WANTPARENT set, return unlocked parent in ni_dvp * * Returns: 0 Success * ENOENT No such file or directory * EBADF Bad file descriptor * ENOTDIR Not a directory * EROFS Read-only file system [CREATE] * EISDIR Is a directory [CREATE] * cache_lookup_path:ERECYCLE (vnode was recycled from underneath us, redrive lookup again) * vnode_authorize:EROFS * vnode_authorize:EACCES * vnode_authorize:EPERM * vnode_authorize:??? * VNOP_LOOKUP:ENOENT No such file or directory * VNOP_LOOKUP:EJUSTRETURN Restart system call (INTERNAL) * VNOP_LOOKUP:??? * VFS_ROOT:ENOTSUP * VFS_ROOT:ENOENT * VFS_ROOT:??? */ int lookup(struct nameidata *ndp) { char *cp; /* pointer into pathname argument */ vnode_t tdp; /* saved dp */ vnode_t dp; /* the directory we are searching */ mount_t mp; /* mount table entry */ int docache = 1; /* == 0 do not cache last component */ int wantparent; /* 1 => wantparent or lockparent flag */ int rdonly; /* lookup read-only flag bit */ int trailing_slash = 0; int dp_authorized = 0; int error = 0; struct componentname *cnp = &ndp->ni_cnd; vfs_context_t ctx = cnp->cn_context; int mounted_on_depth = 0; int dont_cache_mp = 0; vnode_t mounted_on_dp = NULLVP; int current_mount_generation = 0; int vbusyflags = 0; int nc_generation = 0; vnode_t last_dp = NULLVP; /* * Setup: break out flag bits into variables. 
     */
    if (cnp->cn_flags & (NOCACHE | DOWHITEOUT)) {
        if ((cnp->cn_flags & NOCACHE) || (cnp->cn_nameiop == DELETE))
            docache = 0;
    }
    wantparent = cnp->cn_flags & (LOCKPARENT | WANTPARENT);
    rdonly = cnp->cn_flags & RDONLY;
    cnp->cn_flags &= ~ISSYMLINK;
    cnp->cn_consume = 0;

    dp = ndp->ni_startdir;
    ndp->ni_startdir = NULLVP;

    if ((cnp->cn_flags & CN_NBMOUNTLOOK) != 0)
        vbusyflags = LK_NOWAIT;
    cp = cnp->cn_nameptr;

    if (*cp == '\0') {
        if ( (vnode_getwithref(dp)) ) {
            dp = NULLVP;
            error = ENOENT;
            goto bad;
        }
        goto emptyname;
    }
dirloop:
    ndp->ni_vp = NULLVP;

    if ( (error = cache_lookup_path(ndp, cnp, dp, ctx, &trailing_slash, &dp_authorized, last_dp)) ) {
        dp = NULLVP;
        goto bad;
    }
    if ((cnp->cn_flags & ISLASTCN)) {
        if (docache)
            cnp->cn_flags |= MAKEENTRY;
    } else
        cnp->cn_flags |= MAKEENTRY;

    dp = ndp->ni_dvp;

    if (ndp->ni_vp != NULLVP) {
        /*
         * cache_lookup_path returned a non-NULL ni_vp, so we're
         * guaranteed that dp is a VDIR, it's been authorized, and
         * vp is not ".."
         *
         * make sure we don't try to enter the name back into
         * the cache if this vp is purged before we get to that
         * check, since we won't have serialized behind whatever
         * activity is occurring in the FS that caused the purge
         */
        if (dp != NULLVP)
            nc_generation = dp->v_nc_generation - 1;

        goto returned_from_lookup_path;
    }

    /*
     * Handle "..": two special cases.
     * 1. If at root directory (e.g. after chroot)
     *    or at absolute root directory
     *    then ignore it, so we can't get out.
     * 2. If this vnode is the root of a mounted
     *    filesystem, then replace it with the
     *    vnode which was mounted on, so we take the
     *    .. in the other file system.
     */
    if ( (cnp->cn_flags & ISDOTDOT) ) {
        for (;;) {
            if (dp == ndp->ni_rootdir || dp == rootvnode) {
                ndp->ni_dvp = dp;
                ndp->ni_vp = dp;
                /*
                 * we're pinned at the root
                 * we've already got one reference on 'dp'
                 * courtesy of cache_lookup_path... take
                 * another one for the ".."
                 * if we fail to get the new reference, we'll
                 * drop our original down in 'bad'
                 */
                if ( (vnode_get(dp)) ) {
                    error = ENOENT;
                    goto bad;
                }
                goto nextname;
            }
            if ((dp->v_flag & VROOT) == 0 ||
                (cnp->cn_flags & NOCROSSMOUNT))
                break;
            if (dp->v_mount == NULL) {	/* forced umount */
                error = EBADF;
                goto bad;
            }
            tdp = dp;
            dp = tdp->v_mount->mnt_vnodecovered;

            vnode_put(tdp);

            if ( (vnode_getwithref(dp)) ) {
                dp = NULLVP;
                error = ENOENT;
                goto bad;
            }
            ndp->ni_dvp = dp;
            dp_authorized = 0;
        }
    }

    /*
     * We now have a segment name to search for, and a directory to search.
     */
unionlookup:
    ndp->ni_vp = NULLVP;

    if (dp->v_type != VDIR) {
        error = ENOTDIR;
        goto lookup_error;
    }
    if ( (cnp->cn_flags & DONOTAUTH) != DONOTAUTH ) {
        if (!dp_authorized) {
            error = vnode_authorize(dp, NULL, KAUTH_VNODE_SEARCH, ctx);
            if (error)
                goto lookup_error;
        }
#if CONFIG_MACF
        error = mac_vnode_check_lookup(ctx, dp, cnp);
        if (error)
            goto lookup_error;
#endif /* CONFIG_MACF */
    }

    nc_generation = dp->v_nc_generation;

    if ( (error = VNOP_LOOKUP(dp, &ndp->ni_vp, cnp, ctx)) ) {
lookup_error:
        if ((error == ENOENT) && (dp->v_flag & VROOT) &&
            (dp->v_mount != NULL) && (dp->v_mount->mnt_flag & MNT_UNION)) {
            if ((cnp->cn_flags & FSNODELOCKHELD)) {
                cnp->cn_flags &= ~FSNODELOCKHELD;
                unlock_fsnode(dp, NULL);
            }
            tdp = dp;
            dp = tdp->v_mount->mnt_vnodecovered;

            vnode_put(tdp);

            if ( (vnode_getwithref(dp)) ) {
                dp = NULLVP;
                error = ENOENT;
                goto bad;
            }
            ndp->ni_dvp = dp;
            dp_authorized = 0;
            goto unionlookup;
        }
        if (error != EJUSTRETURN)
            goto bad;

        if (ndp->ni_vp != NULLVP)
            panic("leaf should be empty");

        /*
         * If creating and at end of pathname, then can consider
         * allowing file to be created.
         */
        if (rdonly) {
            error = EROFS;
            goto bad;
        }
        if ((cnp->cn_flags & ISLASTCN) && trailing_slash && !(cnp->cn_flags & WILLBEDIR)) {
            error = ENOENT;
            goto bad;
        }
        /*
         * We return with ni_vp NULL to indicate that the entry
         * doesn't currently exist, leaving a pointer to the
         * referenced directory vnode in ndp->ni_dvp.
         */
        if (cnp->cn_flags & SAVESTART) {
            if ( (vnode_get(ndp->ni_dvp)) ) {
                error = ENOENT;
                goto bad;
            }
            ndp->ni_startdir = ndp->ni_dvp;
        }
        if (!wantparent)
            vnode_put(ndp->ni_dvp);

        if (kdebug_enable)
            kdebug_lookup(ndp->ni_dvp, cnp);
        return (0);
    }
returned_from_lookup_path:
    dp = ndp->ni_vp;

    /*
     * Take into account any additional components consumed by
     * the underlying filesystem.
     */
    if (cnp->cn_consume > 0) {
        cnp->cn_nameptr += cnp->cn_consume;
        ndp->ni_next += cnp->cn_consume;
        ndp->ni_pathlen -= cnp->cn_consume;
        cnp->cn_consume = 0;
    } else {
        if (dp->v_name == NULL || dp->v_parent == NULLVP) {
            int isdot_or_dotdot;
            int update_flags = 0;

            isdot_or_dotdot = (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') || (cnp->cn_flags & ISDOTDOT);

            if (isdot_or_dotdot == 0) {
                if (dp->v_name == NULL)
                    update_flags |= VNODE_UPDATE_NAME;
                if (ndp->ni_dvp != NULLVP && dp->v_parent == NULLVP)
                    update_flags |= VNODE_UPDATE_PARENT;

                if (update_flags)
                    vnode_update_identity(dp, ndp->ni_dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, update_flags);
            }
        }
        if ( (cnp->cn_flags & MAKEENTRY) && (dp->v_flag & VNCACHEABLE) && LIST_FIRST(&dp->v_nclinks) == NULL) {
            /*
             * missing from name cache, but should
             * be in it... this can happen if volfs
             * causes the vnode to be created or the
             * name cache entry got recycled but the
             * vnode didn't...
             * check to make sure that ni_dvp is valid;
             * cache_lookup_path may return a NULL ni_dvp
             * do a quick check to see if the generation of the
             * directory matches our snapshot... this will get
             * rechecked behind the name cache lock, but if it
             * already fails to match, no need to go any further
             */
            if (ndp->ni_dvp != NULLVP && (nc_generation == ndp->ni_dvp->v_nc_generation))
                cache_enter_with_gen(ndp->ni_dvp, dp, cnp, nc_generation);
        }
    }
    mounted_on_dp = dp;
    mounted_on_depth = 0;
    dont_cache_mp = 0;
    current_mount_generation = mount_generation;
    /*
     * Check to see if the vnode has been mounted on...
     * if so find the root of the mounted file system.
     */
check_mounted_on:
    if ((dp->v_type == VDIR) && dp->v_mountedhere &&
        ((cnp->cn_flags & NOCROSSMOUNT) == 0)) {

        vnode_lock(dp);

        if ((dp->v_type == VDIR) && (mp = dp->v_mountedhere)) {
            struct uthread *uth = (struct uthread *)get_bsdthread_info(current_thread());

            mp->mnt_crossref++;
            vnode_unlock(dp);

            if (vfs_busy(mp, vbusyflags)) {
                mount_dropcrossref(mp, dp, 0);
                if (vbusyflags == LK_NOWAIT) {
                    error = ENOENT;
                    goto bad2;
                }
                goto check_mounted_on;
            }

            /*
             * XXX - if this is the last component of the
             * pathname, and it's either not a lookup operation
             * or the NOTRIGGER flag is set for the operation,
             * set a uthread flag to let VFS_ROOT() for autofs
             * know it shouldn't trigger a mount.
             */
            if ((cnp->cn_flags & ISLASTCN) &&
                (cnp->cn_nameiop != LOOKUP ||
                 (cnp->cn_flags & NOTRIGGER))) {
                uth->uu_notrigger = 1;
                dont_cache_mp = 1;
            }
            error = VFS_ROOT(mp, &tdp, ctx);
            /* XXX - clear the uthread flag */
            uth->uu_notrigger = 0;
            /*
             * mount_dropcrossref does a vnode_put
             * on dp if the 3rd arg is non-zero
             */
            mount_dropcrossref(mp, dp, 1);
            dp = NULL;
            vfs_unbusy(mp);

            if (error) {
                goto bad2;
            }
            ndp->ni_vp = dp = tdp;
            mounted_on_depth++;

            goto check_mounted_on;
        }
        vnode_unlock(dp);
    }
#if CONFIG_MACF
    if (vfs_flags(vnode_mount(dp)) & MNT_MULTILABEL) {
        error = vnode_label(vnode_mount(dp), NULL, dp, NULL, VNODE_LABEL_NEEDREF, ctx);
        if (error)
            goto bad2;
    }
#endif
    if (mounted_on_depth && !dont_cache_mp) {
        mp = mounted_on_dp->v_mountedhere;

        if (mp) {
            mount_lock(mp);
            mp->mnt_realrootvp_vid = dp->v_id;
            mp->mnt_realrootvp = dp;
            mp->mnt_generation = current_mount_generation;
            mount_unlock(mp);
        }
    }

    /*
     * Check for symbolic link
     */
    if ((dp->v_type == VLNK) &&
        ((cnp->cn_flags & FOLLOW) || trailing_slash || *ndp->ni_next == '/')) {
        cnp->cn_flags |= ISSYMLINK;
        return (0);
    }

    /*
     * Check for bogus trailing slashes.
     */
    if (trailing_slash) {
        if (dp->v_type != VDIR) {
            error = ENOTDIR;
            goto bad2;
        }
        trailing_slash = 0;
    }

nextname:
    /*
     * Not a symbolic link. If more pathname,
     * continue at next component, else return.
     */
    if (*ndp->ni_next == '/') {
        cnp->cn_nameptr = ndp->ni_next + 1;
        ndp->ni_pathlen--;
        while (*cnp->cn_nameptr == '/') {
            cnp->cn_nameptr++;
            ndp->ni_pathlen--;
        }
        vnode_put(ndp->ni_dvp);

        cp = cnp->cn_nameptr;

        if (*cp == '\0')
            goto emptyname;

        /*
         * cache_lookup_path is now responsible for dropping the io ref on dp
         * when it is called again in the dirloop. This ensures we hold
         * a ref on dp until we complete the next round of the lookup.
         */
        last_dp = dp;

        goto dirloop;
    }

    /*
     * Disallow directory write attempts on read-only file systems.
     */
    if (rdonly &&
        (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
        error = EROFS;
        goto bad2;
    }
    if (cnp->cn_flags & SAVESTART) {
        /*
         * note that we already hold a reference
         * on both dp and ni_dvp, but for some reason
         * can't get another one... in this case we
         * need to do vnode_put on dp in 'bad2'
         */
        if ( (vnode_get(ndp->ni_dvp)) ) {
            error = ENOENT;
            goto bad2;
        }
        ndp->ni_startdir = ndp->ni_dvp;
    }
    if (!wantparent && ndp->ni_dvp) {
        vnode_put(ndp->ni_dvp);
        ndp->ni_dvp = NULLVP;
    }

    if (cnp->cn_flags & AUDITVNPATH1)
        AUDIT_ARG(vnpath, dp, ARG_VNODE1);
    else if (cnp->cn_flags & AUDITVNPATH2)
        AUDIT_ARG(vnpath, dp, ARG_VNODE2);

#if NAMEDRSRCFORK
    /*
     * Caller wants the resource fork.
     */
    if ((cnp->cn_flags & CN_WANTSRSRCFORK) && (dp != NULLVP)) {
        vnode_t svp = NULLVP;
        enum nsoperation nsop;

        if (dp->v_type != VREG) {
            error = ENOENT;
            goto bad2;
        }
        switch (cnp->cn_nameiop) {
        case DELETE:
            nsop = NS_DELETE;
            break;
        case CREATE:
            nsop = NS_CREATE;
            break;
        case LOOKUP:
            /* Make sure our lookup of "/..namedfork/rsrc" is allowed. */
            if (cnp->cn_flags & CN_ALLOWRSRCFORK) {
                nsop = NS_OPEN;
            } else {
                error = EPERM;
                goto bad2;
            }
            break;
        default:
            error = EPERM;
            goto bad2;
        }
        /* Ask the file system for the resource fork. */
        error = vnode_getnamedstream(dp, &svp, XATTR_RESOURCEFORK_NAME, nsop, 0, ctx);

        /* During a create, it's OK for the stream vnode to be missing. */
        if (error == ENOATTR || error == ENOENT) {
            error = (nsop == NS_CREATE) ? 0 : ENOENT;
        }
        if (error) {
            goto bad2;
        }
        /* The "parent" of the stream is the file.
         */
        if (wantparent) {
            if (ndp->ni_dvp) {
                if (ndp->ni_cnd.cn_flags & FSNODELOCKHELD) {
                    ndp->ni_cnd.cn_flags &= ~FSNODELOCKHELD;
                    unlock_fsnode(ndp->ni_dvp, NULL);
                }
                vnode_put(ndp->ni_dvp);
            }
            ndp->ni_dvp = dp;
        } else {
            vnode_put(dp);
        }
        ndp->ni_vp = dp = svp;	/* on create this may be null */

        /* Restore the truncated pathname buffer (for audits). */
        if (ndp->ni_pathlen == 1 && ndp->ni_next[0] == '\0') {
            ndp->ni_next[0] = '/';
        }
        cnp->cn_flags &= ~MAKEENTRY;
    }
#endif
    if (kdebug_enable)
        kdebug_lookup(dp, cnp);
    return (0);

emptyname:
    cnp->cn_namelen = 0;
    /*
     * A degenerate name (e.g. / or "") which is a way of
     * talking about a directory, e.g. like "/." or ".".
     */
    if (dp->v_type != VDIR) {
        error = ENOTDIR;
        goto bad;
    }
    if (cnp->cn_nameiop != LOOKUP) {
        error = EISDIR;
        goto bad;
    }
    if (wantparent) {
        /*
         * note that we already hold a reference
         * on dp, but for some reason can't
         * get another one... in this case we
         * need to do vnode_put on dp in 'bad'
         */
        if ( (vnode_get(dp)) ) {
            error = ENOENT;
            goto bad;
        }
        ndp->ni_dvp = dp;
    }
    cnp->cn_flags &= ~ISDOTDOT;
    cnp->cn_flags |= ISLASTCN;
    ndp->ni_next = cp;
    ndp->ni_vp = dp;

    if (cnp->cn_flags & AUDITVNPATH1)
        AUDIT_ARG(vnpath, dp, ARG_VNODE1);
    else if (cnp->cn_flags & AUDITVNPATH2)
        AUDIT_ARG(vnpath, dp, ARG_VNODE2);
    if (cnp->cn_flags & SAVESTART)
        panic("lookup: SAVESTART");
    return (0);

bad2:
    if ((cnp->cn_flags & FSNODELOCKHELD)) {
        cnp->cn_flags &= ~FSNODELOCKHELD;
        unlock_fsnode(ndp->ni_dvp, NULL);
    }
    if (ndp->ni_dvp)
        vnode_put(ndp->ni_dvp);
    if (dp)
        vnode_put(dp);
    ndp->ni_vp = NULLVP;

    if (kdebug_enable)
        kdebug_lookup(dp, cnp);
    return (error);

bad:
    if ((cnp->cn_flags & FSNODELOCKHELD)) {
        cnp->cn_flags &= ~FSNODELOCKHELD;
        unlock_fsnode(ndp->ni_dvp, NULL);
    }
    if (dp)
        vnode_put(dp);
    ndp->ni_vp = NULLVP;

    if (kdebug_enable)
        kdebug_lookup(dp, cnp);
    return (error);
}
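/*
 * A minimal sketch (assumed, not part of this file) of how a kernel client
 * typically reaches the machinery above: the vnode_lookup() KPI builds a
 * nameidata, runs namei() (which in turn drives lookup()), and returns
 * ni_vp with an iocount held. The function name lookup_target_sketch is
 * an illustrative assumption; vnode_lookup(), VNODE_LOOKUP_NOFOLLOW, and
 * vnode_put() are existing XNU KPIs.
 */
static int
lookup_target_sketch(const char *path, vfs_context_t ctx)
{
    vnode_t vp = NULLVP;
    int error;

    /*
     * flags == 0 follows a trailing symlink, matching the FOLLOW
     * handling above; VNODE_LOOKUP_NOFOLLOW would stop at the link
     */
    error = vnode_lookup(path, 0, &vp, ctx);
    if (error)
        return (error);

    /* ... use vp; the iocount pins it against reclaim ... */

    /* drop the iocount taken on ni_vp by the lookup */
    vnode_put(vp);
    return (0);
}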