int lookup_authorize_search(vnode_t dp, struct componentname *cnp, int dp_authorized_in_cache, vfs_context_t ctx) { int error; if (!dp_authorized_in_cache) { error = vnode_authorize(dp, NULL, KAUTH_VNODE_SEARCH, ctx); if (error) return error; } #if CONFIG_MACF error = mac_vnode_check_lookup(ctx, dp, cnp); if (error) return error; #endif /* CONFIG_MACF */ return 0; }
/* * This routine exists to support the load_dylinker(). * * This routine has its own, separate, understanding of the FAT file format, * which is terrifically unfortunate. */ static load_return_t get_macho_vnode( char *path, integer_t archbits, struct mach_header *mach_header, off_t *file_offset, off_t *macho_size, struct vnode **vpp ) { struct vnode *vp; vfs_context_t ctx = vfs_context_current(); proc_t p = vfs_context_proc(ctx); kauth_cred_t kerncred; struct nameidata nid, *ndp; boolean_t is_fat; struct fat_arch fat_arch; int error = LOAD_SUCCESS; int resid; union { struct mach_header mach_header; struct fat_header fat_header; char pad[512]; } header; off_t fsize = (off_t)0; int err2; /* * Capture the kernel credential for use in the actual read of the * file, since the user doing the execution may have execute rights * but not read rights, but to exec something, we have to either map * or read it into the new process address space, which requires * read rights. This is to deal with lack of common credential * serialization code which would treat NOCRED as "serialize 'root'". */ kerncred = vfs_context_ucred(vfs_context_kernel()); ndp = &nid; /* init the namei data to point the file user's program name */ NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE32, CAST_USER_ADDR_T(path), ctx); if ((error = namei(ndp)) != 0) { if (error == ENOENT) { error = LOAD_ENOENT; } else { error = LOAD_FAILURE; } return(error); } nameidone(ndp); vp = ndp->ni_vp; /* check for regular file */ if (vp->v_type != VREG) { error = LOAD_PROTECT; goto bad1; } /* get size */ if ((error = vnode_size(vp, &fsize, ctx)) != 0) { error = LOAD_FAILURE; goto bad1; } /* Check mount point */ if (vp->v_mount->mnt_flag & MNT_NOEXEC) { error = LOAD_PROTECT; goto bad1; } /* check access */ if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_EXECUTE, ctx)) != 0) { error = LOAD_PROTECT; goto bad1; } /* try to open it */ if ((error = VNOP_OPEN(vp, FREAD, ctx)) != 0) { error = LOAD_PROTECT; goto bad1; } if ((error = vn_rdwr(UIO_READ, vp, (caddr_t)&header, sizeof(header), 0, UIO_SYSSPACE32, IO_NODELOCKED, kerncred, &resid, p)) != 0) { error = LOAD_IOERROR; goto bad2; } if (header.mach_header.magic == MH_MAGIC || header.mach_header.magic == MH_MAGIC_64) is_fat = FALSE; else if (header.fat_header.magic == FAT_MAGIC || header.fat_header.magic == FAT_CIGAM) is_fat = TRUE; else { error = LOAD_BADMACHO; goto bad2; } if (is_fat) { /* Look up our architecture in the fat file. */ error = fatfile_getarch_with_bits(vp, archbits, (vm_offset_t)(&header.fat_header), &fat_arch); if (error != LOAD_SUCCESS) goto bad2; /* Read the Mach-O header out of it */ error = vn_rdwr(UIO_READ, vp, (caddr_t)&header.mach_header, sizeof(header.mach_header), fat_arch.offset, UIO_SYSSPACE32, IO_NODELOCKED, kerncred, &resid, p); if (error) { error = LOAD_IOERROR; goto bad2; } /* Is this really a Mach-O? */ if (header.mach_header.magic != MH_MAGIC && header.mach_header.magic != MH_MAGIC_64) { error = LOAD_BADMACHO; goto bad2; } *file_offset = fat_arch.offset; *macho_size = fat_arch.size; } else { /* * Force get_macho_vnode() to fail if the architecture bits * do not match the expected architecture bits. This in * turn causes load_dylinker() to fail for the same reason, * so it ensures the dynamic linker and the binary are in * lock-step. This is potentially bad, if we ever add to * the CPU_ARCH_* bits any bits that are desirable but not * required, since the dynamic linker might work, but we will * refuse to load it because of this check. */ if ((cpu_type_t)(header.mach_header.cputype & CPU_ARCH_MASK) != archbits) return(LOAD_BADARCH); *file_offset = 0; *macho_size = fsize; } *mach_header = header.mach_header; *vpp = vp; ubc_setsize(vp, fsize); return (error); bad2: err2 = VNOP_CLOSE(vp, FREAD, ctx); vnode_put(vp); return (error); bad1: vnode_put(vp); return(error); }
/* * XXX Internally, we use VM_PROT_* somewhat interchangeably, but the correct * XXX usage is PROT_* from an interface perspective. Thus the values of * XXX VM_PROT_* and PROT_* need to correspond. */ int mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval) { /* * Map in special device (must be SHARED) or file */ struct fileproc *fp; register struct vnode *vp; int flags; int prot, file_prot; int err=0; vm_map_t user_map; kern_return_t result; mach_vm_offset_t user_addr; mach_vm_size_t user_size; vm_object_offset_t pageoff; vm_object_offset_t file_pos; int alloc_flags=0; boolean_t docow; vm_prot_t maxprot; void *handle; vm_pager_t pager; int mapanon=0; int fpref=0; int error =0; int fd = uap->fd; user_addr = (mach_vm_offset_t)uap->addr; user_size = (mach_vm_size_t) uap->len; AUDIT_ARG(addr, user_addr); AUDIT_ARG(len, user_size); AUDIT_ARG(fd, uap->fd); prot = (uap->prot & VM_PROT_ALL); #if 3777787 /* * Since the hardware currently does not support writing without * read-before-write, or execution-without-read, if the request is * for write or execute access, we must imply read access as well; * otherwise programs expecting this to work will fail to operate. */ if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) prot |= VM_PROT_READ; #endif /* radar 3777787 */ flags = uap->flags; vp = NULLVP; /* * The vm code does not have prototypes & compiler doesn't do the' * the right thing when you cast 64bit value and pass it in function * call. So here it is. */ file_pos = (vm_object_offset_t)uap->pos; /* make sure mapping fits into numeric range etc */ if (file_pos + user_size > (vm_object_offset_t)-PAGE_SIZE_64) return (EINVAL); /* * Align the file position to a page boundary, * and save its page offset component. */ pageoff = (file_pos & PAGE_MASK); file_pos -= (vm_object_offset_t)pageoff; /* Adjust size for rounding (on both ends). */ user_size += pageoff; /* low end... */ user_size = mach_vm_round_page(user_size); /* hi end */ /* * Check for illegal addresses. Watch out for address wrap... Note * that VM_*_ADDRESS are not constants due to casts (argh). */ if (flags & MAP_FIXED) { /* * The specified address must have the same remainder * as the file offset taken modulo PAGE_SIZE, so it * should be aligned after adjustment by pageoff. */ user_addr -= pageoff; if (user_addr & PAGE_MASK) return (EINVAL); } #ifdef notyet /* DO not have apis to get this info, need to wait till then*/ /* * XXX for non-fixed mappings where no hint is provided or * the hint would fall in the potential heap space, * place it after the end of the largest possible heap. * * There should really be a pmap call to determine a reasonable * location. */ else if (addr < mach_vm_round_page(p->p_vmspace->vm_daddr + MAXDSIZ)) addr = mach_vm_round_page(p->p_vmspace->vm_daddr + MAXDSIZ); #endif alloc_flags = 0; if (flags & MAP_ANON) { /* * Mapping blank space is trivial. Use positive fds as the alias * value for memory tracking. */ if (fd != -1) { /* * Use "fd" to pass (some) Mach VM allocation flags, * (see the VM_FLAGS_* definitions). */ alloc_flags = fd & (VM_FLAGS_ALIAS_MASK | VM_FLAGS_PURGABLE); if (alloc_flags != fd) { /* reject if there are any extra flags */ return EINVAL; } } handle = NULL; maxprot = VM_PROT_ALL; file_pos = 0; mapanon = 1; } else { struct vnode_attr va; vfs_context_t ctx = vfs_context_current(); /* * Mapping file, get fp for validation. Obtain vnode and make * sure it is of appropriate type. */ err = fp_lookup(p, fd, &fp, 0); if (err) return(err); fpref = 1; if(fp->f_fglob->fg_type == DTYPE_PSXSHM) { uap->addr = (user_addr_t)user_addr; uap->len = (user_size_t)user_size; uap->prot = prot; uap->flags = flags; uap->pos = file_pos; error = pshm_mmap(p, uap, retval, fp, (off_t)pageoff); goto bad; } if (fp->f_fglob->fg_type != DTYPE_VNODE) { error = EINVAL; goto bad; } vp = (struct vnode *)fp->f_fglob->fg_data; error = vnode_getwithref(vp); if(error != 0) goto bad; if (vp->v_type != VREG && vp->v_type != VCHR) { (void)vnode_put(vp); error = EINVAL; goto bad; } AUDIT_ARG(vnpath, vp, ARG_VNODE1); /* * POSIX: mmap needs to update access time for mapped files */ if ((vnode_vfsvisflags(vp) & MNT_NOATIME) == 0) { VATTR_INIT(&va); nanotime(&va.va_access_time); VATTR_SET_ACTIVE(&va, va_access_time); vnode_setattr(vp, &va, ctx); } /* * XXX hack to handle use of /dev/zero to map anon memory (ala * SunOS). */ if (vp->v_type == VCHR || vp->v_type == VSTR) { (void)vnode_put(vp); error = ENODEV; goto bad; } else { /* * Ensure that file and memory protections are * compatible. Note that we only worry about * writability if mapping is shared; in this case, * current and max prot are dictated by the open file. * XXX use the vnode instead? Problem is: what * credentials do we use for determination? What if * proc does a setuid? */ maxprot = VM_PROT_EXECUTE; /* ??? */ if (fp->f_fglob->fg_flag & FREAD) maxprot |= VM_PROT_READ; else if (prot & PROT_READ) { (void)vnode_put(vp); error = EACCES; goto bad; } /* * If we are sharing potential changes (either via * MAP_SHARED or via the implicit sharing of character * device mappings), and we are trying to get write * permission although we opened it without asking * for it, bail out. */ if ((flags & MAP_SHARED) != 0) { if ((fp->f_fglob->fg_flag & FWRITE) != 0) { /* * check for write access * * Note that we already made this check when granting FWRITE * against the file, so it seems redundant here. */ error = vnode_authorize(vp, NULL, KAUTH_VNODE_CHECKIMMUTABLE, ctx); /* if not granted for any reason, but we wanted it, bad */ if ((prot & PROT_WRITE) && (error != 0)) { vnode_put(vp); goto bad; } /* if writable, remember */ if (error == 0) maxprot |= VM_PROT_WRITE; } else if ((prot & PROT_WRITE) != 0) { (void)vnode_put(vp); error = EACCES; goto bad; } } else maxprot |= VM_PROT_WRITE; handle = (void *)vp; #if CONFIG_MACF error = mac_file_check_mmap(vfs_context_ucred(ctx), fp->f_fglob, prot, flags, &maxprot); if (error) { (void)vnode_put(vp); goto bad; } #endif /* MAC */ } } if (user_size == 0) { if (!mapanon) (void)vnode_put(vp); error = 0; goto bad; } /* * We bend a little - round the start and end addresses * to the nearest page boundary. */ user_size = mach_vm_round_page(user_size); if (file_pos & PAGE_MASK_64) { if (!mapanon) (void)vnode_put(vp); error = EINVAL; goto bad; } user_map = current_map(); if ((flags & MAP_FIXED) == 0) { alloc_flags |= VM_FLAGS_ANYWHERE; user_addr = mach_vm_round_page(user_addr); } else { if (user_addr != mach_vm_trunc_page(user_addr)) { if (!mapanon) (void)vnode_put(vp); error = EINVAL; goto bad; } /* * mmap(MAP_FIXED) will replace any existing mappings in the * specified range, if the new mapping is successful. * If we just deallocate the specified address range here, * another thread might jump in and allocate memory in that * range before we get a chance to establish the new mapping, * and we won't have a chance to restore the old mappings. * So we use VM_FLAGS_OVERWRITE to let Mach VM know that it * has to deallocate the existing mappings and establish the * new ones atomically. */ alloc_flags |= VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE; } if (flags & MAP_NOCACHE) alloc_flags |= VM_FLAGS_NO_CACHE; /* * Lookup/allocate object. */ if (handle == NULL) { pager = NULL; #ifdef notyet /* Hmm .. */ #if defined(VM_PROT_READ_IS_EXEC) if (prot & VM_PROT_READ) prot |= VM_PROT_EXECUTE; if (maxprot & VM_PROT_READ) maxprot |= VM_PROT_EXECUTE; #endif #endif #if 3777787 if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) prot |= VM_PROT_READ; if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) maxprot |= VM_PROT_READ; #endif /* radar 3777787 */ result = vm_map_enter_mem_object(user_map, &user_addr, user_size, 0, alloc_flags, IPC_PORT_NULL, 0, FALSE, prot, maxprot, (flags & MAP_SHARED) ? VM_INHERIT_SHARE : VM_INHERIT_DEFAULT); if (result != KERN_SUCCESS) goto out; } else { pager = (vm_pager_t)ubc_getpager(vp); if (pager == NULL) { (void)vnode_put(vp); error = ENOMEM; goto bad; } /* * Set credentials: * FIXME: if we're writing the file we need a way to * ensure that someone doesn't replace our R/W creds * with ones that only work for read. */ ubc_setthreadcred(vp, p, current_thread()); docow = FALSE; if ((flags & (MAP_ANON|MAP_SHARED)) == 0) { docow = TRUE; } #ifdef notyet /* Hmm .. */ #if defined(VM_PROT_READ_IS_EXEC) if (prot & VM_PROT_READ) prot |= VM_PROT_EXECUTE; if (maxprot & VM_PROT_READ) maxprot |= VM_PROT_EXECUTE; #endif #endif /* notyet */ #if 3777787 if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) prot |= VM_PROT_READ; if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) maxprot |= VM_PROT_READ; #endif /* radar 3777787 */ result = vm_map_enter_mem_object(user_map, &user_addr, user_size, 0, alloc_flags, (ipc_port_t)pager, file_pos, docow, prot, maxprot, (flags & MAP_SHARED) ? VM_INHERIT_SHARE : VM_INHERIT_DEFAULT); if (result != KERN_SUCCESS) { (void)vnode_put(vp); goto out; } file_prot = prot & (PROT_READ | PROT_WRITE | PROT_EXEC); if (docow) { /* private mapping: won't write to the file */ file_prot &= ~PROT_WRITE; } (void) ubc_map(vp, file_prot); } if (!mapanon) (void)vnode_put(vp); out: switch (result) { case KERN_SUCCESS: *retval = user_addr + pageoff; error = 0; break; case KERN_INVALID_ADDRESS: case KERN_NO_SPACE: error = ENOMEM; break; case KERN_PROTECTION_FAILURE: error = EACCES; break; default: error = EINVAL; break; } bad: if (fpref) fp_drop(p, fd, fp, 0); KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_mmap) | DBG_FUNC_NONE), fd, (uint32_t)(*retval), (uint32_t)user_size, error, 0); KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO2, SYS_mmap) | DBG_FUNC_NONE), (uint32_t)(*retval >> 32), (uint32_t)(user_size >> 32), (uint32_t)(file_pos >> 32), (uint32_t)file_pos, 0); return(error); }
/* * Search a pathname. * This is a very central and rather complicated routine. * * The pathname is pointed to by ni_ptr and is of length ni_pathlen. * The starting directory is taken from ni_startdir. The pathname is * descended until done, or a symbolic link is encountered. The variable * ni_more is clear if the path is completed; it is set to one if a * symbolic link needing interpretation is encountered. * * The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on * whether the name is to be looked up, created, renamed, or deleted. * When CREATE, RENAME, or DELETE is specified, information usable in * creating, renaming, or deleting a directory entry may be calculated. * If flag has LOCKPARENT or'ed into it, the parent directory is returned * locked. If flag has WANTPARENT or'ed into it, the parent directory is * returned unlocked. Otherwise the parent directory is not returned. If * the target of the pathname exists and LOCKLEAF is or'ed into the flag * the target is returned locked, otherwise it is returned unlocked. * When creating or renaming and LOCKPARENT is specified, the target may not * be ".". When deleting and LOCKPARENT is specified, the target may be ".". * * Overall outline of lookup: * * dirloop: * identify next component of name at ndp->ni_ptr * handle degenerate case where name is null string * if .. and crossing mount points and on mounted filesys, find parent * call VNOP_LOOKUP routine for next component name * directory vnode returned in ni_dvp, unlocked unless LOCKPARENT set * component vnode returned in ni_vp (if it exists), locked. * if result vnode is mounted on and crossing mount points, * find mounted on vnode * if more components of name, do next level at dirloop * return the answer in ni_vp, locked if LOCKLEAF set * if LOCKPARENT set, return locked parent in ni_dvp * if WANTPARENT set, return unlocked parent in ni_dvp * * Returns: 0 Success * ENOENT No such file or directory * EBADF Bad file descriptor * ENOTDIR Not a directory * EROFS Read-only file system [CREATE] * EISDIR Is a directory [CREATE] * cache_lookup_path:ERECYCLE (vnode was recycled from underneath us, redrive lookup again) * vnode_authorize:EROFS * vnode_authorize:EACCES * vnode_authorize:EPERM * vnode_authorize:??? * VNOP_LOOKUP:ENOENT No such file or directory * VNOP_LOOKUP:EJUSTRETURN Restart system call (INTERNAL) * VNOP_LOOKUP:??? * VFS_ROOT:ENOTSUP * VFS_ROOT:ENOENT * VFS_ROOT:??? */ int lookup(struct nameidata *ndp) { char *cp; /* pointer into pathname argument */ vnode_t tdp; /* saved dp */ vnode_t dp; /* the directory we are searching */ mount_t mp; /* mount table entry */ int docache = 1; /* == 0 do not cache last component */ int wantparent; /* 1 => wantparent or lockparent flag */ int rdonly; /* lookup read-only flag bit */ int trailing_slash = 0; int dp_authorized = 0; int error = 0; struct componentname *cnp = &ndp->ni_cnd; vfs_context_t ctx = cnp->cn_context; int mounted_on_depth = 0; int dont_cache_mp = 0; vnode_t mounted_on_dp = NULLVP; int current_mount_generation = 0; int vbusyflags = 0; int nc_generation = 0; vnode_t last_dp = NULLVP; /* * Setup: break out flag bits into variables. */ if (cnp->cn_flags & (NOCACHE | DOWHITEOUT)) { if ((cnp->cn_flags & NOCACHE) || (cnp->cn_nameiop == DELETE)) docache = 0; } wantparent = cnp->cn_flags & (LOCKPARENT | WANTPARENT); rdonly = cnp->cn_flags & RDONLY; cnp->cn_flags &= ~ISSYMLINK; cnp->cn_consume = 0; dp = ndp->ni_startdir; ndp->ni_startdir = NULLVP; if ((cnp->cn_flags & CN_NBMOUNTLOOK) != 0) vbusyflags = LK_NOWAIT; cp = cnp->cn_nameptr; if (*cp == '\0') { if ( (vnode_getwithref(dp)) ) { dp = NULLVP; error = ENOENT; goto bad; } goto emptyname; } dirloop: ndp->ni_vp = NULLVP; if ( (error = cache_lookup_path(ndp, cnp, dp, ctx, &trailing_slash, &dp_authorized, last_dp)) ) { dp = NULLVP; goto bad; } if ((cnp->cn_flags & ISLASTCN)) { if (docache) cnp->cn_flags |= MAKEENTRY; } else cnp->cn_flags |= MAKEENTRY; dp = ndp->ni_dvp; if (ndp->ni_vp != NULLVP) { /* * cache_lookup_path returned a non-NULL ni_vp then, * we're guaranteed that the dp is a VDIR, it's * been authorized, and vp is not ".." * * make sure we don't try to enter the name back into * the cache if this vp is purged before we get to that * check since we won't have serialized behind whatever * activity is occurring in the FS that caused the purge */ if (dp != NULLVP) nc_generation = dp->v_nc_generation - 1; goto returned_from_lookup_path; } /* * Handle "..": two special cases. * 1. If at root directory (e.g. after chroot) * or at absolute root directory * then ignore it so can't get out. * 2. If this vnode is the root of a mounted * filesystem, then replace it with the * vnode which was mounted on so we take the * .. in the other file system. */ if ( (cnp->cn_flags & ISDOTDOT) ) { for (;;) { if (dp == ndp->ni_rootdir || dp == rootvnode) { ndp->ni_dvp = dp; ndp->ni_vp = dp; /* * we're pinned at the root * we've already got one reference on 'dp' * courtesy of cache_lookup_path... take * another one for the ".." * if we fail to get the new reference, we'll * drop our original down in 'bad' */ if ( (vnode_get(dp)) ) { error = ENOENT; goto bad; } goto nextname; } if ((dp->v_flag & VROOT) == 0 || (cnp->cn_flags & NOCROSSMOUNT)) break; if (dp->v_mount == NULL) { /* forced umount */ error = EBADF; goto bad; } tdp = dp; dp = tdp->v_mount->mnt_vnodecovered; vnode_put(tdp); if ( (vnode_getwithref(dp)) ) { dp = NULLVP; error = ENOENT; goto bad; } ndp->ni_dvp = dp; dp_authorized = 0; } } /* * We now have a segment name to search for, and a directory to search. */ unionlookup: ndp->ni_vp = NULLVP; if (dp->v_type != VDIR) { error = ENOTDIR; goto lookup_error; } if ( (cnp->cn_flags & DONOTAUTH) != DONOTAUTH ) { if (!dp_authorized) { error = vnode_authorize(dp, NULL, KAUTH_VNODE_SEARCH, ctx); if (error) goto lookup_error; } #if CONFIG_MACF error = mac_vnode_check_lookup(ctx, dp, cnp); if (error) goto lookup_error; #endif /* CONFIG_MACF */ } nc_generation = dp->v_nc_generation; if ( (error = VNOP_LOOKUP(dp, &ndp->ni_vp, cnp, ctx)) ) { lookup_error: if ((error == ENOENT) && (dp->v_flag & VROOT) && (dp->v_mount != NULL) && (dp->v_mount->mnt_flag & MNT_UNION)) { if ((cnp->cn_flags & FSNODELOCKHELD)) { cnp->cn_flags &= ~FSNODELOCKHELD; unlock_fsnode(dp, NULL); } tdp = dp; dp = tdp->v_mount->mnt_vnodecovered; vnode_put(tdp); if ( (vnode_getwithref(dp)) ) { dp = NULLVP; error = ENOENT; goto bad; } ndp->ni_dvp = dp; dp_authorized = 0; goto unionlookup; } if (error != EJUSTRETURN) goto bad; if (ndp->ni_vp != NULLVP) panic("leaf should be empty"); /* * If creating and at end of pathname, then can consider * allowing file to be created. */ if (rdonly) { error = EROFS; goto bad; } if ((cnp->cn_flags & ISLASTCN) && trailing_slash && !(cnp->cn_flags & WILLBEDIR)) { error = ENOENT; goto bad; } /* * We return with ni_vp NULL to indicate that the entry * doesn't currently exist, leaving a pointer to the * referenced directory vnode in ndp->ni_dvp. */ if (cnp->cn_flags & SAVESTART) { if ( (vnode_get(ndp->ni_dvp)) ) { error = ENOENT; goto bad; } ndp->ni_startdir = ndp->ni_dvp; } if (!wantparent) vnode_put(ndp->ni_dvp); if (kdebug_enable) kdebug_lookup(ndp->ni_dvp, cnp); return (0); } returned_from_lookup_path: dp = ndp->ni_vp; /* * Take into account any additional components consumed by * the underlying filesystem. */ if (cnp->cn_consume > 0) { cnp->cn_nameptr += cnp->cn_consume; ndp->ni_next += cnp->cn_consume; ndp->ni_pathlen -= cnp->cn_consume; cnp->cn_consume = 0; } else { if (dp->v_name == NULL || dp->v_parent == NULLVP) { int isdot_or_dotdot; int update_flags = 0; isdot_or_dotdot = (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') || (cnp->cn_flags & ISDOTDOT); if (isdot_or_dotdot == 0) { if (dp->v_name == NULL) update_flags |= VNODE_UPDATE_NAME; if (ndp->ni_dvp != NULLVP && dp->v_parent == NULLVP) update_flags |= VNODE_UPDATE_PARENT; if (update_flags) vnode_update_identity(dp, ndp->ni_dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, update_flags); } } if ( (cnp->cn_flags & MAKEENTRY) && (dp->v_flag & VNCACHEABLE) && LIST_FIRST(&dp->v_nclinks) == NULL) { /* * missing from name cache, but should * be in it... this can happen if volfs * causes the vnode to be created or the * name cache entry got recycled but the * vnode didn't... * check to make sure that ni_dvp is valid * cache_lookup_path may return a NULL * do a quick check to see if the generation of the * directory matches our snapshot... this will get * rechecked behind the name cache lock, but if it * already fails to match, no need to go any further */ if (ndp->ni_dvp != NULLVP && (nc_generation == ndp->ni_dvp->v_nc_generation)) cache_enter_with_gen(ndp->ni_dvp, dp, cnp, nc_generation); } } mounted_on_dp = dp; mounted_on_depth = 0; dont_cache_mp = 0; current_mount_generation = mount_generation; /* * Check to see if the vnode has been mounted on... * if so find the root of the mounted file system. */ check_mounted_on: if ((dp->v_type == VDIR) && dp->v_mountedhere && ((cnp->cn_flags & NOCROSSMOUNT) == 0)) { vnode_lock(dp); if ((dp->v_type == VDIR) && (mp = dp->v_mountedhere)) { struct uthread *uth = (struct uthread *)get_bsdthread_info(current_thread()); mp->mnt_crossref++; vnode_unlock(dp); if (vfs_busy(mp, vbusyflags)) { mount_dropcrossref(mp, dp, 0); if (vbusyflags == LK_NOWAIT) { error = ENOENT; goto bad2; } goto check_mounted_on; } /* * XXX - if this is the last component of the * pathname, and it's either not a lookup operation * or the NOTRIGGER flag is set for the operation, * set a uthread flag to let VFS_ROOT() for autofs * know it shouldn't trigger a mount. */ if ((cnp->cn_flags & ISLASTCN) && (cnp->cn_nameiop != LOOKUP || (cnp->cn_flags & NOTRIGGER))) { uth->uu_notrigger = 1; dont_cache_mp = 1; } error = VFS_ROOT(mp, &tdp, ctx); /* XXX - clear the uthread flag */ uth->uu_notrigger = 0; /* * mount_dropcrossref does a vnode_put * on dp if the 3rd arg is non-zero */ mount_dropcrossref(mp, dp, 1); dp = NULL; vfs_unbusy(mp); if (error) { goto bad2; } ndp->ni_vp = dp = tdp; mounted_on_depth++; goto check_mounted_on; } vnode_unlock(dp); } #if CONFIG_MACF if (vfs_flags(vnode_mount(dp)) & MNT_MULTILABEL) { error = vnode_label(vnode_mount(dp), NULL, dp, NULL, VNODE_LABEL_NEEDREF, ctx); if (error) goto bad2; } #endif if (mounted_on_depth && !dont_cache_mp) { mp = mounted_on_dp->v_mountedhere; if (mp) { mount_lock(mp); mp->mnt_realrootvp_vid = dp->v_id; mp->mnt_realrootvp = dp; mp->mnt_generation = current_mount_generation; mount_unlock(mp); } } /* * Check for symbolic link */ if ((dp->v_type == VLNK) && ((cnp->cn_flags & FOLLOW) || trailing_slash || *ndp->ni_next == '/')) { cnp->cn_flags |= ISSYMLINK; return (0); } /* * Check for bogus trailing slashes. */ if (trailing_slash) { if (dp->v_type != VDIR) { error = ENOTDIR; goto bad2; } trailing_slash = 0; } nextname: /* * Not a symbolic link. If more pathname, * continue at next component, else return. */ if (*ndp->ni_next == '/') { cnp->cn_nameptr = ndp->ni_next + 1; ndp->ni_pathlen--; while (*cnp->cn_nameptr == '/') { cnp->cn_nameptr++; ndp->ni_pathlen--; } vnode_put(ndp->ni_dvp); cp = cnp->cn_nameptr; if (*cp == '\0') goto emptyname; /* * cache_lookup_path is now responsible for dropping io ref on dp * when it is called again in the dirloop. This ensures we hold * a ref on dp until we complete the next round of lookup. */ last_dp = dp; goto dirloop; } /* * Disallow directory write attempts on read-only file systems. */ if (rdonly && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) { error = EROFS; goto bad2; } if (cnp->cn_flags & SAVESTART) { /* * note that we already hold a reference * on both dp and ni_dvp, but for some reason * can't get another one... in this case we * need to do vnode_put on dp in 'bad2' */ if ( (vnode_get(ndp->ni_dvp)) ) { error = ENOENT; goto bad2; } ndp->ni_startdir = ndp->ni_dvp; } if (!wantparent && ndp->ni_dvp) { vnode_put(ndp->ni_dvp); ndp->ni_dvp = NULLVP; } if (cnp->cn_flags & AUDITVNPATH1) AUDIT_ARG(vnpath, dp, ARG_VNODE1); else if (cnp->cn_flags & AUDITVNPATH2) AUDIT_ARG(vnpath, dp, ARG_VNODE2); #if NAMEDRSRCFORK /* * Caller wants the resource fork. */ if ((cnp->cn_flags & CN_WANTSRSRCFORK) && (dp != NULLVP)) { vnode_t svp = NULLVP; enum nsoperation nsop; if (dp->v_type != VREG) { error = ENOENT; goto bad2; } switch (cnp->cn_nameiop) { case DELETE: nsop = NS_DELETE; break; case CREATE: nsop = NS_CREATE; break; case LOOKUP: /* Make sure our lookup of "/..namedfork/rsrc" is allowed. */ if (cnp->cn_flags & CN_ALLOWRSRCFORK) { nsop = NS_OPEN; } else { error = EPERM; goto bad2; } break; default: error = EPERM; goto bad2; } /* Ask the file system for the resource fork. */ error = vnode_getnamedstream(dp, &svp, XATTR_RESOURCEFORK_NAME, nsop, 0, ctx); /* During a create, it OK for stream vnode to be missing. */ if (error == ENOATTR || error == ENOENT) { error = (nsop == NS_CREATE) ? 0 : ENOENT; } if (error) { goto bad2; } /* The "parent" of the stream is the file. */ if (wantparent) { if (ndp->ni_dvp) { if (ndp->ni_cnd.cn_flags & FSNODELOCKHELD) { ndp->ni_cnd.cn_flags &= ~FSNODELOCKHELD; unlock_fsnode(ndp->ni_dvp, NULL); } vnode_put(ndp->ni_dvp); } ndp->ni_dvp = dp; } else { vnode_put(dp); } ndp->ni_vp = dp = svp; /* on create this may be null */ /* Restore the truncated pathname buffer (for audits). */ if (ndp->ni_pathlen == 1 && ndp->ni_next[0] == '\0') { ndp->ni_next[0] = '/'; } cnp->cn_flags &= ~MAKEENTRY; } #endif if (kdebug_enable) kdebug_lookup(dp, cnp); return (0); emptyname: cnp->cn_namelen = 0; /* * A degenerate name (e.g. / or "") which is a way of * talking about a directory, e.g. like "/." or ".". */ if (dp->v_type != VDIR) { error = ENOTDIR; goto bad; } if (cnp->cn_nameiop != LOOKUP) { error = EISDIR; goto bad; } if (wantparent) { /* * note that we already hold a reference * on dp, but for some reason can't * get another one... in this case we * need to do vnode_put on dp in 'bad' */ if ( (vnode_get(dp)) ) { error = ENOENT; goto bad; } ndp->ni_dvp = dp; } cnp->cn_flags &= ~ISDOTDOT; cnp->cn_flags |= ISLASTCN; ndp->ni_next = cp; ndp->ni_vp = dp; if (cnp->cn_flags & AUDITVNPATH1) AUDIT_ARG(vnpath, dp, ARG_VNODE1); else if (cnp->cn_flags & AUDITVNPATH2) AUDIT_ARG(vnpath, dp, ARG_VNODE2); if (cnp->cn_flags & SAVESTART) panic("lookup: SAVESTART"); return (0); bad2: if ((cnp->cn_flags & FSNODELOCKHELD)) { cnp->cn_flags &= ~FSNODELOCKHELD; unlock_fsnode(ndp->ni_dvp, NULL); } if (ndp->ni_dvp) vnode_put(ndp->ni_dvp); if (dp) vnode_put(dp); ndp->ni_vp = NULLVP; if (kdebug_enable) kdebug_lookup(dp, cnp); return (error); bad: if ((cnp->cn_flags & FSNODELOCKHELD)) { cnp->cn_flags &= ~FSNODELOCKHELD; unlock_fsnode(ndp->ni_dvp, NULL); } if (dp) vnode_put(dp); ndp->ni_vp = NULLVP; if (kdebug_enable) kdebug_lookup(dp, cnp); return (error); }