int
imageboot_setup()
{
	dev_t	dev;
	int	error = 0;
	char	*root_path = NULL;

	DBG_TRACE("%s: entry\n", __FUNCTION__);

	MALLOC_ZONE(root_path, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK);
	if (root_path == NULL)
		return (ENOMEM);

	if (PE_parse_boot_argn("rp", root_path, MAXPATHLEN) == FALSE) {
		error = ENOENT;
		goto done;
	}

	printf("%s: root image url is %s\n", __FUNCTION__, root_path);
	error = di_root_image(root_path, rootdevice, &dev);
	if (error) {
		printf("%s: di_root_image failed: %d\n", __FUNCTION__, error);
		goto done;
	}

	rootdev = dev;
	mountroot = NULL;
	printf("%s: root device 0x%x\n", __FUNCTION__, rootdev);
	error = vfs_mountroot();

	if (error == 0 && rootvnode != NULL) {
		struct vnode *tvp;
		struct vnode *newdp;

		/*
		 * Get the vnode for '/'.
		 * Set fdp->fd_fd.fd_cdir to reference it.
		 */
		if (VFS_ROOT(TAILQ_LAST(&mountlist, mntlist), &newdp, vfs_context_kernel()))
			panic("%s: cannot find root vnode", __FUNCTION__);

		vnode_ref(newdp);
		vnode_put(newdp);
		tvp = rootvnode;
		vnode_rele(tvp);
		filedesc0.fd_cdir = newdp;
		rootvnode = newdp;

		mount_list_lock();
		TAILQ_REMOVE(&mountlist, TAILQ_FIRST(&mountlist), mnt_list);
		mount_list_unlock();
		mountlist.tqh_first->mnt_flag |= MNT_ROOTFS;
		DBG_TRACE("%s: root switched\n", __FUNCTION__);
	}
done:
	FREE_ZONE(root_path, MAXPATHLEN, M_NAMEI);

	DBG_TRACE("%s: exit\n", __FUNCTION__);

	return (error);
}
static int
cp_lock_vfs_callback(mount_t mp, void *arg)
{
	VFS_IOCTL(mp, FIODEVICELOCKED, arg, 0, vfs_context_kernel());

	return 0;
}
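/*
 * Illustrative sketch, not part of the original source: how a per-mount
 * callback like cp_lock_vfs_callback is typically driven.  vfs_iterate()
 * invokes the callout once for every mounted filesystem; the callback above
 * forwards the state to the filesystem via VFS_IOCTL.  The function name
 * and the 'locked_state' value here are assumptions for illustration.
 */
static void
notify_device_lock_state(int locked_state)
{
	/* assumption: each filesystem only needs a pointer to the new state */
	vfs_iterate(0, cp_lock_vfs_callback, (void *)&locked_state);
}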
int
vm_record_file_write(vnode_t vp, uint64_t offset, char *buf, int size)
{
	int		error = 0;
	vfs_context_t	ctx;

	ctx = vfs_context_kernel();

	error = vn_rdwr(UIO_WRITE, vp, (caddr_t)buf, size, offset,
			UIO_SYSSPACE, IO_NODELOCKED, vfs_context_ucred(ctx),
			(int *) 0, vfs_context_proc(ctx));

	return (error);
}
u_int32_t
vnode_trim_list(vnode_t vp, struct trim_list *tl, boolean_t route_only)
{
	int		error = 0;
	int		trim_index = 0;
	u_int32_t	blocksize = 0;
	struct vnode	*devvp;
	dk_extent_t	*extents;
	dk_unmap_t	unmap;
	_dk_cs_unmap_t	cs_unmap;

	if ( !(vp->v_mount->mnt_ioflags & MNT_IOFLAGS_UNMAP_SUPPORTED))
		return (ENOTSUP);

	if (tl == NULL)
		return (0);

	/*
	 * Get the underlying device vnode and physical block size
	 */
	devvp = vp->v_mount->mnt_devvp;
	blocksize = vp->v_mount->mnt_devblocksize;

	extents = kalloc(sizeof(dk_extent_t) * MAX_BATCH_TO_TRIM);

	if (vp->v_mount->mnt_ioflags & MNT_IOFLAGS_CSUNMAP_SUPPORTED) {
		memset(&cs_unmap, 0, sizeof(_dk_cs_unmap_t));
		cs_unmap.extents = extents;

		if (route_only == TRUE)
			cs_unmap.options = ROUTE_ONLY;
	} else {
		memset(&unmap, 0, sizeof(dk_unmap_t));
		unmap.extents = extents;
	}

	while (tl) {
		daddr64_t	io_blockno;	/* Block number corresponding to the start of the extent */
		size_t		io_bytecount;	/* Number of bytes in current extent for the specified range */
		size_t		trimmed;
		size_t		remaining_length;
		off_t		current_offset;

		current_offset = tl->tl_offset;
		remaining_length = tl->tl_length;
		trimmed = 0;

		/*
		 * We may not get the entire range from tl_offset -> tl_offset+tl_length in a single
		 * extent from the blockmap call.  Keep looping/going until we are sure we've hit
		 * the whole range or if we encounter an error.
		 */
		while (trimmed < tl->tl_length) {
			/*
			 * VNOP_BLOCKMAP will tell us the logical to physical block number mapping for the
			 * specified offset.  It returns blocks in contiguous chunks, so if the logical range is
			 * broken into multiple extents, it must be called multiple times, increasing the offset
			 * in each call to ensure that the entire range is covered.
			 */
			error = VNOP_BLOCKMAP(vp, current_offset, remaining_length,
					      &io_blockno, &io_bytecount, NULL, VNODE_READ, NULL);

			if (error) {
				goto trim_exit;
			}

			extents[trim_index].offset = (uint64_t) io_blockno * (u_int64_t) blocksize;
			extents[trim_index].length = io_bytecount;

			trim_index++;

			if (trim_index == MAX_BATCH_TO_TRIM) {
				if (vp->v_mount->mnt_ioflags & MNT_IOFLAGS_CSUNMAP_SUPPORTED) {
					cs_unmap.extentsCount = trim_index;
					error = VNOP_IOCTL(devvp, _DKIOCCSUNMAP, (caddr_t)&cs_unmap, 0, vfs_context_kernel());
				} else {
					unmap.extentsCount = trim_index;
					error = VNOP_IOCTL(devvp, DKIOCUNMAP, (caddr_t)&unmap, 0, vfs_context_kernel());
				}
				if (error) {
					goto trim_exit;
				}
				trim_index = 0;
			}
			trimmed += io_bytecount;
			current_offset += io_bytecount;
			remaining_length -= io_bytecount;
		}
		tl = tl->tl_next;
	}
	if (trim_index) {
		if (vp->v_mount->mnt_ioflags & MNT_IOFLAGS_CSUNMAP_SUPPORTED) {
			cs_unmap.extentsCount = trim_index;
			error = VNOP_IOCTL(devvp, _DKIOCCSUNMAP, (caddr_t)&cs_unmap, 0, vfs_context_kernel());
		} else {
			unmap.extentsCount = trim_index;
			error = VNOP_IOCTL(devvp, DKIOCUNMAP, (caddr_t)&unmap, 0, vfs_context_kernel());
		}
	}
trim_exit:
	kfree(extents, sizeof(dk_extent_t) * MAX_BATCH_TO_TRIM);

	return error;
}
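/*
 * Illustrative sketch, not part of the original source: a minimal caller of
 * vnode_trim_list().  A trim_list is a singly linked list of logical
 * (offset, length) ranges, exactly as the loop above reads it through
 * tl_offset, tl_length and tl_next.  The helper name and the single-entry
 * list are assumptions for illustration.
 */
static int
discard_range(vnode_t vp, off_t offset, off_t length)
{
	struct trim_list one;

	one.tl_offset = offset;
	one.tl_length = length;
	one.tl_next   = NULL;

	/* route_only == FALSE: let the unmap reach the backing store as well */
	return vnode_trim_list(vp, &one, FALSE);
}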
int
vm_swapfile_io(vnode_t vp, uint64_t offset, uint64_t start, int npages, int flags)
{
	int error = 0;
	uint64_t io_size = npages * PAGE_SIZE_64;
#if 1
	kern_return_t	kr = KERN_SUCCESS;
	upl_t		upl = NULL;
	unsigned int	count = 0;
	upl_control_flags_t upl_create_flags = 0;
	int		upl_control_flags = 0;
	upl_size_t	upl_size = 0;

	upl_create_flags = UPL_SET_INTERNAL | UPL_SET_LITE
			 | UPL_MEMORY_TAG_MAKE(VM_KERN_MEMORY_OSFMK);

#if ENCRYPTED_SWAP
	upl_control_flags = UPL_IOSYNC | UPL_PAGING_ENCRYPTED;
#else
	upl_control_flags = UPL_IOSYNC;
#endif
	if ((flags & SWAP_READ) == FALSE) {
		upl_create_flags |= UPL_COPYOUT_FROM;
	}

	upl_size = io_size;
	kr = vm_map_create_upl(kernel_map,
			       start,
			       &upl_size,
			       &upl,
			       NULL,
			       &count,
			       &upl_create_flags);

	if (kr != KERN_SUCCESS || (upl_size != io_size)) {
		panic("vm_map_create_upl failed with %d\n", kr);
	}

	if (flags & SWAP_READ) {
		vnode_pagein(vp,
			     upl,
			     0,
			     offset,
			     io_size,
			     upl_control_flags | UPL_IGNORE_VALID_PAGE_CHECK,
			     &error);
		if (error) {
#if DEBUG
			printf("vm_swapfile_io: vnode_pagein failed with %d (vp: %p, offset: 0x%llx, size:%llu)\n",
			       error, vp, offset, io_size);
#else /* DEBUG */
			printf("vm_swapfile_io: vnode_pagein failed with %d.\n", error);
#endif /* DEBUG */
		}
	} else {
		vnode_pageout(vp,
			      upl,
			      0,
			      offset,
			      io_size,
			      upl_control_flags,
			      &error);
		if (error) {
#if DEBUG
			printf("vm_swapfile_io: vnode_pageout failed with %d (vp: %p, offset: 0x%llx, size:%llu)\n",
			       error, vp, offset, io_size);
#else /* DEBUG */
			printf("vm_swapfile_io: vnode_pageout failed with %d.\n", error);
#endif /* DEBUG */
		}
	}
	return error;

#else /* 1 */
	vfs_context_t ctx;
	ctx = vfs_context_kernel();

	error = vn_rdwr((flags & SWAP_READ) ? UIO_READ : UIO_WRITE, vp,
			(caddr_t)start, io_size, offset,
			UIO_SYSSPACE, IO_SYNC | IO_NODELOCKED | IO_UNIT | IO_NOCACHE | IO_SWAP_DISPATCH,
			vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));

	if (error) {
		printf("vn_rdwr: Swap I/O failed with %d\n", error);
	}
	return error;
#endif /* 1 */
}
/*
 * Function: devfs_kernel_mount
 * Purpose:
 *	Mount devfs at the given mount point from within the kernel.
 */
int
devfs_kernel_mount(char * mntname)
{
	struct mount *mp;
	int error;
	struct nameidata nd;
	struct vnode * vp;
	vfs_context_t ctx = vfs_context_kernel();
	struct vfstable *vfsp;

	/* Find our vfstable entry */
	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
		if (!strncmp(vfsp->vfc_name, "devfs", sizeof(vfsp->vfc_name)))
			break;

	if (!vfsp) {
		panic("Could not find entry in vfsconf for devfs.\n");
	}

	/*
	 * Get vnode to be covered
	 */
	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
	       CAST_USER_ADDR_T(mntname), ctx);
	if ((error = namei(&nd))) {
		printf("devfs_kernel_mount: failed to find directory '%s', %d",
		       mntname, error);
		return (error);
	}
	nameidone(&nd);
	vp = nd.ni_vp;

	if ((error = VNOP_FSYNC(vp, MNT_WAIT, ctx))) {
		printf("devfs_kernel_mount: vnop_fsync failed: %d\n", error);
		vnode_put(vp);
		return (error);
	}
	if ((error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0))) {
		printf("devfs_kernel_mount: buf_invalidateblks failed: %d\n", error);
		vnode_put(vp);
		return (error);
	}
	if (vnode_isdir(vp) == 0) {
		printf("devfs_kernel_mount: '%s' is not a directory\n", mntname);
		vnode_put(vp);
		return (ENOTDIR);
	}
	if ((vnode_mountedhere(vp))) {
		vnode_put(vp);
		return (EBUSY);
	}

	/*
	 * Allocate and initialize the filesystem.
	 */
	MALLOC_ZONE(mp, struct mount *, sizeof(struct mount),
		    M_MOUNT, M_WAITOK);
	bzero((char *)mp, sizeof(struct mount));

	/* Initialize the default IO constraints */
	mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS;
	mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32;
	mp->mnt_ioflags = 0;
	mp->mnt_realrootvp = NULLVP;
	mp->mnt_authcache_ttl = CACHED_LOOKUP_RIGHT_TTL;

	mount_lock_init(mp);
	TAILQ_INIT(&mp->mnt_vnodelist);
	TAILQ_INIT(&mp->mnt_workerqueue);
	TAILQ_INIT(&mp->mnt_newvnodes);

	(void)vfs_busy(mp, LK_NOWAIT);
	mp->mnt_op = &devfs_vfsops;
	mp->mnt_vtable = vfsp;
	mp->mnt_flag = 0;
	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
	strlcpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSTYPENAMELEN);
	vp->v_mountedhere = mp;
	mp->mnt_vnodecovered = vp;
	mp->mnt_vfsstat.f_owner = kauth_cred_getuid(kauth_cred_get());
	(void) copystr(mntname, mp->mnt_vfsstat.f_mntonname, MAXPATHLEN - 1, 0);
#if CONFIG_MACF
	mac_mount_label_init(mp);
	mac_mount_label_associate(ctx, mp);
#endif

	error = devfs_mount(mp, NULL, USER_ADDR_NULL, ctx);

	if (error) {
		printf("devfs_kernel_mount: mount %s failed: %d", mntname, error);
		mp->mnt_vtable->vfc_refcount--;

		vfs_unbusy(mp);
		mount_lock_destroy(mp);
#if CONFIG_MACF
		mac_mount_label_destroy(mp);
#endif
		FREE_ZONE(mp, sizeof (struct mount), M_MOUNT);
		vnode_put(vp);
		return (error);
	}
	vnode_ref(vp);
	vnode_put(vp);
	vfs_unbusy(mp);
	mount_list_add(mp);
	return (0);
}
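/*
 * Illustrative sketch, not part of the original source: a boot-time caller
 * that covers a /dev directory with a kernel-initiated devfs mount once the
 * root filesystem is up.  The wrapper name and the "/dev" path literal are
 * assumptions for illustration.
 */
static void
mount_devfs_at_dev(void)
{
	int err = devfs_kernel_mount("/dev");

	if (err) {
		printf("devfs mount on /dev failed: %d\n", err);
	}
}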
vfs_context_t
spl_vfs_context_kernel(void)
{
	return vfs_context_kernel();
}
struct kern_direct_file_io_ref_t *
kern_open_file_for_direct_io(const char * name,
			     uint32_t iflags,
			     kern_get_file_extents_callback_t callback,
			     void * callback_ref,
			     off_t set_file_size,
			     off_t fs_free_size,
			     off_t write_file_offset,
			     void * write_file_addr,
			     size_t write_file_len,
			     dev_t * partition_device_result,
			     dev_t * image_device_result,
			     uint64_t * partitionbase_result,
			     uint64_t * maxiocount_result,
			     uint32_t * oflags)
{
	struct kern_direct_file_io_ref_t * ref;

	proc_t			p;
	struct vnode_attr	va;
	dk_apfs_wbc_range_t	wbc_range;
	int			error;
	off_t			f_offset;
	uint64_t		fileblk;
	size_t			filechunk;
	uint64_t		physoffset, minoffset;
	dev_t			device;
	dev_t			target = 0;
	int			isssd = 0;
	uint32_t		flags = 0;
	uint32_t		blksize;
	off_t			maxiocount, count, segcount, wbctotal;
	boolean_t		locked = FALSE;
	int			fmode, cmode;
	struct nameidata	nd;
	u_int32_t		ndflags;
	off_t			mpFree;

	int (*do_ioctl)(void * p1, void * p2, u_long theIoctl, caddr_t result);
	void * p1 = NULL;
	void * p2 = NULL;

	error = EFAULT;

	ref = (struct kern_direct_file_io_ref_t *) kalloc(sizeof(struct kern_direct_file_io_ref_t));
	if (!ref) {
		error = EFAULT;
		goto out;
	}

	bzero(ref, sizeof(*ref));
	p = kernproc;
	ref->ctx = vfs_context_kernel();

	fmode   = (kIOPolledFileCreate & iflags) ? (O_CREAT | FWRITE) : FWRITE;
	cmode   = S_IRUSR | S_IWUSR;
	ndflags = NOFOLLOW;
	NDINIT(&nd, LOOKUP, OP_OPEN, ndflags, UIO_SYSSPACE, CAST_USER_ADDR_T(name), ref->ctx);
	VATTR_INIT(&va);
	VATTR_SET(&va, va_mode, cmode);
	VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWENCRYPTED);
	VATTR_SET(&va, va_dataprotect_class, PROTECTION_CLASS_D);
	if ((error = vn_open_auth(&nd, &fmode, &va))) {
		kprintf("vn_open_auth(fmode: %d, cmode: %d) failed with error: %d\n", fmode, cmode, error);
		goto out;
	}

	ref->vp = nd.ni_vp;
	if (ref->vp->v_type == VREG) {
		vnode_lock_spin(ref->vp);
		SET(ref->vp->v_flag, VSWAP);
		vnode_unlock(ref->vp);
	}

	if (write_file_addr && write_file_len) {
		if ((error = kern_write_file(ref, write_file_offset, write_file_addr, write_file_len, IO_SKIP_ENCRYPTION))) {
			kprintf("kern_write_file() failed with error: %d\n", error);
			goto out;
		}
	}

	VATTR_INIT(&va);
	VATTR_WANTED(&va, va_rdev);
	VATTR_WANTED(&va, va_fsid);
	VATTR_WANTED(&va, va_devid);
	VATTR_WANTED(&va, va_data_size);
	VATTR_WANTED(&va, va_data_alloc);
	VATTR_WANTED(&va, va_nlink);
	error = EFAULT;
	if (vnode_getattr(ref->vp, &va, ref->ctx))
		goto out;

	wbctotal = 0;
	mpFree = freespace_mb(ref->vp);
	mpFree <<= 20;
	kprintf("kern_direct_file(%s): vp size %qd, alloc %qd, mp free %qd, keep free %qd\n",
		name, va.va_data_size, va.va_data_alloc, mpFree, fs_free_size);

	if (ref->vp->v_type == VREG) {
		/* Don't dump files with links. */
		if (va.va_nlink != 1)
			goto out;

		device = (VATTR_IS_SUPPORTED(&va, va_devid)) ? va.va_devid : va.va_fsid;
		ref->filelength = va.va_data_size;

		p1 = &device;
		p2 = p;
		do_ioctl = &file_ioctl;

		if (kIOPolledFileHibernate & iflags) {
			error = do_ioctl(p1, p2, DKIOCAPFSGETWBCRANGE, (caddr_t) &wbc_range);
			ref->wbcranged = (error == 0);
		}
		if (ref->wbcranged) {
			uint32_t idx;
			assert(wbc_range.count <= (sizeof(wbc_range.extents) / sizeof(wbc_range.extents[0])));
			for (idx = 0; idx < wbc_range.count; idx++)
				wbctotal += wbc_range.extents[idx].length;
			kprintf("kern_direct_file(%s): wbc %qd\n", name, wbctotal);
			if (wbctotal)
				target = wbc_range.dev;
		}

		if (set_file_size) {
			if (wbctotal) {
				if (wbctotal >= set_file_size)
					set_file_size = HIBERNATE_MIN_FILE_SIZE;
				else {
					set_file_size -= wbctotal;
					if (set_file_size < HIBERNATE_MIN_FILE_SIZE)
						set_file_size = HIBERNATE_MIN_FILE_SIZE;
				}
			}
			if (fs_free_size) {
				mpFree += va.va_data_alloc;
				if ((mpFree < set_file_size) || ((mpFree - set_file_size) < fs_free_size)) {
					error = ENOSPC;
					goto out;
				}
			}
			error = vnode_setsize(ref->vp, set_file_size, IO_NOZEROFILL | IO_NOAUTH, ref->ctx);
			if (error)
				goto out;
			ref->filelength = set_file_size;
		}
	} else if ((ref->vp->v_type == VBLK) || (ref->vp->v_type == VCHR)) {
		/* Partition. */
		device = va.va_rdev;

		p1 = ref->vp;
		p2 = ref->ctx;
		do_ioctl = &device_ioctl;
	} else {
		/* Don't dump to non-regular files. */
		error = EFAULT;
		goto out;
	}
	ref->device = device;

	// probe for CF
	dk_corestorage_info_t cs_info;
	memset(&cs_info, 0, sizeof(dk_corestorage_info_t));
	error = do_ioctl(p1, p2, DKIOCCORESTORAGE, (caddr_t)&cs_info);
	ref->cf = (error == 0) && (cs_info.flags & DK_CORESTORAGE_ENABLE_HOTFILES);

	// get block size
	error = do_ioctl(p1, p2, DKIOCGETBLOCKSIZE, (caddr_t) &ref->blksize);
	if (error)
		goto out;

	minoffset = HIBERNATE_MIN_PHYSICAL_LBA * ref->blksize;

	if (ref->vp->v_type != VREG) {
		error = do_ioctl(p1, p2, DKIOCGETBLOCKCOUNT, (caddr_t) &fileblk);
		if (error)
			goto out;
		ref->filelength = fileblk * ref->blksize;
	}

	// pin logical extents, CS version
	error = kern_ioctl_file_extents(ref, _DKIOCCSPINEXTENT, 0, ref->filelength);
	if (error && (ENOTTY != error))
		goto out;
	ref->pinned = (error == 0);

	// pin logical extents, apfs version
	error = VNOP_IOCTL(ref->vp, FSCTL_FREEZE_EXTENTS, NULL, 0, ref->ctx);
	if (error && (ENOTTY != error))
		goto out;
	ref->frozen = (error == 0);

	// generate the block list
	error = do_ioctl(p1, p2, DKIOCLOCKPHYSICALEXTENTS, NULL);
	if (error)
		goto out;
	locked = TRUE;

	f_offset = 0;
	for (; f_offset < ref->filelength; f_offset += filechunk) {
		if (ref->vp->v_type == VREG) {
			filechunk = 1*1024*1024*1024;
			daddr64_t blkno;

			error = VNOP_BLOCKMAP(ref->vp, f_offset, filechunk, &blkno,
					      &filechunk, NULL, VNODE_WRITE | VNODE_BLOCKMAP_NO_TRACK, NULL);
			if (error)
				goto out;
			if (-1LL == blkno)
				continue;
			fileblk = blkno * ref->blksize;
		} else if ((ref->vp->v_type == VBLK) || (ref->vp->v_type == VCHR)) {
			fileblk = f_offset;
			filechunk = f_offset ? 0 : ref->filelength;
		}

		physoffset = 0;
		while (physoffset < filechunk) {
			dk_physical_extent_t getphysreq;
			bzero(&getphysreq, sizeof(getphysreq));

			getphysreq.offset = fileblk + physoffset;
			getphysreq.length = (filechunk - physoffset);
			error = do_ioctl(p1, p2, DKIOCGETPHYSICALEXTENT, (caddr_t) &getphysreq);
			if (error)
				goto out;
			if (!target) {
				target = getphysreq.dev;
			} else if (target != getphysreq.dev) {
				error = ENOTSUP;
				goto out;
			}

			assert(getphysreq.offset >= minoffset);

#if HIBFRAGMENT
			uint64_t rev;
			for (rev = 4096; rev <= getphysreq.length; rev += 4096) {
				callback(callback_ref, getphysreq.offset + getphysreq.length - rev, 4096);
			}
#else
			callback(callback_ref, getphysreq.offset, getphysreq.length);
#endif
			physoffset += getphysreq.length;
		}
	}
	if (ref->wbcranged) {
		uint32_t idx;
		for (idx = 0; idx < wbc_range.count; idx++) {
			assert(wbc_range.extents[idx].offset >= minoffset);
			callback(callback_ref, wbc_range.extents[idx].offset, wbc_range.extents[idx].length);
		}
	}
	callback(callback_ref, 0ULL, 0ULL);

	if (ref->vp->v_type == VREG)
		p1 = &target;
	else {
		p1 = &target;
		p2 = p;
		do_ioctl = &file_ioctl;
	}

	// get partition base
	if (partitionbase_result) {
		error = do_ioctl(p1, p2, DKIOCGETBASE, (caddr_t) partitionbase_result);
		if (error)
			goto out;
	}

	// get block size & constraints
	error = do_ioctl(p1, p2, DKIOCGETBLOCKSIZE, (caddr_t) &blksize);
	if (error)
		goto out;

	maxiocount = 1*1024*1024*1024;

	error = do_ioctl(p1, p2, DKIOCGETMAXBLOCKCOUNTREAD, (caddr_t) &count);
	if (error)
		count = 0;
	count *= blksize;
	if (count && (count < maxiocount))
		maxiocount = count;

	error = do_ioctl(p1, p2, DKIOCGETMAXBLOCKCOUNTWRITE, (caddr_t) &count);
	if (error)
		count = 0;
	count *= blksize;
	if (count && (count < maxiocount))
		maxiocount = count;

	error = do_ioctl(p1, p2, DKIOCGETMAXBYTECOUNTREAD, (caddr_t) &count);
	if (error)
		count = 0;
	if (count && (count < maxiocount))
		maxiocount = count;

	error = do_ioctl(p1, p2, DKIOCGETMAXBYTECOUNTWRITE, (caddr_t) &count);
	if (error)
		count = 0;
	if (count && (count < maxiocount))
		maxiocount = count;

	error = do_ioctl(p1, p2, DKIOCGETMAXSEGMENTBYTECOUNTREAD, (caddr_t) &count);
	if (!error)
		error = do_ioctl(p1, p2, DKIOCGETMAXSEGMENTCOUNTREAD, (caddr_t) &segcount);
	if (error)
		count = segcount = 0;
	count *= segcount;
	if (count && (count < maxiocount))
		maxiocount = count;

	error = do_ioctl(p1, p2, DKIOCGETMAXSEGMENTBYTECOUNTWRITE, (caddr_t) &count);
	if (!error)
		error = do_ioctl(p1, p2, DKIOCGETMAXSEGMENTCOUNTWRITE, (caddr_t) &segcount);
	if (error)
		count = segcount = 0;
	count *= segcount;
	if (count && (count < maxiocount))
		maxiocount = count;

	kprintf("max io 0x%qx bytes\n", maxiocount);
	if (maxiocount_result)
		*maxiocount_result = maxiocount;

	error = do_ioctl(p1, p2, DKIOCISSOLIDSTATE, (caddr_t)&isssd);
	if (!error && isssd)
		flags |= kIOPolledFileSSD;

	if (partition_device_result)
		*partition_device_result = device;
	if (image_device_result)
		*image_device_result = target;
	if (oflags)
		*oflags = flags;

	if ((ref->vp->v_type == VBLK) || (ref->vp->v_type == VCHR)) {
		vnode_close(ref->vp, FWRITE, ref->ctx);
		ref->vp = NULLVP;
		ref->ctx = NULL;
	}

out:
	printf("kern_open_file_for_direct_io(%p, %d)\n", ref, error);

	if (error && locked) {
		p1 = &device;
		(void) do_ioctl(p1, p2, DKIOCUNLOCKPHYSICALEXTENTS, NULL);
	}

	if (error && ref) {
		if (ref->vp) {
			(void) kern_ioctl_file_extents(ref, _DKIOCCSUNPINEXTENT, 0,
						       (ref->pinned && ref->cf) ? ref->filelength : 0);

			if (ref->frozen) {
				(void) VNOP_IOCTL(ref->vp, FSCTL_THAW_EXTENTS, NULL, 0, ref->ctx);
			}
			if (ref->wbcranged) {
				(void) do_ioctl(p1, p2, DKIOCAPFSRELEASEWBCRANGE, (caddr_t) NULL);
			}
			vnode_close(ref->vp, FWRITE, ref->ctx);
			ref->vp = NULLVP;
		}
		ref->ctx = NULL;
		kfree(ref, sizeof(struct kern_direct_file_io_ref_t));
		ref = NULL;
	}

	return (ref);
}
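/*
 * Illustrative sketch, not part of the original source: a minimal
 * kern_get_file_extents_callback_t as consumed by the routine above.
 * Each call reports one physical (start, length) extent, and a final
 * (0, 0) pair marks the end of the list.  The collector structure and
 * its fixed capacity are assumptions for illustration.
 */
struct extent_collector {
	uint32_t count;
	struct { uint64_t start; uint64_t length; } extents[64];	/* arbitrary capacity */
};

static void
collect_extent(void *ref, uint64_t start, uint64_t length)
{
	struct extent_collector *ec = (struct extent_collector *)ref;

	if (start == 0 && length == 0)
		return;		/* terminating sentinel from the enumerator */

	if (ec->count < 64) {
		ec->extents[ec->count].start  = start;
		ec->extents[ec->count].length = length;
		ec->count++;
	}
}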
/*
 * This routine exists to support the load_dylinker().
 *
 * This routine has its own, separate, understanding of the FAT file format,
 * which is terrifically unfortunate.
 */
static load_return_t
get_macho_vnode(
	char			*path,
	integer_t		archbits,
	struct mach_header	*mach_header,
	off_t			*file_offset,
	off_t			*macho_size,
	struct vnode		**vpp
)
{
	struct vnode		*vp;
	vfs_context_t		ctx = vfs_context_current();
	proc_t			p = vfs_context_proc(ctx);
	kauth_cred_t		kerncred;
	struct nameidata	nid, *ndp;
	boolean_t		is_fat;
	struct fat_arch		fat_arch;
	int			error = LOAD_SUCCESS;
	int			resid;
	union {
		struct mach_header	mach_header;
		struct fat_header	fat_header;
		char			pad[512];
	} header;
	off_t			fsize = (off_t)0;
	int			err2;

	/*
	 * Capture the kernel credential for use in the actual read of the
	 * file, since the user doing the execution may have execute rights
	 * but not read rights, but to exec something, we have to either map
	 * or read it into the new process address space, which requires
	 * read rights.  This is to deal with lack of common credential
	 * serialization code which would treat NOCRED as "serialize 'root'".
	 */
	kerncred = vfs_context_ucred(vfs_context_kernel());

	ndp = &nid;

	/* init the namei data to point the file user's program name */
	NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE32, CAST_USER_ADDR_T(path), ctx);

	if ((error = namei(ndp)) != 0) {
		if (error == ENOENT) {
			error = LOAD_ENOENT;
		} else {
			error = LOAD_FAILURE;
		}
		return (error);
	}
	nameidone(ndp);
	vp = ndp->ni_vp;

	/* check for regular file */
	if (vp->v_type != VREG) {
		error = LOAD_PROTECT;
		goto bad1;
	}

	/* get size */
	if ((error = vnode_size(vp, &fsize, ctx)) != 0) {
		error = LOAD_FAILURE;
		goto bad1;
	}

	/* Check mount point */
	if (vp->v_mount->mnt_flag & MNT_NOEXEC) {
		error = LOAD_PROTECT;
		goto bad1;
	}

	/* check access */
	if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_EXECUTE, ctx)) != 0) {
		error = LOAD_PROTECT;
		goto bad1;
	}

	/* try to open it */
	if ((error = VNOP_OPEN(vp, FREAD, ctx)) != 0) {
		error = LOAD_PROTECT;
		goto bad1;
	}

	if ((error = vn_rdwr(UIO_READ, vp, (caddr_t)&header, sizeof(header), 0,
	    UIO_SYSSPACE32, IO_NODELOCKED, kerncred, &resid, p)) != 0) {
		error = LOAD_IOERROR;
		goto bad2;
	}

	if (header.mach_header.magic == MH_MAGIC ||
	    header.mach_header.magic == MH_MAGIC_64)
		is_fat = FALSE;
	else if (header.fat_header.magic == FAT_MAGIC ||
		 header.fat_header.magic == FAT_CIGAM)
		is_fat = TRUE;
	else {
		error = LOAD_BADMACHO;
		goto bad2;
	}

	if (is_fat) {
		/* Look up our architecture in the fat file. */
		error = fatfile_getarch_with_bits(vp, archbits,
				(vm_offset_t)(&header.fat_header), &fat_arch);
		if (error != LOAD_SUCCESS)
			goto bad2;

		/* Read the Mach-O header out of it */
		error = vn_rdwr(UIO_READ, vp, (caddr_t)&header.mach_header,
				sizeof(header.mach_header), fat_arch.offset,
				UIO_SYSSPACE32, IO_NODELOCKED, kerncred, &resid, p);
		if (error) {
			error = LOAD_IOERROR;
			goto bad2;
		}

		/* Is this really a Mach-O? */
		if (header.mach_header.magic != MH_MAGIC &&
		    header.mach_header.magic != MH_MAGIC_64) {
			error = LOAD_BADMACHO;
			goto bad2;
		}

		*file_offset = fat_arch.offset;
		*macho_size = fat_arch.size;
	} else {
		/*
		 * Force get_macho_vnode() to fail if the architecture bits
		 * do not match the expected architecture bits.  This in
		 * turn causes load_dylinker() to fail for the same reason,
		 * so it ensures the dynamic linker and the binary are in
		 * lock-step.  This is potentially bad, if we ever add to
		 * the CPU_ARCH_* bits any bits that are desirable but not
		 * required, since the dynamic linker might work, but we will
		 * refuse to load it because of this check.
		 */
		if ((cpu_type_t)(header.mach_header.cputype & CPU_ARCH_MASK) != archbits) {
			/* close the open vnode and drop our iocount before failing */
			error = LOAD_BADARCH;
			goto bad2;
		}

		*file_offset = 0;
		*macho_size = fsize;
	}

	*mach_header = header.mach_header;
	*vpp = vp;

	ubc_setsize(vp, fsize);

	return (error);

bad2:
	err2 = VNOP_CLOSE(vp, FREAD, ctx);
	vnode_put(vp);
	return (error);

bad1:
	vnode_put(vp);
	return (error);
}