Exemple #1
0
/*
 * imageboot_setup
 *
 * Reads the root image URL from the "rp" boot-arg, asks di_root_image() for
 * the device that backs it, makes that device the root device and mounts
 * root again via vfs_mountroot().  When the mount succeeds and a root vnode
 * exists, the root vnode / current directory references are moved over to
 * the root of the last mount on the mount list and the first mount-list
 * entry is unlinked.
 *
 * Returns 0 on success, ENOMEM when the path buffer cannot be allocated,
 * ENOENT when the "rp" boot-arg is absent, otherwise whatever
 * di_root_image() or vfs_mountroot() reported.
 */
int
imageboot_setup()
{
	struct vnode	*old_root;
	struct vnode	*new_root;
	char		*image_url = NULL;
	dev_t		image_dev;
	int		status = 0;

	DBG_TRACE("%s: entry\n", __FUNCTION__);

	MALLOC_ZONE(image_url, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK);
	if (image_url == NULL) {
		return (ENOMEM);
	}

	/* Without an "rp" boot-arg there is no image to boot from. */
	if (PE_parse_boot_argn("rp", image_url, MAXPATHLEN) == FALSE) {
		status = ENOENT;
		goto done;
	}

	printf("%s: root image url is %s\n", __FUNCTION__, image_url);

	status = di_root_image(image_url, rootdevice, &image_dev);
	if (status != 0) {
		printf("%s: di_root_image failed: %d\n", __FUNCTION__, status);
		goto done;
	}

	/* Point the globals at the image device and mount root from it. */
	rootdev = image_dev;
	mountroot = NULL;
	printf("%s: root device 0x%x\n", __FUNCTION__, rootdev);

	status = vfs_mountroot();
	if (status != 0 || rootvnode == NULL) {
		goto done;
	}

	/*
	 * Fetch the vnode for '/' of the most recently added mount and make
	 * filedesc0.fd_cdir and rootvnode refer to it.
	 */
	if (VFS_ROOT(TAILQ_LAST(&mountlist,mntlist), &new_root, vfs_context_kernel()))
		panic("%s: cannot find root vnode", __FUNCTION__);

	/* Take a reference on the new root, then drop the iocount. */
	vnode_ref(new_root);
	vnode_put(new_root);

	/* Release the reference held on the previous root vnode. */
	old_root = rootvnode;
	vnode_rele(old_root);

	filedesc0.fd_cdir = new_root;
	rootvnode = new_root;

	/* Unlink the head of the mount list; the new head becomes the root fs. */
	mount_list_lock();
	TAILQ_REMOVE(&mountlist, TAILQ_FIRST(&mountlist), mnt_list);
	mount_list_unlock();
	mountlist.tqh_first->mnt_flag |= MNT_ROOTFS;

	DBG_TRACE("%s: root switched\n", __FUNCTION__);

done:
	FREE_ZONE(image_url, MAXPATHLEN, M_NAMEI);

	DBG_TRACE("%s: exit\n", __FUNCTION__);

	return (status);
}
Exemple #2
0
/*
 * Per-mount callback: issue the FIODEVICELOCKED ioctl on `mp`, passing `arg`
 * through unchanged, using the kernel VFS context.
 *
 * The ioctl's return value is discarded and the callback always returns 0.
 */
static int
cp_lock_vfs_callback(mount_t mp, void *arg)
{
	(void) VFS_IOCTL(mp, FIODEVICELOCKED, arg, 0, vfs_context_kernel());
	return 0;
}
/*
 * Write `size` bytes from the kernel buffer `buf` to `vp` at byte `offset`,
 * using the credential and process of the kernel VFS context.
 *
 * Returns the vn_rdwr() result (0 on success).  No residual count is
 * requested from vn_rdwr().
 */
int
vm_record_file_write(vnode_t vp, uint64_t offset, char *buf, int size)
{
	vfs_context_t kernel_ctx = vfs_context_kernel();

	return (vn_rdwr(UIO_WRITE, vp, (caddr_t)buf, size, offset,
	    UIO_SYSSPACE, IO_NODELOCKED,
	    vfs_context_ucred(kernel_ctx), (int *) 0,
	    vfs_context_proc(kernel_ctx)));
}
/*
 * vnode_trim_list
 *
 * Translate every logical range in the trim list `tl` into physical extents
 * of the device backing `vp`'s mount and submit them to that device as unmap
 * requests, in batches of at most MAX_BATCH_TO_TRIM extents.
 *
 * vp          vnode whose ranges are being trimmed
 * tl          singly linked list of (tl_offset, tl_length) ranges; NULL is a
 *             no-op
 * route_only  when TRUE and the mount supports the CS unmap ioctl, the
 *             request is issued with the ROUTE_ONLY option
 *
 * Returns 0 on success, ENOTSUP when the mount does not advertise unmap
 * support, ENOMEM when the extent batch cannot be allocated, otherwise the
 * error from VNOP_BLOCKMAP() or VNOP_IOCTL().  On error, extents that were
 * collected but not yet submitted are dropped.
 */
u_int32_t vnode_trim_list (vnode_t vp, struct trim_list *tl, boolean_t route_only)
{
	int		error = 0;
	int		trim_index = 0;
	int		use_cs_unmap;
	u_int32_t	blocksize = 0;
	struct vnode	*devvp;
	dk_extent_t	*extents;
	dk_unmap_t	unmap;
	_dk_cs_unmap_t	cs_unmap;

	if ( !(vp->v_mount->mnt_ioflags & MNT_IOFLAGS_UNMAP_SUPPORTED))
		return (ENOTSUP);

	if (tl == NULL)
		return (0);

	/*
	 * Get the underlying device vnode and physical block size
	 */
	devvp = vp->v_mount->mnt_devvp;
	blocksize = vp->v_mount->mnt_devblocksize;

	/* Selects between the _DKIOCCSUNMAP and DKIOCUNMAP request formats. */
	use_cs_unmap = (vp->v_mount->mnt_ioflags & MNT_IOFLAGS_CSUNMAP_SUPPORTED) != 0;

	extents = kalloc(sizeof(dk_extent_t) * MAX_BATCH_TO_TRIM);
	if (extents == NULL) {
		/* Nothing was allocated, so there is nothing to release. */
		return (ENOMEM);
	}

	if (use_cs_unmap) {
		memset (&cs_unmap, 0, sizeof(_dk_cs_unmap_t));
		cs_unmap.extents = extents;

		if (route_only == TRUE)
			cs_unmap.options = ROUTE_ONLY;
	} else {
		memset (&unmap, 0, sizeof(dk_unmap_t));
		unmap.extents = extents;
	}

	while (tl) {
		daddr64_t	io_blockno;	/* Block number corresponding to the start of the extent */
		size_t		io_bytecount;	/* Number of bytes in current extent for the specified range */
		size_t		trimmed;
		size_t		remaining_length;
		off_t		current_offset;

		current_offset = tl->tl_offset;
		remaining_length = tl->tl_length;
		trimmed = 0;

		/*
		 * We may not get the entire range from tl_offset -> tl_offset+tl_length in a single
		 * extent from the blockmap call.  Keep looping/going until we are sure we've hit
		 * the whole range or if we encounter an error.
		 *
		 * NOTE(review): progress depends on VNOP_BLOCKMAP reporting a non-zero
		 * io_bytecount on success -- confirm the filesystem guarantees that.
		 */
		while (trimmed < tl->tl_length) {
			/*
			 * VNOP_BLOCKMAP will tell us the logical to physical block number mapping for the
			 * specified offset.  It returns blocks in contiguous chunks, so if the logical range is
			 * broken into multiple extents, it must be called multiple times, increasing the offset
			 * in each call to ensure that the entire range is covered.
			 */
			error = VNOP_BLOCKMAP (vp, current_offset, remaining_length,
					       &io_blockno, &io_bytecount, NULL, VNODE_READ, NULL);

			if (error) {
				goto trim_exit;
			}

			/* Device byte offset = physical block number * device block size. */
			extents[trim_index].offset = (uint64_t) io_blockno * (u_int64_t) blocksize;
			extents[trim_index].length = io_bytecount;

			trim_index++;

			/* Batch is full: hand it to the device and start a new one. */
			if (trim_index == MAX_BATCH_TO_TRIM) {

				if (use_cs_unmap) {
					cs_unmap.extentsCount = trim_index;
					error = VNOP_IOCTL(devvp, _DKIOCCSUNMAP, (caddr_t)&cs_unmap, 0, vfs_context_kernel());
				} else {
					unmap.extentsCount = trim_index;
					error = VNOP_IOCTL(devvp, DKIOCUNMAP, (caddr_t)&unmap, 0, vfs_context_kernel());
				}
				if (error) {
					goto trim_exit;
				}
				trim_index = 0;
			}
			trimmed += io_bytecount;
			current_offset += io_bytecount;
			remaining_length -= io_bytecount;
		}
		tl = tl->tl_next;
	}

	/* Submit whatever is left in the final, partially filled batch. */
	if (trim_index) {
		if (use_cs_unmap) {
			cs_unmap.extentsCount = trim_index;
			error = VNOP_IOCTL(devvp, _DKIOCCSUNMAP, (caddr_t)&cs_unmap, 0, vfs_context_kernel());
		} else {
			unmap.extentsCount = trim_index;
			error = VNOP_IOCTL(devvp, DKIOCUNMAP, (caddr_t)&unmap, 0, vfs_context_kernel());
		}
	}
trim_exit:
	kfree(extents, sizeof(dk_extent_t) * MAX_BATCH_TO_TRIM);

	return error;
}
/*
 * vm_swapfile_io
 *
 * Transfer `npages` pages between the kernel_map address `start` and the
 * swap file vnode `vp` at file offset `offset`.  The direction is chosen by
 * SWAP_READ in `flags`: set means page-in, clear means page-out.
 *
 * A UPL is created over the kernel range; failing to create one of exactly
 * the requested size panics.  Returns the error reported by vnode_pagein() /
 * vnode_pageout() (0 on success); failures are also logged.
 */
int
vm_swapfile_io(vnode_t vp, uint64_t offset, uint64_t start, int npages, int flags)
{
	int error = 0;
	uint64_t io_size = npages * PAGE_SIZE_64;
#if 1
	const int	swap_in = (flags & SWAP_READ) != 0;
	kern_return_t	kr = KERN_SUCCESS;
	upl_t		upl = NULL;
	unsigned int	count = 0;
	upl_size_t	upl_size = io_size;
	upl_control_flags_t upl_create_flags =
	    UPL_SET_INTERNAL | UPL_SET_LITE
	    | UPL_MEMORY_TAG_MAKE(VM_KERN_MEMORY_OSFMK);
#if ENCRYPTED_SWAP
	int		upl_control_flags = UPL_IOSYNC | UPL_PAGING_ENCRYPTED;
#else
	int		upl_control_flags = UPL_IOSYNC;
#endif

	/* Page-out additionally sets UPL_COPYOUT_FROM on the UPL. */
	if (!swap_in) {
		upl_create_flags |= UPL_COPYOUT_FROM;
	}

	kr = vm_map_create_upl(kernel_map, start, &upl_size, &upl, NULL,
	    &count, &upl_create_flags);

	/* A short or failed UPL is treated as fatal. */
	if (kr != KERN_SUCCESS || (upl_size != io_size)) {
		panic("vm_map_create_upl failed with %d\n", kr);
	}

	if (swap_in) {
		vnode_pagein(vp, upl, 0, offset, io_size,
		    upl_control_flags | UPL_IGNORE_VALID_PAGE_CHECK, &error);
		if (error) {
#if DEBUG
			printf("vm_swapfile_io: vnode_pagein failed with %d (vp: %p, offset: 0x%llx, size:%llu)\n", error, vp, offset, io_size);
#else /* DEBUG */
			printf("vm_swapfile_io: vnode_pagein failed with %d.\n", error);
#endif /* DEBUG */
		}
	} else {
		vnode_pageout(vp, upl, 0, offset, io_size,
		    upl_control_flags, &error);
		if (error) {
#if DEBUG
			printf("vm_swapfile_io: vnode_pageout failed with %d (vp: %p, offset: 0x%llx, size:%llu)\n", error, vp, offset, io_size);
#else /* DEBUG */
			printf("vm_swapfile_io: vnode_pageout failed with %d.\n", error);
#endif /* DEBUG */
		}
	}
	return error;

#else /* 1 */
	/* Disabled alternative: plain vn_rdwr() against the kernel buffer. */
	vfs_context_t ctx = vfs_context_kernel();

	error = vn_rdwr((flags & SWAP_READ) ? UIO_READ : UIO_WRITE, vp, (caddr_t)start, io_size, offset,
		UIO_SYSSPACE, IO_SYNC | IO_NODELOCKED | IO_UNIT | IO_NOCACHE | IO_SWAP_DISPATCH, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));

	if (error) {
		printf("vn_rdwr: Swap I/O failed with %d\n", error);
	}
	return error;
#endif /* 1 */
}
Exemple #6
0
/*
 * Function: devfs_kernel_mount
 * Purpose:
 *   Mount devfs at the given mount point from within the kernel.
 *
 * Parameters:
 *   mntname  kernel-space path of the directory to cover.
 *
 * Returns:
 *   0 on success; the namei(), VNOP_FSYNC(), buf_invalidateblks() or
 *   devfs_mount() error on failure; ENOTDIR if the path is not a directory;
 *   EBUSY if something is already mounted there.
 *
 * Panics if no "devfs" entry exists in vfsconf.
 */
int
devfs_kernel_mount(char * mntname)
{
	struct mount *mp;
	int error;
	struct nameidata nd;
	struct vnode  * vp;
	vfs_context_t ctx = vfs_context_kernel();
	struct vfstable *vfsp;

	/* Find our vfstable entry */
	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
		if (!strncmp(vfsp->vfc_name, "devfs", sizeof(vfsp->vfc_name)))
			break;
	
	if (!vfsp) {
		panic("Could not find entry in vfsconf for devfs.\n");
	} 

	/*
	 * Get vnode to be covered
	 */
	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
	    CAST_USER_ADDR_T(mntname), ctx);
	if ((error = namei(&nd))) {
	    printf("devfs_kernel_mount: failed to find directory '%s', %d\n", 
		   mntname, error);
	    return (error);
	}
	nameidone(&nd);
	vp = nd.ni_vp;

	/*
	 * Flush and invalidate anything cached for the directory, then make
	 * sure it is a directory nobody else has mounted on.  Every failure
	 * below drops the iocount obtained from namei().
	 */
	if ((error = VNOP_FSYNC(vp, MNT_WAIT, ctx))) {
	    printf("devfs_kernel_mount: vnop_fsync failed: %d\n", error);
	    vnode_put(vp);
	    return (error);
	}
	if ((error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0))) {
	    printf("devfs_kernel_mount: buf_invalidateblks failed: %d\n", error);
	    vnode_put(vp);
	    return (error);
	}
	if (vnode_isdir(vp) == 0) {
	    printf("devfs_kernel_mount: '%s' is not a directory\n", mntname);
	    vnode_put(vp);
	    return (ENOTDIR);
	}
	if ((vnode_mountedhere(vp))) {
	    vnode_put(vp);
	    return (EBUSY);
	}

	/*
	 * Allocate and initialize the filesystem.
	 */
	MALLOC_ZONE(mp, struct mount *, sizeof(struct mount),
		M_MOUNT, M_WAITOK);
	bzero((char *)mp, sizeof(struct mount));

	/* Initialize the default IO constraints */
	mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS;
	mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32;
	mp->mnt_ioflags = 0;
	mp->mnt_realrootvp = NULLVP;
	mp->mnt_authcache_ttl = CACHED_LOOKUP_RIGHT_TTL;

	mount_lock_init(mp);
	TAILQ_INIT(&mp->mnt_vnodelist);
	TAILQ_INIT(&mp->mnt_workerqueue);
	TAILQ_INIT(&mp->mnt_newvnodes);

	(void)vfs_busy(mp, LK_NOWAIT);
	mp->mnt_op = &devfs_vfsops;
	mp->mnt_vtable = vfsp;
	mp->mnt_flag = 0;
	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
	strlcpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSTYPENAMELEN);
	/* Link the covered vnode and the new mount to each other. */
	vp->v_mountedhere = mp;
	mp->mnt_vnodecovered = vp;
	mp->mnt_vfsstat.f_owner = kauth_cred_getuid(kauth_cred_get());
	(void) copystr(mntname, mp->mnt_vfsstat.f_mntonname, MAXPATHLEN - 1, 0);
#if CONFIG_MACF
	mac_mount_label_init(mp);
	mac_mount_label_associate(ctx, mp);
#endif

	error = devfs_mount(mp, NULL, USER_ADDR_NULL, ctx);

	if (error) {
	    printf("devfs_kernel_mount: mount %s failed: %d\n", mntname, error);
	    /*
	     * NOTE(review): no matching vfc_refcount increment is visible in
	     * this function -- confirm where it is taken.
	     */
	    mp->mnt_vtable->vfc_refcount--;

	    /*
	     * The mount structure is freed below; do not leave the covered
	     * vnode pointing at it.
	     */
	    vp->v_mountedhere = NULL;

	    vfs_unbusy(mp);

	    mount_lock_destroy(mp);
#if CONFIG_MACF
	    mac_mount_label_destroy(mp);
#endif
	    FREE_ZONE(mp, sizeof (struct mount), M_MOUNT);
	    vnode_put(vp);
	    return (error);
	}
	/* Swap the lookup iocount for a reference held while the mount exists. */
	vnode_ref(vp);
	vnode_put(vp);
	vfs_unbusy(mp);
	mount_list_add(mp);
	return (0);
}
Exemple #7
0
/*
 * Thin wrapper that returns whatever vfs_context_kernel() returns.
 */
vfs_context_t spl_vfs_context_kernel(void)
{
	return vfs_context_kernel();
}
Exemple #8
0
/*
 * kern_open_file_for_direct_io
 *
 * Open (optionally creating, when kIOPolledFileCreate is set in iflags) the
 * file or device node `name` with the kernel VFS context and report the
 * physical extents that back it through `callback`.
 *
 * Steps, in order:
 *   - allocate and zero the reference structure, open the vnode;
 *   - optionally write `write_file_len` bytes from `write_file_addr` at
 *     `write_file_offset`;
 *   - for a regular file: refuse files whose link count is not 1, optionally
 *     query an APFS WBC range (kIOPolledFileHibernate), optionally resize
 *     the file to `set_file_size` while keeping `fs_free_size` free;
 *   - for a block/char device: use the device itself;
 *   - query block size, pin/freeze the extents, lock the physical extents;
 *   - walk the file and call `callback(callback_ref, offset, length)` for
 *     every physical extent, then for every WBC extent, and finally once
 *     with (0, 0);
 *   - query the I/O size limits and SSD flag of the target device and store
 *     the results through the non-NULL output pointers.
 *
 * Returns the allocated reference on success.  On any failure everything
 * acquired so far is released and NULL is returned; the error code is only
 * printed, not returned.
 */
struct kern_direct_file_io_ref_t *
kern_open_file_for_direct_io(const char * name, 
			     uint32_t iflags,
			     kern_get_file_extents_callback_t callback, 
			     void * callback_ref,
                             off_t set_file_size,
                             off_t fs_free_size,
                             off_t write_file_offset,
                             void * write_file_addr,
                             size_t write_file_len,
			     dev_t * partition_device_result,
			     dev_t * image_device_result,
                             uint64_t * partitionbase_result,
                             uint64_t * maxiocount_result,
                             uint32_t * oflags)
{
    struct kern_direct_file_io_ref_t * ref;

    proc_t            p;
    struct vnode_attr va;
    dk_apfs_wbc_range_t wbc_range;
    int               error;
    off_t             f_offset;
    uint64_t          fileblk;
    size_t            filechunk;
    uint64_t          physoffset, minoffset;
    dev_t             device;
    dev_t             target = 0;
    int               isssd = 0;
    uint32_t          flags = 0;
    uint32_t          blksize;
    off_t             maxiocount, count, segcount, wbctotal;
    boolean_t         locked = FALSE;
    int               fmode, cmode;
    struct            nameidata nd;
    u_int32_t         ndflags;
    off_t             mpFree;

    /* Ioctl dispatcher plus its two opaque arguments; chosen by vnode type below. */
    int (*do_ioctl)(void * p1, void * p2, u_long theIoctl, caddr_t result);
    void * p1 = NULL;
    void * p2 = NULL;

    error = EFAULT;

    ref = (struct kern_direct_file_io_ref_t *) kalloc(sizeof(struct kern_direct_file_io_ref_t));
    if (!ref)
    {
	error = EFAULT;
    	goto out;
    }

    bzero(ref, sizeof(*ref));
    p = kernproc;
    ref->ctx = vfs_context_kernel();

    fmode  = (kIOPolledFileCreate & iflags) ? (O_CREAT | FWRITE) : FWRITE;
    cmode =  S_IRUSR | S_IWUSR;
    ndflags = NOFOLLOW;
    NDINIT(&nd, LOOKUP, OP_OPEN, ndflags, UIO_SYSSPACE, CAST_USER_ADDR_T(name), ref->ctx);
    VATTR_INIT(&va);
    VATTR_SET(&va, va_mode, cmode);
    VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWENCRYPTED);
    VATTR_SET(&va, va_dataprotect_class, PROTECTION_CLASS_D);
    if ((error = vn_open_auth(&nd, &fmode, &va))) {
	kprintf("vn_open_auth(fmode: %d, cmode: %d) failed with error: %d\n", fmode, cmode, error);
	goto out;
    }

    ref->vp = nd.ni_vp;
    /* Regular files get the VSWAP flag, set under the vnode spin lock. */
    if (ref->vp->v_type == VREG)
    {
        vnode_lock_spin(ref->vp);
        SET(ref->vp->v_flag, VSWAP);
        vnode_unlock(ref->vp);
    }

    /* Optional initial write, only when both an address and a length were given. */
    if (write_file_addr && write_file_len)
    {
        if ((error = kern_write_file(ref, write_file_offset, write_file_addr, write_file_len, IO_SKIP_ENCRYPTION))) {
            kprintf("kern_write_file() failed with error: %d\n", error);
            goto out;
        }
    }

    VATTR_INIT(&va);
    VATTR_WANTED(&va, va_rdev);
    VATTR_WANTED(&va, va_fsid);
    VATTR_WANTED(&va, va_devid);
    VATTR_WANTED(&va, va_data_size);
    VATTR_WANTED(&va, va_data_alloc);
    VATTR_WANTED(&va, va_nlink);
    /* EFAULT is the error reported by the bare "goto out" paths that follow. */
    error = EFAULT;
    if (vnode_getattr(ref->vp, &va, ref->ctx)) goto out;

    wbctotal = 0;
    /* freespace_mb() result is shifted by 20 bits to get bytes. */
    mpFree = freespace_mb(ref->vp);
    mpFree <<= 20;
    kprintf("kern_direct_file(%s): vp size %qd, alloc %qd, mp free %qd, keep free %qd\n", 
    		name, va.va_data_size, va.va_data_alloc, mpFree, fs_free_size);

    if (ref->vp->v_type == VREG)
    {
        /* Don't dump files with links. */
        if (va.va_nlink != 1) goto out;

        device = (VATTR_IS_SUPPORTED(&va, va_devid)) ? va.va_devid : va.va_fsid;
        ref->filelength = va.va_data_size;

        p1 = &device;
        p2 = p;
        do_ioctl = &file_ioctl;

        if (kIOPolledFileHibernate & iflags)
        {
            error = do_ioctl(p1, p2, DKIOCAPFSGETWBCRANGE, (caddr_t) &wbc_range);
            ref->wbcranged = (error == 0);
        }
        if (ref->wbcranged)
        {
            uint32_t idx;
            assert(wbc_range.count <= (sizeof(wbc_range.extents) / sizeof(wbc_range.extents[0])));
            for (idx = 0; idx < wbc_range.count; idx++) wbctotal += wbc_range.extents[idx].length;
            kprintf("kern_direct_file(%s): wbc %qd\n", name, wbctotal);
            if (wbctotal) target = wbc_range.dev;
        }

        if (set_file_size)
        {
            /*
             * Space already covered by the WBC range is subtracted from the
             * requested size, never going below HIBERNATE_MIN_FILE_SIZE.
             */
            if (wbctotal)
            {
                if (wbctotal >= set_file_size) set_file_size = HIBERNATE_MIN_FILE_SIZE;
                else
                {
                    set_file_size -= wbctotal;
                    if (set_file_size < HIBERNATE_MIN_FILE_SIZE) set_file_size = HIBERNATE_MIN_FILE_SIZE;
                }
            }
            /*
             * Free space plus what the file already has allocated must hold
             * the new size and still leave fs_free_size bytes.
             */
            if (fs_free_size)
            {
		mpFree += va.va_data_alloc;
		if ((mpFree < set_file_size) || ((mpFree - set_file_size) < fs_free_size))
		{
		    error = ENOSPC;
		    goto out;
		}
	    }
	    error = vnode_setsize(ref->vp, set_file_size, IO_NOZEROFILL | IO_NOAUTH, ref->ctx);
	    if (error) goto out;
	    ref->filelength = set_file_size;
        }
    }
    else if ((ref->vp->v_type == VBLK) || (ref->vp->v_type == VCHR))
    {
	/* Partition. */
        device = va.va_rdev;

        p1 = ref->vp;
        p2 = ref->ctx;
        do_ioctl = &device_ioctl;
    }
    else
    {
	/* Don't dump to non-regular files. */
        error = EFAULT;
        goto out;
    }
    ref->device = device;

    // probe for CF
    dk_corestorage_info_t cs_info;
    memset(&cs_info, 0, sizeof(dk_corestorage_info_t));
    error = do_ioctl(p1, p2, DKIOCCORESTORAGE, (caddr_t)&cs_info);
    ref->cf = (error == 0) && (cs_info.flags & DK_CORESTORAGE_ENABLE_HOTFILES);

    // get block size

    error = do_ioctl(p1, p2, DKIOCGETBLOCKSIZE, (caddr_t) &ref->blksize);
    if (error)
        goto out;

    minoffset = HIBERNATE_MIN_PHYSICAL_LBA * ref->blksize;

    /* For a device node the length is block count times block size. */
    if (ref->vp->v_type != VREG)
    {
        error = do_ioctl(p1, p2, DKIOCGETBLOCKCOUNT, (caddr_t) &fileblk);
        if (error) goto out;
	ref->filelength = fileblk * ref->blksize;    
    }

    // pin logical extents, CS version

    /* ENOTTY is tolerated and simply leaves ref->pinned clear. */
    error = kern_ioctl_file_extents(ref, _DKIOCCSPINEXTENT, 0, ref->filelength);
    if (error && (ENOTTY != error)) goto out;
    ref->pinned = (error == 0);

    // pin logical extents, apfs version

    /* ENOTTY is tolerated and simply leaves ref->frozen clear. */
    error = VNOP_IOCTL(ref->vp, FSCTL_FREEZE_EXTENTS, NULL, 0, ref->ctx);
    if (error && (ENOTTY != error)) goto out;
    ref->frozen = (error == 0);

    // generate the block list

    error = do_ioctl(p1, p2, DKIOCLOCKPHYSICALEXTENTS, NULL);
    if (error) goto out;
    /* From here on a failure must issue DKIOCUNLOCKPHYSICALEXTENTS (see out:). */
    locked = TRUE;

    f_offset = 0;
    for (; f_offset < ref->filelength; f_offset += filechunk)
    {
        if (ref->vp->v_type == VREG)
        {
            /* Ask for up to 1GB; VNOP_BLOCKMAP writes the mapped run length back into filechunk. */
            filechunk = 1*1024*1024*1024;
            daddr64_t blkno;

            error = VNOP_BLOCKMAP(ref->vp, f_offset, filechunk, &blkno,
								  &filechunk, NULL, VNODE_WRITE | VNODE_BLOCKMAP_NO_TRACK, NULL);
            if (error) goto out;
            /* A block number of -1 is skipped without reporting an extent. */
            if (-1LL == blkno) continue;
            fileblk = blkno * ref->blksize;
        }
        else if ((ref->vp->v_type == VBLK) || (ref->vp->v_type == VCHR))
        {
            /* A device is handled as one chunk covering its whole length. */
            fileblk = f_offset;
            filechunk = f_offset ? 0 : ref->filelength;
        }

        /* Resolve the chunk into physical extents; all must be on one device (`target`). */
        physoffset = 0;
        while (physoffset < filechunk)
        {
            dk_physical_extent_t getphysreq;
            bzero(&getphysreq, sizeof(getphysreq));

            getphysreq.offset = fileblk + physoffset;
            getphysreq.length = (filechunk - physoffset);
            error = do_ioctl(p1, p2, DKIOCGETPHYSICALEXTENT, (caddr_t) &getphysreq);
            if (error) goto out;
            if (!target)
            {
                target = getphysreq.dev;
            }
            else if (target != getphysreq.dev)
            {
                error = ENOTSUP;
                goto out;
            }

            assert(getphysreq.offset >= minoffset);

#if HIBFRAGMENT
	    /* Report the extent as 4096-byte pieces, last piece first. */
	    uint64_t rev;
	    for (rev = 4096; rev <= getphysreq.length; rev += 4096)
	    {
		callback(callback_ref, getphysreq.offset + getphysreq.length - rev, 4096);
	    }
#else
            callback(callback_ref, getphysreq.offset, getphysreq.length);
#endif
            physoffset += getphysreq.length;
        }
    }
    if (ref->wbcranged)
    {
        uint32_t idx;
        for (idx = 0; idx < wbc_range.count; idx++)
        {
            assert(wbc_range.extents[idx].offset >= minoffset);
            callback(callback_ref, wbc_range.extents[idx].offset, wbc_range.extents[idx].length);
        }
    }
    /* A (0, 0) call marks the end of the extent list. */
    callback(callback_ref, 0ULL, 0ULL);

    /* The remaining ioctls go to `target` through file_ioctl for both vnode types. */
    if (ref->vp->v_type == VREG) p1 = &target;
    else
    {
	p1 = &target;
	p2 = p;
	do_ioctl = &file_ioctl;
    }

    // get partition base

    if (partitionbase_result) 
    {
        error = do_ioctl(p1, p2, DKIOCGETBASE, (caddr_t) partitionbase_result);
        if (error)
            goto out;
    }

    // get block size & constraints

    error = do_ioctl(p1, p2, DKIOCGETBLOCKSIZE, (caddr_t) &blksize);
    if (error)
        goto out;

    /*
     * Start from 1GB and lower maxiocount to the smallest non-zero limit the
     * device reports.  A failed query counts as "no limit" (count = 0).
     */
    maxiocount = 1*1024*1024*1024;

    error = do_ioctl(p1, p2, DKIOCGETMAXBLOCKCOUNTREAD, (caddr_t) &count);
    if (error)
        count = 0;
    count *= blksize;
    if (count && (count < maxiocount))
        maxiocount = count;

    error = do_ioctl(p1, p2, DKIOCGETMAXBLOCKCOUNTWRITE, (caddr_t) &count);
    if (error)
        count = 0;
    count *= blksize;
    if (count && (count < maxiocount))
        maxiocount = count;

    error = do_ioctl(p1, p2, DKIOCGETMAXBYTECOUNTREAD, (caddr_t) &count);
    if (error)
        count = 0;
    if (count && (count < maxiocount))
        maxiocount = count;

    error = do_ioctl(p1, p2, DKIOCGETMAXBYTECOUNTWRITE, (caddr_t) &count);
    if (error)
        count = 0;
    if (count && (count < maxiocount))
        maxiocount = count;

    error = do_ioctl(p1, p2, DKIOCGETMAXSEGMENTBYTECOUNTREAD, (caddr_t) &count);
    if (!error)
	error = do_ioctl(p1, p2, DKIOCGETMAXSEGMENTCOUNTREAD, (caddr_t) &segcount);
    if (error)
        count = segcount = 0;
    count *= segcount;
    if (count && (count < maxiocount))
        maxiocount = count;

    error = do_ioctl(p1, p2, DKIOCGETMAXSEGMENTBYTECOUNTWRITE, (caddr_t) &count);
    if (!error)
	error = do_ioctl(p1, p2, DKIOCGETMAXSEGMENTCOUNTWRITE, (caddr_t) &segcount);
    if (error)
        count = segcount = 0;
    count *= segcount;
    if (count && (count < maxiocount))
        maxiocount = count;

    kprintf("max io 0x%qx bytes\n", maxiocount);
    if (maxiocount_result)
        *maxiocount_result = maxiocount;

    /*
     * NOTE(review): `error` is not reset after this query and is the value
     * tested at out:, so a failing DKIOCISSOLIDSTATE (or a failing segment
     * query just above) appears to turn the whole call into a failure --
     * confirm that is intended.
     */
    error = do_ioctl(p1, p2, DKIOCISSOLIDSTATE, (caddr_t)&isssd);
    if (!error && isssd)
        flags |= kIOPolledFileSSD;

    if (partition_device_result)
        *partition_device_result = device;
    if (image_device_result)
        *image_device_result = target;
    if (oflags)
        *oflags = flags;

    /* Device nodes are closed here; the returned ref then carries no vnode or context. */
    if ((ref->vp->v_type == VBLK) || (ref->vp->v_type == VCHR))
    {
        vnode_close(ref->vp, FWRITE, ref->ctx);
        ref->vp = NULLVP;
	ref->ctx = NULL;
    }

out:
    printf("kern_open_file_for_direct_io(%p, %d)\n", ref, error);


    /*
     * NOTE(review): p1 is forced to &device, but do_ioctl may still be
     * device_ioctl (set up above with p1 = ref->vp) when a device-node open
     * fails before the switch to file_ioctl -- confirm device_ioctl accepts
     * this argument.
     */
    if (error && locked)
    {
        p1 = &device;
        (void) do_ioctl(p1, p2, DKIOCUNLOCKPHYSICALEXTENTS, NULL);
    }

    /* Failure: undo pin/freeze/WBC state, close the vnode and free the ref. */
    if (error && ref)
    {
        if (ref->vp)
        {
            (void) kern_ioctl_file_extents(ref, _DKIOCCSUNPINEXTENT, 0, (ref->pinned && ref->cf) ? ref->filelength : 0);

            if (ref->frozen)
            {
                (void) VNOP_IOCTL(ref->vp, FSCTL_THAW_EXTENTS, NULL, 0, ref->ctx);
            }
            if (ref->wbcranged)
            {
                (void) do_ioctl(p1, p2, DKIOCAPFSRELEASEWBCRANGE, (caddr_t) NULL);
            }
            vnode_close(ref->vp, FWRITE, ref->ctx);
            ref->vp = NULLVP;
        }
        ref->ctx = NULL;
        kfree(ref, sizeof(struct kern_direct_file_io_ref_t));
        ref = NULL;
    }

    return(ref);
}
/*
 * This routine exists to support the load_dylinker().
 *
 * This routine has its own, separate, understanding of the FAT file format,
 * which is terrifically unfortunate.
 *
 * Looks up `path`, verifies that it is an executable regular file on a mount
 * without MNT_NOEXEC, opens it and reads its header.  For a fat file the
 * slice matching `archbits` is located and its Mach-O header is read.
 *
 * On success (LOAD_SUCCESS):
 *   *mach_header  receives a copy of the Mach-O header,
 *   *file_offset  receives the offset of the Mach-O image within the file,
 *   *macho_size   receives the image size (slice size, or the file size for
 *                 a thin file),
 *   *vpp          receives the vnode, still open for FREAD and still holding
 *                 the iocount from the lookup.
 *
 * On failure a LOAD_* error is returned, the vnode is closed (if it was
 * opened) and its iocount is dropped; the output parameters are not valid.
 */
static
load_return_t
get_macho_vnode(
    char			*path,
    integer_t		archbits,
    struct mach_header	*mach_header,
    off_t			*file_offset,
    off_t			*macho_size,
    struct vnode		**vpp
)
{
    struct vnode		*vp;
    vfs_context_t		ctx = vfs_context_current();
    proc_t			p = vfs_context_proc(ctx);
    kauth_cred_t		kerncred;
    struct nameidata nid, *ndp;
    boolean_t		is_fat;
    struct fat_arch		fat_arch;
    int			error = LOAD_SUCCESS;
    int resid;
    union {
        struct mach_header	mach_header;
        struct fat_header	fat_header;
        char	pad[512];
    } header;
    off_t fsize = (off_t)0;

    /*
     * Capture the kernel credential for use in the actual read of the
     * file, since the user doing the execution may have execute rights
     * but not read rights, but to exec something, we have to either map
     * or read it into the new process address space, which requires
     * read rights.  This is to deal with lack of common credential
     * serialization code which would treat NOCRED as "serialize 'root'".
     */
    kerncred = vfs_context_ucred(vfs_context_kernel());

    ndp = &nid;

    /* init the namei data to point the file user's program name */
    NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE32, CAST_USER_ADDR_T(path), ctx);

    if ((error = namei(ndp)) != 0) {
        if (error == ENOENT) {
            error = LOAD_ENOENT;
        } else {
            error = LOAD_FAILURE;
        }
        return(error);
    }
    nameidone(ndp);
    vp = ndp->ni_vp;

    /* check for regular file */
    if (vp->v_type != VREG) {
        error = LOAD_PROTECT;
        goto bad1;
    }

    /* get size */
    if ((error = vnode_size(vp, &fsize, ctx)) != 0) {
        error = LOAD_FAILURE;
        goto bad1;
    }

    /* Check mount point */
    if (vp->v_mount->mnt_flag & MNT_NOEXEC) {
        error = LOAD_PROTECT;
        goto bad1;
    }

    /* check access */
    if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_EXECUTE, ctx)) != 0) {
        error = LOAD_PROTECT;
        goto bad1;
    }

    /* try to open it; every failure from here on must go through bad2 */
    if ((error = VNOP_OPEN(vp, FREAD, ctx)) != 0) {
        error = LOAD_PROTECT;
        goto bad1;
    }

    /*
     * NOTE(review): resid is not examined, so a file shorter than the
     * header buffer leaves part of `header` unwritten -- confirm whether a
     * short read should be rejected here.
     */
    if ((error = vn_rdwr(UIO_READ, vp, (caddr_t)&header, sizeof(header), 0,
                         UIO_SYSSPACE32, IO_NODELOCKED, kerncred, &resid, p)) != 0) {
        error = LOAD_IOERROR;
        goto bad2;
    }

    if (header.mach_header.magic == MH_MAGIC ||
            header.mach_header.magic == MH_MAGIC_64)
        is_fat = FALSE;
    else if (header.fat_header.magic == FAT_MAGIC ||
             header.fat_header.magic == FAT_CIGAM)
        is_fat = TRUE;
    else {
        error = LOAD_BADMACHO;
        goto bad2;
    }

    if (is_fat) {
        /* Look up our architecture in the fat file. */
        error = fatfile_getarch_with_bits(vp, archbits, (vm_offset_t)(&header.fat_header), &fat_arch);
        if (error != LOAD_SUCCESS)
            goto bad2;

        /* Read the Mach-O header out of it */
        error = vn_rdwr(UIO_READ, vp, (caddr_t)&header.mach_header,
                        sizeof(header.mach_header), fat_arch.offset,
                        UIO_SYSSPACE32, IO_NODELOCKED, kerncred, &resid, p);
        if (error) {
            error = LOAD_IOERROR;
            goto bad2;
        }

        /* Is this really a Mach-O? */
        if (header.mach_header.magic != MH_MAGIC &&
                header.mach_header.magic != MH_MAGIC_64) {
            error = LOAD_BADMACHO;
            goto bad2;
        }

        *file_offset = fat_arch.offset;
        *macho_size = fat_arch.size;
    } else {
        /*
         * Force get_macho_vnode() to fail if the architecture bits
         * do not match the expected architecture bits.  This in
         * turn causes load_dylinker() to fail for the same reason,
         * so it ensures the dynamic linker and the binary are in
         * lock-step.  This is potentially bad, if we ever add to
         * the CPU_ARCH_* bits any bits that are desirable but not
         * required, since the dynamic linker might work, but we will
         * refuse to load it because of this check.
         *
         * The vnode is open and holds an iocount at this point, so the
         * failure must unwind through bad2 rather than return directly.
         */
        if ((cpu_type_t)(header.mach_header.cputype & CPU_ARCH_MASK) != archbits) {
            error = LOAD_BADARCH;
            goto bad2;
        }

        *file_offset = 0;
        *macho_size = fsize;
    }

    *mach_header = header.mach_header;
    *vpp = vp;

    ubc_setsize(vp, fsize);

    return (error);

bad2:
    /* Already failing: the close result cannot change what is reported. */
    (void) VNOP_CLOSE(vp, FREAD, ctx);
    vnode_put(vp);
    return (error);

bad1:
    vnode_put(vp);
    return(error);
}