Example 1
int
vm_swapfile_preallocate(vnode_t vp, uint64_t *size, boolean_t *pin)
{
	int		error = 0;
	uint64_t	file_size = 0;
	vfs_context_t	ctx = NULL;


	ctx = vfs_context_current();

	error = vnode_setsize(vp, *size, IO_NOZEROFILL, ctx);

	if (error) {
		printf("vnode_setsize for swap files failed: %d\n", error);
		goto done;
	}

	error = vnode_size(vp, (off_t*) &file_size, ctx);

	if (error) {
		printf("vnode_size (new file) for swap file failed: %d\n", error);
		goto done;
	}	
	assert(file_size == *size);
	
	if (pin != NULL && *pin != FALSE) {
		error = VNOP_IOCTL(vp, FIOPINSWAP, NULL, 0, ctx);

		if (error) {
			printf("pin for swap files failed: %d,  file_size = %lld\n", error, file_size);
			/* this is not fatal, carry on with files wherever they landed */
			*pin = FALSE;
			error = 0;
		}
	}

	vnode_lock_spin(vp);
	SET(vp->v_flag, VSWAP);
	vnode_unlock(vp);
done:
	return error;
}
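A minimal caller sketch for the routine above (illustrative: swap_vp is assumed to be the vnode of an already-created swap file, and the 256 MB size is arbitrary):

	uint64_t	swapsize = 256ULL << 20;	/* bytes; arbitrary */
	boolean_t	pin = TRUE;			/* request SSD pinning */
	int		err;

	err = vm_swapfile_preallocate(swap_vp, &swapsize, &pin);
	if (err == 0 && pin == FALSE) {
		/* The file is usable, but FIOPINSWAP failed; it stays unpinned. */
		printf("swap file preallocated unpinned\n");
	}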
Example 2
int
cttyioctl(__unused dev_t dev, u_long cmd, caddr_t addr, int flag, proc_t p)
{
	vnode_t ttyvp = cttyvp(current_proc());
	struct vfs_context context;
	struct session *sessp;
	int error = 0;

	if (ttyvp == NULL)
		return (EIO);
	if (cmd == TIOCSCTTY)  { /* don't allow controlling tty to be set    */
		error = EINVAL; /* to controlling tty -- infinite recursion */
		goto out;
	}
	if (cmd == TIOCNOTTY) {
		sessp = proc_session(p);
		if (!SESS_LEADER(p, sessp)) {
			OSBitAndAtomic(~((uint32_t)P_CONTROLT), &p->p_flag);
			if (sessp != SESSION_NULL)
				session_rele(sessp);
			error = 0;
			goto out;
		} else {
			if (sessp != SESSION_NULL)
				session_rele(sessp);
			error = EINVAL;
			goto out;
		}
	}
	context.vc_thread = current_thread();
	context.vc_ucred = NOCRED;

	error = VNOP_IOCTL(ttyvp, cmd, addr, flag, &context);
out:
	vnode_put(ttyvp);
	return (error);
}
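For context, the TIOCNOTTY path above is exercised from user space through the /dev/tty driver; a self-contained sketch (detaching fails with EINVAL for a session leader, matching cttyioctl()):

#include <fcntl.h>
#include <sys/ioctl.h>
#include <unistd.h>

/* Detach the calling process from its controlling terminal. */
int
detach_ctty(void)
{
	int fd = open("/dev/tty", O_RDWR | O_NOCTTY);
	int r;

	if (fd < 0)
		return -1;
	r = ioctl(fd, TIOCNOTTY, 0);	/* handled by cttyioctl() above */
	close(fd);
	return r;
}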
Example 3
u_int32_t vnode_trim_list (vnode_t vp, struct trim_list *tl, boolean_t route_only)
{
	int		error = 0;
	int		trim_index = 0;
	u_int32_t	blocksize = 0;
	struct vnode	*devvp;
	dk_extent_t	*extents;
	dk_unmap_t	unmap;
	_dk_cs_unmap_t	cs_unmap;

	if ( !(vp->v_mount->mnt_ioflags & MNT_IOFLAGS_UNMAP_SUPPORTED))
		return (ENOTSUP);

	if (tl == NULL)
		return (0);

	/*
	 * Get the underlying device vnode and physical block size
	 */
	devvp = vp->v_mount->mnt_devvp;
	blocksize = vp->v_mount->mnt_devblocksize;

	extents = kalloc(sizeof(dk_extent_t) * MAX_BATCH_TO_TRIM);
	if (extents == NULL)
		return (ENOMEM);

	if (vp->v_mount->mnt_ioflags & MNT_IOFLAGS_CSUNMAP_SUPPORTED) {
		memset (&cs_unmap, 0, sizeof(_dk_cs_unmap_t));
		cs_unmap.extents = extents;

		if (route_only == TRUE)
			cs_unmap.options = ROUTE_ONLY;
	} else {
		memset (&unmap, 0, sizeof(dk_unmap_t));
		unmap.extents = extents;
	}

	while (tl) {
		daddr64_t	io_blockno;	/* Block number corresponding to the start of the extent */
		size_t		io_bytecount;	/* Number of bytes in current extent for the specified range */
		size_t		trimmed;
		size_t		remaining_length;
		off_t		current_offset; 

		current_offset = tl->tl_offset;
		remaining_length = tl->tl_length;
		trimmed = 0;
		
		/*
		 * We may not get the entire range from tl_offset -> tl_offset+tl_length in a single
		 * extent from the blockmap call.  Keep looping until we have covered the whole
		 * range or encountered an error.
		 */
		while (trimmed < tl->tl_length) {
			/*
			 * VNOP_BLOCKMAP will tell us the logical to physical block number mapping for the
			 * specified offset.  It returns blocks in contiguous chunks, so if the logical range is 
			 * broken into multiple extents, it must be called multiple times, increasing the offset
			 * in each call to ensure that the entire range is covered.
			 */
			error = VNOP_BLOCKMAP (vp, current_offset, remaining_length, 
					       &io_blockno, &io_bytecount, NULL, VNODE_READ, NULL);

			if (error) {
				goto trim_exit;
			}

			extents[trim_index].offset = (uint64_t) io_blockno * (u_int64_t) blocksize;
			extents[trim_index].length = io_bytecount;

			trim_index++;

			if (trim_index == MAX_BATCH_TO_TRIM) {

				if (vp->v_mount->mnt_ioflags & MNT_IOFLAGS_CSUNMAP_SUPPORTED) {
					cs_unmap.extentsCount = trim_index;
					error = VNOP_IOCTL(devvp, _DKIOCCSUNMAP, (caddr_t)&cs_unmap, 0, vfs_context_kernel());
				} else {
					unmap.extentsCount = trim_index;
					error = VNOP_IOCTL(devvp, DKIOCUNMAP, (caddr_t)&unmap, 0, vfs_context_kernel());
				}
				if (error) {
					goto trim_exit;
				}
				trim_index = 0;
			}
			trimmed += io_bytecount;
			current_offset += io_bytecount;
			remaining_length -= io_bytecount;
		}
		tl = tl->tl_next;
	}
	if (trim_index) {
		if (vp->v_mount->mnt_ioflags & MNT_IOFLAGS_CSUNMAP_SUPPORTED) {
			cs_unmap.extentsCount = trim_index;
			error = VNOP_IOCTL(devvp, _DKIOCCSUNMAP, (caddr_t)&cs_unmap, 0, vfs_context_kernel());
		} else {
			unmap.extentsCount = trim_index;
			error = VNOP_IOCTL(devvp, DKIOCUNMAP, (caddr_t)&unmap, 0, vfs_context_kernel());
		}
	}
trim_exit:
	kfree(extents, sizeof(dk_extent_t) * MAX_BATCH_TO_TRIM);

	return error;
}
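A caller sketch for vnode_trim_list() (illustrative: vp is assumed to be a file vnode on an UNMAP-capable mount, and the offsets/lengths are arbitrary; the tl_offset/tl_length/tl_next fields follow the usage above):

	struct trim_list tail = {
		.tl_offset = 4 * 1024 * 1024,
		.tl_length = 64 * 1024,
		.tl_next   = NULL,
	};
	struct trim_list head = {
		.tl_offset = 0,
		.tl_length = 128 * 1024,
		.tl_next   = &tail,
	};

	/* Issues DKIOCUNMAP (or _DKIOCCSUNMAP) for both ranges in batches. */
	(void) vnode_trim_list(vp, &head, FALSE);	/* route_only = FALSE */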
Example 4
static int
vdev_disk_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
    uint64_t *ashift)
{
	spa_t *spa = vd->vdev_spa;
	vdev_disk_t *dvd = vd->vdev_tsd;
	vnode_t *devvp = NULLVP;
	vfs_context_t context = NULL;
	uint64_t blkcnt;
	uint32_t blksize;
	int fmode = 0;
	int error = 0;
	int isssd;

	/*
	 * We must have a pathname, and it must be absolute.
	 */
	if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
		vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
		return (SET_ERROR(EINVAL));
	}

	/*
	 * Reopen the device if it's not currently open. Otherwise,
	 * just update the physical size of the device.
	 */
	if (dvd != NULL) {
		if (dvd->vd_offline) {
			/*
			 * If we are opening a device in its offline notify
			 * context, the LDI handle was just closed. Clean
			 * up the LDI event callbacks and free vd->vdev_tsd.
			 */
			vdev_disk_free(vd);
		} else {
			ASSERT(vd->vdev_reopening);
			devvp = dvd->vd_devvp;
			goto skip_open;
		}
	}

	/*
	 * Create vd->vdev_tsd.
	 */
	vdev_disk_alloc(vd);
	dvd = vd->vdev_tsd;

	/*
	 * When opening a disk device, we want to preserve the user's original
	 * intent.  We always want to open the device by the path the user gave
	 * us, even if it is one of multiple paths to the same device.  But we
	 * also want to be able to survive disks being removed/recabled.
	 * Therefore the sequence of opening devices is:
	 *
	 * 1. Try opening the device by path.  For legacy pools without the
	 *    'whole_disk' property, attempt to fix the path by appending 's0'.
	 *
	 * 2. If the devid of the device matches the stored value, return
	 *    success.
	 *
	 * 3. Otherwise, the device may have moved.  Try opening the device
	 *    by the devid instead.
	 */
	/* ### APPLE TODO ### */
#ifdef illumos
	if (vd->vdev_devid != NULL) {
		if (ddi_devid_str_decode(vd->vdev_devid, &dvd->vd_devid,
		    &dvd->vd_minor) != 0) {
			vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
			return (SET_ERROR(EINVAL));
		}
	}
#endif

	error = EINVAL;		/* presume failure */

	if (vd->vdev_path != NULL) {

		context = vfs_context_create( spl_vfs_context_kernel() );

		/* Obtain an opened/referenced vnode for the device. */
		if ((error = vnode_open(vd->vdev_path, spa_mode(spa), 0, 0,
								&devvp, context))) {
			goto out;
		}
		if (!vnode_isblk(devvp)) {
			error = ENOTBLK;
			goto out;
		}
		/*
		 * ### APPLE TODO ###
		 * vnode_authorize devvp for KAUTH_VNODE_READ_DATA and
		 * KAUTH_VNODE_WRITE_DATA
		 */

		/*
		 * Disallow opening of a device that is currently in use.
		 * Flush out any old buffers remaining from a previous use.
		 */
		if ((error = vfs_mountedon(devvp))) {
			goto out;
		}
		if (VNOP_FSYNC(devvp, MNT_WAIT, context) != 0) {
			error = ENOTBLK;
			goto out;
		}
		if ((error = buf_invalidateblks(devvp, BUF_WRITE_DATA, 0, 0))) {
			goto out;
		}

	} else {
		goto out;
	}


	int len = MAXPATHLEN;
	if (vn_getpath(devvp, dvd->vd_readlinkname, &len) == 0) {
		dprintf("ZFS: '%s' resolved name is '%s'\n",
			   vd->vdev_path, dvd->vd_readlinkname);
	} else {
		dvd->vd_readlinkname[0] = 0;
	}

skip_open:
	/*
	 * Determine the actual size of the device.
	 */
	if (VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&blksize, 0,
	    context) != 0 ||
	    VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&blkcnt, 0,
	    context) != 0) {
		error = EINVAL;
		goto out;
	}

	*psize = blkcnt * (uint64_t)blksize;
	*max_psize = *psize;

	dvd->vd_ashift = highbit(blksize) - 1;
	dprintf("vdev_disk: Device %p ashift set to %d\n", devvp,
	    dvd->vd_ashift);


	*ashift = highbit(MAX(blksize, SPA_MINBLOCKSIZE)) - 1;

	/*
	 *  ### APPLE TODO ###
	 */
#ifdef illumos
	if (vd->vdev_wholedisk == 1) {
		int wce = 1;
		if (error == 0) {
			/*
			 * If we have the capability to expand, we'd have
			 * found out via success from DKIOCGMEDIAINFO{,EXT}.
			 * Adjust max_psize upward accordingly since we know
			 * we own the whole disk now.
			 */
			*max_psize = capacity * blksz;
		}

		/*
		 * Since we own the whole disk, try to enable disk write
		 * caching.  We ignore errors because it's OK if we can't do it.
		 */
		(void) ldi_ioctl(dvd->vd_lh, DKIOCSETWCE, (intptr_t)&wce,
		    FKIOCTL, kcred, NULL);
	}
#endif

	/*
	 * Clear the nowritecache bit, so that on a vdev_reopen() we will
	 * try again.
	 */
	vd->vdev_nowritecache = B_FALSE;

	/* Inform the ZIO pipeline that we are non-rotational */
	vd->vdev_nonrot = B_FALSE;
	if (VNOP_IOCTL(devvp, DKIOCISSOLIDSTATE, (caddr_t)&isssd, 0,
				   context) == 0) {
		if (isssd)
			vd->vdev_nonrot = B_TRUE;
	}
	dprintf("ZFS: vdev_disk(%s) isSSD %d\n", vd->vdev_path ? vd->vdev_path : "",
			isssd);

	dvd->vd_devvp = devvp;
out:
	if (error) {
		if (devvp) {
			vnode_close(devvp, fmode, context);
			dvd->vd_devvp = NULL;
		}
		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
	}
	if (context)
		(void) vfs_context_rele(context);

	if (error) printf("ZFS: vdev_disk_open('%s') failed error %d\n",
					  vd->vdev_path ? vd->vdev_path : "", error);
	return (error);
}
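A worked example of the ashift computation above (assuming highbit() returns the 1-based index of the highest set bit, as in illumos):

	uint32_t blksize = 4096;
	uint64_t ashift;

	/*
	 * blksize = 512  -> highbit(512)  = 10 -> ashift = 9   (2^9  = 512)
	 * blksize = 4096 -> highbit(4096) = 13 -> ashift = 12  (2^12 = 4096)
	 * MAX(blksize, SPA_MINBLOCKSIZE) keeps ashift >= 9 even if a device
	 * reports a sector size smaller than 512 bytes.
	 */
	ashift = highbit(MAX(blksize, SPA_MINBLOCKSIZE)) - 1;
	ASSERT(ashift == 12);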
Example 5
static void
vdev_disk_io_start(zio_t *zio)
{
	vdev_t *vd = zio->io_vd;
	vdev_disk_t *dvd = vd->vdev_tsd;
	struct buf *bp;
	vfs_context_t context;
	int flags, error = 0;

	/*
	 * If the vdev is closed, it's likely in the REMOVED or FAULTED state.
	 * Nothing to be done here but return failure.
	 */
	if (dvd == NULL || (dvd->vd_offline) || dvd->vd_devvp == NULL) {
		zio->io_error = ENXIO;
		zio_interrupt(zio);
		return;
	}

	switch (zio->io_type) {
		case ZIO_TYPE_IOCTL:

			if (!vdev_readable(vd)) {
				zio->io_error = SET_ERROR(ENXIO);
				zio_interrupt(zio);
				return;
			}

			switch (zio->io_cmd) {

				case DKIOCFLUSHWRITECACHE:

					if (zfs_nocacheflush)
						break;

					if (vd->vdev_nowritecache) {
						zio->io_error = SET_ERROR(ENOTSUP);
						break;
					}

					context = vfs_context_create(spl_vfs_context_kernel());
					error = VNOP_IOCTL(dvd->vd_devvp, DKIOCSYNCHRONIZECACHE,
									   NULL, FWRITE, context);
					(void) vfs_context_rele(context);

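					/*
					 * Note: any flush failure below is collapsed
					 * to ENOTSUP, which then sets vdev_nowritecache
					 * so the flush is never retried on this vdev.
					 */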
					if (error == 0)
						vdev_disk_ioctl_done(zio, error);
					else
						error = ENOTSUP;

					if (error == 0) {
						/*
						 * The ioctl will be done asynchronously,
						 * and will call vdev_disk_ioctl_done()
						 * upon completion.
						 */
						return;
					} else if (error == ENOTSUP || error == ENOTTY) {
						/*
						 * If we get ENOTSUP or ENOTTY, we know that
						 * no future attempts will ever succeed.
						 * In this case we set a persistent bit so
						 * that we don't bother with the ioctl in the
						 * future.
						 */
						vd->vdev_nowritecache = B_TRUE;
					}
					zio->io_error = error;

					break;

				default:
					zio->io_error = SET_ERROR(ENOTSUP);
			} /* io_cmd */

			zio_execute(zio);
			return;

		case ZIO_TYPE_WRITE:
			if (zio->io_priority == ZIO_PRIORITY_SYNC_WRITE)
				flags = B_WRITE;
			else
				flags = B_WRITE | B_ASYNC;
			break;

		case ZIO_TYPE_READ:
			if (zio->io_priority == ZIO_PRIORITY_SYNC_READ)
				flags = B_READ;
			else
				flags = B_READ | B_ASYNC;
			break;

		default:
			zio->io_error = SET_ERROR(ENOTSUP);
			zio_interrupt(zio);
			return;
	} /* io_type */

	ASSERT(zio->io_type == ZIO_TYPE_READ || zio->io_type == ZIO_TYPE_WRITE);

	/* Stop OSX from also caching our data */
	flags |= B_NOCACHE;

	if (zio->io_flags & ZIO_FLAG_FAILFAST)
		flags |= B_FAILFAST;

	zio->io_target_timestamp = zio_handle_io_delay(zio);

	bp = buf_alloc(dvd->vd_devvp);

	ASSERT(bp != NULL);
	ASSERT(zio->io_data != NULL);
	ASSERT(zio->io_size != 0);

	buf_setflags(bp, flags);
	buf_setcount(bp, zio->io_size);
	buf_setdataptr(bp, (uintptr_t)zio->io_data);

	/*
	 * Map offset to block number, based on the physical block size
	 * (512, 4096, ...).  If we fail to map, default back to
	 * standard 512.  lbtodb() is fixed at 512.
	 */
	buf_setblkno(bp, zio->io_offset >> dvd->vd_ashift);
	buf_setlblkno(bp, zio->io_offset >> dvd->vd_ashift);

	buf_setsize(bp, zio->io_size);
	if (buf_setcallback(bp, vdev_disk_io_intr, zio) != 0)
		panic("vdev_disk_io_start: buf_setcallback failed\n");

	if (zio->io_type == ZIO_TYPE_WRITE) {
		vnode_startwrite(dvd->vd_devvp);
	}
	error = VNOP_STRATEGY(bp);
	ASSERT(error == 0);

	if (error) {
		zio->io_error = error;
		zio_interrupt(zio);
		return;
	}
}
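Completion arrives through the callback registered with buf_setcallback() above. A minimal sketch of what that handler might look like (an assumption, not the shipped vdev_disk_io_intr(); buf_error(), buf_resid(), and buf_free() are standard XNU buffer KPIs):

static void
vdev_disk_io_intr(buf_t bp, void *arg)
{
	zio_t *zio = (zio_t *)arg;

	zio->io_error = buf_error(bp);		/* 0 on success */
	if (zio->io_error == 0 && buf_resid(bp) != 0)
		zio->io_error = SET_ERROR(EIO);	/* short transfer */

	buf_free(bp);
	zio_interrupt(zio);		/* hand the zio back to the pipeline */
}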
Example 6
static int
vdev_disk_open(vdev_t *vd, uint64_t *size, uint64_t *max_size, uint64_t *ashift)
{
	vdev_disk_t *dvd = NULL;
	vnode_t *devvp = NULLVP;
	vfs_context_t context = NULL;
	uint64_t blkcnt;
	uint32_t blksize;
	int fmode = 0;
	int error = 0;

	/*
	 * We must have a pathname, and it must be absolute.
	 */
	if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
		vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
		return (EINVAL);
	}

	dvd = kmem_zalloc(sizeof (vdev_disk_t), KM_SLEEP);
	if (dvd == NULL)
		return (ENOMEM);

	/*
	 * When opening a disk device, we want to preserve the user's original
	 * intent.  We always want to open the device by the path the user gave
	 * us, even if it is one of multiple paths to the same device.  But we
	 * also want to be able to survive disks being removed/recabled.
	 * Therefore the sequence of opening devices is:
	 *
	 * 1. Try opening the device by path.  For legacy pools without the
	 *    'whole_disk' property, attempt to fix the path by appending 's0'.
	 *
	 * 2. If the devid of the device matches the stored value, return
	 *    success.
	 *
	 * 3. Otherwise, the device may have moved.  Try opening the device
	 *    by the devid instead.
	 *
	 */

	/* ### APPLE TODO ### */
	/* ddi_devid_str_decode */

	context = vfs_context_create((vfs_context_t)0);

	/* Obtain an opened/referenced vnode for the device. */
	error = vnode_open(vd->vdev_path, spa_mode(vd->vdev_spa), 0, 0, &devvp, context);
	if (error) {
		goto out;
	}

	if (!vnode_isblk(devvp)) {
		error = ENOTBLK;
		goto out;
	}

	/* ### APPLE TODO ### */
	/* vnode_authorize devvp for KAUTH_VNODE_READ_DATA and
	 * KAUTH_VNODE_WRITE_DATA
	 */

	/*
	 * Disallow opening of a device that is currently in use.
	 * Flush out any old buffers remaining from a previous use.
	 */
	if ((error = vfs_mountedon(devvp))) {
		goto out;
	}
	if (VNOP_FSYNC(devvp, MNT_WAIT, context) != 0) {
		error = ENOTBLK;
		goto out;
	}
	if ((error = buf_invalidateblks(devvp, BUF_WRITE_DATA, 0, 0))) {
		goto out;
	}

	/*
	 * Determine the actual size of the device.
	 */
	if (VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&blksize, 0,
	    context) != 0 ||
	    VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&blkcnt, 0,
	    context) != 0) {
		error = EINVAL;
		goto out;
	}
	*size = blkcnt * (uint64_t)blksize;

	/*
	 *  ### APPLE TODO ###
	 * If we own the whole disk, try to enable disk write caching.
	 */

	/*
	 * Take the device's minimum transfer size into account.
	 */
	*ashift = highbit(MAX(blksize, SPA_MINBLOCKSIZE)) - 1;

	/*
	 * Setting the vdev_ashift did in fact break the pool for import
	 * on ZEVO. This puts the logic into question. It appears that vdev_top
	 * will also then change. It then panics in space_map from metaslab_alloc
	 */
	//vd->vdev_ashift = *ashift;
	dvd->vd_ashift = *ashift;

	/*
	 * Clear the nowritecache bit, so that on a vdev_reopen() we will
	 * try again.
	 */
	vd->vdev_nowritecache = B_FALSE;
	vd->vdev_tsd = dvd;
	dvd->vd_devvp = devvp;
out:
	if (error) {
		if (devvp)
			vnode_close(devvp, fmode, context);
		if (dvd)
			kmem_free(dvd, sizeof (vdev_disk_t));

		/*
		 * Since the open has failed, vd->vdev_tsd should
		 * be NULL when we get here, signaling to the
		 * rest of the spa not to try and reopen or close this device
		 */
		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
	}
	if (context) {
		(void) vfs_context_rele(context);
	}
	return (error);
}
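For symmetry, a sketch of the matching close path (a hypothetical helper; it mirrors the error-path cleanup in the open above):

static void
vdev_disk_close_sketch(vdev_t *vd)
{
	vdev_disk_t *dvd = vd->vdev_tsd;

	if (dvd == NULL)
		return;

	if (dvd->vd_devvp != NULLVP) {
		vfs_context_t context = vfs_context_create((vfs_context_t)0);

		(void) vnode_close(dvd->vd_devvp, 0, context);
		(void) vfs_context_rele(context);
	}
	kmem_free(dvd, sizeof (vdev_disk_t));
	vd->vdev_tsd = NULL;
}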
Example 7
static int
vdev_disk_io_start(zio_t *zio)
{
	vdev_t *vd = zio->io_vd;
	vdev_disk_t *dvd = vd->vdev_tsd;
	struct buf *bp;
	vfs_context_t context;
	int flags, error = 0;

	if (zio->io_type == ZIO_TYPE_IOCTL) {
		zio_vdev_io_bypass(zio);

		/* XXPOLICY */
		if (vdev_is_dead(vd)) {
			zio->io_error = ENXIO;
			//zio_next_stage_async(zio);
			return (ZIO_PIPELINE_CONTINUE);
            //return;
		}

		switch (zio->io_cmd) {

		case DKIOCFLUSHWRITECACHE:

			if (zfs_nocacheflush)
				break;

			if (vd->vdev_nowritecache) {
				zio->io_error = SET_ERROR(ENOTSUP);
				break;
			}

			context = vfs_context_create((vfs_context_t)0);
			error = VNOP_IOCTL(dvd->vd_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context);
			(void) vfs_context_rele(context);

			if (error == 0)
				vdev_disk_ioctl_done(zio, error);
			else
				error = ENOTSUP;

			if (error == 0) {
				/*
				 * The ioctl will be done asynchronously,
				 * and will call vdev_disk_ioctl_done()
				 * upon completion.
				 */
				return ZIO_PIPELINE_STOP;
			} else if (error == ENOTSUP || error == ENOTTY) {
				/*
				 * If we get ENOTSUP or ENOTTY, we know that
				 * no future attempts will ever succeed.
				 * In this case we set a persistent bit so
				 * that we don't bother with the ioctl in the
				 * future.
				 */
				vd->vdev_nowritecache = B_TRUE;
			}
			zio->io_error = error;

			break;

		default:
			zio->io_error = SET_ERROR(ENOTSUP);
		}

		return (ZIO_PIPELINE_CONTINUE);
	}

	if (zio->io_type == ZIO_TYPE_READ && vdev_cache_read(zio) == 0)
		return (ZIO_PIPELINE_STOP);

	if ((zio = vdev_queue_io(zio)) == NULL)
		return (ZIO_PIPELINE_CONTINUE);

	flags = (zio->io_type == ZIO_TYPE_READ ? B_READ : B_WRITE);
	//flags |= B_NOCACHE;

	if (zio->io_flags & ZIO_FLAG_FAILFAST)
		flags |= B_FAILFAST;

	/*
	 * Check the state of this device to see if it has been offlined or
	 * is in an error state.  If the device was offlined or closed,
	 * dvd will be NULL and buf_alloc below will fail
	 */
	//error = vdev_is_dead(vd) ? ENXIO : vdev_error_inject(vd, zio);
	if (vdev_is_dead(vd)) {
		error = ENXIO;
	}

	if (error) {
		zio->io_error = error;
		return (ZIO_PIPELINE_CONTINUE);
	}

	bp = buf_alloc(dvd->vd_devvp);

	ASSERT(bp != NULL);
	ASSERT(zio->io_data != NULL);
	ASSERT(zio->io_size != 0);

	buf_setflags(bp, flags);
	buf_setcount(bp, zio->io_size);
	buf_setdataptr(bp, (uintptr_t)zio->io_data);
	if (dvd->vd_ashift) {
		buf_setlblkno(bp, zio->io_offset >> dvd->vd_ashift);
		buf_setblkno(bp, zio->io_offset >> dvd->vd_ashift);
	} else {
		/* No ashift recorded: fall back to standard 512-byte blocks. */
		buf_setblkno(bp, lbtodb(zio->io_offset));
		buf_setlblkno(bp, lbtodb(zio->io_offset));
	}

	buf_setsize(bp, zio->io_size);
	if (buf_setcallback(bp, vdev_disk_io_intr, zio) != 0)
		panic("vdev_disk_io_start: buf_setcallback failed\n");

	error = VNOP_STRATEGY(bp);
	ASSERT(error == 0);

	return (ZIO_PIPELINE_STOP);
}
Example 8
/*
 * Open a special file.
 */
int
spec_open(struct vnop_open_args *ap)
{
	struct proc *p = vfs_context_proc(ap->a_context);
	kauth_cred_t cred = vfs_context_ucred(ap->a_context);
	struct vnode *vp = ap->a_vp;
	dev_t bdev, dev = (dev_t)vp->v_rdev;
	int maj = major(dev);
	int error;

	/*
	 * Don't allow open if fs is mounted -nodev.
	 */
	if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_NODEV))
		return (ENXIO);

	switch (vp->v_type) {

	case VCHR:
		if ((u_int)maj >= (u_int)nchrdev)
			return (ENXIO);
		if (cred != FSCRED && (ap->a_mode & FWRITE)) {
			/*
			 * When running in very secure mode, do not allow
			 * opens for writing of any disk character devices.
			 */
			if (securelevel >= 2 && isdisk(dev, VCHR))
				return (EPERM);
			/*
			 * When running in secure mode, do not allow opens
			 * for writing of /dev/mem, /dev/kmem, or character
			 * devices whose corresponding block devices are
			 * currently mounted.
			 */
			if (securelevel >= 1) {
				if ((bdev = chrtoblk(dev)) != NODEV && check_mountedon(bdev, VBLK, &error))
					return (error);
				if (iskmemdev(dev))
					return (EPERM);
			}
		}
		if (cdevsw[maj].d_type == D_TTY) {
			vnode_lock(vp);
			vp->v_flag |= VISTTY;
			vnode_unlock(vp);
		}
		error = (*cdevsw[maj].d_open)(dev, ap->a_mode, S_IFCHR, p);
		return (error);

	case VBLK:
		if ((u_int)maj >= (u_int)nblkdev)
			return (ENXIO);
		/*
		 * When running in very secure mode, do not allow
		 * opens for writing of any disk block devices.
		 */
		if (securelevel >= 2 && cred != FSCRED &&
		    (ap->a_mode & FWRITE) && bdevsw[maj].d_type == D_DISK)
			return (EPERM);
		/*
		 * Do not allow opens of block devices that are
		 * currently mounted.
		 */
		if ( (error = vfs_mountedon(vp)) )
			return (error);
		error = (*bdevsw[maj].d_open)(dev, ap->a_mode, S_IFBLK, p);
		if (!error) {
			u_int64_t blkcnt;
			u_int32_t blksize;
			int setsize = 0;
			u_int32_t size512 = 512;

			if (!VNOP_IOCTL(vp, DKIOCGETBLOCKSIZE, (caddr_t)&blksize, 0, ap->a_context)) {
				/* Switch to 512 byte sectors (temporarily) */
				if (!VNOP_IOCTL(vp, DKIOCSETBLOCKSIZE, (caddr_t)&size512, FWRITE, ap->a_context)) {
					/* Get the number of 512 byte physical blocks. */
					if (!VNOP_IOCTL(vp, DKIOCGETBLOCKCOUNT, (caddr_t)&blkcnt, 0, ap->a_context)) {
						setsize = 1;
					}
				}
				/* If we can't set the block size back, we can't recover. */
				if (VNOP_IOCTL(vp, DKIOCSETBLOCKSIZE, (caddr_t)&blksize, FWRITE, ap->a_context))
					error = ENXIO;
			}

			vnode_lock(vp);
			set_blocksize(vp, dev);

			/*
			 * Cache the size in bytes of the block device for later
			 * use by spec_write().
			 */
			if (setsize)
				vp->v_specdevsize = blkcnt * (u_int64_t)size512;
			else
				vp->v_specdevsize = (u_int64_t)0;	/* Default: Can't get */

			vnode_unlock(vp);
		}
		return (error);
	default:
		panic("spec_open type");
	}
	return (0);
}
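The DKIOCGETBLOCKSIZE/DKIOCGETBLOCKCOUNT pair used above is also reachable from user space via <sys/disk.h>; a self-contained sketch (the /dev/disk0 path is illustrative):

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/disk.h>
#include <sys/ioctl.h>
#include <unistd.h>

int
main(void)
{
	uint64_t blkcnt = 0;
	uint32_t blksize = 0;
	int fd = open("/dev/disk0", O_RDONLY);

	if (fd < 0)
		return 1;
	if (ioctl(fd, DKIOCGETBLOCKSIZE, &blksize) == 0 &&
	    ioctl(fd, DKIOCGETBLOCKCOUNT, &blkcnt) == 0)
		printf("%llu bytes\n", (unsigned long long)blkcnt * blksize);
	close(fd);
	return 0;
}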
Example 9
static int device_ioctl(void * p1, __unused void * p2, u_long theIoctl, caddr_t result)
{
    return (VNOP_IOCTL(p1, theIoctl, result, 0, p2));
}
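device_ioctl() adapts VNOP_IOCTL to the generic do_ioctl(p1, p2, theIoctl, result) shape used by the direct-I/O code below: p1 carries the device vnode and p2 the vfs_context. Its counterpart for regular files, file_ioctl(), is not shown in this excerpt; plausibly it dispatches through bdevsw, along these lines (a sketch, not the verbatim implementation):

static int
file_ioctl(void * p1, void * p2, u_long theIoctl, caddr_t result)
{
    dev_t device = *(dev_t *) p1;

    /* Issue the ioctl directly against the backing block device. */
    return ((*bdevsw[major(device)].d_ioctl)
            (device, theIoctl, result, S_IFBLK, p2));
}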
Example 10
void
kern_close_file_for_direct_io(struct kern_direct_file_io_ref_t * ref,
			      off_t write_offset, void * addr, size_t write_length,
			      off_t discard_offset, off_t discard_end)
{
    int error;
    printf("kern_close_file_for_direct_io(%p)\n", ref);

    if (!ref) return;

    if (ref->vp)
    {
        int (*do_ioctl)(void * p1, void * p2, u_long theIoctl, caddr_t result);
        void * p1;
        void * p2;

        discard_offset = ((discard_offset + ref->blksize - 1) & ~(((off_t) ref->blksize) - 1));
        discard_end    = ((discard_end)                       & ~(((off_t) ref->blksize) - 1));

        if (ref->vp->v_type == VREG)
        {
            p1 = &ref->device;
            p2 = kernproc;
            do_ioctl = &file_ioctl;
        }
        else
        {
            /* Partition. */
            p1 = ref->vp;
            p2 = ref->ctx;
            do_ioctl = &device_ioctl;
        }
        (void) do_ioctl(p1, p2, DKIOCUNLOCKPHYSICALEXTENTS, NULL);

        //XXX If unmapping extents then don't also need to unpin; except ...
        //XXX if file unaligned (HFS 4k / Fusion 128k) then pin is superset and
        //XXX unmap is subset, so save extra walk over file extents (and the risk
        //XXX that CF drain starts) vs leaving partial units pinned to SSD
        //XXX (until whatever was sharing also unmaps).  Err on cleaning up fully.
        boolean_t will_unmap = (!ref->pinned || ref->cf) && (discard_end > discard_offset);
        boolean_t will_unpin = (ref->pinned && ref->cf /* && !will_unmap */);

        (void) kern_ioctl_file_extents(ref, _DKIOCCSUNPINEXTENT, 0, (will_unpin) ? ref->filelength : 0);

        if (will_unmap)
        {
            (void) kern_ioctl_file_extents(ref, DKIOCUNMAP, discard_offset, (ref->cf) ? ref->filelength : discard_end);
        }

        if (ref->frozen)
        {
            (void) VNOP_IOCTL(ref->vp, FSCTL_THAW_EXTENTS, NULL, 0, ref->ctx);
        }
        if (ref->wbcranged)
        {
            (void) do_ioctl(p1, p2, DKIOCAPFSRELEASEWBCRANGE, (caddr_t) NULL);
        }

        if (addr && write_length)
        {
            (void) kern_write_file(ref, write_offset, addr, write_length, IO_SKIP_ENCRYPTION);
        }

        error = vnode_close(ref->vp, FWRITE, ref->ctx);

        ref->vp = NULLVP;
        kprintf("vnode_close(%d)\n", error);

    }

    ref->ctx = NULL;

    kfree(ref, sizeof(struct kern_direct_file_io_ref_t));
}
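A worked example of the discard alignment at the top of the function, assuming blksize = 4096 (mask 0xfff; the offsets are illustrative):

    /*
     * discard_offset = 0x1234 -> (0x1234 + 0xfff) & ~0xfff = 0x2000  (round up)
     * discard_end    = 0x7890 ->  0x7890          & ~0xfff = 0x7000  (round down)
     * Rounding inward guarantees that only whole device blocks are unmapped.
     */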
Example 11
struct kern_direct_file_io_ref_t *
kern_open_file_for_direct_io(const char * name, 
			     uint32_t iflags,
			     kern_get_file_extents_callback_t callback, 
			     void * callback_ref,
                             off_t set_file_size,
                             off_t fs_free_size,
                             off_t write_file_offset,
                             void * write_file_addr,
                             size_t write_file_len,
			     dev_t * partition_device_result,
			     dev_t * image_device_result,
                             uint64_t * partitionbase_result,
                             uint64_t * maxiocount_result,
                             uint32_t * oflags)
{
    struct kern_direct_file_io_ref_t * ref;

    proc_t            p;
    struct vnode_attr va;
    dk_apfs_wbc_range_t wbc_range;
    int               error;
    off_t             f_offset;
    uint64_t          fileblk;
    size_t            filechunk;
    uint64_t          physoffset, minoffset;
    dev_t             device;
    dev_t             target = 0;
    int               isssd = 0;
    uint32_t          flags = 0;
    uint32_t          blksize;
    off_t             maxiocount, count, segcount, wbctotal;
    boolean_t         locked = FALSE;
    int               fmode, cmode;
    struct            nameidata nd;
    u_int32_t         ndflags;
    off_t             mpFree;

    int (*do_ioctl)(void * p1, void * p2, u_long theIoctl, caddr_t result);
    void * p1 = NULL;
    void * p2 = NULL;

    error = EFAULT;

    ref = (struct kern_direct_file_io_ref_t *) kalloc(sizeof(struct kern_direct_file_io_ref_t));
    if (!ref)
    {
        error = EFAULT;
        goto out;
    }

    bzero(ref, sizeof(*ref));
    p = kernproc;
    ref->ctx = vfs_context_kernel();

    fmode  = (kIOPolledFileCreate & iflags) ? (O_CREAT | FWRITE) : FWRITE;
    cmode =  S_IRUSR | S_IWUSR;
    ndflags = NOFOLLOW;
    NDINIT(&nd, LOOKUP, OP_OPEN, ndflags, UIO_SYSSPACE, CAST_USER_ADDR_T(name), ref->ctx);
    VATTR_INIT(&va);
    VATTR_SET(&va, va_mode, cmode);
    VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWENCRYPTED);
    VATTR_SET(&va, va_dataprotect_class, PROTECTION_CLASS_D);
    if ((error = vn_open_auth(&nd, &fmode, &va))) {
        kprintf("vn_open_auth(fmode: %d, cmode: %d) failed with error: %d\n", fmode, cmode, error);
        goto out;
    }

    ref->vp = nd.ni_vp;
    if (ref->vp->v_type == VREG)
    {
        vnode_lock_spin(ref->vp);
        SET(ref->vp->v_flag, VSWAP);
        vnode_unlock(ref->vp);
    }

    if (write_file_addr && write_file_len)
    {
        if ((error = kern_write_file(ref, write_file_offset, write_file_addr, write_file_len, IO_SKIP_ENCRYPTION))) {
            kprintf("kern_write_file() failed with error: %d\n", error);
            goto out;
        }
    }

    VATTR_INIT(&va);
    VATTR_WANTED(&va, va_rdev);
    VATTR_WANTED(&va, va_fsid);
    VATTR_WANTED(&va, va_devid);
    VATTR_WANTED(&va, va_data_size);
    VATTR_WANTED(&va, va_data_alloc);
    VATTR_WANTED(&va, va_nlink);
    error = EFAULT;
    if (vnode_getattr(ref->vp, &va, ref->ctx)) goto out;

    wbctotal = 0;
    mpFree = freespace_mb(ref->vp);
    mpFree <<= 20;
    kprintf("kern_direct_file(%s): vp size %qd, alloc %qd, mp free %qd, keep free %qd\n", 
    		name, va.va_data_size, va.va_data_alloc, mpFree, fs_free_size);

    if (ref->vp->v_type == VREG)
    {
        /* Don't dump files with links. */
        if (va.va_nlink != 1) goto out;

        device = (VATTR_IS_SUPPORTED(&va, va_devid)) ? va.va_devid : va.va_fsid;
        ref->filelength = va.va_data_size;

        p1 = &device;
        p2 = p;
        do_ioctl = &file_ioctl;

        if (kIOPolledFileHibernate & iflags)
        {
            error = do_ioctl(p1, p2, DKIOCAPFSGETWBCRANGE, (caddr_t) &wbc_range);
            ref->wbcranged = (error == 0);
        }
        if (ref->wbcranged)
        {
            uint32_t idx;
            assert(wbc_range.count <= (sizeof(wbc_range.extents) / sizeof(wbc_range.extents[0])));
            for (idx = 0; idx < wbc_range.count; idx++) wbctotal += wbc_range.extents[idx].length;
            kprintf("kern_direct_file(%s): wbc %qd\n", name, wbctotal);
            if (wbctotal) target = wbc_range.dev;
        }

        if (set_file_size)
        {
            if (wbctotal)
            {
                if (wbctotal >= set_file_size) set_file_size = HIBERNATE_MIN_FILE_SIZE;
                else
                {
                    set_file_size -= wbctotal;
                    if (set_file_size < HIBERNATE_MIN_FILE_SIZE) set_file_size = HIBERNATE_MIN_FILE_SIZE;
                }
            }
            if (fs_free_size)
            {
                mpFree += va.va_data_alloc;
                if ((mpFree < set_file_size) || ((mpFree - set_file_size) < fs_free_size))
                {
                    error = ENOSPC;
                    goto out;
                }
            }
            error = vnode_setsize(ref->vp, set_file_size, IO_NOZEROFILL | IO_NOAUTH, ref->ctx);
            if (error) goto out;
            ref->filelength = set_file_size;
        }
    }
    else if ((ref->vp->v_type == VBLK) || (ref->vp->v_type == VCHR))
    {
        /* Partition. */
        device = va.va_rdev;

        p1 = ref->vp;
        p2 = ref->ctx;
        do_ioctl = &device_ioctl;
    }
    else
    {
        /* Don't dump to non-regular files. */
        error = EFAULT;
        goto out;
    }
    ref->device = device;

    // probe for CF
    dk_corestorage_info_t cs_info;
    memset(&cs_info, 0, sizeof(dk_corestorage_info_t));
    error = do_ioctl(p1, p2, DKIOCCORESTORAGE, (caddr_t)&cs_info);
    ref->cf = (error == 0) && (cs_info.flags & DK_CORESTORAGE_ENABLE_HOTFILES);

    // get block size

    error = do_ioctl(p1, p2, DKIOCGETBLOCKSIZE, (caddr_t) &ref->blksize);
    if (error)
        goto out;

    minoffset = HIBERNATE_MIN_PHYSICAL_LBA * ref->blksize;

    if (ref->vp->v_type != VREG)
    {
        error = do_ioctl(p1, p2, DKIOCGETBLOCKCOUNT, (caddr_t) &fileblk);
        if (error) goto out;
        ref->filelength = fileblk * ref->blksize;
    }

    // pin logical extents, CS version

    error = kern_ioctl_file_extents(ref, _DKIOCCSPINEXTENT, 0, ref->filelength);
    if (error && (ENOTTY != error)) goto out;
    ref->pinned = (error == 0);

    // pin logical extents, apfs version

    error = VNOP_IOCTL(ref->vp, FSCTL_FREEZE_EXTENTS, NULL, 0, ref->ctx);
    if (error && (ENOTTY != error)) goto out;
    ref->frozen = (error == 0);

    // generate the block list

    error = do_ioctl(p1, p2, DKIOCLOCKPHYSICALEXTENTS, NULL);
    if (error) goto out;
    locked = TRUE;

    f_offset = 0;
    for (; f_offset < ref->filelength; f_offset += filechunk)
    {
        if (ref->vp->v_type == VREG)
        {
            daddr64_t blkno;

            filechunk = 1*1024*1024*1024;
            error = VNOP_BLOCKMAP(ref->vp, f_offset, filechunk, &blkno,
                                  &filechunk, NULL, VNODE_WRITE | VNODE_BLOCKMAP_NO_TRACK, NULL);
            if (error) goto out;
            if (-1LL == blkno) continue;
            fileblk = blkno * ref->blksize;
        }
        else if ((ref->vp->v_type == VBLK) || (ref->vp->v_type == VCHR))
        {
            fileblk = f_offset;
            filechunk = f_offset ? 0 : ref->filelength;
        }

        physoffset = 0;
        while (physoffset < filechunk)
        {
            dk_physical_extent_t getphysreq;
            bzero(&getphysreq, sizeof(getphysreq));

            getphysreq.offset = fileblk + physoffset;
            getphysreq.length = (filechunk - physoffset);
            error = do_ioctl(p1, p2, DKIOCGETPHYSICALEXTENT, (caddr_t) &getphysreq);
            if (error) goto out;
            if (!target)
            {
                target = getphysreq.dev;
            }
            else if (target != getphysreq.dev)
            {
                error = ENOTSUP;
                goto out;
            }

            assert(getphysreq.offset >= minoffset);

#if HIBFRAGMENT
	    uint64_t rev;
	    for (rev = 4096; rev <= getphysreq.length; rev += 4096)
	    {
		callback(callback_ref, getphysreq.offset + getphysreq.length - rev, 4096);
	    }
#else
            callback(callback_ref, getphysreq.offset, getphysreq.length);
#endif
            physoffset += getphysreq.length;
        }
    }
    if (ref->wbcranged)
    {
        uint32_t idx;
        for (idx = 0; idx < wbc_range.count; idx++)
        {
            assert(wbc_range.extents[idx].offset >= minoffset);
            callback(callback_ref, wbc_range.extents[idx].offset, wbc_range.extents[idx].length);
        }
    }
    callback(callback_ref, 0ULL, 0ULL);

    if (ref->vp->v_type == VREG) p1 = &target;
    else
    {
        p1 = &target;
        p2 = p;
        do_ioctl = &file_ioctl;
    }

    // get partition base

    if (partitionbase_result) 
    {
        error = do_ioctl(p1, p2, DKIOCGETBASE, (caddr_t) partitionbase_result);
        if (error)
            goto out;
    }

    // get block size & constraints

    error = do_ioctl(p1, p2, DKIOCGETBLOCKSIZE, (caddr_t) &blksize);
    if (error)
        goto out;

    maxiocount = 1*1024*1024*1024;

    error = do_ioctl(p1, p2, DKIOCGETMAXBLOCKCOUNTREAD, (caddr_t) &count);
    if (error)
        count = 0;
    count *= blksize;
    if (count && (count < maxiocount))
        maxiocount = count;

    error = do_ioctl(p1, p2, DKIOCGETMAXBLOCKCOUNTWRITE, (caddr_t) &count);
    if (error)
        count = 0;
    count *= blksize;
    if (count && (count < maxiocount))
        maxiocount = count;

    error = do_ioctl(p1, p2, DKIOCGETMAXBYTECOUNTREAD, (caddr_t) &count);
    if (error)
        count = 0;
    if (count && (count < maxiocount))
        maxiocount = count;

    error = do_ioctl(p1, p2, DKIOCGETMAXBYTECOUNTWRITE, (caddr_t) &count);
    if (error)
        count = 0;
    if (count && (count < maxiocount))
        maxiocount = count;

    error = do_ioctl(p1, p2, DKIOCGETMAXSEGMENTBYTECOUNTREAD, (caddr_t) &count);
    if (!error)
        error = do_ioctl(p1, p2, DKIOCGETMAXSEGMENTCOUNTREAD, (caddr_t) &segcount);
    if (error)
        count = segcount = 0;
    count *= segcount;
    if (count && (count < maxiocount))
        maxiocount = count;

    error = do_ioctl(p1, p2, DKIOCGETMAXSEGMENTBYTECOUNTWRITE, (caddr_t) &count);
    if (!error)
        error = do_ioctl(p1, p2, DKIOCGETMAXSEGMENTCOUNTWRITE, (caddr_t) &segcount);
    if (error)
        count = segcount = 0;
    count *= segcount;
    if (count && (count < maxiocount))
        maxiocount = count;

    kprintf("max io 0x%qx bytes\n", maxiocount);
    if (maxiocount_result)
        *maxiocount_result = maxiocount;

    error = do_ioctl(p1, p2, DKIOCISSOLIDSTATE, (caddr_t)&isssd);
    if (!error && isssd)
        flags |= kIOPolledFileSSD;

    if (partition_device_result)
        *partition_device_result = device;
    if (image_device_result)
        *image_device_result = target;
    if (oflags)
        *oflags = flags;

    if ((ref->vp->v_type == VBLK) || (ref->vp->v_type == VCHR))
    {
        vnode_close(ref->vp, FWRITE, ref->ctx);
        ref->vp = NULLVP;
        ref->ctx = NULL;
    }

out:
    printf("kern_open_file_for_direct_io(%p, %d)\n", ref, error);


    if (error && locked)
    {
        p1 = &device;
        (void) do_ioctl(p1, p2, DKIOCUNLOCKPHYSICALEXTENTS, NULL);
    }

    if (error && ref)
    {
        if (ref->vp)
        {
            (void) kern_ioctl_file_extents(ref, _DKIOCCSUNPINEXTENT, 0, (ref->pinned && ref->cf) ? ref->filelength : 0);

            if (ref->frozen)
            {
                (void) VNOP_IOCTL(ref->vp, FSCTL_THAW_EXTENTS, NULL, 0, ref->ctx);
            }
            if (ref->wbcranged)
            {
                (void) do_ioctl(p1, p2, DKIOCAPFSRELEASEWBCRANGE, (caddr_t) NULL);
            }
            vnode_close(ref->vp, FWRITE, ref->ctx);
            ref->vp = NULLVP;
        }
        ref->ctx = NULL;
        kfree(ref, sizeof(struct kern_direct_file_io_ref_t));
        ref = NULL;
    }

    return (ref);
}
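A hypothetical caller sketch (the sleepimage path, zero sizes, and extent_cb are placeholders; the callback signature follows the callback(callback_ref, offset, length) usage above, terminated by a (0, 0) call):

static void
extent_cb(void *ref, uint64_t offset, uint64_t length)
{
    if ((offset == 0) && (length == 0))
        return;                         /* terminator, per callback(..., 0ULL, 0ULL) above */
    printf("extent @ 0x%llx + 0x%llx\n",
           (unsigned long long)offset, (unsigned long long)length);
}

static void
open_hibernation_file_sketch(void)
{
    struct kern_direct_file_io_ref_t *ref;
    dev_t     part_dev = 0, img_dev = 0;
    uint64_t  base = 0, maxio = 0;
    uint32_t  oflags = 0;

    ref = kern_open_file_for_direct_io("/private/var/vm/sleepimage",
                0,                      /* iflags */
                &extent_cb, NULL,
                0, 0,                   /* set_file_size, fs_free_size */
                0, NULL, 0,             /* no initial write */
                &part_dev, &img_dev, &base, &maxio, &oflags);
    if (ref == NULL)
        printf("kern_open_file_for_direct_io failed\n");
}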