Beispiel #1
0
Datei: zvol.c Projekt: l1k/zfs
/*
 * Set ZFS_PROP_VOLSIZE set entry point.
 */
int
zvol_set_volsize(const char *name, uint64_t volsize)
{
	zvol_state_t *zv = NULL;
	objset_t *os = NULL;
	int error;
	dmu_object_info_t *doi;
	uint64_t readonly;
	boolean_t owned = B_FALSE;

	error = dsl_prop_get_integer(name,
	    zfs_prop_to_name(ZFS_PROP_READONLY), &readonly, NULL);
	if (error != 0)
		return (SET_ERROR(error));
	if (readonly)
		return (SET_ERROR(EROFS));

	mutex_enter(&zvol_state_lock);
	zv = zvol_find_by_name(name);

	if (zv == NULL || zv->zv_objset == NULL) {
		if ((error = dmu_objset_own(name, DMU_OST_ZVOL, B_FALSE,
		    FTAG, &os)) != 0) {
			mutex_exit(&zvol_state_lock);
			return (SET_ERROR(error));
		}
		owned = B_TRUE;
		if (zv != NULL)
			zv->zv_objset = os;
	} else {
		os = zv->zv_objset;
	}

	doi = kmem_alloc(sizeof (dmu_object_info_t), KM_SLEEP);

	if ((error = dmu_object_info(os, ZVOL_OBJ, doi)) ||
	    (error = zvol_check_volsize(volsize, doi->doi_data_block_size)))
		goto out;

	error = zvol_update_volsize(volsize, os);
	kmem_free(doi, sizeof (dmu_object_info_t));

	if (error == 0 && zv != NULL)
		error = zvol_update_live_volsize(zv, volsize);
out:
	if (owned) {
		dmu_objset_disown(os, FTAG);
		if (zv != NULL)
			zv->zv_objset = NULL;
	}
	mutex_exit(&zvol_state_lock);
	return (error);
}
Beispiel #2
0
/*
* Clean up any znodes that had no links when we either crashed or
* (force) umounted the file system.
*/
void
zfs_unlinked_drain(zfsvfs_t *zfsvfs)
{
        zap_cursor_t        zc;
        zap_attribute_t zap;
        dmu_object_info_t doi;
        znode_t                *zp;
        int                error;

        printf("ZFS: unlinked drain\n");

        /*
         * Interate over the contents of the unlinked set.
         */
        for (zap_cursor_init(&zc, zfsvfs->z_os, zfsvfs->z_unlinkedobj);
         zap_cursor_retrieve(&zc, &zap) == 0;
         zap_cursor_advance(&zc)) {

                /*
                 * See what kind of object we have in list
                 */

                error = dmu_object_info(zfsvfs->z_os,
                 zap.za_first_integer, &doi);
                if (error != 0)
                        continue;

                ASSERT((doi.doi_type == DMU_OT_PLAIN_FILE_CONTENTS) ||
                 (doi.doi_type == DMU_OT_DIRECTORY_CONTENTS));
                /*
                 * We need to re-mark these list entries for deletion,
                 * so we pull them back into core and set zp->z_unlinked.
                 */
                error = zfs_zget(zfsvfs, zap.za_first_integer, &zp);

                /*
                 * We may pick up znodes that are already marked for deletion.
                 * This could happen during the purge of an extended attribute
                 * directory. All we need to do is skip over them, since they
                 * are already in the system marked z_unlinked.
                 */
                if (error != 0)
                        continue;

                zp->z_unlinked = B_TRUE;
                VN_RELE(ZTOV(zp));
        }
        zap_cursor_fini(&zc);
        printf("ZFS: unlinked drain completed.\n");
}
Beispiel #3
0
/*
 * Set ZFS_PROP_VOLSIZE set entry point.
 */
int
zvol_set_volsize(const char *name, uint64_t volsize)
{
	zvol_state_t *zv;
	dmu_object_info_t *doi;
	objset_t *os = NULL;
	uint64_t readonly;
	int error;

	mutex_enter(&zvol_state_lock);

	zv = zvol_find_by_name(name);
	if (zv == NULL) {
		error = ENXIO;
		goto out;
	}

	doi = kmem_alloc(sizeof(dmu_object_info_t), KM_SLEEP);

	error = dmu_objset_hold(name, FTAG, &os);
	if (error)
		goto out_doi;

	if ((error = dmu_object_info(os, ZVOL_OBJ, doi)) != 0 ||
	    (error = zvol_check_volsize(volsize,doi->doi_data_block_size)) != 0)
		goto out_doi;

	VERIFY(dsl_prop_get_integer(name, "readonly", &readonly, NULL) == 0);
	if (readonly) {
		error = EROFS;
		goto out_doi;
	}

	if (get_disk_ro(zv->zv_disk) || (zv->zv_flags & ZVOL_RDONLY)) {
		error = EROFS;
		goto out_doi;
	}

	error = zvol_update_volsize(zv, volsize, os);
out_doi:
	kmem_free(doi, sizeof(dmu_object_info_t));
out:
	if (os)
		dmu_objset_rele(os, FTAG);

	mutex_exit(&zvol_state_lock);

	return (error);
}
Beispiel #4
0
int
zvol_get_stats(zfs_cmd_t *zc, objset_t *os)
{
	int error;
	dmu_object_info_t doi;

	error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &zc->zc_volsize);

	if (error)
		return (error);

	error = dmu_object_info(os, ZVOL_OBJ, &doi);

	if (error == 0)
		zc->zc_volblocksize = doi.doi_data_block_size;

	return (error);
}
Beispiel #5
0
int
zvol_get_stats(objset_t *os, nvlist_t *nv)
{
	int error;
	dmu_object_info_t doi;
	uint64_t val;

	error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &val);
	if (error)
		return (error);

	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_VOLSIZE, val);

	error = dmu_object_info(os, ZVOL_OBJ, &doi);

	if (error == 0) {
		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_VOLBLOCKSIZE,
		    doi.doi_data_block_size);
	}

	return (error);
}
Beispiel #6
0
/*
 * zfs_init_fs - Initialize the zfsvfs struct and the file system
 *	incore "master" object.  Verify version compatibility.
 */
int
zfs_init_fs(zfsvfs_t *zfsvfs, znode_t **zpp, cred_t *cr)
{
	extern int zfsfstype;

	objset_t	*os = zfsvfs->z_os;
	int		i, error;
	dmu_object_info_t doi;
	uint64_t fsid_guid;
	uint64_t zval;

	*zpp = NULL;

	/*
	 * XXX - hack to auto-create the pool root filesystem at
	 * the first attempted mount.
	 */
	if (dmu_object_info(os, MASTER_NODE_OBJ, &doi) == ENOENT) {
		dmu_tx_t *tx = dmu_tx_create(os);
		uint64_t zpl_version;
		nvlist_t *zprops;

		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, TRUE, NULL); /* master */
		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, TRUE, NULL); /* del queue */
		dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); /* root node */
		error = dmu_tx_assign(tx, TXG_WAIT);
		ASSERT3U(error, ==, 0);
		if (spa_version(dmu_objset_spa(os)) >= SPA_VERSION_FUID)
			zpl_version = ZPL_VERSION;
		else
			zpl_version = ZPL_VERSION_FUID - 1;

		VERIFY(nvlist_alloc(&zprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
		VERIFY(nvlist_add_uint64(zprops,
		    zfs_prop_to_name(ZFS_PROP_VERSION), zpl_version) == 0);
		zfs_create_fs(os, cr, zprops, tx);
		nvlist_free(zprops);
		dmu_tx_commit(tx);
	}
Beispiel #7
0
Datei: zvol.c Projekt: alek-p/zfs
/*
 * ZFS_IOC_OBJSET_STATS entry point.
 */
int
zvol_get_stats(objset_t *os, nvlist_t *nv)
{
	int error;
	dmu_object_info_t *doi;
	uint64_t val;

	error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &val);
	if (error)
		return (SET_ERROR(error));

	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_VOLSIZE, val);
	doi = kmem_alloc(sizeof (dmu_object_info_t), KM_SLEEP);
	error = dmu_object_info(os, ZVOL_OBJ, doi);

	if (error == 0) {
		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_VOLBLOCKSIZE,
		    doi->doi_data_block_size);
	}

	kmem_free(doi, sizeof (dmu_object_info_t));

	return (SET_ERROR(error));
}
Beispiel #8
0
static int
zfs_replay_create(zfsvfs_t *zfsvfs, lr_create_t *lr, boolean_t byteswap)
{
	char *name = NULL;		/* location determined later */
	char *link;			/* symlink content follows name */
	znode_t *dzp;
	vnode_t *vp = NULL;
	xvattr_t xva;
	int vflg = 0;
	size_t lrsize = sizeof (lr_create_t);
	lr_attr_t *lrattr;
	void *start;
	size_t xvatlen;
	uint64_t txtype;
	struct componentname cn;
	int error;

	txtype = (lr->lr_common.lrc_txtype & ~TX_CI);
	if (byteswap) {
		byteswap_uint64_array(lr, sizeof (*lr));
		if (txtype == TX_CREATE_ATTR || txtype == TX_MKDIR_ATTR)
			zfs_replay_swap_attrs((lr_attr_t *)(lr + 1));
	}


	if ((error = zfs_zget(zfsvfs, lr->lr_doid, &dzp)) != 0)
		return (error);

	xva_init(&xva);
	zfs_init_vattr(&xva.xva_vattr, AT_TYPE | AT_MODE | AT_UID | AT_GID,
	    lr->lr_mode, lr->lr_uid, lr->lr_gid, lr->lr_rdev, lr->lr_foid);

	/*
	 * All forms of zfs create (create, mkdir, mkxattrdir, symlink)
	 * eventually end up in zfs_mknode(), which assigns the object's
	 * creation time and generation number.  The generic VOP_CREATE()
	 * doesn't have either concept, so we smuggle the values inside
	 * the vattr's otherwise unused va_ctime and va_nblocks fields.
	 */
	ZFS_TIME_DECODE(&xva.xva_vattr.va_ctime, lr->lr_crtime);
	xva.xva_vattr.va_nblocks = lr->lr_gen;

	error = dmu_object_info(zfsvfs->z_os, lr->lr_foid, NULL);
	if (error != ENOENT)
		goto out;

	if (lr->lr_common.lrc_txtype & TX_CI)
		vflg |= FIGNORECASE;

	/*
	 * Symlinks don't have fuid info, and CIFS never creates
	 * symlinks.
	 *
	 * The _ATTR versions will grab the fuid info in their subcases.
	 */
	if ((int)lr->lr_common.lrc_txtype != TX_SYMLINK &&
	    (int)lr->lr_common.lrc_txtype != TX_MKDIR_ATTR &&
	    (int)lr->lr_common.lrc_txtype != TX_CREATE_ATTR) {
		start = (lr + 1);
		zfsvfs->z_fuid_replay =
		    zfs_replay_fuid_domain(start, &start,
		    lr->lr_uid, lr->lr_gid);
	}

	cn.cn_cred = kcred;
	cn.cn_thread = curthread;
	cn.cn_flags = SAVENAME;

	vn_lock(ZTOV(dzp), LK_EXCLUSIVE | LK_RETRY);
	switch (txtype) {
	case TX_CREATE_ATTR:
		lrattr = (lr_attr_t *)(caddr_t)(lr + 1);
		xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize);
		zfs_replay_xvattr((lr_attr_t *)((caddr_t)lr + lrsize), &xva);
		start = (caddr_t)(lr + 1) + xvatlen;
		zfsvfs->z_fuid_replay =
		    zfs_replay_fuid_domain(start, &start,
		    lr->lr_uid, lr->lr_gid);
		name = (char *)start;

		/*FALLTHROUGH*/
	case TX_CREATE:
		if (name == NULL)
			name = (char *)start;

		cn.cn_nameptr = name;
		error = VOP_CREATE(ZTOV(dzp), &vp, &cn, &xva.xva_vattr /*,vflg*/);
		break;
	case TX_MKDIR_ATTR:
		lrattr = (lr_attr_t *)(caddr_t)(lr + 1);
		xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize);
		zfs_replay_xvattr((lr_attr_t *)((caddr_t)lr + lrsize), &xva);
		start = (caddr_t)(lr + 1) + xvatlen;
		zfsvfs->z_fuid_replay =
		    zfs_replay_fuid_domain(start, &start,
		    lr->lr_uid, lr->lr_gid);
		name = (char *)start;

		/*FALLTHROUGH*/
	case TX_MKDIR:
		if (name == NULL)
			name = (char *)(lr + 1);

		cn.cn_nameptr = name;
		error = VOP_MKDIR(ZTOV(dzp), &vp, &cn, &xva.xva_vattr /*,vflg*/);
		break;
	case TX_MKXATTR:
		error = zfs_make_xattrdir(dzp, &xva.xva_vattr, &vp, kcred);
		break;
	case TX_SYMLINK:
		name = (char *)(lr + 1);
		link = name + strlen(name) + 1;
		cn.cn_nameptr = name;
		error = VOP_SYMLINK(ZTOV(dzp), &vp, &cn, &xva.xva_vattr, link /*,vflg*/);
		break;
	default:
		error = ENOTSUP;
	}
	VOP_UNLOCK(ZTOV(dzp), 0);

out:
	if (error == 0 && vp != NULL)
		VN_URELE(vp);

	VN_RELE(ZTOV(dzp));

	if (zfsvfs->z_fuid_replay)
		zfs_fuid_info_free(zfsvfs->z_fuid_replay);
	zfsvfs->z_fuid_replay = NULL;
	return (error);
}
Beispiel #9
0
/*
 * Replay file create with optional ACL, xvattr information as well
 * as option FUID information.
 */
static int
zfs_replay_create_acl(zfsvfs_t *zfsvfs,
    lr_acl_create_t *lracl, boolean_t byteswap)
{
	char *name = NULL;		/* location determined later */
	lr_create_t *lr = (lr_create_t *)lracl;
	znode_t *dzp;
	vnode_t *vp = NULL;
	xvattr_t xva;
	int vflg = 0;
	vsecattr_t vsec = { 0 };
	lr_attr_t *lrattr;
	void *aclstart;
	void *fuidstart;
	size_t xvatlen = 0;
	uint64_t txtype;
	int error;

	txtype = (lr->lr_common.lrc_txtype & ~TX_CI);
	if (byteswap) {
		byteswap_uint64_array(lracl, sizeof (*lracl));
		if (txtype == TX_CREATE_ACL_ATTR ||
		    txtype == TX_MKDIR_ACL_ATTR) {
			lrattr = (lr_attr_t *)(caddr_t)(lracl + 1);
			zfs_replay_swap_attrs(lrattr);
			xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize);
		}

		aclstart = (caddr_t)(lracl + 1) + xvatlen;
		zfs_ace_byteswap(aclstart, lracl->lr_acl_bytes, B_FALSE);
		/* swap fuids */
		if (lracl->lr_fuidcnt) {
			byteswap_uint64_array((caddr_t)aclstart +
			    ZIL_ACE_LENGTH(lracl->lr_acl_bytes),
			    lracl->lr_fuidcnt * sizeof (uint64_t));
		}
	}

	if ((error = zfs_zget(zfsvfs, lr->lr_doid, &dzp)) != 0)
		return (error);

	xva_init(&xva);
	zfs_init_vattr(&xva.xva_vattr, AT_TYPE | AT_MODE | AT_UID | AT_GID,
	    lr->lr_mode, lr->lr_uid, lr->lr_gid, lr->lr_rdev, lr->lr_foid);

	/*
	 * All forms of zfs create (create, mkdir, mkxattrdir, symlink)
	 * eventually end up in zfs_mknode(), which assigns the object's
	 * creation time and generation number.  The generic VOP_CREATE()
	 * doesn't have either concept, so we smuggle the values inside
	 * the vattr's otherwise unused va_ctime and va_nblocks fields.
	 */
	ZFS_TIME_DECODE(&xva.xva_vattr.va_ctime, lr->lr_crtime);
	xva.xva_vattr.va_nblocks = lr->lr_gen;

	error = dmu_object_info(zfsvfs->z_os, lr->lr_foid, NULL);
	if (error != ENOENT)
		goto bail;

	if (lr->lr_common.lrc_txtype & TX_CI)
		vflg |= FIGNORECASE;
	switch (txtype) {
	case TX_CREATE_ACL:
		aclstart = (caddr_t)(lracl + 1);
		fuidstart = (caddr_t)aclstart +
		    ZIL_ACE_LENGTH(lracl->lr_acl_bytes);
		zfsvfs->z_fuid_replay = zfs_replay_fuids(fuidstart,
		    (void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt,
		    lr->lr_uid, lr->lr_gid);
		/*FALLTHROUGH*/
	case TX_CREATE_ACL_ATTR:
		if (name == NULL) {
			lrattr = (lr_attr_t *)(caddr_t)(lracl + 1);
			xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize);
			xva.xva_vattr.va_mask |= AT_XVATTR;
			zfs_replay_xvattr(lrattr, &xva);
		}
		vsec.vsa_mask = VSA_ACE | VSA_ACE_ACLFLAGS;
		vsec.vsa_aclentp = (caddr_t)(lracl + 1) + xvatlen;
		vsec.vsa_aclcnt = lracl->lr_aclcnt;
		vsec.vsa_aclentsz = lracl->lr_acl_bytes;
		vsec.vsa_aclflags = lracl->lr_acl_flags;
		if (zfsvfs->z_fuid_replay == NULL) {
			fuidstart = (caddr_t)(lracl + 1) + xvatlen +
			    ZIL_ACE_LENGTH(lracl->lr_acl_bytes);
			zfsvfs->z_fuid_replay =
			    zfs_replay_fuids(fuidstart,
			    (void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt,
			    lr->lr_uid, lr->lr_gid);
		}

#ifdef TODO
		error = VOP_CREATE(ZTOV(dzp), name, &xva.xva_vattr,
		    0, 0, &vp, kcred, vflg, NULL, &vsec);
#else
		panic("%s:%u: unsupported condition", __func__, __LINE__);
#endif
		break;
	case TX_MKDIR_ACL:
		aclstart = (caddr_t)(lracl + 1);
		fuidstart = (caddr_t)aclstart +
		    ZIL_ACE_LENGTH(lracl->lr_acl_bytes);
		zfsvfs->z_fuid_replay = zfs_replay_fuids(fuidstart,
		    (void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt,
		    lr->lr_uid, lr->lr_gid);
		/*FALLTHROUGH*/
	case TX_MKDIR_ACL_ATTR:
		if (name == NULL) {
			lrattr = (lr_attr_t *)(caddr_t)(lracl + 1);
			xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize);
			zfs_replay_xvattr(lrattr, &xva);
		}
		vsec.vsa_mask = VSA_ACE | VSA_ACE_ACLFLAGS;
		vsec.vsa_aclentp = (caddr_t)(lracl + 1) + xvatlen;
		vsec.vsa_aclcnt = lracl->lr_aclcnt;
		vsec.vsa_aclentsz = lracl->lr_acl_bytes;
		vsec.vsa_aclflags = lracl->lr_acl_flags;
		if (zfsvfs->z_fuid_replay == NULL) {
			fuidstart = (caddr_t)(lracl + 1) + xvatlen +
			    ZIL_ACE_LENGTH(lracl->lr_acl_bytes);
			zfsvfs->z_fuid_replay =
			    zfs_replay_fuids(fuidstart,
			    (void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt,
			    lr->lr_uid, lr->lr_gid);
		}
#ifdef TODO
		error = VOP_MKDIR(ZTOV(dzp), name, &xva.xva_vattr,
		    &vp, kcred, NULL, vflg, &vsec);
#else
		panic("%s:%u: unsupported condition", __func__, __LINE__);
#endif
		break;
	default:
		error = ENOTSUP;
	}

bail:
	if (error == 0 && vp != NULL)
		VN_RELE(vp);

	VN_RELE(ZTOV(dzp));

	if (zfsvfs->z_fuid_replay)
		zfs_fuid_info_free(zfsvfs->z_fuid_replay);
	zfsvfs->z_fuid_replay = NULL;

	return (error);
}
Beispiel #10
0
Datei: zvol.c Projekt: alek-p/zfs
/*
 * Create a block device minor node and setup the linkage between it
 * and the specified volume.  Once this function returns the block
 * device is live and ready for use.
 */
static int
zvol_create_minor_impl(const char *name)
{
	zvol_state_t *zv;
	objset_t *os;
	dmu_object_info_t *doi;
	uint64_t volsize;
	uint64_t len;
	unsigned minor = 0;
	int error = 0;

	mutex_enter(&zvol_state_lock);

	zv = zvol_find_by_name(name);
	if (zv) {
		error = SET_ERROR(EEXIST);
		goto out;
	}

	doi = kmem_alloc(sizeof (dmu_object_info_t), KM_SLEEP);

	error = dmu_objset_own(name, DMU_OST_ZVOL, B_TRUE, zvol_tag, &os);
	if (error)
		goto out_doi;

	error = dmu_object_info(os, ZVOL_OBJ, doi);
	if (error)
		goto out_dmu_objset_disown;

	error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize);
	if (error)
		goto out_dmu_objset_disown;

	error = zvol_find_minor(&minor);
	if (error)
		goto out_dmu_objset_disown;

	zv = zvol_alloc(MKDEV(zvol_major, minor), name);
	if (zv == NULL) {
		error = SET_ERROR(EAGAIN);
		goto out_dmu_objset_disown;
	}

	if (dmu_objset_is_snapshot(os))
		zv->zv_flags |= ZVOL_RDONLY;

	zv->zv_volblocksize = doi->doi_data_block_size;
	zv->zv_volsize = volsize;
	zv->zv_objset = os;

	set_capacity(zv->zv_disk, zv->zv_volsize >> 9);

	blk_queue_max_hw_sectors(zv->zv_queue, (DMU_MAX_ACCESS / 4) >> 9);
	blk_queue_max_segments(zv->zv_queue, UINT16_MAX);
	blk_queue_max_segment_size(zv->zv_queue, UINT_MAX);
	blk_queue_physical_block_size(zv->zv_queue, zv->zv_volblocksize);
	blk_queue_io_opt(zv->zv_queue, zv->zv_volblocksize);
	blk_queue_max_discard_sectors(zv->zv_queue,
	    (zvol_max_discard_blocks * zv->zv_volblocksize) >> 9);
	blk_queue_discard_granularity(zv->zv_queue, zv->zv_volblocksize);
	queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zv->zv_queue);
#ifdef QUEUE_FLAG_NONROT
	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zv->zv_queue);
#endif
#ifdef QUEUE_FLAG_ADD_RANDOM
	queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, zv->zv_queue);
#endif

	if (spa_writeable(dmu_objset_spa(os))) {
		if (zil_replay_disable)
			zil_destroy(dmu_objset_zil(os), B_FALSE);
		else
			zil_replay(os, zv, zvol_replay_vector);
	}

	/*
	 * When udev detects the addition of the device it will immediately
	 * invoke blkid(8) to determine the type of content on the device.
	 * Prefetching the blocks commonly scanned by blkid(8) will speed
	 * up this process.
	 */
	len = MIN(MAX(zvol_prefetch_bytes, 0), SPA_MAXBLOCKSIZE);
	if (len > 0) {
		dmu_prefetch(os, ZVOL_OBJ, 0, 0, len, ZIO_PRIORITY_SYNC_READ);
		dmu_prefetch(os, ZVOL_OBJ, 0, volsize - len, len,
			ZIO_PRIORITY_SYNC_READ);
	}

	zv->zv_objset = NULL;
out_dmu_objset_disown:
	dmu_objset_disown(os, zvol_tag);
out_doi:
	kmem_free(doi, sizeof (dmu_object_info_t));
out:

	if (error == 0) {
		zvol_insert(zv);
		/*
		 * Drop the lock to prevent deadlock with sys_open() ->
		 * zvol_open(), which first takes bd_disk->bd_mutex and then
		 * takes zvol_state_lock, whereas this code path first takes
		 * zvol_state_lock, and then takes bd_disk->bd_mutex.
		 */
		mutex_exit(&zvol_state_lock);
		add_disk(zv->zv_disk);
	} else {
		mutex_exit(&zvol_state_lock);
	}

	return (SET_ERROR(error));
}
Beispiel #11
0
static int
__zvol_create_minor(const char *name)
{
	zvol_state_t *zv;
	objset_t *os;
	dmu_object_info_t *doi;
	uint64_t volsize;
	unsigned minor = 0;
	int error = 0;

	ASSERT(MUTEX_HELD(&zvol_state_lock));

	zv = zvol_find_by_name(name);
	if (zv) {
		error = EEXIST;
		goto out;
	}

	doi = kmem_alloc(sizeof(dmu_object_info_t), KM_SLEEP);

	error = dmu_objset_own(name, DMU_OST_ZVOL, B_TRUE, zvol_tag, &os);
	if (error)
		goto out_doi;

    /* Make sure we have the key loaded if we need one. */
    error = dsl_crypto_key_inherit(name);
    if (error != 0 && error != EEXIST)
		goto out_dmu_objset_disown;

	error = dmu_object_info(os, ZVOL_OBJ, doi);
	if (error)
		goto out_dmu_objset_disown;

	error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize);
	if (error)
		goto out_dmu_objset_disown;

	error = zvol_find_minor(&minor);
	if (error)
		goto out_dmu_objset_disown;

	zv = zvol_alloc(MKDEV(zvol_major, minor), name);
	if (zv == NULL) {
		error = EAGAIN;
		goto out_dmu_objset_disown;
	}

	if (dmu_objset_is_snapshot(os))
		zv->zv_flags |= ZVOL_RDONLY;

	zv->zv_volblocksize = doi->doi_data_block_size;
	zv->zv_volsize = volsize;
	zv->zv_objset = os;

	set_capacity(zv->zv_disk, zv->zv_volsize >> 9);

	blk_queue_max_hw_sectors(zv->zv_queue, UINT_MAX);
	blk_queue_max_segments(zv->zv_queue, UINT16_MAX);
	blk_queue_max_segment_size(zv->zv_queue, UINT_MAX);
	blk_queue_physical_block_size(zv->zv_queue, zv->zv_volblocksize);
	blk_queue_io_opt(zv->zv_queue, zv->zv_volblocksize);
#ifdef HAVE_BLK_QUEUE_DISCARD
	blk_queue_max_discard_sectors(zv->zv_queue,
	    (zvol_max_discard_blocks * zv->zv_volblocksize) >> 9);
	blk_queue_discard_granularity(zv->zv_queue, zv->zv_volblocksize);
	queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zv->zv_queue);
#endif
#ifdef HAVE_BLK_QUEUE_NONROT
	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zv->zv_queue);
#endif

	if (zil_replay_disable)
		zil_destroy(dmu_objset_zil(os), B_FALSE);
	else
		zil_replay(os, zv, zvol_replay_vector);

out_dmu_objset_disown:
	dmu_objset_disown(os, zvol_tag);
	zv->zv_objset = NULL;
out_doi:
	kmem_free(doi, sizeof(dmu_object_info_t));
out:

	if (error == 0) {
		zvol_insert(zv);
		add_disk(zv->zv_disk);
	}

	return (error);
}
Beispiel #12
0
static int
zfs_replay_create(zfsvfs_t *zfsvfs, lr_create_t *lr, boolean_t byteswap)
{
	char *name = NULL;		/* location determined later */
	char *link;			/* symlink content follows name */
	znode_t *dzp;
	struct inode *ip = NULL;
	xvattr_t xva;
	int vflg = 0;
	size_t lrsize = sizeof (lr_create_t);
	lr_attr_t *lrattr;
	void *start;
	size_t xvatlen;
	uint64_t txtype;
	uint64_t objid;
	uint64_t dnodesize;
	int error;

	txtype = (lr->lr_common.lrc_txtype & ~TX_CI);
	if (byteswap) {
		byteswap_uint64_array(lr, sizeof (*lr));
		if (txtype == TX_CREATE_ATTR || txtype == TX_MKDIR_ATTR)
			zfs_replay_swap_attrs((lr_attr_t *)(lr + 1));
	}


	if ((error = zfs_zget(zfsvfs, lr->lr_doid, &dzp)) != 0)
		return (error);

	objid = LR_FOID_GET_OBJ(lr->lr_foid);
	dnodesize = LR_FOID_GET_SLOTS(lr->lr_foid) << DNODE_SHIFT;

	xva_init(&xva);
	zfs_init_vattr(&xva.xva_vattr, ATTR_MODE | ATTR_UID | ATTR_GID,
	    lr->lr_mode, lr->lr_uid, lr->lr_gid, lr->lr_rdev, objid);

	/*
	 * All forms of zfs create (create, mkdir, mkxattrdir, symlink)
	 * eventually end up in zfs_mknode(), which assigns the object's
	 * creation time, generation number, and dnode slot count. The
	 * generic zfs_create() has no concept of these attributes, so
	 * we smuggle the values inside * the vattr's otherwise unused
	 * va_ctime, va_nblocks, and va_nlink fields.
	 */
	ZFS_TIME_DECODE(&xva.xva_vattr.va_ctime, lr->lr_crtime);
	xva.xva_vattr.va_nblocks = lr->lr_gen;
	xva.xva_vattr.va_fsid = dnodesize;

	error = dmu_object_info(zfsvfs->z_os, objid, NULL);
	if (error != ENOENT)
		goto out;

	if (lr->lr_common.lrc_txtype & TX_CI)
		vflg |= FIGNORECASE;

	/*
	 * Symlinks don't have fuid info, and CIFS never creates
	 * symlinks.
	 *
	 * The _ATTR versions will grab the fuid info in their subcases.
	 */
	if ((int)lr->lr_common.lrc_txtype != TX_SYMLINK &&
	    (int)lr->lr_common.lrc_txtype != TX_MKDIR_ATTR &&
	    (int)lr->lr_common.lrc_txtype != TX_CREATE_ATTR) {
		start = (lr + 1);
		zfsvfs->z_fuid_replay =
		    zfs_replay_fuid_domain(start, &start,
		    lr->lr_uid, lr->lr_gid);
	}

	switch (txtype) {
	case TX_CREATE_ATTR:
		lrattr = (lr_attr_t *)(caddr_t)(lr + 1);
		xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize);
		zfs_replay_xvattr((lr_attr_t *)((caddr_t)lr + lrsize), &xva);
		start = (caddr_t)(lr + 1) + xvatlen;
		zfsvfs->z_fuid_replay =
		    zfs_replay_fuid_domain(start, &start,
		    lr->lr_uid, lr->lr_gid);
		name = (char *)start;

		/*FALLTHROUGH*/
	case TX_CREATE:
		if (name == NULL)
			name = (char *)start;

		error = zfs_create(ZTOI(dzp), name, &xva.xva_vattr,
		    0, 0, &ip, kcred, vflg, NULL);
		break;
	case TX_MKDIR_ATTR:
		lrattr = (lr_attr_t *)(caddr_t)(lr + 1);
		xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize);
		zfs_replay_xvattr((lr_attr_t *)((caddr_t)lr + lrsize), &xva);
		start = (caddr_t)(lr + 1) + xvatlen;
		zfsvfs->z_fuid_replay =
		    zfs_replay_fuid_domain(start, &start,
		    lr->lr_uid, lr->lr_gid);
		name = (char *)start;

		/*FALLTHROUGH*/
	case TX_MKDIR:
		if (name == NULL)
			name = (char *)(lr + 1);

		error = zfs_mkdir(ZTOI(dzp), name, &xva.xva_vattr,
		    &ip, kcred, vflg, NULL);
		break;
	case TX_MKXATTR:
		error = zfs_make_xattrdir(dzp, &xva.xva_vattr, &ip, kcred);
		break;
	case TX_SYMLINK:
		name = (char *)(lr + 1);
		link = name + strlen(name) + 1;
		error = zfs_symlink(ZTOI(dzp), name, &xva.xva_vattr,
		    link, &ip, kcred, vflg);
		break;
	default:
		error = SET_ERROR(ENOTSUP);
	}

out:
	if (error == 0 && ip != NULL)
		iput(ip);

	iput(ZTOI(dzp));

	if (zfsvfs->z_fuid_replay)
		zfs_fuid_info_free(zfsvfs->z_fuid_replay);
	zfsvfs->z_fuid_replay = NULL;
	return (error);
}
Beispiel #13
0
/*
 * Replay file create with optional ACL, xvattr information as well
 * as option FUID information.
 */
static int
zfs_replay_create_acl(zfsvfs_t *zfsvfs,
    lr_acl_create_t *lracl, boolean_t byteswap)
{
	char *name = NULL;		/* location determined later */
	lr_create_t *lr = (lr_create_t *)lracl;
	znode_t *dzp;
	struct inode *ip = NULL;
	xvattr_t xva;
	int vflg = 0;
	vsecattr_t vsec = { 0 };
	lr_attr_t *lrattr;
	void *aclstart;
	void *fuidstart;
	size_t xvatlen = 0;
	uint64_t txtype;
	uint64_t objid;
	uint64_t dnodesize;
	int error;

	txtype = (lr->lr_common.lrc_txtype & ~TX_CI);
	if (byteswap) {
		byteswap_uint64_array(lracl, sizeof (*lracl));
		if (txtype == TX_CREATE_ACL_ATTR ||
		    txtype == TX_MKDIR_ACL_ATTR) {
			lrattr = (lr_attr_t *)(caddr_t)(lracl + 1);
			zfs_replay_swap_attrs(lrattr);
			xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize);
		}

		aclstart = (caddr_t)(lracl + 1) + xvatlen;
		zfs_ace_byteswap(aclstart, lracl->lr_acl_bytes, B_FALSE);
		/* swap fuids */
		if (lracl->lr_fuidcnt) {
			byteswap_uint64_array((caddr_t)aclstart +
			    ZIL_ACE_LENGTH(lracl->lr_acl_bytes),
			    lracl->lr_fuidcnt * sizeof (uint64_t));
		}
	}

	if ((error = zfs_zget(zfsvfs, lr->lr_doid, &dzp)) != 0)
		return (error);

	objid = LR_FOID_GET_OBJ(lr->lr_foid);
	dnodesize = LR_FOID_GET_SLOTS(lr->lr_foid) << DNODE_SHIFT;

	xva_init(&xva);
	zfs_init_vattr(&xva.xva_vattr, ATTR_MODE | ATTR_UID | ATTR_GID,
	    lr->lr_mode, lr->lr_uid, lr->lr_gid, lr->lr_rdev, objid);

	/*
	 * All forms of zfs create (create, mkdir, mkxattrdir, symlink)
	 * eventually end up in zfs_mknode(), which assigns the object's
	 * creation time, generation number, and dnode size. The generic
	 * zfs_create() has no concept of these attributes, so we smuggle
	 * the values inside the vattr's otherwise unused va_ctime,
	 * va_nblocks, and va_fsid fields.
	 */
	ZFS_TIME_DECODE(&xva.xva_vattr.va_ctime, lr->lr_crtime);
	xva.xva_vattr.va_nblocks = lr->lr_gen;
	xva.xva_vattr.va_fsid = dnodesize;

	error = dmu_object_info(zfsvfs->z_os, lr->lr_foid, NULL);
	if (error != ENOENT)
		goto bail;

	if (lr->lr_common.lrc_txtype & TX_CI)
		vflg |= FIGNORECASE;
	switch (txtype) {
	case TX_CREATE_ACL:
		aclstart = (caddr_t)(lracl + 1);
		fuidstart = (caddr_t)aclstart +
		    ZIL_ACE_LENGTH(lracl->lr_acl_bytes);
		zfsvfs->z_fuid_replay = zfs_replay_fuids(fuidstart,
		    (void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt,
		    lr->lr_uid, lr->lr_gid);
		/*FALLTHROUGH*/
	case TX_CREATE_ACL_ATTR:
		if (name == NULL) {
			lrattr = (lr_attr_t *)(caddr_t)(lracl + 1);
			xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize);
			xva.xva_vattr.va_mask |= ATTR_XVATTR;
			zfs_replay_xvattr(lrattr, &xva);
		}
		vsec.vsa_mask = VSA_ACE | VSA_ACE_ACLFLAGS;
		vsec.vsa_aclentp = (caddr_t)(lracl + 1) + xvatlen;
		vsec.vsa_aclcnt = lracl->lr_aclcnt;
		vsec.vsa_aclentsz = lracl->lr_acl_bytes;
		vsec.vsa_aclflags = lracl->lr_acl_flags;
		if (zfsvfs->z_fuid_replay == NULL) {
			fuidstart = (caddr_t)(lracl + 1) + xvatlen +
			    ZIL_ACE_LENGTH(lracl->lr_acl_bytes);
			zfsvfs->z_fuid_replay =
			    zfs_replay_fuids(fuidstart,
			    (void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt,
			    lr->lr_uid, lr->lr_gid);
		}

		error = zfs_create(ZTOI(dzp), name, &xva.xva_vattr,
		    0, 0, &ip, kcred, vflg, &vsec);
		break;
	case TX_MKDIR_ACL:
		aclstart = (caddr_t)(lracl + 1);
		fuidstart = (caddr_t)aclstart +
		    ZIL_ACE_LENGTH(lracl->lr_acl_bytes);
		zfsvfs->z_fuid_replay = zfs_replay_fuids(fuidstart,
		    (void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt,
		    lr->lr_uid, lr->lr_gid);
		/*FALLTHROUGH*/
	case TX_MKDIR_ACL_ATTR:
		if (name == NULL) {
			lrattr = (lr_attr_t *)(caddr_t)(lracl + 1);
			xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize);
			zfs_replay_xvattr(lrattr, &xva);
		}
		vsec.vsa_mask = VSA_ACE | VSA_ACE_ACLFLAGS;
		vsec.vsa_aclentp = (caddr_t)(lracl + 1) + xvatlen;
		vsec.vsa_aclcnt = lracl->lr_aclcnt;
		vsec.vsa_aclentsz = lracl->lr_acl_bytes;
		vsec.vsa_aclflags = lracl->lr_acl_flags;
		if (zfsvfs->z_fuid_replay == NULL) {
			fuidstart = (caddr_t)(lracl + 1) + xvatlen +
			    ZIL_ACE_LENGTH(lracl->lr_acl_bytes);
			zfsvfs->z_fuid_replay =
			    zfs_replay_fuids(fuidstart,
			    (void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt,
			    lr->lr_uid, lr->lr_gid);
		}
		error = zfs_mkdir(ZTOI(dzp), name, &xva.xva_vattr,
		    &ip, kcred, vflg, &vsec);
		break;
	default:
		error = SET_ERROR(ENOTSUP);
	}

bail:
	if (error == 0 && ip != NULL)
		iput(ip);

	iput(ZTOI(dzp));

	if (zfsvfs->z_fuid_replay)
		zfs_fuid_info_free(zfsvfs->z_fuid_replay);
	zfsvfs->z_fuid_replay = NULL;

	return (error);
}
Beispiel #14
0
int
zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, offset_t off, ssize_t len,
    char *addr)
{
	dmu_object_info_t doi;
	ssize_t nbytes;
	itx_t *itx;
	lr_write_t *lr;
	objset_t *os;
	dmu_buf_t *db;
	uint64_t txg;
	uint64_t boff;
	int error;
	uint32_t blocksize;

	/* handle common case */
	if (len <= zvol_immediate_write_sz) {
		itx = zvol_immediate_itx(off, len, addr);
		(void) zil_itx_assign(zv->zv_zilog, itx, tx);
		return (0);
	}

	txg = dmu_tx_get_txg(tx);
	os = zv->zv_objset;

	/*
	 * We need to dmu_sync() each block in the range.
	 * For this we need the blocksize.
	 */
	error = dmu_object_info(os, ZVOL_OBJ, &doi);
	if (error)
		return (error);
	blocksize = doi.doi_data_block_size;

	/*
	 * We need to immediate write or dmu_sync() each block in the range.
	 */
	while (len) {
		nbytes = MIN(len, blocksize - P2PHASE(off, blocksize));
		if (nbytes <= zvol_immediate_write_sz) {
			itx = zvol_immediate_itx(off, nbytes, addr);
		} else {
			boff =  P2ALIGN_TYPED(off, blocksize, uint64_t);
			itx = zil_itx_create(TX_WRITE, sizeof (*lr));
			lr = (lr_write_t *)&itx->itx_lr;
			lr->lr_foid = ZVOL_OBJ;
			lr->lr_offset = off;
			lr->lr_length = nbytes;
			lr->lr_blkoff = off - boff;
			BP_ZERO(&lr->lr_blkptr);

			/* XXX - we should do these IOs in parallel */
			VERIFY(0 == dmu_buf_hold(os, ZVOL_OBJ, boff,
			    FTAG, &db));
			ASSERT(boff == db->db_offset);
			error = dmu_sync(NULL, db, &lr->lr_blkptr,
			    txg, NULL, NULL);
			dmu_buf_rele(db, FTAG);
			if (error) {
				kmem_free(itx, offsetof(itx_t, itx_lr));
				return (error);
			}
			itx->itx_wr_state = WR_COPIED;
		}
		(void) zil_itx_assign(zv->zv_zilog, itx, tx);
		len -= nbytes;
		off += nbytes;
	}
	return (0);
}
Beispiel #15
0
static int
__zvol_create_minor(const char *name)
{
	zvol_state_t *zv;
	objset_t *os;
	dmu_object_info_t *doi;
	uint64_t volsize;
	unsigned minor = 0;
	int error = 0;

	ASSERT(MUTEX_HELD(&zvol_state_lock));

	zv = zvol_find_by_name(name);
	if (zv) {
		error = EEXIST;
		goto out;
	}

	doi = kmem_alloc(sizeof(dmu_object_info_t), KM_SLEEP);

	error = dmu_objset_own(name, DMU_OST_ZVOL, B_TRUE, zvol_tag, &os);
	if (error)
		goto out_doi;

	error = dmu_object_info(os, ZVOL_OBJ, doi);
	if (error)
		goto out_dmu_objset_disown;

	error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize);
	if (error)
		goto out_dmu_objset_disown;

	error = zvol_find_minor(&minor);
	if (error)
		goto out_dmu_objset_disown;

	zv = zvol_alloc(MKDEV(zvol_major, minor), name);
	if (zv == NULL) {
		error = EAGAIN;
		goto out_dmu_objset_disown;
	}

	if (dmu_objset_is_snapshot(os))
		zv->zv_flags |= ZVOL_RDONLY;

	zv->zv_volblocksize = doi->doi_data_block_size;
	zv->zv_volsize = volsize;
	zv->zv_objset = os;

	set_capacity(zv->zv_disk, zv->zv_volsize >> 9);

	if (zil_replay_disable)
		zil_destroy(dmu_objset_zil(os), B_FALSE);
	else
		zil_replay(os, zv, zvol_replay_vector);

out_dmu_objset_disown:
	dmu_objset_disown(os, zvol_tag);
	zv->zv_objset = NULL;
out_doi:
	kmem_free(doi, sizeof(dmu_object_info_t));
out:

	if (error == 0) {
		zvol_insert(zv);
		add_disk(zv->zv_disk);
	}

	return (error);
}