Пример #1
0
Файл: zvol.c Проект: alek-p/zfs
/*
 * First-open setup for a zvol.
 *
 * Takes ownership of the objset named by zv->zv_name, reads the dataset's
 * "readonly" property and the "size" entry of ZVOL_ZAP_OBJ, holds the bonus
 * buffer of ZVOL_OBJ into zv->zv_dbuf, opens the ZIL, and finally sets the
 * disk capacity and read-only state.
 *
 * Returns 0 on success or a negated error code.  If any step after the
 * objset has been owned fails, the objset is disowned again and
 * zv->zv_objset is reset to NULL.
 */
static int
zvol_first_open(zvol_state_t *zv)
{
	objset_t *objset;
	uint64_t readonly_prop;
	uint64_t size_bytes;
	int rc;

	/* The `1` asks for read-only ownership ("lie and say we're read-only"). */
	rc = dmu_objset_own(zv->zv_name, DMU_OST_ZVOL, 1, zvol_tag, &objset);
	if (rc != 0)
		return (SET_ERROR(-rc));

	zv->zv_objset = objset;

	/*
	 * Run the fallible lookups in order; once one of them fails the
	 * remaining ones are skipped and rc keeps the first error.
	 */
	rc = dsl_prop_get_integer(zv->zv_name, "readonly", &readonly_prop,
	    NULL);
	if (rc == 0)
		rc = zap_lookup(objset, ZVOL_ZAP_OBJ, "size", 8, 1,
		    &size_bytes);
	if (rc == 0)
		rc = dmu_bonus_hold(objset, ZVOL_OBJ, zvol_tag, &zv->zv_dbuf);

	if (rc != 0) {
		/* Undo the ownership taken above. */
		dmu_objset_disown(objset, zvol_tag);
		zv->zv_objset = NULL;
		return (SET_ERROR(-rc));
	}

	/* set_capacity() is handed the volume size divided by 512 (>> 9). */
	set_capacity(zv->zv_disk, size_bytes >> 9);
	zv->zv_volsize = size_bytes;
	zv->zv_zilog = zil_open(objset, zvol_get_data);

	/*
	 * The disk is writable only when the property is clear, the objset
	 * is not a snapshot, and the pool itself is writeable.
	 */
	if (!readonly_prop && !dmu_objset_is_snapshot(objset) &&
	    spa_writeable(dmu_objset_spa(objset))) {
		set_disk_ro(zv->zv_disk, 0);
		zv->zv_flags &= ~ZVOL_RDONLY;
	} else {
		set_disk_ro(zv->zv_disk, 1);
		zv->zv_flags |= ZVOL_RDONLY;
	}

	return (SET_ERROR(-rc));
}
Пример #2
0
/*
 * Replay the intent log of a filesystem that is being mounted.
 *
 * The VFS_RDONLY bit is cleared while the replay runs so that the replayed
 * operations are allowed to succeed; whatever value the bit had on entry is
 * OR-ed back in afterwards.
 */
static void
zfsvfs_replay_log(zfsvfs_t *zfsvfs)
{
	boolean_t saved_rdonly;

	saved_rdonly = zfsvfs->z_vfs->vfs_flag & VFS_RDONLY;
	zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY;

	/* Parse and replay the intent log, then drain the unlinked set. */
	zil_replay(zfsvfs->z_os, zfsvfs, &zfsvfs->z_assign,
	    zfs_replay_vector, zfs_unlinked_drain);
	zfs_unlinked_drain(zfsvfs);

	/* Put the read-only bit back if it was set. */
	zfsvfs->z_vfs->vfs_flag |= saved_rdonly;
}

/*
 * Finish setting up a zfsvfs: register the property callbacks, make the
 * objset's user pointer track the zfsvfs, replay the intent log when
 * `mounting` is set, and open the ZIL unless zil_disable is set.
 *
 * Returns 0 on success, or the error from zfs_register_callbacks().
 */
static int
zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
{
	int rc = zfs_register_callbacks(zfsvfs->z_vfs);

	if (rc != 0)
		return (rc);

	/* Point the objset's user_ptr at this zfsvfs, under its lock. */
	mutex_enter(&zfsvfs->z_os->os->os_user_ptr_lock);
	dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
	mutex_exit(&zfsvfs->z_os->os->os_user_ptr_lock);

	/*
	 * When we are not mounting (ie: online recv) there is no log to
	 * replay: all operations were blocked out since the ZIL was closed.
	 */
	if (mounting)
		zfsvfs_replay_log(zfsvfs);

	if (zil_disable == 0)
		zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data);

	return (0);
}
Пример #3
0
/*
 * Finish setting up a ZFS super block.
 *
 * Registers the property callbacks, makes the objset's user pointer track
 * zsb, and opens the ZIL.  When `mounting` is set it additionally drains
 * the unlinked set (read-write mounts only) and replays or destroys the
 * intent log.
 *
 * Returns 0 on success, or the error from zfs_register_callbacks().
 */
int
zfs_sb_setup(zfs_sb_t *zsb, boolean_t mounting)
{
	boolean_t was_readonly;
	int rc;

	rc = zfs_register_callbacks(zsb);
	if (rc != 0)
		return (rc);

	/* Point the objset's user_ptr at this zsb, under its lock. */
	mutex_enter(&zsb->z_os->os_user_ptr_lock);
	dmu_objset_set_user(zsb->z_os, zsb);
	mutex_exit(&zsb->z_os->os_user_ptr_lock);

	zsb->z_log = zil_open(zsb->z_os, zfs_get_data);

	/*
	 * When we are not mounting (ie: online recv) there is no log to
	 * replay: all operations were blocked out since the ZIL was closed.
	 */
	if (!mounting)
		return (0);

	/*
	 * A read-only filesystem has its read-only state lifted for the
	 * duration of the replay so that the replay can succeed; it is
	 * restored at the end.  Otherwise the unlinked set is drained now.
	 */
	was_readonly = zfs_is_readonly(zsb);
	if (was_readonly == 0)
		zfs_unlinked_drain(zsb);
	else
		readonly_changed_cb(zsb, B_FALSE);

	/*
	 * Parse and replay the intent log.
	 *
	 * Because of ziltest this has to come after zfs_unlinked_drain().
	 * (ziltest does not use readonly mounts, which are the case where
	 * zfs_unlinked_drain() is not called.)  ziltest makes spa_sync()
	 * believe it has committed when it has not, so the intent log ends
	 * up holding many txgs' worth of changes.
	 *
	 * Concretely: suppose object N is in the unlinked set in the last
	 * txg that really synced.  It could then be freed in a later txg
	 * and reallocated in a still later one, which writes a "create
	 * object N" record to the intent log.  Normally that is harmless,
	 * because spa_sync() would have written out that object N is free
	 * before the "create object N" record could be written.
	 *
	 * In ziltest mode, however, the "open txg" advances without the
	 * changes actually being spa_sync()-ed to disk.  We would then see
	 * object N still allocated and in the unlinked set, together with
	 * an intent log record asking to allocate it.
	 */
	if (spa_writeable(dmu_objset_spa(zsb->z_os))) {
		if (!zil_replay_disable) {
			zsb->z_replay = B_TRUE;
			zil_replay(zsb->z_os, zsb, zfs_replay_vector);
			zsb->z_replay = B_FALSE;
		} else {
			zil_destroy(zsb->z_log, B_FALSE);
		}
	}

	/* Restore the read-only state lifted above. */
	if (was_readonly != 0)
		readonly_changed_cb(zsb, B_TRUE);

	return (0);
}
Пример #4
0
/*
 * Mount the dataset named `osname` on the mount `mp`.
 *
 * Allocates and initializes a zfsvfs_t, attaches it to `mp` as the
 * filesystem-private data, opens the objset (read-only when the dataset's
 * "readonly" property is set, or as a fallback when the first open returns
 * EROFS), runs zfs_init_fs(), and records the mount time.
 *
 *   mp        - mount being set up
 *   mount_dev - only referenced inside the `#ifndef __APPLE__` section
 *   osname    - dataset name; also copied into the mount's f_mntfromname
 *   ctx       - supplies the credentials passed to zfs_init_fs()
 *
 * Returns 0 on success or an error code.  On failure the objset (if open)
 * is closed and the zfsvfs_t, with its locks and znode list, is torn down.
 */
static int
zfs_domount(struct mount *mp, dev_t mount_dev, char *osname, vfs_context_t ctx)
{
	uint64_t readonly;
	int error = 0;
	int mode;
	zfsvfs_t *zfsvfs;
	znode_t *zp = NULL;
	struct timeval tv;

	ASSERT(mp);
	ASSERT(osname);

	/*
	 * Initialize the zfs-specific filesystem structure.
	 * Should probably make this a kmem cache, shuffle fields,
	 * and just bzero up to z_hold_mtx[].
	 */
	zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);
	zfsvfs->z_vfs = mp;
	zfsvfs->z_parent = zfsvfs;
	zfsvfs->z_assign = TXG_NOWAIT;
	zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE;
	zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE;

	/* Everything initialized here is destroyed again on the error path. */
	mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
	list_create(&zfsvfs->z_all_znodes, sizeof (znode_t),
	    offsetof(znode_t, z_link_node));
	rw_init(&zfsvfs->z_unmount_lock, NULL, RW_DEFAULT, NULL);
	rw_init(&zfsvfs->z_unmount_inactive_lock, NULL, RW_DEFAULT, NULL);
#ifndef __APPLE__
	/*
	 * NOTE(review): this section is compiled only when __APPLE__ is not
	 * defined, and it uses `vfsp`, which is not declared in this
	 * function - confirm it is never built in that configuration.
	 */
	/* Initialize the generic filesystem structure. */
	vfsp->vfs_bcount = 0;
	vfsp->vfs_data = NULL;

	if (zfs_create_unique_device(&mount_dev) == -1) {
		error = ENODEV;
		goto out;
	}
	ASSERT(vfs_devismounted(mount_dev) == 0);
#endif

	/* Attach the zfsvfs to the mount as its private data. */
	vfs_setfsprivate(mp, zfsvfs);

	if (error = dsl_prop_get_integer(osname, "readonly", &readonly, NULL))
		goto out;

	/*
	 * A dataset with the "readonly" property set is opened read-only and
	 * the mount itself is flagged MNT_RDONLY.
	 */
	if (readonly) {
		mode = DS_MODE_PRIMARY | DS_MODE_READONLY;
		vfs_setflags(mp, (u_int64_t)((unsigned int)MNT_RDONLY));
	} else {
		mode = DS_MODE_PRIMARY;
	}
	error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os);
	/*
	 * The open failed with EROFS: retry it read-only.
	 * NOTE(review): unlike the property-driven case above, this fallback
	 * does not set MNT_RDONLY on the mount, so the vfs_isrdonly() check
	 * further down still sees a read-write mount - confirm intended.
	 */
	if (error == EROFS) {
		mode = DS_MODE_PRIMARY | DS_MODE_READONLY;
		error = dmu_objset_open(osname, DMU_OST_ZFS, mode,
		    &zfsvfs->z_os);
	}

	if (error)
		goto out;

	if (error = zfs_init_fs(zfsvfs, &zp, (cred_t *) vfs_context_ucred(ctx)))
		goto out;

	/* The call to zfs_init_fs leaves the vnode held, release it here. */
	vnode_put(ZTOV(zp));

	if (dmu_objset_is_snapshot(zfsvfs->z_os)) {
		/* Only referenced by the disabled (#if 0) code below. */
		uint64_t xattr;

		/* A snapshot is expected to have been opened read-only. */
		ASSERT(mode & DS_MODE_READONLY);
#if 0
		atime_changed_cb(zfsvfs, B_FALSE);
		readonly_changed_cb(zfsvfs, B_TRUE);
		if (error = dsl_prop_get_integer(osname, "xattr", &xattr, NULL))
			goto out;
		xattr_changed_cb(zfsvfs, xattr);
#endif
		zfsvfs->z_issnap = B_TRUE;
	} else {
		
		/* Drain the unlinked set unless the mount is read-only. */
		if (!vfs_isrdonly(mp))
			zfs_unlinked_drain(zfsvfs);

#ifndef __APPLE__
		/*
		 * Parse and replay the intent log.
		 *
		 * Because of ziltest, this must be done after
		 * zfs_unlinked_drain().  (Further note: ziltest doesn't
		 * use readonly mounts, where zfs_unlinked_drain() isn't
		 * called.)  This is because ziltest causes spa_sync()
		 * to think it's committed, but actually it is not, so
		 * the intent log contains many txg's worth of changes.
		 *
		 * In particular, if object N is in the unlinked set in
		 * the last txg to actually sync, then it could be
		 * actually freed in a later txg and then reallocated in
		 * a yet later txg.  This would write a "create object
		 * N" record to the intent log.  Normally, this would be
		 * fine because the spa_sync() would have written out
		 * the fact that object N is free, before we could write
		 * the "create object N" intent log record.
		 *
		 * But when we are in ziltest mode, we advance the "open
		 * txg" without actually spa_sync()-ing the changes to
		 * disk.  So we would see that object N is still
		 * allocated and in the unlinked set, and there is an
		 * intent log record saying to allocate it.
		 */
		zil_replay(zfsvfs->z_os, zfsvfs, &zfsvfs->z_assign,
		    zfs_replay_vector);

		if (!zil_disable)
			zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data);
#endif
	}

#if 0
	if (!zfsvfs->z_issnap)
		zfsctl_create(zfsvfs);
#endif

	/*
	 * Record the mount time (for Spotlight)
	 */
	microtime(&tv);
	zfsvfs->z_mount_time = tv.tv_sec;
	
out:
	if (error) {
		/*
		 * Undo the setup done above: close the objset if it was
		 * opened, destroy the locks and list, free the zfsvfs.
		 * NOTE(review): the pointer stored with vfs_setfsprivate()
		 * is not cleared here, so `mp` keeps referring to the freed
		 * zfsvfs - confirm nothing reads it after a failed mount.
		 */
		if (zfsvfs->z_os)
			dmu_objset_close(zfsvfs->z_os);
		mutex_destroy(&zfsvfs->z_znodes_lock);
		list_destroy(&zfsvfs->z_all_znodes);
		rw_destroy(&zfsvfs->z_unmount_lock);
		rw_destroy(&zfsvfs->z_unmount_inactive_lock);
		kmem_free(zfsvfs, sizeof (zfsvfs_t));
	} else {
		/*
		 * Success: bump the active filesystem count, copy the
		 * dataset name into f_mntfromname, and request an fsid.
		 */
		OSIncrementAtomic(&zfs_active_fs_count);
		(void) copystr(osname, vfs_statfs(mp)->f_mntfromname, MNAMELEN - 1, 0);
		vfs_getnewfsid(mp);
	}

	return (error);
}
Пример #5
0
/*
 * First-open setup for a zvol.
 *
 * Takes ownership of the objset named by zv->zv_name, reads the dataset's
 * "readonly" property and the "size" entry of ZVOL_ZAP_OBJ, holds the bonus
 * buffer of ZVOL_OBJ into zv->zv_dbuf, opens the ZIL, and sets the disk
 * capacity and read-only state.
 *
 * Returns 0 on success, -ERESTARTSYS when spa_namespace_lock is contended
 * (see below), or the negated error of the step that failed.  Whenever a
 * step fails after the objset has been owned, the objset is disowned and
 * zv->zv_objset is reset to NULL so that no stale pointer is left behind.
 */
static int
zvol_first_open(zvol_state_t *zv)
{
	objset_t *os;
	uint64_t volsize;
	int locked = 0;
	int error;
	uint64_t ro;

	/*
	 * In all other cases the spa_namespace_lock is taken before the
	 * bdev->bd_mutex lock.  But in this case the Linux __blkdev_get()
	 * function calls fops->open() with the bdev->bd_mutex lock held.
	 *
	 * To avoid a potential lock inversion deadlock we preemptively
	 * try to take the spa_namespace_lock().  Normally it will not
	 * be contended and this is safe because spa_open_common() handles
	 * the case where the caller already holds the spa_namespace_lock.
	 *
	 * When it is contended we risk a lock inversion if we were to
	 * block waiting for the lock.  Luckily, the __blkdev_get()
	 * function allows us to return -ERESTARTSYS which will result in
	 * bdev->bd_mutex being dropped, reacquired, and fops->open() being
	 * called again.  This process can be repeated safely until both
	 * locks are acquired.
	 */
	if (!mutex_owned(&spa_namespace_lock)) {
		locked = mutex_tryenter(&spa_namespace_lock);
		if (!locked)
			return (-ERESTARTSYS);
	}

	/* lie and say we're read-only */
	error = dmu_objset_own(zv->zv_name, DMU_OST_ZVOL, 1, zvol_tag, &os);
	if (error)
		goto out_mutex;

	zv->zv_objset = os;

	/*
	 * Read the property up front and treat a failure as an ordinary
	 * open error rather than asserting on it: at this point the only
	 * thing to undo is the objset ownership.
	 */
	error = dsl_prop_get_integer(zv->zv_name, "readonly", &ro, NULL);
	if (error)
		goto out_owned;

	error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize);
	if (error)
		goto out_owned;

	error = dmu_bonus_hold(os, ZVOL_OBJ, zvol_tag, &zv->zv_dbuf);
	if (error)
		goto out_owned;

	/* set_capacity() is handed the volume size divided by 512 (>> 9). */
	set_capacity(zv->zv_disk, volsize >> 9);
	zv->zv_volsize = volsize;
	zv->zv_zilog = zil_open(os, zvol_get_data);

	if (ro || dmu_objset_is_snapshot(os) ||
	    !spa_writeable(dmu_objset_spa(os))) {
		set_disk_ro(zv->zv_disk, 1);
		zv->zv_flags |= ZVOL_RDONLY;
	} else {
		set_disk_ro(zv->zv_disk, 0);
		zv->zv_flags &= ~ZVOL_RDONLY;
	}

out_owned:
	/* Single cleanup point for every failure after the objset is owned. */
	if (error) {
		dmu_objset_disown(os, zvol_tag);
		zv->zv_objset = NULL;
	}

out_mutex:
	/* Only drop the namespace lock if this function took it. */
	if (locked)
		mutex_exit(&spa_namespace_lock);

	return (-error);
}
Пример #6
0
/*
 * Mount the dataset named `osname` on `vfsp`.
 *
 * Allocates and initializes a zfsvfs_t, assigns a unique device, fills in
 * the generic vfs fields (block size comes from the "recordsize" property),
 * opens the objset (read-only when the "readonly" property is set, or as a
 * fallback when the first open returns EROFS) and runs zfs_init_fs().
 * Snapshots are then marked read-only with atime off; other datasets get
 * their property callbacks registered, a delete thread started, the intent
 * log replayed and (unless zil_disable is set) the ZIL opened.
 *
 *   vfsp   - vfs being mounted
 *   osname - name of the dataset to mount
 *   cr     - credentials passed to zfs_init_fs()
 *
 * Returns 0 on success or an error code.  On failure the objset (if open)
 * is closed and the zfsvfs_t is freed after the mutex, list and rwlock
 * initialized at the top of the function have been destroyed.
 */
static int
zfs_domount(vfs_t *vfsp, char *osname, cred_t *cr)
{
	dev_t mount_dev;
	uint64_t recordsize, readonly;
	int error = 0;
	int mode;
	zfsvfs_t *zfsvfs;
	znode_t *zp = NULL;

	ASSERT(vfsp);
	ASSERT(osname);

	/*
	 * Initialize the zfs-specific filesystem structure.
	 * Should probably make this a kmem cache, shuffle fields,
	 * and just bzero up to z_hold_mtx[].
	 */
	zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);
	zfsvfs->z_vfs = vfsp;
	zfsvfs->z_parent = zfsvfs;
	zfsvfs->z_assign = TXG_NOWAIT;
	zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE;
	zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE;

	/* Everything initialized here is destroyed again on the error path. */
	mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
	list_create(&zfsvfs->z_all_znodes, sizeof (znode_t),
	    offsetof(znode_t, z_link_node));
	rw_init(&zfsvfs->z_um_lock, NULL, RW_DEFAULT, NULL);

	/* Initialize the generic filesystem structure. */
	vfsp->vfs_bcount = 0;
	vfsp->vfs_data = NULL;

	if (zfs_create_unique_device(&mount_dev) == -1) {
		error = ENODEV;
		goto out;
	}
	ASSERT(vfs_devismounted(mount_dev) == 0);

	error = dsl_prop_get_integer(osname, "recordsize", &recordsize, NULL);
	if (error)
		goto out;

	vfsp->vfs_dev = mount_dev;
	vfsp->vfs_fstype = zfsfstype;
	vfsp->vfs_bsize = recordsize;
	vfsp->vfs_flag |= VFS_NOTRUNC;
	vfsp->vfs_data = zfsvfs;

	error = dsl_prop_get_integer(osname, "readonly", &readonly, NULL);
	if (error)
		goto out;

	if (readonly)
		mode = DS_MODE_PRIMARY | DS_MODE_READONLY;
	else
		mode = DS_MODE_PRIMARY;

	error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os);
	if (error == EROFS) {
		/* The open failed with EROFS: retry it read-only. */
		mode = DS_MODE_PRIMARY | DS_MODE_READONLY;
		error = dmu_objset_open(osname, DMU_OST_ZFS, mode,
		    &zfsvfs->z_os);
	}

	if (error)
		goto out;

	error = zfs_init_fs(zfsvfs, &zp, cr);
	if (error)
		goto out;

	/* The call to zfs_init_fs leaves the vnode held, release it here. */
	VN_RELE(ZTOV(zp));

	if (dmu_objset_is_snapshot(zfsvfs->z_os)) {
		/* A snapshot is expected to have been opened read-only. */
		ASSERT(mode & DS_MODE_READONLY);
		atime_changed_cb(zfsvfs, B_FALSE);
		readonly_changed_cb(zfsvfs, B_TRUE);
		zfsvfs->z_issnap = B_TRUE;
	} else {
		error = zfs_register_callbacks(vfsp);
		if (error)
			goto out;

		/*
		 * Start a delete thread running.
		 */
		(void) zfs_delete_thread_target(zfsvfs, 1);

		/*
		 * Parse and replay the intent log.
		 */
		zil_replay(zfsvfs->z_os, zfsvfs, &zfsvfs->z_assign,
		    zfs_replay_vector, (void (*)(void *))zfs_delete_wait_empty);

		if (!zil_disable)
			zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data);
	}

	if (!zfsvfs->z_issnap)
		zfsctl_create(zfsvfs);
out:
	if (error) {
		if (zfsvfs->z_os)
			dmu_objset_close(zfsvfs->z_os);
		/*
		 * Tear down what was initialized at the top of the function
		 * before the structure that contains it is freed.
		 */
		mutex_destroy(&zfsvfs->z_znodes_lock);
		list_destroy(&zfsvfs->z_all_znodes);
		rw_destroy(&zfsvfs->z_um_lock);
		kmem_free(zfsvfs, sizeof (zfsvfs_t));
	} else {
		atomic_add_32(&zfs_active_fs_count, 1);
	}

	return (error);
}
Пример #7
0
/*
 * Create a minor node for the volume named in zc->zc_name.
 *
 * With zvol_state_lock held for the whole operation, this opens the
 * volume's objset, reads its "size", picks a minor number (the one named
 * by an existing /dev/zvol symlink when that is still free), allocates
 * the soft state, creates the character and block minor nodes, fills in
 * the zvol state, replays the intent log and registers the "readonly"
 * property callback.
 *
 * Returns 0 on success; EEXIST if a minor already exists for the name;
 * the error from dmu_objset_open() or zap_lookup(); ENXIO if no minor
 * number could be allocated; EAGAIN if the soft state or either minor
 * node could not be created.  Every failure undoes, in reverse order,
 * whatever had been set up so far.
 */
int
zvol_create_minor(zfs_cmd_t *zc)
{
	char *name = zc->zc_name;
	dev_t dev = zc->zc_dev;
	size_t devpathlen = strlen(ZVOL_FULL_DEV_DIR) + 1 + strlen(name) + 1;
	int open_mode = DS_MODE_PRIMARY;
	minor_t minor = 0;
	vnode_t *vp = NULL;
	struct pathname linkpath;
	char chrbuf[30], blkbuf[30];
	char *devpath;
	zvol_state_t *zv;
	objset_t *os;
	uint64_t volsize;
	int lookup_rc;
	int rc;

	mutex_enter(&zvol_state_lock);

	if (zvol_minor_lookup(name) != NULL) {
		rc = EEXIST;
		goto out_unlock;
	}

	/* A name containing '@' is opened read-only. */
	if (strchr(name, '@') != NULL)
		open_mode |= DS_MODE_READONLY;

	rc = dmu_objset_open(name, DMU_OST_ZVOL, open_mode, &os);
	if (rc != 0)
		goto out_unlock;

	rc = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize);
	if (rc != 0)
		goto out_close;

	/*
	 * If there's an existing /dev/zvol symlink, try to use the
	 * same minor number we used last time.  A failure anywhere in
	 * this lookup is not an error; it just leaves minor at 0.
	 */
	devpath = kmem_alloc(devpathlen, KM_SLEEP);
	(void) sprintf(devpath, "%s/%s", ZVOL_FULL_DEV_DIR, name);
	lookup_rc = lookupname(devpath, UIO_SYSSPACE, NO_FOLLOW, NULL, &vp);
	kmem_free(devpath, devpathlen);

	if (lookup_rc == 0 && vp->v_type == VLNK) {
		pn_alloc(&linkpath);
		if (pn_getsymlink(vp, &linkpath, kcred) == 0) {
			char *num = strstr(linkpath.pn_path, ZVOL_PSEUDO_DEV);

			/* The minor number follows ZVOL_PSEUDO_DEV. */
			if (num != NULL) {
				num += strlen(ZVOL_PSEUDO_DEV);
				minor = stoi(&num);
			}
		}
		pn_free(&linkpath);
	}

	if (vp != NULL)
		VN_RELE(vp);

	/*
	 * If we found a minor but it's already in use, we must pick a new one.
	 */
	if (minor != 0 && ddi_get_soft_state(zvol_state, minor) != NULL)
		minor = 0;

	if (minor == 0)
		minor = zvol_minor_alloc();

	if (minor == 0) {
		rc = ENXIO;
		goto out_close;
	}

	if (ddi_soft_state_zalloc(zvol_state, minor) != DDI_SUCCESS) {
		rc = EAGAIN;
		goto out_close;
	}

	(void) ddi_prop_update_string(minor, zfs_dip, ZVOL_PROP_NAME, name);

	/* Character (raw) minor node. */
	(void) sprintf(chrbuf, "%uc,raw", minor);
	if (ddi_create_minor_node(zfs_dip, chrbuf, S_IFCHR,
	    minor, DDI_PSEUDO, 0) == DDI_FAILURE) {
		rc = EAGAIN;
		goto out_free_state;
	}

	/* Block minor node. */
	(void) sprintf(blkbuf, "%uc", minor);
	if (ddi_create_minor_node(zfs_dip, blkbuf, S_IFBLK,
	    minor, DDI_PSEUDO, 0) == DDI_FAILURE) {
		rc = EAGAIN;
		goto out_remove_chr;
	}

	/* Nothing below can fail (short of the VERIFY): fill in the state. */
	zv = ddi_get_soft_state(zvol_state, minor);

	(void) strcpy(zv->zv_name, name);
	zv->zv_min_bs = DEV_BSHIFT;
	zv->zv_minor = minor;
	zv->zv_volsize = volsize;
	zv->zv_objset = os;
	zv->zv_mode = open_mode;
	zv->zv_zilog = zil_open(os, NULL);

	rw_init(&zv->zv_dslock, NULL, RW_DEFAULT, NULL);

	zil_replay(os, zv, &zv->zv_txg_assign, zvol_replay_vector, NULL);

	zvol_size_changed(zv, dev);

	/* XXX this should handle the possible i/o error */
	VERIFY(dsl_prop_register(dmu_objset_ds(zv->zv_objset),
	    "readonly", zvol_readonly_changed_cb, zv) == 0);

	zvol_minors++;

	mutex_exit(&zvol_state_lock);

	return (0);

	/* Failure unwinding: each label undoes one step, newest first. */
out_remove_chr:
	ddi_remove_minor_node(zfs_dip, chrbuf);
out_free_state:
	ddi_soft_state_free(zvol_state, minor);
out_close:
	dmu_objset_close(os);
out_unlock:
	mutex_exit(&zvol_state_lock);

	return (rc);
}