Example #1
0
int
spa_history_log_nvl(spa_t *spa, nvlist_t *nvl)
{
	int err = 0;
	dmu_tx_t *tx;
	nvlist_t *nvarg;

	if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY || !spa_writeable(spa))
		return (SET_ERROR(EINVAL));

	tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
	err = dmu_tx_assign(tx, TXG_WAIT);
	if (err) {
		dmu_tx_abort(tx);
		return (err);
	}

	VERIFY0(nvlist_dup(nvl, &nvarg, KM_PUSHPAGE));
	if (spa_history_zone() != NULL) {
		fnvlist_add_string(nvarg, ZPOOL_HIST_ZONE,
		    spa_history_zone());
	}
	fnvlist_add_uint64(nvarg, ZPOOL_HIST_WHO, crgetruid(CRED()));

	/* Kick this off asynchronously; errors are ignored. */
	dsl_sync_task_nowait(spa_get_dsl(spa), spa_history_log_sync,
	    nvarg, 0, tx);
	dmu_tx_commit(tx);

	/* spa_history_log_sync will free nvl */
	return (err);

}
Example #2
0
boolean_t
dmu_objset_userused_enabled(objset_impl_t *os)
{
	return (spa_version(os->os_spa) >= SPA_VERSION_USERSPACE &&
	    used_cbs[os->os_phys->os_type] &&
	    os->os_userused_dnode);
}
Example #3
0
int
dsl_deleg_set(const char *ddname, nvlist_t *nvp, boolean_t unset)
{
	dsl_dir_t *dd;
	int error;
	nvpair_t *whopair = NULL;
	int blocks_modified = 0;

	error = dsl_dir_open(ddname, FTAG, &dd, NULL);
	if (error)
		return (error);

	if (spa_version(dmu_objset_spa(dd->dd_pool->dp_meta_objset)) <
	    SPA_VERSION_DELEGATED_PERMS) {
		dsl_dir_close(dd, FTAG);
		return (ENOTSUP);
	}

	while ((whopair = nvlist_next_nvpair(nvp, whopair)))
		blocks_modified++;

	error = dsl_sync_task_do(dd->dd_pool, NULL,
	    unset ? dsl_deleg_unset_sync : dsl_deleg_set_sync,
	    dd, nvp, blocks_modified);
	dsl_dir_close(dd, FTAG);

	return (error);
}
Example #4
0
int
dmu_spill_hold_existing(dmu_buf_t *bonus, void *tag, dmu_buf_t **dbp)
{
	dmu_buf_impl_t *db = (dmu_buf_impl_t *)bonus;
	dnode_t *dn;
	int err;

	DB_DNODE_ENTER(db);
	dn = DB_DNODE(db);

	if (spa_version(dn->dn_objset->os_spa) < SPA_VERSION_SA) {
		err = EINVAL;
	} else {
		rw_enter(&dn->dn_struct_rwlock, RW_READER);

		if (!dn->dn_have_spill) {
			err = ENOENT;
		} else {
			err = dmu_spill_hold_by_dnode(dn,
			    DB_RF_HAVESTRUCT | DB_RF_CANFAIL, tag, dbp);
		}

		rw_exit(&dn->dn_struct_rwlock);
	}

	DB_DNODE_EXIT(db);
	return (err);
}
Example #5
0
boolean_t
dmu_objset_userused_enabled(objset_t *os)
{
	return (spa_version(os->os_spa) >= SPA_VERSION_USERSPACE &&
	    used_cbs[os->os_phys->os_type] != NULL &&
	    DMU_USERUSED_DNODE(os) != NULL);
}
Example #6
0
int
dsl_destroy_snapshot_check_impl(dsl_dataset_t *ds, boolean_t defer)
{
	if (!ds->ds_is_snapshot)
		return (SET_ERROR(EINVAL));

	if (dsl_dataset_long_held(ds))
		return (SET_ERROR(EBUSY));

	/*
	 * Only allow deferred destroy on pools that support it.
	 * NOTE: deferred destroy is only supported on snapshots.
	 */
	if (defer) {
		if (spa_version(ds->ds_dir->dd_pool->dp_spa) <
		    SPA_VERSION_USERREFS)
			return (SET_ERROR(ENOTSUP));
		return (0);
	}

	/*
	 * If this snapshot has an elevated user reference count,
	 * we can't destroy it yet.
	 */
	if (ds->ds_userrefs > 0)
		return (SET_ERROR(EBUSY));

	/*
	 * Can't delete a branch point.
	 */
	if (dsl_dataset_phys(ds)->ds_num_children > 1)
		return (SET_ERROR(EEXIST));

	return (0);
}
Example #7
0
void
dsl_prop_check_prediction(dsl_dir_t *dd, dsl_prop_setarg_t *psa)
{
	zfs_prop_t prop = zfs_name_to_prop(psa->psa_name);
	uint64_t intval;
	char setpoint[MAXNAMELEN];
	uint64_t version = spa_version(dd->dd_pool->dp_spa);
	int err;

	if (version < SPA_VERSION_RECVD_PROPS) {
		switch (prop) {
		case ZFS_PROP_QUOTA:
		case ZFS_PROP_RESERVATION:
			return;
		default:
			break;
		}
	}

	err = dsl_prop_get_dd(dd, psa->psa_name, 8, 1, &intval,
	    setpoint, B_FALSE);
	if (err == 0 && intval != psa->psa_effective_value) {
		cmn_err(CE_PANIC, "%s property, source: %x, "
		    "predicted effective value: %llu, "
		    "actual effective value: %llu (setpoint: %s)",
		    psa->psa_name, psa->psa_source,
		    (unsigned long long)psa->psa_effective_value,
		    (unsigned long long)intval, setpoint);
	}
}
Example #8
0
void
dsl_destroy_snapshot_sync_impl(dsl_dataset_t *ds, boolean_t defer, dmu_tx_t *tx)
{
#ifdef ZFS_DEBUG
	int err;
#endif
	int after_branch_point = FALSE;
	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	objset_t *mos = dp->dp_meta_objset;
	dsl_dataset_t *ds_prev = NULL;
	uint64_t obj, old_unique, used = 0, comp = 0, uncomp = 0;
	dsl_dataset_t *ds_next, *ds_head, *hds;


	ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
	ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg);
	ASSERT(refcount_is_zero(&ds->ds_longholds));

	if (defer &&
	    (ds->ds_userrefs > 0 || ds->ds_phys->ds_num_children > 1)) {
		ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS);
		dmu_buf_will_dirty(ds->ds_dbuf, tx);
		ds->ds_phys->ds_flags |= DS_FLAG_DEFER_DESTROY;
		spa_history_log_internal_ds(ds, "defer_destroy", tx, "");
		return;
	}

	ASSERT3U(ds->ds_phys->ds_num_children, <=, 1);

	/* We need to log before removing it from the namespace. */
	spa_history_log_internal_ds(ds, "destroy", tx, "");

	dsl_scan_ds_destroyed(ds, tx);

	obj = ds->ds_object;

	if (ds->ds_phys->ds_prev_snap_obj != 0) {
		ASSERT3P(ds->ds_prev, ==, NULL);
		VERIFY0(dsl_dataset_hold_obj(dp,
		    ds->ds_phys->ds_prev_snap_obj, FTAG, &ds_prev));
		after_branch_point =
		    (ds_prev->ds_phys->ds_next_snap_obj != obj);

		dmu_buf_will_dirty(ds_prev->ds_dbuf, tx);
		if (after_branch_point &&
		    ds_prev->ds_phys->ds_next_clones_obj != 0) {
			dsl_dataset_remove_from_next_clones(ds_prev, obj, tx);
			if (ds->ds_phys->ds_next_snap_obj != 0) {
				VERIFY0(zap_add_int(mos,
				    ds_prev->ds_phys->ds_next_clones_obj,
				    ds->ds_phys->ds_next_snap_obj, tx));
			}
		}
		if (!after_branch_point) {
			ds_prev->ds_phys->ds_next_snap_obj =
			    ds->ds_phys->ds_next_snap_obj;
		}
	}
Example #9
0
void
spa_history_log_version(spa_t *spa, const char *operation)
{
	spa_history_log_internal(spa, operation, NULL,
	    "pool version %llu; software version %llu/%d; uts %s %s %s %s",
	    (u_longlong_t)spa_version(spa), SPA_VERSION, ZPL_VERSION,
	    utsname.nodename, utsname.release, utsname.version,
	    utsname.machine);
}
Example #10
0
static void
zhack_spa_open(const char *target, boolean_t readonly, void *tag, spa_t **spa)
{
	int err;

	import_pool(target, readonly);

	zfeature_checks_disable = B_TRUE;
	err = spa_open(target, spa, tag);
	zfeature_checks_disable = B_FALSE;

	if (err != 0)
		fatal("cannot open '%s': %s", target, strerror(err));
	if (spa_version(*spa) < SPA_VERSION_FEATURES) {
		fatal("'%s' has version %d, features not enabled", target,
		    (int)spa_version(*spa));
	}
}
Example #11
0
void
spa_history_log_version(spa_t *spa, const char *operation, dmu_tx_t *tx)
{
	utsname_t *u = utsname();

	spa_history_log_internal(spa, operation, tx,
	    "pool version %llu; software version %llu/%llu; uts %s %s %s %s",
	    (u_longlong_t)spa_version(spa), SPA_VERSION, ZPL_VERSION,
	    u->nodename, u->release, u->version, u->machine);
}
Example #12
0
uint64_t
bplist_create(objset_t *mos, int blocksize, dmu_tx_t *tx)
{
	int size;

	size = spa_version(dmu_objset_spa(mos)) < SPA_VERSION_BPLIST_ACCOUNT ?
	    BPLIST_SIZE_V0 : sizeof (bplist_phys_t);

	return (dmu_object_alloc(mos, DMU_OT_BPLIST, blocksize,
	    DMU_OT_BPLIST_HDR, size, tx));
}
Example #13
0
File: zfs_vfsops.c Project: nwf/zfs
int
zfs_set_version(zfs_sb_t *zsb, uint64_t newvers)
{
	int error;
	objset_t *os = zsb->z_os;
	dmu_tx_t *tx;

	if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION)
		return (SET_ERROR(EINVAL));

	if (newvers < zsb->z_version)
		return (SET_ERROR(EINVAL));

	if (zfs_spa_version_map(newvers) >
	    spa_version(dmu_objset_spa(zsb->z_os)))
		return (SET_ERROR(ENOTSUP));

	tx = dmu_tx_create(os);
	dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_FALSE, ZPL_VERSION_STR);
	if (newvers >= ZPL_VERSION_SA && !zsb->z_use_sa) {
		dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE,
		    ZFS_SA_ATTRS);
		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
	}
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		dmu_tx_abort(tx);
		return (error);
	}

	error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR,
	    8, 1, &newvers, tx);

	if (error) {
		dmu_tx_commit(tx);
		return (error);
	}

	if (newvers >= ZPL_VERSION_SA && !zsb->z_use_sa) {
		uint64_t sa_obj;

		ASSERT3U(spa_version(dmu_objset_spa(zsb->z_os)), >=,
		    SPA_VERSION_SA);
		sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE,
		    DMU_OT_NONE, 0, tx);

		error = zap_add(os, MASTER_NODE_OBJ,
		    ZFS_SA_ATTRS, 8, 1, &sa_obj, tx);
		ASSERT0(error);

		VERIFY(0 == sa_set_sa_object(os, sa_obj));
		sa_register_update_callback(os, zfs_sa_upgrade);
	}
Example #14
0
static int
dsl_dataset_user_hold_check(void *arg, dmu_tx_t *tx)
{
	dsl_dataset_user_hold_arg_t *dduha = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	nvpair_t *pair;

	if (spa_version(dp->dp_spa) < SPA_VERSION_USERREFS)
		return (SET_ERROR(ENOTSUP));

	if (!dmu_tx_is_syncing(tx))
		return (0);

	for (pair = nvlist_next_nvpair(dduha->dduha_holds, NULL);
	    pair != NULL; pair = nvlist_next_nvpair(dduha->dduha_holds, pair)) {
		dsl_dataset_t *ds;
		int error = 0;
		char *htag, *name;

		/* must be a snapshot */
		name = nvpair_name(pair);
		if (strchr(name, '@') == NULL)
			error = SET_ERROR(EINVAL);

		if (error == 0)
			error = nvpair_value_string(pair, &htag);

		if (error == 0)
			error = dsl_dataset_hold(dp, name, FTAG, &ds);

		if (error == 0) {
			error = dsl_dataset_user_hold_check_one(ds, htag,
			    dduha->dduha_minor != 0, tx);
			dsl_dataset_rele(ds, FTAG);
		}

		if (error == 0) {
			fnvlist_add_string(dduha->dduha_chkholds, name, htag);
		} else {
			/*
			 * We register ENOENT errors so they can be correctly
			 * reported if needed, such as when all holds fail.
			 */
			fnvlist_add_int32(dduha->dduha_errlist, name, error);
			if (error != ENOENT)
				return (error);
		}
	}

	return (0);
}
Example #15
0
void
spa_history_log_version(spa_t *spa, const char *operation)
{
#ifdef _KERNEL
	uint64_t current_vers = spa_version(spa);

	spa_history_log_internal(spa, operation, NULL,
	    "pool version %llu; software version %llu/%d; uts %s %s %s %s",
	    (u_longlong_t)current_vers, SPA_VERSION, ZPL_VERSION,
	    utsname.nodename, utsname.release, utsname.version,
	    utsname.machine);
	cmn_err(CE_CONT, "!%s version %llu pool %s using %llu", operation,
	    (u_longlong_t)current_vers, spa_name(spa), SPA_VERSION);
#endif
}
Example #16
0
/*
 * Generate the pool's configuration based on the current in-core state.
 * We infer whether to generate a complete config or just one top-level config
 * based on whether vd is the root vdev.
 */
nvlist_t *
spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats)
{
	nvlist_t *config, *nvroot;
	vdev_t *rvd = spa->spa_root_vdev;

	ASSERT(spa_config_held(spa, RW_READER));

	if (vd == NULL)
		vd = rvd;

	/*
	 * If txg is -1, report the current value of spa->spa_config_txg.
	 */
	if (txg == -1ULL)
		txg = spa->spa_config_txg;

	VERIFY(nvlist_alloc(&config, NV_UNIQUE_NAME, KM_SLEEP) == 0);

	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_VERSION,
	    spa_version(spa)) == 0);
	VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME,
	    spa_name(spa)) == 0);
	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE,
	    spa_state(spa)) == 0);
	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_TXG,
	    txg) == 0);
	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_GUID,
	    spa_guid(spa)) == 0);

	if (vd != rvd) {
		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TOP_GUID,
		    vd->vdev_top->vdev_guid) == 0);
		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_GUID,
		    vd->vdev_guid) == 0);
		if (vd->vdev_isspare)
			VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_IS_SPARE,
			    1ULL) == 0);
		vd = vd->vdev_top;		/* label contains top config */
	}

	nvroot = vdev_config_generate(spa, vd, getstats, B_FALSE);
	VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0);
	nvlist_free(nvroot);

	return (config);
}
Example #17
0
static int
dsl_deleg_check(void *arg, dmu_tx_t *tx)
{
    dsl_deleg_arg_t *dda = arg;
    dsl_dir_t *dd;
    int error;

    if (spa_version(dmu_tx_pool(tx)->dp_spa) <
            SPA_VERSION_DELEGATED_PERMS) {
        return (SET_ERROR(ENOTSUP));
    }

    error = dsl_dir_hold(dmu_tx_pool(tx), dda->dda_name, FTAG, &dd, NULL);
    if (error == 0)
        dsl_dir_rele(dd, FTAG);
    return (error);
}
Example #18
0
void
spa_history_log_version(spa_t *spa, history_internal_events_t event)
{
#ifdef _KERNEL
	uint64_t current_vers = spa_version(spa);

	if (current_vers >= SPA_VERSION_ZPOOL_HISTORY) {
		spa_history_log_internal(event, spa, NULL,
		    "pool spa %llu; zfs spa %llu; zpl %d; uts %s %s %s %s",
		    (u_longlong_t)current_vers, SPA_VERSION, ZPL_VERSION,
		    utsname.nodename, utsname.release, utsname.version,
		    utsname.machine);
	}
	cmn_err(CE_CONT, "!%s version %llu pool %s using %llu",
	    event == LOG_POOL_IMPORT ? "imported" :
	    event == LOG_POOL_CREATE ? "created" : "accessed",
	    (u_longlong_t)current_vers, spa_name(spa), SPA_VERSION);
#endif
}
Example #19
0
/*
 * set all create time permission on new dataset.
 */
void
dsl_deleg_set_create_perms(dsl_dir_t *sdd, dmu_tx_t *tx, cred_t *cr)
{
	dsl_dir_t *dd;
	uint64_t uid = crgetuid(cr);

	if (spa_version(dmu_objset_spa(sdd->dd_pool->dp_meta_objset)) <
	    SPA_VERSION_DELEGATED_PERMS)
		return;

	for (dd = sdd->dd_parent; dd != NULL; dd = dd->dd_parent) {
		uint64_t pzapobj = dd->dd_phys->dd_deleg_zapobj;

		if (pzapobj == 0)
			continue;

		copy_create_perms(sdd, pzapobj, B_FALSE, uid, tx);
		copy_create_perms(sdd, pzapobj, B_TRUE, uid, tx);
	}
}
Example #20
0
static int
dsl_dataset_user_hold_check(void *arg, dmu_tx_t *tx)
{
	dsl_dataset_user_hold_arg_t *dduha = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	nvpair_t *pair;
	int rv = 0;

	if (spa_version(dp->dp_spa) < SPA_VERSION_USERREFS)
		return (ENOTSUP);

	for (pair = nvlist_next_nvpair(dduha->dduha_holds, NULL); pair != NULL;
	    pair = nvlist_next_nvpair(dduha->dduha_holds, pair)) {
		int error = 0;
		dsl_dataset_t *ds;
		char *htag;

		/* must be a snapshot */
		if (strchr(nvpair_name(pair), '@') == NULL)
			error = EINVAL;

		if (error == 0)
			error = nvpair_value_string(pair, &htag);
		if (error == 0) {
			error = dsl_dataset_hold(dp,
			    nvpair_name(pair), FTAG, &ds);
		}
		if (error == 0) {
			error = dsl_dataset_user_hold_check_one(ds, htag,
			    dduha->dduha_minor != 0, tx);
			dsl_dataset_rele(ds, FTAG);
		}

		if (error != 0) {
			rv = error;
			fnvlist_add_int32(dduha->dduha_errlist,
			    nvpair_name(pair), error);
		}
	}
	return (rv);
}
Example #21
0
/*
 * zfs_init_fs - Initialize the zfsvfs struct and the file system
 *	incore "master" object.  Verify version compatibility.
 */
int
zfs_init_fs(zfsvfs_t *zfsvfs, znode_t **zpp, cred_t *cr)
{
	extern int zfsfstype;

	objset_t	*os = zfsvfs->z_os;
	int		i, error;
	dmu_object_info_t doi;
	uint64_t fsid_guid;
	uint64_t zval;

	*zpp = NULL;

	/*
	 * XXX - hack to auto-create the pool root filesystem at
	 * the first attempted mount.
	 */
	if (dmu_object_info(os, MASTER_NODE_OBJ, &doi) == ENOENT) {
		dmu_tx_t *tx = dmu_tx_create(os);
		uint64_t zpl_version;
		nvlist_t *zprops;

		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, TRUE, NULL); /* master */
		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, TRUE, NULL); /* del queue */
		dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); /* root node */
		error = dmu_tx_assign(tx, TXG_WAIT);
		ASSERT3U(error, ==, 0);
		if (spa_version(dmu_objset_spa(os)) >= SPA_VERSION_FUID)
			zpl_version = ZPL_VERSION;
		else
			zpl_version = ZPL_VERSION_FUID - 1;

		VERIFY(nvlist_alloc(&zprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
		VERIFY(nvlist_add_uint64(zprops,
		    zfs_prop_to_name(ZFS_PROP_VERSION), zpl_version) == 0);
		zfs_create_fs(os, cr, zprops, tx);
		nvlist_free(zprops);
		dmu_tx_commit(tx);
	}
Example #22
0
/* ARGSUSED */
static int
zcp_synctask_snapshot(lua_State *state, boolean_t sync, nvlist_t *err_details)
{
	int err;
	dsl_dataset_snapshot_arg_t ddsa = { 0 };
	const char *dsname = lua_tostring(state, 1);
	zcp_run_info_t *ri = zcp_run_info(state);

	/*
	 * On old pools, the ZIL must not be active when a snapshot is created,
	 * but we can't suspend the ZIL because we're already in syncing
	 * context.
	 */
	if (spa_version(ri->zri_pool->dp_spa) < SPA_VERSION_FAST_SNAP) {
		return (ENOTSUP);
	}

	/*
	 * We only allow for a single snapshot rather than a list, so the
	 * error list output is unnecessary.
	 */
	ddsa.ddsa_errors = NULL;
	ddsa.ddsa_props = NULL;
	ddsa.ddsa_cr = ri->zri_cred;
	ddsa.ddsa_snaps = fnvlist_alloc();
	fnvlist_add_boolean(ddsa.ddsa_snaps, dsname);

	zcp_cleanup_handler_t *zch = zcp_register_cleanup(state,
	    (zcp_cleanup_t *)&fnvlist_free, ddsa.ddsa_snaps);

	err = zcp_sync_task(state, dsl_dataset_snapshot_check,
	    dsl_dataset_snapshot_sync, &ddsa, sync, dsname);

	zcp_deregister_cleanup(state, zch);
	fnvlist_free(ddsa.ddsa_snaps);

	return (err);
}
Example #23
0
int
spa_history_log_nvl(spa_t *spa, nvlist_t *nvl)
{
	int err = 0;
	dmu_tx_t *tx;
	nvlist_t *nvarg, *in_nvl = NULL;

	if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY || !spa_writeable(spa))
		return (SET_ERROR(EINVAL));

	err = nvlist_lookup_nvlist(nvl, ZPOOL_HIST_INPUT_NVL, &in_nvl);
	if (err == 0) {
		(void) nvlist_remove_all(in_nvl, ZPOOL_HIDDEN_ARGS);
	}

	tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
	err = dmu_tx_assign(tx, TXG_WAIT);
	if (err) {
		dmu_tx_abort(tx);
		return (err);
	}

	VERIFY0(nvlist_dup(nvl, &nvarg, KM_SLEEP));
	if (spa_history_zone() != NULL) {
		fnvlist_add_string(nvarg, ZPOOL_HIST_ZONE,
		    spa_history_zone());
	}
	fnvlist_add_uint64(nvarg, ZPOOL_HIST_WHO, crgetruid(CRED()));

	/* Kick this off asynchronously; errors are ignored. */
	dsl_sync_task_nowait(spa_get_dsl(spa), spa_history_log_sync,
	    nvarg, 0, ZFS_SPACE_CHECK_NONE, tx);
	dmu_tx_commit(tx);

	/* spa_history_log_sync will free nvl */
	return (err);

}
Example #24
0
uint64_t
dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds, const char *name,
    dmu_tx_t *tx)
{
	objset_t *mos = dp->dp_meta_objset;
	uint64_t ddobj;
	dsl_dir_phys_t *ddphys;
	dmu_buf_t *dbuf;

	ddobj = dmu_object_alloc(mos, DMU_OT_DSL_DIR, 0,
	    DMU_OT_DSL_DIR, sizeof (dsl_dir_phys_t), tx);
	if (pds) {
		VERIFY(0 == zap_add(mos, pds->dd_phys->dd_child_dir_zapobj,
		    name, sizeof (uint64_t), 1, &ddobj, tx));
	} else {
		/* it's the root dir */
		VERIFY(0 == zap_add(mos, DMU_POOL_DIRECTORY_OBJECT,
		    DMU_POOL_ROOT_DATASET, sizeof (uint64_t), 1, &ddobj, tx));
	}
	VERIFY(0 == dmu_bonus_hold(mos, ddobj, FTAG, &dbuf));
	dmu_buf_will_dirty(dbuf, tx);
	ddphys = dbuf->db_data;

	ddphys->dd_creation_time = gethrestime_sec();
	if (pds)
		ddphys->dd_parent_obj = pds->dd_object;
	ddphys->dd_props_zapobj = zap_create(mos,
	    DMU_OT_DSL_PROPS, DMU_OT_NONE, 0, tx);
	ddphys->dd_child_dir_zapobj = zap_create(mos,
	    DMU_OT_DSL_DIR_CHILD_MAP, DMU_OT_NONE, 0, tx);
	if (spa_version(dp->dp_spa) >= SPA_VERSION_USED_BREAKDOWN)
		ddphys->dd_flags |= DD_FLAG_USED_BREAKDOWN;
	dmu_buf_rele(dbuf, FTAG);

	return (ddobj);
}
Example #25
0
/*
 * Predict the effective value of the given special property if it were set with
 * the given value and source. This is not a general purpose function. It exists
 * only to handle the special requirements of the quota and reservation
 * properties. The fact that these properties are non-inheritable greatly
 * simplifies the prediction logic.
 *
 * Returns 0 on success, a positive error code on failure, or -1 if called with
 * a property not handled by this function.
 */
int
dsl_prop_predict(dsl_dir_t *dd, const char *propname,
    zprop_source_t source, uint64_t value, uint64_t *newvalp)
{
	zfs_prop_t prop = zfs_name_to_prop(propname);
	objset_t *mos;
	uint64_t zapobj;
	uint64_t version;
	char *recvdstr;
	int err = 0;

	switch (prop) {
	case ZFS_PROP_QUOTA:
	case ZFS_PROP_RESERVATION:
	case ZFS_PROP_REFQUOTA:
	case ZFS_PROP_REFRESERVATION:
		break;
	default:
		return (-1);
	}

	mos = dd->dd_pool->dp_meta_objset;
	zapobj = dsl_dir_phys(dd)->dd_props_zapobj;
	recvdstr = kmem_asprintf("%s%s", propname, ZPROP_RECVD_SUFFIX);

	version = spa_version(dd->dd_pool->dp_spa);
	if (version < SPA_VERSION_RECVD_PROPS) {
		if (source & ZPROP_SRC_NONE)
			source = ZPROP_SRC_NONE;
		else if (source & ZPROP_SRC_RECEIVED)
			source = ZPROP_SRC_LOCAL;
	}

	switch ((int)source) {
	case ZPROP_SRC_NONE:
		/* Revert to the received value, if any. */
		err = zap_lookup(mos, zapobj, recvdstr, 8, 1, newvalp);
		if (err == ENOENT)
			*newvalp = 0;
		break;
	case ZPROP_SRC_LOCAL:
		*newvalp = value;
		break;
	case ZPROP_SRC_RECEIVED:
		/*
		 * If there's no local setting, then the new received value will
		 * be the effective value.
		 */
		err = zap_lookup(mos, zapobj, propname, 8, 1, newvalp);
		if (err == ENOENT)
			*newvalp = value;
		break;
	case (ZPROP_SRC_NONE | ZPROP_SRC_RECEIVED):
		/*
		 * We're clearing the received value, so the local setting (if
		 * it exists) remains the effective value.
		 */
		err = zap_lookup(mos, zapobj, propname, 8, 1, newvalp);
		if (err == ENOENT)
			*newvalp = 0;
		break;
	default:
		panic("unexpected property source: %d", source);
	}

	strfree(recvdstr);

	if (err == ENOENT)
		return (0);

	return (err);
}
Example #26
0
int
dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
    objset_t **osp)
{
	objset_t *os;
	int i, err = 0;

	ASSERT(ds == NULL || MUTEX_HELD(&ds->ds_opening_lock));

	os = kmem_zalloc(sizeof (objset_t), KM_PUSHPAGE);
	os->os_dsl_dataset = ds;
	os->os_spa = spa;
	os->os_rootbp = bp;
	if (!BP_IS_HOLE(os->os_rootbp)) {
		uint32_t aflags = ARC_WAIT;
		zbookmark_t zb;
		SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET,
		    ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);

		if (DMU_OS_IS_L2CACHEABLE(os))
			aflags |= ARC_L2CACHE;

		dprintf_bp(os->os_rootbp, "reading %s", "");
		/*
		 * XXX when bprewrite scrub can change the bp,
		 * and this is called from dmu_objset_open_ds_os, the bp
		 * could change, and we'll need a lock.
		 */
		err = dsl_read_nolock(NULL, spa, os->os_rootbp,
		    arc_getbuf_func, &os->os_phys_buf,
		    ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &aflags, &zb);
		if (err) {
			kmem_free(os, sizeof (objset_t));
			/* convert checksum errors into IO errors */
			if (err == ECKSUM)
				err = EIO;
			return (err);
		}

		/* Increase the blocksize if we are permitted. */
		if (spa_version(spa) >= SPA_VERSION_USERSPACE &&
		    arc_buf_size(os->os_phys_buf) < sizeof (objset_phys_t)) {
			arc_buf_t *buf = arc_buf_alloc(spa,
			    sizeof (objset_phys_t), &os->os_phys_buf,
			    ARC_BUFC_METADATA);
			bzero(buf->b_data, sizeof (objset_phys_t));
			bcopy(os->os_phys_buf->b_data, buf->b_data,
			    arc_buf_size(os->os_phys_buf));
			(void) arc_buf_remove_ref(os->os_phys_buf,
			    &os->os_phys_buf);
			os->os_phys_buf = buf;
		}

		os->os_phys = os->os_phys_buf->b_data;
		os->os_flags = os->os_phys->os_flags;
	} else {
		int size = spa_version(spa) >= SPA_VERSION_USERSPACE ?
		    sizeof (objset_phys_t) : OBJSET_OLD_PHYS_SIZE;
		os->os_phys_buf = arc_buf_alloc(spa, size,
		    &os->os_phys_buf, ARC_BUFC_METADATA);
		os->os_phys = os->os_phys_buf->b_data;
		bzero(os->os_phys, size);
	}

	/*
	 * Note: the changed_cb will be called once before the register
	 * func returns, thus changing the checksum/compression from the
	 * default (fletcher2/off).  Snapshots don't need to know about
	 * checksum/compression/copies.  But they do need to know about
	 * encryption so that clones from the snaphost inherit the
	 * same encryption property regardless of where in the namespace
	 * they get created.
	 */
	if (ds) {
		err = dsl_prop_register(ds, "primarycache",
		    primary_cache_changed_cb, os);
		if (err == 0)
			err = dsl_prop_register(ds, "secondarycache",
                                    secondary_cache_changed_cb, os);
		if (err == 0)
		  err = dsl_prop_register(ds, "encryption",
					  crypt_changed_cb, os);

		if (!dsl_dataset_is_snapshot(ds)) {
			if (err == 0)
				err = dsl_prop_register(ds, "checksum",
				    checksum_changed_cb, os);
			if (err == 0)
				err = dsl_prop_register(ds, "compression",
				    compression_changed_cb, os);
			if (err == 0)
				err = dsl_prop_register(ds, "copies",
				    copies_changed_cb, os);
			if (err == 0)
				err = dsl_prop_register(ds, "dedup",
				    dedup_changed_cb, os);
			if (err == 0)
				err = dsl_prop_register(ds, "logbias",
				    logbias_changed_cb, os);
			if (err == 0)
				err = dsl_prop_register(ds, "sync",
				    sync_changed_cb, os);
		}
		if (err) {
			VERIFY(arc_buf_remove_ref(os->os_phys_buf,
			    &os->os_phys_buf) == 1);
			kmem_free(os, sizeof (objset_t));
			return (err);
		}
	} else if (ds == NULL) {
        /*
         * It's the meta-objset.
         * Encryption is off for ZFS metadata but on for ZPL metadata
         * and file/zvol contents.
         */
		os->os_checksum = ZIO_CHECKSUM_FLETCHER_4;
		os->os_compress = ZIO_COMPRESS_LZJB;
		os->os_copies = spa_max_replication(spa);
		os->os_dedup_checksum = ZIO_CHECKSUM_OFF;
		os->os_dedup_verify = 0;
		os->os_logbias = 0;
		os->os_sync = 0;
		os->os_primary_cache = ZFS_CACHE_ALL;
		os->os_secondary_cache = ZFS_CACHE_ALL;
        os->os_crypt = ZIO_CRYPT_OFF;
	}

	if (ds == NULL || !dsl_dataset_is_snapshot(ds))
		os->os_zil_header = os->os_phys->os_zil_header;
	os->os_zil = zil_alloc(os, &os->os_zil_header);

	for (i = 0; i < TXG_SIZE; i++) {
		list_create(&os->os_dirty_dnodes[i], sizeof (dnode_t),
		    offsetof(dnode_t, dn_dirty_link[i]));
		list_create(&os->os_free_dnodes[i], sizeof (dnode_t),
		    offsetof(dnode_t, dn_dirty_link[i]));
	}
	list_create(&os->os_dnodes, sizeof (dnode_t),
	    offsetof(dnode_t, dn_link));
	list_create(&os->os_downgraded_dbufs, sizeof (dmu_buf_impl_t),
	    offsetof(dmu_buf_impl_t, db_link));

	mutex_init(&os->os_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&os->os_obj_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&os->os_user_ptr_lock, NULL, MUTEX_DEFAULT, NULL);

	DMU_META_DNODE(os) = dnode_special_open(os,
	    &os->os_phys->os_meta_dnode, DMU_META_DNODE_OBJECT,
	    &os->os_meta_dnode);
	if (arc_buf_size(os->os_phys_buf) >= sizeof (objset_phys_t)) {
		DMU_USERUSED_DNODE(os) = dnode_special_open(os,
		    &os->os_phys->os_userused_dnode, DMU_USERUSED_OBJECT,
		    &os->os_userused_dnode);
		DMU_GROUPUSED_DNODE(os) = dnode_special_open(os,
		    &os->os_phys->os_groupused_dnode, DMU_GROUPUSED_OBJECT,
		    &os->os_groupused_dnode);
	}

	/*
	 * We should be the only thread trying to do this because we
	 * have ds_opening_lock
	 */
	if (ds) {
		mutex_enter(&ds->ds_lock);
		ASSERT(ds->ds_objset == NULL);
		ds->ds_objset = os;
		mutex_exit(&ds->ds_lock);
	}

	*osp = os;

	return (0);
}
Example #27
0
dsl_pool_t *
dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg)
{
	int err;
	dsl_pool_t *dp = dsl_pool_open_impl(spa, txg);
	dmu_tx_t *tx = dmu_tx_create_assigned(dp, txg);
	objset_t *os;
	dsl_dataset_t *ds;
	uint64_t obj;

	rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG);

	/* create and open the MOS (meta-objset) */
	dp->dp_meta_objset = dmu_objset_create_impl(spa,
	    NULL, &dp->dp_meta_rootbp, DMU_OST_META, tx);

	/* create the pool directory */
	err = zap_create_claim(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
	    DMU_OT_OBJECT_DIRECTORY, DMU_OT_NONE, 0, tx);
	ASSERT0(err);

	/* Initialize scan structures */
	VERIFY0(dsl_scan_init(dp, txg));

	/* create and open the root dir */
	dp->dp_root_dir_obj = dsl_dir_create_sync(dp, NULL, NULL, tx);
	VERIFY0(dsl_dir_hold_obj(dp, dp->dp_root_dir_obj,
	    NULL, dp, &dp->dp_root_dir));

	/* create and open the meta-objset dir */
	(void) dsl_dir_create_sync(dp, dp->dp_root_dir, MOS_DIR_NAME, tx);
	VERIFY0(dsl_pool_open_special_dir(dp,
	    MOS_DIR_NAME, &dp->dp_mos_dir));

	if (spa_version(spa) >= SPA_VERSION_DEADLISTS) {
		/* create and open the free dir */
		(void) dsl_dir_create_sync(dp, dp->dp_root_dir,
		    FREE_DIR_NAME, tx);
		VERIFY0(dsl_pool_open_special_dir(dp,
		    FREE_DIR_NAME, &dp->dp_free_dir));

		/* create and open the free_bplist */
		obj = bpobj_alloc(dp->dp_meta_objset, SPA_MAXBLOCKSIZE, tx);
		VERIFY(zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
		    DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj, tx) == 0);
		VERIFY0(bpobj_open(&dp->dp_free_bpobj,
		    dp->dp_meta_objset, obj));
	}

	if (spa_version(spa) >= SPA_VERSION_DSL_SCRUB)
		dsl_pool_create_origin(dp, tx);

	/* create the root dataset */
	obj = dsl_dataset_create_sync_dd(dp->dp_root_dir, NULL, 0, tx);

	/* create the root objset */
	VERIFY0(dsl_dataset_hold_obj(dp, obj, FTAG, &ds));
	os = dmu_objset_create_impl(dp->dp_spa, ds,
	    dsl_dataset_get_blkptr(ds), DMU_OST_ZFS, tx);
#ifdef _KERNEL
	zfs_create_fs(os, kcred, zplprops, tx);
#endif
	dsl_dataset_rele(ds, FTAG);

	dmu_tx_commit(tx);

	rrw_exit(&dp->dp_config_rwlock, FTAG);

	return (dp);
}
Example #28
0
int
dsl_pool_open(dsl_pool_t *dp)
{
	int err;
	dsl_dir_t *dd;
	dsl_dataset_t *ds;
	uint64_t obj;

	rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG);
	err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
	    DMU_POOL_ROOT_DATASET, sizeof (uint64_t), 1,
	    &dp->dp_root_dir_obj);
	if (err)
		goto out;

	err = dsl_dir_hold_obj(dp, dp->dp_root_dir_obj,
	    NULL, dp, &dp->dp_root_dir);
	if (err)
		goto out;

	err = dsl_pool_open_special_dir(dp, MOS_DIR_NAME, &dp->dp_mos_dir);
	if (err)
		goto out;

	if (spa_version(dp->dp_spa) >= SPA_VERSION_ORIGIN) {
		err = dsl_pool_open_special_dir(dp, ORIGIN_DIR_NAME, &dd);
		if (err)
			goto out;
		err = dsl_dataset_hold_obj(dp, dd->dd_phys->dd_head_dataset_obj,
		    FTAG, &ds);
		if (err == 0) {
			err = dsl_dataset_hold_obj(dp,
			    ds->ds_phys->ds_prev_snap_obj, dp,
			    &dp->dp_origin_snap);
			dsl_dataset_rele(ds, FTAG);
		}
		dsl_dir_rele(dd, dp);
		if (err)
			goto out;
	}

	if (spa_version(dp->dp_spa) >= SPA_VERSION_DEADLISTS) {
		err = dsl_pool_open_special_dir(dp, FREE_DIR_NAME,
		    &dp->dp_free_dir);
		if (err)
			goto out;

		err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
		    DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj);
		if (err)
			goto out;
		VERIFY0(bpobj_open(&dp->dp_free_bpobj,
		    dp->dp_meta_objset, obj));
	}

	if (spa_feature_is_active(dp->dp_spa,
	    &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) {
		err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
		    DMU_POOL_BPTREE_OBJ, sizeof (uint64_t), 1,
		    &dp->dp_bptree_obj);
		if (err != 0)
			goto out;
	}

	if (spa_feature_is_active(dp->dp_spa,
	    &spa_feature_table[SPA_FEATURE_EMPTY_BPOBJ])) {
		err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
		    DMU_POOL_EMPTY_BPOBJ, sizeof (uint64_t), 1,
		    &dp->dp_empty_bpobj);
		if (err != 0)
			goto out;
	}

	err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
	    DMU_POOL_TMP_USERREFS, sizeof (uint64_t), 1,
	    &dp->dp_tmp_userrefs_obj);
	if (err == ENOENT)
		err = 0;
	if (err)
		goto out;

	err = dsl_scan_init(dp, dp->dp_tx.tx_open_txg);

out:
	rrw_exit(&dp->dp_config_rwlock, FTAG);
	return (err);
}
Example #29
0
static void
dsl_scan_setup_sync(void *arg, dmu_tx_t *tx)
{
	dsl_scan_t *scn = dmu_tx_pool(tx)->dp_scan;
	pool_scan_func_t *funcp = arg;
	dmu_object_type_t ot = 0;
	dsl_pool_t *dp = scn->scn_dp;
	spa_t *spa = dp->dp_spa;

	ASSERT(scn->scn_phys.scn_state != DSS_SCANNING);
	ASSERT(*funcp > POOL_SCAN_NONE && *funcp < POOL_SCAN_FUNCS);
	bzero(&scn->scn_phys, sizeof (scn->scn_phys));
	scn->scn_phys.scn_func = *funcp;
	scn->scn_phys.scn_state = DSS_SCANNING;
	scn->scn_phys.scn_min_txg = 0;
	scn->scn_phys.scn_max_txg = tx->tx_txg;
	scn->scn_phys.scn_ddt_class_max = DDT_CLASSES - 1; /* the entire DDT */
	scn->scn_phys.scn_start_time = gethrestime_sec();
	scn->scn_phys.scn_errors = 0;
	scn->scn_phys.scn_to_examine = spa->spa_root_vdev->vdev_stat.vs_alloc;
	scn->scn_restart_txg = 0;
	spa_scan_stat_init(spa);

	if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) {
		scn->scn_phys.scn_ddt_class_max = zfs_scrub_ddt_class_max;

		/* rewrite all disk labels */
		vdev_config_dirty(spa->spa_root_vdev);

		if (vdev_resilver_needed(spa->spa_root_vdev,
		    &scn->scn_phys.scn_min_txg, &scn->scn_phys.scn_max_txg)) {
			spa_event_notify(spa, NULL, ESC_ZFS_RESILVER_START);
		} else {
			spa_event_notify(spa, NULL, ESC_ZFS_SCRUB_START);
		}

		spa->spa_scrub_started = B_TRUE;
		/*
		 * If this is an incremental scrub, limit the DDT scrub phase
		 * to just the auto-ditto class (for correctness); the rest
		 * of the scrub should go faster using top-down pruning.
		 */
		if (scn->scn_phys.scn_min_txg > TXG_INITIAL)
			scn->scn_phys.scn_ddt_class_max = DDT_CLASS_DITTO;

	}

	/* back to the generic stuff */

	if (dp->dp_blkstats == NULL) {
		dp->dp_blkstats =
		    kmem_alloc(sizeof (zfs_all_blkstats_t), KM_SLEEP);
	}
	bzero(dp->dp_blkstats, sizeof (zfs_all_blkstats_t));

	if (spa_version(spa) < SPA_VERSION_DSL_SCRUB)
		ot = DMU_OT_ZAP_OTHER;

	scn->scn_phys.scn_queue_obj = zap_create(dp->dp_meta_objset,
	    ot ? ot : DMU_OT_SCAN_QUEUE, DMU_OT_NONE, 0, tx);

	dsl_scan_sync_state(scn, tx);

	spa_history_log_internal(spa, "scan setup", tx,
	    "func=%u mintxg=%llu maxtxg=%llu",
	    *funcp, scn->scn_phys.scn_min_txg, scn->scn_phys.scn_max_txg);
}
Example #30
0
int
dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
    objset_t **osp)
{
	objset_t *os;
	int i, err;

	ASSERT(ds == NULL || MUTEX_HELD(&ds->ds_opening_lock));

	os = kmem_zalloc(sizeof (objset_t), KM_SLEEP);
	os->os_dsl_dataset = ds;
	os->os_spa = spa;
	os->os_rootbp = bp;
	if (!BP_IS_HOLE(os->os_rootbp)) {
		arc_flags_t aflags = ARC_FLAG_WAIT;
		zbookmark_phys_t zb;
		SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET,
		    ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);

		if (DMU_OS_IS_L2CACHEABLE(os))
			aflags |= ARC_FLAG_L2CACHE;
		if (DMU_OS_IS_L2COMPRESSIBLE(os))
			aflags |= ARC_FLAG_L2COMPRESS;

		dprintf_bp(os->os_rootbp, "reading %s", "");
		err = arc_read(NULL, spa, os->os_rootbp,
		    arc_getbuf_func, &os->os_phys_buf,
		    ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &aflags, &zb);
		if (err != 0) {
			kmem_free(os, sizeof (objset_t));
			/* convert checksum errors into IO errors */
			if (err == ECKSUM)
				err = SET_ERROR(EIO);
			return (err);
		}

		/* Increase the blocksize if we are permitted. */
		if (spa_version(spa) >= SPA_VERSION_USERSPACE &&
		    arc_buf_size(os->os_phys_buf) < sizeof (objset_phys_t)) {
			arc_buf_t *buf = arc_buf_alloc(spa,
			    sizeof (objset_phys_t), &os->os_phys_buf,
			    ARC_BUFC_METADATA);
			bzero(buf->b_data, sizeof (objset_phys_t));
			bcopy(os->os_phys_buf->b_data, buf->b_data,
			    arc_buf_size(os->os_phys_buf));
			(void) arc_buf_remove_ref(os->os_phys_buf,
			    &os->os_phys_buf);
			os->os_phys_buf = buf;
		}

		os->os_phys = os->os_phys_buf->b_data;
		os->os_flags = os->os_phys->os_flags;
	} else {
		int size = spa_version(spa) >= SPA_VERSION_USERSPACE ?
		    sizeof (objset_phys_t) : OBJSET_OLD_PHYS_SIZE;
		os->os_phys_buf = arc_buf_alloc(spa, size,
		    &os->os_phys_buf, ARC_BUFC_METADATA);
		os->os_phys = os->os_phys_buf->b_data;
		bzero(os->os_phys, size);
	}

	/*
	 * Note: the changed_cb will be called once before the register
	 * func returns, thus changing the checksum/compression from the
	 * default (fletcher2/off).  Snapshots don't need to know about
	 * checksum/compression/copies.
	 */
	if (ds != NULL) {
		err = dsl_prop_register(ds,
		    zfs_prop_to_name(ZFS_PROP_PRIMARYCACHE),
		    primary_cache_changed_cb, os);
		if (err == 0) {
			err = dsl_prop_register(ds,
			    zfs_prop_to_name(ZFS_PROP_SECONDARYCACHE),
			    secondary_cache_changed_cb, os);
		}
		if (!ds->ds_is_snapshot) {
			if (err == 0) {
				err = dsl_prop_register(ds,
				    zfs_prop_to_name(ZFS_PROP_CHECKSUM),
				    checksum_changed_cb, os);
			}
			if (err == 0) {
				err = dsl_prop_register(ds,
				    zfs_prop_to_name(ZFS_PROP_COMPRESSION),
				    compression_changed_cb, os);
			}
			if (err == 0) {
				err = dsl_prop_register(ds,
				    zfs_prop_to_name(ZFS_PROP_COPIES),
				    copies_changed_cb, os);
			}
			if (err == 0) {
				err = dsl_prop_register(ds,
				    zfs_prop_to_name(ZFS_PROP_DEDUP),
				    dedup_changed_cb, os);
			}
			if (err == 0) {
				err = dsl_prop_register(ds,
				    zfs_prop_to_name(ZFS_PROP_LOGBIAS),
				    logbias_changed_cb, os);
			}
			if (err == 0) {
				err = dsl_prop_register(ds,
				    zfs_prop_to_name(ZFS_PROP_SYNC),
				    sync_changed_cb, os);
			}
			if (err == 0) {
				err = dsl_prop_register(ds,
				    zfs_prop_to_name(
				    ZFS_PROP_REDUNDANT_METADATA),
				    redundant_metadata_changed_cb, os);
			}
			if (err == 0) {
				err = dsl_prop_register(ds,
				    zfs_prop_to_name(ZFS_PROP_RECORDSIZE),
				    recordsize_changed_cb, os);
			}
		}
		if (err != 0) {
			VERIFY(arc_buf_remove_ref(os->os_phys_buf,
			    &os->os_phys_buf));
			kmem_free(os, sizeof (objset_t));
			return (err);
		}
	} else {
		/* It's the meta-objset. */
		os->os_checksum = ZIO_CHECKSUM_FLETCHER_4;
		os->os_compress = ZIO_COMPRESS_ON;
		os->os_copies = spa_max_replication(spa);
		os->os_dedup_checksum = ZIO_CHECKSUM_OFF;
		os->os_dedup_verify = B_FALSE;
		os->os_logbias = ZFS_LOGBIAS_LATENCY;
		os->os_sync = ZFS_SYNC_STANDARD;
		os->os_primary_cache = ZFS_CACHE_ALL;
		os->os_secondary_cache = ZFS_CACHE_ALL;
	}

	if (ds == NULL || !ds->ds_is_snapshot)
		os->os_zil_header = os->os_phys->os_zil_header;
	os->os_zil = zil_alloc(os, &os->os_zil_header);

	for (i = 0; i < TXG_SIZE; i++) {
		list_create(&os->os_dirty_dnodes[i], sizeof (dnode_t),
		    offsetof(dnode_t, dn_dirty_link[i]));
		list_create(&os->os_free_dnodes[i], sizeof (dnode_t),
		    offsetof(dnode_t, dn_dirty_link[i]));
	}
	list_create(&os->os_dnodes, sizeof (dnode_t),
	    offsetof(dnode_t, dn_link));
	list_create(&os->os_downgraded_dbufs, sizeof (dmu_buf_impl_t),
	    offsetof(dmu_buf_impl_t, db_link));

	mutex_init(&os->os_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&os->os_obj_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&os->os_user_ptr_lock, NULL, MUTEX_DEFAULT, NULL);

	dnode_special_open(os, &os->os_phys->os_meta_dnode,
	    DMU_META_DNODE_OBJECT, &os->os_meta_dnode);
	if (arc_buf_size(os->os_phys_buf) >= sizeof (objset_phys_t)) {
		dnode_special_open(os, &os->os_phys->os_userused_dnode,
		    DMU_USERUSED_OBJECT, &os->os_userused_dnode);
		dnode_special_open(os, &os->os_phys->os_groupused_dnode,
		    DMU_GROUPUSED_OBJECT, &os->os_groupused_dnode);
	}

	*osp = os;
	return (0);
}