int spa_history_log_nvl(spa_t *spa, nvlist_t *nvl) { int err = 0; dmu_tx_t *tx; nvlist_t *nvarg; if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY || !spa_writeable(spa)) return (SET_ERROR(EINVAL)); tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir); err = dmu_tx_assign(tx, TXG_WAIT); if (err) { dmu_tx_abort(tx); return (err); } VERIFY0(nvlist_dup(nvl, &nvarg, KM_PUSHPAGE)); if (spa_history_zone() != NULL) { fnvlist_add_string(nvarg, ZPOOL_HIST_ZONE, spa_history_zone()); } fnvlist_add_uint64(nvarg, ZPOOL_HIST_WHO, crgetruid(CRED())); /* Kick this off asynchronously; errors are ignored. */ dsl_sync_task_nowait(spa_get_dsl(spa), spa_history_log_sync, nvarg, 0, tx); dmu_tx_commit(tx); /* spa_history_log_sync will free nvl */ return (err); }
boolean_t dmu_objset_userused_enabled(objset_impl_t *os) { return (spa_version(os->os_spa) >= SPA_VERSION_USERSPACE && used_cbs[os->os_phys->os_type] && os->os_userused_dnode); }
int dsl_deleg_set(const char *ddname, nvlist_t *nvp, boolean_t unset) { dsl_dir_t *dd; int error; nvpair_t *whopair = NULL; int blocks_modified = 0; error = dsl_dir_open(ddname, FTAG, &dd, NULL); if (error) return (error); if (spa_version(dmu_objset_spa(dd->dd_pool->dp_meta_objset)) < SPA_VERSION_DELEGATED_PERMS) { dsl_dir_close(dd, FTAG); return (ENOTSUP); } while ((whopair = nvlist_next_nvpair(nvp, whopair))) blocks_modified++; error = dsl_sync_task_do(dd->dd_pool, NULL, unset ? dsl_deleg_unset_sync : dsl_deleg_set_sync, dd, nvp, blocks_modified); dsl_dir_close(dd, FTAG); return (error); }
int dmu_spill_hold_existing(dmu_buf_t *bonus, void *tag, dmu_buf_t **dbp) { dmu_buf_impl_t *db = (dmu_buf_impl_t *)bonus; dnode_t *dn; int err; DB_DNODE_ENTER(db); dn = DB_DNODE(db); if (spa_version(dn->dn_objset->os_spa) < SPA_VERSION_SA) { err = EINVAL; } else { rw_enter(&dn->dn_struct_rwlock, RW_READER); if (!dn->dn_have_spill) { err = ENOENT; } else { err = dmu_spill_hold_by_dnode(dn, DB_RF_HAVESTRUCT | DB_RF_CANFAIL, tag, dbp); } rw_exit(&dn->dn_struct_rwlock); } DB_DNODE_EXIT(db); return (err); }
boolean_t dmu_objset_userused_enabled(objset_t *os) { return (spa_version(os->os_spa) >= SPA_VERSION_USERSPACE && used_cbs[os->os_phys->os_type] != NULL && DMU_USERUSED_DNODE(os) != NULL); }
int dsl_destroy_snapshot_check_impl(dsl_dataset_t *ds, boolean_t defer) { if (!ds->ds_is_snapshot) return (SET_ERROR(EINVAL)); if (dsl_dataset_long_held(ds)) return (SET_ERROR(EBUSY)); /* * Only allow deferred destroy on pools that support it. * NOTE: deferred destroy is only supported on snapshots. */ if (defer) { if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_USERREFS) return (SET_ERROR(ENOTSUP)); return (0); } /* * If this snapshot has an elevated user reference count, * we can't destroy it yet. */ if (ds->ds_userrefs > 0) return (SET_ERROR(EBUSY)); /* * Can't delete a branch point. */ if (dsl_dataset_phys(ds)->ds_num_children > 1) return (SET_ERROR(EEXIST)); return (0); }
void dsl_prop_check_prediction(dsl_dir_t *dd, dsl_prop_setarg_t *psa) { zfs_prop_t prop = zfs_name_to_prop(psa->psa_name); uint64_t intval; char setpoint[MAXNAMELEN]; uint64_t version = spa_version(dd->dd_pool->dp_spa); int err; if (version < SPA_VERSION_RECVD_PROPS) { switch (prop) { case ZFS_PROP_QUOTA: case ZFS_PROP_RESERVATION: return; default: break; } } err = dsl_prop_get_dd(dd, psa->psa_name, 8, 1, &intval, setpoint, B_FALSE); if (err == 0 && intval != psa->psa_effective_value) { cmn_err(CE_PANIC, "%s property, source: %x, " "predicted effective value: %llu, " "actual effective value: %llu (setpoint: %s)", psa->psa_name, psa->psa_source, (unsigned long long)psa->psa_effective_value, (unsigned long long)intval, setpoint); } }
void dsl_destroy_snapshot_sync_impl(dsl_dataset_t *ds, boolean_t defer, dmu_tx_t *tx) { #ifdef ZFS_DEBUG int err; #endif int after_branch_point = FALSE; dsl_pool_t *dp = ds->ds_dir->dd_pool; objset_t *mos = dp->dp_meta_objset; dsl_dataset_t *ds_prev = NULL; uint64_t obj, old_unique, used = 0, comp = 0, uncomp = 0; dsl_dataset_t *ds_next, *ds_head, *hds; ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock)); ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg); ASSERT(refcount_is_zero(&ds->ds_longholds)); if (defer && (ds->ds_userrefs > 0 || ds->ds_phys->ds_num_children > 1)) { ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS); dmu_buf_will_dirty(ds->ds_dbuf, tx); ds->ds_phys->ds_flags |= DS_FLAG_DEFER_DESTROY; spa_history_log_internal_ds(ds, "defer_destroy", tx, ""); return; } ASSERT3U(ds->ds_phys->ds_num_children, <=, 1); /* We need to log before removing it from the namespace. */ spa_history_log_internal_ds(ds, "destroy", tx, ""); dsl_scan_ds_destroyed(ds, tx); obj = ds->ds_object; if (ds->ds_phys->ds_prev_snap_obj != 0) { ASSERT3P(ds->ds_prev, ==, NULL); VERIFY0(dsl_dataset_hold_obj(dp, ds->ds_phys->ds_prev_snap_obj, FTAG, &ds_prev)); after_branch_point = (ds_prev->ds_phys->ds_next_snap_obj != obj); dmu_buf_will_dirty(ds_prev->ds_dbuf, tx); if (after_branch_point && ds_prev->ds_phys->ds_next_clones_obj != 0) { dsl_dataset_remove_from_next_clones(ds_prev, obj, tx); if (ds->ds_phys->ds_next_snap_obj != 0) { VERIFY0(zap_add_int(mos, ds_prev->ds_phys->ds_next_clones_obj, ds->ds_phys->ds_next_snap_obj, tx)); } } if (!after_branch_point) { ds_prev->ds_phys->ds_next_snap_obj = ds->ds_phys->ds_next_snap_obj; } }
void spa_history_log_version(spa_t *spa, const char *operation) { spa_history_log_internal(spa, operation, NULL, "pool version %llu; software version %llu/%d; uts %s %s %s %s", (u_longlong_t)spa_version(spa), SPA_VERSION, ZPL_VERSION, utsname.nodename, utsname.release, utsname.version, utsname.machine); }
static void zhack_spa_open(const char *target, boolean_t readonly, void *tag, spa_t **spa) { int err; import_pool(target, readonly); zfeature_checks_disable = B_TRUE; err = spa_open(target, spa, tag); zfeature_checks_disable = B_FALSE; if (err != 0) fatal("cannot open '%s': %s", target, strerror(err)); if (spa_version(*spa) < SPA_VERSION_FEATURES) { fatal("'%s' has version %d, features not enabled", target, (int)spa_version(*spa)); } }
void spa_history_log_version(spa_t *spa, const char *operation, dmu_tx_t *tx) { utsname_t *u = utsname(); spa_history_log_internal(spa, operation, tx, "pool version %llu; software version %llu/%llu; uts %s %s %s %s", (u_longlong_t)spa_version(spa), SPA_VERSION, ZPL_VERSION, u->nodename, u->release, u->version, u->machine); }
uint64_t bplist_create(objset_t *mos, int blocksize, dmu_tx_t *tx) { int size; size = spa_version(dmu_objset_spa(mos)) < SPA_VERSION_BPLIST_ACCOUNT ? BPLIST_SIZE_V0 : sizeof (bplist_phys_t); return (dmu_object_alloc(mos, DMU_OT_BPLIST, blocksize, DMU_OT_BPLIST_HDR, size, tx)); }
int zfs_set_version(zfs_sb_t *zsb, uint64_t newvers) { int error; objset_t *os = zsb->z_os; dmu_tx_t *tx; if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION) return (SET_ERROR(EINVAL)); if (newvers < zsb->z_version) return (SET_ERROR(EINVAL)); if (zfs_spa_version_map(newvers) > spa_version(dmu_objset_spa(zsb->z_os))) return (SET_ERROR(ENOTSUP)); tx = dmu_tx_create(os); dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_FALSE, ZPL_VERSION_STR); if (newvers >= ZPL_VERSION_SA && !zsb->z_use_sa) { dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE, ZFS_SA_ATTRS); dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); } error = dmu_tx_assign(tx, TXG_WAIT); if (error) { dmu_tx_abort(tx); return (error); } error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR, 8, 1, &newvers, tx); if (error) { dmu_tx_commit(tx); return (error); } if (newvers >= ZPL_VERSION_SA && !zsb->z_use_sa) { uint64_t sa_obj; ASSERT3U(spa_version(dmu_objset_spa(zsb->z_os)), >=, SPA_VERSION_SA); sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE, DMU_OT_NONE, 0, tx); error = zap_add(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, &sa_obj, tx); ASSERT0(error); VERIFY(0 == sa_set_sa_object(os, sa_obj)); sa_register_update_callback(os, zfs_sa_upgrade); }
static int dsl_dataset_user_hold_check(void *arg, dmu_tx_t *tx) { dsl_dataset_user_hold_arg_t *dduha = arg; dsl_pool_t *dp = dmu_tx_pool(tx); nvpair_t *pair; if (spa_version(dp->dp_spa) < SPA_VERSION_USERREFS) return (SET_ERROR(ENOTSUP)); if (!dmu_tx_is_syncing(tx)) return (0); for (pair = nvlist_next_nvpair(dduha->dduha_holds, NULL); pair != NULL; pair = nvlist_next_nvpair(dduha->dduha_holds, pair)) { dsl_dataset_t *ds; int error = 0; char *htag, *name; /* must be a snapshot */ name = nvpair_name(pair); if (strchr(name, '@') == NULL) error = SET_ERROR(EINVAL); if (error == 0) error = nvpair_value_string(pair, &htag); if (error == 0) error = dsl_dataset_hold(dp, name, FTAG, &ds); if (error == 0) { error = dsl_dataset_user_hold_check_one(ds, htag, dduha->dduha_minor != 0, tx); dsl_dataset_rele(ds, FTAG); } if (error == 0) { fnvlist_add_string(dduha->dduha_chkholds, name, htag); } else { /* * We register ENOENT errors so they can be correctly * reported if needed, such as when all holds fail. */ fnvlist_add_int32(dduha->dduha_errlist, name, error); if (error != ENOENT) return (error); } } return (0); }
void spa_history_log_version(spa_t *spa, const char *operation) { #ifdef _KERNEL uint64_t current_vers = spa_version(spa); spa_history_log_internal(spa, operation, NULL, "pool version %llu; software version %llu/%d; uts %s %s %s %s", (u_longlong_t)current_vers, SPA_VERSION, ZPL_VERSION, utsname.nodename, utsname.release, utsname.version, utsname.machine); cmn_err(CE_CONT, "!%s version %llu pool %s using %llu", operation, (u_longlong_t)current_vers, spa_name(spa), SPA_VERSION); #endif }
/* * Generate the pool's configuration based on the current in-core state. * We infer whether to generate a complete config or just one top-level config * based on whether vd is the root vdev. */ nvlist_t * spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats) { nvlist_t *config, *nvroot; vdev_t *rvd = spa->spa_root_vdev; ASSERT(spa_config_held(spa, RW_READER)); if (vd == NULL) vd = rvd; /* * If txg is -1, report the current value of spa->spa_config_txg. */ if (txg == -1ULL) txg = spa->spa_config_txg; VERIFY(nvlist_alloc(&config, NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_VERSION, spa_version(spa)) == 0); VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME, spa_name(spa)) == 0); VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE, spa_state(spa)) == 0); VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_TXG, txg) == 0); VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_GUID, spa_guid(spa)) == 0); if (vd != rvd) { VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TOP_GUID, vd->vdev_top->vdev_guid) == 0); VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_GUID, vd->vdev_guid) == 0); if (vd->vdev_isspare) VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_IS_SPARE, 1ULL) == 0); vd = vd->vdev_top; /* label contains top config */ } nvroot = vdev_config_generate(spa, vd, getstats, B_FALSE); VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0); nvlist_free(nvroot); return (config); }
static int dsl_deleg_check(void *arg, dmu_tx_t *tx) { dsl_deleg_arg_t *dda = arg; dsl_dir_t *dd; int error; if (spa_version(dmu_tx_pool(tx)->dp_spa) < SPA_VERSION_DELEGATED_PERMS) { return (SET_ERROR(ENOTSUP)); } error = dsl_dir_hold(dmu_tx_pool(tx), dda->dda_name, FTAG, &dd, NULL); if (error == 0) dsl_dir_rele(dd, FTAG); return (error); }
void spa_history_log_version(spa_t *spa, history_internal_events_t event) { #ifdef _KERNEL uint64_t current_vers = spa_version(spa); if (current_vers >= SPA_VERSION_ZPOOL_HISTORY) { spa_history_log_internal(event, spa, NULL, "pool spa %llu; zfs spa %llu; zpl %d; uts %s %s %s %s", (u_longlong_t)current_vers, SPA_VERSION, ZPL_VERSION, utsname.nodename, utsname.release, utsname.version, utsname.machine); } cmn_err(CE_CONT, "!%s version %llu pool %s using %llu", event == LOG_POOL_IMPORT ? "imported" : event == LOG_POOL_CREATE ? "created" : "accessed", (u_longlong_t)current_vers, spa_name(spa), SPA_VERSION); #endif }
/* * set all create time permission on new dataset. */ void dsl_deleg_set_create_perms(dsl_dir_t *sdd, dmu_tx_t *tx, cred_t *cr) { dsl_dir_t *dd; uint64_t uid = crgetuid(cr); if (spa_version(dmu_objset_spa(sdd->dd_pool->dp_meta_objset)) < SPA_VERSION_DELEGATED_PERMS) return; for (dd = sdd->dd_parent; dd != NULL; dd = dd->dd_parent) { uint64_t pzapobj = dd->dd_phys->dd_deleg_zapobj; if (pzapobj == 0) continue; copy_create_perms(sdd, pzapobj, B_FALSE, uid, tx); copy_create_perms(sdd, pzapobj, B_TRUE, uid, tx); } }
static int dsl_dataset_user_hold_check(void *arg, dmu_tx_t *tx) { dsl_dataset_user_hold_arg_t *dduha = arg; dsl_pool_t *dp = dmu_tx_pool(tx); nvpair_t *pair; int rv = 0; if (spa_version(dp->dp_spa) < SPA_VERSION_USERREFS) return (ENOTSUP); for (pair = nvlist_next_nvpair(dduha->dduha_holds, NULL); pair != NULL; pair = nvlist_next_nvpair(dduha->dduha_holds, pair)) { int error = 0; dsl_dataset_t *ds; char *htag; /* must be a snapshot */ if (strchr(nvpair_name(pair), '@') == NULL) error = EINVAL; if (error == 0) error = nvpair_value_string(pair, &htag); if (error == 0) { error = dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds); } if (error == 0) { error = dsl_dataset_user_hold_check_one(ds, htag, dduha->dduha_minor != 0, tx); dsl_dataset_rele(ds, FTAG); } if (error != 0) { rv = error; fnvlist_add_int32(dduha->dduha_errlist, nvpair_name(pair), error); } } return (rv); }
/* * zfs_init_fs - Initialize the zfsvfs struct and the file system * incore "master" object. Verify version compatibility. */ int zfs_init_fs(zfsvfs_t *zfsvfs, znode_t **zpp, cred_t *cr) { extern int zfsfstype; objset_t *os = zfsvfs->z_os; int i, error; dmu_object_info_t doi; uint64_t fsid_guid; uint64_t zval; *zpp = NULL; /* * XXX - hack to auto-create the pool root filesystem at * the first attempted mount. */ if (dmu_object_info(os, MASTER_NODE_OBJ, &doi) == ENOENT) { dmu_tx_t *tx = dmu_tx_create(os); uint64_t zpl_version; nvlist_t *zprops; dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, TRUE, NULL); /* master */ dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, TRUE, NULL); /* del queue */ dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); /* root node */ error = dmu_tx_assign(tx, TXG_WAIT); ASSERT3U(error, ==, 0); if (spa_version(dmu_objset_spa(os)) >= SPA_VERSION_FUID) zpl_version = ZPL_VERSION; else zpl_version = ZPL_VERSION_FUID - 1; VERIFY(nvlist_alloc(&zprops, NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_add_uint64(zprops, zfs_prop_to_name(ZFS_PROP_VERSION), zpl_version) == 0); zfs_create_fs(os, cr, zprops, tx); nvlist_free(zprops); dmu_tx_commit(tx); }
/* ARGSUSED */ static int zcp_synctask_snapshot(lua_State *state, boolean_t sync, nvlist_t *err_details) { int err; dsl_dataset_snapshot_arg_t ddsa = { 0 }; const char *dsname = lua_tostring(state, 1); zcp_run_info_t *ri = zcp_run_info(state); /* * On old pools, the ZIL must not be active when a snapshot is created, * but we can't suspend the ZIL because we're already in syncing * context. */ if (spa_version(ri->zri_pool->dp_spa) < SPA_VERSION_FAST_SNAP) { return (ENOTSUP); } /* * We only allow for a single snapshot rather than a list, so the * error list output is unnecessary. */ ddsa.ddsa_errors = NULL; ddsa.ddsa_props = NULL; ddsa.ddsa_cr = ri->zri_cred; ddsa.ddsa_snaps = fnvlist_alloc(); fnvlist_add_boolean(ddsa.ddsa_snaps, dsname); zcp_cleanup_handler_t *zch = zcp_register_cleanup(state, (zcp_cleanup_t *)&fnvlist_free, ddsa.ddsa_snaps); err = zcp_sync_task(state, dsl_dataset_snapshot_check, dsl_dataset_snapshot_sync, &ddsa, sync, dsname); zcp_deregister_cleanup(state, zch); fnvlist_free(ddsa.ddsa_snaps); return (err); }
int spa_history_log_nvl(spa_t *spa, nvlist_t *nvl) { int err = 0; dmu_tx_t *tx; nvlist_t *nvarg, *in_nvl = NULL; if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY || !spa_writeable(spa)) return (SET_ERROR(EINVAL)); err = nvlist_lookup_nvlist(nvl, ZPOOL_HIST_INPUT_NVL, &in_nvl); if (err == 0) { (void) nvlist_remove_all(in_nvl, ZPOOL_HIDDEN_ARGS); } tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir); err = dmu_tx_assign(tx, TXG_WAIT); if (err) { dmu_tx_abort(tx); return (err); } VERIFY0(nvlist_dup(nvl, &nvarg, KM_SLEEP)); if (spa_history_zone() != NULL) { fnvlist_add_string(nvarg, ZPOOL_HIST_ZONE, spa_history_zone()); } fnvlist_add_uint64(nvarg, ZPOOL_HIST_WHO, crgetruid(CRED())); /* Kick this off asynchronously; errors are ignored. */ dsl_sync_task_nowait(spa_get_dsl(spa), spa_history_log_sync, nvarg, 0, ZFS_SPACE_CHECK_NONE, tx); dmu_tx_commit(tx); /* spa_history_log_sync will free nvl */ return (err); }
uint64_t dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds, const char *name, dmu_tx_t *tx) { objset_t *mos = dp->dp_meta_objset; uint64_t ddobj; dsl_dir_phys_t *ddphys; dmu_buf_t *dbuf; ddobj = dmu_object_alloc(mos, DMU_OT_DSL_DIR, 0, DMU_OT_DSL_DIR, sizeof (dsl_dir_phys_t), tx); if (pds) { VERIFY(0 == zap_add(mos, pds->dd_phys->dd_child_dir_zapobj, name, sizeof (uint64_t), 1, &ddobj, tx)); } else { /* it's the root dir */ VERIFY(0 == zap_add(mos, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ROOT_DATASET, sizeof (uint64_t), 1, &ddobj, tx)); } VERIFY(0 == dmu_bonus_hold(mos, ddobj, FTAG, &dbuf)); dmu_buf_will_dirty(dbuf, tx); ddphys = dbuf->db_data; ddphys->dd_creation_time = gethrestime_sec(); if (pds) ddphys->dd_parent_obj = pds->dd_object; ddphys->dd_props_zapobj = zap_create(mos, DMU_OT_DSL_PROPS, DMU_OT_NONE, 0, tx); ddphys->dd_child_dir_zapobj = zap_create(mos, DMU_OT_DSL_DIR_CHILD_MAP, DMU_OT_NONE, 0, tx); if (spa_version(dp->dp_spa) >= SPA_VERSION_USED_BREAKDOWN) ddphys->dd_flags |= DD_FLAG_USED_BREAKDOWN; dmu_buf_rele(dbuf, FTAG); return (ddobj); }
/* * Predict the effective value of the given special property if it were set with * the given value and source. This is not a general purpose function. It exists * only to handle the special requirements of the quota and reservation * properties. The fact that these properties are non-inheritable greatly * simplifies the prediction logic. * * Returns 0 on success, a positive error code on failure, or -1 if called with * a property not handled by this function. */ int dsl_prop_predict(dsl_dir_t *dd, const char *propname, zprop_source_t source, uint64_t value, uint64_t *newvalp) { zfs_prop_t prop = zfs_name_to_prop(propname); objset_t *mos; uint64_t zapobj; uint64_t version; char *recvdstr; int err = 0; switch (prop) { case ZFS_PROP_QUOTA: case ZFS_PROP_RESERVATION: case ZFS_PROP_REFQUOTA: case ZFS_PROP_REFRESERVATION: break; default: return (-1); } mos = dd->dd_pool->dp_meta_objset; zapobj = dsl_dir_phys(dd)->dd_props_zapobj; recvdstr = kmem_asprintf("%s%s", propname, ZPROP_RECVD_SUFFIX); version = spa_version(dd->dd_pool->dp_spa); if (version < SPA_VERSION_RECVD_PROPS) { if (source & ZPROP_SRC_NONE) source = ZPROP_SRC_NONE; else if (source & ZPROP_SRC_RECEIVED) source = ZPROP_SRC_LOCAL; } switch ((int)source) { case ZPROP_SRC_NONE: /* Revert to the received value, if any. */ err = zap_lookup(mos, zapobj, recvdstr, 8, 1, newvalp); if (err == ENOENT) *newvalp = 0; break; case ZPROP_SRC_LOCAL: *newvalp = value; break; case ZPROP_SRC_RECEIVED: /* * If there's no local setting, then the new received value will * be the effective value. */ err = zap_lookup(mos, zapobj, propname, 8, 1, newvalp); if (err == ENOENT) *newvalp = value; break; case (ZPROP_SRC_NONE | ZPROP_SRC_RECEIVED): /* * We're clearing the received value, so the local setting (if * it exists) remains the effective value. */ err = zap_lookup(mos, zapobj, propname, 8, 1, newvalp); if (err == ENOENT) *newvalp = 0; break; default: panic("unexpected property source: %d", source); } strfree(recvdstr); if (err == ENOENT) return (0); return (err); }
int dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, objset_t **osp) { objset_t *os; int i, err = 0; ASSERT(ds == NULL || MUTEX_HELD(&ds->ds_opening_lock)); os = kmem_zalloc(sizeof (objset_t), KM_PUSHPAGE); os->os_dsl_dataset = ds; os->os_spa = spa; os->os_rootbp = bp; if (!BP_IS_HOLE(os->os_rootbp)) { uint32_t aflags = ARC_WAIT; zbookmark_t zb; SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET, ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID); if (DMU_OS_IS_L2CACHEABLE(os)) aflags |= ARC_L2CACHE; dprintf_bp(os->os_rootbp, "reading %s", ""); /* * XXX when bprewrite scrub can change the bp, * and this is called from dmu_objset_open_ds_os, the bp * could change, and we'll need a lock. */ err = dsl_read_nolock(NULL, spa, os->os_rootbp, arc_getbuf_func, &os->os_phys_buf, ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &aflags, &zb); if (err) { kmem_free(os, sizeof (objset_t)); /* convert checksum errors into IO errors */ if (err == ECKSUM) err = EIO; return (err); } /* Increase the blocksize if we are permitted. */ if (spa_version(spa) >= SPA_VERSION_USERSPACE && arc_buf_size(os->os_phys_buf) < sizeof (objset_phys_t)) { arc_buf_t *buf = arc_buf_alloc(spa, sizeof (objset_phys_t), &os->os_phys_buf, ARC_BUFC_METADATA); bzero(buf->b_data, sizeof (objset_phys_t)); bcopy(os->os_phys_buf->b_data, buf->b_data, arc_buf_size(os->os_phys_buf)); (void) arc_buf_remove_ref(os->os_phys_buf, &os->os_phys_buf); os->os_phys_buf = buf; } os->os_phys = os->os_phys_buf->b_data; os->os_flags = os->os_phys->os_flags; } else { int size = spa_version(spa) >= SPA_VERSION_USERSPACE ? sizeof (objset_phys_t) : OBJSET_OLD_PHYS_SIZE; os->os_phys_buf = arc_buf_alloc(spa, size, &os->os_phys_buf, ARC_BUFC_METADATA); os->os_phys = os->os_phys_buf->b_data; bzero(os->os_phys, size); } /* * Note: the changed_cb will be called once before the register * func returns, thus changing the checksum/compression from the * default (fletcher2/off). Snapshots don't need to know about * checksum/compression/copies. But they do need to know about * encryption so that clones from the snaphost inherit the * same encryption property regardless of where in the namespace * they get created. */ if (ds) { err = dsl_prop_register(ds, "primarycache", primary_cache_changed_cb, os); if (err == 0) err = dsl_prop_register(ds, "secondarycache", secondary_cache_changed_cb, os); if (err == 0) err = dsl_prop_register(ds, "encryption", crypt_changed_cb, os); if (!dsl_dataset_is_snapshot(ds)) { if (err == 0) err = dsl_prop_register(ds, "checksum", checksum_changed_cb, os); if (err == 0) err = dsl_prop_register(ds, "compression", compression_changed_cb, os); if (err == 0) err = dsl_prop_register(ds, "copies", copies_changed_cb, os); if (err == 0) err = dsl_prop_register(ds, "dedup", dedup_changed_cb, os); if (err == 0) err = dsl_prop_register(ds, "logbias", logbias_changed_cb, os); if (err == 0) err = dsl_prop_register(ds, "sync", sync_changed_cb, os); } if (err) { VERIFY(arc_buf_remove_ref(os->os_phys_buf, &os->os_phys_buf) == 1); kmem_free(os, sizeof (objset_t)); return (err); } } else if (ds == NULL) { /* * It's the meta-objset. * Encryption is off for ZFS metadata but on for ZPL metadata * and file/zvol contents. */ os->os_checksum = ZIO_CHECKSUM_FLETCHER_4; os->os_compress = ZIO_COMPRESS_LZJB; os->os_copies = spa_max_replication(spa); os->os_dedup_checksum = ZIO_CHECKSUM_OFF; os->os_dedup_verify = 0; os->os_logbias = 0; os->os_sync = 0; os->os_primary_cache = ZFS_CACHE_ALL; os->os_secondary_cache = ZFS_CACHE_ALL; os->os_crypt = ZIO_CRYPT_OFF; } if (ds == NULL || !dsl_dataset_is_snapshot(ds)) os->os_zil_header = os->os_phys->os_zil_header; os->os_zil = zil_alloc(os, &os->os_zil_header); for (i = 0; i < TXG_SIZE; i++) { list_create(&os->os_dirty_dnodes[i], sizeof (dnode_t), offsetof(dnode_t, dn_dirty_link[i])); list_create(&os->os_free_dnodes[i], sizeof (dnode_t), offsetof(dnode_t, dn_dirty_link[i])); } list_create(&os->os_dnodes, sizeof (dnode_t), offsetof(dnode_t, dn_link)); list_create(&os->os_downgraded_dbufs, sizeof (dmu_buf_impl_t), offsetof(dmu_buf_impl_t, db_link)); mutex_init(&os->os_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&os->os_obj_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&os->os_user_ptr_lock, NULL, MUTEX_DEFAULT, NULL); DMU_META_DNODE(os) = dnode_special_open(os, &os->os_phys->os_meta_dnode, DMU_META_DNODE_OBJECT, &os->os_meta_dnode); if (arc_buf_size(os->os_phys_buf) >= sizeof (objset_phys_t)) { DMU_USERUSED_DNODE(os) = dnode_special_open(os, &os->os_phys->os_userused_dnode, DMU_USERUSED_OBJECT, &os->os_userused_dnode); DMU_GROUPUSED_DNODE(os) = dnode_special_open(os, &os->os_phys->os_groupused_dnode, DMU_GROUPUSED_OBJECT, &os->os_groupused_dnode); } /* * We should be the only thread trying to do this because we * have ds_opening_lock */ if (ds) { mutex_enter(&ds->ds_lock); ASSERT(ds->ds_objset == NULL); ds->ds_objset = os; mutex_exit(&ds->ds_lock); } *osp = os; return (0); }
dsl_pool_t * dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg) { int err; dsl_pool_t *dp = dsl_pool_open_impl(spa, txg); dmu_tx_t *tx = dmu_tx_create_assigned(dp, txg); objset_t *os; dsl_dataset_t *ds; uint64_t obj; rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG); /* create and open the MOS (meta-objset) */ dp->dp_meta_objset = dmu_objset_create_impl(spa, NULL, &dp->dp_meta_rootbp, DMU_OST_META, tx); /* create the pool directory */ err = zap_create_claim(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_OT_OBJECT_DIRECTORY, DMU_OT_NONE, 0, tx); ASSERT0(err); /* Initialize scan structures */ VERIFY0(dsl_scan_init(dp, txg)); /* create and open the root dir */ dp->dp_root_dir_obj = dsl_dir_create_sync(dp, NULL, NULL, tx); VERIFY0(dsl_dir_hold_obj(dp, dp->dp_root_dir_obj, NULL, dp, &dp->dp_root_dir)); /* create and open the meta-objset dir */ (void) dsl_dir_create_sync(dp, dp->dp_root_dir, MOS_DIR_NAME, tx); VERIFY0(dsl_pool_open_special_dir(dp, MOS_DIR_NAME, &dp->dp_mos_dir)); if (spa_version(spa) >= SPA_VERSION_DEADLISTS) { /* create and open the free dir */ (void) dsl_dir_create_sync(dp, dp->dp_root_dir, FREE_DIR_NAME, tx); VERIFY0(dsl_pool_open_special_dir(dp, FREE_DIR_NAME, &dp->dp_free_dir)); /* create and open the free_bplist */ obj = bpobj_alloc(dp->dp_meta_objset, SPA_MAXBLOCKSIZE, tx); VERIFY(zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj, tx) == 0); VERIFY0(bpobj_open(&dp->dp_free_bpobj, dp->dp_meta_objset, obj)); } if (spa_version(spa) >= SPA_VERSION_DSL_SCRUB) dsl_pool_create_origin(dp, tx); /* create the root dataset */ obj = dsl_dataset_create_sync_dd(dp->dp_root_dir, NULL, 0, tx); /* create the root objset */ VERIFY0(dsl_dataset_hold_obj(dp, obj, FTAG, &ds)); os = dmu_objset_create_impl(dp->dp_spa, ds, dsl_dataset_get_blkptr(ds), DMU_OST_ZFS, tx); #ifdef _KERNEL zfs_create_fs(os, kcred, zplprops, tx); #endif dsl_dataset_rele(ds, FTAG); dmu_tx_commit(tx); rrw_exit(&dp->dp_config_rwlock, FTAG); return (dp); }
int dsl_pool_open(dsl_pool_t *dp) { int err; dsl_dir_t *dd; dsl_dataset_t *ds; uint64_t obj; rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG); err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ROOT_DATASET, sizeof (uint64_t), 1, &dp->dp_root_dir_obj); if (err) goto out; err = dsl_dir_hold_obj(dp, dp->dp_root_dir_obj, NULL, dp, &dp->dp_root_dir); if (err) goto out; err = dsl_pool_open_special_dir(dp, MOS_DIR_NAME, &dp->dp_mos_dir); if (err) goto out; if (spa_version(dp->dp_spa) >= SPA_VERSION_ORIGIN) { err = dsl_pool_open_special_dir(dp, ORIGIN_DIR_NAME, &dd); if (err) goto out; err = dsl_dataset_hold_obj(dp, dd->dd_phys->dd_head_dataset_obj, FTAG, &ds); if (err == 0) { err = dsl_dataset_hold_obj(dp, ds->ds_phys->ds_prev_snap_obj, dp, &dp->dp_origin_snap); dsl_dataset_rele(ds, FTAG); } dsl_dir_rele(dd, dp); if (err) goto out; } if (spa_version(dp->dp_spa) >= SPA_VERSION_DEADLISTS) { err = dsl_pool_open_special_dir(dp, FREE_DIR_NAME, &dp->dp_free_dir); if (err) goto out; err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj); if (err) goto out; VERIFY0(bpobj_open(&dp->dp_free_bpobj, dp->dp_meta_objset, obj)); } if (spa_feature_is_active(dp->dp_spa, &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) { err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_BPTREE_OBJ, sizeof (uint64_t), 1, &dp->dp_bptree_obj); if (err != 0) goto out; } if (spa_feature_is_active(dp->dp_spa, &spa_feature_table[SPA_FEATURE_EMPTY_BPOBJ])) { err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_EMPTY_BPOBJ, sizeof (uint64_t), 1, &dp->dp_empty_bpobj); if (err != 0) goto out; } err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_TMP_USERREFS, sizeof (uint64_t), 1, &dp->dp_tmp_userrefs_obj); if (err == ENOENT) err = 0; if (err) goto out; err = dsl_scan_init(dp, dp->dp_tx.tx_open_txg); out: rrw_exit(&dp->dp_config_rwlock, FTAG); return (err); }
static void dsl_scan_setup_sync(void *arg, dmu_tx_t *tx) { dsl_scan_t *scn = dmu_tx_pool(tx)->dp_scan; pool_scan_func_t *funcp = arg; dmu_object_type_t ot = 0; dsl_pool_t *dp = scn->scn_dp; spa_t *spa = dp->dp_spa; ASSERT(scn->scn_phys.scn_state != DSS_SCANNING); ASSERT(*funcp > POOL_SCAN_NONE && *funcp < POOL_SCAN_FUNCS); bzero(&scn->scn_phys, sizeof (scn->scn_phys)); scn->scn_phys.scn_func = *funcp; scn->scn_phys.scn_state = DSS_SCANNING; scn->scn_phys.scn_min_txg = 0; scn->scn_phys.scn_max_txg = tx->tx_txg; scn->scn_phys.scn_ddt_class_max = DDT_CLASSES - 1; /* the entire DDT */ scn->scn_phys.scn_start_time = gethrestime_sec(); scn->scn_phys.scn_errors = 0; scn->scn_phys.scn_to_examine = spa->spa_root_vdev->vdev_stat.vs_alloc; scn->scn_restart_txg = 0; spa_scan_stat_init(spa); if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) { scn->scn_phys.scn_ddt_class_max = zfs_scrub_ddt_class_max; /* rewrite all disk labels */ vdev_config_dirty(spa->spa_root_vdev); if (vdev_resilver_needed(spa->spa_root_vdev, &scn->scn_phys.scn_min_txg, &scn->scn_phys.scn_max_txg)) { spa_event_notify(spa, NULL, ESC_ZFS_RESILVER_START); } else { spa_event_notify(spa, NULL, ESC_ZFS_SCRUB_START); } spa->spa_scrub_started = B_TRUE; /* * If this is an incremental scrub, limit the DDT scrub phase * to just the auto-ditto class (for correctness); the rest * of the scrub should go faster using top-down pruning. */ if (scn->scn_phys.scn_min_txg > TXG_INITIAL) scn->scn_phys.scn_ddt_class_max = DDT_CLASS_DITTO; } /* back to the generic stuff */ if (dp->dp_blkstats == NULL) { dp->dp_blkstats = kmem_alloc(sizeof (zfs_all_blkstats_t), KM_SLEEP); } bzero(dp->dp_blkstats, sizeof (zfs_all_blkstats_t)); if (spa_version(spa) < SPA_VERSION_DSL_SCRUB) ot = DMU_OT_ZAP_OTHER; scn->scn_phys.scn_queue_obj = zap_create(dp->dp_meta_objset, ot ? ot : DMU_OT_SCAN_QUEUE, DMU_OT_NONE, 0, tx); dsl_scan_sync_state(scn, tx); spa_history_log_internal(spa, "scan setup", tx, "func=%u mintxg=%llu maxtxg=%llu", *funcp, scn->scn_phys.scn_min_txg, scn->scn_phys.scn_max_txg); }
int dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, objset_t **osp) { objset_t *os; int i, err; ASSERT(ds == NULL || MUTEX_HELD(&ds->ds_opening_lock)); os = kmem_zalloc(sizeof (objset_t), KM_SLEEP); os->os_dsl_dataset = ds; os->os_spa = spa; os->os_rootbp = bp; if (!BP_IS_HOLE(os->os_rootbp)) { arc_flags_t aflags = ARC_FLAG_WAIT; zbookmark_phys_t zb; SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET, ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID); if (DMU_OS_IS_L2CACHEABLE(os)) aflags |= ARC_FLAG_L2CACHE; if (DMU_OS_IS_L2COMPRESSIBLE(os)) aflags |= ARC_FLAG_L2COMPRESS; dprintf_bp(os->os_rootbp, "reading %s", ""); err = arc_read(NULL, spa, os->os_rootbp, arc_getbuf_func, &os->os_phys_buf, ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &aflags, &zb); if (err != 0) { kmem_free(os, sizeof (objset_t)); /* convert checksum errors into IO errors */ if (err == ECKSUM) err = SET_ERROR(EIO); return (err); } /* Increase the blocksize if we are permitted. */ if (spa_version(spa) >= SPA_VERSION_USERSPACE && arc_buf_size(os->os_phys_buf) < sizeof (objset_phys_t)) { arc_buf_t *buf = arc_buf_alloc(spa, sizeof (objset_phys_t), &os->os_phys_buf, ARC_BUFC_METADATA); bzero(buf->b_data, sizeof (objset_phys_t)); bcopy(os->os_phys_buf->b_data, buf->b_data, arc_buf_size(os->os_phys_buf)); (void) arc_buf_remove_ref(os->os_phys_buf, &os->os_phys_buf); os->os_phys_buf = buf; } os->os_phys = os->os_phys_buf->b_data; os->os_flags = os->os_phys->os_flags; } else { int size = spa_version(spa) >= SPA_VERSION_USERSPACE ? sizeof (objset_phys_t) : OBJSET_OLD_PHYS_SIZE; os->os_phys_buf = arc_buf_alloc(spa, size, &os->os_phys_buf, ARC_BUFC_METADATA); os->os_phys = os->os_phys_buf->b_data; bzero(os->os_phys, size); } /* * Note: the changed_cb will be called once before the register * func returns, thus changing the checksum/compression from the * default (fletcher2/off). Snapshots don't need to know about * checksum/compression/copies. */ if (ds != NULL) { err = dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_PRIMARYCACHE), primary_cache_changed_cb, os); if (err == 0) { err = dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_SECONDARYCACHE), secondary_cache_changed_cb, os); } if (!ds->ds_is_snapshot) { if (err == 0) { err = dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_CHECKSUM), checksum_changed_cb, os); } if (err == 0) { err = dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_COMPRESSION), compression_changed_cb, os); } if (err == 0) { err = dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_COPIES), copies_changed_cb, os); } if (err == 0) { err = dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_DEDUP), dedup_changed_cb, os); } if (err == 0) { err = dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_LOGBIAS), logbias_changed_cb, os); } if (err == 0) { err = dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_SYNC), sync_changed_cb, os); } if (err == 0) { err = dsl_prop_register(ds, zfs_prop_to_name( ZFS_PROP_REDUNDANT_METADATA), redundant_metadata_changed_cb, os); } if (err == 0) { err = dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_RECORDSIZE), recordsize_changed_cb, os); } } if (err != 0) { VERIFY(arc_buf_remove_ref(os->os_phys_buf, &os->os_phys_buf)); kmem_free(os, sizeof (objset_t)); return (err); } } else { /* It's the meta-objset. */ os->os_checksum = ZIO_CHECKSUM_FLETCHER_4; os->os_compress = ZIO_COMPRESS_ON; os->os_copies = spa_max_replication(spa); os->os_dedup_checksum = ZIO_CHECKSUM_OFF; os->os_dedup_verify = B_FALSE; os->os_logbias = ZFS_LOGBIAS_LATENCY; os->os_sync = ZFS_SYNC_STANDARD; os->os_primary_cache = ZFS_CACHE_ALL; os->os_secondary_cache = ZFS_CACHE_ALL; } if (ds == NULL || !ds->ds_is_snapshot) os->os_zil_header = os->os_phys->os_zil_header; os->os_zil = zil_alloc(os, &os->os_zil_header); for (i = 0; i < TXG_SIZE; i++) { list_create(&os->os_dirty_dnodes[i], sizeof (dnode_t), offsetof(dnode_t, dn_dirty_link[i])); list_create(&os->os_free_dnodes[i], sizeof (dnode_t), offsetof(dnode_t, dn_dirty_link[i])); } list_create(&os->os_dnodes, sizeof (dnode_t), offsetof(dnode_t, dn_link)); list_create(&os->os_downgraded_dbufs, sizeof (dmu_buf_impl_t), offsetof(dmu_buf_impl_t, db_link)); mutex_init(&os->os_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&os->os_obj_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&os->os_user_ptr_lock, NULL, MUTEX_DEFAULT, NULL); dnode_special_open(os, &os->os_phys->os_meta_dnode, DMU_META_DNODE_OBJECT, &os->os_meta_dnode); if (arc_buf_size(os->os_phys_buf) >= sizeof (objset_phys_t)) { dnode_special_open(os, &os->os_phys->os_userused_dnode, DMU_USERUSED_OBJECT, &os->os_userused_dnode); dnode_special_open(os, &os->os_phys->os_groupused_dnode, DMU_GROUPUSED_OBJECT, &os->os_groupused_dnode); } *osp = os; return (0); }