void rrm_exit(rrmlock_t *rrl, void *tag) { int i; if (rrl->locks[0].rr_writer == curthread) { for (i = 0; i < RRM_NUM_LOCKS; i++) rrw_exit(&rrl->locks[i], tag); } else { rrw_exit(&rrl->locks[RRM_TD_LOCK()], tag); } }
int dmu_objset_from_ds(dsl_dataset_t *ds, objset_t **osp) { int err = 0; /* * We shouldn't be doing anything with dsl_dataset_t's unless the * pool_config lock is held, or the dataset is long-held. */ ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool) || dsl_dataset_long_held(ds)); mutex_enter(&ds->ds_opening_lock); if (ds->ds_objset == NULL) { objset_t *os; rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); err = dmu_objset_open_impl(dsl_dataset_get_spa(ds), ds, dsl_dataset_get_blkptr(ds), &os); rrw_exit(&ds->ds_bp_rwlock, FTAG); if (err == 0) { mutex_enter(&ds->ds_lock); ASSERT(ds->ds_objset == NULL); ds->ds_objset = os; mutex_exit(&ds->ds_lock); } } *osp = ds->ds_objset; mutex_exit(&ds->ds_opening_lock); return (err); }
/* * Called in syncing context to execute the synctask. */ void dsl_sync_task_sync(dsl_sync_task_t *dst, dmu_tx_t *tx) { dsl_pool_t *dp = dst->dst_pool; ASSERT0(dst->dst_error); /* * Check for sufficient space. * * When the sync task was created, the caller specified the * type of space checking required. See the comment in * zfs_space_check_t for details on the semantics of each * type of space checking. * * We just check against what's on-disk; we don't want any * in-flight accounting to get in our way, because open context * may have already used up various in-core limits * (arc_tempreserve, dsl_pool_tempreserve). */ if (dst->dst_space_check != ZFS_SPACE_CHECK_NONE) { uint64_t quota = dsl_pool_adjustedsize(dp, dst->dst_space_check == ZFS_SPACE_CHECK_RESERVED) - metaslab_class_get_deferred(spa_normal_class(dp->dp_spa)); uint64_t used = dsl_dir_phys(dp->dp_root_dir)->dd_used_bytes; /* MOS space is triple-dittoed, so we multiply by 3. */ if (dst->dst_space > 0 && used + dst->dst_space * 3 > quota) { dst->dst_error = SET_ERROR(ENOSPC); if (dst->dst_nowaiter) kmem_free(dst, sizeof (*dst)); return; } } /* * Check for errors by calling checkfunc. */ rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG); dst->dst_error = dst->dst_checkfunc(dst->dst_arg, tx); if (dst->dst_error == 0) dst->dst_syncfunc(dst->dst_arg, tx); rrw_exit(&dp->dp_config_rwlock, FTAG); if (dst->dst_nowaiter) kmem_free(dst, sizeof (*dst)); }
/* * Reopen zfsvfs_t::z_os and release VOPs. */ int zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname, int mode) { int err; ASSERT(RRW_WRITE_HELD(&zfsvfs->z_teardown_lock)); ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock)); err = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os); if (err) { zfsvfs->z_os = NULL; } else { znode_t *zp; VERIFY(zfsvfs_setup(zfsvfs, B_FALSE) == 0); /* * Attempt to re-establish all the active znodes with * their dbufs. If a zfs_rezget() fails, then we'll let * any potential callers discover that via ZFS_ENTER_VERIFY_VP * when they try to use their znode. */ mutex_enter(&zfsvfs->z_znodes_lock); for (zp = list_head(&zfsvfs->z_all_znodes); zp; zp = list_next(&zfsvfs->z_all_znodes, zp)) { (void) zfs_rezget(zp); } mutex_exit(&zfsvfs->z_znodes_lock); } /* release the VOPs */ rw_exit(&zfsvfs->z_teardown_inactive_lock); rrw_exit(&zfsvfs->z_teardown_lock, FTAG); if (err) { /* * Since we couldn't reopen zfsvfs::z_os, force * unmount this file system. */ if (vn_vfswlock(zfsvfs->z_vfs->vfs_vnodecovered) == 0) (void) dounmount(zfsvfs->z_vfs, MS_FORCE, curthread); } return (err); }
/* * Called in syncing context to execute the synctask. */ void dsl_sync_task_sync(dsl_sync_task_t *dst, dmu_tx_t *tx) { dsl_pool_t *dp = dst->dst_pool; uint64_t quota, used; ASSERT0(dst->dst_error); /* * Check for sufficient space. We just check against what's * on-disk; we don't want any in-flight accounting to get in our * way, because open context may have already used up various * in-core limits (arc_tempreserve, dsl_pool_tempreserve). */ quota = dsl_pool_adjustedsize(dp, B_FALSE) - metaslab_class_get_deferred(spa_normal_class(dp->dp_spa)); used = dp->dp_root_dir->dd_phys->dd_used_bytes; /* MOS space is triple-dittoed, so we multiply by 3. */ if (dst->dst_space > 0 && used + dst->dst_space * 3 > quota) { dst->dst_error = SET_ERROR(ENOSPC); if (dst->dst_nowaiter) kmem_free(dst, sizeof (*dst)); return; } /* * Check for errors by calling checkfunc. */ rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG); dst->dst_error = dst->dst_checkfunc(dst->dst_arg, tx); if (dst->dst_error == 0) dst->dst_syncfunc(dst->dst_arg, tx); rrw_exit(&dp->dp_config_rwlock, FTAG); if (dst->dst_nowaiter) kmem_free(dst, sizeof (*dst)); }
dsl_pool_t * dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg) { int err; dsl_pool_t *dp = dsl_pool_open_impl(spa, txg); dmu_tx_t *tx = dmu_tx_create_assigned(dp, txg); objset_t *os; dsl_dataset_t *ds; uint64_t obj; rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG); /* create and open the MOS (meta-objset) */ dp->dp_meta_objset = dmu_objset_create_impl(spa, NULL, &dp->dp_meta_rootbp, DMU_OST_META, tx); /* create the pool directory */ err = zap_create_claim(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_OT_OBJECT_DIRECTORY, DMU_OT_NONE, 0, tx); ASSERT0(err); /* Initialize scan structures */ VERIFY0(dsl_scan_init(dp, txg)); /* create and open the root dir */ dp->dp_root_dir_obj = dsl_dir_create_sync(dp, NULL, NULL, tx); VERIFY0(dsl_dir_hold_obj(dp, dp->dp_root_dir_obj, NULL, dp, &dp->dp_root_dir)); /* create and open the meta-objset dir */ (void) dsl_dir_create_sync(dp, dp->dp_root_dir, MOS_DIR_NAME, tx); VERIFY0(dsl_pool_open_special_dir(dp, MOS_DIR_NAME, &dp->dp_mos_dir)); if (spa_version(spa) >= SPA_VERSION_DEADLISTS) { /* create and open the free dir */ (void) dsl_dir_create_sync(dp, dp->dp_root_dir, FREE_DIR_NAME, tx); VERIFY0(dsl_pool_open_special_dir(dp, FREE_DIR_NAME, &dp->dp_free_dir)); /* create and open the free_bplist */ obj = bpobj_alloc(dp->dp_meta_objset, SPA_MAXBLOCKSIZE, tx); VERIFY(zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj, tx) == 0); VERIFY0(bpobj_open(&dp->dp_free_bpobj, dp->dp_meta_objset, obj)); } if (spa_version(spa) >= SPA_VERSION_DSL_SCRUB) dsl_pool_create_origin(dp, tx); /* create the root dataset */ obj = dsl_dataset_create_sync_dd(dp->dp_root_dir, NULL, 0, tx); /* create the root objset */ VERIFY0(dsl_dataset_hold_obj(dp, obj, FTAG, &ds)); os = dmu_objset_create_impl(dp->dp_spa, ds, dsl_dataset_get_blkptr(ds), DMU_OST_ZFS, tx); #ifdef _KERNEL zfs_create_fs(os, kcred, zplprops, tx); #endif dsl_dataset_rele(ds, FTAG); dmu_tx_commit(tx); rrw_exit(&dp->dp_config_rwlock, FTAG); return (dp); }
int dsl_pool_open(dsl_pool_t *dp) { int err; dsl_dir_t *dd; dsl_dataset_t *ds; uint64_t obj; rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG); err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ROOT_DATASET, sizeof (uint64_t), 1, &dp->dp_root_dir_obj); if (err) goto out; err = dsl_dir_hold_obj(dp, dp->dp_root_dir_obj, NULL, dp, &dp->dp_root_dir); if (err) goto out; err = dsl_pool_open_special_dir(dp, MOS_DIR_NAME, &dp->dp_mos_dir); if (err) goto out; if (spa_version(dp->dp_spa) >= SPA_VERSION_ORIGIN) { err = dsl_pool_open_special_dir(dp, ORIGIN_DIR_NAME, &dd); if (err) goto out; err = dsl_dataset_hold_obj(dp, dd->dd_phys->dd_head_dataset_obj, FTAG, &ds); if (err == 0) { err = dsl_dataset_hold_obj(dp, ds->ds_phys->ds_prev_snap_obj, dp, &dp->dp_origin_snap); dsl_dataset_rele(ds, FTAG); } dsl_dir_rele(dd, dp); if (err) goto out; } if (spa_version(dp->dp_spa) >= SPA_VERSION_DEADLISTS) { err = dsl_pool_open_special_dir(dp, FREE_DIR_NAME, &dp->dp_free_dir); if (err) goto out; err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj); if (err) goto out; VERIFY0(bpobj_open(&dp->dp_free_bpobj, dp->dp_meta_objset, obj)); } if (spa_feature_is_active(dp->dp_spa, &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) { err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_BPTREE_OBJ, sizeof (uint64_t), 1, &dp->dp_bptree_obj); if (err != 0) goto out; } if (spa_feature_is_active(dp->dp_spa, &spa_feature_table[SPA_FEATURE_EMPTY_BPOBJ])) { err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_EMPTY_BPOBJ, sizeof (uint64_t), 1, &dp->dp_empty_bpobj); if (err != 0) goto out; } err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_TMP_USERREFS, sizeof (uint64_t), 1, &dp->dp_tmp_userrefs_obj); if (err == ENOENT) err = 0; if (err) goto out; err = dsl_scan_init(dp, dp->dp_tx.tx_open_txg); out: rrw_exit(&dp->dp_config_rwlock, FTAG); return (err); }
dsl_pool_t * dsl_pool_create(spa_t *spa, nvlist_t *zplprops, dsl_crypto_params_t *dcp, uint64_t txg) { int err; dsl_pool_t *dp = dsl_pool_open_impl(spa, txg); dmu_tx_t *tx = dmu_tx_create_assigned(dp, txg); dsl_dataset_t *ds; uint64_t obj; rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG); /* create and open the MOS (meta-objset) */ dp->dp_meta_objset = dmu_objset_create_impl(spa, NULL, &dp->dp_meta_rootbp, DMU_OST_META, tx); spa->spa_meta_objset = dp->dp_meta_objset; /* create the pool directory */ err = zap_create_claim(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_OT_OBJECT_DIRECTORY, DMU_OT_NONE, 0, tx); ASSERT0(err); /* Initialize scan structures */ VERIFY0(dsl_scan_init(dp, txg)); /* create and open the root dir */ dp->dp_root_dir_obj = dsl_dir_create_sync(dp, NULL, NULL, tx); VERIFY0(dsl_dir_hold_obj(dp, dp->dp_root_dir_obj, NULL, dp, &dp->dp_root_dir)); /* create and open the meta-objset dir */ (void) dsl_dir_create_sync(dp, dp->dp_root_dir, MOS_DIR_NAME, tx); VERIFY0(dsl_pool_open_special_dir(dp, MOS_DIR_NAME, &dp->dp_mos_dir)); if (spa_version(spa) >= SPA_VERSION_DEADLISTS) { /* create and open the free dir */ (void) dsl_dir_create_sync(dp, dp->dp_root_dir, FREE_DIR_NAME, tx); VERIFY0(dsl_pool_open_special_dir(dp, FREE_DIR_NAME, &dp->dp_free_dir)); /* create and open the free_bplist */ obj = bpobj_alloc(dp->dp_meta_objset, SPA_OLD_MAXBLOCKSIZE, tx); VERIFY(zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj, tx) == 0); VERIFY0(bpobj_open(&dp->dp_free_bpobj, dp->dp_meta_objset, obj)); } if (spa_version(spa) >= SPA_VERSION_DSL_SCRUB) dsl_pool_create_origin(dp, tx); /* * Some features may be needed when creating the root dataset, so we * create the feature objects here. */ if (spa_version(spa) >= SPA_VERSION_FEATURES) spa_feature_create_zap_objects(spa, tx); if (dcp != NULL && dcp->cp_crypt != ZIO_CRYPT_OFF && dcp->cp_crypt != ZIO_CRYPT_INHERIT) spa_feature_enable(spa, SPA_FEATURE_ENCRYPTION, tx); /* create the root dataset */ obj = dsl_dataset_create_sync_dd(dp->dp_root_dir, NULL, dcp, 0, tx); /* create the root objset */ VERIFY0(dsl_dataset_hold_obj_flags(dp, obj, DS_HOLD_FLAG_DECRYPT, FTAG, &ds)); #ifdef _KERNEL { objset_t *os; rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); os = dmu_objset_create_impl(dp->dp_spa, ds, dsl_dataset_get_blkptr(ds), DMU_OST_ZFS, tx); rrw_exit(&ds->ds_bp_rwlock, FTAG); zfs_create_fs(os, kcred, zplprops, tx); } #endif dsl_dataset_rele_flags(ds, DS_HOLD_FLAG_DECRYPT, FTAG); dmu_tx_commit(tx); rrw_exit(&dp->dp_config_rwlock, FTAG); return (dp); }
int dsl_pool_open(dsl_pool_t *dp) { int err; dsl_dir_t *dd; dsl_dataset_t *ds; uint64_t obj; rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG); err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ROOT_DATASET, sizeof (uint64_t), 1, &dp->dp_root_dir_obj); if (err) goto out; err = dsl_dir_hold_obj(dp, dp->dp_root_dir_obj, NULL, dp, &dp->dp_root_dir); if (err) goto out; err = dsl_pool_open_special_dir(dp, MOS_DIR_NAME, &dp->dp_mos_dir); if (err) goto out; if (spa_version(dp->dp_spa) >= SPA_VERSION_ORIGIN) { err = dsl_pool_open_special_dir(dp, ORIGIN_DIR_NAME, &dd); if (err) goto out; err = dsl_dataset_hold_obj(dp, dsl_dir_phys(dd)->dd_head_dataset_obj, FTAG, &ds); if (err == 0) { err = dsl_dataset_hold_obj(dp, dsl_dataset_phys(ds)->ds_prev_snap_obj, dp, &dp->dp_origin_snap); dsl_dataset_rele(ds, FTAG); } dsl_dir_rele(dd, dp); if (err) goto out; } if (spa_version(dp->dp_spa) >= SPA_VERSION_DEADLISTS) { err = dsl_pool_open_special_dir(dp, FREE_DIR_NAME, &dp->dp_free_dir); if (err) goto out; err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj); if (err) goto out; VERIFY0(bpobj_open(&dp->dp_free_bpobj, dp->dp_meta_objset, obj)); } if (spa_feature_is_active(dp->dp_spa, SPA_FEATURE_OBSOLETE_COUNTS)) { err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_OBSOLETE_BPOBJ, sizeof (uint64_t), 1, &obj); if (err == 0) { VERIFY0(bpobj_open(&dp->dp_obsolete_bpobj, dp->dp_meta_objset, obj)); } else if (err == ENOENT) { /* * We might not have created the remap bpobj yet. */ err = 0; } else { goto out; } } /* * Note: errors ignored, because the these special dirs, used for * space accounting, are only created on demand. */ (void) dsl_pool_open_special_dir(dp, LEAK_DIR_NAME, &dp->dp_leak_dir); if (spa_feature_is_active(dp->dp_spa, SPA_FEATURE_ASYNC_DESTROY)) { err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_BPTREE_OBJ, sizeof (uint64_t), 1, &dp->dp_bptree_obj); if (err != 0) goto out; } if (spa_feature_is_active(dp->dp_spa, SPA_FEATURE_EMPTY_BPOBJ)) { err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_EMPTY_BPOBJ, sizeof (uint64_t), 1, &dp->dp_empty_bpobj); if (err != 0) goto out; } err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_TMP_USERREFS, sizeof (uint64_t), 1, &dp->dp_tmp_userrefs_obj); if (err == ENOENT) err = 0; if (err) goto out; err = dsl_scan_init(dp, dp->dp_tx.tx_open_txg); out: rrw_exit(&dp->dp_config_rwlock, FTAG); return (err); }
/* * Reopen zfs_sb_t and release VFS ops. */ int zfs_resume_fs(zfs_sb_t *zsb, const char *osname) { int err, err2; ASSERT(RRW_WRITE_HELD(&zsb->z_teardown_lock)); ASSERT(RW_WRITE_HELD(&zsb->z_teardown_inactive_lock)); err = dmu_objset_own(osname, DMU_OST_ZFS, B_FALSE, zsb, &zsb->z_os); if (err) { zsb->z_os = NULL; } else { znode_t *zp; uint64_t sa_obj = 0; err2 = zap_lookup(zsb->z_os, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, &sa_obj); if ((err || err2) && zsb->z_version >= ZPL_VERSION_SA) goto bail; if ((err = sa_setup(zsb->z_os, sa_obj, zfs_attr_table, ZPL_END, &zsb->z_attr_table)) != 0) goto bail; VERIFY(zfs_sb_setup(zsb, B_FALSE) == 0); zsb->z_rollback_time = jiffies; /* * Attempt to re-establish all the active inodes with their * dbufs. If a zfs_rezget() fails, then we unhash the inode * and mark it stale. This prevents a collision if a new * inode/object is created which must use the same inode * number. The stale inode will be be released when the * VFS prunes the dentry holding the remaining references * on the stale inode. */ mutex_enter(&zsb->z_znodes_lock); for (zp = list_head(&zsb->z_all_znodes); zp; zp = list_next(&zsb->z_all_znodes, zp)) { err2 = zfs_rezget(zp); if (err2) { remove_inode_hash(ZTOI(zp)); zp->z_is_stale = B_TRUE; } } mutex_exit(&zsb->z_znodes_lock); } bail: /* release the VFS ops */ rw_exit(&zsb->z_teardown_inactive_lock); rrw_exit(&zsb->z_teardown_lock, FTAG); if (err) { /* * Since we couldn't reopen zfs_sb_t or, setup the * sa framework, force unmount this file system. */ if (zsb->z_os) (void) zfs_umount(zsb->z_sb); } return (err); }
void dsl_destroy_snapshot_sync_impl(dsl_dataset_t *ds, boolean_t defer, dmu_tx_t *tx) { int err; int after_branch_point = FALSE; dsl_pool_t *dp = ds->ds_dir->dd_pool; objset_t *mos = dp->dp_meta_objset; dsl_dataset_t *ds_prev = NULL; uint64_t obj; ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock)); rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); ASSERT3U(dsl_dataset_phys(ds)->ds_bp.blk_birth, <=, tx->tx_txg); rrw_exit(&ds->ds_bp_rwlock, FTAG); ASSERT(refcount_is_zero(&ds->ds_longholds)); if (defer && (ds->ds_userrefs > 0 || dsl_dataset_phys(ds)->ds_num_children > 1)) { ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS); dmu_buf_will_dirty(ds->ds_dbuf, tx); dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_DEFER_DESTROY; spa_history_log_internal_ds(ds, "defer_destroy", tx, ""); return; } ASSERT3U(dsl_dataset_phys(ds)->ds_num_children, <=, 1); /* We need to log before removing it from the namespace. */ spa_history_log_internal_ds(ds, "destroy", tx, ""); dsl_scan_ds_destroyed(ds, tx); obj = ds->ds_object; boolean_t book_exists = dsl_bookmark_ds_destroyed(ds, tx); for (spa_feature_t f = 0; f < SPA_FEATURES; f++) { if (dsl_dataset_feature_is_active(ds, f)) dsl_dataset_deactivate_feature(ds, f, tx); } if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) { ASSERT3P(ds->ds_prev, ==, NULL); VERIFY0(dsl_dataset_hold_obj(dp, dsl_dataset_phys(ds)->ds_prev_snap_obj, FTAG, &ds_prev)); after_branch_point = (dsl_dataset_phys(ds_prev)->ds_next_snap_obj != obj); dmu_buf_will_dirty(ds_prev->ds_dbuf, tx); if (after_branch_point && dsl_dataset_phys(ds_prev)->ds_next_clones_obj != 0) { dsl_dataset_remove_from_next_clones(ds_prev, obj, tx); if (dsl_dataset_phys(ds)->ds_next_snap_obj != 0) { VERIFY0(zap_add_int(mos, dsl_dataset_phys(ds_prev)-> ds_next_clones_obj, dsl_dataset_phys(ds)->ds_next_snap_obj, tx)); } } if (!after_branch_point) { dsl_dataset_phys(ds_prev)->ds_next_snap_obj = dsl_dataset_phys(ds)->ds_next_snap_obj; } }
/* * Reopen zfs_sb_t and release VFS ops. */ int zfs_resume_fs(zfs_sb_t *zsb, const char *osname) { int err, err2; znode_t *zp; uint64_t sa_obj = 0; ASSERT(RRW_WRITE_HELD(&zsb->z_teardown_lock)); ASSERT(RW_WRITE_HELD(&zsb->z_teardown_inactive_lock)); /* * We already own this, so just hold and rele it to update the * objset_t, as the one we had before may have been evicted. */ VERIFY0(dmu_objset_hold(osname, zsb, &zsb->z_os)); VERIFY3P(zsb->z_os->os_dsl_dataset->ds_owner, ==, zsb); VERIFY(dsl_dataset_long_held(zsb->z_os->os_dsl_dataset)); dmu_objset_rele(zsb->z_os, zsb); /* * Make sure version hasn't changed */ err = zfs_get_zplprop(zsb->z_os, ZFS_PROP_VERSION, &zsb->z_version); if (err) goto bail; err = zap_lookup(zsb->z_os, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, &sa_obj); if (err && zsb->z_version >= ZPL_VERSION_SA) goto bail; if ((err = sa_setup(zsb->z_os, sa_obj, zfs_attr_table, ZPL_END, &zsb->z_attr_table)) != 0) goto bail; if (zsb->z_version >= ZPL_VERSION_SA) sa_register_update_callback(zsb->z_os, zfs_sa_upgrade); VERIFY(zfs_sb_setup(zsb, B_FALSE) == 0); zfs_set_fuid_feature(zsb); zsb->z_rollback_time = jiffies; /* * Attempt to re-establish all the active inodes with their * dbufs. If a zfs_rezget() fails, then we unhash the inode * and mark it stale. This prevents a collision if a new * inode/object is created which must use the same inode * number. The stale inode will be be released when the * VFS prunes the dentry holding the remaining references * on the stale inode. */ mutex_enter(&zsb->z_znodes_lock); for (zp = list_head(&zsb->z_all_znodes); zp; zp = list_next(&zsb->z_all_znodes, zp)) { err2 = zfs_rezget(zp); if (err2) { remove_inode_hash(ZTOI(zp)); zp->z_is_stale = B_TRUE; } } mutex_exit(&zsb->z_znodes_lock); bail: /* release the VFS ops */ rw_exit(&zsb->z_teardown_inactive_lock); rrw_exit(&zsb->z_teardown_lock, FTAG); if (err) { /* * Since we couldn't setup the sa framework, try to force * unmount this file system. */ if (zsb->z_os) (void) zfs_umount(zsb->z_sb); } return (err); }
/*ARGSUSED*/ static int zfs_umount(vfs_t *vfsp, int fflag) { zfsvfs_t *zfsvfs = vfsp->vfs_data; objset_t *os; cred_t *cr = curthread->td_ucred; int ret; ret = secpolicy_fs_unmount(cr, vfsp); if (ret) { ret = dsl_deleg_access((char *)refstr_value(vfsp->vfs_resource), ZFS_DELEG_PERM_MOUNT, cr); if (ret) return (ret); } /* * We purge the parent filesystem's vfsp as the parent filesystem * and all of its snapshots have their vnode's v_vfsp set to the * parent's filesystem's vfsp. Note, 'z_parent' is self * referential for non-snapshots. */ (void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0); /* * Unmount any snapshots mounted under .zfs before unmounting the * dataset itself. */ if (zfsvfs->z_ctldir != NULL) { if ((ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0) return (ret); ret = vflush(vfsp, 0, 0, curthread); ASSERT(ret == EBUSY); if (!(fflag & MS_FORCE)) { if (zfsvfs->z_ctldir->v_count > 1) return (EBUSY); ASSERT(zfsvfs->z_ctldir->v_count == 1); } zfsctl_destroy(zfsvfs); ASSERT(zfsvfs->z_ctldir == NULL); } if (fflag & MS_FORCE) { /* * Mark file system as unmounted before calling * vflush(FORCECLOSE). This way we ensure no future vnops * will be called and risk operating on DOOMED vnodes. */ rrw_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG); zfsvfs->z_unmounted = B_TRUE; rrw_exit(&zfsvfs->z_teardown_lock, FTAG); } /* * Flush all the files. */ ret = vflush(vfsp, 1, (fflag & MS_FORCE) ? FORCECLOSE : 0, curthread); if (ret != 0) { if (!zfsvfs->z_issnap) { zfsctl_create(zfsvfs); ASSERT(zfsvfs->z_ctldir != NULL); } return (ret); } if (!(fflag & MS_FORCE)) { /* * Check the number of active vnodes in the file system. * Our count is maintained in the vfs structure, but the * number is off by 1 to indicate a hold on the vfs * structure itself. * * The '.zfs' directory maintains a reference of its * own, and any active references underneath are * reflected in the vnode count. */ if (zfsvfs->z_ctldir == NULL) { if (vfsp->vfs_count > 1) return (EBUSY); } else { if (vfsp->vfs_count > 2 || zfsvfs->z_ctldir->v_count > 1) return (EBUSY); } } else { MNT_ILOCK(vfsp); vfsp->mnt_kern_flag |= MNTK_UNMOUNTF; MNT_IUNLOCK(vfsp); } VERIFY(zfsvfs_teardown(zfsvfs, B_TRUE) == 0); os = zfsvfs->z_os; /* * z_os will be NULL if there was an error in * attempting to reopen zfsvfs. */ if (os != NULL) { /* * Unset the objset user_ptr. */ mutex_enter(&os->os->os_user_ptr_lock); dmu_objset_set_user(os, NULL); mutex_exit(&os->os->os_user_ptr_lock); /* * Finally release the objset */ dmu_objset_close(os); } /* * We can now safely destroy the '.zfs' directory node. */ if (zfsvfs->z_ctldir != NULL) zfsctl_destroy(zfsvfs); if (zfsvfs->z_issnap) { vnode_t *svp = vfsp->mnt_vnodecovered; if (svp->v_count >= 2) VN_RELE(svp); } zfs_freevfs(vfsp); return (0); }
/* * Teardown the zfsvfs::z_os. * * Note, if 'unmounting' if FALSE, we return with the 'z_teardown_lock' * and 'z_teardown_inactive_lock' held. */ static int zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting) { znode_t *zp; rrw_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG); if (!unmounting) { /* * We purge the parent filesystem's vfsp as the parent * filesystem and all of its snapshots have their vnode's * v_vfsp set to the parent's filesystem's vfsp. Note, * 'z_parent' is self referential for non-snapshots. */ (void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0); #ifdef FREEBSD_NAMECACHE cache_purgevfs(zfsvfs->z_parent->z_vfs); #endif } /* * Close the zil. NB: Can't close the zil while zfs_inactive * threads are blocked as zil_close can call zfs_inactive. */ if (zfsvfs->z_log) { zil_close(zfsvfs->z_log); zfsvfs->z_log = NULL; } rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_WRITER); /* * If we are not unmounting (ie: online recv) and someone already * unmounted this file system while we were doing the switcheroo, * or a reopen of z_os failed then just bail out now. */ if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) { rw_exit(&zfsvfs->z_teardown_inactive_lock); rrw_exit(&zfsvfs->z_teardown_lock, FTAG); return (EIO); } /* * At this point there are no vops active, and any new vops will * fail with EIO since we have z_teardown_lock for writer (only * relavent for forced unmount). * * Release all holds on dbufs. */ mutex_enter(&zfsvfs->z_znodes_lock); for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL; zp = list_next(&zfsvfs->z_all_znodes, zp)) if (zp->z_dbuf) { ASSERT(ZTOV(zp)->v_count >= 0); zfs_znode_dmu_fini(zp); } mutex_exit(&zfsvfs->z_znodes_lock); /* * If we are unmounting, set the unmounted flag and let new vops * unblock. zfs_inactive will have the unmounted behavior, and all * other vops will fail with EIO. */ if (unmounting) { zfsvfs->z_unmounted = B_TRUE; rrw_exit(&zfsvfs->z_teardown_lock, FTAG); rw_exit(&zfsvfs->z_teardown_inactive_lock); #ifdef __FreeBSD__ /* * Some znodes might not be fully reclaimed, wait for them. */ mutex_enter(&zfsvfs->z_znodes_lock); while (list_head(&zfsvfs->z_all_znodes) != NULL) { msleep(zfsvfs, &zfsvfs->z_znodes_lock, 0, "zteardown", 0); } mutex_exit(&zfsvfs->z_znodes_lock); #endif } /* * z_os will be NULL if there was an error in attempting to reopen * zfsvfs, so just return as the properties had already been * unregistered and cached data had been evicted before. */ if (zfsvfs->z_os == NULL) return (0); /* * Unregister properties. */ zfs_unregister_callbacks(zfsvfs); /* * Evict cached data */ if (dmu_objset_evict_dbufs(zfsvfs->z_os)) { txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); (void) dmu_objset_evict_dbufs(zfsvfs->z_os); } return (0); }
int dsl_crypto_key_load(const char *dsname, zcrypt_key_t *wrappingkey) { dsl_dataset_t *ds; uint64_t crypt; int error; dsl_pool_t *dp; error = dsl_pool_hold(dsname, FTAG, &dp); if (error != 0) return (error); if ((error = dsl_dataset_hold(dp, dsname, FTAG, &ds)) != 0) { dsl_pool_rele(dp, FTAG); return (error); } /* * This is key load not key change so if ds->ds_key is already * set we fail. */ if (zcrypt_keystore_find_node(dsl_dataset_get_spa(ds), ds->ds_object, B_FALSE) != NULL) { dsl_dataset_rele(ds, FTAG); dsl_pool_rele(dp, FTAG); return (EEXIST); } /* * Find out what size of key we expect. * * For now the wrapping key size (and type) matches the size * of the dataset key, this may not always be the case * (particularly if we ever support wrapping dataset keys * with asymmetric keys (eg RSA)). * * When alternate wrapping keys are added it maybe done using * a index property. */ rrw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER, FTAG); error = dsl_prop_get_ds(ds, zfs_prop_to_name(ZFS_PROP_ENCRYPTION), 8, 1, &crypt, NULL/*, DSL_PROP_GET_EFFECTIVE*/); rrw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock, FTAG); if (error != 0) { dsl_dataset_rele(ds, FTAG); dsl_pool_rele(dp, FTAG); return (error); } if (crypt == ZIO_CRYPT_OFF) { dsl_dataset_rele(ds, FTAG); dsl_pool_rele(dp, FTAG); return (ENOTSUP); } ASSERT(crypt != ZIO_CRYPT_INHERIT); error = dsl_keychain_load(ds, crypt, wrappingkey); dsl_dataset_rele(ds, FTAG); dsl_pool_rele(dp, FTAG); return (error); }
/* * Teardown the zfs_sb_t. * * Note, if 'unmounting' if FALSE, we return with the 'z_teardown_lock' * and 'z_teardown_inactive_lock' held. */ int zfs_sb_teardown(zfs_sb_t *zsb, boolean_t unmounting) { znode_t *zp; rrw_enter(&zsb->z_teardown_lock, RW_WRITER, FTAG); if (!unmounting) { /* * We purge the parent filesystem's super block as the * parent filesystem and all of its snapshots have their * inode's super block set to the parent's filesystem's * super block. Note, 'z_parent' is self referential * for non-snapshots. */ shrink_dcache_sb(zsb->z_parent->z_sb); } /* * If someone has not already unmounted this file system, * drain the iput_taskq to ensure all active references to the * zfs_sb_t have been handled only then can it be safely destroyed. */ if (zsb->z_os) taskq_wait(dsl_pool_iput_taskq(dmu_objset_pool(zsb->z_os))); /* * Close the zil. NB: Can't close the zil while zfs_inactive * threads are blocked as zil_close can call zfs_inactive. */ if (zsb->z_log) { zil_close(zsb->z_log); zsb->z_log = NULL; } rw_enter(&zsb->z_teardown_inactive_lock, RW_WRITER); /* * If we are not unmounting (ie: online recv) and someone already * unmounted this file system while we were doing the switcheroo, * or a reopen of z_os failed then just bail out now. */ if (!unmounting && (zsb->z_unmounted || zsb->z_os == NULL)) { rw_exit(&zsb->z_teardown_inactive_lock); rrw_exit(&zsb->z_teardown_lock, FTAG); return (EIO); } /* * At this point there are no VFS ops active, and any new VFS ops * will fail with EIO since we have z_teardown_lock for writer (only * relevant for forced unmount). * * Release all holds on dbufs. */ mutex_enter(&zsb->z_znodes_lock); for (zp = list_head(&zsb->z_all_znodes); zp != NULL; zp = list_next(&zsb->z_all_znodes, zp)) { if (zp->z_sa_hdl) { ASSERT(atomic_read(&ZTOI(zp)->i_count) > 0); zfs_znode_dmu_fini(zp); } } mutex_exit(&zsb->z_znodes_lock); /* * If we are unmounting, set the unmounted flag and let new VFS ops * unblock. zfs_inactive will have the unmounted behavior, and all * other VFS ops will fail with EIO. */ if (unmounting) { zsb->z_unmounted = B_TRUE; rrw_exit(&zsb->z_teardown_lock, FTAG); rw_exit(&zsb->z_teardown_inactive_lock); } /* * z_os will be NULL if there was an error in attempting to reopen * zsb, so just return as the properties had already been * * unregistered and cached data had been evicted before. */ if (zsb->z_os == NULL) return (0); /* * Unregister properties. */ zfs_unregister_callbacks(zsb); /* * Evict cached data */ if (dsl_dataset_is_dirty(dmu_objset_ds(zsb->z_os)) && !zfs_is_readonly(zsb)) txg_wait_synced(dmu_objset_pool(zsb->z_os), 0); dmu_objset_evict_dbufs(zsb->z_os); return (0); }
static int dsl_crypto_key_change_find(const char *dsname, void *arg) { struct wkey_change_arg *ca = arg; struct kcnode *kcn; dsl_dataset_t *ds; objset_t *os; uint64_t crypt; char caource[MAXNAMELEN]; char setpoint[MAXNAMELEN]; int err; dsl_pool_t *dp; err = dsl_pool_hold(dsname, FTAG, &dp); if (err != 0) return (err); kcn = kmem_alloc(sizeof (struct kcnode), KM_SLEEP); if ((err = dsl_dataset_hold(dp, dsname, kcn, &ds)) != 0) { kmem_free(kcn, sizeof (struct kcnode)); dsl_pool_rele(dp, FTAG); return (err); } if ((err = dmu_objset_from_ds(ds, &os)) != 0) { dsl_dataset_rele(ds, kcn); dsl_pool_rele(dp, FTAG); kmem_free(kcn, sizeof (struct kcnode)); return (err); } /* * Check that this child dataset of ca->parent * is actually inheriting keysource from ca->parent and * not somewhere else (eg local, or some other dataset). */ rrw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER, FTAG); VERIFY(dsl_prop_get_ds(ds, zfs_prop_to_name(ZFS_PROP_ENCRYPTION), 8, 1, &crypt, NULL/*, DSL_PROP_GET_EFFECTIVE*/) == 0); VERIFY(dsl_prop_get_ds(ds, zfs_prop_to_name(ZFS_PROP_KEYSOURCE), 1, sizeof (caource), &caource, setpoint/*, DSL_PROP_GET_EFFECTIVE*/) == 0); rrw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock, FTAG); if (crypt == ZIO_CRYPT_OFF || ((strcmp(ca->ca_parent, setpoint) != 0 && strcmp(ca->ca_parent, dsname) != 0))) { dsl_dataset_rele(ds, kcn); dsl_pool_rele(dp, FTAG); kmem_free(kcn, sizeof (struct kcnode)); return (0); } //dsl_sync_task_create(ca->ca_dstg, dsl_crypto_key_change_check, // dsl_crypto_key_change_sync, ds, arg, 1); ca->ca_ds = ds; err = dsl_sync_task(dsname, dsl_crypto_key_change_check, dsl_crypto_key_change_sync, arg, 1, ZFS_SPACE_CHECK_NONE); kcn->kc_ds = ds; list_insert_tail(&ca->ca_nodes, kcn); return (0); }
/* * Teardown the zfs_sb_t. * * Note, if 'unmounting' if FALSE, we return with the 'z_teardown_lock' * and 'z_teardown_inactive_lock' held. */ int zfs_sb_teardown(zfs_sb_t *zsb, boolean_t unmounting) { znode_t *zp; /* * If someone has not already unmounted this file system, * drain the iput_taskq to ensure all active references to the * zfs_sb_t have been handled only then can it be safely destroyed. */ if (zsb->z_os) { /* * If we're unmounting we have to wait for the list to * drain completely. * * If we're not unmounting there's no guarantee the list * will drain completely, but iputs run from the taskq * may add the parents of dir-based xattrs to the taskq * so we want to wait for these. * * We can safely read z_nr_znodes without locking because the * VFS has already blocked operations which add to the * z_all_znodes list and thus increment z_nr_znodes. */ int round = 0; while (zsb->z_nr_znodes > 0) { taskq_wait_outstanding(dsl_pool_iput_taskq( dmu_objset_pool(zsb->z_os)), 0); if (++round > 1 && !unmounting) break; } } rrw_enter(&zsb->z_teardown_lock, RW_WRITER, FTAG); if (!unmounting) { /* * We purge the parent filesystem's super block as the * parent filesystem and all of its snapshots have their * inode's super block set to the parent's filesystem's * super block. Note, 'z_parent' is self referential * for non-snapshots. */ shrink_dcache_sb(zsb->z_parent->z_sb); } /* * Close the zil. NB: Can't close the zil while zfs_inactive * threads are blocked as zil_close can call zfs_inactive. */ if (zsb->z_log) { zil_close(zsb->z_log); zsb->z_log = NULL; } rw_enter(&zsb->z_teardown_inactive_lock, RW_WRITER); /* * If we are not unmounting (ie: online recv) and someone already * unmounted this file system while we were doing the switcheroo, * or a reopen of z_os failed then just bail out now. */ if (!unmounting && (zsb->z_unmounted || zsb->z_os == NULL)) { rw_exit(&zsb->z_teardown_inactive_lock); rrw_exit(&zsb->z_teardown_lock, FTAG); return (SET_ERROR(EIO)); } /* * At this point there are no VFS ops active, and any new VFS ops * will fail with EIO since we have z_teardown_lock for writer (only * relevant for forced unmount). * * Release all holds on dbufs. */ if (!unmounting) { mutex_enter(&zsb->z_znodes_lock); for (zp = list_head(&zsb->z_all_znodes); zp != NULL; zp = list_next(&zsb->z_all_znodes, zp)) { if (zp->z_sa_hdl) zfs_znode_dmu_fini(zp); } mutex_exit(&zsb->z_znodes_lock); } /* * If we are unmounting, set the unmounted flag and let new VFS ops * unblock. zfs_inactive will have the unmounted behavior, and all * other VFS ops will fail with EIO. */ if (unmounting) { zsb->z_unmounted = B_TRUE; rrw_exit(&zsb->z_teardown_lock, FTAG); rw_exit(&zsb->z_teardown_inactive_lock); } /* * z_os will be NULL if there was an error in attempting to reopen * zsb, so just return as the properties had already been * * unregistered and cached data had been evicted before. */ if (zsb->z_os == NULL) return (0); /* * Unregister properties. */ zfs_unregister_callbacks(zsb); /* * Evict cached data */ if (dsl_dataset_is_dirty(dmu_objset_ds(zsb->z_os)) && !zfs_is_readonly(zsb)) txg_wait_synced(dmu_objset_pool(zsb->z_os), 0); dmu_objset_evict_dbufs(zsb->z_os); return (0); }