Beispiel #1
0
void
vdev_indirect_births_add_entry(vdev_indirect_births_t *vib,
    uint64_t max_offset, uint64_t txg, dmu_tx_t *tx)
{
	vdev_indirect_birth_entry_phys_t vibe;
	uint64_t old_size;
	uint64_t new_size;
	vdev_indirect_birth_entry_phys_t *new_entries;

	ASSERT(dmu_tx_is_syncing(tx));
	ASSERT(dsl_pool_sync_context(dmu_tx_pool(tx)));
	ASSERT(vdev_indirect_births_verify(vib));

	dmu_buf_will_dirty(vib->vib_dbuf, tx);

	vibe.vibe_offset = max_offset;
	vibe.vibe_phys_birth_txg = txg;

	old_size = vdev_indirect_births_size_impl(vib);
	dmu_write(vib->vib_objset, vib->vib_object, old_size, sizeof (vibe),
	    &vibe, tx);
	vib->vib_phys->vib_count++;
	new_size = vdev_indirect_births_size_impl(vib);

	new_entries = kmem_alloc(new_size, KM_SLEEP);
	if (old_size > 0) {
		bcopy(vib->vib_entries, new_entries, old_size);
		kmem_free(vib->vib_entries, old_size);
	}
	new_entries[vib->vib_phys->vib_count - 1] = vibe;
	vib->vib_entries = new_entries;
}
Beispiel #2
0
zcrypt_keystore_node_t *
zcrypt_keystore_find_node(spa_t *spa, uint64_t dsobj,
    boolean_t config_rwlock_held)
{
	zcrypt_keystore_node_t search;
	zcrypt_keystore_node_t *found = NULL;

	rw_enter(&spa->spa_keystore->sk_lock, RW_READER);
	if (avl_is_empty(&spa->spa_keystore->sk_dslkeys))
		goto out;

	search.skn_os = dsobj;
	found = avl_find(&spa->spa_keystore->sk_dslkeys, &search, NULL);
	if (found == NULL) {
		int error;
		dsl_pool_t *dp = spa_get_dsl(spa);
		dsl_dataset_t *ds;
		boolean_t need_lock;

		rw_exit(&spa->spa_keystore->sk_lock);
		need_lock = !dsl_pool_sync_context(dp) && !config_rwlock_held;
		if (need_lock)
			rw_enter(&dp->dp_config_rwlock, RW_READER);
		error = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds);
		if (need_lock)
			rw_exit(&dp->dp_config_rwlock);
		rw_enter(&spa->spa_keystore->sk_lock, RW_READER);

		if (!error) {
			if (dsl_dataset_is_snapshot(ds)) {
				search.skn_os =
				    ds->ds_dir->dd_phys->dd_head_dataset_obj;
				found = avl_find(&spa->spa_keystore->sk_dslkeys,
				    &search, NULL);
			}
			dsl_dataset_rele(ds, FTAG);
		}
	}
out:
	rw_exit(&spa->spa_keystore->sk_lock);

	return (found);
}
Beispiel #3
0
void
spa_feature_create_zap_objects(spa_t *spa, dmu_tx_t *tx)
{
    /*
     * We create feature flags ZAP objects in two instances: during pool
     * creation and during pool upgrade.
     */
    ASSERT(dsl_pool_sync_context(spa_get_dsl(spa)) || (!spa->spa_sync_on &&
            tx->tx_txg == TXG_INITIAL));

    spa->spa_feat_for_read_obj = zap_create_link(spa->spa_meta_objset,
                                 DMU_OTN_ZAP_METADATA, DMU_POOL_DIRECTORY_OBJECT,
                                 DMU_POOL_FEATURES_FOR_READ, tx);
    spa->spa_feat_for_write_obj = zap_create_link(spa->spa_meta_objset,
                                  DMU_OTN_ZAP_METADATA, DMU_POOL_DIRECTORY_OBJECT,
                                  DMU_POOL_FEATURES_FOR_WRITE, tx);
    spa->spa_feat_desc_obj = zap_create_link(spa->spa_meta_objset,
                             DMU_OTN_ZAP_METADATA, DMU_POOL_DIRECTORY_OBJECT,
                             DMU_POOL_FEATURE_DESCRIPTIONS, tx);
}
Beispiel #4
0
void
dsl_free_sync(zio_t *pio, dsl_pool_t *dp, uint64_t txg, const blkptr_t *bpp)
{
	ASSERT(dsl_pool_sync_context(dp));
	zio_nowait(zio_free_sync(pio, dp->dp_spa, txg, bpp, pio->io_flags));
}
Beispiel #5
0
int
dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj,
    const char *tail, void *tag, dsl_dir_t **ddp)
{
	dmu_buf_t *dbuf;
	dsl_dir_t *dd;
	int err;

	ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
	    dsl_pool_sync_context(dp));

	err = dmu_bonus_hold(dp->dp_meta_objset, ddobj, tag, &dbuf);
	if (err)
		return (err);
	dd = dmu_buf_get_user(dbuf);
#ifdef ZFS_DEBUG
	{
		dmu_object_info_t doi;
		dmu_object_info_from_db(dbuf, &doi);
		ASSERT3U(doi.doi_type, ==, DMU_OT_DSL_DIR);
		ASSERT3U(doi.doi_bonus_size, >=, sizeof (dsl_dir_phys_t));
	}
#endif
	if (dd == NULL) {
		dsl_dir_t *winner;

		dd = kmem_zalloc(sizeof (dsl_dir_t), KM_SLEEP);
		dd->dd_object = ddobj;
		dd->dd_dbuf = dbuf;
		dd->dd_pool = dp;
		dd->dd_phys = dbuf->db_data;
		mutex_init(&dd->dd_lock, NULL, MUTEX_DEFAULT, NULL);

		list_create(&dd->dd_prop_cbs, sizeof (dsl_prop_cb_record_t),
		    offsetof(dsl_prop_cb_record_t, cbr_node));

		dsl_dir_snap_cmtime_update(dd);

		if (dd->dd_phys->dd_parent_obj) {
			err = dsl_dir_open_obj(dp, dd->dd_phys->dd_parent_obj,
			    NULL, dd, &dd->dd_parent);
			if (err)
				goto errout;
			if (tail) {
#ifdef ZFS_DEBUG
				uint64_t foundobj;

				err = zap_lookup(dp->dp_meta_objset,
				    dd->dd_parent->dd_phys->dd_child_dir_zapobj,
				    tail, sizeof (foundobj), 1, &foundobj);
				ASSERT(err || foundobj == ddobj);
#endif
				(void) strcpy(dd->dd_myname, tail);
			} else {
				err = zap_value_search(dp->dp_meta_objset,
				    dd->dd_parent->dd_phys->dd_child_dir_zapobj,
				    ddobj, 0, dd->dd_myname);
			}
			if (err)
				goto errout;
		} else {
			(void) strcpy(dd->dd_myname, spa_name(dp->dp_spa));
		}

		if (dsl_dir_is_clone(dd)) {
			dmu_buf_t *origin_bonus;
			dsl_dataset_phys_t *origin_phys;

			/*
			 * We can't open the origin dataset, because
			 * that would require opening this dsl_dir.
			 * Just look at its phys directly instead.
			 */
			err = dmu_bonus_hold(dp->dp_meta_objset,
			    dd->dd_phys->dd_origin_obj, FTAG, &origin_bonus);
			if (err)
				goto errout;
			origin_phys = origin_bonus->db_data;
			dd->dd_origin_txg =
			    origin_phys->ds_creation_txg;
			dmu_buf_rele(origin_bonus, FTAG);
		}

		winner = dmu_buf_set_user_ie(dbuf, dd, &dd->dd_phys,
		    dsl_dir_evict);
		if (winner) {
			if (dd->dd_parent)
				dsl_dir_close(dd->dd_parent, dd);
			mutex_destroy(&dd->dd_lock);
			kmem_free(dd, sizeof (dsl_dir_t));
			dd = winner;
		} else {
			spa_open_ref(dp->dp_spa, dd);
		}
	}

	/*
	 * The dsl_dir_t has both open-to-close and instantiate-to-evict
	 * holds on the spa.  We need the open-to-close holds because
	 * otherwise the spa_refcnt wouldn't change when we open a
	 * dir which the spa also has open, so we could incorrectly
	 * think it was OK to unload/export/destroy the pool.  We need
	 * the instantiate-to-evict hold because the dsl_dir_t has a
	 * pointer to the dd_pool, which has a pointer to the spa_t.
	 */
	spa_open_ref(dp->dp_spa, tag);
	ASSERT3P(dd->dd_pool, ==, dp);
	ASSERT3U(dd->dd_object, ==, ddobj);
	ASSERT3P(dd->dd_dbuf, ==, dbuf);
	*ddp = dd;
	return (0);

errout:
	if (dd->dd_parent)
		dsl_dir_close(dd->dd_parent, dd);
	mutex_destroy(&dd->dd_lock);
	kmem_free(dd, sizeof (dsl_dir_t));
	dmu_buf_rele(dbuf, tag);
	return (err);
}
Beispiel #6
0
/*
 * same as dsl_open_dir, ignore the first component of name and use the
 * spa instead
 */
int
dsl_dir_open_spa(spa_t *spa, const char *name, void *tag,
    dsl_dir_t **ddp, const char **tailp)
{
	char buf[MAXNAMELEN];
	const char *next, *nextnext = NULL;
	int err;
	dsl_dir_t *dd;
	dsl_pool_t *dp;
	uint64_t ddobj;
	int openedspa = FALSE;

	dprintf("%s\n", name);

	err = getcomponent(name, buf, &next);
	if (err)
		return (err);
	if (spa == NULL) {
		err = spa_open(buf, &spa, FTAG);
		if (err) {
			dprintf("spa_open(%s) failed\n", buf);
			return (err);
		}
		openedspa = TRUE;

		/* XXX this assertion belongs in spa_open */
		ASSERT(!dsl_pool_sync_context(spa_get_dsl(spa)));
	}

	dp = spa_get_dsl(spa);

	rw_enter(&dp->dp_config_rwlock, RW_READER);
	err = dsl_dir_open_obj(dp, dp->dp_root_dir_obj, NULL, tag, &dd);
	if (err) {
		rw_exit(&dp->dp_config_rwlock);
		if (openedspa)
			spa_close(spa, FTAG);
		return (err);
	}

	while (next != NULL) {
		dsl_dir_t *child_ds;
		err = getcomponent(next, buf, &nextnext);
		if (err)
			break;
		ASSERT(next[0] != '\0');
		if (next[0] == '@')
			break;
		dprintf("looking up %s in obj%lld\n",
		    buf, dd->dd_phys->dd_child_dir_zapobj);

		err = zap_lookup(dp->dp_meta_objset,
		    dd->dd_phys->dd_child_dir_zapobj,
		    buf, sizeof (ddobj), 1, &ddobj);
		if (err) {
			if (err == ENOENT)
				err = 0;
			break;
		}

		err = dsl_dir_open_obj(dp, ddobj, buf, tag, &child_ds);
		if (err)
			break;
		dsl_dir_close(dd, tag);
		dd = child_ds;
		next = nextnext;
	}
	rw_exit(&dp->dp_config_rwlock);

	if (err) {
		dsl_dir_close(dd, tag);
		if (openedspa)
			spa_close(spa, FTAG);
		return (err);
	}

	/*
	 * It's an error if there's more than one component left, or
	 * tailp==NULL and there's any component left.
	 */
	if (next != NULL &&
	    (tailp == NULL || (nextnext && nextnext[0] != '\0'))) {
		/* bad path name */
		dsl_dir_close(dd, tag);
		dprintf("next=%p (%s) tail=%p\n", next, next?next:"", tailp);
		err = ENOENT;
	}
	if (tailp)
		*tailp = next;
	if (openedspa)
		spa_close(spa, FTAG);
	*ddp = dd;
	return (err);
}
Beispiel #7
0
/*
 * Find all objsets under name, call func on each
 */
int
dmu_objset_find_spa(spa_t *spa, const char *name,
    int func(spa_t *, uint64_t, const char *, void *), void *arg, int flags)
{
	dsl_dir_t *dd;
	dsl_pool_t *dp;
	dsl_dataset_t *ds;
	zap_cursor_t zc;
	zap_attribute_t *attr;
	char *child;
	uint64_t thisobj;
	int err;

	if (name == NULL)
		name = spa_name(spa);
	err = dsl_dir_open_spa(spa, name, FTAG, &dd, NULL);
	if (err)
		return (err);

	/* Don't visit hidden ($MOS & $ORIGIN) objsets. */
	if (dd->dd_myname[0] == '$') {
		dsl_dir_close(dd, FTAG);
		return (0);
	}

	thisobj = dd->dd_phys->dd_head_dataset_obj;
	attr = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);
	dp = dd->dd_pool;

	/*
	 * Iterate over all children.
	 */
	if (flags & DS_FIND_CHILDREN) {
		for (zap_cursor_init(&zc, dp->dp_meta_objset,
		    dd->dd_phys->dd_child_dir_zapobj);
		    zap_cursor_retrieve(&zc, attr) == 0;
		    (void) zap_cursor_advance(&zc)) {
			ASSERT(attr->za_integer_length == sizeof (uint64_t));
			ASSERT(attr->za_num_integers == 1);

			child = kmem_alloc(MAXPATHLEN, KM_SLEEP);
			(void) strcpy(child, name);
			(void) strcat(child, "/");
			(void) strcat(child, attr->za_name);
			err = dmu_objset_find_spa(spa, child, func, arg, flags);
			kmem_free(child, MAXPATHLEN);
			if (err)
				break;
		}
		zap_cursor_fini(&zc);

		if (err) {
			dsl_dir_close(dd, FTAG);
			kmem_free(attr, sizeof (zap_attribute_t));
			return (err);
		}
	}

	/*
	 * Iterate over all snapshots.
	 */
	if (flags & DS_FIND_SNAPSHOTS) {
		if (!dsl_pool_sync_context(dp))
			rw_enter(&dp->dp_config_rwlock, RW_READER);
		err = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds);
		if (!dsl_pool_sync_context(dp))
			rw_exit(&dp->dp_config_rwlock);

		if (err == 0) {
			uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj;
			dsl_dataset_rele(ds, FTAG);

			for (zap_cursor_init(&zc, dp->dp_meta_objset, snapobj);
			    zap_cursor_retrieve(&zc, attr) == 0;
			    (void) zap_cursor_advance(&zc)) {
				ASSERT(attr->za_integer_length ==
				    sizeof (uint64_t));
				ASSERT(attr->za_num_integers == 1);

				child = kmem_alloc(MAXPATHLEN, KM_SLEEP);
				(void) strcpy(child, name);
				(void) strcat(child, "@");
				(void) strcat(child, attr->za_name);
				err = func(spa, attr->za_first_integer,
				    child, arg);
				kmem_free(child, MAXPATHLEN);
				if (err)
					break;
			}
			zap_cursor_fini(&zc);
		}
	}

	dsl_dir_close(dd, FTAG);
	kmem_free(attr, sizeof (zap_attribute_t));

	if (err)
		return (err);

	/*
	 * Apply to self if appropriate.
	 */
	err = func(spa, thisobj, name, arg);
	return (err);
}
Beispiel #8
0
boolean_t
vdev_indirect_should_condense(vdev_t *vd)
{
	vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping;
	spa_t *spa = vd->vdev_spa;

	ASSERT(dsl_pool_sync_context(spa->spa_dsl_pool));

	if (!zfs_condense_indirect_vdevs_enable)
		return (B_FALSE);

	/*
	 * We can only condense one indirect vdev at a time.
	 */
	if (spa->spa_condensing_indirect != NULL)
		return (B_FALSE);

	if (spa_shutting_down(spa))
		return (B_FALSE);

	/*
	 * The mapping object size must not change while we are
	 * condensing, so we can only condense indirect vdevs
	 * (not vdevs that are still in the middle of being removed).
	 */
	if (vd->vdev_ops != &vdev_indirect_ops)
		return (B_FALSE);

	/*
	 * If nothing new has been marked obsolete, there is no
	 * point in condensing.
	 */
	if (vd->vdev_obsolete_sm == NULL) {
		ASSERT0(vdev_obsolete_sm_object(vd));
		return (B_FALSE);
	}

	ASSERT(vd->vdev_obsolete_sm != NULL);

	ASSERT3U(vdev_obsolete_sm_object(vd), ==,
	    space_map_object(vd->vdev_obsolete_sm));

	uint64_t bytes_mapped = vdev_indirect_mapping_bytes_mapped(vim);
	uint64_t bytes_obsolete = space_map_allocated(vd->vdev_obsolete_sm);
	uint64_t mapping_size = vdev_indirect_mapping_size(vim);
	uint64_t obsolete_sm_size = space_map_length(vd->vdev_obsolete_sm);

	ASSERT3U(bytes_obsolete, <=, bytes_mapped);

	/*
	 * If a high percentage of the bytes that are mapped have become
	 * obsolete, condense (unless the mapping is already small enough).
	 * This has a good chance of reducing the amount of memory used
	 * by the mapping.
	 */
	if (bytes_obsolete * 100 / bytes_mapped >=
	    zfs_indirect_condense_obsolete_pct &&
	    mapping_size > zfs_condense_min_mapping_bytes) {
		zfs_dbgmsg("should condense vdev %llu because obsolete "
		    "spacemap covers %d%% of %lluMB mapping",
		    (u_longlong_t)vd->vdev_id,
		    (int)(bytes_obsolete * 100 / bytes_mapped),
		    (u_longlong_t)bytes_mapped / 1024 / 1024);
		return (B_TRUE);
	}

	/*
	 * If the obsolete space map takes up too much space on disk,
	 * condense in order to free up this disk space.
	 */
	if (obsolete_sm_size >= zfs_condense_max_obsolete_bytes) {
		zfs_dbgmsg("should condense vdev %llu because obsolete sm "
		    "length %lluMB >= max size %lluMB",
		    (u_longlong_t)vd->vdev_id,
		    (u_longlong_t)obsolete_sm_size / 1024 / 1024,
		    (u_longlong_t)zfs_condense_max_obsolete_bytes /
		    1024 / 1024);
		return (B_TRUE);
	}

	return (B_FALSE);
}