Example #1
static void
dmu_objset_create_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
{
	dsl_dir_t *dd = arg1;
	struct oscarg *oa = arg2;
	dsl_dataset_t *ds;
	blkptr_t *bp;
	uint64_t dsobj;

	ASSERT(dmu_tx_is_syncing(tx));

	dsobj = dsl_dataset_create_sync(dd, oa->lastname,
	    oa->clone_parent, oa->flags, cr, tx);

	VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool, dsobj, FTAG, &ds));
	bp = dsl_dataset_get_blkptr(ds);
	if (BP_IS_HOLE(bp)) {
		objset_impl_t *osi;

		/* This is an empty dmu_objset; not a clone. */
		osi = dmu_objset_create_impl(dsl_dataset_get_spa(ds),
		    ds, bp, oa->type, tx);

		if (oa->userfunc)
			oa->userfunc(&osi->os, oa->userarg, cr, tx);
	}

	spa_history_internal_log(LOG_DS_CREATE, dd->dd_pool->dp_spa,
	    tx, cr, "dataset = %llu", dsobj);

	dsl_dataset_rele(ds, FTAG);
}
Example #2
/* ARGSUSED */
int
dsl_dir_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dir_t *dd = arg1;
	dsl_pool_t *dp = dd->dd_pool;
	objset_t *mos = dp->dp_meta_objset;
	int err;
	uint64_t count;

	/*
	 * There should be exactly two holds, both from
	 * dsl_dataset_destroy: one on the dd directory, and one on its
	 * head ds.  If there are more holds, then a concurrent thread is
	 * performing a lookup inside this dir while we're trying to destroy
	 * it.  To minimize this possibility, we perform this check only
	 * in syncing context and fail the operation if we encounter
	 * additional holds.  The dp_config_rwlock ensures that nobody else
	 * opens it after we check.
	 */
	if (dmu_tx_is_syncing(tx) && dmu_buf_refcount(dd->dd_dbuf) > 2)
		return (EBUSY);

	err = zap_count(mos, dd->dd_phys->dd_child_dir_zapobj, &count);
	if (err)
		return (err);
	if (count != 0)
		return (EEXIST);

	return (0);
}
Example #3
/*
 * The nvlist will be consumed by this call.
 */
static void
log_internal(nvlist_t *nvl, const char *operation, spa_t *spa,
             dmu_tx_t *tx, const char *fmt, va_list adx)
{
    char *msg;

    /*
     * If this is part of creating a pool, not everything is
     * initialized yet, so don't bother logging the internal events.
     * Likewise if the pool is not writeable.
     */
    if (tx->tx_txg == TXG_INITIAL || !spa_writeable(spa)) {
        fnvlist_free(nvl);
        return;
    }

    msg = kmem_vasprintf(fmt, adx);
    fnvlist_add_string(nvl, ZPOOL_HIST_INT_STR, msg);
    strfree(msg);

    fnvlist_add_string(nvl, ZPOOL_HIST_INT_NAME, operation);
    fnvlist_add_uint64(nvl, ZPOOL_HIST_TXG, tx->tx_txg);

    if (dmu_tx_is_syncing(tx)) {
        spa_history_log_sync(nvl, tx);
    } else {
        dsl_sync_task_nowait(spa_get_dsl(spa),
                             spa_history_log_sync, nvl, 0, ZFS_SPACE_CHECK_NONE, tx);
    }
    /* spa_history_log_sync() will free nvl */
}
Example #4
void
vdev_indirect_births_add_entry(vdev_indirect_births_t *vib,
    uint64_t max_offset, uint64_t txg, dmu_tx_t *tx)
{
	vdev_indirect_birth_entry_phys_t vibe;
	uint64_t old_size;
	uint64_t new_size;
	vdev_indirect_birth_entry_phys_t *new_entries;

	ASSERT(dmu_tx_is_syncing(tx));
	ASSERT(dsl_pool_sync_context(dmu_tx_pool(tx)));
	ASSERT(vdev_indirect_births_verify(vib));

	dmu_buf_will_dirty(vib->vib_dbuf, tx);

	vibe.vibe_offset = max_offset;
	vibe.vibe_phys_birth_txg = txg;

	old_size = vdev_indirect_births_size_impl(vib);
	dmu_write(vib->vib_objset, vib->vib_object, old_size, sizeof (vibe),
	    &vibe, tx);
	vib->vib_phys->vib_count++;
	new_size = vdev_indirect_births_size_impl(vib);

	new_entries = kmem_alloc(new_size, KM_SLEEP);
	if (old_size > 0) {
		bcopy(vib->vib_entries, new_entries, old_size);
		kmem_free(vib->vib_entries, old_size);
	}
	new_entries[vib->vib_phys->vib_count - 1] = vibe;
	vib->vib_entries = new_entries;
}
Example #5
static void
dmu_objset_create_sync(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dir_t *dd = arg1;
	spa_t *spa = dd->dd_pool->dp_spa;
	struct oscarg *oa = arg2;
	uint64_t obj;

	ASSERT(dmu_tx_is_syncing(tx));

	obj = dsl_dataset_create_sync(dd, oa->lastname,
	    oa->clone_origin, oa->crypto_ctx, oa->flags, oa->cr, tx);

	if (oa->clone_origin == NULL) {
		dsl_pool_t *dp = dd->dd_pool;
		dsl_dataset_t *ds;
		blkptr_t *bp;
		objset_t *os;

		VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, obj, FTAG, &ds));
		bp = dsl_dataset_get_blkptr(ds);
		ASSERT(BP_IS_HOLE(bp));

		os = dmu_objset_create_impl(spa, ds, bp, oa->type,
		    oa->crypto_ctx, tx);

		if (oa->userfunc)
			oa->userfunc(os, oa->userarg, oa->cr, tx);
		dsl_dataset_rele(ds, FTAG);
	}
}
Example #6
void
spa_history_internal_log(history_internal_events_t event, spa_t *spa,
    dmu_tx_t *tx, cred_t *cr, const char *fmt, ...)
{
	history_arg_t *hap;
	char *str;
	va_list adx;

	hap = kmem_alloc(sizeof (history_arg_t), KM_SLEEP);
	str = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP);

	va_start(adx, fmt);
	(void) vsnprintf(str, HIS_MAX_RECORD_LEN, fmt, adx);
	va_end(adx);

	hap->ha_log_type = LOG_INTERNAL;
	hap->ha_history_str = str;
	hap->ha_event = event;
	hap->ha_zone[0] = '\0';

	if (dmu_tx_is_syncing(tx)) {
		spa_history_log_sync(spa, hap, cr, tx);
	} else {
		dsl_sync_task_do_nowait(spa_get_dsl(spa), NULL,
		    spa_history_log_sync, spa, hap, 0, tx);
	}
	/* spa_history_log_sync() will free hap and str */
}
Example #7
static void
log_internal(history_internal_events_t event, spa_t *spa,
    dmu_tx_t *tx, const char *fmt, va_list adx)
{
	history_arg_t *ha;
	va_list adx_copy;

	/*
	 * If this is part of creating a pool, not everything is
	 * initialized yet, so don't bother logging the internal events.
	 */
	if (tx->tx_txg == TXG_INITIAL)
		return;

	ha = kmem_alloc(sizeof (history_arg_t), KM_SLEEP);
	va_copy(adx_copy, adx);
	ha->ha_history_str = kmem_vasprintf(fmt, adx_copy);
	va_end(adx_copy);
	ha->ha_log_type = LOG_INTERNAL;
	ha->ha_event = event;
	ha->ha_zone = NULL;
	ha->ha_uid = 0;

	if (dmu_tx_is_syncing(tx)) {
		spa_history_log_sync(spa, ha, tx);
	} else {
		dsl_sync_task_do_nowait(spa_get_dsl(spa), NULL,
		    spa_history_log_sync, spa, ha, 0, tx);
	}
	/* spa_history_log_sync() will free ha and strings */
}
Example #8
/* called from dsl for meta-objset */
objset_t *
dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
    dmu_objset_type_t type, dmu_tx_t *tx)
{
	objset_t *os;
	dnode_t *mdn;

	ASSERT(dmu_tx_is_syncing(tx));

	if (ds != NULL)
		VERIFY0(dmu_objset_from_ds(ds, &os));
	else
		VERIFY0(dmu_objset_open_impl(spa, NULL, bp, &os));

	mdn = DMU_META_DNODE(os);

	dnode_allocate(mdn, DMU_OT_DNODE, 1 << DNODE_BLOCK_SHIFT,
	    DN_MAX_INDBLKSHIFT, DMU_OT_NONE, 0, tx);

	/*
	 * We don't want to have to increase the meta-dnode's nlevels
	 * later, because then we could do it in quiescing context while
	 * we are also accessing it in open context.
	 *
	 * This precaution is not necessary for the MOS (ds == NULL),
	 * because the MOS is only updated in syncing context.
	 * This is most fortunate: the MOS is the only objset that
	 * needs to be synced multiple times as spa_sync() iterates
	 * to convergence, so minimizing its dn_nlevels matters.
	 */
	if (ds != NULL) {
		int levels = 1;

		/*
		 * Determine the number of levels necessary for the meta-dnode
		 * to contain DN_MAX_OBJECT dnodes.
		 */
		while ((uint64_t)mdn->dn_nblkptr << (mdn->dn_datablkshift +
		    (levels - 1) * (mdn->dn_indblkshift - SPA_BLKPTRSHIFT)) <
		    DN_MAX_OBJECT * sizeof (dnode_phys_t))
			levels++;

		mdn->dn_next_nlevels[tx->tx_txg & TXG_MASK] =
		    mdn->dn_nlevels = levels;
	}

	ASSERT(type != DMU_OST_NONE);
	ASSERT(type != DMU_OST_ANY);
	ASSERT(type < DMU_OST_NUMTYPES);
	os->os_phys->os_type = type;
	if (dmu_objset_userused_enabled(os)) {
		os->os_phys->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE;
		os->os_flags = os->os_phys->os_flags;
	}

	dsl_dataset_dirty(ds, tx);

	return (os);
}
Example #9
/*
 * This sync task completes a condense, deleting the old
 * mapping and replacing it with the new one.
 */
static void
spa_condense_indirect_complete_sync(void *arg, dmu_tx_t *tx)
{
	spa_condensing_indirect_t *sci = arg;
	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
	spa_condensing_indirect_phys_t *scip =
	    &spa->spa_condensing_indirect_phys;
	vdev_t *vd = vdev_lookup_top(spa, scip->scip_vdev);
	vdev_indirect_config_t *vic = &vd->vdev_indirect_config;
	objset_t *mos = spa->spa_meta_objset;
	vdev_indirect_mapping_t *old_mapping = vd->vdev_indirect_mapping;
	uint64_t old_count = vdev_indirect_mapping_num_entries(old_mapping);
	uint64_t new_count =
	    vdev_indirect_mapping_num_entries(sci->sci_new_mapping);

	ASSERT(dmu_tx_is_syncing(tx));
	ASSERT3P(vd->vdev_ops, ==, &vdev_indirect_ops);
	ASSERT3P(sci, ==, spa->spa_condensing_indirect);
	for (int i = 0; i < TXG_SIZE; i++) {
		ASSERT(list_is_empty(&sci->sci_new_mapping_entries[i]));
	}
	ASSERT(vic->vic_mapping_object != 0);
	ASSERT3U(vd->vdev_id, ==, scip->scip_vdev);
	ASSERT(scip->scip_next_mapping_object != 0);
	ASSERT(scip->scip_prev_obsolete_sm_object != 0);

	/*
	 * Reset vdev_indirect_mapping to refer to the new object.
	 */
	rw_enter(&vd->vdev_indirect_rwlock, RW_WRITER);
	vdev_indirect_mapping_close(vd->vdev_indirect_mapping);
	vd->vdev_indirect_mapping = sci->sci_new_mapping;
	rw_exit(&vd->vdev_indirect_rwlock);

	sci->sci_new_mapping = NULL;
	vdev_indirect_mapping_free(mos, vic->vic_mapping_object, tx);
	vic->vic_mapping_object = scip->scip_next_mapping_object;
	scip->scip_next_mapping_object = 0;

	space_map_free_obj(mos, scip->scip_prev_obsolete_sm_object, tx);
	spa_feature_decr(spa, SPA_FEATURE_OBSOLETE_COUNTS, tx);
	scip->scip_prev_obsolete_sm_object = 0;

	scip->scip_vdev = 0;

	VERIFY0(zap_remove(mos, DMU_POOL_DIRECTORY_OBJECT,
	    DMU_POOL_CONDENSING_INDIRECT, tx));
	spa_condensing_indirect_destroy(spa->spa_condensing_indirect);
	spa->spa_condensing_indirect = NULL;

	zfs_dbgmsg("finished condense of vdev %llu in txg %llu: "
	    "new mapping object %llu has %llu entries "
	    "(was %llu entries)",
	    vd->vdev_id, dmu_tx_get_txg(tx), vic->vic_mapping_object,
	    new_count, old_count);

	vdev_config_dirty(spa->spa_root_vdev);
}
Example #10
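/* Allocate the DMU object that backs vdev indirect-birth records. */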
uint64_t
vdev_indirect_births_alloc(objset_t *os, dmu_tx_t *tx)
{
	ASSERT(dmu_tx_is_syncing(tx));

	return (dmu_object_alloc(os,
	    DMU_OTN_UINT64_METADATA, SPA_OLD_MAXBLOCKSIZE,
	    DMU_OTN_UINT64_METADATA, sizeof (vdev_indirect_birth_phys_t),
	    tx));
}
Example #11
void
dmu_tx_add_new_object(dmu_tx_t *tx, dnode_t *dn)
{
	/*
	 * If we're syncing, they can manipulate any object anyhow, and
	 * the hold on the dnode_t can cause problems.
	 */
	if (!dmu_tx_is_syncing(tx))
		(void) dmu_tx_hold_dnode_impl(tx, dn, THT_NEWOBJECT, 0, 0);
}
Example #12
/*
 * Mark the DVA vdev_id:offset:size as being obsolete in the given tx. This
 * wrapper is provided because the DMU does not know about vdev_t's and
 * cannot directly call vdev_indirect_mark_obsolete.
 */
void
spa_vdev_indirect_mark_obsolete(spa_t *spa, uint64_t vdev_id, uint64_t offset,
    uint64_t size, dmu_tx_t *tx)
{
	vdev_t *vd = vdev_lookup_top(spa, vdev_id);
	ASSERT(dmu_tx_is_syncing(tx));

	/* The DMU can only remap indirect vdevs. */
	ASSERT3P(vd->vdev_ops, ==, &vdev_indirect_ops);
	vdev_indirect_mark_obsolete(vd, offset, size);
}
Example #13
void
dmu_tx_add_new_object(dmu_tx_t *tx, objset_t *os, uint64_t object)
{
	/*
	 * If we're syncing, they can manipulate any object anyhow, and
	 * the hold on the dnode_t can cause problems.
	 */
	if (!dmu_tx_is_syncing(tx)) {
		(void) dmu_tx_hold_object_impl(tx, os,
		    object, THT_NEWOBJECT, 0, 0);
	}
}
Example #14
static int
dsl_dataset_user_hold_check(void *arg, dmu_tx_t *tx)
{
	dsl_dataset_user_hold_arg_t *dduha = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	nvpair_t *pair;

	if (spa_version(dp->dp_spa) < SPA_VERSION_USERREFS)
		return (SET_ERROR(ENOTSUP));

	if (!dmu_tx_is_syncing(tx))
		return (0);

	for (pair = nvlist_next_nvpair(dduha->dduha_holds, NULL);
	    pair != NULL; pair = nvlist_next_nvpair(dduha->dduha_holds, pair)) {
		dsl_dataset_t *ds;
		int error = 0;
		char *htag, *name;

		/* must be a snapshot */
		name = nvpair_name(pair);
		if (strchr(name, '@') == NULL)
			error = SET_ERROR(EINVAL);

		if (error == 0)
			error = nvpair_value_string(pair, &htag);

		if (error == 0)
			error = dsl_dataset_hold(dp, name, FTAG, &ds);

		if (error == 0) {
			error = dsl_dataset_user_hold_check_one(ds, htag,
			    dduha->dduha_minor != 0, tx);
			dsl_dataset_rele(ds, FTAG);
		}

		if (error == 0) {
			fnvlist_add_string(dduha->dduha_chkholds, name, htag);
		} else {
			/*
			 * We register ENOENT errors so they can be correctly
			 * reported if needed, such as when all holds fail.
			 */
			fnvlist_add_int32(dduha->dduha_errlist, name, error);
			if (error != ENOENT)
				return (error);
		}
	}

	return (0);
}
Example #15
static int
dsl_dataset_user_release_check(void *arg, dmu_tx_t *tx)
{
	dsl_dataset_user_release_arg_t *ddura;
	dsl_holdfunc_t *holdfunc;
	dsl_pool_t *dp;
	nvpair_t *pair;

	if (!dmu_tx_is_syncing(tx))
		return (0);

	dp = dmu_tx_pool(tx);

	ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));

	ddura = arg;
	holdfunc = ddura->ddura_holdfunc;

	for (pair = nvlist_next_nvpair(ddura->ddura_holds, NULL);
	    pair != NULL; pair = nvlist_next_nvpair(ddura->ddura_holds, pair)) {
		int error;
		dsl_dataset_t *ds;
		nvlist_t *holds;
		const char *snapname = nvpair_name(pair);

		error = nvpair_value_nvlist(pair, &holds);
		if (error != 0)
			error = SET_ERROR(EINVAL);
		else
			error = holdfunc(dp, snapname, FTAG, &ds);
		if (error == 0) {
			error = dsl_dataset_user_release_check_one(ddura, ds,
			    holds, snapname);
			dsl_dataset_rele(ds, FTAG);
		}
		if (error != 0) {
			if (ddura->ddura_errlist != NULL) {
				fnvlist_add_int32(ddura->ddura_errlist,
				    snapname, error);
			}
			/*
			 * Non-existent snapshots are put on the errlist,
			 * but don't cause an overall failure.
			 */
			if (error != ENOENT)
				return (error);
		}
	}

	return (0);
}
Example #16
/*
 * This sync task appends entries to the new mapping object.
 */
static void
spa_condense_indirect_commit_sync(void *arg, dmu_tx_t *tx)
{
	spa_condensing_indirect_t *sci = arg;
	uint64_t txg = dmu_tx_get_txg(tx);
	spa_t *spa = dmu_tx_pool(tx)->dp_spa;

	ASSERT(dmu_tx_is_syncing(tx));
	ASSERT3P(sci, ==, spa->spa_condensing_indirect);

	vdev_indirect_mapping_add_entries(sci->sci_new_mapping,
	    &sci->sci_new_mapping_entries[txg & TXG_MASK], tx);
	ASSERT(list_is_empty(&sci->sci_new_mapping_entries[txg & TXG_MASK]));
}
Example #17
/*
 * Sync task to begin the condensing process.
 */
void
spa_condense_indirect_start_sync(vdev_t *vd, dmu_tx_t *tx)
{
	spa_t *spa = vd->vdev_spa;
	spa_condensing_indirect_phys_t *scip =
	    &spa->spa_condensing_indirect_phys;

	ASSERT0(scip->scip_next_mapping_object);
	ASSERT0(scip->scip_prev_obsolete_sm_object);
	ASSERT0(scip->scip_vdev);
	ASSERT(dmu_tx_is_syncing(tx));
	ASSERT3P(vd->vdev_ops, ==, &vdev_indirect_ops);
	ASSERT(spa_feature_is_active(spa, SPA_FEATURE_OBSOLETE_COUNTS));
	ASSERT(vdev_indirect_mapping_num_entries(vd->vdev_indirect_mapping));

	uint64_t obsolete_sm_obj = vdev_obsolete_sm_object(vd);
	ASSERT(obsolete_sm_obj != 0);

	scip->scip_vdev = vd->vdev_id;
	scip->scip_next_mapping_object =
	    vdev_indirect_mapping_alloc(spa->spa_meta_objset, tx);

	scip->scip_prev_obsolete_sm_object = obsolete_sm_obj;

	/*
	 * We don't need to allocate a new space map object, since
	 * vdev_indirect_sync_obsolete will allocate one when needed.
	 */
	space_map_close(vd->vdev_obsolete_sm);
	vd->vdev_obsolete_sm = NULL;
	VERIFY0(zap_remove(spa->spa_meta_objset, vd->vdev_top_zap,
	    VDEV_TOP_ZAP_INDIRECT_OBSOLETE_SM, tx));

	VERIFY0(zap_add(spa->spa_dsl_pool->dp_meta_objset,
	    DMU_POOL_DIRECTORY_OBJECT,
	    DMU_POOL_CONDENSING_INDIRECT, sizeof (uint64_t),
	    sizeof (*scip) / sizeof (uint64_t), scip, tx));

	ASSERT3P(spa->spa_condensing_indirect, ==, NULL);
	spa->spa_condensing_indirect = spa_condensing_indirect_create(spa);

	zfs_dbgmsg("starting condense of vdev %llu in txg %llu: "
	    "posm=%llu nm=%llu",
	    vd->vdev_id, dmu_tx_get_txg(tx),
	    (u_longlong_t)scip->scip_prev_obsolete_sm_object,
	    (u_longlong_t)scip->scip_next_mapping_object);

	zthr_wakeup(spa->spa_condense_zthr);
}
Example #18
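/* Reset this txg's dsl_dir write accounting at the end of the sync. */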
void
dsl_dir_sync(dsl_dir_t *dd, dmu_tx_t *tx)
{
	ASSERT(dmu_tx_is_syncing(tx));

	mutex_enter(&dd->dd_lock);
	ASSERT0(dd->dd_tempreserved[tx->tx_txg&TXG_MASK]);
	dprintf_dd(dd, "txg=%llu towrite=%lluK\n", tx->tx_txg,
	    dd->dd_space_towrite[tx->tx_txg&TXG_MASK] / 1024);
	dd->dd_space_towrite[tx->tx_txg&TXG_MASK] = 0;
	mutex_exit(&dd->dd_lock);

	/* release the hold from dsl_dir_dirty */
	dmu_buf_rele(dd->dd_dbuf, dd);
}
Example #19
static int
dsl_destroy_snapshot_check(void *arg, dmu_tx_t *tx)
{
	dmu_snapshots_destroy_arg_t *dsda = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	nvpair_t *pair;
	int error = 0;

	if (!dmu_tx_is_syncing(tx))
		return (0);

	for (pair = nvlist_next_nvpair(dsda->dsda_snaps, NULL);
	    pair != NULL; pair = nvlist_next_nvpair(dsda->dsda_snaps, pair)) {
		dsl_dataset_t *ds;

		error = dsl_dataset_hold(dp, nvpair_name(pair),
		    FTAG, &ds);

		/*
		 * If the snapshot does not exist, silently ignore it
		 * (it's "already destroyed").
		 */
		if (error == ENOENT)
			continue;

		if (error == 0) {
			error = dsl_destroy_snapshot_check_impl(ds,
			    dsda->dsda_defer);
			dsl_dataset_rele(ds, FTAG);
		}

		if (error == 0) {
			fnvlist_add_boolean(dsda->dsda_successful_snaps,
			    nvpair_name(pair));
		} else {
			fnvlist_add_int32(dsda->dsda_errlist,
			    nvpair_name(pair), error);
		}
	}

	pair = nvlist_next_nvpair(dsda->dsda_errlist, NULL);
	if (pair != NULL)
		return (fnvpair_value_int32(pair));

	return (0);
}
Example #20
static void
log_internal(history_internal_events_t event, spa_t *spa,
    dmu_tx_t *tx, const char *fmt, va_list adx)
{
	history_arg_t *ha;
	va_list adx_copy;

	/*
	 * If this is part of creating a pool, not everything is
	 * initialized yet, so don't bother logging the internal events.
	 */
	if (tx->tx_txg == TXG_INITIAL)
		return;

	ha = kmem_alloc(sizeof (history_arg_t), KM_SLEEP);
#ifdef __DARWIN__
	/*
	 * vsnprintf(NULL, 0, ...) is broken on darwin!
	 */
	{
		char *buf = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP);

		(void) vsnprintf(buf, HIS_MAX_RECORD_LEN, fmt, adx);
		ha->ha_history_str = strdup(buf);
		kmem_free(buf, HIS_MAX_RECORD_LEN);
	}
#else
	/* Size the buffer with a copy of adx; a va_list cannot be reused. */
	va_copy(adx_copy, adx);
	ha->ha_history_str = kmem_alloc(vsnprintf(NULL, 0, fmt, adx_copy) + 1,
	    KM_SLEEP);
	va_end(adx_copy);

	(void) vsprintf(ha->ha_history_str, fmt, adx);
#endif /* __DARWIN__ */

	ha->ha_log_type = LOG_INTERNAL;
	ha->ha_event = event;
	ha->ha_zone = NULL;
	ha->ha_uid = 0;

	if (dmu_tx_is_syncing(tx)) {
		spa_history_log_sync(spa, ha, tx);
	} else {
		dsl_sync_task_do_nowait(spa_get_dsl(spa), NULL,
		    spa_history_log_sync, spa, ha, 0, tx);
	}
	/* spa_history_log_sync() will free ha and strings */
}
Example #21
static int
dsl_dataset_user_release_check(void *arg, dmu_tx_t *tx)
{
	dsl_dataset_user_release_arg_t *ddura = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	nvpair_t *pair;
	int rv = 0;

	if (!dmu_tx_is_syncing(tx))
		return (0);

	for (pair = nvlist_next_nvpair(ddura->ddura_holds, NULL); pair != NULL;
	    pair = nvlist_next_nvpair(ddura->ddura_holds, pair)) {
		const char *name = nvpair_name(pair);
		int error;
		dsl_dataset_t *ds;
		nvlist_t *holds;

		error = nvpair_value_nvlist(pair, &holds);
		if (error != 0)
			return (EINVAL);

		error = dsl_dataset_hold(dp, name, FTAG, &ds);
		if (error == 0) {
			boolean_t deleteme;
			error = dsl_dataset_user_release_check_one(ds,
			    holds, &deleteme);
			if (error == 0 && deleteme) {
				fnvlist_add_boolean(ddura->ddura_todelete,
				    name);
			}
			dsl_dataset_rele(ds, FTAG);
		}
		if (error != 0) {
			if (ddura->ddura_errlist != NULL) {
				fnvlist_add_int32(ddura->ddura_errlist,
				    name, error);
			}
			rv = error;
		}
	}
	return (rv);
}
Example #22
static int
dsl_dataset_user_release_tmp_check(void *arg, dmu_tx_t *tx)
{
	dsl_dataset_user_release_tmp_arg_t *ddurta = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	dsl_dataset_t *ds;
	int error;

	if (!dmu_tx_is_syncing(tx))
		return (0);

	error = dsl_dataset_hold_obj(dp, ddurta->ddurta_dsobj, FTAG, &ds);
	if (error)
		return (error);

	error = dsl_dataset_user_release_check_one(ds,
	    ddurta->ddurta_holds, &ddurta->ddurta_deleteme);
	dsl_dataset_rele(ds, FTAG);
	return (error);
}
Example #23
/*
 * The nvlist will be consumed by this call.
 */
static void
log_internal(nvlist_t *nvl, const char *operation, spa_t *spa,
    dmu_tx_t *tx, const char *fmt, va_list adx)
{
	char *msg;
	va_list adx1;
	int size;

	/*
	 * If this is part of creating a pool, not everything is
	 * initialized yet, so don't bother logging the internal events.
	 * Likewise if the pool is not writeable.
	 */
	if (tx->tx_txg == TXG_INITIAL || !spa_writeable(spa)) {
		fnvlist_free(nvl);
		return;
	}

	va_copy(adx1, adx);
	size = vsnprintf(NULL, 0, fmt, adx1) + 1;
	msg = kmem_alloc(size, KM_PUSHPAGE);
	va_end(adx1);
	va_copy(adx1, adx);
	(void) vsprintf(msg, fmt, adx1);
	va_end(adx1);
	fnvlist_add_string(nvl, ZPOOL_HIST_INT_STR, msg);
	kmem_free(msg, size);

	fnvlist_add_string(nvl, ZPOOL_HIST_INT_NAME, operation);
	fnvlist_add_uint64(nvl, ZPOOL_HIST_TXG, tx->tx_txg);

	if (dmu_tx_is_syncing(tx)) {
		spa_history_log_sync(nvl, tx);
	} else {
		dsl_sync_task_nowait(spa_get_dsl(spa),
		    spa_history_log_sync, nvl, 0, tx);
	}
	/* spa_history_log_sync() will free nvl */
}
Beispiel #24
0
/* called from dsl */
void
dmu_objset_sync(objset_impl_t *os, zio_t *pio, dmu_tx_t *tx)
{
	int txgoff;
	zbookmark_t zb;
	writeprops_t wp = { 0 };
	zio_t *zio;
	list_t *list;
	list_t *newlist = NULL;
	dbuf_dirty_record_t *dr;

	dprintf_ds(os->os_dsl_dataset, "txg=%llu\n", tx->tx_txg);

	ASSERT(dmu_tx_is_syncing(tx));
	/* XXX the write_done callback should really give us the tx... */
	os->os_synctx = tx;

	if (os->os_dsl_dataset == NULL) {
		/*
		 * This is the MOS.  If we have upgraded,
		 * spa_max_replication() could change, so reset
		 * os_copies here.
		 */
		os->os_copies = spa_max_replication(os->os_spa);
	}

	/*
	 * Create the root block IO
	 */
	zb.zb_objset = os->os_dsl_dataset ? os->os_dsl_dataset->ds_object : 0;
	zb.zb_object = 0;
	zb.zb_level = -1;	/* for block ordering; it's level 0 on disk */
	zb.zb_blkid = 0;

	wp.wp_type = DMU_OT_OBJSET;
	wp.wp_level = 0;	/* on-disk BP level; see above */
	wp.wp_copies = os->os_copies;
	wp.wp_oschecksum = os->os_checksum;
	wp.wp_oscompress = os->os_compress;

	if (BP_IS_OLDER(os->os_rootbp, tx->tx_txg)) {
		(void) dsl_dataset_block_kill(os->os_dsl_dataset,
		    os->os_rootbp, pio, tx);
	}

	arc_release(os->os_phys_buf, &os->os_phys_buf);

	zio = arc_write(pio, os->os_spa, &wp, DMU_OS_IS_L2CACHEABLE(os),
	    tx->tx_txg, os->os_rootbp, os->os_phys_buf, ready, NULL, os,
	    ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);

	/*
	 * Sync special dnodes - the parent IO for the sync is the root block
	 */
	os->os_meta_dnode->dn_zio = zio;
	dnode_sync(os->os_meta_dnode, tx);

	os->os_phys->os_flags = os->os_flags;

	if (os->os_userused_dnode &&
	    os->os_userused_dnode->dn_type != DMU_OT_NONE) {
		os->os_userused_dnode->dn_zio = zio;
		dnode_sync(os->os_userused_dnode, tx);
		os->os_groupused_dnode->dn_zio = zio;
		dnode_sync(os->os_groupused_dnode, tx);
	}

	txgoff = tx->tx_txg & TXG_MASK;

	if (dmu_objset_userused_enabled(os)) {
		newlist = &os->os_synced_dnodes;
		/*
		 * We must create the list here because it uses the
		 * dn_dirty_link[] of this txg.
		 */
		list_create(newlist, sizeof (dnode_t),
		    offsetof(dnode_t, dn_dirty_link[txgoff]));
	}

	dmu_objset_sync_dnodes(&os->os_free_dnodes[txgoff], newlist, tx);
	dmu_objset_sync_dnodes(&os->os_dirty_dnodes[txgoff], newlist, tx);

	list = &os->os_meta_dnode->dn_dirty_records[txgoff];
	while ((dr = list_head(list))) {
		ASSERT(dr->dr_dbuf->db_level == 0);
		list_remove(list, dr);
		if (dr->dr_zio)
			zio_nowait(dr->dr_zio);
	}
	/*
	 * Free intent log blocks up to this tx.
	 */
	zil_sync(os->os_zil, tx);
	os->os_phys->os_zil_header = os->os_zil_header;
	zio_nowait(zio);
}
Example #25
/* called from dsl for meta-objset */
objset_t *
dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
    dmu_objset_type_t type, dsl_crypto_ctx_t *dcc, dmu_tx_t *tx)
{
	objset_t *os;
	dnode_t *mdn;

	ASSERT(dmu_tx_is_syncing(tx));
	if (ds != NULL)
		VERIFY(0 == dmu_objset_from_ds(ds, &os));
	else
		VERIFY(0 == dmu_objset_open_impl(spa, NULL, bp, &os));

	mdn = DMU_META_DNODE(os);

	dnode_allocate(mdn, DMU_OT_DNODE, 1 << DNODE_BLOCK_SHIFT,
	    DN_MAX_INDBLKSHIFT, DMU_OT_NONE, 0, tx);

	/*
	 * We don't want to have to increase the meta-dnode's nlevels
	 * later, because then we could do it in quiescing context while
	 * we are also accessing it in open context.
	 *
	 * This precaution is not necessary for the MOS (ds == NULL),
	 * because the MOS is only updated in syncing context.
	 * This is most fortunate: the MOS is the only objset that
	 * needs to be synced multiple times as spa_sync() iterates
	 * to convergence, so minimizing its dn_nlevels matters.
	 */
	if (ds != NULL) {
		int levels = 1;

		/*
		 * Determine the number of levels necessary for the meta-dnode
		 * to contain DN_MAX_OBJECT dnodes.
		 */
		while ((uint64_t)mdn->dn_nblkptr << (mdn->dn_datablkshift +
		    (levels - 1) * (mdn->dn_indblkshift - SPA_BLKPTRSHIFT)) <
		    DN_MAX_OBJECT * sizeof (dnode_phys_t))
			levels++;

		mdn->dn_next_nlevels[tx->tx_txg & TXG_MASK] =
		    mdn->dn_nlevels = levels;
	}

	ASSERT(type != DMU_OST_NONE);
	ASSERT(type != DMU_OST_ANY);
	ASSERT(type < DMU_OST_NUMTYPES);
	/*
	 * Note: although we should not be dirtying the objset outside of
	 * sync context, the os_type is used directly from the os_phys
	 * in dsl_scan, so it may not be safe to put os_type in the in-core
	 * objset_t and set it in the phys in sync context (as with os_flags).
	 */
	dmu_objset_dirty(os);
	os->os_phys->os_type = type;
	if (dmu_objset_userused_enabled(os)) {
		os->os_phys->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE;
		/* FIXME: removed in newer versions. */
		/* os->os_flags = os->os_phys->os_flags; */
	}

	if (dcc != NULL && dcc->dcc_crypt != ZIO_CRYPT_INHERIT) {
		os->os_crypt = zio_crypt_select(dcc->dcc_crypt,
		    ZIO_CRYPT_ON_VALUE);
	}

	dsl_dataset_dirty(ds, tx);

	return (os);
}
Example #26
/* called from dsl */
void
dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)
{
	int txgoff;
	zbookmark_t zb;
	zio_prop_t zp;
	zio_t *zio;
	list_t *list;
	list_t *newlist = NULL;
	dbuf_dirty_record_t *dr;

	dprintf_ds(os->os_dsl_dataset, "txg=%llu\n", tx->tx_txg);

	ASSERT(dmu_tx_is_syncing(tx));
	/* XXX the write_done callback should really give us the tx... */
	os->os_synctx = tx;

	if (os->os_dsl_dataset == NULL) {
		/*
		 * This is the MOS.  If we have upgraded,
		 * spa_max_replication() could change, so reset
		 * os_copies here.
		 */
		os->os_copies = spa_max_replication(os->os_spa);
	}

	/*
	 * Create the root block IO
	 */
	SET_BOOKMARK(&zb, os->os_dsl_dataset ?
	    os->os_dsl_dataset->ds_object : DMU_META_OBJSET,
	    ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
	arc_release(os->os_phys_buf, &os->os_phys_buf);

	dmu_write_policy(os, NULL, 0, 0, &zp);

	zio = arc_write(pio, os->os_spa, tx->tx_txg,
	    os->os_rootbp, os->os_phys_buf, DMU_OS_IS_L2CACHEABLE(os), &zp,
	    dmu_objset_write_ready, dmu_objset_write_done, os,
	    ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);

	/*
	 * Sync special dnodes - the parent IO for the sync is the root block
	 */
	DMU_META_DNODE(os)->dn_zio = zio;
	dnode_sync(DMU_META_DNODE(os), tx);

	os->os_phys->os_flags = os->os_flags;

	if (DMU_USERUSED_DNODE(os) &&
	    DMU_USERUSED_DNODE(os)->dn_type != DMU_OT_NONE) {
		DMU_USERUSED_DNODE(os)->dn_zio = zio;
		dnode_sync(DMU_USERUSED_DNODE(os), tx);
		DMU_GROUPUSED_DNODE(os)->dn_zio = zio;
		dnode_sync(DMU_GROUPUSED_DNODE(os), tx);
	}

	txgoff = tx->tx_txg & TXG_MASK;

	if (dmu_objset_userused_enabled(os)) {
		newlist = &os->os_synced_dnodes;
		/*
		 * We must create the list here because it uses the
		 * dn_dirty_link[] of this txg.
		 */
		list_create(newlist, sizeof (dnode_t),
		    offsetof(dnode_t, dn_dirty_link[txgoff]));
	}

	dmu_objset_sync_dnodes(&os->os_free_dnodes[txgoff], newlist, tx);
	dmu_objset_sync_dnodes(&os->os_dirty_dnodes[txgoff], newlist, tx);

	list = &DMU_META_DNODE(os)->dn_dirty_records[txgoff];
	while ((dr = list_head(list)) != NULL) {
		ASSERT0(dr->dr_dbuf->db_level);
		list_remove(list, dr);
		if (dr->dr_zio)
			zio_nowait(dr->dr_zio);
	}
	/*
	 * Free intent log blocks up to this tx.
	 */
	zil_sync(os->os_zil, tx);
	os->os_phys->os_zil_header = os->os_zil_header;
	zio_nowait(zio);
}
Example #27
/* called from dsl */
void
dmu_objset_sync(objset_impl_t *os, zio_t *pio, dmu_tx_t *tx)
{
	int txgoff;
	zbookmark_t zb;
	writeprops_t wp = { 0 };
	zio_t *zio;
	list_t *list;
	dbuf_dirty_record_t *dr;

	dprintf_ds(os->os_dsl_dataset, "txg=%llu\n", tx->tx_txg);

	ASSERT(dmu_tx_is_syncing(tx));
	/* XXX the write_done callback should really give us the tx... */
	os->os_synctx = tx;

	if (os->os_dsl_dataset == NULL) {
		/*
		 * This is the MOS.  If we have upgraded,
		 * spa_max_replication() could change, so reset
		 * os_copies here.
		 */
		os->os_copies = spa_max_replication(os->os_spa);
	}

	/*
	 * Create the root block IO
	 */
	zb.zb_objset = os->os_dsl_dataset ? os->os_dsl_dataset->ds_object : 0;
	zb.zb_object = 0;
	zb.zb_level = -1;
	zb.zb_blkid = 0;
	if (BP_IS_OLDER(os->os_rootbp, tx->tx_txg)) {
		(void) dsl_dataset_block_kill(os->os_dsl_dataset,
		    os->os_rootbp, pio, tx);
	}
	wp.wp_type = DMU_OT_OBJSET;
	wp.wp_copies = os->os_copies;
	wp.wp_level = (uint8_t)-1;
	wp.wp_oschecksum = os->os_checksum;
	wp.wp_oscompress = os->os_compress;
	arc_release(os->os_phys_buf, &os->os_phys_buf);
	zio = arc_write(pio, os->os_spa, &wp,
	    tx->tx_txg, os->os_rootbp, os->os_phys_buf, ready, NULL, os,
	    ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED | ZIO_FLAG_METADATA,
	    &zb);

	/*
	 * Sync meta-dnode - the parent IO for the sync is the root block
	 */
	os->os_meta_dnode->dn_zio = zio;
	dnode_sync(os->os_meta_dnode, tx);

	txgoff = tx->tx_txg & TXG_MASK;

	dmu_objset_sync_dnodes(&os->os_free_dnodes[txgoff], tx);
	dmu_objset_sync_dnodes(&os->os_dirty_dnodes[txgoff], tx);

	list = &os->os_meta_dnode->dn_dirty_records[txgoff];
	while ((dr = list_head(list)) != NULL) {
		ASSERT(dr->dr_dbuf->db_level == 0);
		list_remove(list, dr);
		if (dr->dr_zio)
			zio_nowait(dr->dr_zio);
	}
	/*
	 * Free intent log blocks up to this tx.
	 */
	zil_sync(os->os_zil, tx);
	os->os_phys->os_zil_header = os->os_zil_header;
	zio_nowait(zio);
}
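Note: taken together, these examples show the two recurring uses of
dmu_tx_is_syncing(). Sync tasks ASSERT() that they run in syncing context
(e.g., Examples #4, #9, #17), check functions skip their expensive work in
open context (e.g., Examples #14, #19), and history loggers branch on it to
either log inline or dispatch a sync task (e.g., Examples #3, #7). The
sketch below illustrates the dispatch pattern; my_arg_t, my_work_sync(),
and my_work_or_dispatch() are hypothetical placeholders, and the
dsl_sync_task_nowait() signature follows the newer six-argument form used
in Example #3.

/*
 * Minimal sketch of the dispatch pattern, under the assumptions above.
 * my_arg_t and my_work_sync() are placeholders, not ZFS interfaces.
 */
typedef struct my_arg {
	uint64_t ma_value;
} my_arg_t;

static void my_work_sync(void *arg, dmu_tx_t *tx);

static void
my_work_or_dispatch(spa_t *spa, my_arg_t *ma, dmu_tx_t *tx)
{
	if (dmu_tx_is_syncing(tx)) {
		/* Already in syncing context: do the work directly. */
		my_work_sync(ma, tx);
	} else {
		/*
		 * Open context: queue the work as a sync task; it runs
		 * (and is expected to free ma) when this txg syncs.
		 */
		dsl_sync_task_nowait(spa_get_dsl(spa), my_work_sync,
		    ma, 0, ZFS_SPACE_CHECK_NONE, tx);
	}
}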