Example #1
int
spa_history_log_nvl(spa_t *spa, nvlist_t *nvl)
{
    int err = 0;
    dmu_tx_t *tx;
    nvlist_t *nvarg;

    if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY || !spa_writeable(spa))
        return (SET_ERROR(EINVAL));

    tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
    err = dmu_tx_assign(tx, TXG_WAIT);
    if (err) {
        dmu_tx_abort(tx);
        return (err);
    }

    VERIFY0(nvlist_dup(nvl, &nvarg, KM_SLEEP));
    if (spa_history_zone() != NULL) {
        fnvlist_add_string(nvarg, ZPOOL_HIST_ZONE,
                           spa_history_zone());
    }
    fnvlist_add_uint64(nvarg, ZPOOL_HIST_WHO, crgetruid(CRED()));

    /* Kick this off asynchronously; errors are ignored. */
    dsl_sync_task_nowait(spa_get_dsl(spa), spa_history_log_sync,
                         nvarg, 0, ZFS_SPACE_CHECK_NONE, tx);
    dmu_tx_commit(tx);

    /* spa_history_log_sync will free nvarg */
    return (err);
}
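Since spa_history_log_nvl() duplicates the caller's nvlist before handing the copy (nvarg) to the sync task, the caller keeps ownership of nvl. Below is a minimal caller sketch; the log_pool_command() wrapper is hypothetical, while ZPOOL_HIST_CMD and the fnvlist_*() helpers are the standard ZFS interfaces.

static int
log_pool_command(spa_t *spa, const char *cmdline)
{
    nvlist_t *nvl = fnvlist_alloc();
    int err;

    /* Record the command line; spa_history_log_nvl() works on a copy. */
    fnvlist_add_string(nvl, ZPOOL_HIST_CMD, cmdline);
    err = spa_history_log_nvl(spa, nvl);

    /* The caller still owns nvl and is responsible for freeing it. */
    fnvlist_free(nvl);
    return (err);
}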
Example #2
/*
 * Write out a history event.
 */
int
spa_history_log(spa_t *spa, const char *history_str, history_log_type_t what)
{
	history_arg_t *ha;
	int err = 0;
	dmu_tx_t *tx;

	ASSERT(what != LOG_INTERNAL);

	tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
	err = dmu_tx_assign(tx, TXG_WAIT);
	if (err) {
		dmu_tx_abort(tx);
		return (err);
	}

	ha = kmem_alloc(sizeof (history_arg_t), KM_SLEEP);
	ha->ha_history_str = strdup(history_str);
	ha->ha_zone = strdup(spa_history_zone());
	ha->ha_log_type = what;
	ha->ha_uid = crgetuid(CRED());

	/* Kick this off asynchronously; errors are ignored. */
	dsl_sync_task_do_nowait(spa_get_dsl(spa), NULL,
	    spa_history_log_sync, spa, ha, 0, tx);
	dmu_tx_commit(tx);

	/* spa_history_log_sync will free ha and strings */
	return (err);
}
Example #3
void
spa_history_log_internal(spa_t *spa, const char *operation,
    dmu_tx_t *tx, const char *fmt, ...)
{
	dmu_tx_t *htx = tx;
	va_list adx;
	nvlist_t *nvl;

	/* create a tx if we didn't get one */
	if (tx == NULL) {
		/* No DSL pool yet, so we cannot create a tx against the MOS. */
		if (spa_get_dsl(spa) == NULL)
			return;
		htx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
		if (dmu_tx_assign(htx, TXG_WAIT) != 0) {
			dmu_tx_abort(htx);
			return;
		}
	}

	va_start(adx, fmt);
	VERIFY0(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_PUSHPAGE));
	log_internal(nvl, operation, spa, htx, fmt, adx);
	va_end(adx);

	/* if we didn't get a tx from the caller, commit the one we made */
	if (tx == NULL)
		dmu_tx_commit(htx);
}
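The function above accepts either a caller-supplied transaction or NULL. A short hypothetical caller for the NULL case follows; note_vdev_online() and the message text are illustrative, while the spa_history_log_internal() signature is the one defined above.

/* Hypothetical helper: record in the pool history that a vdev came online. */
static void
note_vdev_online(spa_t *spa, vdev_t *vd)
{
	/*
	 * With a NULL tx, spa_history_log_internal() creates, assigns,
	 * and commits its own transaction; with a non-NULL tx (as in the
	 * vdev_initialize example later in this listing) the caller's
	 * transaction is reused.
	 */
	spa_history_log_internal(spa, "vdev online", NULL,
	    "vdev=%s", vd->vdev_path);
}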
Example #4
/* Takes care of physical writing and limiting # of concurrent ZIOs. */
static int
vdev_initialize_write(vdev_t *vd, uint64_t start, uint64_t size, abd_t *data)
{
	spa_t *spa = vd->vdev_spa;

	/* Limit inflight initializing I/Os */
	mutex_enter(&vd->vdev_initialize_io_lock);
	while (vd->vdev_initialize_inflight >= zfs_initialize_limit) {
		cv_wait(&vd->vdev_initialize_io_cv,
		    &vd->vdev_initialize_io_lock);
	}
	vd->vdev_initialize_inflight++;
	mutex_exit(&vd->vdev_initialize_io_lock);

	dmu_tx_t *tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
	VERIFY0(dmu_tx_assign(tx, TXG_WAIT));
	uint64_t txg = dmu_tx_get_txg(tx);

	spa_config_enter(spa, SCL_STATE_ALL, vd, RW_READER);
	mutex_enter(&vd->vdev_initialize_lock);

	if (vd->vdev_initialize_offset[txg & TXG_MASK] == 0) {
		uint64_t *guid = kmem_zalloc(sizeof (uint64_t), KM_SLEEP);
		*guid = vd->vdev_guid;

		/* This is the first write of this txg. */
		dsl_sync_task_nowait(spa_get_dsl(spa),
		    vdev_initialize_zap_update_sync, guid, 2,
		    ZFS_SPACE_CHECK_RESERVED, tx);
	}

	/*
	 * We know the vdev struct will still be around since all
	 * consumers of vdev_free must stop the initialization first.
	 */
	if (vdev_initialize_should_stop(vd)) {
		mutex_enter(&vd->vdev_initialize_io_lock);
		ASSERT3U(vd->vdev_initialize_inflight, >, 0);
		vd->vdev_initialize_inflight--;
		mutex_exit(&vd->vdev_initialize_io_lock);
		spa_config_exit(vd->vdev_spa, SCL_STATE_ALL, vd);
		mutex_exit(&vd->vdev_initialize_lock);
		dmu_tx_commit(tx);
		return (SET_ERROR(EINTR));
	}
	mutex_exit(&vd->vdev_initialize_lock);

	vd->vdev_initialize_offset[txg & TXG_MASK] = start + size;
	zio_nowait(zio_write_phys(spa->spa_txg_zio[txg & TXG_MASK], vd, start,
	    size, data, ZIO_CHECKSUM_OFF, vdev_initialize_cb, NULL,
	    ZIO_PRIORITY_INITIALIZING, ZIO_FLAG_CANFAIL, B_FALSE));
	/* vdev_initialize_cb releases SCL_STATE_ALL */

	dmu_tx_commit(tx);

	return (0);
}
Example #5
static void
vdev_initialize_change_state(vdev_t *vd, vdev_initializing_state_t new_state)
{
	ASSERT(MUTEX_HELD(&vd->vdev_initialize_lock));
	spa_t *spa = vd->vdev_spa;

	if (new_state == vd->vdev_initialize_state)
		return;

	/*
	 * Copy the vd's guid, this will be freed by the sync task.
	 */
	uint64_t *guid = kmem_zalloc(sizeof (uint64_t), KM_SLEEP);
	*guid = vd->vdev_guid;

	/*
	 * If we're suspending, then preserve the original start time.
	 */
	if (vd->vdev_initialize_state != VDEV_INITIALIZE_SUSPENDED) {
		vd->vdev_initialize_action_time = gethrestime_sec();
	}
	vd->vdev_initialize_state = new_state;

	dmu_tx_t *tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
	VERIFY0(dmu_tx_assign(tx, TXG_WAIT));
	dsl_sync_task_nowait(spa_get_dsl(spa), vdev_initialize_zap_update_sync,
	    guid, 2, ZFS_SPACE_CHECK_RESERVED, tx);

	switch (new_state) {
	case VDEV_INITIALIZE_ACTIVE:
		spa_history_log_internal(spa, "initialize", tx,
		    "vdev=%s activated", vd->vdev_path);
		break;
	case VDEV_INITIALIZE_SUSPENDED:
		spa_history_log_internal(spa, "initialize", tx,
		    "vdev=%s suspended", vd->vdev_path);
		break;
	case VDEV_INITIALIZE_CANCELED:
		spa_history_log_internal(spa, "initialize", tx,
		    "vdev=%s canceled", vd->vdev_path);
		break;
	case VDEV_INITIALIZE_COMPLETE:
		spa_history_log_internal(spa, "initialize", tx,
		    "vdev=%s complete", vd->vdev_path);
		break;
	default:
		panic("invalid state %llu", (unsigned long long)new_state);
	}

	dmu_tx_commit(tx);
}
Example #6
void
spa_history_internal_log(history_internal_events_t event, spa_t *spa,
    dmu_tx_t *tx, cred_t *cr, const char *fmt, ...)
{
	history_arg_t *hap;
	char *str;
	va_list adx;

	hap = kmem_alloc(sizeof (history_arg_t), KM_SLEEP);
	str = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP);

	va_start(adx, fmt);
	(void) vsnprintf(str, HIS_MAX_RECORD_LEN, fmt, adx);
	va_end(adx);

	hap->ha_log_type = LOG_INTERNAL;
	hap->ha_history_str = str;
	hap->ha_event = event;
	hap->ha_zone[0] = '\0';

	if (dmu_tx_is_syncing(tx)) {
		spa_history_log_sync(spa, hap, cr, tx);
	} else {
		dsl_sync_task_do_nowait(spa_get_dsl(spa), NULL,
		    spa_history_log_sync, spa, hap, 0, tx);
	}
	/* spa_history_log_sync() will free hap and str */
}
Example #7
/*
 * Open-context function to add one entry to the new mapping.  The new
 * entry will be remembered and written from syncing context.
 */
static void
spa_condense_indirect_commit_entry(spa_t *spa,
    vdev_indirect_mapping_entry_phys_t *vimep, uint32_t count)
{
	spa_condensing_indirect_t *sci = spa->spa_condensing_indirect;

	ASSERT3U(count, <, DVA_GET_ASIZE(&vimep->vimep_dst));

	dmu_tx_t *tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
	dmu_tx_hold_space(tx, sizeof (*vimep) + sizeof (count));
	VERIFY0(dmu_tx_assign(tx, TXG_WAIT));
	int txgoff = dmu_tx_get_txg(tx) & TXG_MASK;

	/*
	 * If we are the first entry committed this txg, kick off the sync
	 * task to write to the MOS on our behalf.
	 */
	if (list_is_empty(&sci->sci_new_mapping_entries[txgoff])) {
		dsl_sync_task_nowait(dmu_tx_pool(tx),
		    spa_condense_indirect_commit_sync, sci,
		    0, ZFS_SPACE_CHECK_NONE, tx);
	}

	vdev_indirect_mapping_entry_t *vime =
	    kmem_alloc(sizeof (*vime), KM_SLEEP);
	vime->vime_mapping = *vimep;
	vime->vime_obsolete_count = count;
	list_insert_tail(&sci->sci_new_mapping_entries[txgoff], vime);

	dmu_tx_commit(tx);
}
Example #8
static void
dsl_dataset_user_release_onexit(void *arg)
{
	zfs_hold_cleanup_arg_t *ca = arg;
	spa_t *spa;
	int error;

	error = spa_open(ca->zhca_spaname, &spa, FTAG);
	if (error != 0) {
		zfs_dbgmsg("couldn't release holds on pool=%s "
		    "because pool is no longer loaded",
		    ca->zhca_spaname);
		return;
	}
	if (spa_load_guid(spa) != ca->zhca_spa_load_guid) {
		zfs_dbgmsg("couldn't release holds on pool=%s "
		    "because pool is no longer loaded (guid doesn't match)",
		    ca->zhca_spaname);
		spa_close(spa, FTAG);
		return;
	}

	(void) dsl_dataset_user_release_tmp(spa_get_dsl(spa), ca->zhca_holds);
	fnvlist_free(ca->zhca_holds);
	kmem_free(ca, sizeof (zfs_hold_cleanup_arg_t));
	spa_close(spa, FTAG);
}
Example #9
static void
log_internal(history_internal_events_t event, spa_t *spa,
    dmu_tx_t *tx, const char *fmt, va_list adx)
{
	history_arg_t *ha;
	va_list adx_copy;

	/*
	 * If this is part of creating a pool, not everything is
	 * initialized yet, so don't bother logging the internal events.
	 */
	if (tx->tx_txg == TXG_INITIAL)
		return;

	ha = kmem_alloc(sizeof (history_arg_t), KM_SLEEP);
	va_copy(adx_copy, adx);
	ha->ha_history_str = kmem_vasprintf(fmt, adx_copy);
	va_end(adx_copy);
	ha->ha_log_type = LOG_INTERNAL;
	ha->ha_event = event;
	ha->ha_zone = NULL;
	ha->ha_uid = 0;

	if (dmu_tx_is_syncing(tx)) {
		spa_history_log_sync(spa, ha, tx);
	} else {
		dsl_sync_task_do_nowait(spa_get_dsl(spa), NULL,
		    spa_history_log_sync, spa, ha, 0, tx);
	}
	/* spa_history_log_sync() will free ha and strings */
}
Example #10
/*
 * The nvlist will be consumed by this call.
 */
static void
log_internal(nvlist_t *nvl, const char *operation, spa_t *spa,
             dmu_tx_t *tx, const char *fmt, va_list adx)
{
    char *msg;

    /*
     * If this is part of creating a pool, not everything is
     * initialized yet, so don't bother logging the internal events.
     * Likewise if the pool is not writeable.
     */
    if (tx->tx_txg == TXG_INITIAL || !spa_writeable(spa)) {
        fnvlist_free(nvl);
        return;
    }

    msg = kmem_vasprintf(fmt, adx);
    fnvlist_add_string(nvl, ZPOOL_HIST_INT_STR, msg);
    strfree(msg);

    fnvlist_add_string(nvl, ZPOOL_HIST_INT_NAME, operation);
    fnvlist_add_uint64(nvl, ZPOOL_HIST_TXG, tx->tx_txg);

    if (dmu_tx_is_syncing(tx)) {
        spa_history_log_sync(nvl, tx);
    } else {
        dsl_sync_task_nowait(spa_get_dsl(spa),
                             spa_history_log_sync, nvl, 0, ZFS_SPACE_CHECK_NONE, tx);
    }
    /* spa_history_log_sync() will free nvl */
}
Example #11
int
dmu_objset_snapshot(char *fsname, char *snapname, boolean_t recursive)
{
	dsl_sync_task_t *dst;
	struct osnode *osn;
	struct snaparg sn = { 0 };
	spa_t *spa;
	int err;

	(void) strcpy(sn.failed, fsname);

	err = spa_open(fsname, &spa, FTAG);
	if (err)
		return (err);

	sn.dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
	sn.snapname = snapname;
	list_create(&sn.objsets, sizeof (struct osnode),
	    offsetof(struct osnode, node));

	if (recursive) {
		sn.checkperms = B_TRUE;
		err = dmu_objset_find(fsname,
		    dmu_objset_snapshot_one, &sn, DS_FIND_CHILDREN);
	} else {
		sn.checkperms = B_FALSE;
		err = dmu_objset_snapshot_one(fsname, &sn);
	}

	if (err)
		goto out;

	err = dsl_sync_task_group_wait(sn.dstg);

	for (dst = list_head(&sn.dstg->dstg_tasks); dst;
	    dst = list_next(&sn.dstg->dstg_tasks, dst)) {
		dsl_dataset_t *ds = dst->dst_arg1;
		if (dst->dst_err)
			dsl_dataset_name(ds, sn.failed);
	}

out:
	while ((osn = list_head(&sn.objsets)) != NULL) {
		list_remove(&sn.objsets, osn);
		zil_resume(dmu_objset_zil(osn->os));
		dmu_objset_close(osn->os);
		kmem_free(osn, sizeof (struct osnode));
	}
	list_destroy(&sn.objsets);

	if (err)
		(void) strcpy(fsname, sn.failed);
	dsl_sync_task_group_destroy(sn.dstg);
	spa_close(spa, FTAG);
	return (err);
}
Example #12
dsl_pool_t *
dmu_objset_pool(objset_t *os)
{
	dsl_dataset_t *ds;

	if ((ds = os->os_dsl_dataset) != NULL && ds->ds_dir)
		return (ds->ds_dir->dd_pool);
	else
		return (spa_get_dsl(os->os_spa));
}
Example #13
/*
 * Called from open context to perform a callback in syncing context.  Waits
 * for the operation to complete.
 *
 * The checkfunc will be called from open context as a preliminary check
 * which can quickly fail.  If it succeeds, it will be called again from
 * syncing context.  The checkfunc should generally be designed to work
 * properly in either context, but if necessary it can check
 * dmu_tx_is_syncing(tx).
 *
 * The synctask infrastructure enforces proper locking strategy with respect
 * to the dp_config_rwlock -- the lock will always be held when the callbacks
 * are called.  It will be held for read during the open-context (preliminary)
 * call to the checkfunc, and then held for write from syncing context during
 * the calls to the check and sync funcs.
 *
 * A dataset or pool name can be passed as the first argument.  Typically,
 * the check func will hold, check the return value of the hold, and then
 * release the dataset.  The sync func will VERIFY0(hold()) the dataset.
 * This is safe because no changes can be made between the check and sync funcs,
 * and the sync func will only be called if the check func successfully opened
 * the dataset.
 */
int
dsl_sync_task(const char *pool, dsl_checkfunc_t *checkfunc,
    dsl_syncfunc_t *syncfunc, void *arg,
    int blocks_modified, zfs_space_check_t space_check)
{
	spa_t *spa;
	dmu_tx_t *tx;
	int err;
	dsl_sync_task_t dst = { { { NULL } } };
	dsl_pool_t *dp;

	err = spa_open(pool, &spa, FTAG);
	if (err != 0)
		return (err);
	dp = spa_get_dsl(spa);

top:
	tx = dmu_tx_create_dd(dp->dp_mos_dir);
	VERIFY0(dmu_tx_assign(tx, TXG_WAIT));

	dst.dst_pool = dp;
	dst.dst_txg = dmu_tx_get_txg(tx);
	dst.dst_space = blocks_modified << DST_AVG_BLKSHIFT;
	dst.dst_space_check = space_check;
	dst.dst_checkfunc = checkfunc != NULL ? checkfunc : dsl_null_checkfunc;
	dst.dst_syncfunc = syncfunc;
	dst.dst_arg = arg;
	dst.dst_error = 0;
	dst.dst_nowaiter = B_FALSE;

	dsl_pool_config_enter(dp, FTAG);
	err = dst.dst_checkfunc(arg, tx);
	dsl_pool_config_exit(dp, FTAG);

	if (err != 0) {
		dmu_tx_commit(tx);
		spa_close(spa, FTAG);
		return (err);
	}

	VERIFY(txg_list_add_tail(&dp->dp_sync_tasks, &dst, dst.dst_txg));

	dmu_tx_commit(tx);

	txg_wait_synced(dp, dst.dst_txg);

	if (dst.dst_error == EAGAIN) {
		txg_wait_synced(dp, dst.dst_txg + TXG_DEFER_SIZE);
		goto top;
	}

	spa_close(spa, FTAG);
	return (dst.dst_error);
}
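The calling convention described in the block comment above is easiest to see from a consumer's point of view. The sketch below is a minimal illustration under assumed names: my_feature_check(), my_feature_sync(), my_feature_enable(), and my_feature_arg_t are hypothetical, while the dsl_sync_task() signature matches the definition above and dmu_tx_pool(), dsl_dataset_hold(), and dsl_dataset_rele() are the usual DSL interfaces of this era.

typedef struct my_feature_arg {
	const char *mfa_dsname;		/* dataset to operate on */
} my_feature_arg_t;

/* Called under dp_config_rwlock: for read in open context, for write in syncing context. */
static int
my_feature_check(void *arg, dmu_tx_t *tx)
{
	my_feature_arg_t *mfa = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	dsl_dataset_t *ds;
	int err;

	err = dsl_dataset_hold(dp, mfa->mfa_dsname, FTAG, &ds);
	if (err != 0)
		return (err);
	/* ... cheap validation that may fail goes here ... */
	dsl_dataset_rele(ds, FTAG);
	return (0);
}

/* Called from syncing context only, and only if the check succeeded. */
static void
my_feature_sync(void *arg, dmu_tx_t *tx)
{
	my_feature_arg_t *mfa = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	dsl_dataset_t *ds;

	VERIFY0(dsl_dataset_hold(dp, mfa->mfa_dsname, FTAG, &ds));
	/* ... apply the on-disk change using tx ... */
	dsl_dataset_rele(ds, FTAG);
}

/* Open-context entry point; blocks until the sync task has run. */
static int
my_feature_enable(const char *dsname)
{
	my_feature_arg_t mfa = { 0 };

	mfa.mfa_dsname = dsname;
	return (dsl_sync_task(dsname, my_feature_check, my_feature_sync,
	    &mfa, 0, ZFS_SPACE_CHECK_NORMAL));
}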
Example #14
/*
 * Write out a history event.
 */
int
spa_history_log(spa_t *spa, const char *history_str, history_log_type_t what)
{
	history_arg_t ha;

	ASSERT(what != LOG_INTERNAL);

	ha.ha_history_str = history_str;
	ha.ha_log_type = what;
	(void) strlcpy(ha.ha_zone, spa_history_zone(), sizeof (ha.ha_zone));
	return (dsl_sync_task_do(spa_get_dsl(spa), NULL, spa_history_log_sync,
	    spa, &ha, 0));
}
Example #15
int
dmu_objset_snapshot(char *fsname, char *snapname,
    nvlist_t *props, boolean_t recursive)
{
	dsl_sync_task_t *dst;
	struct snaparg *sn;
	spa_t *spa;
	int err;

	sn = kmem_alloc(sizeof (struct snaparg), KM_SLEEP);
	(void) strcpy(sn->failed, fsname);

	err = spa_open(fsname, &spa, FTAG);
	if (err) {
		kmem_free(sn, sizeof (struct snaparg));
		return (err);
	}

	sn->dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
	sn->snapname = snapname;
	sn->props = props;

	if (recursive) {
		sn->checkperms = B_TRUE;
		err = dmu_objset_find(fsname,
		    dmu_objset_snapshot_one, sn, DS_FIND_CHILDREN);
	} else {
		sn->checkperms = B_FALSE;
		err = dmu_objset_snapshot_one(fsname, sn);
	}

	if (err == 0)
		err = dsl_sync_task_group_wait(sn->dstg);

	for (dst = list_head(&sn->dstg->dstg_tasks); dst;
	    dst = list_next(&sn->dstg->dstg_tasks, dst)) {
		objset_t *os = dst->dst_arg1;
		dsl_dataset_t *ds = os->os->os_dsl_dataset;
		if (dst->dst_err)
			dsl_dataset_name(ds, sn->failed);
		zil_resume(dmu_objset_zil(os));
		dmu_objset_close(os);
	}

	if (err)
		(void) strcpy(fsname, sn->failed);
	dsl_sync_task_group_destroy(sn->dstg);
	spa_close(spa, FTAG);
	kmem_free(sn, sizeof (struct snaparg));
	return (err);
}
Example #16
static int
vdev_queue_max_async_writes(spa_t *spa)
{
	int writes;
	uint64_t dirty = 0;
	dsl_pool_t *dp = spa_get_dsl(spa);
	uint64_t min_bytes = zfs_dirty_data_max *
	    zfs_vdev_async_write_active_min_dirty_percent / 100;
	uint64_t max_bytes = zfs_dirty_data_max *
	    zfs_vdev_async_write_active_max_dirty_percent / 100;

	/*
	 * Async writes may occur before the assignment of the spa's
	 * dsl_pool_t if a self-healing zio is issued prior to the
	 * completion of dmu_objset_open_impl().
	 */
	if (dp == NULL)
		return (zfs_vdev_async_write_max_active);

	/*
	 * Sync tasks correspond to interactive user actions. To reduce the
	 * execution time of those actions we push data out as fast as possible.
	 */
	if (spa_has_pending_synctask(spa))
		return (zfs_vdev_async_write_max_active);

	dirty = dp->dp_dirty_total;
	if (dirty < min_bytes)
		return (zfs_vdev_async_write_min_active);
	if (dirty > max_bytes)
		return (zfs_vdev_async_write_max_active);

	/*
	 * linear interpolation:
	 * slope = (max_writes - min_writes) / (max_bytes - min_bytes)
	 * move right by min_bytes
	 * move up by min_writes
	 */
	writes = (dirty - min_bytes) *
	    (zfs_vdev_async_write_max_active -
	    zfs_vdev_async_write_min_active) /
	    (max_bytes - min_bytes) +
	    zfs_vdev_async_write_min_active;
	ASSERT3U(writes, >=, zfs_vdev_async_write_min_active);
	ASSERT3U(writes, <=, zfs_vdev_async_write_max_active);
	return (writes);
}
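The interpolation in the comment above is easier to follow with concrete numbers. The standalone sketch below simply replays the same arithmetic with assumed tunable values (4 GiB zfs_dirty_data_max, 30/60 percent thresholds, 1..10 active writers); it is an illustration of the formula, not part of the ZFS code, and the real defaults may differ.

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	/* Assumed tunables, for illustration only. */
	uint64_t dirty_data_max = 4ULL << 30;			/* 4 GiB */
	uint64_t min_bytes = dirty_data_max * 30 / 100;		/* ~1.2 GiB */
	uint64_t max_bytes = dirty_data_max * 60 / 100;		/* ~2.4 GiB */
	int min_active = 1, max_active = 10;
	uint64_t dirty = 18ULL << 27;				/* 2.25 GiB dirty */

	/* Same linear interpolation as vdev_queue_max_async_writes(). */
	int writes = (int)((dirty - min_bytes) * (max_active - min_active) /
	    (max_bytes - min_bytes)) + min_active;

	/* dirty is ~87.5% of the way through the range: floor(0.875 * 9) + 1 = 8 */
	printf("%d concurrent async writes\n", writes);
	return (0);
}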
Example #17
static void
log_internal(history_internal_events_t event, spa_t *spa,
    dmu_tx_t *tx, const char *fmt, va_list adx)
{
	history_arg_t *ha;

	/*
	 * If this is part of creating a pool, not everything is
	 * initialized yet, so don't bother logging the internal events.
	 */
	if (tx->tx_txg == TXG_INITIAL)
		return;

	ha = kmem_alloc(sizeof (history_arg_t), KM_SLEEP);
#ifdef __DARWIN__
	/*
	 * vsnprintf(NULL, 0, ...) is broken on darwin!
	 */
	{
		char *buf = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP);

		(void) vsnprintf(buf, HIS_MAX_RECORD_LEN, fmt, adx);
		ha->ha_history_str = strdup(buf);
		kmem_free(buf, HIS_MAX_RECORD_LEN);
	}
#else
	{
		/* Size the buffer with a copy so adx is still valid below. */
		va_list adx_copy;

		va_copy(adx_copy, adx);
		ha->ha_history_str = kmem_alloc(vsnprintf(NULL, 0, fmt,
		    adx_copy) + 1, KM_SLEEP);
		va_end(adx_copy);
	}

	(void) vsprintf(ha->ha_history_str, fmt, adx);
#endif /* __DARWIN__ */

	ha->ha_log_type = LOG_INTERNAL;
	ha->ha_event = event;
	ha->ha_zone = NULL;
	ha->ha_uid = 0;

	if (dmu_tx_is_syncing(tx)) {
		spa_history_log_sync(spa, ha, tx);
	} else {
		dsl_sync_task_do_nowait(spa_get_dsl(spa), NULL,
		    spa_history_log_sync, spa, ha, 0, tx);
	}
	/* spa_history_log_sync() will free ha and strings */
}
Example #18
zcrypt_keystore_node_t *
zcrypt_keystore_find_node(spa_t *spa, uint64_t dsobj,
    boolean_t config_rwlock_held)
{
	zcrypt_keystore_node_t search;
	zcrypt_keystore_node_t *found = NULL;

	rw_enter(&spa->spa_keystore->sk_lock, RW_READER);
	if (avl_is_empty(&spa->spa_keystore->sk_dslkeys))
		goto out;

	search.skn_os = dsobj;
	found = avl_find(&spa->spa_keystore->sk_dslkeys, &search, NULL);
	if (found == NULL) {
		int error;
		dsl_pool_t *dp = spa_get_dsl(spa);
		dsl_dataset_t *ds;
		boolean_t need_lock;

		rw_exit(&spa->spa_keystore->sk_lock);
		need_lock = !dsl_pool_sync_context(dp) && !config_rwlock_held;
		if (need_lock)
			rw_enter(&dp->dp_config_rwlock, RW_READER);
		error = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds);
		if (need_lock)
			rw_exit(&dp->dp_config_rwlock);
		rw_enter(&spa->spa_keystore->sk_lock, RW_READER);

		if (!error) {
			if (dsl_dataset_is_snapshot(ds)) {
				search.skn_os =
				    ds->ds_dir->dd_phys->dd_head_dataset_obj;
				found = avl_find(&spa->spa_keystore->sk_dslkeys,
				    &search, NULL);
			}
			dsl_dataset_rele(ds, FTAG);
		}
	}
out:
	rw_exit(&spa->spa_keystore->sk_lock);

	return (found);
}
Example #19
static void
traverse_zil(traverse_data_t *td, zil_header_t *zh)
{
	uint64_t claim_txg = zh->zh_claim_txg;
	zilog_t *zilog;

	/*
	 * We only want to visit blocks that have been claimed but not yet
	 * replayed; plus, in read-only mode, blocks that are already stable.
	 */
	if (claim_txg == 0 && spa_writeable(td->td_spa))
		return;

	zilog = zil_alloc(spa_get_dsl(td->td_spa)->dp_meta_objset, zh);

	(void) zil_parse(zilog, traverse_zil_block, traverse_zil_record, td,
	    claim_txg);

	zil_free(zilog);
}
Example #20
void
spa_feature_create_zap_objects(spa_t *spa, dmu_tx_t *tx)
{
    /*
     * We create feature flags ZAP objects in two instances: during pool
     * creation and during pool upgrade.
     */
    ASSERT(dsl_pool_sync_context(spa_get_dsl(spa)) || (!spa->spa_sync_on &&
            tx->tx_txg == TXG_INITIAL));

    spa->spa_feat_for_read_obj = zap_create_link(spa->spa_meta_objset,
                                 DMU_OTN_ZAP_METADATA, DMU_POOL_DIRECTORY_OBJECT,
                                 DMU_POOL_FEATURES_FOR_READ, tx);
    spa->spa_feat_for_write_obj = zap_create_link(spa->spa_meta_objset,
                                  DMU_OTN_ZAP_METADATA, DMU_POOL_DIRECTORY_OBJECT,
                                  DMU_POOL_FEATURES_FOR_WRITE, tx);
    spa->spa_feat_desc_obj = zap_create_link(spa->spa_meta_objset,
                             DMU_OTN_ZAP_METADATA, DMU_POOL_DIRECTORY_OBJECT,
                             DMU_POOL_FEATURE_DESCRIPTIONS, tx);
}
Example #21
/*
 * The nvlist will be consumed by this call.
 */
static void
log_internal(nvlist_t *nvl, const char *operation, spa_t *spa,
    dmu_tx_t *tx, const char *fmt, va_list adx)
{
	char *msg;
	va_list adx1;
	int size;

	/*
	 * If this is part of creating a pool, not everything is
	 * initialized yet, so don't bother logging the internal events.
	 * Likewise if the pool is not writeable.
	 */
	if (tx->tx_txg == TXG_INITIAL || !spa_writeable(spa)) {
		fnvlist_free(nvl);
		return;
	}

	va_copy(adx1, adx);
	size = vsnprintf(NULL, 0, fmt, adx1) + 1;
	msg = kmem_alloc(size, KM_PUSHPAGE);
	va_end(adx1);
	va_copy(adx1, adx);
	(void) vsprintf(msg, fmt, adx1);
	va_end(adx1);
	fnvlist_add_string(nvl, ZPOOL_HIST_INT_STR, msg);
	kmem_free(msg, size);

	fnvlist_add_string(nvl, ZPOOL_HIST_INT_NAME, operation);
	fnvlist_add_uint64(nvl, ZPOOL_HIST_TXG, tx->tx_txg);

	if (dmu_tx_is_syncing(tx)) {
		spa_history_log_sync(nvl, tx);
	} else {
		dsl_sync_task_nowait(spa_get_dsl(spa),
		    spa_history_log_sync, nvl, 0, tx);
	}
	/* spa_history_log_sync() will free nvl */
}
Example #22
void
spa_history_log_internal(history_internal_events_t event, spa_t *spa,
    dmu_tx_t *tx, const char *fmt, ...)
{
	dmu_tx_t *htx = tx;
	va_list adx;

	/* create a tx if we didn't get one */
	if (tx == NULL) {
		htx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
		if (dmu_tx_assign(htx, TXG_WAIT) != 0) {
			dmu_tx_abort(htx);
			return;
		}
	}

	va_start(adx, fmt);
	log_internal(event, spa, htx, fmt, adx);
	va_end(adx);

	/* if we didn't get a tx from the caller, commit the one we made */
	if (tx == NULL)
		dmu_tx_commit(htx);
}
Example #23
static void
zil_replay_log_record(zilog_t *zilog, lr_t *lr, void *zra, uint64_t claim_txg)
{
	zil_replay_arg_t *zr = zra;
	const zil_header_t *zh = zilog->zl_header;
	uint64_t reclen = lr->lrc_reclen;
	uint64_t txtype = lr->lrc_txtype;
	char *name;
	int pass, error;

	if (!zilog->zl_replay)			/* giving up */
		return;

	if (lr->lrc_txg < claim_txg)		/* already committed */
		return;

	if (lr->lrc_seq <= zh->zh_replay_seq)	/* already replayed */
		return;

	/* Strip case-insensitive bit, still present in log record */
	txtype &= ~TX_CI;

	if (txtype == 0 || txtype >= TX_MAX_TYPE) {
		error = EINVAL;
		goto bad;
	}

	/*
	 * Make a copy of the data so we can revise and extend it.
	 */
	bcopy(lr, zr->zr_lrbuf, reclen);

	/*
	 * The log block containing this lr may have been byteswapped
	 * so that we can easily examine common fields like lrc_txtype.
	 * However, the log is a mix of different data types, and only the
	 * replay vectors know how to byteswap their records.  Therefore, if
	 * the lr was byteswapped, undo it before invoking the replay vector.
	 */
	if (zr->zr_byteswap)
		byteswap_uint64_array(zr->zr_lrbuf, reclen);

	/*
	 * We must now do two things atomically: replay this log record,
	 * and update the log header sequence number to reflect the fact that
	 * we did so. At the end of each replay function the sequence number
	 * is updated if we are in replay mode.
	 */
	for (pass = 1; pass <= 2; pass++) {
		zilog->zl_replaying_seq = lr->lrc_seq;
		/* Only byteswap (if needed) on the 1st pass.  */
		error = zr->zr_replay[txtype](zr->zr_arg, zr->zr_lrbuf,
		    zr->zr_byteswap && pass == 1);

		if (!error)
			return;

		/*
		 * The DMU's dnode layer doesn't see removes until the txg
		 * commits, so a subsequent claim can spuriously fail with
		 * EEXIST. So if we receive any error we try syncing out
		 * any removes then retry the transaction.
		 */
		if (pass == 1)
			txg_wait_synced(spa_get_dsl(zilog->zl_spa), 0);
	}

bad:
	ASSERT(error);
	name = kmem_alloc(MAXNAMELEN, KM_SLEEP);
	dmu_objset_name(zr->zr_os, name);
	cmn_err(CE_WARN, "ZFS replay transaction error %d, "
	    "dataset %s, seq 0x%llx, txtype %llu %s\n",
	    error, name, (u_longlong_t)lr->lrc_seq, (u_longlong_t)txtype,
	    (lr->lrc_txtype & TX_CI) ? "CI" : "");
	zilog->zl_replay = B_FALSE;
	kmem_free(name, MAXNAMELEN);
}
Example #24
/*
 * Read out the command history.
 */
int
spa_history_get(spa_t *spa, uint64_t *offp, uint64_t *len, char *buf)
{
	objset_t *mos = spa->spa_meta_objset;
	dmu_buf_t *dbp;
	uint64_t read_len, phys_read_off, phys_eof;
	uint64_t leftover = 0;
	spa_history_phys_t *shpp;
	int err;

	/*
	 * If the command history doesn't exist (older pool),
	 * that's ok, just return ENOENT.
	 */
	if (!spa->spa_history)
		return (ENOENT);

	/*
	 * The history is logged asynchronously, so when they request
	 * the first chunk of history, make sure everything has been
	 * synced to disk so that we get it.
	 */
	if (*offp == 0 && spa_writeable(spa))
		txg_wait_synced(spa_get_dsl(spa), 0);

	if ((err = dmu_bonus_hold(mos, spa->spa_history, FTAG, &dbp)) != 0)
		return (err);
	shpp = dbp->db_data;

#ifdef ZFS_DEBUG
	{
		dmu_object_info_t doi;
		dmu_object_info_from_db(dbp, &doi);
		ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_SPA_HISTORY_OFFSETS);
	}
#endif

	mutex_enter(&spa->spa_history_lock);
	phys_eof = spa_history_log_to_phys(shpp->sh_eof, shpp);

	if (*offp < shpp->sh_pool_create_len) {
		/* read in just the zpool create history */
		phys_read_off = *offp;
		read_len = MIN(*len, shpp->sh_pool_create_len -
		    phys_read_off);
	} else {
		/*
		 * Need to reset passed in offset to BOF if the passed in
		 * offset has since been overwritten.
		 */
		*offp = MAX(*offp, shpp->sh_bof);
		phys_read_off = spa_history_log_to_phys(*offp, shpp);

		/*
		 * Read up to the minimum of what the user passed down or
		 * the EOF (physical or logical).  If we hit physical EOF,
		 * use 'leftover' to read from the physical BOF.
		 */
		if (phys_read_off <= phys_eof) {
			read_len = MIN(*len, phys_eof - phys_read_off);
		} else {
			read_len = MIN(*len,
			    shpp->sh_phys_max_off - phys_read_off);
			if (phys_read_off + *len > shpp->sh_phys_max_off) {
				leftover = MIN(*len - read_len,
				    phys_eof - shpp->sh_pool_create_len);
			}
		}
	}

	/* offset for consumer to use next */
	*offp += read_len + leftover;

	/* tell the consumer how much you actually read */
	*len = read_len + leftover;

	if (read_len == 0) {
		mutex_exit(&spa->spa_history_lock);
		dmu_buf_rele(dbp, FTAG);
		return (0);
	}

	err = dmu_read(mos, spa->spa_history, phys_read_off, read_len, buf,
	    DMU_READ_PREFETCH);
	if (leftover && err == 0) {
		err = dmu_read(mos, spa->spa_history, shpp->sh_pool_create_len,
		    leftover, buf + read_len, DMU_READ_PREFETCH);
	}
	mutex_exit(&spa->spa_history_lock);

	dmu_buf_rele(dbp, FTAG);
	return (err);
}
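The offset/length convention in Example #24 (the caller passes *offp and *len in, and gets back the next offset and the number of bytes actually copied) lends itself to a simple drain loop. The consumer below is hypothetical, but it follows the contract implemented above: iteration stops once a call returns zero bytes.

/*
 * Hypothetical in-kernel consumer: drain the packed history records in
 * fixed-size chunks until spa_history_get() reports nothing left.
 */
static int
drain_history(spa_t *spa, char *buf, uint64_t buflen)
{
	uint64_t off = 0;
	int err;

	for (;;) {
		uint64_t len = buflen;

		err = spa_history_get(spa, &off, &len, buf);
		if (err != 0 || len == 0)
			return (err);
		/* ... hand off len bytes of packed nvlist records ... */
	}
}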
Example #25
File: zil.c Project: harshada/zfs
static void
zil_replay_log_record(zilog_t *zilog, lr_t *lr, void *zra, uint64_t claim_txg)
{
	zil_replay_arg_t *zr = zra;
	const zil_header_t *zh = zilog->zl_header;
	uint64_t reclen = lr->lrc_reclen;
	uint64_t txtype = lr->lrc_txtype;
	char *name;
	int pass, error;

	if (!zilog->zl_replay)			/* giving up */
		return;

	if (lr->lrc_txg < claim_txg)		/* already committed */
		return;

	if (lr->lrc_seq <= zh->zh_replay_seq)	/* already replayed */
		return;

	/* Strip case-insensitive bit, still present in log record */
	txtype &= ~TX_CI;

	if (txtype == 0 || txtype >= TX_MAX_TYPE) {
		error = EINVAL;
		goto bad;
	}

	/*
	 * Make a copy of the data so we can revise and extend it.
	 */
	bcopy(lr, zr->zr_lrbuf, reclen);

	/*
	 * The log block containing this lr may have been byteswapped
	 * so that we can easily examine common fields like lrc_txtype.
	 * However, the log is a mix of different data types, and only the
	 * replay vectors know how to byteswap their records.  Therefore, if
	 * the lr was byteswapped, undo it before invoking the replay vector.
	 */
	if (zr->zr_byteswap)
		byteswap_uint64_array(zr->zr_lrbuf, reclen);

	/*
	 * If this is a TX_WRITE with a blkptr, suck in the data.
	 */
	if (txtype == TX_WRITE && reclen == sizeof (lr_write_t)) {
		lr_write_t *lrw = (lr_write_t *)lr;
		blkptr_t *wbp = &lrw->lr_blkptr;
		uint64_t wlen = lrw->lr_length;
		char *wbuf = zr->zr_lrbuf + reclen;

		if (BP_IS_HOLE(wbp)) {	/* compressed to a hole */
			bzero(wbuf, wlen);
		} else {
			/*
			 * A subsequent write may have overwritten this block,
			 * in which case wbp may have been freed and
			 * reallocated, and our read of wbp may fail with a
			 * checksum error.  We can safely ignore this because
			 * the later write will provide the correct data.
			 */
			zbookmark_t zb;

			zb.zb_objset = dmu_objset_id(zilog->zl_os);
			zb.zb_object = lrw->lr_foid;
			zb.zb_level = -1;
			zb.zb_blkid = lrw->lr_offset / BP_GET_LSIZE(wbp);

			(void) zio_wait(zio_read(NULL, zilog->zl_spa,
			    wbp, wbuf, BP_GET_LSIZE(wbp), NULL, NULL,
			    ZIO_PRIORITY_SYNC_READ,
			    ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE, &zb));
			(void) memmove(wbuf, wbuf + lrw->lr_blkoff, wlen);
		}
	}

	/*
	 * We must now do two things atomically: replay this log record,
	 * and update the log header sequence number to reflect the fact that
	 * we did so. At the end of each replay function the sequence number
	 * is updated if we are in replay mode.
	 */
	for (pass = 1; pass <= 2; pass++) {
		zilog->zl_replaying_seq = lr->lrc_seq;
		/* Only byteswap (if needed) on the 1st pass.  */
		error = zr->zr_replay[txtype](zr->zr_arg, zr->zr_lrbuf,
		    zr->zr_byteswap && pass == 1);

		if (!error)
			return;

		/*
		 * The DMU's dnode layer doesn't see removes until the txg
		 * commits, so a subsequent claim can spuriously fail with
		 * EEXIST. So if we receive any error we try syncing out
		 * any removes then retry the transaction.
		 */
		if (pass == 1)
			txg_wait_synced(spa_get_dsl(zilog->zl_spa), 0);
	}

bad:
	ASSERT(error);
	name = kmem_alloc(MAXNAMELEN, KM_SLEEP);
	dmu_objset_name(zr->zr_os, name);
	cmn_err(CE_WARN, "ZFS replay transaction error %d, "
	    "dataset %s, seq 0x%llx, txtype %llu %s\n",
	    error, name, (u_longlong_t)lr->lrc_seq, (u_longlong_t)txtype,
	    (lr->lrc_txtype & TX_CI) ? "CI" : "");
	zilog->zl_replay = B_FALSE;
	kmem_free(name, MAXNAMELEN);
}
Example #26
/*
 * Same as dsl_dir_open(), but ignore the first component of the name and
 * use the spa instead.
 */
int
dsl_dir_open_spa(spa_t *spa, const char *name, void *tag,
    dsl_dir_t **ddp, const char **tailp)
{
	char buf[MAXNAMELEN];
	const char *next, *nextnext = NULL;
	int err;
	dsl_dir_t *dd;
	dsl_pool_t *dp;
	uint64_t ddobj;
	int openedspa = FALSE;

	dprintf("%s\n", name);

	err = getcomponent(name, buf, &next);
	if (err)
		return (err);
	if (spa == NULL) {
		err = spa_open(buf, &spa, FTAG);
		if (err) {
			dprintf("spa_open(%s) failed\n", buf);
			return (err);
		}
		openedspa = TRUE;

		/* XXX this assertion belongs in spa_open */
		ASSERT(!dsl_pool_sync_context(spa_get_dsl(spa)));
	}

	dp = spa_get_dsl(spa);

	rw_enter(&dp->dp_config_rwlock, RW_READER);
	err = dsl_dir_open_obj(dp, dp->dp_root_dir_obj, NULL, tag, &dd);
	if (err) {
		rw_exit(&dp->dp_config_rwlock);
		if (openedspa)
			spa_close(spa, FTAG);
		return (err);
	}

	while (next != NULL) {
		dsl_dir_t *child_ds;
		err = getcomponent(next, buf, &nextnext);
		if (err)
			break;
		ASSERT(next[0] != '\0');
		if (next[0] == '@')
			break;
		dprintf("looking up %s in obj%lld\n",
		    buf, dd->dd_phys->dd_child_dir_zapobj);

		err = zap_lookup(dp->dp_meta_objset,
		    dd->dd_phys->dd_child_dir_zapobj,
		    buf, sizeof (ddobj), 1, &ddobj);
		if (err) {
			if (err == ENOENT)
				err = 0;
			break;
		}

		err = dsl_dir_open_obj(dp, ddobj, buf, tag, &child_ds);
		if (err)
			break;
		dsl_dir_close(dd, tag);
		dd = child_ds;
		next = nextnext;
	}
	rw_exit(&dp->dp_config_rwlock);

	if (err) {
		dsl_dir_close(dd, tag);
		if (openedspa)
			spa_close(spa, FTAG);
		return (err);
	}

	/*
	 * It's an error if there's more than one component left, or
	 * tailp==NULL and there's any component left.
	 */
	if (next != NULL &&
	    (tailp == NULL || (nextnext && nextnext[0] != '\0'))) {
		/* bad path name */
		dsl_dir_close(dd, tag);
		dprintf("next=%p (%s) tail=%p\n", next, next?next:"", tailp);
		err = ENOENT;
	}
	if (tailp)
		*tailp = next;
	if (openedspa)
		spa_close(spa, FTAG);
	*ddp = dd;
	return (err);
}