Example #1
File: trim_map.c Project: vkhromov/freebsd
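/*
 * Per-pool TRIM worker: sleep on spa_trim_cv until signalled, then commit
 * the accumulated trim map for the whole vdev tree under SCL_STATE (reader).
 * When spa_trim_thread has been cleared, acknowledge the stop request and
 * exit.
 */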
static void
trim_thread(void *arg)
{
	spa_t *spa = arg;
	zio_t *zio;

	for (;;) {
		mutex_enter(&spa->spa_trim_lock);
		if (spa->spa_trim_thread == NULL) {
			spa->spa_trim_thread = curthread;
			cv_signal(&spa->spa_trim_cv);
			mutex_exit(&spa->spa_trim_lock);
			thread_exit();
		}
		cv_wait(&spa->spa_trim_cv, &spa->spa_trim_lock);
		mutex_exit(&spa->spa_trim_lock);

		zio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL);

		spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
		trim_map_commit(spa, zio, spa->spa_root_vdev);
		(void) zio_wait(zio);
		trim_map_commit_done(spa, spa->spa_root_vdev);
		spa_config_exit(spa, SCL_STATE, FTAG);
	}
}
Example #2
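/*
 * Same worker as above, but the kthread is named after the pool and uses
 * cv_timedwait() so the trim map is committed at least once every
 * trim_max_interval seconds even if nothing signals spa_trim_cv.
 */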
static void
trim_thread(void *arg)
{
	spa_t *spa = arg;
	zio_t *zio;

#ifdef _KERNEL
	(void) snprintf(curthread->td_name, sizeof(curthread->td_name),
	    "trim %s", spa_name(spa));
#endif

	for (;;) {
		mutex_enter(&spa->spa_trim_lock);
		if (spa->spa_trim_thread == NULL) {
			spa->spa_trim_thread = curthread;
			cv_signal(&spa->spa_trim_cv);
			mutex_exit(&spa->spa_trim_lock);
			thread_exit();
		}

		(void) cv_timedwait(&spa->spa_trim_cv, &spa->spa_trim_lock,
		    hz * trim_max_interval);
		mutex_exit(&spa->spa_trim_lock);

		zio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL);

		spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
		trim_map_commit(spa, zio, spa->spa_root_vdev);
		(void) zio_wait(zio);
		trim_map_commit_done(spa, spa->spa_root_vdev);
		spa_config_exit(spa, SCL_STATE, FTAG);
	}
}
Example #3
/* Takes care of physical writing and limiting # of concurrent ZIOs. */
static int
vdev_initialize_write(vdev_t *vd, uint64_t start, uint64_t size, abd_t *data)
{
	spa_t *spa = vd->vdev_spa;

	/* Limit inflight initializing I/Os */
	mutex_enter(&vd->vdev_initialize_io_lock);
	while (vd->vdev_initialize_inflight >= zfs_initialize_limit) {
		cv_wait(&vd->vdev_initialize_io_cv,
		    &vd->vdev_initialize_io_lock);
	}
	vd->vdev_initialize_inflight++;
	mutex_exit(&vd->vdev_initialize_io_lock);

	dmu_tx_t *tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
	VERIFY0(dmu_tx_assign(tx, TXG_WAIT));
	uint64_t txg = dmu_tx_get_txg(tx);

	spa_config_enter(spa, SCL_STATE_ALL, vd, RW_READER);
	mutex_enter(&vd->vdev_initialize_lock);

	if (vd->vdev_initialize_offset[txg & TXG_MASK] == 0) {
		uint64_t *guid = kmem_zalloc(sizeof (uint64_t), KM_SLEEP);
		*guid = vd->vdev_guid;

		/* This is the first write of this txg. */
		dsl_sync_task_nowait(spa_get_dsl(spa),
		    vdev_initialize_zap_update_sync, guid, 2,
		    ZFS_SPACE_CHECK_RESERVED, tx);
	}

	/*
	 * We know the vdev struct will still be around since all
	 * consumers of vdev_free must stop the initialization first.
	 */
	if (vdev_initialize_should_stop(vd)) {
		mutex_enter(&vd->vdev_initialize_io_lock);
		ASSERT3U(vd->vdev_initialize_inflight, >, 0);
		vd->vdev_initialize_inflight--;
		mutex_exit(&vd->vdev_initialize_io_lock);
		spa_config_exit(vd->vdev_spa, SCL_STATE_ALL, vd);
		mutex_exit(&vd->vdev_initialize_lock);
		dmu_tx_commit(tx);
		return (SET_ERROR(EINTR));
	}
	mutex_exit(&vd->vdev_initialize_lock);

	vd->vdev_initialize_offset[txg & TXG_MASK] = start + size;
	zio_nowait(zio_write_phys(spa->spa_txg_zio[txg & TXG_MASK], vd, start,
	    size, data, ZIO_CHECKSUM_OFF, vdev_initialize_cb, NULL,
	    ZIO_PRIORITY_INITIALIZING, ZIO_FLAG_CANFAIL, B_FALSE));
	/* vdev_initialize_cb releases SCL_STATE_ALL */

	dmu_tx_commit(tx);

	return (0);
}
Example #4
File: mmp.c Project: LLNL/zfs
/*
 * Choose a random vdev, label, and MMP block, and write over it
 * with a copy of the last-synced uberblock, whose timestamp
 * has been updated to reflect that the pool is in use.
 */
static void
mmp_write_uberblock(spa_t *spa)
{
	int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL;
	mmp_thread_t *mmp = &spa->spa_mmp;
	uberblock_t *ub;
	vdev_t *vd;
	int label;
	uint64_t offset;

	spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
	vd = mmp_random_leaf(spa->spa_root_vdev);
	if (vd == NULL) {
		spa_config_exit(spa, SCL_STATE, FTAG);
		return;
	}

	mutex_enter(&mmp->mmp_io_lock);

	if (mmp->mmp_zio_root == NULL)
		mmp->mmp_zio_root = zio_root(spa, NULL, NULL,
		    flags | ZIO_FLAG_GODFATHER);

	ub = &mmp->mmp_ub;
	ub->ub_timestamp = gethrestime_sec();
	ub->ub_mmp_magic = MMP_MAGIC;
	ub->ub_mmp_delay = mmp->mmp_delay;
	vd->vdev_mmp_pending = gethrtime();

	zio_t *zio = zio_null(mmp->mmp_zio_root, spa, NULL, NULL, NULL, flags);
	abd_t *ub_abd = abd_alloc_for_io(VDEV_UBERBLOCK_SIZE(vd), B_TRUE);
	abd_zero(ub_abd, VDEV_UBERBLOCK_SIZE(vd));
	abd_copy_from_buf(ub_abd, ub, sizeof (uberblock_t));

	mutex_exit(&mmp->mmp_io_lock);

	offset = VDEV_UBERBLOCK_OFFSET(vd, VDEV_UBERBLOCK_COUNT(vd) -
	    MMP_BLOCKS_PER_LABEL + spa_get_random(MMP_BLOCKS_PER_LABEL));

	label = spa_get_random(VDEV_LABELS);
	vdev_label_write(zio, vd, label, ub_abd, offset,
	    VDEV_UBERBLOCK_SIZE(vd), mmp_write_done, mmp,
	    flags | ZIO_FLAG_DONT_PROPAGATE);

	spa_mmp_history_add(ub->ub_txg, ub->ub_timestamp, ub->ub_mmp_delay, vd,
	    label);

	zio_nowait(zio);
}
Example #5
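/*
 * Walk the top-level vdevs under SCL_VDEV (reader), accumulate stats
 * separately for the normal and special allocation classes, and store the
 * per-vdev averages in spa->spa_special_stat.
 */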
static void
spa_load_stats_update(spa_t *spa)
{
	int i, nnormal = 0, nspecial = 0;
	vdev_t *rvd;
	spa_special_stat_t spa_stat = {0};

	/*
	 * walk the top level vdevs and calculate average stats for
	 * the normal and special classes
	 */
	spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);

	rvd = spa->spa_root_vdev;
	ASSERT(rvd);

	for (i = 0; i < rvd->vdev_children; i++) {
		vdev_t *vd = rvd->vdev_child[i];
		ASSERT(vd);

		if (vd->vdev_islog || vd->vdev_ishole ||
		    vd->vdev_isspare || vd->vdev_isl2cache)
			continue;
		if (vd->vdev_isspecial) {
			spa_vdev_walk_stats(vd, &spa_stat.special_lt,
			    &spa_stat.special_ut, &nspecial);
		} else {
			spa_vdev_walk_stats(vd, &spa_stat.normal_lt,
			    &spa_stat.normal_ut, &nnormal);
		}
	}

	spa_config_exit(spa, SCL_VDEV, FTAG);

	DTRACE_PROBE4(spa_vdev_stats, int, nnormal, int, nspecial,
	    int, spa_stat.normal_ut, int, spa_stat.special_ut);

	/* calculate averages */
	if (nnormal) {
		spa->spa_special_stat.normal_lt = spa_stat.normal_lt/nnormal;
		spa->spa_special_stat.normal_ut = spa_stat.normal_ut/nnormal;
	}
	if (nspecial) {
		spa->spa_special_stat.special_lt = spa_stat.special_lt/nspecial;
		spa->spa_special_stat.special_ut = spa_stat.special_ut/nspecial;
	}
}
Example #6
/*
 * Update all disk labels, generate a fresh config based on the current
 * in-core state, and sync the global config cache (do not sync the config
 * cache if this is a booting rootpool).
 */
void
spa_config_update(spa_t *spa, int what)
{
	vdev_t *rvd = spa->spa_root_vdev;
	uint64_t txg;
	int c;

	ASSERT(MUTEX_HELD(&spa_namespace_lock));

	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
	txg = spa_last_synced_txg(spa) + 1;
	if (what == SPA_CONFIG_UPDATE_POOL) {
		vdev_config_dirty(rvd);
	} else {
		/*
		 * If we have top-level vdevs that were added but have
		 * not yet been prepared for allocation, do that now.
		 * (It's safe now because the config cache is up to date,
		 * so it will be able to translate the new DVAs.)
		 * See comments in spa_vdev_add() for full details.
		 */
		for (c = 0; c < rvd->vdev_children; c++) {
			vdev_t *tvd = rvd->vdev_child[c];
			if (tvd->vdev_ms_array == 0)
				vdev_metaslab_set_size(tvd);
			vdev_expand(tvd, txg);
		}
	}
	spa_config_exit(spa, SCL_ALL, FTAG);

	/*
	 * Wait for the mosconfig to be regenerated and synced.
	 */
	txg_wait_synced(spa->spa_dsl_pool, txg);

	/*
	 * Update the global config cache to reflect the new mosconfig.
	 */
	if (!spa->spa_is_root) {
		spa_write_cachefile(spa, B_FALSE,
		    what != SPA_CONFIG_UPDATE_POOL);
	}

	if (what == SPA_CONFIG_UPDATE_POOL)
		spa_config_update(spa, SPA_CONFIG_UPDATE_VDEVS);
}
Example #7
File: zil.c Project: harshada/zfs
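/*
 * Flush the write cache of every vdev the ZIL has touched (tracked in
 * zl_vdev_tree) and wait for all of the flushes to complete.
 */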
void
zil_flush_vdevs(zilog_t *zilog)
{
	spa_t *spa = zilog->zl_spa;
	avl_tree_t *t = &zilog->zl_vdev_tree;
	void *cookie = NULL;
	zil_vdev_node_t *zv;
	zio_t *zio;

	ASSERT(zilog->zl_writer);

	/*
	 * We don't need zl_vdev_lock here because we're the zl_writer,
	 * and all zl_get_data() callbacks are done.
	 */
	if (avl_numnodes(t) == 0)
		return;

	spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);

	zio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL);

	while ((zv = avl_destroy_nodes(t, &cookie)) != NULL) {
		vdev_t *vd = vdev_lookup_top(spa, zv->zv_vdev);
		if (vd != NULL)
			zio_flush(zio, vd);
		kmem_free(zv, sizeof (*zv));
	}

	/*
	 * Wait for all the flushes to complete.  Not all devices actually
	 * support the DKIOCFLUSHWRITECACHE ioctl, so it's OK if it fails.
	 */
	(void) zio_wait(zio);

	spa_config_exit(spa, SCL_STATE, FTAG);
}
Example #8
/*
 * Generate the pool's configuration based on the current in-core state.
 *
 * We infer whether to generate a complete config or just one top-level config
 * based on whether vd is the root vdev.
 */
nvlist_t *
spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats)
{
	nvlist_t *config, *nvroot;
	vdev_t *rvd = spa->spa_root_vdev;
	unsigned long hostid = 0;
	boolean_t locked = B_FALSE;
	uint64_t split_guid;

	if (vd == NULL) {
		vd = rvd;
		locked = B_TRUE;
		spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_READER);
	}

	ASSERT(spa_config_held(spa, SCL_CONFIG | SCL_STATE, RW_READER) ==
	    (SCL_CONFIG | SCL_STATE));

	/*
	 * If txg is -1, report the current value of spa->spa_config_txg.
	 */
	if (txg == -1ULL)
		txg = spa->spa_config_txg;

	VERIFY(nvlist_alloc(&config, NV_UNIQUE_NAME, KM_SLEEP) == 0);

	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_VERSION,
	    spa_version(spa)) == 0);
	VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME,
	    spa_name(spa)) == 0);
	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE,
	    spa_state(spa)) == 0);
	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_TXG,
	    txg) == 0);
	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_GUID,
	    spa_guid(spa)) == 0);
	VERIFY(spa->spa_comment == NULL || nvlist_add_string(config,
	    ZPOOL_CONFIG_COMMENT, spa->spa_comment) == 0);


#ifdef	_KERNEL
	hostid = zone_get_hostid(NULL);
#else	/* _KERNEL */
	/*
	 * We're emulating the system's hostid in userland, so we can't use
	 * zone_get_hostid().
	 */
	(void) ddi_strtoul(hw_serial, NULL, 10, &hostid);
#endif	/* _KERNEL */
	if (hostid != 0) {
		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_HOSTID,
		    hostid) == 0);
	}
	VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_HOSTNAME,
	    utsname.nodename) == 0);

	if (vd != rvd) {
		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TOP_GUID,
		    vd->vdev_top->vdev_guid) == 0);
		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_GUID,
		    vd->vdev_guid) == 0);
		if (vd->vdev_isspare)
			VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_IS_SPARE,
			    1ULL) == 0);
		if (vd->vdev_islog)
			VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_IS_LOG,
			    1ULL) == 0);
		vd = vd->vdev_top;		/* label contains top config */
	} else {
		/*
		 * Only add the (potentially large) split information
		 * in the mos config, and not in the vdev labels
		 */
		if (spa->spa_config_splitting != NULL)
			VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_SPLIT,
			    spa->spa_config_splitting) == 0);
	}

	/*
	 * Add the top-level config.  We even add this on pools which
	 * don't support holes in the namespace.
	 */
	vdev_top_config_generate(spa, config);

	/*
	 * If we're splitting, record the original pool's guid.
	 */
	if (spa->spa_config_splitting != NULL &&
	    nvlist_lookup_uint64(spa->spa_config_splitting,
	    ZPOOL_CONFIG_SPLIT_GUID, &split_guid) == 0) {
		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_SPLIT_GUID,
		    split_guid) == 0);
	}

	nvroot = vdev_config_generate(spa, vd, getstats, 0);
	VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0);
	nvlist_free(nvroot);

	/*
	 * Store what's necessary for reading the MOS in the label.
	 */
	VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_FEATURES_FOR_READ,
	    spa->spa_label_features) == 0);

	if (getstats && spa_load_state(spa) == SPA_LOAD_NONE) {
		ddt_histogram_t *ddh;
		ddt_stat_t *dds;
		ddt_object_t *ddo;

		ddh = kmem_zalloc(sizeof (ddt_histogram_t), KM_SLEEP);
		ddt_get_dedup_histogram(spa, ddh);
		VERIFY(nvlist_add_uint64_array(config,
		    ZPOOL_CONFIG_DDT_HISTOGRAM,
		    (uint64_t *)ddh, sizeof (*ddh) / sizeof (uint64_t)) == 0);
		kmem_free(ddh, sizeof (ddt_histogram_t));

		ddo = kmem_zalloc(sizeof (ddt_object_t), KM_SLEEP);
		ddt_get_dedup_object_stats(spa, ddo);
		VERIFY(nvlist_add_uint64_array(config,
		    ZPOOL_CONFIG_DDT_OBJ_STATS,
		    (uint64_t *)ddo, sizeof (*ddo) / sizeof (uint64_t)) == 0);
		kmem_free(ddo, sizeof (ddt_object_t));

		dds = kmem_zalloc(sizeof (ddt_stat_t), KM_SLEEP);
		ddt_get_dedup_stats(spa, dds);
		VERIFY(nvlist_add_uint64_array(config,
		    ZPOOL_CONFIG_DDT_STATS,
		    (uint64_t *)dds, sizeof (*dds) / sizeof (uint64_t)) == 0);
		kmem_free(dds, sizeof (ddt_stat_t));
	}

	if (locked)
		spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);

	return (config);
}
Example #9
File: spa_config.c Project: koplover/zfs
/*
 * Generate the pool's configuration based on the current in-core state.
 *
 * We infer whether to generate a complete config or just one top-level config
 * based on whether vd is the root vdev.
 */
nvlist_t *
spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats)
{
	nvlist_t *config, *nvroot;
	vdev_t *rvd = spa->spa_root_vdev;
	unsigned long hostid = 0;
	boolean_t locked = B_FALSE;
	uint64_t split_guid;
	char *pool_name;

	if (vd == NULL) {
		vd = rvd;
		locked = B_TRUE;
		spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_READER);
	}

	ASSERT(spa_config_held(spa, SCL_CONFIG | SCL_STATE, RW_READER) ==
	    (SCL_CONFIG | SCL_STATE));

	/*
	 * If txg is -1, report the current value of spa->spa_config_txg.
	 */
	if (txg == -1ULL)
		txg = spa->spa_config_txg;

	/*
	 * Originally, users had to handle spa namespace collisions by either
	 * exporting the already imported pool or by specifying a new name for
	 * the pool with a conflicting name. In the case of root pools from
	 * virtual guests, neither approach to collision resolution is
	 * reasonable. This is addressed by extending the new name syntax with
	 * an option to specify that the new name is temporary. When specified,
	 * ZFS_IMPORT_TEMP_NAME will be set in spa->spa_import_flags to tell us
	 * to use the previous name, which we do below.
	 */
	if (spa->spa_import_flags & ZFS_IMPORT_TEMP_NAME) {
		VERIFY0(nvlist_lookup_string(spa->spa_config,
			ZPOOL_CONFIG_POOL_NAME, &pool_name));
	} else
		pool_name = spa_name(spa);

	VERIFY(nvlist_alloc(&config, NV_UNIQUE_NAME, KM_SLEEP) == 0);

	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_VERSION,
	    spa_version(spa)) == 0);
	VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME,
	    pool_name) == 0);
	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE,
	    spa_state(spa)) == 0);
	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_TXG,
	    txg) == 0);
	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_GUID,
	    spa_guid(spa)) == 0);
	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_ERRATA,
	    spa->spa_errata) == 0);
	VERIFY(spa->spa_comment == NULL || nvlist_add_string(config,
	    ZPOOL_CONFIG_COMMENT, spa->spa_comment) == 0);


#ifdef	_KERNEL
	hostid = zone_get_hostid(NULL);
#else	/* _KERNEL */
	/*
	 * We're emulating the system's hostid in userland, so we can't use
	 * zone_get_hostid().
	 */
	(void) ddi_strtoul(hw_serial, NULL, 10, &hostid);
#endif	/* _KERNEL */
	if (hostid != 0) {
		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_HOSTID,
		    hostid) == 0);
	}
	VERIFY0(nvlist_add_string(config, ZPOOL_CONFIG_HOSTNAME,
	    utsname()->nodename));

	if (vd != rvd) {
		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TOP_GUID,
		    vd->vdev_top->vdev_guid) == 0);
		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_GUID,
		    vd->vdev_guid) == 0);
		if (vd->vdev_isspare)
			VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_IS_SPARE,
			    1ULL) == 0);
		if (vd->vdev_islog)
			VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_IS_LOG,
			    1ULL) == 0);
		vd = vd->vdev_top;		/* label contains top config */
	} else {
		/*
		 * Only add the (potentially large) split information
		 * in the mos config, and not in the vdev labels
		 */
		if (spa->spa_config_splitting != NULL)
			VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_SPLIT,
			    spa->spa_config_splitting) == 0);
	}

	/*
	 * Add the top-level config.  We even add this on pools which
	 * don't support holes in the namespace.
	 */
	vdev_top_config_generate(spa, config);

	/*
	 * If we're splitting, record the original pool's guid.
	 */
	if (spa->spa_config_splitting != NULL &&
	    nvlist_lookup_uint64(spa->spa_config_splitting,
	    ZPOOL_CONFIG_SPLIT_GUID, &split_guid) == 0) {
		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_SPLIT_GUID,
		    split_guid) == 0);
	}

	nvroot = vdev_config_generate(spa, vd, getstats, 0);
	VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0);
	nvlist_free(nvroot);

	/*
	 * Store what's necessary for reading the MOS in the label.
	 */
	VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_FEATURES_FOR_READ,
	    spa->spa_label_features) == 0);

	if (getstats && spa_load_state(spa) == SPA_LOAD_NONE) {
		ddt_histogram_t *ddh;
		ddt_stat_t *dds;
		ddt_object_t *ddo;

		ddh = kmem_zalloc(sizeof (ddt_histogram_t), KM_SLEEP);
		ddt_get_dedup_histogram(spa, ddh);
		VERIFY(nvlist_add_uint64_array(config,
		    ZPOOL_CONFIG_DDT_HISTOGRAM,
		    (uint64_t *)ddh, sizeof (*ddh) / sizeof (uint64_t)) == 0);
		kmem_free(ddh, sizeof (ddt_histogram_t));

		ddo = kmem_zalloc(sizeof (ddt_object_t), KM_SLEEP);
		ddt_get_dedup_object_stats(spa, ddo);
		VERIFY(nvlist_add_uint64_array(config,
		    ZPOOL_CONFIG_DDT_OBJ_STATS,
		    (uint64_t *)ddo, sizeof (*ddo) / sizeof (uint64_t)) == 0);
		kmem_free(ddo, sizeof (ddt_object_t));

		dds = kmem_zalloc(sizeof (ddt_stat_t), KM_SLEEP);
		ddt_get_dedup_stats(spa, dds);
		VERIFY(nvlist_add_uint64_array(config,
		    ZPOOL_CONFIG_DDT_STATS,
		    (uint64_t *)dds, sizeof (*dds) / sizeof (uint64_t)) == 0);
		kmem_free(dds, sizeof (ddt_stat_t));
	}

	if (locked)
		spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);

	return (config);
}
Example #10
File: zfs_vfsops.c Project: roddi/mac-zfs
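/*
 * VFS getattr entry point for the Mac OS X port: translate ZFS dataset and
 * pool statistics into the vfs_attr fields expected by the OS and Carbon.
 */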
static int
zfs_vfs_getattr(struct mount *mp, struct vfs_attr *fsap, __unused vfs_context_t context)
{
	zfsvfs_t *zfsvfs = vfs_fsprivate(mp);
	uint64_t refdbytes, availbytes, usedobjs, availobjs;

	ZFS_ENTER(zfsvfs);

	dmu_objset_space(zfsvfs->z_os,
	    &refdbytes, &availbytes, &usedobjs, &availobjs);

	VFSATTR_RETURN(fsap, f_objcount, usedobjs);
	VFSATTR_RETURN(fsap, f_maxobjcount, 0x7fffffffffffffff);
	/*
	 * Carbon depends on f_filecount and f_dircount so
	 * make up some values based on total objects.
	 */
	VFSATTR_RETURN(fsap, f_filecount, usedobjs - (usedobjs / 4));
	VFSATTR_RETURN(fsap, f_dircount, usedobjs / 4);

	/*
	 * The underlying storage pool actually uses multiple block sizes.
	 * We report the fragsize as the smallest block size we support,
	 * and we report our blocksize as the filesystem's maximum blocksize.
	 */
	VFSATTR_RETURN(fsap, f_bsize, 1UL << SPA_MINBLOCKSHIFT);
	VFSATTR_RETURN(fsap, f_iosize, zfsvfs->z_max_blksz);

	/*
	 * The following report "total" blocks of various kinds in the
	 * file system, but reported in terms of f_frsize - the
	 * "fragment" size.
	 */
	VFSATTR_RETURN(fsap, f_blocks,
	               (u_int64_t)((refdbytes + availbytes) >> SPA_MINBLOCKSHIFT));
	VFSATTR_RETURN(fsap, f_bfree, (u_int64_t)(availbytes >> SPA_MINBLOCKSHIFT));
	VFSATTR_RETURN(fsap, f_bavail, fsap->f_bfree);  /* no root reservation */
	VFSATTR_RETURN(fsap, f_bused, fsap->f_blocks - fsap->f_bfree);

	/*
	 * statvfs() should really be called statufs(), because it assumes
	 * static metadata.  ZFS doesn't preallocate files, so the best
	 * we can do is report the max that could possibly fit in f_files,
	 * and that minus the number actually used in f_ffree.
	 * For f_ffree, report the smaller of the number of object available
	 * and the number of blocks (each object will take at least a block).
	 */
	VFSATTR_RETURN(fsap, f_ffree, (u_int64_t)MIN(availobjs, fsap->f_bfree));
	VFSATTR_RETURN(fsap, f_files,  fsap->f_ffree + usedobjs);

#if 0
	statp->f_flag = vf_to_stf(vfsp->vfs_flag);
#endif

	if (VFSATTR_IS_ACTIVE(fsap, f_fsid)) {
		VFSATTR_RETURN(fsap, f_fsid, vfs_statfs(mp)->f_fsid);
	}
	if (VFSATTR_IS_ACTIVE(fsap, f_capabilities)) {
		bcopy(&zfs_capabilities, &fsap->f_capabilities, sizeof (zfs_capabilities));
		VFSATTR_SET_SUPPORTED(fsap, f_capabilities);
	}
	if (VFSATTR_IS_ACTIVE(fsap, f_attributes)) {
		bcopy(&zfs_attributes, &fsap->f_attributes.validattr, sizeof (zfs_attributes));
		bcopy(&zfs_attributes, &fsap->f_attributes.nativeattr, sizeof (zfs_attributes));
		VFSATTR_SET_SUPPORTED(fsap, f_attributes);
	}
	if (VFSATTR_IS_ACTIVE(fsap, f_create_time)) {
		dmu_objset_stats_t dmu_stat;

		dmu_objset_fast_stat(zfsvfs->z_os, &dmu_stat);
		fsap->f_create_time.tv_sec = dmu_stat.dds_creation_time;
		fsap->f_create_time.tv_nsec = 0;
		VFSATTR_SET_SUPPORTED(fsap, f_create_time);
	}
	if (VFSATTR_IS_ACTIVE(fsap, f_modify_time)) {
		if (zfsvfs->z_mtime_vp != NULL) {
			znode_t *mzp;

			mzp = VTOZ(zfsvfs->z_mtime_vp);
			ZFS_TIME_DECODE(&fsap->f_modify_time, mzp->z_phys->zp_mtime);
		} else {
			fsap->f_modify_time.tv_sec = 0;
			fsap->f_modify_time.tv_nsec = 0;
		}
		VFSATTR_SET_SUPPORTED(fsap, f_modify_time);
	}
	/*
	 * For Carbon compatibility, pretend to support this legacy/unused
	 * attribute.
	 */
	if (VFSATTR_IS_ACTIVE(fsap, f_backup_time)) {
		fsap->f_backup_time.tv_sec = 0;
		fsap->f_backup_time.tv_nsec = 0;
		VFSATTR_SET_SUPPORTED(fsap, f_backup_time);
	}
	if (VFSATTR_IS_ACTIVE(fsap, f_vol_name)) {
		spa_t *spa = dmu_objset_spa(zfsvfs->z_os);
		spa_config_enter(spa, RW_READER, FTAG);
		strlcpy(fsap->f_vol_name, spa_name(spa), MAXPATHLEN);
		spa_config_exit(spa, FTAG);
		VFSATTR_SET_SUPPORTED(fsap, f_vol_name);
	}
	VFSATTR_RETURN(fsap, f_fssubtype, 0);
	VFSATTR_RETURN(fsap, f_signature, 0x5a21);  /* 'Z!' */
	VFSATTR_RETURN(fsap, f_carbon_fsid, 0);

	ZFS_EXIT(zfsvfs);
	return (0);
}
Example #11
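/*
 * zthr body that condenses an indirect vdev's mapping: load the previous
 * obsolete space map, regenerate the mapping from where the last pass left
 * off, and complete the condense with a sync task unless the zthr has been
 * cancelled.
 */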
/* ARGSUSED */
static int
spa_condense_indirect_thread(void *arg, zthr_t *zthr)
{
	spa_t *spa = arg;
	vdev_t *vd;

	ASSERT3P(spa->spa_condensing_indirect, !=, NULL);
	spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
	vd = vdev_lookup_top(spa, spa->spa_condensing_indirect_phys.scip_vdev);
	ASSERT3P(vd, !=, NULL);
	spa_config_exit(spa, SCL_VDEV, FTAG);

	spa_condensing_indirect_t *sci = spa->spa_condensing_indirect;
	spa_condensing_indirect_phys_t *scip =
	    &spa->spa_condensing_indirect_phys;
	uint32_t *counts;
	uint64_t start_index;
	vdev_indirect_mapping_t *old_mapping = vd->vdev_indirect_mapping;
	space_map_t *prev_obsolete_sm = NULL;

	ASSERT3U(vd->vdev_id, ==, scip->scip_vdev);
	ASSERT(scip->scip_next_mapping_object != 0);
	ASSERT(scip->scip_prev_obsolete_sm_object != 0);
	ASSERT3P(vd->vdev_ops, ==, &vdev_indirect_ops);

	for (int i = 0; i < TXG_SIZE; i++) {
		/*
		 * The list must start out empty in order for the
		 * _commit_sync() sync task to be properly registered
		 * on the first call to _commit_entry(); so it's wise
		 * to double check and ensure we actually are starting
		 * with empty lists.
		 */
		ASSERT(list_is_empty(&sci->sci_new_mapping_entries[i]));
	}

	VERIFY0(space_map_open(&prev_obsolete_sm, spa->spa_meta_objset,
	    scip->scip_prev_obsolete_sm_object, 0, vd->vdev_asize, 0));
	space_map_update(prev_obsolete_sm);
	counts = vdev_indirect_mapping_load_obsolete_counts(old_mapping);
	if (prev_obsolete_sm != NULL) {
		vdev_indirect_mapping_load_obsolete_spacemap(old_mapping,
		    counts, prev_obsolete_sm);
	}
	space_map_close(prev_obsolete_sm);

	/*
	 * Generate new mapping.  Determine what index to continue from
	 * based on the max offset that we've already written in the
	 * new mapping.
	 */
	uint64_t max_offset =
	    vdev_indirect_mapping_max_offset(sci->sci_new_mapping);
	if (max_offset == 0) {
		/* We haven't written anything to the new mapping yet. */
		start_index = 0;
	} else {
		/*
		 * Pick up from where we left off. _entry_for_offset()
		 * returns a pointer into the vim_entries array. If
		 * max_offset is greater than any of the mappings
		 * contained in the table, NULL will be returned and
		 * that indicates we've exhausted our iteration of the
		 * old_mapping.
		 */

		vdev_indirect_mapping_entry_phys_t *entry =
		    vdev_indirect_mapping_entry_for_offset_or_next(old_mapping,
		    max_offset);

		if (entry == NULL) {
			/*
			 * We've already written the whole new mapping.
			 * This special value will cause us to skip the
			 * generate_new_mapping step and just do the sync
			 * task to complete the condense.
			 */
			start_index = UINT64_MAX;
		} else {
			start_index = entry - old_mapping->vim_entries;
			ASSERT3U(start_index, <,
			    vdev_indirect_mapping_num_entries(old_mapping));
		}
	}

	spa_condense_indirect_generate_new_mapping(vd, counts,
	    start_index, zthr);

	vdev_indirect_mapping_free_obsolete_counts(old_mapping, counts);

	/*
	 * If the zthr has received a cancellation signal while running
	 * in generate_new_mapping() or at any point after that, then bail
	 * early. We don't want to complete the condense if the spa is
	 * shutting down.
	 */
	if (zthr_iscancelled(zthr))
		return (0);

	VERIFY0(dsl_sync_task(spa_name(spa), NULL,
	    spa_condense_indirect_complete_sync, sci, 0,
	    ZFS_SPACE_CHECK_EXTRA_RESERVED));

	return (0);
}
Example #12
File: txg.c Project: Acidburn0zzz/zfs
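/*
 * Pool sync thread: wait until a txg has been quiesced (or until dirty data,
 * waiters, or the zfs_txg_timeout force one), sync it with spa_sync(),
 * record per-txg timing and I/O statistics, and dispatch commit callbacks.
 */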
static void
txg_sync_thread(dsl_pool_t *dp)
{
	spa_t *spa = dp->dp_spa;
	tx_state_t *tx = &dp->dp_tx;
	callb_cpr_t cpr;
	vdev_stat_t *vs1, *vs2;
	clock_t start, delta;

#ifdef _KERNEL
	/*
	 * Annotate this process with a flag that indicates that it is
	 * unsafe to use KM_SLEEP during memory allocations due to the
	 * potential for a deadlock.  KM_PUSHPAGE should be used instead.
	 */
	current->flags |= PF_NOFS;
#endif /* _KERNEL */

	txg_thread_enter(tx, &cpr);

	vs1 = kmem_alloc(sizeof (vdev_stat_t), KM_PUSHPAGE);
	vs2 = kmem_alloc(sizeof (vdev_stat_t), KM_PUSHPAGE);

	start = delta = 0;
	for (;;) {
		clock_t timer, timeout;
		uint64_t txg;
		uint64_t ndirty;

		timeout = zfs_txg_timeout * hz;

		/*
		 * We sync when we're scanning, there's someone waiting
		 * on us, or the quiesce thread has handed off a txg to
		 * us, or we have reached our timeout.
		 */
		timer = (delta >= timeout ? 0 : timeout - delta);
		while (!dsl_scan_active(dp->dp_scan) &&
		    !tx->tx_exiting && timer > 0 &&
		    tx->tx_synced_txg >= tx->tx_sync_txg_waiting &&
		    tx->tx_quiesced_txg == 0 &&
		    dp->dp_dirty_total < zfs_dirty_data_sync) {
			dprintf("waiting; tx_synced=%llu waiting=%llu dp=%p\n",
			    tx->tx_synced_txg, tx->tx_sync_txg_waiting, dp);
			txg_thread_wait(tx, &cpr, &tx->tx_sync_more_cv, timer);
			delta = ddi_get_lbolt() - start;
			timer = (delta > timeout ? 0 : timeout - delta);
		}

		/*
		 * Wait until the quiesce thread hands off a txg to us,
		 * prompting it to do so if necessary.
		 */
		while (!tx->tx_exiting && tx->tx_quiesced_txg == 0) {
			if (tx->tx_quiesce_txg_waiting < tx->tx_open_txg+1)
				tx->tx_quiesce_txg_waiting = tx->tx_open_txg+1;
			cv_broadcast(&tx->tx_quiesce_more_cv);
			txg_thread_wait(tx, &cpr, &tx->tx_quiesce_done_cv, 0);
		}

		if (tx->tx_exiting) {
			kmem_free(vs2, sizeof (vdev_stat_t));
			kmem_free(vs1, sizeof (vdev_stat_t));
			txg_thread_exit(tx, &cpr, &tx->tx_sync_thread);
		}

		spa_config_enter(spa, SCL_ALL, FTAG, RW_READER);
		vdev_get_stats(spa->spa_root_vdev, vs1);
		spa_config_exit(spa, SCL_ALL, FTAG);

		/*
		 * Consume the quiesced txg which has been handed off to
		 * us.  This may cause the quiescing thread to now be
		 * able to quiesce another txg, so we must signal it.
		 */
		txg = tx->tx_quiesced_txg;
		tx->tx_quiesced_txg = 0;
		tx->tx_syncing_txg = txg;
		DTRACE_PROBE2(txg__syncing, dsl_pool_t *, dp, uint64_t, txg);
		cv_broadcast(&tx->tx_quiesce_more_cv);

		dprintf("txg=%llu quiesce_txg=%llu sync_txg=%llu\n",
		    txg, tx->tx_quiesce_txg_waiting, tx->tx_sync_txg_waiting);
		mutex_exit(&tx->tx_sync_lock);

		spa_txg_history_set(spa, txg, TXG_STATE_WAIT_FOR_SYNC,
		    gethrtime());
		ndirty = dp->dp_dirty_pertxg[txg & TXG_MASK];

		start = ddi_get_lbolt();
		spa_sync(spa, txg);
		delta = ddi_get_lbolt() - start;

		mutex_enter(&tx->tx_sync_lock);
		tx->tx_synced_txg = txg;
		tx->tx_syncing_txg = 0;
		DTRACE_PROBE2(txg__synced, dsl_pool_t *, dp, uint64_t, txg);
		cv_broadcast(&tx->tx_sync_done_cv);

		/*
		 * Dispatch commit callbacks to worker threads.
		 */
		txg_dispatch_callbacks(dp, txg);

		spa_config_enter(spa, SCL_ALL, FTAG, RW_READER);
		vdev_get_stats(spa->spa_root_vdev, vs2);
		spa_config_exit(spa, SCL_ALL, FTAG);
		spa_txg_history_set_io(spa, txg,
		    vs2->vs_bytes[ZIO_TYPE_READ]-vs1->vs_bytes[ZIO_TYPE_READ],
		    vs2->vs_bytes[ZIO_TYPE_WRITE]-vs1->vs_bytes[ZIO_TYPE_WRITE],
		    vs2->vs_ops[ZIO_TYPE_READ]-vs1->vs_ops[ZIO_TYPE_READ],
		    vs2->vs_ops[ZIO_TYPE_WRITE]-vs1->vs_ops[ZIO_TYPE_WRITE],
		    ndirty);
		spa_txg_history_set(spa, txg, TXG_STATE_SYNCED, gethrtime());
	}
}
Example #13
File: spa_config.c Project: harshada/zfs
/*
 * Generate the pool's configuration based on the current in-core state.
 * We infer whether to generate a complete config or just one top-level config
 * based on whether vd is the root vdev.
 */
nvlist_t *
spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats)
{
	nvlist_t *config, *nvroot;
	vdev_t *rvd = spa->spa_root_vdev;
	unsigned long hostid = 0;
	boolean_t locked = B_FALSE;

	if (vd == NULL) {
		vd = rvd;
		locked = B_TRUE;
		spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_READER);
	}

	ASSERT(spa_config_held(spa, SCL_CONFIG | SCL_STATE, RW_READER) ==
	    (SCL_CONFIG | SCL_STATE));

	/*
	 * If txg is -1, report the current value of spa->spa_config_txg.
	 */
	if (txg == -1ULL)
		txg = spa->spa_config_txg;

	VERIFY(nvlist_alloc(&config, NV_UNIQUE_NAME, KM_SLEEP) == 0);

	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_VERSION,
	    spa_version(spa)) == 0);
	VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME,
	    spa_name(spa)) == 0);
	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE,
	    spa_state(spa)) == 0);
	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_TXG,
	    txg) == 0);
	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_GUID,
	    spa_guid(spa)) == 0);
#ifdef	_KERNEL
	hostid = zone_get_hostid(NULL);
#else	/* _KERNEL */
	/*
	 * We're emulating the system's hostid in userland, so we can't use
	 * zone_get_hostid().
	 */
	(void) ddi_strtoul(hw_serial, NULL, 10, &hostid);
#endif	/* _KERNEL */
	if (hostid != 0) {
		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_HOSTID,
		    hostid) == 0);
	}
	VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_HOSTNAME,
	    utsname.nodename) == 0);

	if (vd != rvd) {
		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TOP_GUID,
		    vd->vdev_top->vdev_guid) == 0);
		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_GUID,
		    vd->vdev_guid) == 0);
		if (vd->vdev_isspare)
			VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_IS_SPARE,
			    1ULL) == 0);
		if (vd->vdev_islog)
			VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_IS_LOG,
			    1ULL) == 0);
		vd = vd->vdev_top;		/* label contains top config */
	}

	nvroot = vdev_config_generate(spa, vd, getstats, B_FALSE, B_FALSE);
	VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0);
	nvlist_free(nvroot);

	if (locked)
		spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);

	return (config);
}
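All of the examples above follow the same pattern: acquire the relevant SCL_* config locks with spa_config_enter(), do the work that depends on a stable vdev tree or pool state, and release them with spa_config_exit() using the same tag. The sketch below distills that read-side pattern; example_walk_vdevs is an invented name and the loop body is a placeholder, but the locking calls and vdev fields are the ones used in the examples.

/*
 * Minimal sketch of the shared locking pattern (illustrative only; not a
 * function from any of the projects above).
 */
static void
example_walk_vdevs(spa_t *spa)
{
	vdev_t *rvd;
	int c;

	/* Hold SCL_VDEV as reader so the vdev tree cannot change under us. */
	spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);

	rvd = spa->spa_root_vdev;
	for (c = 0; c < rvd->vdev_children; c++) {
		vdev_t *vd = rvd->vdev_child[c];

		/* ... read per-vdev state here ... */
		(void) vd;
	}

	/* Release the same lock set with the same tag used on entry. */
	spa_config_exit(spa, SCL_VDEV, FTAG);
}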