Example #1
static void
dsl_onexit_hold_cleanup(spa_t *spa, nvlist_t *holds, minor_t minor)
{
	zfs_hold_cleanup_arg_t *ca;

	if (minor == 0 || nvlist_empty(holds)) {
		fnvlist_free(holds);
		return;
	}

	ASSERT(spa != NULL);
	ca = kmem_alloc(sizeof (*ca), KM_SLEEP);

	(void) strlcpy(ca->zhca_spaname, spa_name(spa),
	    sizeof (ca->zhca_spaname));
	ca->zhca_spa_load_guid = spa_load_guid(spa);
	ca->zhca_holds = holds;
	VERIFY0(zfs_onexit_add_cb(minor,
	    dsl_dataset_user_release_onexit, ca, NULL));
}
Example #2
void
spa_history_log_version(spa_t *spa, history_internal_events_t event)
{
#ifdef _KERNEL
	uint64_t current_vers = spa_version(spa);

	if (current_vers >= SPA_VERSION_ZPOOL_HISTORY) {
		spa_history_log_internal(event, spa, NULL,
		    "pool spa %llu; zfs spa %llu; zpl %d; uts %s %s %s %s",
		    (u_longlong_t)current_vers, SPA_VERSION, ZPL_VERSION,
		    utsname.nodename, utsname.release, utsname.version,
		    utsname.machine);
	}
#ifndef __DARWIN__
	cmn_err(CE_CONT, "!%s version %llu pool %s using %llu",
	    event == LOG_POOL_IMPORT ? "imported" :
	    event == LOG_POOL_CREATE ? "created" : "accessed",
	    (u_longlong_t)current_vers, spa_name(spa), SPA_VERSION);
#endif
#endif
}
Example #3
int
zcp_dataset_hold_error(lua_State *state, dsl_pool_t *dp, const char *dsname,
    int error)
{
	if (error == ENOENT) {
		(void) zcp_argerror(state, 1, "no such dataset '%s'", dsname);
		return (0); /* not reached; zcp_argerror will longjmp */
	} else if (error == EXDEV) {
		(void) zcp_argerror(state, 1,
		    "dataset '%s' is not in the target pool '%s'",
		    dsname, spa_name(dp->dp_spa));
		return (0); /* not reached; zcp_argerror will longjmp */
	} else if (error == EIO) {
		(void) luaL_error(state,
		    "I/O error while accessing dataset '%s'", dsname);
		return (0); /* not reached; luaL_error will longjmp */
	} else if (error != 0) {
		(void) luaL_error(state,
		    "unexpected error %d while accessing dataset '%s'",
		    error, dsname);
		return (0); /* not reached; luaL_error will longjmp */
	}
	return (0);
}
Example #4
/*
 * Synchronize pool configuration to disk.  This must be called with the
 * namespace lock held. Synchronizing the pool cache is typically done after
 * the configuration has been synced to the MOS. This exposes a window where
 * the MOS config will have been updated but the cache file has not. If
 * the system were to crash at that instant then the cached config may not
 * contain the correct information to open the pool and an explicit import
 * would be required.
 */
void
spa_config_sync(spa_t *target, boolean_t removing, boolean_t postsysevent)
{
	spa_config_dirent_t *dp, *tdp;
	nvlist_t *nvl;
	char *pool_name;
	boolean_t ccw_failure;
	int error = 0;

	ASSERT(MUTEX_HELD(&spa_namespace_lock));

	if (rootdir == NULL || !(spa_mode_global & FWRITE))
		return;

	/*
	 * Iterate over all cachefiles for the pool, past or present.  When the
	 * cachefile is changed, the new one is pushed onto this list, allowing
	 * us to update previous cachefiles that no longer contain this pool.
	 */
	ccw_failure = B_FALSE;
	for (dp = list_head(&target->spa_config_list); dp != NULL;
	    dp = list_next(&target->spa_config_list, dp)) {
		spa_t *spa = NULL;
		if (dp->scd_path == NULL)
			continue;

		/*
		 * Iterate over all pools, adding any matching pools to 'nvl'.
		 */
		nvl = NULL;
		while ((spa = spa_next(spa)) != NULL) {
			/*
			 * Skip over our own pool if we're about to remove
			 * ourselves from the spa namespace or any pool that
			 * is readonly. Since we cannot guarantee that a
			 * readonly pool would successfully import upon reboot,
			 * we don't allow them to be written to the cache file.
			 */
			if ((spa == target && removing) ||
			    !spa_writeable(spa))
				continue;

			mutex_enter(&spa->spa_props_lock);
			tdp = list_head(&spa->spa_config_list);
			if (spa->spa_config == NULL ||
			    tdp == NULL ||
			    tdp->scd_path == NULL ||
			    strcmp(tdp->scd_path, dp->scd_path) != 0) {
				mutex_exit(&spa->spa_props_lock);
				continue;
			}

			if (nvl == NULL)
				nvl = fnvlist_alloc();

			if (spa->spa_import_flags & ZFS_IMPORT_TEMP_NAME)
				pool_name = fnvlist_lookup_string(
				    spa->spa_config, ZPOOL_CONFIG_POOL_NAME);
			else
				pool_name = spa_name(spa);

			fnvlist_add_nvlist(nvl, pool_name, spa->spa_config);
			mutex_exit(&spa->spa_props_lock);
		}

		error = spa_config_write(dp, nvl);
		if (error != 0)
			ccw_failure = B_TRUE;
		nvlist_free(nvl);
	}

	if (ccw_failure) {
		/*
		 * Keep trying so that configuration data is
		 * written if/when any temporary filesystem
		 * resource issues are resolved.
		 */
		if (target->spa_ccw_fail_time == 0) {
			zfs_ereport_post(FM_EREPORT_ZFS_CONFIG_CACHE_WRITE,
			    target, NULL, NULL, 0, 0);
		}
		target->spa_ccw_fail_time = gethrtime();
		spa_async_request(target, SPA_ASYNC_CONFIG_UPDATE);
	} else {
		/*
		 * Do not rate limit future attempts to update
		 * the config cache.
		 */
		target->spa_ccw_fail_time = 0;
	}

	/*
	 * Remove any config entries older than the current one.
	 */
	dp = list_head(&target->spa_config_list);
	while ((tdp = list_next(&target->spa_config_list, dp)) != NULL) {
		list_remove(&target->spa_config_list, tdp);
		if (tdp->scd_path != NULL)
			spa_strfree(tdp->scd_path);
		kmem_free(tdp, sizeof (spa_config_dirent_t));
	}

	spa_config_generation++;

	if (postsysevent)
		spa_event_notify(target, NULL, ESC_ZFS_CONFIG_SYNC);
}
Example #5
/*
 * Synchronize pool configuration to disk.  This must be called with the
 * namespace lock held. Synchronizing the pool cache is typically done after
 * the configuration has been synced to the MOS. This exposes a window where
 * the MOS config will have been updated but the cache file has not. If
 * the system were to crash at that instant then the cached config may not
 * contain the correct information to open the pool and an explicit import
 * would be required.
 */
void
spa_config_sync(spa_t *target, boolean_t removing, boolean_t postsysevent)
{
	spa_config_dirent_t *dp, *tdp;
	nvlist_t *nvl;
	char *pool_name;

	ASSERT(MUTEX_HELD(&spa_namespace_lock));

	if (rootdir == NULL || !(spa_mode_global & FWRITE))
		return;

	/*
	 * Iterate over all cachefiles for the pool, past or present.  When the
	 * cachefile is changed, the new one is pushed onto this list, allowing
	 * us to update previous cachefiles that no longer contain this pool.
	 */
	for (dp = list_head(&target->spa_config_list); dp != NULL;
	    dp = list_next(&target->spa_config_list, dp)) {
		spa_t *spa = NULL;
		if (dp->scd_path == NULL)
			continue;

		/*
		 * Iterate over all pools, adding any matching pools to 'nvl'.
		 */
		nvl = NULL;
		while ((spa = spa_next(spa)) != NULL) {
			/*
			 * Skip over our own pool if we're about to remove
			 * ourselves from the spa namespace or any pool that
			 * is readonly. Since we cannot guarantee that a
			 * readonly pool would successfully import upon reboot,
			 * we don't allow them to be written to the cache file.
			 */
			if ((spa == target && removing) ||
			    !spa_writeable(spa))
				continue;

			mutex_enter(&spa->spa_props_lock);
			tdp = list_head(&spa->spa_config_list);
			if (spa->spa_config == NULL ||
			    tdp == NULL ||
			    tdp->scd_path == NULL ||
			    strcmp(tdp->scd_path, dp->scd_path) != 0) {
				mutex_exit(&spa->spa_props_lock);
				continue;
			}

			if (nvl == NULL)
				VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME,
				    KM_SLEEP) == 0);

			if (spa->spa_import_flags & ZFS_IMPORT_TEMP_NAME) {
				VERIFY0(nvlist_lookup_string(spa->spa_config,
					ZPOOL_CONFIG_POOL_NAME, &pool_name));
			} else
				pool_name = spa_name(spa);

			VERIFY(nvlist_add_nvlist(nvl, pool_name,
			    spa->spa_config) == 0);
			mutex_exit(&spa->spa_props_lock);
		}

		spa_config_write(dp, nvl);
		nvlist_free(nvl);
	}

	/*
	 * Remove any config entries older than the current one.
	 */
	dp = list_head(&target->spa_config_list);
	while ((tdp = list_next(&target->spa_config_list, dp)) != NULL) {
		list_remove(&target->spa_config_list, tdp);
		if (tdp->scd_path != NULL)
			spa_strfree(tdp->scd_path);
		kmem_free(tdp, sizeof (spa_config_dirent_t));
	}

	spa_config_generation++;

	if (postsysevent)
		spa_event_notify(target, NULL, FM_EREPORT_ZFS_CONFIG_SYNC);
}
Example #6
/*
 * Find all objsets under name, call func on each
 */
int
dmu_objset_find_spa(spa_t *spa, const char *name,
    int func(spa_t *, uint64_t, const char *, void *), void *arg, int flags)
{
	dsl_dir_t *dd;
	dsl_pool_t *dp;
	dsl_dataset_t *ds;
	zap_cursor_t zc;
	zap_attribute_t *attr;
	char *child;
	uint64_t thisobj;
	int err;

	if (name == NULL)
		name = spa_name(spa);
	err = dsl_dir_open_spa(spa, name, FTAG, &dd, NULL);
	if (err)
		return (err);

	/* Don't visit hidden ($MOS & $ORIGIN) objsets. */
	if (dd->dd_myname[0] == '$') {
		dsl_dir_close(dd, FTAG);
		return (0);
	}

	thisobj = dd->dd_phys->dd_head_dataset_obj;
	attr = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);
	dp = dd->dd_pool;

	/*
	 * Iterate over all children.
	 */
	if (flags & DS_FIND_CHILDREN) {
		for (zap_cursor_init(&zc, dp->dp_meta_objset,
		    dd->dd_phys->dd_child_dir_zapobj);
		    zap_cursor_retrieve(&zc, attr) == 0;
		    (void) zap_cursor_advance(&zc)) {
			ASSERT(attr->za_integer_length == sizeof (uint64_t));
			ASSERT(attr->za_num_integers == 1);

			child = kmem_alloc(MAXPATHLEN, KM_SLEEP);
			(void) strcpy(child, name);
			(void) strcat(child, "/");
			(void) strcat(child, attr->za_name);
			err = dmu_objset_find_spa(spa, child, func, arg, flags);
			kmem_free(child, MAXPATHLEN);
			if (err)
				break;
		}
		zap_cursor_fini(&zc);

		if (err) {
			dsl_dir_close(dd, FTAG);
			kmem_free(attr, sizeof (zap_attribute_t));
			return (err);
		}
	}

	/*
	 * Iterate over all snapshots.
	 */
	if (flags & DS_FIND_SNAPSHOTS) {
		if (!dsl_pool_sync_context(dp))
			rw_enter(&dp->dp_config_rwlock, RW_READER);
		err = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds);
		if (!dsl_pool_sync_context(dp))
			rw_exit(&dp->dp_config_rwlock);

		if (err == 0) {
			uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj;
			dsl_dataset_rele(ds, FTAG);

			for (zap_cursor_init(&zc, dp->dp_meta_objset, snapobj);
			    zap_cursor_retrieve(&zc, attr) == 0;
			    (void) zap_cursor_advance(&zc)) {
				ASSERT(attr->za_integer_length ==
				    sizeof (uint64_t));
				ASSERT(attr->za_num_integers == 1);

				child = kmem_alloc(MAXPATHLEN, KM_SLEEP);
				(void) strcpy(child, name);
				(void) strcat(child, "@");
				(void) strcat(child, attr->za_name);
				err = func(spa, attr->za_first_integer,
				    child, arg);
				kmem_free(child, MAXPATHLEN);
				if (err)
					break;
			}
			zap_cursor_fini(&zc);
		}
	}

	dsl_dir_close(dd, FTAG);
	kmem_free(attr, sizeof (zap_attribute_t));

	if (err)
		return (err);

	/*
	 * Apply to self if appropriate.
	 */
	err = func(spa, thisobj, name, arg);
	return (err);
}
Example #7
static int
zfs_vfs_getattr(struct mount *mp, struct vfs_attr *fsap, __unused vfs_context_t context)
{
	zfsvfs_t *zfsvfs = vfs_fsprivate(mp);
	uint64_t refdbytes, availbytes, usedobjs, availobjs;

	ZFS_ENTER(zfsvfs);

	dmu_objset_space(zfsvfs->z_os,
	    &refdbytes, &availbytes, &usedobjs, &availobjs);

	VFSATTR_RETURN(fsap, f_objcount, usedobjs);
	VFSATTR_RETURN(fsap, f_maxobjcount, 0x7fffffffffffffff);
	/*
	 * Carbon depends on f_filecount and f_dircount so
	 * make up some values based on total objects.
	 */
	VFSATTR_RETURN(fsap, f_filecount, usedobjs - (usedobjs / 4));
	VFSATTR_RETURN(fsap, f_dircount, usedobjs / 4);

	/*
	 * The underlying storage pool actually uses multiple block sizes.
	 * We report the fragsize as the smallest block size we support,
	 * and we report our blocksize as the filesystem's maximum blocksize.
	 */
	VFSATTR_RETURN(fsap, f_bsize, 1UL << SPA_MINBLOCKSHIFT);
	VFSATTR_RETURN(fsap, f_iosize, zfsvfs->z_max_blksz);

	/*
	 * The following report "total" blocks of various kinds in the
	 * file system, but reported in terms of f_frsize - the
	 * "fragment" size.
	 */
	VFSATTR_RETURN(fsap, f_blocks,
	               (u_int64_t)((refdbytes + availbytes) >> SPA_MINBLOCKSHIFT));
	VFSATTR_RETURN(fsap, f_bfree, (u_int64_t)(availbytes >> SPA_MINBLOCKSHIFT));
	VFSATTR_RETURN(fsap, f_bavail, fsap->f_bfree);  /* no root reservation */
	VFSATTR_RETURN(fsap, f_bused, fsap->f_blocks - fsap->f_bfree);

	/*
	 * statvfs() should really be called statufs(), because it assumes
	 * static metadata.  ZFS doesn't preallocate files, so the best
	 * we can do is report the max that could possibly fit in f_files,
	 * and that minus the number actually used in f_ffree.
	 * For f_ffree, report the smaller of the number of objects available
	 * and the number of blocks (each object will take at least a block).
	 */
	VFSATTR_RETURN(fsap, f_ffree, (u_int64_t)MIN(availobjs, fsap->f_bfree));
	VFSATTR_RETURN(fsap, f_files,  fsap->f_ffree + usedobjs);

#if 0
	statp->f_flag = vf_to_stf(vfsp->vfs_flag);
#endif

	if (VFSATTR_IS_ACTIVE(fsap, f_fsid)) {
		VFSATTR_RETURN(fsap, f_fsid, vfs_statfs(mp)->f_fsid);
	}
	if (VFSATTR_IS_ACTIVE(fsap, f_capabilities)) {
		bcopy(&zfs_capabilities, &fsap->f_capabilities, sizeof (zfs_capabilities));
		VFSATTR_SET_SUPPORTED(fsap, f_capabilities);
	}
	if (VFSATTR_IS_ACTIVE(fsap, f_attributes)) {
		bcopy(&zfs_attributes, &fsap->f_attributes.validattr, sizeof (zfs_attributes));
		bcopy(&zfs_attributes, &fsap->f_attributes.nativeattr, sizeof (zfs_attributes));
		VFSATTR_SET_SUPPORTED(fsap, f_attributes);
	}
	if (VFSATTR_IS_ACTIVE(fsap, f_create_time)) {
		dmu_objset_stats_t dmu_stat;

		dmu_objset_fast_stat(zfsvfs->z_os, &dmu_stat);
		fsap->f_create_time.tv_sec = dmu_stat.dds_creation_time;
		fsap->f_create_time.tv_nsec = 0;
		VFSATTR_SET_SUPPORTED(fsap, f_create_time);
	}
	if (VFSATTR_IS_ACTIVE(fsap, f_modify_time)) {
		if (zfsvfs->z_mtime_vp != NULL) {
			znode_t *mzp;

			mzp = VTOZ(zfsvfs->z_mtime_vp);
			ZFS_TIME_DECODE(&fsap->f_modify_time, mzp->z_phys->zp_mtime);
		} else {
			fsap->f_modify_time.tv_sec = 0;
			fsap->f_modify_time.tv_nsec = 0;
		}
		VFSATTR_SET_SUPPORTED(fsap, f_modify_time);
	}
	/*
	 * For Carbon compatibility, pretend to support this legacy/unused 
	 * attribute
	 */
	if (VFSATTR_IS_ACTIVE(fsap, f_backup_time)) {
		fsap->f_backup_time.tv_sec = 0;
		fsap->f_backup_time.tv_nsec = 0;
		VFSATTR_SET_SUPPORTED(fsap, f_backup_time);
	}
	if (VFSATTR_IS_ACTIVE(fsap, f_vol_name)) {
		spa_t *spa = dmu_objset_spa(zfsvfs->z_os);
		spa_config_enter(spa, RW_READER, FTAG);
		strlcpy(fsap->f_vol_name, spa_name(spa), MAXPATHLEN);
		spa_config_exit(spa, FTAG);
		VFSATTR_SET_SUPPORTED(fsap, f_vol_name);
	}
	VFSATTR_RETURN(fsap, f_fssubtype, 0);
	VFSATTR_RETURN(fsap, f_signature, 0x5a21);  /* 'Z!' */
	VFSATTR_RETURN(fsap, f_carbon_fsid, 0);

	ZFS_EXIT(zfsvfs);
	return (0);
}
Example #8
/*
 * Generate the pool's configuration based on the current in-core state.
 * We infer whether to generate a complete config or just one top-level config
 * based on whether vd is the root vdev.
 */
nvlist_t *
spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats)
{
	nvlist_t *config, *nvroot;
	vdev_t *rvd = spa->spa_root_vdev;
	unsigned long hostid = 0;
#ifdef __APPLE__
	boolean_t skipdevpaths = (txg != -1ULL && txg != 0ULL);
#endif
	ASSERT(spa_config_held(spa, RW_READER) ||
	    spa_config_held(spa, RW_WRITER));

	if (vd == NULL)
		vd = rvd;

	/*
	 * If txg is -1, report the current value of spa->spa_config_txg.
	 */
	if (txg == -1ULL)
		txg = spa->spa_config_txg;

	VERIFY(nvlist_alloc(&config, NV_UNIQUE_NAME, KM_SLEEP) == 0);

	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_VERSION,
	    spa_version(spa)) == 0);
	VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME,
	    spa_name(spa)) == 0);
	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE,
	    spa_state(spa)) == 0);
	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_TXG,
	    txg) == 0);
	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_GUID,
	    spa_guid(spa)) == 0);
#ifndef __APPLE__	
	(void) ddi_strtoul(hw_serial, NULL, 10, &hostid);
#endif
	if (hostid != 0) {
		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_HOSTID,
		    hostid) == 0);
	}
	/*XXX NOEL: If host id identification is a feature we are
	 * interested in having (primarily for poor man's cluster
	 * support), then query for the hostid here and add it to the
	 * nvlist
	 */
#ifndef __APPLE__
	VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_HOSTNAME,
	    utsname.nodename) == 0);
#endif

	if (vd != rvd) {
		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TOP_GUID,
		    vd->vdev_top->vdev_guid) == 0);
		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_GUID,
		    vd->vdev_guid) == 0);
		if (vd->vdev_isspare)
			VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_IS_SPARE,
			    1ULL) == 0);
		if (vd->vdev_islog)
			VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_IS_LOG,
			    1ULL) == 0);
		vd = vd->vdev_top;		/* label contains top config */
	}

#ifdef __APPLE__
	nvroot = vdev_config_generate(spa, vd, getstats, B_FALSE, B_FALSE,
	                              skipdevpaths);
#else
	nvroot = vdev_config_generate(spa, vd, getstats, B_FALSE, B_FALSE);
#endif
	VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0);
	nvlist_free(nvroot);

	return (config);
}
Example #9
File: mmp.c Project: chrisrd/zfs
static void
mmp_thread(void *arg)
{
	spa_t *spa = (spa_t *)arg;
	mmp_thread_t *mmp = &spa->spa_mmp;
	boolean_t last_spa_suspended = spa_suspended(spa);
	boolean_t last_spa_multihost = spa_multihost(spa);
	callb_cpr_t cpr;
	hrtime_t max_fail_ns = zfs_multihost_fail_intervals *
	    MSEC2NSEC(MAX(zfs_multihost_interval, MMP_MIN_INTERVAL));

	mmp_thread_enter(mmp, &cpr);

	/*
	 * The mmp_write_done() function calculates mmp_delay based on the
	 * prior value of mmp_delay and the elapsed time since the last write.
	 * For the first mmp write, there is no "last write", so we start
	 * with fake, but reasonable, default non-zero values.
	 */
	mmp->mmp_delay = MSEC2NSEC(MAX(zfs_multihost_interval,
	    MMP_MIN_INTERVAL)) / MAX(vdev_count_leaves(spa), 1);
	mmp->mmp_last_write = gethrtime() - mmp->mmp_delay;

	while (!mmp->mmp_thread_exiting) {
		uint64_t mmp_fail_intervals = zfs_multihost_fail_intervals;
		uint64_t mmp_interval = MSEC2NSEC(
		    MAX(zfs_multihost_interval, MMP_MIN_INTERVAL));
		boolean_t suspended = spa_suspended(spa);
		boolean_t multihost = spa_multihost(spa);
		hrtime_t start, next_time;

		start = gethrtime();
		if (multihost) {
			next_time = start + mmp_interval /
			    MAX(vdev_count_leaves(spa), 1);
		} else {
			next_time = start + MSEC2NSEC(MMP_DEFAULT_INTERVAL);
		}

		/*
		 * When MMP goes off => on, or spa goes suspended =>
		 * !suspended, we know no writes occurred recently.  We
		 * update mmp_last_write to give us some time to try.
		 */
		if ((!last_spa_multihost && multihost) ||
		    (last_spa_suspended && !suspended)) {
			mutex_enter(&mmp->mmp_io_lock);
			mmp->mmp_last_write = gethrtime();
			mutex_exit(&mmp->mmp_io_lock);
		} else if (last_spa_multihost && !multihost) {
			mutex_enter(&mmp->mmp_io_lock);
			mmp->mmp_delay = 0;
			mutex_exit(&mmp->mmp_io_lock);
		}
		last_spa_multihost = multihost;
		last_spa_suspended = suspended;

		/*
		 * Smooth max_fail_ns when its factors are decreased, because
		 * making (max_fail_ns < mmp_interval) results in the pool being
		 * immediately suspended before writes can occur at the new
		 * higher frequency.
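		 * For illustration (hypothetical numbers): if max_fail_ns is
		 * currently 10 seconds and mmp_interval * mmp_fail_intervals
		 * drops to 2 seconds, one pass of the smoothing below gives
		 * (31 * 10s + 2s) / 32 = 9.75s, so the limit decays gradually
		 * toward the new product instead of collapsing at once.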
		 */
		if ((mmp_interval * mmp_fail_intervals) < max_fail_ns) {
			max_fail_ns = ((31 * max_fail_ns) + (mmp_interval *
			    mmp_fail_intervals)) / 32;
		} else {
			max_fail_ns = mmp_interval * mmp_fail_intervals;
		}

		/*
		 * Suspend the pool if no MMP write has succeeded in over
		 * mmp_interval * mmp_fail_intervals nanoseconds.
		 */
		if (!suspended && mmp_fail_intervals && multihost &&
		    (start - mmp->mmp_last_write) > max_fail_ns) {
			cmn_err(CE_WARN, "MMP writes to pool '%s' have not "
			    "succeeded in over %llus; suspending pool",
			    spa_name(spa),
			    NSEC2SEC(start - mmp->mmp_last_write));
			zio_suspend(spa, NULL);
		}

		if (multihost)
			mmp_write_uberblock(spa);

		CALLB_CPR_SAFE_BEGIN(&cpr);
		(void) cv_timedwait_sig(&mmp->mmp_thread_cv,
		    &mmp->mmp_thread_lock, ddi_get_lbolt() +
		    ((next_time - gethrtime()) / (NANOSEC / hz)));
		CALLB_CPR_SAFE_END(&cpr, &mmp->mmp_thread_lock);
	}

	/* Outstanding writes are allowed to complete. */
	if (mmp->mmp_zio_root)
		zio_wait(mmp->mmp_zio_root);

	mmp->mmp_zio_root = NULL;
	mmp_thread_exit(mmp, &mmp->mmp_thread, &cpr);
}
Example #10
/* ARGSUSED */
static int
spa_condense_indirect_thread(void *arg, zthr_t *zthr)
{
	spa_t *spa = arg;
	vdev_t *vd;

	ASSERT3P(spa->spa_condensing_indirect, !=, NULL);
	spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
	vd = vdev_lookup_top(spa, spa->spa_condensing_indirect_phys.scip_vdev);
	ASSERT3P(vd, !=, NULL);
	spa_config_exit(spa, SCL_VDEV, FTAG);

	spa_condensing_indirect_t *sci = spa->spa_condensing_indirect;
	spa_condensing_indirect_phys_t *scip =
	    &spa->spa_condensing_indirect_phys;
	uint32_t *counts;
	uint64_t start_index;
	vdev_indirect_mapping_t *old_mapping = vd->vdev_indirect_mapping;
	space_map_t *prev_obsolete_sm = NULL;

	ASSERT3U(vd->vdev_id, ==, scip->scip_vdev);
	ASSERT(scip->scip_next_mapping_object != 0);
	ASSERT(scip->scip_prev_obsolete_sm_object != 0);
	ASSERT3P(vd->vdev_ops, ==, &vdev_indirect_ops);

	for (int i = 0; i < TXG_SIZE; i++) {
		/*
		 * The list must start out empty in order for the
		 * _commit_sync() sync task to be properly registered
		 * on the first call to _commit_entry(); so it's wise
		 * to double check and ensure we actually are starting
		 * with empty lists.
		 */
		ASSERT(list_is_empty(&sci->sci_new_mapping_entries[i]));
	}

	VERIFY0(space_map_open(&prev_obsolete_sm, spa->spa_meta_objset,
	    scip->scip_prev_obsolete_sm_object, 0, vd->vdev_asize, 0));
	space_map_update(prev_obsolete_sm);
	counts = vdev_indirect_mapping_load_obsolete_counts(old_mapping);
	if (prev_obsolete_sm != NULL) {
		vdev_indirect_mapping_load_obsolete_spacemap(old_mapping,
		    counts, prev_obsolete_sm);
	}
	space_map_close(prev_obsolete_sm);

	/*
	 * Generate new mapping.  Determine what index to continue from
	 * based on the max offset that we've already written in the
	 * new mapping.
	 */
	uint64_t max_offset =
	    vdev_indirect_mapping_max_offset(sci->sci_new_mapping);
	if (max_offset == 0) {
		/* We haven't written anything to the new mapping yet. */
		start_index = 0;
	} else {
		/*
		 * Pick up from where we left off. _entry_for_offset()
		 * returns a pointer into the vim_entries array. If
		 * max_offset is greater than any of the mappings
		 * contained in the table  NULL will be returned and
		 * that indicates we've exhausted our iteration of the
		 * old_mapping.
		 */

		vdev_indirect_mapping_entry_phys_t *entry =
		    vdev_indirect_mapping_entry_for_offset_or_next(old_mapping,
		    max_offset);

		if (entry == NULL) {
			/*
			 * We've already written the whole new mapping.
			 * This special value will cause us to skip the
			 * generate_new_mapping step and just do the sync
			 * task to complete the condense.
			 */
			start_index = UINT64_MAX;
		} else {
			start_index = entry - old_mapping->vim_entries;
			ASSERT3U(start_index, <,
			    vdev_indirect_mapping_num_entries(old_mapping));
		}
	}

	spa_condense_indirect_generate_new_mapping(vd, counts,
	    start_index, zthr);

	vdev_indirect_mapping_free_obsolete_counts(old_mapping, counts);

	/*
	 * If the zthr has received a cancellation signal while running
	 * in generate_new_mapping() or at any point after that, then bail
	 * early. We don't want to complete the condense if the spa is
	 * shutting down.
	 */
	if (zthr_iscancelled(zthr))
		return (0);

	VERIFY0(dsl_sync_task(spa_name(spa), NULL,
	    spa_condense_indirect_complete_sync, sci, 0,
	    ZFS_SPACE_CHECK_EXTRA_RESERVED));

	return (0);
	thread_exit();
}
Example #11
int
dsl_scan_cancel(dsl_pool_t *dp)
{
	return (dsl_sync_task(spa_name(dp->dp_spa), dsl_scan_cancel_check,
	    dsl_scan_cancel_sync, NULL, 3));
}
Example #12
int
dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj,
    const char *tail, void *tag, dsl_dir_t **ddp)
{
	dmu_buf_t *dbuf;
	dsl_dir_t *dd;
	int err;

	ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
	    dsl_pool_sync_context(dp));

	err = dmu_bonus_hold(dp->dp_meta_objset, ddobj, tag, &dbuf);
	if (err)
		return (err);
	dd = dmu_buf_get_user(dbuf);
#ifdef ZFS_DEBUG
	{
		dmu_object_info_t doi;
		dmu_object_info_from_db(dbuf, &doi);
		ASSERT3U(doi.doi_type, ==, DMU_OT_DSL_DIR);
	}
#endif
	/* XXX assert bonus buffer size is correct */
	if (dd == NULL) {
		dsl_dir_t *winner;
#ifndef __APPLE__
		int err;
#endif

		dd = kmem_zalloc(sizeof (dsl_dir_t), KM_SLEEP);
		dd->dd_object = ddobj;
		dd->dd_dbuf = dbuf;
		dd->dd_pool = dp;
		dd->dd_phys = dbuf->db_data;
		dd->dd_used_bytes = dd->dd_phys->dd_used_bytes;
		mutex_init(&dd->dd_lock, NULL, MUTEX_DEFAULT, NULL);

		list_create(&dd->dd_prop_cbs, sizeof (dsl_prop_cb_record_t),
		    offsetof(dsl_prop_cb_record_t, cbr_node));

		if (dd->dd_phys->dd_parent_obj) {
			err = dsl_dir_open_obj(dp, dd->dd_phys->dd_parent_obj,
			    NULL, dd, &dd->dd_parent);
			if (err) {
				mutex_destroy(&dd->dd_lock);
				kmem_free(dd, sizeof (dsl_dir_t));
				dmu_buf_rele(dbuf, tag);
				return (err);
			}
			if (tail) {
#ifdef ZFS_DEBUG
				uint64_t foundobj;

				err = zap_lookup(dp->dp_meta_objset,
				    dd->dd_parent->dd_phys->dd_child_dir_zapobj,
				    tail, sizeof (foundobj), 1, &foundobj);
				ASSERT(err || foundobj == ddobj);
#endif
				(void) strcpy(dd->dd_myname, tail);
			} else {
				err = zap_value_search(dp->dp_meta_objset,
				    dd->dd_parent->dd_phys->dd_child_dir_zapobj,
				    ddobj, 0, dd->dd_myname);
			}
			if (err) {
				dsl_dir_close(dd->dd_parent, dd);
				mutex_destroy(&dd->dd_lock);
				kmem_free(dd, sizeof (dsl_dir_t));
				dmu_buf_rele(dbuf, tag);
				return (err);
			}
		} else {
			(void) strcpy(dd->dd_myname, spa_name(dp->dp_spa));
		}

		winner = dmu_buf_set_user_ie(dbuf, dd, &dd->dd_phys,
		    dsl_dir_evict);
		if (winner) {
			if (dd->dd_parent)
				dsl_dir_close(dd->dd_parent, dd);
			mutex_destroy(&dd->dd_lock);
			kmem_free(dd, sizeof (dsl_dir_t));
			dd = winner;
		} else {
			spa_open_ref(dp->dp_spa, dd);
		}
	}

	/*
	 * The dsl_dir_t has both open-to-close and instantiate-to-evict
	 * holds on the spa.  We need the open-to-close holds because
	 * otherwise the spa_refcnt wouldn't change when we open a
	 * dir which the spa also has open, so we could incorrectly
	 * think it was OK to unload/export/destroy the pool.  We need
	 * the instantiate-to-evict hold because the dsl_dir_t has a
	 * pointer to the dd_pool, which has a pointer to the spa_t.
	 */
	spa_open_ref(dp->dp_spa, tag);
	ASSERT3P(dd->dd_pool, ==, dp);
	ASSERT3U(dd->dd_object, ==, ddobj);
	ASSERT3P(dd->dd_dbuf, ==, dbuf);
	*ddp = dd;
	return (0);
}
Example #13
void
dataset_kstats_create(dataset_kstats_t *dk, objset_t *objset)
{
	/*
	 * There should not be anything wrong with having kstats for
	 * snapshots. Since we are not sure how useful they would be
	 * though nor how much their memory overhead would matter in
	 * a filesystem with many snapshots, we skip them for now.
	 */
	if (dmu_objset_is_snapshot(objset))
		return;

	/*
	 * At the time of this writing, KSTAT_STRLEN is 255 in Linux,
	 * and the spa_name can theoretically be up to 256 characters.
	 * In reality though the spa_name can be 240 characters max
	 * [see origin directory name check in pool_namecheck()]. Thus,
	 * the naming scheme for the module name below should not cause
	 * any truncations. In the event that a truncation does happen
	 * though, due to some future change, we silently skip creating
	 * the kstat and log the event.
	 */
	char kstat_module_name[KSTAT_STRLEN];
	int n = snprintf(kstat_module_name, sizeof (kstat_module_name),
	    "zfs/%s", spa_name(dmu_objset_spa(objset)));
	if (n < 0) {
		zfs_dbgmsg("failed to create dataset kstat for objset %lld: "
		    " snprintf() for kstat module name returned %d",
		    (unsigned long long)dmu_objset_id(objset), n);
		return;
	} else if (n >= KSTAT_STRLEN) {
		zfs_dbgmsg("failed to create dataset kstat for objset %lld: "
		    "kstat module name length (%d) exceeds limit (%d)",
		    (unsigned long long)dmu_objset_id(objset),
		    n, KSTAT_STRLEN);
		return;
	}

	char kstat_name[KSTAT_STRLEN];
	n = snprintf(kstat_name, sizeof (kstat_name), "objset-0x%llx",
	    (unsigned long long)dmu_objset_id(objset));
	if (n < 0) {
		zfs_dbgmsg("failed to create dataset kstat for objset %lld: "
		    " snprintf() for kstat name returned %d",
		    (unsigned long long)dmu_objset_id(objset), n);
		return;
	}
	ASSERT3U(n, <, KSTAT_STRLEN);
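
	/*
	 * As an illustration (hypothetical names), a pool called "tank" with
	 * objset id 0x36 would yield kstat_module_name "zfs/tank" and
	 * kstat_name "objset-0x36".
	 */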

	kstat_t *kstat = kstat_create(kstat_module_name, 0, kstat_name,
	    "dataset", KSTAT_TYPE_NAMED,
	    sizeof (empty_dataset_kstats) / sizeof (kstat_named_t),
	    KSTAT_FLAG_VIRTUAL);
	if (kstat == NULL)
		return;

	dataset_kstat_values_t *dk_kstats =
	    kmem_alloc(sizeof (empty_dataset_kstats), KM_SLEEP);
	bcopy(&empty_dataset_kstats, dk_kstats,
	    sizeof (empty_dataset_kstats));

	char *ds_name = kmem_zalloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
	dsl_dataset_name(objset->os_dsl_dataset, ds_name);
	KSTAT_NAMED_STR_PTR(&dk_kstats->dkv_ds_name) = ds_name;
	KSTAT_NAMED_STR_BUFLEN(&dk_kstats->dkv_ds_name) =
	    ZFS_MAX_DATASET_NAME_LEN;

	kstat->ks_data = dk_kstats;
	kstat->ks_update = dataset_kstats_update;
	kstat->ks_private = dk;

	kstat_install(kstat);
	dk->dk_kstats = kstat;

	aggsum_init(&dk->dk_aggsums.das_writes, 0);
	aggsum_init(&dk->dk_aggsums.das_nwritten, 0);
	aggsum_init(&dk->dk_aggsums.das_reads, 0);
	aggsum_init(&dk->dk_aggsums.das_nread, 0);
	aggsum_init(&dk->dk_aggsums.das_nunlinks, 0);
	aggsum_init(&dk->dk_aggsums.das_nunlinked, 0);
}
Example #14
/*
 * Generate the pool's configuration based on the current in-core state.
 * We infer whether to generate a complete config or just one top-level config
 * based on whether vd is the root vdev.
 */
nvlist_t *
spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats)
{
    nvlist_t *config, *nvroot;
    vdev_t *rvd = spa->spa_root_vdev;
    unsigned long hostid = 0;

    ASSERT(spa_config_held(spa, RW_READER) ||
           spa_config_held(spa, RW_WRITER));

    if (vd == NULL)
        vd = rvd;

    /*
     * If txg is -1, report the current value of spa->spa_config_txg.
     */
    if (txg == -1ULL)
        txg = spa->spa_config_txg;

    VERIFY(nvlist_alloc(&config, NV_UNIQUE_NAME, KM_SLEEP) == 0);

    VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_VERSION,
                             spa_version(spa)) == 0);
    VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME,
                             spa_name(spa)) == 0);
    VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE,
                             spa_state(spa)) == 0);
    VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_TXG,
                             txg) == 0);
    VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_GUID,
                             spa_guid(spa)) == 0);
    (void) ddi_strtoul(hw_serial, NULL, 10, &hostid);
    if (hostid != 0) {
        VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_HOSTID,
                                 hostid) == 0);
    }
    VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_HOSTNAME,
                             utsname.nodename) == 0);

    if (vd != rvd) {
        VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TOP_GUID,
                                 vd->vdev_top->vdev_guid) == 0);
        VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_GUID,
                                 vd->vdev_guid) == 0);
        if (vd->vdev_isspare)
            VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_IS_SPARE,
                                     1ULL) == 0);
        if (vd->vdev_islog)
            VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_IS_LOG,
                                     1ULL) == 0);
        vd = vd->vdev_top;		/* label contains top config */
    }

    nvroot = vdev_config_generate(spa, vd, getstats, B_FALSE, B_FALSE);
    VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0);
    nvlist_free(nvroot);

#ifdef DEBUG
    if (config != NULL) {
        printf("spa_config_generate::config=%p\n", (void *)config);
    }
#endif

    return (config);
}
Example #15
int
__dbuf_stats_hash_table_data(char *buf, size_t size, dmu_buf_impl_t *db)
{
	arc_buf_info_t abi = { 0 };
	dmu_object_info_t doi = { 0 };
	dnode_t *dn = DB_DNODE(db);
	size_t nwritten;

	if (db->db_buf)
		arc_buf_info(db->db_buf, &abi, zfs_dbuf_state_index);

	if (dn)
		__dmu_object_info_from_dnode(dn, &doi);

	nwritten = snprintf(buf, size,
	    "%-16s %-8llu %-8lld %-8lld %-8lld %-8llu %-8llu %-5d %-5d %-5lu | "
	    "%-5d %-5d 0x%-6x %-6lu %-8llu %-12llu "
	    "%-6lu %-6lu %-6lu %-6lu %-6lu %-8llu %-8llu %-8d %-5lu | "
	    "%-6d %-6d %-8lu %-8lu %-6llu %-6lu %-5lu %-8llu %-8llu\n",
	    /* dmu_buf_impl_t */
	    spa_name(dn->dn_objset->os_spa),
	    (u_longlong_t)dmu_objset_id(db->db_objset),
	    (longlong_t)db->db.db_object,
	    (longlong_t)db->db_level,
	    (longlong_t)db->db_blkid,
	    (u_longlong_t)db->db.db_offset,
	    (u_longlong_t)db->db.db_size,
	    !!dbuf_is_metadata(db),
	    db->db_state,
	    (ulong_t)refcount_count(&db->db_holds),
	    /* arc_buf_info_t */
	    abi.abi_state_type,
	    abi.abi_state_contents,
	    abi.abi_flags,
	    (ulong_t)abi.abi_bufcnt,
	    (u_longlong_t)abi.abi_size,
	    (u_longlong_t)abi.abi_access,
	    (ulong_t)abi.abi_mru_hits,
	    (ulong_t)abi.abi_mru_ghost_hits,
	    (ulong_t)abi.abi_mfu_hits,
	    (ulong_t)abi.abi_mfu_ghost_hits,
	    (ulong_t)abi.abi_l2arc_hits,
	    (u_longlong_t)abi.abi_l2arc_dattr,
	    (u_longlong_t)abi.abi_l2arc_asize,
	    abi.abi_l2arc_compress,
	    (ulong_t)abi.abi_holds,
	    /* dmu_object_info_t */
	    doi.doi_type,
	    doi.doi_bonus_type,
	    (ulong_t)doi.doi_data_block_size,
	    (ulong_t)doi.doi_metadata_block_size,
	    (u_longlong_t)doi.doi_bonus_size,
	    (ulong_t)doi.doi_indirection,
	    (ulong_t)refcount_count(&dn->dn_holds),
	    (u_longlong_t)doi.doi_fill_count,
	    (u_longlong_t)doi.doi_max_offset);

	if (nwritten >= size)
		return (size);

	return (nwritten + 1);
}
Example #16
static void
zhack_do_feature_ref(int argc, char **argv)
{
	char c;
	char *target;
	boolean_t decr = B_FALSE;
	spa_t *spa;
	objset_t *mos;
	zfeature_info_t feature;
	zfeature_info_t *nodeps[] = { NULL };

	/*
	 * fi_desc does not matter here because it was written to disk
	 * when the feature was enabled, but we need to properly set the
	 * feature for read or write based on the information we read off
	 * disk later.
	 */
	feature.fi_uname = "zhack";
	feature.fi_mos = B_FALSE;
	feature.fi_desc = NULL;
	feature.fi_depends = nodeps;

	optind = 1;
	while ((c = getopt(argc, argv, "md")) != -1) {
		switch (c) {
		case 'm':
			feature.fi_mos = B_TRUE;
			break;
		case 'd':
			decr = B_TRUE;
			break;
		default:
			usage();
			break;
		}
	}
	argc -= optind;
	argv += optind;

	if (argc < 2) {
		(void) fprintf(stderr, "error: missing feature or pool name\n");
		usage();
	}
	target = argv[0];
	feature.fi_guid = argv[1];

	if (!zfeature_is_valid_guid(feature.fi_guid))
		fatal("invalid feature guid: %s", feature.fi_guid);

	zhack_spa_open(target, B_FALSE, FTAG, &spa);
	mos = spa->spa_meta_objset;

	if (0 == zfeature_lookup_guid(feature.fi_guid, NULL))
		fatal("'%s' is a real feature, will not change refcount");

	if (0 == zap_contains(mos, spa->spa_feat_for_read_obj,
	    feature.fi_guid)) {
		feature.fi_can_readonly = B_FALSE;
	} else if (0 == zap_contains(mos, spa->spa_feat_for_write_obj,
	    feature.fi_guid)) {
		feature.fi_can_readonly = B_TRUE;
	} else {
		fatal("feature is not enabled: %s", feature.fi_guid);
	}

	if (decr && !spa_feature_is_active(spa, &feature))
		fatal("feature refcount already 0: %s", feature.fi_guid);

	VERIFY0(dsl_sync_task(spa_name(spa), NULL,
	    decr ? feature_decr_sync : feature_incr_sync, &feature, 5));

	spa_close(spa, FTAG);
}
Example #17
/*
 * Generate the pool's configuration based on the current in-core state.
 *
 * We infer whether to generate a complete config or just one top-level config
 * based on whether vd is the root vdev.
 */
nvlist_t *
spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats)
{
	nvlist_t *config, *nvroot;
	vdev_t *rvd = spa->spa_root_vdev;
	unsigned long hostid = 0;
	boolean_t locked = B_FALSE;
	uint64_t split_guid;
	char *pool_name;

	if (vd == NULL) {
		vd = rvd;
		locked = B_TRUE;
		spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_READER);
	}

	ASSERT(spa_config_held(spa, SCL_CONFIG | SCL_STATE, RW_READER) ==
	    (SCL_CONFIG | SCL_STATE));

	/*
	 * If txg is -1, report the current value of spa->spa_config_txg.
	 */
	if (txg == -1ULL)
		txg = spa->spa_config_txg;

	/*
	 * Originally, users had to handle spa namespace collisions by either
	 * exporting the already imported pool or by specifying a new name for
	 * the pool with a conflicting name. In the case of root pools from
	 * virtual guests, neither approach to collision resolution is
	 * reasonable. This is addressed by extending the new name syntax with
	 * an option to specify that the new name is temporary. When specified,
	 * ZFS_IMPORT_TEMP_NAME will be set in spa->spa_import_flags to tell us
	 * to use the previous name, which we do below.
	 */
	if (spa->spa_import_flags & ZFS_IMPORT_TEMP_NAME) {
		VERIFY0(nvlist_lookup_string(spa->spa_config,
			ZPOOL_CONFIG_POOL_NAME, &pool_name));
	} else
		pool_name = spa_name(spa);

	VERIFY(nvlist_alloc(&config, NV_UNIQUE_NAME, KM_SLEEP) == 0);

	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_VERSION,
	    spa_version(spa)) == 0);
	VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME,
	    pool_name) == 0);
	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE,
	    spa_state(spa)) == 0);
	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_TXG,
	    txg) == 0);
	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_GUID,
	    spa_guid(spa)) == 0);
	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_ERRATA,
	    spa->spa_errata) == 0);
	VERIFY(spa->spa_comment == NULL || nvlist_add_string(config,
	    ZPOOL_CONFIG_COMMENT, spa->spa_comment) == 0);


#ifdef	_KERNEL
	hostid = zone_get_hostid(NULL);
#else	/* _KERNEL */
	/*
	 * We're emulating the system's hostid in userland, so we can't use
	 * zone_get_hostid().
	 */
	(void) ddi_strtoul(hw_serial, NULL, 10, &hostid);
#endif	/* _KERNEL */
	if (hostid != 0) {
		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_HOSTID,
		    hostid) == 0);
	}
	VERIFY0(nvlist_add_string(config, ZPOOL_CONFIG_HOSTNAME,
	    utsname()->nodename));

	if (vd != rvd) {
		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TOP_GUID,
		    vd->vdev_top->vdev_guid) == 0);
		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_GUID,
		    vd->vdev_guid) == 0);
		if (vd->vdev_isspare)
			VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_IS_SPARE,
			    1ULL) == 0);
		if (vd->vdev_islog)
			VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_IS_LOG,
			    1ULL) == 0);
		vd = vd->vdev_top;		/* label contains top config */
	} else {
		/*
		 * Only add the (potentially large) split information
		 * in the mos config, and not in the vdev labels
		 */
		if (spa->spa_config_splitting != NULL)
			VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_SPLIT,
			    spa->spa_config_splitting) == 0);
	}

	/*
	 * Add the top-level config.  We even add this on pools which
	 * don't support holes in the namespace.
	 */
	vdev_top_config_generate(spa, config);

	/*
	 * If we're splitting, record the original pool's guid.
	 */
	if (spa->spa_config_splitting != NULL &&
	    nvlist_lookup_uint64(spa->spa_config_splitting,
	    ZPOOL_CONFIG_SPLIT_GUID, &split_guid) == 0) {
		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_SPLIT_GUID,
		    split_guid) == 0);
	}

	nvroot = vdev_config_generate(spa, vd, getstats, 0);
	VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0);
	nvlist_free(nvroot);

	/*
	 * Store what's necessary for reading the MOS in the label.
	 */
	VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_FEATURES_FOR_READ,
	    spa->spa_label_features) == 0);

	if (getstats && spa_load_state(spa) == SPA_LOAD_NONE) {
		ddt_histogram_t *ddh;
		ddt_stat_t *dds;
		ddt_object_t *ddo;

		ddh = kmem_zalloc(sizeof (ddt_histogram_t), KM_SLEEP);
		ddt_get_dedup_histogram(spa, ddh);
		VERIFY(nvlist_add_uint64_array(config,
		    ZPOOL_CONFIG_DDT_HISTOGRAM,
		    (uint64_t *)ddh, sizeof (*ddh) / sizeof (uint64_t)) == 0);
		kmem_free(ddh, sizeof (ddt_histogram_t));

		ddo = kmem_zalloc(sizeof (ddt_object_t), KM_SLEEP);
		ddt_get_dedup_object_stats(spa, ddo);
		VERIFY(nvlist_add_uint64_array(config,
		    ZPOOL_CONFIG_DDT_OBJ_STATS,
		    (uint64_t *)ddo, sizeof (*ddo) / sizeof (uint64_t)) == 0);
		kmem_free(ddo, sizeof (ddt_object_t));

		dds = kmem_zalloc(sizeof (ddt_stat_t), KM_SLEEP);
		ddt_get_dedup_stats(spa, dds);
		VERIFY(nvlist_add_uint64_array(config,
		    ZPOOL_CONFIG_DDT_STATS,
		    (uint64_t *)dds, sizeof (*dds) / sizeof (uint64_t)) == 0);
		kmem_free(dds, sizeof (ddt_stat_t));
	}

	if (locked)
		spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);

	return (config);
}
Example #18
/*
 * The full semantics of this function are described in the comment above
 * lzc_release().
 *
 * To summarize:
 * Releases holds specified in the nvl holds.
 *
 * holds is nvl of snapname -> { holdname, ... }
 * errlist will be filled in with snapname -> error
 *
 * If tmpdp is not NULL the names for holds should be the dsobj's of snapshots,
 * otherwise they should be the names of snapshots.
 *
 * As a release may cause snapshots to be destroyed, this tries to ensure they
 * aren't mounted.
 *
 * Releases of non-existent holds are skipped.
 *
 * At least one hold must have been released for this function to succeed
 * and return 0.
 */
static int
dsl_dataset_user_release_impl(nvlist_t *holds, nvlist_t *errlist,
    dsl_pool_t *tmpdp)
{
	dsl_dataset_user_release_arg_t ddura;
	nvpair_t *pair;
	char *pool;
	int error;

	pair = nvlist_next_nvpair(holds, NULL);
	if (pair == NULL)
		return (0);

	/*
	 * The release may cause snapshots to be destroyed; make sure they
	 * are not mounted.
	 */
	if (tmpdp != NULL) {
		/* Temporary holds are specified by dsobj string. */
		ddura.ddura_holdfunc = dsl_dataset_hold_obj_string;
		pool = spa_name(tmpdp->dp_spa);
#ifdef _KERNEL
		for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
		    pair = nvlist_next_nvpair(holds, pair)) {
			dsl_dataset_t *ds;

			dsl_pool_config_enter(tmpdp, FTAG);
			error = dsl_dataset_hold_obj_string(tmpdp,
			    nvpair_name(pair), FTAG, &ds);
			if (error == 0) {
				char name[MAXNAMELEN];
				dsl_dataset_name(ds, name);
				dsl_pool_config_exit(tmpdp, FTAG);
				dsl_dataset_rele(ds, FTAG);
				(void) zfs_unmount_snap(name);
			} else {
				dsl_pool_config_exit(tmpdp, FTAG);
			}
		}
#endif
	} else {
		/* Non-temporary holds are specified by name. */
		ddura.ddura_holdfunc = dsl_dataset_hold;
		pool = nvpair_name(pair);
#ifdef _KERNEL
		for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
		    pair = nvlist_next_nvpair(holds, pair)) {
			(void) zfs_unmount_snap(nvpair_name(pair));
		}
#endif
	}

	ddura.ddura_holds = holds;
	ddura.ddura_errlist = errlist;
	ddura.ddura_todelete = fnvlist_alloc();
	ddura.ddura_chkholds = fnvlist_alloc();

	error = dsl_sync_task(pool, dsl_dataset_user_release_check,
	    dsl_dataset_user_release_sync, &ddura, 0);
	fnvlist_free(ddura.ddura_todelete);
	fnvlist_free(ddura.ddura_chkholds);

	return (error);
}
Example #19
/*
 * Generate the pool's configuration based on the current in-core state.
 *
 * We infer whether to generate a complete config or just one top-level config
 * based on whether vd is the root vdev.
 */
nvlist_t *
spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats)
{
	nvlist_t *config, *nvroot;
	vdev_t *rvd = spa->spa_root_vdev;
	unsigned long hostid = 0;
	boolean_t locked = B_FALSE;
	uint64_t split_guid;

	if (vd == NULL) {
		vd = rvd;
		locked = B_TRUE;
		spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_READER);
	}

	ASSERT(spa_config_held(spa, SCL_CONFIG | SCL_STATE, RW_READER) ==
	    (SCL_CONFIG | SCL_STATE));

	/*
	 * If txg is -1, report the current value of spa->spa_config_txg.
	 */
	if (txg == -1ULL)
		txg = spa->spa_config_txg;

	VERIFY(nvlist_alloc(&config, NV_UNIQUE_NAME, KM_SLEEP) == 0);

	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_VERSION,
	    spa_version(spa)) == 0);
	VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME,
	    spa_name(spa)) == 0);
	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE,
	    spa_state(spa)) == 0);
	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_TXG,
	    txg) == 0);
	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_GUID,
	    spa_guid(spa)) == 0);
	VERIFY(spa->spa_comment == NULL || nvlist_add_string(config,
	    ZPOOL_CONFIG_COMMENT, spa->spa_comment) == 0);


#ifdef	_KERNEL
	hostid = zone_get_hostid(NULL);
#else	/* _KERNEL */
	/*
	 * We're emulating the system's hostid in userland, so we can't use
	 * zone_get_hostid().
	 */
	(void) ddi_strtoul(hw_serial, NULL, 10, &hostid);
#endif	/* _KERNEL */
	if (hostid != 0) {
		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_HOSTID,
		    hostid) == 0);
	}
	VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_HOSTNAME,
	    utsname.nodename) == 0);

	if (vd != rvd) {
		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TOP_GUID,
		    vd->vdev_top->vdev_guid) == 0);
		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_GUID,
		    vd->vdev_guid) == 0);
		if (vd->vdev_isspare)
			VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_IS_SPARE,
			    1ULL) == 0);
		if (vd->vdev_islog)
			VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_IS_LOG,
			    1ULL) == 0);
		vd = vd->vdev_top;		/* label contains top config */
	} else {
		/*
		 * Only add the (potentially large) split information
		 * in the mos config, and not in the vdev labels
		 */
		if (spa->spa_config_splitting != NULL)
			VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_SPLIT,
			    spa->spa_config_splitting) == 0);
	}

	/*
	 * Add the top-level config.  We even add this on pools which
	 * don't support holes in the namespace.
	 */
	vdev_top_config_generate(spa, config);

	/*
	 * If we're splitting, record the original pool's guid.
	 */
	if (spa->spa_config_splitting != NULL &&
	    nvlist_lookup_uint64(spa->spa_config_splitting,
	    ZPOOL_CONFIG_SPLIT_GUID, &split_guid) == 0) {
		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_SPLIT_GUID,
		    split_guid) == 0);
	}

	nvroot = vdev_config_generate(spa, vd, getstats, 0);
	VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0);
	nvlist_free(nvroot);

	/*
	 * Store what's necessary for reading the MOS in the label.
	 */
	VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_FEATURES_FOR_READ,
	    spa->spa_label_features) == 0);

	if (getstats && spa_load_state(spa) == SPA_LOAD_NONE) {
		ddt_histogram_t *ddh;
		ddt_stat_t *dds;
		ddt_object_t *ddo;

		ddh = kmem_zalloc(sizeof (ddt_histogram_t), KM_SLEEP);
		ddt_get_dedup_histogram(spa, ddh);
		VERIFY(nvlist_add_uint64_array(config,
		    ZPOOL_CONFIG_DDT_HISTOGRAM,
		    (uint64_t *)ddh, sizeof (*ddh) / sizeof (uint64_t)) == 0);
		kmem_free(ddh, sizeof (ddt_histogram_t));

		ddo = kmem_zalloc(sizeof (ddt_object_t), KM_SLEEP);
		ddt_get_dedup_object_stats(spa, ddo);
		VERIFY(nvlist_add_uint64_array(config,
		    ZPOOL_CONFIG_DDT_OBJ_STATS,
		    (uint64_t *)ddo, sizeof (*ddo) / sizeof (uint64_t)) == 0);
		kmem_free(ddo, sizeof (ddt_object_t));

		dds = kmem_zalloc(sizeof (ddt_stat_t), KM_SLEEP);
		ddt_get_dedup_stats(spa, dds);
		VERIFY(nvlist_add_uint64_array(config,
		    ZPOOL_CONFIG_DDT_STATS,
		    (uint64_t *)dds, sizeof (*dds) / sizeof (uint64_t)) == 0);
		kmem_free(dds, sizeof (ddt_stat_t));
	}

	if (locked)
		spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);

	return (config);
}
Example #20
int
dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj,
    const char *tail, void *tag, dsl_dir_t **ddp)
{
	dmu_buf_t *dbuf;
	dsl_dir_t *dd;
	int err;

	ASSERT(dsl_pool_config_held(dp));

	err = dmu_bonus_hold(dp->dp_meta_objset, ddobj, tag, &dbuf);
	if (err != 0)
		return (err);
	dd = dmu_buf_get_user(dbuf);
#ifdef ZFS_DEBUG
	{
		dmu_object_info_t doi;
		dmu_object_info_from_db(dbuf, &doi);
		ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_DSL_DIR);
		ASSERT3U(doi.doi_bonus_size, >=, sizeof (dsl_dir_phys_t));
	}
#endif
	if (dd == NULL) {
		dsl_dir_t *winner;

		dd = kmem_zalloc(sizeof (dsl_dir_t), KM_SLEEP);
		dd->dd_object = ddobj;
		dd->dd_dbuf = dbuf;
		dd->dd_pool = dp;
		dd->dd_phys = dbuf->db_data;
		mutex_init(&dd->dd_lock, NULL, MUTEX_DEFAULT, NULL);

		list_create(&dd->dd_prop_cbs, sizeof (dsl_prop_cb_record_t),
		    offsetof(dsl_prop_cb_record_t, cbr_node));

		dsl_dir_snap_cmtime_update(dd);

		if (dd->dd_phys->dd_parent_obj) {
			err = dsl_dir_hold_obj(dp, dd->dd_phys->dd_parent_obj,
			    NULL, dd, &dd->dd_parent);
			if (err != 0)
				goto errout;
			if (tail) {
#ifdef ZFS_DEBUG
				uint64_t foundobj;

				err = zap_lookup(dp->dp_meta_objset,
				    dd->dd_parent->dd_phys->dd_child_dir_zapobj,
				    tail, sizeof (foundobj), 1, &foundobj);
				ASSERT(err || foundobj == ddobj);
#endif
				(void) strcpy(dd->dd_myname, tail);
			} else {
				err = zap_value_search(dp->dp_meta_objset,
				    dd->dd_parent->dd_phys->dd_child_dir_zapobj,
				    ddobj, 0, dd->dd_myname);
			}
			if (err != 0)
				goto errout;
		} else {
			(void) strcpy(dd->dd_myname, spa_name(dp->dp_spa));
		}

		if (dsl_dir_is_clone(dd)) {
			dmu_buf_t *origin_bonus;
			dsl_dataset_phys_t *origin_phys;

			/*
			 * We can't open the origin dataset, because
			 * that would require opening this dsl_dir.
			 * Just look at its phys directly instead.
			 */
			err = dmu_bonus_hold(dp->dp_meta_objset,
			    dd->dd_phys->dd_origin_obj, FTAG, &origin_bonus);
			if (err != 0)
				goto errout;
			origin_phys = origin_bonus->db_data;
			dd->dd_origin_txg =
			    origin_phys->ds_creation_txg;
			dmu_buf_rele(origin_bonus, FTAG);
		}

		winner = dmu_buf_set_user_ie(dbuf, dd, &dd->dd_phys,
		    dsl_dir_evict);
		if (winner) {
			if (dd->dd_parent)
				dsl_dir_rele(dd->dd_parent, dd);
			mutex_destroy(&dd->dd_lock);
			kmem_free(dd, sizeof (dsl_dir_t));
			dd = winner;
		} else {
			spa_open_ref(dp->dp_spa, dd);
		}
	}

	/*
	 * The dsl_dir_t has both open-to-close and instantiate-to-evict
	 * holds on the spa.  We need the open-to-close holds because
	 * otherwise the spa_refcnt wouldn't change when we open a
	 * dir which the spa also has open, so we could incorrectly
	 * think it was OK to unload/export/destroy the pool.  We need
	 * the instantiate-to-evict hold because the dsl_dir_t has a
	 * pointer to the dd_pool, which has a pointer to the spa_t.
	 */
	spa_open_ref(dp->dp_spa, tag);
	ASSERT3P(dd->dd_pool, ==, dp);
	ASSERT3U(dd->dd_object, ==, ddobj);
	ASSERT3P(dd->dd_dbuf, ==, dbuf);
	*ddp = dd;
	return (0);

errout:
	if (dd->dd_parent)
		dsl_dir_rele(dd->dd_parent, dd);
	mutex_destroy(&dd->dd_lock);
	kmem_free(dd, sizeof (dsl_dir_t));
	dmu_buf_rele(dbuf, tag);
	return (err);
}
Example #21
/*ARGSUSED*/
static void
spa_history_log_sync(void *arg1, void *arg2, dmu_tx_t *tx)
{
	spa_t		*spa = arg1;
	history_arg_t	*hap = arg2;
	const char	*history_str = hap->ha_history_str;
	objset_t	*mos = spa->spa_meta_objset;
	dmu_buf_t	*dbp;
	spa_history_phys_t *shpp;
	size_t		reclen;
	uint64_t	le_len;
	nvlist_t	*nvrecord;
	char		*record_packed = NULL;
	int		ret;

	/*
	 * If we have an older pool that doesn't have a command
	 * history object, create it now.
	 */
	mutex_enter(&spa->spa_history_lock);
	if (!spa->spa_history)
		spa_history_create_obj(spa, tx);
	mutex_exit(&spa->spa_history_lock);

	/*
	 * Get the offset of where we need to write via the bonus buffer.
	 * Update the offset when the write completes.
	 */
	VERIFY(0 == dmu_bonus_hold(mos, spa->spa_history, FTAG, &dbp));
	shpp = dbp->db_data;

	dmu_buf_will_dirty(dbp, tx);

#ifdef ZFS_DEBUG
	{
		dmu_object_info_t doi;
		dmu_object_info_from_db(dbp, &doi);
		ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_SPA_HISTORY_OFFSETS);
	}
#endif

	VERIFY(nvlist_alloc(&nvrecord, NV_UNIQUE_NAME, KM_SLEEP) == 0);
	VERIFY(nvlist_add_uint64(nvrecord, ZPOOL_HIST_TIME,
	    gethrestime_sec()) == 0);
	VERIFY(nvlist_add_uint64(nvrecord, ZPOOL_HIST_WHO, hap->ha_uid) == 0);
	if (hap->ha_zone != NULL)
		VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_ZONE,
		    hap->ha_zone) == 0);
#ifdef _KERNEL
	VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_HOST,
	    utsname.nodename) == 0);
#endif
	if (hap->ha_log_type == LOG_CMD_POOL_CREATE ||
	    hap->ha_log_type == LOG_CMD_NORMAL) {
		VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_CMD,
		    history_str) == 0);

		zfs_dbgmsg("command: %s", history_str);
	} else {
		VERIFY(nvlist_add_uint64(nvrecord, ZPOOL_HIST_INT_EVENT,
		    hap->ha_event) == 0);
		VERIFY(nvlist_add_uint64(nvrecord, ZPOOL_HIST_TXG,
		    tx->tx_txg) == 0);
		VERIFY(nvlist_add_string(nvrecord, ZPOOL_HIST_INT_STR,
		    history_str) == 0);

		zfs_dbgmsg("internal %s pool:%s txg:%llu %s",
		    zfs_history_event_names[hap->ha_event], spa_name(spa),
		    (longlong_t)tx->tx_txg, history_str);

	}

	VERIFY(nvlist_size(nvrecord, &reclen, NV_ENCODE_XDR) == 0);
	record_packed = kmem_alloc(reclen, KM_SLEEP);

	VERIFY(nvlist_pack(nvrecord, &record_packed, &reclen,
	    NV_ENCODE_XDR, KM_SLEEP) == 0);

	mutex_enter(&spa->spa_history_lock);
	if (hap->ha_log_type == LOG_CMD_POOL_CREATE)
		VERIFY(shpp->sh_eof == shpp->sh_pool_create_len);

	/* write out the packed length as little endian */
	le_len = LE_64((uint64_t)reclen);
	ret = spa_history_write(spa, &le_len, sizeof (le_len), shpp, tx);
	if (!ret)
		ret = spa_history_write(spa, record_packed, reclen, shpp, tx);

	if (!ret && hap->ha_log_type == LOG_CMD_POOL_CREATE) {
		shpp->sh_pool_create_len += sizeof (le_len) + reclen;
		shpp->sh_bof = shpp->sh_pool_create_len;
	}

	mutex_exit(&spa->spa_history_lock);
	nvlist_free(nvrecord);
	kmem_free(record_packed, reclen);
	dmu_buf_rele(dbp, FTAG);

	strfree(hap->ha_history_str);
	if (hap->ha_zone != NULL)
		strfree(hap->ha_zone);
	kmem_free(hap, sizeof (history_arg_t));
}
Example #22
/*
 * Generate the pool's configuration based on the current in-core state.
 * We infer whether to generate a complete config or just one top-level config
 * based on whether vd is the root vdev.
 */
nvlist_t *
spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats)
{
	nvlist_t *config, *nvroot;
	vdev_t *rvd = spa->spa_root_vdev;
	unsigned long hostid = 0;
	boolean_t locked = B_FALSE;

	if (vd == NULL) {
		vd = rvd;
		locked = B_TRUE;
		spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_READER);
	}

	ASSERT(spa_config_held(spa, SCL_CONFIG | SCL_STATE, RW_READER) ==
	    (SCL_CONFIG | SCL_STATE));

	/*
	 * If txg is -1, report the current value of spa->spa_config_txg.
	 */
	if (txg == -1ULL)
		txg = spa->spa_config_txg;

	VERIFY(nvlist_alloc(&config, NV_UNIQUE_NAME, KM_SLEEP) == 0);

	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_VERSION,
	    spa_version(spa)) == 0);
	VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME,
	    spa_name(spa)) == 0);
	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE,
	    spa_state(spa)) == 0);
	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_TXG,
	    txg) == 0);
	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_GUID,
	    spa_guid(spa)) == 0);
#ifdef	_KERNEL
	hostid = zone_get_hostid(NULL);
#else	/* _KERNEL */
	/*
	 * We're emulating the system's hostid in userland, so we can't use
	 * zone_get_hostid().
	 */
	(void) ddi_strtoul(hw_serial, NULL, 10, &hostid);
#endif	/* _KERNEL */
	if (hostid != 0) {
		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_HOSTID,
		    hostid) == 0);
	}
	VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_HOSTNAME,
	    utsname.nodename) == 0);

	if (vd != rvd) {
		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TOP_GUID,
		    vd->vdev_top->vdev_guid) == 0);
		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_GUID,
		    vd->vdev_guid) == 0);
		if (vd->vdev_isspare)
			VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_IS_SPARE,
			    1ULL) == 0);
		if (vd->vdev_islog)
			VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_IS_LOG,
			    1ULL) == 0);
		vd = vd->vdev_top;		/* label contains top config */
	}

	nvroot = vdev_config_generate(spa, vd, getstats, B_FALSE, B_FALSE);
	VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0);
	nvlist_free(nvroot);

	if (locked)
		spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);

	return (config);
}
Example #23
/*
 * Return the dsl_dir_t, and possibly the last component which couldn't
 * be found in *tail.  The name must be in the specified dsl_pool_t.  This
 * thread must hold the dp_config_rwlock for the pool.  Returns NULL if the
 * path is bogus, or if tail==NULL and we couldn't parse the whole name.
 * (*tail)[0] == '@' means that the last component is a snapshot.
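 * For example, holding a hypothetical name "tank/fs@snap" returns the
 * dsl_dir_t for "tank/fs" and leaves *tailp pointing at "@snap".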
 */
int
dsl_dir_hold(dsl_pool_t *dp, const char *name, void *tag,
    dsl_dir_t **ddp, const char **tailp)
{
	char buf[MAXNAMELEN];
	const char *spaname, *next, *nextnext = NULL;
	int err;
	dsl_dir_t *dd;
	uint64_t ddobj;

	err = getcomponent(name, buf, &next);
	if (err != 0)
		return (err);

	/* Make sure the name is in the specified pool. */
	spaname = spa_name(dp->dp_spa);
	if (strcmp(buf, spaname) != 0)
		return (SET_ERROR(EINVAL));

	ASSERT(dsl_pool_config_held(dp));

	err = dsl_dir_hold_obj(dp, dp->dp_root_dir_obj, NULL, tag, &dd);
	if (err != 0) {
		return (err);
	}

	while (next != NULL) {
		dsl_dir_t *child_ds;
		err = getcomponent(next, buf, &nextnext);
		if (err != 0)
			break;
		ASSERT(next[0] != '\0');
		if (next[0] == '@')
			break;
		dprintf("looking up %s in obj%lld\n",
		    buf, (longlong_t)dd->dd_phys->dd_child_dir_zapobj);

		err = zap_lookup(dp->dp_meta_objset,
		    dd->dd_phys->dd_child_dir_zapobj,
		    buf, sizeof (ddobj), 1, &ddobj);
		if (err != 0) {
			if (err == ENOENT)
				err = 0;
			break;
		}

		err = dsl_dir_hold_obj(dp, ddobj, buf, tag, &child_ds);
		if (err != 0)
			break;
		dsl_dir_rele(dd, tag);
		dd = child_ds;
		next = nextnext;
	}

	if (err != 0) {
		dsl_dir_rele(dd, tag);
		return (err);
	}

	/*
	 * It's an error if there's more than one component left, or
	 * tailp==NULL and there's any component left.
	 */
	if (next != NULL &&
	    (tailp == NULL || (nextnext && nextnext[0] != '\0'))) {
		/* bad path name */
		dsl_dir_rele(dd, tag);
		dprintf("next=%p (%s) tail=%p\n", next, next?next:"", tailp);
		err = SET_ERROR(ENOENT);
	}
	if (tailp != NULL)
		*tailp = next;
	*ddp = dd;
	return (err);
}
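Note: a hedged sketch of the usual hold/release pattern around dsl_dir_hold(), assuming the dsl_pool_config_enter()/dsl_pool_config_exit() and dsl_dir_rele() helpers from the same codebase; example_dir_lookup() is a hypothetical caller, not part of the example. tail receives the final component the lookup could not descend into (NULL if the whole name was parsed):

static int
example_dir_lookup(dsl_pool_t *dp, const char *name)
{
	dsl_dir_t *dd;
	const char *tail;
	int err;

	/* dsl_dir_hold() requires the pool config lock */
	dsl_pool_config_enter(dp, FTAG);
	err = dsl_dir_hold(dp, name, FTAG, &dd, &tail);
	if (err == 0) {
		dprintf("held %s, tail=%s\n", name,
		    tail != NULL ? tail : "<none>");
		dsl_dir_rele(dd, FTAG);
	}
	dsl_pool_config_exit(dp, FTAG);
	return (err);
}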
Example #24
static void
zhack_do_feature_enable(int argc, char **argv)
{
	int c;
	char *desc, *target;
	spa_t *spa;
	objset_t *mos;
	zfeature_info_t feature;
	spa_feature_t nodeps[] = { SPA_FEATURE_NONE };

	/*
	 * Features are not added to the pool's label until their refcounts
	 * are incremented, so fi_mos can just be left as false for now.
	 */
	desc = NULL;
	feature.fi_uname = "zhack";
	feature.fi_flags = 0;
	feature.fi_depends = nodeps;
	feature.fi_feature = SPA_FEATURE_NONE;

	optind = 1;
	while ((c = getopt(argc, argv, "rmd:")) != -1) {
		switch (c) {
		case 'r':
			feature.fi_flags |= ZFEATURE_FLAG_READONLY_COMPAT;
			break;
		case 'd':
			desc = strdup(optarg);
			break;
		default:
			usage();
			break;
		}
	}

	if (desc == NULL)
		desc = strdup("zhack injected");
	feature.fi_desc = desc;

	argc -= optind;
	argv += optind;

	if (argc < 2) {
		(void) fprintf(stderr, "error: missing feature or pool name\n");
		usage();
	}
	target = argv[0];
	feature.fi_guid = argv[1];

	if (!zfeature_is_valid_guid(feature.fi_guid))
		fatal(NULL, FTAG, "invalid feature guid: %s", feature.fi_guid);

	zhack_spa_open(target, B_FALSE, FTAG, &spa);
	mos = spa->spa_meta_objset;

	if (zfeature_is_supported(feature.fi_guid))
		fatal(spa, FTAG, "'%s' is a real feature, will not enable",
		    feature.fi_guid);
	if (0 == zap_contains(mos, spa->spa_feat_desc_obj, feature.fi_guid))
		fatal(spa, FTAG, "feature already enabled: %s",
		    feature.fi_guid);

	VERIFY0(dsl_sync_task(spa_name(spa), NULL,
	    zhack_feature_enable_sync, &feature, 5, ZFS_SPACE_CHECK_NORMAL));

	spa_close(spa, FTAG);

	free(desc);
}
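Note: zhack_feature_enable_sync() itself is not shown in this listing. As a hedged sketch only (example_feature_enable_sync() is hypothetical, and it assumes the feature_enable_sync() helper from zfeature.c plus the dsl_syncfunc_t signature expected by dsl_sync_task()), the callback could look roughly like this:

static void
example_feature_enable_sync(void *arg, dmu_tx_t *tx)
{
	/* illustrative sketch, not the original callback */
	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
	zfeature_info_t *feature = arg;

	feature_enable_sync(spa, feature, tx);
}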
Example #25
static int
vdev_file_io_start(zio_t *zio)
{
	vdev_t *vd = zio->io_vd;
	vdev_file_t *vf = vd->vdev_tsd;
#ifdef LINUX_AIO
	struct iocb *iocbp = &zio->io_aio;
#endif

	ssize_t resid;
	int error;

	if (zio->io_type == ZIO_TYPE_IOCTL) {
		zio_vdev_io_bypass(zio);

		/* XXPOLICY */
		if (!vdev_readable(vd)) {
			zio->io_error = ENXIO;
			return (ZIO_PIPELINE_CONTINUE);
		}

		switch (zio->io_cmd) {
		case DKIOCFLUSHWRITECACHE:
			if (zfs_nocacheflush)
				break;

			/* This doesn't actually do much with O_DIRECT... */
			zio->io_error = VOP_FSYNC(vf->vf_vnode, FSYNC | FDSYNC,
			    kcred, NULL);
			dprintf("fsync(%s) = %d\n", vdev_description(vd),
			    zio->io_error);

			if (vd->vdev_nowritecache) {
				zio->io_error = ENOTSUP;
				break;
			}

			/* Flush the write cache */
			error = flushwc(vf->vf_vnode);
			dprintf("flushwc(%s) = %d\n", vdev_description(vd),
			    error);

			if (error) {
#ifdef _KERNEL
				cmn_err(CE_WARN, "Failed to flush write cache "
				    "on device '%s'. Data on pool '%s' may be lost "
				    "if power fails. No further warnings will "
				    "be given.", vdev_description(vd),
				    spa_name(vd->vdev_spa));
#endif

				vd->vdev_nowritecache = B_TRUE;
				zio->io_error = error;
			}

			break;
		default:
			zio->io_error = ENOTSUP;
		}

		return (ZIO_PIPELINE_CONTINUE);
	}

	/*
	 * In the kernel, don't bother double-caching, but in userland,
	 * we want to test the vdev_cache code.
	 */
	if (zio->io_type == ZIO_TYPE_READ && vdev_cache_read(zio) == 0)
		return (ZIO_PIPELINE_STOP);

	if ((zio = vdev_queue_io(zio)) == NULL)
		return (ZIO_PIPELINE_STOP);

	/* XXPOLICY */
	if (zio->io_type == ZIO_TYPE_WRITE)
		error = vdev_writeable(vd) ? vdev_error_inject(vd, zio) : ENXIO;
	else
		error = vdev_readable(vd) ? vdev_error_inject(vd, zio) : ENXIO;
	error = (vd->vdev_remove_wanted || vd->vdev_is_failing) ? ENXIO : error;
	if (error) {
		zio->io_error = error;
		zio_interrupt(zio);
		return (ZIO_PIPELINE_STOP);
	}

#ifdef LINUX_AIO
	if (zio->io_aio_ctx && zio->io_aio_ctx->zac_enabled) {
		if (zio->io_type == ZIO_TYPE_READ)
			io_prep_pread(&zio->io_aio, vf->vf_vnode->v_fd,
			    zio->io_data, zio->io_size, zio->io_offset);
		else
			io_prep_pwrite(&zio->io_aio, vf->vf_vnode->v_fd,
			    zio->io_data, zio->io_size, zio->io_offset);

		zio->io_aio.data = zio;

		do {
			error = io_submit(zio->io_aio_ctx->zac_ctx, 1, &iocbp);
		} while (error == -EINTR);

		if (error < 0) {
			zio->io_error = -error;
			zio_interrupt(zio);
		} else
			VERIFY(error == 1);

		return (ZIO_PIPELINE_STOP);
	}
#endif

	zio->io_error = vn_rdwr(zio->io_type == ZIO_TYPE_READ ?
	    UIO_READ : UIO_WRITE, vf->vf_vnode, zio->io_data,
	    zio->io_size, zio->io_offset, UIO_SYSSPACE,
	    0, RLIM64_INFINITY, kcred, &resid);

	if (resid != 0 && zio->io_error == 0)
		zio->io_error = ENOSPC;

	zio_interrupt(zio);

	return (ZIO_PIPELINE_STOP);
}
Example #26
static void
zhack_do_feature_ref(int argc, char **argv)
{
	int c;
	char *target;
	boolean_t decr = B_FALSE;
	spa_t *spa;
	objset_t *mos;
	zfeature_info_t feature;
	spa_feature_t nodeps[] = { SPA_FEATURE_NONE };

	/*
	 * fi_desc does not matter here because it was written to disk
	 * when the feature was enabled, but we need to properly set the
	 * feature for read or write based on the information we read off
	 * disk later.
	 */
	feature.fi_uname = "zhack";
	feature.fi_flags = 0;
	feature.fi_desc = NULL;
	feature.fi_depends = nodeps;
	feature.fi_feature = SPA_FEATURE_NONE;

	optind = 1;
	while ((c = getopt(argc, argv, "md")) != -1) {
		switch (c) {
		case 'm':
			feature.fi_flags |= ZFEATURE_FLAG_MOS;
			break;
		case 'd':
			decr = B_TRUE;
			break;
		default:
			usage();
			break;
		}
	}
	argc -= optind;
	argv += optind;

	if (argc < 2) {
		(void) fprintf(stderr, "error: missing feature or pool name\n");
		usage();
	}
	target = argv[0];
	feature.fi_guid = argv[1];

	if (!zfeature_is_valid_guid(feature.fi_guid))
		fatal(NULL, FTAG, "invalid feature guid: %s", feature.fi_guid);

	zhack_spa_open(target, B_FALSE, FTAG, &spa);
	mos = spa->spa_meta_objset;

	if (zfeature_is_supported(feature.fi_guid)) {
		fatal(spa, FTAG,
		    "'%s' is a real feature, will not change refcount",
		    feature.fi_guid);
	}

	if (0 == zap_contains(mos, spa->spa_feat_for_read_obj,
	    feature.fi_guid)) {
		feature.fi_flags &= ~ZFEATURE_FLAG_READONLY_COMPAT;
	} else if (0 == zap_contains(mos, spa->spa_feat_for_write_obj,
	    feature.fi_guid)) {
		feature.fi_flags |= ZFEATURE_FLAG_READONLY_COMPAT;
	} else {
		fatal(spa, FTAG, "feature is not enabled: %s", feature.fi_guid);
	}

	if (decr) {
		uint64_t count;
		if (feature_get_refcount_from_disk(spa, &feature,
		    &count) == 0 && count == 0) {
			fatal(spa, FTAG, "feature refcount already 0: %s",
			    feature.fi_guid);
		}
	}

	VERIFY0(dsl_sync_task(spa_name(spa), NULL,
	    decr ? feature_decr_sync : feature_incr_sync, &feature,
	    5, ZFS_SPACE_CHECK_NORMAL));

	spa_close(spa, FTAG);
}
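Note: the feature_incr_sync()/feature_decr_sync() callbacks are likewise not shown here. A hedged sketch of the increment side only (example_feature_incr_sync() is hypothetical; it assumes the feature_sync() and feature_get_refcount_from_disk() helpers already referenced above):

static void
example_feature_incr_sync(void *arg, dmu_tx_t *tx)
{
	/* illustrative sketch, not the original callback */
	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
	zfeature_info_t *feature = arg;
	uint64_t refcount;

	VERIFY0(feature_get_refcount_from_disk(spa, feature, &refcount));
	feature_sync(spa, feature, refcount + 1, tx);
}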