static void trim_thread(void *arg) { spa_t *spa = arg; zio_t *zio; for (;;) { mutex_enter(&spa->spa_trim_lock); if (spa->spa_trim_thread == NULL) { spa->spa_trim_thread = curthread; cv_signal(&spa->spa_trim_cv); mutex_exit(&spa->spa_trim_lock); thread_exit(); } cv_wait(&spa->spa_trim_cv, &spa->spa_trim_lock); mutex_exit(&spa->spa_trim_lock); zio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL); spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); trim_map_commit(spa, zio, spa->spa_root_vdev); (void) zio_wait(zio); trim_map_commit_done(spa, spa->spa_root_vdev); spa_config_exit(spa, SCL_STATE, FTAG); } }
static void trim_thread(void *arg) { spa_t *spa = arg; zio_t *zio; #ifdef _KERNEL (void) snprintf(curthread->td_name, sizeof(curthread->td_name), "trim %s", spa_name(spa)); #endif for (;;) { mutex_enter(&spa->spa_trim_lock); if (spa->spa_trim_thread == NULL) { spa->spa_trim_thread = curthread; cv_signal(&spa->spa_trim_cv); mutex_exit(&spa->spa_trim_lock); thread_exit(); } (void) cv_timedwait(&spa->spa_trim_cv, &spa->spa_trim_lock, hz * trim_max_interval); mutex_exit(&spa->spa_trim_lock); zio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL); spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); trim_map_commit(spa, zio, spa->spa_root_vdev); (void) zio_wait(zio); trim_map_commit_done(spa, spa->spa_root_vdev); spa_config_exit(spa, SCL_STATE, FTAG); } }
/* Takes care of physical writing and limiting # of concurrent ZIOs. */ static int vdev_initialize_write(vdev_t *vd, uint64_t start, uint64_t size, abd_t *data) { spa_t *spa = vd->vdev_spa; /* Limit inflight initializing I/Os */ mutex_enter(&vd->vdev_initialize_io_lock); while (vd->vdev_initialize_inflight >= zfs_initialize_limit) { cv_wait(&vd->vdev_initialize_io_cv, &vd->vdev_initialize_io_lock); } vd->vdev_initialize_inflight++; mutex_exit(&vd->vdev_initialize_io_lock); dmu_tx_t *tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir); VERIFY0(dmu_tx_assign(tx, TXG_WAIT)); uint64_t txg = dmu_tx_get_txg(tx); spa_config_enter(spa, SCL_STATE_ALL, vd, RW_READER); mutex_enter(&vd->vdev_initialize_lock); if (vd->vdev_initialize_offset[txg & TXG_MASK] == 0) { uint64_t *guid = kmem_zalloc(sizeof (uint64_t), KM_SLEEP); *guid = vd->vdev_guid; /* This is the first write of this txg. */ dsl_sync_task_nowait(spa_get_dsl(spa), vdev_initialize_zap_update_sync, guid, 2, ZFS_SPACE_CHECK_RESERVED, tx); } /* * We know the vdev struct will still be around since all * consumers of vdev_free must stop the initialization first. */ if (vdev_initialize_should_stop(vd)) { mutex_enter(&vd->vdev_initialize_io_lock); ASSERT3U(vd->vdev_initialize_inflight, >, 0); vd->vdev_initialize_inflight--; mutex_exit(&vd->vdev_initialize_io_lock); spa_config_exit(vd->vdev_spa, SCL_STATE_ALL, vd); mutex_exit(&vd->vdev_initialize_lock); dmu_tx_commit(tx); return (SET_ERROR(EINTR)); } mutex_exit(&vd->vdev_initialize_lock); vd->vdev_initialize_offset[txg & TXG_MASK] = start + size; zio_nowait(zio_write_phys(spa->spa_txg_zio[txg & TXG_MASK], vd, start, size, data, ZIO_CHECKSUM_OFF, vdev_initialize_cb, NULL, ZIO_PRIORITY_INITIALIZING, ZIO_FLAG_CANFAIL, B_FALSE)); /* vdev_initialize_cb releases SCL_STATE_ALL */ dmu_tx_commit(tx); return (0); }
/* * Choose a random vdev, label, and MMP block, and write over it * with a copy of the last-synced uberblock, whose timestamp * has been updated to reflect that the pool is in use. */ static void mmp_write_uberblock(spa_t *spa) { int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL; mmp_thread_t *mmp = &spa->spa_mmp; uberblock_t *ub; vdev_t *vd; int label; uint64_t offset; spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); vd = mmp_random_leaf(spa->spa_root_vdev); if (vd == NULL) { spa_config_exit(spa, SCL_STATE, FTAG); return; } mutex_enter(&mmp->mmp_io_lock); if (mmp->mmp_zio_root == NULL) mmp->mmp_zio_root = zio_root(spa, NULL, NULL, flags | ZIO_FLAG_GODFATHER); ub = &mmp->mmp_ub; ub->ub_timestamp = gethrestime_sec(); ub->ub_mmp_magic = MMP_MAGIC; ub->ub_mmp_delay = mmp->mmp_delay; vd->vdev_mmp_pending = gethrtime(); zio_t *zio = zio_null(mmp->mmp_zio_root, spa, NULL, NULL, NULL, flags); abd_t *ub_abd = abd_alloc_for_io(VDEV_UBERBLOCK_SIZE(vd), B_TRUE); abd_zero(ub_abd, VDEV_UBERBLOCK_SIZE(vd)); abd_copy_from_buf(ub_abd, ub, sizeof (uberblock_t)); mutex_exit(&mmp->mmp_io_lock); offset = VDEV_UBERBLOCK_OFFSET(vd, VDEV_UBERBLOCK_COUNT(vd) - MMP_BLOCKS_PER_LABEL + spa_get_random(MMP_BLOCKS_PER_LABEL)); label = spa_get_random(VDEV_LABELS); vdev_label_write(zio, vd, label, ub_abd, offset, VDEV_UBERBLOCK_SIZE(vd), mmp_write_done, mmp, flags | ZIO_FLAG_DONT_PROPAGATE); spa_mmp_history_add(ub->ub_txg, ub->ub_timestamp, ub->ub_mmp_delay, vd, label); zio_nowait(zio); }
static void spa_load_stats_update(spa_t *spa) { int i, nnormal = 0, nspecial = 0; vdev_t *rvd; spa_special_stat_t spa_stat = {0}; /* * walk the top level vdevs and calculate average stats for * the normal and special classes */ spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); rvd = spa->spa_root_vdev; ASSERT(rvd); for (i = 0; i < rvd->vdev_children; i++) { vdev_t *vd = rvd->vdev_child[i]; ASSERT(vd); if (vd->vdev_islog || vd->vdev_ishole || vd->vdev_isspare || vd->vdev_isl2cache) continue; if (vd->vdev_isspecial) { spa_vdev_walk_stats(vd, &spa_stat.special_lt, &spa_stat.special_ut, &nspecial); } else { spa_vdev_walk_stats(vd, &spa_stat.normal_lt, &spa_stat.normal_ut, &nnormal); } } spa_config_exit(spa, SCL_VDEV, FTAG); DTRACE_PROBE4(spa_vdev_stats, int, nnormal, int, nspecial, int, spa_stat.normal_ut, int, spa_stat.special_ut); /* calculate averages */ if (nnormal) { spa->spa_special_stat.normal_lt = spa_stat.normal_lt/nnormal; spa->spa_special_stat.normal_ut = spa_stat.normal_ut/nnormal; } if (nspecial) { spa->spa_special_stat.special_lt = spa_stat.special_lt/nspecial; spa->spa_special_stat.special_ut = spa_stat.special_ut/nspecial; } }
/* * Update all disk labels, generate a fresh config based on the current * in-core state, and sync the global config cache (do not sync the config * cache if this is a booting rootpool). */ void spa_config_update(spa_t *spa, int what) { vdev_t *rvd = spa->spa_root_vdev; uint64_t txg; int c; ASSERT(MUTEX_HELD(&spa_namespace_lock)); spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); txg = spa_last_synced_txg(spa) + 1; if (what == SPA_CONFIG_UPDATE_POOL) { vdev_config_dirty(rvd); } else { /* * If we have top-level vdevs that were added but have * not yet been prepared for allocation, do that now. * (It's safe now because the config cache is up to date, * so it will be able to translate the new DVAs.) * See comments in spa_vdev_add() for full details. */ for (c = 0; c < rvd->vdev_children; c++) { vdev_t *tvd = rvd->vdev_child[c]; if (tvd->vdev_ms_array == 0) vdev_metaslab_set_size(tvd); vdev_expand(tvd, txg); } } spa_config_exit(spa, SCL_ALL, FTAG); /* * Wait for the mosconfig to be regenerated and synced. */ txg_wait_synced(spa->spa_dsl_pool, txg); /* * Update the global config cache to reflect the new mosconfig. */ if (!spa->spa_is_root) { spa_write_cachefile(spa, B_FALSE, what != SPA_CONFIG_UPDATE_POOL); } if (what == SPA_CONFIG_UPDATE_POOL) spa_config_update(spa, SPA_CONFIG_UPDATE_VDEVS); }
void zil_flush_vdevs(zilog_t *zilog) { spa_t *spa = zilog->zl_spa; avl_tree_t *t = &zilog->zl_vdev_tree; void *cookie = NULL; zil_vdev_node_t *zv; zio_t *zio; ASSERT(zilog->zl_writer); /* * We don't need zl_vdev_lock here because we're the zl_writer, * and all zl_get_data() callbacks are done. */ if (avl_numnodes(t) == 0) return; spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); zio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL); while ((zv = avl_destroy_nodes(t, &cookie)) != NULL) { vdev_t *vd = vdev_lookup_top(spa, zv->zv_vdev); if (vd != NULL) zio_flush(zio, vd); kmem_free(zv, sizeof (*zv)); } /* * Wait for all the flushes to complete. Not all devices actually * support the DKIOCFLUSHWRITECACHE ioctl, so it's OK if it fails. */ (void) zio_wait(zio); spa_config_exit(spa, SCL_STATE, FTAG); }
/* * Generate the pool's configuration based on the current in-core state. * * We infer whether to generate a complete config or just one top-level config * based on whether vd is the root vdev. */ nvlist_t * spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats) { nvlist_t *config, *nvroot; vdev_t *rvd = spa->spa_root_vdev; unsigned long hostid = 0; boolean_t locked = B_FALSE; uint64_t split_guid; if (vd == NULL) { vd = rvd; locked = B_TRUE; spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_READER); } ASSERT(spa_config_held(spa, SCL_CONFIG | SCL_STATE, RW_READER) == (SCL_CONFIG | SCL_STATE)); /* * If txg is -1, report the current value of spa->spa_config_txg. */ if (txg == -1ULL) txg = spa->spa_config_txg; VERIFY(nvlist_alloc(&config, NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_VERSION, spa_version(spa)) == 0); VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME, spa_name(spa)) == 0); VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE, spa_state(spa)) == 0); VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_TXG, txg) == 0); VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_GUID, spa_guid(spa)) == 0); VERIFY(spa->spa_comment == NULL || nvlist_add_string(config, ZPOOL_CONFIG_COMMENT, spa->spa_comment) == 0); #ifdef _KERNEL hostid = zone_get_hostid(NULL); #else /* _KERNEL */ /* * We're emulating the system's hostid in userland, so we can't use * zone_get_hostid(). */ (void) ddi_strtoul(hw_serial, NULL, 10, &hostid); #endif /* _KERNEL */ if (hostid != 0) { VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_HOSTID, hostid) == 0); } VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_HOSTNAME, utsname.nodename) == 0); if (vd != rvd) { VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TOP_GUID, vd->vdev_top->vdev_guid) == 0); VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_GUID, vd->vdev_guid) == 0); if (vd->vdev_isspare) VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_IS_SPARE, 1ULL) == 0); if (vd->vdev_islog) VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_IS_LOG, 1ULL) == 0); vd = vd->vdev_top; /* label contains top config */ } else { /* * Only add the (potentially large) split information * in the mos config, and not in the vdev labels */ if (spa->spa_config_splitting != NULL) VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_SPLIT, spa->spa_config_splitting) == 0); } /* * Add the top-level config. We even add this on pools which * don't support holes in the namespace. */ vdev_top_config_generate(spa, config); /* * If we're splitting, record the original pool's guid. */ if (spa->spa_config_splitting != NULL && nvlist_lookup_uint64(spa->spa_config_splitting, ZPOOL_CONFIG_SPLIT_GUID, &split_guid) == 0) { VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_SPLIT_GUID, split_guid) == 0); } nvroot = vdev_config_generate(spa, vd, getstats, 0); VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0); nvlist_free(nvroot); /* * Store what's necessary for reading the MOS in the label. */ VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_FEATURES_FOR_READ, spa->spa_label_features) == 0); if (getstats && spa_load_state(spa) == SPA_LOAD_NONE) { ddt_histogram_t *ddh; ddt_stat_t *dds; ddt_object_t *ddo; ddh = kmem_zalloc(sizeof (ddt_histogram_t), KM_SLEEP); ddt_get_dedup_histogram(spa, ddh); VERIFY(nvlist_add_uint64_array(config, ZPOOL_CONFIG_DDT_HISTOGRAM, (uint64_t *)ddh, sizeof (*ddh) / sizeof (uint64_t)) == 0); kmem_free(ddh, sizeof (ddt_histogram_t)); ddo = kmem_zalloc(sizeof (ddt_object_t), KM_SLEEP); ddt_get_dedup_object_stats(spa, ddo); VERIFY(nvlist_add_uint64_array(config, ZPOOL_CONFIG_DDT_OBJ_STATS, (uint64_t *)ddo, sizeof (*ddo) / sizeof (uint64_t)) == 0); kmem_free(ddo, sizeof (ddt_object_t)); dds = kmem_zalloc(sizeof (ddt_stat_t), KM_SLEEP); ddt_get_dedup_stats(spa, dds); VERIFY(nvlist_add_uint64_array(config, ZPOOL_CONFIG_DDT_STATS, (uint64_t *)dds, sizeof (*dds) / sizeof (uint64_t)) == 0); kmem_free(dds, sizeof (ddt_stat_t)); } if (locked) spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG); return (config); }
/* * Generate the pool's configuration based on the current in-core state. * * We infer whether to generate a complete config or just one top-level config * based on whether vd is the root vdev. */ nvlist_t * spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats) { nvlist_t *config, *nvroot; vdev_t *rvd = spa->spa_root_vdev; unsigned long hostid = 0; boolean_t locked = B_FALSE; uint64_t split_guid; char *pool_name; if (vd == NULL) { vd = rvd; locked = B_TRUE; spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_READER); } ASSERT(spa_config_held(spa, SCL_CONFIG | SCL_STATE, RW_READER) == (SCL_CONFIG | SCL_STATE)); /* * If txg is -1, report the current value of spa->spa_config_txg. */ if (txg == -1ULL) txg = spa->spa_config_txg; /* * Originally, users had to handle spa namespace collisions by either * exporting the already imported pool or by specifying a new name for * the pool with a conflicting name. In the case of root pools from * virtual guests, neither approach to collision resolution is * reasonable. This is addressed by extending the new name syntax with * an option to specify that the new name is temporary. When specified, * ZFS_IMPORT_TEMP_NAME will be set in spa->spa_import_flags to tell us * to use the previous name, which we do below. */ if (spa->spa_import_flags & ZFS_IMPORT_TEMP_NAME) { VERIFY0(nvlist_lookup_string(spa->spa_config, ZPOOL_CONFIG_POOL_NAME, &pool_name)); } else pool_name = spa_name(spa); VERIFY(nvlist_alloc(&config, NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_VERSION, spa_version(spa)) == 0); VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME, pool_name) == 0); VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE, spa_state(spa)) == 0); VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_TXG, txg) == 0); VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_GUID, spa_guid(spa)) == 0); VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_ERRATA, spa->spa_errata) == 0); VERIFY(spa->spa_comment == NULL || nvlist_add_string(config, ZPOOL_CONFIG_COMMENT, spa->spa_comment) == 0); #ifdef _KERNEL hostid = zone_get_hostid(NULL); #else /* _KERNEL */ /* * We're emulating the system's hostid in userland, so we can't use * zone_get_hostid(). */ (void) ddi_strtoul(hw_serial, NULL, 10, &hostid); #endif /* _KERNEL */ if (hostid != 0) { VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_HOSTID, hostid) == 0); } VERIFY0(nvlist_add_string(config, ZPOOL_CONFIG_HOSTNAME, utsname()->nodename)); if (vd != rvd) { VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TOP_GUID, vd->vdev_top->vdev_guid) == 0); VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_GUID, vd->vdev_guid) == 0); if (vd->vdev_isspare) VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_IS_SPARE, 1ULL) == 0); if (vd->vdev_islog) VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_IS_LOG, 1ULL) == 0); vd = vd->vdev_top; /* label contains top config */ } else { /* * Only add the (potentially large) split information * in the mos config, and not in the vdev labels */ if (spa->spa_config_splitting != NULL) VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_SPLIT, spa->spa_config_splitting) == 0); } /* * Add the top-level config. We even add this on pools which * don't support holes in the namespace. */ vdev_top_config_generate(spa, config); /* * If we're splitting, record the original pool's guid. */ if (spa->spa_config_splitting != NULL && nvlist_lookup_uint64(spa->spa_config_splitting, ZPOOL_CONFIG_SPLIT_GUID, &split_guid) == 0) { VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_SPLIT_GUID, split_guid) == 0); } nvroot = vdev_config_generate(spa, vd, getstats, 0); VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0); nvlist_free(nvroot); /* * Store what's necessary for reading the MOS in the label. */ VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_FEATURES_FOR_READ, spa->spa_label_features) == 0); if (getstats && spa_load_state(spa) == SPA_LOAD_NONE) { ddt_histogram_t *ddh; ddt_stat_t *dds; ddt_object_t *ddo; ddh = kmem_zalloc(sizeof (ddt_histogram_t), KM_SLEEP); ddt_get_dedup_histogram(spa, ddh); VERIFY(nvlist_add_uint64_array(config, ZPOOL_CONFIG_DDT_HISTOGRAM, (uint64_t *)ddh, sizeof (*ddh) / sizeof (uint64_t)) == 0); kmem_free(ddh, sizeof (ddt_histogram_t)); ddo = kmem_zalloc(sizeof (ddt_object_t), KM_SLEEP); ddt_get_dedup_object_stats(spa, ddo); VERIFY(nvlist_add_uint64_array(config, ZPOOL_CONFIG_DDT_OBJ_STATS, (uint64_t *)ddo, sizeof (*ddo) / sizeof (uint64_t)) == 0); kmem_free(ddo, sizeof (ddt_object_t)); dds = kmem_zalloc(sizeof (ddt_stat_t), KM_SLEEP); ddt_get_dedup_stats(spa, dds); VERIFY(nvlist_add_uint64_array(config, ZPOOL_CONFIG_DDT_STATS, (uint64_t *)dds, sizeof (*dds) / sizeof (uint64_t)) == 0); kmem_free(dds, sizeof (ddt_stat_t)); } if (locked) spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG); return (config); }
static int zfs_vfs_getattr(struct mount *mp, struct vfs_attr *fsap, __unused vfs_context_t context) { zfsvfs_t *zfsvfs = vfs_fsprivate(mp); uint64_t refdbytes, availbytes, usedobjs, availobjs; ZFS_ENTER(zfsvfs); dmu_objset_space(zfsvfs->z_os, &refdbytes, &availbytes, &usedobjs, &availobjs); VFSATTR_RETURN(fsap, f_objcount, usedobjs); VFSATTR_RETURN(fsap, f_maxobjcount, 0x7fffffffffffffff); /* * Carbon depends on f_filecount and f_dircount so * make up some values based on total objects. */ VFSATTR_RETURN(fsap, f_filecount, usedobjs - (usedobjs / 4)); VFSATTR_RETURN(fsap, f_dircount, usedobjs / 4); /* * The underlying storage pool actually uses multiple block sizes. * We report the fragsize as the smallest block size we support, * and we report our blocksize as the filesystem's maximum blocksize. */ VFSATTR_RETURN(fsap, f_bsize, 1UL << SPA_MINBLOCKSHIFT); VFSATTR_RETURN(fsap, f_iosize, zfsvfs->z_max_blksz); /* * The following report "total" blocks of various kinds in the * file system, but reported in terms of f_frsize - the * "fragment" size. */ VFSATTR_RETURN(fsap, f_blocks, (u_int64_t)((refdbytes + availbytes) >> SPA_MINBLOCKSHIFT)); VFSATTR_RETURN(fsap, f_bfree, (u_int64_t)(availbytes >> SPA_MINBLOCKSHIFT)); VFSATTR_RETURN(fsap, f_bavail, fsap->f_bfree); /* no root reservation */ VFSATTR_RETURN(fsap, f_bused, fsap->f_blocks - fsap->f_bfree); /* * statvfs() should really be called statufs(), because it assumes * static metadata. ZFS doesn't preallocate files, so the best * we can do is report the max that could possibly fit in f_files, * and that minus the number actually used in f_ffree. * For f_ffree, report the smaller of the number of object available * and the number of blocks (each object will take at least a block). */ VFSATTR_RETURN(fsap, f_ffree, (u_int64_t)MIN(availobjs, fsap->f_bfree)); VFSATTR_RETURN(fsap, f_files, fsap->f_ffree + usedobjs); #if 0 statp->f_flag = vf_to_stf(vfsp->vfs_flag); #endif if (VFSATTR_IS_ACTIVE(fsap, f_fsid)) { VFSATTR_RETURN(fsap, f_fsid, vfs_statfs(mp)->f_fsid); } if (VFSATTR_IS_ACTIVE(fsap, f_capabilities)) { bcopy(&zfs_capabilities, &fsap->f_capabilities, sizeof (zfs_capabilities)); VFSATTR_SET_SUPPORTED(fsap, f_capabilities); } if (VFSATTR_IS_ACTIVE(fsap, f_attributes)) { bcopy(&zfs_attributes, &fsap->f_attributes.validattr, sizeof (zfs_attributes)); bcopy(&zfs_attributes, &fsap->f_attributes.nativeattr, sizeof (zfs_attributes)); VFSATTR_SET_SUPPORTED(fsap, f_attributes); } if (VFSATTR_IS_ACTIVE(fsap, f_create_time)) { dmu_objset_stats_t dmu_stat; dmu_objset_fast_stat(zfsvfs->z_os, &dmu_stat); fsap->f_create_time.tv_sec = dmu_stat.dds_creation_time; fsap->f_create_time.tv_nsec = 0; VFSATTR_SET_SUPPORTED(fsap, f_create_time); } if (VFSATTR_IS_ACTIVE(fsap, f_modify_time)) { if (zfsvfs->z_mtime_vp != NULL) { znode_t *mzp; mzp = VTOZ(zfsvfs->z_mtime_vp); ZFS_TIME_DECODE(&fsap->f_modify_time, mzp->z_phys->zp_mtime); } else { fsap->f_modify_time.tv_sec = 0; fsap->f_modify_time.tv_nsec = 0; } VFSATTR_SET_SUPPORTED(fsap, f_modify_time); } /* * For Carbon compatibility, pretend to support this legacy/unused * attribute */ if (VFSATTR_IS_ACTIVE(fsap, f_backup_time)) { fsap->f_backup_time.tv_sec = 0; fsap->f_backup_time.tv_nsec = 0; VFSATTR_SET_SUPPORTED(fsap, f_backup_time); } if (VFSATTR_IS_ACTIVE(fsap, f_vol_name)) { spa_t *spa = dmu_objset_spa(zfsvfs->z_os); spa_config_enter(spa, RW_READER, FTAG); strlcpy(fsap->f_vol_name, spa_name(spa), MAXPATHLEN); spa_config_exit(spa, FTAG); VFSATTR_SET_SUPPORTED(fsap, f_vol_name); } VFSATTR_RETURN(fsap, f_fssubtype, 0); VFSATTR_RETURN(fsap, f_signature, 0x5a21); /* 'Z!' */ VFSATTR_RETURN(fsap, f_carbon_fsid, 0); ZFS_EXIT(zfsvfs); return (0); }
/* ARGSUSED */ static int spa_condense_indirect_thread(void *arg, zthr_t *zthr) { spa_t *spa = arg; vdev_t *vd; ASSERT3P(spa->spa_condensing_indirect, !=, NULL); spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); vd = vdev_lookup_top(spa, spa->spa_condensing_indirect_phys.scip_vdev); ASSERT3P(vd, !=, NULL); spa_config_exit(spa, SCL_VDEV, FTAG); spa_condensing_indirect_t *sci = spa->spa_condensing_indirect; spa_condensing_indirect_phys_t *scip = &spa->spa_condensing_indirect_phys; uint32_t *counts; uint64_t start_index; vdev_indirect_mapping_t *old_mapping = vd->vdev_indirect_mapping; space_map_t *prev_obsolete_sm = NULL; ASSERT3U(vd->vdev_id, ==, scip->scip_vdev); ASSERT(scip->scip_next_mapping_object != 0); ASSERT(scip->scip_prev_obsolete_sm_object != 0); ASSERT3P(vd->vdev_ops, ==, &vdev_indirect_ops); for (int i = 0; i < TXG_SIZE; i++) { /* * The list must start out empty in order for the * _commit_sync() sync task to be properly registered * on the first call to _commit_entry(); so it's wise * to double check and ensure we actually are starting * with empty lists. */ ASSERT(list_is_empty(&sci->sci_new_mapping_entries[i])); } VERIFY0(space_map_open(&prev_obsolete_sm, spa->spa_meta_objset, scip->scip_prev_obsolete_sm_object, 0, vd->vdev_asize, 0)); space_map_update(prev_obsolete_sm); counts = vdev_indirect_mapping_load_obsolete_counts(old_mapping); if (prev_obsolete_sm != NULL) { vdev_indirect_mapping_load_obsolete_spacemap(old_mapping, counts, prev_obsolete_sm); } space_map_close(prev_obsolete_sm); /* * Generate new mapping. Determine what index to continue from * based on the max offset that we've already written in the * new mapping. */ uint64_t max_offset = vdev_indirect_mapping_max_offset(sci->sci_new_mapping); if (max_offset == 0) { /* We haven't written anything to the new mapping yet. */ start_index = 0; } else { /* * Pick up from where we left off. _entry_for_offset() * returns a pointer into the vim_entries array. If * max_offset is greater than any of the mappings * contained in the table NULL will be returned and * that indicates we've exhausted our iteration of the * old_mapping. */ vdev_indirect_mapping_entry_phys_t *entry = vdev_indirect_mapping_entry_for_offset_or_next(old_mapping, max_offset); if (entry == NULL) { /* * We've already written the whole new mapping. * This special value will cause us to skip the * generate_new_mapping step and just do the sync * task to complete the condense. */ start_index = UINT64_MAX; } else { start_index = entry - old_mapping->vim_entries; ASSERT3U(start_index, <, vdev_indirect_mapping_num_entries(old_mapping)); } } spa_condense_indirect_generate_new_mapping(vd, counts, start_index, zthr); vdev_indirect_mapping_free_obsolete_counts(old_mapping, counts); /* * If the zthr has received a cancellation signal while running * in generate_new_mapping() or at any point after that, then bail * early. We don't want to complete the condense if the spa is * shutting down. */ if (zthr_iscancelled(zthr)) return (0); VERIFY0(dsl_sync_task(spa_name(spa), NULL, spa_condense_indirect_complete_sync, sci, 0, ZFS_SPACE_CHECK_EXTRA_RESERVED)); return (0); thread_exit(); }
static void txg_sync_thread(dsl_pool_t *dp) { spa_t *spa = dp->dp_spa; tx_state_t *tx = &dp->dp_tx; callb_cpr_t cpr; vdev_stat_t *vs1, *vs2; clock_t start, delta; #ifdef _KERNEL /* * Annotate this process with a flag that indicates that it is * unsafe to use KM_SLEEP during memory allocations due to the * potential for a deadlock. KM_PUSHPAGE should be used instead. */ current->flags |= PF_NOFS; #endif /* _KERNEL */ txg_thread_enter(tx, &cpr); vs1 = kmem_alloc(sizeof (vdev_stat_t), KM_PUSHPAGE); vs2 = kmem_alloc(sizeof (vdev_stat_t), KM_PUSHPAGE); start = delta = 0; for (;;) { clock_t timer, timeout; uint64_t txg; uint64_t ndirty; timeout = zfs_txg_timeout * hz; /* * We sync when we're scanning, there's someone waiting * on us, or the quiesce thread has handed off a txg to * us, or we have reached our timeout. */ timer = (delta >= timeout ? 0 : timeout - delta); while (!dsl_scan_active(dp->dp_scan) && !tx->tx_exiting && timer > 0 && tx->tx_synced_txg >= tx->tx_sync_txg_waiting && tx->tx_quiesced_txg == 0 && dp->dp_dirty_total < zfs_dirty_data_sync) { dprintf("waiting; tx_synced=%llu waiting=%llu dp=%p\n", tx->tx_synced_txg, tx->tx_sync_txg_waiting, dp); txg_thread_wait(tx, &cpr, &tx->tx_sync_more_cv, timer); delta = ddi_get_lbolt() - start; timer = (delta > timeout ? 0 : timeout - delta); } /* * Wait until the quiesce thread hands off a txg to us, * prompting it to do so if necessary. */ while (!tx->tx_exiting && tx->tx_quiesced_txg == 0) { if (tx->tx_quiesce_txg_waiting < tx->tx_open_txg+1) tx->tx_quiesce_txg_waiting = tx->tx_open_txg+1; cv_broadcast(&tx->tx_quiesce_more_cv); txg_thread_wait(tx, &cpr, &tx->tx_quiesce_done_cv, 0); } if (tx->tx_exiting) { kmem_free(vs2, sizeof (vdev_stat_t)); kmem_free(vs1, sizeof (vdev_stat_t)); txg_thread_exit(tx, &cpr, &tx->tx_sync_thread); } spa_config_enter(spa, SCL_ALL, FTAG, RW_READER); vdev_get_stats(spa->spa_root_vdev, vs1); spa_config_exit(spa, SCL_ALL, FTAG); /* * Consume the quiesced txg which has been handed off to * us. This may cause the quiescing thread to now be * able to quiesce another txg, so we must signal it. */ txg = tx->tx_quiesced_txg; tx->tx_quiesced_txg = 0; tx->tx_syncing_txg = txg; DTRACE_PROBE2(txg__syncing, dsl_pool_t *, dp, uint64_t, txg); cv_broadcast(&tx->tx_quiesce_more_cv); dprintf("txg=%llu quiesce_txg=%llu sync_txg=%llu\n", txg, tx->tx_quiesce_txg_waiting, tx->tx_sync_txg_waiting); mutex_exit(&tx->tx_sync_lock); spa_txg_history_set(spa, txg, TXG_STATE_WAIT_FOR_SYNC, gethrtime()); ndirty = dp->dp_dirty_pertxg[txg & TXG_MASK]; start = ddi_get_lbolt(); spa_sync(spa, txg); delta = ddi_get_lbolt() - start; mutex_enter(&tx->tx_sync_lock); tx->tx_synced_txg = txg; tx->tx_syncing_txg = 0; DTRACE_PROBE2(txg__synced, dsl_pool_t *, dp, uint64_t, txg); cv_broadcast(&tx->tx_sync_done_cv); /* * Dispatch commit callbacks to worker threads. */ txg_dispatch_callbacks(dp, txg); spa_config_enter(spa, SCL_ALL, FTAG, RW_READER); vdev_get_stats(spa->spa_root_vdev, vs2); spa_config_exit(spa, SCL_ALL, FTAG); spa_txg_history_set_io(spa, txg, vs2->vs_bytes[ZIO_TYPE_READ]-vs1->vs_bytes[ZIO_TYPE_READ], vs2->vs_bytes[ZIO_TYPE_WRITE]-vs1->vs_bytes[ZIO_TYPE_WRITE], vs2->vs_ops[ZIO_TYPE_READ]-vs1->vs_ops[ZIO_TYPE_READ], vs2->vs_ops[ZIO_TYPE_WRITE]-vs1->vs_ops[ZIO_TYPE_WRITE], ndirty); spa_txg_history_set(spa, txg, TXG_STATE_SYNCED, gethrtime()); } }
/* * Generate the pool's configuration based on the current in-core state. * We infer whether to generate a complete config or just one top-level config * based on whether vd is the root vdev. */ nvlist_t * spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats) { nvlist_t *config, *nvroot; vdev_t *rvd = spa->spa_root_vdev; unsigned long hostid = 0; boolean_t locked = B_FALSE; if (vd == NULL) { vd = rvd; locked = B_TRUE; spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_READER); } ASSERT(spa_config_held(spa, SCL_CONFIG | SCL_STATE, RW_READER) == (SCL_CONFIG | SCL_STATE)); /* * If txg is -1, report the current value of spa->spa_config_txg. */ if (txg == -1ULL) txg = spa->spa_config_txg; VERIFY(nvlist_alloc(&config, NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_VERSION, spa_version(spa)) == 0); VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME, spa_name(spa)) == 0); VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE, spa_state(spa)) == 0); VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_TXG, txg) == 0); VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_GUID, spa_guid(spa)) == 0); #ifdef _KERNEL hostid = zone_get_hostid(NULL); #else /* _KERNEL */ /* * We're emulating the system's hostid in userland, so we can't use * zone_get_hostid(). */ (void) ddi_strtoul(hw_serial, NULL, 10, &hostid); #endif /* _KERNEL */ if (hostid != 0) { VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_HOSTID, hostid) == 0); } VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_HOSTNAME, utsname.nodename) == 0); if (vd != rvd) { VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TOP_GUID, vd->vdev_top->vdev_guid) == 0); VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_GUID, vd->vdev_guid) == 0); if (vd->vdev_isspare) VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_IS_SPARE, 1ULL) == 0); if (vd->vdev_islog) VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_IS_LOG, 1ULL) == 0); vd = vd->vdev_top; /* label contains top config */ } nvroot = vdev_config_generate(spa, vd, getstats, B_FALSE, B_FALSE); VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0); nvlist_free(nvroot); if (locked) spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG); return (config); }