/*
 * Post a history sysevent.
 *
 * The nvlist_t* passed into this function will be transformed into a new
 * nvlist where:
 *
 * 1. Nested nvlists will be flattened to a single level
 * 2. Keys will have their names normalized (to remove any problematic
 *    characters, such as whitespace)
 *
 * The nvlist_t passed into this function will be duplicated and should be
 * freed by the caller.
 */
static void
spa_history_log_notify(spa_t *spa, nvlist_t *nvl)
{
        nvlist_t *hist_nvl = fnvlist_alloc();
        uint64_t uint64;
        char *string;

        if (nvlist_lookup_string(nvl, ZPOOL_HIST_CMD, &string) == 0)
                fnvlist_add_string(hist_nvl, ZFS_EV_HIST_CMD, string);

        if (nvlist_lookup_string(nvl, ZPOOL_HIST_INT_NAME, &string) == 0)
                fnvlist_add_string(hist_nvl, ZFS_EV_HIST_INT_NAME, string);

        if (nvlist_lookup_string(nvl, ZPOOL_HIST_ZONE, &string) == 0)
                fnvlist_add_string(hist_nvl, ZFS_EV_HIST_ZONE, string);

        if (nvlist_lookup_string(nvl, ZPOOL_HIST_HOST, &string) == 0)
                fnvlist_add_string(hist_nvl, ZFS_EV_HIST_HOST, string);

        if (nvlist_lookup_string(nvl, ZPOOL_HIST_DSNAME, &string) == 0)
                fnvlist_add_string(hist_nvl, ZFS_EV_HIST_DSNAME, string);

        if (nvlist_lookup_string(nvl, ZPOOL_HIST_INT_STR, &string) == 0)
                fnvlist_add_string(hist_nvl, ZFS_EV_HIST_INT_STR, string);

        if (nvlist_lookup_string(nvl, ZPOOL_HIST_IOCTL, &string) == 0)
                fnvlist_add_string(hist_nvl, ZFS_EV_HIST_IOCTL, string);

        if (nvlist_lookup_string(nvl, ZPOOL_HIST_INT_NAME, &string) == 0)
                fnvlist_add_string(hist_nvl, ZFS_EV_HIST_INT_NAME, string);

        if (nvlist_lookup_uint64(nvl, ZPOOL_HIST_DSID, &uint64) == 0)
                fnvlist_add_uint64(hist_nvl, ZFS_EV_HIST_DSID, uint64);

        if (nvlist_lookup_uint64(nvl, ZPOOL_HIST_TXG, &uint64) == 0)
                fnvlist_add_uint64(hist_nvl, ZFS_EV_HIST_TXG, uint64);

        if (nvlist_lookup_uint64(nvl, ZPOOL_HIST_TIME, &uint64) == 0)
                fnvlist_add_uint64(hist_nvl, ZFS_EV_HIST_TIME, uint64);

        if (nvlist_lookup_uint64(nvl, ZPOOL_HIST_WHO, &uint64) == 0)
                fnvlist_add_uint64(hist_nvl, ZFS_EV_HIST_WHO, uint64);

        if (nvlist_lookup_uint64(nvl, ZPOOL_HIST_INT_EVENT, &uint64) == 0)
                fnvlist_add_uint64(hist_nvl, ZFS_EV_HIST_INT_EVENT, uint64);

        spa_event_notify(spa, NULL, hist_nvl, ESC_ZFS_HISTORY_EVENT);

        nvlist_free(hist_nvl);
}
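/*
 * Illustrative only: a minimal sketch of how a caller might build the input
 * nvlist consumed by spa_history_log_notify().  The helper name and the
 * choice of keys are hypothetical; the real history records are assembled
 * elsewhere in spa_history.c before this function is invoked.
 */
#if 0
static void
example_post_history_event(spa_t *spa, const char *cmd)
{
        nvlist_t *nvl = fnvlist_alloc();

        /* Keys mirror the ZPOOL_HIST_* names looked up above. */
        fnvlist_add_string(nvl, ZPOOL_HIST_CMD, cmd);
        fnvlist_add_uint64(nvl, ZPOOL_HIST_TIME, gethrestime_sec());
        fnvlist_add_uint64(nvl, ZPOOL_HIST_WHO, crgetruid(CRED()));

        /* Posts ESC_ZFS_HISTORY_EVENT; the caller still owns and frees nvl. */
        spa_history_log_notify(spa, nvl);
        fnvlist_free(nvl);
}
#endif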
/* ARGSUSED */
static void
dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx)
{
        static const char *old_names[] = {
                "scrub_bookmark",
                "scrub_ddt_bookmark",
                "scrub_ddt_class_max",
                "scrub_queue",
                "scrub_min_txg",
                "scrub_max_txg",
                "scrub_func",
                "scrub_errors",
                NULL
        };

        dsl_pool_t *dp = scn->scn_dp;
        spa_t *spa = dp->dp_spa;
        int i;

        /* Remove any remnants of an old-style scrub. */
        for (i = 0; old_names[i]; i++) {
                (void) zap_remove(dp->dp_meta_objset,
                    DMU_POOL_DIRECTORY_OBJECT, old_names[i], tx);
        }

        if (scn->scn_phys.scn_queue_obj != 0) {
                VERIFY(0 == dmu_object_free(dp->dp_meta_objset,
                    scn->scn_phys.scn_queue_obj, tx));
                scn->scn_phys.scn_queue_obj = 0;
        }

        /*
         * If we were "restarted" from a stopped state, don't bother
         * with anything else.
         */
        if (scn->scn_phys.scn_state != DSS_SCANNING)
                return;

        if (complete)
                scn->scn_phys.scn_state = DSS_FINISHED;
        else
                scn->scn_phys.scn_state = DSS_CANCELED;

        spa_history_log_internal(spa, "scan done", tx,
            "complete=%u", complete);

        if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) {
                mutex_enter(&spa->spa_scrub_lock);
                while (spa->spa_scrub_inflight > 0) {
                        cv_wait(&spa->spa_scrub_io_cv,
                            &spa->spa_scrub_lock);
                }
                mutex_exit(&spa->spa_scrub_lock);
                spa->spa_scrub_started = B_FALSE;
                spa->spa_scrub_active = B_FALSE;

                /*
                 * If the scrub/resilver completed, update all DTLs to
                 * reflect this.  Whether it succeeded or not, vacate
                 * all temporary scrub DTLs.
                 */
                vdev_dtl_reassess(spa->spa_root_vdev, tx->tx_txg,
                    complete ? scn->scn_phys.scn_max_txg : 0, B_TRUE);
                if (complete) {
                        spa_event_notify(spa, NULL,
                            scn->scn_phys.scn_min_txg ?
                            ESC_ZFS_RESILVER_FINISH : ESC_ZFS_SCRUB_FINISH);
                }
                spa_errlog_rotate(spa);

                /*
                 * We may have finished replacing a device.
                 * Let the async thread assess this and handle the detach.
                 */
                spa_async_request(spa, SPA_ASYNC_RESILVER_DONE);
        }

        scn->scn_phys.scn_end_time = gethrestime_sec();
}
static void
dsl_scan_setup_sync(void *arg, dmu_tx_t *tx)
{
        dsl_scan_t *scn = dmu_tx_pool(tx)->dp_scan;
        pool_scan_func_t *funcp = arg;
        dmu_object_type_t ot = 0;
        dsl_pool_t *dp = scn->scn_dp;
        spa_t *spa = dp->dp_spa;

        ASSERT(scn->scn_phys.scn_state != DSS_SCANNING);
        ASSERT(*funcp > POOL_SCAN_NONE && *funcp < POOL_SCAN_FUNCS);
        bzero(&scn->scn_phys, sizeof (scn->scn_phys));
        scn->scn_phys.scn_func = *funcp;
        scn->scn_phys.scn_state = DSS_SCANNING;
        scn->scn_phys.scn_min_txg = 0;
        scn->scn_phys.scn_max_txg = tx->tx_txg;
        scn->scn_phys.scn_ddt_class_max = DDT_CLASSES - 1; /* the entire DDT */
        scn->scn_phys.scn_start_time = gethrestime_sec();
        scn->scn_phys.scn_errors = 0;
        scn->scn_phys.scn_to_examine = spa->spa_root_vdev->vdev_stat.vs_alloc;
        scn->scn_restart_txg = 0;
        spa_scan_stat_init(spa);

        if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) {
                scn->scn_phys.scn_ddt_class_max = zfs_scrub_ddt_class_max;

                /* rewrite all disk labels */
                vdev_config_dirty(spa->spa_root_vdev);

                if (vdev_resilver_needed(spa->spa_root_vdev,
                    &scn->scn_phys.scn_min_txg, &scn->scn_phys.scn_max_txg)) {
                        spa_event_notify(spa, NULL, ESC_ZFS_RESILVER_START);
                } else {
                        spa_event_notify(spa, NULL, ESC_ZFS_SCRUB_START);
                }

                spa->spa_scrub_started = B_TRUE;

                /*
                 * If this is an incremental scrub, limit the DDT scrub phase
                 * to just the auto-ditto class (for correctness); the rest
                 * of the scrub should go faster using top-down pruning.
                 */
                if (scn->scn_phys.scn_min_txg > TXG_INITIAL)
                        scn->scn_phys.scn_ddt_class_max = DDT_CLASS_DITTO;
        }

        /* back to the generic stuff */
        if (dp->dp_blkstats == NULL) {
                dp->dp_blkstats =
                    kmem_alloc(sizeof (zfs_all_blkstats_t), KM_SLEEP);
        }
        bzero(dp->dp_blkstats, sizeof (zfs_all_blkstats_t));

        if (spa_version(spa) < SPA_VERSION_DSL_SCRUB)
                ot = DMU_OT_ZAP_OTHER;

        scn->scn_phys.scn_queue_obj = zap_create(dp->dp_meta_objset,
            ot ? ot : DMU_OT_SCAN_QUEUE, DMU_OT_NONE, 0, tx);

        dsl_scan_sync_state(scn, tx);

        spa_history_log_internal(spa, "scan setup", tx,
            "func=%u mintxg=%llu maxtxg=%llu",
            *funcp, scn->scn_phys.scn_min_txg, scn->scn_phys.scn_max_txg);
}
/*
 * Synchronize pool configuration to disk.  This must be called with the
 * namespace lock held.  Synchronizing the pool cache is typically done after
 * the configuration has been synced to the MOS.  This exposes a window where
 * the MOS config will have been updated but the cache file has not.  If
 * the system were to crash at that instant then the cached config may not
 * contain the correct information to open the pool and an explicit import
 * would be required.
 */
void
spa_config_sync(spa_t *target, boolean_t removing, boolean_t postsysevent)
{
        spa_config_dirent_t *dp, *tdp;
        nvlist_t *nvl;
        boolean_t ccw_failure;
        int error;

        ASSERT(MUTEX_HELD(&spa_namespace_lock));

        if (rootdir == NULL || !(spa_mode_global & FWRITE))
                return;

        /*
         * Iterate over all cachefiles for the pool, past or present.  When
         * the cachefile is changed, the new one is pushed onto this list,
         * allowing us to update previous cachefiles that no longer contain
         * this pool.
         */
        ccw_failure = B_FALSE;
        for (dp = list_head(&target->spa_config_list); dp != NULL;
            dp = list_next(&target->spa_config_list, dp)) {
                spa_t *spa = NULL;

                if (dp->scd_path == NULL)
                        continue;

                /*
                 * Iterate over all pools, adding any matching pools to 'nvl'.
                 */
                nvl = NULL;
                while ((spa = spa_next(spa)) != NULL) {
                        nvlist_t *nvroot = NULL;
                        /*
                         * Skip over our own pool if we're about to remove
                         * ourselves from the spa namespace or any pool that
                         * is readonly.  Since we cannot guarantee that a
                         * readonly pool would successfully import upon
                         * reboot, we don't allow them to be written to the
                         * cache file.
                         */
                        if ((spa == target && removing) ||
                            (spa_state(spa) == POOL_STATE_ACTIVE &&
                            !spa_writeable(spa)))
                                continue;

                        mutex_enter(&spa->spa_props_lock);
                        tdp = list_head(&spa->spa_config_list);
                        if (spa->spa_config == NULL ||
                            tdp->scd_path == NULL ||
                            strcmp(tdp->scd_path, dp->scd_path) != 0) {
                                mutex_exit(&spa->spa_props_lock);
                                continue;
                        }

                        if (nvl == NULL)
                                VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME,
                                    KM_SLEEP) == 0);

                        VERIFY(nvlist_add_nvlist(nvl, spa->spa_name,
                            spa->spa_config) == 0);
                        mutex_exit(&spa->spa_props_lock);

                        if (nvlist_lookup_nvlist(nvl, spa->spa_name,
                            &nvroot) == 0)
                                spa_config_clean(nvroot);
                }

                error = spa_config_write(dp, nvl);
                if (error != 0)
                        ccw_failure = B_TRUE;
                nvlist_free(nvl);
        }

        if (ccw_failure) {
                /*
                 * Keep trying so that configuration data is
                 * written if/when any temporary filesystem
                 * resource issues are resolved.
                 */
                if (target->spa_ccw_fail_time == 0) {
                        zfs_ereport_post(FM_EREPORT_ZFS_CONFIG_CACHE_WRITE,
                            target, NULL, NULL, 0, 0);
                }
                target->spa_ccw_fail_time = gethrtime();
                spa_async_request(target, SPA_ASYNC_CONFIG_UPDATE);
        } else {
                /*
                 * Do not rate limit future attempts to update
                 * the config cache.
                 */
                target->spa_ccw_fail_time = 0;
        }

        /*
         * Remove any config entries older than the current one.
         */
        dp = list_head(&target->spa_config_list);
        while ((tdp = list_next(&target->spa_config_list, dp)) != NULL) {
                list_remove(&target->spa_config_list, tdp);
                if (tdp->scd_path != NULL)
                        spa_strfree(tdp->scd_path);
                kmem_free(tdp, sizeof (spa_config_dirent_t));
        }

        spa_config_generation++;

        if (postsysevent)
                spa_event_notify(target, NULL, ESC_ZFS_CONFIG_SYNC);
}
/*
 * Synchronize pool configuration to disk.  This must be called with the
 * namespace lock held.  Synchronizing the pool cache is typically done after
 * the configuration has been synced to the MOS.  This exposes a window where
 * the MOS config will have been updated but the cache file has not.  If
 * the system were to crash at that instant then the cached config may not
 * contain the correct information to open the pool and an explicit import
 * would be required.
 */
void
spa_config_sync(spa_t *target, boolean_t removing, boolean_t postsysevent)
{
        spa_config_dirent_t *dp, *tdp;
        nvlist_t *nvl;
        char *pool_name;

        ASSERT(MUTEX_HELD(&spa_namespace_lock));

        if (rootdir == NULL || !(spa_mode_global & FWRITE))
                return;

        /*
         * Iterate over all cachefiles for the pool, past or present.  When
         * the cachefile is changed, the new one is pushed onto this list,
         * allowing us to update previous cachefiles that no longer contain
         * this pool.
         */
        for (dp = list_head(&target->spa_config_list); dp != NULL;
            dp = list_next(&target->spa_config_list, dp)) {
                spa_t *spa = NULL;

                if (dp->scd_path == NULL)
                        continue;

                /*
                 * Iterate over all pools, adding any matching pools to 'nvl'.
                 */
                nvl = NULL;
                while ((spa = spa_next(spa)) != NULL) {
                        /*
                         * Skip over our own pool if we're about to remove
                         * ourselves from the spa namespace or any pool that
                         * is readonly.  Since we cannot guarantee that a
                         * readonly pool would successfully import upon
                         * reboot, we don't allow them to be written to the
                         * cache file.
                         */
                        if ((spa == target && removing) ||
                            !spa_writeable(spa))
                                continue;

                        mutex_enter(&spa->spa_props_lock);
                        tdp = list_head(&spa->spa_config_list);
                        if (spa->spa_config == NULL ||
                            tdp->scd_path == NULL ||
                            strcmp(tdp->scd_path, dp->scd_path) != 0) {
                                mutex_exit(&spa->spa_props_lock);
                                continue;
                        }

                        if (nvl == NULL)
                                VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME,
                                    KM_SLEEP) == 0);

                        if (spa->spa_import_flags & ZFS_IMPORT_TEMP_NAME) {
                                VERIFY0(nvlist_lookup_string(spa->spa_config,
                                    ZPOOL_CONFIG_POOL_NAME, &pool_name));
                        } else
                                pool_name = spa_name(spa);

                        VERIFY(nvlist_add_nvlist(nvl, pool_name,
                            spa->spa_config) == 0);
                        mutex_exit(&spa->spa_props_lock);
                }

                spa_config_write(dp, nvl);
                nvlist_free(nvl);
        }

        /*
         * Remove any config entries older than the current one.
         */
        dp = list_head(&target->spa_config_list);
        while ((tdp = list_next(&target->spa_config_list, dp)) != NULL) {
                list_remove(&target->spa_config_list, tdp);
                if (tdp->scd_path != NULL)
                        spa_strfree(tdp->scd_path);
                kmem_free(tdp, sizeof (spa_config_dirent_t));
        }

        spa_config_generation++;

        if (postsysevent)
                spa_event_notify(target, NULL, FM_EREPORT_ZFS_CONFIG_SYNC);
}
/*
 * Synchronize pool configuration to disk.  This must be called with the
 * namespace lock held.
 */
void
spa_config_sync(spa_t *target, boolean_t removing, boolean_t postsysevent)
{
        spa_config_dirent_t *dp, *tdp;
        nvlist_t *nvl;

        ASSERT(MUTEX_HELD(&spa_namespace_lock));

        if (rootdir == NULL || !(spa_mode_global & FWRITE))
                return;

        /*
         * Iterate over all cachefiles for the pool, past or present.  When
         * the cachefile is changed, the new one is pushed onto this list,
         * allowing us to update previous cachefiles that no longer contain
         * this pool.
         */
        for (dp = list_head(&target->spa_config_list); dp != NULL;
            dp = list_next(&target->spa_config_list, dp)) {
                spa_t *spa = NULL;

                if (dp->scd_path == NULL)
                        continue;

                /*
                 * Iterate over all pools, adding any matching pools to 'nvl'.
                 */
                nvl = NULL;
                while ((spa = spa_next(spa)) != NULL) {
                        if (spa == target && removing)
                                continue;

                        mutex_enter(&spa->spa_props_lock);
                        tdp = list_head(&spa->spa_config_list);
                        if (spa->spa_config == NULL ||
                            tdp->scd_path == NULL ||
                            strcmp(tdp->scd_path, dp->scd_path) != 0) {
                                mutex_exit(&spa->spa_props_lock);
                                continue;
                        }

                        if (nvl == NULL)
                                VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME,
                                    KM_SLEEP) == 0);

                        VERIFY(nvlist_add_nvlist(nvl, spa->spa_name,
                            spa->spa_config) == 0);
                        mutex_exit(&spa->spa_props_lock);
                }

                spa_config_write(dp, nvl);
                nvlist_free(nvl);
        }

        /*
         * Remove any config entries older than the current one.
         */
        dp = list_head(&target->spa_config_list);
        while ((tdp = list_next(&target->spa_config_list, dp)) != NULL) {
                list_remove(&target->spa_config_list, tdp);
                if (tdp->scd_path != NULL)
                        spa_strfree(tdp->scd_path);
                kmem_free(tdp, sizeof (spa_config_dirent_t));
        }

        spa_config_generation++;

        if (postsysevent)
                spa_event_notify(target, NULL, FM_EREPORT_ZFS_CONFIG_SYNC);
}
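/*
 * Illustrative only: spa_config_sync() expects spa_namespace_lock to be held
 * by the caller.  A hypothetical caller (the helper name here is an example,
 * not one of the actual call sites) would look roughly like this.
 */
#if 0
static void
example_sync_cache_file(spa_t *spa)
{
        mutex_enter(&spa_namespace_lock);
        /* Not removing the pool; post the config-sync sysevent when done. */
        spa_config_sync(spa, B_FALSE, B_TRUE);
        mutex_exit(&spa_namespace_lock);
}
#endif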
/*
 * Checks whether allocated space on a special device
 * crossed either high or low watermarks.
 */
static void
spa_check_watermarks(spa_t *spa)
{
        metaslab_class_t *mc;
        uint64_t aspace, lspace;
        vdev_t *vd = NULL;

        if (!spa_has_special(spa))
                return;

        if (spa->spa_lowat == 0 && spa->spa_hiwat == 0)
                return;

        mc = spa_special_class(spa);
        vd = mc->mc_rotor->mg_vd;
        aspace = metaslab_class_get_alloc(mc);
        spa->spa_lwm_space = spa_special_space_perc(spa, spa->spa_lowat);
        spa->spa_hwm_space = spa_special_space_perc(spa, spa->spa_hiwat);
        spa->spa_wrc_wm_range = spa->spa_hwm_space - spa->spa_lwm_space;

        if (aspace <= spa->spa_lwm_space) {
                if (spa->spa_watermark != SPA_WM_NONE) {
                        spa->spa_watermark = SPA_WM_NONE;
                        spa_event_notify(spa, vd, ESC_ZFS_NONE_WATERMARK);
                }
                spa_enable_special(spa, B_TRUE);
        } else if (aspace > spa->spa_hwm_space) {
                if (spa->spa_watermark != SPA_WM_HIGH) {
                        spa->spa_watermark = SPA_WM_HIGH;
                        spa_enable_special(spa, B_FALSE);
                        spa_event_notify(spa, vd, ESC_ZFS_HIGH_WATERMARK);
                }
        } else {
                boolean_t wrc_route_init = B_FALSE;

                if (spa->spa_watermark != SPA_WM_LOW) {
                        wrc_route_init = B_TRUE;
                        if (spa->spa_watermark == SPA_WM_NONE)
                                spa_enable_special(spa, B_TRUE);
                        spa->spa_watermark = SPA_WM_LOW;
                        spa_event_notify(spa, vd, ESC_ZFS_LOW_WATERMARK);
                }

                if (spa->spa_wrc.wrc_thread != NULL) {
                        /*
                         * Unlike the meta device, the write cache is enabled
                         * when we change from SPA_WM_HIGH to SPA_WM_LOW, and
                         * the throttling logic is then enabled.
                         */
                        if (spa->spa_watermark == SPA_WM_HIGH)
                                spa_enable_special(spa, B_TRUE);
                        wrc_route_init = B_TRUE;
                        lspace = aspace - spa->spa_lwm_space;
                        if (spa->spa_wrc_wm_range) {
                                spa->spa_wrc_perc = (uint8_t)(lspace * 100 /
                                    spa->spa_wrc_wm_range);
                        } else {
                                spa->spa_wrc_perc = 50;
                        }
                        wrc_route_set(spa, wrc_route_init);
                }
        }

        DTRACE_PROBE1(check_wm, spa_t *, spa);
}
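/*
 * Worked example of the throttle percentage above (assumed numbers, not from
 * the source): if the low and high watermarks translate to 300 GiB and
 * 700 GiB of special-device space, spa_wrc_wm_range is 400 GiB.  With
 * 500 GiB currently allocated, lspace = 500 - 300 = 200 GiB, so
 * spa_wrc_perc = 200 * 100 / 400 = 50, i.e. the write-cache throttle sits
 * halfway between the two watermarks.
 */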
/* ARGSUSED */
static void
dsl_pool_scrub_setup_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
{
        dsl_pool_t *dp = arg1;
        enum scrub_func *funcp = arg2;
        dmu_object_type_t ot = 0;
        boolean_t complete = B_FALSE;

        dsl_pool_scrub_cancel_sync(dp, &complete, cr, tx);

        ASSERT(dp->dp_scrub_func == SCRUB_FUNC_NONE);
        ASSERT(*funcp > SCRUB_FUNC_NONE);
        ASSERT(*funcp < SCRUB_FUNC_NUMFUNCS);

        dp->dp_scrub_min_txg = 0;
        dp->dp_scrub_max_txg = tx->tx_txg;
        dp->dp_scrub_ddt_class_max = zfs_scrub_ddt_class_max;

        if (*funcp == SCRUB_FUNC_CLEAN) {
                vdev_t *rvd = dp->dp_spa->spa_root_vdev;

                /* rewrite all disk labels */
                vdev_config_dirty(rvd);

                if (vdev_resilver_needed(rvd,
                    &dp->dp_scrub_min_txg, &dp->dp_scrub_max_txg)) {
                        spa_event_notify(dp->dp_spa, NULL,
                            ESC_ZFS_RESILVER_START);
                        dp->dp_scrub_max_txg = MIN(dp->dp_scrub_max_txg,
                            tx->tx_txg);
                } else {
                        spa_event_notify(dp->dp_spa, NULL,
                            ESC_ZFS_SCRUB_START);
                }

                /* zero out the scrub stats in all vdev_stat_t's */
                vdev_scrub_stat_update(rvd,
                    dp->dp_scrub_min_txg ? POOL_SCRUB_RESILVER :
                    POOL_SCRUB_EVERYTHING, B_FALSE);

                /*
                 * If this is an incremental scrub, limit the DDT scrub phase
                 * to just the auto-ditto class (for correctness); the rest
                 * of the scrub should go faster using top-down pruning.
                 */
                if (dp->dp_scrub_min_txg > TXG_INITIAL)
                        dp->dp_scrub_ddt_class_max = DDT_CLASS_DITTO;

                dp->dp_spa->spa_scrub_started = B_TRUE;
        }

        /* back to the generic stuff */
        if (dp->dp_blkstats == NULL) {
                dp->dp_blkstats =
                    kmem_alloc(sizeof (zfs_all_blkstats_t), KM_SLEEP);
        }
        bzero(dp->dp_blkstats, sizeof (zfs_all_blkstats_t));

        if (spa_version(dp->dp_spa) < SPA_VERSION_DSL_SCRUB)
                ot = DMU_OT_ZAP_OTHER;

        dp->dp_scrub_func = *funcp;
        dp->dp_scrub_queue_obj = zap_create(dp->dp_meta_objset,
            ot ? ot : DMU_OT_SCRUB_QUEUE, DMU_OT_NONE, 0, tx);
        bzero(&dp->dp_scrub_bookmark, sizeof (zbookmark_t));
        bzero(&dp->dp_scrub_ddt_bookmark, sizeof (ddt_bookmark_t));
        dp->dp_scrub_restart = B_FALSE;
        dp->dp_spa->spa_scrub_errors = 0;

        VERIFY(0 == zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
            DMU_POOL_SCRUB_FUNC, sizeof (uint32_t), 1,
            &dp->dp_scrub_func, tx));
        VERIFY(0 == zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
            DMU_POOL_SCRUB_QUEUE, sizeof (uint64_t), 1,
            &dp->dp_scrub_queue_obj, tx));
        VERIFY(0 == zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
            DMU_POOL_SCRUB_MIN_TXG, sizeof (uint64_t), 1,
            &dp->dp_scrub_min_txg, tx));
        VERIFY(0 == zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
            DMU_POOL_SCRUB_MAX_TXG, sizeof (uint64_t), 1,
            &dp->dp_scrub_max_txg, tx));
        VERIFY(0 == zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
            DMU_POOL_SCRUB_BOOKMARK, sizeof (uint64_t),
            sizeof (dp->dp_scrub_bookmark) / sizeof (uint64_t),
            &dp->dp_scrub_bookmark, tx));
        VERIFY(0 == zap_update(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
            DMU_POOL_SCRUB_DDT_BOOKMARK, sizeof (uint64_t),
            sizeof (dp->dp_scrub_ddt_bookmark) / sizeof (uint64_t),
            &dp->dp_scrub_ddt_bookmark, tx));
        VERIFY(0 == zap_update(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
            DMU_POOL_SCRUB_DDT_CLASS_MAX, sizeof (uint64_t), 1,
            &dp->dp_scrub_ddt_class_max, tx));
        VERIFY(0 == zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
            DMU_POOL_SCRUB_ERRORS, sizeof (uint64_t), 1,
            &dp->dp_spa->spa_scrub_errors, tx));

        spa_history_internal_log(LOG_POOL_SCRUB, dp->dp_spa, tx, cr,
            "func=%u mintxg=%llu maxtxg=%llu",
            *funcp, dp->dp_scrub_min_txg, dp->dp_scrub_max_txg);
}
/* ARGSUSED */
static void
dsl_pool_scrub_cancel_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
{
        dsl_pool_t *dp = arg1;
        boolean_t *completep = arg2;

        if (dp->dp_scrub_func == SCRUB_FUNC_NONE)
                return;

        mutex_enter(&dp->dp_scrub_cancel_lock);

        if (dp->dp_scrub_restart) {
                dp->dp_scrub_restart = B_FALSE;
                *completep = B_FALSE;
        }

        /* XXX this is scrub-clean specific */
        mutex_enter(&dp->dp_spa->spa_scrub_lock);
        while (dp->dp_spa->spa_scrub_inflight > 0) {
                cv_wait(&dp->dp_spa->spa_scrub_io_cv,
                    &dp->dp_spa->spa_scrub_lock);
        }
        mutex_exit(&dp->dp_spa->spa_scrub_lock);
        dp->dp_spa->spa_scrub_active = B_FALSE;

        dp->dp_scrub_func = SCRUB_FUNC_NONE;
        VERIFY(0 == dmu_object_free(dp->dp_meta_objset,
            dp->dp_scrub_queue_obj, tx));
        dp->dp_scrub_queue_obj = 0;
        bzero(&dp->dp_scrub_bookmark, sizeof (zbookmark_t));
        bzero(&dp->dp_scrub_ddt_bookmark, sizeof (ddt_bookmark_t));

        VERIFY(0 == zap_remove(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
            DMU_POOL_SCRUB_QUEUE, tx));
        VERIFY(0 == zap_remove(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
            DMU_POOL_SCRUB_MIN_TXG, tx));
        VERIFY(0 == zap_remove(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
            DMU_POOL_SCRUB_MAX_TXG, tx));
        VERIFY(0 == zap_remove(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
            DMU_POOL_SCRUB_BOOKMARK, tx));
        VERIFY(0 == zap_remove(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
            DMU_POOL_SCRUB_FUNC, tx));
        VERIFY(0 == zap_remove(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
            DMU_POOL_SCRUB_ERRORS, tx));

        (void) zap_remove(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
            DMU_POOL_SCRUB_DDT_BOOKMARK, tx);
        (void) zap_remove(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
            DMU_POOL_SCRUB_DDT_CLASS_MAX, tx);

        spa_history_internal_log(LOG_POOL_SCRUB_DONE, dp->dp_spa, tx, cr,
            "complete=%u", *completep);

        /* below is scrub-clean specific */
        vdev_scrub_stat_update(dp->dp_spa->spa_root_vdev, POOL_SCRUB_NONE,
            *completep);

        /*
         * If the scrub/resilver completed, update all DTLs to reflect this.
         * Whether it succeeded or not, vacate all temporary scrub DTLs.
         */
        vdev_dtl_reassess(dp->dp_spa->spa_root_vdev, tx->tx_txg,
            *completep ? dp->dp_scrub_max_txg : 0, B_TRUE);
        dp->dp_spa->spa_scrub_started = B_FALSE;
        if (*completep)
                spa_event_notify(dp->dp_spa, NULL, dp->dp_scrub_min_txg ?
                    ESC_ZFS_RESILVER_FINISH : ESC_ZFS_SCRUB_FINISH);
        spa_errlog_rotate(dp->dp_spa);

        /*
         * We may have finished replacing a device.
         * Let the async thread assess this and handle the detach.
         */
        spa_async_request(dp->dp_spa, SPA_ASYNC_RESILVER_DONE);

        dp->dp_scrub_min_txg = dp->dp_scrub_max_txg = 0;

        mutex_exit(&dp->dp_scrub_cancel_lock);
}
/* ARGSUSED */
static void
dsl_pool_scrub_setup_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
{
        dsl_pool_t *dp = arg1;
        enum scrub_func *funcp = arg2;
        dmu_object_type_t ot = 0;
        boolean_t complete = B_FALSE;

        dsl_pool_scrub_cancel_sync(dp, &complete, cr, tx);

        ASSERT(dp->dp_scrub_func == SCRUB_FUNC_NONE);
        ASSERT(*funcp > SCRUB_FUNC_NONE);
        ASSERT(*funcp < SCRUB_FUNC_NUMFUNCS);

        dp->dp_scrub_min_txg = 0;
        dp->dp_scrub_max_txg = tx->tx_txg;

        if (*funcp == SCRUB_FUNC_CLEAN) {
                vdev_t *rvd = dp->dp_spa->spa_root_vdev;

                /* rewrite all disk labels */
                vdev_config_dirty(rvd);

                if (vdev_resilver_needed(rvd,
                    &dp->dp_scrub_min_txg, &dp->dp_scrub_max_txg)) {
                        spa_event_notify(dp->dp_spa, NULL,
                            ESC_ZFS_RESILVER_START);
                        dp->dp_scrub_max_txg = MIN(dp->dp_scrub_max_txg,
                            tx->tx_txg);
                }

                /* zero out the scrub stats in all vdev_stat_t's */
                vdev_scrub_stat_update(rvd,
                    dp->dp_scrub_min_txg ? POOL_SCRUB_RESILVER :
                    POOL_SCRUB_EVERYTHING, B_FALSE);

                dp->dp_spa->spa_scrub_started = B_TRUE;
        }

        /* back to the generic stuff */
        if (spa_version(dp->dp_spa) < SPA_VERSION_DSL_SCRUB)
                ot = DMU_OT_ZAP_OTHER;

        dp->dp_scrub_func = *funcp;
        dp->dp_scrub_queue_obj = zap_create(dp->dp_meta_objset,
            ot ? ot : DMU_OT_SCRUB_QUEUE, DMU_OT_NONE, 0, tx);
        bzero(&dp->dp_scrub_bookmark, sizeof (zbookmark_t));
        dp->dp_scrub_restart = B_FALSE;
        dp->dp_spa->spa_scrub_errors = 0;

        VERIFY(0 == zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
            DMU_POOL_SCRUB_FUNC, sizeof (uint32_t), 1,
            &dp->dp_scrub_func, tx));
        VERIFY(0 == zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
            DMU_POOL_SCRUB_QUEUE, sizeof (uint64_t), 1,
            &dp->dp_scrub_queue_obj, tx));
        VERIFY(0 == zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
            DMU_POOL_SCRUB_MIN_TXG, sizeof (uint64_t), 1,
            &dp->dp_scrub_min_txg, tx));
        VERIFY(0 == zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
            DMU_POOL_SCRUB_MAX_TXG, sizeof (uint64_t), 1,
            &dp->dp_scrub_max_txg, tx));
        VERIFY(0 == zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
            DMU_POOL_SCRUB_BOOKMARK, sizeof (uint64_t), 4,
            &dp->dp_scrub_bookmark, tx));
        VERIFY(0 == zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
            DMU_POOL_SCRUB_ERRORS, sizeof (uint64_t), 1,
            &dp->dp_spa->spa_scrub_errors, tx));

        spa_history_internal_log(LOG_POOL_SCRUB, dp->dp_spa, tx, cr,
            "func=%u mintxg=%llu maxtxg=%llu",
            *funcp, dp->dp_scrub_min_txg, dp->dp_scrub_max_txg);
}
/*
 * Synchronize pool configuration to disk.  This must be called with the
 * namespace lock held.
 */
void
spa_config_sync(spa_t *target, boolean_t removing, boolean_t postsysevent)
{
        spa_t *spa = NULL;
        spa_config_dirent_t *dp, *tdp;
        nvlist_t *nvl;

        ASSERT(MUTEX_HELD(&spa_namespace_lock));

        /*
         * Iterate over all cachefiles for the pool, past or present.  When
         * the cachefile is changed, the new one is pushed onto this list,
         * allowing us to update previous cachefiles that no longer contain
         * this pool.
         */
        for (dp = list_head(&target->spa_config_list); dp != NULL;
            dp = list_next(&target->spa_config_list, dp)) {
                spa = NULL;

                if (dp->scd_path == NULL)
                        continue;

                /*
                 * Iterate over all pools, adding any matching pools to 'nvl'.
                 */
                nvl = NULL;
                while ((spa = spa_next(spa)) != NULL) {
                        if (spa->spa_config == NULL || spa->spa_name == NULL)
                                continue;

                        if (spa == target && removing)
                                continue;

#ifdef __APPLE__
                        /* OS X - Omit disk based pools */
                        if (vdev_contains_disks(spa->spa_root_vdev))
                                continue;
#endif

                        tdp = list_head(&spa->spa_config_list);
                        ASSERT(tdp != NULL);
                        if (tdp->scd_path == NULL ||
                            strcmp(tdp->scd_path, dp->scd_path) != 0)
                                continue;

                        if (nvl == NULL)
                                VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME,
                                    KM_SLEEP) == 0);

                        VERIFY(nvlist_add_nvlist(nvl, spa->spa_name,
                            spa->spa_config) == 0);
                }

                spa_config_write(dp, nvl);
                nvlist_free(nvl);
        }

        /*
         * Remove any config entries older than the current one.
         */
        dp = list_head(&target->spa_config_list);
        while ((tdp = list_next(&target->spa_config_list, dp)) != NULL) {
                list_remove(&target->spa_config_list, tdp);
                if (tdp->scd_path != NULL)
                        spa_strfree(tdp->scd_path);
                kmem_free(tdp, sizeof (spa_config_dirent_t));
        }

        spa_config_generation++;

        if (postsysevent)
                spa_event_notify(target, NULL, ESC_ZFS_CONFIG_SYNC);
}