/* ARGSUSED */
static int
zpool_compare(const void *larg, const void *rarg, void *unused)
{
	zpool_handle_t *l = ((zpool_node_t *)larg)->zn_handle;
	zpool_handle_t *r = ((zpool_node_t *)rarg)->zn_handle;
	const char *lname = zpool_get_name(l);
	const char *rname = zpool_get_name(r);

	return (strcmp(lname, rname));
}
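The unused third parameter matches the libuutil comparator signature, which is what allows this function to be handed to a uu_avl pool. A minimal sketch of that registration follows; the pool name "zpool_nodes" and the zn_avlnode linkage field are illustrative assumptions, not taken from the snippet above.

static uu_avl_t *
zpool_list_create_sketch(void)
{
	/* hypothetical helper: build an empty AVL tree of zpool_node_t */
	uu_avl_pool_t *pool = uu_avl_pool_create("zpool_nodes",
	    sizeof (zpool_node_t), offsetof(zpool_node_t, zn_avlnode),
	    zpool_compare, UU_DEFAULT);

	if (pool == NULL)
		return (NULL);
	return (uu_avl_create(pool, NULL, UU_DEFAULT));
}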
static int
zfsdle_vdev_online(zpool_handle_t *zhp, void *data)
{
	char *devname = data;
	boolean_t avail_spare, l2cache;
	vdev_state_t newstate;
	nvlist_t *tgt;

	zed_log_msg(LOG_INFO, "zfsdle_vdev_online: searching for '%s' in '%s'",
	    devname, zpool_get_name(zhp));

	if ((tgt = zpool_find_vdev_by_physpath(zhp, devname, &avail_spare,
	    &l2cache, NULL)) != NULL) {
		char *path, fullpath[MAXPATHLEN];
		uint64_t wholedisk = 0ULL;

		verify(nvlist_lookup_string(tgt, ZPOOL_CONFIG_PATH,
		    &path) == 0);
		verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_WHOLE_DISK,
		    &wholedisk) == 0);

		(void) strlcpy(fullpath, path, sizeof (fullpath));
		if (wholedisk) {
			char *spath = zfs_strip_partition(fullpath);
			if (!spath) {
				zed_log_msg(LOG_INFO, "%s: Can't alloc",
				    __func__);
				return (0);
			}

			(void) strlcpy(fullpath, spath, sizeof (fullpath));
			free(spath);

			/*
			 * We need to reopen the pool associated with this
			 * device so that the kernel can update the size
			 * of the expanded device.
			 */
			(void) zpool_reopen(zhp);
		}

		if (zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOEXPAND, NULL)) {
			zed_log_msg(LOG_INFO, "zfsdle_vdev_online: setting "
			    "device '%s' to ONLINE state in pool '%s'",
			    fullpath, zpool_get_name(zhp));
			if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL)
				(void) zpool_vdev_online(zhp, fullpath, 0,
				    &newstate);
		}
		zpool_close(zhp);
		return (1);
	}
	zpool_close(zhp);
	return (0);
}
static int
zfs_iter_pool(zpool_handle_t *zhp, void *data)
{
	nvlist_t *config, *nvl;
	dev_data_t *dp = data;
	uint64_t pool_guid;
	unavailpool_t *pool;

	zed_log_msg(LOG_INFO, "zfs_iter_pool: evaluating vdevs on %s (by %s)",
	    zpool_get_name(zhp), dp->dd_vdev_guid ? "GUID" : dp->dd_prop);

	/*
	 * For each vdev in this pool, look for a match to apply dd_func
	 */
	if ((config = zpool_get_config(zhp, NULL)) != NULL) {
		if (dp->dd_pool_guid == 0 ||
		    (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
		    &pool_guid) == 0 && pool_guid == dp->dd_pool_guid)) {
			(void) nvlist_lookup_nvlist(config,
			    ZPOOL_CONFIG_VDEV_TREE, &nvl);
			zfs_iter_vdev(zhp, nvl, data);
		}
	}

	/*
	 * if this pool was originally unavailable,
	 * then enable its datasets asynchronously
	 */
	if (g_enumeration_done) {
		for (pool = list_head(&g_pool_list); pool != NULL;
		    pool = list_next(&g_pool_list, pool)) {

			if (pool->uap_enable_tid != 0)
				continue;	/* entry already processed */
			if (strcmp(zpool_get_name(zhp),
			    zpool_get_name(pool->uap_zhp)))
				continue;
			if (zfs_toplevel_state(zhp) >= VDEV_STATE_DEGRADED) {
				/* send to a background thread; keep on list */
				(void) pthread_create(&pool->uap_enable_tid,
				    NULL, zfs_enable_ds, pool);
				break;
			}
		}
	}

	zpool_close(zhp);
	return (dp->dd_found);	/* cease iteration after a match */
}
static int
zfsdle_vdev_online(zpool_handle_t *zhp, void *data)
{
	char *devname = data;
	boolean_t avail_spare, l2cache;
	vdev_state_t newstate;
	nvlist_t *tgt;

	syseventd_print(9, "zfsdle_vdev_online: searching for %s in pool %s\n",
	    devname, zpool_get_name(zhp));

	if ((tgt = zpool_find_vdev_by_physpath(zhp, devname, &avail_spare,
	    &l2cache, NULL)) != NULL) {
		char *path, fullpath[MAXPATHLEN];
		uint64_t wholedisk = 0ULL;

		verify(nvlist_lookup_string(tgt, ZPOOL_CONFIG_PATH,
		    &path) == 0);
		verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_WHOLE_DISK,
		    &wholedisk) == 0);

		(void) strlcpy(fullpath, path, sizeof (fullpath));
		if (wholedisk) {
			fullpath[strlen(fullpath) - 2] = '\0';

			/*
			 * We need to reopen the pool associated with this
			 * device so that the kernel can update the size
			 * of the expanded device.
			 */
			(void) zpool_reopen(zhp);
		}

		if (zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOEXPAND, NULL)) {
			syseventd_print(9, "zfsdle_vdev_online: setting "
			    "device %s to ONLINE state in pool %s.\n",
			    fullpath, zpool_get_name(zhp));
			if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL)
				(void) zpool_vdev_online(zhp, fullpath, 0,
				    &newstate);
		}
		zpool_close(zhp);
		return (1);
	}
	zpool_close(zhp);
	return (0);
}
static zpool_handle_t *
zpool_find_handle(zfs_handle_t *zhp, const char *pool_name, int len)
{
	libzfs_handle_t *hdl = zhp->zfs_hdl;
	zpool_handle_t *zph = hdl->libzfs_pool_handles;

	while ((zph != NULL) &&
	    (strncmp(pool_name, zpool_get_name(zph), len) != 0))
		zph = zph->zpool_next;
	return (zph);
}
int
zpool_disable_volumes(zfs_handle_t *nzhp, void *data)
{
	// Same pool?
	if (nzhp && nzhp->zpool_hdl && zpool_get_name(nzhp->zpool_hdl) &&
	    data && !strcmp(zpool_get_name(nzhp->zpool_hdl), (char *)data)) {
		if (zfs_get_type(nzhp) == ZFS_TYPE_VOLUME) {
			char *volume = NULL;

			/*
			 * /var/run/zfs/zvol/dsk/$POOL/$volume
			 */
			volume = zfs_asprintf(nzhp->zfs_hdl,
			    "%s/zfs/zvol/dsk/%s", ZVOL_ROOT,
			    zfs_get_name(nzhp));
			if (volume) {
				/* Unfortunately, diskutil does not handle our
				 * symlink to /dev/diskX - so we need to
				 * readlink() to find the path
				 */
				char dstlnk[MAXPATHLEN];
				int ret;

				/* leave room for the NUL terminator */
				ret = readlink(volume, dstlnk,
				    sizeof (dstlnk) - 1);
				if (ret > 0) {
					printf("Attempting to eject volume "
					    "'%s'\n", zfs_get_name(nzhp));
					dstlnk[ret] = '\0';
					do_unmount_volume(dstlnk, 0);
				}
				free(volume);
			}
		}
	}
	(void) zfs_iter_children(nzhp, zpool_disable_volumes, data);
	zfs_close(nzhp);
	return (0);
}
static int
zfs_iter_pool(zpool_handle_t *zhp, void *data)
{
	nvlist_t *config, *nvl;
	dev_data_t *dp = data;
	uint64_t pool_guid;
	unavailpool_t *pool;

	if ((config = zpool_get_config(zhp, NULL)) != NULL) {
		if (dp->dd_pool_guid == 0 ||
		    (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
		    &pool_guid) == 0 && pool_guid == dp->dd_pool_guid)) {
			(void) nvlist_lookup_nvlist(config,
			    ZPOOL_CONFIG_VDEV_TREE, &nvl);
			zfs_iter_vdev(zhp, nvl, data);
		}
	}
	if (g_enumeration_done) {
		for (pool = list_head(&g_pool_list); pool != NULL;
		    pool = list_next(&g_pool_list, pool)) {

			if (strcmp(zpool_get_name(zhp),
			    zpool_get_name(pool->uap_zhp)))
				continue;
			if (zfs_toplevel_state(zhp) >= VDEV_STATE_DEGRADED) {
				list_remove(&g_pool_list, pool);
				(void) tpool_dispatch(g_tpool, zfs_enable_ds,
				    pool);
				break;
			}
		}
	}
	zpool_close(zhp);
	return (0);
}
static int
zfs_unavail_pool(zpool_handle_t *zhp, void *data)
{
	zed_log_msg(LOG_INFO, "zfs_unavail_pool: examining '%s' (state %d)",
	    zpool_get_name(zhp), (int)zfs_toplevel_state(zhp));

	if (zfs_toplevel_state(zhp) < VDEV_STATE_DEGRADED) {
		unavailpool_t *uap;
		uap = malloc(sizeof (unavailpool_t));
		uap->uap_zhp = zhp;
		list_insert_tail((list_t *)data, uap);
	} else {
		zpool_close(zhp);
	}
	return (0);
}
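This callback is meant to be driven once over all imported pools so that unavailable ones end up on a list for later retry. A minimal sketch of that setup is below; the uap_node linkage field name is an assumption, since it is not shown in the snippet.

static void
zfs_collect_unavail_pools_sketch(libzfs_handle_t *hdl, list_t *pools)
{
	/* assumed list linkage field: uap_node */
	list_create(pools, sizeof (unavailpool_t),
	    offsetof(unavailpool_t, uap_node));

	/* keep handles for pools that are not at least DEGRADED */
	(void) zpool_iter(hdl, zfs_unavail_pool, pools);
}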
int zpool_get_stats(zpool_handle_t * zhp, void * data)
{
	config_t * cnf = (config_t *)data;
	uint_t c;
	boolean_t missing;
	nvlist_t * nv, * config;
	vdev_stat_t * vs;

	if (zpool_refresh_stats(zhp, &missing) != 0)
		return 1;

	config = zpool_get_config(zhp, NULL);

	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nv) != 0) {
		return 1;
	}
	if (nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS,
	    (uint64_t **)&vs, &c) != 0) {
		return 1;
	}

	if (!strcmp(zpool_get_name(zhp), cnf->zname)) {
		cnf->zpool.read_ops = vs->vs_ops[ZIO_TYPE_READ];
		cnf->zpool.write_ops = vs->vs_ops[ZIO_TYPE_WRITE];
		cnf->zpool.read_bts = vs->vs_bytes[ZIO_TYPE_READ];
		cnf->zpool.write_bts = vs->vs_bytes[ZIO_TYPE_WRITE];
		cnf->zpool.alloc = vs->vs_alloc;
		cnf->zpool.free = vs->vs_space - vs->vs_alloc;
		cnf->zpool.health = zpool_get_health(zhp);
		cnf->zpool.dedupratio = zpool_get_dedupratio(zhp);
		cnf->zpool.name = zpool_get_poolname(zhp);
		cnf->zpool.ddt_memory = get_dedup_stats(config);
	}
	zpool_close(zhp);
	return 0;
}
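A possible caller, shown only as a sketch: the layout of config_t is not part of this excerpt, so the zname assignment and the zero-initialization are assumptions.

static void sample_pool_stats_sketch(libzfs_handle_t *hdl, const char *poolname)
{
	config_t cnf;

	memset(&cnf, 0, sizeof (cnf));
	cnf.zname = (char *)poolname;	/* assumed: zname is a char * */

	/* walk every imported pool; only the matching one fills cnf.zpool */
	(void) zpool_iter(hdl, zpool_get_stats, &cnf);
}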
ssize_t
fmd_fmri_nvl2str(nvlist_t *nvl, char *buf, size_t buflen)
{
	uint64_t pool_guid, vdev_guid;
	cbdata_t cb;
	ssize_t len;
	const char *name;
	char guidbuf[64];

	(void) nvlist_lookup_uint64(nvl, FM_FMRI_ZFS_POOL, &pool_guid);

	/*
	 * Attempt to convert the pool guid to a name.
	 */
	cb.cb_guid = pool_guid;
	cb.cb_pool = NULL;

	if (zpool_iter(g_zfs, find_pool, &cb) == 1) {
		name = zpool_get_name(cb.cb_pool);
	} else {
		(void) snprintf(guidbuf, sizeof (guidbuf), "%llx", pool_guid);
		name = guidbuf;
	}

	if (nvlist_lookup_uint64(nvl, FM_FMRI_ZFS_VDEV, &vdev_guid) == 0)
		len = snprintf(buf, buflen, "%s://pool=%s/vdev=%llx",
		    FM_FMRI_SCHEME_ZFS, name, vdev_guid);
	else
		len = snprintf(buf, buflen, "%s://pool=%s",
		    FM_FMRI_SCHEME_ZFS, name);

	if (cb.cb_pool)
		zpool_close(cb.cb_pool);

	return (len);
}
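The find_pool callback is not part of this excerpt. A sketch consistent with how it is used above (return 1 and keep the handle on a GUID match, close and keep iterating otherwise) might look like this; treat it as an assumption, not the scheme's actual implementation.

static int
find_pool(zpool_handle_t *zhp, void *data)
{
	cbdata_t *cbp = data;

	/* keep the handle (and stop iterating) when the pool GUID matches */
	if (cbp->cb_guid == zpool_get_prop_int(zhp, ZPOOL_PROP_GUID, NULL)) {
		cbp->cb_pool = zhp;
		return (1);
	}

	zpool_close(zhp);
	return (0);
}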
/*
 * Determines if the pool is in use.  If so, it returns true and the state of
 * the pool as well as the name of the pool.  Both strings are allocated and
 * must be freed by the caller.
 */
int
zpool_in_use(libzfs_handle_t *hdl, int fd, pool_state_t *state, char **namestr,
    boolean_t *inuse)
{
	nvlist_t *config;
	char *name;
	boolean_t ret;
	uint64_t guid, vdev_guid;
	zpool_handle_t *zhp;
	nvlist_t *pool_config;
	uint64_t stateval, isspare;
	aux_cbdata_t cb = { 0 };
	boolean_t isactive;

	*inuse = B_FALSE;

	if (zpool_read_label(fd, &config, NULL) != 0 && errno == ENOMEM) {
		(void) no_memory(hdl);
		return (-1);
	}

	if (config == NULL)
		return (0);

	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
	    &stateval) == 0);
	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID,
	    &vdev_guid) == 0);

	if (stateval != POOL_STATE_SPARE && stateval != POOL_STATE_L2CACHE) {
		verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
		    &name) == 0);
		verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
		    &guid) == 0);
	}

	switch (stateval) {
	case POOL_STATE_EXPORTED:
		/*
		 * A pool with an exported state may in fact be imported
		 * read-only, so check the in-core state to see if it's
		 * active and imported read-only.  If it is, set
		 * its state to active.
		 */
		if (pool_active(hdl, name, guid, &isactive) == 0 && isactive &&
		    (zhp = zpool_open_canfail(hdl, name)) != NULL) {
			if (zpool_get_prop_int(zhp, ZPOOL_PROP_READONLY, NULL))
				stateval = POOL_STATE_ACTIVE;

			/*
			 * All we needed the zpool handle for is the
			 * readonly prop check.
			 */
			zpool_close(zhp);
		}

		ret = B_TRUE;
		break;

	case POOL_STATE_ACTIVE:
		/*
		 * For an active pool, we have to determine if it's really part
		 * of a currently active pool (in which case the pool will exist
		 * and the guid will be the same), or whether it's part of an
		 * active pool that was disconnected without being explicitly
		 * exported.
		 */
		if (pool_active(hdl, name, guid, &isactive) != 0) {
			nvlist_free(config);
			return (-1);
		}

		if (isactive) {
			/*
			 * Because the device may have been removed while
			 * offlined, we only report it as active if the vdev is
			 * still present in the config.  Otherwise, pretend like
			 * it's not in use.
			 */
			if ((zhp = zpool_open_canfail(hdl, name)) != NULL &&
			    (pool_config = zpool_get_config(zhp, NULL))
			    != NULL) {
				nvlist_t *nvroot;

				verify(nvlist_lookup_nvlist(pool_config,
				    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
				ret = find_guid(nvroot, vdev_guid);
			} else {
				ret = B_FALSE;
			}

			/*
			 * If this is an active spare within another pool, we
			 * treat it like an unused hot spare.  This allows the
			 * user to create a pool with a hot spare that is
			 * currently in use within another pool.  Since we
			 * return B_TRUE, libdiskmgt will continue to prevent
			 * generic consumers from using the device.
			 */
			if (ret && nvlist_lookup_uint64(config,
			    ZPOOL_CONFIG_IS_SPARE, &isspare) == 0 && isspare)
				stateval = POOL_STATE_SPARE;

			if (zhp != NULL)
				zpool_close(zhp);
		} else {
			stateval = POOL_STATE_POTENTIALLY_ACTIVE;
			ret = B_TRUE;
		}
		break;

	case POOL_STATE_SPARE:
		/*
		 * For a hot spare, it can be either definitively in use, or
		 * potentially active.  To determine if it's in use, we iterate
		 * over all pools in the system and search for one with a spare
		 * with a matching guid.
		 *
		 * Due to the shared nature of spares, we don't actually report
		 * the potentially active case as in use.  This means the user
		 * can freely create pools on the hot spares of exported pools,
		 * but to do otherwise makes the resulting code complicated, and
		 * we end up having to deal with this case anyway.
		 */
		cb.cb_zhp = NULL;
		cb.cb_guid = vdev_guid;
		cb.cb_type = ZPOOL_CONFIG_SPARES;
		if (zpool_iter(hdl, find_aux, &cb) == 1) {
			name = (char *)zpool_get_name(cb.cb_zhp);
			ret = B_TRUE;
		} else {
			ret = B_FALSE;
		}
		break;

	case POOL_STATE_L2CACHE:
		/*
		 * Check if any pool is currently using this l2cache device.
		 */
		cb.cb_zhp = NULL;
		cb.cb_guid = vdev_guid;
		cb.cb_type = ZPOOL_CONFIG_L2CACHE;
		if (zpool_iter(hdl, find_aux, &cb) == 1) {
			name = (char *)zpool_get_name(cb.cb_zhp);
			ret = B_TRUE;
		} else {
			ret = B_FALSE;
		}
		break;

	default:
		ret = B_FALSE;
	}

	if (ret) {
		if ((*namestr = zfs_strdup(hdl, name)) == NULL) {
			if (cb.cb_zhp)
				zpool_close(cb.cb_zhp);
			nvlist_free(config);
			return (-1);
		}
		*state = (pool_state_t)stateval;
	}

	if (cb.cb_zhp)
		zpool_close(cb.cb_zhp);

	nvlist_free(config);
	*inuse = ret;

	return (0);
}
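A hypothetical caller, sketched only to show the contract (open the device, ask whether any pool claims it, free the returned name). The device path, helper name, and use of zpool_pool_state_to_name() for display are assumptions; the usual system headers are assumed to be included.

static void
check_device_sketch(libzfs_handle_t *hdl, const char *devpath)
{
	pool_state_t state;
	char *name = NULL;
	boolean_t inuse = B_FALSE;
	int fd;

	if ((fd = open(devpath, O_RDONLY)) < 0)
		return;

	if (zpool_in_use(hdl, fd, &state, &name, &inuse) == 0 && inuse) {
		(void) printf("%s is part of pool '%s' (%s)\n", devpath,
		    name, zpool_pool_state_to_name(state));
		free(name);
	}
	(void) close(fd);
}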
/*
 * The device associated with the given vdev (either by devid or physical path)
 * has been added to the system.  If 'isdisk' is set, then we only attempt a
 * replacement if it's a whole disk.  This also implies that we should label
 * the disk first.
 *
 * First, we attempt to online the device (making sure to undo any spare
 * operation when finished).  If this succeeds, then we're done.  If it fails,
 * and the new state is VDEV_CANT_OPEN, it indicates that the device was
 * opened, but that the label was not what we expected.  If the 'autoreplace'
 * property is set, then we relabel the disk (if specified), and attempt a
 * 'zpool replace'.  If the online is successful, but the new state is
 * something else (REMOVED or FAULTED), it indicates that we're out of sync or
 * in some sort of race, and we should avoid attempting to relabel the disk.
 *
 * Also can arrive here from a ESC_ZFS_VDEV_CHECK event
 */
static void
zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
{
	char *path;
	vdev_state_t newstate;
	nvlist_t *nvroot, *newvd;
	pendingdev_t *device;
	uint64_t wholedisk = 0ULL;
	uint64_t offline = 0ULL;
	uint64_t guid = 0ULL;
	char *physpath = NULL, *new_devid = NULL;
	char rawpath[PATH_MAX], fullpath[PATH_MAX];
	char devpath[PATH_MAX];
	int ret;

	if (nvlist_lookup_string(vdev, ZPOOL_CONFIG_PATH, &path) != 0)
		return;

	(void) nvlist_lookup_string(vdev, ZPOOL_CONFIG_PHYS_PATH, &physpath);
	(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk);
	(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_OFFLINE, &offline);
	(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_GUID, &guid);

	if (offline)
		return;	/* don't intervene if it was taken offline */

	zed_log_msg(LOG_INFO, "zfs_process_add: pool '%s' vdev '%s' (%llu)",
	    zpool_get_name(zhp), path, (long long unsigned int)guid);

	/*
	 * The VDEV guid is preferred for identification (gets passed in path)
	 */
	if (guid != 0) {
		(void) snprintf(fullpath, sizeof (fullpath), "%llu",
		    (long long unsigned int)guid);
	} else {
		/*
		 * otherwise use path sans partition suffix for whole disks
		 */
		(void) strlcpy(fullpath, path, sizeof (fullpath));
		if (wholedisk) {
			char *spath = zfs_strip_partition(g_zfshdl, fullpath);

			(void) strlcpy(fullpath, spath, sizeof (fullpath));
			free(spath);
		}
	}

	/*
	 * Attempt to online the device.
	 */
	if (zpool_vdev_online(zhp, fullpath,
	    ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE, &newstate) == 0 &&
	    (newstate == VDEV_STATE_HEALTHY ||
	    newstate == VDEV_STATE_DEGRADED)) {
		zed_log_msg(LOG_INFO, " zpool_vdev_online: vdev %s is %s",
		    fullpath, (newstate == VDEV_STATE_HEALTHY) ?
		    "HEALTHY" : "DEGRADED");
		return;
	}

	/*
	 * If the pool doesn't have the autoreplace property set, then attempt
	 * a true online (without the unspare flag), which will trigger a FMA
	 * fault.
	 */
	if (!zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOREPLACE, NULL) ||
	    !wholedisk || physpath == NULL) {
		(void) zpool_vdev_online(zhp, fullpath, ZFS_ONLINE_FORCEFAULT,
		    &newstate);
		zed_log_msg(LOG_INFO, " zpool_vdev_online: %s FORCEFAULT (%s)",
		    fullpath, libzfs_error_description(g_zfshdl));
		return;
	}

	/*
	 * convert physical path into its current device node
	 */
	(void) snprintf(rawpath, sizeof (rawpath), "%s%s", DEV_BYPATH_PATH,
	    physpath);
	if (realpath(rawpath, devpath) == NULL) {
		zed_log_msg(LOG_INFO, " realpath: %s failed (%s)",
		    rawpath, strerror(errno));

		(void) zpool_vdev_online(zhp, fullpath, ZFS_ONLINE_FORCEFAULT,
		    &newstate);

		zed_log_msg(LOG_INFO, " zpool_vdev_online: %s FORCEFAULT (%s)",
		    fullpath, libzfs_error_description(g_zfshdl));
		return;
	}

	/*
	 * we're auto-replacing a raw disk, so label it first
	 */
	if (!labeled) {
		char *leafname;

		/*
		 * If this is a request to label a whole disk, then attempt to
		 * write out the label.  Before we can label the disk, we need
		 * to map the physical string that was matched on to the
		 * underlying device node.
		 *
		 * If any part of this process fails, then do a force online
		 * to trigger a ZFS fault for the device (and any hot spare
		 * replacement).
		 */
		leafname = strrchr(devpath, '/') + 1;

		/*
		 * If this is a request to label a whole disk, then attempt to
		 * write out the label.
		 */
		if (zpool_label_disk(g_zfshdl, zhp, leafname) != 0) {
			zed_log_msg(LOG_INFO, " zpool_label_disk: could not "
			    "label '%s' (%s)", leafname,
			    libzfs_error_description(g_zfshdl));

			(void) zpool_vdev_online(zhp, fullpath,
			    ZFS_ONLINE_FORCEFAULT, &newstate);
			return;
		}

		/*
		 * The disk labeling is asynchronous on Linux.  Just record
		 * this label request and return as there will be another
		 * disk add event for the partition after the labeling is
		 * completed.
		 */
		device = malloc(sizeof (pendingdev_t));
		(void) strlcpy(device->pd_physpath, physpath,
		    sizeof (device->pd_physpath));
		list_insert_tail(&g_device_list, device);

		zed_log_msg(LOG_INFO, " zpool_label_disk: async '%s' (%llu)",
		    leafname, (long long unsigned int)guid);

		return;	/* resumes at EC_DEV_ADD.ESC_DISK for partition */
	} else { /* labeled */
		boolean_t found = B_FALSE;
		/*
		 * match up with request above to label the disk
		 */
		for (device = list_head(&g_device_list); device != NULL;
		    device = list_next(&g_device_list, device)) {
			if (strcmp(physpath, device->pd_physpath) == 0) {
				list_remove(&g_device_list, device);
				free(device);
				found = B_TRUE;
				break;
			}
		}
		if (!found) {
			/* unexpected partition slice encountered */
			(void) zpool_vdev_online(zhp, fullpath,
			    ZFS_ONLINE_FORCEFAULT, &newstate);
			return;
		}

		zed_log_msg(LOG_INFO, " zpool_label_disk: resume '%s' (%llu)",
		    physpath, (long long unsigned int)guid);

		if (nvlist_lookup_string(vdev, "new_devid",
		    &new_devid) != 0) {
			zed_log_msg(LOG_INFO, " auto replace: missing devid!");
			return;
		}

		(void) snprintf(devpath, sizeof (devpath), "%s%s",
		    DEV_BYID_PATH, new_devid);
		path = devpath;
	}

	/*
	 * Construct the root vdev to pass to zpool_vdev_attach().  While
	 * adding the entire vdev structure is harmless, we construct a
	 * reduced set of path/physpath/wholedisk to keep it simple.
	 */
	if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0) {
		zed_log_msg(LOG_WARNING, "zfs_mod: nvlist_alloc out of memory");
		return;
	}
	if (nvlist_alloc(&newvd, NV_UNIQUE_NAME, 0) != 0) {
		zed_log_msg(LOG_WARNING, "zfs_mod: nvlist_alloc out of memory");
		nvlist_free(nvroot);
		return;
	}

	if (nvlist_add_string(newvd, ZPOOL_CONFIG_TYPE, VDEV_TYPE_DISK) != 0 ||
	    nvlist_add_string(newvd, ZPOOL_CONFIG_PATH, path) != 0 ||
	    nvlist_add_string(newvd, ZPOOL_CONFIG_DEVID, new_devid) != 0 ||
	    (physpath != NULL && nvlist_add_string(newvd,
	    ZPOOL_CONFIG_PHYS_PATH, physpath) != 0) ||
	    nvlist_add_uint64(newvd, ZPOOL_CONFIG_WHOLE_DISK, wholedisk) != 0 ||
	    nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) != 0 ||
	    nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, &newvd,
	    1) != 0) {
		zed_log_msg(LOG_WARNING, "zfs_mod: unable to add nvlist pairs");
		nvlist_free(newvd);
		nvlist_free(nvroot);
		return;
	}

	nvlist_free(newvd);

	/*
	 * auto replace a leaf disk at same physical location
	 */
	ret = zpool_vdev_attach(zhp, fullpath, path, nvroot, B_TRUE);

	zed_log_msg(LOG_INFO, " zpool_vdev_replace: %s with %s (%s)",
	    fullpath, path, (ret == 0) ? "no errors" :
	    libzfs_error_description(g_zfshdl));

	nvlist_free(nvroot);
}
/*
 * Function:	be_get_list_callback
 * Description:	Callback function used by zfs_iter to look through all
 *		the pools on the system looking for BEs. If a BE name was
 *		specified only that BE's information will be collected and
 *		returned.
 * Parameters:
 *		zlp - handle to the first zfs dataset. (provided by the
 *			zfs_iter_* call)
 *		data - pointer to the callback data and where we'll pass
 *			the BE information back.
 * Returns:
 *		0 - Success
 *		be_errno_t - Failure
 * Scope:
 *		Private
 */
static int
be_get_list_callback(zpool_handle_t *zlp, void *data)
{
	list_callback_data_t *cb = (list_callback_data_t *)data;
	char be_ds[MAXPATHLEN];
	char *open_ds = NULL;
	char *rpool = NULL;
	zfs_handle_t *zhp = NULL;
	int ret = 0;

	cb->zpool_name = rpool = (char *)zpool_get_name(zlp);

	/*
	 * Generate string for the BE container dataset
	 */
	be_make_container_ds(rpool, be_container_ds,
	    sizeof (be_container_ds));

	/*
	 * If a BE name was specified we use its root dataset in place of
	 * the container dataset. This is because we only want to collect
	 * the information for the specified BE.
	 */
	if (cb->be_name != NULL) {
		if (!be_valid_be_name(cb->be_name))
			return (BE_ERR_INVAL);
		/*
		 * Generate string for the BE root dataset
		 */
		be_make_root_ds(rpool, cb->be_name, be_ds, sizeof (be_ds));
		open_ds = be_ds;
	} else {
		open_ds = be_container_ds;
	}

	/*
	 * Check if the dataset exists
	 */
	if (!zfs_dataset_exists(g_zfs, open_ds, ZFS_TYPE_FILESYSTEM)) {
		/*
		 * The specified dataset does not exist in this pool or
		 * there are no valid BE's in this pool. Try the next zpool.
		 */
		zpool_close(zlp);
		return (0);
	}

	if ((zhp = zfs_open(g_zfs, open_ds, ZFS_TYPE_FILESYSTEM)) == NULL) {
		be_print_err(gettext("be_get_list_callback: failed to open "
		    "the BE dataset %s: %s\n"), open_ds,
		    libzfs_error_description(g_zfs));
		ret = zfs_err_to_be_err(g_zfs);
		zpool_close(zlp);
		return (ret);
	}

	/*
	 * If a BE name was specified we iterate through the datasets
	 * and snapshots for this BE only. Otherwise we will iterate
	 * through the next level of datasets to find all the BE's
	 * within the pool
	 */
	if (cb->be_name != NULL) {
		if (cb->be_nodes_head == NULL) {
			if ((cb->be_nodes_head = be_list_alloc(&ret,
			    sizeof (be_node_list_t))) == NULL) {
				ZFS_CLOSE(zhp);
				zpool_close(zlp);
				return (ret);
			}
			cb->be_nodes = cb->be_nodes_head;
		}
		if ((ret = be_get_node_data(zhp, cb->be_nodes, cb->be_name,
		    rpool, cb->current_be, be_ds)) != BE_SUCCESS) {
			ZFS_CLOSE(zhp);
			zpool_close(zlp);
			return (ret);
		}
		ret = zfs_iter_snapshots(zhp, be_add_children_callback, cb);
	}

	if (ret == 0)
		ret = zfs_iter_filesystems(zhp, be_add_children_callback, cb);
	ZFS_CLOSE(zhp);

	zpool_close(zlp);
	return (ret);
}
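An illustrative driver for this callback, sketched under the assumption that list_callback_data_t can be zero-initialized and that setting be_name is sufficient to restrict the walk to one BE; it simply hands the callback to zpool_iter() over every pool.

static int
be_list_sketch(const char *be_name)
{
	list_callback_data_t cb = { 0 };

	cb.be_name = (char *)be_name;	/* NULL would list every BE */
	return (zpool_iter(g_zfs, be_get_list_callback, &cb));
}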
/*
 * Unshare and unmount all datasets within the given pool.  We don't want to
 * rely on traversing the DSL to discover the filesystems within the pool,
 * because this may be expensive (if not all of them are mounted), and can fail
 * arbitrarily (on I/O error, for example).  Instead, we walk /etc/mtab and
 * gather all the filesystems that are currently mounted.
 */
int
zpool_disable_datasets(zpool_handle_t *zhp, boolean_t force)
{
	int used, alloc;
	struct mnttab entry;
	size_t namelen;
	char **mountpoints = NULL;
	zfs_handle_t **datasets = NULL;
	libzfs_handle_t *hdl = zhp->zpool_hdl;
	int i;
	int ret = -1;
	int flags = (force ? MS_FORCE : 0);

	namelen = strlen(zhp->zpool_name);

	/* Reopen MNTTAB to prevent reading stale data from open file */
	if (freopen(MNTTAB, "r", hdl->libzfs_mnttab) == NULL)
		return (ENOENT);

	used = alloc = 0;
	while (getmntent(hdl->libzfs_mnttab, &entry) == 0) {
		/*
		 * Ignore filesystems not within this pool.
		 */
		if (entry.mnt_fstype == NULL ||
		    strncmp(entry.mnt_special, zhp->zpool_name, namelen) != 0 ||
		    (entry.mnt_special[namelen] != '/' &&
#ifdef __APPLE__
		    /*
		     * On OS X, '@' is possible too since we're temporarily
		     * allowing manual snapshot mounting.
		     */
		    entry.mnt_special[namelen] != '@' &&
#endif /* __APPLE__ */
		    entry.mnt_special[namelen] != '\0'))
			continue;

		/*
		 * At this point we've found a filesystem within our pool.  Add
		 * it to our growing list.
		 */
		if (used == alloc) {
			if (alloc == 0) {
				if ((mountpoints = zfs_alloc(hdl,
				    8 * sizeof (void *))) == NULL)
					goto out;

				if ((datasets = zfs_alloc(hdl,
				    8 * sizeof (void *))) == NULL)
					goto out;

				alloc = 8;
			} else {
				void *ptr;

				if ((ptr = zfs_realloc(hdl, mountpoints,
				    alloc * sizeof (void *),
				    alloc * 2 * sizeof (void *))) == NULL)
					goto out;
				mountpoints = ptr;

				if ((ptr = zfs_realloc(hdl, datasets,
				    alloc * sizeof (void *),
				    alloc * 2 * sizeof (void *))) == NULL)
					goto out;
				datasets = ptr;

				alloc *= 2;
			}
		}

		if ((mountpoints[used] = zfs_strdup(hdl,
		    entry.mnt_mountp)) == NULL)
			goto out;

		/*
		 * This is allowed to fail, in case there is some I/O error.  It
		 * is only used to determine if we need to remove the underlying
		 * mountpoint, so failure is not fatal.
		 */
		datasets[used] = make_dataset_handle(hdl, entry.mnt_special);

		used++;
	}

	/*
	 * At this point, we have the entire list of filesystems, so sort it by
	 * mountpoint.
	 */
	qsort(mountpoints, used, sizeof (char *), mountpoint_compare);

	/*
	 * Walk through and first unshare everything.
	 */
	for (i = 0; i < used; i++) {
		zfs_share_proto_t *curr_proto;
		for (curr_proto = share_all_proto; *curr_proto != PROTO_END;
		    curr_proto++) {
			if (is_shared(hdl, mountpoints[i], *curr_proto) &&
			    unshare_one(hdl, mountpoints[i],
			    mountpoints[i], *curr_proto) != 0)
				goto out;
		}
	}

	/*
	 * Now unmount everything, removing the underlying directories as
	 * appropriate.
	 */
	for (i = 0; i < used; i++) {
		if (unmount_one(hdl, mountpoints[i], flags) != 0)
			goto out;
	}

	for (i = 0; i < used; i++) {
		if (datasets[i])
			remove_mountpoint(datasets[i]);
	}

	// Surely there exists a better way to iterate a POOL to find its ZVOLs?
	zfs_iter_root(hdl, zpool_disable_volumes,
	    (void *) zpool_get_name(zhp));

	ret = 0;
out:
	for (i = 0; i < used; i++) {
		if (datasets[i])
			zfs_close(datasets[i]);
		free(mountpoints[i]);
	}
	free(datasets);
	free(mountpoints);

	return (ret);
}
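The comparator handed to qsort() is not part of this excerpt. For the forward-order unmount loop above to tear down child filesystems before their parents, it would have to sort deeper mountpoints first; a sketch consistent with that ordering is below (hypothetical, not necessarily the project's actual implementation).

/*
 * Reverse-lexicographic order puts child mountpoints (e.g. /tank/a/b)
 * ahead of their parents (/tank/a), so unmounting in array order
 * removes leaves first.
 */
static int
mountpoint_compare(const void *a, const void *b)
{
	const char *mounta = *((char **)a);
	const char *mountb = *((char **)b);

	return (strcmp(mountb, mounta));
}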
/*
 * The device associated with the given vdev (either by devid or physical path)
 * has been added to the system.  If 'isdisk' is set, then we only attempt a
 * replacement if it's a whole disk.  This also implies that we should label
 * the disk first.
 *
 * First, we attempt to online the device (making sure to undo any spare
 * operation when finished).  If this succeeds, then we're done.  If it fails,
 * and the new state is VDEV_CANT_OPEN, it indicates that the device was
 * opened, but that the label was not what we expected.  If the 'autoreplace'
 * property is enabled, then we relabel the disk (if specified), and attempt a
 * 'zpool replace'.  If the online is successful, but the new state is
 * something else (REMOVED or FAULTED), it indicates that we're out of sync or
 * in some sort of race, and we should avoid attempting to relabel the disk.
 *
 * Also can arrive here from a ESC_ZFS_VDEV_CHECK event
 */
static void
zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
{
	char *path;
	vdev_state_t newstate;
	nvlist_t *nvroot, *newvd;
	pendingdev_t *device;
	uint64_t wholedisk = 0ULL;
	uint64_t offline = 0ULL;
	uint64_t guid = 0ULL;
	char *physpath = NULL, *new_devid = NULL, *enc_sysfs_path = NULL;
	char rawpath[PATH_MAX], fullpath[PATH_MAX];
	char devpath[PATH_MAX];
	int ret;
	int is_dm = 0;
	int is_sd = 0;
	uint_t c;
	vdev_stat_t *vs;

	if (nvlist_lookup_string(vdev, ZPOOL_CONFIG_PATH, &path) != 0)
		return;

	/* Skip healthy disks */
	verify(nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_VDEV_STATS,
	    (uint64_t **)&vs, &c) == 0);
	if (vs->vs_state == VDEV_STATE_HEALTHY) {
		zed_log_msg(LOG_INFO, "%s: %s is already healthy, skip it.",
		    __func__, path);
		return;
	}

	(void) nvlist_lookup_string(vdev, ZPOOL_CONFIG_PHYS_PATH, &physpath);
	(void) nvlist_lookup_string(vdev, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH,
	    &enc_sysfs_path);
	(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk);
	(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_OFFLINE, &offline);
	(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_GUID, &guid);

	if (offline)
		return;	/* don't intervene if it was taken offline */

	is_dm = zfs_dev_is_dm(path);
	zed_log_msg(LOG_INFO, "zfs_process_add: pool '%s' vdev '%s', phys '%s'"
	    " wholedisk %d, dm %d (%llu)", zpool_get_name(zhp), path,
	    physpath ? physpath : "NULL", wholedisk, is_dm,
	    (long long unsigned int)guid);

	/*
	 * The VDEV guid is preferred for identification (gets passed in path)
	 */
	if (guid != 0) {
		(void) snprintf(fullpath, sizeof (fullpath), "%llu",
		    (long long unsigned int)guid);
	} else {
		/*
		 * otherwise use path sans partition suffix for whole disks
		 */
		(void) strlcpy(fullpath, path, sizeof (fullpath));
		if (wholedisk) {
			char *spath = zfs_strip_partition(fullpath);
			if (!spath) {
				zed_log_msg(LOG_INFO, "%s: Can't alloc",
				    __func__);
				return;
			}

			(void) strlcpy(fullpath, spath, sizeof (fullpath));
			free(spath);
		}
	}

	/*
	 * Attempt to online the device.
	 */
	if (zpool_vdev_online(zhp, fullpath,
	    ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE, &newstate) == 0 &&
	    (newstate == VDEV_STATE_HEALTHY ||
	    newstate == VDEV_STATE_DEGRADED)) {
		zed_log_msg(LOG_INFO, " zpool_vdev_online: vdev %s is %s",
		    fullpath, (newstate == VDEV_STATE_HEALTHY) ?
		    "HEALTHY" : "DEGRADED");
		return;
	}

	/*
	 * vdev_id alias rule for using scsi_debug devices (FMA automated
	 * testing)
	 */
	if (physpath != NULL && strcmp("scsidebug", physpath) == 0)
		is_sd = 1;

	/*
	 * If the pool doesn't have the autoreplace property set, then use
	 * vdev online to trigger a FMA fault by posting an ereport.
	 */
	if (!zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOREPLACE, NULL) ||
	    !(wholedisk || is_dm) || (physpath == NULL)) {
		(void) zpool_vdev_online(zhp, fullpath, ZFS_ONLINE_FORCEFAULT,
		    &newstate);
		zed_log_msg(LOG_INFO, "Pool's autoreplace is not enabled or "
		    "not a whole disk for '%s'", fullpath);
		return;
	}

	/*
	 * Convert physical path into its current device node.  Rawpath
	 * needs to be /dev/disk/by-vdev for a scsi_debug device since
	 * /dev/disk/by-path will not be present.
	 */
	(void) snprintf(rawpath, sizeof (rawpath), "%s%s",
	    is_sd ? DEV_BYVDEV_PATH : DEV_BYPATH_PATH, physpath);

	if (realpath(rawpath, devpath) == NULL && !is_dm) {
		zed_log_msg(LOG_INFO, " realpath: %s failed (%s)",
		    rawpath, strerror(errno));

		(void) zpool_vdev_online(zhp, fullpath, ZFS_ONLINE_FORCEFAULT,
		    &newstate);

		zed_log_msg(LOG_INFO, " zpool_vdev_online: %s FORCEFAULT (%s)",
		    fullpath, libzfs_error_description(g_zfshdl));
		return;
	}

	/* Only autoreplace bad disks */
	if ((vs->vs_state != VDEV_STATE_DEGRADED) &&
	    (vs->vs_state != VDEV_STATE_FAULTED) &&
	    (vs->vs_state != VDEV_STATE_CANT_OPEN)) {
		return;
	}

	nvlist_lookup_string(vdev, "new_devid", &new_devid);

	if (is_dm) {
		/* Don't label device mapper or multipath disks. */
	} else if (!labeled) {
		/*
		 * we're auto-replacing a raw disk, so label it first
		 */
		char *leafname;

		/*
		 * If this is a request to label a whole disk, then attempt to
		 * write out the label.  Before we can label the disk, we need
		 * to map the physical string that was matched on to the
		 * underlying device node.
		 *
		 * If any part of this process fails, then do a force online
		 * to trigger a ZFS fault for the device (and any hot spare
		 * replacement).
		 */
		leafname = strrchr(devpath, '/') + 1;

		/*
		 * If this is a request to label a whole disk, then attempt to
		 * write out the label.
		 */
		if (zpool_label_disk(g_zfshdl, zhp, leafname) != 0) {
			zed_log_msg(LOG_INFO, " zpool_label_disk: could not "
			    "label '%s' (%s)", leafname,
			    libzfs_error_description(g_zfshdl));

			(void) zpool_vdev_online(zhp, fullpath,
			    ZFS_ONLINE_FORCEFAULT, &newstate);
			return;
		}

		/*
		 * The disk labeling is asynchronous on Linux.  Just record
		 * this label request and return as there will be another
		 * disk add event for the partition after the labeling is
		 * completed.
		 */
		device = malloc(sizeof (pendingdev_t));
		(void) strlcpy(device->pd_physpath, physpath,
		    sizeof (device->pd_physpath));
		list_insert_tail(&g_device_list, device);

		zed_log_msg(LOG_INFO, " zpool_label_disk: async '%s' (%llu)",
		    leafname, (u_longlong_t)guid);

		return;	/* resumes at EC_DEV_ADD.ESC_DISK for partition */
	} else /* labeled */ {
		boolean_t found = B_FALSE;
		/*
		 * match up with request above to label the disk
		 */
		for (device = list_head(&g_device_list); device != NULL;
		    device = list_next(&g_device_list, device)) {
			if (strcmp(physpath, device->pd_physpath) == 0) {
				list_remove(&g_device_list, device);
				free(device);
				found = B_TRUE;
				break;
			}
			zed_log_msg(LOG_INFO, "zpool_label_disk: %s != %s",
			    physpath, device->pd_physpath);
		}

		if (!found) {
			/* unexpected partition slice encountered */
			zed_log_msg(LOG_INFO,
			    "labeled disk %s unexpected here", fullpath);
			(void) zpool_vdev_online(zhp, fullpath,
			    ZFS_ONLINE_FORCEFAULT, &newstate);
			return;
		}

		zed_log_msg(LOG_INFO, " zpool_label_disk: resume '%s' (%llu)",
		    physpath, (u_longlong_t)guid);

		(void) snprintf(devpath, sizeof (devpath), "%s%s",
		    DEV_BYID_PATH, new_devid);
	}

	/*
	 * Construct the root vdev to pass to zpool_vdev_attach().  While
	 * adding the entire vdev structure is harmless, we construct a
	 * reduced set of path/physpath/wholedisk to keep it simple.
	 */
	if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0) {
		zed_log_msg(LOG_WARNING, "zfs_mod: nvlist_alloc out of memory");
		return;
	}
	if (nvlist_alloc(&newvd, NV_UNIQUE_NAME, 0) != 0) {
		zed_log_msg(LOG_WARNING, "zfs_mod: nvlist_alloc out of memory");
		nvlist_free(nvroot);
		return;
	}

	if (nvlist_add_string(newvd, ZPOOL_CONFIG_TYPE, VDEV_TYPE_DISK) != 0 ||
	    nvlist_add_string(newvd, ZPOOL_CONFIG_PATH, path) != 0 ||
	    nvlist_add_string(newvd, ZPOOL_CONFIG_DEVID, new_devid) != 0 ||
	    (physpath != NULL && nvlist_add_string(newvd,
	    ZPOOL_CONFIG_PHYS_PATH, physpath) != 0) ||
	    (enc_sysfs_path != NULL && nvlist_add_string(newvd,
	    ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH, enc_sysfs_path) != 0) ||
	    nvlist_add_uint64(newvd, ZPOOL_CONFIG_WHOLE_DISK, wholedisk) != 0 ||
	    nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) != 0 ||
	    nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, &newvd,
	    1) != 0) {
		zed_log_msg(LOG_WARNING, "zfs_mod: unable to add nvlist pairs");
		nvlist_free(newvd);
		nvlist_free(nvroot);
		return;
	}

	nvlist_free(newvd);

	/*
	 * Wait for udev to verify the links exist, then auto-replace
	 * the leaf disk at same physical location.
	 */
	if (zpool_label_disk_wait(path, 3000) != 0) {
		zed_log_msg(LOG_WARNING, "zfs_mod: expected replacement "
		    "disk %s is missing", path);
		nvlist_free(nvroot);
		return;
	}

	ret = zpool_vdev_attach(zhp, fullpath, path, nvroot, B_TRUE);

	zed_log_msg(LOG_INFO, " zpool_vdev_replace: %s with %s (%s)",
	    fullpath, path, (ret == 0) ? "no errors" :
	    libzfs_error_description(g_zfshdl));

	nvlist_free(nvroot);
}
static int
zfsdle_vdev_online(zpool_handle_t *zhp, void *data)
{
	char *devname = data;
	boolean_t avail_spare, l2cache;
	nvlist_t *tgt;
	int error;

	zed_log_msg(LOG_INFO, "zfsdle_vdev_online: searching for '%s' in '%s'",
	    devname, zpool_get_name(zhp));

	if ((tgt = zpool_find_vdev_by_physpath(zhp, devname, &avail_spare,
	    &l2cache, NULL)) != NULL) {
		char *path, fullpath[MAXPATHLEN];
		uint64_t wholedisk;

		error = nvlist_lookup_string(tgt, ZPOOL_CONFIG_PATH, &path);
		if (error) {
			zpool_close(zhp);
			return (0);
		}

		error = nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_WHOLE_DISK,
		    &wholedisk);
		if (error)
			wholedisk = 0;

		if (wholedisk) {
			path = strrchr(path, '/');
			if (path != NULL) {
				path = zfs_strip_partition(path + 1);
				if (path == NULL) {
					zpool_close(zhp);
					return (0);
				}
			} else {
				zpool_close(zhp);
				return (0);
			}

			(void) strlcpy(fullpath, path, sizeof (fullpath));
			free(path);

			/*
			 * We need to reopen the pool associated with this
			 * device so that the kernel can update the size of
			 * the expanded device.  When expanding there is no
			 * need to restart the scrub from the beginning.
			 */
			boolean_t scrub_restart = B_FALSE;
			(void) zpool_reopen_one(zhp, &scrub_restart);
		} else {
			(void) strlcpy(fullpath, path, sizeof (fullpath));
		}

		if (zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOEXPAND, NULL)) {
			vdev_state_t newstate;

			if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL) {
				error = zpool_vdev_online(zhp, fullpath, 0,
				    &newstate);
				zed_log_msg(LOG_INFO, "zfsdle_vdev_online: "
				    "setting device '%s' to ONLINE state "
				    "in pool '%s': %d", fullpath,
				    zpool_get_name(zhp), error);
			}
		}
		zpool_close(zhp);
		return (1);
	}
	zpool_close(zhp);
	return (0);
}
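Because this callback returns 1 only for the pool that owns the device, a dispatcher can hand it to zpool_iter() and treat any other result as "device not found". The sketch below is hypothetical wiring, reusing the g_zfshdl handle and zed_log_msg() seen in the other snippets.

static void
handle_dle_sketch(const char *devname)
{
	/* walk every imported pool until one claims the expanded device */
	if (zpool_iter(g_zfshdl, zfsdle_vdev_online, (void *)devname) != 1)
		zed_log_msg(LOG_INFO, "%s: device '%s' not found in any pool",
		    __func__, devname);
}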
/*
 * Print out configuration state as requested by status_callback.
 */
void
lzwu_zpool_print_status_config(libzfs_handle_t *p_zhd, zpool_handle_t *zhp,
    const char *name, nvlist_t *nv, int namewidth, int depth,
    boolean_t isspare)
{
	nvlist_t **child;
	uint_t children;
	unsigned c;
	vdev_stat_t *vs;
	char rbuf[6], wbuf[6], cbuf[6], repaired[7];
	char *vname;
	uint64_t notpresent;
	spare_cbdata_t cb;
	char *state;

	verify(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_STATS,
	    (uint64_t **)&vs, &c) == 0);

	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
	    &child, &children) != 0)
		children = 0;

	state = zpool_state_to_name(vs->vs_state, vs->vs_aux);
	if(isspare) {
		/*
		 * For hot spares, we use the terms 'INUSE' and 'AVAILABLE' for
		 * online drives.
		 */
		if(vs->vs_aux == VDEV_AUX_SPARED)
			state = "INUSE";
		else if(vs->vs_state == VDEV_STATE_HEALTHY)
			state = "AVAIL";
	}

	printf("\t%*s%-*s %-8s", depth, "", namewidth - depth,
	    name, state);

	if(!isspare) {
		zfs_nicenum(vs->vs_read_errors, rbuf, sizeof (rbuf));
		zfs_nicenum(vs->vs_write_errors, wbuf, sizeof (wbuf));
		zfs_nicenum(vs->vs_checksum_errors, cbuf, sizeof (cbuf));
		printf(" %5s %5s %5s", rbuf, wbuf, cbuf);
	}

	if(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT,
	    &notpresent) == 0) {
		char *path;
		verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH,
		    &path) == 0);
		printf(" was %s", path);
	} else if(vs->vs_aux != 0) {
		printf(" ");

		switch (vs->vs_aux) {
		case VDEV_AUX_OPEN_FAILED:
			printf("cannot open");
			break;

		case VDEV_AUX_BAD_GUID_SUM:
			printf("missing device");
			break;

		case VDEV_AUX_NO_REPLICAS:
			printf("insufficient replicas");
			break;

		case VDEV_AUX_VERSION_NEWER:
			printf("newer version");
			break;

		case VDEV_AUX_SPARED:
			verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
			    &cb.cb_guid) == 0);
			if(zpool_iter(p_zhd, lzwu_find_spare, &cb) == 1) {
				if(strcmp(zpool_get_name(cb.cb_zhp),
				    zpool_get_name(zhp)) == 0)
					printf("currently in use");
				else
					printf("in use by pool '%s'",
					    zpool_get_name(cb.cb_zhp));
				zpool_close(cb.cb_zhp);
			} else
				printf("currently in use");
			break;

		case VDEV_AUX_ERR_EXCEEDED:
			printf("too many errors");
			break;

		case VDEV_AUX_IO_FAILURE:
			printf("experienced I/O failures");
			break;

		case VDEV_AUX_BAD_LOG:
			printf("bad intent log");
			break;

		case VDEV_AUX_EXTERNAL:
			printf("external device fault");
			break;

		case VDEV_AUX_SPLIT_POOL:
			printf("split into new pool");
			break;

		default:
			printf("corrupted data");
			break;
		}
	} else if(vs->vs_scrub_repaired != 0 && children == 0) {
		/*
		 * Report bytes resilvered/repaired on leaf devices.
		 */
		zfs_nicenum(vs->vs_scrub_repaired, repaired,
		    sizeof (repaired));
		printf(" %s %s", repaired,
		    (vs->vs_scrub_type == POOL_SCRUB_RESILVER) ?
		    "resilvered" : "repaired");
	}

	printf("\n");

	for(unsigned c = 0; c < children; c++) {
		uint64_t islog = B_FALSE, ishole = B_FALSE;

		/* Don't print logs or holes here */
		nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG, &islog);
		nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_HOLE, &ishole);
		if(islog || ishole)
			continue;
		vname = zpool_vdev_name(p_zhd, zhp, child[c], B_TRUE);
		lzwu_zpool_print_status_config(p_zhd, zhp, vname, child[c],
		    namewidth, depth + 2, isspare);
		free(vname);
	}
}